app.ai_providers.base

Core abstractions, constants, and shared utilities for AI providers.

This module contains the foundational components shared across all AI provider implementations: the AIProvider abstract base class, the AIProviderError exception, rate-limit retry logic, configuration resolution helpers, and error-detection utilities.

Attributes:
  • DEFAULT_MAX_TOKENS: Default maximum completion tokens across all providers.
  • RATE_LIMIT_MAX_RETRIES: Number of retries on rate-limit (HTTP 429) errors.
  • DEFAULT_LOCAL_BASE_URL: Default Ollama-style local endpoint URL.
  • DEFAULT_CLOUD_REQUEST_TIMEOUT_SECONDS: Default HTTP timeout for cloud provider endpoints (10 minutes).
  • DEFAULT_LOCAL_REQUEST_TIMEOUT_SECONDS: Default HTTP timeout for local endpoints (1 hour, to accommodate large model inference).
  • DEFAULT_KIMI_BASE_URL: Default Moonshot Kimi API base URL.
  • DEFAULT_CLAUDE_MODEL: Default Anthropic Claude model identifier.
  • DEFAULT_OPENAI_MODEL: Default OpenAI model identifier.
  • DEFAULT_KIMI_MODEL: Default Moonshot Kimi model identifier.
  • DEFAULT_KIMI_FILE_UPLOAD_PURPOSE: File upload purpose string for Kimi.
  • DEFAULT_LOCAL_MODEL: Default model identifier for local providers.
  • _RATE_LIMIT_STATE: Module-level dict mapping provider names to RateLimitState instances.
  • _RATE_LIMIT_STATE_LOCK: Threading lock protecting _RATE_LIMIT_STATE.
  1"""Core abstractions, constants, and shared utilities for AI providers.
  2
  3This module contains the foundational components shared across all AI provider
  4implementations: the ``AIProvider`` abstract base class, the ``AIProviderError``
  5exception, rate-limit retry logic, configuration resolution helpers, and
  6error-detection utilities.
  7
  8Attributes:
  9    DEFAULT_MAX_TOKENS: Default maximum completion tokens across all providers.
 10    RATE_LIMIT_MAX_RETRIES: Number of retries on rate-limit (HTTP 429) errors.
 11    DEFAULT_LOCAL_BASE_URL: Default Ollama-style local endpoint URL.
 12    DEFAULT_CLOUD_REQUEST_TIMEOUT_SECONDS: Default HTTP timeout for cloud
 13        provider endpoints (10 minutes).
 14    DEFAULT_LOCAL_REQUEST_TIMEOUT_SECONDS: Default HTTP timeout for local
 15        endpoints (1 hour, to accommodate large model inference).
 16    DEFAULT_KIMI_BASE_URL: Default Moonshot Kimi API base URL.
 17    DEFAULT_CLAUDE_MODEL: Default Anthropic Claude model identifier.
 18    DEFAULT_OPENAI_MODEL: Default OpenAI model identifier.
 19    DEFAULT_KIMI_MODEL: Default Moonshot Kimi model identifier.
 20    DEFAULT_KIMI_FILE_UPLOAD_PURPOSE: File upload purpose string for Kimi.
 21    DEFAULT_LOCAL_MODEL: Default model identifier for local providers.
 22    _RATE_LIMIT_STATE: Module-level dict mapping provider names to
 23        ``RateLimitState`` instances.
 24    _RATE_LIMIT_STATE_LOCK: Threading lock protecting ``_RATE_LIMIT_STATE``.
 25"""
 26
from __future__ import annotations

import logging
import math
import os
import re
import threading
import time
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime, timezone
from email.utils import parsedate_to_datetime
from typing import Any, Callable, Iterator, Mapping, TypeVar
from urllib.parse import urlsplit, urlunsplit
 38
 39logger = logging.getLogger(__name__)
 40_T = TypeVar("_T")
 41
 42# ---------------------------------------------------------------------------
 43# Constants
 44# ---------------------------------------------------------------------------
 45
# Default completion-token budget requested from every provider.
DEFAULT_MAX_TOKENS = 256000
# Retries attempted after a rate-limit (HTTP 429) error before giving up.
RATE_LIMIT_MAX_RETRIES = 3
# Ollama-style OpenAI-compatible endpoint on the default local port.
DEFAULT_LOCAL_BASE_URL = "http://localhost:11434/v1"
# Cloud endpoints get 10 minutes; local inference can be far slower, so
# local endpoints get a full hour.
DEFAULT_CLOUD_REQUEST_TIMEOUT_SECONDS = 600.0
DEFAULT_LOCAL_REQUEST_TIMEOUT_SECONDS = 3600.0
DEFAULT_KIMI_BASE_URL = "https://api.moonshot.ai/v1"
# Default model identifiers, one per provider.
DEFAULT_CLAUDE_MODEL = "claude-opus-4-6"
DEFAULT_OPENAI_MODEL = "gpt-5.4"
DEFAULT_KIMI_MODEL = "kimi-k2-turbo-preview"
# Upload "purpose" value used with the Kimi file API.
DEFAULT_KIMI_FILE_UPLOAD_PURPOSE = "file-extract"
DEFAULT_LOCAL_MODEL = "llama3.1:70b"

# Deprecated Kimi model names mapped to their replacements; keys are
# matched case-insensitively (callers look up the lower-cased name).
_KIMI_MODEL_ALIASES = {
    "kimi-v2.5": DEFAULT_KIMI_MODEL,
}

# Lower-cased substrings that identify context/token-length overflow errors.
_CONTEXT_LENGTH_PATTERNS = (
    "context length",
    "context window",
    "context_length_exceeded",
    "maximum context",
    "too many tokens",
    "token limit",
    "prompt is too long",
    "input is too long",
)

# Lower-cased substrings that identify "model unavailable" errors from Kimi.
_KIMI_MODEL_NOT_AVAILABLE_PATTERNS = (
    "not found the model",
    "model not found",
    "resource_not_found_error",
    "permission denied",
    "unknown model",
)

# Matches messages like "supports at most 8192 completion tokens".
_SUPPORTED_COMPLETION_TOKEN_LIMIT_RE = re.compile(
    r"supports\s+at\s+most\s+(?P<limit>\d+)\s+(?:completion\s+)?tokens",
    flags=re.IGNORECASE,
)
# Matches messages like "max_tokens: 256000 > 8192", capturing the cap.
_MAX_TOKENS_UPPER_BOUND_RE = re.compile(
    r"max[_\s]?tokens?\s*:\s*\d+\s*>\s*(?P<limit>\d+)",
    flags=re.IGNORECASE,
)
 89
 90# ---------------------------------------------------------------------------
 91# Rate-limit state
 92# ---------------------------------------------------------------------------
 93
 94
@dataclass
class RateLimitState:
    """Persistent rate-limit tracking state for a single AI provider.

    Attributes:
        last_request_time: Monotonic timestamp of the last API request attempt.
        backoff_duration: Current backoff duration in seconds. Reset to 0.0
            on a successful request.
        consecutive_error_count: Number of consecutive rate-limit errors.
            Reset to 0 on a successful request.
        lock: Per-provider lock for thread-safe state access.
    """

    last_request_time: float = 0.0
    backoff_duration: float = 0.0
    consecutive_error_count: int = 0
    # default_factory gives every provider instance its own independent lock.
    lock: threading.Lock = field(default_factory=threading.Lock)
112
113
# Process-wide registry of provider name -> RateLimitState. Entry creation is
# guarded by _RATE_LIMIT_STATE_LOCK; each entry then carries its own lock.
_RATE_LIMIT_STATE: dict[str, RateLimitState] = {}
_RATE_LIMIT_STATE_LOCK = threading.Lock()
116
117
def _get_rate_limit_state(provider_name: str) -> RateLimitState:
    """Return the shared rate-limit state for a provider, creating it lazily.

    Args:
        provider_name: Human-readable provider name (e.g., ``"Claude"``).

    Returns:
        The ``RateLimitState`` instance for the given provider.
    """
    with _RATE_LIMIT_STATE_LOCK:
        state = _RATE_LIMIT_STATE.get(provider_name)
        if state is None:
            state = RateLimitState()
            _RATE_LIMIT_STATE[provider_name] = state
        return state
131
132
133# ---------------------------------------------------------------------------
134# Exception
135# ---------------------------------------------------------------------------
136
137
class AIProviderError(RuntimeError):
    """Signals a failed AI provider request using a display-safe message.

    Every provider implementation converts its SDK-specific exceptions into
    this single type, so callers need only one ``except`` clause to handle
    any AI-related failure. Messages are suitable for rendering in the
    web UI.
    """
145
146
147# ---------------------------------------------------------------------------
148# Abstract base class
149# ---------------------------------------------------------------------------
150
151
class AIProvider(ABC):
    """Common interface implemented by every AI provider backend.

    Each concrete provider (Claude, OpenAI, Kimi, Local) exposes this
    interface, which lets the forensic analysis engine swap providers
    without changing call sites.

    Subclasses must implement:
        * ``analyze`` -- single-shot prompt-to-text generation.
        * ``analyze_stream`` -- incremental (streaming) text generation.
        * ``get_model_info`` -- provider/model metadata dictionary.

    Subclasses may override:
        * ``analyze_with_attachments`` -- analysis with CSV file attachments.

    Attributes:
        attach_csv_as_file (bool): Whether CSV artifacts should be uploaded
            as file attachments instead of being inlined into the prompt.
    """

    @abstractmethod
    def analyze(
        self,
        system_prompt: str,
        user_prompt: str,
        max_tokens: int = DEFAULT_MAX_TOKENS,
    ) -> str:
        """Run one prompt through the provider and return the full response.

        Args:
            system_prompt: The system-level instruction text.
            user_prompt: The user-facing prompt with investigation context.
            max_tokens: Maximum number of tokens the model may generate.

        Returns:
            The complete generated text.

        Raises:
            AIProviderError: If the request fails for any reason.
        """

    @abstractmethod
    def analyze_stream(
        self,
        system_prompt: str,
        user_prompt: str,
        max_tokens: int = DEFAULT_MAX_TOKENS,
    ) -> Iterator[str]:
        """Yield generated text incrementally for the given prompt.

        Args:
            system_prompt: The system-level instruction text.
            user_prompt: The user-facing prompt with investigation context.
            max_tokens: Maximum number of tokens the model may generate.

        Yields:
            Text chunks (deltas) in generation order.

        Raises:
            AIProviderError: If the streaming request fails or produces no output.
        """

    @abstractmethod
    def get_model_info(self) -> dict[str, str]:
        """Describe the provider and model for audit logging and reports.

        Returns:
            A dictionary with at least ``"provider"`` and ``"model"`` keys.
        """

    def analyze_with_attachments(
        self,
        system_prompt: str,
        user_prompt: str,
        attachments: list[Mapping[str, str]] | None,
        max_tokens: int = DEFAULT_MAX_TOKENS,
    ) -> str:
        """Analyze with optional file attachments.

        Providers with real file-upload support override this method. This
        default implementation folds the attachment content directly into
        the prompt text (so the model always sees the evidence data) and
        then delegates to ``analyze``.

        Args:
            system_prompt: The system-level instruction text.
            user_prompt: The user-facing prompt with investigation context.
            attachments: Optional list of attachment descriptors with
                ``"path"``, ``"name"``, and ``"mime_type"`` keys.
            max_tokens: Maximum number of tokens the model may generate.

        Returns:
            The generated text response as a string.

        Raises:
            AIProviderError: If the request fails.
        """
        from .utils import _inline_attachment_data_into_prompt

        if not attachments:
            return self.analyze(
                system_prompt=system_prompt,
                user_prompt=user_prompt,
                max_tokens=max_tokens,
            )

        prompt_text, was_inlined = _inline_attachment_data_into_prompt(
            user_prompt=user_prompt,
            attachments=attachments,
        )
        if was_inlined:
            logger.info("Base provider inlined attachment data into prompt.")

        return self.analyze(
            system_prompt=system_prompt,
            user_prompt=prompt_text,
            max_tokens=max_tokens,
        )

    def _prepare_csv_attachments(
        self,
        attachments: list[Mapping[str, str]] | None,
        *,
        supports_file_attachments: bool = True,
    ) -> list[dict[str, str]] | None:
        """Apply shared CSV-attachment preflight checks and normalization.

        Args:
            attachments: Raw attachment descriptors from the caller.
            supports_file_attachments: Whether the provider's SDK client
                exposes the necessary file-upload APIs.

        Returns:
            A list of normalized attachment dicts, or ``None`` if attachment
            mode should be skipped.
        """
        from .utils import normalize_attachment_inputs

        # Attachment mode must be enabled and there must be something to send.
        if not bool(getattr(self, "attach_csv_as_file", False)) or not attachments:
            return None
        # A prior failed attempt may have disabled attachment mode entirely.
        if getattr(self, "_csv_attachment_supported", None) is False:
            return None
        if not supports_file_attachments:
            # Remember the lack of SDK support, but only on providers that
            # declare the tracking attribute.
            if hasattr(self, "_csv_attachment_supported"):
                self._csv_attachment_supported = False
            return None

        normalized = normalize_attachment_inputs(attachments)
        return normalized or None
299
300
301# ---------------------------------------------------------------------------
302# Rate-limit retry wrapper
303# ---------------------------------------------------------------------------
304
305
def _run_with_rate_limit_retries(
    request_fn: Callable[[], _T],
    rate_limit_error_type: type[Exception],
    provider_name: str,
) -> _T:
    """Execute a request callable, retrying rate-limited calls with backoff.

    Runs ``request_fn`` and, whenever ``rate_limit_error_type`` is raised,
    retries up to ``RATE_LIMIT_MAX_RETRIES`` times. Waits honour a
    ``Retry-After`` header when the error carries one, otherwise the delay
    doubles per attempt (1s, 2s, 4s, ...).

    Args:
        request_fn: A zero-argument callable that performs the API request.
        rate_limit_error_type: The exception class to catch as a rate-limit
            signal (e.g., ``anthropic.RateLimitError``).
        provider_name: Human-readable provider name for log messages.

    Returns:
        The return value of ``request_fn`` on a successful call.

    Raises:
        AIProviderError: If the rate limit is still exceeded after all retries.
    """
    state = _get_rate_limit_state(provider_name)
    last_error: Exception | None = None

    # A previous request may have set a backoff that has not fully elapsed;
    # finish waiting it out before the first attempt (sleep outside the lock).
    initial_wait = 0.0
    with state.lock:
        if state.backoff_duration > 0.0 and state.last_request_time > 0.0:
            residual = state.backoff_duration - (time.monotonic() - state.last_request_time)
            if residual > 0.0:
                logger.info(
                    "%s: honouring residual backoff from prior request, "
                    "waiting %.1fs before first attempt",
                    provider_name,
                    residual,
                )
                initial_wait = residual
    if initial_wait > 0.0:
        time.sleep(initial_wait)

    for attempt in range(RATE_LIMIT_MAX_RETRIES + 1):
        with state.lock:
            state.last_request_time = time.monotonic()
        try:
            outcome = request_fn()
        except rate_limit_error_type as error:
            last_error = error

            delay = _extract_retry_after_seconds(error)
            if delay is None:
                delay = float(2**attempt)

            with state.lock:
                state.consecutive_error_count += 1
                state.backoff_duration = delay
                state.last_request_time = time.monotonic()

            if attempt >= RATE_LIMIT_MAX_RETRIES:
                break

            logger.warning(
                "%s rate limited (attempt %d/%d, %d consecutive), "
                "retrying in %.1fs",
                provider_name,
                attempt + 1,
                RATE_LIMIT_MAX_RETRIES,
                state.consecutive_error_count,
                delay,
            )
            time.sleep(delay)
        else:
            # Success clears all backoff bookkeeping for this provider.
            with state.lock:
                state.backoff_duration = 0.0
                state.consecutive_error_count = 0
            return outcome

    detail = f" Details: {last_error}" if last_error else ""
    raise AIProviderError(
        f"{provider_name} rate limit exceeded after {RATE_LIMIT_MAX_RETRIES} retries.{detail}"
    ) from last_error
395
396
397# ---------------------------------------------------------------------------
398# Configuration resolution helpers
399# ---------------------------------------------------------------------------
400
401
402def _normalize_api_key_value(value: Any) -> str:
403    """Normalize API key-like values from config/env sources.
404
405    Args:
406        value: Raw API key value. May be ``None``, empty, or whitespace-padded.
407
408    Returns:
409        The stripped string, or empty string if input is ``None``.
410    """
411    if value is None:
412        return ""
413    return str(value).strip()
414
415
416def _resolve_api_key(config_key: Any, env_var: str) -> str:
417    """Return the API key from config, falling back to an environment variable.
418
419    Args:
420        config_key: The API key value from ``config.yaml``.
421        env_var: The environment variable name to check as fallback.
422
423    Returns:
424        The resolved API key string, or empty string if not found.
425    """
426    normalized_config_key = _normalize_api_key_value(config_key)
427    if normalized_config_key:
428        return normalized_config_key
429    return _normalize_api_key_value(os.environ.get(env_var, ""))
430
431
432def _resolve_api_key_candidates(config_key: Any, env_vars: tuple[str, ...]) -> str:
433    """Return API key from config, falling back across multiple environment variables.
434
435    Args:
436        config_key: The API key value from ``config.yaml``.
437        env_vars: Tuple of environment variable names to check in order.
438
439    Returns:
440        The resolved API key string, or empty string if not found.
441    """
442    normalized_config_key = _normalize_api_key_value(config_key)
443    if normalized_config_key:
444        return normalized_config_key
445
446    for env_var in env_vars:
447        normalized_value = _normalize_api_key_value(os.environ.get(env_var, ""))
448        if normalized_value:
449            return normalized_value
450    return ""
451
452
453def _resolve_timeout_seconds(value: Any, default_seconds: float) -> float:
454    """Normalize timeout values from config/env inputs.
455
456    Args:
457        value: Raw timeout value from configuration.
458        default_seconds: Fallback timeout in seconds.
459
460    Returns:
461        A positive float representing the timeout in seconds.
462    """
463    try:
464        timeout_seconds = float(value)
465    except (TypeError, ValueError):
466        return float(default_seconds)
467
468    if timeout_seconds <= 0:
469        return float(default_seconds)
470    return timeout_seconds
471
472
473# ---------------------------------------------------------------------------
474# Error detection helpers
475# ---------------------------------------------------------------------------
476
477
478def _extract_retry_after_seconds(error: Exception) -> float | None:
479    """Read ``Retry-After`` hints from API error responses when present.
480
481    Args:
482        error: The rate-limit or API exception that may carry HTTP headers.
483
484    Returns:
485        The retry delay in seconds, or ``None`` if not present.
486    """
487    response = getattr(error, "response", None)
488    headers = getattr(response, "headers", None)
489    if headers is None:
490        headers = getattr(error, "headers", None)
491    if headers is None:
492        return None
493
494    retry_after_value = headers.get("retry-after") or headers.get("Retry-After")
495    if retry_after_value is None:
496        return None
497
498    try:
499        retry_after = float(retry_after_value)
500    except (TypeError, ValueError):
501        return None
502
503    return max(0.0, retry_after)
504
505
def _is_context_length_error(error: Exception) -> bool:
    """Best-effort detection for context/token-length failures.

    Args:
        error: The API exception to inspect.

    Returns:
        ``True`` if the error appears to be a context-length overflow.
    """
    haystacks = [str(error).lower()]
    body = getattr(error, "body", None)
    if isinstance(body, dict):
        haystacks.append(str(body).lower())

    if any(pattern in text for text in haystacks for pattern in _CONTEXT_LENGTH_PATTERNS):
        return True

    # Some SDKs surface a machine-readable error code attribute instead.
    code = getattr(error, "code", None)
    return isinstance(code, str) and "context" in code.lower()
530
531
532def _is_attachment_unsupported_error(error: Exception) -> bool:
533    """Detect API errors that indicate attachment/file APIs are unsupported.
534
535    Args:
536        error: The API exception to inspect.
537
538    Returns:
539        ``True`` if the error indicates file-attachment APIs are unavailable.
540    """
541    message = str(error).lower()
542    unsupported_markers = (
543        "404",
544        "not found",
545        "unsupported",
546        "does not support",
547        "input_file",
548        "/responses",
549        "/files",
550        "unrecognized request url",
551        "unknown field",
552        "supported format",
553        "context stuffing file type",
554        "but got .csv",
555    )
556    return any(marker in message for marker in unsupported_markers)
557
558
559def _is_anthropic_streaming_required_error(error: Exception) -> bool:
560    """Detect Anthropic SDK non-streaming timeout guardrails.
561
562    Args:
563        error: The exception to inspect (typically a ``ValueError``).
564
565    Returns:
566        ``True`` if streaming is required due to expected long processing time.
567    """
568    message = str(error).lower()
569    if "streaming is required for operations that may take longer than 10 minutes" in message:
570        return True
571    return "streaming is required" in message and "10 minutes" in message
572
573
574def _is_unsupported_parameter_error(error: Exception, parameter_name: str) -> bool:
575    """Detect API errors that indicate a specific parameter is unsupported.
576
577    Args:
578        error: The API exception to inspect.
579        parameter_name: The parameter name to check for.
580
581    Returns:
582        ``True`` if the error indicates the parameter is unsupported.
583    """
584    parameter = str(parameter_name or "").strip().lower()
585    if not parameter:
586        return False
587
588    param = getattr(error, "param", None)
589    if isinstance(param, str) and param.lower() == parameter:
590        return True
591
592    body = getattr(error, "body", None)
593    if isinstance(body, dict):
594        error_payload = body.get("error", body)
595        if isinstance(error_payload, Mapping):
596            body_param = error_payload.get("param")
597            if isinstance(body_param, str) and body_param.lower() == parameter:
598                return True
599            body_message = error_payload.get("message")
600            if isinstance(body_message, str):
601                lowered_message = body_message.lower()
602                if parameter in lowered_message and "unsupported parameter" in lowered_message:
603                    return True
604
605        lowered_body = str(body).lower()
606        if parameter in lowered_body and "unsupported parameter" in lowered_body:
607            return True
608
609    lowered_message = str(error).lower()
610    return parameter in lowered_message and "unsupported parameter" in lowered_message
611
612
def _extract_supported_completion_token_limit(error: Exception) -> int | None:
    """Pull a provider-declared completion-token cap out of an API error.

    Args:
        error: The API exception whose message may contain the token limit.

    Returns:
        The maximum completion token count, or ``None`` if not found.
    """
    # Collect every text we might find the limit in: structured body message,
    # the raw body repr, and finally the exception message itself.
    texts: list[str] = []
    body = getattr(error, "body", None)
    if isinstance(body, dict):
        payload = body.get("error", body)
        if isinstance(payload, Mapping):
            payload_message = payload.get("message")
            if isinstance(payload_message, str):
                texts.append(payload_message)
        texts.append(str(body))
    texts.append(str(error))

    for text in texts:
        for pattern in (_SUPPORTED_COMPLETION_TOKEN_LIMIT_RE, _MAX_TOKENS_UPPER_BOUND_RE):
            found = pattern.search(text)
            if found is None:
                continue
            try:
                parsed_limit = int(found.group("limit"))
            except (TypeError, ValueError):
                continue
            if parsed_limit > 0:
                return parsed_limit
    return None
649
650
def _resolve_completion_token_retry_limit(
    error: Exception,
    requested_tokens: int,
) -> int | None:
    """Return a reduced token count when the API reports the model maximum.

    Args:
        error: The API exception that triggered the token-limit failure.
        requested_tokens: The ``max_tokens`` value that was rejected.

    Returns:
        A reduced token count for retry, or ``None`` if not recoverable.
    """
    if requested_tokens <= 0:
        return None

    declared_cap = _extract_supported_completion_token_limit(error)
    # Only retry when the provider declared a cap strictly below the request.
    if declared_cap is not None and declared_cap < requested_tokens:
        return declared_cap
    return None
670
671
672# ---------------------------------------------------------------------------
673# URL / model normalization
674# ---------------------------------------------------------------------------
675
676
677def _normalize_openai_compatible_base_url(base_url: str, default_base_url: str) -> str:
678    """Normalize OpenAI-compatible base URLs.
679
680    Ensures the URL has a versioned path prefix (``/v1``). Ollama users
681    often provide ``http://localhost:11434/``; this normalizes it.
682
683    Args:
684        base_url: Raw base URL string from configuration.
685        default_base_url: Fallback URL when ``base_url`` is empty.
686
687    Returns:
688        The normalized base URL string.
689    """
690    raw = str(base_url or "").strip()
691    if not raw:
692        return default_base_url
693
694    parsed = urlsplit(raw)
695    if not parsed.scheme or not parsed.netloc:
696        return raw.rstrip("/")
697
698    normalized_path = parsed.path.rstrip("/")
699    if normalized_path in ("", "/"):
700        normalized_path = "/v1"
701
702    return urlunsplit((parsed.scheme, parsed.netloc, normalized_path, parsed.query, parsed.fragment))
703
704
def _normalize_kimi_model_name(model: str) -> str:
    """Return the canonical Kimi model name, resolving deprecated aliases.

    Args:
        model: Raw model name string from configuration.

    Returns:
        The canonical Kimi model identifier string.
    """
    candidate = str(model or "").strip()
    if not candidate:
        return DEFAULT_KIMI_MODEL

    replacement = _KIMI_MODEL_ALIASES.get(candidate.lower())
    if not replacement:
        return candidate
    logger.warning("Kimi model '%s' is deprecated; using '%s'.", candidate, replacement)
    return replacement
723
724
def _is_kimi_model_not_available_error(error: Exception) -> bool:
    """Detect model-not-found or model-permission failures from Kimi.

    Args:
        error: The API exception to inspect.

    Returns:
        ``True`` if the error indicates the model is unavailable.
    """
    haystacks = [str(error).lower()]
    body = getattr(error, "body", None)
    if isinstance(body, dict):
        haystacks.append(str(body).lower())

    # Require both the word "model" and one of the known failure markers to
    # avoid misclassifying unrelated permission errors.
    return any(
        "model" in text
        and any(marker in text for marker in _KIMI_MODEL_NOT_AVAILABLE_PATTERNS)
        for text in haystacks
    )
logger = <Logger app.ai_providers.base (WARNING)>
DEFAULT_MAX_TOKENS = 256000
RATE_LIMIT_MAX_RETRIES = 3
DEFAULT_LOCAL_BASE_URL = 'http://localhost:11434/v1'
DEFAULT_CLOUD_REQUEST_TIMEOUT_SECONDS = 600.0
DEFAULT_LOCAL_REQUEST_TIMEOUT_SECONDS = 3600.0
DEFAULT_KIMI_BASE_URL = 'https://api.moonshot.ai/v1'
DEFAULT_CLAUDE_MODEL = 'claude-opus-4-6'
DEFAULT_OPENAI_MODEL = 'gpt-5.4'
DEFAULT_KIMI_MODEL = 'kimi-k2-turbo-preview'
DEFAULT_KIMI_FILE_UPLOAD_PURPOSE = 'file-extract'
DEFAULT_LOCAL_MODEL = 'llama3.1:70b'
@dataclass
class RateLimitState:
 96@dataclass
 97class RateLimitState:
 98    """Persistent rate-limit tracking state for a single AI provider.
 99
100    Attributes:
101        last_request_time: Monotonic timestamp of the last API request attempt.
102        backoff_duration: Current backoff duration in seconds. Reset to 0.0
103            on a successful request.
104        consecutive_error_count: Number of consecutive rate-limit errors.
105            Reset to 0 on a successful request.
106        lock: Per-provider lock for thread-safe state access.
107    """
108
109    last_request_time: float = 0.0
110    backoff_duration: float = 0.0
111    consecutive_error_count: int = 0
112    lock: threading.Lock = field(default_factory=threading.Lock)

Persistent rate-limit tracking state for a single AI provider.

Attributes:
  • last_request_time: Monotonic timestamp of the last API request attempt.
  • backoff_duration: Current backoff duration in seconds. Reset to 0.0 on a successful request.
  • consecutive_error_count: Number of consecutive rate-limit errors. Reset to 0 on a successful request.
  • lock: Per-provider lock for thread-safe state access.
RateLimitState( last_request_time: float = 0.0, backoff_duration: float = 0.0, consecutive_error_count: int = 0, lock: <built-in function allocate_lock> = <factory>)
last_request_time: float = 0.0
backoff_duration: float = 0.0
consecutive_error_count: int = 0
lock: <built-in function allocate_lock>
class AIProviderError(builtins.RuntimeError):
139class AIProviderError(RuntimeError):
140    """Raised when an AI provider request fails with a user-facing message.
141
142    All provider implementations translate SDK-specific exceptions into this
143    single exception type so that callers only need one ``except`` clause for
144    AI-related failures. The message is safe for display in the web UI.
145    """

Raised when an AI provider request fails with a user-facing message.

All provider implementations translate SDK-specific exceptions into this single exception type so that callers only need one except clause for AI-related failures. The message is safe for display in the web UI.

class AIProvider(abc.ABC):
class AIProvider(ABC):
    """Common interface implemented by every AI provider.

    Each concrete provider (Claude, OpenAI, Kimi, Local) conforms to this
    interface, allowing the forensic analysis engine to drive any of them
    interchangeably.

    Subclasses must implement:
        * ``analyze`` -- single-shot prompt-to-text generation.
        * ``analyze_stream`` -- incremental (streaming) text generation.
        * ``get_model_info`` -- provider/model metadata dictionary.

    Subclasses may override:
        * ``analyze_with_attachments`` -- analysis with CSV file attachments.

    Attributes:
        attach_csv_as_file (bool): Whether CSV artifacts should be uploaded
            as file attachments instead of being inlined into the prompt.
    """

    @abstractmethod
    def analyze(
        self,
        system_prompt: str,
        user_prompt: str,
        max_tokens: int = DEFAULT_MAX_TOKENS,
    ) -> str:
        """Submit a prompt and return the full generated text.

        Args:
            system_prompt: The system-level instruction text.
            user_prompt: The user-facing prompt with investigation context.
            max_tokens: Maximum number of tokens the model may generate.

        Returns:
            The complete generated response as a string.

        Raises:
            AIProviderError: If the request fails for any reason.
        """

    @abstractmethod
    def analyze_stream(
        self,
        system_prompt: str,
        user_prompt: str,
        max_tokens: int = DEFAULT_MAX_TOKENS,
    ) -> Iterator[str]:
        """Generate text incrementally, yielding chunks as they arrive.

        Args:
            system_prompt: The system-level instruction text.
            user_prompt: The user-facing prompt with investigation context.
            max_tokens: Maximum number of tokens the model may generate.

        Yields:
            Individual text deltas in generation order.

        Raises:
            AIProviderError: If the streaming request fails or produces no output.
        """

    @abstractmethod
    def get_model_info(self) -> dict[str, str]:
        """Describe the provider and model for audit logging and reports.

        Returns:
            A dictionary containing at least ``"provider"`` and ``"model"`` keys.
        """

    def analyze_with_attachments(
        self,
        system_prompt: str,
        user_prompt: str,
        attachments: list[Mapping[str, str]] | None,
        max_tokens: int = DEFAULT_MAX_TOKENS,
    ) -> str:
        """Analyze with optional file attachments.

        Providers with native file-upload support override this method. This
        default implementation folds the attachment content directly into the
        prompt text -- so the model always sees the evidence data -- and then
        delegates to ``analyze``.

        Args:
            system_prompt: The system-level instruction text.
            user_prompt: The user-facing prompt with investigation context.
            attachments: Optional list of attachment descriptors with
                ``"path"``, ``"name"``, and ``"mime_type"`` keys.
            max_tokens: Maximum number of tokens the model may generate.

        Returns:
            The complete generated response as a string.

        Raises:
            AIProviderError: If the request fails.
        """
        from .utils import _inline_attachment_data_into_prompt

        # No attachments: pass the prompt through untouched.
        if not attachments:
            return self.analyze(
                system_prompt=system_prompt,
                user_prompt=user_prompt,
                max_tokens=max_tokens,
            )

        effective_prompt, inlined = _inline_attachment_data_into_prompt(
            user_prompt=user_prompt,
            attachments=attachments,
        )
        if inlined:
            logger.info("Base provider inlined attachment data into prompt.")

        return self.analyze(
            system_prompt=system_prompt,
            user_prompt=effective_prompt,
            max_tokens=max_tokens,
        )

    def _prepare_csv_attachments(
        self,
        attachments: list[Mapping[str, str]] | None,
        *,
        supports_file_attachments: bool = True,
    ) -> list[dict[str, str]] | None:
        """Run shared CSV-attachment preflight checks and normalization.

        Args:
            attachments: Raw attachment descriptors from the caller.
            supports_file_attachments: Whether the provider's SDK client
                exposes the necessary file-upload APIs.

        Returns:
            Normalized attachment dicts, or ``None`` when attachment mode
            should be skipped entirely.
        """
        from .utils import normalize_attachment_inputs

        # Guard clauses, in priority order: feature disabled, nothing to
        # attach, or uploads previously determined to be unsupported.
        if not getattr(self, "attach_csv_as_file", False):
            return None
        if not attachments:
            return None
        if getattr(self, "_csv_attachment_supported", None) is False:
            return None
        if not supports_file_attachments:
            # Remember the negative result so later calls short-circuit.
            if hasattr(self, "_csv_attachment_supported"):
                self._csv_attachment_supported = False
            return None

        normalized = normalize_attachment_inputs(attachments)
        return normalized or None

Abstract base class defining the interface for all AI providers.

Every concrete provider (Claude, OpenAI, Kimi, Local) implements this interface so that the forensic analysis engine can call any provider interchangeably.

Subclasses must implement:
  • analyze -- single-shot prompt-to-text generation.
  • analyze_stream -- incremental (streaming) text generation.
  • get_model_info -- provider/model metadata dictionary.
Subclasses may override:
  • analyze_with_attachments -- analysis with CSV file attachments.
Attributes:
  • attach_csv_as_file (bool): Whether to attempt uploading CSV artifacts as file attachments rather than inlining them into the prompt.
@abstractmethod
def analyze( self, system_prompt: str, user_prompt: str, max_tokens: int = 256000) -> str:
173    @abstractmethod
174    def analyze(
175        self,
176        system_prompt: str,
177        user_prompt: str,
178        max_tokens: int = DEFAULT_MAX_TOKENS,
179    ) -> str:
180        """Send a prompt to the provider and return the complete generated text.
181
182        Args:
183            system_prompt: The system-level instruction text.
184            user_prompt: The user-facing prompt with investigation context.
185            max_tokens: Maximum number of tokens the model may generate.
186
187        Returns:
188            The generated text response as a string.
189
190        Raises:
191            AIProviderError: If the request fails for any reason.
192        """

Send a prompt to the provider and return the complete generated text.

Arguments:
  • system_prompt: The system-level instruction text.
  • user_prompt: The user-facing prompt with investigation context.
  • max_tokens: Maximum number of tokens the model may generate.
Returns:

The generated text response as a string.

Raises:
  • AIProviderError: If the request fails for any reason.
@abstractmethod
def analyze_stream( self, system_prompt: str, user_prompt: str, max_tokens: int = 256000) -> Iterator[str]:
194    @abstractmethod
195    def analyze_stream(
196        self,
197        system_prompt: str,
198        user_prompt: str,
199        max_tokens: int = DEFAULT_MAX_TOKENS,
200    ) -> Iterator[str]:
201        """Stream generated text chunks for the provided prompt.
202
203        Args:
204            system_prompt: The system-level instruction text.
205            user_prompt: The user-facing prompt with investigation context.
206            max_tokens: Maximum number of tokens the model may generate.
207
208        Yields:
209            Individual text chunks (deltas) as they are generated.
210
211        Raises:
212            AIProviderError: If the streaming request fails or produces no output.
213        """

Stream generated text chunks for the provided prompt.

Arguments:
  • system_prompt: The system-level instruction text.
  • user_prompt: The user-facing prompt with investigation context.
  • max_tokens: Maximum number of tokens the model may generate.
Yields:

Individual text chunks (deltas) as they are generated.

Raises:
  • AIProviderError: If the streaming request fails or produces no output.
@abstractmethod
def get_model_info(self) -> dict[str, str]:
215    @abstractmethod
216    def get_model_info(self) -> dict[str, str]:
217        """Return provider and model metadata for audit logging and reports.
218
219        Returns:
220            A dictionary with at least ``"provider"`` and ``"model"`` keys.
221        """

Return provider and model metadata for audit logging and reports.

Returns:

A dictionary with at least "provider" and "model" keys.

def analyze_with_attachments( self, system_prompt: str, user_prompt: str, attachments: list[typing.Mapping[str, str]] | None, max_tokens: int = 256000) -> str:
223    def analyze_with_attachments(
224        self,
225        system_prompt: str,
226        user_prompt: str,
227        attachments: list[Mapping[str, str]] | None,
228        max_tokens: int = DEFAULT_MAX_TOKENS,
229    ) -> str:
230        """Analyze with optional file attachments.
231
232        Providers that support file uploads override this method. The default
233        implementation inlines attachment content into the prompt so the model
234        always receives the evidence data, then delegates to ``analyze``.
235
236        Args:
237            system_prompt: The system-level instruction text.
238            user_prompt: The user-facing prompt with investigation context.
239            attachments: Optional list of attachment descriptors with
240                ``"path"``, ``"name"``, and ``"mime_type"`` keys.
241            max_tokens: Maximum number of tokens the model may generate.
242
243        Returns:
244            The generated text response as a string.
245
246        Raises:
247            AIProviderError: If the request fails.
248        """
249        from .utils import _inline_attachment_data_into_prompt
250
251        effective_prompt = user_prompt
252        if attachments:
253            effective_prompt, inlined = _inline_attachment_data_into_prompt(
254                user_prompt=user_prompt,
255                attachments=attachments,
256            )
257            if inlined:
258                logger.info("Base provider inlined attachment data into prompt.")
259
260        return self.analyze(
261            system_prompt=system_prompt,
262            user_prompt=effective_prompt,
263            max_tokens=max_tokens,
264        )

Analyze with optional file attachments.

Providers that support file uploads override this method. The default implementation inlines attachment content into the prompt so the model always receives the evidence data, then delegates to analyze.

Arguments:
  • system_prompt: The system-level instruction text.
  • user_prompt: The user-facing prompt with investigation context.
  • attachments: Optional list of attachment descriptors with "path", "name", and "mime_type" keys.
  • max_tokens: Maximum number of tokens the model may generate.
Returns:

The generated text response as a string.

Raises:
  • AIProviderError: If the request fails.