app.ai_providers.openai_provider

OpenAI API provider implementation.

Uses the openai Python SDK to communicate with the OpenAI Chat Completions and Responses APIs. Supports synchronous and streaming generation, CSV file attachments via the Responses API, and automatic fallback between max_completion_tokens and max_tokens parameters.

Attributes:
  • logger: Module-level logger for OpenAI provider operations.
  1"""OpenAI API provider implementation.
  2
  3Uses the ``openai`` Python SDK to communicate with the OpenAI Chat
  4Completions and Responses APIs. Supports synchronous and streaming
  5generation, CSV file attachments via the Responses API, and automatic
  6fallback between ``max_completion_tokens`` and ``max_tokens`` parameters.
  7
  8Attributes:
  9    logger: Module-level logger for OpenAI provider operations.
 10"""
 11
 12from __future__ import annotations
 13
 14import logging
 15from typing import Any, Callable, Iterator, Mapping
 16
 17from .base import (
 18    AIProvider,
 19    AIProviderError,
 20    DEFAULT_CLOUD_REQUEST_TIMEOUT_SECONDS,
 21    DEFAULT_MAX_TOKENS,
 22    _is_attachment_unsupported_error,
 23    _is_context_length_error,
 24    _is_unsupported_parameter_error,
 25    _normalize_api_key_value,
 26    _resolve_completion_token_retry_limit,
 27    _resolve_timeout_seconds,
 28    _run_with_rate_limit_retries,
 29    _T,
 30)
 31from .utils import (
 32    _extract_openai_delta_text,
 33    _extract_openai_text,
 34    _inline_attachment_data_into_prompt,
 35    upload_and_request_via_responses_api,
 36)
 37
# Module-level logger for OpenAI provider operations.
logger = logging.getLogger(__name__)

# Default model identifier used when callers do not configure one explicitly.
DEFAULT_OPENAI_MODEL = "gpt-5.4"
 41
 42
 43class OpenAIProvider(AIProvider):
 44    """OpenAI API provider implementation.
 45
 46    Attributes:
 47        api_key (str): The OpenAI API key.
 48        model (str): The OpenAI model identifier.
 49        attach_csv_as_file (bool): Whether to upload CSV artifacts as
 50            file attachments via the Responses API.
 51        request_timeout_seconds (float): HTTP timeout in seconds.
 52        client: The ``openai.OpenAI`` SDK client instance.
 53    """
 54
 55    def __init__(
 56        self,
 57        api_key: str,
 58        model: str = DEFAULT_OPENAI_MODEL,
 59        attach_csv_as_file: bool = True,
 60        request_timeout_seconds: float = DEFAULT_CLOUD_REQUEST_TIMEOUT_SECONDS,
 61    ) -> None:
 62        """Initialize the OpenAI provider.
 63
 64        Args:
 65            api_key: OpenAI API key. Must be non-empty.
 66            model: OpenAI model identifier.
 67            attach_csv_as_file: If ``True``, attempt file uploads via
 68                the Responses API.
 69            request_timeout_seconds: HTTP timeout in seconds.
 70
 71        Raises:
 72            AIProviderError: If the ``openai`` SDK is not installed or
 73                the API key is empty.
 74        """
 75        try:
 76            import openai
 77        except ImportError as error:
 78            raise AIProviderError(
 79                "openai SDK is not installed. Install it with `pip install openai`."
 80            ) from error
 81
 82        normalized_api_key = _normalize_api_key_value(api_key)
 83        if not normalized_api_key:
 84            raise AIProviderError(
 85                "OpenAI API key is not configured. "
 86                "Set `ai.openai.api_key` in config.yaml or the OPENAI_API_KEY environment variable."
 87            )
 88
 89        self._openai = openai
 90        self.api_key = normalized_api_key
 91        self.model = model
 92        self.attach_csv_as_file = bool(attach_csv_as_file)
 93        self._csv_attachment_supported: bool | None = None
 94        self.request_timeout_seconds = _resolve_timeout_seconds(
 95            request_timeout_seconds,
 96            DEFAULT_CLOUD_REQUEST_TIMEOUT_SECONDS,
 97        )
 98        self.client = openai.OpenAI(
 99            api_key=normalized_api_key,
100            timeout=self.request_timeout_seconds,
101        )
102        logger.info("Initialized OpenAI provider with model %s (timeout %.1fs)", model, self.request_timeout_seconds)
103
104    def analyze(
105        self,
106        system_prompt: str,
107        user_prompt: str,
108        max_tokens: int = DEFAULT_MAX_TOKENS,
109    ) -> str:
110        """Send a prompt to OpenAI and return the generated text.
111
112        Args:
113            system_prompt: The system-level instruction text.
114            user_prompt: The user-facing prompt with investigation context.
115            max_tokens: Maximum completion tokens.
116
117        Returns:
118            The generated analysis text.
119
120        Raises:
121            AIProviderError: On any API or network failure.
122        """
123        return self.analyze_with_attachments(
124            system_prompt=system_prompt,
125            user_prompt=user_prompt,
126            attachments=None,
127            max_tokens=max_tokens,
128        )
129
130    def analyze_stream(
131        self,
132        system_prompt: str,
133        user_prompt: str,
134        max_tokens: int = DEFAULT_MAX_TOKENS,
135    ) -> Iterator[str]:
136        """Stream generated text chunks from OpenAI.
137
138        Args:
139            system_prompt: The system-level instruction text.
140            user_prompt: The user-facing prompt with investigation context.
141            max_tokens: Maximum completion tokens.
142
143        Yields:
144            Text chunk strings as they are generated.
145
146        Raises:
147            AIProviderError: On empty response or API failure.
148        """
149        def _stream() -> Iterator[str]:
150            messages = [
151                {"role": "system", "content": system_prompt},
152                {"role": "user", "content": user_prompt},
153            ]
154            stream = self._run_openai_request(
155                lambda: self._create_chat_completion(
156                    messages=messages,
157                    max_tokens=max_tokens,
158                    stream=True,
159                )
160            )
161            emitted = False
162            try:
163                for chunk in stream:
164                    choices = getattr(chunk, "choices", None)
165                    if not choices:
166                        continue
167                    choice = choices[0]
168                    delta = getattr(choice, "delta", None)
169                    if delta is None and isinstance(choice, dict):
170                        delta = choice.get("delta")
171                    chunk_text = _extract_openai_delta_text(
172                        delta,
173                        ("content", "reasoning_content", "reasoning", "refusal"),
174                    )
175                    if not chunk_text:
176                        continue
177                    emitted = True
178                    yield chunk_text
179            except AIProviderError:
180                raise
181            except self._openai.APIConnectionError as error:
182                raise AIProviderError(
183                    "Unable to connect to OpenAI API. Check network access and endpoint configuration."
184                ) from error
185            except self._openai.AuthenticationError as error:
186                raise AIProviderError(
187                    "OpenAI authentication failed. Check `ai.openai.api_key` or OPENAI_API_KEY."
188                ) from error
189            except self._openai.BadRequestError as error:
190                if _is_context_length_error(error):
191                    raise AIProviderError(
192                        "OpenAI request exceeded the model context length. Reduce prompt size and retry."
193                    ) from error
194                raise AIProviderError(f"OpenAI request was rejected: {error}") from error
195            except self._openai.APIError as error:
196                raise AIProviderError(f"OpenAI API error: {error}") from error
197            except Exception as error:
198                raise AIProviderError(f"Unexpected OpenAI provider error: {error}") from error
199
200            if not emitted:
201                raise AIProviderError("OpenAI returned an empty response.")
202
203        return _stream()
204
205    def analyze_with_attachments(
206        self,
207        system_prompt: str,
208        user_prompt: str,
209        attachments: list[Mapping[str, str]] | None,
210        max_tokens: int = DEFAULT_MAX_TOKENS,
211    ) -> str:
212        """Analyze with optional CSV file attachments via the Responses API.
213
214        Args:
215            system_prompt: The system-level instruction text.
216            user_prompt: The user-facing prompt with investigation context.
217            attachments: Optional list of attachment descriptors.
218            max_tokens: Maximum completion tokens.
219
220        Returns:
221            The generated analysis text.
222
223        Raises:
224            AIProviderError: On any API or network failure.
225        """
226        def _request() -> str:
227            return self._request_non_stream(
228                system_prompt=system_prompt,
229                user_prompt=user_prompt,
230                max_tokens=max_tokens,
231                attachments=attachments,
232            )
233
234        return self._run_openai_request(_request)
235
236    def _run_openai_request(self, request_fn: Callable[[], _T]) -> _T:
237        """Execute an OpenAI request with rate-limit retries and error mapping.
238
239        Args:
240            request_fn: A zero-argument callable that performs the request.
241
242        Returns:
243            The return value of ``request_fn`` on success.
244
245        Raises:
246            AIProviderError: On any OpenAI SDK error.
247        """
248        try:
249            return _run_with_rate_limit_retries(
250                request_fn=request_fn,
251                rate_limit_error_type=self._openai.RateLimitError,
252                provider_name="OpenAI",
253            )
254        except AIProviderError:
255            raise
256        except self._openai.APIConnectionError as error:
257            raise AIProviderError(
258                "Unable to connect to OpenAI API. Check network access and endpoint configuration."
259            ) from error
260        except self._openai.AuthenticationError as error:
261            raise AIProviderError(
262                "OpenAI authentication failed. Check `ai.openai.api_key` or OPENAI_API_KEY."
263            ) from error
264        except self._openai.BadRequestError as error:
265            if _is_context_length_error(error):
266                raise AIProviderError(
267                    "OpenAI request exceeded the model context length. Reduce prompt size and retry."
268                ) from error
269            raise AIProviderError(f"OpenAI request was rejected: {error}") from error
270        except self._openai.APIError as error:
271            raise AIProviderError(f"OpenAI API error: {error}") from error
272        except Exception as error:
273            raise AIProviderError(f"Unexpected OpenAI provider error: {error}") from error
274
275    def _request_non_stream(
276        self,
277        system_prompt: str,
278        user_prompt: str,
279        max_tokens: int,
280        attachments: list[Mapping[str, str]] | None = None,
281    ) -> str:
282        """Perform a non-streaming OpenAI request with attachment handling.
283
284        Tries file-attachment mode first, then falls back to inlining
285        attachment data, and finally issues a plain Chat Completions request.
286
287        Args:
288            system_prompt: The system-level instruction text.
289            user_prompt: The user-facing prompt text.
290            max_tokens: Maximum completion tokens.
291            attachments: Optional list of attachment descriptors.
292
293        Returns:
294            The generated analysis text.
295
296        Raises:
297            AIProviderError: If the response is empty.
298        """
299        attachment_response = self._request_with_csv_attachments(
300            system_prompt=system_prompt,
301            user_prompt=user_prompt,
302            max_tokens=max_tokens,
303            attachments=attachments,
304        )
305        if attachment_response:
306            return attachment_response
307
308        prompt_for_completion = user_prompt
309        if attachments:
310            prompt_for_completion, inlined_attachment_data = _inline_attachment_data_into_prompt(
311                user_prompt=user_prompt,
312                attachments=attachments,
313            )
314            if inlined_attachment_data:
315                logger.info("OpenAI attachment fallback inlined attachment data into prompt.")
316
317        messages = [
318            {"role": "system", "content": system_prompt},
319            {"role": "user", "content": prompt_for_completion},
320        ]
321        response = self._create_chat_completion(
322            messages=messages,
323            max_tokens=max_tokens,
324        )
325        text = _extract_openai_text(response)
326        if not text:
327            raise AIProviderError("OpenAI returned an empty response.")
328        return text
329
330    def _create_chat_completion(
331        self,
332        messages: list[dict[str, str]],
333        max_tokens: int,
334        stream: bool = False,
335    ) -> Any:
336        """Create a Chat Completions request with token parameter fallback.
337
338        Tries ``max_completion_tokens`` first, then falls back to
339        ``max_tokens`` if the endpoint reports the parameter as unsupported.
340        Also retries with a reduced token count when the provider rejects
341        the requested maximum.
342
343        Args:
344            messages: The conversation messages list.
345            max_tokens: Maximum completion tokens.
346            stream: If ``True``, return a streaming response iterator.
347
348        Returns:
349            The OpenAI ``ChatCompletion`` response or streaming iterator.
350        """
351        def _create_with_token_parameter(token_parameter: str, token_count: int) -> Any:
352            """Try creating with a specific token parameter, retrying on token limit."""
353            request_kwargs: dict[str, Any] = {
354                "model": self.model,
355                "messages": messages,
356                token_parameter: token_count,
357            }
358            if stream:
359                request_kwargs["stream"] = True
360            try:
361                return self.client.chat.completions.create(**request_kwargs)
362            except self._openai.BadRequestError as error:
363                retry_token_count = _resolve_completion_token_retry_limit(
364                    error=error,
365                    requested_tokens=token_count,
366                )
367                if retry_token_count is None:
368                    raise
369                logger.warning(
370                    "OpenAI rejected %s=%d; retrying with %s=%d.",
371                    token_parameter,
372                    token_count,
373                    token_parameter,
374                    retry_token_count,
375                )
376                request_kwargs[token_parameter] = retry_token_count
377                return self.client.chat.completions.create(**request_kwargs)
378
379        try:
380            return _create_with_token_parameter(
381                token_parameter="max_completion_tokens",
382                token_count=max_tokens,
383            )
384        except self._openai.BadRequestError as error:
385            if not _is_unsupported_parameter_error(error, "max_completion_tokens"):
386                raise
387            return _create_with_token_parameter(
388                token_parameter="max_tokens",
389                token_count=max_tokens,
390            )
391
392    def _request_with_csv_attachments(
393        self,
394        system_prompt: str,
395        user_prompt: str,
396        max_tokens: int,
397        attachments: list[Mapping[str, str]] | None,
398    ) -> str | None:
399        """Attempt to send a request with CSV files via the Responses API.
400
401        Args:
402            system_prompt: The system-level instruction text.
403            user_prompt: The user-facing prompt text.
404            max_tokens: Maximum completion tokens.
405            attachments: Optional list of attachment descriptors.
406
407        Returns:
408            The generated text if succeeded, or ``None`` if skipped.
409        """
410        normalized_attachments = self._prepare_csv_attachments(
411            attachments,
412            supports_file_attachments=hasattr(self.client, "files") and hasattr(self.client, "responses"),
413        )
414        if not normalized_attachments:
415            return None
416
417        try:
418            text = upload_and_request_via_responses_api(
419                client=self.client,
420                openai_module=self._openai,
421                model=self.model,
422                normalized_attachments=normalized_attachments,
423                system_prompt=system_prompt,
424                user_prompt=user_prompt,
425                max_tokens=max_tokens,
426                provider_name="OpenAI",
427                upload_purpose="assistants",
428                convert_csv_to_txt=True,
429            )
430            self._csv_attachment_supported = True
431            return text
432        except Exception as error:
433            if _is_attachment_unsupported_error(error):
434                self._csv_attachment_supported = False
435                logger.info(
436                    "OpenAI endpoint does not support CSV attachments via /files + /responses; "
437                    "falling back to chat.completions text mode."
438                )
439                return None
440            raise
441
442    def get_model_info(self) -> dict[str, str]:
443        """Return OpenAI provider and model metadata.
444
445        Returns:
446            A dictionary with ``"provider"`` and ``"model"`` keys.
447        """
448        return {"provider": "openai", "model": self.model}
logger = <Logger app.ai_providers.openai_provider (WARNING)>
DEFAULT_OPENAI_MODEL = 'gpt-5.4'
class OpenAIProvider(app.ai_providers.base.AIProvider):
 44class OpenAIProvider(AIProvider):
 45    """OpenAI API provider implementation.
 46
 47    Attributes:
 48        api_key (str): The OpenAI API key.
 49        model (str): The OpenAI model identifier.
 50        attach_csv_as_file (bool): Whether to upload CSV artifacts as
 51            file attachments via the Responses API.
 52        request_timeout_seconds (float): HTTP timeout in seconds.
 53        client: The ``openai.OpenAI`` SDK client instance.
 54    """
 55
 56    def __init__(
 57        self,
 58        api_key: str,
 59        model: str = DEFAULT_OPENAI_MODEL,
 60        attach_csv_as_file: bool = True,
 61        request_timeout_seconds: float = DEFAULT_CLOUD_REQUEST_TIMEOUT_SECONDS,
 62    ) -> None:
 63        """Initialize the OpenAI provider.
 64
 65        Args:
 66            api_key: OpenAI API key. Must be non-empty.
 67            model: OpenAI model identifier.
 68            attach_csv_as_file: If ``True``, attempt file uploads via
 69                the Responses API.
 70            request_timeout_seconds: HTTP timeout in seconds.
 71
 72        Raises:
 73            AIProviderError: If the ``openai`` SDK is not installed or
 74                the API key is empty.
 75        """
 76        try:
 77            import openai
 78        except ImportError as error:
 79            raise AIProviderError(
 80                "openai SDK is not installed. Install it with `pip install openai`."
 81            ) from error
 82
 83        normalized_api_key = _normalize_api_key_value(api_key)
 84        if not normalized_api_key:
 85            raise AIProviderError(
 86                "OpenAI API key is not configured. "
 87                "Set `ai.openai.api_key` in config.yaml or the OPENAI_API_KEY environment variable."
 88            )
 89
 90        self._openai = openai
 91        self.api_key = normalized_api_key
 92        self.model = model
 93        self.attach_csv_as_file = bool(attach_csv_as_file)
 94        self._csv_attachment_supported: bool | None = None
 95        self.request_timeout_seconds = _resolve_timeout_seconds(
 96            request_timeout_seconds,
 97            DEFAULT_CLOUD_REQUEST_TIMEOUT_SECONDS,
 98        )
 99        self.client = openai.OpenAI(
100            api_key=normalized_api_key,
101            timeout=self.request_timeout_seconds,
102        )
103        logger.info("Initialized OpenAI provider with model %s (timeout %.1fs)", model, self.request_timeout_seconds)
104
105    def analyze(
106        self,
107        system_prompt: str,
108        user_prompt: str,
109        max_tokens: int = DEFAULT_MAX_TOKENS,
110    ) -> str:
111        """Send a prompt to OpenAI and return the generated text.
112
113        Args:
114            system_prompt: The system-level instruction text.
115            user_prompt: The user-facing prompt with investigation context.
116            max_tokens: Maximum completion tokens.
117
118        Returns:
119            The generated analysis text.
120
121        Raises:
122            AIProviderError: On any API or network failure.
123        """
124        return self.analyze_with_attachments(
125            system_prompt=system_prompt,
126            user_prompt=user_prompt,
127            attachments=None,
128            max_tokens=max_tokens,
129        )
130
131    def analyze_stream(
132        self,
133        system_prompt: str,
134        user_prompt: str,
135        max_tokens: int = DEFAULT_MAX_TOKENS,
136    ) -> Iterator[str]:
137        """Stream generated text chunks from OpenAI.
138
139        Args:
140            system_prompt: The system-level instruction text.
141            user_prompt: The user-facing prompt with investigation context.
142            max_tokens: Maximum completion tokens.
143
144        Yields:
145            Text chunk strings as they are generated.
146
147        Raises:
148            AIProviderError: On empty response or API failure.
149        """
150        def _stream() -> Iterator[str]:
151            messages = [
152                {"role": "system", "content": system_prompt},
153                {"role": "user", "content": user_prompt},
154            ]
155            stream = self._run_openai_request(
156                lambda: self._create_chat_completion(
157                    messages=messages,
158                    max_tokens=max_tokens,
159                    stream=True,
160                )
161            )
162            emitted = False
163            try:
164                for chunk in stream:
165                    choices = getattr(chunk, "choices", None)
166                    if not choices:
167                        continue
168                    choice = choices[0]
169                    delta = getattr(choice, "delta", None)
170                    if delta is None and isinstance(choice, dict):
171                        delta = choice.get("delta")
172                    chunk_text = _extract_openai_delta_text(
173                        delta,
174                        ("content", "reasoning_content", "reasoning", "refusal"),
175                    )
176                    if not chunk_text:
177                        continue
178                    emitted = True
179                    yield chunk_text
180            except AIProviderError:
181                raise
182            except self._openai.APIConnectionError as error:
183                raise AIProviderError(
184                    "Unable to connect to OpenAI API. Check network access and endpoint configuration."
185                ) from error
186            except self._openai.AuthenticationError as error:
187                raise AIProviderError(
188                    "OpenAI authentication failed. Check `ai.openai.api_key` or OPENAI_API_KEY."
189                ) from error
190            except self._openai.BadRequestError as error:
191                if _is_context_length_error(error):
192                    raise AIProviderError(
193                        "OpenAI request exceeded the model context length. Reduce prompt size and retry."
194                    ) from error
195                raise AIProviderError(f"OpenAI request was rejected: {error}") from error
196            except self._openai.APIError as error:
197                raise AIProviderError(f"OpenAI API error: {error}") from error
198            except Exception as error:
199                raise AIProviderError(f"Unexpected OpenAI provider error: {error}") from error
200
201            if not emitted:
202                raise AIProviderError("OpenAI returned an empty response.")
203
204        return _stream()
205
206    def analyze_with_attachments(
207        self,
208        system_prompt: str,
209        user_prompt: str,
210        attachments: list[Mapping[str, str]] | None,
211        max_tokens: int = DEFAULT_MAX_TOKENS,
212    ) -> str:
213        """Analyze with optional CSV file attachments via the Responses API.
214
215        Args:
216            system_prompt: The system-level instruction text.
217            user_prompt: The user-facing prompt with investigation context.
218            attachments: Optional list of attachment descriptors.
219            max_tokens: Maximum completion tokens.
220
221        Returns:
222            The generated analysis text.
223
224        Raises:
225            AIProviderError: On any API or network failure.
226        """
227        def _request() -> str:
228            return self._request_non_stream(
229                system_prompt=system_prompt,
230                user_prompt=user_prompt,
231                max_tokens=max_tokens,
232                attachments=attachments,
233            )
234
235        return self._run_openai_request(_request)
236
237    def _run_openai_request(self, request_fn: Callable[[], _T]) -> _T:
238        """Execute an OpenAI request with rate-limit retries and error mapping.
239
240        Args:
241            request_fn: A zero-argument callable that performs the request.
242
243        Returns:
244            The return value of ``request_fn`` on success.
245
246        Raises:
247            AIProviderError: On any OpenAI SDK error.
248        """
249        try:
250            return _run_with_rate_limit_retries(
251                request_fn=request_fn,
252                rate_limit_error_type=self._openai.RateLimitError,
253                provider_name="OpenAI",
254            )
255        except AIProviderError:
256            raise
257        except self._openai.APIConnectionError as error:
258            raise AIProviderError(
259                "Unable to connect to OpenAI API. Check network access and endpoint configuration."
260            ) from error
261        except self._openai.AuthenticationError as error:
262            raise AIProviderError(
263                "OpenAI authentication failed. Check `ai.openai.api_key` or OPENAI_API_KEY."
264            ) from error
265        except self._openai.BadRequestError as error:
266            if _is_context_length_error(error):
267                raise AIProviderError(
268                    "OpenAI request exceeded the model context length. Reduce prompt size and retry."
269                ) from error
270            raise AIProviderError(f"OpenAI request was rejected: {error}") from error
271        except self._openai.APIError as error:
272            raise AIProviderError(f"OpenAI API error: {error}") from error
273        except Exception as error:
274            raise AIProviderError(f"Unexpected OpenAI provider error: {error}") from error
275
276    def _request_non_stream(
277        self,
278        system_prompt: str,
279        user_prompt: str,
280        max_tokens: int,
281        attachments: list[Mapping[str, str]] | None = None,
282    ) -> str:
283        """Perform a non-streaming OpenAI request with attachment handling.
284
285        Tries file-attachment mode first, then falls back to inlining
286        attachment data, and finally issues a plain Chat Completions request.
287
288        Args:
289            system_prompt: The system-level instruction text.
290            user_prompt: The user-facing prompt text.
291            max_tokens: Maximum completion tokens.
292            attachments: Optional list of attachment descriptors.
293
294        Returns:
295            The generated analysis text.
296
297        Raises:
298            AIProviderError: If the response is empty.
299        """
300        attachment_response = self._request_with_csv_attachments(
301            system_prompt=system_prompt,
302            user_prompt=user_prompt,
303            max_tokens=max_tokens,
304            attachments=attachments,
305        )
306        if attachment_response:
307            return attachment_response
308
309        prompt_for_completion = user_prompt
310        if attachments:
311            prompt_for_completion, inlined_attachment_data = _inline_attachment_data_into_prompt(
312                user_prompt=user_prompt,
313                attachments=attachments,
314            )
315            if inlined_attachment_data:
316                logger.info("OpenAI attachment fallback inlined attachment data into prompt.")
317
318        messages = [
319            {"role": "system", "content": system_prompt},
320            {"role": "user", "content": prompt_for_completion},
321        ]
322        response = self._create_chat_completion(
323            messages=messages,
324            max_tokens=max_tokens,
325        )
326        text = _extract_openai_text(response)
327        if not text:
328            raise AIProviderError("OpenAI returned an empty response.")
329        return text
330
331    def _create_chat_completion(
332        self,
333        messages: list[dict[str, str]],
334        max_tokens: int,
335        stream: bool = False,
336    ) -> Any:
337        """Create a Chat Completions request with token parameter fallback.
338
339        Tries ``max_completion_tokens`` first, then falls back to
340        ``max_tokens`` if the endpoint reports the parameter as unsupported.
341        Also retries with a reduced token count when the provider rejects
342        the requested maximum.
343
344        Args:
345            messages: The conversation messages list.
346            max_tokens: Maximum completion tokens.
347            stream: If ``True``, return a streaming response iterator.
348
349        Returns:
350            The OpenAI ``ChatCompletion`` response or streaming iterator.
351        """
352        def _create_with_token_parameter(token_parameter: str, token_count: int) -> Any:
353            """Try creating with a specific token parameter, retrying on token limit."""
354            request_kwargs: dict[str, Any] = {
355                "model": self.model,
356                "messages": messages,
357                token_parameter: token_count,
358            }
359            if stream:
360                request_kwargs["stream"] = True
361            try:
362                return self.client.chat.completions.create(**request_kwargs)
363            except self._openai.BadRequestError as error:
364                retry_token_count = _resolve_completion_token_retry_limit(
365                    error=error,
366                    requested_tokens=token_count,
367                )
368                if retry_token_count is None:
369                    raise
370                logger.warning(
371                    "OpenAI rejected %s=%d; retrying with %s=%d.",
372                    token_parameter,
373                    token_count,
374                    token_parameter,
375                    retry_token_count,
376                )
377                request_kwargs[token_parameter] = retry_token_count
378                return self.client.chat.completions.create(**request_kwargs)
379
380        try:
381            return _create_with_token_parameter(
382                token_parameter="max_completion_tokens",
383                token_count=max_tokens,
384            )
385        except self._openai.BadRequestError as error:
386            if not _is_unsupported_parameter_error(error, "max_completion_tokens"):
387                raise
388            return _create_with_token_parameter(
389                token_parameter="max_tokens",
390                token_count=max_tokens,
391            )
392
393    def _request_with_csv_attachments(
394        self,
395        system_prompt: str,
396        user_prompt: str,
397        max_tokens: int,
398        attachments: list[Mapping[str, str]] | None,
399    ) -> str | None:
400        """Attempt to send a request with CSV files via the Responses API.
401
402        Args:
403            system_prompt: The system-level instruction text.
404            user_prompt: The user-facing prompt text.
405            max_tokens: Maximum completion tokens.
406            attachments: Optional list of attachment descriptors.
407
408        Returns:
409            The generated text if succeeded, or ``None`` if skipped.
410        """
411        normalized_attachments = self._prepare_csv_attachments(
412            attachments,
413            supports_file_attachments=hasattr(self.client, "files") and hasattr(self.client, "responses"),
414        )
415        if not normalized_attachments:
416            return None
417
418        try:
419            text = upload_and_request_via_responses_api(
420                client=self.client,
421                openai_module=self._openai,
422                model=self.model,
423                normalized_attachments=normalized_attachments,
424                system_prompt=system_prompt,
425                user_prompt=user_prompt,
426                max_tokens=max_tokens,
427                provider_name="OpenAI",
428                upload_purpose="assistants",
429                convert_csv_to_txt=True,
430            )
431            self._csv_attachment_supported = True
432            return text
433        except Exception as error:
434            if _is_attachment_unsupported_error(error):
435                self._csv_attachment_supported = False
436                logger.info(
437                    "OpenAI endpoint does not support CSV attachments via /files + /responses; "
438                    "falling back to chat.completions text mode."
439                )
440                return None
441            raise
442
443    def get_model_info(self) -> dict[str, str]:
444        """Return OpenAI provider and model metadata.
445
446        Returns:
447            A dictionary with ``"provider"`` and ``"model"`` keys.
448        """
449        return {"provider": "openai", "model": self.model}

OpenAI API provider implementation.

Attributes:
  • api_key (str): The OpenAI API key.
  • model (str): The OpenAI model identifier.
  • attach_csv_as_file (bool): Whether to upload CSV artifacts as file attachments via the Responses API.
  • request_timeout_seconds (float): HTTP timeout in seconds.
  • client: The openai.OpenAI SDK client instance.
OpenAIProvider(api_key: str, model: str = 'gpt-5.4', attach_csv_as_file: bool = True, request_timeout_seconds: float = 600.0)
 56    def __init__(
 57        self,
 58        api_key: str,
 59        model: str = DEFAULT_OPENAI_MODEL,
 60        attach_csv_as_file: bool = True,
 61        request_timeout_seconds: float = DEFAULT_CLOUD_REQUEST_TIMEOUT_SECONDS,
 62    ) -> None:
 63        """Initialize the OpenAI provider.
 64
 65        Args:
 66            api_key: OpenAI API key. Must be non-empty.
 67            model: OpenAI model identifier.
 68            attach_csv_as_file: If ``True``, attempt file uploads via
 69                the Responses API.
 70            request_timeout_seconds: HTTP timeout in seconds.
 71
 72        Raises:
 73            AIProviderError: If the ``openai`` SDK is not installed or
 74                the API key is empty.
 75        """
 76        try:
 77            import openai
 78        except ImportError as error:
 79            raise AIProviderError(
 80                "openai SDK is not installed. Install it with `pip install openai`."
 81            ) from error
 82
 83        normalized_api_key = _normalize_api_key_value(api_key)
 84        if not normalized_api_key:
 85            raise AIProviderError(
 86                "OpenAI API key is not configured. "
 87                "Set `ai.openai.api_key` in config.yaml or the OPENAI_API_KEY environment variable."
 88            )
 89
 90        self._openai = openai
 91        self.api_key = normalized_api_key
 92        self.model = model
 93        self.attach_csv_as_file = bool(attach_csv_as_file)
 94        self._csv_attachment_supported: bool | None = None
 95        self.request_timeout_seconds = _resolve_timeout_seconds(
 96            request_timeout_seconds,
 97            DEFAULT_CLOUD_REQUEST_TIMEOUT_SECONDS,
 98        )
 99        self.client = openai.OpenAI(
100            api_key=normalized_api_key,
101            timeout=self.request_timeout_seconds,
102        )
103        logger.info("Initialized OpenAI provider with model %s (timeout %.1fs)", model, self.request_timeout_seconds)

Initialize the OpenAI provider.

Arguments:
  • api_key: OpenAI API key. Must be non-empty.
  • model: OpenAI model identifier.
  • attach_csv_as_file: If True, attempt file uploads via the Responses API.
  • request_timeout_seconds: HTTP timeout in seconds.
Raises:
  • AIProviderError: If the openai SDK is not installed or the API key is empty.
api_key
model
attach_csv_as_file
request_timeout_seconds
client
def analyze(self, system_prompt: str, user_prompt: str, max_tokens: int = 256000) -> str:
105    def analyze(
106        self,
107        system_prompt: str,
108        user_prompt: str,
109        max_tokens: int = DEFAULT_MAX_TOKENS,
110    ) -> str:
111        """Send a prompt to OpenAI and return the generated text.
112
113        Args:
114            system_prompt: The system-level instruction text.
115            user_prompt: The user-facing prompt with investigation context.
116            max_tokens: Maximum completion tokens.
117
118        Returns:
119            The generated analysis text.
120
121        Raises:
122            AIProviderError: On any API or network failure.
123        """
124        return self.analyze_with_attachments(
125            system_prompt=system_prompt,
126            user_prompt=user_prompt,
127            attachments=None,
128            max_tokens=max_tokens,
129        )

Send a prompt to OpenAI and return the generated text.

Arguments:
  • system_prompt: The system-level instruction text.
  • user_prompt: The user-facing prompt with investigation context.
  • max_tokens: Maximum completion tokens.
Returns:

The generated analysis text.

Raises:
  • AIProviderError: On any API or network failure.
def analyze_stream(self, system_prompt: str, user_prompt: str, max_tokens: int = 256000) -> Iterator[str]:
131    def analyze_stream(
132        self,
133        system_prompt: str,
134        user_prompt: str,
135        max_tokens: int = DEFAULT_MAX_TOKENS,
136    ) -> Iterator[str]:
137        """Stream generated text chunks from OpenAI.
138
139        Args:
140            system_prompt: The system-level instruction text.
141            user_prompt: The user-facing prompt with investigation context.
142            max_tokens: Maximum completion tokens.
143
144        Yields:
145            Text chunk strings as they are generated.
146
147        Raises:
148            AIProviderError: On empty response or API failure.
149        """
150        def _stream() -> Iterator[str]:
151            messages = [
152                {"role": "system", "content": system_prompt},
153                {"role": "user", "content": user_prompt},
154            ]
155            stream = self._run_openai_request(
156                lambda: self._create_chat_completion(
157                    messages=messages,
158                    max_tokens=max_tokens,
159                    stream=True,
160                )
161            )
162            emitted = False
163            try:
164                for chunk in stream:
165                    choices = getattr(chunk, "choices", None)
166                    if not choices:
167                        continue
168                    choice = choices[0]
169                    delta = getattr(choice, "delta", None)
170                    if delta is None and isinstance(choice, dict):
171                        delta = choice.get("delta")
172                    chunk_text = _extract_openai_delta_text(
173                        delta,
174                        ("content", "reasoning_content", "reasoning", "refusal"),
175                    )
176                    if not chunk_text:
177                        continue
178                    emitted = True
179                    yield chunk_text
180            except AIProviderError:
181                raise
182            except self._openai.APIConnectionError as error:
183                raise AIProviderError(
184                    "Unable to connect to OpenAI API. Check network access and endpoint configuration."
185                ) from error
186            except self._openai.AuthenticationError as error:
187                raise AIProviderError(
188                    "OpenAI authentication failed. Check `ai.openai.api_key` or OPENAI_API_KEY."
189                ) from error
190            except self._openai.BadRequestError as error:
191                if _is_context_length_error(error):
192                    raise AIProviderError(
193                        "OpenAI request exceeded the model context length. Reduce prompt size and retry."
194                    ) from error
195                raise AIProviderError(f"OpenAI request was rejected: {error}") from error
196            except self._openai.APIError as error:
197                raise AIProviderError(f"OpenAI API error: {error}") from error
198            except Exception as error:
199                raise AIProviderError(f"Unexpected OpenAI provider error: {error}") from error
200
201            if not emitted:
202                raise AIProviderError("OpenAI returned an empty response.")
203
204        return _stream()

Stream generated text chunks from OpenAI.

Arguments:
  • system_prompt: The system-level instruction text.
  • user_prompt: The user-facing prompt with investigation context.
  • max_tokens: Maximum completion tokens.
Yields:

Text chunk strings as they are generated.

Raises:
  • AIProviderError: On empty response or API failure.
def analyze_with_attachments(self, system_prompt: str, user_prompt: str, attachments: list[typing.Mapping[str, str]] | None, max_tokens: int = 256000) -> str:
206    def analyze_with_attachments(
207        self,
208        system_prompt: str,
209        user_prompt: str,
210        attachments: list[Mapping[str, str]] | None,
211        max_tokens: int = DEFAULT_MAX_TOKENS,
212    ) -> str:
213        """Analyze with optional CSV file attachments via the Responses API.
214
215        Args:
216            system_prompt: The system-level instruction text.
217            user_prompt: The user-facing prompt with investigation context.
218            attachments: Optional list of attachment descriptors.
219            max_tokens: Maximum completion tokens.
220
221        Returns:
222            The generated analysis text.
223
224        Raises:
225            AIProviderError: On any API or network failure.
226        """
227        def _request() -> str:
228            return self._request_non_stream(
229                system_prompt=system_prompt,
230                user_prompt=user_prompt,
231                max_tokens=max_tokens,
232                attachments=attachments,
233            )
234
235        return self._run_openai_request(_request)

Analyze with optional CSV file attachments via the Responses API.

Arguments:
  • system_prompt: The system-level instruction text.
  • user_prompt: The user-facing prompt with investigation context.
  • attachments: Optional list of attachment descriptors.
  • max_tokens: Maximum completion tokens.
Returns:

The generated analysis text.

Raises:
  • AIProviderError: On any API or network failure.
def get_model_info(self) -> dict[str, str]:
443    def get_model_info(self) -> dict[str, str]:
444        """Return OpenAI provider and model metadata.
445
446        Returns:
447            A dictionary with ``"provider"`` and ``"model"`` keys.
448        """
449        return {"provider": "openai", "model": self.model}

Return OpenAI provider and model metadata.

Returns:

A dictionary with "provider" and "model" keys.