app.ai_providers.utils

Text extraction, attachment handling, and response processing utilities.

This module contains all functions for extracting text from AI provider responses (Anthropic and OpenAI formats), normalizing and inlining file attachments, stripping reasoning blocks from local model output, and shared Responses API file-upload logic used by OpenAI, Kimi, and Local providers.

Attributes:
  • _LEADING_REASONING_BLOCK_RE: Regex pattern matching leading <think>, <thinking>, <reasoning> XML blocks or fenced code blocks.
  1"""Text extraction, attachment handling, and response processing utilities.
  2
  3This module contains all functions for extracting text from AI provider
  4responses (Anthropic and OpenAI formats), normalizing and inlining file
  5attachments, stripping reasoning blocks from local model output, and
  6shared Responses API file-upload logic used by OpenAI, Kimi, and Local
  7providers.
  8
  9Attributes:
 10    _LEADING_REASONING_BLOCK_RE: Regex pattern matching leading ``<think>``,
 11        ``<thinking>``, ``<reasoning>`` XML blocks or fenced code blocks.
 12"""
 13
 14from __future__ import annotations
 15
 16import logging
 17import re
 18from pathlib import Path
 19from typing import Any, Mapping
 20
# Module-level logger, standard one-per-module pattern.
logger = logging.getLogger(__name__)

# Matches one or more reasoning blocks at the very start of a string:
# either XML-style <think>/<thinking>/<reasoning> elements (tags may carry
# attributes and internal whitespace) or fenced code blocks labelled
# ``think``/``thinking``/``reasoning``.  DOTALL lets ``.*?`` span newlines
# inside a block; IGNORECASE accepts any tag casing.  Used by
# _strip_leading_reasoning_blocks with count=1 to drop the leading run.
_LEADING_REASONING_BLOCK_RE = re.compile(
    r"^\s*(?:"
    r"(?:<\s*(?:think|thinking|reasoning)\b[^>]*>.*?<\s*/\s*(?:think|thinking|reasoning)\s*>\s*)"
    r"|(?:```(?:think|thinking|reasoning)[^\n]*\n.*?```\s*)"
    r")+",
    flags=re.IGNORECASE | re.DOTALL,
)
 30
 31
 32# ---------------------------------------------------------------------------
 33# Anthropic text extraction
 34# ---------------------------------------------------------------------------
 35
 36
 37def _extract_anthropic_text(response: Any) -> str:
 38    """Extract the concatenated text from an Anthropic Messages API response.
 39
 40    Iterates over content blocks in the response, collecting text from
 41    both object-style blocks (with a ``.text`` attribute) and dict-style
 42    blocks (with a ``"text"`` key).
 43
 44    Args:
 45        response: The Anthropic ``Message`` response object.
 46
 47    Returns:
 48        The joined text content, stripped of whitespace.
 49    """
 50    content = getattr(response, "content", None)
 51    if not isinstance(content, list):
 52        return ""
 53
 54    chunks: list[str] = []
 55    for block in content:
 56        text = getattr(block, "text", None)
 57        if isinstance(text, str):
 58            chunks.append(text)
 59            continue
 60
 61        if isinstance(block, dict):
 62            block_text = block.get("text")
 63            if isinstance(block_text, str):
 64                chunks.append(block_text)
 65
 66    return "".join(chunks).strip()
 67
 68
 69def _extract_anthropic_stream_text(event: Any) -> str:
 70    """Extract text deltas from Anthropic streamed events.
 71
 72    Handles ``content_block_delta``, ``content_block_start``, and generic
 73    delta events from the Anthropic streaming API.
 74
 75    Args:
 76        event: A single streamed event from the Anthropic Messages API.
 77
 78    Returns:
 79        The text delta string, or empty string if no text content.
 80    """
 81    if event is None:
 82        return ""
 83
 84    event_type = getattr(event, "type", None)
 85    if event_type is None and isinstance(event, dict):
 86        event_type = event.get("type")
 87
 88    if event_type == "content_block_delta":
 89        delta = getattr(event, "delta", None)
 90        if delta is None and isinstance(event, dict):
 91            delta = event.get("delta")
 92        text = getattr(delta, "text", None)
 93        if text is None and isinstance(delta, dict):
 94            text = delta.get("text")
 95        if isinstance(text, str):
 96            return text
 97
 98    if event_type == "content_block_start":
 99        content_block = getattr(event, "content_block", None)
100        if content_block is None and isinstance(event, dict):
101            content_block = event.get("content_block")
102        text = getattr(content_block, "text", None)
103        if text is None and isinstance(content_block, dict):
104            text = content_block.get("text")
105        if isinstance(text, str):
106            return text
107
108    delta = getattr(event, "delta", None)
109    if delta is None and isinstance(event, dict):
110        delta = event.get("delta")
111    if delta is not None:
112        text = getattr(delta, "text", None)
113        if text is None and isinstance(delta, dict):
114            text = delta.get("text")
115        if isinstance(text, str):
116            return text
117
118    return ""
119
120
121# ---------------------------------------------------------------------------
122# OpenAI text extraction
123# ---------------------------------------------------------------------------
124
125
126def _coerce_openai_text(value: Any) -> str:
127    """Normalize OpenAI-compatible response text payloads into plain strings.
128
129    Handles string values, lists of text items (objects or dicts), and
130    returns an empty string for unsupported types.
131
132    Args:
133        value: A text value from an OpenAI-compatible response.
134
135    Returns:
136        The concatenated plain text string.
137    """
138    if isinstance(value, str):
139        return value
140
141    if isinstance(value, list):
142        parts: list[str] = []
143        for item in value:
144            if isinstance(item, str):
145                parts.append(item)
146                continue
147            item_text = getattr(item, "text", None)
148            if isinstance(item_text, str):
149                parts.append(item_text)
150                continue
151            if isinstance(item, dict):
152                dict_text = item.get("text")
153                if isinstance(dict_text, str):
154                    parts.append(dict_text)
155                    continue
156                dict_content = item.get("content")
157                if isinstance(dict_content, str):
158                    parts.append(dict_content)
159        return "".join(parts)
160
161    return ""
162
163
164def _extract_openai_text(response: Any) -> str:
165    """Extract the generated text from an OpenAI Chat Completions API response.
166
167    Handles plain string content, structured content arrays, and
168    reasoning-model fallback fields (``reasoning_content``, ``reasoning``,
169    ``refusal``).
170
171    Args:
172        response: The OpenAI ``ChatCompletion`` response object.
173
174    Returns:
175        The extracted text content, stripped of whitespace.
176    """
177    choices = getattr(response, "choices", None)
178    if not choices:
179        return ""
180
181    first_choice = choices[0]
182    message = getattr(first_choice, "message", None)
183    if message is None and isinstance(first_choice, dict):
184        message = first_choice.get("message")
185
186    if message is None:
187        return ""
188
189    content = getattr(message, "content", None)
190    if content is None and isinstance(message, dict):
191        content = message.get("content")
192
193    if isinstance(content, str):
194        stripped_content = content.strip()
195        if stripped_content:
196            return stripped_content
197
198    if isinstance(content, list):
199        parts: list[str] = []
200        for chunk in content:
201            text = getattr(chunk, "text", None)
202            if isinstance(text, str):
203                parts.append(text)
204                continue
205
206            if isinstance(chunk, dict):
207                chunk_text = chunk.get("text")
208                if isinstance(chunk_text, str):
209                    parts.append(chunk_text)
210                    continue
211                chunk_content = chunk.get("content")
212                if isinstance(chunk_content, str):
213                    parts.append(chunk_content)
214        joined = "".join(parts).strip()
215        if joined:
216            return joined
217
218    for field_name in ("reasoning_content", "reasoning", "refusal"):
219        field_value = getattr(message, field_name, None)
220        if field_value is None and isinstance(message, dict):
221            field_value = message.get(field_name)
222        text = _coerce_openai_text(field_value)
223        stripped = text.strip()
224        if stripped:
225            return stripped
226
227    return ""
228
229
230def _extract_openai_delta_text(delta: Any, field_names: tuple[str, ...]) -> str:
231    """Extract streaming delta text for one of the requested fields.
232
233    Args:
234        delta: The streaming chunk delta object or dict.
235        field_names: Tuple of field names to check in priority order.
236
237    Returns:
238        The first non-empty text value found, or empty string.
239    """
240    if delta is None:
241        return ""
242
243    for field_name in field_names:
244        value = getattr(delta, field_name, None)
245        if value is None and isinstance(delta, dict):
246            value = delta.get(field_name)
247        text = _coerce_openai_text(value)
248        if text:
249            return text
250    return ""
251
252
def _extract_openai_responses_text(response: Any) -> str:
    """Extract output text from OpenAI Responses API payloads.

    First attempts the ``output_text`` field (attribute or dict key), then
    falls back to iterating over structured output items.

    Args:
        response: The OpenAI Responses API response object or dict.

    Returns:
        The extracted and stripped text content.
    """
    output_text = getattr(response, "output_text", None)
    if output_text is None and isinstance(response, dict):
        # Fix: dict payloads previously skipped the output_text shortcut even
        # though this function documents (and everywhere else supports) both
        # SDK objects and plain dicts.
        output_text = response.get("output_text")
    text = _coerce_openai_text(output_text).strip()
    if text:
        return text

    output_items = getattr(response, "output", None)
    if output_items is None and isinstance(response, dict):
        output_items = response.get("output")
    if not isinstance(output_items, list):
        return ""

    parts: list[str] = []
    for item in output_items:
        content = getattr(item, "content", None)
        if content is None and isinstance(item, dict):
            content = item.get("content")
        if not isinstance(content, list):
            continue

        for block in content:
            # Only text-bearing blocks contribute; tool calls etc. are skipped.
            block_type = getattr(block, "type", None)
            if block_type is None and isinstance(block, dict):
                block_type = block.get("type")
            if str(block_type) not in {"output_text", "text"}:
                continue

            block_text = getattr(block, "text", None)
            if block_text is None and isinstance(block, dict):
                block_text = block.get("text")
            normalized = _coerce_openai_text(block_text)
            if normalized:
                parts.append(normalized)

    return "".join(parts).strip()
299
300
301# ---------------------------------------------------------------------------
302# Reasoning block handling
303# ---------------------------------------------------------------------------
304
305
def _strip_leading_reasoning_blocks(text: str) -> str:
    """Remove leading model-thinking blocks from OpenAI-compatible output.

    Some local reasoning models emit ``<think>`` or ``<reasoning>`` blocks
    at the start of their output. This strips those blocks.

    Args:
        text: Raw model output that may begin with reasoning blocks.

    Returns:
        The text with leading reasoning blocks removed.
    """
    cleaned = str(text or "").strip()
    if not cleaned:
        return ""
    # count=1 is enough: the pattern itself greedily matches a run of
    # consecutive leading reasoning blocks.
    return _LEADING_REASONING_BLOCK_RE.sub("", cleaned, count=1).strip()
322
323
324def _clean_streamed_answer_text(answer_text: str, thinking_text: str) -> str:
325    """Drop duplicated streamed thinking text from the final answer channel.
326
327    Args:
328        answer_text: The accumulated answer-channel text from streaming.
329        thinking_text: The accumulated thinking-channel text from streaming.
330
331    Returns:
332        The cleaned answer text with duplicated reasoning removed.
333    """
334    answer = str(answer_text or "").strip()
335    if not answer:
336        return ""
337
338    thinking = str(thinking_text or "").strip()
339    if thinking and len(thinking) >= 24 and answer.startswith(thinking):
340        answer = answer[len(thinking) :].lstrip()
341
342    return _strip_leading_reasoning_blocks(answer)
343
344
345# ---------------------------------------------------------------------------
346# Attachment normalization
347# ---------------------------------------------------------------------------
348
349
350def normalize_attachment_input(attachment: Mapping[str, str] | Any) -> dict[str, str] | None:
351    """Validate and normalize a single attachment descriptor.
352
353    Args:
354        attachment: A raw attachment descriptor with at least a ``"path"`` key.
355
356    Returns:
357        A normalized dict with ``"path"``, ``"name"``, ``"mime_type"`` keys,
358        or ``None`` if invalid.
359    """
360    if not isinstance(attachment, Mapping):
361        return None
362
363    path_value = str(attachment.get("path", "")).strip()
364    if not path_value:
365        return None
366
367    path = Path(path_value)
368    if not path.exists() or not path.is_file():
369        return None
370
371    filename = str(attachment.get("name", "")).strip() or path.name
372    mime_type = str(attachment.get("mime_type", "")).strip() or "text/csv"
373    return {
374        "path": str(path),
375        "name": filename,
376        "mime_type": mime_type,
377    }
378
379
380def normalize_attachment_inputs(
381    attachments: list[Mapping[str, str]] | None,
382) -> list[dict[str, str]]:
383    """Validate and normalize a list of attachment descriptors.
384
385    Args:
386        attachments: Optional list of raw attachment descriptors.
387
388    Returns:
389        A list of validated attachment dicts. May be empty.
390    """
391    normalized: list[dict[str, str]] = []
392    for attachment in attachments or []:
393        candidate = normalize_attachment_input(attachment)
394        if candidate is not None:
395            normalized.append(candidate)
396    return normalized
397
398
399def _prepare_openai_attachment_upload(attachment: Mapping[str, str]) -> tuple[str, str, bool]:
400    """Normalize OpenAI attachment upload metadata.
401
402    Some OpenAI Responses API models reject ``.csv`` file extensions.
403    This converts CSV metadata to TXT format while keeping contents unchanged.
404
405    Args:
406        attachment: A normalized attachment descriptor.
407
408    Returns:
409        A 3-tuple of ``(upload_name, upload_mime_type, was_converted)``.
410    """
411    attachment_path = Path(str(attachment.get("path", "")))
412    original_name = str(attachment.get("name", "")).strip() or attachment_path.name or "attachment"
413    original_mime_type = str(attachment.get("mime_type", "")).strip() or "text/plain"
414
415    lowered_name = original_name.lower()
416    lowered_path_suffix = attachment_path.suffix.lower()
417    lowered_mime_type = original_mime_type.lower()
418    is_csv_attachment = (
419        lowered_name.endswith(".csv")
420        or lowered_path_suffix == ".csv"
421        or lowered_mime_type in {"text/csv", "application/csv"}
422    )
423    if not is_csv_attachment:
424        return original_name, original_mime_type, False
425
426    stem = Path(original_name).stem or Path(attachment_path.name).stem or "attachment"
427    return f"{stem}.txt", "text/plain", True
428
429
def _inline_attachment_data_into_prompt(
    user_prompt: str,
    attachments: list[Mapping[str, str]] | None,
) -> tuple[str, bool]:
    """Append attachment file contents to the user prompt for text-only fallback.

    All attachment data is inlined without truncation -- in DFIR, every row
    matters. When the resulting prompt is too large, the caller uses chunked
    analysis to split it.

    Args:
        user_prompt: The original user prompt text.
        attachments: Optional list of attachment descriptors.

    Returns:
        A 2-tuple of ``(modified_prompt, was_inlined)``.
    """
    valid_attachments = normalize_attachment_inputs(attachments)
    if not valid_attachments:
        return user_prompt, False

    sections: list[str] = []
    for descriptor in valid_attachments:
        file_path = Path(descriptor["path"])
        display_name = str(descriptor.get("name", "")).strip() or file_path.name
        try:
            # utf-8-sig transparently drops a BOM; errors="replace" keeps the
            # read from failing on stray bytes. Unreadable files are skipped.
            file_text = file_path.read_text(encoding="utf-8-sig", errors="replace")
        except OSError:
            continue
        if not file_text.strip():
            continue

        section_lines = [
            f"--- BEGIN ATTACHMENT: {display_name} ---",
            file_text.rstrip(),
            f"--- END ATTACHMENT: {display_name} ---",
        ]
        sections.append("\n".join(section_lines))

    if not sections:
        return user_prompt, False

    combined = "\n\n".join(
        [
            user_prompt.rstrip(),
            "File attachments were unavailable, so the attachment contents are inlined below.",
            "\n\n".join(sections),
        ]
    ).strip()
    return combined, True
486
487
def upload_and_request_via_responses_api(
    client: Any,
    openai_module: Any,
    model: str,
    normalized_attachments: list[dict[str, str]],
    system_prompt: str,
    user_prompt: str,
    max_tokens: int,
    provider_name: str,
    upload_purpose: str = "assistants",
    convert_csv_to_txt: bool = False,
) -> str:
    """Upload attachments and make a Responses API request.

    This is the shared implementation for file-attachment mode used by
    OpenAI, Kimi, and Local providers. Uploads each attachment as a file,
    builds a Responses API request with ``input_file`` references, extracts
    the output text, and cleans up uploaded files.

    Args:
        client: The ``openai.OpenAI`` SDK client instance.
        openai_module: The ``openai`` module (for exception types).
        model: The model identifier to use for the Responses API request.
        normalized_attachments: Validated attachment descriptors.
        system_prompt: The system-level instruction text.
        user_prompt: The user-facing prompt text.
        max_tokens: Maximum completion tokens.
        provider_name: Human-readable provider name for error messages.
        upload_purpose: The ``purpose`` parameter for file uploads.
        convert_csv_to_txt: If ``True``, convert CSV file metadata to TXT
            format before uploading (used by OpenAI).

    Returns:
        The generated text from the Responses API.

    Raises:
        AIProviderError: If the response is empty or file upload fails.
    """
    # Local import avoids a circular dependency with the base module.
    from .base import AIProviderError, _resolve_completion_token_retry_limit

    uploaded_file_ids: list[str] = []
    try:
        for attachment in normalized_attachments:
            source_path = Path(attachment["path"])

            if not convert_csv_to_txt:
                upload_name = attachment["name"]
                upload_mime_type = attachment["mime_type"]
            else:
                upload_name, upload_mime_type, converted = _prepare_openai_attachment_upload(attachment)
                if converted:
                    logger.debug(
                        "Converting %s attachment upload from CSV to TXT: %s -> %s",
                        provider_name,
                        attachment.get("name", source_path.name),
                        upload_name,
                    )

            with source_path.open("rb") as file_handle:
                upload_result = client.files.create(
                    file=(upload_name, file_handle.read(), upload_mime_type),
                    purpose=upload_purpose,
                )

            uploaded_id = getattr(upload_result, "id", None)
            if uploaded_id is None and isinstance(upload_result, dict):
                uploaded_id = upload_result.get("id")
            if not isinstance(uploaded_id, str) or not uploaded_id.strip():
                raise AIProviderError(f"{provider_name} file upload returned no file id.")
            uploaded_file_ids.append(uploaded_id)

        # The user turn carries the prompt plus one input_file per upload.
        user_content: list[dict[str, str]] = [{"type": "input_text", "text": user_prompt}]
        user_content.extend(
            {"type": "input_file", "file_id": file_id} for file_id in uploaded_file_ids
        )

        request_payload: dict[str, Any] = {
            "model": model,
            "input": [
                {"role": "system", "content": [{"type": "input_text", "text": system_prompt}]},
                {"role": "user", "content": user_content},
            ],
            "max_output_tokens": max_tokens,
        }

        try:
            response = client.responses.create(**request_payload)
        except openai_module.BadRequestError as error:
            # Some backends cap max_output_tokens; retry once at the limit
            # the error message advertises, otherwise re-raise.
            retry_limit = _resolve_completion_token_retry_limit(
                error=error,
                requested_tokens=max_tokens,
            )
            if retry_limit is None:
                raise
            logger.warning(
                "%s rejected max_output_tokens=%d; retrying with max_output_tokens=%d.",
                provider_name,
                max_tokens,
                retry_limit,
            )
            request_payload["max_output_tokens"] = retry_limit
            response = client.responses.create(**request_payload)

        extracted = _extract_openai_responses_text(response)
        if not extracted:
            raise AIProviderError(
                f"{provider_name} returned an empty response for file-attachment mode."
            )
        return extracted
    finally:
        # Best-effort cleanup; deletion failures must never mask the result
        # (or the original exception).
        for file_id in uploaded_file_ids:
            try:
                client.files.delete(file_id)
            except Exception:
                continue
logger = <Logger app.ai_providers.utils (WARNING)>
def normalize_attachment_input(attachment: Union[Mapping[str, str], Any]) -> dict[str, str] | None:
351def normalize_attachment_input(attachment: Mapping[str, str] | Any) -> dict[str, str] | None:
352    """Validate and normalize a single attachment descriptor.
353
354    Args:
355        attachment: A raw attachment descriptor with at least a ``"path"`` key.
356
357    Returns:
358        A normalized dict with ``"path"``, ``"name"``, ``"mime_type"`` keys,
359        or ``None`` if invalid.
360    """
361    if not isinstance(attachment, Mapping):
362        return None
363
364    path_value = str(attachment.get("path", "")).strip()
365    if not path_value:
366        return None
367
368    path = Path(path_value)
369    if not path.exists() or not path.is_file():
370        return None
371
372    filename = str(attachment.get("name", "")).strip() or path.name
373    mime_type = str(attachment.get("mime_type", "")).strip() or "text/csv"
374    return {
375        "path": str(path),
376        "name": filename,
377        "mime_type": mime_type,
378    }

Validate and normalize a single attachment descriptor.

Arguments:
  • attachment: A raw attachment descriptor with at least a "path" key.
Returns:

A normalized dict with "path", "name", "mime_type" keys, or None if invalid.

def normalize_attachment_inputs( attachments: list[typing.Mapping[str, str]] | None) -> list[dict[str, str]]:
381def normalize_attachment_inputs(
382    attachments: list[Mapping[str, str]] | None,
383) -> list[dict[str, str]]:
384    """Validate and normalize a list of attachment descriptors.
385
386    Args:
387        attachments: Optional list of raw attachment descriptors.
388
389    Returns:
390        A list of validated attachment dicts. May be empty.
391    """
392    normalized: list[dict[str, str]] = []
393    for attachment in attachments or []:
394        candidate = normalize_attachment_input(attachment)
395        if candidate is not None:
396            normalized.append(candidate)
397    return normalized

Validate and normalize a list of attachment descriptors.

Arguments:
  • attachments: Optional list of raw attachment descriptors.
Returns:

A list of validated attachment dicts. May be empty.

def upload_and_request_via_responses_api( client: Any, openai_module: Any, model: str, normalized_attachments: list[dict[str, str]], system_prompt: str, user_prompt: str, max_tokens: int, provider_name: str, upload_purpose: str = 'assistants', convert_csv_to_txt: bool = False) -> str:
489def upload_and_request_via_responses_api(
490    client: Any,
491    openai_module: Any,
492    model: str,
493    normalized_attachments: list[dict[str, str]],
494    system_prompt: str,
495    user_prompt: str,
496    max_tokens: int,
497    provider_name: str,
498    upload_purpose: str = "assistants",
499    convert_csv_to_txt: bool = False,
500) -> str:
501    """Upload attachments and make a Responses API request.
502
503    This is the shared implementation for file-attachment mode used by
504    OpenAI, Kimi, and Local providers. Uploads each attachment as a file,
505    builds a Responses API request with ``input_file`` references, extracts
506    the output text, and cleans up uploaded files.
507
508    Args:
509        client: The ``openai.OpenAI`` SDK client instance.
510        openai_module: The ``openai`` module (for exception types).
511        model: The model identifier to use for the Responses API request.
512        normalized_attachments: Validated attachment descriptors.
513        system_prompt: The system-level instruction text.
514        user_prompt: The user-facing prompt text.
515        max_tokens: Maximum completion tokens.
516        provider_name: Human-readable provider name for error messages.
517        upload_purpose: The ``purpose`` parameter for file uploads.
518        convert_csv_to_txt: If ``True``, convert CSV file metadata to TXT
519            format before uploading (used by OpenAI).
520
521    Returns:
522        The generated text from the Responses API.
523
524    Raises:
525        AIProviderError: If the response is empty or file upload fails.
526    """
527    from .base import AIProviderError, _resolve_completion_token_retry_limit
528
529    uploaded_file_ids: list[str] = []
530    try:
531        for attachment in normalized_attachments:
532            attachment_path = Path(attachment["path"])
533
534            if convert_csv_to_txt:
535                upload_name, upload_mime_type, converted = _prepare_openai_attachment_upload(attachment)
536                if converted:
537                    logger.debug(
538                        "Converting %s attachment upload from CSV to TXT: %s -> %s",
539                        provider_name,
540                        attachment.get("name", attachment_path.name),
541                        upload_name,
542                    )
543            else:
544                upload_name = attachment["name"]
545                upload_mime_type = attachment["mime_type"]
546
547            with attachment_path.open("rb") as handle:
548                uploaded = client.files.create(
549                    file=(upload_name, handle.read(), upload_mime_type),
550                    purpose=upload_purpose,
551                )
552
553            file_id = getattr(uploaded, "id", None)
554            if file_id is None and isinstance(uploaded, dict):
555                file_id = uploaded.get("id")
556            if not isinstance(file_id, str) or not file_id.strip():
557                raise AIProviderError(f"{provider_name} file upload returned no file id.")
558            uploaded_file_ids.append(file_id)
559
560        user_content: list[dict[str, str]] = [{"type": "input_text", "text": user_prompt}]
561        for file_id in uploaded_file_ids:
562            user_content.append({"type": "input_file", "file_id": file_id})
563
564        response_request: dict[str, Any] = {
565            "model": model,
566            "input": [
567                {"role": "system", "content": [{"type": "input_text", "text": system_prompt}]},
568                {"role": "user", "content": user_content},
569            ],
570            "max_output_tokens": max_tokens,
571        }
572
573        try:
574            response = client.responses.create(**response_request)
575        except openai_module.BadRequestError as error:
576            retry_token_count = _resolve_completion_token_retry_limit(
577                error=error,
578                requested_tokens=max_tokens,
579            )
580            if retry_token_count is None:
581                raise
582            logger.warning(
583                "%s rejected max_output_tokens=%d; retrying with max_output_tokens=%d.",
584                provider_name,
585                max_tokens,
586                retry_token_count,
587            )
588            response_request["max_output_tokens"] = retry_token_count
589            response = client.responses.create(**response_request)
590
591        text = _extract_openai_responses_text(response)
592        if not text:
593            raise AIProviderError(
594                f"{provider_name} returned an empty response for file-attachment mode."
595            )
596        return text
597    finally:
598        for uploaded_file_id in uploaded_file_ids:
599            try:
600                client.files.delete(uploaded_file_id)
601            except Exception:
602                continue

Upload attachments and make a Responses API request.

This is the shared implementation for file-attachment mode used by OpenAI, Kimi, and Local providers. Uploads each attachment as a file, builds a Responses API request with input_file references, extracts the output text, and cleans up uploaded files.

Arguments:
  • client: The openai.OpenAI SDK client instance.
  • openai_module: The openai module (for exception types).
  • model: The model identifier to use for the Responses API request.
  • normalized_attachments: Validated attachment descriptors.
  • system_prompt: The system-level instruction text.
  • user_prompt: The user-facing prompt text.
  • max_tokens: Maximum completion tokens.
  • provider_name: Human-readable provider name for error messages.
  • upload_purpose: The purpose parameter for file uploads.
  • convert_csv_to_txt: If True, convert CSV file metadata to TXT format before uploading (used by OpenAI).
Returns:

The generated text from the Responses API.

Raises:
  • AIProviderError: If the response is empty or file upload fails.