app.config

Configuration loading and persistence for AIFT.

Manages the application's layered configuration system:

  1. Hardcoded defaults -- DEFAULT_CONFIG provides sensible values for every setting so the application runs out of the box.
  2. YAML file -- User overrides in config.yaml are deep-merged on top of the defaults.
  3. Environment variables -- API keys from ANTHROPIC_API_KEY, OPENAI_API_KEY, and MOONSHOT_API_KEY / KIMI_API_KEY take highest precedence.

The save_config() helper persists the current configuration back to YAML so settings changed through the UI are retained across restarts.

Attributes:
  • PROJECT_ROOT: Resolved path to the repository root directory.
  • DEFAULT_CONFIG: Complete default configuration dictionary.
  • LOGO_FILE_CANDIDATES: Ordered tuple of logo filenames to search for in the images/ directory.
  1"""Configuration loading and persistence for AIFT.
  2
  3Manages the application's layered configuration system:
  4
  51. **Hardcoded defaults** -- ``DEFAULT_CONFIG`` provides sensible values for
  6   every setting so the application runs out of the box.
  72. **YAML file** -- User overrides in ``config.yaml`` are deep-merged on top
  8   of the defaults.
  93. **Environment variables** -- API keys from ``ANTHROPIC_API_KEY``,
 10   ``OPENAI_API_KEY``, and ``MOONSHOT_API_KEY`` / ``KIMI_API_KEY`` take
 11   highest precedence.
 12
 13The :func:`save_config` helper persists the current configuration back to
 14YAML so settings changed through the UI are retained across restarts.
 15
 16Attributes:
 17    PROJECT_ROOT: Resolved path to the repository root directory.
 18    DEFAULT_CONFIG: Complete default configuration dictionary.
 19    LOGO_FILE_CANDIDATES: Ordered tuple of logo filenames to search for in
 20        the ``images/`` directory.
 21"""
 22
 23from __future__ import annotations
 24
 25import logging
 26from copy import deepcopy
 27import os
 28from pathlib import Path
 29from typing import Any
 30
 31import yaml
 32
# Public names exported via ``from app.config import *`` and the module's
# declared public API for documentation tooling.
__all__ = [
    "load_config",
    "save_config",
    "get_default_config",
    "apply_env_overrides",
    "validate_config",
    "ConfigurationError",
    "PROJECT_ROOT",
    "DEFAULT_CONFIG",
    "KNOWN_AI_PROVIDERS",
    "LOGO_FILE_CANDIDATES",
]

# Module-level logger; handler and level configuration is left to the
# application entry point, per standard ``logging`` practice.
logger = logging.getLogger(__name__)
 47
 48
 49class ConfigurationError(Exception):
 50    """Raised when the configuration fails validation.
 51
 52    Attributes:
 53        errors: List of human-readable validation error strings.
 54    """
 55
 56    def __init__(self, errors: list[str]) -> None:
 57        self.errors = errors
 58        joined = "; ".join(errors)
 59        super().__init__(f"Invalid configuration: {joined}")
 60
 61
# Provider identifiers accepted for ``ai.provider``; each also names a
# per-provider settings section under the ``ai`` mapping.
KNOWN_AI_PROVIDERS = ("claude", "openai", "kimi", "local")

# Repository root: two levels up from this file (app/config.py -> repo root).
PROJECT_ROOT = Path(__file__).resolve().parents[1]

# Complete default configuration.  Every setting has a value here so the
# application can run before any config.yaml exists; user overrides are
# deep-merged on top (see load_config()).
DEFAULT_CONFIG: dict[str, Any] = {
    "ai": {
        "provider": "claude",
        "claude": {
            "api_key": "",
            "model": "claude-opus-4-6",
            "attach_csv_as_file": True,
            "request_timeout_seconds": 600,
        },
        "openai": {
            "api_key": "",
            "model": "gpt-5.4",
            "attach_csv_as_file": True,
            "request_timeout_seconds": 600,
        },
        "kimi": {
            "api_key": "",
            "model": "kimi-k2-turbo-preview",
            "base_url": "https://api.moonshot.ai/v1",
            "attach_csv_as_file": True,
            "request_timeout_seconds": 600,
        },
        # OpenAI-compatible local endpoint; default base_url/port suggests an
        # Ollama server, and the longer timeout accommodates slower local
        # inference.  The placeholder api_key satisfies clients that require one.
        "local": {
            "base_url": "http://localhost:11434/v1",
            "model": "llama3.1:70b",
            "api_key": "not-needed",
            "attach_csv_as_file": True,
            "request_timeout_seconds": 3600,
        },
    },
    "server": {
        "port": 5000,
        "host": "127.0.0.1",
    },
    "evidence": {
        # 0 = unlimited (see validate_config's error message for this key).
        "large_file_threshold_mb": 0,
        "csv_output_dir": "",
        "intake_timeout_seconds": 7200,
    },
    "analysis": {
        "ai_max_tokens": 128000,
        "shortened_prompt_cutoff_tokens": 64000,
        "connection_test_max_tokens": 256,
        "citation_spot_check_limit": 20,
        "artifact_deduplication_enabled": True,
        "artifact_ai_columns_config_path": "config/artifact_ai_columns.yaml",
    },
    # NOTE: artifact_profiles are stored as JSON files on disk (in the profiles/
    # directory next to config.yaml), not in this config dict.  No default key is
    # needed here — see _resolve_profiles_root() in routes.py.
}

# Ordered list of logo filenames to look for in the images/ directory.
# The first match wins; the fallback in routes.py picks any image alphabetically.
LOGO_FILE_CANDIDATES = (
    "AIFT Logo - White Text.png",
    "AIFT Logo - Dark Text.png",
)
124
125
126def _deep_merge_inplace(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
127    """Recursively merge *override* into *base* in-place. Returns *base*.
128
129    Nested dictionaries are merged recursively; all other value types in
130    *override* replace the corresponding entry in *base*.  The caller is
131    responsible for passing a copy of *base* if the original must not be
132    mutated.
133
134    Args:
135        base: The target dictionary that will be updated in-place.
136        override: The dictionary whose values take precedence.
137
138    Returns:
139        The mutated *base* dictionary (returned for convenience).
140    """
141    for key, value in override.items():
142        if key in base and isinstance(base[key], dict) and isinstance(value, dict):
143            _deep_merge_inplace(base[key], value)
144        else:
145            base[key] = value
146    return base
147
148
def get_default_config() -> dict[str, Any]:
    """Return an independent deep copy of :data:`DEFAULT_CONFIG` that callers may mutate freely."""
    defaults: dict[str, Any] = deepcopy(DEFAULT_CONFIG)
    return defaults
152
153
def apply_env_overrides(config: dict[str, Any]) -> dict[str, Any]:
    """Copy API keys from environment variables into *config*, in place.

    Reads ``ANTHROPIC_API_KEY`` (claude), ``OPENAI_API_KEY`` (openai), and
    ``MOONSHOT_API_KEY`` with ``KIMI_API_KEY`` as fallback (kimi).  Only
    non-empty values (after stripping whitespace) overwrite the matching
    ``api_key`` entries; missing or blank variables leave *config* untouched.

    Args:
        config: Configuration dictionary to mutate.

    Returns:
        The same *config* object, mutated.
    """
    env_keys = {
        "claude": os.getenv("ANTHROPIC_API_KEY", "").strip(),
        "openai": os.getenv("OPENAI_API_KEY", "").strip(),
        # MOONSHOT_API_KEY takes precedence; KIMI_API_KEY is the fallback.
        "kimi": os.getenv("MOONSHOT_API_KEY", "").strip() or os.getenv("KIMI_API_KEY", "").strip(),
    }

    for provider, key in env_keys.items():
        if key:
            config.setdefault("ai", {}).setdefault(provider, {})["api_key"] = key

    return config
179
180
def validate_config(config: dict[str, Any]) -> list[str]:
    """Validate configuration values and return a list of error descriptions.

    Checks that values in the merged configuration are within acceptable
    ranges and of the correct types.  An empty returned list means the
    configuration is fully valid.

    Because ``bool`` is a subclass of ``int`` in Python, boolean values are
    explicitly rejected wherever a genuine number is required (``server.port``,
    ``analysis.ai_max_tokens``, ``evidence.large_file_threshold_mb``) —
    previously ``True`` would slip through ``isinstance(..., int)`` checks.

    Args:
        config: The fully merged configuration dictionary to validate.

    Returns:
        A list of human-readable validation error strings.  Empty when
        the configuration passes all checks.
    """
    errors: list[str] = []

    def _is_real_int(value: Any) -> bool:
        # bool passes isinstance(..., int); never accept it as a numeric setting.
        return isinstance(value, int) and not isinstance(value, bool)

    # --- server section ---
    server = config.get("server", {})
    if not isinstance(server, dict):
        errors.append("server: expected a mapping")
    else:
        port = server.get("port")
        if not _is_real_int(port) or not (1 <= port <= 65535):
            errors.append(
                f"server.port: must be an integer between 1 and 65535, got {port!r}"
            )

        host = server.get("host")
        if not isinstance(host, str) or not host.strip():
            errors.append(
                f"server.host: must be a non-empty string, got {host!r}"
            )

    # --- ai section ---
    ai = config.get("ai", {})
    if not isinstance(ai, dict):
        errors.append("ai: expected a mapping")
    else:
        provider = ai.get("provider")
        if provider not in KNOWN_AI_PROVIDERS:
            errors.append(
                f"ai.provider: must be one of {KNOWN_AI_PROVIDERS}, got {provider!r}"
            )

        for name in KNOWN_AI_PROVIDERS:
            prov_cfg = ai.get(name)
            if not isinstance(prov_cfg, dict):
                # Absent provider sections are permitted; validate only those present.
                continue

            model = prov_cfg.get("model")
            if not isinstance(model, str) or not model.strip():
                errors.append(
                    f"ai.{name}.model: must be a non-empty string, got {model!r}"
                )

            api_key = prov_cfg.get("api_key")
            if not isinstance(api_key, str):
                errors.append(
                    f"ai.{name}.api_key: must be a string, got {type(api_key).__name__}"
                )

            # base_url is optional (only kimi/local define one by default).
            base_url = prov_cfg.get("base_url")
            if base_url is not None:
                if not isinstance(base_url, str) or not base_url.startswith(
                    ("http://", "https://")
                ):
                    errors.append(
                        f"ai.{name}.base_url: must start with http:// or https://, got {base_url!r}"
                    )

    # --- analysis section ---
    analysis = config.get("analysis", {})
    if isinstance(analysis, dict):
        ai_max_tokens = analysis.get("ai_max_tokens")
        if not _is_real_int(ai_max_tokens) or ai_max_tokens <= 0:
            errors.append(
                f"analysis.ai_max_tokens: must be a positive integer, got {ai_max_tokens!r}"
            )

    # --- evidence section ---
    evidence = config.get("evidence", {})
    if isinstance(evidence, dict):
        threshold = evidence.get("large_file_threshold_mb")
        if (
            not isinstance(threshold, (int, float))
            or isinstance(threshold, bool)
            or threshold < 0
        ):
            errors.append(
                f"evidence.large_file_threshold_mb: must be a non-negative number (0 = unlimited), got {threshold!r}"
            )

    return errors
270
271
def load_config(path: str | Path | None = None, use_env_overrides: bool = True) -> dict[str, Any]:
    """Load the AIFT configuration, layering file and environment over defaults.

    When the YAML file is absent, the defaults are written out to create it.
    Environment-variable API keys are overlaid last unless *use_env_overrides*
    is ``False``, and the final result is validated before being returned.

    Args:
        path: Optional explicit path to a YAML configuration file; falls back
            to ``<PROJECT_ROOT>/config.yaml``.
        use_env_overrides: When ``True`` (the default), API keys from the
            environment win over values from the file.

    Returns:
        The fully merged configuration dictionary.

    Raises:
        ValueError: If the YAML file's root value is not a mapping.
        ConfigurationError: If the merged configuration fails validation.
    """
    target = PROJECT_ROOT / "config.yaml" if path is None else Path(path)
    merged = get_default_config()

    if not target.exists():
        # First run: persist the defaults so the user has a file to edit.
        save_config(merged, target)
    else:
        with target.open("r", encoding="utf-8") as fh:
            loaded = yaml.safe_load(fh) or {}
        if not isinstance(loaded, dict):
            raise ValueError(f"Invalid configuration format in {target}.")
        _deep_merge_inplace(merged, loaded)

    if use_env_overrides:
        apply_env_overrides(merged)

    problems = validate_config(merged)
    if problems:
        for problem in problems:
            logger.error("Config validation: %s", problem)
        raise ConfigurationError(problems)

    return merged
315
316
def save_config(config: dict[str, Any], path: str | Path | None = None) -> None:
    """Write the configuration dictionary out as YAML.

    Any missing parent directories are created first.

    Args:
        config: The configuration dictionary to serialise.
        path: Destination file path; falls back to
            ``<PROJECT_ROOT>/config.yaml``.
    """
    destination = PROJECT_ROOT / "config.yaml" if path is None else Path(path)
    parent = destination.parent
    if parent != Path("."):
        parent.mkdir(parents=True, exist_ok=True)

    with destination.open("w", encoding="utf-8") as fh:
        # sort_keys=False keeps the on-disk order matching DEFAULT_CONFIG.
        yaml.safe_dump(config, fh, sort_keys=False)
def load_config(path: str | pathlib.Path | None = None, use_env_overrides: bool = True) -> dict[str, typing.Any]:
273def load_config(path: str | Path | None = None, use_env_overrides: bool = True) -> dict[str, Any]:
274    """Load the AIFT configuration from a YAML file with layered defaults.
275
276    If the configuration file does not exist, a new file is created from
277    the defaults. Environment variable overrides are applied last unless
278    *use_env_overrides* is ``False``.
279
280    Args:
281        path: Explicit path to a YAML configuration file.  Defaults to
282            ``<PROJECT_ROOT>/config.yaml``.
283        use_env_overrides: When *True* (default), API keys from environment
284            variables take precedence over file values.
285
286    Returns:
287        The fully merged configuration dictionary.
288
289    Raises:
290        ValueError: If the YAML file contains a non-dictionary root value.
291    """
292    config_path = Path(path) if path is not None else PROJECT_ROOT / "config.yaml"
293    config = get_default_config()
294
295    if config_path.exists():
296        with config_path.open("r", encoding="utf-8") as file:
297            parsed = yaml.safe_load(file) or {}
298
299        if not isinstance(parsed, dict):
300            raise ValueError(f"Invalid configuration format in {config_path}.")
301
302        _deep_merge_inplace(config, parsed)
303    else:
304        save_config(config, config_path)
305
306    if use_env_overrides:
307        apply_env_overrides(config)
308
309    errors = validate_config(config)
310    if errors:
311        for error in errors:
312            logger.error("Config validation: %s", error)
313        raise ConfigurationError(errors)
314
315    return config

Load the AIFT configuration from a YAML file with layered defaults.

If the configuration file does not exist, a new file is created from the defaults. Environment variable overrides are applied last unless use_env_overrides is False.

Arguments:
  • path: Explicit path to a YAML configuration file. Defaults to <PROJECT_ROOT>/config.yaml.
  • use_env_overrides: When True (default), API keys from environment variables take precedence over file values.
Returns:
  • The fully merged configuration dictionary.

Raises:
  • ValueError: If the YAML file contains a non-dictionary root value.
def save_config(config: dict[str, typing.Any], path: str | pathlib.Path | None = None) -> None:
318def save_config(config: dict[str, Any], path: str | Path | None = None) -> None:
319    """Persist the configuration dictionary to a YAML file.
320
321    Parent directories are created automatically when they do not exist.
322
323    Args:
324        config: The configuration dictionary to serialise.
325        path: Destination file path.  Defaults to
326            ``<PROJECT_ROOT>/config.yaml``.
327    """
328    config_path = Path(path) if path is not None else PROJECT_ROOT / "config.yaml"
329    if config_path.parent != Path("."):
330        config_path.parent.mkdir(parents=True, exist_ok=True)
331
332    with config_path.open("w", encoding="utf-8") as file:
333        yaml.safe_dump(config, file, sort_keys=False)

Persist the configuration dictionary to a YAML file.

Parent directories are created automatically when they do not exist.

Arguments:
  • config: The configuration dictionary to serialise.
  • path: Destination file path. Defaults to <PROJECT_ROOT>/config.yaml.
def get_default_config() -> dict[str, typing.Any]:
150def get_default_config() -> dict[str, Any]:
151    """Return a deep copy of :data:`DEFAULT_CONFIG` safe for mutation."""
152    return deepcopy(DEFAULT_CONFIG)

Return a deep copy of DEFAULT_CONFIG safe for mutation.

def apply_env_overrides(config: dict[str, typing.Any]) -> dict[str, typing.Any]:
155def apply_env_overrides(config: dict[str, Any]) -> dict[str, Any]:
156    """Overlay API keys from environment variables onto *config*.
157
158    Checks ``ANTHROPIC_API_KEY``, ``OPENAI_API_KEY``, and
159    ``MOONSHOT_API_KEY`` / ``KIMI_API_KEY``.  Non-empty values replace the
160    corresponding ``api_key`` entries in the configuration dictionary.
161
162    Args:
163        config: The configuration dictionary to update in place.
164
165    Returns:
166        The mutated *config* dictionary.
167    """
168    anthropic_api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()
169    openai_api_key = os.getenv("OPENAI_API_KEY", "").strip()
170    kimi_api_key = os.getenv("MOONSHOT_API_KEY", "").strip() or os.getenv("KIMI_API_KEY", "").strip()
171
172    if anthropic_api_key:
173        config.setdefault("ai", {}).setdefault("claude", {})["api_key"] = anthropic_api_key
174    if openai_api_key:
175        config.setdefault("ai", {}).setdefault("openai", {})["api_key"] = openai_api_key
176    if kimi_api_key:
177        config.setdefault("ai", {}).setdefault("kimi", {})["api_key"] = kimi_api_key
178
179    return config

Overlay API keys from environment variables onto config.

Checks ANTHROPIC_API_KEY, OPENAI_API_KEY, and MOONSHOT_API_KEY / KIMI_API_KEY. Non-empty values replace the corresponding api_key entries in the configuration dictionary.

Arguments:
  • config: The configuration dictionary to update in place.
Returns:
  • The mutated config dictionary.

def validate_config(config: dict[str, typing.Any]) -> list[str]:
182def validate_config(config: dict[str, Any]) -> list[str]:
183    """Validate configuration values and return a list of error descriptions.
184
185    Checks that values in the merged configuration are within acceptable
186    ranges and of the correct types.  An empty returned list means the
187    configuration is fully valid.
188
189    Args:
190        config: The fully merged configuration dictionary to validate.
191
192    Returns:
193        A list of human-readable validation error strings.  Empty when
194        the configuration passes all checks.
195    """
196    errors: list[str] = []
197
198    # --- server section ---
199    server = config.get("server", {})
200    if not isinstance(server, dict):
201        errors.append("server: expected a mapping")
202    else:
203        port = server.get("port")
204        if not isinstance(port, int) or not (1 <= port <= 65535):
205            errors.append(
206                f"server.port: must be an integer between 1 and 65535, got {port!r}"
207            )
208
209        host = server.get("host")
210        if not isinstance(host, str) or not host.strip():
211            errors.append(
212                f"server.host: must be a non-empty string, got {host!r}"
213            )
214
215    # --- ai section ---
216    ai = config.get("ai", {})
217    if not isinstance(ai, dict):
218        errors.append("ai: expected a mapping")
219    else:
220        provider = ai.get("provider")
221        if provider not in KNOWN_AI_PROVIDERS:
222            errors.append(
223                f"ai.provider: must be one of {KNOWN_AI_PROVIDERS}, got {provider!r}"
224            )
225
226        for name in KNOWN_AI_PROVIDERS:
227            prov_cfg = ai.get(name)
228            if not isinstance(prov_cfg, dict):
229                continue
230
231            model = prov_cfg.get("model")
232            if not isinstance(model, str) or not model.strip():
233                errors.append(
234                    f"ai.{name}.model: must be a non-empty string, got {model!r}"
235                )
236
237            api_key = prov_cfg.get("api_key")
238            if not isinstance(api_key, str):
239                errors.append(
240                    f"ai.{name}.api_key: must be a string, got {type(api_key).__name__}"
241                )
242
243            base_url = prov_cfg.get("base_url")
244            if base_url is not None:
245                if not isinstance(base_url, str) or not (
246                    base_url.startswith("http://") or base_url.startswith("https://")
247                ):
248                    errors.append(
249                        f"ai.{name}.base_url: must start with http:// or https://, got {base_url!r}"
250                    )
251
252    # --- analysis section ---
253    analysis = config.get("analysis", {})
254    if isinstance(analysis, dict):
255        ai_max_tokens = analysis.get("ai_max_tokens")
256        if not isinstance(ai_max_tokens, int) or ai_max_tokens <= 0:
257            errors.append(
258                f"analysis.ai_max_tokens: must be a positive integer, got {ai_max_tokens!r}"
259            )
260
261    # --- evidence section ---
262    evidence = config.get("evidence", {})
263    if isinstance(evidence, dict):
264        threshold = evidence.get("large_file_threshold_mb")
265        if not isinstance(threshold, (int, float)) or threshold < 0:
266            errors.append(
267                f"evidence.large_file_threshold_mb: must be a non-negative number (0 = unlimited), got {threshold!r}"
268            )
269
270    return errors

Validate configuration values and return a list of error descriptions.

Checks that values in the merged configuration are within acceptable ranges and of the correct types. An empty returned list means the configuration is fully valid.

Arguments:
  • config: The fully merged configuration dictionary to validate.
Returns:
  • A list of human-readable validation error strings. Empty when the configuration passes all checks.

class ConfigurationError(builtins.Exception):
50class ConfigurationError(Exception):
51    """Raised when the configuration fails validation.
52
53    Attributes:
54        errors: List of human-readable validation error strings.
55    """
56
57    def __init__(self, errors: list[str]) -> None:
58        self.errors = errors
59        joined = "; ".join(errors)
60        super().__init__(f"Invalid configuration: {joined}")

Raised when the configuration fails validation.

Attributes:
  • errors: List of human-readable validation error strings.
ConfigurationError(errors: list[str])
57    def __init__(self, errors: list[str]) -> None:
58        self.errors = errors
59        joined = "; ".join(errors)
60        super().__init__(f"Invalid configuration: {joined}")
errors
PROJECT_ROOT = PosixPath('/home/runner/work/AIFT/AIFT')
DEFAULT_CONFIG: dict[str, typing.Any] = {'ai': {'provider': 'claude', 'claude': {'api_key': '', 'model': 'claude-opus-4-6', 'attach_csv_as_file': True, 'request_timeout_seconds': 600}, 'openai': {'api_key': '', 'model': 'gpt-5.4', 'attach_csv_as_file': True, 'request_timeout_seconds': 600}, 'kimi': {'api_key': '', 'model': 'kimi-k2-turbo-preview', 'base_url': 'https://api.moonshot.ai/v1', 'attach_csv_as_file': True, 'request_timeout_seconds': 600}, 'local': {'base_url': 'http://localhost:11434/v1', 'model': 'llama3.1:70b', 'api_key': 'not-needed', 'attach_csv_as_file': True, 'request_timeout_seconds': 3600}}, 'server': {'port': 5000, 'host': '127.0.0.1'}, 'evidence': {'large_file_threshold_mb': 0, 'csv_output_dir': '', 'intake_timeout_seconds': 7200}, 'analysis': {'ai_max_tokens': 128000, 'shortened_prompt_cutoff_tokens': 64000, 'connection_test_max_tokens': 256, 'citation_spot_check_limit': 20, 'artifact_deduplication_enabled': True, 'artifact_ai_columns_config_path': 'config/artifact_ai_columns.yaml'}}
KNOWN_AI_PROVIDERS = ('claude', 'openai', 'kimi', 'local')
LOGO_FILE_CANDIDATES = ('AIFT Logo - White Text.png', 'AIFT Logo - Dark Text.png')