app.config
Configuration loading and persistence for AIFT.
Manages the application's layered configuration system:
- Hardcoded defaults -- `DEFAULT_CONFIG` provides sensible values for every
  setting so the application runs out of the box.
- YAML file -- User overrides in `config.yaml` are deep-merged on top of the
  defaults.
- Environment variables -- API keys from `ANTHROPIC_API_KEY`,
  `OPENAI_API_KEY`, and `MOONSHOT_API_KEY` / `KIMI_API_KEY` take highest
  precedence.
The save_config() helper persists the current configuration back to
YAML so settings changed through the UI are retained across restarts.
Attributes:
- PROJECT_ROOT: Resolved path to the repository root directory.
- DEFAULT_CONFIG: Complete default configuration dictionary.
- LOGO_FILE_CANDIDATES: Ordered tuple of logo filenames to search for in
  the `images/` directory.
"""Configuration loading and persistence for AIFT.

Settings are resolved from three layers:

1. **Hardcoded defaults** -- ``DEFAULT_CONFIG`` supplies a sensible value
   for every setting so the application runs out of the box.
2. **YAML file** -- user overrides in ``config.yaml`` are deep-merged on
   top of the defaults.
3. **Environment variables** -- API keys from ``ANTHROPIC_API_KEY``,
   ``OPENAI_API_KEY``, and ``MOONSHOT_API_KEY`` / ``KIMI_API_KEY`` take
   the highest precedence.

:func:`save_config` writes the current configuration back to YAML so that
settings changed through the UI survive a restart.

Attributes:
    PROJECT_ROOT: Resolved path to the repository root directory.
    DEFAULT_CONFIG: Complete default configuration dictionary.
    LOGO_FILE_CANDIDATES: Ordered tuple of logo filenames to search for in
        the ``images/`` directory.
"""

from __future__ import annotations

import logging
import os
from copy import deepcopy
from pathlib import Path
from typing import Any

import yaml

__all__ = [
    "load_config",
    "save_config",
    "get_default_config",
    "apply_env_overrides",
    "validate_config",
    "ConfigurationError",
    "PROJECT_ROOT",
    "DEFAULT_CONFIG",
    "KNOWN_AI_PROVIDERS",
    "LOGO_FILE_CANDIDATES",
]

logger = logging.getLogger(__name__)


class ConfigurationError(Exception):
    """Raised when the merged configuration fails validation.

    Attributes:
        errors: Human-readable validation error messages.
    """

    def __init__(self, errors: list[str]) -> None:
        self.errors = errors
        super().__init__("Invalid configuration: " + "; ".join(errors))


# Providers the UI and the validator know how to handle.
KNOWN_AI_PROVIDERS = ("claude", "openai", "kimi", "local")

# Repository root: this module lives one directory below it.
PROJECT_ROOT = Path(__file__).resolve().parents[1]

DEFAULT_CONFIG: dict[str, Any] = {
    "ai": {
        "provider": "claude",
        "claude": {
            "api_key": "",
            "model": "claude-opus-4-6",
            "attach_csv_as_file": True,
            "request_timeout_seconds": 600,
        },
        "openai": {
            "api_key": "",
            "model": "gpt-5.4",
            "attach_csv_as_file": True,
            "request_timeout_seconds": 600,
        },
        "kimi": {
            "api_key": "",
            "model": "kimi-k2-turbo-preview",
            "base_url": "https://api.moonshot.ai/v1",
            "attach_csv_as_file": True,
            "request_timeout_seconds": 600,
        },
        "local": {
            "base_url": "http://localhost:11434/v1",
            "model": "llama3.1:70b",
            "api_key": "not-needed",
            "attach_csv_as_file": True,
            # Local inference can be slow; allow a much longer timeout.
            "request_timeout_seconds": 3600,
        },
    },
    "server": {
        "port": 5000,
        "host": "127.0.0.1",
    },
    "evidence": {
        "large_file_threshold_mb": 0,
        "csv_output_dir": "",
        "intake_timeout_seconds": 7200,
    },
    "analysis": {
        "ai_max_tokens": 128000,
        "shortened_prompt_cutoff_tokens": 64000,
        "connection_test_max_tokens": 256,
        "citation_spot_check_limit": 20,
        "artifact_deduplication_enabled": True,
        "artifact_ai_columns_config_path": "config/artifact_ai_columns.yaml",
    },
    # NOTE: artifact_profiles are stored as JSON files on disk (in the profiles/
    # directory next to config.yaml), not in this config dict. No default key is
    # needed here — see _resolve_profiles_root() in routes.py.
}

# Ordered list of logo filenames to look for in the images/ directory.
# The first match wins; the fallback in routes.py picks any image alphabetically.
LOGO_FILE_CANDIDATES = (
    "AIFT Logo - White Text.png",
    "AIFT Logo - Dark Text.png",
)


def _deep_merge_inplace(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
    """Recursively merge *override* into *base* in-place. Returns *base*.

    Nested dictionaries are merged recursively; all other value types in
    *override* replace the corresponding entry in *base*. The caller is
    responsible for passing a copy of *base* if the original must not be
    mutated.

    Args:
        base: The target dictionary that will be updated in-place.
        override: The dictionary whose values take precedence.

    Returns:
        The mutated *base* dictionary (returned for convenience).
    """
    for key, value in override.items():
        if key in base and isinstance(base[key], dict) and isinstance(value, dict):
            _deep_merge_inplace(base[key], value)
        else:
            base[key] = value
    return base


def get_default_config() -> dict[str, Any]:
    """Return a deep copy of :data:`DEFAULT_CONFIG` safe for mutation."""
    # Deep copy so callers can mutate nested dicts without corrupting
    # the shared module-level defaults.
    return deepcopy(DEFAULT_CONFIG)


def apply_env_overrides(config: dict[str, Any]) -> dict[str, Any]:
    """Overlay API keys from environment variables onto *config*.

    Checks ``ANTHROPIC_API_KEY``, ``OPENAI_API_KEY``, and
    ``MOONSHOT_API_KEY`` / ``KIMI_API_KEY``. Non-empty values replace the
    corresponding ``api_key`` entries in the configuration dictionary.

    Args:
        config: The configuration dictionary to update in place.

    Returns:
        The mutated *config* dictionary.
    """
    anthropic_api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()
    openai_api_key = os.getenv("OPENAI_API_KEY", "").strip()
    # MOONSHOT_API_KEY is the official variable; KIMI_API_KEY is a fallback alias.
    kimi_api_key = os.getenv("MOONSHOT_API_KEY", "").strip() or os.getenv("KIMI_API_KEY", "").strip()

    if anthropic_api_key:
        config.setdefault("ai", {}).setdefault("claude", {})["api_key"] = anthropic_api_key
    if openai_api_key:
        config.setdefault("ai", {}).setdefault("openai", {})["api_key"] = openai_api_key
    if kimi_api_key:
        config.setdefault("ai", {}).setdefault("kimi", {})["api_key"] = kimi_api_key

    return config


def validate_config(config: dict[str, Any]) -> list[str]:
    """Validate configuration values and return a list of error descriptions.

    Checks that values in the merged configuration are within acceptable
    ranges and of the correct types. An empty returned list means the
    configuration is fully valid.

    Booleans are explicitly rejected for integer settings: ``bool`` is a
    subclass of ``int`` in Python, so without the extra check ``True``
    would validate as port 1.

    Args:
        config: The fully merged configuration dictionary to validate.

    Returns:
        A list of human-readable validation error strings. Empty when
        the configuration passes all checks.
    """
    errors: list[str] = []

    def _plain_int(value: Any) -> bool:
        # bool is an int subclass; True/False must not pass integer checks.
        return isinstance(value, int) and not isinstance(value, bool)

    # --- server section ---
    server = config.get("server", {})
    if not isinstance(server, dict):
        errors.append("server: expected a mapping")
    else:
        port = server.get("port")
        if not _plain_int(port) or not (1 <= port <= 65535):
            errors.append(
                f"server.port: must be an integer between 1 and 65535, got {port!r}"
            )

        host = server.get("host")
        if not isinstance(host, str) or not host.strip():
            errors.append(
                f"server.host: must be a non-empty string, got {host!r}"
            )

    # --- ai section ---
    ai = config.get("ai", {})
    if not isinstance(ai, dict):
        errors.append("ai: expected a mapping")
    else:
        provider = ai.get("provider")
        if provider not in KNOWN_AI_PROVIDERS:
            errors.append(
                f"ai.provider: must be one of {KNOWN_AI_PROVIDERS}, got {provider!r}"
            )

        for name in KNOWN_AI_PROVIDERS:
            prov_cfg = ai.get(name)
            if not isinstance(prov_cfg, dict):
                # Provider sections are optional; skip anything absent or malformed.
                continue

            model = prov_cfg.get("model")
            if not isinstance(model, str) or not model.strip():
                errors.append(
                    f"ai.{name}.model: must be a non-empty string, got {model!r}"
                )

            api_key = prov_cfg.get("api_key")
            if not isinstance(api_key, str):
                errors.append(
                    f"ai.{name}.api_key: must be a string, got {type(api_key).__name__}"
                )

            base_url = prov_cfg.get("base_url")
            if base_url is not None:
                if not isinstance(base_url, str) or not base_url.startswith(
                    ("http://", "https://")
                ):
                    errors.append(
                        f"ai.{name}.base_url: must start with http:// or https://, got {base_url!r}"
                    )

    # --- analysis section ---
    analysis = config.get("analysis", {})
    if isinstance(analysis, dict):
        ai_max_tokens = analysis.get("ai_max_tokens")
        if not _plain_int(ai_max_tokens) or ai_max_tokens <= 0:
            errors.append(
                f"analysis.ai_max_tokens: must be a positive integer, got {ai_max_tokens!r}"
            )

    # --- evidence section ---
    evidence = config.get("evidence", {})
    if isinstance(evidence, dict):
        threshold = evidence.get("large_file_threshold_mb")
        if isinstance(threshold, bool) or not isinstance(threshold, (int, float)) or threshold < 0:
            errors.append(
                f"evidence.large_file_threshold_mb: must be a non-negative number (0 = unlimited), got {threshold!r}"
            )

    return errors


def load_config(path: str | Path | None = None, use_env_overrides: bool = True) -> dict[str, Any]:
    """Load the AIFT configuration from a YAML file with layered defaults.

    If the configuration file does not exist, a new file is created from
    the defaults. Environment variable overrides are applied last unless
    *use_env_overrides* is ``False``.

    Args:
        path: Explicit path to a YAML configuration file. Defaults to
            ``<PROJECT_ROOT>/config.yaml``.
        use_env_overrides: When *True* (default), API keys from environment
            variables take precedence over file values.

    Returns:
        The fully merged configuration dictionary.

    Raises:
        ValueError: If the YAML file contains a non-dictionary root value.
        ConfigurationError: If the merged configuration fails validation.
    """
    config_path = Path(path) if path is not None else PROJECT_ROOT / "config.yaml"
    config = get_default_config()

    if config_path.exists():
        with config_path.open("r", encoding="utf-8") as file:
            # An empty file parses to None; treat it as "no overrides".
            parsed = yaml.safe_load(file) or {}

        if not isinstance(parsed, dict):
            raise ValueError(f"Invalid configuration format in {config_path}.")

        _deep_merge_inplace(config, parsed)
    else:
        # First run: materialise the defaults so the user has a file to edit.
        save_config(config, config_path)

    if use_env_overrides:
        apply_env_overrides(config)

    errors = validate_config(config)
    if errors:
        for error in errors:
            logger.error("Config validation: %s", error)
        raise ConfigurationError(errors)

    return config


def save_config(config: dict[str, Any], path: str | Path | None = None) -> None:
    """Persist the configuration dictionary to a YAML file.

    Parent directories are created automatically when they do not exist.

    Args:
        config: The configuration dictionary to serialise.
        path: Destination file path. Defaults to
            ``<PROJECT_ROOT>/config.yaml``.
    """
    config_path = Path(path) if path is not None else PROJECT_ROOT / "config.yaml"
    # A bare relative filename has parent "."; no directory needs creating then.
    if config_path.parent != Path("."):
        config_path.parent.mkdir(parents=True, exist_ok=True)

    with config_path.open("w", encoding="utf-8") as file:
        # sort_keys=False preserves the section order defined in DEFAULT_CONFIG.
        yaml.safe_dump(config, file, sort_keys=False)
273def load_config(path: str | Path | None = None, use_env_overrides: bool = True) -> dict[str, Any]: 274 """Load the AIFT configuration from a YAML file with layered defaults. 275 276 If the configuration file does not exist, a new file is created from 277 the defaults. Environment variable overrides are applied last unless 278 *use_env_overrides* is ``False``. 279 280 Args: 281 path: Explicit path to a YAML configuration file. Defaults to 282 ``<PROJECT_ROOT>/config.yaml``. 283 use_env_overrides: When *True* (default), API keys from environment 284 variables take precedence over file values. 285 286 Returns: 287 The fully merged configuration dictionary. 288 289 Raises: 290 ValueError: If the YAML file contains a non-dictionary root value. 291 """ 292 config_path = Path(path) if path is not None else PROJECT_ROOT / "config.yaml" 293 config = get_default_config() 294 295 if config_path.exists(): 296 with config_path.open("r", encoding="utf-8") as file: 297 parsed = yaml.safe_load(file) or {} 298 299 if not isinstance(parsed, dict): 300 raise ValueError(f"Invalid configuration format in {config_path}.") 301 302 _deep_merge_inplace(config, parsed) 303 else: 304 save_config(config, config_path) 305 306 if use_env_overrides: 307 apply_env_overrides(config) 308 309 errors = validate_config(config) 310 if errors: 311 for error in errors: 312 logger.error("Config validation: %s", error) 313 raise ConfigurationError(errors) 314 315 return config
Load the AIFT configuration from a YAML file with layered defaults.
If the configuration file does not exist, a new file is created from
the defaults. Environment variable overrides are applied last unless
use_env_overrides is False.
Arguments:
- path: Explicit path to a YAML configuration file. Defaults to
  `<PROJECT_ROOT>/config.yaml`.
- use_env_overrides: When True (default), API keys from environment
  variables take precedence over file values.
Returns:
The fully merged configuration dictionary.
Raises:
- ValueError: If the YAML file contains a non-dictionary root value.
318def save_config(config: dict[str, Any], path: str | Path | None = None) -> None: 319 """Persist the configuration dictionary to a YAML file. 320 321 Parent directories are created automatically when they do not exist. 322 323 Args: 324 config: The configuration dictionary to serialise. 325 path: Destination file path. Defaults to 326 ``<PROJECT_ROOT>/config.yaml``. 327 """ 328 config_path = Path(path) if path is not None else PROJECT_ROOT / "config.yaml" 329 if config_path.parent != Path("."): 330 config_path.parent.mkdir(parents=True, exist_ok=True) 331 332 with config_path.open("w", encoding="utf-8") as file: 333 yaml.safe_dump(config, file, sort_keys=False)
Persist the configuration dictionary to a YAML file.
Parent directories are created automatically when they do not exist.
Arguments:
- config: The configuration dictionary to serialise.
- path: Destination file path. Defaults to
<PROJECT_ROOT>/config.yaml.
150def get_default_config() -> dict[str, Any]: 151 """Return a deep copy of :data:`DEFAULT_CONFIG` safe for mutation.""" 152 return deepcopy(DEFAULT_CONFIG)
Return a deep copy of DEFAULT_CONFIG safe for mutation.
155def apply_env_overrides(config: dict[str, Any]) -> dict[str, Any]: 156 """Overlay API keys from environment variables onto *config*. 157 158 Checks ``ANTHROPIC_API_KEY``, ``OPENAI_API_KEY``, and 159 ``MOONSHOT_API_KEY`` / ``KIMI_API_KEY``. Non-empty values replace the 160 corresponding ``api_key`` entries in the configuration dictionary. 161 162 Args: 163 config: The configuration dictionary to update in place. 164 165 Returns: 166 The mutated *config* dictionary. 167 """ 168 anthropic_api_key = os.getenv("ANTHROPIC_API_KEY", "").strip() 169 openai_api_key = os.getenv("OPENAI_API_KEY", "").strip() 170 kimi_api_key = os.getenv("MOONSHOT_API_KEY", "").strip() or os.getenv("KIMI_API_KEY", "").strip() 171 172 if anthropic_api_key: 173 config.setdefault("ai", {}).setdefault("claude", {})["api_key"] = anthropic_api_key 174 if openai_api_key: 175 config.setdefault("ai", {}).setdefault("openai", {})["api_key"] = openai_api_key 176 if kimi_api_key: 177 config.setdefault("ai", {}).setdefault("kimi", {})["api_key"] = kimi_api_key 178 179 return config
Overlay API keys from environment variables onto config.
Checks ANTHROPIC_API_KEY, OPENAI_API_KEY, and
MOONSHOT_API_KEY / KIMI_API_KEY. Non-empty values replace the
corresponding api_key entries in the configuration dictionary.
Arguments:
- config: The configuration dictionary to update in place.
Returns:
The mutated config dictionary.
def validate_config(config: dict[str, Any]) -> list[str]:
    """Validate configuration values and return a list of error descriptions.

    Checks that values in the merged configuration are within acceptable
    ranges and of the correct types. An empty returned list means the
    configuration is fully valid.

    Booleans are explicitly rejected for integer settings: ``bool`` is a
    subclass of ``int`` in Python, so without the extra check ``True``
    would validate as port 1.

    Args:
        config: The fully merged configuration dictionary to validate.

    Returns:
        A list of human-readable validation error strings. Empty when
        the configuration passes all checks.
    """
    errors: list[str] = []

    def _plain_int(value: Any) -> bool:
        # bool is an int subclass; True/False must not pass integer checks.
        return isinstance(value, int) and not isinstance(value, bool)

    # --- server section ---
    server = config.get("server", {})
    if not isinstance(server, dict):
        errors.append("server: expected a mapping")
    else:
        port = server.get("port")
        if not _plain_int(port) or not (1 <= port <= 65535):
            errors.append(
                f"server.port: must be an integer between 1 and 65535, got {port!r}"
            )

        host = server.get("host")
        if not isinstance(host, str) or not host.strip():
            errors.append(
                f"server.host: must be a non-empty string, got {host!r}"
            )

    # --- ai section ---
    ai = config.get("ai", {})
    if not isinstance(ai, dict):
        errors.append("ai: expected a mapping")
    else:
        provider = ai.get("provider")
        if provider not in KNOWN_AI_PROVIDERS:
            errors.append(
                f"ai.provider: must be one of {KNOWN_AI_PROVIDERS}, got {provider!r}"
            )

        for name in KNOWN_AI_PROVIDERS:
            prov_cfg = ai.get(name)
            if not isinstance(prov_cfg, dict):
                # Provider sections are optional; skip anything absent or malformed.
                continue

            model = prov_cfg.get("model")
            if not isinstance(model, str) or not model.strip():
                errors.append(
                    f"ai.{name}.model: must be a non-empty string, got {model!r}"
                )

            api_key = prov_cfg.get("api_key")
            if not isinstance(api_key, str):
                errors.append(
                    f"ai.{name}.api_key: must be a string, got {type(api_key).__name__}"
                )

            base_url = prov_cfg.get("base_url")
            if base_url is not None:
                if not isinstance(base_url, str) or not base_url.startswith(
                    ("http://", "https://")
                ):
                    errors.append(
                        f"ai.{name}.base_url: must start with http:// or https://, got {base_url!r}"
                    )

    # --- analysis section ---
    analysis = config.get("analysis", {})
    if isinstance(analysis, dict):
        ai_max_tokens = analysis.get("ai_max_tokens")
        if not _plain_int(ai_max_tokens) or ai_max_tokens <= 0:
            errors.append(
                f"analysis.ai_max_tokens: must be a positive integer, got {ai_max_tokens!r}"
            )

    # --- evidence section ---
    evidence = config.get("evidence", {})
    if isinstance(evidence, dict):
        threshold = evidence.get("large_file_threshold_mb")
        if isinstance(threshold, bool) or not isinstance(threshold, (int, float)) or threshold < 0:
            errors.append(
                f"evidence.large_file_threshold_mb: must be a non-negative number (0 = unlimited), got {threshold!r}"
            )

    return errors
Validate configuration values and return a list of error descriptions.
Checks that values in the merged configuration are within acceptable ranges and of the correct types. An empty returned list means the configuration is fully valid.
Arguments:
- config: The fully merged configuration dictionary to validate.
Returns:
A list of human-readable validation error strings. Empty when the configuration passes all checks.
50class ConfigurationError(Exception): 51 """Raised when the configuration fails validation. 52 53 Attributes: 54 errors: List of human-readable validation error strings. 55 """ 56 57 def __init__(self, errors: list[str]) -> None: 58 self.errors = errors 59 joined = "; ".join(errors) 60 super().__init__(f"Invalid configuration: {joined}")
Raised when the configuration fails validation.
Attributes:
- errors: List of human-readable validation error strings.