"""Environment-driven configuration (handoff §10, §13). All config flows through env vars so the SAME code runs as a plain process now and, later, as a StartOS s9pk daemon (which injects these via the daemon's `exec.env` from a `store.json` FileModel). A local `.env` (gitignored) is loaded for convenience during the pilot. Live values confirmed against the operator's gateway 2026-06-07 (GET /api/status,/api/endpoints): gateway = https://192.168.1.72:62419 (self-signed → SPARK_VERIFY_TLS=false) LLM = RedHatAI/Qwen3.6-35B-A3B-NVFP4 embed = BAAI/bge-m3 (1024-d) rerank = BAAI/bge-reranker-v2-m3 ASR = nvidia/parakeet-tdt-0.6b-v3 diarizer = nvidia/diar_sortformer_4spk-v1 """ from __future__ import annotations import os from dataclasses import dataclass from pathlib import Path def _load_dotenv(path: str = ".env") -> None: """Minimal .env loader (no dependency): KEY=VALUE lines populate os.environ if not already set.""" p = Path(path) if not p.exists(): return for line in p.read_text().splitlines(): line = line.strip() if not line or line.startswith("#") or "=" not in line: continue key, _, val = line.partition("=") os.environ.setdefault(key.strip(), val.strip().strip('"').strip("'")) def _env(key: str, default: str | None = None) -> str | None: return os.environ.get(key, default) @dataclass(frozen=True) class Config: spark_control_url: str spark_verify_tls: bool spark_timeout_s: float audio_concurrency: int # global in-flight cap across BOTH parakeet audio endpoints (sit at 2, ceiling 3) local_llm_model: str embed_model: str transcribe_model: str anthropic_api_key: str | None frontier_model: str # Extraction backend: 'local' (Qwen via Spark Control, default) | 'gemini' (batch overflow/fallback, §scaling) extraction_backend: str gemini_api_key: str | None gemini_model: str fmp_api_key: str | None edgar_user_agent: str data_dir: Path database_url: str audio_cache_dir: Path ui_port: int log_level: str @classmethod def from_env(cls) -> "Config": _load_dotenv() data_dir = Path(_env("DATA_DIR", "./data") or "./data") return cls( spark_control_url=_env("SPARK_CONTROL_URL", "https://192.168.1.72:62419") or "", spark_verify_tls=(_env("SPARK_VERIFY_TLS", "false") or "false").lower() == "true", spark_timeout_s=float(_env("SPARK_TIMEOUT_S", "180") or "180"), audio_concurrency=min(3, max(1, int(_env("AUDIO_CONCURRENCY", "2") or "2"))), local_llm_model=_env("LOCAL_LLM_MODEL", "RedHatAI/Qwen3.6-35B-A3B-NVFP4") or "", embed_model=_env("EMBED_MODEL", "BAAI/bge-m3") or "", transcribe_model=_env("TRANSCRIBE_MODEL", "nvidia/parakeet-tdt-0.6b-v3") or "", anthropic_api_key=_env("ANTHROPIC_API_KEY"), frontier_model=_env("FRONTIER_MODEL", "claude-opus-4-8") or "", extraction_backend=_env("EXTRACTION_BACKEND", "local") or "local", gemini_api_key=_env("GEMINI_API_KEY"), gemini_model=_env("GEMINI_MODEL", "gemini-2.5-flash") or "", fmp_api_key=_env("FMP_API_KEY"), edgar_user_agent=_env("EDGAR_USER_AGENT", "Ten31 Research grant@ten31.xyz") or "", data_dir=data_dir, database_url=_env("DATABASE_URL", "") or "", audio_cache_dir=Path(_env("AUDIO_CACHE_DIR", str(data_dir / "audio-cache")) or "audio-cache"), ui_port=int(_env("UI_PORT", "8000") or "8000"), log_level=_env("LOG_LEVEL", "INFO") or "INFO", ) @property def db_path(self) -> Path: prefix = "sqlite:///" if self.database_url.startswith(prefix): return Path(self.database_url[len(prefix):]) return self.data_dir / "signal.db" def load_config() -> Config: return Config.from_env()