Initial commit: Ten31 Signal Engine (ingest, scoring brain, corpus seeds)
This commit is contained in:
@@ -0,0 +1,101 @@
|
||||
"""Environment-driven configuration (handoff §10, §13).
|
||||
|
||||
All config flows through env vars so the SAME code runs as a plain process now and, later, as a
|
||||
StartOS s9pk daemon (which injects these via the daemon's `exec.env` from a `store.json` FileModel).
|
||||
A local `.env` (gitignored) is loaded for convenience during the pilot.
|
||||
|
||||
Live values confirmed against the operator's gateway 2026-06-07 (GET /api/status,/api/endpoints):
|
||||
gateway = https://192.168.1.72:62419 (self-signed → SPARK_VERIFY_TLS=false)
|
||||
LLM = RedHatAI/Qwen3.6-35B-A3B-NVFP4
|
||||
embed = BAAI/bge-m3 (1024-d) rerank = BAAI/bge-reranker-v2-m3
|
||||
ASR = nvidia/parakeet-tdt-0.6b-v3 diarizer = nvidia/diar_sortformer_4spk-v1
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _load_dotenv(path: str = ".env") -> None:
    """Minimal .env loader (no dependency): KEY=VALUE lines populate os.environ if not already set.

    Blank lines, ``#`` comment lines, lines without ``=`` and lines with an empty
    key are skipped. A leading ``export `` on the key is tolerated so
    shell-sourceable files work. Variables already present in the process
    environment always win over the file.

    Args:
        path: Location of the dotenv file. A missing path, or a path that is
            not a regular file, is a silent no-op.
    """
    env_file = Path(path)
    if not env_file.is_file():
        return

    # Decode deterministically instead of using the locale encoding;
    # "utf-8-sig" also drops a BOM left behind by some editors.
    for raw_line in env_file.read_text(encoding="utf-8-sig").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue

        key, _, val = line.partition("=")
        key = key.strip()
        if key.startswith("export "):
            key = key[len("export "):].strip()
        if not key:
            # "=value" has no name; os.environ rejects an empty variable name,
            # which would otherwise abort loading on one malformed line.
            continue

        val = val.strip()
        # Remove ONE layer of matching surrounding quotes only, so a value such
        # as "ends'" keeps its inner apostrophe.
        if len(val) >= 2 and val[0] == val[-1] and val[0] in "\"'":
            val = val[1:-1]

        os.environ.setdefault(key, val)
|
||||
|
||||
|
||||
def _env(key: str, default: str | None = None) -> str | None:
    """Look up *key* in the process environment, returning *default* when it is not set."""
    try:
        return os.environ[key]
    except KeyError:
        return default
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Config:
    """Immutable snapshot of the runtime settings, built from environment variables.

    Construct it with :meth:`from_env`. Parsing rules applied there:

    * An env var that is set but empty is treated the same as an unset one
      (string settings fall back to their default, API keys become ``None``).
    * Boolean settings accept ``1``/``true``/``yes``/``on`` (case-insensitive);
      any other non-empty value is ``False``.
    * Numeric settings raise ``ValueError`` naming the offending variable.
    """

    spark_control_url: str
    spark_verify_tls: bool
    spark_timeout_s: float
    audio_concurrency: int  # global in-flight cap across BOTH parakeet audio endpoints (sit at 2, ceiling 3)

    local_llm_model: str
    embed_model: str
    transcribe_model: str

    anthropic_api_key: str | None
    frontier_model: str

    # Extraction backend: 'local' (Qwen via Spark Control, default) | 'gemini' (batch overflow/fallback, §scaling)
    extraction_backend: str
    gemini_api_key: str | None
    gemini_model: str

    fmp_api_key: str | None
    edgar_user_agent: str

    data_dir: Path
    database_url: str
    audio_cache_dir: Path

    ui_port: int
    log_level: str

    @staticmethod
    def _text(key: str, default: str) -> str:
        """Return the env value for *key*, or *default* when it is unset or empty."""
        return _env(key) or default

    @staticmethod
    def _secret(key: str) -> str | None:
        """Return the env value for *key*, or ``None`` when it is unset or empty."""
        return _env(key) or None

    @staticmethod
    def _flag(key: str, default: bool) -> bool:
        """Parse a boolean env var; unset/empty yields *default*."""
        raw = (_env(key) or "").strip().lower()
        if not raw:
            return default
        return raw in ("1", "true", "yes", "on")

    @staticmethod
    def _int(key: str, default: int) -> int:
        """Parse an integer env var, naming *key* in the error on bad input."""
        raw = _env(key) or str(default)
        try:
            return int(raw)
        except ValueError as err:
            raise ValueError(f"{key} must be an integer, got {raw!r}") from err

    @staticmethod
    def _float(key: str, default: float) -> float:
        """Parse a float env var, naming *key* in the error on bad input."""
        raw = _env(key) or str(default)
        try:
            return float(raw)
        except ValueError as err:
            raise ValueError(f"{key} must be a number, got {raw!r}") from err

    @classmethod
    def from_env(cls) -> "Config":
        """Load the local ``.env`` (if any) and build a ``Config`` from ``os.environ``.

        Returns:
            A fully populated, frozen ``Config``.

        Raises:
            ValueError: If a numeric variable (``SPARK_TIMEOUT_S``,
                ``AUDIO_CONCURRENCY``, ``UI_PORT``) cannot be parsed.
        """
        _load_dotenv()
        data_dir = Path(cls._text("DATA_DIR", "./data"))
        return cls(
            spark_control_url=cls._text("SPARK_CONTROL_URL", "https://192.168.1.72:62419"),
            # Verification stays off by default (the module docstring notes the
            # gateway is self-signed), but any common truthy spelling enables it.
            spark_verify_tls=cls._flag("SPARK_VERIFY_TLS", False),
            spark_timeout_s=cls._float("SPARK_TIMEOUT_S", 180.0),
            # Clamp to the 1..3 range; 3 is the documented ceiling.
            audio_concurrency=min(3, max(1, cls._int("AUDIO_CONCURRENCY", 2))),
            local_llm_model=cls._text("LOCAL_LLM_MODEL", "RedHatAI/Qwen3.6-35B-A3B-NVFP4"),
            embed_model=cls._text("EMBED_MODEL", "BAAI/bge-m3"),
            transcribe_model=cls._text("TRANSCRIBE_MODEL", "nvidia/parakeet-tdt-0.6b-v3"),
            anthropic_api_key=cls._secret("ANTHROPIC_API_KEY"),
            frontier_model=cls._text("FRONTIER_MODEL", "claude-opus-4-8"),
            extraction_backend=cls._text("EXTRACTION_BACKEND", "local"),
            gemini_api_key=cls._secret("GEMINI_API_KEY"),
            gemini_model=cls._text("GEMINI_MODEL", "gemini-2.5-flash"),
            fmp_api_key=cls._secret("FMP_API_KEY"),
            edgar_user_agent=cls._text("EDGAR_USER_AGENT", "Ten31 Research grant@ten31.xyz"),
            data_dir=data_dir,
            database_url=cls._text("DATABASE_URL", ""),
            # Empty/unset keeps the cache under data_dir rather than the cwd.
            audio_cache_dir=Path(cls._text("AUDIO_CACHE_DIR", str(data_dir / "audio-cache"))),
            ui_port=cls._int("UI_PORT", 8000),
            log_level=cls._text("LOG_LEVEL", "INFO"),
        )

    @property
    def db_path(self) -> Path:
        """Filesystem path of the SQLite database.

        Returns the path portion of ``database_url`` when it starts with
        ``sqlite:///`` and names a file; otherwise ``data_dir / "signal.db"``.
        """
        prefix = "sqlite:///"
        if self.database_url.startswith(prefix):
            location = self.database_url[len(prefix):]
            # A bare "sqlite:///" names no file; fall through to the default.
            if location:
                return Path(location)
        return self.data_dir / "signal.db"
|
||||
|
||||
|
||||
def load_config() -> Config:
    """Build the application ``Config`` by delegating to ``Config.from_env()``."""
    config = Config.from_env()
    return config
|
||||
Reference in New Issue
Block a user