f357c23c75
- Fuzzy tier (backend/ingest/fuzzy_resolve.py + llm.py): local Qwen adjudicates the deterministic resolver's flagged name-variant candidates; merges are durable via entity_merges (deterministic re-runs respect them), losers soft-deleted, logged. Idempotent. - Incremental sync (backend/ingest/sync.py): re-embeds only rows changed since a watermark (ingest_sync_state); first run / --recreate = full. Tested full→0→1. - Start9 packaging (start9/0.4): Dockerfile bundles ingest+mcp + fastembed/mcp; "Build search index" action runs the init in a subcontainer; MCP shipped as a manual stdio server (not a daemon); version 0.1.0:44. INGEST_PACKAGING.md. - backfill.py: factored embed_and_upsert() shared with sync. Verified end-to-end on synthetic data + live Sparks/Qwen/Qdrant. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
30 lines
1.2 KiB
Python
30 lines
1.2 KiB
Python
"""Ingest config — loads .env and exposes the Spark/Qdrant/CRM settings."""
|
|
import os
|
|
|
|
# Directory three levels above this file; the default .env location and the
# default database path are both resolved relative to it.
_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir)
)
|
|
|
|
|
|
def load_env(path=None):
    """Load ``KEY=VALUE`` pairs from a dotenv-style file into ``os.environ``.

    Variables that already exist in the process environment are never
    overwritten, so real environment settings take precedence over the file.

    Parsing rules:
      * blank lines, ``#`` comment lines and lines without ``=`` are skipped;
      * a leading ``export `` is dropped so shell-sourceable files also work;
      * the line is split on the first ``=`` only, so values may contain ``=``;
      * keys and values are whitespace-stripped, and one pair of matching
        single or double quotes wrapped around a value is removed;
      * lines whose key is empty (e.g. ``=value``) are skipped.

    Args:
        path: File to read. When falsy, ``.env`` inside ``_ROOT`` is used.

    Returns:
        None. A missing file is silently ignored.
    """
    path = path or os.path.join(_ROOT, ".env")
    if not os.path.exists(path):
        return
    with open(path, "r", encoding="utf-8") as fh:
        for raw in fh:
            line = raw.strip()
            if not line or line.startswith("#"):
                continue
            if line.startswith("export "):
                line = line[len("export "):].lstrip()
            if "=" not in line:
                continue
            key, value = line.split("=", 1)
            key = key.strip()
            if not key:
                # An empty variable name is rejected by the OS (setenv fails),
                # which would otherwise abort the whole load.
                continue
            value = value.strip()
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1]
            os.environ.setdefault(key, value)
|
|
|
|
|
|
# Populate os.environ from the default .env file before any setting is read;
# variables already set in the process environment are left untouched.
load_env()

# Service base URLs, with any trailing "/" removed. Empty string when unset.
SPARK_CONTROL_URL = os.environ.get("SPARK_CONTROL_URL", "").rstrip("/")
# NOTE(review): TLS verification is off unless SPARK_CONTROL_VERIFY_TLS is one
# of 1/true/yes/on (case-insensitive) — confirm this default is intended.
SPARK_VERIFY_TLS = os.environ.get("SPARK_CONTROL_VERIFY_TLS", "false").lower() in ("1", "true", "yes", "on")
QDRANT_URL = os.environ.get("QDRANT_URL", "").rstrip("/")

# Qdrant collection name and model identifiers, each overridable via env.
COLLECTION = os.environ.get("CRM_QDRANT_COLLECTION", "crm_chunks")
EMBED_MODEL = os.environ.get("CRM_EMBED_MODEL", "BAAI/bge-m3")
CHAT_MODEL = os.environ.get("CRM_CHAT_MODEL", "RedHatAI/Qwen3.6-35B-A3B-NVFP4")

# A blank CRM_EMBED_DIM (e.g. "CRM_EMBED_DIM=" in .env) falls back to 1024
# instead of failing the import with int("") -> ValueError.
DENSE_DIM = int(os.environ.get("CRM_EMBED_DIM", "").strip() or "1024")

# Database path; defaults to data/crm_dev.db under _ROOT.
DEFAULT_DB = os.environ.get("CRM_DEV_DB_PATH", os.path.join(_ROOT, "data", "crm_dev.db"))
|