Phase 0 foundation: canonical schema, ingest pipeline, CRM MCP server
Workstream A–C substrate for the Ten31 agentic system: - A1: docs/crm-overview.md; CLAUDE.md conventions + guardrail #9 - A2: additive/reversible core migration (canonical_entities, entity_links, interaction_log, relationship_edges, soft-delete) + ledgered runner - B1/B3: chunking + deterministic entity resolution (backend/ingest) - B2: dense (bge-m3) + BM25 sparse ingest to Qdrant crm_chunks - C: CRM MCP server (reads, retrieval modes, logged writes) — no outbound tools - docs: redaction/re-hydration, Gmail enablement runbook - synthetic test data; .env.example; housekeeping (.gitignore, untrack crm.db, drop legacy files + start9/0.3.5) Verified end-to-end on synthetic data + live Sparks (hybrid > dense on entity queries). Real backfill runs on Ten31 infra; index holds synthetic data only. Branch snapshot also captures pre-existing working-tree changes. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,143 @@
|
||||
"""
|
||||
Background sync scheduler.
|
||||
|
||||
Runs as a daemon thread started from server.py main(). One thread; it wakes
|
||||
every `sync_interval_sec`, processes all accounts serially, sleeps again.
|
||||
|
||||
Singleton: start_sync_scheduler() is idempotent — calling twice won't spawn
|
||||
a second thread. stop_sync_scheduler() gracefully signals shutdown (not
|
||||
strictly needed since it's daemon, but useful for tests).
|
||||
"""
|
||||
|
||||
import logging
|
||||
import sqlite3
|
||||
import threading
|
||||
import time
|
||||
from typing import Callable, Optional
|
||||
|
||||
from . import config as _cfg
|
||||
from . import credentials as _creds
|
||||
from . import sync as _sync
|
||||
from .matcher import InvestorIndex
|
||||
|
||||
|
||||
log = logging.getLogger("email_integration.scheduler")
|
||||
|
||||
|
||||
_state: dict[str, object] = {
|
||||
"thread": None,
|
||||
"stop": threading.Event(),
|
||||
"last_run": 0.0,
|
||||
"last_result": None,
|
||||
"running_now": False,
|
||||
}
|
||||
|
||||
|
||||
def _conn_factory_from_env() -> Callable[[], sqlite3.Connection]:
|
||||
"""Build a get_db() compatible with server.py's pattern.
|
||||
|
||||
We don't import server.py (avoid circular / startup ordering). Instead
|
||||
we re-implement the same settings. If server.py's DB path differs from
|
||||
the default, CRM_DB_PATH env var should be set — same mechanism.
|
||||
"""
|
||||
import os
|
||||
db_path = os.environ.get(
|
||||
"CRM_DB_PATH",
|
||||
os.path.join(_cfg.CONFIG.data_dir, "crm.db"),
|
||||
)
|
||||
|
||||
def get_db() -> sqlite3.Connection:
|
||||
conn = sqlite3.connect(db_path)
|
||||
conn.execute("PRAGMA journal_mode=WAL")
|
||||
conn.execute("PRAGMA foreign_keys=ON")
|
||||
conn.execute("PRAGMA busy_timeout=5000")
|
||||
conn.row_factory = sqlite3.Row
|
||||
return conn
|
||||
|
||||
return get_db
|
||||
|
||||
|
||||
def start_sync_scheduler(conn_factory: Optional[Callable] = None) -> None:
|
||||
if _state["thread"] is not None:
|
||||
return # already running
|
||||
|
||||
if not _cfg.CONFIG.enabled:
|
||||
log.info("email_integration not enabled; scheduler will not start")
|
||||
return
|
||||
|
||||
factory = conn_factory or _conn_factory_from_env()
|
||||
|
||||
try:
|
||||
provider = _creds.build_provider(factory)
|
||||
except Exception as e:
|
||||
log.exception("cannot build credential provider: %s", e)
|
||||
return
|
||||
|
||||
index = InvestorIndex(own_domain=_cfg.CONFIG.workspace_domain)
|
||||
try:
|
||||
index.rebuild(factory)
|
||||
except Exception:
|
||||
log.exception("initial investor-index build failed; scheduler continues")
|
||||
|
||||
stop = threading.Event()
|
||||
_state["stop"] = stop
|
||||
|
||||
def _loop():
|
||||
log.info("email sync scheduler started; interval=%ss", _cfg.CONFIG.sync_interval_sec)
|
||||
# First cycle: short delay to let server finish startup.
|
||||
if stop.wait(10):
|
||||
return
|
||||
while not stop.is_set():
|
||||
_state["running_now"] = True
|
||||
t0 = time.time()
|
||||
try:
|
||||
result = _sync.sync_all(factory, provider, index)
|
||||
_state["last_result"] = result
|
||||
except Exception:
|
||||
log.exception("sync loop crashed; will retry next cycle")
|
||||
finally:
|
||||
_state["running_now"] = False
|
||||
_state["last_run"] = t0
|
||||
if stop.wait(_cfg.CONFIG.sync_interval_sec):
|
||||
return
|
||||
|
||||
t = threading.Thread(target=_loop, name="email-sync", daemon=True)
|
||||
t.start()
|
||||
_state["thread"] = t
|
||||
_state["provider"] = provider
|
||||
_state["index"] = index
|
||||
_state["factory"] = factory
|
||||
|
||||
|
||||
def stop_sync_scheduler() -> None:
|
||||
ev: threading.Event = _state["stop"] # type: ignore
|
||||
ev.set()
|
||||
t = _state.get("thread")
|
||||
if t:
|
||||
try:
|
||||
t.join(timeout=5)
|
||||
except Exception:
|
||||
pass
|
||||
_state["thread"] = None
|
||||
|
||||
|
||||
def trigger_run_now() -> dict:
|
||||
"""Force a single sync pass synchronously (admin 'sync now' endpoint)."""
|
||||
if _state.get("running_now"):
|
||||
return {"status": "already_running"}
|
||||
factory = _state.get("factory")
|
||||
provider = _state.get("provider")
|
||||
index = _state.get("index")
|
||||
if not (factory and provider and index):
|
||||
return {"status": "not_initialized"}
|
||||
return _sync.sync_all(factory, provider, index) # type: ignore
|
||||
|
||||
|
||||
def status_snapshot() -> dict:
|
||||
return {
|
||||
"enabled": _cfg.CONFIG.enabled,
|
||||
"running": _state["running_now"],
|
||||
"last_run_unix": _state.get("last_run"),
|
||||
"last_result": _state.get("last_result"),
|
||||
"interval_sec": _cfg.CONFIG.sync_interval_sec,
|
||||
}
|
||||
Reference in New Issue
Block a user