Phase 0 foundation: canonical schema, ingest pipeline, CRM MCP server
Workstream A–C substrate for the Ten31 agentic system:
- A1: docs/crm-overview.md; CLAUDE.md conventions + guardrail #9
- A2: additive/reversible core migration (canonical_entities, entity_links, interaction_log, relationship_edges, soft-delete) + ledgered runner
- B1/B3: chunking + deterministic entity resolution (backend/ingest)
- B2: dense (bge-m3) + BM25 sparse ingest to Qdrant crm_chunks
- C: CRM MCP server (reads, retrieval modes, logged writes) — no outbound tools
- docs: redaction/re-hydration, Gmail enablement runbook
- synthetic test data; .env.example; housekeeping (.gitignore, untrack crm.db, drop legacy files + start9/0.3.5)

Verified end-to-end on synthetic data + live Sparks (hybrid > dense on entity queries). Real backfill runs on Ten31 infra; index holds synthetic data only. Branch snapshot also captures pre-existing working-tree changes.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,112 @@
|
||||
"""
|
||||
Email integration configuration.
|
||||
|
||||
Reads from the same env-var surface as the rest of the CRM (server.py style),
|
||||
no pydantic/dotenv magic — stdlib only.
|
||||
"""
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
# Email storage shares the CRM's data directory so that backups cover both.
# The project root is found by stripping three path components from this
# file's absolute path (the file name, then two enclosing directories).
_PROJECT_DIR = os.path.dirname(
    os.path.dirname(
        os.path.dirname(os.path.abspath(__file__))
    )
)
_DEFAULT_DATA_DIR = os.path.join(_PROJECT_DIR, "data")
|
||||
|
||||
|
||||
def _bool_env(name: str, default: bool = False) -> bool:
    """Interpret environment variable *name* as a boolean flag.

    The value is trimmed and lower-cased before matching. ``1``, ``true``,
    ``yes`` and ``on`` yield ``True``; ``0``, ``false``, ``no`` and ``off``
    yield ``False``. An unset variable or any other text yields *default*.
    """
    token = os.environ.get(name, "").strip().lower()
    recognised = {
        "1": True,
        "true": True,
        "yes": True,
        "on": True,
        "0": False,
        "false": False,
        "no": False,
        "off": False,
    }
    # Unknown spellings (including the empty string) fall through to *default*.
    return recognised.get(token, default)
|
||||
|
||||
|
||||
def _int_env(name: str, default: int) -> int:
    """Read environment variable *name* as an integer.

    Returns *default* when the variable is unset or its text cannot be
    parsed by ``int()`` (for example an empty string or ``"4.5"``).
    """
    try:
        parsed = int(os.environ.get(name, str(default)))
    except (TypeError, ValueError):
        # Malformed value: keep the caller's default rather than failing.
        parsed = default
    return parsed
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class EmailConfig:
    """Immutable snapshot of the email-integration settings.

    Instances are frozen, so a changed environment is picked up by building
    a new object rather than mutating an existing one.
    """

    # --- Feature gate -----------------------------------------------------
    # Master kill switch: when False the scheduler does not start and the
    # routes answer 503. Migrations still run so the schema is ready.
    enabled: bool

    # --- Authentication ---------------------------------------------------
    # Primary auth path: "dwd" (service account / domain-wide delegation,
    # the default) or "oauth" (per-user refresh tokens, the pluggable
    # fallback).
    primary_auth: str

    # Domain-wide-delegation settings.
    dwd_key_path: Optional[str]
    workspace_domain: Optional[str]

    # OAuth settings (fallback path and admin UI).
    oauth_client_id: Optional[str]
    oauth_client_secret: Optional[str]
    oauth_redirect_uri: Optional[str]

    # Base64 key used to encrypt OAuth refresh tokens at rest. Required
    # whenever the OAuth path is in use; the DWD path never persists tokens.
    secret_key_b64: Optional[str]

    # --- Sync scheduling --------------------------------------------------
    sync_interval_sec: int
    backfill_page_size: int
    max_attachment_mb: int
    max_parallel_attachment_downloads: int

    # --- Storage locations ------------------------------------------------
    data_dir: str
    attachments_dir: str
    secrets_dir: str

    # --- Rate limiting and retry ------------------------------------------
    rate_limit_units_per_sec_per_account: int
    retry_max_attempts: int
    retry_initial_delay_sec: float
    retry_max_delay_sec: float

    # --- Gmail history retention ------------------------------------------
    # If sync falls this many days behind, switch to a date-based backfill,
    # since Gmail may have pruned the history records by then.
    history_stale_days: int
|
||||
|
||||
|
||||
def _float_env(name: str, default: float) -> float:
    """Read environment variable *name* as a float, falling back to *default*.

    Mirrors ``_int_env``: an unset variable or text that ``float()`` cannot
    parse (for example an empty string) yields *default* instead of raising.
    """
    try:
        return float(os.environ.get(name, str(default)))
    except (TypeError, ValueError):
        return default


def load() -> EmailConfig:
    """Build an ``EmailConfig`` from the current process environment.

    Every setting is read from a ``CRM_*`` environment variable. Malformed
    boolean, integer, and float values fall back to their defaults rather
    than raising, so a bad value cannot break the import-time ``CONFIG``
    construction.

    Returns:
        A new frozen ``EmailConfig`` reflecting the environment at call time.
    """
    data_dir = os.environ.get("CRM_DATA_DIR", _DEFAULT_DATA_DIR)

    # A blank or whitespace-only auth method means "not configured", so use
    # the documented default ("dwd") rather than an empty string.
    auth_method = os.environ.get("CRM_GMAIL_AUTH_METHOD", "").strip().lower() or "dwd"

    return EmailConfig(
        enabled=_bool_env("CRM_GMAIL_INTEGRATION_ENABLED", False),
        primary_auth=auth_method,
        # `or None` normalises empty strings to "not set".
        dwd_key_path=os.environ.get("CRM_GMAIL_SA_KEY_PATH") or None,
        workspace_domain=os.environ.get("CRM_GMAIL_WORKSPACE_DOMAIN") or None,
        oauth_client_id=os.environ.get("CRM_GMAIL_OAUTH_CLIENT_ID") or None,
        oauth_client_secret=os.environ.get("CRM_GMAIL_OAUTH_CLIENT_SECRET") or None,
        oauth_redirect_uri=os.environ.get("CRM_GMAIL_OAUTH_REDIRECT_URI") or None,
        secret_key_b64=os.environ.get("CRM_GMAIL_SECRET_KEY") or None,
        # The env var is expressed in minutes; the config field is seconds.
        sync_interval_sec=_int_env("CRM_GMAIL_SYNC_INTERVAL_MIN", 180) * 60,
        backfill_page_size=_int_env("CRM_GMAIL_BACKFILL_PAGE_SIZE", 500),
        max_attachment_mb=_int_env("CRM_GMAIL_MAX_ATTACHMENT_MB", 50),
        max_parallel_attachment_downloads=_int_env("CRM_GMAIL_ATTACH_CONCURRENCY", 4),
        data_dir=data_dir,
        attachments_dir=os.path.join(data_dir, "email_attachments"),
        secrets_dir=os.path.join(data_dir, "secrets"),
        rate_limit_units_per_sec_per_account=_int_env("CRM_GMAIL_RATE_UNITS_SEC", 150),
        retry_max_attempts=_int_env("CRM_GMAIL_RETRY_MAX", 5),
        # Parsed defensively: a bare float() here would raise on bad input.
        retry_initial_delay_sec=_float_env("CRM_GMAIL_RETRY_INITIAL_SEC", 1.0),
        retry_max_delay_sec=_float_env("CRM_GMAIL_RETRY_MAX_SEC", 60.0),
        history_stale_days=_int_env("CRM_GMAIL_HISTORY_STALE_DAYS", 5),
    )
|
||||
|
||||
|
||||
# Module-level singleton, built once at import time from the environment.
# If the environment changes afterwards (mostly in tests), call
# `reload_config()` to rebuild it. That rebinds this module attribute, so
# code holding a reference obtained earlier keeps the old instance.
CONFIG: EmailConfig = load()
|
||||
|
||||
|
||||
def reload_config() -> EmailConfig:
    """Rebuild the module-level ``CONFIG`` from the current environment.

    Returns:
        The freshly loaded ``EmailConfig``, which is also now bound to
        ``CONFIG``.
    """
    global CONFIG
    refreshed = load()
    CONFIG = refreshed
    return refreshed
|
||||
Reference in New Issue
Block a user