Files
Keysat c7ce44d963 Phase 0 foundation: canonical schema, ingest pipeline, CRM MCP server
Workstream A–C substrate for the Ten31 agentic system:
- A1: docs/crm-overview.md; CLAUDE.md conventions + guardrail #9
- A2: additive/reversible core migration (canonical_entities, entity_links,
  interaction_log, relationship_edges, soft-delete) + ledgered runner
- B1/B3: chunking + deterministic entity resolution (backend/ingest)
- B2: dense (bge-m3) + BM25 sparse ingest to Qdrant crm_chunks
- C: CRM MCP server (reads, retrieval modes, logged writes) — no outbound tools
- docs: redaction/re-hydration, Gmail enablement runbook
- synthetic test data; .env.example; housekeeping (.gitignore, untrack crm.db,
  drop legacy files + start9/0.3.5)

Verified end-to-end on synthetic data + live Sparks (hybrid > dense on entity
queries). Real backfill runs on Ten31 infra; index holds synthetic data only.
Branch snapshot also captures pre-existing working-tree changes.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-05 08:13:35 -05:00

113 lines
4.0 KiB
Python

"""
Email integration configuration.
Reads from the same env-var surface as the rest of the CRM (server.py style),
no pydantic/dotenv magic — stdlib only.
"""
import os
from dataclasses import dataclass
from typing import Optional
# Email storage is placed under the CRM's own data directory so that backups
# and email data live side by side. The project root is two directory levels
# above the directory containing this file.
_PROJECT_DIR = os.path.abspath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir)
)
_DEFAULT_DATA_DIR = os.path.join(_PROJECT_DIR, "data")
def _bool_env(name: str, default: bool = False) -> bool:
    """Interpret environment variable *name* as a boolean flag.

    The value is stripped and lower-cased before matching. "1", "true",
    "yes" and "on" map to True; "0", "false", "no" and "off" map to False.
    Anything else (including an unset or empty variable) yields *default*.
    """
    token = os.environ.get(name, "").strip().lower()
    recognized = {
        "1": True,
        "true": True,
        "yes": True,
        "on": True,
        "0": False,
        "false": False,
        "no": False,
        "off": False,
    }
    return recognized.get(token, default)
def _int_env(name: str, default: int) -> int:
    """Read environment variable *name* as an integer.

    Returns *default* when the variable is unset or its value cannot be
    parsed by ``int()`` (for example an empty or non-numeric string).
    """
    raw = os.environ.get(name, str(default))
    try:
        parsed = int(raw)
    except (TypeError, ValueError):
        return default
    else:
        return parsed
@dataclass(frozen=True)
class EmailConfig:
    """Immutable snapshot of the email integration settings.

    Field order is part of the constructor signature; do not reorder.
    """

    # --- Feature gate -----------------------------------------------------
    # Master kill switch. When False the scheduler does not start and the
    # routes return 503. Migrations are still applied so the schema is ready.
    enabled: bool

    # --- Authentication ---------------------------------------------------
    # Primary auth path: "dwd" = service account with domain-wide delegation,
    # "oauth" = per-user refresh tokens. DWD is the default; OAuth is the
    # pluggable fallback.
    primary_auth: str

    # DWD settings.
    dwd_key_path: Optional[str]
    workspace_domain: Optional[str]

    # OAuth settings (used for the fallback path and the admin UI).
    oauth_client_id: Optional[str]
    oauth_client_secret: Optional[str]
    oauth_redirect_uri: Optional[str]

    # Base64 key used to encrypt OAuth refresh tokens at rest. Required
    # whenever the OAuth path is in use; the DWD path never persists tokens.
    secret_key_b64: Optional[str]

    # --- Sync scheduling --------------------------------------------------
    sync_interval_sec: int
    backfill_page_size: int
    max_attachment_mb: int
    max_parallel_attachment_downloads: int

    # --- Storage locations ------------------------------------------------
    data_dir: str
    attachments_dir: str
    secrets_dir: str

    # --- Rate limiting and retry -----------------------------------------
    rate_limit_units_per_sec_per_account: int
    retry_max_attempts: int
    retry_initial_delay_sec: float
    retry_max_delay_sec: float

    # --- Gmail history retention -----------------------------------------
    # If sync falls this many days behind, switch to a date-based backfill,
    # since Gmail may already have pruned the history records.
    history_stale_days: int
def _float_env(name: str, default: float) -> float:
    """Read env var *name* as a float; return *default* if unset or unparsable."""
    try:
        return float(os.environ.get(name, str(default)))
    except (TypeError, ValueError):
        return default


def load() -> EmailConfig:
    """Build an EmailConfig from the current process environment.

    Every setting has a default, so this never requires any variable to be
    set. Malformed numeric values fall back to their defaults instead of
    raising; this matters because the module calls ``load()`` at import time.
    Empty string values are treated the same as unset.

    Returns:
        A new frozen EmailConfig instance.
    """
    # An empty CRM_DATA_DIR would otherwise make the storage paths relative
    # to the current working directory.
    data_dir = os.environ.get("CRM_DATA_DIR") or _DEFAULT_DATA_DIR

    # Normalize the auth method; blank or whitespace-only means "use default".
    primary_auth = os.environ.get("CRM_GMAIL_AUTH_METHOD", "").strip().lower() or "dwd"

    return EmailConfig(
        enabled=_bool_env("CRM_GMAIL_INTEGRATION_ENABLED", False),
        primary_auth=primary_auth,
        # `or None` collapses empty strings to None for optional settings.
        dwd_key_path=os.environ.get("CRM_GMAIL_SA_KEY_PATH") or None,
        workspace_domain=os.environ.get("CRM_GMAIL_WORKSPACE_DOMAIN") or None,
        oauth_client_id=os.environ.get("CRM_GMAIL_OAUTH_CLIENT_ID") or None,
        oauth_client_secret=os.environ.get("CRM_GMAIL_OAUTH_CLIENT_SECRET") or None,
        oauth_redirect_uri=os.environ.get("CRM_GMAIL_OAUTH_REDIRECT_URI") or None,
        secret_key_b64=os.environ.get("CRM_GMAIL_SECRET_KEY") or None,
        # The env var is expressed in minutes; the config field is in seconds.
        sync_interval_sec=_int_env("CRM_GMAIL_SYNC_INTERVAL_MIN", 180) * 60,
        backfill_page_size=_int_env("CRM_GMAIL_BACKFILL_PAGE_SIZE", 500),
        max_attachment_mb=_int_env("CRM_GMAIL_MAX_ATTACHMENT_MB", 50),
        max_parallel_attachment_downloads=_int_env("CRM_GMAIL_ATTACH_CONCURRENCY", 4),
        data_dir=data_dir,
        attachments_dir=os.path.join(data_dir, "email_attachments"),
        secrets_dir=os.path.join(data_dir, "secrets"),
        rate_limit_units_per_sec_per_account=_int_env("CRM_GMAIL_RATE_UNITS_SEC", 150),
        retry_max_attempts=_int_env("CRM_GMAIL_RETRY_MAX", 5),
        retry_initial_delay_sec=_float_env("CRM_GMAIL_RETRY_INITIAL_SEC", 1.0),
        retry_max_delay_sec=_float_env("CRM_GMAIL_RETRY_MAX_SEC", 60.0),
        history_stale_days=_int_env("CRM_GMAIL_HISTORY_STALE_DAYS", 5),
    )
# Module-level singleton, built once at import time. Call `reload_config()`
# after the environment changes (mostly useful in tests).
CONFIG = load()


def reload_config() -> EmailConfig:
    """Rebuild the module-level CONFIG from the environment and return it.

    Only this module's ``CONFIG`` name is rebound; code that copied the old
    object into its own name keeps the previous instance.
    """
    global CONFIG
    refreshed = load()
    CONFIG = refreshed
    return refreshed