Phase 0 foundation: canonical schema, ingest pipeline, CRM MCP server
Workstream A–C substrate for the Ten31 agentic system: - A1: docs/crm-overview.md; CLAUDE.md conventions + guardrail #9 - A2: additive/reversible core migration (canonical_entities, entity_links, interaction_log, relationship_edges, soft-delete) + ledgered runner - B1/B3: chunking + deterministic entity resolution (backend/ingest) - B2: dense (bge-m3) + BM25 sparse ingest to Qdrant crm_chunks - C: CRM MCP server (reads, retrieval modes, logged writes) — no outbound tools - docs: redaction/re-hydration, Gmail enablement runbook - synthetic test data; .env.example; housekeeping (.gitignore, untrack crm.db, drop legacy files + start9/0.3.5) Verified end-to-end on synthetic data + live Sparks (hybrid > dense on entity queries). Real backfill runs on Ten31 infra; index holds synthetic data only. Branch snapshot also captures pre-existing working-tree changes. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,297 @@
|
||||
"""
|
||||
Credential providers for Gmail API access.
|
||||
|
||||
Two implementations behind a common protocol:
|
||||
|
||||
- DWDCredentialProvider: signs a JWT with the Workspace-authorized service
|
||||
account, exchanges for a short-lived access token that impersonates a
|
||||
specific user. No per-user persistent state.
|
||||
|
||||
- OAuthCredentialProvider: uses a per-user refresh token (stored encrypted
|
||||
in email_accounts.oauth_refresh_enc) to mint access tokens. Supports the
|
||||
'connect Gmail' UI flow.
|
||||
|
||||
Both provide the same interface:
|
||||
|
||||
provider.access_token_for(email_address: str) -> AccessToken
|
||||
"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional, Protocol
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
|
||||
from . import config as _cfg
|
||||
from . import crypto
|
||||
from . import errors
|
||||
|
||||
|
||||
GMAIL_READONLY_SCOPE = "https://www.googleapis.com/auth/gmail.readonly"
|
||||
GOOGLE_TOKEN_URL = "https://oauth2.googleapis.com/token"
|
||||
|
||||
|
||||
@dataclass
|
||||
class AccessToken:
|
||||
token: str
|
||||
expires_at: float # epoch seconds
|
||||
|
||||
|
||||
class CredentialProvider(Protocol):
|
||||
def access_token_for(self, email_address: str) -> AccessToken: ...
|
||||
def revoke(self, email_address: str) -> None: ...
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Domain-wide delegation
|
||||
# ============================================================================
|
||||
|
||||
class DWDCredentialProvider:
|
||||
"""Impersonation via service-account JWT bearer grant."""
|
||||
|
||||
def __init__(self, key_path: str):
|
||||
with open(key_path, "r") as f:
|
||||
self._key = json.load(f)
|
||||
self._client_email = self._key["client_email"]
|
||||
self._private_key_pem = self._key["private_key"].encode("utf-8")
|
||||
self._cache: dict[str, AccessToken] = {}
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def access_token_for(self, email_address: str) -> AccessToken:
|
||||
with self._lock:
|
||||
cached = self._cache.get(email_address)
|
||||
if cached and cached.expires_at - time.time() > 60:
|
||||
return cached
|
||||
token = self._mint(email_address)
|
||||
self._cache[email_address] = token
|
||||
return token
|
||||
|
||||
def revoke(self, email_address: str) -> None:
|
||||
# DWD tokens expire naturally in <1h. Revocation is via Admin console.
|
||||
# We just drop the cache so next call mints fresh.
|
||||
with self._lock:
|
||||
self._cache.pop(email_address, None)
|
||||
|
||||
# ------------------------------------------------------------------ helpers
|
||||
|
||||
def _mint(self, subject_email: str) -> AccessToken:
|
||||
try:
|
||||
from cryptography.hazmat.primitives import hashes, serialization # type: ignore
|
||||
from cryptography.hazmat.primitives.asymmetric import padding # type: ignore
|
||||
except ImportError as e: # pragma: no cover
|
||||
raise errors.AuthError(
|
||||
"DWD requires the `cryptography` package. Add to requirements.txt."
|
||||
) from e
|
||||
|
||||
now = int(time.time())
|
||||
header = {"alg": "RS256", "typ": "JWT"}
|
||||
claim = {
|
||||
"iss": self._client_email,
|
||||
"sub": subject_email,
|
||||
"scope": GMAIL_READONLY_SCOPE,
|
||||
"aud": GOOGLE_TOKEN_URL,
|
||||
"iat": now,
|
||||
"exp": now + 3600,
|
||||
}
|
||||
signing_input = _b64url(_json(header)) + b"." + _b64url(_json(claim))
|
||||
|
||||
private_key = serialization.load_pem_private_key(self._private_key_pem, password=None)
|
||||
signature = private_key.sign(signing_input, padding.PKCS1v15(), hashes.SHA256())
|
||||
jwt = signing_input + b"." + _b64url(signature)
|
||||
|
||||
body = urllib.parse.urlencode({
|
||||
"grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer",
|
||||
"assertion": jwt.decode("ascii"),
|
||||
}).encode("ascii")
|
||||
|
||||
req = urllib.request.Request(
|
||||
GOOGLE_TOKEN_URL,
|
||||
data=body,
|
||||
headers={"Content-Type": "application/x-www-form-urlencoded"},
|
||||
)
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=15) as resp:
|
||||
payload = json.loads(resp.read())
|
||||
except urllib.error.HTTPError as e:
|
||||
body = e.read().decode("utf-8", errors="replace")
|
||||
try:
|
||||
payload = json.loads(body)
|
||||
except Exception:
|
||||
payload = {"raw": body}
|
||||
raise errors.classify_http(e.code, payload)
|
||||
|
||||
if "access_token" not in payload:
|
||||
raise errors.AuthError("DWD token exchange returned no access_token", payload=payload)
|
||||
return AccessToken(
|
||||
token=payload["access_token"],
|
||||
expires_at=time.time() + float(payload.get("expires_in", 3600)) - 30,
|
||||
)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Per-user OAuth (fallback)
|
||||
# ============================================================================
|
||||
|
||||
class OAuthCredentialProvider:
|
||||
"""Refreshes access tokens using a stored encrypted refresh token.
|
||||
|
||||
Refresh tokens are obtained via the consent-flow routes in routes.py and
|
||||
stored in email_accounts.oauth_refresh_enc (AES-GCM ciphertext).
|
||||
"""
|
||||
|
||||
def __init__(self, db_conn_factory, client_id: str, client_secret: str, secret_key_b64: str):
|
||||
self._db = db_conn_factory
|
||||
self._client_id = client_id
|
||||
self._client_secret = client_secret
|
||||
self._secret_key_b64 = secret_key_b64
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def access_token_for(self, email_address: str) -> AccessToken:
|
||||
with self._lock:
|
||||
row = self._load_account(email_address)
|
||||
if row is None:
|
||||
raise errors.AuthError(f"no email_accounts row for {email_address}")
|
||||
# Cached access token still valid?
|
||||
if row["oauth_token_enc"] and row["oauth_token_exp"]:
|
||||
try:
|
||||
exp = float(row["oauth_token_exp"])
|
||||
except ValueError:
|
||||
exp = 0.0
|
||||
if exp - time.time() > 60:
|
||||
token = crypto.decrypt(row["oauth_token_enc"], secret_key_b64=self._secret_key_b64).decode("ascii")
|
||||
return AccessToken(token=token, expires_at=exp)
|
||||
# Refresh.
|
||||
return self._refresh(email_address, row)
|
||||
|
||||
def revoke(self, email_address: str) -> None:
|
||||
row = self._load_account(email_address)
|
||||
if not row or not row["oauth_refresh_enc"]:
|
||||
return
|
||||
refresh = crypto.decrypt(row["oauth_refresh_enc"], secret_key_b64=self._secret_key_b64).decode("ascii")
|
||||
body = urllib.parse.urlencode({"token": refresh}).encode("ascii")
|
||||
req = urllib.request.Request(
|
||||
"https://oauth2.googleapis.com/revoke",
|
||||
data=body,
|
||||
headers={"Content-Type": "application/x-www-form-urlencoded"},
|
||||
)
|
||||
try:
|
||||
urllib.request.urlopen(req, timeout=10).read()
|
||||
except Exception:
|
||||
pass # best effort; we zero locally regardless
|
||||
self._zero_account(email_address)
|
||||
|
||||
# ------------------------------------------------------------------ helpers
|
||||
|
||||
def _refresh(self, email_address: str, row) -> AccessToken:
|
||||
if not row["oauth_refresh_enc"]:
|
||||
raise errors.AuthError(f"no refresh token stored for {email_address}")
|
||||
refresh = crypto.decrypt(row["oauth_refresh_enc"], secret_key_b64=self._secret_key_b64).decode("ascii")
|
||||
body = urllib.parse.urlencode({
|
||||
"grant_type": "refresh_token",
|
||||
"refresh_token": refresh,
|
||||
"client_id": self._client_id,
|
||||
"client_secret": self._client_secret,
|
||||
}).encode("ascii")
|
||||
req = urllib.request.Request(
|
||||
GOOGLE_TOKEN_URL,
|
||||
data=body,
|
||||
headers={"Content-Type": "application/x-www-form-urlencoded"},
|
||||
)
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=15) as resp:
|
||||
payload = json.loads(resp.read())
|
||||
except urllib.error.HTTPError as e:
|
||||
body_text = e.read().decode("utf-8", errors="replace")
|
||||
try:
|
||||
payload = json.loads(body_text)
|
||||
except Exception:
|
||||
payload = {"raw": body_text}
|
||||
raise errors.classify_http(e.code, payload)
|
||||
|
||||
if "access_token" not in payload:
|
||||
raise errors.AuthError("OAuth refresh returned no access_token", payload=payload)
|
||||
|
||||
token_str = payload["access_token"]
|
||||
exp = time.time() + float(payload.get("expires_in", 3600)) - 30
|
||||
enc_token = crypto.encrypt(token_str.encode("ascii"), secret_key_b64=self._secret_key_b64)
|
||||
self._save_token(email_address, enc_token, exp)
|
||||
return AccessToken(token=token_str, expires_at=exp)
|
||||
|
||||
def _load_account(self, email_address: str):
|
||||
conn = self._db()
|
||||
try:
|
||||
cur = conn.cursor()
|
||||
cur.execute(
|
||||
"SELECT id, oauth_refresh_enc, oauth_token_enc, oauth_token_exp "
|
||||
"FROM email_accounts WHERE email_address = ?",
|
||||
(email_address,),
|
||||
)
|
||||
return cur.fetchone()
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
def _save_token(self, email_address: str, enc_token: bytes, exp: float):
|
||||
conn = self._db()
|
||||
try:
|
||||
conn.execute(
|
||||
"UPDATE email_accounts SET oauth_token_enc = ?, oauth_token_exp = ?, "
|
||||
"updated_at = datetime('now') WHERE email_address = ?",
|
||||
(enc_token, str(exp), email_address),
|
||||
)
|
||||
conn.commit()
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
def _zero_account(self, email_address: str):
|
||||
conn = self._db()
|
||||
try:
|
||||
conn.execute(
|
||||
"UPDATE email_accounts SET oauth_refresh_enc = NULL, oauth_token_enc = NULL, "
|
||||
"oauth_token_exp = NULL, sync_enabled = 0, sync_status = 'paused', "
|
||||
"updated_at = datetime('now') WHERE email_address = ?",
|
||||
(email_address,),
|
||||
)
|
||||
conn.commit()
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Factory — resolves CONFIG.primary_auth to a concrete provider
|
||||
# ============================================================================
|
||||
|
||||
def build_provider(db_conn_factory) -> CredentialProvider:
|
||||
cfg = _cfg.CONFIG
|
||||
if cfg.primary_auth == "dwd":
|
||||
if not cfg.dwd_key_path or not os.path.exists(cfg.dwd_key_path):
|
||||
raise errors.AuthError(
|
||||
f"CRM_GMAIL_SA_KEY_PATH not found: {cfg.dwd_key_path!r}"
|
||||
)
|
||||
return DWDCredentialProvider(cfg.dwd_key_path)
|
||||
if cfg.primary_auth == "oauth":
|
||||
if not (cfg.oauth_client_id and cfg.oauth_client_secret and cfg.secret_key_b64):
|
||||
raise errors.AuthError(
|
||||
"OAuth mode requires CRM_GMAIL_OAUTH_CLIENT_ID, "
|
||||
"CRM_GMAIL_OAUTH_CLIENT_SECRET, and CRM_GMAIL_SECRET_KEY."
|
||||
)
|
||||
return OAuthCredentialProvider(
|
||||
db_conn_factory,
|
||||
cfg.oauth_client_id,
|
||||
cfg.oauth_client_secret,
|
||||
cfg.secret_key_b64,
|
||||
)
|
||||
raise errors.AuthError(f"unknown primary_auth: {cfg.primary_auth!r}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------- utils
|
||||
|
||||
def _b64url(data: bytes) -> bytes:
|
||||
return base64.urlsafe_b64encode(data).rstrip(b"=")
|
||||
|
||||
|
||||
def _json(obj) -> bytes:
|
||||
return json.dumps(obj, separators=(",", ":")).encode("utf-8")
|
||||
Reference in New Issue
Block a user