Files
ten31-database/backend/email_integration/credentials.py
T
Keysat c7ce44d963 Phase 0 foundation: canonical schema, ingest pipeline, CRM MCP server
Workstream A–C substrate for the Ten31 agentic system:
- A1: docs/crm-overview.md; CLAUDE.md conventions + guardrail #9
- A2: additive/reversible core migration (canonical_entities, entity_links,
  interaction_log, relationship_edges, soft-delete) + ledgered runner
- B1/B3: chunking + deterministic entity resolution (backend/ingest)
- B2: dense (bge-m3) + BM25 sparse ingest to Qdrant crm_chunks
- C: CRM MCP server (reads, retrieval modes, logged writes) — no outbound tools
- docs: redaction/re-hydration, Gmail enablement runbook
- synthetic test data; .env.example; housekeeping (.gitignore, untrack crm.db,
  drop legacy files + start9/0.3.5)

Verified end-to-end on synthetic data + live Sparks (hybrid > dense on entity
queries). Real backfill runs on Ten31 infra; index holds synthetic data only.
Branch snapshot also captures pre-existing working-tree changes.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-05 08:13:35 -05:00

298 lines
11 KiB
Python

"""
Credential providers for Gmail API access.

Two implementations behind a common protocol:

- DWDCredentialProvider: signs a JWT with the Workspace-authorized service
  account, then exchanges it for a short-lived access token that
  impersonates a specific user. No per-user persistent state.
- OAuthCredentialProvider: uses a per-user refresh token (stored encrypted
  in email_accounts.oauth_refresh_enc) to mint access tokens. Supports the
  'connect Gmail' UI flow.

Both provide the same interface:

    provider.access_token_for(email_address: str) -> AccessToken
    provider.revoke(email_address: str) -> None
"""
import base64
import json
import os
import threading
import time
import urllib.error
import urllib.parse
import urllib.request
from dataclasses import dataclass
from typing import Optional, Protocol

from . import config as _cfg
from . import crypto
from . import errors
# OAuth scope placed in the "scope" claim of the DWD JWT (read-only Gmail).
GMAIL_READONLY_SCOPE = "https://www.googleapis.com/auth/gmail.readonly"
# Token endpoint: POST target for both the JWT-bearer and refresh-token
# grants, and also the "aud" claim of the DWD JWT.
GOOGLE_TOKEN_URL = "https://oauth2.googleapis.com/token"
@dataclass
class AccessToken:
    """An access token together with the time at which it expires."""

    # The token string handed back by the token endpoint.
    token: str
    # Expiry as seconds since the Unix epoch.
    expires_at: float
class CredentialProvider(Protocol):
    """Structural interface implemented by the credential providers below."""

    def access_token_for(self, email_address: str) -> AccessToken:
        """Return an access token usable on behalf of ``email_address``."""
        ...

    def revoke(self, email_address: str) -> None:
        """Revoke or drop whatever credentials are held for ``email_address``."""
        ...
# ============================================================================
# Domain-wide delegation
# ============================================================================
class DWDCredentialProvider:
    """Impersonation via service-account JWT bearer grant.

    Minted tokens are kept in an in-memory cache keyed by the impersonated
    address; nothing is persisted.
    """

    def __init__(self, key_path: str):
        """Load the service-account key JSON located at ``key_path``."""
        with open(key_path, "r") as key_file:
            self._key = json.load(key_file)
        self._client_email = self._key["client_email"]
        self._private_key_pem = self._key["private_key"].encode("utf-8")
        self._cache: dict[str, AccessToken] = {}
        self._lock = threading.Lock()

    def access_token_for(self, email_address: str) -> AccessToken:
        """Return the cached token if it has over 60s left, else mint a new one."""
        with self._lock:
            entry = self._cache.get(email_address)
            still_fresh = bool(entry) and entry.expires_at - time.time() > 60
            if not still_fresh:
                entry = self._mint(email_address)
                self._cache[email_address] = entry
            return entry

    def revoke(self, email_address: str) -> None:
        """Forget the cached token for ``email_address`` (no remote call)."""
        # DWD tokens expire naturally in <1h. Revocation is via Admin console.
        # We just drop the cache so next call mints fresh.
        with self._lock:
            self._cache.pop(email_address, None)

    # ------------------------------------------------------------------ helpers
    def _mint(self, subject_email: str) -> AccessToken:
        """Sign a JWT assertion for ``subject_email`` and exchange it for a token.

        Raises:
            errors.AuthError: ``cryptography`` is not installed, or the token
                endpoint answered without an ``access_token``.
            Exception: whatever ``errors.classify_http`` builds for an HTTP
                error response from the token endpoint.
        """
        try:
            from cryptography.hazmat.primitives import hashes, serialization  # type: ignore
            from cryptography.hazmat.primitives.asymmetric import padding  # type: ignore
        except ImportError as e:  # pragma: no cover
            raise errors.AuthError(
                "DWD requires the `cryptography` package. Add to requirements.txt."
            ) from e

        issued_at = int(time.time())
        jwt_header = {"alg": "RS256", "typ": "JWT"}
        jwt_claims = {
            "iss": self._client_email,
            "sub": subject_email,
            "scope": GMAIL_READONLY_SCOPE,
            "aud": GOOGLE_TOKEN_URL,
            "iat": issued_at,
            "exp": issued_at + 3600,
        }

        # header.claims is what gets signed; the signature is appended after.
        unsigned = b".".join((_b64url(_json(jwt_header)), _b64url(_json(jwt_claims))))
        signer = serialization.load_pem_private_key(self._private_key_pem, password=None)
        raw_signature = signer.sign(unsigned, padding.PKCS1v15(), hashes.SHA256())
        assertion = b".".join((unsigned, _b64url(raw_signature)))

        form_fields = {
            "grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer",
            "assertion": assertion.decode("ascii"),
        }
        request = urllib.request.Request(
            GOOGLE_TOKEN_URL,
            data=urllib.parse.urlencode(form_fields).encode("ascii"),
            headers={"Content-Type": "application/x-www-form-urlencoded"},
        )
        try:
            with urllib.request.urlopen(request, timeout=15) as resp:
                payload = json.loads(resp.read())
        except urllib.error.HTTPError as e:
            # Keep the error body (JSON if it parses, raw text otherwise) so
            # the classifier can see it.
            error_text = e.read().decode("utf-8", errors="replace")
            try:
                payload = json.loads(error_text)
            except Exception:
                payload = {"raw": error_text}
            raise errors.classify_http(e.code, payload)

        if "access_token" not in payload:
            raise errors.AuthError("DWD token exchange returned no access_token", payload=payload)

        # Record the expiry 30 seconds early.
        lifetime = float(payload.get("expires_in", 3600))
        return AccessToken(
            token=payload["access_token"],
            expires_at=time.time() + lifetime - 30,
        )
# ============================================================================
# Per-user OAuth (fallback)
# ============================================================================
class OAuthCredentialProvider:
    """Refreshes access tokens using a stored encrypted refresh token.

    Refresh tokens are obtained via the consent-flow routes in routes.py and
    stored in email_accounts.oauth_refresh_enc (AES-GCM ciphertext). The most
    recently minted access token is stored encrypted in the same row
    (oauth_token_enc / oauth_token_exp) and reused until it nears expiry.
    """

    def __init__(self, db_conn_factory, client_id: str, client_secret: str, secret_key_b64: str):
        """Store collaborators; performs no I/O.

        Args:
            db_conn_factory: Zero-argument callable returning a new DB
                connection. Each helper opens one and closes it when done.
                Rows it yields must support lookup by column name.
            client_id: OAuth client id sent with refresh requests.
            client_secret: OAuth client secret sent with refresh requests.
            secret_key_b64: Key passed to ``crypto.encrypt`` / ``crypto.decrypt``.
        """
        self._db = db_conn_factory
        self._client_id = client_id
        self._client_secret = client_secret
        self._secret_key_b64 = secret_key_b64
        self._lock = threading.Lock()

    def access_token_for(self, email_address: str) -> AccessToken:
        """Return an access token for ``email_address``.

        The stored token is reused while it has more than 60 seconds left;
        otherwise a refresh is performed.

        Raises:
            errors.AuthError: No email_accounts row exists for the address,
                or the refresh could not produce a token.
        """
        with self._lock:
            row = self._load_account(email_address)
            if row is None:
                raise errors.AuthError(f"no email_accounts row for {email_address}")
            # Cached access token still valid?
            if row["oauth_token_enc"] and row["oauth_token_exp"]:
                try:
                    exp = float(row["oauth_token_exp"])
                except (TypeError, ValueError):
                    # Unparseable expiry: treat the stored token as expired.
                    exp = 0.0
                if exp - time.time() > 60:
                    token = crypto.decrypt(
                        row["oauth_token_enc"], secret_key_b64=self._secret_key_b64
                    ).decode("ascii")
                    return AccessToken(token=token, expires_at=exp)
            # Refresh.
            return self._refresh(email_address, row)

    def revoke(self, email_address: str) -> None:
        """Revoke the refresh token remotely (best effort) and zero it locally.

        Does nothing when there is no row or no stored refresh token.
        """
        row = self._load_account(email_address)
        if not row or not row["oauth_refresh_enc"]:
            return
        try:
            # Decryption is part of the best-effort step: a token we cannot
            # decrypt must not prevent the local wipe below.
            refresh = crypto.decrypt(
                row["oauth_refresh_enc"], secret_key_b64=self._secret_key_b64
            ).decode("ascii")
            body = urllib.parse.urlencode({"token": refresh}).encode("ascii")
            req = urllib.request.Request(
                "https://oauth2.googleapis.com/revoke",
                data=body,
                headers={"Content-Type": "application/x-www-form-urlencoded"},
            )
            # Context manager closes the response instead of leaking it.
            with urllib.request.urlopen(req, timeout=10) as resp:
                resp.read()
        except Exception:
            pass  # best effort; we zero locally regardless
        self._zero_account(email_address)

    # ------------------------------------------------------------------ helpers
    def _refresh(self, email_address: str, row) -> AccessToken:
        """Exchange the stored refresh token for a new access token and persist it.

        The recorded expiry is 30 seconds earlier than the reported lifetime.

        Raises:
            errors.AuthError: No refresh token is stored, or the response
                carried no ``access_token``.
            Exception: whatever ``errors.classify_http`` builds for an HTTP
                error response (chained to the original ``HTTPError``).
        """
        if not row["oauth_refresh_enc"]:
            raise errors.AuthError(f"no refresh token stored for {email_address}")
        refresh = crypto.decrypt(
            row["oauth_refresh_enc"], secret_key_b64=self._secret_key_b64
        ).decode("ascii")
        body = urllib.parse.urlencode({
            "grant_type": "refresh_token",
            "refresh_token": refresh,
            "client_id": self._client_id,
            "client_secret": self._client_secret,
        }).encode("ascii")
        req = urllib.request.Request(
            GOOGLE_TOKEN_URL,
            data=body,
            headers={"Content-Type": "application/x-www-form-urlencoded"},
        )
        try:
            with urllib.request.urlopen(req, timeout=15) as resp:
                payload = json.loads(resp.read())
        except urllib.error.HTTPError as e:
            # Keep the error body (JSON if it parses, raw text otherwise) so
            # the classifier can see it.
            body_text = e.read().decode("utf-8", errors="replace")
            try:
                payload = json.loads(body_text)
            except Exception:
                payload = {"raw": body_text}
            raise errors.classify_http(e.code, payload) from e
        if "access_token" not in payload:
            raise errors.AuthError("OAuth refresh returned no access_token", payload=payload)
        token_str = payload["access_token"]
        exp = time.time() + float(payload.get("expires_in", 3600)) - 30
        enc_token = crypto.encrypt(token_str.encode("ascii"), secret_key_b64=self._secret_key_b64)
        self._save_token(email_address, enc_token, exp)
        return AccessToken(token=token_str, expires_at=exp)

    def _load_account(self, email_address: str):
        """Fetch the credential columns for ``email_address``; None if no row."""
        conn = self._db()
        try:
            cur = conn.cursor()
            cur.execute(
                "SELECT id, oauth_refresh_enc, oauth_token_enc, oauth_token_exp "
                "FROM email_accounts WHERE email_address = ?",
                (email_address,),
            )
            return cur.fetchone()
        finally:
            conn.close()

    def _save_token(self, email_address: str, enc_token: bytes, exp: float):
        """Persist the encrypted access token and its expiry (stored as text)."""
        conn = self._db()
        try:
            conn.execute(
                "UPDATE email_accounts SET oauth_token_enc = ?, oauth_token_exp = ?, "
                "updated_at = datetime('now') WHERE email_address = ?",
                (enc_token, str(exp), email_address),
            )
            conn.commit()
        finally:
            conn.close()

    def _zero_account(self, email_address: str):
        """Null out all stored tokens and mark the account's sync as paused."""
        conn = self._db()
        try:
            conn.execute(
                "UPDATE email_accounts SET oauth_refresh_enc = NULL, oauth_token_enc = NULL, "
                "oauth_token_exp = NULL, sync_enabled = 0, sync_status = 'paused', "
                "updated_at = datetime('now') WHERE email_address = ?",
                (email_address,),
            )
            conn.commit()
        finally:
            conn.close()
# ============================================================================
# Factory — resolves CONFIG.primary_auth to a concrete provider
# ============================================================================
def build_provider(db_conn_factory) -> CredentialProvider:
    """Build the credential provider selected by ``CONFIG.primary_auth``.

    ``"dwd"`` yields a DWDCredentialProvider and requires the configured key
    path to exist; ``"oauth"`` yields an OAuthCredentialProvider and requires
    the client id, client secret and secret key to all be set.

    Raises:
        errors.AuthError: Required settings are missing, or the mode is
            neither ``"dwd"`` nor ``"oauth"``.
    """
    settings = _cfg.CONFIG

    if settings.primary_auth == "dwd":
        key_missing = not settings.dwd_key_path or not os.path.exists(settings.dwd_key_path)
        if key_missing:
            raise errors.AuthError(
                f"CRM_GMAIL_SA_KEY_PATH not found: {settings.dwd_key_path!r}"
            )
        return DWDCredentialProvider(settings.dwd_key_path)

    if settings.primary_auth == "oauth":
        incomplete = (
            not settings.oauth_client_id
            or not settings.oauth_client_secret
            or not settings.secret_key_b64
        )
        if incomplete:
            raise errors.AuthError(
                "OAuth mode requires CRM_GMAIL_OAUTH_CLIENT_ID, "
                "CRM_GMAIL_OAUTH_CLIENT_SECRET, and CRM_GMAIL_SECRET_KEY."
            )
        return OAuthCredentialProvider(
            db_conn_factory,
            settings.oauth_client_id,
            settings.oauth_client_secret,
            settings.secret_key_b64,
        )

    raise errors.AuthError(f"unknown primary_auth: {settings.primary_auth!r}")
# ---------------------------------------------------------------------------- utils
def _b64url(data: bytes) -> bytes:
    """Base64url-encode ``data`` and strip the trailing ``=`` padding."""
    padded = base64.urlsafe_b64encode(data)
    return padded.rstrip(b"=")
def _json(obj) -> bytes:
    """Serialize ``obj`` as compact JSON (no spaces after separators), UTF-8 encoded."""
    compact = json.dumps(obj, separators=(",", ":"))
    return compact.encode("utf-8")