c7ce44d963
Workstream A–C substrate for the Ten31 agentic system: - A1: docs/crm-overview.md; CLAUDE.md conventions + guardrail #9 - A2: additive/reversible core migration (canonical_entities, entity_links, interaction_log, relationship_edges, soft-delete) + ledgered runner - B1/B3: chunking + deterministic entity resolution (backend/ingest) - B2: dense (bge-m3) + BM25 sparse ingest to Qdrant crm_chunks - C: CRM MCP server (reads, retrieval modes, logged writes) — no outbound tools - docs: redaction/re-hydration, Gmail enablement runbook - synthetic test data; .env.example; housekeeping (.gitignore, untrack crm.db, drop legacy files + start9/0.3.5) Verified end-to-end on synthetic data + live Sparks (hybrid > dense on entity queries). Real backfill runs on Ten31 infra; index holds synthetic data only. Branch snapshot also captures pre-existing working-tree changes. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
298 lines
11 KiB
Python
298 lines
11 KiB
Python
"""
|
|
Credential providers for Gmail API access.

Two implementations behind a common protocol:

- DWDCredentialProvider: signs a JWT with the Workspace-authorized service
  account, then exchanges it for a short-lived access token that
  impersonates a specific user. No per-user persistent state.

- OAuthCredentialProvider: uses a per-user refresh token (stored encrypted
  in email_accounts.oauth_refresh_enc) to mint access tokens. Supports the
  'connect Gmail' UI flow.

Both provide the same interface:

    provider.access_token_for(email_address: str) -> AccessToken
    provider.revoke(email_address: str) -> None
|
|
"""
|
|
|
|
import base64
import json
import os
import threading
import time
import urllib.error
import urllib.parse
import urllib.request
from dataclasses import dataclass
from typing import Optional, Protocol

from . import config as _cfg
from . import crypto
from . import errors
|
|
|
|
|
|
# OAuth scope placed in the `scope` claim of the service-account JWT:
# read-only access to Gmail.
GMAIL_READONLY_SCOPE = "https://www.googleapis.com/auth/gmail.readonly"
# Google's OAuth 2.0 token endpoint. Used as the POST target for token
# exchanges and as the `aud` claim of the service-account JWT.
GOOGLE_TOKEN_URL = "https://oauth2.googleapis.com/token"
|
|
|
|
|
|
@dataclass
class AccessToken:
    """A bearer access token paired with the time at which it expires."""

    # The raw access-token string returned by the token endpoint.
    token: str
    # Expiry instant; the providers in this module compare it to time.time().
    expires_at: float  # epoch seconds
|
|
|
|
|
|
class CredentialProvider(Protocol):
    """Structural interface shared by the credential providers in this module."""

    def access_token_for(self, email_address: str) -> AccessToken:
        """Return an access token usable on behalf of *email_address*."""
        ...

    def revoke(self, email_address: str) -> None:
        """Discard whatever credential state is held for *email_address*."""
        ...
|
|
|
|
|
|
# ============================================================================
|
|
# Domain-wide delegation
|
|
# ============================================================================
|
|
|
|
class DWDCredentialProvider:
    """Impersonation via service-account JWT bearer grant.

    The service-account key file is read once at construction. Access tokens
    are minted on demand by signing an RS256 JWT whose ``sub`` claim is the
    mailbox to impersonate and exchanging it at ``GOOGLE_TOKEN_URL``. Minted
    tokens are cached in memory, keyed by email address.
    """

    def __init__(self, key_path: str):
        """Load the service-account JSON key stored at *key_path*.

        Raises:
            OSError: the file cannot be opened.
            ValueError: the file is not valid JSON.
            KeyError: ``client_email`` or ``private_key`` is missing.
        """
        # Key files are JSON and therefore UTF-8; do not depend on the
        # platform's locale default when decoding them.
        with open(key_path, "r", encoding="utf-8") as f:
            self._key = json.load(f)
        self._client_email = self._key["client_email"]
        self._private_key_pem = self._key["private_key"].encode("utf-8")
        self._cache: dict[str, AccessToken] = {}
        self._lock = threading.Lock()

    def access_token_for(self, email_address: str) -> AccessToken:
        """Return an access token impersonating *email_address*.

        A cached token is reused while it has more than 60 seconds left;
        otherwise a new one is minted and cached. The lock is held for the
        whole call, including the network exchange, so concurrent callers
        are serialized.
        """
        with self._lock:
            cached = self._cache.get(email_address)
            if cached and cached.expires_at - time.time() > 60:
                return cached
            token = self._mint(email_address)
            self._cache[email_address] = token
            return token

    def revoke(self, email_address: str) -> None:
        """Forget the cached token for *email_address* (no remote call)."""
        # DWD tokens expire naturally in <1h. Revocation is via Admin console.
        # We just drop the cache so next call mints fresh.
        with self._lock:
            self._cache.pop(email_address, None)

    # ------------------------------------------------------------------ helpers

    def _mint(self, subject_email: str) -> AccessToken:
        """Sign a JWT for *subject_email* and exchange it for an access token.

        Raises:
            errors.AuthError: the ``cryptography`` package is missing, or the
                token endpoint answered without an ``access_token``.
            Exception: whatever ``errors.classify_http`` builds for a non-2xx
                HTTP response (chained to the original ``HTTPError``).
        """
        try:
            from cryptography.hazmat.primitives import hashes, serialization  # type: ignore
            from cryptography.hazmat.primitives.asymmetric import padding  # type: ignore
        except ImportError as e:  # pragma: no cover
            raise errors.AuthError(
                "DWD requires the `cryptography` package. Add to requirements.txt."
            ) from e

        now = int(time.time())
        header = {"alg": "RS256", "typ": "JWT"}
        claim = {
            "iss": self._client_email,
            "sub": subject_email,
            "scope": GMAIL_READONLY_SCOPE,
            "aud": GOOGLE_TOKEN_URL,
            "iat": now,
            "exp": now + 3600,
        }
        signing_input = _b64url(_json(header)) + b"." + _b64url(_json(claim))

        private_key = serialization.load_pem_private_key(self._private_key_pem, password=None)
        signature = private_key.sign(signing_input, padding.PKCS1v15(), hashes.SHA256())
        jwt = signing_input + b"." + _b64url(signature)

        body = urllib.parse.urlencode({
            "grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer",
            "assertion": jwt.decode("ascii"),
        }).encode("ascii")

        req = urllib.request.Request(
            GOOGLE_TOKEN_URL,
            data=body,
            headers={"Content-Type": "application/x-www-form-urlencoded"},
        )
        try:
            with urllib.request.urlopen(req, timeout=15) as resp:
                payload = json.loads(resp.read())
        except urllib.error.HTTPError as e:
            # Keep the error text in its own name rather than rebinding the
            # request `body` (bytes) to a str.
            error_text = e.read().decode("utf-8", errors="replace")
            try:
                payload = json.loads(error_text)
            except Exception:
                # Not JSON: hand the raw text to the classifier instead.
                payload = {"raw": error_text}
            raise errors.classify_http(e.code, payload) from e

        if "access_token" not in payload:
            raise errors.AuthError("DWD token exchange returned no access_token", payload=payload)
        return AccessToken(
            token=payload["access_token"],
            # Report expiry 30 s early as a safety margin.
            expires_at=time.time() + float(payload.get("expires_in", 3600)) - 30,
        )
|
|
|
|
|
|
# ============================================================================
|
|
# Per-user OAuth (fallback)
|
|
# ============================================================================
|
|
|
|
class OAuthCredentialProvider:
    """Refreshes access tokens using a stored encrypted refresh token.

    Refresh tokens are obtained via the consent-flow routes in routes.py and
    stored in email_accounts.oauth_refresh_enc (AES-GCM ciphertext).

    The most recent access token is also kept, encrypted, in
    email_accounts.oauth_token_enc with its expiry in oauth_token_exp, and is
    reused until it has 60 seconds or less left.
    """

    def __init__(self, db_conn_factory, client_id: str, client_secret: str, secret_key_b64: str):
        """Store the collaborators; no I/O happens here.

        Args:
            db_conn_factory: zero-argument callable returning a DB connection.
                Each helper opens one connection and closes it when done.
                Rows must support lookup by column name (``row["col"]``).
            client_id: OAuth client id sent with refresh requests.
            client_secret: OAuth client secret sent with refresh requests.
            secret_key_b64: key passed to ``crypto.encrypt`` / ``crypto.decrypt``.
        """
        self._db = db_conn_factory
        self._client_id = client_id
        self._client_secret = client_secret
        self._secret_key_b64 = secret_key_b64
        self._lock = threading.Lock()

    def access_token_for(self, email_address: str) -> AccessToken:
        """Return a valid access token for *email_address*.

        Uses the stored access token when it has more than 60 seconds left,
        otherwise refreshes. The lock is held for the whole call, including
        any network refresh.

        Raises:
            errors.AuthError: no account row, or no refresh token stored.
        """
        with self._lock:
            row = self._load_account(email_address)
            if row is None:
                raise errors.AuthError(f"no email_accounts row for {email_address}")
            # Cached access token still valid?
            if row["oauth_token_enc"] and row["oauth_token_exp"]:
                try:
                    exp = float(row["oauth_token_exp"])
                except (TypeError, ValueError):
                    # Unparseable expiry: treat the cached token as expired.
                    exp = 0.0
                if exp - time.time() > 60:
                    token = crypto.decrypt(row["oauth_token_enc"], secret_key_b64=self._secret_key_b64).decode("ascii")
                    return AccessToken(token=token, expires_at=exp)
            # Refresh.
            return self._refresh(email_address, row)

    def revoke(self, email_address: str) -> None:
        """Revoke the refresh token at Google (best effort) and zero local state.

        Does nothing when there is no account row or no stored refresh token.
        Otherwise the local columns are cleared even if the remote call, or
        decrypting the stored token, fails; a decrypt failure still propagates
        after the row has been zeroed.
        """
        # Hold the same lock as access_token_for so an in-flight refresh
        # cannot write a fresh access token back after the row is zeroed.
        with self._lock:
            row = self._load_account(email_address)
            if not row or not row["oauth_refresh_enc"]:
                return
            try:
                self._revoke_remote(row["oauth_refresh_enc"])
            finally:
                self._zero_account(email_address)

    # ------------------------------------------------------------------ helpers

    def _revoke_remote(self, refresh_enc) -> None:
        """POST the decrypted refresh token to Google's revoke endpoint."""
        refresh = crypto.decrypt(refresh_enc, secret_key_b64=self._secret_key_b64).decode("ascii")
        body = urllib.parse.urlencode({"token": refresh}).encode("ascii")
        req = urllib.request.Request(
            "https://oauth2.googleapis.com/revoke",
            data=body,
            headers={"Content-Type": "application/x-www-form-urlencoded"},
        )
        try:
            # Context manager so the response is closed deterministically.
            with urllib.request.urlopen(req, timeout=10) as resp:
                resp.read()
        except Exception:
            pass  # best effort; the caller zeroes local state regardless

    def _refresh(self, email_address: str, row) -> AccessToken:
        """Exchange the stored refresh token for a new access token and persist it.

        Raises:
            errors.AuthError: no refresh token is stored, or the response
                carries no ``access_token``.
            Exception: whatever ``errors.classify_http`` builds for a non-2xx
                HTTP response (chained to the original ``HTTPError``).
        """
        if not row["oauth_refresh_enc"]:
            raise errors.AuthError(f"no refresh token stored for {email_address}")
        refresh = crypto.decrypt(row["oauth_refresh_enc"], secret_key_b64=self._secret_key_b64).decode("ascii")
        body = urllib.parse.urlencode({
            "grant_type": "refresh_token",
            "refresh_token": refresh,
            "client_id": self._client_id,
            "client_secret": self._client_secret,
        }).encode("ascii")
        req = urllib.request.Request(
            GOOGLE_TOKEN_URL,
            data=body,
            headers={"Content-Type": "application/x-www-form-urlencoded"},
        )
        try:
            with urllib.request.urlopen(req, timeout=15) as resp:
                payload = json.loads(resp.read())
        except urllib.error.HTTPError as e:
            body_text = e.read().decode("utf-8", errors="replace")
            try:
                payload = json.loads(body_text)
            except Exception:
                # Not JSON: hand the raw text to the classifier instead.
                payload = {"raw": body_text}
            raise errors.classify_http(e.code, payload) from e

        if "access_token" not in payload:
            raise errors.AuthError("OAuth refresh returned no access_token", payload=payload)

        token_str = payload["access_token"]
        # Report expiry 30 s early as a safety margin.
        exp = time.time() + float(payload.get("expires_in", 3600)) - 30
        enc_token = crypto.encrypt(token_str.encode("ascii"), secret_key_b64=self._secret_key_b64)
        self._save_token(email_address, enc_token, exp)
        return AccessToken(token=token_str, expires_at=exp)

    def _load_account(self, email_address: str):
        """Fetch the email_accounts row for *email_address*, or None if absent."""
        conn = self._db()
        try:
            cur = conn.cursor()
            cur.execute(
                "SELECT id, oauth_refresh_enc, oauth_token_enc, oauth_token_exp "
                "FROM email_accounts WHERE email_address = ?",
                (email_address,),
            )
            return cur.fetchone()
        finally:
            conn.close()

    def _save_token(self, email_address: str, enc_token: bytes, exp: float):
        """Persist the encrypted access token and its expiry (stored as text)."""
        conn = self._db()
        try:
            conn.execute(
                "UPDATE email_accounts SET oauth_token_enc = ?, oauth_token_exp = ?, "
                "updated_at = datetime('now') WHERE email_address = ?",
                (enc_token, str(exp), email_address),
            )
            conn.commit()
        finally:
            conn.close()

    def _zero_account(self, email_address: str):
        """Clear all stored tokens for the account and pause its sync."""
        conn = self._db()
        try:
            conn.execute(
                "UPDATE email_accounts SET oauth_refresh_enc = NULL, oauth_token_enc = NULL, "
                "oauth_token_exp = NULL, sync_enabled = 0, sync_status = 'paused', "
                "updated_at = datetime('now') WHERE email_address = ?",
                (email_address,),
            )
            conn.commit()
        finally:
            conn.close()
|
|
|
|
|
|
# ============================================================================
|
|
# Factory — resolves CONFIG.primary_auth to a concrete provider
|
|
# ============================================================================
|
|
|
|
def build_provider(db_conn_factory) -> CredentialProvider:
    """Construct the credential provider selected by ``CONFIG.primary_auth``.

    ``"dwd"`` yields a DWDCredentialProvider and requires the configured key
    path to exist on disk. ``"oauth"`` yields an OAuthCredentialProvider and
    requires client id, client secret and secret key to all be set.

    Raises:
        errors.AuthError: the selected mode is misconfigured, or the mode is
            neither ``"dwd"`` nor ``"oauth"``.
    """
    settings = _cfg.CONFIG
    mode = settings.primary_auth

    if mode == "dwd":
        key_path = settings.dwd_key_path
        if key_path and os.path.exists(key_path):
            return DWDCredentialProvider(key_path)
        raise errors.AuthError(
            f"CRM_GMAIL_SA_KEY_PATH not found: {key_path!r}"
        )

    if mode == "oauth":
        if (
            not settings.oauth_client_id
            or not settings.oauth_client_secret
            or not settings.secret_key_b64
        ):
            raise errors.AuthError(
                "OAuth mode requires CRM_GMAIL_OAUTH_CLIENT_ID, "
                "CRM_GMAIL_OAUTH_CLIENT_SECRET, and CRM_GMAIL_SECRET_KEY."
            )
        return OAuthCredentialProvider(
            db_conn_factory,
            settings.oauth_client_id,
            settings.oauth_client_secret,
            settings.secret_key_b64,
        )

    raise errors.AuthError(f"unknown primary_auth: {mode!r}")
|
|
|
|
|
|
# ---------------------------------------------------------------------------- utils
|
|
|
|
def _b64url(data: bytes) -> bytes:
|
|
return base64.urlsafe_b64encode(data).rstrip(b"=")
|
|
|
|
|
|
def _json(obj) -> bytes:
|
|
return json.dumps(obj, separators=(",", ":")).encode("utf-8")
|