c7b74a2704
Communications tab (search/query roadmap items 1 & 2): - Fix the investor dropdown: the facet only listed grid investors, so it came back empty whenever email matched a classic contact or org domain (no grid id — the common case). It now mirrors the email list, resolving each link to a typed identity (fund:/org:/contact:/addr:) with precedence grid -> org -> contact -> address; investor_id accepts the typed key (bare id = fund: for back-compat) and an unknown prefix matches nothing. - Add a date-range filter and a click-to-expand full-body view (GET /api/email/detail, admin, soft-delete-gated; body_text only, never raw remote HTML). - Add a "Search content" mode: GET /api/email/search wraps the ingest hybrid_search over the Qdrant email index (doc_type=email), hydrated and soft-delete-filtered against SQLite (canonical), 503 if Spark/Qdrant down. Daily digest: - Settings -> Admin builds a digest over a chosen window (last 24h or since a date) as an in-app preview before sending (POST /api/admin/digest/preview), so the local-Spark summarizer can be verified on demand even on a quiet day. Manual send uses the same window; neither advances the daily cursor, so a preview never suppresses the scheduled digest. Code-only, migrations no-op. 22/22 backend tests, render-smoke pass.
626 lines
23 KiB
Python
626 lines
23 KiB
Python
"""
|
|
HTTP route handlers for the Gmail integration.
|
|
|
|
Designed to plug into server.py's CRMHandler (BaseHTTPRequestHandler) pattern.
|
|
The hook is a single function call near the top of do_GET / do_POST that
|
|
lets this module claim any /api/email/* request:
|
|
|
|
# in CRMHandler.do_GET and CRMHandler.do_POST, before the 404 fallthrough:
|
|
from email_integration.routes import try_handle
|
|
if try_handle(self):
|
|
return
|
|
|
|
`try_handle(handler)` inspects `handler.command` and `handler.get_path()` and
|
|
returns True if it handled the request (sent a response).
|
|
|
|
Every handler respects the same auth / rate-limit model as the rest of server.py
|
|
by calling handler.get_user() and handler.rate_limited(...).
|
|
"""
|
|
|
|
import json
|
|
import sqlite3
|
|
from typing import Optional
|
|
|
|
from . import config as _cfg
|
|
from . import credentials as _creds
|
|
from . import crypto as _crypto
|
|
from . import db as _db
|
|
from . import scheduler as _sched
|
|
|
|
|
|
# ---------------------------------------------------------------------------- dispatch
|
|
|
|
# GET endpoints claimed by this module: exact request path -> handler suffix.
# try_handle() resolves a suffix to the module-level function `_h_<suffix>`.
_GET_ROUTES = {
    "/api/email/status": "status",
    "/api/email/accounts": "list_accounts",
    "/api/email/activity": "activity",
    "/api/email/detail": "detail",
    "/api/email/search": "search",
    "/api/email/threads": "list_threads",
    "/api/email/oauth/start": "oauth_start",
    "/api/email/oauth/callback": "oauth_callback",
}

# POST endpoints, resolved the same way as the GET table above.
_POST_ROUTES = {
    "/api/email/accounts/enroll-all": "enroll_all",
    "/api/email/accounts/enroll": "enroll_one",
    "/api/email/sync/run-now": "run_now",
    "/api/email/rematch": "rematch",
}
|
|
|
|
|
|
def try_handle(handler) -> bool:
    """Claim and serve any request under /api/email/.

    Returns False (nothing sent) when the path is outside the /api/email/
    prefix, so the caller can continue its own dispatch. Returns True in
    every other case, after a response has been sent:

    * 404 when the path/method pair is not in the route tables,
    * 503 when the integration is disabled in config,
    * 500 when the route has no `_h_<name>` implementation or the
      implementation raised,
    * otherwise whatever the implementation sent.
    """
    request_path = handler.get_path()
    if not request_path.startswith("/api/email/"):
        return False

    verb = handler.command
    if verb == "GET":
        routes = _GET_ROUTES
    elif verb == "POST":
        routes = _POST_ROUTES
    else:
        routes = {}

    route_name = routes.get(request_path)
    if not route_name:
        # Route is owned by this module but unknown — return a proper 404
        # instead of letting the main dispatcher's 404 abuse counter fire.
        handler.send_error_json("Not found", 404)
        return True

    if not _cfg.CONFIG.enabled:
        handler.send_error_json("Email integration disabled", 503)
        return True

    # Handlers are module-level functions named after the route entry.
    route_impl = globals().get(f"_h_{route_name}")
    if route_impl is None:
        handler.send_error_json("Not implemented", 500)
        return True

    try:
        route_impl(handler)
    except Exception as exc:
        handler.send_error_json(f"Internal error: {exc}", 500)
    return True
|
|
|
|
|
|
# ---------------------------------------------------------------------------- helpers
|
|
|
|
def _conn() -> sqlite3.Connection:
    """Open a new connection to the CRM SQLite database.

    The path comes from the CRM_DB_PATH environment variable, falling back to
    `crm.db` inside the configured data directory. The connection is switched
    to WAL journaling, foreign-key enforcement and a 5000 ms busy timeout, and
    rows are returned as `sqlite3.Row`. The caller is responsible for closing it.
    """
    import os

    fallback_path = os.path.join(_cfg.CONFIG.data_dir, "crm.db")
    database_file = os.environ.get("CRM_DB_PATH", fallback_path)

    connection = sqlite3.connect(database_file)
    for pragma in (
        "PRAGMA journal_mode=WAL",
        "PRAGMA foreign_keys=ON",
        "PRAGMA busy_timeout=5000",
    ):
        connection.execute(pragma)
    connection.row_factory = sqlite3.Row
    return connection
|
|
|
|
|
|
def _require_auth(handler) -> Optional[dict]:
    """Return the authenticated user for this request, or None after a 401.

    When `handler.get_user()` yields nothing, a 401 "Unauthorized" response is
    sent and None is returned; the caller should simply stop.
    """
    session_user = handler.get_user()
    if session_user:
        return session_user
    handler.send_error_json("Unauthorized", 401)
    return None
|
|
|
|
|
|
def _require_admin(handler) -> Optional[dict]:
    """Return the authenticated user only if their role is "admin".

    Unauthenticated requests get the 401 sent by `_require_auth`; authenticated
    non-admins get a 403 "Admin required". In both cases None is returned and
    the caller should stop.
    """
    account = _require_auth(handler)
    if account is None:
        return None
    if account.get("role") == "admin":
        return account
    handler.send_error_json("Admin required", 403)
    return None
|
|
|
|
|
|
# ---------------------------------------------------------------------------- GET handlers
|
|
|
|
def _h_status(handler):
    """GET /api/email/status — scheduler snapshot plus database-derived counts.

    Admin-only: Email Capture is an admin surface (nav-hidden from members) and
    this endpoint exposes mailbox/sync metadata, so admin is enforced here too.
    """
    if not _require_admin(handler):
        return

    snapshot = _sched.status_snapshot()
    db = _conn()
    try:
        cursor = db.cursor()

        def count_of(sql):
            # Run a single-row query that exposes its result as column `n`.
            cursor.execute(sql)
            return cursor.fetchone()["n"]

        cursor.execute(
            "SELECT COUNT(*) AS n_accounts, "
            "SUM(CASE WHEN sync_status='active' THEN 1 ELSE 0 END) AS n_active, "
            "SUM(CASE WHEN sync_status='error' THEN 1 ELSE 0 END) AS n_error "
            "FROM email_accounts"
        )
        summary_row = cursor.fetchone()
        account_summary = dict(summary_row or {})

        snapshot["matched_emails"] = count_of(
            "SELECT COUNT(*) AS n FROM emails WHERE match_status = 'matched'"
        )
        # Total captured climbs page-by-page during backfill (the sync_runs row
        # only finalizes at the end), so it is the live progress signal.
        snapshot["captured_emails"] = count_of("SELECT COUNT(*) AS n FROM emails")

        # Latest sync run (status running|ok|error|partial) for a human-readable line.
        cursor.execute(
            "SELECT kind, status, started_at, finished_at, messages_seen, messages_stored "
            "FROM email_sync_runs ORDER BY started_at DESC LIMIT 1"
        )
        latest_run = cursor.fetchone()
        snapshot["last_run"] = dict(latest_run) if latest_run else None

        # An enrolled account whose backfill has not completed is still pulling history.
        pending_backfills = count_of(
            "SELECT COUNT(*) AS n FROM email_accounts WHERE sync_enabled = 1 AND backfill_complete = 0"
        )
        snapshot["backfilling"] = (pending_backfills or 0) > 0
    finally:
        db.close()

    snapshot["accounts_summary"] = account_summary
    handler.send_json(snapshot)
|
|
|
|
|
|
def _h_list_accounts(handler):
    """GET /api/email/accounts — every enrolled mailbox with per-mailbox counts.

    Admin-only: the mailbox list (addresses, sync state, errors) belongs to the
    admin-only Email Capture surface and is enforced server-side, not just
    nav-hidden. Each account dict gains `captured` and `matched` counters,
    which default to 0.
    """
    if not _require_admin(handler):
        return

    db = _conn()
    try:
        cursor = db.cursor()
        cursor.execute(
            "SELECT id, user_id, email_address, auth_method, sync_enabled, "
            "sync_status, sync_error, last_synced_at, backfill_complete "
            "FROM email_accounts ORDER BY email_address"
        )
        accounts = [dict(row) for row in cursor.fetchall()]

        # Per-mailbox counts: emails are de-duplicated globally, so "captured per
        # mailbox" comes from the per-account sighting table; "matched" joins to emails.
        captured_by_account, matched_by_account = {}, {}
        try:
            cursor.execute(
                "SELECT account_id, COUNT(*) AS n FROM email_account_messages "
                "WHERE deleted_at IS NULL GROUP BY account_id"
            )
            captured_by_account = {
                row["account_id"]: row["n"] for row in cursor.fetchall()
            }
            cursor.execute(
                "SELECT eam.account_id AS account_id, COUNT(*) AS n FROM email_account_messages eam "
                "JOIN emails e ON e.id = eam.email_id "
                "WHERE eam.deleted_at IS NULL AND e.is_matched = 1 GROUP BY eam.account_id"
            )
            matched_by_account = {
                row["account_id"]: row["n"] for row in cursor.fetchall()
            }
        except sqlite3.OperationalError:
            # Deliberate best-effort: the counters are optional decoration, so a
            # failing count query leaves whatever was gathered (zeros otherwise).
            pass

        for account in accounts:
            account_id = account["id"]
            account["captured"] = captured_by_account.get(account_id, 0)
            account["matched"] = matched_by_account.get(account_id, 0)
    finally:
        db.close()

    handler.send_json({"accounts": accounts})
|
|
|
|
|
|
def _h_activity(handler):
    """GET /api/email/activity — filtered list of captured-email activity.

    Admin-only: the Communications page renders captured-Gmail activity (the
    classic manual-log surface was retired). Mailbox/investor substance is
    admin-scoped, so admin is enforced server-side, not just nav-hidden.

    Query parameters: investor_id, account_id, q (or search), direction,
    since, until, limit (default 100, also used when unparseable). Blank
    filters are passed to the query layer as None.
    """
    if not _require_admin(handler):
        return

    params = handler.get_query_params()

    def text_param(*names):
        # First non-empty value among `names`, stripped; None when blank/absent.
        for name in names:
            value = params.get(name)
            if value:
                return value.strip() or None
        return None

    try:
        limit = int(params.get("limit", 100))
    except (TypeError, ValueError):
        limit = 100

    db = _conn()
    try:
        result = _db.query_email_activity(
            db,
            investor_id=text_param("investor_id"),
            account_id=text_param("account_id"),
            search=text_param("q", "search"),
            direction=text_param("direction"),
            since=text_param("since"),
            until=text_param("until"),
            limit=limit,
        )
    finally:
        db.close()

    handler.send_json(result)
|
|
|
|
|
|
def _h_detail(handler):
    """GET /api/email/detail?id=... — full detail of one captured email.

    Admin-only: the full body + recipients of a captured email is admin-scoped
    substance, same as the activity list it expands from. Responds 400 when
    `id` is missing or blank and 404 when the query layer returns None.
    """
    if not _require_admin(handler):
        return

    raw_id = handler.get_query_params().get("id")
    email_id = (raw_id or "").strip()
    if not email_id:
        return handler.send_error_json("id required", 400)

    db = _conn()
    try:
        detail = _db.query_email_detail(db, email_id)
    finally:
        db.close()

    if detail is not None:
        handler.send_json(detail)
        return
    return handler.send_error_json("Not found", 404)
|
|
|
|
|
|
def _semantic_email_search(query: str, top_k: int) -> list:
    """Run the ingest stack's hybrid search restricted to doc_type='email'.

    Hybrid (dense + BM25, reranked) retrieval over the email bodies indexed in
    Qdrant. Returns the raw ranked hits (payload carries source_id=email_id,
    lp_name, date_ts, text). The ingest stack (Spark Control + Qdrant + the
    sparse encoder) lives in the Docker image, so it is imported lazily — a
    bare CRM without it raises, and the caller maps that to a 503.
    """
    import os
    import sys

    # The ingest package sits next to this package: <root>/ingest/search.py.
    package_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(package_dir)
    ingest_dir = os.path.join(project_root, "ingest")
    if ingest_dir not in sys.path:
        sys.path.insert(0, ingest_dir)

    import search as ingest_search  # ingest/search.py

    email_only = {"must": [{"key": "doc_type", "match": {"value": "email"}}]}
    return ingest_search.hybrid_search(
        query, top_k=top_k, rerank=True, filt=email_only
    )
|
|
|
|
|
|
def _h_search(handler):
    """GET /api/email/search?q=...&top_k=... — semantic search over email bodies.

    Admin-only semantic search over captured email *content*, distinct from the
    structured subject/sender filters in _h_activity. Hits are hydrated and
    soft-delete-filtered against SQLite (canonical), so a deleted email never
    surfaces from a stale index; rank order from the retriever is preserved.

    A blank query returns an empty result set. `top_k` is clamped to 1..50
    (default 25). Any retrieval failure is logged to stderr and reported to the
    client as a 503.
    """
    if not _require_admin(handler):
        return

    params = handler.get_query_params()
    query = (params.get("q") or params.get("query") or "").strip()
    if not query:
        return handler.send_json({"query": "", "results": []})

    try:
        top_k = min(50, max(1, int(params.get("top_k", 25))))
    except (TypeError, ValueError):
        top_k = 25

    try:
        hits = _semantic_email_search(query, top_k)
    except Exception as exc:
        # Spark Control / Qdrant unreachable, or the ingest stack isn't installed.
        # Log server-side (an error can carry a URL/host); give the UI a clean 503.
        import sys

        print(
            f"[email-search] retrieval failed: {type(exc).__name__}: {exc}",
            file=sys.stderr,
        )
        return handler.send_error_json(
            "Content search is unavailable (Spark/Qdrant not reachable).", 503
        )

    # Pair every hit with its (possibly missing) payload, keeping rank order.
    ranked = [((hit.get("payload", {}) or {}), hit) for hit in hits]
    source_ids = [payload.get("source_id") for payload, _hit in ranked]

    db = _conn()
    try:
        live = _db.search_hit_emails(db, source_ids)
    finally:
        db.close()

    hydrated_fields = (
        "subject",
        "from_name",
        "from_email",
        "sent_at",
        "direction",
        "has_attachments",
    )
    results = []
    for payload, hit in ranked:
        email_id = payload.get("source_id")
        record = live.get(email_id)
        if not record:
            # Deleted since indexing, or not matched-resolvable -> drop.
            continue
        entry = {"email_id": email_id}
        entry.update((field, record[field]) for field in hydrated_fields)
        entry["lp_name"] = payload.get("lp_name")
        entry["score"] = hit.get("score")
        snippet = hit.get("text") or payload.get("text") or ""
        entry["excerpt"] = snippet.replace("\n", " ").strip()[:300]
        results.append(entry)

    handler.send_json({"query": query, "results": results, "count": len(results)})
|
|
|
|
|
|
def _h_list_threads(handler):
    """GET /api/email/threads — recent email threads, optionally per investor.

    Query parameters:
        investor_id: when given, only threads containing an email linked to
            that fundraising investor (directly, or through one of the
            investor's fundraising contacts) are returned; otherwise all
            matched threads are returned.
        limit: maximum number of threads, clamped to 1..500 (default 50, also
            used when the value is not an integer).

    Responds with `{"threads": [...]}` ordered by most recent message first.

    NOTE(review): this endpoint only requires authentication, while the other
    read endpoints in this module require admin — confirm that is intended.
    """
    user = _require_auth(handler)
    if not user:
        return

    q = handler.get_query_params()
    investor_id = q.get("investor_id")

    # Clamp on both sides: SQLite treats a negative LIMIT as "no limit", which
    # would bypass the 500 cap, and a non-numeric value must not become a 500.
    try:
        limit = min(500, max(1, int(q.get("limit", 50))))
    except (TypeError, ValueError):
        limit = 50

    conn = _conn()
    try:
        cur = conn.cursor()
        if investor_id:
            cur.execute(
                """SELECT t.*
                   FROM email_threads t
                   JOIN emails e ON e.thread_id = t.id
                   JOIN email_investor_links l ON l.email_id = e.id
                   WHERE l.fundraising_investor_id = ?
                      OR l.fundraising_contact_id IN (
                          SELECT id FROM fundraising_contacts WHERE investor_id = ?
                      )
                   GROUP BY t.id
                   ORDER BY t.last_message_at DESC
                   LIMIT ?""",
                (investor_id, investor_id, limit),
            )
        else:
            cur.execute(
                "SELECT * FROM email_threads WHERE is_matched = 1 "
                "ORDER BY last_message_at DESC LIMIT ?",
                (limit,),
            )
        threads = [dict(r) for r in cur.fetchall()]
    finally:
        conn.close()

    handler.send_json({"threads": threads})
|
|
|
|
|
|
def _h_oauth_start(handler):
    """Begin per-user OAuth consent flow (fallback path).

    Requires an authenticated user and `primary_auth == "oauth"` in config.
    The `account_email` query parameter names the mailbox to connect; it is
    stripped before validation so a whitespace-only value is rejected with a
    400 instead of being stored as an empty address.

    A random `state` token is recorded together with the user and mailbox, and
    the Google consent URL carrying that state is returned as
    `{"redirect_url": ...}` for the client to navigate to.
    """
    user = _require_auth(handler)
    if not user:
        return
    if _cfg.CONFIG.primary_auth != "oauth":
        return handler.send_error_json(
            "Per-user OAuth disabled (set CRM_GMAIL_AUTH_METHOD=oauth to enable)", 400
        )

    q = handler.get_query_params()
    account_email = (q.get("account_email") or "").strip()
    if not account_email:
        return handler.send_error_json("account_email required", 400)

    import secrets
    import urllib.parse

    # The state token ties the later callback to this user + mailbox.
    state = secrets.token_urlsafe(32)
    _oauth_state_store(state, user["user_id"], account_email)

    params = {
        "client_id": _cfg.CONFIG.oauth_client_id,
        "redirect_uri": _cfg.CONFIG.oauth_redirect_uri,
        "response_type": "code",
        "scope": _creds.GMAIL_READONLY_SCOPE,
        # offline access + forced consent are what make Google return a
        # refresh token, which the callback requires.
        "access_type": "offline",
        "prompt": "consent",
        "state": state,
        "login_hint": account_email,
    }
    url = "https://accounts.google.com/o/oauth2/v2/auth?" + urllib.parse.urlencode(params)
    handler.send_json({"redirect_url": url})
|
|
|
|
|
|
def _h_oauth_callback(handler):
    """Exchange code for tokens, encrypt refresh token, store.

    Expects `code` and `state` query parameters. The state must match one
    previously recorded by the OAuth start step; it is consumed (single use).
    On success the account row is upserted with auth_method "oauth", its
    encrypted refresh token saved and its sync status reset to 'pending'.

    Responds 400 for missing parameters, unknown state, or a token response
    without a refresh token; 500 when the token exchange request fails.
    """
    import urllib.parse
    import urllib.request

    params = handler.get_query_params()
    code, state = params.get("code"), params.get("state")
    if not (code and state):
        return handler.send_error_json("code and state required", 400)

    pending = _oauth_state_consume(state)
    if not pending:
        return handler.send_error_json("Invalid state", 400)

    form = {
        "code": code,
        "client_id": _cfg.CONFIG.oauth_client_id,
        "client_secret": _cfg.CONFIG.oauth_client_secret,
        "redirect_uri": _cfg.CONFIG.oauth_redirect_uri,
        "grant_type": "authorization_code",
    }
    token_request = urllib.request.Request(
        "https://oauth2.googleapis.com/token",
        data=urllib.parse.urlencode(form).encode("ascii"),
        headers={"Content-Type": "application/x-www-form-urlencoded"},
    )
    try:
        with urllib.request.urlopen(token_request, timeout=15) as resp:
            token_payload = json.loads(resp.read())
    except Exception as exc:
        return handler.send_error_json(f"Token exchange failed: {exc}", 500)

    refresh_token = token_payload.get("refresh_token")
    if not refresh_token:
        return handler.send_error_json("No refresh_token returned (user may have previously consented; prompt=consent required)", 400)

    # Only the encrypted form of the refresh token is written to the database.
    encrypted = _crypto.encrypt(
        refresh_token.encode("ascii"), secret_key_b64=_cfg.CONFIG.secret_key_b64
    )

    mailbox = pending["account_email"]
    db = _conn()
    try:
        _db.upsert_account(
            db,
            user_id=pending["user_id"],
            email_address=mailbox,
            auth_method="oauth",
        )
        db.execute(
            "UPDATE email_accounts SET oauth_refresh_enc = ?, sync_status = 'pending', "
            "updated_at = datetime('now') WHERE email_address = ?",
            (encrypted, mailbox),
        )
        db.commit()
    finally:
        db.close()

    handler.send_json({"ok": True, "account_email": pending["account_email"]})
|
|
|
|
|
|
# ---------------------------------------------------------------------------- POST handlers
|
|
|
|
def _h_enroll_all(handler):
    """Admin: enroll every CRM user whose email is @workspace_domain via DWD.

    Only valid when `primary_auth` is "dwd" and a workspace domain is
    configured (400 otherwise). Every active user whose email ends in
    `@<domain>` is upserted as a DWD account (address lower-cased), and the
    response lists the resulting account ids with the users' emails.
    """
    if not _require_admin(handler):
        return
    if _cfg.CONFIG.primary_auth != "dwd":
        return handler.send_error_json("enroll-all only valid in DWD mode", 400)

    domain = _cfg.CONFIG.workspace_domain
    if not domain:
        return handler.send_error_json("CRM_GMAIL_WORKSPACE_DOMAIN not set", 400)

    enrolled = []
    db = _conn()
    try:
        cursor = db.cursor()
        cursor.execute(
            "SELECT id, email FROM users WHERE is_active = 1 AND email LIKE ?",
            (f"%@{domain}",),
        )
        for crm_user in cursor.fetchall():
            account_id = _db.upsert_account(
                db,
                user_id=crm_user["id"],
                email_address=crm_user["email"].lower(),
                auth_method="dwd",
            )
            enrolled.append({"account_id": account_id, "email": crm_user["email"]})
        db.commit()
    finally:
        db.close()

    handler.send_json({"enrolled": enrolled, "count": len(enrolled)})
|
|
|
|
|
|
def _h_enroll_one(handler):
    """Admin: enroll a single mailbox.

    JSON body:
        email / email_address: mailbox to enroll (required, string).
        user_id: CRM user that owns the mailbox (optional).
        auth_method: defaults to the configured primary auth method.

    When `user_id` is omitted it is resolved from the users table by matching
    the email, falling back to the authenticated admin's own id (the common
    case of a single admin enrolling themselves without pasting their UUID).

    Responds 400 when the email is missing or not a string, or when no user id
    can be resolved; otherwise returns the account id, email and user id.
    """
    user = _require_admin(handler)
    if not user:
        return

    body = handler.get_body() or {}
    # Accept either `email` or `email_address` for ergonomics.
    raw_email = body.get("email_address") or body.get("email") or ""
    if not isinstance(raw_email, str):
        # A non-string value would otherwise crash on .lower() and become a 500.
        return handler.send_error_json("email (or email_address) required", 400)
    email_address = raw_email.lower().strip()
    user_id = body.get("user_id")
    auth_method = body.get("auth_method") or _cfg.CONFIG.primary_auth

    if not email_address:
        return handler.send_error_json("email (or email_address) required", 400)

    # One connection serves both the optional lookup and the upsert.
    conn = _conn()
    try:
        if not user_id:
            row = conn.execute(
                "SELECT id FROM users WHERE LOWER(email) = ?",
                (email_address,),
            ).fetchone()
            if row:
                user_id = row["id"]
            else:
                # The OAuth start handler reads the caller's id from
                # user["user_id"]; accept either key so the admin fallback
                # works whichever one the session dict carries.
                user_id = user.get("user_id") or user.get("id")
        if not user_id:
            return handler.send_error_json("could not resolve user_id for that email", 400)

        aid = _db.upsert_account(
            conn,
            user_id=user_id,
            email_address=email_address,
            auth_method=auth_method,
        )
        conn.commit()
    finally:
        conn.close()

    handler.send_json({"account_id": aid, "email": email_address, "user_id": user_id})
|
|
|
|
|
|
def _h_run_now(handler):
    """POST /api/email/sync/run-now — admin-triggered immediate sync.

    Rate-limited through the handler's shared limiter under the key
    "email-sync-now" (429 when exceeded). Before triggering the scheduler,
    enabled accounts in 'error' status are reset to 'pending' with their error
    cleared; the scheduler's trigger result is returned as the response.
    """
    if not _require_admin(handler):
        return

    # Reuse existing rate limit so admins can't hammer this.
    if handler.rate_limited("email-sync-now", 6):
        return handler.send_error_json("Too many requests", 429)

    # A manual sync is an explicit retry. The scheduler intentionally skips
    # error-status accounts (no retry storms), so clear that status here so a
    # mailbox that previously errored is re-attempted. Backfill resumes from its
    # saved cursor and dedups by Message-ID, so nothing is re-captured twice.
    db = _conn()
    try:
        db.execute(
            "UPDATE email_accounts SET sync_status = 'pending', sync_error = NULL "
            "WHERE sync_enabled = 1 AND sync_status = 'error'"
        )
        db.commit()
    finally:
        db.close()

    handler.send_json(_sched.trigger_run_now())
|
|
|
|
|
|
def _h_rematch(handler):
    """Re-evaluate unmatched emails against the current investor index.

    Admin-only. The optional JSON body field `since` (ISO8601) restricts the
    scan to emails sent at or after that time. At most 10000 unmatched emails
    are scanned, newest first. Every email whose participants match gets
    flagged as matched and one investor link per match; all changes are
    committed together at the end.

    Responds with `{"scanned": <rows examined>, "newly_matched": <rows matched>}`.
    """
    user = _require_admin(handler)
    if not user:
        return

    body = handler.get_body() or {}
    since = body.get("since")  # optional ISO8601

    conn = _conn()
    scanned = 0
    matched = 0
    try:
        from .matcher import InvestorIndex

        index = InvestorIndex(own_domain=_cfg.CONFIG.workspace_domain)
        # NOTE(review): this passes the `_conn` factory, not the open `conn` —
        # presumably rebuild() opens its own connection; confirm against matcher.
        index.rebuild(_conn)

        cur = conn.cursor()
        sql = (
            "SELECT id, from_email, to_emails_json, cc_emails_json "
            "FROM emails WHERE match_status = 'unmatched'"
        )
        params: list = []
        if since:
            sql += " AND sent_at >= ?"
            params.append(since)
        sql += " ORDER BY sent_at DESC LIMIT 10000"
        cur.execute(sql, params)

        for row in cur.fetchall():
            scanned += 1
            links = index.match(_rematch_participants(row))
            if not links:
                continue
            matched += 1
            conn.execute(
                "UPDATE emails SET match_status='matched', is_matched=1, "
                "updated_at=datetime('now') WHERE id=?",
                (row["id"],),
            )
            for link in links:
                _db.insert_investor_link(conn, email_id=row["id"], link={
                    "matched_address": link.matched_address,
                    "match_kind": link.match_kind,
                    "match_confidence": link.match_confidence,
                    "fundraising_investor_id": link.target.fundraising_investor_id,
                    "fundraising_contact_id": link.target.fundraising_contact_id,
                    "contact_id": link.target.contact_id,
                    "organization_id": link.target.organization_id,
                })
            # NOTE: body is still missing — we only have headers. A follow-up
            # job can re-fetch the full message from Gmail using the sighting's
            # gmail_message_id. Not done inline to keep this endpoint fast.
        conn.commit()
    finally:
        conn.close()

    handler.send_json({"scanned": scanned, "newly_matched": matched})


def _rematch_participants(row) -> set:
    """Collect the lower-cased sender and to/cc addresses of one emails row.

    Recipient columns hold JSON; entries may be plain address strings or
    dicts with an "email" key. Anything malformed (invalid JSON, a non-list
    value, a non-string address) is skipped, so a single bad row cannot abort
    the whole rematch batch and roll back the matches already found.
    """
    addresses = set()

    sender = row["from_email"]
    if isinstance(sender, str) and sender:
        addresses.add(sender.lower())

    for column in ("to_emails_json", "cc_emails_json"):
        try:
            entries = json.loads(row[column] or "[]")
        except (TypeError, ValueError):
            # json.JSONDecodeError is a ValueError; TypeError covers odd column types.
            continue
        if not isinstance(entries, list):
            continue
        for entry in entries:
            address = entry.get("email") if isinstance(entry, dict) else entry
            if isinstance(address, str) and address:
                addresses.add(address.lower())

    return addresses
|
|
|
|
|
|
# ---------------------------------------------------------------------------- OAuth state store (in-memory)
|
|
# For a 5-person CRM the state store doesn't need to be durable — a server
|
|
# restart between start and callback is rare and just requires a retry.
|
|
|
|
# How long a pending OAuth `state` token stays valid, in seconds.
_OAUTH_STATE_TTL_SECONDS = 600

# Pending consent flows keyed by state token; each value records the user,
# the mailbox being connected and the creation time. Guarded by the lock.
_oauth_states: dict[str, dict] = {}
_oauth_state_lock = __import__("threading").Lock()


def _oauth_state_store(state: str, user_id: str, account_email: str) -> None:
    """Remember a pending OAuth flow under its `state` token.

    Entries older than the TTL are pruned on every call. The mailbox address
    is stored lower-cased and stripped.
    """
    import time

    now = time.time()
    with _oauth_state_lock:
        # Prune stale entries (>10 min).
        cutoff = now - _OAUTH_STATE_TTL_SECONDS
        expired = [key for key, entry in _oauth_states.items() if entry["created"] < cutoff]
        for key in expired:
            del _oauth_states[key]
        _oauth_states[state] = {
            "user_id": user_id,
            "account_email": account_email.lower().strip(),
            "created": now,
        }


def _oauth_state_consume(state: str) -> Optional[dict]:
    """Remove and return the pending flow for `state`, or None.

    Returns None when the token is unknown, was already consumed, or is older
    than the TTL. Expiry is checked here as well as in the store step, because
    pruning only happens when another flow starts — without this check a
    stale token would remain usable indefinitely on a quiet server.
    """
    import time

    with _oauth_state_lock:
        entry = _oauth_states.pop(state, None)
    if entry is None:
        return None
    if entry["created"] < time.time() - _OAUTH_STATE_TTL_SECONDS:
        return None
    return entry
|