Files
Keysat c7b74a2704 Email search/query + windowed digest preview (v0.1.0:83)
Communications tab (search/query roadmap items 1 & 2):
- Fix the investor dropdown: the facet only listed grid investors, so it
  came back empty whenever email matched a classic contact or org domain
  (no grid id — the common case). It now mirrors the email list, resolving
  each link to a typed identity (fund:/org:/contact:/addr:) with precedence
  grid -> org -> contact -> address; investor_id accepts the typed key
  (bare id = fund: for back-compat) and an unknown prefix matches nothing.
- Add a date-range filter and a click-to-expand full-body view
  (GET /api/email/detail, admin, soft-delete-gated; body_text only, never
  raw remote HTML).
- Add a "Search content" mode: GET /api/email/search wraps the ingest
  hybrid_search over the Qdrant email index (doc_type=email), hydrated and
  soft-delete-filtered against SQLite (canonical), 503 if Spark/Qdrant down.

Daily digest:
- Settings -> Admin builds a digest over a chosen window (last 24h or since
  a date) as an in-app preview before sending (POST /api/admin/digest/preview),
  so the local-Spark summarizer can be verified on demand even on a quiet day.
  Manual send uses the same window; neither advances the daily cursor, so a
  preview never suppresses the scheduled digest.

Code-only, migrations no-op. 22/22 backend tests, render-smoke pass.
2026-06-16 20:46:15 -05:00

336 lines
18 KiB
Python

#!/usr/bin/env python3
"""Test the admin-only email-activity panel (Communications tab, v0.1.0:80).
Covers the pure query (`db.query_email_activity`): matched-only scope (unmatched
cold/unknown-sender email is never surfaced), investor/mailbox/search/direction/
date-range filters, per-sighting soft-delete, direction at the email level, mailbox
roll-ups, and the *typed* investor facet (grid investor / org / contact), including
the v83 fix where an email matched only to a classic contact or org domain — not yet
wired to a grid investor — still resolves to a real name and appears in the dropdown
(previously the facet came back empty). Also asserts the route handler enforces admin
server-side. Synthetic data only.
Run: cd backend && python3 email_integration/test_email_activity_panel.py
"""
import os
import sqlite3
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from email_integration import db as _db # noqa: E402
FAILS = []
def check(cond, msg):
print((" PASS " if cond else " FAIL ") + msg)
if not cond:
FAILS.append(msg)
def make_db():
conn = sqlite3.connect(":memory:")
conn.row_factory = sqlite3.Row
conn.executescript("""
CREATE TABLE email_accounts (id TEXT PRIMARY KEY, email_address TEXT);
CREATE TABLE emails (id TEXT PRIMARY KEY, subject TEXT, from_name TEXT, from_email TEXT,
sent_at TEXT, snippet TEXT, body_text TEXT, body_html TEXT, gmail_thread_id TEXT,
has_attachments INT DEFAULT 0, is_matched INT DEFAULT 0,
match_status TEXT DEFAULT 'unmatched');
CREATE TABLE email_account_messages (id TEXT PRIMARY KEY, email_id TEXT, account_id TEXT,
is_sent INT DEFAULT 0, deleted_at TEXT);
CREATE TABLE email_recipients (id TEXT PRIMARY KEY, email_id TEXT, address TEXT,
display_name TEXT, kind TEXT);
CREATE TABLE email_attachments (id TEXT PRIMARY KEY, email_id TEXT, filename TEXT,
mime_type TEXT, size_bytes INTEGER, download_status TEXT);
CREATE TABLE email_investor_links (id TEXT PRIMARY KEY, email_id TEXT,
fundraising_investor_id TEXT, fundraising_contact_id TEXT,
organization_id TEXT, contact_id TEXT, matched_address TEXT);
CREATE TABLE fundraising_investors (id TEXT PRIMARY KEY, investor_name TEXT, graveyard INTEGER DEFAULT 0);
CREATE TABLE fundraising_contacts (id TEXT PRIMARY KEY, investor_id TEXT, full_name TEXT);
CREATE TABLE organizations (id TEXT PRIMARY KEY, name TEXT, deleted_at TEXT);
CREATE TABLE contacts (id TEXT PRIMARY KEY, first_name TEXT, last_name TEXT,
organization_id TEXT, deleted_at TEXT);
""")
# Two mailboxes (us); investors reached different ways: a grid investor directly,
# a grid investor only via a contact link, a graveyarded grid investor, an org-only
# (domain) match, and a classic-contact-only match (the case that left the dropdown
# empty before v83 — neither carries a grid id).
conn.executemany("INSERT INTO email_accounts VALUES (?,?)", [
("acc-grant", "grant@ten31.xyz"),
("acc-jon", "jonathan@ten31.xyz"),
])
conn.executemany("INSERT INTO fundraising_investors VALUES (?,?,?)", [
("inv-harbor", "Harbor & Vine", 0),
("inv-pacific", "Pacific Capital", 0),
("inv-dead", "Dead Deal LP", 1),
])
conn.execute("INSERT INTO fundraising_contacts VALUES ('fc-1','inv-pacific','Sarah Williams')")
conn.execute("INSERT INTO organizations VALUES ('org-bridge','Bridgewater',NULL)")
conn.execute("INSERT INTO contacts VALUES ('c-solo','Nina','Park',NULL,NULL)")
# Emails:
# e1 outbound -> Harbor (grid), seen by grant
# e2 inbound -> Harbor (grid), seen by grant + jonathan
# e3 inbound -> Pacific via grid contact link, seen by jonathan
# e4 inbound, UNMATCHED -> excluded (matched-only)
# e5 inbound, only sighting tombstoned -> excluded
# e6 inbound -> Dead Deal LP (graveyard grid investor)
# e7 inbound -> Bridgewater via ORG-domain match (no grid id)
# e8 inbound -> Nina Park via CLASSIC-contact match (no grid id, no org)
conn.executemany(
"INSERT INTO emails (id,subject,from_name,from_email,sent_at,snippet,has_attachments,is_matched,match_status) VALUES (?,?,?,?,?,?,?,?,?)",
[
("e1", "Fund III update", "Grant", "grant@ten31.xyz", "2026-06-05T10:00:00", "here is the deck", 1, 1, "matched"),
("e2", "Re: Fund III update", "LP Harbor", "lp@harborvine.example", "2026-06-06T09:00:00", "thanks, one question", 0, 1, "matched"),
("e3", "Intro", "Sarah Williams", "sarah@pacificcap.example", "2026-06-07T08:00:00", "would love to chat", 0, 1, "matched"),
("e4", "Cold inbound", "Random", "noreply@spam.example", "2026-06-08T08:00:00", "buy now", 0, 0, "unmatched"),
("e5", "Deleted thread", "Ghost", "ghost@x.example", "2026-06-09T08:00:00", "gone", 0, 1, "matched"),
("e6", "Old dead-deal thread", "Dead LP", "lp@deaddeal.example", "2026-06-01T00:00:00", "we passed", 0, 1, "matched"),
("e7", "Macro view", "Ray", "ray@bridgewater.example", "2026-06-10T08:00:00", "rates outlook", 0, 1, "matched"),
("e8", "Coffee?", "Nina Park", "nina@solo.example", "2026-06-11T08:00:00", "in town next week", 0, 1, "matched"),
])
conn.executemany(
"INSERT INTO email_account_messages (id,email_id,account_id,is_sent,deleted_at) VALUES (?,?,?,?,?)",
[
("m1", "e1", "acc-grant", 1, None),
("m2", "e2", "acc-grant", 0, None),
("m3", "e2", "acc-jon", 0, None),
("m4", "e3", "acc-jon", 0, None),
("m5", "e4", "acc-grant", 0, None),
("m6", "e5", "acc-grant", 0, "2026-06-10T00:00:00"), # tombstoned
("m7", "e6", "acc-grant", 0, None),
("m8", "e7", "acc-grant", 0, None),
("m9", "e8", "acc-jon", 0, None),
])
conn.executemany(
"INSERT INTO email_investor_links (id,email_id,fundraising_investor_id,fundraising_contact_id,organization_id,contact_id,matched_address) VALUES (?,?,?,?,?,?,?)",
[
("l1", "e1", "inv-harbor", None, None, None, "lp@harborvine.example"),
("l2", "e2", "inv-harbor", None, None, None, "lp@harborvine.example"),
("l3", "e3", None, "fc-1", None, None, "sarah@pacificcap.example"),
("l5", "e5", "inv-harbor", None, None, None, "lp@harborvine.example"),
("l6", "e6", "inv-dead", None, None, None, "lp@deaddeal.example"),
("l7", "e7", None, None, "org-bridge", None, "ray@bridgewater.example"),
("l8", "e8", None, None, None, "c-solo", "nina@solo.example"),
])
# Full body + recipients + an attachment on e2, for the detail view.
conn.execute("UPDATE emails SET body_text = ?, gmail_thread_id = ?, has_attachments = 1 WHERE id = 'e2'",
("Thanks for the deck — one question on the carry.", "thr-harbor"))
conn.executemany(
"INSERT INTO email_recipients (id,email_id,address,display_name,kind) VALUES (?,?,?,?,?)",
[
("r1", "e2", "grant@ten31.xyz", "Grant", "to"),
("r2", "e2", "jonathan@ten31.xyz", "Jonathan", "cc"),
("r3", "e2", "lp@harborvine.example", "LP Harbor", "from"), # from -> not surfaced
])
conn.execute("INSERT INTO email_attachments (id,email_id,filename,mime_type,size_bytes,download_status) "
"VALUES ('a1','e2','term_sheet.pdf','application/pdf',20480,'downloaded')")
conn.commit()
return conn
def ids(res):
return [e["id"] for e in res["emails"]]
def main():
conn = make_db()
# --- baseline: matched live emails only, newest first, tombstoned excluded ---
res = _db.query_email_activity(conn)
check(ids(res) == ["e8", "e7", "e3", "e2", "e1", "e6"],
f"matched live emails newest-first; e5 (tombstoned) + e4 (unmatched) excluded; got {ids(res)}")
check(res["count"] == 6 and res["truncated"] is False, "count + not truncated")
check("e4" not in ids(res), "unmatched email (no investor link) never surfaces in the panel")
# --- direction at the email level ---
e1 = next(e for e in res["emails"] if e["id"] == "e1")
e2 = next(e for e in res["emails"] if e["id"] == "e2")
check(e1["direction"] == "outbound", "e1 from our mailbox -> outbound")
check(e2["direction"] == "inbound", "e2 from LP -> inbound")
check(_db.query_email_activity(conn, direction="outbound")["emails"][0]["id"] == "e1"
and len(_db.query_email_activity(conn, direction="outbound")["emails"]) == 1,
"direction=outbound returns only e1")
check(ids(_db.query_email_activity(conn, direction="inbound")) == ["e8", "e7", "e3", "e2", "e6"],
"direction=inbound excludes the outbound e1 (and unmatched e4)")
# --- mailbox roll-up + per-account filter ---
check(set(e2["mailboxes"]) == {"grant@ten31.xyz", "jonathan@ten31.xyz"}, "e2 seen by both mailboxes")
check(ids(_db.query_email_activity(conn, account_id="acc-jon")) == ["e8", "e3", "e2"],
"account_id=acc-jon returns only emails that mailbox saw")
# --- date-range filter [since, until) over sent_at ---
check(ids(_db.query_email_activity(conn, since="2026-06-07T00:00:00", until="2026-06-11T00:00:00")) == ["e7", "e3"],
"date range [06-07, 06-11) -> e7,e3 (excludes 06-11 e8 and earlier e2/e1/e6)")
check(ids(_db.query_email_activity(conn, since="2026-06-10T00:00:00")) == ["e8", "e7"],
"since=06-10 -> e8,e7 only")
# --- investor filter: typed keys + legacy bare-id back-compat ---
check(set(ids(_db.query_email_activity(conn, investor_id="fund:inv-harbor"))) == {"e2", "e1"},
"investor_id=fund:inv-harbor -> e1,e2")
check(set(ids(_db.query_email_activity(conn, investor_id="inv-harbor"))) == {"e2", "e1"},
"legacy bare id treated as fund: -> e1,e2")
check(ids(_db.query_email_activity(conn, investor_id="fund:inv-pacific")) == ["e3"],
"fund:inv-pacific resolved through fundraising_contacts -> e3")
check(ids(_db.query_email_activity(conn, investor_id="org:org-bridge")) == ["e7"],
"org:org-bridge -> e7 (org-domain match)")
check(ids(_db.query_email_activity(conn, investor_id="contact:c-solo")) == ["e8"],
"contact:c-solo -> e8 (classic-contact match)")
check(_db.query_email_activity(conn, investor_id="bogus:x")["emails"] == [],
"unknown investor_id key prefix -> match nothing (never silently unfiltered)")
# --- investor identity roll-up, typed + resolved name ---
check(e1["investors"] == [{"id": "fund:inv-harbor", "name": "Harbor & Vine"}], "e1 grid investor resolved")
e3 = next(e for e in res["emails"] if e["id"] == "e3")
check(e3["investors"] == [{"id": "fund:inv-pacific", "name": "Pacific Capital"}], "e3 resolved via grid contact")
e7 = next(e for e in res["emails"] if e["id"] == "e7")
check(e7["investors"] == [{"id": "org:org-bridge", "name": "Bridgewater"}],
"e7 org-domain match resolves to the org name (not a raw address)")
e8 = next(e for e in res["emails"] if e["id"] == "e8")
check(e8["investors"] == [{"id": "contact:c-solo", "name": "Nina Park"}],
"e8 classic-contact match resolves to the contact name")
# --- free-text search over subject / snippet / sender ---
check(set(ids(_db.query_email_activity(conn, search="Fund III"))) == {"e1", "e2"}, "search subject")
check(ids(_db.query_email_activity(conn, search="pacificcap")) == ["e3"], "search sender address")
check(ids(_db.query_email_activity(conn, search="deck")) == ["e1"], "search snippet (matched email)")
check(ids(_db.query_email_activity(conn, search="buy now")) == [],
"unmatched email never surfaces, even by free-text search")
# --- facets: typed entries spanning grid / org / contact matches ---
check([a["email_address"] for a in res["accounts"]] == ["grant@ten31.xyz", "jonathan@ten31.xyz"],
"accounts facet sorted")
facet_inv = {i["id"] for i in res["investors"]}
check(facet_inv == {"fund:inv-harbor", "fund:inv-pacific", "org:org-bridge", "contact:c-solo"},
f"investor facet now mirrors the list (grid + org + contact), not just grid; got {facet_inv}")
check([i["name"] for i in res["investors"]] == sorted(i["name"] for i in res["investors"]),
"facet sorted by display name")
# --- graveyard: hidden from the picker, but its email stays visible + findable ---
check("fund:inv-dead" not in facet_inv, "graveyard investor excluded from the facet dropdown")
check("e6" in ids(res), "graveyard investor's email still shows in the unfiltered list (audit completeness)")
e6 = next(e for e in res["emails"] if e["id"] == "e6")
check(e6["investors"] == [{"id": "fund:inv-dead", "name": "Dead Deal LP"}], "graveyard email still shows its investor chip")
check(ids(_db.query_email_activity(conn, investor_id="fund:inv-dead")) == ["e6"],
"explicit investor_id filter still works for a graveyard investor")
check(ids(_db.query_email_activity(conn, search="deaddeal")) == ["e6"],
"graveyard email remains findable by free-text search")
# --- truncation ---
tr = _db.query_email_activity(conn, limit=2)
check(tr["count"] == 2 and tr["truncated"] is True, "limit=2 -> truncated")
# --- detail view (full body + recipients + attachments + identity) ---
d = _db.query_email_detail(conn, "e2")
check(d is not None and d["body_text"] == "Thanks for the deck — one question on the carry.",
"detail returns the full body")
check(d["direction"] == "inbound" and set(d["mailboxes"]) == {"grant@ten31.xyz", "jonathan@ten31.xyz"},
"detail direction + mailboxes")
check([(r["address"], r["kind"]) for r in d["recipients"]] ==
[("grant@ten31.xyz", "to"), ("jonathan@ten31.xyz", "cc")],
"detail recipients = to/cc only (from is excluded)")
check([a["filename"] for a in d["attachments"]] == ["term_sheet.pdf"], "detail lists attachments")
check(d["investors"] == [{"id": "fund:inv-harbor", "name": "Harbor & Vine"}], "detail resolves investor identity")
check(_db.query_email_detail(conn, "e5") is None,
"detail of a tombstoned-only email -> None (soft-delete on the sighting)")
check(_db.query_email_detail(conn, "nope") is None, "detail of a missing id -> None")
conn.close()
# --- route enforces admin server-side ---
test_route_admin_only()
# --- semantic content-search route (hydrate + soft-delete + 503) ---
test_search_route()
if FAILS:
print(f"\nFAILED ({len(FAILS)})")
for f in FAILS:
print(" - " + f)
sys.exit(1)
print("\nALL PASS (email-activity panel)")
class FakeHandler:
def __init__(self, user, params=None):
self._user = user
self._params = params or {}
self.json = None
self.err = None
self.code = None
def get_user(self):
return self._user
def get_query_params(self):
return self._params
def send_json(self, obj):
self.json = obj
def send_error_json(self, msg, code):
self.err = msg
self.code = code
def test_route_admin_only():
try:
from email_integration import routes
except Exception as e: # pragma: no cover - optional deps missing in some dev envs
print(f" SKIP route admin test (routes import failed: {e})")
return
h = FakeHandler(None)
routes._h_activity(h)
check(h.code == 401 and h.json is None, "route: no user -> 401")
h = FakeHandler({"role": "member", "user_id": "u1"})
routes._h_activity(h)
check(h.code == 403 and h.json is None, "route: member -> 403 (admin enforced server-side)")
def test_search_route():
try:
from email_integration import routes
except Exception as e: # pragma: no cover
print(f" SKIP search route test (routes import failed: {e})")
return
# Hydration source = a fresh fully-populated in-memory DB each call (the handler
# opens + closes its own conn). Retrieval is stubbed — no Spark/Qdrant in tests.
routes._conn = make_db
routes._semantic_email_search = lambda query, top_k: [
{"score": 0.91, "text": "carry discussion\nand terms", "payload": {"source_id": "e2", "lp_name": "Harbor & Vine"}},
{"score": 0.80, "text": "gone", "payload": {"source_id": "e5", "lp_name": "Ghost"}}, # tombstoned -> drop
{"score": 0.70, "text": "n/a", "payload": {"source_id": "missing", "lp_name": "Nobody"}}, # missing -> drop
]
h = FakeHandler({"role": "admin"}, {"q": "carry"})
routes._h_search(h)
check(h.json and [r["email_id"] for r in h.json["results"]] == ["e2"],
f"content search drops tombstoned + missing, keeps live e2; got {h.json and [r['email_id'] for r in h.json['results']]}")
top = h.json["results"][0]
check(top["lp_name"] == "Harbor & Vine" and top["score"] == 0.91 and top["subject"] == "Re: Fund III update",
"hit carries lp_name + score + hydrated subject")
check("\n" not in top["excerpt"], "excerpt is newline-flattened")
# empty query short-circuits (no retrieval call)
h = FakeHandler({"role": "admin"}, {"q": ""})
routes._h_search(h)
check(h.json == {"query": "", "results": []}, "empty query -> empty results")
# retrieval failure -> clean 503 (Spark/Qdrant down)
def _boom(query, top_k):
raise RuntimeError("spark down")
routes._semantic_email_search = _boom
h = FakeHandler({"role": "admin"}, {"q": "x"})
routes._h_search(h)
check(h.code == 503, f"retrieval failure -> 503, got {h.code}")
# admin enforced
h = FakeHandler({"role": "member"}, {"q": "x"})
routes._h_search(h)
check(h.code == 403, "content search admin-enforced server-side")
if __name__ == "__main__":
main()