Email search/query + windowed digest preview (v0.1.0:83)

Communications tab (search/query roadmap items 1 & 2):
- Fix the investor dropdown: the facet only listed grid investors, so it
  came back empty whenever email matched a classic contact or org domain
  (no grid id — the common case). It now mirrors the email list, resolving
  each link to a typed identity (fund:/org:/contact:/addr:) with precedence
  grid -> org -> contact -> address; investor_id accepts the typed key
  (bare id = fund: for back-compat) and an unknown prefix matches nothing.
- Add a date-range filter and a click-to-expand full-body view
  (GET /api/email/detail, admin, soft-delete-gated; body_text only, never
  raw remote HTML).
- Add a "Search content" mode: GET /api/email/search wraps the ingest
  hybrid_search over the Qdrant email index (doc_type=email), hydrated and
  soft-delete-filtered against SQLite (canonical), 503 if Spark/Qdrant down.

Daily digest:
- Settings -> Admin builds a digest over a chosen window (last 24h or since
  a date) as an in-app preview before sending (POST /api/admin/digest/preview),
  so the local-Spark summarizer can be verified on demand even on a quiet day.
  Manual send uses the same window; neither advances the daily cursor, so a
  preview never suppresses the scheduled digest.

Code-only, migrations no-op. 22/22 backend tests, render-smoke pass.
This commit is contained in:
Keysat
2026-06-16 20:46:15 -05:00
parent c29ac2f2ee
commit c7b74a2704
14 changed files with 989 additions and 138 deletions
+193 -35
View File
@@ -398,17 +398,63 @@ def start_sync_run(conn: sqlite3.Connection, *, account_id: str, kind: str) -> s
return run_id
def _resolve_entity(row) -> tuple:
"""Reduce one email_investor_links hydration row to a (key, name) identity for
the matched investor, with the same precedence the digest uses:
grid investor -> organization -> contact -> raw matched address. The key is
*typed* (`fund:`/`org:`/`contact:`/`addr:`) so the Communications filter can
target the right column. Soft-deleted org/contact rows arrive as NULL (filtered
in the join) and fall through to the next tier."""
if row["fund_id"] and (row["fund_name"] or "").strip():
return f"fund:{row['fund_id']}", row["fund_name"].strip()
if row["org_id"] and (row["org_name"] or "").strip():
return f"org:{row['org_id']}", row["org_name"].strip()
if row["contact_id"] and (row["contact_name"] or "").strip():
return f"contact:{row['contact_id']}", row["contact_name"].strip()
addr = (row["addr"] or "").strip()
return (f"addr:{addr.lower()}", addr) if addr else (None, None)
# Hydration of an email_investor_links row up to the resolvable investor identity,
# shared by the per-email tags and the facet dropdown. Soft-deleted org/contact
# rows are dropped in the join so they fall through to the next identity tier.
_LINK_IDENTITY_JOINS = """
LEFT JOIN fundraising_investors fi ON fi.id = l.fundraising_investor_id
LEFT JOIN fundraising_contacts fic ON fic.id = l.fundraising_contact_id
LEFT JOIN fundraising_investors fic_inv ON fic_inv.id = fic.investor_id
LEFT JOIN organizations o ON o.id = l.organization_id AND o.deleted_at IS NULL
LEFT JOIN contacts c ON c.id = l.contact_id AND c.deleted_at IS NULL
"""
_LINK_IDENTITY_COLS = """
l.matched_address AS addr,
COALESCE(fi.id, fic_inv.id) AS fund_id,
COALESCE(fi.investor_name, fic_inv.investor_name) AS fund_name,
COALESCE(fi.graveyard, fic_inv.graveyard) AS fund_graveyard,
o.id AS org_id, o.name AS org_name,
c.id AS contact_id,
NULLIF(TRIM(COALESCE(c.first_name,'') || ' ' || COALESCE(c.last_name,'')), '') AS contact_name
"""
def query_email_activity(conn: sqlite3.Connection, *, investor_id: Optional[str] = None,
account_id: Optional[str] = None, search: Optional[str] = None,
direction: Optional[str] = None, limit: int = 100) -> dict:
direction: Optional[str] = None, since: Optional[str] = None,
until: Optional[str] = None, limit: int = 100) -> dict:
"""Captured-Gmail activity for the admin Communications panel, filterable by
investor (matched fundraising investor) and/or mailbox, with free-text search
over subject/snippet/sender. Returns the email rows plus the filter facets.
matched investor entity, mailbox, direction and date range, with free-text
search over subject/snippet/sender. Returns the email rows plus the filter facets.
Matched-only: the panel shows ONLY email that links to a known investor/contact
(an `email_investor_links` row exists). Unmatched cold/unknown-sender email is
still captured for completeness but is never surfaced here.
Investor identity is *typed* (`fund:`/`org:`/`contact:`/`addr:`) and resolved with
the digest's precedence (grid investor -> organization -> contact -> raw address),
so an email matched only to a classic contact or an org domain — not yet wired to a
grid investor — still shows a real name and is selectable in the dropdown, instead
of the facet coming back empty. `investor_id` accepts a typed key (a bare id is
treated as `fund:` for backward compatibility).
Soft-delete: an email is live only if it still has a non-tombstoned per-mailbox
sighting (`email_account_messages.deleted_at IS NULL`) — the `emails` row itself
carries no deleted_at, so deletion lives on the sighting. Direction is decided at
@@ -433,15 +479,42 @@ def query_email_activity(conn: sqlite3.Connection, *, investor_id: Optional[str]
"AND eam.deleted_at IS NULL)")
params.append(account_id)
if investor_id:
where.append("EXISTS (SELECT 1 FROM email_investor_links l WHERE l.email_id = e.id "
"AND (l.fundraising_investor_id = ? OR l.fundraising_contact_id IN "
"(SELECT id FROM fundraising_contacts WHERE investor_id = ?)))")
params.extend([investor_id, investor_id])
kind, _, val = str(investor_id).partition(":")
if not val: # bare id (legacy) -> grid investor
kind, val = "fund", str(investor_id)
if kind == "fund":
where.append("EXISTS (SELECT 1 FROM email_investor_links l WHERE l.email_id = e.id "
"AND (l.fundraising_investor_id = ? OR l.fundraising_contact_id IN "
"(SELECT id FROM fundraising_contacts WHERE investor_id = ?)))")
params.extend([val, val])
elif kind == "org":
where.append("EXISTS (SELECT 1 FROM email_investor_links l "
"WHERE l.email_id = e.id AND l.organization_id = ?)")
params.append(val)
elif kind == "contact":
where.append("EXISTS (SELECT 1 FROM email_investor_links l "
"WHERE l.email_id = e.id AND l.contact_id = ?)")
params.append(val)
elif kind == "addr":
where.append("EXISTS (SELECT 1 FROM email_investor_links l "
"WHERE l.email_id = e.id AND LOWER(l.matched_address) = ?)")
params.append(val.lower())
else:
# Unknown key prefix (malformed input) -> match nothing, never silently
# fall through to an unfiltered list.
where.append("1 = 0")
if search:
like = f"%{search.strip()}%"
where.append("(e.subject LIKE ? OR e.snippet LIKE ? "
"OR e.from_email LIKE ? OR e.from_name LIKE ?)")
params.extend([like, like, like, like])
# Date range over the send time (ISO-8601 strings sort lexically). [since, until).
if since:
where.append("e.sent_at >= ?")
params.append(since)
if until:
where.append("e.sent_at < ?")
params.append(until)
direction = (direction or "").strip().lower()
if direction in ("inbound", "outbound") and own:
marks = ",".join("?" for _ in own)
@@ -459,8 +532,7 @@ def query_email_activity(conn: sqlite3.Connection, *, investor_id: Optional[str]
for r in rows:
r["direction"] = "outbound" if (r["from_email"] or "").lower().strip() in own else "inbound"
r["mailboxes"] = []
r["investors"] = []
r["investor_labels"] = []
r["investors"] = [] # [{id: typed-key, name}] — resolved identities
ids = list(by_id)
if ids:
@@ -474,41 +546,127 @@ def query_email_activity(conn: sqlite3.Connection, *, investor_id: Optional[str]
if s["addr"] and s["addr"] not in mb:
mb.append(s["addr"])
for lnk in cur.execute(
"SELECT l.email_id AS eid, l.matched_address AS addr, "
"COALESCE(fi.id, fic_inv.id) AS inv_id, "
"COALESCE(fi.investor_name, fic_inv.investor_name) AS inv_name "
"FROM email_investor_links l "
"LEFT JOIN fundraising_investors fi ON fi.id = l.fundraising_investor_id "
"LEFT JOIN fundraising_contacts fic ON fic.id = l.fundraising_contact_id "
"LEFT JOIN fundraising_investors fic_inv ON fic_inv.id = fic.investor_id "
f"SELECT l.email_id AS eid, {_LINK_IDENTITY_COLS} "
f"FROM email_investor_links l {_LINK_IDENTITY_JOINS} "
f"WHERE l.email_id IN ({marks})", ids):
row = by_id[lnk["eid"]]
if lnk["inv_id"] and lnk["inv_name"]:
if not any(iv["id"] == lnk["inv_id"] for iv in row["investors"]):
row["investors"].append({"id": lnk["inv_id"], "name": lnk["inv_name"]})
elif lnk["addr"] and lnk["addr"] not in row["investor_labels"]:
row["investor_labels"].append(lnk["addr"])
# No graveyard filter here on purpose: a graveyarded investor's *email*
# still shows in the list with its chip (audit completeness, direct or
# via-contact); only the facet dropdown below hides graveyard from the picker.
key, name = _resolve_entity(lnk)
if not key:
continue
invs = by_id[lnk["eid"]]["investors"]
if not any(iv["id"] == key for iv in invs):
invs.append({"id": key, "name": name})
accounts = [dict(r) for r in cur.execute(
"SELECT id, email_address FROM email_accounts ORDER BY email_address")]
# Facet dropdown = live investor relationships only (graveyard = 0, the CRM-wide
# convention). Graveyarded investors are excluded from the *picker*, but their
# captured email still shows in the unfiltered list and stays findable by free-text
# search — this is an audit surface, so history is never hidden, only the picker is.
investors = [dict(r) for r in cur.execute(
"SELECT id, investor_name AS name FROM fundraising_investors WHERE graveyard = 0 AND id IN ("
" SELECT fundraising_investor_id FROM email_investor_links "
" WHERE fundraising_investor_id IS NOT NULL"
" UNION"
" SELECT investor_id FROM fundraising_contacts WHERE id IN ("
" SELECT fundraising_contact_id FROM email_investor_links "
" WHERE fundraising_contact_id IS NOT NULL)"
") ORDER BY investor_name")]
# Facet dropdown mirrors what the list resolves: one entry per distinct matched
# entity (grid investor / org / contact), across all live matched email — not just
# the current page — so the picker is stable under filtering. Excluded from the
# picker: graveyarded grid investors (CRM-wide convention) and raw-address-only
# matches (too many, too noisy). Both still appear in the list and remain findable
# by free-text search — this is an audit surface, so history is never hidden, only
# the picker is.
facet: dict[str, str] = {}
for r in cur.execute(
f"SELECT DISTINCT {_LINK_IDENTITY_COLS} "
f"FROM email_investor_links l {_LINK_IDENTITY_JOINS} "
"WHERE EXISTS (SELECT 1 FROM email_account_messages eam "
"WHERE eam.email_id = l.email_id AND eam.deleted_at IS NULL)"):
key, name = _resolve_entity(r)
if not key or key.startswith("addr:"):
continue
if key.startswith("fund:") and (r["fund_graveyard"] or 0):
continue
facet.setdefault(key, name)
investors = [{"id": k, "name": v}
for k, v in sorted(facet.items(), key=lambda kv: kv[1].lower())]
return {"emails": rows, "accounts": accounts, "investors": investors,
"count": len(rows), "truncated": truncated}
def search_hit_emails(conn: sqlite3.Connection, email_ids) -> dict:
"""Display fields for the given email ids that are still live (have a
non-tombstoned sighting), keyed by id, with email-level direction.
Used to hydrate + soft-delete-filter semantic-search hits: the Qdrant index can
lag a deletion, and SQLite is canonical (never trust the derived index), so a hit
whose email no longer has a live sighting is dropped here rather than shown."""
ids = [i for i in dict.fromkeys(email_ids) if i]
if not ids:
return {}
cur = conn.cursor()
own = {(r["email_address"] or "").lower().strip()
for r in cur.execute("SELECT email_address FROM email_accounts")}
own.discard("")
marks = ",".join("?" for _ in ids)
out: dict = {}
for e in cur.execute(
"SELECT e.id, e.subject, e.from_name, e.from_email, e.sent_at, e.has_attachments "
f"FROM emails e WHERE e.id IN ({marks}) AND EXISTS (SELECT 1 FROM email_account_messages eam "
"WHERE eam.email_id = e.id AND eam.deleted_at IS NULL)", ids):
d = dict(e)
d["direction"] = "outbound" if (d["from_email"] or "").lower().strip() in own else "inbound"
out[d["id"]] = d
return out
def query_email_detail(conn: sqlite3.Connection, email_id: str) -> Optional[dict]:
"""Full record for one captured email — the Communications detail view (full
body + recipients + matched investor identities + mailboxes + attachments).
Returns None if the email doesn't exist or has no live (non-tombstoned) sighting:
soft-delete lives on the per-mailbox `email_account_messages` row, not on `emails`,
so an email is only "live" while at least one sighting survives. Direction is set
at the email level (outbound if the sender is one of our mailboxes), matching the
list. The raw remote `body_html` is NOT returned (XSS); the response carries the
plain-text `body_text` plus a `has_html` flag so the UI can note an HTML-only email."""
cur = conn.cursor()
e = cur.execute(
"SELECT e.id, e.subject, e.from_name, e.from_email, e.sent_at, e.snippet, "
"e.body_text, e.body_html, e.has_attachments, e.match_status, e.gmail_thread_id "
"FROM emails e WHERE e.id = ? AND EXISTS (SELECT 1 FROM email_account_messages eam "
"WHERE eam.email_id = e.id AND eam.deleted_at IS NULL)", (email_id,)).fetchone()
if not e:
return None
row = dict(e)
# Don't ship the raw remote HTML to the client (XSS if any consumer ever renders
# it); the UI shows the plain-text body and only needs to know HTML exists.
row["has_html"] = bool((row.pop("body_html", None) or "").strip())
own = {(r["email_address"] or "").lower().strip()
for r in cur.execute("SELECT email_address FROM email_accounts")}
own.discard("")
row["direction"] = "outbound" if (row["from_email"] or "").lower().strip() in own else "inbound"
row["mailboxes"] = [r["addr"] for r in cur.execute(
"SELECT DISTINCT ea.email_address AS addr FROM email_account_messages eam "
"JOIN email_accounts ea ON ea.id = eam.account_id "
"WHERE eam.email_id = ? AND eam.deleted_at IS NULL ORDER BY ea.email_address", (email_id,))]
row["recipients"] = [dict(r) for r in cur.execute(
"SELECT address, display_name, kind FROM email_recipients "
"WHERE email_id = ? AND kind IN ('to','cc') "
"ORDER BY CASE kind WHEN 'to' THEN 0 ELSE 1 END, address", (email_id,))]
row["attachments"] = [dict(r) for r in cur.execute(
"SELECT filename, mime_type, size_bytes, download_status FROM email_attachments "
"WHERE email_id = ? ORDER BY filename", (email_id,))]
investors: dict[str, str] = {}
for lnk in cur.execute(
f"SELECT {_LINK_IDENTITY_COLS} FROM email_investor_links l {_LINK_IDENTITY_JOINS} "
"WHERE l.email_id = ?", (email_id,)):
key, name = _resolve_entity(lnk)
if key:
investors.setdefault(key, name)
row["investors"] = [{"id": k, "name": v} for k, v in investors.items()]
return row
def finish_sync_run(conn: sqlite3.Connection, run_id: str, *, status: str,
stats: Optional[dict] = None, error: Optional[str] = None) -> None:
stats = stats or {}