Email search/query + windowed digest preview (v0.1.0:83)
Communications tab (search/query roadmap items 1 & 2): - Fix the investor dropdown: the facet only listed grid investors, so it came back empty whenever email matched a classic contact or org domain (no grid id — the common case). It now mirrors the email list, resolving each link to a typed identity (fund:/org:/contact:/addr:) with precedence grid -> org -> contact -> address; investor_id accepts the typed key (bare id = fund: for back-compat) and an unknown prefix matches nothing. - Add a date-range filter and a click-to-expand full-body view (GET /api/email/detail, admin, soft-delete-gated; body_text only, never raw remote HTML). - Add a "Search content" mode: GET /api/email/search wraps the ingest hybrid_search over the Qdrant email index (doc_type=email), hydrated and soft-delete-filtered against SQLite (canonical), 503 if Spark/Qdrant down. Daily digest: - Settings -> Admin builds a digest over a chosen window (last 24h or since a date) as an in-app preview before sending (POST /api/admin/digest/preview), so the local-Spark summarizer can be verified on demand even on a quiet day. Manual send uses the same window; neither advances the daily cursor, so a preview never suppresses the scheduled digest. Code-only, migrations no-op. 22/22 backend tests, render-smoke pass.
This commit is contained in:
+193
-35
@@ -398,17 +398,63 @@ def start_sync_run(conn: sqlite3.Connection, *, account_id: str, kind: str) -> s
|
||||
return run_id
|
||||
|
||||
|
||||
def _resolve_entity(row) -> tuple[Optional[str], Optional[str]]:
    """Reduce one hydrated ``email_investor_links`` row to a ``(key, name)`` identity.

    Tiers are tried in precedence order — grid investor, organization, contact,
    raw matched address — and the first tier with both a truthy id and a
    non-blank name wins. The key is *typed* (``fund:``/``org:``/``contact:``/
    ``addr:``) so a filter can tell which column the id belongs to.

    Args:
        row: Mapping-style row (anything supporting ``row["col"]``) exposing
            ``fund_id``, ``fund_name``, ``org_id``, ``org_name``,
            ``contact_id``, ``contact_name`` and ``addr``.

    Returns:
        ``(typed_key, display_name)``, or ``(None, None)`` when no tier
        resolves. For the address tier the key is lower-cased while the
        display name keeps the stored casing (both whitespace-stripped).
    """
    # (key prefix, id column, display-name column), highest precedence first.
    tiers = (
        ("fund", "fund_id", "fund_name"),
        ("org", "org_id", "org_name"),
        ("contact", "contact_id", "contact_name"),
    )
    for prefix, id_col, name_col in tiers:
        entity_id = row[id_col]
        if not entity_id:
            # A NULL/empty id means this tier is absent for the link; fall
            # through to the next tier.
            continue
        name = (row[name_col] or "").strip()
        if name:
            return f"{prefix}:{entity_id}", name

    # Last resort: the raw matched address itself.
    addr = (row["addr"] or "").strip()
    if not addr:
        return None, None
    return f"addr:{addr.lower()}", addr
|
||||
|
||||
|
||||
# SQL fragments that hydrate an `email_investor_links` row (aliased `l`) up to the
# resolvable investor identity. They are kept as module constants so every query
# that needs link identities uses the same joins and column aliases.
#
# Soft-deleted organizations/contacts are excluded inside the JOIN condition (not
# in a WHERE clause), so the link row survives with NULL org/contact columns and
# identity resolution can fall through to the next tier.
_LINK_IDENTITY_JOINS = """
    LEFT JOIN fundraising_investors fi ON fi.id = l.fundraising_investor_id
    LEFT JOIN fundraising_contacts fic ON fic.id = l.fundraising_contact_id
    LEFT JOIN fundraising_investors fic_inv ON fic_inv.id = fic.investor_id
    LEFT JOIN organizations o ON o.id = l.organization_id AND o.deleted_at IS NULL
    LEFT JOIN contacts c ON c.id = l.contact_id AND c.deleted_at IS NULL
"""

# Column list paired with the joins above. A grid investor linked directly (`fi`)
# takes precedence over one reached through a fundraising contact (`fic_inv`).
# `contact_name` is "first last", trimmed, and NULL when both parts are blank.
_LINK_IDENTITY_COLS = """
    l.matched_address AS addr,
    COALESCE(fi.id, fic_inv.id) AS fund_id,
    COALESCE(fi.investor_name, fic_inv.investor_name) AS fund_name,
    COALESCE(fi.graveyard, fic_inv.graveyard) AS fund_graveyard,
    o.id AS org_id, o.name AS org_name,
    c.id AS contact_id,
    NULLIF(TRIM(COALESCE(c.first_name,'') || ' ' || COALESCE(c.last_name,'')), '') AS contact_name
"""
|
||||
|
||||
|
||||
def query_email_activity(conn: sqlite3.Connection, *, investor_id: Optional[str] = None,
|
||||
account_id: Optional[str] = None, search: Optional[str] = None,
|
||||
direction: Optional[str] = None, limit: int = 100) -> dict:
|
||||
direction: Optional[str] = None, since: Optional[str] = None,
|
||||
until: Optional[str] = None, limit: int = 100) -> dict:
|
||||
"""Captured-Gmail activity for the admin Communications panel, filterable by
|
||||
investor (matched fundraising investor) and/or mailbox, with free-text search
|
||||
over subject/snippet/sender. Returns the email rows plus the filter facets.
|
||||
matched investor entity, mailbox, direction and date range, with free-text
|
||||
search over subject/snippet/sender. Returns the email rows plus the filter facets.
|
||||
|
||||
Matched-only: the panel shows ONLY email that links to a known investor/contact
|
||||
(an `email_investor_links` row exists). Unmatched cold/unknown-sender email is
|
||||
still captured for completeness but is never surfaced here.
|
||||
|
||||
Investor identity is *typed* (`fund:`/`org:`/`contact:`/`addr:`) and resolved with
|
||||
the digest's precedence (grid investor -> organization -> contact -> raw address),
|
||||
so an email matched only to a classic contact or an org domain — not yet wired to a
|
||||
grid investor — still shows a real name and is selectable in the dropdown, instead
|
||||
of the facet coming back empty. `investor_id` accepts a typed key (a bare id is
|
||||
treated as `fund:` for backward compatibility).
|
||||
|
||||
Soft-delete: an email is live only if it still has a non-tombstoned per-mailbox
|
||||
sighting (`email_account_messages.deleted_at IS NULL`) — the `emails` row itself
|
||||
carries no deleted_at, so deletion lives on the sighting. Direction is decided at
|
||||
@@ -433,15 +479,42 @@ def query_email_activity(conn: sqlite3.Connection, *, investor_id: Optional[str]
|
||||
"AND eam.deleted_at IS NULL)")
|
||||
params.append(account_id)
|
||||
if investor_id:
|
||||
where.append("EXISTS (SELECT 1 FROM email_investor_links l WHERE l.email_id = e.id "
|
||||
"AND (l.fundraising_investor_id = ? OR l.fundraising_contact_id IN "
|
||||
"(SELECT id FROM fundraising_contacts WHERE investor_id = ?)))")
|
||||
params.extend([investor_id, investor_id])
|
||||
kind, _, val = str(investor_id).partition(":")
|
||||
if not val: # bare id (legacy) -> grid investor
|
||||
kind, val = "fund", str(investor_id)
|
||||
if kind == "fund":
|
||||
where.append("EXISTS (SELECT 1 FROM email_investor_links l WHERE l.email_id = e.id "
|
||||
"AND (l.fundraising_investor_id = ? OR l.fundraising_contact_id IN "
|
||||
"(SELECT id FROM fundraising_contacts WHERE investor_id = ?)))")
|
||||
params.extend([val, val])
|
||||
elif kind == "org":
|
||||
where.append("EXISTS (SELECT 1 FROM email_investor_links l "
|
||||
"WHERE l.email_id = e.id AND l.organization_id = ?)")
|
||||
params.append(val)
|
||||
elif kind == "contact":
|
||||
where.append("EXISTS (SELECT 1 FROM email_investor_links l "
|
||||
"WHERE l.email_id = e.id AND l.contact_id = ?)")
|
||||
params.append(val)
|
||||
elif kind == "addr":
|
||||
where.append("EXISTS (SELECT 1 FROM email_investor_links l "
|
||||
"WHERE l.email_id = e.id AND LOWER(l.matched_address) = ?)")
|
||||
params.append(val.lower())
|
||||
else:
|
||||
# Unknown key prefix (malformed input) -> match nothing, never silently
|
||||
# fall through to an unfiltered list.
|
||||
where.append("1 = 0")
|
||||
if search:
|
||||
like = f"%{search.strip()}%"
|
||||
where.append("(e.subject LIKE ? OR e.snippet LIKE ? "
|
||||
"OR e.from_email LIKE ? OR e.from_name LIKE ?)")
|
||||
params.extend([like, like, like, like])
|
||||
# Date range over the send time (ISO-8601 strings sort lexically). [since, until).
|
||||
if since:
|
||||
where.append("e.sent_at >= ?")
|
||||
params.append(since)
|
||||
if until:
|
||||
where.append("e.sent_at < ?")
|
||||
params.append(until)
|
||||
direction = (direction or "").strip().lower()
|
||||
if direction in ("inbound", "outbound") and own:
|
||||
marks = ",".join("?" for _ in own)
|
||||
@@ -459,8 +532,7 @@ def query_email_activity(conn: sqlite3.Connection, *, investor_id: Optional[str]
|
||||
for r in rows:
|
||||
r["direction"] = "outbound" if (r["from_email"] or "").lower().strip() in own else "inbound"
|
||||
r["mailboxes"] = []
|
||||
r["investors"] = []
|
||||
r["investor_labels"] = []
|
||||
r["investors"] = [] # [{id: typed-key, name}] — resolved identities
|
||||
|
||||
ids = list(by_id)
|
||||
if ids:
|
||||
@@ -474,41 +546,127 @@ def query_email_activity(conn: sqlite3.Connection, *, investor_id: Optional[str]
|
||||
if s["addr"] and s["addr"] not in mb:
|
||||
mb.append(s["addr"])
|
||||
for lnk in cur.execute(
|
||||
"SELECT l.email_id AS eid, l.matched_address AS addr, "
|
||||
"COALESCE(fi.id, fic_inv.id) AS inv_id, "
|
||||
"COALESCE(fi.investor_name, fic_inv.investor_name) AS inv_name "
|
||||
"FROM email_investor_links l "
|
||||
"LEFT JOIN fundraising_investors fi ON fi.id = l.fundraising_investor_id "
|
||||
"LEFT JOIN fundraising_contacts fic ON fic.id = l.fundraising_contact_id "
|
||||
"LEFT JOIN fundraising_investors fic_inv ON fic_inv.id = fic.investor_id "
|
||||
f"SELECT l.email_id AS eid, {_LINK_IDENTITY_COLS} "
|
||||
f"FROM email_investor_links l {_LINK_IDENTITY_JOINS} "
|
||||
f"WHERE l.email_id IN ({marks})", ids):
|
||||
row = by_id[lnk["eid"]]
|
||||
if lnk["inv_id"] and lnk["inv_name"]:
|
||||
if not any(iv["id"] == lnk["inv_id"] for iv in row["investors"]):
|
||||
row["investors"].append({"id": lnk["inv_id"], "name": lnk["inv_name"]})
|
||||
elif lnk["addr"] and lnk["addr"] not in row["investor_labels"]:
|
||||
row["investor_labels"].append(lnk["addr"])
|
||||
# No graveyard filter here on purpose: a graveyarded investor's *email*
|
||||
# still shows in the list with its chip (audit completeness, direct or
|
||||
# via-contact); only the facet dropdown below hides graveyard from the picker.
|
||||
key, name = _resolve_entity(lnk)
|
||||
if not key:
|
||||
continue
|
||||
invs = by_id[lnk["eid"]]["investors"]
|
||||
if not any(iv["id"] == key for iv in invs):
|
||||
invs.append({"id": key, "name": name})
|
||||
|
||||
accounts = [dict(r) for r in cur.execute(
|
||||
"SELECT id, email_address FROM email_accounts ORDER BY email_address")]
|
||||
# Facet dropdown = live investor relationships only (graveyard = 0, the CRM-wide
|
||||
# convention). Graveyarded investors are excluded from the *picker*, but their
|
||||
# captured email still shows in the unfiltered list and stays findable by free-text
|
||||
# search — this is an audit surface, so history is never hidden, only the picker is.
|
||||
investors = [dict(r) for r in cur.execute(
|
||||
"SELECT id, investor_name AS name FROM fundraising_investors WHERE graveyard = 0 AND id IN ("
|
||||
" SELECT fundraising_investor_id FROM email_investor_links "
|
||||
" WHERE fundraising_investor_id IS NOT NULL"
|
||||
" UNION"
|
||||
" SELECT investor_id FROM fundraising_contacts WHERE id IN ("
|
||||
" SELECT fundraising_contact_id FROM email_investor_links "
|
||||
" WHERE fundraising_contact_id IS NOT NULL)"
|
||||
") ORDER BY investor_name")]
|
||||
|
||||
# Facet dropdown mirrors what the list resolves: one entry per distinct matched
|
||||
# entity (grid investor / org / contact), across all live matched email — not just
|
||||
# the current page — so the picker is stable under filtering. Excluded from the
|
||||
# picker: graveyarded grid investors (CRM-wide convention) and raw-address-only
|
||||
# matches (too many, too noisy). Both still appear in the list and remain findable
|
||||
# by free-text search — this is an audit surface, so history is never hidden, only
|
||||
# the picker is.
|
||||
facet: dict[str, str] = {}
|
||||
for r in cur.execute(
|
||||
f"SELECT DISTINCT {_LINK_IDENTITY_COLS} "
|
||||
f"FROM email_investor_links l {_LINK_IDENTITY_JOINS} "
|
||||
"WHERE EXISTS (SELECT 1 FROM email_account_messages eam "
|
||||
"WHERE eam.email_id = l.email_id AND eam.deleted_at IS NULL)"):
|
||||
key, name = _resolve_entity(r)
|
||||
if not key or key.startswith("addr:"):
|
||||
continue
|
||||
if key.startswith("fund:") and (r["fund_graveyard"] or 0):
|
||||
continue
|
||||
facet.setdefault(key, name)
|
||||
investors = [{"id": k, "name": v}
|
||||
for k, v in sorted(facet.items(), key=lambda kv: kv[1].lower())]
|
||||
|
||||
return {"emails": rows, "accounts": accounts, "investors": investors,
|
||||
"count": len(rows), "truncated": truncated}
|
||||
|
||||
|
||||
def search_hit_emails(conn: sqlite3.Connection, email_ids) -> dict:
    """Return display fields for the given email ids that are still live.

    An email counts as live when at least one ``email_account_messages`` row
    for it has ``deleted_at IS NULL``. Ids without such a row (or that do not
    exist) are simply absent from the result, so a caller can use this both to
    hydrate and to soft-delete-filter hits coming from a derived search index.

    Args:
        conn: SQLite connection. Rows are read by column name, so the
            connection needs a mapping-style ``row_factory`` such as
            ``sqlite3.Row``.
        email_ids: Iterable of email ids; duplicates and falsy entries are
            ignored. ``None`` is treated as empty.

    Returns:
        ``{email_id: fields}`` where ``fields`` holds ``id``, ``subject``,
        ``from_name``, ``from_email``, ``sent_at``, ``has_attachments`` and a
        computed ``direction`` — ``"outbound"`` when the sender is one of the
        addresses in ``email_accounts`` (case-insensitive), else ``"inbound"``.
    """
    # De-duplicate while keeping first-seen order; drop None / "" ids.
    ids = [i for i in dict.fromkeys(email_ids or ()) if i]
    if not ids:
        return {}

    cur = conn.cursor()
    own = {(r["email_address"] or "").lower().strip()
           for r in cur.execute("SELECT email_address FROM email_accounts")}
    own.discard("")

    # SQLite caps the number of bound parameters per statement (999 by default
    # before 3.32), so look the ids up in batches rather than one giant IN list.
    batch_size = 500
    out: dict = {}
    for start in range(0, len(ids), batch_size):
        batch = ids[start:start + batch_size]
        marks = ",".join("?" for _ in batch)
        for e in cur.execute(
                "SELECT e.id, e.subject, e.from_name, e.from_email, e.sent_at, e.has_attachments "
                f"FROM emails e WHERE e.id IN ({marks}) AND EXISTS (SELECT 1 FROM email_account_messages eam "
                "WHERE eam.email_id = e.id AND eam.deleted_at IS NULL)", batch):
            d = dict(e)
            d["direction"] = "outbound" if (d["from_email"] or "").lower().strip() in own else "inbound"
            out[d["id"]] = d
    return out
|
||||
|
||||
|
||||
def query_email_detail(conn: sqlite3.Connection, email_id: str) -> Optional[dict]:
    """Return the full record for one captured email, or None if it is not live.

    An email is live only while at least one ``email_account_messages`` row for
    it has ``deleted_at IS NULL``; the lookup itself carries that condition, so a
    missing email and a fully tombstoned one both yield ``None``.

    Args:
        conn: SQLite connection. Rows are read by column name, so the
            connection needs a mapping-style ``row_factory`` such as
            ``sqlite3.Row``.
        email_id: Id of the ``emails`` row to load.

    Returns:
        A dict with the email's own columns (``id``, ``subject``, ``from_name``,
        ``from_email``, ``sent_at``, ``snippet``, ``body_text``,
        ``has_attachments``, ``match_status``, ``gmail_thread_id``) plus:

        * ``has_html`` — whether a non-blank ``body_html`` is stored. The HTML
          itself is deliberately removed from the result.
        * ``direction`` — ``"outbound"`` if the sender is one of the addresses
          in ``email_accounts`` (case-insensitive), else ``"inbound"``.
        * ``mailboxes`` — sorted distinct mailbox addresses with a live sighting.
        * ``recipients`` — ``to``/``cc`` rows (``address``, ``display_name``,
          ``kind``), ``to`` first, then by address.
        * ``attachments`` — ``filename``, ``mime_type``, ``size_bytes``,
          ``download_status``, ordered by filename.
        * ``investors`` — ``[{"id": typed_key, "name": ...}]``, one entry per
          distinct resolved identity across the email's links.
    """
    cur = conn.cursor()
    e = cur.execute(
        "SELECT e.id, e.subject, e.from_name, e.from_email, e.sent_at, e.snippet, "
        "e.body_text, e.body_html, e.has_attachments, e.match_status, e.gmail_thread_id "
        "FROM emails e WHERE e.id = ? AND EXISTS (SELECT 1 FROM email_account_messages eam "
        "WHERE eam.email_id = e.id AND eam.deleted_at IS NULL)", (email_id,)).fetchone()
    if not e:
        return None
    row = dict(e)

    # Don't ship the raw remote HTML to the client (XSS if any consumer ever
    # renders it); the caller only learns whether HTML exists.
    row["has_html"] = bool((row.pop("body_html", None) or "").strip())

    own = {(r["email_address"] or "").lower().strip()
           for r in cur.execute("SELECT email_address FROM email_accounts")}
    own.discard("")
    row["direction"] = "outbound" if (row["from_email"] or "").lower().strip() in own else "inbound"

    row["mailboxes"] = [r["addr"] for r in cur.execute(
        "SELECT DISTINCT ea.email_address AS addr FROM email_account_messages eam "
        "JOIN email_accounts ea ON ea.id = eam.account_id "
        "WHERE eam.email_id = ? AND eam.deleted_at IS NULL ORDER BY ea.email_address", (email_id,))]

    row["recipients"] = [dict(r) for r in cur.execute(
        "SELECT address, display_name, kind FROM email_recipients "
        "WHERE email_id = ? AND kind IN ('to','cc') "
        "ORDER BY CASE kind WHEN 'to' THEN 0 ELSE 1 END, address", (email_id,))]

    row["attachments"] = [dict(r) for r in cur.execute(
        "SELECT filename, mime_type, size_bytes, download_status FROM email_attachments "
        "WHERE email_id = ? ORDER BY filename", (email_id,))]

    # Several links may resolve to the same identity; keep the first name seen
    # for each typed key (dict preserves insertion order).
    investors: dict[str, str] = {}
    for lnk in cur.execute(
            f"SELECT {_LINK_IDENTITY_COLS} FROM email_investor_links l {_LINK_IDENTITY_JOINS} "
            "WHERE l.email_id = ?", (email_id,)):
        key, name = _resolve_entity(lnk)
        if key:
            investors.setdefault(key, name)
    row["investors"] = [{"id": k, "name": v} for k, v in investors.items()]
    return row
|
||||
|
||||
|
||||
def finish_sync_run(conn: sqlite3.Connection, run_id: str, *, status: str,
|
||||
stats: Optional[dict] = None, error: Optional[str] = None) -> None:
|
||||
stats = stats or {}
|
||||
|
||||
Reference in New Issue
Block a user