From c7b74a270461392ba44ca391cf958a95ba57b3e9 Mon Sep 17 00:00:00 2001 From: Keysat Date: Tue, 16 Jun 2026 20:46:15 -0500 Subject: [PATCH] Email search/query + windowed digest preview (v0.1.0:83) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Communications tab (search/query roadmap items 1 & 2): - Fix the investor dropdown: the facet only listed grid investors, so it came back empty whenever email matched a classic contact or org domain (no grid id — the common case). It now mirrors the email list, resolving each link to a typed identity (fund:/org:/contact:/addr:) with precedence grid -> org -> contact -> address; investor_id accepts the typed key (bare id = fund: for back-compat) and an unknown prefix matches nothing. - Add a date-range filter and a click-to-expand full-body view (GET /api/email/detail, admin, soft-delete-gated; body_text only, never raw remote HTML). - Add a "Search content" mode: GET /api/email/search wraps the ingest hybrid_search over the Qdrant email index (doc_type=email), hydrated and soft-delete-filtered against SQLite (canonical), 503 if Spark/Qdrant down. Daily digest: - Settings -> Admin builds a digest over a chosen window (last 24h or since a date) as an in-app preview before sending (POST /api/admin/digest/preview), so the local-Spark summarizer can be verified on demand even on a quiet day. Manual send uses the same window; neither advances the daily cursor, so a preview never suppresses the scheduled digest. Code-only, migrations no-op. 22/22 backend tests, render-smoke pass. --- AGENTS.md | 9 +- ROADMAP.md | 8 +- backend/digest_builder.py | 51 +++- backend/email_integration/db.py | 228 ++++++++++++--- backend/email_integration/digest_scheduler.py | 31 +- backend/email_integration/routes.py | 95 +++++++ .../test_email_activity_panel.py | 235 ++++++++++++---- backend/server.py | 52 +++- backend/test_digest_builder.py | 66 +++++ docs/guides/email.md | 53 +++- frontend/index.html | 265 +++++++++++++++++- start9/0.4/startos/utils.ts | 5 +- start9/0.4/startos/versions/index.ts | 5 +- start9/0.4/startos/versions/v0.1.0.83.ts | 24 ++ 14 files changed, 989 insertions(+), 138 deletions(-) create mode 100644 start9/0.4/startos/versions/v0.1.0.83.ts diff --git a/AGENTS.md b/AGENTS.md index dd42b6c..cfd8151 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -101,14 +101,15 @@ Subsystem rules live in `docs/guides/` and lazy-load in Claude Code via `.claude ## Current state -_Phase 0 substrate + Phase 1 thesis/outreach are built; **box and repo at v0.1.0:82** (latest: **front-end libs vendored + SRI-pinned and a jsdom render smoke check now gates every build** — closes the v78/v79 blank-screen class; prior v81: Communications tab is matched-only; v80: that tab became the admin-only captured-Gmail panel). **Decision (2026-06-16): the fundraising grid + email capture is the canonical system of record** — vestigial classic-CRM surfaces get pruned or repurposed (see `ROADMAP.md` → "Consolidate on the fundraising grid as canonical"). Longer-term backlog: `ROADMAP.md`._ +_Phase 0 substrate + Phase 1 thesis/outreach are built; **repo at v0.1.0:83, box at v0.1.0:82** (v83 built + verified locally, **not yet deployed** — needs authorization). v83 (latest): **email search/query + windowed digest preview** — Communications tab gains a fixed/typed investor dropdown, a date-range filter, a full-body view, and a semantic "Search content" mode; the Daily Digest gains an in-app windowed preview before send. Prior v82: front-end libs vendored + SRI-pinned + jsdom render-smoke build gate. **Decision (2026-06-16): the fundraising grid + email capture is the canonical system of record** — vestigial classic-CRM surfaces get pruned or repurposed (see `ROADMAP.md` → "Consolidate on the fundraising grid as canonical"). Longer-term backlog: `ROADMAP.md`._ - **Working (all draft-only):** CRM + ingest (chunk→embed→Qdrant + retrieval) + redaction boundary; Gmail capture (DWD) + email-activity propose→approve; Thesis Workshop + Architect (Claude) with dual-approval gate; Outreach Draft Assistant + follow-up radar + per-user voice + Tier-B in-thread Gmail draft creation. +- **Built + verified locally, NOT yet deployed: v0.1.0:83** — **email search/query + windowed digest preview** (code-only, migrations no-op). Communications tab (`CommunicationsPage` + `email_integration/db.query_email_activity`): **fixed the investor dropdown** — the facet now mirrors the list with the digest's precedence (grid → org → contact → address) and **typed keys** (`fund:`/`org:`/`contact:`), so email matched only to a classic contact or org domain (no grid id — the common case, since `fundraising_contacts.email` is sparsely populated) now resolves to a real name and is selectable, instead of the dropdown being empty; added a **date-range filter** (`since`/`until`), and a **click-to-expand full-body view** (`GET /api/email/detail?id=` → `query_email_detail`, admin, soft-delete-gated, renders `body_text` escaped — never raw HTML). New **semantic content search**: a "Search content" toggle → `GET /api/email/search?q=` (`routes._h_search`) wrapping `ingest/search.py:hybrid_search` filtered to `doc_type='email'` (lazy import; **503** if Spark/Qdrant unreachable), **hydrated + soft-delete-filtered against SQLite** (`db.search_hit_emails` — never trust the derived index). **Daily Digest:** Settings → Admin now builds a digest over a chosen window (last 24h or since a date) as an **in-app preview** before sending (`POST /api/admin/digest/preview`); manual send uses the same window (`send-now` + `digest_scheduler.send_digest_window`); window resolved by `digest_builder.resolve_digest_window` (cap 92d). Both run the **real local-Spark summarizer** and **never touch the daily cursor**. Verified: 22/22 backend tests, `py_compile` clean, render-smoke pass. Spark summarization path confirmed reachable (synthetic probe). Detail: `docs/guides/email.md`. - **Deployed & verified live: v0.1.0:82** (box `$START9_BOX_HOST`/immense-voyage.local; `installed-version`→`0.1.0:82`, migration chain `…81→82` clean, server up on `:8080`, schedulers + Gmail integration up). **v82 vendored React 18.3.1 / ReactDOM 18.3.1 / @babel/standalone 7.29.7 into `frontend/assets/vendor/`**, served same-origin with `sha384` SRI (no CDN, no outbound-internet dependency to render the UI), and added **`start9/0.4/render-smoke.mjs`** — a jsdom check (shipped-Babel transform asserts classic/non-module + parseable; real mount asserts the login UI renders) wired into the default `make` goal (`verified-build`), so every build is gated on the frontend actually rendering. Closes the v78 (blank screen) + v79 (Babel-8 ESM-import) class structurally. Detail: `docs/guides/packaging.md`. **Prior shipped & live:** v81 Communications-tab matched-only (`query_email_activity` gates on `EXISTS(email_investor_links)`; unmatched email captured but never shown; `docs/guides/email.md`); v80 admin-only email-activity panel (`GET /api/email/activity`); v78 retired `lp_profiles`/LP Tracker + repointed Dashboard "Total Committed" onto the grid (graveyard-excluded). **Digest fully live:** capture (DWD) → propose→approve; Gmail-DWD→SMTP transport; daily Phase-B digest (`digest_builder.py` + always-on `digest_scheduler.py` reading a DB policy + `send-now`); **auto-send defaults OFF** until Grant enables it in Settings → Admin. Detail: `docs/guides/email.md`. - **Live since v74 (2026-06-13):** login works; `/assets/` traversal 404s (plain + URL-encoded), root health 200. On boot, `ensure_thesis_v2_promoted` makes the v2.0 reserve-asset spine the working *approved* spine (node-level, reversible). Security/privacy hardening (path-traversal close, outreach NER backstop, get-by-id soft-delete) shipped in v74 — detail in `EVALUATION.md`. -- **Tests (2026-06-16):** **22/22 backend tests green** via `python3 backend/run_tests.py`, `py_compile` clean (`test_email_activity_panel.py` covers v81 matched-only scope + the panel filters/facets/admin-enforcement). Frontend now has a **scripted render smoke check** — `cd start9/0.4 && make render-smoke` (jsdom mount + shipped-Babel transform; negative-controlled) — that gates the default `make` build. The 2 stale thesis tests stay fixed (seed structure in `docs/guides/thesis.md`). -- **Decided, not yet built (detail in `ROADMAP.md`):** Pipeline adoption + a grid flag that auto-loads flagged investors as opportunities; NL→safe-query feature; CRM as canonical thesis backbone with the signal-engine reading from it (reconciliation unwired); reply-all for Tier-B drafts (currently reply to the LP only). *(Done this session: v81 matched-only Communications panel; v82 vendor+SRI front-end libs + scripted render-smoke build gate.)* +- **Tests (2026-06-16):** **22/22 backend tests green** via `python3 backend/run_tests.py`, `py_compile` clean. `test_email_activity_panel.py` now covers the **typed facet + org/contact resolution** (the dropdown fix), the **date-range filter**, the **detail view** (full body / recipients / attachments / soft-delete), and the **content-search route** (hydrate / drop-tombstoned / 503 / admin) with retrieval stubbed; `test_digest_builder.py` adds the **window resolver** + **`send_digest_window`** (no-cursor-touch) cases. Frontend **render smoke check** (`cd start9/0.4 && make render-smoke`) still gates the default `make` build. The 2 stale thesis tests stay fixed (seed structure in `docs/guides/thesis.md`). +- **Decided, not yet built (detail in `ROADMAP.md`):** Pipeline adoption + a grid flag that auto-loads flagged investors as opportunities; **NL→safe-query** feature (search item 3 — the larger, separate build); CRM as canonical thesis backbone with the signal-engine reading from it (reconciliation unwired); reply-all for Tier-B drafts (currently reply to the LP only). *(Done this session, v83: email search item 1 [activity query/panel gaps — typed facet fix + date range + full-body view] and item 2 [semantic content search] both shipped; daily-digest windowed preview→send.)* - **Known debt (P2, not deploy-blocking):** **reports-subsystem soft-delete sweep** — `handle_pipeline_report` + remaining report/aggregate queries over opportunities/communications still count soft-deleted rows (v78 shrank this surface: the `lp_profiles`/lp-breakdown aggregates are gone and the dashboard "Total Committed" is now grid-sourced); needs a pass + report-endpoint tests. Also `?limit=abc` crashes the request thread (authenticated list path); scrub-gateway TLS verify off; `cryptography==42.0.5`; stale user-visible `start9/0.4/assets/ABOUT.md`; hardcoded Spark/Qdrant IPs in the s9pk; the 5.4k-line `server.py` monolith. P3 batch + full list in `EVALUATION.md`. *(Resolved v82: front-end CDN/SRI risk — libs vendored + SRI-pinned — and the render smoke check is now scripted into the build.)* - **Doc drift to reconcile:** `crm-overview.md` + `EVALUATION.md` still describe `lp_profiles` as a live model in places — a doc-auditor pass should align them to "grid canonical, `lp_profiles` retired." - **Other gaps:** the v2.0 spine is the *working* spine but **not a canonical `thesis_version`** (needs Grant + Jonathan dual sign-off); Appendix-A conviction/exposure (incl. ~40% Strike) stay Grant's working read, not canonical, not fed to the engine; live features (Claude/Qdrant/Gmail) unverified on the box. -- **Next:** 1) add an **auth regression test** asserting the 3 v79-gated GET endpoints (`/api/users`, `/api/email/status`, `/api/email/accounts`) reject members (v80 added the analogous test for `/api/email/activity`); 2) Grant validates digest Phase B on the box — Settings→Admin **Send Digest Now**, then tick **Send automatically every day**; 3) **reports-subsystem soft-delete sweep** + report-endpoint tests; 4) **Pipeline adoption** — grid flag → auto-load opportunities; 5) `?limit=abc` crash; 6) **NL→safe-query** (separate, larger); 7) Grant + Jonathan freeze v2.0 canonical; 8) build reply-all. *(Logged to ROADMAP: a build step that pre-compiles JSX to drop runtime Babel entirely — bigger, contradicts the "no build step" convention.)* +- **Next:** 1) **deploy v0.1.0:83** to the box (needs authorization) — then Grant validates the **digest windowed preview** (Settings→Admin → "Since" a ~30-day-ago date → **Preview** to see the real Spark narratives, then **Send**) and the **Communications** dropdown/date/search; 2) add an **auth regression test** asserting the 3 v79-gated GET endpoints (`/api/users`, `/api/email/status`, `/api/email/accounts`) reject members; 3) **reports-subsystem soft-delete sweep** + report-endpoint tests; 4) **Pipeline adoption** — grid flag → auto-load opportunities; 5) `?limit=abc` crash; 6) **NL→safe-query** (search item 3 — separate, larger); 7) Grant + Jonathan freeze v2.0 canonical; 8) build reply-all. *(Logged to ROADMAP: a build step that pre-compiles JSX to drop runtime Babel entirely — bigger, contradicts the "no build step" convention.)* diff --git a/ROADMAP.md b/ROADMAP.md index 54b655a..293b962 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -87,7 +87,7 @@ ## Backlog (post-Phase-1 agentic) ### Daily activity digest (email to the team) -*Requested 2026-06-15. **Phase A deployed** (v0.1.0:76). **Phase B deployed & verified live in v0.1.0:77 (2026-06-16)** — digest content + Spark summarization + daily scheduler + by-investor section + admin-panel control + on-demand send. Auto-send defaults OFF until an admin enables it in Settings → Admin.* +*Requested 2026-06-15. **Phase A deployed** (v0.1.0:76). **Phase B deployed & verified live in v0.1.0:77 (2026-06-16)** — digest content + Spark summarization + daily scheduler + by-investor section + admin-panel control + on-demand send. Auto-send defaults OFF until an admin enables it in Settings → Admin. **v0.1.0:83 (built, deploy pending): in-app windowed preview** — Settings → Admin builds a digest over a chosen window (last 24h or since a date) and shows it before sending (`POST /api/admin/digest/preview`), so the **real Spark summarizer can be verified on demand** even on a quiet day (the fixed last-24h `send-now` couldn't); manual send uses the same window and never touches the daily cursor.* **Decisions (locked 2026-06-15):** recipients = **all active admins**; summarization = **Spark-LLM narrative** (never Claude — un-anonymized substance stays local); granularity = **grouped by user** (→ per investor). @@ -111,13 +111,15 @@ Open design questions (settled at build time): send time = **6 PM box-local** (c ### Email/communication search + natural-language query *Requested 2026-06-16. Three increments, **sequenced 1 → 2 → 3** (1 and 2 first as a quick increment; 3 is a separate, larger build after). Origin: Grant asked whether we can query "emails sent to a specific investor" / "activity by user," and floated NL queries like "existing investors who have committed capital across our funds that we haven't emailed in a while."* +**Status: items 1 & 2 SHIPPED in v0.1.0:83 (built + verified locally 2026-06-16, deploy pending).** The Communications tab now has the structured activity surface (item 1: typed/fixed investor dropdown, mailbox + direction + **date-range** filters, free-text, **click-to-expand full body** via `GET /api/email/detail`) and a **"Search content"** semantic mode (item 2: `GET /api/email/search` over the Qdrant email index). The dropdown-empty bug (the facet only listed grid investors) was the v83 fix — it now mirrors the list across grid/org/contact matches. **Item 3 (NL→SQL) remains** — the larger, separate build below. Detail: `docs/guides/email.md`. + **Context — the data is captured but currently has NO front-end.** The entire Gmail email schema (`emails`, `email_threads`, `email_investor_links`, `email_account_messages`, `email_activity_proposals`, …) exists and is populated by the DWD capture pipeline, but is surfaced **nowhere** in `frontend/index.html` today (only as inputs to the daily digest). So all three items below are about making already-captured data queryable/visible. Email bodies of *matched* emails are already chunked + embedded into Qdrant with `{lp_id, lp_name, doc_type:"email", date_ts}` metadata. **Caveat that shapes all three — the two-model join.** "Emails to an investor" link to the **fundraising grid** (`email_investor_links.fundraising_investor_id`); "committed capital" lives in the grid too (`fundraising_commitments`, multi-fund). But manually-logged `communications` and `lp_profiles` (single-fund) live in the **classic** model, and the two models are only bridged by fuzzy email/name matching (no authoritative join key). Any query spanning "committed capital" + "email recency" must reckon with this. Prefer the grid side as the higher-signal source (matcher already does). -**1. Activity query endpoints + panel (do first).** The logic already exists and is tested inside `backend/digest_builder.py` — `collect_user_activity()` (per team-member, sent vs received, with matched investor names) and `collect_investor_activity()` (re-pivoted by investor, team-wide). Expose them as on-demand endpoints (e.g. `GET /api/activity?user_id=…&since=…&until=…` and `…?investor_id=…`) returning the actual records (not just the counts that `/api/reports/activity` gives today), plus a simple UI panel. Answers "emails to investor X" and "what has user Y sent lately" interactively. Small build — mostly assembling tested parts + a thin UI. Soft-delete filter every read. +**1. Activity query endpoints + panel — DONE (v0.1.0:83).** Delivered as the **Communications tab** rather than the originally-sketched `/api/activity` endpoints: `GET /api/email/activity` (`db.query_email_activity`) returns the actual records filterable by investor / mailbox / direction / **date range** / free-text, and `GET /api/email/detail` expands the full body. Answers "emails to investor X" and "what has mailbox Y sent" interactively. Soft-delete filtered throughout; investor identity is typed (`fund:`/`org:`/`contact:`) so org/contact-only matches resolve and are pickable. *(The `collect_user_activity()`/`collect_investor_activity()` digest helpers remain the by-user/by-investor pivot source; a dedicated per-user pivot UI was not needed for the answer Grant wanted, which the mailbox+direction filters already give.)* -**2. Email content search box (do first, alongside 1).** Wire a search box onto the email bodies **already indexed in Qdrant** (capability is ~80% built — see the retrieval modes in `backend/ingest/search.py` and the MCP `hybrid_search`/`semantic_search`/`keyword_search` tools). This is semantic/lexical search over email *content* ("find where we discussed the mining deal"), distinct from the structured filters in item 1. Decide placement (global search bar vs. a dedicated email/search page — note there's no email UI at all today, so this may pair naturally with surfacing threads). Small. +**2. Email content search box — DONE (v0.1.0:83).** A **"Search content"** toggle in the Communications tab → `GET /api/email/search?q=` wraps `backend/ingest/search.py:hybrid_search` filtered to `doc_type='email'`; hits are hydrated + soft-delete-filtered against SQLite (canonical) and link back to the full body. Semantic/lexical search over email *content* ("find where we discussed the mining deal"), distinct from item 1's structured filters. 503 (clean "unavailable") when Spark/Qdrant is unreachable. **3. Natural-language → safe structured query (separate, larger, after 1 & 2).** An LLM translates a plain-English question into a **safe, read-only** DB query against the CRM, for relational/analytical questions that semantic search *cannot* answer — Grant's example ("committed across funds AND not emailed in a while") is joins + aggregates + recency, not a text-topic match. Design constraints (locked at request time, refine at build): - **LLM = Claude behind the redaction boundary** (better at text-to-SQL than local Qwen; the scrub→Claude→re-hydrate path already exists for the PII concern). Not Spark — Spark Control offers embeddings/rerank/RAG + local chat, but **no text-to-SQL**. diff --git a/backend/digest_builder.py b/backend/digest_builder.py index e45f851..54a0c55 100644 --- a/backend/digest_builder.py +++ b/backend/digest_builder.py @@ -21,7 +21,7 @@ importable (and testable with an injected chat fn) without Spark configured. import json import os import sqlite3 -from datetime import datetime, timezone +from datetime import datetime, timedelta, timezone # One row per (account-sighting x investor-link) in the window. Grouped into @@ -225,6 +225,55 @@ def load_digest_policy(conn): return pol +# ------------------------------------------------------------------ window + +# Cap a manual/preview window so an admin can't accidentally fire a build over +# years of history — each active user in the window costs one Spark call. ~3 +# months covers any realistic "since last quarter" preview. +MAX_WINDOW_DAYS = 92 + + +def resolve_digest_window(*, hours=None, since=None, now_local=None, now_utc=None): + """Resolve a digest content window to (since_iso, until_iso) as UTC ISO-8601. + + `until` is always now. The start is driven by exactly one of: + - since: a local calendar date 'YYYY-MM-DD' -> that day's local midnight + - hours: a positive integer lookback (the default path; 24 when nothing given) + `since` wins if both are supplied. The span is clamped to MAX_WINDOW_DAYS and + the start must be strictly before now. Raises ValueError on malformed input so + the caller can return a clean 400. Pure (now_* injectable) for testing. + + Used by the admin-panel preview and manual-send — neither advances the daily + cursor, so a wide window here never suppresses the scheduled digest.""" + nu = (now_utc or datetime.now(timezone.utc)).astimezone(timezone.utc) + nl = now_local or datetime.now().astimezone() + floor = nu - timedelta(days=MAX_WINDOW_DAYS) + + if since not in (None, ""): + try: + d = datetime.strptime(str(since).strip()[:10], "%Y-%m-%d") + except ValueError: + raise ValueError("since must be a date in YYYY-MM-DD form") + start = d.replace(tzinfo=nl.tzinfo or timezone.utc).astimezone(timezone.utc) + else: + h = 24 if hours in (None, "") else hours + try: + h = int(h) + except (ValueError, TypeError): + raise ValueError("hours must be an integer") + if h < 1: + raise ValueError("hours must be a positive integer") + start = nu - timedelta(hours=h) + + if start >= nu: + raise ValueError("window start must be before now") + if start < floor: + start = floor # clamp to the max span (the response echoes the real window) + + fmt = "%Y-%m-%dT%H:%M:%SZ" + return start.strftime(fmt), nu.strftime(fmt) + + # ------------------------------------------------------------------ summarization def _default_chat(prompt, system=None, max_tokens=220): diff --git a/backend/email_integration/db.py b/backend/email_integration/db.py index cb1604d..1a89c99 100644 --- a/backend/email_integration/db.py +++ b/backend/email_integration/db.py @@ -398,17 +398,63 @@ def start_sync_run(conn: sqlite3.Connection, *, account_id: str, kind: str) -> s return run_id +def _resolve_entity(row) -> tuple: + """Reduce one email_investor_links hydration row to a (key, name) identity for + the matched investor, with the same precedence the digest uses: + grid investor -> organization -> contact -> raw matched address. The key is + *typed* (`fund:`/`org:`/`contact:`/`addr:`) so the Communications filter can + target the right column. Soft-deleted org/contact rows arrive as NULL (filtered + in the join) and fall through to the next tier.""" + if row["fund_id"] and (row["fund_name"] or "").strip(): + return f"fund:{row['fund_id']}", row["fund_name"].strip() + if row["org_id"] and (row["org_name"] or "").strip(): + return f"org:{row['org_id']}", row["org_name"].strip() + if row["contact_id"] and (row["contact_name"] or "").strip(): + return f"contact:{row['contact_id']}", row["contact_name"].strip() + addr = (row["addr"] or "").strip() + return (f"addr:{addr.lower()}", addr) if addr else (None, None) + + +# Hydration of an email_investor_links row up to the resolvable investor identity, +# shared by the per-email tags and the facet dropdown. Soft-deleted org/contact +# rows are dropped in the join so they fall through to the next identity tier. +_LINK_IDENTITY_JOINS = """ + LEFT JOIN fundraising_investors fi ON fi.id = l.fundraising_investor_id + LEFT JOIN fundraising_contacts fic ON fic.id = l.fundraising_contact_id + LEFT JOIN fundraising_investors fic_inv ON fic_inv.id = fic.investor_id + LEFT JOIN organizations o ON o.id = l.organization_id AND o.deleted_at IS NULL + LEFT JOIN contacts c ON c.id = l.contact_id AND c.deleted_at IS NULL +""" +_LINK_IDENTITY_COLS = """ + l.matched_address AS addr, + COALESCE(fi.id, fic_inv.id) AS fund_id, + COALESCE(fi.investor_name, fic_inv.investor_name) AS fund_name, + COALESCE(fi.graveyard, fic_inv.graveyard) AS fund_graveyard, + o.id AS org_id, o.name AS org_name, + c.id AS contact_id, + NULLIF(TRIM(COALESCE(c.first_name,'') || ' ' || COALESCE(c.last_name,'')), '') AS contact_name +""" + + def query_email_activity(conn: sqlite3.Connection, *, investor_id: Optional[str] = None, account_id: Optional[str] = None, search: Optional[str] = None, - direction: Optional[str] = None, limit: int = 100) -> dict: + direction: Optional[str] = None, since: Optional[str] = None, + until: Optional[str] = None, limit: int = 100) -> dict: """Captured-Gmail activity for the admin Communications panel, filterable by - investor (matched fundraising investor) and/or mailbox, with free-text search - over subject/snippet/sender. Returns the email rows plus the filter facets. + matched investor entity, mailbox, direction and date range, with free-text + search over subject/snippet/sender. Returns the email rows plus the filter facets. Matched-only: the panel shows ONLY email that links to a known investor/contact (an `email_investor_links` row exists). Unmatched cold/unknown-sender email is still captured for completeness but is never surfaced here. + Investor identity is *typed* (`fund:`/`org:`/`contact:`/`addr:`) and resolved with + the digest's precedence (grid investor -> organization -> contact -> raw address), + so an email matched only to a classic contact or an org domain — not yet wired to a + grid investor — still shows a real name and is selectable in the dropdown, instead + of the facet coming back empty. `investor_id` accepts a typed key (a bare id is + treated as `fund:` for backward compatibility). + Soft-delete: an email is live only if it still has a non-tombstoned per-mailbox sighting (`email_account_messages.deleted_at IS NULL`) — the `emails` row itself carries no deleted_at, so deletion lives on the sighting. Direction is decided at @@ -433,15 +479,42 @@ def query_email_activity(conn: sqlite3.Connection, *, investor_id: Optional[str] "AND eam.deleted_at IS NULL)") params.append(account_id) if investor_id: - where.append("EXISTS (SELECT 1 FROM email_investor_links l WHERE l.email_id = e.id " - "AND (l.fundraising_investor_id = ? OR l.fundraising_contact_id IN " - "(SELECT id FROM fundraising_contacts WHERE investor_id = ?)))") - params.extend([investor_id, investor_id]) + kind, _, val = str(investor_id).partition(":") + if not val: # bare id (legacy) -> grid investor + kind, val = "fund", str(investor_id) + if kind == "fund": + where.append("EXISTS (SELECT 1 FROM email_investor_links l WHERE l.email_id = e.id " + "AND (l.fundraising_investor_id = ? OR l.fundraising_contact_id IN " + "(SELECT id FROM fundraising_contacts WHERE investor_id = ?)))") + params.extend([val, val]) + elif kind == "org": + where.append("EXISTS (SELECT 1 FROM email_investor_links l " + "WHERE l.email_id = e.id AND l.organization_id = ?)") + params.append(val) + elif kind == "contact": + where.append("EXISTS (SELECT 1 FROM email_investor_links l " + "WHERE l.email_id = e.id AND l.contact_id = ?)") + params.append(val) + elif kind == "addr": + where.append("EXISTS (SELECT 1 FROM email_investor_links l " + "WHERE l.email_id = e.id AND LOWER(l.matched_address) = ?)") + params.append(val.lower()) + else: + # Unknown key prefix (malformed input) -> match nothing, never silently + # fall through to an unfiltered list. + where.append("1 = 0") if search: like = f"%{search.strip()}%" where.append("(e.subject LIKE ? OR e.snippet LIKE ? " "OR e.from_email LIKE ? OR e.from_name LIKE ?)") params.extend([like, like, like, like]) + # Date range over the send time (ISO-8601 strings sort lexically). [since, until). + if since: + where.append("e.sent_at >= ?") + params.append(since) + if until: + where.append("e.sent_at < ?") + params.append(until) direction = (direction or "").strip().lower() if direction in ("inbound", "outbound") and own: marks = ",".join("?" for _ in own) @@ -459,8 +532,7 @@ def query_email_activity(conn: sqlite3.Connection, *, investor_id: Optional[str] for r in rows: r["direction"] = "outbound" if (r["from_email"] or "").lower().strip() in own else "inbound" r["mailboxes"] = [] - r["investors"] = [] - r["investor_labels"] = [] + r["investors"] = [] # [{id: typed-key, name}] — resolved identities ids = list(by_id) if ids: @@ -474,41 +546,127 @@ def query_email_activity(conn: sqlite3.Connection, *, investor_id: Optional[str] if s["addr"] and s["addr"] not in mb: mb.append(s["addr"]) for lnk in cur.execute( - "SELECT l.email_id AS eid, l.matched_address AS addr, " - "COALESCE(fi.id, fic_inv.id) AS inv_id, " - "COALESCE(fi.investor_name, fic_inv.investor_name) AS inv_name " - "FROM email_investor_links l " - "LEFT JOIN fundraising_investors fi ON fi.id = l.fundraising_investor_id " - "LEFT JOIN fundraising_contacts fic ON fic.id = l.fundraising_contact_id " - "LEFT JOIN fundraising_investors fic_inv ON fic_inv.id = fic.investor_id " + f"SELECT l.email_id AS eid, {_LINK_IDENTITY_COLS} " + f"FROM email_investor_links l {_LINK_IDENTITY_JOINS} " f"WHERE l.email_id IN ({marks})", ids): - row = by_id[lnk["eid"]] - if lnk["inv_id"] and lnk["inv_name"]: - if not any(iv["id"] == lnk["inv_id"] for iv in row["investors"]): - row["investors"].append({"id": lnk["inv_id"], "name": lnk["inv_name"]}) - elif lnk["addr"] and lnk["addr"] not in row["investor_labels"]: - row["investor_labels"].append(lnk["addr"]) + # No graveyard filter here on purpose: a graveyarded investor's *email* + # still shows in the list with its chip (audit completeness, direct or + # via-contact); only the facet dropdown below hides graveyard from the picker. + key, name = _resolve_entity(lnk) + if not key: + continue + invs = by_id[lnk["eid"]]["investors"] + if not any(iv["id"] == key for iv in invs): + invs.append({"id": key, "name": name}) accounts = [dict(r) for r in cur.execute( "SELECT id, email_address FROM email_accounts ORDER BY email_address")] - # Facet dropdown = live investor relationships only (graveyard = 0, the CRM-wide - # convention). Graveyarded investors are excluded from the *picker*, but their - # captured email still shows in the unfiltered list and stays findable by free-text - # search — this is an audit surface, so history is never hidden, only the picker is. - investors = [dict(r) for r in cur.execute( - "SELECT id, investor_name AS name FROM fundraising_investors WHERE graveyard = 0 AND id IN (" - " SELECT fundraising_investor_id FROM email_investor_links " - " WHERE fundraising_investor_id IS NOT NULL" - " UNION" - " SELECT investor_id FROM fundraising_contacts WHERE id IN (" - " SELECT fundraising_contact_id FROM email_investor_links " - " WHERE fundraising_contact_id IS NOT NULL)" - ") ORDER BY investor_name")] + + # Facet dropdown mirrors what the list resolves: one entry per distinct matched + # entity (grid investor / org / contact), across all live matched email — not just + # the current page — so the picker is stable under filtering. Excluded from the + # picker: graveyarded grid investors (CRM-wide convention) and raw-address-only + # matches (too many, too noisy). Both still appear in the list and remain findable + # by free-text search — this is an audit surface, so history is never hidden, only + # the picker is. + facet: dict[str, str] = {} + for r in cur.execute( + f"SELECT DISTINCT {_LINK_IDENTITY_COLS} " + f"FROM email_investor_links l {_LINK_IDENTITY_JOINS} " + "WHERE EXISTS (SELECT 1 FROM email_account_messages eam " + "WHERE eam.email_id = l.email_id AND eam.deleted_at IS NULL)"): + key, name = _resolve_entity(r) + if not key or key.startswith("addr:"): + continue + if key.startswith("fund:") and (r["fund_graveyard"] or 0): + continue + facet.setdefault(key, name) + investors = [{"id": k, "name": v} + for k, v in sorted(facet.items(), key=lambda kv: kv[1].lower())] return {"emails": rows, "accounts": accounts, "investors": investors, "count": len(rows), "truncated": truncated} +def search_hit_emails(conn: sqlite3.Connection, email_ids) -> dict: + """Display fields for the given email ids that are still live (have a + non-tombstoned sighting), keyed by id, with email-level direction. + + Used to hydrate + soft-delete-filter semantic-search hits: the Qdrant index can + lag a deletion, and SQLite is canonical (never trust the derived index), so a hit + whose email no longer has a live sighting is dropped here rather than shown.""" + ids = [i for i in dict.fromkeys(email_ids) if i] + if not ids: + return {} + cur = conn.cursor() + own = {(r["email_address"] or "").lower().strip() + for r in cur.execute("SELECT email_address FROM email_accounts")} + own.discard("") + marks = ",".join("?" for _ in ids) + out: dict = {} + for e in cur.execute( + "SELECT e.id, e.subject, e.from_name, e.from_email, e.sent_at, e.has_attachments " + f"FROM emails e WHERE e.id IN ({marks}) AND EXISTS (SELECT 1 FROM email_account_messages eam " + "WHERE eam.email_id = e.id AND eam.deleted_at IS NULL)", ids): + d = dict(e) + d["direction"] = "outbound" if (d["from_email"] or "").lower().strip() in own else "inbound" + out[d["id"]] = d + return out + + +def query_email_detail(conn: sqlite3.Connection, email_id: str) -> Optional[dict]: + """Full record for one captured email — the Communications detail view (full + body + recipients + matched investor identities + mailboxes + attachments). + + Returns None if the email doesn't exist or has no live (non-tombstoned) sighting: + soft-delete lives on the per-mailbox `email_account_messages` row, not on `emails`, + so an email is only "live" while at least one sighting survives. Direction is set + at the email level (outbound if the sender is one of our mailboxes), matching the + list. The raw remote `body_html` is NOT returned (XSS); the response carries the + plain-text `body_text` plus a `has_html` flag so the UI can note an HTML-only email.""" + cur = conn.cursor() + e = cur.execute( + "SELECT e.id, e.subject, e.from_name, e.from_email, e.sent_at, e.snippet, " + "e.body_text, e.body_html, e.has_attachments, e.match_status, e.gmail_thread_id " + "FROM emails e WHERE e.id = ? AND EXISTS (SELECT 1 FROM email_account_messages eam " + "WHERE eam.email_id = e.id AND eam.deleted_at IS NULL)", (email_id,)).fetchone() + if not e: + return None + row = dict(e) + # Don't ship the raw remote HTML to the client (XSS if any consumer ever renders + # it); the UI shows the plain-text body and only needs to know HTML exists. + row["has_html"] = bool((row.pop("body_html", None) or "").strip()) + + own = {(r["email_address"] or "").lower().strip() + for r in cur.execute("SELECT email_address FROM email_accounts")} + own.discard("") + row["direction"] = "outbound" if (row["from_email"] or "").lower().strip() in own else "inbound" + + row["mailboxes"] = [r["addr"] for r in cur.execute( + "SELECT DISTINCT ea.email_address AS addr FROM email_account_messages eam " + "JOIN email_accounts ea ON ea.id = eam.account_id " + "WHERE eam.email_id = ? AND eam.deleted_at IS NULL ORDER BY ea.email_address", (email_id,))] + + row["recipients"] = [dict(r) for r in cur.execute( + "SELECT address, display_name, kind FROM email_recipients " + "WHERE email_id = ? AND kind IN ('to','cc') " + "ORDER BY CASE kind WHEN 'to' THEN 0 ELSE 1 END, address", (email_id,))] + + row["attachments"] = [dict(r) for r in cur.execute( + "SELECT filename, mime_type, size_bytes, download_status FROM email_attachments " + "WHERE email_id = ? ORDER BY filename", (email_id,))] + + investors: dict[str, str] = {} + for lnk in cur.execute( + f"SELECT {_LINK_IDENTITY_COLS} FROM email_investor_links l {_LINK_IDENTITY_JOINS} " + "WHERE l.email_id = ?", (email_id,)): + key, name = _resolve_entity(lnk) + if key: + investors.setdefault(key, name) + row["investors"] = [{"id": k, "name": v} for k, v in investors.items()] + return row + + def finish_sync_run(conn: sqlite3.Connection, run_id: str, *, status: str, stats: Optional[dict] = None, error: Optional[str] = None) -> None: stats = stats or {} diff --git a/backend/email_integration/digest_scheduler.py b/backend/email_integration/digest_scheduler.py index 684a093..b898cd9 100644 --- a/backend/email_integration/digest_scheduler.py +++ b/backend/email_integration/digest_scheduler.py @@ -100,15 +100,36 @@ def _build_and_send(conn, since_iso, until_iso, *, build_fn=None, send_fn=None): } +def send_digest_window(conn_factory=None, *, since_iso, until_iso, + build_fn=None, send_fn=None): + """Build the digest for an explicit (since_iso, until_iso] window and send it + to the active-admin set now, WITHOUT advancing the daily cursor — a manual or + preview send must never suppress the scheduled daily digest. Same transport + + recipient rules as the daily path (raises digest_mailer.NoTransport when none + is configured / no admin has an address). Backs the admin 'send now' endpoint. + + No DB writes happen here (the cursor is deliberately untouched), so the connection + is opened and closed without a commit — don't add one without revisiting that.""" + factory = conn_factory or _conn_factory_from_env() + conn = factory() + try: + result = _build_and_send(conn, since_iso, until_iso, + build_fn=build_fn, send_fn=send_fn) + return {"status": "sent", **result} + finally: + conn.close() + + def maybe_send_digest(conn_factory=None, *, force=False, now_local=None, now_utc=None, build_fn=None, send_fn=None): """Send the daily digest if it is due (or unconditionally when force=True). - Daily path: skips before the send hour and if already sent today; content - window runs from the last send to now and the cursor advances on success. - force path (the admin 'send now' endpoint): ignores the policy and the guards, - uses a fixed last-24h window, and does NOT advance the daily cursor — so an - on-demand preview never suppresses the scheduled send.""" + Daily path (the scheduler loop): skips before the send hour and if already sent + today; content window runs from the last send to now and the cursor advances on + success. force path: ignores the policy and the guards, uses a fixed last-24h + window, and does NOT advance the daily cursor. (The admin 'send now' / preview + endpoints now use send_digest_window for an arbitrary window; force is retained + for the fixed last-24h case and its tests.)""" import digest_builder factory = conn_factory or _conn_factory_from_env() diff --git a/backend/email_integration/routes.py b/backend/email_integration/routes.py index 11dd5d4..ec5fb4f 100644 --- a/backend/email_integration/routes.py +++ b/backend/email_integration/routes.py @@ -34,6 +34,8 @@ _GET_ROUTES = { "/api/email/status": "status", "/api/email/accounts": "list_accounts", "/api/email/activity": "activity", + "/api/email/detail": "detail", + "/api/email/search": "search", "/api/email/threads": "list_threads", "/api/email/oauth/start": "oauth_start", "/api/email/oauth/callback": "oauth_callback", @@ -208,6 +210,8 @@ def _h_activity(handler): account_id=(q.get("account_id") or "").strip() or None, search=(q.get("q") or q.get("search") or "").strip() or None, direction=(q.get("direction") or "").strip() or None, + since=(q.get("since") or "").strip() or None, + until=(q.get("until") or "").strip() or None, limit=limit, ) finally: @@ -215,6 +219,97 @@ def _h_activity(handler): handler.send_json(result) +def _h_detail(handler): + # Admin-only: the full body + recipients of a captured email is admin-scoped + # substance, same as the activity list it expands from. + user = _require_admin(handler) + if not user: + return + email_id = (handler.get_query_params().get("id") or "").strip() + if not email_id: + return handler.send_error_json("id required", 400) + conn = _conn() + try: + detail = _db.query_email_detail(conn, email_id) + finally: + conn.close() + if detail is None: + return handler.send_error_json("Not found", 404) + handler.send_json(detail) + + +def _semantic_email_search(query: str, top_k: int) -> list: + """Hybrid (dense + BM25, reranked) retrieval over the email bodies indexed in + Qdrant, pre-filtered to doc_type='email'. Returns raw ranked hits (payload carries + source_id=email_id, lp_name, date_ts, text). The ingest stack (Spark Control + + Qdrant + the sparse encoder) lives in the Docker image, so it's imported lazily — + a bare CRM without it raises, and the caller maps that to a 503.""" + import os + import sys + ingest_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "ingest") + if ingest_dir not in sys.path: + sys.path.insert(0, ingest_dir) + import search as _ingest_search # ingest/search.py + filt = {"must": [{"key": "doc_type", "match": {"value": "email"}}]} + return _ingest_search.hybrid_search(query, top_k=top_k, rerank=True, filt=filt) + + +def _h_search(handler): + # Admin-only semantic search over captured email *content* (bodies), distinct from + # the structured subject/sender filters in _h_activity. Matched email bodies are the + # only email indexed in Qdrant (see ingest/chunking). Soft-delete-filtered + hydrated + # against SQLite (canonical) so a deleted email never surfaces from the stale index. + user = _require_admin(handler) + if not user: + return + q = handler.get_query_params() + query = (q.get("q") or q.get("query") or "").strip() + if not query: + return handler.send_json({"query": "", "results": []}) + try: + top_k = min(50, max(1, int(q.get("top_k", 25)))) + except (TypeError, ValueError): + top_k = 25 + + try: + hits = _semantic_email_search(query, top_k) + except Exception as e: + # Spark Control / Qdrant unreachable, or the ingest stack isn't installed. + # Log server-side (an error can carry a URL/host); give the UI a clean 503. + import sys + print(f"[email-search] retrieval failed: {type(e).__name__}: {e}", file=sys.stderr) + return handler.send_error_json("Content search is unavailable (Spark/Qdrant not reachable).", 503) + + # Hydrate + soft-delete-filter against SQLite (canonical), preserving rank order. + payloads = [(h.get("payload", {}) or {}, h) for h in hits] + ids = [p.get("source_id") for p, _ in payloads] + conn = _conn() + try: + live = _db.search_hit_emails(conn, ids) + finally: + conn.close() + + results = [] + for p, h in payloads: + eid = p.get("source_id") + e = live.get(eid) + if not e: + continue # deleted since indexing, or not matched-resolvable -> drop + results.append({ + "email_id": eid, + "subject": e["subject"], + "from_name": e["from_name"], + "from_email": e["from_email"], + "sent_at": e["sent_at"], + "direction": e["direction"], + "has_attachments": e["has_attachments"], + "lp_name": p.get("lp_name"), + "score": h.get("score"), + "excerpt": (h.get("text") or p.get("text") or "").replace("\n", " ").strip()[:300], + }) + handler.send_json({"query": query, "results": results, "count": len(results)}) + + def _h_list_threads(handler): user = _require_auth(handler) if not user: diff --git a/backend/email_integration/test_email_activity_panel.py b/backend/email_integration/test_email_activity_panel.py index 83a7c08..a1222a6 100644 --- a/backend/email_integration/test_email_activity_panel.py +++ b/backend/email_integration/test_email_activity_panel.py @@ -2,10 +2,13 @@ """Test the admin-only email-activity panel (Communications tab, v0.1.0:80). Covers the pure query (`db.query_email_activity`): matched-only scope (unmatched -cold/unknown-sender email is never surfaced), investor/mailbox/search/direction -filters, per-sighting soft-delete, direction at the email level, mailbox + investor -roll-ups, and the filter facets. -Also asserts the route handler enforces admin server-side. Synthetic data only. +cold/unknown-sender email is never surfaced), investor/mailbox/search/direction/ +date-range filters, per-sighting soft-delete, direction at the email level, mailbox +roll-ups, and the *typed* investor facet (grid investor / org / contact), including +the v83 fix where an email matched only to a classic contact or org domain — not yet +wired to a grid investor — still resolves to a real name and appears in the dropdown +(previously the facet came back empty). Also asserts the route handler enforces admin +server-side. Synthetic data only. Run: cd backend && python3 email_integration/test_email_activity_panel.py """ @@ -31,17 +34,28 @@ def make_db(): conn.executescript(""" CREATE TABLE email_accounts (id TEXT PRIMARY KEY, email_address TEXT); CREATE TABLE emails (id TEXT PRIMARY KEY, subject TEXT, from_name TEXT, from_email TEXT, - sent_at TEXT, snippet TEXT, has_attachments INT DEFAULT 0, is_matched INT DEFAULT 0, + sent_at TEXT, snippet TEXT, body_text TEXT, body_html TEXT, gmail_thread_id TEXT, + has_attachments INT DEFAULT 0, is_matched INT DEFAULT 0, match_status TEXT DEFAULT 'unmatched'); CREATE TABLE email_account_messages (id TEXT PRIMARY KEY, email_id TEXT, account_id TEXT, is_sent INT DEFAULT 0, deleted_at TEXT); + CREATE TABLE email_recipients (id TEXT PRIMARY KEY, email_id TEXT, address TEXT, + display_name TEXT, kind TEXT); + CREATE TABLE email_attachments (id TEXT PRIMARY KEY, email_id TEXT, filename TEXT, + mime_type TEXT, size_bytes INTEGER, download_status TEXT); CREATE TABLE email_investor_links (id TEXT PRIMARY KEY, email_id TEXT, - fundraising_investor_id TEXT, fundraising_contact_id TEXT, matched_address TEXT); + fundraising_investor_id TEXT, fundraising_contact_id TEXT, + organization_id TEXT, contact_id TEXT, matched_address TEXT); CREATE TABLE fundraising_investors (id TEXT PRIMARY KEY, investor_name TEXT, graveyard INTEGER DEFAULT 0); CREATE TABLE fundraising_contacts (id TEXT PRIMARY KEY, investor_id TEXT, full_name TEXT); + CREATE TABLE organizations (id TEXT PRIMARY KEY, name TEXT, deleted_at TEXT); + CREATE TABLE contacts (id TEXT PRIMARY KEY, first_name TEXT, last_name TEXT, + organization_id TEXT, deleted_at TEXT); """) - # Two mailboxes (us), three investors (one reached only via a contact link; - # one graveyarded but still with captured email history). + # Two mailboxes (us); investors reached different ways: a grid investor directly, + # a grid investor only via a contact link, a graveyarded grid investor, an org-only + # (domain) match, and a classic-contact-only match (the case that left the dropdown + # empty before v83 — neither carries a grid id). conn.executemany("INSERT INTO email_accounts VALUES (?,?)", [ ("acc-grant", "grant@ten31.xyz"), ("acc-jon", "jonathan@ten31.xyz"), @@ -52,12 +66,17 @@ def make_db(): ("inv-dead", "Dead Deal LP", 1), ]) conn.execute("INSERT INTO fundraising_contacts VALUES ('fc-1','inv-pacific','Sarah Williams')") + conn.execute("INSERT INTO organizations VALUES ('org-bridge','Bridgewater',NULL)") + conn.execute("INSERT INTO contacts VALUES ('c-solo','Nina','Park',NULL,NULL)") # Emails: - # e1 outbound (from us) -> Harbor, seen by grant - # e2 inbound (from LP) -> Harbor, seen by grant + jonathan - # e3 inbound (from LP) -> Pacific via contact link, seen by jonathan - # e4 inbound, UNMATCHED (no investor link), seen by grant -> must be excluded (matched-only) - # e5 inbound, only sighting is tombstoned -> must be excluded + # e1 outbound -> Harbor (grid), seen by grant + # e2 inbound -> Harbor (grid), seen by grant + jonathan + # e3 inbound -> Pacific via grid contact link, seen by jonathan + # e4 inbound, UNMATCHED -> excluded (matched-only) + # e5 inbound, only sighting tombstoned -> excluded + # e6 inbound -> Dead Deal LP (graveyard grid investor) + # e7 inbound -> Bridgewater via ORG-domain match (no grid id) + # e8 inbound -> Nina Park via CLASSIC-contact match (no grid id, no org) conn.executemany( "INSERT INTO emails (id,subject,from_name,from_email,sent_at,snippet,has_attachments,is_matched,match_status) VALUES (?,?,?,?,?,?,?,?,?)", [ @@ -67,6 +86,8 @@ def make_db(): ("e4", "Cold inbound", "Random", "noreply@spam.example", "2026-06-08T08:00:00", "buy now", 0, 0, "unmatched"), ("e5", "Deleted thread", "Ghost", "ghost@x.example", "2026-06-09T08:00:00", "gone", 0, 1, "matched"), ("e6", "Old dead-deal thread", "Dead LP", "lp@deaddeal.example", "2026-06-01T00:00:00", "we passed", 0, 1, "matched"), + ("e7", "Macro view", "Ray", "ray@bridgewater.example", "2026-06-10T08:00:00", "rates outlook", 0, 1, "matched"), + ("e8", "Coffee?", "Nina Park", "nina@solo.example", "2026-06-11T08:00:00", "in town next week", 0, 1, "matched"), ]) conn.executemany( "INSERT INTO email_account_messages (id,email_id,account_id,is_sent,deleted_at) VALUES (?,?,?,?,?)", @@ -78,16 +99,32 @@ def make_db(): ("m5", "e4", "acc-grant", 0, None), ("m6", "e5", "acc-grant", 0, "2026-06-10T00:00:00"), # tombstoned ("m7", "e6", "acc-grant", 0, None), + ("m8", "e7", "acc-grant", 0, None), + ("m9", "e8", "acc-jon", 0, None), ]) conn.executemany( - "INSERT INTO email_investor_links (id,email_id,fundraising_investor_id,fundraising_contact_id,matched_address) VALUES (?,?,?,?,?)", + "INSERT INTO email_investor_links (id,email_id,fundraising_investor_id,fundraising_contact_id,organization_id,contact_id,matched_address) VALUES (?,?,?,?,?,?,?)", [ - ("l1", "e1", "inv-harbor", None, "lp@harborvine.example"), - ("l2", "e2", "inv-harbor", None, "lp@harborvine.example"), - ("l3", "e3", None, "fc-1", "sarah@pacificcap.example"), - ("l5", "e5", "inv-harbor", None, "lp@harborvine.example"), - ("l6", "e6", "inv-dead", None, "lp@deaddeal.example"), + ("l1", "e1", "inv-harbor", None, None, None, "lp@harborvine.example"), + ("l2", "e2", "inv-harbor", None, None, None, "lp@harborvine.example"), + ("l3", "e3", None, "fc-1", None, None, "sarah@pacificcap.example"), + ("l5", "e5", "inv-harbor", None, None, None, "lp@harborvine.example"), + ("l6", "e6", "inv-dead", None, None, None, "lp@deaddeal.example"), + ("l7", "e7", None, None, "org-bridge", None, "ray@bridgewater.example"), + ("l8", "e8", None, None, None, "c-solo", "nina@solo.example"), ]) + # Full body + recipients + an attachment on e2, for the detail view. + conn.execute("UPDATE emails SET body_text = ?, gmail_thread_id = ?, has_attachments = 1 WHERE id = 'e2'", + ("Thanks for the deck — one question on the carry.", "thr-harbor")) + conn.executemany( + "INSERT INTO email_recipients (id,email_id,address,display_name,kind) VALUES (?,?,?,?,?)", + [ + ("r1", "e2", "grant@ten31.xyz", "Grant", "to"), + ("r2", "e2", "jonathan@ten31.xyz", "Jonathan", "cc"), + ("r3", "e2", "lp@harborvine.example", "LP Harbor", "from"), # from -> not surfaced + ]) + conn.execute("INSERT INTO email_attachments (id,email_id,filename,mime_type,size_bytes,download_status) " + "VALUES ('a1','e2','term_sheet.pdf','application/pdf',20480,'downloaded')") conn.commit() return conn @@ -101,9 +138,9 @@ def main(): # --- baseline: matched live emails only, newest first, tombstoned excluded --- res = _db.query_email_activity(conn) - check(ids(res) == ["e3", "e2", "e1", "e6"], + check(ids(res) == ["e8", "e7", "e3", "e2", "e1", "e6"], f"matched live emails newest-first; e5 (tombstoned) + e4 (unmatched) excluded; got {ids(res)}") - check(res["count"] == 4 and res["truncated"] is False, "count + not truncated") + check(res["count"] == 6 and res["truncated"] is False, "count + not truncated") check("e4" not in ids(res), "unmatched email (no investor link) never surfaces in the panel") # --- direction at the email level --- @@ -114,25 +151,44 @@ def main(): check(_db.query_email_activity(conn, direction="outbound")["emails"][0]["id"] == "e1" and len(_db.query_email_activity(conn, direction="outbound")["emails"]) == 1, "direction=outbound returns only e1") - check(ids(_db.query_email_activity(conn, direction="inbound")) == ["e3", "e2", "e6"], + check(ids(_db.query_email_activity(conn, direction="inbound")) == ["e8", "e7", "e3", "e2", "e6"], "direction=inbound excludes the outbound e1 (and unmatched e4)") # --- mailbox roll-up + per-account filter --- check(set(e2["mailboxes"]) == {"grant@ten31.xyz", "jonathan@ten31.xyz"}, "e2 seen by both mailboxes") - check(ids(_db.query_email_activity(conn, account_id="acc-jon")) == ["e3", "e2"], + check(ids(_db.query_email_activity(conn, account_id="acc-jon")) == ["e8", "e3", "e2"], "account_id=acc-jon returns only emails that mailbox saw") - # --- investor filter: direct link and via-contact link --- - check(set(ids(_db.query_email_activity(conn, investor_id="inv-harbor"))) == {"e2", "e1"}, - "investor_id=inv-harbor -> e1,e2") - check(ids(_db.query_email_activity(conn, investor_id="inv-pacific")) == ["e3"], - "investor_id=inv-pacific resolved through fundraising_contacts -> e3") + # --- date-range filter [since, until) over sent_at --- + check(ids(_db.query_email_activity(conn, since="2026-06-07T00:00:00", until="2026-06-11T00:00:00")) == ["e7", "e3"], + "date range [06-07, 06-11) -> e7,e3 (excludes 06-11 e8 and earlier e2/e1/e6)") + check(ids(_db.query_email_activity(conn, since="2026-06-10T00:00:00")) == ["e8", "e7"], + "since=06-10 -> e8,e7 only") - # --- investor name roll-up + unmatched fallback --- - check(e1["investors"] == [{"id": "inv-harbor", "name": "Harbor & Vine"}], "e1 investor resolved to name") + # --- investor filter: typed keys + legacy bare-id back-compat --- + check(set(ids(_db.query_email_activity(conn, investor_id="fund:inv-harbor"))) == {"e2", "e1"}, + "investor_id=fund:inv-harbor -> e1,e2") + check(set(ids(_db.query_email_activity(conn, investor_id="inv-harbor"))) == {"e2", "e1"}, + "legacy bare id treated as fund: -> e1,e2") + check(ids(_db.query_email_activity(conn, investor_id="fund:inv-pacific")) == ["e3"], + "fund:inv-pacific resolved through fundraising_contacts -> e3") + check(ids(_db.query_email_activity(conn, investor_id="org:org-bridge")) == ["e7"], + "org:org-bridge -> e7 (org-domain match)") + check(ids(_db.query_email_activity(conn, investor_id="contact:c-solo")) == ["e8"], + "contact:c-solo -> e8 (classic-contact match)") + check(_db.query_email_activity(conn, investor_id="bogus:x")["emails"] == [], + "unknown investor_id key prefix -> match nothing (never silently unfiltered)") + + # --- investor identity roll-up, typed + resolved name --- + check(e1["investors"] == [{"id": "fund:inv-harbor", "name": "Harbor & Vine"}], "e1 grid investor resolved") e3 = next(e for e in res["emails"] if e["id"] == "e3") - check(e3["investors"] == [{"id": "inv-pacific", "name": "Pacific Capital"}], "e3 investor resolved via contact") - check("e4" not in ids(res), "e4 unmatched -> excluded from the matched-only panel") + check(e3["investors"] == [{"id": "fund:inv-pacific", "name": "Pacific Capital"}], "e3 resolved via grid contact") + e7 = next(e for e in res["emails"] if e["id"] == "e7") + check(e7["investors"] == [{"id": "org:org-bridge", "name": "Bridgewater"}], + "e7 org-domain match resolves to the org name (not a raw address)") + e8 = next(e for e in res["emails"] if e["id"] == "e8") + check(e8["investors"] == [{"id": "contact:c-solo", "name": "Nina Park"}], + "e8 classic-contact match resolves to the contact name") # --- free-text search over subject / snippet / sender --- check(set(ids(_db.query_email_activity(conn, search="Fund III"))) == {"e1", "e2"}, "search subject") @@ -141,18 +197,21 @@ def main(): check(ids(_db.query_email_activity(conn, search="buy now")) == [], "unmatched email never surfaces, even by free-text search") - # --- facets --- + # --- facets: typed entries spanning grid / org / contact matches --- check([a["email_address"] for a in res["accounts"]] == ["grant@ten31.xyz", "jonathan@ten31.xyz"], "accounts facet sorted") facet_inv = {i["id"] for i in res["investors"]} - check(facet_inv == {"inv-harbor", "inv-pacific"}, "investor facet covers direct + via-contact activity") + check(facet_inv == {"fund:inv-harbor", "fund:inv-pacific", "org:org-bridge", "contact:c-solo"}, + f"investor facet now mirrors the list (grid + org + contact), not just grid; got {facet_inv}") + check([i["name"] for i in res["investors"]] == sorted(i["name"] for i in res["investors"]), + "facet sorted by display name") # --- graveyard: hidden from the picker, but its email stays visible + findable --- - check("inv-dead" not in facet_inv, "graveyard investor excluded from the facet dropdown") + check("fund:inv-dead" not in facet_inv, "graveyard investor excluded from the facet dropdown") check("e6" in ids(res), "graveyard investor's email still shows in the unfiltered list (audit completeness)") e6 = next(e for e in res["emails"] if e["id"] == "e6") - check(e6["investors"] == [{"id": "inv-dead", "name": "Dead Deal LP"}], "graveyard email still shows its investor chip") - check(ids(_db.query_email_activity(conn, investor_id="inv-dead")) == ["e6"], + check(e6["investors"] == [{"id": "fund:inv-dead", "name": "Dead Deal LP"}], "graveyard email still shows its investor chip") + check(ids(_db.query_email_activity(conn, investor_id="fund:inv-dead")) == ["e6"], "explicit investor_id filter still works for a graveyard investor") check(ids(_db.query_email_activity(conn, search="deaddeal")) == ["e6"], "graveyard email remains findable by free-text search") @@ -161,10 +220,27 @@ def main(): tr = _db.query_email_activity(conn, limit=2) check(tr["count"] == 2 and tr["truncated"] is True, "limit=2 -> truncated") + # --- detail view (full body + recipients + attachments + identity) --- + d = _db.query_email_detail(conn, "e2") + check(d is not None and d["body_text"] == "Thanks for the deck — one question on the carry.", + "detail returns the full body") + check(d["direction"] == "inbound" and set(d["mailboxes"]) == {"grant@ten31.xyz", "jonathan@ten31.xyz"}, + "detail direction + mailboxes") + check([(r["address"], r["kind"]) for r in d["recipients"]] == + [("grant@ten31.xyz", "to"), ("jonathan@ten31.xyz", "cc")], + "detail recipients = to/cc only (from is excluded)") + check([a["filename"] for a in d["attachments"]] == ["term_sheet.pdf"], "detail lists attachments") + check(d["investors"] == [{"id": "fund:inv-harbor", "name": "Harbor & Vine"}], "detail resolves investor identity") + check(_db.query_email_detail(conn, "e5") is None, + "detail of a tombstoned-only email -> None (soft-delete on the sighting)") + check(_db.query_email_detail(conn, "nope") is None, "detail of a missing id -> None") + conn.close() # --- route enforces admin server-side --- test_route_admin_only() + # --- semantic content-search route (hydrate + soft-delete + 503) --- + test_search_route() if FAILS: print(f"\nFAILED ({len(FAILS)})") @@ -174,6 +250,28 @@ def main(): print("\nALL PASS (email-activity panel)") +class FakeHandler: + def __init__(self, user, params=None): + self._user = user + self._params = params or {} + self.json = None + self.err = None + self.code = None + + def get_user(self): + return self._user + + def get_query_params(self): + return self._params + + def send_json(self, obj): + self.json = obj + + def send_error_json(self, msg, code): + self.err = msg + self.code = code + + def test_route_admin_only(): try: from email_integration import routes @@ -181,26 +279,6 @@ def test_route_admin_only(): print(f" SKIP route admin test (routes import failed: {e})") return - class FakeHandler: - def __init__(self, user): - self._user = user - self.json = None - self.err = None - self.code = None - - def get_user(self): - return self._user - - def get_query_params(self): - return {} - - def send_json(self, obj): - self.json = obj - - def send_error_json(self, msg, code): - self.err = msg - self.code = code - h = FakeHandler(None) routes._h_activity(h) check(h.code == 401 and h.json is None, "route: no user -> 401") @@ -210,5 +288,48 @@ def test_route_admin_only(): check(h.code == 403 and h.json is None, "route: member -> 403 (admin enforced server-side)") +def test_search_route(): + try: + from email_integration import routes + except Exception as e: # pragma: no cover + print(f" SKIP search route test (routes import failed: {e})") + return + + # Hydration source = a fresh fully-populated in-memory DB each call (the handler + # opens + closes its own conn). Retrieval is stubbed — no Spark/Qdrant in tests. + routes._conn = make_db + routes._semantic_email_search = lambda query, top_k: [ + {"score": 0.91, "text": "carry discussion\nand terms", "payload": {"source_id": "e2", "lp_name": "Harbor & Vine"}}, + {"score": 0.80, "text": "gone", "payload": {"source_id": "e5", "lp_name": "Ghost"}}, # tombstoned -> drop + {"score": 0.70, "text": "n/a", "payload": {"source_id": "missing", "lp_name": "Nobody"}}, # missing -> drop + ] + h = FakeHandler({"role": "admin"}, {"q": "carry"}) + routes._h_search(h) + check(h.json and [r["email_id"] for r in h.json["results"]] == ["e2"], + f"content search drops tombstoned + missing, keeps live e2; got {h.json and [r['email_id'] for r in h.json['results']]}") + top = h.json["results"][0] + check(top["lp_name"] == "Harbor & Vine" and top["score"] == 0.91 and top["subject"] == "Re: Fund III update", + "hit carries lp_name + score + hydrated subject") + check("\n" not in top["excerpt"], "excerpt is newline-flattened") + + # empty query short-circuits (no retrieval call) + h = FakeHandler({"role": "admin"}, {"q": ""}) + routes._h_search(h) + check(h.json == {"query": "", "results": []}, "empty query -> empty results") + + # retrieval failure -> clean 503 (Spark/Qdrant down) + def _boom(query, top_k): + raise RuntimeError("spark down") + routes._semantic_email_search = _boom + h = FakeHandler({"role": "admin"}, {"q": "x"}) + routes._h_search(h) + check(h.code == 503, f"retrieval failure -> 503, got {h.code}") + + # admin enforced + h = FakeHandler({"role": "member"}, {"q": "x"}) + routes._h_search(h) + check(h.code == 403, "content search admin-enforced server-side") + + if __name__ == "__main__": main() diff --git a/backend/server.py b/backend/server.py index 0e545fe..92a26ff 100644 --- a/backend/server.py +++ b/backend/server.py @@ -1914,6 +1914,8 @@ class CRMHandler(BaseHTTPRequestHandler): return self.handle_admin_send_test_email(user, body) if path == '/api/admin/digest/send-now': return self.handle_admin_send_digest_now(user, body) + if path == '/api/admin/digest/preview': + return self.handle_admin_digest_preview(user, body) if path == '/api/fundraising/backup': return self.handle_backup_fundraising_state(user) if path == '/api/fundraising/restore-preview': @@ -4140,18 +4142,50 @@ class CRMHandler(BaseHTTPRequestHandler): return self.send_json({"data": {"status": "sent", **result}}) - def handle_admin_send_digest_now(self, user, body): - """Build the REAL daily activity digest (last 24h) on demand and send it to - the active-admin set now. An on-demand preview of Phase B — it does not - touch the daily schedule's cursor, so it never suppresses the scheduled send. - Content is summarized on Spark (local), never Claude.""" + def handle_admin_digest_preview(self, user, body): + """Build the activity digest over a chosen window and return it WITHOUT + sending — the admin-panel preview. Window defaults to the last 24h, or + {hours: N} / {since: 'YYYY-MM-DD'} (a local date -> that day's midnight). + Runs the REAL Spark summarization, so widening the window is how you verify + the summarizer on a quiet day. Never touches the daily cursor.""" if not require_admin(user): return self.send_error_json("Admin only", 403) - - import digest_mailer + body = body or {} + import digest_builder try: - from email_integration.digest_scheduler import maybe_send_digest - result = maybe_send_digest(force=True) + since_iso, until_iso = digest_builder.resolve_digest_window( + hours=body.get('hours'), since=body.get('since')) + except ValueError as exc: + return self.send_error_json(str(exc), 400) + conn = get_db() + try: + digest = digest_builder.build_digest(conn, since_iso, until_iso) + except Exception as exc: + print(f"[digest] preview failed: {type(exc).__name__}: {exc}", file=sys.stderr) + return self.send_error_json("Preview failed — see server logs for details.", 502) + finally: + conn.close() + return self.send_json({"data": {**digest, "window": [since_iso, until_iso]}}) + + def handle_admin_send_digest_now(self, user, body): + """Build the REAL activity digest over a chosen window and send it to the + active-admin set now. Window defaults to the last 24h, or {hours: N} / + {since: 'YYYY-MM-DD'} — same resolution as the preview. Does NOT touch the + daily schedule's cursor, so it never suppresses the scheduled send. Content + is summarized on Spark (local), never Claude.""" + if not require_admin(user): + return self.send_error_json("Admin only", 403) + body = body or {} + import digest_mailer + import digest_builder + try: + since_iso, until_iso = digest_builder.resolve_digest_window( + hours=body.get('hours'), since=body.get('since')) + except ValueError as exc: + return self.send_error_json(str(exc), 400) + try: + from email_integration.digest_scheduler import send_digest_window + result = send_digest_window(since_iso=since_iso, until_iso=until_iso) except digest_mailer.NoTransport as exc: return self.send_error_json(str(exc), 400) except Exception as exc: diff --git a/backend/test_digest_builder.py b/backend/test_digest_builder.py index 71d3a5e..75dceee 100644 --- a/backend/test_digest_builder.py +++ b/backend/test_digest_builder.py @@ -283,6 +283,70 @@ def test_scheduler_guards(): conn.close() +def test_window_resolver(): + from datetime import timedelta + nu = datetime(2026, 6, 16, 15, 0, tzinfo=timezone.utc) + nl = datetime(2026, 6, 16, 8, 0, tzinfo=timezone(timedelta(hours=-7))) # PDT + + s, u = digest_builder.resolve_digest_window(now_utc=nu, now_local=nl) + check((s, u) == ("2026-06-15T15:00:00Z", "2026-06-16T15:00:00Z"), f"default = last 24h, got {(s,u)}") + + s, u = digest_builder.resolve_digest_window(hours=48, now_utc=nu, now_local=nl) + check(s == "2026-06-14T15:00:00Z", f"hours=48 lookback, got {s}") + + # since = a local calendar date -> that day's LOCAL midnight, expressed in UTC + s, u = digest_builder.resolve_digest_window(since="2026-06-01", now_utc=nu, now_local=nl) + check(s == "2026-06-01T07:00:00Z", f"since-date -> local midnight in UTC, got {s}") + + # a since older than the cap clamps to MAX_WINDOW_DAYS (response echoes real window) + s, u = digest_builder.resolve_digest_window(since="2025-01-01", now_utc=nu, now_local=nl) + check(s == (nu - timedelta(days=digest_builder.MAX_WINDOW_DAYS)).strftime("%Y-%m-%dT%H:%M:%SZ"), + f"over-cap since clamps to {digest_builder.MAX_WINDOW_DAYS}d, got {s}") + + # since wins over hours when both supplied + s, u = digest_builder.resolve_digest_window(hours=1, since="2026-06-10", now_utc=nu, now_local=nl) + check(s.startswith("2026-06-10"), f"since wins over hours, got {s}") + + # same-day boundary: since = today's local date, now later in the day -> valid + # window (local midnight is strictly before now), not a "start must be before now" raise + s, u = digest_builder.resolve_digest_window(since="2026-06-16", now_utc=nu, now_local=nl) + check(s == "2026-06-16T07:00:00Z" and u == "2026-06-16T15:00:00Z", + f"since=today -> [local midnight, now], got {(s, u)}") + + for bad in [dict(hours=0), dict(hours="abc"), dict(since="nope"), dict(since="2027-01-01")]: + try: + digest_builder.resolve_digest_window(now_utc=nu, now_local=nl, **bad) + check(False, f"bad input {bad} should raise") + except ValueError: + check(True, f"bad input rejected: {bad}") + + +def test_send_digest_window(): + sent = [] + build_fn = lambda conn, since, until: {"subject": "S", "body": f"{since}|{until}", + "has_activity": True, "user_count": 1, + "email_count": 2, "investor_count": 1} + def send_fn(conn, to_addrs, subject, body, sender_email=None): + sent.append((list(to_addrs), body)) + return {"transport": "stub"} + + conn = _conn() + before = digest_scheduler._get_setting(conn, digest_scheduler._LAST_AT_KEY) + conn.close() + + r = digest_scheduler.send_digest_window(_conn, since_iso="2026-05-01T00:00:00Z", + until_iso="2026-06-16T00:00:00Z", + build_fn=build_fn, send_fn=send_fn) + check(r["status"] == "sent" and r["window"] == ["2026-05-01T00:00:00Z", "2026-06-16T00:00:00Z"], + f"windowed send returns its window, got {r}") + check(sent and sent[-1][0] == ["grant@ten31.xyz"], f"windowed send -> active admins only, got {sent}") + + conn = _conn() + after = digest_scheduler._get_setting(conn, digest_scheduler._LAST_AT_KEY) + conn.close() + check(before == after, "windowed manual send does not advance the daily cursor") + + def main(): setup() print("collect_user_activity:"); test_collect() @@ -290,6 +354,8 @@ def main(): print("build_digest + empty:"); test_build_and_empty() print("summary fallback:"); test_summary_fallback() print("digest policy:"); test_policy() + print("window resolver:"); test_window_resolver() + print("windowed manual send:"); test_send_digest_window() print("scheduler guards:"); test_scheduler_guards() if FAILS: print(f"\nFAILED ({len(FAILS)})") diff --git a/docs/guides/email.md b/docs/guides/email.md index 93b7e50..30bf6a1 100644 --- a/docs/guides/email.md +++ b/docs/guides/email.md @@ -70,10 +70,21 @@ different category. **Never extend this path to send to LPs/prospects.** the panel takes effect with no restart. Content window = (last send, now]; cursor (`digest_last_sent_at`) + once-per-day guard (`digest_last_sent_date`) live in `app_settings`, so a missed day rolls into the next digest. Recipients = all active admins. -- **On-demand: `POST /api/admin/digest/send-now`** (admin-only) → `maybe_send_digest(force=True)` - builds the real last-24h digest and sends to the admin set regardless of the policy and - **without** touching the daily cursor (a preview never suppresses the scheduled send). - Surfaced as a "Send Digest Now" button in Settings → Admin, beside "Send Test Digest Email". +- **Windowed preview + manual send (Settings → Admin "Manual run & preview"):** + - **`POST /api/admin/digest/preview`** (admin-only) builds the digest over a chosen window + and returns `{subject, body, …, window}` **without sending** — it runs the **real Spark + summarization**, so widening the window is how you verify the summarizer on a quiet day + (a last-24h window with no activity never calls Spark). Rendered in an in-panel preview. + - **`POST /api/admin/digest/send-now`** (admin-only) sends over the **same** window to the + admin set now. + - Both take the window from the body: default last 24h, `{"hours": N}`, or + `{"since": "YYYY-MM-DD"}` (a **local** date → that day's midnight). Resolved by + `digest_builder.resolve_digest_window` (capped at `MAX_WINDOW_DAYS`=92, validated → 400 on + bad input). The send goes through `digest_scheduler.send_digest_window`, which — like the + old `force=True` path — **does NOT advance the daily cursor**, so a wide manual preview/send + never suppresses the scheduled daily digest. + - The **"Send transport test"** button (`POST /api/admin/digest/test-email`) stays as a pure + pipe check (fixed message, admin-recipient-restricted). - **Decisions (locked):** 6 PM default send · always-send (empty days get a "no activity" note) · per-user narrative + by-investor structured section · enable/time controlled in the admin panel. Tests: `backend/test_digest_builder.py` (per-user + per-investor queries, @@ -103,8 +114,40 @@ query). Filters: `investor_id`, `account_id` (mailbox), `direction` (`inbound`/` - **Graveyard investors** are hidden from the filter *dropdown* (CRM-wide `graveyard = 0`), but their captured email still shows in the list and stays findable by free-text search — it's an audit surface, so history is never hidden, only the picker is. +- **Typed investor facet (the dropdown).** The picker mirrors what the list resolves: one + entry per distinct matched entity, with the digest's precedence (**grid investor → org → + contact → raw address**) and a **typed key** — `fund:` / `org:` / `contact:` + (`investor_id=` accepts these; a bare id is treated as `fund:` for back-compat). This fixed + the "dropdown only shows *All investors*" bug: matches that land on a **classic contact or + org domain** (no grid id — common, since `fundraising_contacts.email` is sparsely populated) + now resolve to a real name and appear in the picker, instead of the facet coming back empty. + Raw-address-only matches stay out of the *picker* (noisy) but still show + search in the list. + Helpers: `db._resolve_entity` + the shared `_LINK_IDENTITY_COLS`/`_LINK_IDENTITY_JOINS`. +- **Date range:** `since`/`until` filter `e.sent_at` as a half-open `[since, until)` + interval; the UI sends `from` as `…T00:00:00` and `to` as the **next day's** midnight, + so the whole "to" day is included regardless of the stored timestamp's precision/zone. +- **Detail view:** **`GET /api/email/detail?id=`** (`_h_detail`, `require_admin`) → + `db.query_email_detail` returns the full body + to/cc recipients + attachments + typed + identities, **soft-delete-gated on a live sighting** (404 otherwise). The UI renders + `body_text` (escaped) — **never** raw remote `body_html` (XSS); click a row to expand. -Tests: `backend/email_integration/test_email_activity_panel.py`. +## Content search (semantic, over email bodies) — admin-only + +The Communications tab has a **Filter ⇄ Search content** toggle. "Search content" is semantic +search over the email *bodies* indexed in Qdrant (distinct from the structured subject/sender +LIKE filters above). **`GET /api/email/search?q=`** (`routes._h_search`, `require_admin`): + +- Retrieval = `ingest/search.py:hybrid_search` (dense + BM25, reranked) pre-filtered to + `doc_type='email'`, imported **lazily** (the ingest stack — Spark Control + Qdrant + the + sparse encoder — ships in the Docker image, not the bare CRM); any failure → a clean **503**. +- Only **matched** email bodies are indexed (see `ingest/chunking.py`); the Qdrant payload + carries `source_id`=email_id, `lp_name`, `date_ts`, so hits link straight back to the row. +- **Hydrated + soft-delete-filtered against SQLite (canonical):** `db.search_hit_emails` + drops any hit whose email no longer has a live sighting — the derived index can lag a + deletion, and we never surface a fact from Qdrant that SQLite has tombstoned. + +Tests: `backend/email_integration/test_email_activity_panel.py` (panel filters/facets/detail + +the search route's hydrate/drop/503/admin paths, with retrieval stubbed). ## Known gap diff --git a/frontend/index.html b/frontend/index.html index ada9e75..b67bcb8 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -4273,8 +4273,17 @@ const [investorId, setInvestorId] = useState(''); const [accountId, setAccountId] = useState(''); const [direction, setDirection] = useState(''); + const [since, setSince] = useState(''); + const [until, setUntil] = useState(''); const [search, setSearch] = useState(''); const [debouncedSearch, setDebouncedSearch] = useState(''); + const [expandedId, setExpandedId] = useState(null); + const [detailCache, setDetailCache] = useState({}); // id -> {loading, data, error} + const [mode, setMode] = useState('filter'); // 'filter' | 'search' (semantic content) + const [contentQuery, setContentQuery] = useState(''); + const [searchResults, setSearchResults] = useState(null); // {results, count} | null + const [searchLoading, setSearchLoading] = useState(false); + const [searchError, setSearchError] = useState(''); // Debounce the free-text box so each keystroke doesn't hit the server. useEffect(() => { @@ -4292,6 +4301,16 @@ if (investorId) params.set('investor_id', investorId); if (accountId) params.set('account_id', accountId); if (direction) params.set('direction', direction); + if (since) params.set('since', since + 'T00:00:00'); + // `until` is exclusive in the query; send the day AFTER the picked + // date at midnight so the whole "to" day is included regardless of + // the stored timestamp's precision/zone suffix. + if (until) { + const u = new Date(until + 'T00:00:00'); + u.setDate(u.getDate() + 1); + const nd = `${u.getFullYear()}-${String(u.getMonth() + 1).padStart(2, '0')}-${String(u.getDate()).padStart(2, '0')}`; + params.set('until', nd + 'T00:00:00'); + } if (debouncedSearch) params.set('q', debouncedSearch); params.set('limit', '200'); const res = await api(`/api/email/activity?${params.toString()}`, {}, token); @@ -4313,7 +4332,73 @@ } })(); return () => { cancelled = true; }; - }, [token, isAdmin, investorId, accountId, direction, debouncedSearch]); + }, [token, isAdmin, investorId, accountId, direction, since, until, debouncedSearch]); + + // Lazily fetch and cache the full body when a row is expanded. + const toggleExpand = async (id) => { + if (expandedId === id) { setExpandedId(null); return; } + setExpandedId(id); + if (detailCache[id]) return; + setDetailCache((c) => ({ ...c, [id]: { loading: true } })); + try { + const res = await api(`/api/email/detail?id=${encodeURIComponent(id)}`, {}, token); + setDetailCache((c) => ({ ...c, [id]: { loading: false, data: res } })); + } catch (err) { + setDetailCache((c) => ({ ...c, [id]: { loading: false, error: getErrorMessage(err, 'Failed to load email') } })); + } + }; + + // Shared expanded-body panel (used by both the filter list and search results). + const renderExpandedBody = (id) => { + const det = detailCache[id]; + return ( +
+ {det?.loading ? + : det?.error ?
{det.error}
+ : det?.data ? (() => { + const d = det.data; + const rcpt = (kind) => (d.recipients || []).filter((r) => r.kind === kind) + .map((r) => r.display_name ? `${r.display_name} <${r.address}>` : r.address).join(', '); + const to = rcpt('to'), cc = rcpt('cc'); + return ( + <> + {to &&
To: {to}
} + {cc &&
Cc: {cc}
} + {(d.attachments || []).length > 0 && ( +
+ Attachments: {d.attachments.map((a) => a.filename).join(', ')} +
+ )} +
+                                        {d.body_text || (d.has_html ? '(HTML-only email — open in Gmail to view formatting)' : '(no body captured)')}
+                                    
+ + ); + })() : null} +
+ ); + }; + + // Semantic search over email *content* (bodies) — heavier (Spark + Qdrant), + // so it runs on submit, not per keystroke. + const runContentSearch = async () => { + const query = contentQuery.trim(); + if (!query) { setSearchResults(null); setSearchError(''); return; } + setSearchLoading(true); setSearchError(''); + try { + const res = await api(`/api/email/search?q=${encodeURIComponent(query)}`, {}, token); + setSearchResults(res); + } catch (err) { + if (err?.status === 503) { + setSearchError('Content search is unavailable right now (Spark/Qdrant not reachable).'); + } else { + setSearchError(getErrorMessage(err, 'Search failed')); + } + setSearchResults(null); + } finally { + setSearchLoading(false); + } + }; if (!isAdmin) { return ( @@ -4327,7 +4412,7 @@ const emails = data?.emails || []; const accounts = data?.accounts || []; const investors = data?.investors || []; - const hasFilter = !!(investorId || accountId || direction || debouncedSearch); + const hasFilter = !!(investorId || accountId || direction || since || until || debouncedSearch); return (
@@ -4336,6 +4421,20 @@ Captured email activity. Logging & drafts live in the Fundraising Grid and Outreach.
+
+ + + + {mode === 'filter' ? 'Structured filters over captured email metadata.' : 'Semantic search over email bodies (finds by meaning, not just keywords).'} + +
+ {mode === 'filter' && (<>
Received + + + {(since || until) && ( + + )}
{loading ? ( @@ -4375,12 +4486,14 @@ {emails.map((em) => { const sent = em.direction === 'outbound'; const who = em.from_name ? `${em.from_name} <${em.from_email}>` : (em.from_email || 'Unknown sender'); - const tags = [...(em.investors || []).map((iv) => iv.name), ...(em.investor_labels || [])]; + const tags = (em.investors || []).map((iv) => iv.name); + const open = expandedId === em.id; return (
-
+
toggleExpand(em.id)}> + {open ? '▾' : '▸'} {sent ? '↗ Sent' : '↘ Received'} {' · '}{who}{em.has_attachments ? ' 📎' : ''}
@@ -4388,8 +4501,9 @@ {formatDate(em.sent_at)} {(em.mailboxes || []).length > 0 && · {em.mailboxes.join(', ')}}
- {em.subject &&
{em.subject}
} - {em.snippet &&
{em.snippet}
} + {em.subject &&
toggleExpand(em.id)}>{em.subject}
} + {!open && em.snippet &&
{em.snippet}
} + {open && renderExpandedBody(em.id)} {tags.length > 0 ? (
{tags.map((t) => ( @@ -4408,6 +4522,66 @@
)} + )} + {mode === 'search' && ( + <> +
+ setContentQuery(e.target.value)} + onKeyDown={(e) => { if (e.key === 'Enter') runContentSearch(); }} + /> + +
+ {searchLoading ? ( + + ) : searchError ? ( +
{searchError}
+ ) : searchResults === null ? ( +
Search the text of captured (investor-matched) emails. Only matched email bodies are indexed.
+ ) : (searchResults.results || []).length === 0 ? ( +
No emails matched “{contentQuery.trim()}”.
+ ) : ( + <> +
+ {searchResults.results.length} result{searchResults.results.length === 1 ? '' : 's'}, most relevant first. +
+
+ {searchResults.results.map((r) => { + const sent = r.direction === 'outbound'; + const who = r.from_name ? `${r.from_name} <${r.from_email}>` : (r.from_email || 'Unknown sender'); + const open = expandedId === r.email_id; + return ( +
+
+
+
toggleExpand(r.email_id)}> + {open ? '▾' : '▸'} + {sent ? '↗ Sent' : '↘ Received'} + {' · '}{who}{r.has_attachments ? ' 📎' : ''} +
+
+ {formatDate(r.sent_at)} + {r.lp_name && · {r.lp_name}} + {typeof r.score === 'number' && · score {r.score.toFixed(2)}} +
+ {r.subject &&
toggleExpand(r.email_id)}>{r.subject}
} + {!open && r.excerpt &&
…{r.excerpt}…
} + {open && renderExpandedBody(r.email_id)} +
+
+ ); + })} +
+ + )} + + )}
); @@ -7341,6 +7515,12 @@ const [digestPolicy, setDigestPolicy] = useState({ enabled: false, send_hour: 18 }); const [digestPolicyLoading, setDigestPolicyLoading] = useState(false); const [digestPolicySaving, setDigestPolicySaving] = useState(false); + // Manual run: window selector + in-panel preview before pushing to email. + const [digestWindowMode, setDigestWindowMode] = useState('24h'); // '24h' | 'since' + const [digestSince, setDigestSince] = useState(() => + new Date(Date.now() - 30 * 864e5).toISOString().slice(0, 10)); + const [digestPreview, setDigestPreview] = useState(null); + const [digestPreviewLoading, setDigestPreviewLoading] = useState(false); const [auditLogs, setAuditLogs] = useState([]); const [auditLoading, setAuditLoading] = useState(false); const [automationRules, setAutomationRules] = useState([]); @@ -7839,19 +8019,39 @@ } }; + // The selected manual-run window: a specific start date, or the last 24h. + const digestWindowBody = () => + digestWindowMode === 'since' && digestSince ? { since: digestSince } : { hours: 24 }; + + const digestSummary = (d) => d.has_activity + ? `${d.user_count} member(s), ${d.email_count} email(s), ${d.investor_count} investor(s)` + : 'no activity in this window'; + + const handlePreviewDigest = async () => { + setDigestPreviewLoading(true); + try { + const result = await api('/api/admin/digest/preview', { + method: 'POST', + body: JSON.stringify(digestWindowBody()) + }, token); + setDigestPreview(result?.data || null); + } catch (err) { + onShowToast(getErrorMessage(err, 'Failed to build preview — is Spark Control reachable?'), 'error'); + } finally { + setDigestPreviewLoading(false); + } + }; + const handleSendDigestNow = async () => { setSendDigestLoading(true); try { const result = await api('/api/admin/digest/send-now', { method: 'POST', - body: JSON.stringify({}) + body: JSON.stringify(digestWindowBody()) }, token); const d = result?.data || {}; const to = (d.recipients || []).join(', '); - const summary = d.has_activity - ? `${d.user_count} member(s), ${d.email_count} email(s), ${d.investor_count} investor(s)` - : 'no activity in the last 24h'; - onShowToast(`Digest sent (${summary})${to ? ` to ${to}` : ''}`, 'success'); + onShowToast(`Digest sent (${digestSummary(d)})${to ? ` to ${to}` : ''}`, 'success'); } catch (err) { onShowToast(getErrorMessage(err, 'Failed to send digest — is a transport (Gmail/DWD or SMTP) configured?'), 'error'); } finally { @@ -8142,7 +8342,7 @@
Daily Digest Email
- A daily email to all active admins: each team member's activity per investor, plus a by-investor view (inbound + outbound), summarized locally (Spark), never Claude. Send Test verifies the outbound pipe with a fixed message; Send Now sends the real last-24h digest immediately. + A daily email to all active admins: each team member's activity per investor, plus a by-investor view (inbound + outbound), summarized locally (Spark), never Claude. Use Preview below to build the digest over a window and read it before sending — a wide window is how you verify the summarizer on a quiet day. Send pushes that same window to the admin inboxes now. Neither touches the daily schedule.
+
Manual run & preview
+
+ + +
- +
+ {digestPreview && ( +
+
+ + {digestPreview.subject} — {digestSummary(digestPreview)} + {Array.isArray(digestPreview.window) && ( + · {formatDate(digestPreview.window[0])} → {formatDate(digestPreview.window[1])} + )} + + +
+
{digestPreview.body}
+
+ )}
diff --git a/start9/0.4/startos/utils.ts b/start9/0.4/startos/utils.ts index 1ff5cca..bc5ecf1 100644 --- a/start9/0.4/startos/utils.ts +++ b/start9/0.4/startos/utils.ts @@ -47,8 +47,9 @@ export const PACKAGE_TITLE = 'Ten31 Database' // * 0.1.0:79 (HOTFIX blank-screen: pin @babel/standalone@7.29.7 — the unpinned CDN upgraded to Babel 8, whose preset-react automatic JSX runtime emits an ESM import that blanks the classic inline-script app; plus close 3 server-side admin gaps: GET /api/users, /api/email/status, /api/email/accounts now require_admin) // * 0.1.0:80 (repurpose Communications tab as the admin-only email-activity panel: new GET /api/email/activity [admin-enforced] over the email_* tables, filterable by investor/mailbox/direction + free-text search; classic manual log form retired; code-only, no schema change) // * 0.1.0:81 (Communications tab is matched-only: query_email_activity gates on EXISTS email_investor_links, so unmatched cold/unknown-sender email is captured but never surfaced in the panel; code-only, no schema change) -// * Current: 0.1.0:82 (vendor + SRI-pin the front-end libs: React/ReactDOM/Babel now ship in the s9pk and load same-origin from /assets/vendor/ with integrity hashes, so a CDN can never swap prod deps [the v78/v79 blank-screen class] and the box needs no outbound internet to render; plus a committed jsdom render smoke check [start9/0.4/render-smoke.mjs] gating the default `make` build) -export const PACKAGE_VERSION = '0.1.0:82' +// * 0.1.0:82 (vendor + SRI-pin the front-end libs: React/ReactDOM/Babel now ship in the s9pk and load same-origin from /assets/vendor/ with integrity hashes, so a CDN can never swap prod deps [the v78/v79 blank-screen class] and the box needs no outbound internet to render; plus a committed jsdom render smoke check [start9/0.4/render-smoke.mjs] gating the default `make` build) +// * Current: 0.1.0:83 (email search/query + windowed digest preview, code-only: Communications investor dropdown now mirrors the list with typed keys [fund:/org:/contact:] so classic-contact/org-domain matches show + are pickable [fixes the empty-dropdown bug], plus a date-range filter, a click-to-expand full-body view [GET /api/email/detail], and a semantic "Search content" mode over indexed email bodies [GET /api/email/search -> ingest hybrid_search, soft-delete-filtered, 503 if Spark/Qdrant down]; Daily Digest gains an in-app windowed preview before send [POST /api/admin/digest/preview, send-now takes the same window] that exercises the real Spark summarizer without touching the daily cursor) +export const PACKAGE_VERSION = '0.1.0:83' export const DATA_MOUNT_PATH = '/data' export const WEB_PORT = 8080 diff --git a/start9/0.4/startos/versions/index.ts b/start9/0.4/startos/versions/index.ts index 4315b18..ac470fd 100644 --- a/start9/0.4/startos/versions/index.ts +++ b/start9/0.4/startos/versions/index.ts @@ -43,8 +43,9 @@ import { v_0_1_0_79 } from './v0.1.0.79' import { v_0_1_0_80 } from './v0.1.0.80' import { v_0_1_0_81 } from './v0.1.0.81' import { v_0_1_0_82 } from './v0.1.0.82' +import { v_0_1_0_83 } from './v0.1.0.83' export const versionGraph = VersionGraph.of({ - current: v_0_1_0_82, - other: [v_0_1_0_39, v_0_1_0_40, v_0_1_0_41, v_0_1_0_42, v_0_1_0_43, v_0_1_0_44, v_0_1_0_45, v_0_1_0_46, v_0_1_0_47, v_0_1_0_48, v_0_1_0_49, v_0_1_0_50, v_0_1_0_51, v_0_1_0_52, v_0_1_0_53, v_0_1_0_54, v_0_1_0_55, v_0_1_0_56, v_0_1_0_57, v_0_1_0_58, v_0_1_0_59, v_0_1_0_60, v_0_1_0_61, v_0_1_0_62, v_0_1_0_63, v_0_1_0_64, v_0_1_0_65, v_0_1_0_66, v_0_1_0_67, v_0_1_0_68, v_0_1_0_69, v_0_1_0_70, v_0_1_0_71, v_0_1_0_72, v_0_1_0_73, v_0_1_0_74, v_0_1_0_75, v_0_1_0_76, v_0_1_0_77, v_0_1_0_78, v_0_1_0_79, v_0_1_0_80, v_0_1_0_81], + current: v_0_1_0_83, + other: [v_0_1_0_39, v_0_1_0_40, v_0_1_0_41, v_0_1_0_42, v_0_1_0_43, v_0_1_0_44, v_0_1_0_45, v_0_1_0_46, v_0_1_0_47, v_0_1_0_48, v_0_1_0_49, v_0_1_0_50, v_0_1_0_51, v_0_1_0_52, v_0_1_0_53, v_0_1_0_54, v_0_1_0_55, v_0_1_0_56, v_0_1_0_57, v_0_1_0_58, v_0_1_0_59, v_0_1_0_60, v_0_1_0_61, v_0_1_0_62, v_0_1_0_63, v_0_1_0_64, v_0_1_0_65, v_0_1_0_66, v_0_1_0_67, v_0_1_0_68, v_0_1_0_69, v_0_1_0_70, v_0_1_0_71, v_0_1_0_72, v_0_1_0_73, v_0_1_0_74, v_0_1_0_75, v_0_1_0_76, v_0_1_0_77, v_0_1_0_78, v_0_1_0_79, v_0_1_0_80, v_0_1_0_81, v_0_1_0_82], }) diff --git a/start9/0.4/startos/versions/v0.1.0.83.ts b/start9/0.4/startos/versions/v0.1.0.83.ts new file mode 100644 index 0000000..25cf756 --- /dev/null +++ b/start9/0.4/startos/versions/v0.1.0.83.ts @@ -0,0 +1,24 @@ +import { VersionInfo } from '@start9labs/start-sdk' + +// Email search/query + windowed digest preview. Code-only, no schema change (migrations no-ops): +// * Communications tab: fixed the investor dropdown (was empty whenever email matched a +// classic contact / org domain rather than a grid investor) — the facet now mirrors the +// list with typed keys (fund:/org:/contact:); added a date-range filter and a click-to- +// expand full-body view (GET /api/email/detail), and a semantic "Search content" mode over +// indexed email bodies (GET /api/email/search -> ingest hybrid_search, soft-delete-filtered). +// * Daily digest: Settings -> Admin now builds a digest over a chosen window (last 24h or +// since a date) as an in-app preview before sending (POST /api/admin/digest/preview), and +// the manual send uses the same window. Exercises the real local-Spark summarizer; neither +// touches the daily schedule cursor. +export const v_0_1_0_83 = VersionInfo.of({ + version: '0.1.0:83', + releaseNotes: { + en_US: [ + 'Communications: the investor dropdown now lists every matched relationship (not just', + 'grid investors), plus a date-range filter, a full-body view, and a semantic content', + 'search over email bodies. Daily Digest: preview a digest over any window before sending.', + 'No data changes.', + ].join(' '), + }, + migrations: { up: async () => {}, down: async () => {} }, +})