Email search/query + windowed digest preview (v0.1.0:83)
Communications tab (search/query roadmap items 1 & 2):
- Fix the investor dropdown: the facet only listed grid investors, so it came back empty whenever an email matched a classic contact or org domain (no grid id — the common case). It now mirrors the email list, resolving each link to a typed identity (fund:/org:/contact:/addr:) with precedence grid -> org -> contact -> address; investor_id accepts the typed key (bare id = fund: for back-compat) and an unknown prefix matches nothing.
- Add a date-range filter and a click-to-expand full-body view (GET /api/email/detail, admin, soft-delete-gated; body_text only, never raw remote HTML).
- Add a "Search content" mode: GET /api/email/search wraps the ingest hybrid_search over the Qdrant email index (doc_type=email), hydrated and soft-delete-filtered against SQLite (canonical), 503 if Spark/Qdrant down.

Daily digest:
- Settings -> Admin builds a digest over a chosen window (last 24h or since a date) as an in-app preview before sending (POST /api/admin/digest/preview), so the local-Spark summarizer can be verified on demand even on a quiet day. Manual send uses the same window; neither advances the daily cursor, so a preview never suppresses the scheduled digest.

Code-only, migrations no-op. 22/22 backend tests, render-smoke pass.
This commit is contained in:
@@ -34,6 +34,8 @@ _GET_ROUTES = {
|
||||
"/api/email/status": "status",
|
||||
"/api/email/accounts": "list_accounts",
|
||||
"/api/email/activity": "activity",
|
||||
"/api/email/detail": "detail",
|
||||
"/api/email/search": "search",
|
||||
"/api/email/threads": "list_threads",
|
||||
"/api/email/oauth/start": "oauth_start",
|
||||
"/api/email/oauth/callback": "oauth_callback",
|
||||
@@ -208,6 +210,8 @@ def _h_activity(handler):
|
||||
account_id=(q.get("account_id") or "").strip() or None,
|
||||
search=(q.get("q") or q.get("search") or "").strip() or None,
|
||||
direction=(q.get("direction") or "").strip() or None,
|
||||
since=(q.get("since") or "").strip() or None,
|
||||
until=(q.get("until") or "").strip() or None,
|
||||
limit=limit,
|
||||
)
|
||||
finally:
|
||||
@@ -215,6 +219,97 @@ def _h_activity(handler):
|
||||
handler.send_json(result)
|
||||
|
||||
|
||||
def _h_detail(handler):
    """Send the detail record of a single captured email as JSON.

    Admin-only: the full body + recipients of a captured email is admin-scoped
    substance, same as the activity list it expands from.

    Reads the ``id`` query parameter. Responds 400 ("id required") when it is
    missing or blank, 404 ("Not found") when ``_db.query_email_detail`` returns
    ``None``, and otherwise sends whatever that lookup returned.
    """
    if not _require_admin(handler):
        # NOTE(review): presumably _require_admin has already written the
        # auth error response — confirm against its definition.
        return

    params = handler.get_query_params()
    email_id = (params.get("id") or "").strip()
    if not email_id:
        return handler.send_error_json("id required", 400)

    # The connection is only needed for the lookup itself; release it before
    # writing the response.
    db_conn = _conn()
    try:
        record = _db.query_email_detail(db_conn, email_id)
    finally:
        db_conn.close()

    if record is not None:
        handler.send_json(record)
        return None
    return handler.send_error_json("Not found", 404)
|
||||
|
||||
|
||||
def _semantic_email_search(query: str, top_k: int) -> list:
    """Run the ingest stack's hybrid search, restricted to email documents.

    Hybrid (dense + BM25, reranked) retrieval over the email bodies indexed in
    Qdrant, pre-filtered to doc_type='email'. Returns the raw ranked hits (the
    payload carries source_id=email_id, lp_name, date_ts, text).

    The ingest stack (Spark Control + Qdrant + the sparse encoder) lives in the
    Docker image, so it is imported lazily: on a bare CRM without it this
    function raises, and the caller maps that to a 503.

    Args:
        query: Free-text search string, passed through to ``hybrid_search``.
        top_k: Maximum number of hits requested from ``hybrid_search``.
    """
    import os
    import sys

    # The ingest package sits in an "ingest" directory next to this file's
    # parent directory; make it importable exactly once.
    this_file = os.path.abspath(__file__)
    parent_dir = os.path.dirname(os.path.dirname(this_file))
    ingest_dir = os.path.join(parent_dir, "ingest")
    if ingest_dir not in sys.path:
        sys.path.insert(0, ingest_dir)

    import search as _ingest_search  # ingest/search.py

    # Qdrant-style filter: only points whose doc_type payload equals "email".
    email_only = {"must": [{"key": "doc_type", "match": {"value": "email"}}]}
    return _ingest_search.hybrid_search(
        query,
        top_k=top_k,
        rerank=True,
        filt=email_only,
    )
|
||||
|
||||
|
||||
def _h_search(handler):
    """Semantic search over captured email content (GET /api/email/search).

    Admin-only semantic search over captured email *content* (bodies), distinct
    from the structured subject/sender filters in _h_activity. Matched email
    bodies are the only email indexed in Qdrant (see ingest/chunking). Hits are
    soft-delete-filtered + hydrated against SQLite (canonical) so a deleted
    email never surfaces from the stale index.

    Query parameters:
        q / query: search text; a blank value yields an empty result set.
        top_k: requested number of hits, clamped to 1..50 (default 25; an
            unparsable value also falls back to 25).

    Responses:
        200 ``{"query", "results", "count"}`` — results keep the retrieval
            rank order; hits whose email is not returned by the SQLite lookup
            are dropped.
        503 when retrieval raises for any reason (details are logged to
            stderr only).
    """
    user = _require_admin(handler)
    if not user:
        return
    q = handler.get_query_params()
    query = (q.get("q") or q.get("query") or "").strip()
    if not query:
        # Same shape as a populated response, so clients can read "count"
        # without special-casing the empty query.
        return handler.send_json({"query": "", "results": [], "count": 0})
    try:
        top_k = min(50, max(1, int(q.get("top_k", 25))))
    except (TypeError, ValueError):
        top_k = 25

    try:
        hits = _semantic_email_search(query, top_k)
    except Exception as e:
        # Spark Control / Qdrant unreachable, or the ingest stack isn't installed.
        # Log server-side (an error can carry a URL/host); give the UI a clean 503.
        import sys
        print(f"[email-search] retrieval failed: {type(e).__name__}: {e}", file=sys.stderr)
        return handler.send_error_json("Content search is unavailable (Spark/Qdrant not reachable).", 503)

    # Hydrate + soft-delete-filter against SQLite (canonical), preserving rank order.
    # `hits or []` tolerates a retrieval layer that returns None for "no hits".
    payloads = [(h.get("payload") or {}, h) for h in hits or []]
    # Hits without a source_id can never be hydrated; keep None out of the lookup.
    ids = [p["source_id"] for p, _ in payloads if p.get("source_id") is not None]

    live = {}
    if ids:
        # Only touch the database when there is something to hydrate.
        conn = _conn()
        try:
            live = _db.search_hit_emails(conn, ids)
        finally:
            conn.close()

    # NOTE(review): if the index stores several chunks per email, one email can
    # appear here once per matching chunk — confirm whether the UI expects
    # per-chunk rows or whether results should be de-duplicated by email_id.
    results = []
    for p, h in payloads:
        eid = p.get("source_id")
        email = live.get(eid)
        if not email:
            continue  # deleted since indexing, or not matched-resolvable -> drop
        results.append({
            "email_id": eid,
            "subject": email["subject"],
            "from_name": email["from_name"],
            "from_email": email["from_email"],
            "sent_at": email["sent_at"],
            "direction": email["direction"],
            "has_attachments": email["has_attachments"],
            "lp_name": p.get("lp_name"),
            "score": h.get("score"),
            # Single-line preview: prefer the hit's own text, else the payload text.
            "excerpt": (h.get("text") or p.get("text") or "").replace("\n", " ").strip()[:300],
        })
    handler.send_json({"query": query, "results": results, "count": len(results)})
|
||||
|
||||
|
||||
def _h_list_threads(handler):
|
||||
user = _require_auth(handler)
|
||||
if not user:
|
||||
|
||||
Reference in New Issue
Block a user