Add daily activity digest — Phase B (v0.1.0:77)

Sends a once-a-day internal email to all active admins summarizing each team
member's email activity per investor, plus a team-wide by-investor view
(inbound + outbound, deduped). Narratives are generated on the LOCAL Spark
model, never Claude — the digest is intentionally un-anonymized, so substance
stays on Ten31 infra. This is an internal ops email, exempt from the
'agents draft, humans send' rule (which governs outward LP contact).

- backend/digest_builder.py: per-user + per-investor activity queries
  (soft-delete filtered), per-user Spark narrative with a deterministic
  fallback, two-section plain-text body, and the DB-backed policy resolver.
- backend/email_integration/digest_scheduler.py: always-on daily thread that
  re-reads the policy each cycle and sends once/day; window cursor in
  app_settings so a missed day rolls forward.
- server.py: POST /api/admin/digest/send-now and GET/PATCH
  /api/admin/digest/policy; scheduler wired into main().
- Control lives in Settings -> Admin (enable toggle + send-time dropdown),
  not StartOS actions; env vars only seed the first-boot default.
- Tests: backend/test_digest_builder.py.
This commit is contained in:
Keysat
2026-06-15 22:32:27 -05:00
parent 036226ed74
commit 323f016f64
12 changed files with 1113 additions and 19 deletions
+369
View File
@@ -0,0 +1,369 @@
"""Daily activity digest — content builder (Phase B).
Assembles the per-user -> per-investor email-activity digest and summarizes each
team member's day with ONE narrative paragraph from the LOCAL Spark model
(ingest/llm.py via Spark Control). NEVER Claude: the digest is deliberately
un-anonymized (real LP names + email substance), so every summarization stays on
Ten31 infra. Keeping the substance local is the whole point — this is the one
path that intentionally bypasses the scrub -> Claude -> re-hydrate boundary.
This is an internal ops email to the team's own admins, so it is exempt from the
"agents draft, humans send" rule — that rule governs outward LP/prospect contact,
not an internal digest to the fund's own inboxes. Never extend it to send to LPs.
Soft-delete: every read here filters the relevant tombstones —
`email_account_messages.deleted_at IS NULL`, `users.is_active = 1`, and the
org/contact name joins drop soft-deleted rows (falling back to the raw address).
Stdlib only; the local-LLM client is imported lazily so this module stays
importable (and testable with an injected chat fn) without Spark configured.
"""
import json
import os
import sqlite3
from datetime import datetime, timezone
# One row per (account-sighting x investor-link) in the window. Grouped into
# per-user buckets in Python. Investor display name resolves fundraising grid ->
# organization -> contact -> the raw matched address, skipping soft-deleted
# org/contact rows (fundraising_investors has no soft-delete column — it is a
# rebuilt projection of the grid).
_ACTIVITY_SQL = """
SELECT
ea.user_id AS user_id,
u.username AS username,
u.full_name AS full_name,
ea.email_address AS account_email,
eam.is_sent AS is_sent,
e.id AS email_id,
e.from_email AS from_email,
e.subject AS subject,
e.body_text AS body_text,
e.snippet AS snippet,
e.sent_at AS sent_at,
COALESCE(
NULLIF(TRIM(fi.investor_name), ''),
NULLIF(TRIM(o.name), ''),
NULLIF(TRIM(COALESCE(c.first_name, '') || ' ' || COALESCE(c.last_name, '')), ''),
eil.matched_address
) AS investor_name
FROM email_account_messages eam
JOIN email_accounts ea ON ea.id = eam.account_id
JOIN users u ON u.id = ea.user_id
JOIN emails e ON e.id = eam.email_id
JOIN email_investor_links eil ON eil.email_id = e.id
LEFT JOIN fundraising_investors fi ON fi.id = eil.fundraising_investor_id
LEFT JOIN organizations o ON o.id = eil.organization_id AND o.deleted_at IS NULL
LEFT JOIN contacts c ON c.id = eil.contact_id AND c.deleted_at IS NULL
WHERE eam.deleted_at IS NULL
AND u.is_active = 1
AND e.is_matched = 1
AND e.sent_at >= ? AND e.sent_at < ?
ORDER BY u.full_name, u.username, e.sent_at ASC
"""
_RULE = "" * 52
_FOOTER = ("— Internal Ten31 CRM digest. Narratives are generated locally (Spark), "
"never Claude. This mailbox is unmonitored.")
_SYSTEM = (
"You write a brief internal activity digest for a venture fund's partners. "
"Given one team member's emails with investors over the last day, write 2-4 "
"sentences summarizing what they did: which investors they engaged and the "
"gist of each thread. Name the investors. Past tense, plain prose, no "
"greeting, no bullet points, no sign-off."
)
# ------------------------------------------------------------------ collection
def _fetch_activity_rows(conn, since_iso, until_iso):
"""Raw (sighting x investor-link) rows for the window. [] if email tables absent."""
try:
return conn.execute(_ACTIVITY_SQL, (since_iso, until_iso)).fetchall()
except sqlite3.OperationalError:
return [] # email tables not present (integration disabled) — nothing to report
def _own_addresses(conn):
"""Lower-cased set of enrolled mailbox addresses — used to decide whether an
email is outbound (from us) or inbound (from the investor) at the email level."""
try:
return {(r[0] or "").lower().strip()
for r in conn.execute("SELECT email_address FROM email_accounts")}
except sqlite3.OperationalError:
return set()
def collect_user_activity(conn, since_iso, until_iso):
"""Return per-user activity buckets for emails in [since_iso, until_iso).
Each bucket: {user_id, username, full_name, account_email, emails[], investors[],
sent, received, total}. Empty list if the email tables are absent. Only users
who had activity appear. Direction here is per-mailbox (eam.is_sent): did THIS
user send the message."""
rows = _fetch_activity_rows(conn, since_iso, until_iso)
groups = {}
for r in rows:
uid = r["user_id"]
g = groups.get(uid)
if g is None:
g = {"user_id": uid, "username": r["username"], "full_name": r["full_name"],
"account_email": r["account_email"], "_emails": {}, "_inv": set()}
groups[uid] = g
eid = r["email_id"]
em = g["_emails"].get(eid)
if em is None:
em = {"email_id": eid,
"direction": "sent" if r["is_sent"] else "received",
"subject": r["subject"], "sent_at": r["sent_at"],
"text": r["body_text"] or r["snippet"] or "", "investors": []}
g["_emails"][eid] = em
inv = (r["investor_name"] or "").strip()
if inv:
if inv not in em["investors"]:
em["investors"].append(inv)
g["_inv"].add(inv)
out = []
for g in groups.values():
emails = sorted(g["_emails"].values(), key=lambda e: e["sent_at"] or "")
sent = sum(1 for e in emails if e["direction"] == "sent")
out.append({
"user_id": g["user_id"], "username": g["username"],
"full_name": g["full_name"], "account_email": g["account_email"],
"emails": emails, "investors": sorted(g["_inv"]),
"sent": sent, "received": len(emails) - sent, "total": len(emails),
})
out.sort(key=lambda x: (x["full_name"] or x["username"] or "").lower())
return out
def collect_investor_activity(conn, since_iso, until_iso):
"""Re-pivot the same window by investor (across the whole team), deduping each
email so a reply to several team members counts once. Direction is decided at
the EMAIL level: outbound if the sender is one of our mailboxes, else inbound.
Each bucket: {name, emails[{email_id, direction in/out, subject, sent_at,
members[]}], inbound, outbound, total}. Sorted most-active first."""
rows = _fetch_activity_rows(conn, since_iso, until_iso)
own = _own_addresses(conn)
groups = {}
for r in rows:
name = (r["investor_name"] or "").strip() or "(unmatched)"
g = groups.get(name)
if g is None:
g = {"name": name, "_emails": {}}
groups[name] = g
eid = r["email_id"]
em = g["_emails"].get(eid)
if em is None:
outbound = (r["from_email"] or "").lower().strip() in own
em = {"email_id": eid, "direction": "out" if outbound else "in",
"subject": r["subject"], "sent_at": r["sent_at"], "members": []}
g["_emails"][eid] = em
# Attribute the sending team member on outbound mail (the sighting with
# is_sent=1); inbound is "from them", so no member shown.
if em["direction"] == "out" and r["is_sent"]:
who = (r["full_name"] or r["username"] or "").strip()
if who and who not in em["members"]:
em["members"].append(who)
out = []
for g in groups.values():
emails = sorted(g["_emails"].values(), key=lambda e: e["sent_at"] or "")
inbound = sum(1 for e in emails if e["direction"] == "in")
out.append({"name": g["name"], "emails": emails,
"inbound": inbound, "outbound": len(emails) - inbound, "total": len(emails)})
out.sort(key=lambda x: (-x["total"], x["name"].lower()))
return out
# ------------------------------------------------------------------ policy
DIGEST_POLICY_KEY = "digest_policy"
DEFAULT_DIGEST_POLICY = {"enabled": False, "send_hour": 18}
def load_digest_policy(conn):
"""Resolve the live digest policy. Precedence: the app_settings DB row (the
admin-panel control) wins; absent that, the CRM_DIGEST_ENABLED/SEND_HOUR env
vars seed a first-boot default; absent those, DEFAULT_DIGEST_POLICY. Returns
{enabled: bool, send_hour: int 0-23}. Shared by the server (API) and the
scheduler so both read one source of truth."""
pol = dict(DEFAULT_DIGEST_POLICY)
env_enabled = os.environ.get("CRM_DIGEST_ENABLED")
if env_enabled is not None:
pol["enabled"] = env_enabled.lower() in ("1", "true", "yes", "on")
env_hour = os.environ.get("CRM_DIGEST_SEND_HOUR")
if env_hour:
try:
pol["send_hour"] = min(23, max(0, int(env_hour)))
except ValueError:
pass
try:
row = conn.execute(
"SELECT value_json FROM app_settings WHERE key = ?", (DIGEST_POLICY_KEY,)).fetchone()
except sqlite3.OperationalError:
row = None
if row:
try:
saved = json.loads(row["value_json"])
except Exception:
saved = None
if isinstance(saved, dict):
if "enabled" in saved:
pol["enabled"] = bool(saved["enabled"])
if "send_hour" in saved:
try:
pol["send_hour"] = min(23, max(0, int(saved["send_hour"])))
except (ValueError, TypeError):
pass
return pol
# ------------------------------------------------------------------ summarization
def _default_chat(prompt, system=None, max_tokens=220):
"""Lazily reach the local Qwen chat via Spark Control (ingest/llm.py)."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "ingest"))
import llm # noqa: E402
return llm.chat(prompt, system=system, max_tokens=max_tokens)
def _user_email_block(group, max_emails=20, body_chars=500):
lines = []
for em in group["emails"][:max_emails]:
invs = ", ".join(em["investors"]) or "(unmatched)"
body = " ".join((em.get("text") or "").split())[:body_chars]
line = f"- [{em['direction']}] {invs} | subject: {em.get('subject') or '(none)'}"
if body:
line += f" | {body}"
lines.append(line)
return "\n".join(lines)
def _fallback_narrative(group):
"""Deterministic summary when the local model is unavailable — the digest
must still send (always-send) with real counts rather than fail."""
name = group.get("full_name") or group.get("username") or "Team member"
invs = ", ".join(group["investors"]) or "no matched investors"
return (f"{name} had {group['total']} email(s) "
f"({group['sent']} sent, {group['received']} received) with {invs}. "
"(Local summary unavailable.)")
def summarize_user_day(group, chat_fn=None):
"""One narrative paragraph for a user's day, from the local model. Falls back
to a deterministic count summary on any error or empty reply."""
fn = chat_fn or _default_chat
name = group.get("full_name") or group.get("username") or "The team member"
prompt = f"Team member: {name}\nEmails:\n{_user_email_block(group)}"
try:
out = fn(prompt, system=_SYSTEM, max_tokens=220)
out = " ".join((out or "").split()).strip()
if out:
return out
except Exception:
pass
return _fallback_narrative(group)
# ------------------------------------------------------------------ composition
def _parse_iso(iso):
if not iso:
return None
s = str(iso).strip()
for fmt in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S"):
try:
return datetime.strptime(s, fmt).replace(tzinfo=timezone.utc)
except ValueError:
continue
return None
def _fmt_local(iso):
"""UTC ISO -> human local time, e.g. 'Jun 17 2:14 PM'. Manual 12h formatting
to stay portable (no platform-specific %-I)."""
dt = _parse_iso(iso)
if dt is None:
return str(iso)
dt = dt.astimezone()
hour12 = dt.hour % 12 or 12
ampm = "AM" if dt.hour < 12 else "PM"
return f"{dt.strftime('%b')} {dt.day} {hour12}:{dt.minute:02d} {ampm}"
def _compose_body(user_groups, investor_groups, narratives, since_iso, until_iso):
title_date = datetime.now().astimezone().strftime("%A, %b %d %Y")
window = f"{_fmt_local(since_iso)} {_fmt_local(until_iso)}"
L = ["Ten31 CRM — Daily Activity Digest", title_date, f"Window: {window}", ""]
if not user_groups:
L += ["No tracked email activity from any user in this window.", "", _FOOTER]
return "\n".join(L)
total_emails = sum(g["total"] for g in user_groups)
total_invs = len({i for g in user_groups for i in g["investors"]})
L.append(f"{len(user_groups)} team member(s) active · {total_emails} email(s) "
f"· {total_invs} investor(s)")
# ── Section 1: by team member (who did what; per-user Spark narrative) ──
L += ["", _RULE, "BY TEAM MEMBER", _RULE]
for g in user_groups:
invs = ", ".join(g["investors"]) or "(no matched investor)"
L += ["",
f"{g['full_name'] or g['username']} · {g['account_email']}",
f"{g['total']} email(s) ({g['sent']} sent, {g['received']} received) "
f"· {invs}", "",
narratives.get(g["user_id"], ""), ""]
for em in g["emails"]:
arrow = "→ Sent" if em["direction"] == "sent" else "← Received"
invs_e = ", ".join(em["investors"]) or "(unmatched)"
subj = em.get("subject") or "(no subject)"
L.append(f" {arrow} · {invs_e} · \"{subj}\" ({_fmt_local(em['sent_at'])})")
# ── Section 2: by investor (team-wide; both directions, structured) ──
L += ["", _RULE, "BY INVESTOR", _RULE]
for inv in investor_groups:
L += ["",
f"{inv['name']} · {inv['total']} email(s) "
f"({inv['inbound']} in, {inv['outbound']} out)"]
for em in inv["emails"]:
subj = em.get("subject") or "(no subject)"
when = _fmt_local(em["sent_at"])
if em["direction"] == "out":
who = ", ".join(em["members"]) or "team"
L.append(f" → Sent by {who} · \"{subj}\" ({when})")
else:
L.append(f" ← Received · \"{subj}\" ({when})")
L += ["", _RULE, _FOOTER]
return "\n".join(L)
def build_digest(conn, since_iso, until_iso, chat_fn=None):
"""Build the daily digest for [since_iso, until_iso). Returns
{subject, body, has_activity, user_count, email_count, investor_count}. Always
returns a body (empty windows get a 'no activity' note — the team chose
always-send). Two sections: by team member (per-user Spark narrative) and by
investor (structured, both directions)."""
user_groups = collect_user_activity(conn, since_iso, until_iso)
investor_groups = collect_investor_activity(conn, since_iso, until_iso)
narratives = {g["user_id"]: summarize_user_day(g, chat_fn) for g in user_groups}
body = _compose_body(user_groups, investor_groups, narratives, since_iso, until_iso)
stamp = datetime.now().astimezone().strftime("%b %d")
return {
"subject": f"Ten31 CRM — Daily Activity Digest · {stamp}",
"body": body,
"has_activity": bool(user_groups),
"user_count": len(user_groups),
"email_count": sum(g["total"] for g in user_groups),
"investor_count": len(investor_groups),
}