diff --git a/backend/mcp/outreach_agent.py b/backend/mcp/outreach_agent.py index ad7862a..0a276a2 100644 --- a/backend/mcp/outreach_agent.py +++ b/backend/mcp/outreach_agent.py @@ -7,6 +7,7 @@ the thesis is Ten31's own non-sensitive messaging and goes to Claude as-is; the context (CRM notes + email history) is scrubbed first, so the LP list never reaches the API in the clear, and the draft is re-hydrated locally for the human. """ +import json import os import sys @@ -89,43 +90,91 @@ def follow_up_radar(conn, our_addresses, now_iso, warm_days=45, limit=60): def _context(conn, investor_id): - """Assemble the recipient's context: CRM notes + recent matched email with them. - Returns (investor_name, context_text) or (None, None).""" + """Assemble the recipient's context. Structured so the model replies to the ACTIVE + conversation (the most recent email thread) while still having earlier emails as + background. Returns (investor_name, context_text) or (None, None).""" row = conn.execute("SELECT investor_name, notes FROM fundraising_investors WHERE id=?", (investor_id,)).fetchone() if not row: return None, None name = row["investor_name"] - parts = [f"Investor: {name}"] + header = [f"Investor: {name}"] notes = (row["notes"] or "").strip() if notes: - parts.append("CRM notes:\n" + notes) + header.append("CRM notes:\n" + notes) try: rows = conn.execute( - "SELECT e.subject, e.body_text, e.snippet, e.sent_at FROM emails e " + "SELECT e.subject, e.body_text, e.snippet, e.sent_at, e.thread_id FROM emails e " "JOIN email_investor_links l ON l.email_id = e.id " "WHERE l.fundraising_investor_id = ? AND e.is_matched = 1 " - "ORDER BY e.sent_at DESC LIMIT 6", (investor_id,)).fetchall() + "ORDER BY e.sent_at DESC LIMIT 20", (investor_id,)).fetchall() + except Exception: + rows = [] # email tables may be absent / not yet captured + active, background = [], [] + if rows: + active_thread = rows[0]["thread_id"] for em in rows: body = (em["body_text"] or em["snippet"] or "")[:1500].strip() - if body or em["subject"]: - parts.append(f"Email ({(em['sent_at'] or '')[:10]}) — {em['subject'] or '(no subject)'}\n{body}") + block = f"({(em['sent_at'] or '')[:10]}) {em['subject'] or '(no subject)'}\n{body}" + in_active = active_thread is not None and em["thread_id"] == active_thread + (active if in_active else background).append(block) + sections = ["\n".join(header)] + if active: + sections.append("=== Active conversation (the most recent thread — this is what you are replying to) ===\n" + + "\n\n".join(reversed(active[:6]))) + if background: + sections.append("=== Earlier emails (background only, not the active thread) ===\n" + + "\n\n".join(background[:4])) + return name, "\n\n".join(sections) + + +def _voice_examples(conn, sender_email, limit=4): + """The sender's OWN recent sent LP emails — used as voice few-shot AND surfaced for + transparency (no black box). Returns (blocks_for_model, meta_for_ui). meta is the + sender's own emails, safe to show them.""" + if not sender_email: + return [], [] + try: + rows = conn.execute( + "SELECT subject, body_text, snippet, sent_at, to_emails_json FROM emails " + "WHERE LOWER(from_email) = LOWER(?) AND is_matched = 1 " + "AND body_text IS NOT NULL AND TRIM(body_text) <> '' " + "ORDER BY sent_at DESC LIMIT ?", (sender_email, limit)).fetchall() except Exception: - pass # email tables may be absent / not yet captured - return name, "\n\n---\n\n".join(parts) + return [], [] + blocks, meta = [], [] + for r in rows: + body = (r["body_text"] or r["snippet"] or "")[:1200].strip() + if not body: + continue + blocks.append(f"Example — {r['subject'] or '(no subject)'}\n{body}") + to = "" + try: + arr = json.loads(r["to_emails_json"] or "[]") + if arr: + to = arr[0].get("email") if isinstance(arr[0], dict) else arr[0] + except Exception: + to = "" + meta.append({"subject": r["subject"] or "(no subject)", "date": (r["sent_at"] or "")[:10], "to": to}) + return blocks, meta -def _draft_with_claude(aa, thesis, type_desc, deident_context, guidance): +def _draft_with_claude(aa, thesis, type_desc, deident_context, deident_voice, guidance): + voice_block = "" + if deident_voice: + voice_block = ("\n\nHere are examples of how THIS sender actually writes (de-identified). Match their " + "voice, tone, sentence rhythm, openers, and sign-off — not just the rules above:\n\n" + + "\n\n---\n\n".join(deident_voice)) system = ( - "You are Ten31's outreach copilot. Draft ONE ready-to-send LP outreach email in Ten31's voice. " - f"VOICE RULES (follow exactly): {aa.VOICE}\n\n" + "You are Ten31's outreach copilot. Draft ONE ready-to-send LP outreach email in the SENDER's voice. " + f"VOICE RULES (follow exactly): {aa.VOICE}" + voice_block + "\n\n" "Ten31 invests in critical infrastructure across bitcoin, AI, energy, and freedom technologies, " "with scarcity as the connecting idea. Current working thesis:\n" + aa._render_thesis(thesis) + "\n\n" "The recipient's context below is DE-IDENTIFIED: people, firms, and amounts appear as placeholders " "like [PERSON_1], [ORG_1], [AMOUNT_1]. Keep every placeholder EXACTLY as written and NEVER invent new " - "ones — they are swapped back to real values after you reply. Output a subject line, then the email body. " - "Ground it in the actual context; do NOT fabricate facts, numbers, returns, or commitments that are not " - "present in the context or the thesis.") + "ones — they are swapped back to real values after you reply. Reply to the ACTIVE conversation; use the " + "earlier emails only as background. Output a subject line, then the email body. Do NOT fabricate facts, " + "numbers, returns, or commitments that are not present in the context or the thesis.") user = (f"Outreach type: {type_desc}\n\n" f"Recipient context (de-identified):\n{deident_context}\n\n" + (f"Additional guidance from the sender: {guidance}\n\n" if (guidance or "").strip() else "") @@ -137,31 +186,35 @@ def _draft_with_claude(aa, thesis, type_desc, deident_context, guidance): return "".join(b.text for b in resp.content if getattr(b, "type", None) == "text") -def draft_outreach(conn, investor_id, outreach_type, guidance, db_path): - """Draft tailored outreach for one investor. FAILS CLOSED: if the scrub can't be - prepared or Claude hallucinates a placeholder, no de-anonymized draft is returned.""" +def draft_outreach(conn, investor_id, outreach_type, guidance, db_path, sender_email=None): + """Draft tailored outreach for one investor, in the SENDER's voice (few-shot from + their own prior emails). FAILS CLOSED: if the scrub can't be prepared or Claude + hallucinates a placeholder, no de-anonymized draft is returned.""" name, context = _context(conn, investor_id) if not name: return {"status": "not_found"} type_desc = OUTREACH_TYPES.get(outreach_type, OUTREACH_TYPES["follow_up"]) + voice_blocks, voice_meta = _voice_examples(conn, sender_email) - # 1) Scrub the LP context — the LP list / identifiers never reach Claude in the clear. + # 1) Scrub the sender's voice examples + the recipient context TOGETHER (shared token + # space). Nothing reaches Claude in the clear; the voice examples are reference only. try: sys.path.insert(0, os.path.dirname(_HERE)) # backend/ for the redaction package from redaction.client import Boundary boundary = Boundary(db_path=db_path, actor="closer") - scrubbed = boundary.scrub([context], bucket=False, conn=conn) + scrubbed = boundary.scrub(list(voice_blocks) + [context], bucket=False, conn=conn) except Exception as exc: return {"status": "scrub_unavailable", "reason": str(exc)} - deident = scrubbed["items"][0] + items = scrubbed["items"] + deident_voice, deident_target = items[:-1], items[-1] handle = scrubbed["handle"] - # 2) Claude drafts over the de-identified context + (non-sensitive) thesis. + # 2) Claude drafts over the de-identified context + voice + (non-sensitive) thesis. try: sys.path.insert(0, _HERE) import architect_agent as aa thesis = aa.at.get_thesis("core", db=db_path) - raw = _draft_with_claude(aa, thesis, type_desc, deident, guidance) + raw = _draft_with_claude(aa, thesis, type_desc, deident_target, deident_voice, guidance) except Exception as exc: boundary.forget(handle) return {"status": "claude_not_configured", "reason": str(exc)} @@ -172,4 +225,4 @@ def draft_outreach(conn, investor_id, outreach_type, guidance, db_path): if rehy.get("error"): return {"status": "rehydrate_failed"} return {"status": "ok", "draft": rehy["text"], "investor_name": name, - "scrub_stats": scrubbed.get("stats", {})} + "scrub_stats": scrubbed.get("stats", {}), "voice_examples": voice_meta} diff --git a/backend/mcp/test_outreach.py b/backend/mcp/test_outreach.py index efdaa38..8d40f8f 100644 --- a/backend/mcp/test_outreach.py +++ b/backend/mcp/test_outreach.py @@ -27,14 +27,15 @@ def main(): c.row_factory = sqlite3.Row c.executescript(""" CREATE TABLE fundraising_investors (id TEXT PRIMARY KEY, investor_name TEXT, notes TEXT); - CREATE TABLE emails (id TEXT PRIMARY KEY, subject TEXT, body_text TEXT, snippet TEXT, sent_at TEXT, is_matched INT); + CREATE TABLE emails (id TEXT PRIMARY KEY, subject TEXT, body_text TEXT, snippet TEXT, sent_at TEXT, + from_email TEXT, to_emails_json TEXT, thread_id TEXT, is_matched INT); CREATE TABLE email_investor_links (id TEXT, email_id TEXT, fundraising_investor_id TEXT); """) c.execute("INSERT INTO fundraising_investors VALUES ('inv1','Harbor & Vine','Met at the conference; interested in Fund III.')") - c.executemany("INSERT INTO emails (id,subject,body_text,sent_at,is_matched) VALUES (?,?,?,?,1)", [ - ("e1", "Re: Fund III", "Thanks for the call. We are still weighing the lock-up terms.", "2026-06-02T10:00:00"), - ("e2", "Intro", "Good to meet you at the dinner.", "2026-05-01T10:00:00"), - ("e3", "Spam", "ignore me", "2026-04-01T10:00:00"), # not linked -> excluded + c.executemany("INSERT INTO emails (id,subject,body_text,sent_at,thread_id,is_matched) VALUES (?,?,?,?,?,1)", [ + ("e1", "Re: Fund III", "Thanks for the call. We are still weighing the lock-up terms.", "2026-06-02T10:00:00", "t1"), + ("e2", "Intro", "Good to meet you at the dinner.", "2026-05-01T10:00:00", "t0"), + ("e3", "Spam", "ignore me", "2026-04-01T10:00:00", "t9"), # not linked -> excluded ]) c.executemany("INSERT INTO email_investor_links (id,email_id,fundraising_investor_id) VALUES (?,?, 'inv1')", [("l1", "e1"), ("l2", "e2")]) @@ -43,10 +44,23 @@ def main(): name, ctx = oa._context(c, "inv1") check(name == "Harbor & Vine", f"resolves investor name (got {name!r})") check("Met at the conference" in ctx, "includes CRM notes") - check("lock-up terms" in ctx, "includes matched email body") - check("Good to meet you" in ctx, "includes a second matched email") + check("lock-up terms" in ctx, "active-thread email present") + check("Good to meet you" in ctx, "earlier email present as background") check("ignore me" not in ctx, "excludes email not linked to this investor") - check(ctx.index("lock-up terms") < ctx.index("Good to meet you"), "newest email first") + check("Active conversation" in ctx and "Earlier emails" in ctx + and ctx.index("lock-up terms") < ctx.index("Good to meet you"), + "active thread is separated from background, active first") + + # voice examples: the sender's own sent emails (few-shot + transparency) + c.execute("INSERT INTO emails (id,subject,body_text,sent_at,from_email,to_emails_json,thread_id,is_matched) " + "VALUES ('v1','My note','Hi there, quick update on the fund. Best, Grant'," + "'2026-06-01T10:00:00','grant@ten31.xyz','[{\"email\":\"lp@x.example\"}]','tv',1)") + c.commit() + blocks, meta = oa._voice_examples(c, "grant@ten31.xyz") + check(len(blocks) == 1 and "quick update on the fund" in blocks[0], "voice example pulls the sender's own email") + check(len(meta) == 1 and meta[0]["subject"] == "My note" and meta[0]["to"] == "lp@x.example", + "voice meta carries subject + recipient for transparency") + check(oa._voice_examples(c, None) == ([], []), "no sender -> no voice examples") n2, c2 = oa._context(c, "missing") check(n2 is None and c2 is None, "unknown investor -> (None, None)") diff --git a/backend/server.py b/backend/server.py index 7a1781c..ac67c6c 100644 --- a/backend/server.py +++ b/backend/server.py @@ -3948,8 +3948,14 @@ class CRMHandler(BaseHTTPRequestHandler): return self.send_error_json("investor_id required", 400) conn = get_db() try: + sender_email = None + try: + r = conn.execute("SELECT email FROM users WHERE id=?", (user.get('user_id'),)).fetchone() + sender_email = r[0] if r else None + except Exception: + pass res = _outreach_agent.draft_outreach(conn, inv, body.get('outreach_type', 'follow_up'), - body.get('guidance', '') or '', DB_PATH) + body.get('guidance', '') or '', DB_PATH, sender_email=sender_email) try: conn.execute( "INSERT INTO interaction_log (id, ts, actor_type, actor_id, action, target_type, target_id, payload, source, created_at) " diff --git a/frontend/index.html b/frontend/index.html index a3dc4fb..9b57fce 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -10071,7 +10071,7 @@ value={guidance} onChange={(e) => setGuidance(e.target.value)} />