outreach: voice by-purpose (larger sample) + Tier-B Gmail draft creation (v0.1.0:71)

(1) Voice: _voice_examples now prefers the sender's prior sent emails OF THE SAME PURPOSE (matched by PURPOSE_PATTERNS keyword cues per outreach type). It draws a larger sample (8, up from 4) from the sender's 80 most recent matched emails with a non-empty body, ranked by purpose match first and recency second — not just the most recent — and falls back to plain recency when the outreach type has no patterns or nothing matches. Each example body is now capped at 900 characters (was 1200). Each meta entry carries an on_topic flag (true when at least one purpose keyword matched) for transparency. (2) Tier-B sending (gmail.compose now authorized in Workspace DWD). New email_integration/compose.py create_outreach_draft: mints a compose-scoped DWD token for the sender (credentials._mint/access_token_for parameterized by scope; GMAIL_COMPOSE_SCOPE), builds an RFC822 message, and POSTs gmail.drafts.create into the SENDER's mailbox — as an in-thread reply (threadId + In-Reply-To/References, recipient = matched LP address) when there's an active thread, else a fresh email. NEVER sends — the human sends from Gmail (guardrails #4, #6). Route POST /api/outreach/gmail-draft; UI "Create Gmail draft" button + "Open Gmail Drafts" link. Tests: test_compose.py (parse/reply-target/RFC822+threading). Message construction is unit-verified; the live drafts.create call runs on the box. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-08 22:30:05 -05:00
parent 49f84ca9a4
commit 606b336a00
9 changed files with 297 additions and 19 deletions
@@ -128,10 +128,23 @@ def _context(conn, investor_id):
    return name, "\n\n".join(sections)


-def _voice_examples(conn, sender_email, limit=4):
-    """The sender's OWN recent sent LP emails — used as voice few-shot AND surfaced for
-    transparency (no black box). Returns (blocks_for_model, meta_for_ui). meta is the
-    sender's own emails, safe to show them."""
+# Keyword cues used to pick the sender's prior emails of the SAME PURPOSE as the draft
+# (so the voice few-shot matches what they're writing, not just whatever is most recent).
+PURPOSE_PATTERNS = {
+    "intro": ["introduc", "nice to meet", "reaching out", "wanted to connect", "by way of introduction", "e-meet"],
+    "follow_up": ["follow up", "following up", "circle back", "circling back", "checking in",
+                  "wanted to revisit", "any thoughts", "wanted to follow", "touching base"],
+    "fund_update": ["update", "progress", "quarter", "deployed", "portfolio", "milestone", "closing", "fund iii"],
+    "meeting_follow_up": ["great to meet", "great speaking", "thanks for the call", "thanks for your time",
+                          "after our", "following our", "enjoyed our", "great to connect", "great chatting"],
+    "nurture": ["checking in", "hope you", "thinking of you", "stay in touch", "wanted to share", "thought you"],
+}
+
+
+def _voice_examples(conn, sender_email, outreach_type=None, limit=8):
+    """The sender's OWN sent LP emails OF THE SAME PURPOSE — used as voice few-shot AND
+    surfaced for transparency (no black box). Larger sample, purpose-weighted (not just
+    recent). Returns (blocks_for_model, meta_for_ui); meta is the sender's own emails."""
    if not sender_email:
        return [], []
    try:
@@ -139,12 +152,19 @@ def _voice_examples(conn, sender_email, limit=4):
            "SELECT subject, body_text, snippet, sent_at, to_emails_json FROM emails "
            "WHERE LOWER(from_email) = LOWER(?) AND is_matched = 1 "
            "AND body_text IS NOT NULL AND TRIM(body_text) <> '' "
-            "ORDER BY sent_at DESC LIMIT ?", (sender_email, limit)).fetchall()
+            "ORDER BY sent_at DESC LIMIT 80", (sender_email,)).fetchall()
    except Exception:
        return [], []
+    pats = PURPOSE_PATTERNS.get(outreach_type or "", [])
+    scored = []
+    for idx, r in enumerate(rows):
+        text = ((r["subject"] or "") + " " + (r["body_text"] or r["snippet"] or "")).lower()
+        score = sum(1 for p in pats if p in text)
+        scored.append((score, -idx, r))  # purpose match first, then more recent
+    scored.sort(key=lambda x: (x[0], x[1]), reverse=True)
    blocks, meta = [], []
-    for r in rows:
-        body = (r["body_text"] or r["snippet"] or "")[:1200].strip()
+    for score, _neg_idx, r in scored[:limit]:
+        body = (r["body_text"] or r["snippet"] or "")[:900].strip()
        if not body:
            continue
        blocks.append(f"Example — {r['subject'] or '(no subject)'}\n{body}")
@@ -155,7 +175,8 @@ def _voice_examples(conn, sender_email, limit=4):
                to = arr[0].get("email") if isinstance(arr[0], dict) else arr[0]
        except Exception:
            to = ""
-        meta.append({"subject": r["subject"] or "(no subject)", "date": (r["sent_at"] or "")[:10], "to": to})
+        meta.append({"subject": r["subject"] or "(no subject)", "date": (r["sent_at"] or "")[:10],
+                     "to": to, "on_topic": score > 0})
    return blocks, meta


@@ -194,7 +215,7 @@ def draft_outreach(conn, investor_id, outreach_type, guidance, db_path, sender_e
    if not name:
        return {"status": "not_found"}
    type_desc = OUTREACH_TYPES.get(outreach_type, OUTREACH_TYPES["follow_up"])
-    voice_blocks, voice_meta = _voice_examples(conn, sender_email)
+    voice_blocks, voice_meta = _voice_examples(conn, sender_email, outreach_type)

    # 1) Scrub the sender's voice examples + the recipient context TOGETHER (shared token
    #    space). Nothing reaches Claude in the clear; the voice examples are reference only.