grounding: wire matched email bodies into the LP-feedback corpus
_ground_feedback_corpus now pulls matched email bodies (the richest objection signal) alongside communications and grid notes, round-robin merged so email is never crowded out by a flat LIMIT, per-item capped at 4000 chars to keep the local minimize tractable on long threads, and degrading gracefully when the email tables are absent. Email remains Tier-2-sensitive: it only ever enters the redaction boundary, never Claude directly. Inert until Gmail capture is enrolled. Not yet deployed (bundles into the next release with the meeting-notes work). Test: test_ground_corpus.py. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
+28
-9
@@ -3788,17 +3788,36 @@ class CRMHandler(BaseHTTPRequestHandler):
|
|||||||
return self.send_json({"data": res})
|
return self.send_json({"data": res})
|
||||||
|
|
||||||
def _ground_feedback_corpus(self, conn, limit=60):
    """Collect raw LP-feedback prose for grounding, balanced across sources.

    Three sources are queried, in this priority order:

    1. matched email bodies (``emails.body_text`` where ``match_status`` is
       ``'matched'``), newest ``sent_at`` first, each truncated to 4000
       characters so long threads/quote-chains keep the local minimize tractable;
    2. logged communications (``communications.body``), newest
       ``communication_date`` first;
    3. fundraising grid notes (``fundraising_investors.notes``), in whatever
       order the database returns them (that query has no ORDER BY).

    The per-source results are merged round-robin (first row of each source,
    then second row of each, ...) so email is always represented even when
    communications/notes are plentiful, rather than crowded out by a flat LIMIT.

    Sensitive Tier-2-heavy text; ONLY ever passed into the redaction boundary,
    never to Claude directly.

    Args:
        conn: database connection whose ``execute(sql, params)`` yields rows
            indexable by position.
        limit: maximum number of items to return; also used as the per-source
            row cap. A non-positive value yields an empty list.

    Returns:
        A list of at most ``limit`` text items.
    """
    # SQLite treats a negative LIMIT as "no limit"; bail out early instead of
    # loading every row of three sensitive tables only to discard them.
    if limit <= 0:
        return []

    sources = (
        "SELECT SUBSTR(body_text,1,4000) FROM emails WHERE match_status='matched' "
        "AND body_text IS NOT NULL AND TRIM(body_text)<>'' ORDER BY sent_at DESC LIMIT ?",
        "SELECT body FROM communications WHERE body IS NOT NULL AND TRIM(body)<>'' "
        "ORDER BY communication_date DESC LIMIT ?",
        "SELECT notes FROM fundraising_investors WHERE notes IS NOT NULL AND TRIM(notes)<>'' LIMIT ?",
    )

    buckets = []
    for query in sources:
        try:
            buckets.append([row[0] for row in conn.execute(query, (limit,))])
        except Exception:
            # Deliberate best-effort: table absent (e.g. email integration not
            # migrated) -> this source contributes nothing.
            buckets.append([])

    # Round-robin merge: take rank 0 from every source, then rank 1, ... and
    # stop as soon as `limit` items have been collected.
    items = []
    for rank in range(max(len(bucket) for bucket in buckets)):
        for bucket in buckets:
            if rank < len(bucket):
                items.append(bucket[rank])
                if len(items) >= limit:
                    return items
    return items
|
||||||
|
|
||||||
def handle_architect_ground(self, user, body):
|
def handle_architect_ground(self, user, body):
|
||||||
"""Ground an objection register in real LP feedback THROUGH the redaction boundary
|
"""Ground an objection register in real LP feedback THROUGH the redaction boundary
|
||||||
|
|||||||
@@ -0,0 +1,75 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Test the grounding feedback corpus assembly (CRMHandler._ground_feedback_corpus).
|
||||||
|
|
||||||
|
Verifies email bodies are wired in: matched-only, balanced round-robin merge across
|
||||||
|
email/communications/grid-notes, per-item length cap on long threads, and graceful
|
||||||
|
degradation when the email tables are absent. Synthetic data only (guardrail #9).
|
||||||
|
Run: cd backend && python3 test_ground_corpus.py
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
# Give CRM_DB_PATH a throwaway default before importing server (presumably server
# reads it at import time -- confirm). The temp directory is created only when the
# variable is not already set, so an externally supplied path leaves no stray dir.
if "CRM_DB_PATH" not in os.environ:
    os.environ["CRM_DB_PATH"] = os.path.join(tempfile.mkdtemp(), "import.db")
# Put this script's own directory first on sys.path so `import server` resolves.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import server  # noqa: E402

corpus = server.CRMHandler._ground_feedback_corpus  # unbound; method uses only (conn, limit)
FAILS = []  # messages of failed checks; appended to by check()
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Print a PASS/FAIL line for ``msg``; on failure also record ``msg`` in FAILS."""
    label = " PASS " if cond else " FAIL "
    print(label + msg)
    if cond:
        return
    FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
def mkdb(with_email=True):
    """Build a throwaway SQLite database seeded with synthetic feedback rows.

    Creates ``communications`` (5 rows ``comm 0..4``, dated 2026-01-01..05) and
    ``fundraising_investors`` (5 rows ``note 0..4``). When ``with_email`` is true
    it also creates ``emails`` with 5 matched rows ``EMAIL 0..4`` (dated
    2026-02-01..05) plus one unmatched row that must never reach the corpus.

    The database lives in memory, so no temp directory or file is left behind.

    Args:
        with_email: whether to create and populate the ``emails`` table.

    Returns:
        An open ``sqlite3.Connection``; the caller is responsible for closing it.
    """
    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE communications(body TEXT, communication_date TEXT)")
    conn.execute("CREATE TABLE fundraising_investors(notes TEXT)")
    conn.executemany(
        "INSERT INTO communications VALUES(?,?)",
        [(f"comm {i}", f"2026-01-{i + 1:02d}") for i in range(5)],
    )
    conn.executemany(
        "INSERT INTO fundraising_investors VALUES(?)",
        [(f"note {i}",) for i in range(5)],
    )
    if with_email:
        conn.execute("CREATE TABLE emails(body_text TEXT, match_status TEXT, sent_at TEXT)")
        matched = [(f"EMAIL {i}", "matched", f"2026-02-{i + 1:02d}") for i in range(5)]
        unmatched = [("UNMATCHED should not appear", "unmatched", "2026-03-01")]
        conn.executemany("INSERT INTO emails VALUES(?,?,?)", matched + unmatched)
    conn.commit()
    return conn
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run every corpus-assembly check, print a summary, and exit 1 on any failure.

    Scenarios covered:
      * all three sources present: email is included, unmatched email is not,
        and the first three items come from email, communications, notes in turn;
      * ``emails`` table absent: only the 10 communications/notes rows come back;
      * a single 9000-character matched email: the returned body is 4000 chars.

    Each connection is closed once its corpus has been read, and length guards
    turn a short/empty corpus into a reported FAIL instead of an IndexError.
    """
    from contextlib import closing  # local import keeps this block self-contained

    with closing(mkdb(True)) as conn:
        items = corpus(None, conn, limit=9)
    check(any(x.startswith("EMAIL") for x in items), "matched email is represented in the corpus")
    check(all("UNMATCHED" not in x for x in items), "unmatched emails are excluded")
    check(any(x.startswith("comm") for x in items) and any(x.startswith("note") for x in items),
          "communications and grid notes still represented")
    check(len(items) >= 3
          and items[0].startswith("EMAIL")
          and items[1].startswith("comm")
          and items[2].startswith("note"),
          "sources are round-robin balanced (email not crowded out)")

    with closing(mkdb(False)) as conn:
        items2 = corpus(None, conn, limit=20)
    check(len(items2) == 10 and all("EMAIL" not in x for x in items2),
          "degrades gracefully to comms+notes when email table absent")

    # In-memory database: nothing is left on disk after the run.
    with closing(sqlite3.connect(":memory:")) as c3:
        c3.execute("CREATE TABLE communications(body TEXT, communication_date TEXT)")
        c3.execute("CREATE TABLE fundraising_investors(notes TEXT)")
        c3.execute("CREATE TABLE emails(body_text TEXT, match_status TEXT, sent_at TEXT)")
        c3.execute("INSERT INTO emails VALUES(?,?,?)", ("X" * 9000, "matched", "2026-02-01"))
        c3.commit()
        capped = corpus(None, c3, limit=5)
    check(bool(capped) and len(capped[0]) == 4000,
          "long email body capped at 4000 chars for local minimize")

    if FAILS:
        print(f"\nFAILED ({len(FAILS)})")
        for failure in FAILS:
            print(" - " + failure)
        sys.exit(1)
    print("\nALL PASS (email grounding corpus wiring)")


if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user