Matrix intake: fuzzy investor matching + conversational in-thread edits (v0.1.0:86)

Close the two locked post-deploy enhancements for the Matrix intake bot. Fuzzy matching (server-side, ships in the s9pk): new find_intake_candidates in server.py returns ranked, deterministic near-matches (difflib name similarity plus token-set Jaccard, legal-suffix-aware, plus email Levenshtein distance <= 2); GET /api/intake/match now returns {match, candidates}. The bot surfaces a numbered shortlist so that a near-duplicate (Charlie/Charles, Acme Capital vs. Acme Capital LLC, a one-character email typo) is confirmed by a human instead of silently creating a second investor. An exact match still auto-attaches; fuzzy candidates are never auto-attached. The optional LLM-judge re-rank is deferred. Conversational edits (bot-side, ships on the Spark): any in-thread reply that isn't yes, no, or edit field=value is treated as a natural-language revision and re-run through local Qwen (parse.revise). Email integrity is preserved -- a changed address must literally appear in the instruction; the model's email field is structurally unreachable. No-op revisions re-prompt. Docs and current-state notes brought up to date; 27/27 backend tests green.
2026-06-17 18:50:58 -05:00
parent fa6c9da0e6
commit 0b893295e1
15 changed files with 734 additions and 41 deletions
@@ -102,6 +102,65 @@ def test_none_model_reply_is_unclear():
    assert p["intent"] == "unclear"


+def test_parse_message_stashes_source_text():  # parse_message must keep the raw input text on the proposal
+    p = parse.parse_message("Acme Capital, Jane jane@acme.com",
+                            parse_fn=_stub({"intent": "new_investor", "investor_name": "Acme Capital",  # _stub is defined elsewhere; presumably yields this dict as the model reply
+                                            "contact_name": "Jane", "contact_email": "jane@acme.com"}))
+    assert p["_source_text"] == "Acme Capital, Jane jane@acme.com"  # stored verbatim under "_source_text"
+
+
+def test_revise_applies_note_change_and_preserves_control_keys():  # revise changes content fields but keeps control keys
+    proposal = parse.parse_message(  # starting proposal built through parse_message, so it carries "_source_text"
+        "New investor Acme Capital, Jane Doe jane@acme.com",
+        parse_fn=_stub({"intent": "new_investor", "investor_name": "Acme Capital",
+                        "contact_name": "Jane Doe", "contact_email": "jane@acme.com",
+                        "contact_title": None, "note": None}))
+    revised = parse.revise(  # the stubbed reply below has no "intent" key; only "note" differs from the proposal
+        proposal, "add that we met on June 14",
+        parse_fn=_stub({"investor_name": "Acme Capital", "contact_name": "Jane Doe",
+                        "contact_email": "jane@acme.com", "contact_title": None,
+                        "note": "met on June 14"}))
+    assert revised["note"] == "met on June 14"  # the revised note is applied
+    assert revised["investor_name"] == "Acme Capital"  # untouched field carries through
+    assert revised["intent"] == "new_investor"                 # control key preserved
+    assert revised["_source_text"] == proposal["_source_text"]  # preserved for email integrity
+
+
+def test_revise_email_taken_only_from_instruction():  # a new email is accepted only when the instruction text contains it
+    proposal = {"intent": "new_investor", "investor_name": "Acme", "contact_name": "Jane",  # hand-built proposal; parse_message is not involved here
+                "contact_email": "jane@acme.com", "contact_title": None, "note": None,
+                "_source_text": "Acme, Jane jane@acme.com"}
+    # instruction literally carries the new address → accepted
+    r1 = parse.revise(proposal, "her email is jane@newfirm.com",
+                      parse_fn=_stub({"contact_email": "jane@newfirm.com"}))
+    assert r1["contact_email"] == "jane@newfirm.com"  # address that appears in the instruction is applied
+    # model tries to change the email but the instruction has no address → keep the existing one
+    r2 = parse.revise(proposal, "set her title to GP",
+                      parse_fn=_stub({"contact_email": "totally@madeup.test", "contact_title": "GP"}))
+    assert r2["contact_email"] == "jane@acme.com"  # model's email ignored (not in instruction)
+    assert r2["contact_title"] == "GP"  # the title change from the same reply still applies
+
+
+def test_revise_preserves_match_id():  # revising a note must not drop the proposal's "_match_id"
+    proposal = {"intent": "meeting_note", "investor_name": "Acme", "contact_name": None,  # hand-built proposal that already carries underscore-prefixed keys
+                "contact_email": None, "contact_title": None, "note": "old",
+                "_match_id": "rowAcme", "_stage": "approval", "_source_text": "note for Acme: old"}
+    revised = parse.revise(proposal, "change the note to: sent the deck",
+                           parse_fn=_stub({"note": "sent the deck"}))  # stubbed reply carries only the new note
+    assert revised["note"] == "sent the deck"  # note is replaced
+    assert revised["_match_id"] == "rowAcme"  # match id survives the revision
+    assert revised["intent"] == "meeting_note"  # intent is unchanged
+
+
+def test_revise_cannot_empty_the_proposal():  # an all-None reply must not blank out the name fields
+    proposal = {"intent": "new_investor", "investor_name": "Acme", "contact_name": "Jane",
+                "contact_email": None, "contact_title": None, "note": "x", "_source_text": "Acme Jane"}
+    revised = parse.revise(proposal, "clear it",
+                           parse_fn=_stub({"investor_name": None, "contact_name": None,  # stubbed reply sets every field it carries to None
+                                           "contact_title": None, "note": None}))
+    assert revised["investor_name"] == "Acme" and revised["contact_name"] == "Jane"  # only the two name fields are checked here
+
+
 if __name__ == "__main__":
    fns = [v for k, v in sorted(globals().items()) if k.startswith("test_") and callable(v)]
    for fn in fns: