Capture city + LinkedIn on card intake; sharpen the transcription prompt

The card transcription prompt now reads emails/URLs/phones character-by-character, explicitly forbids autocompleting toward a plausible domain (the mara.com -> marac.com failure), and emits labeled lines (which also gives the field extractor cleaner input). The extractor gains two fields: city and linkedin_url. city is a plain field (low-harm if wrong; the human sees it on the card). linkedin_url follows the email-integrity rule: it is kept only if it literally appears in the source or in a revise instruction, and is never minted -- a wrong profile URL points at the wrong person. Both flow to the contact via the existing log-communication upsert (city also syncs to the grid contact pill). Phone is intentionally NOT included yet: the bot's write path can't store it until a small server-side change lands (next s9pk). See the matrix-intake guide.
2026-06-20 11:07:17 -05:00
parent 5e115a3409
commit 8b2eb01a65
8 changed files with 120 additions and 13 deletions
@@ -195,6 +195,44 @@ def test_revise_injects_roster_into_system_prompt():
    assert "doing the outreach" in seen["system"]


+def test_city_kept_as_plain_field_and_linkedin_salvaged_from_source():
+    # Source is a labeled card transcription: city must pass through unchanged, and the LinkedIn
+    # URL the stubbed model omitted must be salvaged verbatim from the source text, as email is.
+    src = ("New investor — from a business card:\nName: Jane Doe\nCompany: Acme Capital\n"
+           "Email: jane@acme.com\nLinkedIn: linkedin.com/in/janedoe\nCity: New York")
+    p = parse.parse_message(
+        src,
+        parse_fn=_stub({"intent": "new_investor", "investor_name": "Acme Capital",
+                        "contact_name": "Jane Doe", "contact_email": "jane@acme.com",
+                        "city": "New York", "linkedin_url": None}),  # model missed the URL
+    )
+    assert p["city"] == "New York"  # same value the stubbed model returned
+    assert p["linkedin_url"] == "linkedin.com/in/janedoe"  # salvaged from source
+
+
+def test_fabricated_linkedin_dropped_when_not_in_source():  # a model-supplied URL absent from the source must not survive
+    p = parse.parse_message(
+        "new prospect Gamma Partners, talked to their GP",  # source text contains no URL
+        parse_fn=_stub({"intent": "new_investor", "investor_name": "Gamma Partners",
+                        "contact_name": "their GP", "linkedin_url": "linkedin.com/in/madeup"}),
+    )
+    assert p["linkedin_url"] is None  # model invented a URL not in the source → dropped
+
+
+def test_revise_linkedin_taken_only_from_instruction():  # revise may set linkedin_url only from the instruction text
+    proposal = {"intent": "new_investor", "investor_name": "Acme", "contact_name": "Jane",
+                "contact_email": "jane@acme.com", "contact_title": None, "city": None,
+                "linkedin_url": None, "note": None, "_source_text": "Acme Jane jane@acme.com"}
+    r1 = parse.revise(proposal, "her linkedin is linkedin.com/in/janedoe",  # URL appears in the instruction
+                      parse_fn=_stub({"linkedin_url": "linkedin.com/in/janedoe"}))
+    assert r1["linkedin_url"] == "linkedin.com/in/janedoe"  # accepted: literally present in the instruction
+    # model tries to set a URL but the instruction carries none → keep existing (None)
+    r2 = parse.revise(proposal, "set her title to GP",
+                      parse_fn=_stub({"linkedin_url": "linkedin.com/in/fake", "contact_title": "GP"}))
+    assert r2["linkedin_url"] is None
+    assert r2["contact_title"] == "GP"  # the title change requested in the same revise still applies
+
+
 def test_revise_cannot_empty_the_proposal():
    proposal = {"intent": "new_investor", "investor_name": "Acme", "contact_name": "Jane",
                "contact_email": None, "contact_title": None, "note": "x", "_source_text": "Acme Jane"}