Capture city + LinkedIn on card intake; sharpen the transcription prompt

The card transcription prompt now tells the model to read emails/URLs/phones
character-by-character, explicitly forbids autocompleting toward a plausible domain
(the mara.com -> marac.com failure), and has it emit labeled lines (which also gives
the field extractor cleaner input).

The extractor gains city + linkedin_url. city is a plain field (low-harm if wrong;
the human sees it on the card). linkedin_url follows the email-integrity rule: kept
only if it literally appears in the source / a revise instruction, never minted -- a
wrong profile URL points at the wrong person. Both flow to the contact via the
existing log-communication upsert (city also syncs to the grid contact pill).

Phone is intentionally NOT included yet: the bot's write path can't store it until a
small server-side change lands (next s9pk). See the matrix-intake guide.
This commit is contained in:
Keysat
2026-06-20 11:07:17 -05:00
parent 5e115a3409
commit 8b2eb01a65
8 changed files with 120 additions and 13 deletions
+4
View File
@@ -162,6 +162,10 @@ def build_commit_payload(proposal):
"name": proposal.get("contact_name") or proposal.get("investor_name") or "",
"email": proposal.get("contact_email") or "",
"title": proposal.get("contact_title") or "",
# city + linkedin_url are already honored by the server's contact upsert
# (_upsert_contact_from_fundraising); city also syncs to the grid contact pill.
"city": proposal.get("city") or "",
"linkedin_url": proposal.get("linkedin_url") or "",
}
note = proposal.get("note") or ""
# The CRM's grid note line uses subject-or-body for its one-line summary, so a non-empty
+21 -4
View File
@@ -24,6 +24,8 @@ SYSTEM = (
' "contact_name": the individual person mentioned, or null.\n'
' "contact_email": the person\'s email if explicitly present, else null. Never invent one.\n'
' "contact_title": the person\'s role/title if stated, else null.\n'
' "city": the person\'s city or location if stated (e.g. "New York"), else null.\n'
' "linkedin_url": the person\'s LinkedIn URL if explicitly present, else null. Never invent one.\n'
' "note": any meeting note, context, or next step, else null.\n'
"Use null (not empty string) for anything not present."
)
@@ -51,8 +53,10 @@ def build_system(roster=None, base=SYSTEM):
return "\n".join(parts)
_EMAIL_RE = re.compile(r"[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}")
# Matches a linkedin.com URL: optional scheme, any number of subdomain labels, then a path.
# The leading lookbehind requires the host to start at a boundary. Without it a lookalike such
# as "notlinkedin.com/in/x" or "evil-linkedin.com/in/x" would be captured as "linkedin.com/in/x",
# i.e. a URL for a different site than the one actually written in the text.
# Trailing punctuation may still be captured; callers strip it after matching.
_LINKEDIN_RE = re.compile(
    r"(?<![\w.\-])(?:https?://)?(?:[a-z0-9\-]+\.)*linkedin\.com/[A-Za-z0-9_%/\-.]+",
    re.I,
)
_VALID_INTENTS = {"new_investor", "meeting_note", "unclear"}
_FIELDS = ("intent", "investor_name", "contact_name", "contact_email", "contact_title", "note")
_FIELDS = ("intent", "investor_name", "contact_name", "contact_email", "contact_title",
"city", "linkedin_url", "note")
def _clean(v):
@@ -80,6 +84,14 @@ def normalize(raw, source_text=""):
m = _EMAIL_RE.search(source_text or "")
out["contact_email"] = m.group(0).rstrip(".,;:!?)]}>\"'") if m else None
# LinkedIn integrity: same rule as email — a profile URL identifies a specific person, so
# never let the model mint one; keep only a linkedin.com URL literally present in the source.
lm = _LINKEDIN_RE.search(source_text or "")
out["linkedin_url"] = lm.group(0).rstrip(".,;:!?)]}>\"'") if lm else None
# City is left as a plain extracted field (no source gate): a wrong city is low-harm and the
# human sees it on the card before approving, unlike a wrong email/LinkedIn.
# An intake with no firm AND no person is not actionable.
if not out["investor_name"] and not out["contact_name"]:
out["intent"] = "unclear"
@@ -101,13 +113,14 @@ REVISE_SYSTEM = (
"You revise a structured investor-intake proposal from a short correction a venture-fund "
"team member typed. You are given the CURRENT proposal as JSON and an INSTRUCTION. Apply "
"the instruction and reply with ONLY the full revised JSON object, these keys:\n"
' "investor_name", "contact_name", "contact_email", "contact_title", "note".\n'
' "investor_name", "contact_name", "contact_email", "contact_title", "city", '
'"linkedin_url", "note".\n'
"Change ONLY what the instruction asks; copy every other field through unchanged. Use null "
"for a field the instruction clears or that is genuinely absent. Never invent an email "
"address."
"address or a LinkedIn URL."
)
_REVISABLE = ("investor_name", "contact_name", "contact_title", "note")
_REVISABLE = ("investor_name", "contact_name", "contact_title", "city", "note")
def _apply_revision(proposal, model_out, instruction):
@@ -126,6 +139,10 @@ def _apply_revision(proposal, model_out, instruction):
if m:
out["contact_email"] = m.group(0).rstrip(".,;:!?)]}>\"'")
# else: keep proposal's current contact_email (untouched above; control key copied by dict())
# LinkedIn follows the same rule: a revised URL is taken only if it appears in the instruction.
lm = _LINKEDIN_RE.search(instruction or "")
if lm:
out["linkedin_url"] = lm.group(0).rstrip(".,;:!?)]}>\"'")
# Don't let a revision strip the proposal down to nothing actionable.
if not out.get("investor_name") and not out.get("contact_name"):
out["investor_name"] = proposal.get("investor_name")
+6 -1
View File
@@ -18,6 +18,8 @@ _EDIT_ALIASES = {
"contact": "contact_name", "person": "contact_name",
"email": "contact_email",
"title": "contact_title", "role": "contact_title",
"city": "city", "location": "city",
"linkedin": "linkedin_url", "linkedin_url": "linkedin_url", "li": "linkedin_url",
"note": "note",
}
@@ -26,7 +28,8 @@ _NO = {"no", "n", "cancel", "discard", "reject", "stop", "👎", "❌"}
# "create a new investor anyway" replies to a disambiguation shortlist
_NEW = {"new", "none", "new investor", "none of these", "create", "create new", "add new", "neither"}
_CONTENT_FIELDS = ("intent", "investor_name", "contact_name", "contact_email", "contact_title", "note")
_CONTENT_FIELDS = ("intent", "investor_name", "contact_name", "contact_email", "contact_title",
"city", "linkedin_url", "note")
class ProposalStore:
@@ -174,6 +177,8 @@ def render(proposal):
("Contact", proposal.get("contact_name")),
("Email", proposal.get("contact_email")),
("Title", proposal.get("contact_title")),
("City", proposal.get("city")),
("LinkedIn", proposal.get("linkedin_url")),
("Note", proposal.get("note")),
]
for label, val in fields:
+14 -7
View File
@@ -27,13 +27,20 @@ def parse_json(prompt, system=None, max_tokens=400):
# email-integrity check runs against, so the "only keep an address that literally appears in the
# source, never let the model mint one" rule (parse.normalize) protects card intake too.
CARD_SYSTEM = (
"You are transcribing a photo of a business card for a venture-fund team. Read every line of "
"text on the card and write it out exactly as printed — the person's name, job title, company "
"or firm name, email address, phone number(s), website, and mailing address. Copy the email "
"address and phone numbers character-for-character; never guess, complete, or correct them. Do "
"not summarize, translate, or add anything that is not printed on the card. If the image is not "
"a readable business card, reply with the single word NONE. Output only the transcription, one "
"item per line."
"You are transcribing a photo of a business card. Copy the text EXACTLY as printed — never "
"paraphrase, translate, complete, normalize, or correct anything.\n"
"Read each of these character-by-character and reproduce every glyph precisely. Do NOT 'fix' "
"them toward a more common spelling or a well-known company's domain, and never add or drop a "
"character:\n"
" - Email: check the local part, the @, and the domain separately (transcribe 'mara.com' as "
"'mara.com', never 'marac.com').\n"
" - Phone number(s).\n"
" - Website / LinkedIn URL.\n"
"Then list, each on its own labeled line and ONLY if present on the card:\n"
" Name: Title: Company: Email: Phone: LinkedIn: City:\n"
"If a character is genuinely ambiguous, give your single best reading — never invent extra "
"characters to fill a gap. If the image is not a readable business card, reply with the single "
"word NONE. Output only the labeled lines, nothing else."
)
+11 -1
View File
@@ -15,11 +15,21 @@ def test_new_investor_payload():
assert out["investor_name"] == "Acme Capital"
assert out["create_investor_if_missing"] is True
assert "row_id" not in out
assert out["contact"] == {"name": "Jane Doe", "email": "jane@acme.com", "title": "GP"}
assert out["contact"] == {"name": "Jane Doe", "email": "jane@acme.com", "title": "GP",
"city": "", "linkedin_url": ""}
assert out["body"] == "met at conf"
assert out["source"] == "matrix_intake"
def test_contact_carries_city_and_linkedin_when_present():
    """A proposal's city and linkedin_url show up under the payload's "contact" entry."""
    proposal = {
        "intent": "new_investor",
        "investor_name": "Acme Capital",
        "contact_name": "Jane Doe",
        "contact_email": "jane@acme.com",
        "city": "New York",
        "linkedin_url": "linkedin.com/in/janedoe",
        "note": "met at conf",
    }
    contact = crm_client.build_commit_payload(proposal)["contact"]
    assert contact["city"] == "New York"
    assert contact["linkedin_url"] == "linkedin.com/in/janedoe"
def test_existing_investor_uses_row_id_not_create():
p = {"intent": "meeting_note", "investor_name": "Acme Capital",
"contact_name": "Jane Doe", "contact_email": None, "note": "wants Q3 deck",
+38
View File
@@ -195,6 +195,44 @@ def test_revise_injects_roster_into_system_prompt():
assert "doing the outreach" in seen["system"]
def test_city_kept_as_plain_field_and_linkedin_salvaged_from_source():
    """City passes through unchanged; a LinkedIn URL the model missed is recovered from the text."""
    # Labeled lines, as a card transcription would carry them.
    card_text = "\n".join([
        "New investor — from a business card:",
        "Name: Jane Doe",
        "Company: Acme Capital",
        "Email: jane@acme.com",
        "LinkedIn: linkedin.com/in/janedoe",
        "City: New York",
    ])
    # The stubbed model reply has the city but left linkedin_url empty.
    model_reply = {
        "intent": "new_investor",
        "investor_name": "Acme Capital",
        "contact_name": "Jane Doe",
        "contact_email": "jane@acme.com",
        "city": "New York",
        "linkedin_url": None,
    }
    proposal = parse.parse_message(card_text, parse_fn=_stub(model_reply))
    assert proposal["city"] == "New York"
    # Recovered verbatim from the source text, the same way email is.
    assert proposal["linkedin_url"] == "linkedin.com/in/janedoe"
def test_fabricated_linkedin_dropped_when_not_in_source():
    """A linkedin_url the model returns is discarded when the message text contains no such URL."""
    message = "new prospect Gamma Partners, talked to their GP"
    invented = {
        "intent": "new_investor",
        "investor_name": "Gamma Partners",
        "contact_name": "their GP",
        "linkedin_url": "linkedin.com/in/madeup",
    }
    proposal = parse.parse_message(message, parse_fn=_stub(invented))
    # The URL was not in the source, so it must not survive.
    assert proposal["linkedin_url"] is None
def test_revise_linkedin_taken_only_from_instruction():
    """A revision sets linkedin_url only when the instruction itself spells out the URL."""
    base = {
        "intent": "new_investor",
        "investor_name": "Acme",
        "contact_name": "Jane",
        "contact_email": "jane@acme.com",
        "contact_title": None,
        "city": None,
        "linkedin_url": None,
        "note": None,
        "_source_text": "Acme Jane jane@acme.com",
    }

    # Case 1: the instruction carries the URL, so it is accepted.
    with_url = _stub({"linkedin_url": "linkedin.com/in/janedoe"})
    accepted = parse.revise(base, "her linkedin is linkedin.com/in/janedoe", parse_fn=with_url)
    assert accepted["linkedin_url"] == "linkedin.com/in/janedoe"

    # Case 2: the model offers a URL the instruction never mentioned -> existing value (None) stays,
    # while the title change that was actually requested is applied.
    with_fake = _stub({"linkedin_url": "linkedin.com/in/fake", "contact_title": "GP"})
    rejected = parse.revise(base, "set her title to GP", parse_fn=with_fake)
    assert rejected["linkedin_url"] is None
    assert rejected["contact_title"] == "GP"
def test_revise_cannot_empty_the_proposal():
proposal = {"intent": "new_investor", "investor_name": "Acme", "contact_name": "Jane",
"contact_email": None, "contact_title": None, "note": "x", "_source_text": "Acme Jane"}
+14
View File
@@ -54,6 +54,20 @@ def test_interpret_edit_colon_and_alias():
assert payload == ("investor_name", "Acme Capital LLC")
def test_interpret_edit_city_and_linkedin_aliases():
    """"city:" and "linkedin=" replies are read as edits to city and linkedin_url."""
    expectations = (
        ("city: New York", ("city", "New York")),
        ("linkedin=linkedin.com/in/jane", ("linkedin_url", "linkedin.com/in/jane")),
    )
    for reply, field_and_value in expectations:
        action, payload = proposals.interpret_reply(reply)
        assert action == "edit"
        assert payload == field_and_value
def test_render_shows_city_and_linkedin_when_present():
    """The rendered card includes City and LinkedIn lines when both fields are set."""
    proposal = dict(SAMPLE)
    proposal["city"] = "New York"
    proposal["linkedin_url"] = "linkedin.com/in/jane"
    rendered = proposals.render(proposal)
    for expected_line in ("City: New York", "LinkedIn: linkedin.com/in/jane"):
        assert expected_line in rendered
def test_interpret_unknown():
assert proposals.interpret_reply("maybe later")[0] == "unknown"