Capture city + LinkedIn on card intake; sharpen the transcription prompt
The card transcription prompt now instructs the model to read emails/URLs/phones character-by-character, explicitly forbids autocompleting toward a plausible domain (the mara.com -> marac.com failure), and asks for labeled output lines (which also gives the field extractor cleaner input). The extractor gains city + linkedin_url. city is a plain field (low-harm if wrong; the human sees it on the card). linkedin_url follows the email-integrity rule: it is kept only if it literally appears in the source or in a revise instruction, and is never minted -- a wrong profile URL points at the wrong person. Both flow to the contact via the existing log-communication upsert (city also syncs to the grid contact pill). Phone is intentionally NOT included yet: the bot's write path can't store it until a small server-side change lands (next s9pk). See the matrix-intake guide.
This commit is contained in:
@@ -162,6 +162,10 @@ def build_commit_payload(proposal):
|
||||
"name": proposal.get("contact_name") or proposal.get("investor_name") or "",
|
||||
"email": proposal.get("contact_email") or "",
|
||||
"title": proposal.get("contact_title") or "",
|
||||
# city + linkedin_url are already honored by the server's contact upsert
|
||||
# (_upsert_contact_from_fundraising); city also syncs to the grid contact pill.
|
||||
"city": proposal.get("city") or "",
|
||||
"linkedin_url": proposal.get("linkedin_url") or "",
|
||||
}
|
||||
note = proposal.get("note") or ""
|
||||
# The CRM's grid note line uses subject-or-body for its one-line summary, so a non-empty
|
||||
|
||||
@@ -24,6 +24,8 @@ SYSTEM = (
|
||||
' "contact_name": the individual person mentioned, or null.\n'
|
||||
' "contact_email": the person\'s email if explicitly present, else null. Never invent one.\n'
|
||||
' "contact_title": the person\'s role/title if stated, else null.\n'
|
||||
' "city": the person\'s city or location if stated (e.g. "New York"), else null.\n'
|
||||
' "linkedin_url": the person\'s LinkedIn URL if explicitly present, else null. Never invent one.\n'
|
||||
' "note": any meeting note, context, or next step, else null.\n'
|
||||
"Use null (not empty string) for anything not present."
|
||||
)
|
||||
@@ -51,8 +53,10 @@ def build_system(roster=None, base=SYSTEM):
|
||||
return "\n".join(parts)
|
||||
|
||||
_EMAIL_RE = re.compile(r"[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}")
|
||||
# Matches a linkedin.com URL with an optional http(s) scheme and an optional 2-3 letter
# subdomain ("www." or a country code such as "uk."), case-insensitively.
# The leading negative lookbehind keeps the match from starting in the middle of another
# hostname or an email address: without it "notlinkedin.com/in/x" would be salvaged as
# "linkedin.com/in/x" and "test.linkedin.com/in/x" as the mangled "est.linkedin.com/in/x",
# i.e. a URL that does not literally appear in the source text.
# The path character class includes "." so a sentence-ending period may be captured; callers
# strip trailing punctuation from the match.
_LINKEDIN_RE = re.compile(
    r"(?<![A-Za-z0-9@.\-])(?:https?://)?(?:[a-z]{2,3}\.)?linkedin\.com/[A-Za-z0-9_%/\-.]+",
    re.I,
)
|
||||
_VALID_INTENTS = {"new_investor", "meeting_note", "unclear"}
|
||||
_FIELDS = ("intent", "investor_name", "contact_name", "contact_email", "contact_title", "note")
|
||||
_FIELDS = ("intent", "investor_name", "contact_name", "contact_email", "contact_title",
|
||||
"city", "linkedin_url", "note")
|
||||
|
||||
|
||||
def _clean(v):
|
||||
@@ -80,6 +84,14 @@ def normalize(raw, source_text=""):
|
||||
m = _EMAIL_RE.search(source_text or "")
|
||||
out["contact_email"] = m.group(0).rstrip(".,;:!?)]}>\"'") if m else None
|
||||
|
||||
# LinkedIn integrity: same rule as email — a profile URL identifies a specific person, so
|
||||
# never let the model mint one; keep only a linkedin.com URL literally present in the source.
|
||||
lm = _LINKEDIN_RE.search(source_text or "")
|
||||
out["linkedin_url"] = lm.group(0).rstrip(".,;:!?)]}>\"'") if lm else None
|
||||
|
||||
# City is left as a plain extracted field (no source gate): a wrong city is low-harm and the
|
||||
# human sees it on the card before approving, unlike a wrong email/LinkedIn.
|
||||
|
||||
# An intake with no firm AND no person is not actionable.
|
||||
if not out["investor_name"] and not out["contact_name"]:
|
||||
out["intent"] = "unclear"
|
||||
@@ -101,13 +113,14 @@ REVISE_SYSTEM = (
|
||||
"You revise a structured investor-intake proposal from a short correction a venture-fund "
|
||||
"team member typed. You are given the CURRENT proposal as JSON and an INSTRUCTION. Apply "
|
||||
"the instruction and reply with ONLY the full revised JSON object, these keys:\n"
|
||||
' "investor_name", "contact_name", "contact_email", "contact_title", "note".\n'
|
||||
' "investor_name", "contact_name", "contact_email", "contact_title", "city", '
|
||||
'"linkedin_url", "note".\n'
|
||||
"Change ONLY what the instruction asks; copy every other field through unchanged. Use null "
|
||||
"for a field the instruction clears or that is genuinely absent. Never invent an email "
|
||||
"address."
|
||||
"address or a LinkedIn URL."
|
||||
)
|
||||
|
||||
_REVISABLE = ("investor_name", "contact_name", "contact_title", "note")
|
||||
_REVISABLE = ("investor_name", "contact_name", "contact_title", "city", "note")
|
||||
|
||||
|
||||
def _apply_revision(proposal, model_out, instruction):
|
||||
@@ -126,6 +139,10 @@ def _apply_revision(proposal, model_out, instruction):
|
||||
if m:
|
||||
out["contact_email"] = m.group(0).rstrip(".,;:!?)]}>\"'")
|
||||
# else: keep proposal's current contact_email (untouched above; control key copied by dict())
|
||||
# LinkedIn follows the same rule: a revised URL is taken only if it appears in the instruction.
|
||||
lm = _LINKEDIN_RE.search(instruction or "")
|
||||
if lm:
|
||||
out["linkedin_url"] = lm.group(0).rstrip(".,;:!?)]}>\"'")
|
||||
# Don't let a revision strip the proposal down to nothing actionable.
|
||||
if not out.get("investor_name") and not out.get("contact_name"):
|
||||
out["investor_name"] = proposal.get("investor_name")
|
||||
|
||||
@@ -18,6 +18,8 @@ _EDIT_ALIASES = {
|
||||
"contact": "contact_name", "person": "contact_name",
|
||||
"email": "contact_email",
|
||||
"title": "contact_title", "role": "contact_title",
|
||||
"city": "city", "location": "city",
|
||||
"linkedin": "linkedin_url", "linkedin_url": "linkedin_url", "li": "linkedin_url",
|
||||
"note": "note",
|
||||
}
|
||||
|
||||
@@ -26,7 +28,8 @@ _NO = {"no", "n", "cancel", "discard", "reject", "stop", "👎", "❌"}
|
||||
# "create a new investor anyway" replies to a disambiguation shortlist
|
||||
_NEW = {"new", "none", "new investor", "none of these", "create", "create new", "add new", "neither"}
|
||||
|
||||
_CONTENT_FIELDS = ("intent", "investor_name", "contact_name", "contact_email", "contact_title", "note")
|
||||
_CONTENT_FIELDS = ("intent", "investor_name", "contact_name", "contact_email", "contact_title",
|
||||
"city", "linkedin_url", "note")
|
||||
|
||||
|
||||
class ProposalStore:
|
||||
@@ -174,6 +177,8 @@ def render(proposal):
|
||||
("Contact", proposal.get("contact_name")),
|
||||
("Email", proposal.get("contact_email")),
|
||||
("Title", proposal.get("contact_title")),
|
||||
("City", proposal.get("city")),
|
||||
("LinkedIn", proposal.get("linkedin_url")),
|
||||
("Note", proposal.get("note")),
|
||||
]
|
||||
for label, val in fields:
|
||||
|
||||
@@ -27,13 +27,20 @@ def parse_json(prompt, system=None, max_tokens=400):
|
||||
# email-integrity check runs against, so the "only keep an address that literally appears in the
|
||||
# source, never let the model mint one" rule (parse.normalize) protects card intake too.
|
||||
CARD_SYSTEM = (
|
||||
"You are transcribing a photo of a business card for a venture-fund team. Read every line of "
|
||||
"text on the card and write it out exactly as printed — the person's name, job title, company "
|
||||
"or firm name, email address, phone number(s), website, and mailing address. Copy the email "
|
||||
"address and phone numbers character-for-character; never guess, complete, or correct them. Do "
|
||||
"not summarize, translate, or add anything that is not printed on the card. If the image is not "
|
||||
"a readable business card, reply with the single word NONE. Output only the transcription, one "
|
||||
"item per line."
|
||||
"You are transcribing a photo of a business card. Copy the text EXACTLY as printed — never "
|
||||
"paraphrase, translate, complete, normalize, or correct anything.\n"
|
||||
"Read each of these character-by-character and reproduce every glyph precisely. Do NOT 'fix' "
|
||||
"them toward a more common spelling or a well-known company's domain, and never add or drop a "
|
||||
"character:\n"
|
||||
" - Email: check the local part, the @, and the domain separately (transcribe 'mara.com' as "
|
||||
"'mara.com', never 'marac.com').\n"
|
||||
" - Phone number(s).\n"
|
||||
" - Website / LinkedIn URL.\n"
|
||||
"Then list, each on its own labeled line and ONLY if present on the card:\n"
|
||||
" Name: Title: Company: Email: Phone: LinkedIn: City:\n"
|
||||
"If a character is genuinely ambiguous, give your single best reading — never invent extra "
|
||||
"characters to fill a gap. If the image is not a readable business card, reply with the single "
|
||||
"word NONE. Output only the labeled lines, nothing else."
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -15,11 +15,21 @@ def test_new_investor_payload():
|
||||
assert out["investor_name"] == "Acme Capital"
|
||||
assert out["create_investor_if_missing"] is True
|
||||
assert "row_id" not in out
|
||||
assert out["contact"] == {"name": "Jane Doe", "email": "jane@acme.com", "title": "GP"}
|
||||
assert out["contact"] == {"name": "Jane Doe", "email": "jane@acme.com", "title": "GP",
|
||||
"city": "", "linkedin_url": ""}
|
||||
assert out["body"] == "met at conf"
|
||||
assert out["source"] == "matrix_intake"
|
||||
|
||||
|
||||
def test_contact_carries_city_and_linkedin_when_present():
|
||||
p = {"intent": "new_investor", "investor_name": "Acme Capital", "contact_name": "Jane Doe",
|
||||
"contact_email": "jane@acme.com", "city": "New York",
|
||||
"linkedin_url": "linkedin.com/in/janedoe", "note": "met at conf"}
|
||||
out = crm_client.build_commit_payload(p)
|
||||
assert out["contact"]["city"] == "New York"
|
||||
assert out["contact"]["linkedin_url"] == "linkedin.com/in/janedoe"
|
||||
|
||||
|
||||
def test_existing_investor_uses_row_id_not_create():
|
||||
p = {"intent": "meeting_note", "investor_name": "Acme Capital",
|
||||
"contact_name": "Jane Doe", "contact_email": None, "note": "wants Q3 deck",
|
||||
|
||||
@@ -195,6 +195,44 @@ def test_revise_injects_roster_into_system_prompt():
|
||||
assert "doing the outreach" in seen["system"]
|
||||
|
||||
|
||||
def test_city_kept_as_plain_field_and_linkedin_salvaged_from_source():
|
||||
# A card transcription carries labeled lines; city is kept as-is, LinkedIn is salvaged from
|
||||
# the source text (verbatim) the same way email is.
|
||||
src = ("New investor — from a business card:\nName: Jane Doe\nCompany: Acme Capital\n"
|
||||
"Email: jane@acme.com\nLinkedIn: linkedin.com/in/janedoe\nCity: New York")
|
||||
p = parse.parse_message(
|
||||
src,
|
||||
parse_fn=_stub({"intent": "new_investor", "investor_name": "Acme Capital",
|
||||
"contact_name": "Jane Doe", "contact_email": "jane@acme.com",
|
||||
"city": "New York", "linkedin_url": None}), # model missed the URL
|
||||
)
|
||||
assert p["city"] == "New York"
|
||||
assert p["linkedin_url"] == "linkedin.com/in/janedoe" # salvaged from source
|
||||
|
||||
|
||||
def test_fabricated_linkedin_dropped_when_not_in_source():
|
||||
p = parse.parse_message(
|
||||
"new prospect Gamma Partners, talked to their GP",
|
||||
parse_fn=_stub({"intent": "new_investor", "investor_name": "Gamma Partners",
|
||||
"contact_name": "their GP", "linkedin_url": "linkedin.com/in/madeup"}),
|
||||
)
|
||||
assert p["linkedin_url"] is None # model invented a URL not in the source → dropped
|
||||
|
||||
|
||||
def test_revise_linkedin_taken_only_from_instruction():
|
||||
proposal = {"intent": "new_investor", "investor_name": "Acme", "contact_name": "Jane",
|
||||
"contact_email": "jane@acme.com", "contact_title": None, "city": None,
|
||||
"linkedin_url": None, "note": None, "_source_text": "Acme Jane jane@acme.com"}
|
||||
r1 = parse.revise(proposal, "her linkedin is linkedin.com/in/janedoe",
|
||||
parse_fn=_stub({"linkedin_url": "linkedin.com/in/janedoe"}))
|
||||
assert r1["linkedin_url"] == "linkedin.com/in/janedoe"
|
||||
# model tries to set a URL but the instruction carries none → keep existing (None)
|
||||
r2 = parse.revise(proposal, "set her title to GP",
|
||||
parse_fn=_stub({"linkedin_url": "linkedin.com/in/fake", "contact_title": "GP"}))
|
||||
assert r2["linkedin_url"] is None
|
||||
assert r2["contact_title"] == "GP"
|
||||
|
||||
|
||||
def test_revise_cannot_empty_the_proposal():
|
||||
proposal = {"intent": "new_investor", "investor_name": "Acme", "contact_name": "Jane",
|
||||
"contact_email": None, "contact_title": None, "note": "x", "_source_text": "Acme Jane"}
|
||||
|
||||
@@ -54,6 +54,20 @@ def test_interpret_edit_colon_and_alias():
|
||||
assert payload == ("investor_name", "Acme Capital LLC")
|
||||
|
||||
|
||||
def test_interpret_edit_city_and_linkedin_aliases():
|
||||
a1, p1 = proposals.interpret_reply("city: New York")
|
||||
assert (a1, p1) == ("edit", ("city", "New York"))
|
||||
a2, p2 = proposals.interpret_reply("linkedin=linkedin.com/in/jane")
|
||||
assert (a2, p2) == ("edit", ("linkedin_url", "linkedin.com/in/jane"))
|
||||
|
||||
|
||||
def test_render_shows_city_and_linkedin_when_present():
|
||||
p = {**SAMPLE, "city": "New York", "linkedin_url": "linkedin.com/in/jane"}
|
||||
out = proposals.render(p)
|
||||
assert "City: New York" in out
|
||||
assert "LinkedIn: linkedin.com/in/jane" in out
|
||||
|
||||
|
||||
def test_interpret_unknown():
|
||||
assert proposals.interpret_reply("maybe later")[0] == "unknown"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user