Capture city + LinkedIn on card intake; sharpen the transcription prompt
The card transcription prompt now reads emails/URLs/phones character-by-character, explicitly forbids autocompleting toward a plausible domain (the mara.com -> marac.com failure), and emits labeled lines (which also give the field extractor cleaner input). The extractor gains two fields, city and linkedin_url. city is a plain field (low-harm if wrong; the human sees it on the card). linkedin_url follows the email-integrity rule: it is kept only if it literally appears in the source or in a revise instruction, and is never minted -- a wrong profile URL points at the wrong person. Both flow to the contact via the existing log-communication upsert (city also syncs to the grid contact pill). Phone is intentionally NOT included yet: the bot's write path can't store it until a small server-side change lands (next s9pk). See the matrix-intake guide.
This commit is contained in:
@@ -24,6 +24,8 @@ SYSTEM = (
|
||||
' "contact_name": the individual person mentioned, or null.\n'
|
||||
' "contact_email": the person\'s email if explicitly present, else null. Never invent one.\n'
|
||||
' "contact_title": the person\'s role/title if stated, else null.\n'
|
||||
' "city": the person\'s city or location if stated (e.g. "New York"), else null.\n'
|
||||
' "linkedin_url": the person\'s LinkedIn URL if explicitly present, else null. Never invent one.\n'
|
||||
' "note": any meeting note, context, or next step, else null.\n'
|
||||
"Use null (not empty string) for anything not present."
|
||||
)
|
||||
@@ -51,8 +53,10 @@ def build_system(roster=None, base=SYSTEM):
|
||||
return "\n".join(parts)
|
||||
|
||||
_EMAIL_RE = re.compile(r"[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}")
|
||||
_LINKEDIN_RE = re.compile(r"(?:https?://)?(?:[a-z]{2,3}\.)?linkedin\.com/[A-Za-z0-9_%/\-.]+", re.I)
|
||||
_VALID_INTENTS = {"new_investor", "meeting_note", "unclear"}
|
||||
_FIELDS = ("intent", "investor_name", "contact_name", "contact_email", "contact_title", "note")
|
||||
_FIELDS = ("intent", "investor_name", "contact_name", "contact_email", "contact_title",
|
||||
"city", "linkedin_url", "note")
|
||||
|
||||
|
||||
def _clean(v):
|
||||
@@ -80,6 +84,14 @@ def normalize(raw, source_text=""):
|
||||
m = _EMAIL_RE.search(source_text or "")
|
||||
out["contact_email"] = m.group(0).rstrip(".,;:!?)]}>\"'") if m else None
|
||||
|
||||
# LinkedIn integrity: same rule as email — a profile URL identifies a specific person, so
|
||||
# never let the model mint one; keep only a linkedin.com URL literally present in the source.
|
||||
lm = _LINKEDIN_RE.search(source_text or "")
|
||||
out["linkedin_url"] = lm.group(0).rstrip(".,;:!?)]}>\"'") if lm else None
|
||||
|
||||
# City is left as a plain extracted field (no source gate): a wrong city is low-harm and the
|
||||
# human sees it on the card before approving, unlike a wrong email/LinkedIn.
|
||||
|
||||
# An intake with no firm AND no person is not actionable.
|
||||
if not out["investor_name"] and not out["contact_name"]:
|
||||
out["intent"] = "unclear"
|
||||
@@ -101,13 +113,14 @@ REVISE_SYSTEM = (
|
||||
"You revise a structured investor-intake proposal from a short correction a venture-fund "
|
||||
"team member typed. You are given the CURRENT proposal as JSON and an INSTRUCTION. Apply "
|
||||
"the instruction and reply with ONLY the full revised JSON object, these keys:\n"
|
||||
' "investor_name", "contact_name", "contact_email", "contact_title", "note".\n'
|
||||
' "investor_name", "contact_name", "contact_email", "contact_title", "city", '
|
||||
'"linkedin_url", "note".\n'
|
||||
"Change ONLY what the instruction asks; copy every other field through unchanged. Use null "
|
||||
"for a field the instruction clears or that is genuinely absent. Never invent an email "
|
||||
"address."
|
||||
"address or a LinkedIn URL."
|
||||
)
|
||||
|
||||
_REVISABLE = ("investor_name", "contact_name", "contact_title", "note")
|
||||
_REVISABLE = ("investor_name", "contact_name", "contact_title", "city", "note")
|
||||
|
||||
|
||||
def _apply_revision(proposal, model_out, instruction):
|
||||
@@ -126,6 +139,10 @@ def _apply_revision(proposal, model_out, instruction):
|
||||
if m:
|
||||
out["contact_email"] = m.group(0).rstrip(".,;:!?)]}>\"'")
|
||||
# else: keep proposal's current contact_email (untouched above; control key copied by dict())
|
||||
# LinkedIn follows the same rule: a revised URL is taken only if it appears in the instruction.
|
||||
lm = _LINKEDIN_RE.search(instruction or "")
|
||||
if lm:
|
||||
out["linkedin_url"] = lm.group(0).rstrip(".,;:!?)]}>\"'")
|
||||
# Don't let a revision strip the proposal down to nothing actionable.
|
||||
if not out.get("investor_name") and not out.get("contact_name"):
|
||||
out["investor_name"] = proposal.get("investor_name")
|
||||
|
||||
Reference in New Issue
Block a user