Matrix intake: frame parse with team roster so a teammate isn't read as the prospect
A local smoke test found that "jonathan is chatting with wyoming" extracted the teammate instead of the prospect. The fix feeds the parser an optional team roster (INTAKE_TEAM_ROSTER) through a build_system(roster) outreach frame: roster names/initials are the people doing outreach and are never extracted; the other party is the investor/prospect. The same framing is applied on the revise leg. An unset roster keeps the prior behavior.
This commit is contained in:
@@ -15,7 +15,9 @@ import spark
|
||||
|
||||
SYSTEM = (
|
||||
"You extract structured investor-intake data from a short message a venture-fund "
|
||||
"team member typed. Reply with ONLY a JSON object, no prose, with these keys:\n"
|
||||
"team member typed about their fundraising outreach. The message is a note FROM a "
|
||||
"team member ABOUT an investor or prospect they are contacting. Reply with ONLY a JSON "
|
||||
"object, no prose, with these keys:\n"
|
||||
' "intent": "new_investor" if the message introduces a new investor or prospect, '
|
||||
'"meeting_note" if it logs a note/update about an investor, else "unclear".\n'
|
||||
' "investor_name": the investing firm or entity name (e.g. "Acme Capital"), or null.\n'
|
||||
@@ -23,9 +25,31 @@ SYSTEM = (
|
||||
' "contact_email": the person\'s email if explicitly present, else null. Never invent one.\n'
|
||||
' "contact_title": the person\'s role/title if stated, else null.\n'
|
||||
' "note": any meeting note, context, or next step, else null.\n'
|
||||
"Use null (not empty string) for anything not present. Output JSON only."
|
||||
"Use null (not empty string) for anything not present."
|
||||
)
|
||||
|
||||
# Outreach frame appended to the system prompt when the team roster is known, so the model
# reads a teammate's name as the person DOING the outreach, not the investor. This fixes
# "Jonathan is chatting with Wyoming" extracting the teammate instead of the prospect.
# The single "{names}" placeholder is filled via str.format with the comma-joined roster;
# names come from settings.team_roster() (INTAKE_TEAM_ROSTER).
ROSTER_FRAME = (
    "These names and initials (case-insensitive) are our OWN team members — the people doing "
    "the outreach, NOT investors or prospects. Never extract one as investor_name or "
    "contact_name: {names}. When a team member is described talking with, meeting, or chasing "
    'someone (e.g. "Jonathan is chatting with Wyoming"), the OTHER party (here "Wyoming") is '
    "the investor or prospect to extract."
)
|
||||
|
||||
|
||||
def build_system(roster=None, base=SYSTEM):
    """Assemble the extraction system prompt.

    With a `roster` (team-member names) the outreach frame is appended so a teammate's
    name is read as the person doing outreach, not the investor.

    Args:
        roster: Team-member names/initials. None or empty adds no frame. An iterable of
            names is stripped, blank entries are dropped, and the rest are comma-joined.
            A single string is used as the whole name list instead of being iterated
            character by character.
        base: The base system prompt the frame is appended to (defaults to SYSTEM).

    Returns:
        `base`, then ROSTER_FRAME filled with the names (only when at least one usable
        name remains), then "Output JSON only." — joined with newlines. The JSON-only
        reminder stays the last line for recency.

    Pure + offline-testable: string assembly only.
    """
    # A bare string is iterable; without this guard ", ".join would split it into letters.
    if isinstance(roster, str):
        roster = [roster]
    cleaned = (str(name).strip() for name in (roster or ()) if name is not None)
    names = [name for name in cleaned if name]

    parts = [base]
    if names:
        parts.append(ROSTER_FRAME.format(names=", ".join(names)))
    parts.append("Output JSON only.")
    return "\n".join(parts)
|
||||
|
||||
_EMAIL_RE = re.compile(r"[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}")
|
||||
_VALID_INTENTS = {"new_investor", "meeting_note", "unclear"}
|
||||
_FIELDS = ("intent", "investor_name", "contact_name", "contact_email", "contact_title", "note")
|
||||
@@ -62,10 +86,11 @@ def normalize(raw, source_text=""):
|
||||
return out
|
||||
|
||||
|
||||
def parse_message(text, parse_fn=spark.parse_json):
|
||||
"""Parse one intake message. `parse_fn` is injectable for tests (defaults to Spark/Qwen).
|
||||
def parse_message(text, parse_fn=spark.parse_json, roster=None):
|
||||
"""Parse one intake message. `parse_fn` is injectable for tests (defaults to Spark/Qwen);
|
||||
`roster` is the team-member names that frame the extraction (see build_system).
|
||||
Returns a normalized proposal dict. On a model/transport failure, raises (caller decides)."""
|
||||
raw = parse_fn(text, system=SYSTEM, max_tokens=400)
|
||||
raw = parse_fn(text, system=build_system(roster), max_tokens=400)
|
||||
proposal = normalize(raw, source_text=text)
|
||||
# Stash the original message so a later revise() can re-check email integrity against it.
|
||||
proposal["_source_text"] = text
|
||||
@@ -79,7 +104,7 @@ REVISE_SYSTEM = (
|
||||
' "investor_name", "contact_name", "contact_email", "contact_title", "note".\n'
|
||||
"Change ONLY what the instruction asks; copy every other field through unchanged. Use null "
|
||||
"for a field the instruction clears or that is genuinely absent. Never invent an email "
|
||||
"address. Output JSON only."
|
||||
"address."
|
||||
)
|
||||
|
||||
_REVISABLE = ("investor_name", "contact_name", "contact_title", "note")
|
||||
@@ -108,12 +133,13 @@ def _apply_revision(proposal, model_out, instruction):
|
||||
return out
|
||||
|
||||
|
||||
def revise(proposal, instruction, parse_fn=spark.parse_json, roster=None):
    """Apply a natural-language correction to a pending proposal via local Qwen.

    Args:
        proposal: The pending proposal dict. Only its "investor_name", "contact_name",
            "contact_email", "contact_title" and "note" values are shown to the model;
            missing keys are sent as null.
        instruction: The correction text. None is treated as "" and surrounding
            whitespace is stripped before it is placed in the prompt.
        parse_fn: Injectable for tests (defaults to Spark/Qwen). Called once as
            parse_fn(prompt, system=..., max_tokens=400).
        roster: Team-member names that frame the revision the same way parse_message
            does (see build_system). None keeps the unframed prompt.

    Returns:
        The revised proposal dict produced by _apply_revision(proposal, raw, instruction).
    """
    current = {k: proposal.get(k) for k in
               ("investor_name", "contact_name", "contact_email", "contact_title", "note")}
    prompt = ("CURRENT:\n" + json.dumps(current, ensure_ascii=False)
              + "\n\nINSTRUCTION:\n" + (instruction or "").strip())
    # Single model call, using the roster-framed revise prompt.
    raw = parse_fn(prompt, system=build_system(roster, base=REVISE_SYSTEM), max_tokens=400)
    return _apply_revision(proposal, raw, instruction)
|
||||
|
||||
Reference in New Issue
Block a user