c1ea1769a4
A local smoke test found that "jonathan is chatting with wyoming" extracted the teammate rather than the prospect. Feed the parser an optional team roster (INTAKE_TEAM_ROSTER) through a build_system(roster) outreach frame: roster names/initials identify the people doing outreach and are never extracted; the other party is the investor/prospect. The revise leg gets the same framing. An unset roster keeps the prior behavior.
213 lines
9.2 KiB
Python
213 lines
9.2 KiB
Python
"""Tests for the intake parse/normalize layer — Spark/Qwen stubbed (no network)."""
|
|
import os
|
|
import sys
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
import parse # noqa: E402
|
|
|
|
|
|
def _stub(reply):
|
|
"""Return a parse_fn that ignores input and yields `reply` (simulating Qwen's JSON)."""
|
|
return lambda text, system=None, max_tokens=400: reply
|
|
|
|
|
|
def test_new_investor_basic():
    """A complete new-investor reply keeps its intent, investor name and email."""
    p = parse.parse_message(
        "New investor Acme Capital, contact Jane Doe jane@acme.com, met at the Austin conf",
        parse_fn=_stub({"intent": "new_investor", "investor_name": "Acme Capital",
                        "contact_name": "Jane Doe", "contact_email": "jane@acme.com",
                        "contact_title": None, "note": "met at the Austin conf"}),
    )
    assert p["intent"] == "new_investor"
    assert p["investor_name"] == "Acme Capital"
    assert p["contact_email"] == "jane@acme.com"
|
|
|
|
|
|
def test_email_salvaged_from_source_when_model_misses():
    """When the model returns no email, the address present in the source text is used."""
    p = parse.parse_message(
        "add bob@example.org from Beta LP",
        parse_fn=_stub({"intent": "new_investor", "investor_name": "Beta LP",
                        "contact_name": "Bob", "contact_email": None}),
    )
    assert p["contact_email"] == "bob@example.org"
|
|
|
|
|
|
def test_fabricated_email_dropped_when_not_in_source():
    """An email the model returns that does not appear in the source text is discarded."""
    p = parse.parse_message(
        "new prospect Gamma Partners, talked to their GP",
        parse_fn=_stub({"intent": "new_investor", "investor_name": "Gamma Partners",
                        "contact_name": "their GP", "contact_email": "made-up@nowhere.test"}),
    )
    # the model invented an address that isn't in the source → must be dropped
    assert p["contact_email"] is None
|
|
|
|
|
|
def test_email_extracted_without_surrounding_punctuation():
    """Salvaged addresses come back bare: no angle brackets, parens or trailing period."""
    # "Name <addr>" is the most common contact format; parens / trailing period also occur.
    # The salvage-from-source path must extract the bare address, never the brackets.
    cases = [
        ("New investor: Larch Capital — Dana Reed <dana@larchcap.com>, met at conf", "dana@larchcap.com"),
        ("ping (sam@beta.io) re the deck", "sam@beta.io"),
        ("reach kim@acme.co.", "kim@acme.co"),
    ]
    for src, expected in cases:
        p = parse.parse_message(
            src,
            parse_fn=_stub({"intent": "new_investor", "investor_name": "X",
                            "contact_name": "Y", "contact_email": None}),
        )
        # The failure message carries the offending source and the value actually extracted.
        assert p["contact_email"] == expected, (src, p["contact_email"])
|
|
|
|
|
|
def test_meeting_note_intent_preserved():
    """A meeting_note reply keeps both its intent and its note text."""
    p = parse.parse_message(
        "Note for Acme Capital: wants the Q3 deck",
        parse_fn=_stub({"intent": "meeting_note", "investor_name": "Acme Capital",
                        "note": "wants the Q3 deck"}),
    )
    assert p["intent"] == "meeting_note"
    assert p["note"] == "wants the Q3 deck"
|
|
|
|
|
|
def test_unclear_when_no_entity():
    """A reply with neither an investor nor a contact name is downgraded to "unclear"."""
    p = parse.parse_message(
        "hey what's up",
        parse_fn=_stub({"intent": "new_investor", "investor_name": None, "contact_name": None}),
    )
    assert p["intent"] == "unclear"
|
|
|
|
|
|
def test_null_strings_normalized():
    """Placeholder strings ("null", "N/A", "") in the model reply are normalized to None."""
    p = parse.parse_message(
        "Delta Fund",
        parse_fn=_stub({"intent": "new_investor", "investor_name": "Delta Fund",
                        "contact_name": "null", "contact_email": "N/A", "note": ""}),
    )
    assert p["contact_name"] is None
    assert p["contact_email"] is None
    assert p["note"] is None
|
|
|
|
|
|
def test_bad_intent_falls_back_to_unclear():
    """An unrecognized intent value from the model is replaced with "unclear"."""
    p = parse.parse_message(
        "Epsilon Capital",
        parse_fn=_stub({"intent": "garbage", "investor_name": "Epsilon Capital"}),
    )
    assert p["intent"] == "unclear"
|
|
|
|
|
|
def test_none_model_reply_is_unclear():
    """A None reply from the model yields an "unclear" proposal."""
    p = parse.parse_message("???", parse_fn=_stub(None))
    assert p["intent"] == "unclear"
|
|
|
|
|
|
def test_parse_message_stashes_source_text():
    """The original message is kept verbatim on the proposal under "_source_text"."""
    p = parse.parse_message("Acme Capital, Jane jane@acme.com",
                            parse_fn=_stub({"intent": "new_investor", "investor_name": "Acme Capital",
                                            "contact_name": "Jane", "contact_email": "jane@acme.com"}))
    assert p["_source_text"] == "Acme Capital, Jane jane@acme.com"
|
|
|
|
|
|
def test_revise_applies_note_change_and_preserves_control_keys():
    """Revising applies the new note while intent and _source_text carry over unchanged."""
    proposal = parse.parse_message(
        "New investor Acme Capital, Jane Doe jane@acme.com",
        parse_fn=_stub({"intent": "new_investor", "investor_name": "Acme Capital",
                        "contact_name": "Jane Doe", "contact_email": "jane@acme.com",
                        "contact_title": None, "note": None}))
    # The revise-leg reply carries no "intent" key, so the intent asserted below
    # has to come from the proposal rather than from the model.
    revised = parse.revise(
        proposal, "add that we met on June 14",
        parse_fn=_stub({"investor_name": "Acme Capital", "contact_name": "Jane Doe",
                        "contact_email": "jane@acme.com", "contact_title": None,
                        "note": "met on June 14"}))
    assert revised["note"] == "met on June 14"
    assert revised["investor_name"] == "Acme Capital"
    assert revised["intent"] == "new_investor"  # control key preserved
    assert revised["_source_text"] == proposal["_source_text"]  # preserved for email integrity
|
|
|
|
|
|
def test_revise_email_taken_only_from_instruction():
    """On revise, a new email is accepted only when the instruction itself contains it."""
    proposal = {"intent": "new_investor", "investor_name": "Acme", "contact_name": "Jane",
                "contact_email": "jane@acme.com", "contact_title": None, "note": None,
                "_source_text": "Acme, Jane jane@acme.com"}
    # instruction literally carries the new address → accepted
    r1 = parse.revise(proposal, "her email is jane@newfirm.com",
                      parse_fn=_stub({"contact_email": "jane@newfirm.com"}))
    assert r1["contact_email"] == "jane@newfirm.com"
    # model tries to change the email but the instruction has no address → keep the existing one
    r2 = parse.revise(proposal, "set her title to GP",
                      parse_fn=_stub({"contact_email": "totally@madeup.test", "contact_title": "GP"}))
    assert r2["contact_email"] == "jane@acme.com"  # model's email ignored (not in instruction)
    assert r2["contact_title"] == "GP"
|
|
|
|
|
|
def test_revise_preserves_match_id():
    """Revising a note keeps the proposal's "_match_id" and intent while updating the note."""
    proposal = {"intent": "meeting_note", "investor_name": "Acme", "contact_name": None,
                "contact_email": None, "contact_title": None, "note": "old",
                "_match_id": "rowAcme", "_stage": "approval", "_source_text": "note for Acme: old"}
    revised = parse.revise(proposal, "change the note to: sent the deck",
                           parse_fn=_stub({"note": "sent the deck"}))
    assert revised["note"] == "sent the deck"
    assert revised["_match_id"] == "rowAcme"
    assert revised["intent"] == "meeting_note"
|
|
|
|
|
|
def test_build_system_appends_roster_frame_only_when_roster_given():
    """build_system() adds the outreach frame only with a roster and always ends on JSON-only."""
    base = parse.build_system()
    assert base.strip().endswith("Output JSON only.")
    assert "doing the outreach" not in base  # no roster → no outreach frame

    framed = parse.build_system(["Grant", "Jonathan", "Marty"])
    assert "Grant" in framed and "Jonathan" in framed and "Marty" in framed
    assert "doing the outreach" in framed  # the outreach frame is present
    assert framed.strip().endswith("Output JSON only.")  # JSON-only stays last for recency
|
|
|
|
|
|
def test_parse_message_injects_roster_into_system_prompt():
    """parse_message(roster=...) hands the model a system prompt containing the outreach frame."""
    # Capture the system prompt the model is handed, and confirm the teammate ("jonathan")
    # is framed as outreach while the prospect ("wyoming") is what gets extracted.
    seen = {}

    def cap(text, system=None, max_tokens=400):
        # Record the system prompt, then answer like a model that picked the prospect.
        seen["system"] = system
        return {"intent": "meeting_note", "investor_name": "Wyoming", "contact_name": None,
                "note": "jonathan chatting with them"}

    p = parse.parse_message("jonathan is chatting with wyoming", parse_fn=cap,
                            roster=["Grant", "Jonathan", "Marty"])
    assert "Jonathan" in seen["system"]
    assert "doing the outreach" in seen["system"]
    assert p["investor_name"] == "Wyoming"
|
|
|
|
|
|
def test_revise_injects_roster_into_system_prompt():
    """revise(roster=...) hands the model a system prompt containing the outreach frame."""
    proposal = {"intent": "meeting_note", "investor_name": "Wyoming", "contact_name": None,
                "contact_email": None, "contact_title": None, "note": "x",
                "_source_text": "jonathan is chatting with wyoming"}
    seen = {}

    def cap(prompt, system=None, max_tokens=400):
        # Record the system prompt the revise leg passes to the model.
        seen["system"] = system
        return {"note": "sent the deck"}

    parse.revise(proposal, "note: sent the deck", parse_fn=cap, roster=["Grant", "Jonathan"])
    assert "Jonathan" in seen["system"]
    assert "doing the outreach" in seen["system"]
|
|
|
|
|
|
def test_revise_cannot_empty_the_proposal():
    """A revise reply that nulls every field leaves the investor and contact names in place."""
    proposal = {"intent": "new_investor", "investor_name": "Acme", "contact_name": "Jane",
                "contact_email": None, "contact_title": None, "note": "x", "_source_text": "Acme Jane"}
    revised = parse.revise(proposal, "clear it",
                           parse_fn=_stub({"investor_name": None, "contact_name": None,
                                           "contact_title": None, "note": None}))
    assert revised["investor_name"] == "Acme" and revised["contact_name"] == "Jane"
|
|
|
|
|
|
if __name__ == "__main__":
    # Script-mode runner: call every module-level test_* callable in name order.
    # A failing assert propagates and stops the run, so the summary line is
    # printed only when every test passed.
    fns = [v for k, v in sorted(globals().items()) if k.startswith("test_") and callable(v)]
    for fn in fns:
        fn()
        print(f"ok {fn.__name__}")
    print(f"\n{len(fns)} passed")
|