"""Tests for the intake parse/normalize layer — Spark/Qwen stubbed (no network).""" import os import sys sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) import parse # noqa: E402 def _stub(reply): """Return a parse_fn that ignores input and yields `reply` (simulating Qwen's JSON).""" return lambda text, system=None, max_tokens=400: reply def test_new_investor_basic(): p = parse.parse_message( "New investor Acme Capital, contact Jane Doe jane@acme.com, met at the Austin conf", parse_fn=_stub({"intent": "new_investor", "investor_name": "Acme Capital", "contact_name": "Jane Doe", "contact_email": "jane@acme.com", "contact_title": None, "note": "met at the Austin conf"}), ) assert p["intent"] == "new_investor" assert p["investor_name"] == "Acme Capital" assert p["contact_email"] == "jane@acme.com" def test_email_salvaged_from_source_when_model_misses(): p = parse.parse_message( "add bob@example.org from Beta LP", parse_fn=_stub({"intent": "new_investor", "investor_name": "Beta LP", "contact_name": "Bob", "contact_email": None}), ) assert p["contact_email"] == "bob@example.org" def test_fabricated_email_dropped_when_not_in_source(): p = parse.parse_message( "new prospect Gamma Partners, talked to their GP", parse_fn=_stub({"intent": "new_investor", "investor_name": "Gamma Partners", "contact_name": "their GP", "contact_email": "made-up@nowhere.test"}), ) # the model invented an address that isn't in the source → must be dropped assert p["contact_email"] is None def test_email_extracted_without_surrounding_punctuation(): # "Name " is the most common contact format; parens / trailing period also occur. # The salvage-from-source path must extract the bare address, never the brackets. cases = [ ("New investor: Larch Capital — Dana Reed , met at conf", "dana@larchcap.com"), ("ping (sam@beta.io) re the deck", "sam@beta.io"), ("reach kim@acme.co.", "kim@acme.co"), ] for src, expected in cases: p = parse.parse_message( src, parse_fn=_stub({"intent": "new_investor", "investor_name": "X", "contact_name": "Y", "contact_email": None}), ) assert p["contact_email"] == expected, (src, p["contact_email"]) def test_meeting_note_intent_preserved(): p = parse.parse_message( "Note for Acme Capital: wants the Q3 deck", parse_fn=_stub({"intent": "meeting_note", "investor_name": "Acme Capital", "note": "wants the Q3 deck"}), ) assert p["intent"] == "meeting_note" assert p["note"] == "wants the Q3 deck" def test_unclear_when_no_entity(): p = parse.parse_message( "hey what's up", parse_fn=_stub({"intent": "new_investor", "investor_name": None, "contact_name": None}), ) assert p["intent"] == "unclear" def test_null_strings_normalized(): p = parse.parse_message( "Delta Fund", parse_fn=_stub({"intent": "new_investor", "investor_name": "Delta Fund", "contact_name": "null", "contact_email": "N/A", "note": ""}), ) assert p["contact_name"] is None assert p["contact_email"] is None assert p["note"] is None def test_bad_intent_falls_back_to_unclear(): p = parse.parse_message( "Epsilon Capital", parse_fn=_stub({"intent": "garbage", "investor_name": "Epsilon Capital"}), ) assert p["intent"] == "unclear" def test_none_model_reply_is_unclear(): p = parse.parse_message("???", parse_fn=_stub(None)) assert p["intent"] == "unclear" def test_parse_message_stashes_source_text(): p = parse.parse_message("Acme Capital, Jane jane@acme.com", parse_fn=_stub({"intent": "new_investor", "investor_name": "Acme Capital", "contact_name": "Jane", "contact_email": "jane@acme.com"})) assert p["_source_text"] == "Acme Capital, Jane jane@acme.com" def test_revise_applies_note_change_and_preserves_control_keys(): proposal = parse.parse_message( "New investor Acme Capital, Jane Doe jane@acme.com", parse_fn=_stub({"intent": "new_investor", "investor_name": "Acme Capital", "contact_name": "Jane Doe", "contact_email": "jane@acme.com", "contact_title": None, "note": None})) revised = parse.revise( proposal, "add that we met on June 14", parse_fn=_stub({"investor_name": "Acme Capital", "contact_name": "Jane Doe", "contact_email": "jane@acme.com", "contact_title": None, "note": "met on June 14"})) assert revised["note"] == "met on June 14" assert revised["investor_name"] == "Acme Capital" assert revised["intent"] == "new_investor" # control key preserved assert revised["_source_text"] == proposal["_source_text"] # preserved for email integrity def test_revise_email_taken_only_from_instruction(): proposal = {"intent": "new_investor", "investor_name": "Acme", "contact_name": "Jane", "contact_email": "jane@acme.com", "contact_title": None, "note": None, "_source_text": "Acme, Jane jane@acme.com"} # instruction literally carries the new address → accepted r1 = parse.revise(proposal, "her email is jane@newfirm.com", parse_fn=_stub({"contact_email": "jane@newfirm.com"})) assert r1["contact_email"] == "jane@newfirm.com" # model tries to change the email but the instruction has no address → keep the existing one r2 = parse.revise(proposal, "set her title to GP", parse_fn=_stub({"contact_email": "totally@madeup.test", "contact_title": "GP"})) assert r2["contact_email"] == "jane@acme.com" # model's email ignored (not in instruction) assert r2["contact_title"] == "GP" def test_revise_preserves_match_id(): proposal = {"intent": "meeting_note", "investor_name": "Acme", "contact_name": None, "contact_email": None, "contact_title": None, "note": "old", "_match_id": "rowAcme", "_stage": "approval", "_source_text": "note for Acme: old"} revised = parse.revise(proposal, "change the note to: sent the deck", parse_fn=_stub({"note": "sent the deck"})) assert revised["note"] == "sent the deck" assert revised["_match_id"] == "rowAcme" assert revised["intent"] == "meeting_note" def test_revise_cannot_empty_the_proposal(): proposal = {"intent": "new_investor", "investor_name": "Acme", "contact_name": "Jane", "contact_email": None, "contact_title": None, "note": "x", "_source_text": "Acme Jane"} revised = parse.revise(proposal, "clear it", parse_fn=_stub({"investor_name": None, "contact_name": None, "contact_title": None, "note": None})) assert revised["investor_name"] == "Acme" and revised["contact_name"] == "Jane" if __name__ == "__main__": fns = [v for k, v in sorted(globals().items()) if k.startswith("test_") and callable(v)] for fn in fns: fn() print(f"ok {fn.__name__}") print(f"\n{len(fns)} passed")