# ten31-database/backend/matrix_intake/test_parse.py

"""Tests for the intake parse/normalize layer — Spark/Qwen stubbed (no network)."""
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

import parse  # noqa: E402


def _stub(reply):
    """Build a fake ``parse_fn`` that always answers with ``reply``.

    The returned callable takes ``text`` plus optional ``system`` and ``max_tokens``
    arguments, ignores all of them, and hands back ``reply`` unchanged — a stand-in
    for Qwen's JSON reply so the tests never touch the network.
    """
    def _canned(text, system=None, max_tokens=400):
        return reply

    return _canned


def test_new_investor_basic():
    """A complete new-investor reply keeps its intent, investor name and contact email."""
    model_reply = {
        "intent": "new_investor",
        "investor_name": "Acme Capital",
        "contact_name": "Jane Doe",
        "contact_email": "jane@acme.com",
        "contact_title": None,
        "note": "met at the Austin conf",
    }
    parsed = parse.parse_message(
        "New investor Acme Capital, contact Jane Doe jane@acme.com, met at the Austin conf",
        parse_fn=_stub(model_reply),
    )
    expected = (
        ("intent", "new_investor"),
        ("investor_name", "Acme Capital"),
        ("contact_email", "jane@acme.com"),
    )
    for field, value in expected:
        assert parsed[field] == value


def test_email_salvaged_from_source_when_model_misses():
    """When the model reply has no email but the message contains one, that address is used."""
    reply_without_email = {
        "intent": "new_investor",
        "investor_name": "Beta LP",
        "contact_name": "Bob",
        "contact_email": None,
    }
    parsed = parse.parse_message(
        "add bob@example.org from Beta LP",
        parse_fn=_stub(reply_without_email),
    )
    assert parsed["contact_email"] == "bob@example.org"


def test_fabricated_email_dropped_when_not_in_source():
    """An email the model returns that does not appear in the message must not survive."""
    reply_with_invented_email = {
        "intent": "new_investor",
        "investor_name": "Gamma Partners",
        "contact_name": "their GP",
        "contact_email": "made-up@nowhere.test",
    }
    parsed = parse.parse_message(
        "new prospect Gamma Partners, talked to their GP",
        parse_fn=_stub(reply_with_invented_email),
    )
    # The address was invented by the model (it is absent from the source text),
    # so the parsed result is expected to carry no email at all.
    assert parsed["contact_email"] is None


def test_email_extracted_without_surrounding_punctuation():
    """Salvaged addresses come back bare: no angle brackets, parentheses or trailing period."""
    # "Name <addr>" is the most common contact format; parenthesised addresses and a
    # sentence-ending period also occur. In every case the model reply has no email,
    # so the address has to come from the message text itself.
    message_to_address = (
        ("New investor: Larch Capital — Dana Reed <dana@larchcap.com>, met at conf", "dana@larchcap.com"),
        ("ping (sam@beta.io) re the deck", "sam@beta.io"),
        ("reach kim@acme.co.", "kim@acme.co"),
    )
    for message, address in message_to_address:
        # A fresh reply dict per case, so one case can never leak into the next.
        reply = dict(intent="new_investor", investor_name="X", contact_name="Y", contact_email=None)
        parsed = parse.parse_message(message, parse_fn=_stub(reply))
        assert parsed["contact_email"] == address, (message, parsed["contact_email"])


def test_meeting_note_intent_preserved():
    """A ``meeting_note`` reply keeps both its intent and its note text."""
    note_reply = {
        "intent": "meeting_note",
        "investor_name": "Acme Capital",
        "note": "wants the Q3 deck",
    }
    parsed = parse.parse_message(
        "Note for Acme Capital: wants the Q3 deck",
        parse_fn=_stub(note_reply),
    )
    assert parsed["intent"] == "meeting_note"
    assert parsed["note"] == "wants the Q3 deck"


def test_unclear_when_no_entity():
    """With neither an investor nor a contact name, the intent is reported as ``unclear``."""
    entityless_reply = {"intent": "new_investor", "investor_name": None, "contact_name": None}
    parsed = parse.parse_message("hey what's up", parse_fn=_stub(entityless_reply))
    assert parsed["intent"] == "unclear"


def test_null_strings_normalized():
    """Placeholder strings ("null", "N/A", "") in the reply are turned into ``None``."""
    reply_with_placeholders = {
        "intent": "new_investor",
        "investor_name": "Delta Fund",
        "contact_name": "null",
        "contact_email": "N/A",
        "note": "",
    }
    parsed = parse.parse_message("Delta Fund", parse_fn=_stub(reply_with_placeholders))
    for field in ("contact_name", "contact_email", "note"):
        assert parsed[field] is None


def test_bad_intent_falls_back_to_unclear():
    """An unrecognised intent value in the reply is replaced by ``unclear``."""
    reply_with_bad_intent = {"intent": "garbage", "investor_name": "Epsilon Capital"}
    parsed = parse.parse_message("Epsilon Capital", parse_fn=_stub(reply_with_bad_intent))
    assert parsed["intent"] == "unclear"


def test_none_model_reply_is_unclear():
    """A ``None`` reply from the model still yields a result, with intent ``unclear``."""
    silent_model = _stub(None)
    parsed = parse.parse_message("???", parse_fn=silent_model)
    assert parsed["intent"] == "unclear"


def test_parse_message_stashes_source_text():
    """The original message text is kept on the result under ``_source_text``."""
    message = "Acme Capital, Jane jane@acme.com"
    reply = {
        "intent": "new_investor",
        "investor_name": "Acme Capital",
        "contact_name": "Jane",
        "contact_email": "jane@acme.com",
    }
    parsed = parse.parse_message(message, parse_fn=_stub(reply))
    assert parsed["_source_text"] == message


def test_revise_applies_note_change_and_preserves_control_keys():
    """Revising a parsed proposal updates the note while ``intent`` and ``_source_text`` carry over."""
    initial_reply = {
        "intent": "new_investor",
        "investor_name": "Acme Capital",
        "contact_name": "Jane Doe",
        "contact_email": "jane@acme.com",
        "contact_title": None,
        "note": None,
    }
    proposal = parse.parse_message(
        "New investor Acme Capital, Jane Doe jane@acme.com",
        parse_fn=_stub(initial_reply),
    )

    # The revision reply carries no "intent" key at all; only the data fields.
    revision_reply = {
        "investor_name": "Acme Capital",
        "contact_name": "Jane Doe",
        "contact_email": "jane@acme.com",
        "contact_title": None,
        "note": "met on June 14",
    }
    revised = parse.revise(
        proposal,
        "add that we met on June 14",
        parse_fn=_stub(revision_reply),
    )

    assert revised["note"] == "met on June 14"
    assert revised["investor_name"] == "Acme Capital"
    # control key preserved
    assert revised["intent"] == "new_investor"
    # preserved for email integrity
    assert revised["_source_text"] == proposal["_source_text"]


def test_revise_email_taken_only_from_instruction():
    """During revise, a new email is accepted only if the instruction text itself contains it."""
    proposal = {
        "intent": "new_investor",
        "investor_name": "Acme",
        "contact_name": "Jane",
        "contact_email": "jane@acme.com",
        "contact_title": None,
        "note": None,
        "_source_text": "Acme, Jane jane@acme.com",
    }

    # Case 1: the instruction literally carries the new address → accepted.
    accepted = parse.revise(
        proposal,
        "her email is jane@newfirm.com",
        parse_fn=_stub({"contact_email": "jane@newfirm.com"}),
    )
    assert accepted["contact_email"] == "jane@newfirm.com"

    # Case 2: the model tries to change the email but the instruction has no
    # address → the existing one is kept, while the title change still applies.
    rejected = parse.revise(
        proposal,
        "set her title to GP",
        parse_fn=_stub({"contact_email": "totally@madeup.test", "contact_title": "GP"}),
    )
    assert rejected["contact_email"] == "jane@acme.com"  # model's email ignored (not in instruction)
    assert rejected["contact_title"] == "GP"


def test_revise_preserves_match_id():
    """Revising the note leaves ``_match_id`` and ``intent`` of the proposal untouched."""
    proposal = {
        "intent": "meeting_note",
        "investor_name": "Acme",
        "contact_name": None,
        "contact_email": None,
        "contact_title": None,
        "note": "old",
        "_match_id": "rowAcme",
        "_stage": "approval",
        "_source_text": "note for Acme: old",
    }
    note_only_reply = {"note": "sent the deck"}
    revised = parse.revise(
        proposal,
        "change the note to: sent the deck",
        parse_fn=_stub(note_only_reply),
    )
    assert revised["note"] == "sent the deck"
    assert revised["_match_id"] == "rowAcme"
    assert revised["intent"] == "meeting_note"


def test_revise_cannot_empty_the_proposal():
    """A revision reply that nulls every field must not wipe the investor and contact names."""
    proposal = {
        "intent": "new_investor",
        "investor_name": "Acme",
        "contact_name": "Jane",
        "contact_email": None,
        "contact_title": None,
        "note": "x",
        "_source_text": "Acme Jane",
    }
    all_null_reply = {
        "investor_name": None,
        "contact_name": None,
        "contact_title": None,
        "note": None,
    }
    revised = parse.revise(proposal, "clear it", parse_fn=_stub(all_null_reply))
    assert revised["investor_name"] == "Acme"
    assert revised["contact_name"] == "Jane"


if __name__ == "__main__":
    # Dependency-free runner: gather every module-level callable whose name starts
    # with "test_", in name order, and call each one. A failing assertion propagates
    # and aborts the run before the summary line is printed.
    fns = []
    for _name, _obj in sorted(globals().items()):
        if not _name.startswith("test_"):
            continue
        if callable(_obj):
            fns.append(_obj)
    for fn in fns:
        fn()
        print(f"ok  {fn.__name__}")
    print(f"\n{len(fns)} passed")