Files
ten31-database/backend/matrix_intake/test_parse.py
T
Keysat fd2e3ed78e Matrix intake: strip surrounding punctuation from extracted emails
normalize()'s email regex matched non-@/non-space runs, so "Name <addr>"
(the most common contact format) yielded "<addr"; only trailing punctuation
was stripped, never leading. Tighten the regex to standard local@domain.tld
so the bare address is extracted from <…>, (…), and trailing-period forms.
Found via the live-deploy pre-flight. Add a regression test.

Also log two intake backlog items in ROADMAP: the scoped service-credential
auth path (deferred; bot uses a member login for now) and fuzzy match +
in-thread confirm (post-deploy).
2026-06-17 14:06:32 -05:00

111 lines
3.9 KiB
Python

"""Tests for the intake parse/normalize layer — Spark/Qwen stubbed (no network)."""
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import parse # noqa: E402
def _stub(reply):
"""Return a parse_fn that ignores input and yields `reply` (simulating Qwen's JSON)."""
return lambda text, system=None, max_tokens=400: reply
def test_new_investor_basic():
    """A complete new-investor reply keeps its intent, investor name and email."""
    source_text = "New investor Acme Capital, contact Jane Doe jane@acme.com, met at the Austin conf"
    model_reply = {
        "intent": "new_investor",
        "investor_name": "Acme Capital",
        "contact_name": "Jane Doe",
        "contact_email": "jane@acme.com",
        "contact_title": None,
        "note": "met at the Austin conf",
    }

    parsed = parse.parse_message(source_text, parse_fn=_stub(model_reply))

    # Checked in the same order as before: intent, investor, then email.
    expectations = (
        ("intent", "new_investor"),
        ("investor_name", "Acme Capital"),
        ("contact_email", "jane@acme.com"),
    )
    for field, wanted in expectations:
        assert parsed[field] == wanted
def test_email_salvaged_from_source_when_model_misses():
    """If the stubbed reply has no email, the address in the source text is used."""
    model_reply = {
        "intent": "new_investor",
        "investor_name": "Beta LP",
        "contact_name": "Bob",
        "contact_email": None,  # the "model" missed the address
    }
    parsed = parse.parse_message(
        "add bob@example.org from Beta LP",
        parse_fn=_stub(model_reply),
    )
    assert parsed["contact_email"] == "bob@example.org"
def test_fabricated_email_dropped_when_not_in_source():
    """An email in the reply that never appears in the source text is discarded."""
    source_text = "new prospect Gamma Partners, talked to their GP"
    model_reply = {
        "intent": "new_investor",
        "investor_name": "Gamma Partners",
        "contact_name": "their GP",
        "contact_email": "made-up@nowhere.test",
    }
    parsed = parse.parse_message(source_text, parse_fn=_stub(model_reply))
    # The source contains no address at all, so the invented one must not survive.
    assert parsed["contact_email"] is None
def test_email_extracted_without_surrounding_punctuation():
    """Salvaged emails come back bare: no angle brackets, parens or trailing dot."""
    # Source text -> the address that should be pulled out of it. Covers the
    # common "Name <addr>" form plus parenthesised and sentence-final addresses.
    expected_by_source = {
        "New investor: Larch Capital — Dana Reed <dana@larchcap.com>, met at conf": "dana@larchcap.com",
        "ping (sam@beta.io) re the deck": "sam@beta.io",
        "reach kim@acme.co.": "kim@acme.co",
    }
    for source_text, expected in expected_by_source.items():
        # A fresh reply dict per case, with no email, forces the salvage path.
        model_reply = {
            "intent": "new_investor",
            "investor_name": "X",
            "contact_name": "Y",
            "contact_email": None,
        }
        parsed = parse.parse_message(source_text, parse_fn=_stub(model_reply))
        assert parsed["contact_email"] == expected, (source_text, parsed["contact_email"])
def test_meeting_note_intent_preserved():
    """A meeting_note reply passes through with its intent and note intact."""
    note_text = "wants the Q3 deck"
    model_reply = {
        "intent": "meeting_note",
        "investor_name": "Acme Capital",
        "note": note_text,
    }
    parsed = parse.parse_message(
        "Note for Acme Capital: wants the Q3 deck",
        parse_fn=_stub(model_reply),
    )
    assert parsed["intent"] == "meeting_note"
    assert parsed["note"] == note_text
def test_unclear_when_no_entity():
    """A reply naming neither an investor nor a contact is reported as unclear."""
    # The stub claims "new_investor" but supplies no investor or contact name.
    entity_free_reply = {
        "intent": "new_investor",
        "investor_name": None,
        "contact_name": None,
    }
    parsed = parse.parse_message("hey what's up", parse_fn=_stub(entity_free_reply))
    assert parsed["intent"] == "unclear"
def test_null_strings_normalized():
    """Placeholder strings ("null", "N/A", "") in the reply come back as None."""
    model_reply = {
        "intent": "new_investor",
        "investor_name": "Delta Fund",
        "contact_name": "null",
        "contact_email": "N/A",
        "note": "",
    }
    parsed = parse.parse_message("Delta Fund", parse_fn=_stub(model_reply))
    # Each placeholder-valued field must have been normalized away.
    for field in ("contact_name", "contact_email", "note"):
        assert parsed[field] is None
def test_bad_intent_falls_back_to_unclear():
    """An unrecognized intent value in the reply is replaced with "unclear"."""
    bogus_reply = {"intent": "garbage", "investor_name": "Epsilon Capital"}
    parsed = parse.parse_message("Epsilon Capital", parse_fn=_stub(bogus_reply))
    assert parsed["intent"] == "unclear"
def test_none_model_reply_is_unclear():
    """When the parse function returns None, the result is still an "unclear" parse."""
    silent_model = _stub(None)
    parsed = parse.parse_message("???", parse_fn=silent_model)
    assert parsed["intent"] == "unclear"
if __name__ == "__main__":
    # Minimal runner for executing this file directly: call every module-level
    # callable whose name starts with "test_", in name order, and print one
    # "ok" line per test. A failing assert propagates and stops the run.
    collected = [
        obj
        for name, obj in sorted(globals().items())
        if callable(obj) and name.startswith("test_")
    ]
    for check in collected:
        check()
        print(f"ok {check.__name__}")
    print(f"\n{len(collected)} passed")