Matrix intake: strip surrounding punctuation from extracted emails
normalize()'s email regex matched any run of non-@, non-whitespace characters on either side of the @, so "Name <addr>" (the most common contact format) yielded "<addr": only trailing punctuation was stripped, never leading. Tighten the regex to the standard local@domain.tld shape so that the bare address is extracted from <…>, (…), and trailing-period forms. Found via the live-deploy pre-flight. Add a regression test. Also log two intake backlog items in ROADMAP: the scoped service-credential auth path (deferred; the bot uses a member login for now) and fuzzy match + in-thread confirm (post-deploy).
This commit is contained in:
@@ -20,7 +20,7 @@ SYSTEM = (
|
||||
"Use null (not empty string) for anything not present. Output JSON only."
|
||||
)
|
||||
|
||||
_EMAIL_RE = re.compile(r"[^@\s]+@[^@\s]+\.[^@\s]+")
|
||||
_EMAIL_RE = re.compile(r"[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}")
|
||||
_VALID_INTENTS = {"new_investor", "meeting_note", "unclear"}
|
||||
_FIELDS = ("intent", "investor_name", "contact_name", "contact_email", "contact_title", "note")
|
||||
|
||||
|
||||
@@ -43,6 +43,23 @@ def test_fabricated_email_dropped_when_not_in_source():
|
||||
assert p["contact_email"] is None
|
||||
|
||||
|
||||
def test_email_extracted_without_surrounding_punctuation():
    """Regression: an email salvaged from the source text is the bare address.

    "Name <addr>" is the most common contact format; parenthesised addresses
    and addresses followed by a sentence-ending period also occur. The stubbed
    parse payload is given ``contact_email=None``, so whatever ends up in
    ``contact_email`` is expected to come from the source message itself and
    must never carry the surrounding brackets or punctuation.
    """
    source_and_bare_address = (
        # Angle-bracket form.
        ("New investor: Larch Capital — Dana Reed <dana@larchcap.com>, met at conf", "dana@larchcap.com"),
        # Parenthesised form.
        ("ping (sam@beta.io) re the deck", "sam@beta.io"),
        # Trailing-period form.
        ("reach kim@acme.co.", "kim@acme.co"),
    )
    for message, bare_address in source_and_bare_address:
        # Build a fresh payload and stub per case so no state is shared between cases.
        stub_payload = {
            "intent": "new_investor",
            "investor_name": "X",
            "contact_name": "Y",
            "contact_email": None,
        }
        parsed = parse.parse_message(message, parse_fn=_stub(stub_payload))
        assert parsed["contact_email"] == bare_address, (message, parsed["contact_email"])
|
||||
|
||||
|
||||
def test_meeting_note_intent_preserved():
|
||||
p = parse.parse_message(
|
||||
"Note for Acme Capital: wants the Q3 deck",
|
||||
|
||||
Reference in New Issue
Block a user