Matrix intake: fuzzy investor matching + conversational in-thread edits (v0.1.0:86)
Close the two locked post-deploy enhancements for the Matrix intake bot.
Fuzzy matching (server-side, ships in the s9pk): new find_intake_candidates in
server.py returns ranked deterministic near-matches (difflib name similarity +
token-set Jaccard, legal-suffix-aware, + email Levenshtein distance <= 2); GET
/api/intake/match now returns {match, candidates}. The bot surfaces a numbered
shortlist so a near-duplicate (Charlie/Charles, Acme Capital vs Acme Capital LLC,
a one-char email typo) is confirmed by a human instead of silently creating a
second investor. Exact match still auto-attaches; fuzzy candidates are never
auto-attached. The optional LLM-judge re-rank is deferred.
Conversational edits (bot-side, ships on the Spark): any in-thread reply that
isn't "yes", "no", or "edit field=value" is treated as a natural-language revision and
re-run through local Qwen (parse.revise). Email integrity is preserved -- a
changed address must literally appear in the instruction; the model's email
field is structurally unreachable. No-op revisions re-prompt.
Docs/current-state brought current; 27/27 backend tests green.
This commit is contained in:
@@ -71,6 +71,10 @@ GRID = {
|
||||
"rows": [
|
||||
{"id": "rowAcme", "investor_name": "Acme Capital", "notes": "",
|
||||
"contacts": [{"name": "Jane Doe", "email": "jane@acme.com", "title": "GP"}]},
|
||||
{"id": "rowCharlie", "investor_name": "Charlie Brown", "notes": "",
|
||||
"contacts": [{"name": "Charlie Brown", "email": "cb@brown.fund", "title": ""}]},
|
||||
{"id": "rowBeta", "investor_name": "Beta Capital LLC", "notes": "",
|
||||
"contacts": [{"name": "Pat Roe", "email": "pat@beta.com", "title": ""}]},
|
||||
],
|
||||
}
|
||||
|
||||
@@ -119,6 +123,61 @@ def main():
|
||||
check(st == 200 and (d or {}).get("data", {}).get("match") is None,
|
||||
f"no match -> null (got {st}, {d})")
|
||||
|
||||
print("\n[fuzzy: exact match returns no candidates (bot auto-attaches)]")
|
||||
st, d = _req(port, "GET", "/api/intake/match?q=Acme%20Capital", token)
|
||||
data = (d or {}).get("data", {})
|
||||
check(st == 200 and data.get("match") and data.get("candidates") == [],
|
||||
f"exact match -> match set, candidates empty (got {data})")
|
||||
|
||||
print("\n[fuzzy: near-spelling surfaces a candidate (Charles Brown ~ Charlie Brown)]")
|
||||
st, d = _req(port, "GET", "/api/intake/match?q=Charles%20Brown", token)
|
||||
data = (d or {}).get("data", {})
|
||||
cids = [c["id"] for c in data.get("candidates", [])]
|
||||
check(data.get("match") is None and "rowCharlie" in cids,
|
||||
f"near-spelling -> candidate rowCharlie, no exact (got {data})")
|
||||
|
||||
print("\n[fuzzy: legal-suffix difference surfaces a candidate (Beta Capital ~ Beta Capital LLC)]")
|
||||
st, d = _req(port, "GET", "/api/intake/match?q=Beta%20Capital", token)
|
||||
data = (d or {}).get("data", {})
|
||||
cids = [c["id"] for c in data.get("candidates", [])]
|
||||
check(data.get("match") is None and "rowBeta" in cids,
|
||||
f"legal-suffix -> candidate rowBeta, no exact (got {data})")
|
||||
|
||||
print("\n[fuzzy: legal-suffix-only difference ranks as a top candidate (Acme Capital LLC ~ Acme Capital)]")
|
||||
st, d = _req(port, "GET", "/api/intake/match?q=Acme%20Capital%20LLC", token)
|
||||
data = (d or {}).get("data", {})
|
||||
top = (data.get("candidates") or [None])[0]
|
||||
check(data.get("match") is None and top and top["id"] == "rowAcme" and top["score"] == 1.0,
|
||||
f"legal-suffix-only -> rowAcme top candidate @1.0, no exact (got {data})")
|
||||
|
||||
print("\n[fuzzy: one-character email typo surfaces a candidate by email]")
|
||||
st, d = _req(port, "GET", "/api/intake/match?email=jhane@acme.com", token)
|
||||
data = (d or {}).get("data", {})
|
||||
cands = data.get("candidates", [])
|
||||
hit = next((c for c in cands if c["id"] == "rowAcme"), None)
|
||||
check(data.get("match") is None and hit and hit["matched_on"] == "email",
|
||||
f"email typo -> candidate rowAcme matched_on email (got {data})")
|
||||
|
||||
print("\n[fuzzy: two-character email typo (distance 2) still surfaces]")
|
||||
st, d = _req(port, "GET", "/api/intake/match?email=jane@acne.con", token) # acme->acne, com->con
|
||||
data = (d or {}).get("data", {})
|
||||
hit = next((c for c in data.get("candidates", []) if c["id"] == "rowAcme"), None)
|
||||
check(data.get("match") is None and hit and hit["matched_on"] == "email" and hit["score"] == 0.8,
|
||||
f"dist-2 email -> rowAcme @0.8 (got {data})")
|
||||
|
||||
print("\n[fuzzy: a row matching on BOTH name and email appears once (deduped)]")
|
||||
st, d = _req(port, "GET", "/api/intake/match?q=Acme%20Capitol&email=jhane@acme.com", token)
|
||||
data = (d or {}).get("data", {})
|
||||
acme_hits = [c for c in data.get("candidates", []) if c["id"] == "rowAcme"]
|
||||
check(data.get("match") is None and len(acme_hits) == 1,
|
||||
f"name+email both match rowAcme -> single deduped entry (got {data})")
|
||||
|
||||
print("\n[fuzzy: nothing close -> empty candidates]")
|
||||
st, d = _req(port, "GET", "/api/intake/match?q=Zphq%20Nobody%20LP", token)
|
||||
data = (d or {}).get("data", {})
|
||||
check(st == 200 and data.get("match") is None and data.get("candidates") == [],
|
||||
f"unrelated query -> no match, no candidates (got {data})")
|
||||
|
||||
print("\n[match: missing q and email -> 400]")
|
||||
st, _ = _req(port, "GET", "/api/intake/match", token)
|
||||
check(st == 400, f"no params -> 400 (got {st})")
|
||||
|
||||
Reference in New Issue
Block a user