0b893295e1
Close the two locked post-deploy enhancements for the Matrix intake bot.
Fuzzy matching (server-side, ships in the s9pk): new find_intake_candidates in
server.py returns ranked deterministic near-matches (difflib name similarity +
token-set Jaccard, legal-suffix-aware, + email Levenshtein <= 2); GET
/api/intake/match now returns {match, candidates}. The bot surfaces a numbered
shortlist so a near-duplicate (Charlie/Charles, Acme Capital vs Acme Capital LLC,
a one-char email typo) is confirmed by a human instead of silently creating a
second investor. Exact match still auto-attaches; fuzzy candidates are never
auto-attached. The optional LLM-judge re-rank is deferred.
Conversational edits (bot-side, ships on the Spark): any in-thread reply that
isn't yes/no/edit field=value is treated as a natural-language revision and
re-run through local Qwen (parse.revise). Email integrity is preserved -- a
changed address must literally appear in the instruction; the model's email
field is structurally unreachable. No-op revisions re-prompt.
Docs/current-state brought current; 27/27 backend tests green.
225 lines
9.9 KiB
Python
225 lines
9.9 KiB
Python
#!/usr/bin/env python3
"""Tests for the Matrix-intake CRM surface (v0.1.0 Matrix-intake M2).

The bot adds no parallel write path — it reuses /api/fundraising/log-communication and adds
one read-only lookup, GET /api/intake/match. This boots the REAL server against a temp DB and
asserts:
- match by normalized name and by contact email, returning the GRID ROW id;
- fuzzy candidates: a near-spelled name, a legal-suffix-only difference, or an email typo of
  up to two characters comes back under `candidates` (deduped per row) with `match` left null;
- the new-vs-existing contract: a bot-style create (log-communication +
  create_investor_if_missing) then matches by name — so an approved note lands on that same
  investor instead of duplicating it;
- provenance: an intake-sourced communication is audited with source="matrix_intake";
- guards: missing q/email -> 400, unauthenticated -> 401.
Synthetic data only.

Run: cd backend && python3 test_intake_endpoints.py
"""
|
|
import http.client
|
|
import json
|
|
import os
|
|
import sqlite3
|
|
import sys
|
|
import tempfile
|
|
import threading
|
|
from http.server import ThreadingHTTPServer
|
|
|
|
# Throwaway data directory + DB path for this run. The variables are exported BEFORE
# `import server` below (hence the E402 suppression); presumably the server module reads
# them at import time — confirm against server.py.
_DATA = tempfile.mkdtemp()
os.environ.update({
    "CRM_DATA_DIR": _DATA,
    "CRM_DB_PATH": os.path.join(_DATA, "crm.db"),
})

# Put this file's own directory first on sys.path so `server` resolves to the sibling module
# regardless of the current working directory.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import server  # noqa: E402

# Messages of every failed check; appended to by check() and reported at the end of main().
FAILS = []
|
|
|
|
|
|
def check(cond, msg):
    """Report one assertion: print a PASS/FAIL line and record failures.

    cond -- any value; only its truthiness is used.
    msg  -- description printed after the PASS/FAIL tag and, on failure, appended to FAILS.
    """
    if cond:
        print(" PASS " + msg)
        return
    print(" FAIL " + msg)
    FAILS.append(msg)
|
|
|
|
|
|
class _Quiet(server.CRMHandler):
    """The real CRM request handler with per-request logging switched off."""

    def log_message(self, *_args):
        """Discard the log line so test output only shows PASS/FAIL results."""
        return None
|
|
|
|
|
|
def _req(port, method, path, token=None, body=None):
    """Send one HTTP request to the local test server and decode the reply.

    port   -- TCP port on 127.0.0.1 to connect to.
    method -- HTTP verb, e.g. "GET" or "POST".
    path   -- request target including any query string.
    token  -- optional bearer token; when truthy it is sent as "Authorization: Bearer <token>".
    body   -- optional JSON-serialisable object; when not None it is sent as the request body
              with "Content-Type: application/json".

    Returns (status, data): the HTTP status code and the parsed JSON body, or None for data
    when the body is empty or is not valid JSON.

    Connection errors and timeouts (10 s) propagate to the caller; the connection is closed
    in every case.
    """
    headers = {}
    if token:
        headers["Authorization"] = "Bearer " + token

    payload = None
    if body is not None:
        payload = json.dumps(body)
        headers["Content-Type"] = "application/json"

    conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
    try:
        conn.request(method, path, body=payload, headers=headers)
        resp = conn.getresponse()
        status = resp.status
        raw = resp.read().decode("utf-8", "replace")
    finally:
        # Always release the socket, including when the request or the read raises.
        conn.close()

    if not raw:
        return status, None
    try:
        return status, json.loads(raw)
    except ValueError:
        # Non-JSON body (e.g. a plain-text error page): report the status only.
        return status, None
|
|
|
|
|
|
# Synthetic fundraising grid that seed() stores as the 'main' state row: three investor rows,
# each with a single contact. The row ids are what the /api/intake/match checks assert on.
GRID = {
    "columns": [],
    "rows": [
        {
            "id": "rowAcme",
            "investor_name": "Acme Capital",
            "notes": "",
            "contacts": [
                {"name": "Jane Doe", "email": "jane@acme.com", "title": "GP"},
            ],
        },
        {
            "id": "rowCharlie",
            "investor_name": "Charlie Brown",
            "notes": "",
            "contacts": [
                {"name": "Charlie Brown", "email": "cb@brown.fund", "title": ""},
            ],
        },
        {
            "id": "rowBeta",
            "investor_name": "Beta Capital LLC",
            "notes": "",
            "contacts": [
                {"name": "Pat Roe", "email": "pat@beta.com", "title": ""},
            ],
        },
    ],
}
|
|
|
|
|
|
def seed():
    """Insert the fixture rows the tests rely on into the DB at $CRM_DB_PATH.

    Adds one active admin user (id 'u1', username 'grant') and stores GRID as the grid_json of
    the 'main' fundraising_state row. Expects the tables to exist already (main() calls
    server.init_db() first). The connection is closed even if an INSERT fails.
    """
    conn = sqlite3.connect(os.environ["CRM_DB_PATH"])
    try:
        # `with conn` commits on success and rolls back if a statement raises;
        # it does not close the connection, hence the surrounding try/finally.
        with conn:
            conn.execute(
                "INSERT INTO users (id,username,email,password_hash,full_name,role,is_active) "
                "VALUES ('u1','grant','grant@ten31.example','x','Grant','admin',1)"
            )
            # init_db doesn't create the 'main' state row (it's created lazily on first write),
            # so upsert rather than UPDATE — a plain UPDATE would silently match zero rows.
            conn.execute(
                "INSERT INTO fundraising_state (id, grid_json, views_json, version) "
                "VALUES ('main', ?, '[]', 1) "
                "ON CONFLICT(id) DO UPDATE SET grid_json = excluded.grid_json",
                (json.dumps(GRID),),
            )
    finally:
        conn.close()
|
|
|
|
|
|
def _payload(resp):
    """Return the ``data`` object of a decoded API response, or ``{}``.

    Tolerates a missing/non-JSON body (None), a non-dict body and ``"data": null`` so that a
    malformed response is recorded as a FAIL by the following check() instead of raising.
    """
    data = resp.get("data") if isinstance(resp, dict) else None
    return data if isinstance(data, dict) else {}


def main():
    """Boot the real server on an ephemeral localhost port and run every intake check.

    Initialises and seeds the temp DB, issues a token for the seeded user, serves _Quiet from a
    daemon thread, then exercises GET /api/intake/match (exact, fuzzy, guard cases) and
    POST /api/fundraising/log-communication, and finally inspects audit_log directly.

    Prints one PASS/FAIL line per check followed by a summary; exits with status 1 when any
    check failed. The HTTP server is stopped and its socket closed even if a check raises.
    """
    server.init_db()
    seed()
    token = server.create_token("u1", "grant", "admin")

    # Port 0 lets the OS pick a free port; read the actual one back from the bound socket.
    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
    port = httpd.server_address[1]
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        print("\n[match: existing investor by name returns the grid row id]")
        st, d = _req(port, "GET", "/api/intake/match?q=Acme%20Capital", token)
        m = _payload(d).get("match")
        check(st == 200 and m and m.get("id") == "rowAcme" and m.get("matched_on") == "name",
              f"name match -> rowAcme (got {st}, {m})")

        print("\n[match: case-insensitive name]")
        st, d = _req(port, "GET", "/api/intake/match?q=acme%20capital", token)
        m = _payload(d).get("match")
        check(m and m.get("id") == "rowAcme", f"normalized name match (got {m})")

        print("\n[match: by contact email]")
        st, d = _req(port, "GET", "/api/intake/match?email=jane@acme.com", token)
        m = _payload(d).get("match")
        check(m and m.get("id") == "rowAcme" and m.get("matched_on") == "email",
              f"email match -> rowAcme (got {m})")

        print("\n[match: unknown -> null]")
        st, d = _req(port, "GET", "/api/intake/match?q=Nobody%20LP", token)
        check(st == 200 and _payload(d).get("match") is None,
              f"no match -> null (got {st}, {d})")

        print("\n[fuzzy: exact match returns no candidates (bot auto-attaches)]")
        st, d = _req(port, "GET", "/api/intake/match?q=Acme%20Capital", token)
        data = _payload(d)
        check(st == 200 and data.get("match") and data.get("candidates") == [],
              f"exact match -> match set, candidates empty (got {data})")

        print("\n[fuzzy: near-spelling surfaces a candidate (Charles Brown ~ Charlie Brown)]")
        st, d = _req(port, "GET", "/api/intake/match?q=Charles%20Brown", token)
        data = _payload(d)
        cids = [c.get("id") for c in data.get("candidates") or []]
        check(data.get("match") is None and "rowCharlie" in cids,
              f"near-spelling -> candidate rowCharlie, no exact (got {data})")

        print("\n[fuzzy: legal-suffix difference surfaces a candidate (Beta Capital ~ Beta Capital LLC)]")
        st, d = _req(port, "GET", "/api/intake/match?q=Beta%20Capital", token)
        data = _payload(d)
        cids = [c.get("id") for c in data.get("candidates") or []]
        check(data.get("match") is None and "rowBeta" in cids,
              f"legal-suffix -> candidate rowBeta, no exact (got {data})")

        print("\n[fuzzy: legal-suffix-only difference ranks as a top candidate (Acme Capital LLC ~ Acme Capital)]")
        st, d = _req(port, "GET", "/api/intake/match?q=Acme%20Capital%20LLC", token)
        data = _payload(d)
        top = (data.get("candidates") or [None])[0]
        check(data.get("match") is None and top and top.get("id") == "rowAcme"
              and top.get("score") == 1.0,
              f"legal-suffix-only -> rowAcme top candidate @1.0, no exact (got {data})")

        print("\n[fuzzy: one-character email typo surfaces a candidate by email]")
        st, d = _req(port, "GET", "/api/intake/match?email=jhane@acme.com", token)
        data = _payload(d)
        cands = data.get("candidates") or []
        hit = next((c for c in cands if c.get("id") == "rowAcme"), None)
        check(data.get("match") is None and hit and hit.get("matched_on") == "email",
              f"email typo -> candidate rowAcme matched_on email (got {data})")

        print("\n[fuzzy: two-character email typo (distance 2) still surfaces]")
        st, d = _req(port, "GET", "/api/intake/match?email=jane@acne.con", token)  # acme->acne, com->con
        data = _payload(d)
        hit = next((c for c in data.get("candidates") or [] if c.get("id") == "rowAcme"), None)
        check(data.get("match") is None and hit and hit.get("matched_on") == "email"
              and hit.get("score") == 0.8,
              f"dist-2 email -> rowAcme @0.8 (got {data})")

        print("\n[fuzzy: a row matching on BOTH name and email appears once (deduped)]")
        st, d = _req(port, "GET", "/api/intake/match?q=Acme%20Capitol&email=jhane@acme.com", token)
        data = _payload(d)
        acme_hits = [c for c in data.get("candidates") or [] if c.get("id") == "rowAcme"]
        check(data.get("match") is None and len(acme_hits) == 1,
              f"name+email both match rowAcme -> single deduped entry (got {data})")

        print("\n[fuzzy: nothing close -> empty candidates]")
        st, d = _req(port, "GET", "/api/intake/match?q=Zphq%20Nobody%20LP", token)
        data = _payload(d)
        check(st == 200 and data.get("match") is None and data.get("candidates") == [],
              f"unrelated query -> no match, no candidates (got {data})")

        print("\n[match: missing q and email -> 400]")
        st, _ = _req(port, "GET", "/api/intake/match", token)
        check(st == 400, f"no params -> 400 (got {st})")

        print("\n[match: unauthenticated -> 401]")
        st, _ = _req(port, "GET", "/api/intake/match?q=Acme", None)
        check(st == 401, f"no token -> 401 (got {st})")

        print("\n[bot create: log-communication + create_investor_if_missing, source tagged]")
        st, d = _req(port, "POST", "/api/fundraising/log-communication", token, {
            "investor_name": "Beacon Ventures",
            "contact": {"name": "Sam Lee", "email": "sam@beacon.vc", "title": "Partner"},
            "create_investor_if_missing": True,
            "type": "note", "subject": "Intake (Matrix)", "body": "met at the Austin conf",
            "source": "matrix_intake",
        })
        check(st in (200, 201), f"create new investor -> 201 (got {st})")

        print("\n[new-vs-existing contract: the just-created investor now matches by name]")
        st, d = _req(port, "GET", "/api/intake/match?q=Beacon%20Ventures", token)
        m = _payload(d).get("match")
        check(m and m.get("investor_name") == "Beacon Ventures",
              f"created investor is matchable (no duplicate on next note) (got {m})")

        print("\n[provenance: the intake communication is audited as source=matrix_intake]")
        # Read the audit trail straight from the DB file the server is using.
        conn = sqlite3.connect(os.environ["CRM_DB_PATH"])
        try:
            rows = conn.execute(
                "SELECT changes FROM audit_log "
                "WHERE entity_type='communication' AND action='create'"
            ).fetchall()
        finally:
            conn.close()
        sources = [json.loads(r[0]).get("source") for r in rows if r[0]]
        check("matrix_intake" in sources, f"audit carries source=matrix_intake (got {sources})")
    finally:
        httpd.shutdown()      # stop the serve_forever() loop
        httpd.server_close()  # release the listening socket; shutdown() alone leaves it open

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for failure in FAILS:
            print(f" - {failure}")
        sys.exit(1)
    print("ALL PASS (matrix-intake endpoints)")
|
|
|
|
|
|
# Script entry point: run the checks only when executed directly, not when imported.
if __name__ == "__main__":
    main()
|