Files
ten31-database/backend/test_intake_endpoints.py
T
Keysat acd316ead4 Review fixes: narrow intake redact predicate to the bot's own nudge + edge tests
reviewer agent flagged the broadened redact_thread predicate (event_id OR in_reply==root)
as over-matching any plain reply to a thread root. Gate the bare-in_reply clause to the bot's
own sender (the nudge is always ours); thread children (cards/acks/human yes-no) still match by
rel_type=m.thread. Add unit edges for _name_similarity's all-generic fallback and a contact_id
NULL orphan case for the grid-blob email heal.
2026-06-20 13:05:13 -05:00

260 lines
12 KiB
Python

#!/usr/bin/env python3
"""Tests for the Matrix-intake CRM surface (v0.1.0 Matrix-intake M2).
The bot adds no parallel write path — it reuses /api/fundraising/log-communication and adds
one read-only lookup, GET /api/intake/match. This boots the REAL server against a temp DB and
asserts:
- match by normalized name and by contact email, returning the GRID ROW id;
- the new-vs-existing contract: a bot-style create (log-communication +
create_investor_if_missing) then matches by name — so an approved note lands on that same
investor instead of duplicating it;
- provenance: an intake-sourced communication is audited with source="matrix_intake";
- guards: missing q/email -> 400, unauthenticated -> 401.
Synthetic data only.
Run: cd backend && python3 test_intake_endpoints.py
"""
import http.client
import json
import os
import sqlite3
import sys
import tempfile
import threading
from http.server import ThreadingHTTPServer
# Point the server at a throwaway data directory and SQLite file. These are set BEFORE
# `import server` (hence the E402 waiver below) - presumably the module reads them at
# import time; confirm against server.py.
# NOTE(review): the mkdtemp() directory is never removed by this script.
_DATA = tempfile.mkdtemp()
os.environ["CRM_DATA_DIR"] = _DATA
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
# Put this file's own directory first on sys.path so the `server` import below resolves
# no matter which working directory the script is launched from.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import server # noqa: E402
# Messages of every failed check(); main() prints them and exits non-zero when non-empty.
FAILS = []
def check(cond, msg):
    """Print one PASS/FAIL line for an assertion and remember the failures.

    cond: any value; only its truthiness matters.
    msg: description printed after the verdict and, on failure, appended to FAILS.
    """
    if cond:
        print(" PASS " + msg)
        return
    print(" FAIL " + msg)
    FAILS.append(msg)
class _Quiet(server.CRMHandler):
    """The CRM request handler with log_message() turned into a no-op."""

    def log_message(self, *_ignored):
        # Accept and discard whatever is passed; presumably this is the http.server
        # per-request logging hook inherited through CRMHandler - confirm in server.py.
        return None
def _req(port, method, path, token=None, body=None):
    """Send one HTTP request to 127.0.0.1:<port> and return ``(status, data)``.

    port: TCP port of the server under test.
    method, path: HTTP verb and request target (query string included, already encoded).
    token: when truthy, sent as ``Authorization: Bearer <token>``.
    body: when not None, JSON-encoded and sent with ``Content-Type: application/json``.

    Returns the integer status code and the parsed JSON body. ``data`` is None when the
    response body is empty or is not valid JSON. Connection-level errors (refused, timeout
    after 10 s, reset) propagate to the caller.
    """
    headers = {}
    if token:
        headers["Authorization"] = "Bearer " + token
    payload = None
    if body is not None:
        payload = json.dumps(body)
        headers["Content-Type"] = "application/json"

    conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
    try:
        conn.request(method, path, body=payload, headers=headers)
        resp = conn.getresponse()
        status = resp.status
        raw = resp.read().decode("utf-8", "replace")
    finally:
        # Release the socket even when the request or the read raises.
        conn.close()

    if not raw:
        return status, None
    try:
        return status, json.loads(raw)
    except ValueError:
        # A non-JSON body is deliberately reported as None so callers can still assert
        # on the status code alone.
        return status, None
# Synthetic fundraising grid that seed() stores as the 'main' fundraising_state row.
# The rows are the targets of the /api/intake/match assertions in main():
#   rowAcme    - exact name / email match, plus email-typo and legal-suffix look-alikes
#   rowCharlie - near-spelling candidate (queried as "Charles Brown")
#   rowBeta    - legal-suffix candidate (queried as "Beta Capital")
#   rowAether / rowRussell - generic-descriptor decoys (see the comment inside the list)
GRID = {
    "columns": [],
    "rows": [
        {"id": "rowAcme", "investor_name": "Acme Capital", "notes": "",
         "contacts": [{"name": "Jane Doe", "email": "jane@acme.com", "title": "GP"}]},
        {"id": "rowCharlie", "investor_name": "Charlie Brown", "notes": "",
         "contacts": [{"name": "Charlie Brown", "email": "cb@brown.fund", "title": ""}]},
        {"id": "rowBeta", "investor_name": "Beta Capital LLC", "notes": "",
         "contacts": [{"name": "Pat Roe", "email": "pat@beta.com", "title": ""}]},
        # Generic-descriptor decoys: share only "investment group" / "investments" with the
        # Fortitude card below — must NOT surface as look-alikes (the 2026-06-20 false-positive fix).
        {"id": "rowAether", "investor_name": "Aether Investment Group", "notes": "",
         "contacts": [{"name": "Ada Ng", "email": "ada@aether.com", "title": ""}]},
        {"id": "rowRussell", "investor_name": "Russell Investments", "notes": "",
         "contacts": [{"name": "Russ Lee", "email": "russ@russell.com", "title": ""}]},
    ],
}
def seed():
    """Insert the synthetic admin user and the GRID fundraising state into the temp DB.

    Opens the SQLite file named by the CRM_DB_PATH environment variable. The `users` and
    `fundraising_state` tables must already exist (main() calls server.init_db() first).
    Any sqlite3 error propagates to the caller; the connection is closed either way.
    """
    db = sqlite3.connect(os.environ["CRM_DB_PATH"])
    try:
        db.execute(
            "INSERT INTO users (id,username,email,password_hash,full_name,role,is_active) "
            "VALUES ('u1','grant','grant@ten31.example','x','Grant','admin',1)")
        # init_db doesn't create the 'main' state row (it's created lazily on first write), so
        # upsert rather than UPDATE — a plain UPDATE would silently match zero rows.
        db.execute(
            "INSERT INTO fundraising_state (id, grid_json, views_json, version) "
            "VALUES ('main', ?, '[]', 1) "
            "ON CONFLICT(id) DO UPDATE SET grid_json = excluded.grid_json",
            (json.dumps(GRID),))
        db.commit()
    finally:
        # Close the handle even when an INSERT fails, so the DB file is not left open.
        db.close()
def main():
    """Run the whole intake test suite against a real server on an ephemeral port.

    Initialises and seeds the temp DB, runs the `_name_similarity` unit checks, then starts
    a ThreadingHTTPServer in a daemon thread and exercises GET /api/intake/match and
    POST /api/fundraising/log-communication over HTTP. Every assertion goes through
    check(); the server is always shut down and its socket closed. Exits with status 1
    when any check failed, otherwise prints the ALL PASS line.
    """
    server.init_db()
    seed()
    token = server.create_token("u1", "grant", "admin")

    # Unit: the distinctive-token similarity edges (the all-generic fallback path the endpoint
    # seed can't naturally reach — no real investor is named purely with generic descriptors).
    print("\n[unit: _name_similarity distinctive-token edges]")
    sim = server._name_similarity
    check(sim("Fortitude Investment Group", "Aether Investment Group") < 0.62,
          f"generic-only overlap stays below threshold (got {sim('Fortitude Investment Group', 'Aether Investment Group'):.2f})")
    check(sim("Aether Capital", "Aether Capital Partners") == 1.0,
          f"distinctive 'aether' (generic descriptors stripped) scores 1.0 (got {sim('Aether Capital', 'Aether Capital Partners'):.2f})")
    # Both sides all-generic → fallback compares full tokens on BOTH sides; shared generic word
    # alone must not clear the bar.
    check(sim("Capital Group", "Global Capital") < 0.62,
          f"all-generic both sides stays below threshold (got {sim('Capital Group', 'Global Capital'):.2f})")
    check(sim("Family Office", "Family Office") == 1.0,
          "identical all-generic names still score 1.0 (early-out)")

    # Port 0 lets the OS pick a free port; read the real one back from the bound socket.
    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
    port = httpd.server_address[1]
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        print("\n[match: existing investor by name returns the grid row id]")
        st, d = _req(port, "GET", "/api/intake/match?q=Acme%20Capital", token)
        m = (d or {}).get("data", {}).get("match")
        check(st == 200 and m and m["id"] == "rowAcme" and m["matched_on"] == "name",
              f"name match -> rowAcme (got {st}, {m})")

        print("\n[match: case-insensitive name]")
        st, d = _req(port, "GET", "/api/intake/match?q=acme%20capital", token)
        m = (d or {}).get("data", {}).get("match")
        check(m and m["id"] == "rowAcme", f"normalized name match (got {m})")

        print("\n[match: by contact email]")
        st, d = _req(port, "GET", "/api/intake/match?email=jane@acme.com", token)
        m = (d or {}).get("data", {}).get("match")
        check(m and m["id"] == "rowAcme" and m["matched_on"] == "email",
              f"email match -> rowAcme (got {m})")

        print("\n[match: unknown -> null]")
        st, d = _req(port, "GET", "/api/intake/match?q=Nobody%20LP", token)
        check(st == 200 and (d or {}).get("data", {}).get("match") is None,
              f"no match -> null (got {st}, {d})")

        print("\n[fuzzy: exact match returns no candidates (bot auto-attaches)]")
        st, d = _req(port, "GET", "/api/intake/match?q=Acme%20Capital", token)
        data = (d or {}).get("data", {})
        check(st == 200 and data.get("match") and data.get("candidates") == [],
              f"exact match -> match set, candidates empty (got {data})")

        print("\n[fuzzy: near-spelling surfaces a candidate (Charles Brown ~ Charlie Brown)]")
        st, d = _req(port, "GET", "/api/intake/match?q=Charles%20Brown", token)
        data = (d or {}).get("data", {})
        cids = [c["id"] for c in data.get("candidates", [])]
        check(data.get("match") is None and "rowCharlie" in cids,
              f"near-spelling -> candidate rowCharlie, no exact (got {data})")

        print("\n[fuzzy: legal-suffix difference surfaces a candidate (Beta Capital ~ Beta Capital LLC)]")
        st, d = _req(port, "GET", "/api/intake/match?q=Beta%20Capital", token)
        data = (d or {}).get("data", {})
        cids = [c["id"] for c in data.get("candidates", [])]
        check(data.get("match") is None and "rowBeta" in cids,
              f"legal-suffix -> candidate rowBeta, no exact (got {data})")

        print("\n[fuzzy: legal-suffix-only difference ranks as a top candidate (Acme Capital LLC ~ Acme Capital)]")
        st, d = _req(port, "GET", "/api/intake/match?q=Acme%20Capital%20LLC", token)
        data = (d or {}).get("data", {})
        # First candidate, or None when the list is missing or empty.
        top = (data.get("candidates") or [None])[0]
        check(data.get("match") is None and top and top["id"] == "rowAcme" and top["score"] == 1.0,
              f"legal-suffix-only -> rowAcme top candidate @1.0, no exact (got {data})")

        print("\n[fuzzy: one-character email typo surfaces a candidate by email]")
        st, d = _req(port, "GET", "/api/intake/match?email=jhane@acme.com", token)
        data = (d or {}).get("data", {})
        cands = data.get("candidates", [])
        hit = next((c for c in cands if c["id"] == "rowAcme"), None)
        check(data.get("match") is None and hit and hit["matched_on"] == "email",
              f"email typo -> candidate rowAcme matched_on email (got {data})")

        print("\n[fuzzy: two-character email typo (distance 2) still surfaces]")
        st, d = _req(port, "GET", "/api/intake/match?email=jane@acne.con", token)  # acme->acne, com->con
        data = (d or {}).get("data", {})
        hit = next((c for c in data.get("candidates", []) if c["id"] == "rowAcme"), None)
        check(data.get("match") is None and hit and hit["matched_on"] == "email" and hit["score"] == 0.8,
              f"dist-2 email -> rowAcme @0.8 (got {data})")

        print("\n[fuzzy: a row matching on BOTH name and email appears once (deduped)]")
        st, d = _req(port, "GET", "/api/intake/match?q=Acme%20Capitol&email=jhane@acme.com", token)
        data = (d or {}).get("data", {})
        acme_hits = [c for c in data.get("candidates", []) if c["id"] == "rowAcme"]
        check(data.get("match") is None and len(acme_hits) == 1,
              f"name+email both match rowAcme -> single deduped entry (got {data})")

        print("\n[fuzzy: nothing close -> empty candidates]")
        st, d = _req(port, "GET", "/api/intake/match?q=Zphq%20Nobody%20LP", token)
        data = (d or {}).get("data", {})
        check(st == 200 and data.get("match") is None and data.get("candidates") == [],
              f"unrelated query -> no match, no candidates (got {data})")

        print("\n[fuzzy: shared generic words alone do NOT surface look-alikes (Fortitude vs Aether/Russell)]")
        st, d = _req(port, "GET", "/api/intake/match?q=Fortitude%20Investment%20Group", token)
        data = (d or {}).get("data", {})
        cids = [c["id"] for c in data.get("candidates", [])]
        check(data.get("match") is None and "rowAether" not in cids and "rowRussell" not in cids,
              f"generic-only overlap -> no decoy candidates (got {data})")

        print("\n[fuzzy: a shared DISTINCTIVE word still surfaces (Aether Capital ~ Aether Investment Group)]")
        st, d = _req(port, "GET", "/api/intake/match?q=Aether%20Capital", token)
        data = (d or {}).get("data", {})
        cids = [c["id"] for c in data.get("candidates", [])]
        check(data.get("match") is None and "rowAether" in cids,
              f"distinctive overlap -> rowAether candidate (got {data})")

        print("\n[match: missing q and email -> 400]")
        st, _ = _req(port, "GET", "/api/intake/match", token)
        check(st == 400, f"no params -> 400 (got {st})")

        print("\n[match: unauthenticated -> 401]")
        st, _ = _req(port, "GET", "/api/intake/match?q=Acme", None)
        check(st == 401, f"no token -> 401 (got {st})")

        print("\n[bot create: log-communication + create_investor_if_missing, source tagged]")
        st, d = _req(port, "POST", "/api/fundraising/log-communication", token, {
            "investor_name": "Beacon Ventures",
            "contact": {"name": "Sam Lee", "email": "sam@beacon.vc", "title": "Partner"},
            "create_investor_if_missing": True,
            "type": "note", "subject": "Intake (Matrix)", "body": "met at the Austin conf",
            "source": "matrix_intake",
        })
        check(st in (200, 201), f"create new investor -> 201 (got {st})")

        print("\n[new-vs-existing contract: the just-created investor now matches by name]")
        st, d = _req(port, "GET", "/api/intake/match?q=Beacon%20Ventures", token)
        m = (d or {}).get("data", {}).get("match")
        check(m and m.get("investor_name") == "Beacon Ventures",
              f"created investor is matchable (no duplicate on next note) (got {m})")

        print("\n[provenance: the intake communication is audited as source=matrix_intake]")
        c = sqlite3.connect(os.environ["CRM_DB_PATH"])
        try:
            rows = c.execute("SELECT changes FROM audit_log WHERE entity_type='communication' AND action='create'").fetchall()
        finally:
            # Close the handle even if the query raises (e.g. the table is missing).
            c.close()
        sources = [json.loads(r[0]).get("source") for r in rows if r[0]]
        check("matrix_intake" in sources, f"audit carries source=matrix_intake (got {sources})")
    finally:
        httpd.shutdown()
        # shutdown() only stops serve_forever(); server_close() releases the listening socket.
        httpd.server_close()

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for f in FAILS:
            print(f" - {f}")
        sys.exit(1)
    print("ALL PASS (matrix-intake endpoints)")


if __name__ == "__main__":
    main()