#!/usr/bin/env python3
"""Tests for the in-app business-card intake endpoint (#7): POST /api/intake/card.

The endpoint reuses the Matrix card flow's nio-free core — vision-transcribe (spark) -> text
parse (parse) -> the same fuzzy matcher (find_intake_match / find_intake_candidates) — minus
Matrix, surfaced for a mobile sheet. The real vision/OCR path is live-smoke only (same as the
Matrix M3 path), so here we STUB the two network legs and assert the wiring + contract:

  - happy path: transcribe -> parse -> proposal + match/candidates, status 200 ok:true;
  - the email-integrity rule rides along (a model-minted address NOT in the transcription is
    dropped in favor of the one literally present), exactly as on the text/Matrix path;
  - new-vs-existing: an exact firm name (or an exact contact email on the card) returns
    `match`; a near-spelling returns `candidates`;
  - soft-fails: an unreadable image -> ok:false/unreadable; vision down or parse leg down ->
    502/vision_unavailable;
  - guards: missing/invalid image -> 400; oversized image -> 413; unauthenticated -> 401;
    a full `data:` URI prefix is tolerated;
  - provenance: the approve write reuses log-communication tagged source="app_card".

Synthetic data only.
Run: cd backend && python3 test_intake_card.py
"""
import base64
import http.client
import json
import os
import sqlite3
import sys
import tempfile
import threading
from http.server import ThreadingHTTPServer

# Point the CRM at a throwaway data dir / DB. This is done BEFORE `import server` below —
# presumably the server module reads these variables at import/init time (verify in server.py).
_DATA = tempfile.mkdtemp()
os.environ["CRM_DATA_DIR"] = _DATA
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")

# Make the backend package dirs importable as top-level modules; the late imports that follow
# are intentional (hence the E402 suppressions).
_BACKEND = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, _BACKEND)
sys.path.insert(0, os.path.join(_BACKEND, "ingest"))  # llm
sys.path.insert(0, os.path.join(_BACKEND, "matrix_intake"))  # spark, parse
import server  # noqa: E402
import llm  # noqa: E402 (ingest/llm.py — patched so spark.parse_json hits no network)
import spark  # noqa: E402 (matrix_intake/spark.py — transcribe_card stubbed)
import parse  # noqa: E402 (matrix_intake/parse.py — parse_message defaults to spark.parse_json)

# Messages of every failed check; a non-empty list makes the script exit non-zero.
FAILS = []

# The handler imports `spark`/`parse` lazily and looks up transcribe_card on the module at call
# time, so patching the module attribute here takes effect. parse.parse_message binds its default
# parse_fn=spark.parse_json at import, and spark.parse_json calls llm.chat_json dynamically — so
# patching llm.chat_json (not spark.parse_json) is what reaches the parse leg.
# Mutable knobs read by the two stubs below; each scenario in main() sets them before its request.
#   transcription: what the fake vision leg returns
#   raw:           what the fake LLM JSON call returns (copied per call)
#   boom:          when True, the fake vision leg raises instead of returning
_STATE = {"transcription": "", "raw": {}, "boom": False}


def _fake_transcribe(image_b64, mime="image/jpeg", chat_fn=None):
    """Stand-in for spark.transcribe_card: return the staged transcription, or raise if `boom`."""
    if _STATE["boom"]:
        raise RuntimeError("spark control unreachable")
    return _STATE["transcription"]


def _fake_chat_json(prompt, system=None, max_tokens=200):
    """Stand-in for llm.chat_json: return a fresh copy of the staged raw parse dict."""
    return dict(_STATE["raw"])


# Install the stubs on the modules so no network call is made.
spark.transcribe_card = _fake_transcribe
llm.chat_json = _fake_chat_json


def _raise_parse_down(*args, **kwargs):
    """Stand-in for parse.parse_message that always fails (simulates the parse leg being down)."""
    raise RuntimeError("qwen down")


def check(cond, msg):
    """Print a PASS/FAIL line for `msg`; on a falsy `cond`, also record `msg` in FAILS."""
    print((" PASS " if cond else " FAIL ") + msg)
    if not cond:
        FAILS.append(msg)


class _Quiet(server.CRMHandler):
    """CRMHandler with per-request logging silenced to keep the test output readable."""

    def log_message(self, *a):
        pass


def _req(port, method, path, token=None, body=None):
    """Send one HTTP request to the local test server.

    Args:
        port: TCP port of the server on 127.0.0.1.
        method: HTTP method, e.g. "POST".
        path: request path.
        token: optional bearer token; when truthy an Authorization header is sent.
        body: optional JSON-serializable payload; when not None it is sent as application/json.

    Returns:
        (status, data) where `data` is the decoded JSON response, or None when the response
        body is empty or not valid JSON.
    """
    conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
    try:
        headers = {}
        if token:
            headers["Authorization"] = "Bearer " + token
        payload = None
        if body is not None:
            payload = json.dumps(body)
            headers["Content-Type"] = "application/json"
        conn.request(method, path, body=payload, headers=headers)
        resp = conn.getresponse()
        status = resp.status
        raw = resp.read().decode("utf-8", "replace")
    finally:
        # Always release the socket, even when the request/response exchange raises.
        conn.close()
    data = None
    if raw:
        try:
            data = json.loads(raw)
        except ValueError:
            pass  # non-JSON body: callers only get the status
    return status, data


# Seed grid: one existing investor row the match / fuzzy scenarios are compared against.
GRID = {
    "columns": [],
    "rows": [
        {"id": "rowAcme", "investor_name": "Acme Capital", "notes": "",
         "contacts": [{"name": "Jane Doe", "email": "jane@acme.com", "title": "GP"}]},
    ],
}

# Valid base64 that is not a real image; the vision leg is stubbed, so only decoding matters.
_IMG = base64.b64encode(b"not-a-real-image-just-valid-base64").decode()


def seed():
    """Insert the admin test user and upsert the 'main' fundraising grid with GRID."""
    c = sqlite3.connect(os.environ["CRM_DB_PATH"])
    try:
        c.execute("INSERT INTO users (id,username,email,password_hash,full_name,role,is_active) "
                  "VALUES ('u1','grant','grant@ten31.example','x','Grant','admin',1)")
        c.execute("INSERT INTO fundraising_state (id, grid_json, views_json, version) "
                  "VALUES ('main', ?, '[]', 1) "
                  "ON CONFLICT(id) DO UPDATE SET grid_json = excluded.grid_json",
                  (json.dumps(GRID),))
        c.commit()
    finally:
        # sqlite3 connections are not closed by garbage-collection timing we can rely on.
        c.close()


def main():
    """Start the CRM handler on an ephemeral port, run every scenario, and exit 1 on any FAIL."""
    server.init_db()
    seed()
    token = server.create_token("u1", "grant", "admin")
    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)  # port 0 -> OS picks a free port
    port = httpd.server_address[1]
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        print("\n[happy path: transcribe -> parse -> proposal, new investor, no match]")
        _STATE["transcription"] = ("Sam Lee\nPartner\nBeacon Ventures\n"
                                   "sam@beacon.vc\nMobile: +1 555 987 6543")
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Beacon Ventures",
                         "contact_name": "Sam Lee", "contact_title": "Partner",
                         "mobile": "+1 555 987 6543", "contact_email": "sam@beacon.vc"}
        _STATE["boom"] = False
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        data = (d or {}).get("data", {})
        p = data.get("proposal", {})
        check(st == 200 and data.get("ok") is True, f"200 ok:true (got {st}, {data})")
        check(p.get("investor_name") == "Beacon Ventures" and p.get("contact_name") == "Sam Lee",
              f"proposal carries firm + person (got {p})")
        check(p.get("contact_email") == "sam@beacon.vc",
              f"email kept (got {p.get('contact_email')})")
        check(p.get("mobile") == "+1 555 987 6543", f"mobile kept (got {p.get('mobile')})")
        # Key-safe: a response missing `match`/`candidates` is recorded as a FAIL rather than
        # aborting the whole run with a KeyError. `match` must still be present and None.
        check("transcription" in data and "match" in data and data["match"] is None
              and data.get("candidates") == [],
              f"transcription returned, unknown firm -> no match/candidates (got {data})")
        check(not any(k.startswith("_") for k in p),
              f"internal control keys stripped (got {list(p)})")

        print("\n[email integrity: a model-minted address NOT in the card is dropped]")
        _STATE["transcription"] = "Ann Roe\nDir\nOmega LP\nann@omega.fund"  # the only address present
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Omega LP",
                         "contact_name": "Ann Roe", "contact_email": "evil@phish.example"}
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        p = (d or {}).get("data", {}).get("proposal", {})
        check(p.get("contact_email") == "ann@omega.fund",
              f"source address wins over the minted one (got {p.get('contact_email')})")

        print("\n[match: exact firm name returns the grid row id]")
        _STATE["transcription"] = "Jane Doe\nGP\nAcme Capital"  # no email -> match on name
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Acme Capital",
                         "contact_name": "Jane Doe", "contact_title": "GP"}
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        m = (d or {}).get("data", {}).get("match")
        check(m and m.get("id") == "rowAcme", f"exact firm -> match rowAcme (got {m})")

        print("\n[match by card email: exact contact email returns the grid row id]")
        _STATE["transcription"] = "Jane Doe\nGP\nAcme Capital Group\njane@acme.com"
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Acme Capital Group",
                         "contact_name": "Jane Doe"}
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        m = (d or {}).get("data", {}).get("match")
        check(m and m.get("id") == "rowAcme" and m.get("matched_on") == "email",
              f"card email -> exact match rowAcme on email (got {m})")

        print("\n[fuzzy: a near-spelling returns a candidate, no exact match]")
        # Typo in the DISTINCTIVE token ('Acme'->'Acne') so the fuzzy matcher surfaces it; a typo
        # in a generic descriptor (e.g. 'Capitol') wouldn't, since those are stripped first.
        _STATE["transcription"] = "Jane Doe\nGP\nAcne Capital"  # no email -> name-only fuzzy
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Acne Capital",
                         "contact_name": "Jane Doe"}
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        data = (d or {}).get("data", {})
        # .get("id") so a candidate without an id yields a FAIL, not a KeyError.
        cids = [cand.get("id") for cand in data.get("candidates", [])]
        check(data.get("match") is None and "rowAcme" in cids,
              f"near-spelling -> candidate rowAcme, no exact (got {data})")

        print("\n[no firm and no person: readable but unactionable -> ok:true, no DB lookup, no 500]")
        _STATE["transcription"] = "some faded scribbles, no usable fields"  # >=5 chars, no email/firm
        _STATE["raw"] = {"intent": "unclear"}
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        data = (d or {}).get("data", {})
        check(st == 200 and data.get("ok") is True and data.get("match") is None
              and data.get("candidates") == [],
              f"unclear proposal -> ok:true, no match/candidates, not 500 (got {st}, {data})")

        print("\n[parse leg down: parse_message raises -> 502/vision_unavailable]")
        _orig_pm = parse.parse_message
        parse.parse_message = _raise_parse_down
        try:
            _STATE["transcription"] = "Jane Doe\nGP\nAcme Capital"
            st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
            data = (d or {}).get("data", {})
            check(st == 502 and data.get("reason") == "vision_unavailable",
                  f"parse error -> 502 vision_unavailable (got {st}, {data})")
        finally:
            # Restore the real parser so the remaining scenarios are unaffected.
            parse.parse_message = _orig_pm

        print("\n[unreadable: model saw no card -> ok:false/unreadable, 200]")
        _STATE["transcription"] = ""  # transcribe_card returns '' on the NONE sentinel
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        data = (d or {}).get("data", {})
        check(st == 200 and data.get("ok") is False and data.get("reason") == "unreadable",
              f"empty transcription -> unreadable (got {st}, {data})")

        print("\n[vision down: transcribe raises -> 502/vision_unavailable]")
        _STATE["boom"] = True
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        data = (d or {}).get("data", {})
        check(st == 502 and data.get("reason") == "vision_unavailable",
              f"spark error -> 502 vision_unavailable (got {st}, {data})")
        _STATE["boom"] = False

        print("\n[data-URI tolerated: a full data: prefix is stripped to raw base64]")
        _STATE["transcription"] = "Sam Lee\nPartner\nBeacon Ventures"
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Beacon Ventures",
                         "contact_name": "Sam Lee"}
        st, d = _req(port, "POST", "/api/intake/card", token,
                     {"image_b64": "data:image/jpeg;base64," + _IMG})
        check(st == 200 and (d or {}).get("data", {}).get("ok") is True,
              f"data-URI accepted (got {st})")

        print("\n[guard: missing image -> 400]")
        st, _ = _req(port, "POST", "/api/intake/card", token, {})
        check(st == 400, f"no image_b64 -> 400 (got {st})")

        print("\n[guard: malformed base64 -> 400]")
        st, _ = _req(port, "POST", "/api/intake/card", token, {"image_b64": "%%%not base64%%%"})
        check(st == 400, f"invalid base64 -> 400 (got {st})")

        print("\n[guard: oversized image -> 413 (size check runs before decode)]")
        st, _ = _req(port, "POST", "/api/intake/card", token, {"image_b64": "A" * 12_000_001})
        check(st == 413, f"over the 12 MB b64 cap -> 413 (got {st})")

        print("\n[guard: unauthenticated -> 401]")
        st, _ = _req(port, "POST", "/api/intake/card", None, {"image_b64": _IMG})
        check(st == 401, f"no token -> 401 (got {st})")

        print("\n[provenance: the approve write reuses log-communication tagged source=app_card]")
        st, d = _req(port, "POST", "/api/fundraising/log-communication", token, {
            "investor_name": "Beacon Ventures",
            "contact": {"name": "Sam Lee", "email": "sam@beacon.vc", "title": "Partner"},
            "create_investor_if_missing": True,
            "type": "note", "subject": "", "body": "scanned business card",
            "source": "app_card",
        })
        check(st in (200, 201), f"app_card create -> 201 (got {st})")
        c = sqlite3.connect(os.environ["CRM_DB_PATH"])
        try:
            rows = c.execute("SELECT changes FROM audit_log "
                             "WHERE entity_type='communication' AND action='create'").fetchall()
        finally:
            c.close()
        # `changes` is a JSON blob per audit row; NULL/empty rows are skipped.
        sources = [json.loads(r[0]).get("source") for r in rows if r[0]]
        check("app_card" in sources, f"audit carries source=app_card (got {sources})")
    finally:
        httpd.shutdown()
        # shutdown() only stops serve_forever(); server_close() releases the listening socket.
        httpd.server_close()

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for failed in FAILS:
            print(f" - {failed}")
        sys.exit(1)
    print("ALL PASS (in-app card intake endpoint)")


if __name__ == "__main__":
    main()