Files
ten31-database/backend/test_intake_card.py
T
Keysat 463f624548 Add in-app camera business-card intake (#7) (v0.1.0:100)
A mobile, in-app twin of the Matrix business-card flow (M3): photograph a
card in the app and it becomes a reviewed fundraising-grid add/note, with a
human approving every write.

Server — POST /api/intake/card (authenticated member+, read-only): lazily
imports the bot's nio-free parse + spark core, vision-transcribes the photo
(local VL via Spark Control — nothing to Claude), runs the same email/phone/
LinkedIn integrity rule + fuzzy matcher, and returns a proposal plus exact
match / fuzzy candidates. No write happens here.

Frontend — a camera button in the mobile top bar (left of the quick-log
pencil) → take or pick a photo → <canvas> downscale to JPEG (also normalizes
iPhone HEIC) → the endpoint → an editable review sheet (proposal fields +
existing-investor picker). Save reuses /api/fundraising/log-communication
tagged source="app_card".

No schema change, no migration, no new dependency, no Matrix-bot change. The
camera/canvas/OCR path is on-device-only (jsdom has no canvas); covered by
test_intake_card.py (stubbed vision+parse) + the render/mount smokes.
2026-06-20 14:15:03 -05:00

276 lines
13 KiB
Python

#!/usr/bin/env python3
"""Tests for the in-app business-card intake endpoint (#7): POST /api/intake/card.
The endpoint reuses the Matrix card flow's nio-free core — vision-transcribe (spark) -> text
parse (parse) -> the same fuzzy matcher (find_intake_match / find_intake_candidates) — minus
Matrix, surfaced for a mobile sheet. The real vision/OCR path is live-smoke only (same as the
Matrix M3 path), so here we STUB the two network legs and assert the wiring + contract:
- happy path: transcribe -> parse -> proposal + match/candidates, status 200 ok:true;
- the email-integrity rule rides along (a model-minted address NOT in the transcription is
dropped in favor of the one literally present), exactly as on the text/Matrix path;
- new-vs-existing: an exact firm name returns `match`; a near-spelling returns `candidates`;
- soft-fails: an unreadable image -> ok:false/unreadable; vision down -> 502/vision_unavailable;
- guards: missing/invalid image -> 400; unauthenticated -> 401;
- provenance: the approve write reuses log-communication tagged source="app_card".
Synthetic data only.
Run: cd backend && python3 test_intake_card.py
"""
import base64
import http.client
import json
import os
import sqlite3
import sys
import tempfile
import threading
from http.server import ThreadingHTTPServer
# Point the CRM at a throwaway data dir / SQLite file. This is done BEFORE the project
# imports below (hence the E402 suppressions) — presumably `server` reads these variables
# when it is imported or when init_db() runs; confirm against server.py.
_DATA = tempfile.mkdtemp()
os.environ["CRM_DATA_DIR"] = _DATA
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
# Make the backend package dirs importable as top-level modules, whatever the CWD is.
_BACKEND = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, _BACKEND)
sys.path.insert(0, os.path.join(_BACKEND, "ingest")) # llm
sys.path.insert(0, os.path.join(_BACKEND, "matrix_intake")) # spark, parse
import server # noqa: E402
import llm # noqa: E402 (ingest/llm.py — patched so spark.parse_json hits no network)
import spark # noqa: E402 (matrix_intake/spark.py — transcribe_card stubbed)
import parse # noqa: E402 (matrix_intake/parse.py — parse_message defaults to spark.parse_json)
# Messages of every failed check; appended to by check() and reported by main().
FAILS = []
# The handler imports `spark`/`parse` lazily and looks up transcribe_card on the module at call
# time, so patching the module attribute here takes effect. parse.parse_message binds its default
# parse_fn=spark.parse_json at import, and spark.parse_json calls llm.chat_json dynamically — so
# patching llm.chat_json (not spark.parse_json) is what reaches the parse leg.
#
# Knobs the stubs read on every call; main() rewrites them before each scenario:
#   "transcription" - text _fake_transcribe returns
#   "raw"           - dict _fake_chat_json returns (a copy of)
#   "boom"          - when true, _fake_transcribe raises instead of returning
_STATE = {"transcription": "", "raw": {}, "boom": False}
def _fake_transcribe(image_b64, mime="image/jpeg", chat_fn=None):
    """Network-free replacement for ``spark.transcribe_card``.

    All three arguments are accepted for signature compatibility and ignored.
    Returns the canned ``_STATE["transcription"]``; when ``_STATE["boom"]`` is
    set, raises ``RuntimeError`` to simulate the vision leg being down.
    """
    if not _STATE["boom"]:
        return _STATE["transcription"]
    raise RuntimeError("spark control unreachable")
def _fake_chat_json(prompt, system=None, max_tokens=200):
    """Network-free replacement for ``llm.chat_json``.

    The arguments are ignored. Returns a fresh shallow copy of the canned
    ``_STATE["raw"]`` dict so a caller mutating the result cannot alter the
    fixture.
    """
    canned = _STATE["raw"]
    return dict(canned)
# Install the stubs on the imported modules so neither the vision leg nor the parse leg
# makes a network call during this run.
spark.transcribe_card = _fake_transcribe
llm.chat_json = _fake_chat_json
def check(cond, msg):
    """Print a PASS/FAIL line for one assertion and remember failures.

    ``cond`` is judged by truthiness. A failing ``msg`` is appended to the
    module-level ``FAILS`` list; nothing is raised, so later checks still run.
    """
    if cond:
        print(" PASS " + msg)
        return
    print(" FAIL " + msg)
    FAILS.append(msg)
class _Quiet(server.CRMHandler):
    """``server.CRMHandler`` with ``log_message`` turned into a no-op.

    Presumably this silences the per-request access log so only the PASS/FAIL
    lines reach the console — confirm CRMHandler derives from http.server's
    request handler.
    """

    def log_message(self, *a):
        """Accept any arguments and discard them."""
        return None
def _req(port, method, path, token=None, body=None):
    """Send one HTTP request to 127.0.0.1:``port`` and return ``(status, data)``.

    Args:
        port: TCP port of the local server.
        method: HTTP method, e.g. ``"POST"``.
        path: Request path, e.g. ``"/api/intake/card"``.
        token: Optional bearer token; when truthy an ``Authorization`` header
            is sent.
        body: Optional JSON-serializable object; when not ``None`` it is sent
            as the JSON request body with a ``Content-Type`` header.

    Returns:
        A ``(status, data)`` tuple. ``data`` is the decoded JSON response, or
        ``None`` when the response body is empty or is not valid JSON.

    Raises:
        Whatever ``http.client`` raises on connection or protocol errors. The
        connection is closed before the exception propagates.
    """
    headers = {}
    if token:
        headers["Authorization"] = "Bearer " + token
    payload = None
    if body is not None:
        payload = json.dumps(body)
        headers["Content-Type"] = "application/json"

    conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
    try:
        conn.request(method, path, body=payload, headers=headers)
        resp = conn.getresponse()
        status = resp.status
        raw = resp.read().decode("utf-8", "replace")
    finally:
        # Always release the socket, including when the request or read fails.
        conn.close()

    if not raw:
        return status, None
    try:
        return status, json.loads(raw)
    except ValueError:
        # Non-JSON body (e.g. an HTML error page): report the status only.
        return status, None
# Synthetic fundraising grid that seed() stores as fundraising_state.grid_json: a single
# investor row ("Acme Capital", id "rowAcme") with one contact, which the match / fuzzy
# scenarios in main() are written against.
GRID = {
    "columns": [],
    "rows": [
        {"id": "rowAcme", "investor_name": "Acme Capital", "notes": "",
         "contacts": [{"name": "Jane Doe", "email": "jane@acme.com", "title": "GP"}]},
    ],
}
# Well-formed base64 of arbitrary bytes — not a real image. The vision call is stubbed, so
# only the endpoint's base64 handling ever looks at it.
_IMG = base64.b64encode(b"not-a-real-image-just-valid-base64").decode()
def seed():
    """Insert the fixtures the scenarios rely on into the test database.

    Writes to the SQLite file named by ``CRM_DB_PATH``:
    - one active admin user (id ``u1``, username ``grant``);
    - the ``main`` row of ``fundraising_state`` holding ``GRID`` as JSON,
      overwriting ``grid_json`` if that row already exists.

    The tables must already exist (main() calls ``server.init_db()`` first).
    The connection is closed even if an insert or the commit fails.
    """
    conn = sqlite3.connect(os.environ["CRM_DB_PATH"])
    try:
        conn.execute("INSERT INTO users (id,username,email,password_hash,full_name,role,is_active) "
                     "VALUES ('u1','grant','grant@ten31.example','x','Grant','admin',1)")
        conn.execute("INSERT INTO fundraising_state (id, grid_json, views_json, version) "
                     "VALUES ('main', ?, '[]', 1) "
                     "ON CONFLICT(id) DO UPDATE SET grid_json = excluded.grid_json",
                     (json.dumps(GRID),))
        conn.commit()
    finally:
        conn.close()
def main():
    """Run every intake-card scenario against an in-process CRM server.

    Initializes the database, seeds fixtures, mints an admin token, and serves
    ``_Quiet`` on an ephemeral localhost port from a daemon thread. Each
    scenario sets ``_STATE`` (what the stubbed vision / parse legs return),
    calls the endpoint through ``_req`` and records its expectations with
    ``check``. Failures are collected rather than raised, so all scenarios run.

    On exit the server loop is stopped AND its listening socket is closed.
    Exits the process with status 1 if any check failed.
    """
    server.init_db()
    seed()
    token = server.create_token("u1", "grant", "admin")

    # Port 0 lets the OS pick a free port; read back the one actually bound.
    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
    port = httpd.server_address[1]
    threading.Thread(target=httpd.serve_forever, daemon=True).start()

    try:
        print("\n[happy path: transcribe -> parse -> proposal, new investor, no match]")
        _STATE["transcription"] = ("Sam Lee\nPartner\nBeacon Ventures\n"
                                   "sam@beacon.vc\nMobile: +1 555 987 6543")
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Beacon Ventures",
                         "contact_name": "Sam Lee", "contact_title": "Partner",
                         "mobile": "+1 555 987 6543", "contact_email": "sam@beacon.vc"}
        _STATE["boom"] = False
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        data = (d or {}).get("data", {})
        p = data.get("proposal", {})
        check(st == 200 and data.get("ok") is True, f"200 ok:true (got {st}, {data})")
        check(p.get("investor_name") == "Beacon Ventures" and p.get("contact_name") == "Sam Lee",
              f"proposal carries firm + person (got {p})")
        check(p.get("contact_email") == "sam@beacon.vc", f"email kept (got {p.get('contact_email')})")
        check(p.get("mobile") == "+1 555 987 6543", f"mobile kept (got {p.get('mobile')})")
        # Require the keys to be present, but record a FAIL (not a KeyError that aborts the
        # whole run) when the response is missing them.
        check("transcription" in data
              and "match" in data and data["match"] is None
              and data.get("candidates") == [],
              f"transcription returned, unknown firm -> no match/candidates (got {data})")
        check(not any(k.startswith("_") for k in p), f"internal control keys stripped (got {list(p)})")

        print("\n[email integrity: a model-minted address NOT in the card is dropped]")
        _STATE["transcription"] = "Ann Roe\nDir\nOmega LP\nann@omega.fund"  # the only address present
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Omega LP",
                         "contact_name": "Ann Roe", "contact_email": "evil@phish.example"}
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        p = (d or {}).get("data", {}).get("proposal", {})
        check(p.get("contact_email") == "ann@omega.fund",
              f"source address wins over the minted one (got {p.get('contact_email')})")

        print("\n[match: exact firm name returns the grid row id]")
        _STATE["transcription"] = "Jane Doe\nGP\nAcme Capital"  # no email -> match on name
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Acme Capital",
                         "contact_name": "Jane Doe", "contact_title": "GP"}
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        m = (d or {}).get("data", {}).get("match")
        check(m and m.get("id") == "rowAcme", f"exact firm -> match rowAcme (got {m})")

        print("\n[match by card email: exact contact email returns the grid row id]")
        _STATE["transcription"] = "Jane Doe\nGP\nAcme Capital Group\njane@acme.com"
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Acme Capital Group",
                         "contact_name": "Jane Doe"}
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        m = (d or {}).get("data", {}).get("match")
        check(m and m.get("id") == "rowAcme" and m.get("matched_on") == "email",
              f"card email -> exact match rowAcme on email (got {m})")

        print("\n[fuzzy: a near-spelling returns a candidate, no exact match]")
        # Typo in the DISTINCTIVE token ('Acme'->'Acne') so the fuzzy matcher surfaces it; a typo
        # in a generic descriptor (e.g. 'Capitol') wouldn't, since those are stripped first.
        _STATE["transcription"] = "Jane Doe\nGP\nAcne Capital"  # no email -> name-only fuzzy
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Acne Capital",
                         "contact_name": "Jane Doe"}
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        data = (d or {}).get("data", {})
        # Tolerate a null candidates list or an id-less entry: that should FAIL, not crash.
        cids = [cand.get("id") for cand in data.get("candidates") or []]
        check(data.get("match") is None and "rowAcme" in cids,
              f"near-spelling -> candidate rowAcme, no exact (got {data})")

        print("\n[no firm and no person: readable but unactionable -> ok:true, no DB lookup, no 500]")
        _STATE["transcription"] = "some faded scribbles, no usable fields"  # >=5 chars, no email/firm
        _STATE["raw"] = {"intent": "unclear"}
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        data = (d or {}).get("data", {})
        check(st == 200 and data.get("ok") is True
              and data.get("match") is None and data.get("candidates") == [],
              f"unclear proposal -> ok:true, no match/candidates, not 500 (got {st}, {data})")

        print("\n[parse leg down: parse_message raises -> 502/vision_unavailable]")

        def _parse_down(*_args, **_kwargs):
            raise RuntimeError("qwen down")

        _orig_pm = parse.parse_message
        parse.parse_message = _parse_down
        try:
            _STATE["transcription"] = "Jane Doe\nGP\nAcme Capital"
            st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
            data = (d or {}).get("data", {})
            check(st == 502 and data.get("reason") == "vision_unavailable",
                  f"parse error -> 502 vision_unavailable (got {st}, {data})")
        finally:
            # Restore the real parser so later scenarios are unaffected.
            parse.parse_message = _orig_pm

        print("\n[unreadable: model saw no card -> ok:false/unreadable, 200]")
        _STATE["transcription"] = ""  # transcribe_card returns '' on the NONE sentinel
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        data = (d or {}).get("data", {})
        check(st == 200 and data.get("ok") is False and data.get("reason") == "unreadable",
              f"empty transcription -> unreadable (got {st}, {data})")

        print("\n[vision down: transcribe raises -> 502/vision_unavailable]")
        _STATE["boom"] = True
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        data = (d or {}).get("data", {})
        check(st == 502 and data.get("reason") == "vision_unavailable",
              f"spark error -> 502 vision_unavailable (got {st}, {data})")
        _STATE["boom"] = False

        print("\n[data-URI tolerated: a full data: prefix is stripped to raw base64]")
        _STATE["transcription"] = "Sam Lee\nPartner\nBeacon Ventures"
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Beacon Ventures",
                         "contact_name": "Sam Lee"}
        st, d = _req(port, "POST", "/api/intake/card", token,
                     {"image_b64": "data:image/jpeg;base64," + _IMG})
        check(st == 200 and (d or {}).get("data", {}).get("ok") is True,
              f"data-URI accepted (got {st})")

        print("\n[guard: missing image -> 400]")
        st, _ = _req(port, "POST", "/api/intake/card", token, {})
        check(st == 400, f"no image_b64 -> 400 (got {st})")

        print("\n[guard: malformed base64 -> 400]")
        st, _ = _req(port, "POST", "/api/intake/card", token, {"image_b64": "%%%not base64%%%"})
        check(st == 400, f"invalid base64 -> 400 (got {st})")

        print("\n[guard: oversized image -> 413 (size check runs before decode)]")
        st, _ = _req(port, "POST", "/api/intake/card", token, {"image_b64": "A" * 12_000_001})
        check(st == 413, f"over the 12 MB b64 cap -> 413 (got {st})")

        print("\n[guard: unauthenticated -> 401]")
        st, _ = _req(port, "POST", "/api/intake/card", None, {"image_b64": _IMG})
        check(st == 401, f"no token -> 401 (got {st})")

        print("\n[provenance: the approve write reuses log-communication tagged source=app_card]")
        st, d = _req(port, "POST", "/api/fundraising/log-communication", token, {
            "investor_name": "Beacon Ventures",
            "contact": {"name": "Sam Lee", "email": "sam@beacon.vc", "title": "Partner"},
            "create_investor_if_missing": True,
            "type": "note", "subject": "", "body": "scanned business card",
            "source": "app_card",
        })
        check(st in (200, 201), f"app_card create -> 201 (got {st})")
        conn = sqlite3.connect(os.environ["CRM_DB_PATH"])
        try:
            rows = conn.execute(
                "SELECT changes FROM audit_log WHERE entity_type='communication' AND action='create'"
            ).fetchall()
        finally:
            conn.close()
        sources = [json.loads(r[0]).get("source") for r in rows if r[0]]
        check("app_card" in sources, f"audit carries source=app_card (got {sources})")
    finally:
        # shutdown() only stops serve_forever(); server_close() releases the listening socket.
        httpd.shutdown()
        httpd.server_close()

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for failure in FAILS:
            print(f" - {failure}")
        sys.exit(1)
    print("ALL PASS (in-app card intake endpoint)")
# Script entry point: run the scenarios only when executed directly, not on import.
if __name__ == "__main__":
    main()