463f624548
A mobile, in-app twin of the Matrix business-card flow (M3): photograph a card in the app and it becomes a reviewed fundraising-grid add/note, with a human approving every write. Server — POST /api/intake/card (authenticated member+, read-only): lazily imports the bot's nio-free parse + spark core, vision-transcribes the photo (local VL via Spark Control — nothing to Claude), runs the same email/phone/LinkedIn integrity rule + fuzzy matcher, and returns a proposal plus exact match / fuzzy candidates. No write happens here. Frontend — a camera button in the mobile top bar (left of the quick-log pencil) → take or pick a photo → <canvas> downscale to JPEG (also normalizes iPhone HEIC) → the endpoint → an editable review sheet (proposal fields + existing-investor picker). Save reuses /api/fundraising/log-communication tagged source="app_card". No schema change, no migration, no new dependency, no Matrix-bot change. The camera/canvas/OCR path is on-device-only (jsdom has no canvas); covered by test_intake_card.py (stubbed vision+parse) + the render/mount smokes.
276 lines
13 KiB
Python
276 lines
13 KiB
Python
#!/usr/bin/env python3
|
|
"""Tests for the in-app business-card intake endpoint (#7): POST /api/intake/card.
|
|
|
|
The endpoint reuses the Matrix card flow's nio-free core — vision-transcribe (spark) -> text
|
|
parse (parse) -> the same fuzzy matcher (find_intake_match / find_intake_candidates) — minus
|
|
Matrix, surfaced for a mobile sheet. The real vision/OCR path is live-smoke only (same as the
|
|
Matrix M3 path), so here we STUB the two network legs and assert the wiring + contract:
|
|
- happy path: transcribe -> parse -> proposal + match/candidates, status 200 ok:true;
|
|
- the email-integrity rule rides along (a model-minted address NOT in the transcription is
|
|
dropped in favor of the one literally present), exactly as on the text/Matrix path;
|
|
- new-vs-existing: an exact firm name returns `match`; a near-spelling returns `candidates`;
|
|
- soft-fails: an unreadable image -> ok:false/unreadable; vision down -> 502/vision_unavailable;
|
|
- guards: missing/invalid image -> 400; unauthenticated -> 401;
|
|
- provenance: the approve write reuses log-communication tagged source="app_card".
|
|
Synthetic data only.
|
|
|
|
Run: cd backend && python3 test_intake_card.py
|
|
"""
|
|
import base64
|
|
import http.client
|
|
import json
|
|
import os
|
|
import sqlite3
|
|
import sys
|
|
import tempfile
|
|
import threading
|
|
from http.server import ThreadingHTTPServer
|
|
|
|
# Throwaway data directory for this run; both CRM_* variables are set before `server` is imported
# further down (presumably the server resolves its DB location from them — confirm in server.py).
_DATA = tempfile.mkdtemp()
os.environ.update({
    "CRM_DATA_DIR": _DATA,
    "CRM_DB_PATH": os.path.join(_DATA, "crm.db"),
})

_BACKEND = os.path.dirname(os.path.abspath(__file__))
# Prepend in one step; the resulting order matches three successive insert(0, ...) calls:
# matrix_intake (spark, parse) first, then ingest (llm), then the backend dir itself.
sys.path[:0] = [
    os.path.join(_BACKEND, "matrix_intake"),
    os.path.join(_BACKEND, "ingest"),
    _BACKEND,
]
|
|
|
|
import server # noqa: E402
|
|
import llm # noqa: E402 (ingest/llm.py — patched so spark.parse_json hits no network)
|
|
import spark # noqa: E402 (matrix_intake/spark.py — transcribe_card stubbed)
|
|
import parse # noqa: E402 (matrix_intake/parse.py — parse_message defaults to spark.parse_json)
|
|
|
|
# Messages of every failed check, in the order they failed.
FAILS = []

# Why the stubs are wired the way they are:
#   * the handler imports `spark`/`parse` lazily and resolves transcribe_card on the module at
#     call time, so replacing the module attribute is enough for the vision leg;
#   * parse.parse_message captures parse_fn=spark.parse_json as a default at import time, while
#     spark.parse_json looks up llm.chat_json on each call — so the parse leg is reached by
#     patching llm.chat_json, NOT spark.parse_json.
# _STATE is the shared knob the stubs read: the canned transcription, the canned raw parse
# result, and a flag that makes the transcribe stub raise.
_STATE = {
    "transcription": "",
    "raw": {},
    "boom": False,
}
|
|
|
|
|
|
def _fake_transcribe(image_b64, mime="image/jpeg", chat_fn=None):
    """Stub for the vision-transcription call; all arguments are ignored.

    Returns ``_STATE["transcription"]``, or raises ``RuntimeError`` when ``_STATE["boom"]``
    is truthy (used to simulate the vision backend being down).
    """
    if not _STATE["boom"]:
        return _STATE["transcription"]
    raise RuntimeError("spark control unreachable")
|
|
|
|
|
|
def _fake_chat_json(prompt, system=None, max_tokens=200):
    """Stub for ``llm.chat_json``; ignores its arguments and returns the canned parse result.

    A fresh shallow copy of ``_STATE["raw"]`` is returned on every call, so top-level edits
    made by the caller do not leak back into the canned value.
    """
    canned = _STATE["raw"]
    return dict(canned)
|
|
|
|
|
|
# Install the stubs on the imported modules so neither network leg is exercised:
# the parse leg goes through llm.chat_json, the vision leg through spark.transcribe_card.
llm.chat_json = _fake_chat_json
spark.transcribe_card = _fake_transcribe
|
|
|
|
|
|
def check(cond, msg):
    """Print a PASS/FAIL line for ``msg`` and record it in ``FAILS`` when ``cond`` is falsy."""
    if cond:
        print(" PASS " + msg)
        return
    print(" FAIL " + msg)
    FAILS.append(msg)
|
|
|
|
|
|
class _Quiet(server.CRMHandler):
    """``server.CRMHandler`` with ``log_message`` overridden as a no-op to keep test output clean."""

    def log_message(self, *args):
        """Accept and discard whatever would have been logged."""
        return None
|
|
|
|
|
|
def _req(port, method, path, token=None, body=None):
    """Send one HTTP request to the test server on 127.0.0.1 and return ``(status, data)``.

    Args:
        port: TCP port the test server listens on.
        method: HTTP verb, e.g. ``"POST"``.
        path: request path, e.g. ``"/api/intake/card"``.
        token: optional bearer token; when truthy it is sent as
            ``Authorization: Bearer <token>``.
        body: optional JSON-serializable payload; when not ``None`` it is sent with
            ``Content-Type: application/json``.

    Returns:
        ``(status, data)`` — the HTTP status code and the decoded JSON body, or ``None`` for
        ``data`` when the response body is empty or is not valid JSON.
    """
    headers = {}
    if token:
        headers["Authorization"] = "Bearer " + token
    payload = None
    if body is not None:
        payload = json.dumps(body)
        headers["Content-Type"] = "application/json"

    conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
    try:
        conn.request(method, path, body=payload, headers=headers)
        resp = conn.getresponse()
        status = resp.status
        raw = resp.read().decode("utf-8", "replace")
    finally:
        # Release the socket even when the request, response, or read raises
        # (e.g. a timeout or a reset), instead of leaking it for the rest of the run.
        conn.close()

    data = None
    if raw:
        try:
            data = json.loads(raw)
        except ValueError:
            # Non-JSON body: callers still get the status code, with data left as None.
            pass
    return status, data
|
|
|
|
|
|
# Synthetic fundraising grid seeded into the DB: a single investor row with one contact.
# Key order is kept stable because the grid is serialized with json.dumps when seeded.
GRID = {
    "columns": [],
    "rows": [
        {
            "id": "rowAcme",
            "investor_name": "Acme Capital",
            "notes": "",
            "contacts": [
                {"name": "Jane Doe", "email": "jane@acme.com", "title": "GP"},
            ],
        },
    ],
}

# Not a real image — just bytes that round-trip as valid base64, since the vision leg is stubbed.
_IMG = base64.b64encode(b"not-a-real-image-just-valid-base64").decode("ascii")
|
|
|
|
|
|
def seed():
    """Insert the fixtures the checks rely on into the DB at ``CRM_DB_PATH``.

    Adds one admin user (``u1`` / ``grant``) and stores ``GRID`` as the ``main`` fundraising
    state, overwriting ``grid_json`` if a ``main`` row already exists. Assumes the tables
    were already created (``main`` calls ``server.init_db()`` first).
    """
    c = sqlite3.connect(os.environ["CRM_DB_PATH"])
    try:
        c.execute("INSERT INTO users (id,username,email,password_hash,full_name,role,is_active) "
                  "VALUES ('u1','grant','grant@ten31.example','x','Grant','admin',1)")
        c.execute("INSERT INTO fundraising_state (id, grid_json, views_json, version) "
                  "VALUES ('main', ?, '[]', 1) "
                  "ON CONFLICT(id) DO UPDATE SET grid_json = excluded.grid_json", (json.dumps(GRID),))
        c.commit()
    finally:
        # Close the handle even when an INSERT fails, so the DB file is not left open.
        c.close()
|
|
|
|
|
|
def main():
    """Run every in-app card-intake check against a live handler on an ephemeral port.

    Initializes and seeds the temp DB, mints an admin token, serves ``_Quiet`` on
    127.0.0.1 in a daemon thread, then drives ``POST /api/intake/card`` (and one
    ``log-communication`` write) through ``_req`` while steering the stubs via ``_STATE``.
    Each check prints a PASS/FAIL line; the process exits with status 1 if any check failed.
    """
    server.init_db()
    seed()
    token = server.create_token("u1", "grant", "admin")

    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
    port = httpd.server_address[1]
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        print("\n[happy path: transcribe -> parse -> proposal, new investor, no match]")
        _STATE["transcription"] = ("Sam Lee\nPartner\nBeacon Ventures\n"
                                   "sam@beacon.vc\nMobile: +1 555 987 6543")
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Beacon Ventures",
                         "contact_name": "Sam Lee", "contact_title": "Partner",
                         "mobile": "+1 555 987 6543", "contact_email": "sam@beacon.vc"}
        _STATE["boom"] = False
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        data = (d or {}).get("data", {})
        p = data.get("proposal", {})
        check(st == 200 and data.get("ok") is True, f"200 ok:true (got {st}, {data})")
        check(p.get("investor_name") == "Beacon Ventures" and p.get("contact_name") == "Sam Lee",
              f"proposal carries firm + person (got {p})")
        check(p.get("contact_email") == "sam@beacon.vc", f"email kept (got {p.get('contact_email')})")
        check(p.get("mobile") == "+1 555 987 6543", f"mobile kept (got {p.get('mobile')})")
        check("transcription" in data and data["match"] is None and data["candidates"] == [],
              f"transcription returned, unknown firm -> no match/candidates (got {data})")
        check(not any(k.startswith("_") for k in p), f"internal control keys stripped (got {list(p)})")

        print("\n[email integrity: a model-minted address NOT in the card is dropped]")
        _STATE["transcription"] = "Ann Roe\nDir\nOmega LP\nann@omega.fund"  # the only address present
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Omega LP",
                         "contact_name": "Ann Roe", "contact_email": "evil@phish.example"}
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        p = (d or {}).get("data", {}).get("proposal", {})
        check(p.get("contact_email") == "ann@omega.fund",
              f"source address wins over the minted one (got {p.get('contact_email')})")

        print("\n[match: exact firm name returns the grid row id]")
        _STATE["transcription"] = "Jane Doe\nGP\nAcme Capital"  # no email -> match on name
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Acme Capital",
                         "contact_name": "Jane Doe", "contact_title": "GP"}
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        m = (d or {}).get("data", {}).get("match")
        check(m and m.get("id") == "rowAcme", f"exact firm -> match rowAcme (got {m})")

        print("\n[match by card email: exact contact email returns the grid row id]")
        _STATE["transcription"] = "Jane Doe\nGP\nAcme Capital Group\njane@acme.com"
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Acme Capital Group",
                         "contact_name": "Jane Doe"}
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        m = (d or {}).get("data", {}).get("match")
        check(m and m.get("id") == "rowAcme" and m.get("matched_on") == "email",
              f"card email -> exact match rowAcme on email (got {m})")

        print("\n[fuzzy: a near-spelling returns a candidate, no exact match]")
        # Typo in the DISTINCTIVE token ('Acme'->'Acne') so the fuzzy matcher surfaces it; a typo
        # in a generic descriptor (e.g. 'Capitol') wouldn't, since those are stripped first.
        _STATE["transcription"] = "Jane Doe\nGP\nAcne Capital"  # no email -> name-only fuzzy
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Acne Capital",
                         "contact_name": "Jane Doe"}
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        data = (d or {}).get("data", {})
        cids = [c["id"] for c in data.get("candidates", [])]
        check(data.get("match") is None and "rowAcme" in cids,
              f"near-spelling -> candidate rowAcme, no exact (got {data})")

        print("\n[no firm and no person: readable but unactionable -> ok:true, no DB lookup, no 500]")
        _STATE["transcription"] = "some faded scribbles, no usable fields"  # >=5 chars, no email/firm
        _STATE["raw"] = {"intent": "unclear"}
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        data = (d or {}).get("data", {})
        check(st == 200 and data.get("ok") is True
              and data.get("match") is None and data.get("candidates") == [],
              f"unclear proposal -> ok:true, no match/candidates, not 500 (got {st}, {data})")

        print("\n[parse leg down: parse_message raises -> 502/vision_unavailable]")
        _orig_pm = parse.parse_message
        # A lambda cannot contain `raise`; throwing into an empty generator raises from an expression.
        parse.parse_message = lambda *a, **k: (_ for _ in ()).throw(RuntimeError("qwen down"))
        try:
            _STATE["transcription"] = "Jane Doe\nGP\nAcme Capital"
            st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
            data = (d or {}).get("data", {})
            check(st == 502 and data.get("reason") == "vision_unavailable",
                  f"parse error -> 502 vision_unavailable (got {st}, {data})")
        finally:
            # Restore the real parser so the remaining sections are unaffected.
            parse.parse_message = _orig_pm

        print("\n[unreadable: model saw no card -> ok:false/unreadable, 200]")
        _STATE["transcription"] = ""  # transcribe_card returns '' on the NONE sentinel
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        data = (d or {}).get("data", {})
        check(st == 200 and data.get("ok") is False and data.get("reason") == "unreadable",
              f"empty transcription -> unreadable (got {st}, {data})")

        print("\n[vision down: transcribe raises -> 502/vision_unavailable]")
        _STATE["boom"] = True
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        data = (d or {}).get("data", {})
        check(st == 502 and data.get("reason") == "vision_unavailable",
              f"spark error -> 502 vision_unavailable (got {st}, {data})")
        _STATE["boom"] = False

        print("\n[data-URI tolerated: a full data: prefix is stripped to raw base64]")
        _STATE["transcription"] = "Sam Lee\nPartner\nBeacon Ventures"
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Beacon Ventures",
                         "contact_name": "Sam Lee"}
        st, d = _req(port, "POST", "/api/intake/card", token,
                     {"image_b64": "data:image/jpeg;base64," + _IMG})
        check(st == 200 and (d or {}).get("data", {}).get("ok") is True,
              f"data-URI accepted (got {st})")

        print("\n[guard: missing image -> 400]")
        st, _ = _req(port, "POST", "/api/intake/card", token, {})
        check(st == 400, f"no image_b64 -> 400 (got {st})")

        print("\n[guard: malformed base64 -> 400]")
        st, _ = _req(port, "POST", "/api/intake/card", token, {"image_b64": "%%%not base64%%%"})
        check(st == 400, f"invalid base64 -> 400 (got {st})")

        print("\n[guard: oversized image -> 413 (size check runs before decode)]")
        st, _ = _req(port, "POST", "/api/intake/card", token, {"image_b64": "A" * 12_000_001})
        check(st == 413, f"over the 12 MB b64 cap -> 413 (got {st})")

        print("\n[guard: unauthenticated -> 401]")
        st, _ = _req(port, "POST", "/api/intake/card", None, {"image_b64": _IMG})
        check(st == 401, f"no token -> 401 (got {st})")

        print("\n[provenance: the approve write reuses log-communication tagged source=app_card]")
        st, d = _req(port, "POST", "/api/fundraising/log-communication", token, {
            "investor_name": "Beacon Ventures",
            "contact": {"name": "Sam Lee", "email": "sam@beacon.vc", "title": "Partner"},
            "create_investor_if_missing": True,
            "type": "note", "subject": "", "body": "scanned business card",
            "source": "app_card",
        })
        check(st in (200, 201), f"app_card create -> 201 (got {st})")
        audit_db = sqlite3.connect(os.environ["CRM_DB_PATH"])
        try:
            rows = audit_db.execute(
                "SELECT changes FROM audit_log WHERE entity_type='communication' AND action='create'"
            ).fetchall()
        finally:
            # Close the handle even if the audit query fails.
            audit_db.close()
        sources = [json.loads(r[0]).get("source") for r in rows if r[0]]
        check("app_card" in sources, f"audit carries source=app_card (got {sources})")
    finally:
        httpd.shutdown()
        # shutdown() only stops the serve_forever() loop; server_close() releases the
        # listening socket so it is not leaked when the run ends or aborts.
        httpd.server_close()

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for f in FAILS:
            print(f" - {f}")
        sys.exit(1)
    print("ALL PASS (in-app card intake endpoint)")
|
|
|
|
|
|
# Script entry point: run the checks only when executed directly, not when imported.
if __name__ == "__main__":
    main()
|