#!/usr/bin/env python3
"""Boundary-enforcement + FAIL-CLOSED test for the Architect grounding flow.

Proves:
  (1) whatever reaches Claude is de-identified and the draft is re-hydrated
      locally;
  (2) the local-Qwen NER backstop tokenizes UNKNOWN names not in the CRM
      dictionary;
  (3) the flow FAILS CLOSED — no Claude call when the local model is
      unavailable, the scrub refuses without a db_path, and a
      Claude-hallucinated token quarantines the draft.

Offline + synthetic (guardrail #9): minimize, Claude, and NER are injected as
stubs.

Run:
    cd backend && python3 mcp/test_grounding_boundary.py
"""
import os
import shutil
import sqlite3
import sys
import tempfile

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import architect_grounding as G  # noqa: E402

sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "redaction"))
from client import Boundary  # noqa: E402

# Messages of every failed check, in the order they failed.
FAILS = []


def check(cond, msg):
    """Print a PASS/FAIL line for *msg* and record the message on failure.

    Args:
        cond: Outcome of the check; any falsy value counts as a failure.
        msg: Human-readable description of what was checked.
    """
    print((" PASS " if cond else " FAIL ") + msg)
    if not cond:
        FAILS.append(msg)


def make_db():
    """Create a throwaway SQLite CRM database seeded with synthetic rows.

    The file is created inside a fresh ``tempfile.mkdtemp()`` directory; the
    caller owns that directory and is responsible for removing it.

    Returns:
        Filesystem path of the created ``crm.db`` file.
    """
    db = os.path.join(tempfile.mkdtemp(), "crm.db")
    c = sqlite3.connect(db)
    try:
        c.executescript("""
            CREATE TABLE canonical_entities (
                id TEXT PRIMARY KEY, entity_kind TEXT, display_name TEXT,
                primary_email TEXT, deleted_at TEXT);
            CREATE TABLE contacts (
                id TEXT PRIMARY KEY, first_name TEXT, last_name TEXT,
                email TEXT, deleted_at TEXT);
            CREATE TABLE interaction_log (
                id TEXT PRIMARY KEY, ts TEXT, actor_type TEXT, actor_id TEXT,
                action TEXT, target_type TEXT, target_id TEXT, payload TEXT,
                source TEXT, created_at TEXT);
        """)
        c.execute("INSERT INTO canonical_entities VALUES ('per_1','person','Jonathan Reyes','jon@cedarpoint.example',NULL)")
        c.execute("INSERT INTO canonical_entities VALUES ('inv_1','investor','Cedar Point Capital',NULL,NULL)")
        c.execute("INSERT INTO contacts VALUES ('c1','Jonathan','Reyes','jon@cedarpoint.example',NULL)")
        c.commit()
    finally:
        # Close even when schema/seed setup raises, so the handle never leaks.
        c.close()
    return db


# Synthetic LP feedback; the first item deliberately mixes names, an email,
# an amount and an account number with the objection text.
FEEDBACK = [
    "Jonathan Reyes at Cedar Point Capital (jon@cedarpoint.example) is cooling; Reyes wants better terms "
    "and a $5,000,000 minimum. Wire acct 000123456789 flagged. Objection: fee load and lock-up.",
    "Another LP echoed the lock-up concern and questioned the energy thesis timeline.",
]

# Values that must never appear in the payload handed to the Claude stub.
SENSITIVE = [
    "Jonathan Reyes",
    "Reyes",
    "Cedar Point Capital",
    "jon@cedarpoint.example",
    "$5,000,000",
    "000123456789",
]


def _passthrough(items, seg):
    """Minimizer stub for the worst case: forwards the raw items unminimized."""
    return "\n".join(items)


def _recording_claude(store):
    """Build a Claude stub that records its input under ``store["sent"]``.

    The stub echoes the received text back as the "draft", so the test can
    inspect both what crossed the boundary and what came back out.
    """
    def claude_fn(reg, seg):
        store["sent"] = reg
        return reg

    return claude_fn


def _run_checks(db, conn):
    """Run sections A-E against the seeded database, recording via ``check``.

    Args:
        db: Path of the SQLite file produced by ``make_db``.
        conn: Open connection to that same database.
    """
    # ── A) deterministic enforcement (NER off): nothing sensitive reaches Claude ──
    print("\n[A — deterministic enforcement]")
    captured = {}
    res = G.ground_objections(FEEDBACK, segment_key="institution", db_path=db, conn=conn,
                              minimize_fn=_passthrough, ner_fn=None,
                              claude_fn=_recording_claude(captured))
    check(res.get("status") == "ok", f"grounding ok (status={res.get('status')})")
    # Without this, every "NO <value>" check below would pass vacuously
    # against an empty payload if the Claude stub was never invoked.
    check("sent" in captured, "Claude stub was called with a payload")
    sent = captured.get("sent", "")
    for v in SENSITIVE:
        check(v not in sent, f"de-identified payload to Claude has NO {v!r}")
    check("fee load" in sent, "objection substance survives to Claude")
    check("000123456789" not in res.get("draft", ""), "Tier-1 account number never re-hydrated")
    check("Jonathan Reyes" in res.get("draft", ""), "rehydrate restored real Tier-2 values locally")
    # `or ""` keeps a NULL payload from raising TypeError inside join().
    blob = " ".join(r[0] or "" for r in
                    conn.execute("SELECT payload FROM interaction_log WHERE action LIKE 'redaction.%'"))
    check(all(v not in blob for v in SENSITIVE), "interaction_log carries NO sensitive value")

    # ── B) NER backstop tokenizes an UNKNOWN name not in the CRM dictionary ──
    print("\n[B — NER backstop for unknown names]")
    cap2 = {}
    fb = ["New intro: Penelope Ashworth-Vane runs a family office and is cooling on the lock-up."]

    def ner_stub(text):
        return [("Penelope Ashworth-Vane", "PERSON")]

    res2 = G.ground_objections(fb, db_path=db, conn=conn, minimize_fn=_passthrough, ner_fn=ner_stub,
                               claude_fn=_recording_claude(cap2))
    # Same vacuous-pass guard as in section A.
    check("sent" in cap2, "Claude stub was called for the unknown-name case")
    check("Penelope Ashworth-Vane" not in cap2.get("sent", ""),
          "unknown name tokenized by NER backstop (absent from Claude payload)")
    check("Penelope Ashworth-Vane" in res2.get("draft", ""),
          "unknown name re-hydrated locally for the human")

    # ── C) FAIL CLOSED: local model unavailable -> no Claude call ──
    print("\n[C — fail closed: local model down]")
    called = {"claude": False}

    def boom(items, seg):
        raise RuntimeError("Spark Control unreachable")

    def flagging_claude(reg, seg):
        called["claude"] = True
        return reg

    res3 = G.ground_objections(FEEDBACK, db_path=db, conn=conn, minimize_fn=boom, ner_fn=None,
                               claude_fn=flagging_claude)
    check(res3.get("status") == "local_model_unavailable",
          f"status local_model_unavailable (got {res3.get('status')})")
    check(called["claude"] is False, "Claude was NOT called when minimize failed (fail closed)")

    # ── D) FAIL CLOSED: a Claude-hallucinated token quarantines the draft ──
    print("\n[D — fail closed: hallucinated token]")

    def hallucinating_claude(reg, seg):
        return reg + " Also loop in [PERSON_99]."

    res4 = G.ground_objections(FEEDBACK, db_path=db, conn=conn, minimize_fn=_passthrough, ner_fn=None,
                               claude_fn=hallucinating_claude)
    check(res4.get("status") == "rehydrate_failed", f"status rehydrate_failed (got {res4.get('status')})")
    check("draft" not in res4 and res4.get("draft_quarantined"),
          "de-anonymized draft quarantined, not returned")

    # ── E) FAIL CLOSED: local scrub backend requires a db_path ──
    print("\n[E — fail closed: missing db_path]")
    raised = False
    try:
        Boundary(db_path=None, backend="local")
    except ValueError:
        raised = True
    check(raised, "Boundary(local) without db_path raises (never runs name-blind)")

    def echo_claude(reg, seg):
        return reg

    res5 = G.ground_objections(FEEDBACK, db_path=None, conn=conn, minimize_fn=_passthrough, ner_fn=None,
                               claude_fn=echo_claude)
    check(res5.get("status") == "scrub_unavailable",
          f"grounding fails closed without db_path (got {res5.get('status')})")


def main():
    """Run every boundary check, print a summary, and exit 1 on any failure.

    The database connection and the temporary directory are released even
    when a section raises, so an aborted run leaves nothing behind.
    """
    db = make_db()
    try:
        conn = sqlite3.connect(db)
        try:
            _run_checks(db, conn)
        finally:
            conn.close()
    finally:
        # Best-effort removal of the mkdtemp() directory created by make_db().
        shutil.rmtree(os.path.dirname(db), ignore_errors=True)

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for failure in FAILS:
            print(f" - {failure}")
        sys.exit(1)
    print("ALL PASS (grounding boundary enforcement + fail-closed)")


if __name__ == "__main__":
    main()