Files
ten31-database/backend/mcp/test_grounding_boundary.py
T
Keysat 2e70b34592 Architect grounding boundary: redaction/re-hydration privacy gate (v0.1.0:55)
Phase 1 Workstream D. Lets the Architect ground the thesis in REAL recurring LP
objections without any LP identity reaching the Claude API. Layered, defense-in-depth,
fail-closed by construction (docs/redaction-rehydration.md).

backend/redaction/:
- scrub.py: the leak-proof core. Drops Tier-1 (labelled/structured account/wire/SSN/
  IBAN/SWIFT/passport, separator-tolerant); tokenizes known LP entities (dictionary from
  the canonical layer, unicode-folded + hyphen-extended) and structured PII (emails,
  scheme-less/social URLs, intl+ext phones, currency-cued amounts, ISO/worded/numeric/
  quarter dates, addresses, bare long digit runs); pre-neutralizes injected [TYPE_N]
  strings; single-pass rehydrate; metadata-only audit logging (the pseudonym map is the
  de-anon key — local-only, never logged/sent). Hardened across THREE adversarial
  leak-hunts (worded/coded amounts, intl phones, NFD/ligature/zero-width names, slash/
  comma SSN, SWIFT, alpha-prefixed accounts, substance-preserving false-positive fixes).
- client.py: Boundary — one scrub/rehydrate contract, SCRUB_BACKEND=local (default) or
  gateway (Spark Control /scrub + /rehydrate). Fails closed (db_path required; dictionary
  build errors propagate; strict rehydrate returns tokenized-not-de-anon text).
- test_scrub_leak.py, test_reidentification.py: golden-file leak + re-identification
  suites (synthetic only, guardrail #9), regression-locking every leak-hunt vector.

backend/mcp/architect_grounding.py: the flow — retrieve (local) -> minimize-first
(local Qwen) -> scrub (+ local-Qwen NER backstop for unknown names) -> Claude over the
de-identified register only -> re-hydrate locally -> human review. FAILS CLOSED if the
local model is unreachable or a hallucinated token appears. test_grounding_boundary.py
proves that nothing sensitive reaches Claude and exercises the three fail-closed paths.

server.py: POST /api/architect/ground (admin) wires retrieval -> ground_objections.
docker_entrypoint.sh: SCRUB_BACKEND (default local). docs/spark-control-scrub-endpoints.md:
the gateway handover spec (Option 1 — caller supplies the entity dictionary).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-05 17:06:29 -05:00

127 lines
6.4 KiB
Python

#!/usr/bin/env python3
"""Boundary-enforcement + FAIL-CLOSED test for the Architect grounding flow.
Proves: (1) whatever reaches Claude is de-identified and the draft is re-hydrated locally;
(2) the local-Qwen NER backstop tokenizes UNKNOWN names not in the CRM dictionary;
(3) the flow FAILS CLOSED — no Claude call when the local model is unavailable, the scrub
refuses without a db_path, and a Claude-hallucinated token quarantines the draft.
Offline + synthetic (guardrail #9): minimize, Claude, and NER are injected as stubs.
Run: cd backend && python3 mcp/test_grounding_boundary.py
"""
import os
import sqlite3
import sys
import tempfile
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import architect_grounding as G # noqa: E402
sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "redaction"))
from client import Boundary # noqa: E402
# Messages of every failed check(); main() prints them and exits non-zero if any exist.
FAILS = []
def check(cond, msg):
    """Report one assertion: print a PASS/FAIL line and remember failures.

    Args:
        cond: Outcome of the assertion; only its truthiness is used.
        msg: Human-readable description printed after the PASS/FAIL label.

    A falsy ``cond`` appends ``msg`` to the module-level ``FAILS`` list.
    """
    label = " PASS " if cond else " FAIL "
    print(label + msg)
    if cond:
        return
    FAILS.append(msg)
def make_db():
    """Create a throwaway SQLite CRM fixture and return the path to its file.

    The database holds three tables (``canonical_entities``, ``contacts``,
    ``interaction_log``) and is seeded with one synthetic person, one synthetic
    investor and one matching contact row. ``interaction_log`` starts empty.

    Returns:
        Filesystem path of the ``crm.db`` file inside a fresh temporary directory.
    """
    # Function-scope imports keep this block self-contained.
    import atexit
    import shutil

    tmpdir = tempfile.mkdtemp()
    # The caller needs the file for the rest of the run, so defer removal of the
    # temporary directory to interpreter exit instead of leaking it.
    atexit.register(shutil.rmtree, tmpdir, ignore_errors=True)
    db = os.path.join(tmpdir, "crm.db")

    c = sqlite3.connect(db)
    try:
        c.executescript("""
            CREATE TABLE canonical_entities (id TEXT PRIMARY KEY, entity_kind TEXT, display_name TEXT, primary_email TEXT, deleted_at TEXT);
            CREATE TABLE contacts (id TEXT PRIMARY KEY, first_name TEXT, last_name TEXT, email TEXT, deleted_at TEXT);
            CREATE TABLE interaction_log (id TEXT PRIMARY KEY, ts TEXT, actor_type TEXT, actor_id TEXT, action TEXT,
                target_type TEXT, target_id TEXT, payload TEXT, source TEXT, created_at TEXT);
        """)
        c.executemany(
            "INSERT INTO canonical_entities VALUES (?, ?, ?, ?, ?)",
            [
                ("per_1", "person", "Jonathan Reyes", "jon@cedarpoint.example", None),
                ("inv_1", "investor", "Cedar Point Capital", None, None),
            ],
        )
        c.execute(
            "INSERT INTO contacts VALUES (?, ?, ?, ?, ?)",
            ("c1", "Jonathan", "Reyes", "jon@cedarpoint.example", None),
        )
        c.commit()
    finally:
        # Close the handle even if schema creation or seeding raises.
        c.close()
    return db
# Synthetic LP feedback fed to the grounding flow. The first item mixes a known
# person, firm, email, amount and account number with the objection substance;
# the second item carries objection substance only.
FEEDBACK = [
    (
        "Jonathan Reyes at Cedar Point Capital (jon@cedarpoint.example) is cooling; "
        "Reyes wants better terms and a $5,000,000 minimum. "
        "Wire acct 000123456789 flagged. "
        "Objection: fee load and lock-up."
    ),
    "Another LP echoed the lock-up concern and questioned the energy thesis timeline.",
]

# Substrings that the checks require to be absent from the payload sent to Claude
# and from the audit log.
SENSITIVE = [
    "Jonathan Reyes",
    "Reyes",
    "Cedar Point Capital",
    "jon@cedarpoint.example",
    "$5,000,000",
    "000123456789",
]
def _passthrough(items, seg):
    """Minimizer stub: join the feedback items unchanged (worst case: no minimization)."""
    return "\n".join(items)


def _recording_claude(store):
    """Return a Claude stub that saves its payload under ``store["sent"]`` and echoes it back."""
    def claude_stub(reg, seg):
        store["sent"] = reg
        return reg
    return claude_stub


def _check_deterministic_enforcement(db, conn):
    """Section A: with NER off, nothing in SENSITIVE may reach the Claude stub."""
    # ── A) deterministic enforcement (NER off): nothing sensitive reaches Claude ──
    print("\n[A — deterministic enforcement]")
    captured = {}
    res = G.ground_objections(FEEDBACK, segment_key="institution", db_path=db, conn=conn,
                              minimize_fn=_passthrough, ner_fn=None,
                              claude_fn=_recording_claude(captured))
    check(res.get("status") == "ok", f"grounding ok (status={res.get('status')})")
    sent = captured.get("sent", "")
    for v in SENSITIVE:
        check(v not in sent, f"de-identified payload to Claude has NO {v!r}")
    check("fee load" in sent, "objection substance survives to Claude")
    check("000123456789" not in res.get("draft", ""), "Tier-1 account number never re-hydrated")
    check("Jonathan Reyes" in res.get("draft", ""), "rehydrate restored real Tier-2 values locally")
    # NOTE(review): this passes vacuously if no 'redaction.%' rows were written
    # through this connection; confirm the flow logs here.
    rows = conn.execute("SELECT payload FROM interaction_log WHERE action LIKE 'redaction.%'")
    blob = " ".join(r[0] for r in rows)
    check(all(v not in blob for v in SENSITIVE), "interaction_log carries NO sensitive value")


def _check_ner_backstop(db, conn):
    """Section B: a name absent from the CRM fixture is tokenized via the NER stub."""
    # ── B) NER backstop tokenizes an UNKNOWN name not in the CRM dictionary ──
    print("\n[B — NER backstop for unknown names]")
    cap2 = {}
    fb = ["New intro: Penelope Ashworth-Vane runs a family office and is cooling on the lock-up."]

    def ner_stub(text):
        return [("Penelope Ashworth-Vane", "PERSON")]

    res2 = G.ground_objections(fb, db_path=db, conn=conn, minimize_fn=_passthrough, ner_fn=ner_stub,
                               claude_fn=_recording_claude(cap2))
    check("Penelope Ashworth-Vane" not in cap2.get("sent", ""),
          "unknown name tokenized by NER backstop (absent from Claude payload)")
    check("Penelope Ashworth-Vane" in res2.get("draft", ""), "unknown name re-hydrated locally for the human")


def _check_local_model_down(db, conn):
    """Section C: a failing minimizer must stop the flow before Claude is called."""
    # ── C) FAIL CLOSED: local model unavailable -> no Claude call ──
    print("\n[C — fail closed: local model down]")
    called = {"claude": False}

    def boom(items, seg):
        raise RuntimeError("Spark Control unreachable")

    def claude_stub(reg, seg):
        # Record that the boundary was crossed, then echo the payload.
        called["claude"] = True
        return reg

    res3 = G.ground_objections(FEEDBACK, db_path=db, conn=conn, minimize_fn=boom, ner_fn=None,
                               claude_fn=claude_stub)
    check(res3.get("status") == "local_model_unavailable",
          f"status local_model_unavailable (got {res3.get('status')})")
    check(called["claude"] is False, "Claude was NOT called when minimize failed (fail closed)")


def _check_hallucinated_token(db, conn):
    """Section D: a token Claude invents must quarantine the draft."""
    # ── D) FAIL CLOSED: a Claude-hallucinated token quarantines the draft ──
    print("\n[D — fail closed: hallucinated token]")

    def claude_stub(reg, seg):
        # Append a token that the scrub never issued.
        return reg + " Also loop in [PERSON_99]."

    res4 = G.ground_objections(FEEDBACK, db_path=db, conn=conn, minimize_fn=_passthrough, ner_fn=None,
                               claude_fn=claude_stub)
    check(res4.get("status") == "rehydrate_failed", f"status rehydrate_failed (got {res4.get('status')})")
    check("draft" not in res4 and res4.get("draft_quarantined"), "de-anonymized draft quarantined, not returned")


def _check_missing_db_path(conn):
    """Section E: the local scrub backend and the flow both refuse a missing db_path."""
    # ── E) FAIL CLOSED: local scrub backend requires a db_path ──
    print("\n[E — fail closed: missing db_path]")
    raised = False
    try:
        Boundary(db_path=None, backend="local")
    except ValueError:
        raised = True
    check(raised, "Boundary(local) without db_path raises (never runs name-blind)")

    def claude_stub(reg, seg):
        return reg

    res5 = G.ground_objections(FEEDBACK, db_path=None, conn=conn, minimize_fn=_passthrough, ner_fn=None,
                               claude_fn=claude_stub)
    check(res5.get("status") == "scrub_unavailable",
          f"grounding fails closed without db_path (got {res5.get('status')})")


def main():
    """Run sections A–E against a fresh fixture database and report the outcome.

    Prints one PASS/FAIL line per check, then a summary.

    Raises:
        SystemExit: with exit code 1 when at least one check failed.
    """
    db = make_db()
    conn = sqlite3.connect(db)
    try:
        _check_deterministic_enforcement(db, conn)
        _check_ner_backstop(db, conn)
        _check_local_model_down(db, conn)
        _check_hallucinated_token(db, conn)
        _check_missing_db_path(conn)
    finally:
        # Close the connection even when a grounding call raises unexpectedly.
        conn.close()

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for failure in FAILS:
            print(f" - {failure}")
        sys.exit(1)
    print("ALL PASS (grounding boundary enforcement + fail-closed)")


if __name__ == "__main__":
    main()