Architect grounding boundary: redaction/re-hydration privacy gate (v0.1.0:55)
Phase 1 Workstream D. Lets the Architect ground the thesis in REAL recurring LP objections without any LP identity reaching the Claude API. Layered, defense-in-depth, fail-closed by construction (docs/redaction-rehydration.md). backend/redaction/: - scrub.py: the leak-proof core. Drops Tier-1 (labelled/structured account/wire/SSN/ IBAN/SWIFT/passport, separator-tolerant); tokenizes known LP entities (dictionary from the canonical layer, unicode-folded + hyphen-extended) and structured PII (emails, scheme-less/social URLs, intl+ext phones, currency-cued amounts, ISO/worded/numeric/ quarter dates, addresses, bare long digit runs); pre-neutralizes injected [TYPE_N] strings; single-pass rehydrate; metadata-only audit logging (the pseudonym map is the de-anon key — local-only, never logged/sent). Hardened across THREE adversarial leak-hunts (worded/coded amounts, intl phones, NFD/ligature/zero-width names, slash/ comma SSN, SWIFT, alpha-prefixed accounts, substance-preserving false-positive fixes). - client.py: Boundary — one scrub/rehydrate contract, SCRUB_BACKEND=local (default) or gateway (Spark Control /scrub + /rehydrate). Fails closed (db_path required; dictionary build errors propagate; strict rehydrate returns tokenized-not-de-anon text). - test_scrub_leak.py, test_reidentification.py: golden-file leak + re-identification suites (synthetic only, guardrail #9), regression-locking every leak-hunt vector. backend/mcp/architect_grounding.py: the flow — retrieve (local) -> minimize-first (local Qwen) -> scrub (+ local-Qwen NER backstop for unknown names) -> Claude over the de-identified register only -> re-hydrate locally -> human review. FAILS CLOSED if the local model is unreachable or a hallucinated token appears. test_grounding_boundary.py proves nothing sensitive reaches Claude and the three fail-closed paths. server.py: POST /api/architect/ground (admin) wires retrieval -> ground_objections. docker_entrypoint.sh: SCRUB_BACKEND (default local). 
docs/spark-control-scrub-endpoints.md: the gateway handover spec (Option 1 — caller supplies the entity dictionary). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,126 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Boundary-enforcement + FAIL-CLOSED test for the Architect grounding flow.
|
||||
|
||||
Proves: (1) whatever reaches Claude is de-identified and the draft is re-hydrated locally;
|
||||
(2) the local-Qwen NER backstop tokenizes UNKNOWN names not in the CRM dictionary;
|
||||
(3) the flow FAILS CLOSED — no Claude call when the local model is unavailable, the scrub
|
||||
refuses without a db_path, and a Claude-hallucinated token quarantines the draft.
|
||||
Offline + synthetic (guardrail #9): minimize, Claude, and NER are injected as stubs.
|
||||
|
||||
Run: cd backend && python3 mcp/test_grounding_boundary.py
|
||||
"""
|
||||
import os
|
||||
import sqlite3
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
import architect_grounding as G # noqa: E402
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "redaction"))
|
||||
from client import Boundary # noqa: E402
|
||||
|
||||
# Messages of every failed check, in the order they were reported.
FAILS = []


def check(cond, msg):
    """Report one assertion result on stdout and remember it if it failed.

    Args:
        cond: Truthy when the assertion held.
        msg: Human-readable description of what was asserted.
    """
    if cond:
        print(" PASS " + msg)
        return
    print(" FAIL " + msg)
    FAILS.append(msg)
|
||||
|
||||
|
||||
def make_db():
    """Create a throwaway SQLite CRM fixture and return the path to its file.

    The database lives at ``crm.db`` inside a fresh ``tempfile.mkdtemp()``
    directory and contains three tables: ``canonical_entities``, ``contacts``
    and ``interaction_log``. It is seeded with one synthetic person
    (Jonathan Reyes), one synthetic investor (Cedar Point Capital) and one
    matching contact row; ``interaction_log`` starts empty.

    The temporary directory is not removed here; cleanup is up to the caller.

    Returns:
        str: Filesystem path of the created database file.
    """
    db = os.path.join(tempfile.mkdtemp(), "crm.db")
    c = sqlite3.connect(db)
    try:
        c.executescript("""
            CREATE TABLE canonical_entities (id TEXT PRIMARY KEY, entity_kind TEXT, display_name TEXT, primary_email TEXT, deleted_at TEXT);
            CREATE TABLE contacts (id TEXT PRIMARY KEY, first_name TEXT, last_name TEXT, email TEXT, deleted_at TEXT);
            CREATE TABLE interaction_log (id TEXT PRIMARY KEY, ts TEXT, actor_type TEXT, actor_id TEXT, action TEXT,
                target_type TEXT, target_id TEXT, payload TEXT, source TEXT, created_at TEXT);
        """)
        c.execute("INSERT INTO canonical_entities VALUES ('per_1','person','Jonathan Reyes','jon@cedarpoint.example',NULL)")
        c.execute("INSERT INTO canonical_entities VALUES ('inv_1','investor','Cedar Point Capital',NULL,NULL)")
        c.execute("INSERT INTO contacts VALUES ('c1','Jonathan','Reyes','jon@cedarpoint.example',NULL)")
        c.commit()
    finally:
        # Always release the handle, even if schema creation or seeding raised.
        c.close()
    return db
|
||||
|
||||
|
||||
# Synthetic LP feedback items passed to G.ground_objections. The first item
# deliberately mixes identifying values (names, firm, email, amount, account
# number) with objection substance ("fee load", "lock-up"); the second carries
# objection substance only.
FEEDBACK = [
    "Jonathan Reyes at Cedar Point Capital (jon@cedarpoint.example) is cooling; Reyes wants better terms "
    "and a $5,000,000 minimum. Wire acct 000123456789 flagged. Objection: fee load and lock-up.",
    "Another LP echoed the lock-up concern and questioned the energy thesis timeline.",
]
# Substrings of FEEDBACK that main() asserts are absent from the payload handed
# to the Claude stub and from the redaction rows in interaction_log.
SENSITIVE = ["Jonathan Reyes", "Reyes", "Cedar Point Capital", "jon@cedarpoint.example", "$5,000,000", "000123456789"]
|
||||
|
||||
|
||||
def main():
    """Run grounding-boundary scenarios A-E and exit non-zero on any failure.

    Each scenario calls ``G.ground_objections`` (or ``Boundary``) with injected
    stubs for minimize / NER / Claude and records results through ``check``:

    * A - with NER off, nothing in ``SENSITIVE`` reaches the Claude stub, the
      objection substance does, and the returned draft is re-hydrated.
    * B - an NER stub causes an unknown name to be kept out of the payload.
    * C - a failing minimize step means the Claude stub is never called.
    * D - an unknown ``[PERSON_99]`` token in Claude's output quarantines the draft.
    * E - the local scrub backend refuses to run without ``db_path``.

    The SQLite connection and the temporary fixture directory are released
    even when a scenario raises. Calls ``sys.exit(1)`` if any check failed.
    """
    import shutil  # local import: only needed for fixture cleanup

    def passthrough(items, seg):
        # worst case: no minimization
        return "\n".join(items)

    def capturing_claude(sink):
        """Build a Claude stub that records the register it is sent and echoes it back."""
        def claude(reg, seg):
            sink["sent"] = reg
            return reg
        return claude

    db = make_db()
    conn = sqlite3.connect(db)
    try:
        # ── A) deterministic enforcement (NER off): nothing sensitive reaches Claude ──
        print("\n[A — deterministic enforcement]")
        captured = {}
        res = G.ground_objections(FEEDBACK, segment_key="institution", db_path=db, conn=conn,
                                  minimize_fn=passthrough, ner_fn=None,
                                  claude_fn=capturing_claude(captured))
        check(res.get("status") == "ok", f"grounding ok (status={res.get('status')})")
        # Without this, every "NO <value>" check below would pass trivially
        # against the empty-string default when the stub was never invoked.
        check("sent" in captured, "Claude stub received a payload (leak checks are not vacuous)")
        sent = captured.get("sent", "")
        for v in SENSITIVE:
            check(v not in sent, f"de-identified payload to Claude has NO {v!r}")
        check("fee load" in sent, "objection substance survives to Claude")
        check("000123456789" not in res.get("draft", ""), "Tier-1 account number never re-hydrated")
        check("Jonathan Reyes" in res.get("draft", ""), "rehydrate restored real Tier-2 values locally")
        # payload is a nullable TEXT column; treat NULL as empty so the join cannot raise.
        blob = " ".join(
            (r[0] or "")
            for r in conn.execute("SELECT payload FROM interaction_log WHERE action LIKE 'redaction.%'")
        )
        check(all(v not in blob for v in SENSITIVE), "interaction_log carries NO sensitive value")

        # ── B) NER backstop tokenizes an UNKNOWN name not in the CRM dictionary ──
        print("\n[B — NER backstop for unknown names]")
        cap2 = {}
        fb = ["New intro: Penelope Ashworth-Vane runs a family office and is cooling on the lock-up."]

        def ner_stub(text):
            return [("Penelope Ashworth-Vane", "PERSON")]

        res2 = G.ground_objections(fb, db_path=db, conn=conn, minimize_fn=passthrough, ner_fn=ner_stub,
                                   claude_fn=capturing_claude(cap2))
        check("sent" in cap2, "Claude stub received a payload (NER check is not vacuous)")
        check("Penelope Ashworth-Vane" not in cap2.get("sent", ""), "unknown name tokenized by NER backstop (absent from Claude payload)")
        check("Penelope Ashworth-Vane" in res2.get("draft", ""), "unknown name re-hydrated locally for the human")

        # ── C) FAIL CLOSED: local model unavailable -> no Claude call ──
        print("\n[C — fail closed: local model down]")
        called = {"claude": False}

        def boom(items, seg):
            raise RuntimeError("Spark Control unreachable")

        def flagging_claude(reg, seg):
            # Records that the boundary was crossed; must never run in this scenario.
            called["claude"] = True
            return reg

        res3 = G.ground_objections(FEEDBACK, db_path=db, conn=conn, minimize_fn=boom, ner_fn=None,
                                   claude_fn=flagging_claude)
        check(res3.get("status") == "local_model_unavailable", f"status local_model_unavailable (got {res3.get('status')})")
        check(called["claude"] is False, "Claude was NOT called when minimize failed (fail closed)")

        # ── D) FAIL CLOSED: a Claude-hallucinated token quarantines the draft ──
        print("\n[D — fail closed: hallucinated token]")
        res4 = G.ground_objections(FEEDBACK, db_path=db, conn=conn, minimize_fn=passthrough, ner_fn=None,
                                   claude_fn=lambda reg, seg: reg + " Also loop in [PERSON_99].")
        check(res4.get("status") == "rehydrate_failed", f"status rehydrate_failed (got {res4.get('status')})")
        check("draft" not in res4 and res4.get("draft_quarantined"), "de-anonymized draft quarantined, not returned")

        # ── E) FAIL CLOSED: local scrub backend requires a db_path ──
        print("\n[E — fail closed: missing db_path]")
        raised = False
        try:
            Boundary(db_path=None, backend="local")
        except ValueError:
            raised = True
        check(raised, "Boundary(local) without db_path raises (never runs name-blind)")
        res5 = G.ground_objections(FEEDBACK, db_path=None, conn=conn, minimize_fn=passthrough, ner_fn=None,
                                   claude_fn=lambda reg, seg: reg)
        check(res5.get("status") == "scrub_unavailable", f"grounding fails closed without db_path (got {res5.get('status')})")
    finally:
        # Release the handle and drop the fixture directory even if a scenario raised.
        conn.close()
        shutil.rmtree(os.path.dirname(db), ignore_errors=True)

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for f in FAILS:
            print(f" - {f}")
        sys.exit(1)
    print("ALL PASS (grounding boundary enforcement + fail-closed)")
|
||||
|
||||
|
||||
# Script entry point: run the checks only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user