Add regression tests for v74 fixes; close soft-delete leak in list-view aggregates

Lock in the three v0.1.0:74 security/privacy fixes with regression tests, and
fix a same-class soft-delete leak surfaced while writing them.

- backend/test_assets_traversal.py: boots the real server, proves /assets/
  path-traversal vectors (incl. a real decoy file and the live crm.db, plain
  and URL-encoded) 404 and leak nothing, while a legit asset still serves 200.
- backend/test_soft_delete_reads.py: get-by-id 404s soft-deleted rows and
  nested + list-view aggregates exclude soft-deleted children.
- backend/mcp/test_outreach_redaction.py: an unknown free-prose name is
  tokenized away from the Claude payload but re-hydrated locally, and the path
  fails closed (no Claude call) when the local NER model is down.
- backend/run_tests.py: aggregate runner (each backend/**/test_*.py in its own
  subprocess); replaces the manual for-loop. 16/16 green.

A reviewer pass on the tests confirmed the soft-delete filter was missing from
list-view aggregate sub-selects: org contact_count/total_funded and contacts
comm_count/last_contact_date counted soft-deleted rows. Add `deleted_at IS NULL`
to those four (server.py) and regression-cover them.

The reports subsystem (dashboard/pipeline/LP-breakdown, ~16 aggregate queries)
has the same leak and is logged as P2 for a dedicated pass. Not yet built or
deployed — bump the package version before the next s9pk build.
This commit is contained in:
Keysat
2026-06-13 00:26:22 -05:00
parent a74a540295
commit 7285bb0e52
6 changed files with 488 additions and 11 deletions
+160
View File
@@ -0,0 +1,160 @@
#!/usr/bin/env python3
"""Regression test for the soft-delete READ-path fix (v0.1.0:74).
Guardrail #3 is soft-delete only (deleted_at), and the 2026-06-12 audit found that
while LIST handlers filtered `deleted_at IS NULL`, the get-by-id handlers and their
nested related-data sub-selects did not — so a soft-deleted contact/org was still
readable by id, and soft-deleted children still surfaced inside a parent's detail
payload. The fix added `deleted_at IS NULL` to every get-by-id + nested sub-select
(server.py handle_get_contact / handle_get_organization).
This boots the REAL server, hand-builds active + soft-deleted rows across the five
soft-deletable tables, and drives the live HTTP read paths with a real token. It
asserts: get-by-id 404s a soft-deleted contact/org, and nested sub-selects
(org->contacts/opportunities, contact->communications/opportunities/lp_profile)
omit soft-deleted children while keeping the live ones. Synthetic only (guardrail #9).
Run: cd backend && python3 test_soft_delete_reads.py
"""
import http.client
import json
import os
import sqlite3
import sys
import tempfile
import threading
from http.server import ThreadingHTTPServer
_DATA = tempfile.mkdtemp()
os.environ["CRM_DATA_DIR"] = _DATA
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import server # noqa: E402
FAILS = []
DEL = "2026-06-01T00:00:00" # any non-NULL deleted_at marks a row soft-deleted
def check(cond, msg):
print((" PASS " if cond else " FAIL ") + msg)
if not cond:
FAILS.append(msg)
class _Quiet(server.CRMHandler):
def log_message(self, *a):
pass
def _get(port, path, token):
conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
conn.request("GET", path, headers={"Authorization": "Bearer " + token})
resp = conn.getresponse()
body = resp.read().decode("utf-8", "replace")
conn.close()
data = None
if body:
try:
data = json.loads(body)
except ValueError:
pass
return resp.status, data
def seed():
"""Build a fixed graph of live + soft-deleted rows directly in the migrated DB."""
c = sqlite3.connect(os.environ["CRM_DB_PATH"])
c.execute("INSERT INTO users (id,username,email,password_hash,full_name,role,is_active) "
"VALUES ('u1','grant','grant@ten31.example','x','Grant','admin',1)")
# organizations: one live, one soft-deleted
c.execute("INSERT INTO organizations (id,name) VALUES ('orgA','Harbor & Vine')")
c.execute("INSERT INTO organizations (id,name,deleted_at) VALUES ('orgX','Deleted Org',?)", (DEL,))
# contacts under orgA: one live (with children), one soft-deleted, one live w/ deleted lp
c.execute("INSERT INTO contacts (id,first_name,last_name,organization_id) VALUES ('cLive','Ada','Live','orgA')")
c.execute("INSERT INTO contacts (id,first_name,last_name,organization_id,deleted_at) VALUES ('cDead','Boris','Gone','orgA',?)", (DEL,))
c.execute("INSERT INTO contacts (id,first_name,last_name,organization_id) VALUES ('cLp','Cora','Lp','orgA')")
# opportunities on cLive (also tied to orgA so they appear in the org detail too)
c.execute("INSERT INTO opportunities (id,name,contact_id,organization_id,owner_id) VALUES ('opLive','Live Opp','cLive','orgA','u1')")
c.execute("INSERT INTO opportunities (id,name,contact_id,organization_id,owner_id,deleted_at) VALUES ('opDead','Dead Opp','cLive','orgA','u1',?)", (DEL,))
# funded opportunities on orgA — one live, one soft-deleted (for the org-list total_funded aggregate)
c.execute("INSERT INTO opportunities (id,name,contact_id,organization_id,owner_id,stage,commitment_amount) VALUES ('opFundLive','Funded Live','cLive','orgA','u1','funded',1000000)")
c.execute("INSERT INTO opportunities (id,name,contact_id,organization_id,owner_id,stage,commitment_amount,deleted_at) VALUES ('opFundDead','Funded Dead','cLive','orgA','u1','funded',500000,?)", (DEL,))
# communications on cLive
c.execute("INSERT INTO communications (id,contact_id,communication_date,created_by,subject) VALUES ('cmLive','cLive','2026-05-01','u1','Live note')")
c.execute("INSERT INTO communications (id,contact_id,communication_date,created_by,subject,deleted_at) VALUES ('cmDead','cLive','2026-05-02','u1','Dead note',?)", (DEL,))
# lp_profiles: live one on cLive, soft-deleted one on cLp
c.execute("INSERT INTO lp_profiles (id,contact_id,fund_name) VALUES ('lpLive','cLive','Fund III')")
c.execute("INSERT INTO lp_profiles (id,contact_id,fund_name,deleted_at) VALUES ('lpDead','cLp','Fund III',?)", (DEL,))
c.commit()
c.close()
def main():
server.init_db()
seed()
token = server.create_token("u1", "grant", "admin")
httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
port = httpd.server_address[1]
threading.Thread(target=httpd.serve_forever, daemon=True).start()
try:
# ── get-by-id: soft-deleted rows are not found ──
print("\n[get-by-id excludes soft-deleted]")
st, _ = _get(port, "/api/contacts/cDead", token)
check(st == 404, f"GET soft-deleted contact -> 404 (got {st})")
st, _ = _get(port, "/api/organizations/orgX", token)
check(st == 404, f"GET soft-deleted organization -> 404 (got {st})")
st, live = _get(port, "/api/contacts/cLive", token)
check(st == 200, f"GET live contact -> 200 (got {st})")
# ── contact detail nested sub-selects exclude soft-deleted children ──
print("\n[contact detail nested sub-selects]")
d = (live or {}).get("data", {})
comm_ids = {x["id"] for x in d.get("communications", [])}
opp_ids = {x["id"] for x in d.get("opportunities", [])}
check("cmLive" in comm_ids and "cmDead" not in comm_ids, f"communications: live only (got {comm_ids})")
check("opLive" in opp_ids and "opDead" not in opp_ids, f"opportunities: live only (got {opp_ids})")
check(bool(d.get("lp_profile")) and d["lp_profile"].get("id") == "lpLive", "live lp_profile present on contact")
# soft-deleted lp_profile must read back as None (nested single-row sub-select)
_, lpc = _get(port, "/api/contacts/cLp", token)
check((lpc or {}).get("data", {}).get("lp_profile") is None, "soft-deleted lp_profile reads back as None")
# ── organization detail nested sub-selects exclude soft-deleted children ──
print("\n[organization detail nested sub-selects]")
_, org = _get(port, "/api/organizations/orgA", token)
od = (org or {}).get("data", {})
org_contacts = {x["id"] for x in od.get("contacts", [])}
org_opps = {x["id"] for x in od.get("opportunities", [])}
check("cLive" in org_contacts and "cLp" in org_contacts and "cDead" not in org_contacts,
f"org.contacts: both live contacts present, soft-deleted absent (got {org_contacts})")
check("opLive" in org_opps and "opDead" not in org_opps, f"org.opportunities: live only (got {org_opps})")
# ── list-view aggregates exclude soft-deleted rows (org contact_count/total_funded, contact comm_count) ──
print("\n[list-view aggregates]")
_, orglist = _get(port, "/api/organizations", token)
rowA = next((x for x in (orglist or {}).get("data", []) if x.get("id") == "orgA"), None)
check(rowA is not None, "orgA present in org list")
if rowA:
check(rowA.get("contact_count") == 2, f"org contact_count: live only (cLive,cLp -> 2; got {rowA.get('contact_count')})")
check(rowA.get("total_funded") == 1000000, f"org total_funded: live funded only (1,000,000; got {rowA.get('total_funded')})")
_, ctlist = _get(port, "/api/contacts", token)
rowC = next((x for x in (ctlist or {}).get("data", []) if x.get("id") == "cLive"), None)
check(rowC is not None, "cLive present in contact list")
if rowC:
check(rowC.get("comm_count") == 1, f"contact comm_count: live only (cmLive -> 1; got {rowC.get('comm_count')})")
finally:
httpd.shutdown()
print()
if FAILS:
print(f"FAILED ({len(FAILS)}):")
for f in FAILS:
print(f" - {f}")
sys.exit(1)
print("ALL PASS (soft-delete read-path containment)")
if __name__ == "__main__":
main()