Files
ten31-database/backend/test_assets_traversal.py
T
Keysat 7285bb0e52 Add regression tests for v74 fixes; close soft-delete leak in list-view aggregates
Lock in the three v0.1.0:74 security/privacy fixes with regression tests, and
fix a same-class soft-delete leak surfaced while writing them.

- backend/test_assets_traversal.py: boots the real server, proves /assets/
  path-traversal vectors (incl. a real decoy file and the live crm.db, plain
  and URL-encoded) 404 and leak nothing, while a legit asset still serves 200.
- backend/test_soft_delete_reads.py: get-by-id 404s soft-deleted rows and
  nested + list-view aggregates exclude soft-deleted children.
- backend/mcp/test_outreach_redaction.py: an unknown free-prose name is
  tokenized away from the Claude payload but re-hydrated locally, and the path
  fails closed (no Claude call) when the local NER model is down.
- backend/run_tests.py: aggregate runner (each backend/**/test_*.py in its own
  subprocess); replaces the manual for-loop. 16/16 green.

A reviewer pass on the tests confirmed the soft-delete filter was missing from
list-view aggregate sub-selects: org contact_count/total_funded and contacts
comm_count/last_contact_date counted soft-deleted rows. Add `deleted_at IS NULL`
to those four (server.py) and regression-cover them.

The reports subsystem (dashboard/pipeline/LP-breakdown, ~16 aggregate queries)
has the same leak and is logged as P2 for a dedicated pass. Not yet built or
deployed — bump the package version before the next s9pk build.
2026-06-13 00:26:22 -05:00

127 lines
5.1 KiB
Python

#!/usr/bin/env python3
"""Regression test for the /assets/ path-traversal containment fix (v0.1.0:74).
Before the fix, get_path()/urlparse did NOT normalize '..', so an unauthenticated
GET /assets/../../data/crm.db (raw client, no client-side normalization) escaped the
frontend root and read any file the process could — the LP DB, the JWT secret, the
Gmail key. The fix resolves the target with os.path.realpath and 404s anything that
does not stay under FRONTEND_ROOT (server.py, the `/assets/` branch of do_GET).
This boots the REAL server in-process against a throwaway frontend root, plants a
decoy "secret" OUTSIDE that root, and proves: (1) traversal vectors that resolve to a
real readable file outside the root still 404 and leak no bytes; (2) the live crm.db
path is 404'd; (3) URL-encoded separators don't help; (4) a legit in-bounds asset
still serves 200 (the fix isn't over-broad). Synthetic only (guardrail #9).
Run: cd backend && python3 test_assets_traversal.py
"""
import atexit
import http.client
import os
import shutil
import sys
import tempfile
import threading
from http.server import ThreadingHTTPServer
# Lay out a throwaway tree BEFORE importing server (FRONTEND_DIR/ROOT resolve at import):
#   base/frontend/{index.html,assets/app.css}  <- the served root
#   base/secret.txt                            <- a real file a traversal would target
#   base/data/crm.db                           <- the live DB, created by init_db()
_BASE = tempfile.mkdtemp()
# mkdtemp() never cleans up after itself; remove the tree at interpreter exit so
# repeated runs do not accumulate temp directories. ignore_errors keeps a file that
# is still held open from turning cleanup into a spurious failure.
atexit.register(shutil.rmtree, _BASE, ignore_errors=True)

_FRONTEND = os.path.join(_BASE, "frontend")
os.makedirs(os.path.join(_FRONTEND, "assets"))
_DATA = os.path.join(_BASE, "data")
os.makedirs(_DATA)

with open(os.path.join(_FRONTEND, "index.html"), "w", encoding="utf-8") as f:
    f.write("<!doctype html><title>crm</title>")

# Unique marker proving the in-bounds asset is served byte-for-byte.
_CSS_MARKER = "/* legit-asset-marker-7f3a */"
with open(os.path.join(_FRONTEND, "assets", "app.css"), "w", encoding="utf-8") as f:
    f.write(_CSS_MARKER)

# Unique marker planted OUTSIDE the served root; it must never appear in a response.
_SECRET_MARKER = "TOPSECRET-JWT-zq19"
with open(os.path.join(_BASE, "secret.txt"), "w", encoding="utf-8") as f:
    f.write(_SECRET_MARKER)

# Point the server at the throwaway tree; these must be set before the import below.
os.environ["CRM_FRONTEND_DIR"] = _FRONTEND
os.environ["CRM_DATA_DIR"] = _DATA
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")

# Make the sibling server module importable regardless of the current directory.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import server  # noqa: E402
# Messages of every failed check, in the order they were recorded.
FAILS = []


def check(cond, msg):
    """Print a PASS/FAIL line for *msg* and remember the message on failure.

    A falsy *cond* appends *msg* to the module-level FAILS list; a truthy
    one only prints.
    """
    tag = " PASS " if cond else " FAIL "
    print(tag + msg)
    if cond:
        return
    FAILS.append(msg)
class _Quiet(server.CRMHandler):
    """The server's CRMHandler with request logging switched off."""

    def log_message(self, *a):
        """Drop the log line so the test output stays clean."""
        return None
def _get(port, path):
    """Raw GET with the path sent verbatim — http.client does NOT normalize '..',
    which is exactly the unauthenticated raw-client threat the fix defends against.

    Args:
        port: TCP port of the server listening on 127.0.0.1.
        path: Request target, passed through to the request line unchanged.

    Returns:
        A ``(status, body)`` tuple: the integer HTTP status and the response
        body decoded as UTF-8, with undecodable bytes replaced.

    Raises:
        OSError / http.client.HTTPException: propagated from the connection if
        the request cannot be sent or the response cannot be read.
    """
    conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
    try:
        conn.request("GET", path)
        resp = conn.getresponse()
        body = resp.read().decode("utf-8", "replace")
        return resp.status, body
    finally:
        # Close even when the request or read raises (e.g. a timeout), so a
        # failing vector cannot leak the socket.
        conn.close()
def main():
    """Boot the real server on an ephemeral port and run the traversal checks.

    Initializes the database, serves the _Quiet handler from a background
    thread on 127.0.0.1, and records a PASS/FAIL line per assertion via
    check(). The server is stopped and its listening socket closed even if a
    request raises. Exits with status 1 when any check failed.
    """
    server.init_db()  # creates base/data/crm.db and the full schema
    check(os.path.exists(os.environ["CRM_DB_PATH"]),
          "init_db created the live crm.db (a real traversal target)")

    # Port 0 lets the OS pick a free port; read back the one actually bound.
    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
    port = httpd.server_address[1]
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        # ── legit in-bounds asset still serves (containment is not over-broad) ──
        print("\n[legit asset]")
        st, body = _get(port, "/assets/app.css")
        check(st == 200, f"in-bounds /assets/app.css serves 200 (got {st})")
        check(_CSS_MARKER in body, "in-bounds asset body is served intact")

        # ── traversal to a REAL file outside the root: 404, zero bytes leaked ──
        print("\n[traversal -> decoy secret outside the root]")
        for vec in ["/assets/../../secret.txt",
                    "/assets/../../../secret.txt",
                    "/assets/..%2f..%2fsecret.txt",    # urlparse won't decode %2f
                    "/assets/..%2F..%2Fsecret.txt"]:   # …nor uppercase %2F (some clients send it)
            st, body = _get(port, vec)
            check(st == 404, f"{vec} -> 404 (got {st})")
            check(_SECRET_MARKER not in body, f"{vec} leaks no secret bytes")

        # ── traversal to the live crm.db (the headline vector from the eval) ──
        print("\n[traversal -> live crm.db]")
        for vec in ["/assets/../../data/crm.db",
                    "/assets/../data/crm.db",
                    "/assets/..%2f..%2fdata%2fcrm.db"]:
            st, body = _get(port, vec)
            check(st == 404, f"{vec} -> 404 (got {st})")
            check("SQLite format 3" not in body, f"{vec} leaks no DB header")

        # ── deep absolute-style escape ──
        print("\n[deep escape]")
        deep_vec = "/assets/../../../../../../../../etc/passwd"
        st, body = _get(port, deep_vec)
        # Report the vector that was actually sent, not an abbreviated one.
        check(st == 404, f"{deep_vec} -> 404 (got {st})")
        check("root:" not in body, "/etc/passwd not leaked")
    finally:
        # shutdown() only stops the serve_forever loop; server_close() is what
        # releases the listening socket.
        httpd.shutdown()
        httpd.server_close()

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for failed in FAILS:
            print(f" - {failed}")
        sys.exit(1)
    print("ALL PASS (assets path-traversal containment)")


if __name__ == "__main__":
    main()