Add regression tests for v74 fixes; close soft-delete leak in list-view aggregates
Lock in the three v0.1.0:74 security/privacy fixes with regression tests, and fix a same-class soft-delete leak surfaced while writing them.

- backend/test_assets_traversal.py: boots the real server, proves /assets/ path-traversal vectors (incl. a real decoy file and the live crm.db, plain and URL-encoded) 404 and leak nothing, while a legit asset still serves 200.
- backend/test_soft_delete_reads.py: get-by-id 404s soft-deleted rows and nested + list-view aggregates exclude soft-deleted children.
- backend/mcp/test_outreach_redaction.py: an unknown free-prose name is tokenized away from the Claude payload but re-hydrated locally, and the path fails closed (no Claude call) when the local NER model is down.
- backend/run_tests.py: aggregate runner (each backend/**/test_*.py in its own subprocess); replaces the manual for-loop. 16/16 green.

A reviewer pass on the tests confirmed the soft-delete filter was missing from list-view aggregate sub-selects: org contact_count/total_funded and contacts comm_count/last_contact_date counted soft-deleted rows. Add `deleted_at IS NULL` to those four (server.py) and regression-cover them. The reports subsystem (dashboard/pipeline/LP-breakdown, ~16 aggregate queries) has the same leak and is logged as P2 for a dedicated pass.

Not yet built or deployed — bump the package version before the next s9pk build.
This commit is contained in:
@@ -0,0 +1,126 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Regression test for the /assets/ path-traversal containment fix (v0.1.0:74).
|
||||
|
||||
Before the fix, get_path()/urlparse did NOT normalize '..', so an unauthenticated
|
||||
GET /assets/../../data/crm.db (raw client, no client-side normalization) escaped the
|
||||
frontend root and read any file the process could — the LP DB, the JWT secret, the
|
||||
Gmail key. The fix resolves the target with os.path.realpath and 404s anything that
|
||||
does not stay under FRONTEND_ROOT (server.py, the `/assets/` branch of do_GET).
|
||||
|
||||
This boots the REAL server in-process against a throwaway frontend root, plants a
|
||||
decoy "secret" OUTSIDE that root, and proves: (1) traversal vectors that resolve to a
|
||||
real readable file outside the root still 404 and leak no bytes; (2) the live crm.db
|
||||
path is 404'd; (3) URL-encoded separators don't help; (4) a legit in-bounds asset
|
||||
still serves 200 (the fix isn't over-broad). Synthetic only (guardrail #9).
|
||||
|
||||
Run: cd backend && python3 test_assets_traversal.py
|
||||
"""
|
||||
import atexit
import http.client
import os
import shutil
import sys
import tempfile
import threading
from http.server import ThreadingHTTPServer
|
||||
|
||||
# Lay out a throwaway tree BEFORE importing server (FRONTEND_DIR/ROOT resolve at import):
#   base/frontend/{index.html,assets/app.css}   <- the served root
#   base/secret.txt                             <- a real file a traversal would target
#   base/data/crm.db                            <- the live DB, created by init_db()
_BASE = tempfile.mkdtemp()
# The tree holds a decoy secret and a database; remove it when the process exits
# so repeated runs do not pile up temp directories.
atexit.register(shutil.rmtree, _BASE, ignore_errors=True)

_FRONTEND = os.path.join(_BASE, "frontend")
os.makedirs(os.path.join(_FRONTEND, "assets"))
_DATA = os.path.join(_BASE, "data")
os.makedirs(_DATA)

with open(os.path.join(_FRONTEND, "index.html"), "w") as f:
    f.write("<!doctype html><title>crm</title>")

# Unique marker so the test can tell the legit asset body was served intact.
_CSS_MARKER = "/* legit-asset-marker-7f3a */"
with open(os.path.join(_FRONTEND, "assets", "app.css"), "w") as f:
    f.write(_CSS_MARKER)

# Unique marker planted OUTSIDE the frontend root; it must never appear in a response.
_SECRET_MARKER = "TOPSECRET-JWT-zq19"
with open(os.path.join(_BASE, "secret.txt"), "w") as f:
    f.write(_SECRET_MARKER)

# Point the server at the throwaway tree; these must be set before the import below.
os.environ["CRM_FRONTEND_DIR"] = _FRONTEND
os.environ["CRM_DATA_DIR"] = _DATA
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import server  # noqa: E402
|
||||
|
||||
FAILS = []  # messages of every failed check, in the order they were recorded


def check(cond, msg):
    """Print a PASS/FAIL line for *msg* and record the failure when *cond* is falsy.

    Args:
        cond: Truthy when the expectation held.
        msg: Human-readable description of the expectation; appended to
            ``FAILS`` on failure.
    """
    print((" PASS " if cond else " FAIL ") + msg)
    if not cond:
        FAILS.append(msg)
|
||||
|
||||
|
||||
class _Quiet(server.CRMHandler):
    """The real ``server.CRMHandler`` with per-request logging silenced."""

    def log_message(self, *a):  # keep the test output clean
        pass
|
||||
|
||||
|
||||
def _get(port, path):
|
||||
"""Raw GET with the path sent verbatim — http.client does NOT normalize '..',
|
||||
which is exactly the unauthenticated raw-client threat the fix defends against."""
|
||||
conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
|
||||
conn.request("GET", path)
|
||||
resp = conn.getresponse()
|
||||
body = resp.read().decode("utf-8", "replace")
|
||||
conn.close()
|
||||
return resp.status, body
|
||||
|
||||
|
||||
def main():
    """Boot the real server on an ephemeral port and run every traversal check.

    Each expectation is reported through ``check``. After the server is
    stopped, any recorded failures are listed and the process exits with
    status 1; otherwise a single ALL PASS line is printed.
    """
    server.init_db()  # creates base/data/crm.db and the full schema
    check(os.path.exists(os.environ["CRM_DB_PATH"]), "init_db created the live crm.db (a real traversal target)")

    # Port 0 lets the OS pick a free port, so parallel runs don't collide.
    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
    port = httpd.server_address[1]
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        # ── legit in-bounds asset still serves (containment is not over-broad) ──
        print("\n[legit asset]")
        st, body = _get(port, "/assets/app.css")
        check(st == 200, f"in-bounds /assets/app.css serves 200 (got {st})")
        check(_CSS_MARKER in body, "in-bounds asset body is served intact")

        # ── traversal to a REAL file outside the root: 404, zero bytes leaked ──
        print("\n[traversal -> decoy secret outside the root]")
        for vec in ["/assets/../../secret.txt",
                    "/assets/../../../secret.txt",
                    "/assets/..%2f..%2fsecret.txt",    # urlparse won't decode %2f
                    "/assets/..%2F..%2Fsecret.txt"]:   # …nor uppercase %2F (some clients send it)
            st, body = _get(port, vec)
            check(st == 404, f"{vec} -> 404 (got {st})")
            check(_SECRET_MARKER not in body, f"{vec} leaks no secret bytes")

        # ── traversal to the live crm.db (the headline vector from the eval) ──
        print("\n[traversal -> live crm.db]")
        for vec in ["/assets/../../data/crm.db",
                    "/assets/../data/crm.db",
                    "/assets/..%2f..%2fdata%2fcrm.db"]:
            st, body = _get(port, vec)
            check(st == 404, f"{vec} -> 404 (got {st})")
            check("SQLite format 3" not in body, f"{vec} leaks no DB header")

        # ── deep absolute-style escape ──
        print("\n[deep escape]")
        st, body = _get(port, "/assets/../../../../../../../../etc/passwd")
        check(st == 404, f"/assets/../../etc/passwd -> 404 (got {st})")
        check("root:" not in body, "/etc/passwd not leaked")
    finally:
        httpd.shutdown()
        # shutdown() only stops the serve_forever loop; server_close()
        # releases the listening socket.
        httpd.server_close()

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for f in FAILS:
            print(f" - {f}")
        sys.exit(1)
    print("ALL PASS (assets path-traversal containment)")
|
||||
|
||||
|
||||
# Run the checks only when executed as a script (e.g. by the aggregate runner's subprocess).
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user