#!/usr/bin/env python3
"""Regression test for the /assets/ path-traversal containment fix (v0.1.0:74).

Before the fix, get_path()/urlparse did NOT normalize '..', so an
unauthenticated GET /assets/../../data/crm.db (raw client, no client-side
normalization) escaped the frontend root and read any file the process
could — the LP DB, the JWT secret, the Gmail key. The fix resolves the target
with os.path.realpath and 404s anything that does not stay under
FRONTEND_ROOT (server.py, the `/assets/` branch of do_GET).

This boots the REAL server in-process against a throwaway frontend root,
plants a decoy "secret" OUTSIDE that root, and proves:
  (1) traversal vectors that resolve to a real readable file outside the root
      still 404 and leak no bytes;
  (2) the live crm.db path is 404'd;
  (3) URL-encoded separators don't help;
  (4) a legit in-bounds asset still serves 200 (the fix isn't over-broad).

Synthetic only (guardrail #9).

Run:
    cd backend && python3 test_assets_traversal.py
"""
import atexit
import http.client
import os
import shutil
import sys
import tempfile
import threading
from http.server import ThreadingHTTPServer

# Lay out a throwaway tree BEFORE importing server (FRONTEND_DIR/ROOT resolve
# at import):
#   base/frontend/{index.html,assets/app.css}  <- the served root
#   base/secret.txt                            <- a real file a traversal would target
#   base/data/crm.db                           <- the live DB, created by init_db()
_BASE = tempfile.mkdtemp()
# mkdtemp() never deletes what it creates; remove the decoy secret and the DB
# when the interpreter exits (this also runs on the sys.exit(1) failure path).
atexit.register(shutil.rmtree, _BASE, ignore_errors=True)

_FRONTEND = os.path.join(_BASE, "frontend")
os.makedirs(os.path.join(_FRONTEND, "assets"))
_DATA = os.path.join(_BASE, "data")
os.makedirs(_DATA)

with open(os.path.join(_FRONTEND, "index.html"), "w", encoding="utf-8") as f:
    f.write("crm")

_CSS_MARKER = "/* legit-asset-marker-7f3a */"
with open(os.path.join(_FRONTEND, "assets", "app.css"), "w", encoding="utf-8") as f:
    f.write(_CSS_MARKER)

_SECRET_MARKER = "TOPSECRET-JWT-zq19"
with open(os.path.join(_BASE, "secret.txt"), "w", encoding="utf-8") as f:
    f.write(_SECRET_MARKER)

# The server is pointed at the throwaway tree through these variables, so they
# must be set before the import below.
os.environ["CRM_FRONTEND_DIR"] = _FRONTEND
os.environ["CRM_DATA_DIR"] = _DATA
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import server  # noqa: E402

# Messages of every failed check, in the order they were recorded.
FAILS = []


def check(cond: bool, msg: str) -> None:
    """Print a PASS/FAIL line for *msg* and record it in FAILS on failure."""
    print((" PASS " if cond else " FAIL ") + msg)
    if not cond:
        FAILS.append(msg)


class _Quiet(server.CRMHandler):
    """The real request handler with per-request logging silenced."""

    def log_message(self, *a):  # keep the test output clean
        pass


def _get(port: int, path: str):
    """Raw GET with the path sent verbatim — http.client does NOT normalize
    '..', which is exactly the unauthenticated raw-client threat the fix
    defends against.

    Returns a ``(status, body)`` tuple; the body is decoded as UTF-8 with
    undecodable bytes replaced, so binary responses can still be searched.
    """
    conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
    try:
        conn.request("GET", path)
        resp = conn.getresponse()
        body = resp.read().decode("utf-8", "replace")
        return resp.status, body
    finally:
        # Release the socket even when the request or the read raises.
        conn.close()


def main() -> None:
    """Boot the server on an ephemeral port, run every check, exit 1 on failure."""
    server.init_db()  # creates base/data/crm.db and the full schema
    check(os.path.exists(os.environ["CRM_DB_PATH"]),
          "init_db created the live crm.db (a real traversal target)")

    # Port 0 lets the OS pick a free port; read the real one back.
    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
    port = httpd.server_address[1]
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        # ── legit in-bounds asset still serves (containment is not over-broad) ──
        print("\n[legit asset]")
        st, body = _get(port, "/assets/app.css")
        check(st == 200, f"in-bounds /assets/app.css serves 200 (got {st})")
        check(_CSS_MARKER in body, "in-bounds asset body is served intact")

        # ── traversal to a REAL file outside the root: 404, zero bytes leaked ──
        print("\n[traversal -> decoy secret outside the root]")
        for vec in [
            "/assets/../../secret.txt",
            "/assets/../../../secret.txt",
            "/assets/..%2f..%2fsecret.txt",  # urlparse won't decode %2f
            "/assets/..%2F..%2Fsecret.txt",  # …nor uppercase %2F (some clients send it)
        ]:
            st, body = _get(port, vec)
            check(st == 404, f"{vec} -> 404 (got {st})")
            check(_SECRET_MARKER not in body, f"{vec} leaks no secret bytes")

        # ── traversal to the live crm.db (the headline vector from the eval) ──
        print("\n[traversal -> live crm.db]")
        for vec in [
            "/assets/../../data/crm.db",
            "/assets/../data/crm.db",
            "/assets/..%2f..%2fdata%2fcrm.db",
        ]:
            st, body = _get(port, vec)
            check(st == 404, f"{vec} -> 404 (got {st})")
            check("SQLite format 3" not in body, f"{vec} leaks no DB header")

        # ── deep absolute-style escape ──
        print("\n[deep escape]")
        vec = "/assets/../../../../../../../../etc/passwd"
        st, body = _get(port, vec)
        # Report the vector that was actually sent, like the checks above.
        check(st == 404, f"{vec} -> 404 (got {st})")
        check("root:" not in body, "/etc/passwd not leaked")
    finally:
        httpd.shutdown()
        # shutdown() only stops serve_forever(); server_close() releases the
        # listening socket.
        httpd.server_close()

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for failed in FAILS:
            print(f" - {failed}")
        sys.exit(1)
    print("ALL PASS (assets path-traversal containment)")


if __name__ == "__main__":
    main()