7285bb0e52
Lock in the three v0.1.0:74 security/privacy fixes with regression tests, and fix a same-class soft-delete leak surfaced while writing them. - backend/test_assets_traversal.py: boots the real server, proves /assets/ path-traversal vectors (incl. a real decoy file and the live crm.db, plain and URL-encoded) 404 and leak nothing, while a legit asset still serves 200. - backend/test_soft_delete_reads.py: get-by-id 404s soft-deleted rows and nested + list-view aggregates exclude soft-deleted children. - backend/mcp/test_outreach_redaction.py: an unknown free-prose name is tokenized away from the Claude payload but re-hydrated locally, and the path fails closed (no Claude call) when the local NER model is down. - backend/run_tests.py: aggregate runner (each backend/**/test_*.py in its own subprocess); replaces the manual for-loop. 16/16 green. A reviewer pass on the tests confirmed the soft-delete filter was missing from list-view aggregate sub-selects: org contact_count/total_funded and contacts comm_count/last_contact_date counted soft-deleted rows. Add `deleted_at IS NULL` to those four (server.py) and regression-cover them. The reports subsystem (dashboard/pipeline/LP-breakdown, ~16 aggregate queries) has the same leak and is logged as P2 for a dedicated pass. Not yet built or deployed — bump the package version before the next s9pk build.
127 lines
5.1 KiB
Python
127 lines
5.1 KiB
Python
#!/usr/bin/env python3
|
|
"""Regression test for the /assets/ path-traversal containment fix (v0.1.0:74).
|
|
|
|
Before the fix, get_path()/urlparse did NOT normalize '..', so an unauthenticated
|
|
GET /assets/../../data/crm.db (raw client, no client-side normalization) escaped the
|
|
frontend root and read any file the process could — the LP DB, the JWT secret, the
|
|
Gmail key. The fix resolves the target with os.path.realpath and 404s anything that
|
|
does not stay under FRONTEND_ROOT (server.py, the `/assets/` branch of do_GET).
|
|
|
|
This boots the REAL server in-process against a throwaway frontend root, plants a
|
|
decoy "secret" OUTSIDE that root, and proves: (1) traversal vectors that resolve to a
|
|
real readable file outside the root still 404 and leak no bytes; (2) the live crm.db
|
|
path is 404'd; (3) URL-encoded separators don't help; (4) a legit in-bounds asset
|
|
still serves 200 (the fix isn't over-broad). Synthetic only (guardrail #9).
|
|
|
|
Run: cd backend && python3 test_assets_traversal.py
|
|
"""
|
|
import atexit
import http.client
import os
import shutil
import sys
import tempfile
import threading
from http.server import ThreadingHTTPServer
|
|
|
|
# Lay out a throwaway tree BEFORE importing server (FRONTEND_DIR/ROOT resolve at import):
#   base/frontend/{index.html,assets/app.css}  <- the served root
#   base/secret.txt                            <- a real file a traversal would target
#   base/data/crm.db                           <- the live DB, created by init_db()
_BASE = tempfile.mkdtemp()
# Remove the throwaway tree when the interpreter exits (also on sys.exit(1)), so
# repeated runs do not pile up temp directories holding a DB and a decoy secret.
atexit.register(shutil.rmtree, _BASE, ignore_errors=True)
_FRONTEND = os.path.join(_BASE, "frontend")
os.makedirs(os.path.join(_FRONTEND, "assets"))
_DATA = os.path.join(_BASE, "data")
os.makedirs(_DATA)
# Explicit UTF-8: the test later decodes response bodies as UTF-8, so the fixture
# files should not depend on the platform's default encoding.
with open(os.path.join(_FRONTEND, "index.html"), "w", encoding="utf-8") as f:
    f.write("<!doctype html><title>crm</title>")
_CSS_MARKER = "/* legit-asset-marker-7f3a */"
with open(os.path.join(_FRONTEND, "assets", "app.css"), "w", encoding="utf-8") as f:
    f.write(_CSS_MARKER)
_SECRET_MARKER = "TOPSECRET-JWT-zq19"
with open(os.path.join(_BASE, "secret.txt"), "w", encoding="utf-8") as f:
    f.write(_SECRET_MARKER)

# Point the server at the throwaway tree; these must be set before `import server`.
os.environ["CRM_FRONTEND_DIR"] = _FRONTEND
os.environ["CRM_DATA_DIR"] = _DATA
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")

# Make the sibling server.py importable regardless of the current working directory.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import server  # noqa: E402
|
|
|
|
# Messages of every failed check, in the order they were recorded.
FAILS = []


def check(cond, msg):
    """Record one test expectation and print its outcome.

    Prints a PASS or FAIL line for *msg*; on failure, *msg* is also appended to
    the module-level ``FAILS`` list so the run can report all failures at the end.

    Args:
        cond: Any value; its truthiness decides PASS vs FAIL.
        msg: Human-readable description of the expectation.

    Returns:
        ``True`` if the check passed, ``False`` otherwise.
    """
    passed = bool(cond)
    label = " PASS " if passed else " FAIL "
    # f-string instead of concatenation so a non-str message cannot raise TypeError.
    print(f"{label}{msg}")
    if not passed:
        FAILS.append(msg)
    return passed
|
|
|
|
|
|
class _Quiet(server.CRMHandler):
    """The real CRM request handler with ``log_message`` turned into a no-op."""

    def log_message(self, *_ignored):
        # Deliberately does nothing: keeps the test output limited to PASS/FAIL lines.
        return None
|
|
|
|
|
|
def _get(port, path):
    """Raw GET with the path sent verbatim — http.client does NOT normalize '..',
    which is exactly the unauthenticated raw-client threat the fix defends against.

    Args:
        port: TCP port of the server listening on 127.0.0.1.
        path: Request target, sent exactly as given.

    Returns:
        A ``(status, body)`` tuple: the integer HTTP status and the response body
        decoded as UTF-8, with undecodable bytes replaced.
    """
    conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
    try:
        conn.request("GET", path)
        resp = conn.getresponse()
        body = resp.read().decode("utf-8", "replace")
        return resp.status, body
    finally:
        # Close even when the request or read raises, so a failing vector
        # does not leak the socket.
        conn.close()
|
|
|
|
|
|
def _expect_blocked(port, vec, forbidden, leak_msg):
    """GET *vec* verbatim and record that it 404s and that *forbidden* is absent from the body."""
    st, body = _get(port, vec)
    check(st == 404, f"{vec} -> 404 (got {st})")
    check(forbidden not in body, leak_msg)


def main():
    """Boot the server on an ephemeral local port and run every traversal check.

    Initializes the database, serves ``_Quiet`` from a background thread, runs the
    legit-asset and traversal checks, then prints a summary. Exits the process with
    status 1 if any check failed.
    """
    server.init_db()  # creates base/data/crm.db and the full schema
    check(os.path.exists(os.environ["CRM_DB_PATH"]), "init_db created the live crm.db (a real traversal target)")

    # Port 0 lets the OS pick a free port; read the real one back from the socket.
    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
    port = httpd.server_address[1]
    serve_thread = threading.Thread(target=httpd.serve_forever, daemon=True)
    serve_thread.start()
    try:
        # ── legit in-bounds asset still serves (containment is not over-broad) ──
        print("\n[legit asset]")
        st, body = _get(port, "/assets/app.css")
        check(st == 200, f"in-bounds /assets/app.css serves 200 (got {st})")
        check(_CSS_MARKER in body, "in-bounds asset body is served intact")

        # ── traversal to a REAL file outside the root: 404, zero bytes leaked ──
        print("\n[traversal -> decoy secret outside the root]")
        secret_vectors = [
            "/assets/../../secret.txt",
            "/assets/../../../secret.txt",
            "/assets/..%2f..%2fsecret.txt",  # urlparse won't decode %2f
            "/assets/..%2F..%2Fsecret.txt",  # …nor uppercase %2F (some clients send it)
        ]
        for vec in secret_vectors:
            _expect_blocked(port, vec, _SECRET_MARKER, f"{vec} leaks no secret bytes")

        # ── traversal to the live crm.db (the headline vector from the eval) ──
        print("\n[traversal -> live crm.db]")
        db_vectors = [
            "/assets/../../data/crm.db",
            "/assets/../data/crm.db",
            "/assets/..%2f..%2fdata%2fcrm.db",
        ]
        for vec in db_vectors:
            _expect_blocked(port, vec, "SQLite format 3", f"{vec} leaks no DB header")

        # ── deep absolute-style escape ──
        print("\n[deep escape]")
        # The PASS/FAIL label is built from the vector itself so it names the
        # path that was really requested.
        _expect_blocked(
            port,
            "/assets/../../../../../../../../etc/passwd",
            "root:",
            "/etc/passwd not leaked",
        )
    finally:
        # shutdown() only stops the serve_forever loop; server_close() is what
        # releases the listening socket.
        httpd.shutdown()
        httpd.server_close()
        serve_thread.join(timeout=5)

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for failure in FAILS:
            print(f" - {failure}")
        sys.exit(1)
    print("ALL PASS (assets path-traversal containment)")


if __name__ == "__main__":
    main()
|