#!/usr/bin/env python3
"""Tests for the W2 safe NL-query runner (the model-free core).

Boots the REAL schema (server.init_db against a temp DB — exact columns + all migrations),
inserts synthetic fundraising/email/reminder/pipeline data, and exercises every intent plus the
trust-boundary behaviour:
  - each intent returns the right rows over the real schema;
  - SOFT-DELETE is respected on both recency legs (a tombstoned communication and a tombstoned
    email sighting never count), on reminders, and on opportunities; graveyard investors are
    excluded from "live" intents;
  - the validator rejects bad/unknown/unexpected slots WITHOUT crashing (the `?limit=abc` class);
  - LIKE wildcards in a free-text slot are escaped (a city of "%" does NOT return everything);
  - limits clamp to their caps; the audit hook fires with the intent + row count.

Synthetic data only — no real LP substance, no network, no model.
Run: cd backend && python3 nl_query/test_nl_query.py
"""
import atexit
import os
import shutil
import sys
import tempfile
from datetime import datetime, timedelta, timezone

# Throwaway data dir for this run. The environment must be set BEFORE `server` is imported so
# the server module picks up the temp paths.
_DATA = tempfile.mkdtemp()
# Remove the temp dir at interpreter exit. Registered before `import server` so that (atexit
# being LIFO) it runs after any exit handlers the server module may register itself.
atexit.register(shutil.rmtree, _DATA, ignore_errors=True)
os.environ["CRM_DATA_DIR"] = _DATA
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
os.environ["CRM_GMAIL_INTEGRATION_ENABLED"] = "1"
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))  # backend/
import server  # noqa: E402
import nl_query  # noqa: E402

# Messages of every failed check, in the order they failed.
FAILS = []


def check(cond, msg):
    """Print a PASS/FAIL line for *msg* and record *msg* in FAILS when *cond* is falsy."""
    print((" PASS " if cond else " FAIL ") + msg)
    if not cond:
        FAILS.append(msg)


def _utcnow():
    """Return the current UTC time as a naive datetime.

    Equivalent to the deprecated ``datetime.utcnow()`` (deprecated since Python 3.12); the
    tzinfo is stripped so ``isoformat()`` keeps producing no UTC offset suffix.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


def _ago(days):
    """Return the UTC timestamp *days* days before now as an ISO-8601 string ending in "Z"."""
    return (_utcnow() - timedelta(days=days)).isoformat() + "Z"


# Current UTC calendar date; reminder due dates are seeded relative to it.
TODAY = _utcnow().date()


def seed(conn):
    """Insert the synthetic fixture rows into *conn* and commit.

    Populates users, mailboxes, funds, investors, contacts, commitments, emails (with
    per-mailbox sightings and investor links), communications, reminders and opportunities.
    Statement order is kept exactly as written because later rows reference earlier ones.
    """
    c = conn.execute

    # users + mailboxes
    c("INSERT INTO users (id, username, email, password_hash, full_name, role) VALUES "
      "('u_grant','grant','grant@ten31.xyz','x','Grant Smith','admin'),"
      "('u_jon','jonathan','jon@ten31.xyz','x','Jonathan Lee','member')")
    c("INSERT INTO email_accounts (id, user_id, email_address, auth_method) VALUES "
      "('a_grant','u_grant','grant@ten31.xyz','dwd'),"
      "('a_jon','u_jon','jon@ten31.xyz','dwd')")

    # funds
    c("INSERT INTO fundraising_funds (id, column_id, fund_name, display_order) VALUES "
      "('f1','c_f1','Fund I',1),('f2','c_f2','Fund II',2)")

    # investors (graveyard flag is the live/retired axis; no deleted_at on this table)
    def inv(iid, name, lead, total, grave=0):
        """Insert one fundraising_investors row; source_row_id reuses the investor id."""
        c("INSERT INTO fundraising_investors (id, investor_name, lead, graveyard, "
          "source_row_id, total_invested) VALUES (?,?,?,?,?,?)",
          (iid, name, lead, grave, iid, total))

    inv("i_acme", "Acme Capital", "Jonathan Lee", 5_000_000)
    inv("i_beta", "Beta Partners", "Grant Smith", 2_000_000)
    inv("i_cold", "Cold Co", "Grant Smith", 0)  # never contacted
    inv("i_delta", "Delta LP", "Grant Smith", 1_000_000)  # only a (comms) signal
    inv("i_ghost", "Graveyard Ghost", "Grant Smith", 9_999_999, grave=1)

    # contacts (grid pills) + classic contact rows for the comms leg
    c("INSERT INTO fundraising_contacts (id, investor_id, full_name, email, title, city, "
      "contact_id, sort_order) VALUES "
      "('fc_a','i_acme','Alice Acme','alice@acme.com','GP','Austin','cc_alice',0),"
      "('fc_b','i_beta','Bob Beta','bob@beta.com','LP','Denver',NULL,0),"
      "('fc_d','i_delta','Dana Delta','dana@delta.com','CFO','Miami','cc_dana',0)")
    c("INSERT INTO contacts (id, first_name, last_name, email) VALUES "
      "('cc_alice','Alice','Acme','alice@acme.com'),"
      "('cc_dana','Dana','Delta','dana@delta.com')")

    # commitments — Acme across two funds (3M + 2M = 5M); Beta one fund.
    # Amounts are bound as parameters rather than written into the SQL text: underscore digit
    # separators (3_000_000) are valid Python but are only accepted inside SQL by newer SQLite
    # releases, so embedding them made the seed fail on older SQLite libraries.
    for commitment in (
        ("cm1", "i_acme", "f1", 3_000_000),
        ("cm2", "i_acme", "f2", 2_000_000),
        ("cm3", "i_beta", "f1", 2_000_000),
    ):
        c("INSERT INTO fundraising_commitments (id, investor_id, fund_id, amount) "
          "VALUES (?,?,?,?)", commitment)

    # emails: matched + a per-mailbox sighting. is_sent + from_email decide direction.
    def email(eid, frm, frm_name, days, inv_id, account, is_sent, deleted=False):
        """Insert a matched email sent *days* ago, its mailbox sighting and its investor link.

        When *deleted* is true the sighting row gets a deleted_at timestamp (tombstoned).
        """
        c("INSERT INTO emails (id, rfc_message_id, from_email, from_name, sent_at, subject, "
          "is_matched, match_status) VALUES (?,?,?,?,?,?,1,'matched')",
          (eid, "rfc_" + eid, frm, frm_name, _ago(days), "Re: " + eid))
        c("INSERT INTO email_account_messages (id, email_id, account_id, gmail_message_id, "
          "gmail_thread_id, is_sent, deleted_at) VALUES (?,?,?,?,?,?,?)",
          ("eam_" + eid, eid, account, "g_" + eid, "t_" + eid, is_sent,
           _ago(days) if deleted else None))
        c("INSERT INTO email_investor_links (id, email_id, fundraising_investor_id, "
          "matched_address, match_kind) VALUES (?,?,?,?, 'exact_email')",
          ("eil_" + eid, eid, inv_id, frm))

    email("ea_recent", "grant@ten31.xyz", "Grant Smith", 0, "i_acme", "a_grant", 1)  # Acme: today
    email("eb_old", "grant@ten31.xyz", "Grant Smith", 40, "i_beta", "a_grant", 1)  # Beta: 40d
    email("edel", "grant@ten31.xyz", "Grant Smith", 0, "i_beta", "a_grant", 1,
          deleted=True)  # tombstoned
    email("ej", "jon@ten31.xyz", "Jonathan Lee", 0, "i_acme", "a_jon", 1)  # jonathan today
    email("ein", "alice@acme.com", "Alice Acme", 3, "i_acme", "a_grant", 0)  # inbound 3d

    # an UNMATCHED sent email by Grant (NO email_investor_links row) — captured, but not to a
    # known investor. The investor-email intents are matched-only, so it must be EXCLUDED from
    # comms_by_user / email_counts_by_user; without the matched-only filter it would inflate both.
    c("INSERT INTO emails (id, rfc_message_id, from_email, from_name, sent_at, subject, "
      "is_matched, match_status) VALUES ('eunm','rfc_eunm','grant@ten31.xyz','Grant Smith',?,"
      "'Internal: team lunch',0,'unmatched')", (_ago(0),))
    c("INSERT INTO email_account_messages (id, email_id, account_id, gmail_message_id, "
      "gmail_thread_id, is_sent, deleted_at) VALUES "
      "('eam_eunm','eunm','a_grant','g_eunm','t_eunm',1,NULL)")

    # communications (the other recency leg) — Delta has ONLY comms: one live (5d), one tombstoned
    # (today). If the soft-delete filter broke, Delta would read as contacted today.
    c("INSERT INTO communications (id, contact_id, type, communication_date, created_by) VALUES "
      "('cmm_live','cc_dana','email',?,'u_grant')", (_ago(5),))
    c("INSERT INTO communications (id, contact_id, type, communication_date, created_by, deleted_at) "
      "VALUES ('cmm_del','cc_dana','email',?,'u_grant',?)", (_ago(0), _ago(0)))

    # reminders — open(overdue) / open(future) / done / deleted / standalone
    def rem(rid, inv_id, title, due, status="open", deleted=False):
        """Insert one reminders row; *deleted* stamps deleted_at with the current time."""
        # NOTE(review): `title` is bound to BOTH investor_name and title — confirm that the
        # investor_name column is meant to carry the reminder title in this fixture.
        c("INSERT INTO reminders (id, investor_id, investor_name, title, due_date, status, "
          "deleted_at) VALUES (?,?,?,?,?,?,?)",
          (rid, inv_id, title, title, due, status, _ago(0) if deleted else None))

    rem("r_over", "i_beta", "Send deck", (TODAY - timedelta(days=1)).isoformat())  # overdue
    rem("r_future", "i_acme", "Quarterly check-in", (TODAY + timedelta(days=10)).isoformat())
    rem("r_done", "i_acme", "Old task", (TODAY - timedelta(days=2)).isoformat(), status="done")
    rem("r_del", "i_acme", "Tombstoned", (TODAY - timedelta(days=2)).isoformat(), deleted=True)
    rem("r_standalone", None, "Team chore", (TODAY - timedelta(days=1)).isoformat())

    # opportunities — committed / meeting (live) / lost (terminal) / deleted
    def opp(oid, inv_id, contact, stage, expected, owner, deleted=False):
        """Insert one opportunities row named after its id; *deleted* stamps deleted_at."""
        c("INSERT INTO opportunities (id, name, contact_id, stage, expected_amount, owner_id, "
          "fundraising_investor_id, deleted_at) VALUES (?,?,?,?,?,?,?,?)",
          (oid, oid, contact, stage, expected, owner, inv_id, _ago(0) if deleted else None))

    # opp contact_id must reference a real contacts row (FK on); reuse the two we made
    opp("o_acme", "i_acme", "cc_alice", "committed", 4_000_000, "u_jon")
    opp("o_beta", "i_beta", "cc_dana", "meeting", 1_000_000, "u_grant")
    opp("o_lost", "i_acme", "cc_alice", "lost", 9_000_000, "u_jon")
    opp("o_del", "i_beta", "cc_dana", "due_diligence", 7_000_000, "u_grant", deleted=True)

    conn.commit()


def names(res):
    """Return the investor_name of every row in the result *res*, in row order."""
    return [r["investor_name"] for r in res["rows"]]


def main():
    """Boot the schema, seed the fixture, run every check and report the outcome.

    Prints one PASS/FAIL line per check (via ``check``), then a summary. Exits the process
    with status 1 when at least one check failed; otherwise prints "ALL PASS". The database
    connection is closed even if a query or check raises.
    """
    server.init_db()
    conn = server.get_db()
    try:
        seed(conn)

        def run(*args, **kwargs):
            """Call nl_query.run_query against the seeded connection."""
            return nl_query.run_query(conn, *args, **kwargs)

        print("investors_cold")
        r = run("investors_cold", {"days": 30})
        check(names(r) == ["Cold Co", "Beta Partners"],
              f"cold(30) never-first then stale: {names(r)}")
        check(run("investors_cold", {"days": 90})["row_count"] == 1,
              "cold(90): only never-contacted")
        check("Graveyard Ghost" not in names(run("investors_cold", {"days": 3650})),
              "cold excludes graveyard investors")
        check("Delta LP" in names(run("investors_cold", {"days": 3})),
              "cold(3) sees Delta (comms 5d)")
        check("Delta LP" not in names(run("investors_cold", {"days": 7})),
              "cold(7): Delta's tombstoned comm (today) did NOT count")

        print("investor_lookup")
        r = run("investor_lookup", {"name": "acme"})
        check(r["row_count"] == 1 and r["rows"][0]["total_invested"] == 5_000_000,
              "lookup total committed")
        check({c["fund_name"] for c in r["rows"][0]["commitments"]} == {"Fund I", "Fund II"},
              "lookup per-fund breakdown")
        check(r["rows"][0]["contacts"][0]["email"] == "alice@acme.com",
              "lookup surfaces contact email")

        print("investors_by_city / by_lead / top / follow_up")
        check(names(run("investors_by_city", {"city": "Austin"})) == ["Acme Capital"], "by_city")
        check(set(names(run("investors_by_lead", {"lead": "Grant"})))
              == {"Beta Partners", "Cold Co", "Delta LP"},
              "by_lead excludes graveyard + other leads")
        check(names(run("top_investors_committed", {"limit": 2}))
              == ["Acme Capital", "Beta Partners"],
              "top by committed (graveyard + zero excluded)")
        r = run("investors_follow_up")
        check(names(r) == ["Beta Partners", "Acme Capital"],
              f"follow_up overdue-first, open-only: {names(r)}")
        check(r["rows"][0]["overdue"] == 1 and r["rows"][1]["overdue"] == 0,
              "follow_up overdue flag")

        print("pipeline")
        r = run("pipeline_totals")
        stages = {row["stage"]: row for row in r["rows"]}
        check(set(stages) == {"committed", "meeting"},
              f"pipeline_totals excludes lost+deleted: {set(stages)}")
        check(stages["committed"]["expected_total"] == 4_000_000, "pipeline_totals stage sum")
        r = run("pipeline_top", {"limit": 10})
        check(names(r) == ["Acme Capital", "Beta Partners"], "pipeline_top furthest-stage first")
        check(r["rows"][0]["last_activity_at"] is not None, "pipeline_top enriches last activity")

        print("emails")
        check(run("recent_emails", {"direction": "outbound"})["row_count"] == 3,
              "recent_emails(outbound): 3 live (tombstoned sighting excluded)")
        check(run("recent_emails", {"direction": "inbound"})["row_count"] == 1,
              "recent_emails(inbound)")
        check(run("recent_emails")["row_count"] == 4, "recent_emails(any): 4 live")
        r = run("investor_last_contact", {"name": "beta"})
        check(r["rows"][0]["days_since"] >= 39, "investor_last_contact days_since")
        check(run("comms_by_user", {"user": "Grant"})["row_count"] == 2,
              "comms_by_user: grant's 2 live MATCHED outbound (tombstoned + unmatched excluded)")
        r = run("email_counts_by_user", {"user": "grant"})
        check(r["rows"][0]["this_week"] == 1,
              "email_counts this_week = 1 live matched (tombstoned + unmatched excluded)")
        check(r["rows"][0]["ytd"] >= 1, "email_counts ytd")

        print("trust boundary")
        check(run("investors_cold", {"days": "abc"})["error"] == "bad_slot",
              "bad int slot -> bad_slot, no crash")
        check(run("nope")["error"] == "unknown_intent", "unknown intent rejected")
        check(run("pipeline_totals", {"foo": 1})["error"] == "bad_slot",
              "unexpected slot rejected")
        check(run("investor_lookup", {})["error"] == "bad_slot",
              "missing required slot rejected")
        check(run("investors_by_city", {"city": "%"})["row_count"] == 0,
              "LIKE wildcard escaped — '%' does not match every row")
        check(run("investors_cold", {"days": 0})["slots"]["days"] == 1, "int slot clamps to min")
        check(run("top_investors_committed", {"limit": 99999})["slots"]["limit"]
              == nl_query.INTENTS["top_investors_committed"]["slots"]["limit"]["max"],
              "int slot clamps to max")

        print("audit hook + catalog")
        seen = []  # audit records collected by the hook, in call order
        run("pipeline_totals", audit_fn=seen.append, actor="tester", source="test")
        check(len(seen) == 1 and seen[0]["intent"] == "pipeline_totals"
              and seen[0]["error"] is None and seen[0]["actor"] == "tester",
              "audit hook fires with intent/actor/no-error")
        run("nope", audit_fn=seen.append)
        check(seen[-1]["error"] == "unknown_intent", "audit hook fires on rejection too")
        check(len(nl_query.catalog()) == len(nl_query.INTENTS), "catalog covers every intent")
    finally:
        # Release the connection even when a query or an indexing expression above raises.
        conn.close()

    print()
    if FAILS:
        print(f"{len(FAILS)} FAILED")
        for f in FAILS:
            print(" - " + f)
        sys.exit(1)
    print("ALL PASS")


if __name__ == "__main__":
    main()