Add NL-query backend (W2): local translator + safe named-query runner

Read-only "ask the database in plain English" backend. Translation runs on the local Qwen via Spark Control (question -> {intent, slots}); nothing leaves the box: no Claude and no redaction boundary (the simplification chosen after pressure-testing). The safe surface is a curated catalog of ~12 hand-written parameterized queries; a slot validator is the trust boundary (no generic SQL, no dynamic identifiers). POST /api/query/nl + GET /api/query/catalog, gated by require_bot_or_admin, read-only, audited. Soft-delete-correct per table. Local Qwen translated 12/12 real example questions correctly against the live Spark. The web "Ask" box and the Matrix bot are still to come (steps 4-5).
2026-06-18 18:35:41 -05:00
parent a166b49397
commit 6c29c22601
13 changed files with 1348 additions and 13 deletions
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+"""Tests for the W2 NL translator (question -> {intent, slots}) — the local-model leg.
+
+The model is stubbed via an injected chat_fn, so this runs fully offline (no Spark, no
+network). Covers:
+  - build_system() exposes the whole intent catalog as the model's closed vocabulary;
+  - translate() returns the parsed {intent, slots} and DROPS slot keys the intent doesn't
+    declare (model noise), while every surviving value is still validated downstream;
+  - the translation failure modes: no intent fit -> no_match; unparseable -> no_match;
+    local model unreachable -> model_unavailable (so the endpoint can 503);
+  - answer() chains translate + the validated runner end-to-end, and a HALLUCINATED intent
+    from the model is still rejected by the validator (the model output is never trusted).
+
+Run: cd backend && python3 nl_query/test_translate.py
+"""
+import os
+import sys
+import tempfile
+
+# Point CRM_DATA_DIR / CRM_DB_PATH at a fresh temporary directory. This is done
+# before `import server` below; presumably server resolves these variables at
+# import or init_db() time -- verify against server before reordering.
+# NOTE: the temporary directory is not removed anywhere in this file.
+_DATA = tempfile.mkdtemp()
+os.environ["CRM_DATA_DIR"] = _DATA
+os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
+
+# Put the parent of this file's directory first on sys.path so the two imports
+# below resolve when the script is run directly (see "Run:" in the docstring).
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))  # backend/
+import server  # noqa: E402
+import nl_query  # noqa: E402
+
+T = nl_query  # exercise the public API (translate/answer/build_system are re-exported)
+
+# Messages of failed checks: check() appends to it, main() reports it and exits 1
+# when it is non-empty.
+FAILS = []
+
+
+def check(cond, msg):
+    """Print a PASS/FAIL line for *msg* and record *msg* in FAILS when *cond* is falsy."""
+    if cond:
+        print("  PASS " + msg)
+        return
+    print("  FAIL " + msg)
+    FAILS.append(msg)
+
+
+def main():
+    """Run every translator/answer check offline; exit with status 1 if any failed.
+
+    The model is replaced by stub chat_fn callables, so nothing here needs the
+    network. Result dicts are read with .get() so that an unexpected result shape
+    is reported as a FAIL line instead of aborting the whole run with a KeyError,
+    and the DB connection is closed even if a check raises.
+    """
+    print("build_system")
+    sysprompt = nl_query.build_system()
+    check(all(k in sysprompt for k in nl_query.INTENTS), "system prompt lists every intent key")
+    check("days (integer, default 90)" in sysprompt, "system prompt renders int slot + default")
+    check("one of any|inbound|outbound" in sysprompt, "system prompt renders enum choices")
+
+    print("translate")
+    captured = {}
+
+    def fake(prompt, system):
+        # Record what translate() hands to the model, then reply with one declared
+        # slot ("days") and one undeclared slot ("bogus") that must be dropped.
+        captured["system"] = system
+        captured["prompt"] = prompt
+        return {"intent": "investors_cold", "slots": {"days": 90, "bogus": "x"}}
+
+    r = T.translate("who's gone quiet for 3 months?", chat_fn=fake)
+    check(r == {"intent": "investors_cold", "slots": {"days": 90}},
+          f"routes to intent + drops unknown slot 'bogus': {r}")
+    # .get(): if translate() never called the stub, report a FAIL rather than KeyError.
+    check(bool(nl_query.INTENTS) and "investors_cold" in (captured.get("system") or ""),
+          "chat_fn received the catalog")
+    check(captured.get("prompt") == "who's gone quiet for 3 months?",
+          "chat_fn received the question")
+
+    check(T.translate("x", chat_fn=lambda q, s: {"intent": None}).get("error") == "no_match",
+          "intent null -> no_match")
+    check(T.translate("x", chat_fn=lambda q, s: None).get("error") == "no_match",
+          "unparseable model reply -> no_match")
+    check(T.translate("", chat_fn=lambda q, s: {"intent": "x"}).get("error") == "no_match",
+          "empty question -> no_match (no model call needed)")
+
+    def boom(q, s):
+        # Stand-in for an unreachable local model.
+        raise RuntimeError("spark down")
+
+    check(T.translate("x", chat_fn=boom).get("error") == "model_unavailable",
+          "local model unreachable -> model_unavailable")
+
+    print("answer (end-to-end through the validated runner)")
+    server.init_db()
+    conn = server.get_db()
+    try:
+        # Two live investors plus one row with graveyard=1; the first answer()
+        # check below expects only the two live rows back.
+        conn.execute("INSERT INTO fundraising_investors (id, investor_name, lead, graveyard, "
+                     "source_row_id, total_invested) VALUES "
+                     "('a','Acme Capital','Jon',0,'a',5000000),"
+                     "('b','Beta Partners','Grant',0,'b',2000000),"
+                     "('g','Ghost','Grant',1,'g',9000000)")
+        conn.commit()
+
+        r = T.answer(conn, "top investors",
+                     chat_fn=lambda q, s: {"intent": "top_investors_committed",
+                                           "slots": {"limit": 2}})
+        names = [x["investor_name"] for x in (r.get("rows") or [])]
+        check(names == ["Acme Capital", "Beta Partners"],
+              "answer() runs the translated query")
+        check(r.get("question") == "top investors", "answer() echoes the original question")
+
+        r = T.answer(conn, "nonsense",
+                     chat_fn=lambda q, s: {"intent": "made_up_intent", "slots": {}})
+        check(r.get("error") == "unknown_intent",
+              "hallucinated intent is rejected by the validator")
+        check(r.get("question") == "nonsense", "answer() echoes question on error too")
+
+        r = T.answer(conn, "anything", chat_fn=boom)
+        check(r.get("error") == "model_unavailable", "answer() surfaces a model outage")
+    finally:
+        conn.close()
+
+    print()
+    if FAILS:
+        print(f"{len(FAILS)} FAILED")
+        for f in FAILS:
+            print("  - " + f)
+        sys.exit(1)
+    print("ALL PASS")
+
+
+if __name__ == "__main__":
+    main()