Add NL-query backend (W2): local translator + safe named-query runner

Read-only "ask the database in plain English" backend. Translation runs on
the local Qwen via Spark Control (question -> {intent, slots}); nothing leaves
the box, no Claude and no redaction boundary (the simplification chosen after
pressure-testing). The safe surface is a curated catalog of ~12 hand-written
parameterized queries; a slot validator is the trust boundary (no generic SQL,
no dynamic identifiers). POST /api/query/nl + GET /api/query/catalog, gated by
require_bot_or_admin, read-only, audited. Soft-delete handling is correct per table.
Local Qwen translated 12/12 real example questions correctly against the live
Spark. Web "Ask" box and Matrix bot still to come (steps 4-5).
This commit is contained in:
Keysat
2026-06-18 18:35:41 -05:00
parent a166b49397
commit 6c29c22601
13 changed files with 1348 additions and 13 deletions
+107
View File
@@ -0,0 +1,107 @@
#!/usr/bin/env python3
"""Tests for the W2 NL translator (question -> {intent, slots}) — the local-model leg.
The model is stubbed via an injected chat_fn, so this runs fully offline (no Spark, no
network). Covers:
- build_system() exposes the whole intent catalog as the model's closed vocabulary;
- translate() returns the parsed {intent, slots} and DROPS slot keys the intent doesn't
declare (model noise), while every surviving value is still validated downstream;
- the translation failure modes: no intent fit -> no_match; unparseable -> no_match;
local model unreachable -> model_unavailable (so the endpoint can 503);
- answer() chains translate + the validated runner end-to-end, and a HALLUCINATED intent
from the model is still rejected by the validator (the model output is never trusted).
Run: cd backend && python3 nl_query/test_translate.py
"""
import atexit
import os
import shutil
import sys
import tempfile

# Point the CRM at a throwaway data dir / DB file BEFORE importing `server` (hence the
# E402 waivers below), so this test never touches a real database.
# NOTE(review): assumes `server` reads CRM_DATA_DIR / CRM_DB_PATH at import or init_db()
# time -- confirm against server.py.
_DATA = tempfile.mkdtemp()
# mkdtemp() leaves cleanup to the caller; remove the scratch dir when the interpreter
# exits (including the sys.exit(1) failure path) so repeated runs don't pile up temp dirs.
atexit.register(shutil.rmtree, _DATA, ignore_errors=True)
os.environ["CRM_DATA_DIR"] = _DATA
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))  # backend/

import server  # noqa: E402
import nl_query  # noqa: E402

T = nl_query  # exercise the public API (translate/answer/build_system are re-exported)
FAILS = []  # messages of the checks that failed; appended to by check()
def check(cond, msg):
    """Print a PASS/FAIL line for *msg*; on a falsy *cond* also record *msg* in FAILS."""
    if cond:
        print(" PASS " + msg)
        return
    print(" FAIL " + msg)
    FAILS.append(msg)
def main():
    """Run all translator checks offline and exit with status 1 if any of them failed.

    Three sections, each announced with a header line on stdout:
      * build_system -- the system prompt mentions every intent key and renders slot specs;
      * translate    -- stubbed chat_fn replies are mapped to {intent, slots} or an error;
      * answer       -- translate + query runner end-to-end against a freshly seeded DB.

    Every expectation goes through check(), which prints PASS/FAIL and records failures
    in FAILS. Result fields are read with .get() throughout, so a reply that lacks the
    expected key is reported as a FAIL rather than aborting the run with a KeyError.
    """
    print("build_system")
    sysprompt = nl_query.build_system()
    check(all(k in sysprompt for k in nl_query.INTENTS), "system prompt lists every intent key")
    check("days (integer, default 90)" in sysprompt, "system prompt renders int slot + default")
    check("one of any|inbound|outbound" in sysprompt, "system prompt renders enum choices")

    print("translate")
    captured = {}

    def fake(prompt, system):
        # Record what translate() passes to the model stub, then reply with one slot
        # ("bogus") that translate() is expected to drop.
        captured["system"] = system
        captured["prompt"] = prompt
        return {"intent": "investors_cold", "slots": {"days": 90, "bogus": "x"}}

    r = T.translate("who's gone quiet for 3 months?", chat_fn=fake)
    check(r == {"intent": "investors_cold", "slots": {"days": 90}},
          f"routes to intent + drops unknown slot 'bogus': {r}")
    # .get() with a default: if the stub was never called these become FAILs, not KeyErrors.
    check(nl_query.INTENTS and "investors_cold" in captured.get("system", ""),
          "chat_fn received the catalog")
    check(captured.get("prompt") == "who's gone quiet for 3 months?",
          "chat_fn received the question")
    check(T.translate("x", chat_fn=lambda q, s: {"intent": None}).get("error") == "no_match",
          "intent null -> no_match")
    check(T.translate("x", chat_fn=lambda q, s: None).get("error") == "no_match",
          "unparseable model reply -> no_match")
    check(T.translate("", chat_fn=lambda q, s: {"intent": "x"}).get("error") == "no_match",
          "empty question -> no_match (no model call needed)")

    def boom(q, s):
        # Stand-in for an unreachable local model.
        raise RuntimeError("spark down")

    check(T.translate("x", chat_fn=boom).get("error") == "model_unavailable",
          "local model unreachable -> model_unavailable")

    print("answer (end-to-end through the validated runner)")
    server.init_db()
    conn = server.get_db()
    try:
        # 'Ghost' has graveyard=1 and the largest total_invested, yet the expected top-2
        # below omits it -- presumably the query filters graveyard rows (confirm in catalog).
        conn.execute("INSERT INTO fundraising_investors (id, investor_name, lead, graveyard, "
                     "source_row_id, total_invested) VALUES "
                     "('a','Acme Capital','Jon',0,'a',5000000),"
                     "('b','Beta Partners','Grant',0,'b',2000000),"
                     "('g','Ghost','Grant',1,'g',9000000)")
        conn.commit()

        r = T.answer(conn, "top investors",
                     chat_fn=lambda q, s: {"intent": "top_investors_committed",
                                           "slots": {"limit": 2}})
        names = [x["investor_name"] for x in (r.get("rows") or [])]
        check(names == ["Acme Capital", "Beta Partners"], "answer() runs the translated query")
        check(r.get("question") == "top investors", "answer() echoes the original question")

        r = T.answer(conn, "nonsense",
                     chat_fn=lambda q, s: {"intent": "made_up_intent", "slots": {}})
        check(r.get("error") == "unknown_intent",
              "hallucinated intent is rejected by the validator")
        check(r.get("question") == "nonsense", "answer() echoes question on error too")

        r = T.answer(conn, "anything", chat_fn=boom)
        check(r.get("error") == "model_unavailable", "answer() surfaces a model outage")
    finally:
        # Release the connection even if seeding or a call above raises.
        conn.close()

    print()
    if FAILS:
        print(f"{len(FAILS)} FAILED")
        for f in FAILS:
            print(" - " + f)
        sys.exit(1)
    print("ALL PASS")


if __name__ == "__main__":
    main()