Add NL-query backend (W2): local translator + safe named-query runner
Read-only "ask the database in plain English" backend. Translation runs on
the local Qwen via Spark Control (question -> {intent, slots}); nothing leaves
the box, no Claude and no redaction boundary (the simplification chosen after
pressure-testing). The safe surface is a curated catalog of ~12 hand-written
parameterized queries; a slot validator is the trust boundary (no generic SQL,
no dynamic identifiers). POST /api/query/nl + GET /api/query/catalog, gated by
require_bot_or_admin, read-only, audited. Soft-delete-correct per table.
Local Qwen translated 12/12 real example questions correctly against the live
Spark. Web "Ask" box and Matrix bot still to come (steps 4-5).
This commit is contained in:
@@ -0,0 +1,107 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Tests for the W2 NL translator (question -> {intent, slots}) — the local-model leg.
|
||||
|
||||
The model is stubbed via an injected chat_fn, so this runs fully offline (no Spark, no
|
||||
network). Covers:
|
||||
- build_system() exposes the whole intent catalog as the model's closed vocabulary;
|
||||
- translate() returns the parsed {intent, slots} and DROPS slot keys the intent doesn't
|
||||
declare (model noise), while every surviving value is still validated downstream;
|
||||
- the translation failure modes: no intent fit -> no_match; unparseable -> no_match;
|
||||
local model unreachable -> model_unavailable (so the endpoint can 503);
|
||||
- answer() chains translate + the validated runner end-to-end, and a HALLUCINATED intent
|
||||
from the model is still rejected by the validator (the model output is never trusted).
|
||||
|
||||
Run: cd backend && python3 nl_query/test_translate.py
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
import atexit
import shutil

# Point the CRM at a throwaway data dir so the test never touches a real database.
# The env vars are set BEFORE `import server` below; presumably server reads them at
# import time (hence the E402 suppressions) -- confirm against server.py.
_DATA = tempfile.mkdtemp()
# mkdtemp() leaves deletion to the caller; remove the scratch dir (and the DB created in
# it) when the interpreter exits, including the sys.exit(1) failure path. Best-effort:
# ignore_errors avoids turning a cleanup hiccup into a test failure.
atexit.register(shutil.rmtree, _DATA, ignore_errors=True)
os.environ["CRM_DATA_DIR"] = _DATA
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))  # backend/
import server  # noqa: E402
import nl_query  # noqa: E402

T = nl_query  # exercise the public API (translate/answer/build_system are re-exported)

# Messages of every failed check(); main() reports them and sets the exit status.
FAILS = []
|
||||
|
||||
|
||||
def check(cond, msg):
    """Print a PASS/FAIL line for ``msg``; record ``msg`` in FAILS when ``cond`` is falsy."""
    verdict = " PASS " if cond else " FAIL "
    print(verdict + msg)
    if cond:
        return
    FAILS.append(msg)
|
||||
|
||||
|
||||
def main():
    """Run every translator/answer check, print a PASS/FAIL report, exit 1 on any failure.

    Sections, in order:
      * build_system -- the system prompt mentions every intent key and renders slot specs;
      * translate    -- a stubbed chat_fn drives the happy path and each failure mode;
      * answer       -- translate + runner end-to-end against a freshly initialised DB.

    Result dicts are read with .get() throughout so that a regression (e.g. an unexpected
    success where an error was expected) is reported as a FAIL line instead of aborting
    the whole run with a KeyError.
    """
    print("build_system")
    sysprompt = nl_query.build_system()
    check(all(k in sysprompt for k in nl_query.INTENTS), "system prompt lists every intent key")
    check("days (integer, default 90)" in sysprompt, "system prompt renders int slot + default")
    check("one of any|inbound|outbound" in sysprompt, "system prompt renders enum choices")

    print("translate")
    captured = {}

    def fake(prompt, system):
        # Record what translate() hands the model, then reply with one undeclared slot.
        captured["system"] = system
        captured["prompt"] = prompt
        return {"intent": "investors_cold", "slots": {"days": 90, "bogus": "x"}}

    r = T.translate("who's gone quiet for 3 months?", chat_fn=fake)
    check(r == {"intent": "investors_cold", "slots": {"days": 90}},
          f"routes to intent + drops unknown slot 'bogus': {r}")
    # .get() here: if translate() never called the stub, report a FAIL rather than crash.
    check(nl_query.INTENTS and "investors_cold" in captured.get("system", ""),
          "chat_fn received the catalog")
    check(captured.get("prompt") == "who's gone quiet for 3 months?",
          "chat_fn received the question")

    check(T.translate("x", chat_fn=lambda q, s: {"intent": None}).get("error") == "no_match",
          "intent null -> no_match")
    check(T.translate("x", chat_fn=lambda q, s: None).get("error") == "no_match",
          "unparseable model reply -> no_match")
    check(T.translate("", chat_fn=lambda q, s: {"intent": "x"}).get("error") == "no_match",
          "empty question -> no_match (no model call needed)")

    def boom(q, s):
        # Stand-in for an unreachable local model.
        raise RuntimeError("spark down")

    check(T.translate("x", chat_fn=boom).get("error") == "model_unavailable",
          "local model unreachable -> model_unavailable")

    print("answer (end-to-end through the validated runner)")
    server.init_db()
    conn = server.get_db()
    try:
        # Two live investors plus one with graveyard=1 (the largest total_invested).
        conn.execute("INSERT INTO fundraising_investors (id, investor_name, lead, graveyard, "
                     "source_row_id, total_invested) VALUES "
                     "('a','Acme Capital','Jon',0,'a',5000000),"
                     "('b','Beta Partners','Grant',0,'b',2000000),"
                     "('g','Ghost','Grant',1,'g',9000000)")
        conn.commit()

        r = T.answer(conn, "top investors",
                     chat_fn=lambda q, s: {"intent": "top_investors_committed", "slots": {"limit": 2}})
        check([x["investor_name"] for x in r.get("rows", [])] == ["Acme Capital", "Beta Partners"],
              "answer() runs the translated query")
        check(r.get("question") == "top investors", "answer() echoes the original question")

        r = T.answer(conn, "nonsense", chat_fn=lambda q, s: {"intent": "made_up_intent", "slots": {}})
        check(r.get("error") == "unknown_intent", "hallucinated intent is rejected by the validator")
        check(r.get("question") == "nonsense", "answer() echoes question on error too")

        r = T.answer(conn, "anything", chat_fn=boom)
        check(r.get("error") == "model_unavailable", "answer() surfaces a model outage")
    finally:
        # Release the connection even if one of the calls above raises.
        conn.close()

    print()
    if FAILS:
        print(f"{len(FAILS)} FAILED")
        for f in FAILS:
            print(" - " + f)
        sys.exit(1)
    print("ALL PASS")
|
||||
|
||||
|
||||
# Script entry point: run the checks only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user