6c29c22601
Read-only "ask the database in plain English" backend. Translation runs on
the local Qwen via Spark Control (question -> {intent, slots}); nothing leaves
the box, no Claude and no redaction boundary (the simplification chosen after
pressure-testing). The safe surface is a curated catalog of ~12 hand-written
parameterized queries; a slot validator is the trust boundary (no generic SQL,
no dynamic identifiers). POST /api/query/nl + GET /api/query/catalog, gated by
require_bot_or_admin, read-only, audited. Soft-delete-correct per table.
Local Qwen translated 12/12 real example questions correctly against the live
Spark. Web "Ask" box and Matrix bot still to come (steps 4-5).
108 lines
4.4 KiB
Python
108 lines
4.4 KiB
Python
#!/usr/bin/env python3
|
|
"""Tests for the W2 NL translator (question -> {intent, slots}) — the local-model leg.
|
|
|
|
The model is stubbed via an injected chat_fn, so this runs fully offline (no Spark, no
|
|
network). Covers:
|
|
- build_system() exposes the whole intent catalog as the model's closed vocabulary;
|
|
- translate() returns the parsed {intent, slots} and DROPS slot keys the intent doesn't
|
|
declare (model noise), while every surviving value is still validated downstream;
|
|
- the translation failure modes: no intent fit -> no_match; unparseable -> no_match;
|
|
local model unreachable -> model_unavailable (so the endpoint can 503);
|
|
- answer() chains translate + the validated runner end-to-end, and a HALLUCINATED intent
|
|
from the model is still rejected by the validator (the model output is never trusted).
|
|
|
|
Run: cd backend && python3 nl_query/test_translate.py
|
|
"""
|
|
import atexit
import os
import shutil
import sys
import tempfile
|
|
|
|
# Point the CRM at a throwaway data directory. These variables are set before
# ``server`` is imported below (presumably server reads them at import/init
# time -- confirm against server.py).
_DATA = tempfile.mkdtemp()
# Remove the scratch directory (and the database file created inside it) when
# the interpreter exits, so repeated runs do not pile up temp dirs. Cleanup is
# best-effort: ignore_errors keeps a failed delete from masking the test result.
atexit.register(shutil.rmtree, _DATA, ignore_errors=True)
os.environ["CRM_DATA_DIR"] = _DATA
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")

# Make the parent of this file's directory importable (backend/), so that
# ``import server`` and ``import nl_query`` resolve when run as a script.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))  # backend/
|
|
import server # noqa: E402
|
|
import nl_query # noqa: E402
|
|
|
|
T = nl_query # exercise the public API (translate/answer/build_system are re-exported)
|
|
|
|
# Messages of every check that did not hold; main() inspects this at the end
# to decide the exit status.
FAILS = []


def check(cond, msg):
    """Report one assertion: print a PASS/FAIL line and remember failures.

    cond -- any value; only its truthiness matters.
    msg  -- human-readable description, printed and (on failure) stored in FAILS.
    """
    if cond:
        print(" PASS " + msg)
        return
    print(" FAIL " + msg)
    FAILS.append(msg)
|
|
|
|
|
|
def main():
    """Run all translator checks offline and exit with status 1 if any failed.

    Three sections, each printing its own header:
      * build_system -- the system prompt mentions every key of
        ``nl_query.INTENTS`` and renders the int/enum slot descriptions.
      * translate    -- a stubbed ``chat_fn`` stands in for the model; covers
        the happy path (undeclared slot dropped) and the failure codes
        ``no_match`` / ``model_unavailable``.
      * answer       -- end-to-end against a freshly initialised database with
        three seeded investors.

    Result lookups are deliberately tolerant (``.get`` instead of indexing) so
    that a regression is recorded as a FAIL line rather than aborting the whole
    run with a KeyError/TypeError.
    """

    def err_of(result):
        # Error code of a translate()/answer() result, or None when the result
        # is not a dict or carries no "error" key.
        return result.get("error") if isinstance(result, dict) else None

    print("build_system")
    sysprompt = nl_query.build_system()
    check(all(k in sysprompt for k in nl_query.INTENTS), "system prompt lists every intent key")
    check("days (integer, default 90)" in sysprompt, "system prompt renders int slot + default")
    check("one of any|inbound|outbound" in sysprompt, "system prompt renders enum choices")

    print("translate")
    captured = {}

    def fake(prompt, system):
        # Record what translate() hands to the model, then answer with one
        # declared slot ("days") and one undeclared slot ("bogus").
        captured["system"] = system
        captured["prompt"] = prompt
        return {"intent": "investors_cold", "slots": {"days": 90, "bogus": "x"}}

    r = T.translate("who's gone quiet for 3 months?", chat_fn=fake)
    check(r == {"intent": "investors_cold", "slots": {"days": 90}},
          f"routes to intent + drops unknown slot 'bogus': {r}")
    # ``or ""`` keeps the membership test valid even if chat_fn was never
    # called or was handed a None system prompt.
    check(nl_query.INTENTS and "investors_cold" in (captured.get("system") or ""),
          "chat_fn received the catalog")
    check(captured.get("prompt") == "who's gone quiet for 3 months?",
          "chat_fn received the question")

    check(err_of(T.translate("x", chat_fn=lambda q, s: {"intent": None})) == "no_match",
          "intent null -> no_match")
    check(err_of(T.translate("x", chat_fn=lambda q, s: None)) == "no_match",
          "unparseable model reply -> no_match")
    check(err_of(T.translate("", chat_fn=lambda q, s: {"intent": "x"})) == "no_match",
          "empty question -> no_match (no model call needed)")

    def boom(q, s):
        # Simulates the local model being unreachable.
        raise RuntimeError("spark down")

    check(err_of(T.translate("x", chat_fn=boom)) == "model_unavailable",
          "local model unreachable -> model_unavailable")

    print("answer (end-to-end through the validated runner)")
    server.init_db()
    conn = server.get_db()
    try:
        # Seed three investors. The graveyard=1 row has the largest
        # total_invested, yet the check below expects it to be absent.
        conn.execute("INSERT INTO fundraising_investors (id, investor_name, lead, graveyard, "
                     "source_row_id, total_invested) VALUES "
                     "('a','Acme Capital','Jon',0,'a',5000000),"
                     "('b','Beta Partners','Grant',0,'b',2000000),"
                     "('g','Ghost','Grant',1,'g',9000000)")
        conn.commit()

        r = T.answer(conn, "top investors",
                     chat_fn=lambda q, s: {"intent": "top_investors_committed",
                                           "slots": {"limit": 2}})
        names = [x["investor_name"] for x in (r.get("rows") or [])]
        check(names == ["Acme Capital", "Beta Partners"],
              "answer() runs the translated query")
        check(r.get("question") == "top investors", "answer() echoes the original question")

        r = T.answer(conn, "nonsense",
                     chat_fn=lambda q, s: {"intent": "made_up_intent", "slots": {}})
        check(r.get("error") == "unknown_intent",
              "hallucinated intent is rejected by the validator")
        check(r.get("question") == "nonsense", "answer() echoes question on error too")

        r = T.answer(conn, "anything", chat_fn=boom)
        check(r.get("error") == "model_unavailable", "answer() surfaces a model outage")
    finally:
        # Always release the connection, even if a check above raised.
        conn.close()

    print()
    if FAILS:
        print(f"{len(FAILS)} FAILED")
        for failed in FAILS:
            print(" - " + failed)
        sys.exit(1)
    print("ALL PASS")
|
|
|
|
|
|
# Script entry point: run the checks only when executed directly
# (e.g. ``python3 nl_query/test_translate.py``), not when imported.
if __name__ == "__main__":
    main()
|