#!/usr/bin/env python3
"""Tests for the W2 NL translator (question -> {intent, slots}) — the local-model leg.

The model is stubbed via an injected chat_fn, so this runs fully offline (no Spark, no
network). Covers:
  - build_system() exposes the whole intent catalog as the model's closed vocabulary;
  - translate() returns the parsed {intent, slots} and DROPS slot keys the intent doesn't
    declare (model noise), while every surviving value is still validated downstream;
  - the translation failure modes: no intent fit -> no_match; unparseable -> no_match;
    local model unreachable -> model_unavailable (so the endpoint can 503);
  - answer() chains translate + the validated runner end-to-end, and a HALLUCINATED intent
    from the model is still rejected by the validator (the model output is never trusted).

Run:  cd backend && python3 nl_query/test_translate.py
"""
import atexit
import os
import shutil
import sys
import tempfile

# Throwaway data dir for this run. The env vars are set before `server` is imported
# (hence the E402 suppressions below) — presumably server reads them at import/init time.
_DATA = tempfile.mkdtemp()
# mkdtemp() never deletes what it creates; remove the directory when the script exits
# (also runs on sys.exit). ignore_errors: cleanup must never mask the test outcome.
atexit.register(shutil.rmtree, _DATA, ignore_errors=True)
os.environ["CRM_DATA_DIR"] = _DATA
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))  # backend/

import server  # noqa: E402
import nl_query  # noqa: E402

T = nl_query  # exercise the public API (translate/answer/build_system are re-exported)

# Messages of every failed check, in the order they failed.
FAILS = []


def check(cond, msg):
    """Print a PASS/FAIL line for ``msg`` and record ``msg`` in FAILS when ``cond`` is falsy."""
    print((" PASS " if cond else " FAIL ") + msg)
    if not cond:
        FAILS.append(msg)


def main():
    """Run every check, print a summary, and exit with status 1 if any check failed.

    Result dicts are read with ``.get`` rather than subscripting so that a missing key
    is reported as a FAIL line instead of aborting the whole run with a KeyError.
    """
    print("build_system")
    sysprompt = nl_query.build_system()
    check(all(k in sysprompt for k in nl_query.INTENTS),
          "system prompt lists every intent key")
    check("days (integer, default 90)" in sysprompt,
          "system prompt renders int slot + default")
    check("one of any|inbound|outbound" in sysprompt,
          "system prompt renders enum choices")

    print("translate")
    captured = {}

    def fake(prompt, system):
        # Record what translate() hands to the model, then answer with one declared
        # slot ("days") plus one extra key ("bogus") that the check below expects dropped.
        captured["system"] = system
        captured["prompt"] = prompt
        return {"intent": "investors_cold", "slots": {"days": 90, "bogus": "x"}}

    r = T.translate("who's gone quiet for 3 months?", chat_fn=fake)
    check(r == {"intent": "investors_cold", "slots": {"days": 90}},
          f"routes to intent + drops unknown slot 'bogus': {r}")
    check(nl_query.INTENTS and "investors_cold" in (captured.get("system") or ""),
          "chat_fn received the catalog")
    check(captured.get("prompt") == "who's gone quiet for 3 months?",
          "chat_fn received the question")

    check(T.translate("x", chat_fn=lambda q, s: {"intent": None}).get("error") == "no_match",
          "intent null -> no_match")
    check(T.translate("x", chat_fn=lambda q, s: None).get("error") == "no_match",
          "unparseable model reply -> no_match")
    check(T.translate("", chat_fn=lambda q, s: {"intent": "x"}).get("error") == "no_match",
          "empty question -> no_match (no model call needed)")

    def boom(q, s):
        # Stand-in for an unreachable local model.
        raise RuntimeError("spark down")

    check(T.translate("x", chat_fn=boom).get("error") == "model_unavailable",
          "local model unreachable -> model_unavailable")

    print("answer (end-to-end through the validated runner)")
    server.init_db()
    conn = server.get_db()
    try:
        conn.execute("INSERT INTO fundraising_investors (id, investor_name, lead, graveyard, "
                     "source_row_id, total_invested) VALUES "
                     "('a','Acme Capital','Jon',0,'a',5000000),"
                     "('b','Beta Partners','Grant',0,'b',2000000),"
                     "('g','Ghost','Grant',1,'g',9000000)")
        conn.commit()

        r = T.answer(conn, "top investors",
                     chat_fn=lambda q, s: {"intent": "top_investors_committed",
                                           "slots": {"limit": 2}})
        check([x["investor_name"] for x in (r.get("rows") or [])]
              == ["Acme Capital", "Beta Partners"],
              "answer() runs the translated query")
        check(r.get("question") == "top investors", "answer() echoes the original question")

        r = T.answer(conn, "nonsense",
                     chat_fn=lambda q, s: {"intent": "made_up_intent", "slots": {}})
        check(r.get("error") == "unknown_intent",
              "hallucinated intent is rejected by the validator")
        check(r.get("question") == "nonsense", "answer() echoes question on error too")

        r = T.answer(conn, "anything", chat_fn=boom)
        check(r.get("error") == "model_unavailable", "answer() surfaces a model outage")
    finally:
        # Release the connection even if a statement above raises.
        conn.close()

    print()
    if FAILS:
        print(f"{len(FAILS)} FAILED")
        for f in FAILS:
            print(" - " + f)
        sys.exit(1)
    print("ALL PASS")


if __name__ == "__main__":
    main()