# ten31-database/backend/nl_query/test_translate.py

#!/usr/bin/env python3
"""Tests for the W2 NL translator (question -> {intent, slots}) — the local-model leg.

The model is stubbed via an injected chat_fn, so this runs fully offline (no Spark, no
network). Covers:
  - build_system() exposes the whole intent catalog as the model's closed vocabulary;
  - translate() returns the parsed {intent, slots} and DROPS slot keys the intent doesn't
    declare (model noise), while every surviving value is still validated downstream;
  - the translation failure modes: no intent fit -> no_match; unparseable -> no_match;
    local model unreachable -> model_unavailable (so the endpoint can 503);
  - answer() chains translate + the validated runner end-to-end, and a HALLUCINATED intent
    from the model is still rejected by the validator (the model output is never trusted).

Run: cd backend && python3 nl_query/test_translate.py
"""
import os
import sys
import tempfile

# Point the CRM at a fresh throwaway directory so the test never touches real data.
# These variables are set BEFORE `import server` below; presumably server reads them
# at import time — TODO confirm against server.py.
# NOTE(review): nothing visible in this script removes the mkdtemp() directory afterwards.
_DATA = tempfile.mkdtemp()
os.environ["CRM_DATA_DIR"] = _DATA
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")

# Make the directory two levels above this file importable, so `server` and the
# `nl_query` package resolve when the script is run directly.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))  # backend/
import server  # noqa: E402
import nl_query  # noqa: E402

# Short alias: calls below go through the package namespace rather than a submodule.
T = nl_query  # exercise the public API (translate/answer/build_system are re-exported)

# Messages of every check that did not hold, in the order they were recorded.
FAILS = []


def check(cond, msg):
    """Print a PASS/FAIL line for *msg*; remember *msg* in FAILS when *cond* is falsy."""
    if cond:
        print("  PASS " + msg)
        return
    print("  FAIL " + msg)
    FAILS.append(msg)


def _field(result, key, default=None):
    """Return ``result[key]``, or *default* if *result* is not a dict or lacks *key*.

    The checks in main() read result fields through this helper so that a malformed
    result is reported as a FAIL line instead of raising KeyError/TypeError and
    aborting every remaining check.
    """
    if isinstance(result, dict):
        return result.get(key, default)
    return default


def main():
    """Run the build_system / translate / answer checks and report the outcome.

    Every assertion goes through check(), which prints a PASS/FAIL line and records
    failures in FAILS. Once all checks have run, prints "ALL PASS", or prints the
    failed messages and exits with status 1.
    """
    print("build_system")
    sysprompt = nl_query.build_system()
    check(all(k in sysprompt for k in nl_query.INTENTS), "system prompt lists every intent key")
    check("days (integer, default 90)" in sysprompt, "system prompt renders int slot + default")
    check("one of any|inbound|outbound" in sysprompt, "system prompt renders enum choices")

    print("translate")
    captured = {}

    def fake(prompt, system):
        # Record what translate() hands to the model, then reply with one declared
        # slot ("days") and one extra key ("bogus") that the check below expects
        # translate() to drop.
        captured["system"] = system
        captured["prompt"] = prompt
        return {"intent": "investors_cold", "slots": {"days": 90, "bogus": "x"}}

    r = T.translate("who's gone quiet for 3 months?", chat_fn=fake)
    check(r == {"intent": "investors_cold", "slots": {"days": 90}},
          f"routes to intent + drops unknown slot 'bogus': {r}")
    # .get() so that "fake was never called" shows up as a FAIL, not a KeyError.
    check(bool(nl_query.INTENTS) and "investors_cold" in (captured.get("system") or ""),
          "chat_fn received the catalog")
    check(captured.get("prompt") == "who's gone quiet for 3 months?",
          "chat_fn received the question")

    check(_field(T.translate("x", chat_fn=lambda q, s: {"intent": None}), "error") == "no_match",
          "intent null -> no_match")
    check(_field(T.translate("x", chat_fn=lambda q, s: None), "error") == "no_match",
          "unparseable model reply -> no_match")
    check(_field(T.translate("", chat_fn=lambda q, s: {"intent": "x"}), "error") == "no_match",
          "empty question -> no_match (no model call needed)")

    def boom(q, s):
        # Stand-in for an unreachable local model.
        raise RuntimeError("spark down")

    check(_field(T.translate("x", chat_fn=boom), "error") == "model_unavailable",
          "local model unreachable -> model_unavailable")

    print("answer (end-to-end through the validated runner)")
    server.init_db()
    conn = server.get_db()
    try:
        # 'Ghost' has the largest total_invested but graveyard=1; the expected rows
        # in the first check below do not include it.
        conn.execute("INSERT INTO fundraising_investors (id, investor_name, lead, graveyard, "
                     "source_row_id, total_invested) VALUES "
                     "('a','Acme Capital','Jon',0,'a',5000000),"
                     "('b','Beta Partners','Grant',0,'b',2000000),"
                     "('g','Ghost','Grant',1,'g',9000000)")
        conn.commit()

        r = T.answer(conn, "top investors",
                     chat_fn=lambda q, s: {"intent": "top_investors_committed",
                                           "slots": {"limit": 2}})
        rows = _field(r, "rows") or []  # an error result has no rows -> recorded as FAIL
        check([x["investor_name"] for x in rows] == ["Acme Capital", "Beta Partners"],
              "answer() runs the translated query")
        check(_field(r, "question") == "top investors", "answer() echoes the original question")

        r = T.answer(conn, "nonsense",
                     chat_fn=lambda q, s: {"intent": "made_up_intent", "slots": {}})
        check(_field(r, "error") == "unknown_intent",
              "hallucinated intent is rejected by the validator")
        check(_field(r, "question") == "nonsense", "answer() echoes question on error too")

        r = T.answer(conn, "anything", chat_fn=boom)
        check(_field(r, "error") == "model_unavailable", "answer() surfaces a model outage")
    finally:
        # Release the connection even if one of the calls above raises.
        conn.close()

    print()
    if FAILS:
        print(f"{len(FAILS)} FAILED")
        for f in FAILS:
            print("  - " + f)
        sys.exit(1)
    print("ALL PASS")


# Run the checks only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()