# ten31-database/backend/nl_query/runner.py

"""NL-query runner — validate a {intent, slots} request, run the curated query, return rows.

This is the trust boundary. Whatever produced the request (a local model in W2, the web UI,
or a test) is untrusted: the runner accepts ONLY a known intent key and slot VALUES, coerces
each value to its declared type, and rejects anything off-spec — it never lets a caller name
a table/column, write SQL, or choose an operator. The intents do the rest with fixed,
parameterized SQL (see intents.py). All failure modes return a structured error dict; the
runner never raises to the caller (a bad `limit=abc` must not crash the request thread).
"""
import sqlite3

from .intents import INTENTS


def _coerce_slot(name, spec, raw):
    """Coerce and validate one raw slot value against its spec.

    Args:
        name: Slot name; used only to build error messages.
        spec: Slot spec dict. Must contain "type" ("int", "enum" or "text"). May contain
            "default" and "required"; "min"/"max" for int; "maxlen" for text. An enum
            spec must contain "choices".
        raw: The untrusted value from the request. None or a blank/whitespace-only
            string counts as "not provided".

    Returns:
        A (value, error) pair. error is None on success, else a human-readable string
        (and value is None). An absent optional slot with no default yields (None, None).
    """
    t = spec["type"]
    provided = raw is not None and not (isinstance(raw, str) and raw.strip() == "")

    if not provided:
        # Absent value: the default wins over "required"; otherwise optional -> None.
        if "default" in spec:
            return spec["default"], None
        if spec.get("required"):
            return None, f"slot '{name}' is required"
        return None, None  # optional, absent

    if t == "int":
        try:
            v = int(raw)
        except (TypeError, ValueError, OverflowError):
            # OverflowError covers int(float("inf")); without it a non-finite float in
            # the request would escape as an exception instead of a structured error.
            return None, f"slot '{name}' must be an integer (got {raw!r})"
        # Out-of-range integers are clamped into [min, max] rather than rejected.
        if "min" in spec:
            v = max(spec["min"], v)
        if "max" in spec:
            v = min(spec["max"], v)
        return v, None

    if t == "enum":
        # Enum matching is whitespace- and case-insensitive on the caller's side.
        v = str(raw).strip().lower()
        if v not in spec["choices"]:
            # An unrecognised choice falls back to the default when one is declared.
            if "default" in spec:
                return spec["default"], None
            return None, f"slot '{name}' must be one of {spec['choices']} (got {raw!r})"
        return v, None

    if t == "text":
        v = str(raw).strip()
        maxlen = spec.get("maxlen", 200)
        # Over-long text is truncated to maxlen, not rejected.
        if len(v) > maxlen:
            v = v[:maxlen]
        return v, None

    return None, f"slot '{name}' has unknown type {t!r}"  # registry bug, fail visibly


def validate(intent_key, raw_slots):
    """Validate an intent + raw slots WITHOUT running the query.

    Useful to the translator/UI for a dry-run check. Both arguments are treated as
    untrusted: a malformed shape (unhashable intent key, slots that are not a mapping)
    is reported as a structured error rather than raised.

    Args:
        intent_key: Key expected to be present in INTENTS.
        raw_slots: Mapping of slot name -> raw value. Any falsy value (None, {}, ...)
            is treated as "no slots".

    Returns:
        (clean_slots, error_dict). On success error_dict is None and clean_slots holds
        the coerced values; on failure clean_slots is None and error_dict is
        {"error": "unknown_intent" | "bad_slot", "intent": ..., "detail": ...}.
    """
    # Function-scope import so this block carries its own dependency.
    from collections.abc import Mapping

    try:
        known = intent_key in INTENTS
    except TypeError:
        # Unhashable key (e.g. a list/dict decoded from JSON) can never be a known intent.
        known = False
    if not known:
        return None, {"error": "unknown_intent", "intent": intent_key,
                      "detail": f"unknown intent; known: {sorted(INTENTS)}"}
    spec = INTENTS[intent_key]["slots"]
    raw_slots = raw_slots or {}
    if not isinstance(raw_slots, Mapping):
        # A truthy non-mapping (number, list, string) would otherwise crash on iteration
        # or on .get() below.
        return None, {"error": "bad_slot", "intent": intent_key,
                      "detail": "slots must be a mapping of slot name to value "
                                f"(got {type(raw_slots).__name__})"}
    # Reject unexpected slot keys rather than ignore them — a request shaped wrong is a
    # misunderstanding worth surfacing, not silently dropping.
    unexpected = [k for k in raw_slots if k not in spec]
    if unexpected:
        return None, {"error": "bad_slot", "intent": intent_key,
                      "detail": f"unexpected slot(s): {unexpected}; allowed: {sorted(spec)}"}
    clean = {}
    for name, sspec in spec.items():
        v, err = _coerce_slot(name, sspec, raw_slots.get(name))
        if err:
            # First failing slot wins; later slots are not checked.
            return None, {"error": "bad_slot", "intent": intent_key, "detail": err}
        # Absent optional slots without a default are left out of the clean dict.
        if v is not None or "default" in sspec:
            clean[name] = v
    return clean, None


def _audit_best_effort(audit_fn, record):
    """Call audit_fn(record) when an audit hook was supplied, ignoring any failure."""
    if not audit_fn:
        return
    try:
        audit_fn(record)
    except Exception:
        # Deliberate best-effort: a broken audit sink must not fail the query itself.
        pass


def run_query(conn, intent_key, raw_slots=None, *, audit_fn=None, actor=None, source="api"):
    """Validate and execute a curated NL query.

    Records an audit row via audit_fn (if given) so a query made through a
    leaked/automated credential is detectable. Audit failures are ignored.

    Args:
        conn: Database connection handed to the intent's "run" callable.
        intent_key: Key of the intent to run (validated against INTENTS).
        raw_slots: Untrusted slot values; validated/coerced by validate().
        audit_fn: Optional callable taking one dict
            {actor, source, intent, slots, row_count, error}.
        actor: Opaque caller identity, passed through to the audit record.
        source: Origin label, passed through to the audit record.

    Returns:
        A dict: either the result — {intent, slots, row_count} merged with whatever the
        intent returned (the intent's keys win on collision) — or an error
        {error, intent, detail}. error is the validation error code or "query_failed".
    """
    clean, err = validate(intent_key, raw_slots)
    if err:
        # Validation failed: the audit row keeps the raw (uncoerced) slots as received.
        _audit_best_effort(audit_fn, {"actor": actor, "source": source,
                                      "intent": intent_key, "slots": raw_slots,
                                      "row_count": 0, "error": err["error"]})
        return err

    try:
        result = INTENTS[intent_key]["run"](conn, clean)
    except (sqlite3.Error, OverflowError) as exc:
        # Surface a query failure (e.g. a missing optional table) as a visible error — never
        # swallow it and hand back an empty result that reads as an authoritative "none".
        # OverflowError is what sqlite3 raises when a bound Python int does not fit in a
        # 64-bit SQLite INTEGER, which an unclamped int slot can trigger.
        _audit_best_effort(audit_fn, {"actor": actor, "source": source,
                                      "intent": intent_key, "slots": clean,
                                      "row_count": 0, "error": "query_failed"})
        return {"error": "query_failed", "intent": intent_key, "detail": str(exc)}

    out = {"intent": intent_key, "slots": clean, "row_count": len(result.get("rows", [])),
           **result}
    _audit_best_effort(audit_fn, {"actor": actor, "source": source,
                                  "intent": intent_key, "slots": clean,
                                  "row_count": out["row_count"], "error": None})
    return out


def catalog():
    """Describe the queryable surface as plain data.

    Returns a list with one dict per INTENTS entry, in registry iteration order, holding
    the intent key, its summary, its slot specs and its example (empty string when the
    intent declares none). Single source of truth for the W2 translator prompt and any
    UI hint list."""
    entries = []
    for key, definition in INTENTS.items():
        entry = {
            "intent": key,
            "summary": definition["summary"],
            "slots": definition["slots"],
            "example": definition.get("example", ""),
        }
        entries.append(entry)
    return entries