Add NL-query backend (W2): local translator + safe named-query runner
Read-only "ask the database in plain English" backend. Translation runs on
the local Qwen via Spark Control (question -> {intent, slots}); nothing leaves
the box, no Claude and no redaction boundary (the simplification chosen after
pressure-testing). The safe surface is a curated catalog of ~12 hand-written
parameterized queries; a slot validator is the trust boundary (no generic SQL,
no dynamic identifiers). POST /api/query/nl + GET /api/query/catalog, gated
require_bot_or_admin, read-only, audited. Soft-delete-correct per table.
Local Qwen translated 12/12 real example questions correctly against the live
Spark. Web "Ask" box and Matrix bot still to come (steps 4-5).
This commit is contained in:
@@ -0,0 +1,127 @@
|
||||
"""NL-query runner — validate a {intent, slots} request, run the curated query, return rows.

This is the trust boundary. Whatever produced the request (a local model in W2, the web UI,
or a test) is untrusted: the runner accepts ONLY a known intent key and slot VALUES, coerces
each value to its declared type, and rejects anything off-spec — it never lets a caller name
a table/column, write SQL, or choose an operator. The intents do the rest with fixed,
parameterized SQL (see intents.py). All failure modes return a structured error dict; the
runner never raises to the caller (a bad `limit=abc` must not crash the request thread).
"""
|
||||
import sqlite3
|
||||
|
||||
from .intents import INTENTS
|
||||
|
||||
|
||||
def _coerce_slot(name, spec, raw):
|
||||
"""Coerce/validate one slot value against its spec. Returns (value, error). Exactly one
|
||||
of the two is meaningful: error is None on success, else a human-readable string."""
|
||||
t = spec["type"]
|
||||
provided = raw is not None and not (isinstance(raw, str) and raw.strip() == "")
|
||||
|
||||
if not provided:
|
||||
if "default" in spec:
|
||||
return spec["default"], None
|
||||
if spec.get("required"):
|
||||
return None, f"slot '{name}' is required"
|
||||
return None, None # optional, absent
|
||||
|
||||
if t == "int":
|
||||
try:
|
||||
v = int(raw)
|
||||
except (TypeError, ValueError):
|
||||
return None, f"slot '{name}' must be an integer (got {raw!r})"
|
||||
if "min" in spec:
|
||||
v = max(spec["min"], v)
|
||||
if "max" in spec:
|
||||
v = min(spec["max"], v)
|
||||
return v, None
|
||||
|
||||
if t == "enum":
|
||||
v = str(raw).strip().lower()
|
||||
if v not in spec["choices"]:
|
||||
if "default" in spec:
|
||||
return spec["default"], None
|
||||
return None, f"slot '{name}' must be one of {spec['choices']} (got {raw!r})"
|
||||
return v, None
|
||||
|
||||
if t == "text":
|
||||
v = str(raw).strip()
|
||||
maxlen = spec.get("maxlen", 200)
|
||||
if len(v) > maxlen:
|
||||
v = v[:maxlen]
|
||||
return v, None
|
||||
|
||||
return None, f"slot '{name}' has unknown type {t!r}" # registry bug, fail visibly
|
||||
|
||||
|
||||
def validate(intent_key, raw_slots):
    """Validate an intent + raw slots WITHOUT running the query.

    Useful to the translator/UI for a dry-run check. Both arguments are untrusted, so a
    malformed request is reported as a structured error instead of raising.

    Args:
        intent_key: Key expected to be present in INTENTS.
        raw_slots: Mapping of slot name -> raw value. None and other falsy values are
            treated as "no slots".

    Returns:
        A (clean_slots, error_dict) tuple. error_dict is None on success; on failure
        clean_slots is None and error_dict is {"error", "intent", "detail"}.
    """
    # An unhashable key (e.g. a list decoded from malformed JSON) makes the membership test
    # itself raise TypeError; treat that the same as any other unknown intent.
    try:
        known = intent_key in INTENTS
    except TypeError:
        known = False
    if not known:
        return None, {"error": "unknown_intent", "intent": intent_key,
                      "detail": f"unknown intent; known: {sorted(INTENTS)}"}
    spec = INTENTS[intent_key]["slots"]
    raw_slots = raw_slots or {}
    # A truthy non-mapping (list, string, number) cannot be looked up by slot name; report
    # it rather than crash on iteration or on .get() below.
    if not isinstance(raw_slots, dict):
        return None, {"error": "bad_slot", "intent": intent_key,
                      "detail": f"slots must be an object mapping slot names to values "
                                f"(got {type(raw_slots).__name__})"}
    # Reject unexpected slot keys rather than ignore them — a request shaped wrong is a
    # misunderstanding worth surfacing, not silently dropping.
    unexpected = [k for k in raw_slots if k not in spec]
    if unexpected:
        return None, {"error": "bad_slot", "intent": intent_key,
                      "detail": f"unexpected slot(s): {unexpected}; allowed: {sorted(spec)}"}
    clean = {}
    for name, sspec in spec.items():
        v, err = _coerce_slot(name, sspec, raw_slots.get(name))
        if err:
            return None, {"error": "bad_slot", "intent": intent_key, "detail": err}
        # Optional slots that were not supplied and have no default are left out entirely.
        if v is not None or "default" in sspec:
            clean[name] = v
    return clean, None
|
||||
|
||||
|
||||
def _record_audit(audit_fn, entry):
    """Best-effort audit write: call audit_fn(entry) and never let a failure propagate."""
    if not audit_fn:
        return
    try:
        audit_fn(entry)
    except Exception:
        # Auditing must not break the query, but a silently lost audit row defeats its
        # purpose, so leave a trace in the application log.
        import logging
        logging.getLogger(__name__).exception(
            "NL-query audit write failed (intent=%r)", entry.get("intent"))


def run_query(conn, intent_key, raw_slots=None, *, audit_fn=None, actor=None, source="api"):
    """Validate and execute a curated NL query. Returns a dict in every handled case.

    On success the dict holds intent, slots and row_count merged with whatever the intent's
    run function returned (documented as columns, rows, truncated, summary); keys from the
    intent's result are merged last and therefore take precedence. On failure the dict is
    {error, intent, detail}. An audit row is recorded via audit_fn (if given) for every
    outcome, so a query made through a leaked/automated credential is detectable.

    Args:
        conn: Database connection handed unchanged to the intent's run function.
        intent_key: Untrusted intent key.
        raw_slots: Untrusted slot mapping (or None).
        audit_fn: Optional callable taking one dict
            {actor, source, intent, slots, row_count, error}. Failures inside it are
            logged and otherwise ignored.
        actor: Opaque caller identity, copied into the audit entry.
        source: Origin label, copied into the audit entry.

    Returns:
        The result dict or the error dict described above.
    """
    who = {"actor": actor, "source": source, "intent": intent_key}

    clean, err = validate(intent_key, raw_slots)
    if err:
        # Validation failed, so only the raw (unvalidated) slots are available to record.
        _record_audit(audit_fn, {**who, "slots": raw_slots, "row_count": 0,
                                 "error": err["error"]})
        return err

    try:
        result = INTENTS[intent_key]["run"](conn, clean)
    except sqlite3.Error as exc:
        # Surface a query failure (e.g. a missing optional table) as a visible error — never
        # swallow it and hand back an empty result that reads as an authoritative "none".
        _record_audit(audit_fn, {**who, "slots": clean, "row_count": 0,
                                 "error": "query_failed"})
        return {"error": "query_failed", "intent": intent_key, "detail": str(exc)}

    out = {"intent": intent_key, "slots": clean, "row_count": len(result.get("rows", [])),
           **result}
    _record_audit(audit_fn, {**who, "slots": clean, "row_count": out["row_count"],
                             "error": None})
    return out
|
||||
|
||||
|
||||
def catalog():
    """Describe the queryable surface as plain data.

    Returns a list with one dict per registered intent, in INTENTS iteration order, holding
    its key ("intent"), "summary", "slots" spec and "example" ("" when the intent declares
    none). Intended as the single source of truth for the W2 translator prompt and any UI
    hint list.
    """
    entries = []
    for key, intent in INTENTS.items():
        entry = {
            "intent": key,
            "summary": intent["summary"],
            "slots": intent["slots"],
            "example": intent.get("example", ""),
        }
        entries.append(entry)
    return entries
|
||||
Reference in New Issue
Block a user