# ten31-database/backend/matrix_intake/query.py

"""NL-query Matrix surface (W2 step 5) — turn an '@bot <question>' message into a read-only
answer from the CRM's curated NL-query endpoint, and render that answer for the chat room.

This module is PURE (no network, no matrix-nio) so it's unit-testable offline; the async wiring
(call the endpoint, post in a thread) lives in bot.py. The endpoint does the real work:
translation runs on the box's LOCAL model (nothing leaves the box) and only the curated,
parameterized queries can run — there is no write path here, so no approval gate applies.

Trigger: a top-level message starting with '?', '@bot', '/ask', '/query' or '/q' (see
parse_trigger and QUERY_PREFIXES). We deliberately do NOT accept a bare leading 'ask', which
would collide with intake notes like "Ask Jane to send the Q3 deck".
"""

# Markers a human wouldn't start an intake note with. '?' is handled separately (single char).
QUERY_PREFIXES = ("@bot", "/ask", "/query", "/q")

# Soft cap on rows rendered into a single chat answer. The endpoint already caps the SQL result
# (server MAX_ROWS), but 500 rows is unreadable on mobile — show the first N and say how many
# more there are (never a silent cut). Refine the question or use the web Ask box for the rest.
MAX_DISPLAY_ROWS = 30

# Column-name hints used only for nicer formatting (money / dates). Cosmetic — never affects
# what's queried (that's fixed in intents.py).
_MONEY_HINTS = ("amount", "invested", "total", "expected", "committed")
# 0/1 flag columns: suppress when 0 (noise), show a label when 1.
_FLAG_LABELS = {"graveyard": "retired", "overdue": "⚠️ overdue"}


def parse_trigger(text):
    """Extract the question from a message addressed to the query bot.

    A message is a query when, after trimming, it starts with '?' or with one of
    QUERY_PREFIXES (matched case-insensitively) followed by a separator or nothing. The return
    value is the remainder with separators and surrounding whitespace removed — an empty string
    when the trigger stands alone. Returns None when the message is not a query, so the caller
    can route it to intake instead.
    """
    message = (text or "").strip()
    if not message:
        return None
    if message.startswith("?"):
        return message[1:].strip()

    separators = " \t\n:,"
    lowered = message.lower()
    for prefix in QUERY_PREFIXES:
        if not lowered.startswith(prefix):
            continue
        tail = message[len(prefix):]
        # A bare trigger or a trigger followed by a separator counts; '/asking …' must not be
        # read as the '/ask' trigger.
        if not tail or tail[0] in separators:
            return tail.lstrip(separators).strip()
    return None


def _examples():
    return ("Try things like:\n"
            "• `?which investors haven't we contacted in 90 days?`\n"
            "• `?top 10 investors by committed capital`\n"
            "• `?when did we last reach out to Acme Capital?`\n"
            "• `?how many emails has Grant sent this month?`")


# Usage blurb: how to address the bot, then a blank line, then the sample questions.
HELP = "\n\n".join((
    "💬 Ask me about the fundraising database — start your message with `?` (or `@bot`).",
    _examples(),
))


def _is_money_col(name):
    """Report whether the column name contains any _MONEY_HINTS substring (case-insensitive)."""
    lowered = name.lower()
    for hint in _MONEY_HINTS:
        if hint in lowered:
            return True
    return False


def _fmt_value(col, val):
    """Format one scalar cell for chat: dates -> YYYY-MM-DD, money columns -> $1,234, else str."""
    if val is None:
        return ""
    name = col.lower()
    if name.endswith("_at") or name.endswith("date"):
        return str(val)[:10]
    if isinstance(val, (int, float)) and _is_money_col(col):
        return f"${val:,.0f}"
    return str(val)


def _render_contacts(contacts):
    """investor_lookup's nested contact dicts -> 'Name <email> (title · city, state)' lines."""
    out = []
    for c in contacts:
        bits = c.get("full_name") or "?"
        if c.get("email"):
            bits += f" <{c['email']}>"
        loc = ", ".join(x for x in (c.get("city"), c.get("state"), c.get("country")) if x)
        extra = " · ".join(x for x in (c.get("title"), loc) if x)
        if extra:
            bits += f" ({extra})"
        out.append(bits)
    return out


def _render_commitments(commitments):
    """investor_lookup's nested per-fund commitments -> 'Fund: $amount' lines."""
    out = []
    for c in commitments:
        fund = c.get("fund_name") or "?"
        amt = c.get("amount")
        out.append(f"{fund}: ${amt:,.0f}" if isinstance(amt, (int, float)) else f"{fund}: {amt}")
    return out


def _render_row(i, row, columns):
    cols = columns or list(row.keys())
    lead = None
    scalars = []
    sublines = []
    for col in cols:
        val = row.get(col)
        if isinstance(val, list):
            if not val:
                continue
            if col == "contacts":
                sublines += [f"    – {x}" for x in _render_contacts(val)]
            elif col == "commitments":
                sublines += [f"    – {x}" for x in _render_commitments(val)]
            else:  # generic list-of-dicts fallback (no intent uses this yet)
                sublines += [f"    – {', '.join(f'{k}={v}' for k, v in d.items())}"
                             for d in val if isinstance(d, dict)]
            continue
        if col in _FLAG_LABELS:
            if val:
                scalars.append(_FLAG_LABELS[col])
            continue
        s = _fmt_value(col, val)
        if s == "":
            continue
        if lead is None:  # first non-empty column is the bold identifier for the row
            lead = s
        else:
            scalars.append(f"{col}: {s}")
    head = f"{i}. **{lead}**" if lead else f"{i}."
    if scalars:
        head += " — " + " · ".join(scalars)
    return "\n".join([head] + sublines)


def _render_interpretation(intent, slots):
    if not intent:
        return ""
    if slots:
        return f"read as: {intent} ({', '.join(f'{k}={v}' for k, v in slots.items())})"
    return f"read as: {intent}"


def _render_error(err, result):
    detail = (result.get("detail") or "").strip()
    if err == "no_match":
        return "🤷 I couldn't map that to one of my saved queries.\n\n" + _examples()
    if err == "model_unavailable":
        return "⚠️ The local query model is unreachable right now — try again in a moment."
    if err == "query_failed":
        return f"⚠️ That query failed to run{(': ' + detail) if detail else ''}."
    # unknown_intent / bad_slot / anything unexpected
    return (f"⚠️ I couldn't run that ({err}){(': ' + detail) if detail else ''}.\n\n" + _examples())


def render_answer(result):
    """Render the NL-query endpoint's structured result into a Matrix markdown answer.

    `result` is the endpoint body: either a hit {intent, slots, columns, rows, summary,
    truncated} or an error {error, detail}. The formatting is deterministic — results never go
    back to any model. At most MAX_DISPLAY_ROWS rows are rendered; rows beyond that, and a
    server-side truncation, are called out in a trailing note.
    """
    body = result or {}
    error = body.get("error")
    if error:
        return _render_error(error, body)

    # Header: the summary line, plus an italic "read as" line when an intent is present.
    summary = (body.get("summary") or "").strip()
    header_lines = [f"📊 {summary}" if summary else "📊 Done."]
    interpretation = _render_interpretation(body.get("intent"), body.get("slots") or {})
    if interpretation:
        header_lines.append(f"_{interpretation}_")
    header = "\n".join(header_lines)

    rows = body.get("rows") or []
    if not rows:
        return header + "\n\n(no matching records)"

    columns = body.get("columns") or []
    visible = rows[:MAX_DISPLAY_ROWS]
    rendered = [_render_row(number, row, columns)
                for number, row in enumerate(visible, start=1)]
    sections = [header, "\n".join(rendered)]

    # Say so whenever something was left out, either by the display cap or by the server.
    notes = []
    hidden = len(rows) - len(visible)
    if hidden > 0:
        notes.append(f"+{hidden} more not shown")
    if body.get("truncated"):
        notes.append("results hit the server cap")
    if notes:
        sections.append(
            "_" + "; ".join(notes) + " — refine your question or use the web Ask box._")
    return "\n\n".join(sections)