Files
Keysat 0b893295e1 Matrix intake: fuzzy investor matching + conversational in-thread edits (v0.1.0:86)
Close the two locked post-deploy enhancements for the Matrix intake bot.

Fuzzy matching (server-side, ships in the s9pk): new find_intake_candidates in
server.py returns ranked deterministic near-matches (difflib name similarity +
token-set Jaccard, legal-suffix-aware, + email Levenshtein <= 2); GET
/api/intake/match now returns {match, candidates}. The bot surfaces a numbered
shortlist so a near-duplicate (Charlie/Charles, Acme Capital vs Acme Capital LLC,
a one-char email typo) is confirmed by a human instead of silently creating a
second investor. Exact match still auto-attaches; fuzzy candidates are never
auto-attached. The optional LLM-judge re-rank is deferred.

Conversational edits (bot-side, ships on the Spark): any in-thread reply that
isn't yes/no/edit field=value is treated as a natural-language revision and
re-run through local Qwen (parse.revise). Email integrity is preserved -- a
changed address must literally appear in the instruction; the model's email
field is structurally unreachable. No-op revisions re-prompt.

Docs/current-state brought current; 27/27 backend tests green.
2026-06-17 18:50:58 -05:00

193 lines
7.4 KiB
Python

"""Pending-proposal store + the in-thread approval state machine.
The one piece of state in the bot: a proposal awaiting a human's yes/edit/no, keyed by the
Matrix thread root (the bot's proposal lives in a thread rooted at the user's message, and
the user replies inside that thread). In-memory and ephemeral by design — a restart drops
pending proposals (the user just re-sends), matching matrix-bridge's stateless-by-default
ethos. Nothing here writes to the CRM; the bot calls the CRM client only after `approve`.
A proposal carries a `_stage`: "approval" (the normal yes/edit/no card) or "disambiguate"
(a fuzzy-match shortlist the human must resolve — pick a number / "new" / "no" — before it
becomes an approval-stage proposal). The shortlist itself rides on `_candidates`.
"""
import re
# field aliases accepted in `edit <field>=<value>`
_EDIT_ALIASES = {
"name": "investor_name", "investor": "investor_name", "firm": "investor_name", "org": "investor_name",
"contact": "contact_name", "person": "contact_name",
"email": "contact_email",
"title": "contact_title", "role": "contact_title",
"note": "note",
}
_YES = {"yes", "y", "approve", "approved", "ok", "confirm", "go", "👍", ""}
_NO = {"no", "n", "cancel", "discard", "reject", "stop", "👎", ""}
# "create a new investor anyway" replies to a disambiguation shortlist
_NEW = {"new", "none", "new investor", "none of these", "create", "create new", "add new", "neither"}
_CONTENT_FIELDS = ("intent", "investor_name", "contact_name", "contact_email", "contact_title", "note")
class ProposalStore:
    """In-memory registry of pending proposals, keyed by thread root.

    A thin wrapper over a single dict; nothing is persisted.
    """

    def __init__(self):
        # thread_root -> proposal dict
        self._pending = {}

    def put(self, thread_root, proposal):
        """Store `proposal` under `thread_root`, replacing any existing entry."""
        self._pending[thread_root] = proposal

    def get(self, thread_root):
        """Return the proposal stored under `thread_root`, or None if there is none."""
        return self._pending.get(thread_root, None)

    def pop(self, thread_root):
        """Remove and return the proposal under `thread_root`; None if there is none."""
        try:
            return self._pending.pop(thread_root)
        except KeyError:
            return None

    def has(self, thread_root):
        """Return True when a proposal is stored under `thread_root`."""
        return thread_root in self._pending

    def any_pending(self):
        """Return True when at least one proposal is stored."""
        return len(self._pending) > 0
def _parse_edit(text):
"""Parse 'edit field=value' (also 'field: value'); return (canonical_field, value) or None."""
body = text.strip()
if body.lower().startswith("edit "):
body = body[5:].strip()
for sep in ("=", ":"):
if sep in body:
field, value = body.split(sep, 1)
field = field.strip().lower()
canon = _EDIT_ALIASES.get(field)
value = value.strip()
if canon and value:
return canon, value
# Not a known field on this separator — try the next one rather than bail,
# so e.g. "note: see deck=v2" still parses (split on ':' not the inner '=').
continue
return None
def interpret_reply(text):
    """Classify a threaded reply to a pending proposal.

    Returns one of:
        ("approve", None) | ("reject", None) | ("edit", (field, value)) | ("unknown", None)

    `text` may be None. A blank reply (None, empty, or whitespace only) is always
    "unknown": it must never be read as an approval or a rejection, whatever the
    keyword sets contain.
    """
    t = (text or "").strip()
    if not t:
        # Guard first so a blank reply can never match a keyword set.
        return ("unknown", None)
    low = t.lower()
    if low in _YES:
        return ("approve", None)
    if low in _NO:
        return ("reject", None)
    # Pass the original-case text so an edited value keeps its capitalisation.
    edit = _parse_edit(t)
    if edit:
        return ("edit", edit)
    return ("unknown", None)
def apply_edit(proposal, field, value):
    """Return a shallow copy of `proposal` with `field` set to `value`.

    The input proposal is not modified.
    """
    return {**proposal, field: value}
def same_fields(a, b):
    """Return True when two proposals agree on every content field.

    Used to detect a no-op natural-language revision, so the human is not told
    'Updated' when nothing changed. Either argument may be None or empty, which
    is treated as a proposal with no fields set.
    """
    left = a or {}
    right = b or {}
    for key in _CONTENT_FIELDS:
        if left.get(key) != right.get(key):
            return False
    return True
def interpret_disambiguation(text, n_candidates):
    """Classify a reply to a fuzzy-match shortlist.

    Returns ("pick", index) | ("new", None) | ("reject", None) | ("unknown", None).
    A bare 1-based number (optionally prefixed with '#') selects that candidate and
    is returned as a 0-based index; "new"/"none" style replies create a new
    investor; "no"/"cancel" style replies discard. Blank replies and out-of-range
    numbers are "unknown".
    """
    unknown = ("unknown", None)
    reply = (text or "").strip().lower()
    if reply == "":
        return unknown
    if reply in _NO:
        return ("reject", None)
    if reply in _NEW:
        return ("new", None)

    numbered = re.fullmatch(r"#?\s*(\d{1,2})", reply)
    if numbered is None:
        return unknown
    # The human counts from 1; candidates are indexed from 0.
    choice = int(numbered.group(1)) - 1
    if choice < 0 or choice >= n_candidates:
        return unknown
    return ("pick", choice)
def attach_to_candidate(proposal, candidate):
    """Turn a disambiguation pick into an approval-stage meeting note.

    Returns a copy of `proposal` without the shortlist, with `_stage` set to
    "approval", `_match_id` set to the candidate's id and `intent` set to
    "meeting_note". When the candidate has a non-empty name it replaces
    `investor_name`, so the card shows the chosen investor's name. The input
    proposal is not modified.
    """
    promoted = dict(proposal)
    promoted.pop("_candidates", None)
    promoted.update(
        _stage="approval",
        _match_id=candidate["id"],
        intent="meeting_note",
    )
    chosen_name = candidate.get("name")
    if chosen_name:
        promoted["investor_name"] = chosen_name
    return promoted
def promote_to_new(proposal):
    """Handle a disambiguation 'new' reply.

    Returns a copy of `proposal` with the shortlist and any match id removed and
    `_stage` set to "approval". The input proposal is not modified.
    """
    promoted = dict(proposal)
    for stale_key in ("_candidates", "_match_id"):
        promoted.pop(stale_key, None)
    promoted["_stage"] = "approval"
    return promoted
def render_disambiguation(proposal):
    """Render the fuzzy-match shortlist a human resolves before a new investor is created.

    The heading names the proposal (investor name, else contact name, else "?"),
    followed by one numbered line per entry in `_candidates` (numbered from 1),
    a blank line and the reply instructions.
    """
    display_name = proposal.get("investor_name") or proposal.get("contact_name") or "?"
    shortlist = proposal.get("_candidates") or []

    header = f"🔎 Before adding **{display_name}** as new — these existing investors look similar:"
    numbered = [
        f" **{position}.** {entry.get('name') or '?'}"
        for position, entry in enumerate(shortlist, 1)
    ]
    footer = ("Reply a **number** to log this against that investor, **new** to add it as a "
              "new investor, or **no** to discard.")
    return "\n".join([header, *numbered, "", footer])
def disambiguation_nudge(proposal):
    """Return the short main-timeline pointer for a disambiguation proposal.

    The shortlist itself is shown in the thread; this line only points there. The
    name shown is the investor name, else the contact name, else "?".
    """
    display_name = proposal.get("investor_name")
    if not display_name:
        display_name = proposal.get("contact_name") or "?"
    return (f"🔎 **{display_name}** may match an existing investor — open the **thread** to pick one "
            "or confirm it's new.")
def render(proposal):
    """Render a proposal as the in-thread card a human approves.

    The heading depends on `intent` ("meeting_note" vs. anything else, shown as a
    new investor) and names the investor (else the contact, else "?"). Each
    non-empty field then gets its own line, followed by a blank line and the
    reply instructions.
    """
    display_name = proposal.get("investor_name") or proposal.get("contact_name") or "?"
    if proposal.get("intent") == "meeting_note":
        heading = f"📝 Proposed **meeting note** for **{display_name}**"
    else:
        heading = f"📇 Proposed **new investor**: **{display_name}**"

    # (label shown on the card, proposal key), in display order
    labelled_keys = (
        ("Investor", "investor_name"),
        ("Contact", "contact_name"),
        ("Email", "contact_email"),
        ("Title", "contact_title"),
        ("Note", "note"),
    )
    card = [heading]
    for label, key in labelled_keys:
        shown = proposal.get(key)
        if not shown:
            continue  # empty / missing fields are left off the card
        card.append(f"· {label}: {shown}")
    card.append("")
    card.append("Reply **yes** to commit, **edit field=value** to change a field, or **no** to discard.")
    return "\n".join(card)
def summary_line(proposal):
    """Return the one-line main-timeline nudge for a proposal.

    The full card lives in the thread; this line only names the proposal
    (investor name, else contact name, else "?") and says whether it is a
    meeting note or a new investor.
    """
    display_name = proposal.get("investor_name") or proposal.get("contact_name") or "?"
    is_meeting_note = proposal.get("intent") == "meeting_note"
    if not is_meeting_note:
        return f"📇 Proposed a new investor: **{display_name}** — see the thread to review & approve."
    return f"📝 Proposed a meeting note for **{display_name}** — see the thread to review & approve."