"""CRM MCP tool logic (Workstream C) — plain functions, transport-agnostic. Kept separate from the MCP server wiring so it is unit-testable without the MCP SDK. Each function returns JSON-serializable dicts. Reads go against the CRM SQLite DB by path; retrieval wraps Spark Control /api/search; writes go through the interaction_log (guardrail #5). Tool surface: reads get_entity, search_records, get_interaction_history retrieval semantic_search, hybrid_search, keyword_search writes log_interaction, set_entity_enrichment NO outbound/contact tools — that capability is gated to Phase 3. """ import json import os import sqlite3 import sys import uuid from datetime import datetime, timezone sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "ingest")) import config # noqa: E402 import search # noqa: E402 def _conn(db=None): c = sqlite3.connect(db or os.environ.get("CRM_DB_PATH") or config.DEFAULT_DB) c.row_factory = sqlite3.Row c.execute("PRAGMA foreign_keys=ON") return c def _now(): return datetime.now(timezone.utc).isoformat() # ── read tools ──────────────────────────────────────────────────────────────── def _contact_ids_for(c, lp_id): """All contact ids belonging to a canonical entity: directly linked contacts plus contacts whose organization resolves to this entity.""" ids = {r["source_id"] for r in c.execute( "SELECT source_id FROM entity_links WHERE canonical_id=? AND source_model='contacts'", (lp_id,))} org_src = [r["source_id"] for r in c.execute( "SELECT source_id FROM entity_links WHERE canonical_id=? AND source_model='organizations'", (lp_id,))] if org_src: q = "SELECT id FROM contacts WHERE organization_id IN (%s)" % ",".join("?" * len(org_src)) ids.update(r["id"] for r in c.execute(q, org_src)) return ids def get_entity(lp_id, db=None): """Fetch a canonical entity + its linked source rows and interaction count.""" c = _conn(db) e = c.execute("SELECT * FROM canonical_entities WHERE id=?", (lp_id,)).fetchone() if not e: c.close() return {"error": "not_found", "lp_id": lp_id} out = dict(e) out["links"] = [dict(r) for r in c.execute( "SELECT source_model, source_id, match_kind, confidence FROM entity_links WHERE canonical_id=?", (lp_id,))] cids = _contact_ids_for(c, lp_id) out["interaction_count"] = (c.execute( "SELECT COUNT(*) FROM communications WHERE contact_id IN (%s)" % ",".join("?" * len(cids)), list(cids)).fetchone()[0] if cids else 0) c.close() return out def search_records(query=None, entity_kind=None, limit=20, db=None): """Structured search over canonical entities (name substring + kind).""" c = _conn(db) sql = ("SELECT id, entity_kind, display_name, primary_email, segment, warmth_score " "FROM canonical_entities WHERE deleted_at IS NULL") args = [] if entity_kind: sql += " AND entity_kind=?" args.append(entity_kind) if query: sql += " AND lower(display_name) LIKE ?" args.append(f"%{query.lower()}%") sql += " ORDER BY display_name LIMIT ?" args.append(limit) rows = [dict(r) for r in c.execute(sql, args)] c.close() return {"results": rows, "count": len(rows)} def get_interaction_history(lp_id, limit=20, db=None): """Merged, dated interaction history for an entity: communications + grid notes.""" c = _conn(db) items = [] cids = _contact_ids_for(c, lp_id) if cids: q = ("SELECT type, subject, body, communication_date FROM communications " "WHERE contact_id IN (%s) ORDER BY communication_date DESC LIMIT ?" % ",".join("?" * len(cids))) for r in c.execute(q, [*cids, limit]): items.append({"kind": r["type"], "date": r["communication_date"], "subject": r["subject"], "text": (r["body"] or "")[:240]}) inv_src = [r["source_id"] for r in c.execute( "SELECT source_id FROM entity_links WHERE canonical_id=? AND source_model='fundraising_investors'", (lp_id,))] if inv_src: q = "SELECT notes, updated_at FROM fundraising_investors WHERE id IN (%s)" % ",".join("?" * len(inv_src)) for r in c.execute(q, inv_src): if (r["notes"] or "").strip(): items.append({"kind": "grid_note", "date": r["updated_at"], "subject": "Fundraising grid notes", "text": r["notes"][:300]}) c.close() items.sort(key=lambda x: (x["date"] or ""), reverse=True) return {"lp_id": lp_id, "items": items[:limit], "count": len(items)} # ── retrieval tools (wrap /api/search) ──────────────────────────────────────── def _filter(lp_id=None, doc_type=None, date_from=None, date_to=None): must = [] if lp_id: must.append({"key": "lp_id", "match": {"value": lp_id}}) if doc_type: must.append({"key": "doc_type", "match": {"value": doc_type}}) if date_from is not None or date_to is not None: rng = {} if date_from is not None: rng["gte"] = date_from if date_to is not None: rng["lte"] = date_to must.append({"key": "date_ts", "range": rng}) return {"must": must} if must else None def _shape(rows): out = [] for r in rows: p = r.get("payload", {}) or {} out.append({"score": r.get("score"), "lp_id": p.get("lp_id"), "lp_name": p.get("lp_name"), "doc_type": p.get("doc_type"), "date_ts": p.get("date_ts"), "text": r.get("text") or p.get("text"), "source": f"{p.get('source_model')}:{p.get('source_id')}"}) return out def hybrid_search(query, top_k=8, lp_id=None, doc_type=None, date_from=None, date_to=None): """Dense + BM25 + rerank. Default mode; best for entity-heavy queries.""" return {"mode": "hybrid", "query": query, "results": _shape(search.hybrid_search(query, top_k=top_k, filt=_filter(lp_id, doc_type, date_from, date_to)))} def semantic_search(query, top_k=8, lp_id=None, doc_type=None, date_from=None, date_to=None): """Dense only, high recall.""" return {"mode": "semantic", "query": query, "results": _shape(search.semantic_search(query, top_k=top_k, filt=_filter(lp_id, doc_type, date_from, date_to)))} def keyword_search(query, top_k=8, lp_id=None, doc_type=None, date_from=None, date_to=None): """High-precision lexical (sparse leg + rerank).""" return {"mode": "keyword", "query": query, "results": _shape(search.keyword_search(query, top_k=top_k, filt=_filter(lp_id, doc_type, date_from, date_to)))} # ── write tools (every write logged — guardrail #5) ─────────────────────────── def log_interaction(action, actor_type="agent", actor_id=None, target_id=None, target_type="canonical_entity", payload=None, source="mcp", db=None): """Append an entry to the append-only interaction log.""" c = _conn(db) iid = str(uuid.uuid4()) c.execute("""INSERT INTO interaction_log (id, ts, actor_type, actor_id, action, target_type, target_id, payload, source, created_at) VALUES (?,?,?,?,?,?,?,?,?,?)""", (iid, _now(), actor_type, actor_id, action, target_type, target_id, json.dumps(payload) if payload is not None else None, source, _now())) c.commit() c.close() return {"id": iid, "logged": True} _ENRICH_FIELDS = {"thesis_fit", "segment", "accreditation_status", "qp_status", "warmth_score", "source", "owner_id", "last_touch_at", "notes"} def set_entity_enrichment(lp_id, fields, actor_id="analyst", db=None): """One-way enrichment write INTO the canonical entity (guardrail #8). Logged.""" upd = {k: v for k, v in (fields or {}).items() if k in _ENRICH_FIELDS} if not upd: return {"error": "no_valid_fields", "allowed": sorted(_ENRICH_FIELDS)} c = _conn(db) sets = ", ".join(f"{k}=?" for k in upd) + ", updated_at=?" c.execute(f"UPDATE canonical_entities SET {sets} WHERE id=?", [*upd.values(), _now(), lp_id]) iid = str(uuid.uuid4()) c.execute("""INSERT INTO interaction_log (id, ts, actor_type, actor_id, action, target_type, target_id, payload, source, created_at) VALUES (?,?,?,?,?,?,?,?,?,?)""", (iid, _now(), "agent", actor_id, "enrichment.written", "canonical_entity", lp_id, json.dumps(upd), "mcp", _now())) c.commit() c.close() return {"lp_id": lp_id, "updated": list(upd.keys()), "log_id": iid}