dd2c34d7bc
- entity_resolution: emit member_of relationship edges (contact -> investor), so one investor entity owns many contacts (institution) and a HNWI is the N=1 case; crm_tools.get_investor_contacts + get_entity contacts/member_of; MCP tool. - seed_synthetic: multi-contact institutions to exercise it (Harbor & Vine = 5). - server.py: GET /api/system/status (index/entity/thesis/activity health) for an in-app status view (no shell needed to verify the index). - docs/thesis-seed-v1.md: grounded v1 thesis (throughline, 6 pillars, objections, per-segment angles, voice) drawn from Ten31's newsletter/site/essays. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
225 lines
10 KiB
Python
225 lines
10 KiB
Python
"""CRM MCP tool logic (Workstream C) — plain functions, transport-agnostic.
|
|
|
|
Kept separate from the MCP server wiring so it is unit-testable without the MCP
|
|
SDK. Each function returns JSON-serializable dicts. Reads go against the CRM
|
|
SQLite DB by path; retrieval wraps Spark Control /api/search; writes go through
|
|
the interaction_log (guardrail #5).
|
|
|
|
Tool surface:
|
|
reads get_entity, get_investor_contacts, search_records, get_interaction_history
|
|
retrieval semantic_search, hybrid_search, keyword_search
|
|
writes log_interaction, set_entity_enrichment
|
|
NO outbound/contact tools — that capability is gated to Phase 3.
|
|
"""
|
|
import json
|
|
import os
|
|
import sqlite3
|
|
import sys
|
|
import uuid
|
|
from datetime import datetime, timezone
|
|
|
|
sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "ingest"))
|
|
import config # noqa: E402
|
|
import search # noqa: E402
|
|
|
|
|
|
def _conn(db=None):
    """Open a connection to the CRM SQLite database.

    The path is the first truthy value among the ``db`` argument, the
    ``CRM_DB_PATH`` environment variable and ``config.DEFAULT_DB``. Rows are
    returned as ``sqlite3.Row`` and foreign-key enforcement is switched on for
    this connection. The caller owns the connection and must close it.
    """
    path = db or os.environ.get("CRM_DB_PATH") or config.DEFAULT_DB
    connection = sqlite3.connect(path)
    connection.row_factory = sqlite3.Row
    connection.execute("PRAGMA foreign_keys=ON")
    return connection
|
|
|
|
|
|
def _now():
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
|
|
|
|
|
|
# ── read tools ────────────────────────────────────────────────────────────────
|
|
|
|
def _contact_ids_for(c, lp_id):
    """Collect the ids of every contact that belongs to a canonical entity.

    Two sources are merged into one set: contacts linked to the entity directly
    through ``entity_links``, and contacts whose ``organization_id`` is one of
    the organizations linked to the entity.

    Args:
        c: open SQLite connection whose rows support access by column name.
        lp_id: canonical entity id.

    Returns:
        A set of contact ids (empty when nothing is linked).
    """
    contact_ids = set()
    direct_rows = c.execute(
        "SELECT source_id FROM entity_links WHERE canonical_id=? AND source_model='contacts'", (lp_id,))
    for row in direct_rows:
        contact_ids.add(row["source_id"])

    org_rows = c.execute(
        "SELECT source_id FROM entity_links WHERE canonical_id=? AND source_model='organizations'", (lp_id,))
    org_ids = [row["source_id"] for row in org_rows]
    if not org_ids:
        return contact_ids

    # One "?" placeholder per organization id keeps the IN (...) list parameterized.
    placeholders = ",".join("?" * len(org_ids))
    member_sql = "SELECT id FROM contacts WHERE organization_id IN (%s)" % placeholders
    for row in c.execute(member_sql, org_ids):
        contact_ids.add(row["id"])
    return contact_ids
|
|
|
|
|
|
def get_entity(lp_id, db=None):
    """Fetch a canonical entity with its links, interaction count and member_of edges.

    Args:
        lp_id: id of the row in ``canonical_entities``.
        db: optional SQLite path; resolved by ``_conn`` when omitted.

    Returns:
        ``{"error": "not_found", "lp_id": lp_id}`` when no such entity exists.
        Otherwise the entity row as a dict, extended with:

        * ``links`` - the entity's ``entity_links`` rows.
        * ``interaction_count`` - number of ``communications`` rows for the
          entity's contacts (0 when it has none).
        * ``contacts`` - non-deleted entities with a ``member_of`` edge pointing
          at this entity, ordered by display name.
        * ``member_of`` - non-deleted entities this entity points at through a
          ``member_of`` edge.
        * ``contact_count`` - ``len(contacts)``.
    """
    c = _conn(db)
    # try/finally so the connection is released on every path, including when
    # one of the queries raises.
    try:
        e = c.execute("SELECT * FROM canonical_entities WHERE id=?", (lp_id,)).fetchone()
        if not e:
            return {"error": "not_found", "lp_id": lp_id}
        out = dict(e)
        out["links"] = [dict(r) for r in c.execute(
            "SELECT source_model, source_id, match_kind, confidence FROM entity_links WHERE canonical_id=?", (lp_id,))]
        cids = _contact_ids_for(c, lp_id)
        # Skip the query entirely when there are no contacts: "IN ()" is not valid SQL.
        out["interaction_count"] = (c.execute(
            "SELECT COUNT(*) FROM communications WHERE contact_id IN (%s)" % ",".join("?" * len(cids)),
            list(cids)).fetchone()[0] if cids else 0)
        # An investor's contacts (member_of edges) — and, for a person, the investor(s)
        # they belong to. This is how one investor owns many contacts.
        out["contacts"] = [dict(r) for r in c.execute(
            "SELECT ce.id, ce.display_name, ce.primary_email FROM relationship_edges re "
            "JOIN canonical_entities ce ON ce.id=re.src_id "
            "WHERE re.dst_id=? AND re.edge_type='member_of' AND ce.deleted_at IS NULL ORDER BY ce.display_name", (lp_id,))]
        out["member_of"] = [dict(r) for r in c.execute(
            "SELECT ce.id, ce.display_name, ce.entity_kind FROM relationship_edges re "
            "JOIN canonical_entities ce ON ce.id=re.dst_id "
            "WHERE re.src_id=? AND re.edge_type='member_of' AND ce.deleted_at IS NULL", (lp_id,))]
        out["contact_count"] = len(out["contacts"])
        return out
    finally:
        c.close()
|
|
|
|
|
|
def get_investor_contacts(lp_id, db=None):
    """List the contacts (person entities) that belong to an investor entity.

    This is the explicit one-investor-to-many-contacts relationship: every
    non-deleted entity with a ``member_of`` edge pointing at ``lp_id``.

    Args:
        lp_id: canonical id of the investor entity.
        db: optional SQLite path; resolved by ``_conn`` when omitted.

    Returns:
        ``{"investor": {...} | None, "contacts": [...], "contact_count": int}``.
        ``investor`` is ``None`` when ``lp_id`` is not in ``canonical_entities``;
        ``contacts`` is ordered by display name.
    """
    c = _conn(db)
    # try/finally so the connection is released even if a query raises.
    try:
        inv = c.execute(
            "SELECT id, entity_kind, display_name FROM canonical_entities WHERE id=?", (lp_id,)).fetchone()
        contacts = [dict(r) for r in c.execute(
            "SELECT ce.id, ce.display_name, ce.primary_email FROM relationship_edges re "
            "JOIN canonical_entities ce ON ce.id=re.src_id "
            "WHERE re.dst_id=? AND re.edge_type='member_of' AND ce.deleted_at IS NULL ORDER BY ce.display_name", (lp_id,))]
    finally:
        c.close()
    return {"investor": dict(inv) if inv else None, "contacts": contacts, "contact_count": len(contacts)}
|
|
|
|
|
|
def search_records(query=None, entity_kind=None, limit=20, db=None):
    """Structured search over canonical entities (name substring + kind).

    Args:
        query: optional text matched as a literal, case-insensitive substring
            of ``display_name``. ``%`` and ``_`` are escaped, so they match
            themselves rather than acting as LIKE wildcards.
        entity_kind: optional exact match on ``entity_kind``.
        limit: value bound to the SQL ``LIMIT`` clause.
        db: optional SQLite path; resolved by ``_conn`` when omitted.

    Returns:
        ``{"results": [...], "count": len(results)}`` where each result carries
        id, entity_kind, display_name, primary_email, segment and warmth_score.
        Soft-deleted entities (``deleted_at`` set) are excluded.
    """
    sql = ("SELECT id, entity_kind, display_name, primary_email, segment, warmth_score "
           "FROM canonical_entities WHERE deleted_at IS NULL")
    args = []
    if entity_kind:
        sql += " AND entity_kind=?"
        args.append(entity_kind)
    if query:
        # Escape the escape character first, then the two LIKE metacharacters,
        # so user text such as "100%" or "a_b" is matched literally.
        needle = query.lower().replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        sql += " AND lower(display_name) LIKE ? ESCAPE '\\'"
        args.append(f"%{needle}%")
    sql += " ORDER BY display_name LIMIT ?"
    args.append(limit)

    c = _conn(db)
    # try/finally so the connection is released even if the query raises.
    try:
        rows = [dict(r) for r in c.execute(sql, args)]
    finally:
        c.close()
    return {"results": rows, "count": len(rows)}
|
|
|
|
|
|
def get_interaction_history(lp_id, limit=20, db=None):
    """Merged, dated interaction history for an entity: communications + grid notes.

    Args:
        lp_id: canonical entity id.
        limit: maximum number of items returned; also caps how many
            communications are read from the database.
        db: optional SQLite path; resolved by ``_conn`` when omitted.

    Returns:
        ``{"lp_id": lp_id, "items": [...], "count": len(items)}``. Items are
        sorted newest first by their ``date`` string (missing dates sort last)
        and each has ``kind``, ``date``, ``subject`` and ``text``. ``text`` is
        truncated to 240 characters for communications and 300 for grid notes.
        ``count`` is the number of items actually returned.
    """
    c = _conn(db)
    # try/finally so the connection is released even if a query raises.
    try:
        items = []
        cids = _contact_ids_for(c, lp_id)
        if cids:
            q = ("SELECT type, subject, body, communication_date FROM communications "
                 "WHERE contact_id IN (%s) ORDER BY communication_date DESC LIMIT ?" % ",".join("?" * len(cids)))
            for r in c.execute(q, [*cids, limit]):
                items.append({"kind": r["type"], "date": r["communication_date"],
                              "subject": r["subject"], "text": (r["body"] or "")[:240]})
        inv_src = [r["source_id"] for r in c.execute(
            "SELECT source_id FROM entity_links WHERE canonical_id=? AND source_model='fundraising_investors'", (lp_id,))]
        if inv_src:
            q = "SELECT notes, updated_at FROM fundraising_investors WHERE id IN (%s)" % ",".join("?" * len(inv_src))
            for r in c.execute(q, inv_src):
                # Only surface grid rows that actually carry a note.
                if (r["notes"] or "").strip():
                    items.append({"kind": "grid_note", "date": r["updated_at"],
                                  "subject": "Fundraising grid notes", "text": r["notes"][:300]})
    finally:
        c.close()
    items.sort(key=lambda x: (x["date"] or ""), reverse=True)
    # Truncate after merging both sources, then count what is returned so that
    # "count" always agrees with len(items), as in the other read tools.
    shown = items[:limit]
    return {"lp_id": lp_id, "items": shown, "count": len(shown)}
|
|
|
|
|
|
# ── retrieval tools (wrap /api/search) ────────────────────────────────────────
|
|
|
|
def _filter(lp_id=None, doc_type=None, date_from=None, date_to=None):
    """Build the search filter dict passed as ``filt`` to the search backend.

    Truthy ``lp_id`` / ``doc_type`` values become exact-match conditions; a
    ``date_from`` / ``date_to`` that is not ``None`` becomes a ``gte`` / ``lte``
    bound on ``date_ts``.

    Returns:
        ``{"must": [...]}`` with the conditions in the order lp_id, doc_type,
        date range, or ``None`` when no condition applies.
    """
    conditions = [
        {"key": field, "match": {"value": wanted}}
        for field, wanted in (("lp_id", lp_id), ("doc_type", doc_type))
        if wanted
    ]
    # Dates are compared against None (not truthiness) so a bound of 0 is kept.
    bounds = {
        op: value
        for op, value in (("gte", date_from), ("lte", date_to))
        if value is not None
    }
    if bounds:
        conditions.append({"key": "date_ts", "range": bounds})
    if not conditions:
        return None
    return {"must": conditions}
|
|
|
|
|
|
def _shape(rows):
    """Flatten raw search hits into the compact dicts returned by the tools.

    Each hit is read as a mapping with optional ``score``, ``text`` and
    ``payload`` keys. A missing or empty payload is treated as ``{}``, so the
    payload-derived fields come back as ``None`` and ``source`` as
    ``"None:None"``. The hit's own ``text`` wins over the payload's ``text``.
    """
    def _flatten(hit):
        payload = hit.get("payload") or {}
        return {
            "score": hit.get("score"),
            "lp_id": payload.get("lp_id"),
            "lp_name": payload.get("lp_name"),
            "doc_type": payload.get("doc_type"),
            "date_ts": payload.get("date_ts"),
            "text": hit.get("text") or payload.get("text"),
            "source": f"{payload.get('source_model')}:{payload.get('source_id')}",
        }

    return [_flatten(hit) for hit in rows]
|
|
|
|
|
|
def hybrid_search(query, top_k=8, lp_id=None, doc_type=None, date_from=None, date_to=None):
    """Dense + BM25 + rerank. Default mode; best for entity-heavy queries.

    The optional ``lp_id`` / ``doc_type`` / ``date_from`` / ``date_to`` arguments
    are turned into a filter by ``_filter`` and handed to
    ``search.hybrid_search``; the hits are flattened by ``_shape``.

    Returns:
        ``{"mode": "hybrid", "query": query, "results": [...]}``.
    """
    payload_filter = _filter(lp_id, doc_type, date_from, date_to)
    hits = search.hybrid_search(query, top_k=top_k, filt=payload_filter)
    return {"mode": "hybrid", "query": query, "results": _shape(hits)}
|
|
|
|
|
|
def semantic_search(query, top_k=8, lp_id=None, doc_type=None, date_from=None, date_to=None):
    """Dense only, high recall.

    The optional ``lp_id`` / ``doc_type`` / ``date_from`` / ``date_to`` arguments
    are turned into a filter by ``_filter`` and handed to
    ``search.semantic_search``; the hits are flattened by ``_shape``.

    Returns:
        ``{"mode": "semantic", "query": query, "results": [...]}``.
    """
    payload_filter = _filter(lp_id, doc_type, date_from, date_to)
    hits = search.semantic_search(query, top_k=top_k, filt=payload_filter)
    return {"mode": "semantic", "query": query, "results": _shape(hits)}
|
|
|
|
|
|
def keyword_search(query, top_k=8, lp_id=None, doc_type=None, date_from=None, date_to=None):
    """High-precision lexical (sparse leg + rerank).

    The optional ``lp_id`` / ``doc_type`` / ``date_from`` / ``date_to`` arguments
    are turned into a filter by ``_filter`` and handed to
    ``search.keyword_search``; the hits are flattened by ``_shape``.

    Returns:
        ``{"mode": "keyword", "query": query, "results": [...]}``.
    """
    payload_filter = _filter(lp_id, doc_type, date_from, date_to)
    hits = search.keyword_search(query, top_k=top_k, filt=payload_filter)
    return {"mode": "keyword", "query": query, "results": _shape(hits)}
|
|
|
|
|
|
# ── write tools (every write logged — guardrail #5) ───────────────────────────
|
|
|
|
def log_interaction(action, actor_type="agent", actor_id=None, target_id=None,
                    target_type="canonical_entity", payload=None, source="mcp", db=None):
    """Append an entry to the append-only interaction log.

    Args:
        action: name of the action being recorded.
        actor_type: kind of actor performing the action.
        actor_id: optional identifier of the actor.
        target_id: optional id of the object acted upon.
        target_type: kind of the target object.
        payload: optional JSON-serializable value stored as JSON text; ``None``
            is stored as SQL NULL.
        source: origin of the entry.
        db: optional SQLite path; resolved by ``_conn`` when omitted.

    Returns:
        ``{"id": <new uuid4 string>, "logged": True}``.

    Raises:
        TypeError: if ``payload`` cannot be serialized by ``json.dumps``.
    """
    # Serialize before opening the database so a bad payload cannot leave a
    # connection open.
    encoded = json.dumps(payload) if payload is not None else None
    iid = str(uuid.uuid4())
    # One timestamp for both columns so ts and created_at agree exactly.
    stamp = _now()
    c = _conn(db)
    # try/finally so the connection is released even if the INSERT raises.
    try:
        c.execute("""INSERT INTO interaction_log
            (id, ts, actor_type, actor_id, action, target_type, target_id, payload, source, created_at)
            VALUES (?,?,?,?,?,?,?,?,?,?)""",
                  (iid, stamp, actor_type, actor_id, action, target_type, target_id,
                   encoded, source, stamp))
        c.commit()
    finally:
        c.close()
    return {"id": iid, "logged": True}
|
|
|
|
|
|
# Whitelist of canonical_entities columns that set_entity_enrichment may write.
# Keys outside this set are dropped before the UPDATE statement is built.
_ENRICH_FIELDS = {
    "thesis_fit",
    "segment",
    "accreditation_status",
    "qp_status",
    "warmth_score",
    "source",
    "owner_id",
    "last_touch_at",
    "notes",
}
|
|
|
|
|
|
def set_entity_enrichment(lp_id, fields, actor_id="analyst", db=None):
    """One-way enrichment write INTO the canonical entity (guardrail #8). Logged.

    Args:
        lp_id: id of the ``canonical_entities`` row to update.
        fields: mapping of column name to new value; keys not in
            ``_ENRICH_FIELDS`` are ignored.
        actor_id: identifier recorded in the interaction log.
        db: optional SQLite path; resolved by ``_conn`` when omitted.

    Returns:
        * ``{"error": "no_valid_fields", "allowed": [...]}`` when no key of
          ``fields`` is whitelisted (nothing is written).
        * ``{"error": "not_found", "lp_id": lp_id}`` when no entity has that id
          (nothing is written or logged).
        * ``{"lp_id": lp_id, "updated": [...], "log_id": <uuid>}`` on success.

    Raises:
        TypeError: if a field value cannot be serialized by ``json.dumps``.
    """
    upd = {k: v for k, v in (fields or {}).items() if k in _ENRICH_FIELDS}
    if not upd:
        return {"error": "no_valid_fields", "allowed": sorted(_ENRICH_FIELDS)}
    # Serialize before opening the database so a bad value cannot leave a
    # connection open or a half-finished transaction behind.
    encoded = json.dumps(upd)
    iid = str(uuid.uuid4())
    # One timestamp so updated_at and the log entry agree exactly.
    stamp = _now()
    # Column names come only from the _ENRICH_FIELDS whitelist, so interpolating
    # them is safe; the values themselves stay parameterized.
    sets = ", ".join(f"{k}=?" for k in upd) + ", updated_at=?"
    c = _conn(db)
    # try/finally so the connection is released on every path. The UPDATE and
    # the log INSERT share one transaction, committed only when both succeed.
    try:
        cur = c.execute(f"UPDATE canonical_entities SET {sets} WHERE id=?", [*upd.values(), stamp, lp_id])
        if cur.rowcount == 0:
            # No such entity: do not record an "enrichment.written" entry for a
            # write that never happened.
            return {"error": "not_found", "lp_id": lp_id}
        c.execute("""INSERT INTO interaction_log
            (id, ts, actor_type, actor_id, action, target_type, target_id, payload, source, created_at)
            VALUES (?,?,?,?,?,?,?,?,?,?)""",
                  (iid, stamp, "agent", actor_id, "enrichment.written", "canonical_entity", lp_id,
                   encoded, "mcp", stamp))
        c.commit()
    finally:
        c.close()
    return {"lp_id": lp_id, "updated": list(upd.keys()), "log_id": iid}
|