Remove Instructions/Feedback + lp_profiles; sync retry, purge, mobile fixes (v0.1.0:104)

Removals (net -570 lines):
- Delete the Instructions and Feedback (feature_requests) pages + backend.
- Retire lp_profiles + investor_type across server, ingest, and seeds; migration
  0008 drops both empty tables (a sanctioned one-off exception to the
  never-hard-delete rule). Migration 0001's lp_profiles ALTER is removed so a
  fresh DB doesn't break the migration chain (live DBs have already applied it).

Fixes:
- Email sync: a transient timeout no longer terminally parks a mailbox; the
  scheduler retries accounts in the 'retrying' state each cycle and re-includes
  errored accounts on an hourly backoff, so stuck mailboxes self-heal.
- Mobile Contacts: page through the full directory (server caps 500/page) -- one
  fetch silently truncated at 720, hiding people from the list and from search.
- Mobile email review: add a clock icon to set a reminder inline; approval cards
  now show date/time.

New:
- Admin-only purge of soft-deleted rows (Settings -> Admin; type-to-confirm,
  refuses any row still linked to live data).

Tests: 45/45 passing (adds test_sync_ready + test_purge_soft_deleted). Reviewer
feedback applied (set reminders.contact_id to NULL on contact purge). Bumped to
v0.1.0:104.
This commit is contained in:
Keysat
2026-06-20 20:06:11 -05:00
parent 985cba3c81
commit 1564c087bf
21 changed files with 629 additions and 694 deletions
+1 -8
View File
@@ -4,7 +4,7 @@ Maps each CRM record type to one or more chunks per docs/EMBEDDINGS.md:
* one chunk per communications row (doc_type = the comm type)
* one chunk per MATCHED email (doc_type = email; body only when matched)
* one chunk per fundraising_investors notes LINE (the outreach log; split per line)
* one chunk each for free-text fields: contacts.notes, lp_profiles.notes,
* one chunk each for free-text fields: contacts.notes,
opportunities (description + next_step), organizations.description
Each chunk carries a canonical `lp_id` (resolved via entity_links) and a `date_ts`
@@ -104,13 +104,6 @@ def build_chunks(conn):
chunks.append(_mk(f"contacts.notes:{r['id']}", lp, lp_name, person,
"contact_note", to_epoch(r["updated_at"]), r["notes"], "contacts", r["id"]))
# lp_profiles.notes
for r in conn.execute("""SELECT lp.id, lp.contact_id, lp.notes, lp.updated_at
FROM lp_profiles lp WHERE lp.notes IS NOT NULL AND lp.notes <> '' AND lp.deleted_at IS NULL"""):
lp, lp_name, person = _contact_lp(r["contact_id"], person_canon, org_canon, name, contact_org)
chunks.append(_mk(f"lp_profiles.notes:{r['id']}", lp, lp_name, person,
"lp_note", to_epoch(r["updated_at"]), r["notes"], "lp_profiles", r["id"]))
# opportunities (description + next_step)
for r in conn.execute("""SELECT id, contact_id, name, description, next_step, updated_at
FROM opportunities WHERE deleted_at IS NULL"""):
+1 -8
View File
@@ -8,7 +8,6 @@ layer created by migration 0001:
fundraising_investors ─┴─► canonical_entities (entity_kind = lp | organization)
contacts ─┐
fundraising_contacts ─┴─► canonical_entities (entity_kind = person)
lp_profiles ───► linked to its contact's person entity
Every source row is recorded in `entity_links` so any name variant resolves to
one canonical id. This is the DETERMINISTIC tier — it merges only what we can
@@ -184,7 +183,7 @@ def resolve_people(conn, org_canon_by_orgid, org_canon_by_fundinv, merge_map=Non
people — each is matched to a contact-person and recorded only as a member_of
edge to its investor entity (the grid's 'Contacts' column says who belongs to
which investor). This is what stops the double-count.
Returns contact_id -> person canonical id (for lp_profiles)."""
Returns contact_id -> person canonical id."""
merge_map = merge_map or {}
contact_to_person = {}
person_meta = {}
@@ -245,12 +244,6 @@ def resolve_people(conn, org_canon_by_orgid, org_canon_by_fundinv, merge_map=Non
_link(conn, cid, "fundraising_contacts", r["id"], email or name_norm, mk, 0.95 if mk == "grid_link" else 0.9)
_member_of(conn, cid, inv_canon)
# lp_profiles -> the person entity of its contact
for r in conn.execute("SELECT id, contact_id FROM lp_profiles WHERE deleted_at IS NULL"):
cid = contact_to_person.get(r["contact_id"])
if cid:
_link(conn, cid, "lp_profiles", r["id"], r["contact_id"], "contact_fk", 1.0)
return person_meta
+2 -2
View File
@@ -34,7 +34,7 @@ import entity_resolution as er
import qdrant_io
_CHANGE_TABLES = [("communications", "communications"), ("contacts", "contacts"),
("lp_profiles", "lp_profiles"), ("opportunities", "opportunities"),
("opportunities", "opportunities"),
("organizations", "organizations"), ("fundraising_investors", "fundraising_investors")]
@@ -63,7 +63,7 @@ def _state_set(conn, key, value):
def _deleted_source_ids(conn, since):
"""CRM records soft-deleted since the watermark — their chunks get pruned."""
ids = set()
for tbl in ("contacts", "organizations", "opportunities", "communications", "lp_profiles"):
for tbl in ("contacts", "organizations", "opportunities", "communications"):
try:
for r in conn.execute(f"SELECT id FROM {tbl} WHERE deleted_at IS NOT NULL AND deleted_at > ?", (since,)):
ids.add(r["id"])
+9 -12
View File
@@ -12,7 +12,7 @@ Asserts the SAFE fix:
3. a grid contact that can't be PROVABLY matched mints NOTHING (no duplicate
person, no cross-firm name guess) — the count stays correct,
4. targeted cleanup soft-deletes a stale grid-only "twin" (person with no
contacts link) and a superseded 'lp'/'organization' row, with no enrichment,
contacts link), with no enrichment,
5. cleanup PRESERVES a grid-only person that carries enrichment (guardrail #3),
6. a re-emitted id is UN-tombstoned (no permanent burial),
7. re-running is idempotent.
@@ -58,10 +58,9 @@ CREATE TABLE contacts (
CREATE TABLE organizations (id TEXT PRIMARY KEY, name TEXT, email TEXT);
CREATE TABLE fundraising_investors (id TEXT PRIMARY KEY, investor_name TEXT);
CREATE TABLE fundraising_contacts (id TEXT PRIMARY KEY, full_name TEXT, email TEXT, investor_id TEXT, contact_id TEXT);
CREATE TABLE lp_profiles (id TEXT PRIMARY KEY, contact_id TEXT, deleted_at TEXT);
"""
SEEDED = ("per_TWIN", "per_ENR", "lp_OLD")
SEEDED = ("per_TWIN", "per_ENR")
def seed(db):
@@ -94,16 +93,14 @@ def seed(db):
"('per_ENR','person','Enriched Orphan','entity_resolution','warm')")
c.execute("INSERT INTO entity_links (id, canonical_id, source_model, source_id, match_value, match_kind, confidence, created_at) "
"VALUES ('l_enr','per_ENR','fundraising_contacts','gy','enr','name_org',0.8,'t')")
# Superseded pre-:48 kind -> prune
c.execute("INSERT INTO canonical_entities (id, entity_kind, display_name, source) VALUES "
"('lp_OLD','lp','Old LP Row','entity_resolution')")
c.commit()
c.close()
def resolved_persons(db):
c = sqlite3.connect(db)
q = "SELECT COUNT(*) FROM canonical_entities WHERE entity_kind='person' AND deleted_at IS NULL AND id NOT IN (?,?,?)"
ph = ",".join("?" * len(SEEDED))
q = f"SELECT COUNT(*) FROM canonical_entities WHERE entity_kind='person' AND deleted_at IS NULL AND id NOT IN ({ph})"
n = c.execute(q, SEEDED).fetchone()[0]
c.close()
return n
@@ -127,10 +124,11 @@ def grid_match_kinds(db):
def minted_from_grid(db):
"""Persons minted directly from a grid row (the bug). Should be 0 after the fix."""
c = sqlite3.connect(db)
n = c.execute("""SELECT COUNT(DISTINCT l.canonical_id) FROM entity_links l
ph = ",".join("?" * len(SEEDED))
n = c.execute(f"""SELECT COUNT(DISTINCT l.canonical_id) FROM entity_links l
JOIN canonical_entities ce ON ce.id=l.canonical_id AND ce.deleted_at IS NULL
WHERE l.source_model='fundraising_contacts' AND l.match_kind IN ('name_org','exact_email')
AND l.canonical_id NOT IN (?,?,?)""", SEEDED).fetchone()[0]
AND l.canonical_id NOT IN ({ph})""", SEEDED).fetchone()[0]
c.close()
return n
@@ -162,12 +160,11 @@ def main():
check(mk.get("grid_assoc", 0) == 2, f"two grid contacts matched back via grid_assoc (got {mk.get('grid_assoc',0)})")
check(mk.get("grid_link", 0) == 1, f"one grid contact linked via explicit contact_id (grid_link==1, got {mk.get('grid_link',0)})")
# Targeted cleanup: stale grid-only twin + superseded 'lp' row tombstoned...
# Targeted cleanup: stale grid-only twin tombstoned...
check(deleted_at(db, "per_TWIN") is not None, "stale grid-only twin 'per_TWIN' tombstoned")
check(deleted_at(db, "lp_OLD") is not None, "superseded 'lp' row 'lp_OLD' tombstoned")
# ...enriched grid-only person preserved.
check(deleted_at(db, "per_ENR") is None, "enriched grid-only person 'per_ENR' PRESERVED (has segment)")
check(counts1.get("pruned_stale", 0) == 2, f"exactly 2 stale rows pruned (got {counts1.get('pruned_stale')})")
check(counts1.get("pruned_stale", 0) == 1, f"exactly 1 stale row pruned (got {counts1.get('pruned_stale')})")
# Un-tombstone: soft-delete a real contact-person, then re-run -> it comes back.
alice = er._eid("per", "e|alice@x.com")