Remove Instructions/Feedback + lp_profiles; sync retry, purge, mobile fixes (v0.1.0:104)
Removals (net -570 lines):
- Delete the Instructions and Feedback (feature_requests) pages + backend.
- Retire lp_profiles + investor_type across server, ingest, and seeds; migration 0008 drops both empty tables (a sanctioned one-off exception to never-hard-delete). 0001's lp_profiles ALTER is removed so a fresh DB doesn't break the migration chain (live DBs already applied it).

Fixes:
- Email sync: a transient timeout no longer terminally parks a mailbox; the scheduler retries 'retrying' mailboxes each cycle and re-includes errored accounts on an hourly backoff, so stuck mailboxes self-heal.
- Mobile Contacts: page through the full directory (server caps 500/page) -- one fetch silently truncated at 720, hiding people from the list and from search.
- Mobile email review: clock icon to set a reminder inline; approval cards show date/time.

New:
- Admin-only purge of soft-deleted rows (Settings -> Admin; type-to-confirm, refuses any row still linked to live data).

Tests: 45/45 (adds test_sync_ready + test_purge_soft_deleted). Reviewer pass applied (NULL reminders.contact_id on contact purge). Bumped to v0.1.0:104.
This commit is contained in:
@@ -4,7 +4,7 @@ Maps each CRM record type to one or more chunks per docs/EMBEDDINGS.md:
|
||||
* one chunk per communications row (doc_type = the comm type)
|
||||
* one chunk per MATCHED email (doc_type = email; body only when matched)
|
||||
* one chunk per fundraising_investors notes LINE (the outreach log; split per line)
|
||||
* one chunk each for free-text fields: contacts.notes, lp_profiles.notes,
|
||||
* one chunk each for free-text fields: contacts.notes,
|
||||
opportunities (description + next_step), organizations.description
|
||||
|
||||
Each chunk carries a canonical `lp_id` (resolved via entity_links) and a `date_ts`
|
||||
@@ -104,13 +104,6 @@ def build_chunks(conn):
|
||||
chunks.append(_mk(f"contacts.notes:{r['id']}", lp, lp_name, person,
|
||||
"contact_note", to_epoch(r["updated_at"]), r["notes"], "contacts", r["id"]))
|
||||
|
||||
# lp_profiles.notes
|
||||
for r in conn.execute("""SELECT lp.id, lp.contact_id, lp.notes, lp.updated_at
|
||||
FROM lp_profiles lp WHERE lp.notes IS NOT NULL AND lp.notes <> '' AND lp.deleted_at IS NULL"""):
|
||||
lp, lp_name, person = _contact_lp(r["contact_id"], person_canon, org_canon, name, contact_org)
|
||||
chunks.append(_mk(f"lp_profiles.notes:{r['id']}", lp, lp_name, person,
|
||||
"lp_note", to_epoch(r["updated_at"]), r["notes"], "lp_profiles", r["id"]))
|
||||
|
||||
# opportunities (description + next_step)
|
||||
for r in conn.execute("""SELECT id, contact_id, name, description, next_step, updated_at
|
||||
FROM opportunities WHERE deleted_at IS NULL"""):
|
||||
|
||||
@@ -8,7 +8,6 @@ layer created by migration 0001:
|
||||
fundraising_investors ─┴─► canonical_entities (entity_kind = lp | organization)
|
||||
contacts ─┐
|
||||
fundraising_contacts ─┴─► canonical_entities (entity_kind = person)
|
||||
lp_profiles ───► linked to its contact's person entity
|
||||
|
||||
Every source row is recorded in `entity_links` so any name variant resolves to
|
||||
one canonical id. This is the DETERMINISTIC tier — it merges only what we can
|
||||
@@ -184,7 +183,7 @@ def resolve_people(conn, org_canon_by_orgid, org_canon_by_fundinv, merge_map=Non
|
||||
people — each is matched to a contact-person and recorded only as a member_of
|
||||
edge to its investor entity (the grid's 'Contacts' column says who belongs to
|
||||
which investor). This is what stops the double-count.
|
||||
Returns contact_id -> person canonical id (for lp_profiles)."""
|
||||
Returns contact_id -> person canonical id."""
|
||||
merge_map = merge_map or {}
|
||||
contact_to_person = {}
|
||||
person_meta = {}
|
||||
@@ -245,12 +244,6 @@ def resolve_people(conn, org_canon_by_orgid, org_canon_by_fundinv, merge_map=Non
|
||||
_link(conn, cid, "fundraising_contacts", r["id"], email or name_norm, mk, 0.95 if mk == "grid_link" else 0.9)
|
||||
_member_of(conn, cid, inv_canon)
|
||||
|
||||
# lp_profiles -> the person entity of its contact
|
||||
for r in conn.execute("SELECT id, contact_id FROM lp_profiles WHERE deleted_at IS NULL"):
|
||||
cid = contact_to_person.get(r["contact_id"])
|
||||
if cid:
|
||||
_link(conn, cid, "lp_profiles", r["id"], r["contact_id"], "contact_fk", 1.0)
|
||||
|
||||
return person_meta
|
||||
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ import entity_resolution as er
|
||||
import qdrant_io
|
||||
|
||||
_CHANGE_TABLES = [("communications", "communications"), ("contacts", "contacts"),
|
||||
("lp_profiles", "lp_profiles"), ("opportunities", "opportunities"),
|
||||
("opportunities", "opportunities"),
|
||||
("organizations", "organizations"), ("fundraising_investors", "fundraising_investors")]
|
||||
|
||||
|
||||
@@ -63,7 +63,7 @@ def _state_set(conn, key, value):
|
||||
def _deleted_source_ids(conn, since):
|
||||
"""CRM records soft-deleted since the watermark — their chunks get pruned."""
|
||||
ids = set()
|
||||
for tbl in ("contacts", "organizations", "opportunities", "communications", "lp_profiles"):
|
||||
for tbl in ("contacts", "organizations", "opportunities", "communications"):
|
||||
try:
|
||||
for r in conn.execute(f"SELECT id FROM {tbl} WHERE deleted_at IS NOT NULL AND deleted_at > ?", (since,)):
|
||||
ids.add(r["id"])
|
||||
|
||||
@@ -12,7 +12,7 @@ Asserts the SAFE fix:
|
||||
3. a grid contact that can't be PROVABLY matched mints NOTHING (no duplicate
|
||||
person, no cross-firm name guess) — the count stays correct,
|
||||
4. targeted cleanup soft-deletes a stale grid-only "twin" (person with no
|
||||
contacts link) and a superseded 'lp'/'organization' row, with no enrichment,
|
||||
contacts link), with no enrichment,
|
||||
5. cleanup PRESERVES a grid-only person that carries enrichment (guardrail #3),
|
||||
6. a re-emitted id is UN-tombstoned (no permanent burial),
|
||||
7. re-running is idempotent.
|
||||
@@ -58,10 +58,9 @@ CREATE TABLE contacts (
|
||||
CREATE TABLE organizations (id TEXT PRIMARY KEY, name TEXT, email TEXT);
|
||||
CREATE TABLE fundraising_investors (id TEXT PRIMARY KEY, investor_name TEXT);
|
||||
CREATE TABLE fundraising_contacts (id TEXT PRIMARY KEY, full_name TEXT, email TEXT, investor_id TEXT, contact_id TEXT);
|
||||
CREATE TABLE lp_profiles (id TEXT PRIMARY KEY, contact_id TEXT, deleted_at TEXT);
|
||||
"""
|
||||
|
||||
SEEDED = ("per_TWIN", "per_ENR", "lp_OLD")
|
||||
SEEDED = ("per_TWIN", "per_ENR")
|
||||
|
||||
|
||||
def seed(db):
|
||||
@@ -94,16 +93,14 @@ def seed(db):
|
||||
"('per_ENR','person','Enriched Orphan','entity_resolution','warm')")
|
||||
c.execute("INSERT INTO entity_links (id, canonical_id, source_model, source_id, match_value, match_kind, confidence, created_at) "
|
||||
"VALUES ('l_enr','per_ENR','fundraising_contacts','gy','enr','name_org',0.8,'t')")
|
||||
# Superseded pre-:48 kind -> prune
|
||||
c.execute("INSERT INTO canonical_entities (id, entity_kind, display_name, source) VALUES "
|
||||
"('lp_OLD','lp','Old LP Row','entity_resolution')")
|
||||
c.commit()
|
||||
c.close()
|
||||
|
||||
|
||||
def resolved_persons(db):
|
||||
c = sqlite3.connect(db)
|
||||
q = "SELECT COUNT(*) FROM canonical_entities WHERE entity_kind='person' AND deleted_at IS NULL AND id NOT IN (?,?,?)"
|
||||
ph = ",".join("?" * len(SEEDED))
|
||||
q = f"SELECT COUNT(*) FROM canonical_entities WHERE entity_kind='person' AND deleted_at IS NULL AND id NOT IN ({ph})"
|
||||
n = c.execute(q, SEEDED).fetchone()[0]
|
||||
c.close()
|
||||
return n
|
||||
@@ -127,10 +124,11 @@ def grid_match_kinds(db):
|
||||
def minted_from_grid(db):
|
||||
"""Persons minted directly from a grid row (the bug). Should be 0 after the fix."""
|
||||
c = sqlite3.connect(db)
|
||||
n = c.execute("""SELECT COUNT(DISTINCT l.canonical_id) FROM entity_links l
|
||||
ph = ",".join("?" * len(SEEDED))
|
||||
n = c.execute(f"""SELECT COUNT(DISTINCT l.canonical_id) FROM entity_links l
|
||||
JOIN canonical_entities ce ON ce.id=l.canonical_id AND ce.deleted_at IS NULL
|
||||
WHERE l.source_model='fundraising_contacts' AND l.match_kind IN ('name_org','exact_email')
|
||||
AND l.canonical_id NOT IN (?,?,?)""", SEEDED).fetchone()[0]
|
||||
AND l.canonical_id NOT IN ({ph})""", SEEDED).fetchone()[0]
|
||||
c.close()
|
||||
return n
|
||||
|
||||
@@ -162,12 +160,11 @@ def main():
|
||||
check(mk.get("grid_assoc", 0) == 2, f"two grid contacts matched back via grid_assoc (got {mk.get('grid_assoc',0)})")
|
||||
check(mk.get("grid_link", 0) == 1, f"one grid contact linked via explicit contact_id (grid_link==1, got {mk.get('grid_link',0)})")
|
||||
|
||||
# Targeted cleanup: stale grid-only twin + superseded 'lp' row tombstoned...
|
||||
# Targeted cleanup: stale grid-only twin tombstoned...
|
||||
check(deleted_at(db, "per_TWIN") is not None, "stale grid-only twin 'per_TWIN' tombstoned")
|
||||
check(deleted_at(db, "lp_OLD") is not None, "superseded 'lp' row 'lp_OLD' tombstoned")
|
||||
# ...enriched grid-only person preserved.
|
||||
check(deleted_at(db, "per_ENR") is None, "enriched grid-only person 'per_ENR' PRESERVED (has segment)")
|
||||
check(counts1.get("pruned_stale", 0) == 2, f"exactly 2 stale rows pruned (got {counts1.get('pruned_stale')})")
|
||||
check(counts1.get("pruned_stale", 0) == 1, f"exactly 1 stale row pruned (got {counts1.get('pruned_stale')})")
|
||||
|
||||
# Un-tombstone: soft-delete a real contact-person, then re-run -> it comes back.
|
||||
alice = er._eid("per", "e|alice@x.com")
|
||||
|
||||
Reference in New Issue
Block a user