Grid/contacts unification step 1: real contact_id link + grid as front door (v0.1.0:52)
Structural fix for the duplicate-people class of bug: instead of matching a grid contact "pill" to a contacts row heuristically by name/email (which drifted and caused the 1406 double-count), link them by id.

Backend:
- Migration 0004: fundraising_contacts.contact_id (additive, nullable, logical FK to contacts(id)) + index. Paired down-migration.
- sync_fundraising_relational now stores the id that _upsert_contact_from_fundraising already returns, so every grid contact carries its contacts-table id.
- _backfill_grid_contact_ids: one-time, idempotent backfill on startup (re-runs the grid sync once if any row lacks contact_id), so existing data links immediately.
- entity_resolution: grid pass prefers the explicit contact_id link (match_kind 'grid_link') over heuristic email / name+investor, guarded by a PRAGMA check so older DBs without the column still work.

Frontend:
- Fundraising grid "+ Row" -> "+ Investor" (clear, single investor entry point).
- Contacts page: the "+ Add Contact" trigger is replaced by a pointer to the grid; the page is now a read/search/edit view (ContactDetailPanel still edits all fields). New people are added from the grid. No contact data is removed.

Tests:
- backend/ingest/test_entity_resolution.py extended (explicit-link case, 11/11).
- New backend/test_grid_contact_link.py integration test (init_db applies 0004, sync populates contact_id to the right contact, re-sync is idempotent).
- py_compile + frontend html.parser clean.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -57,7 +57,7 @@ CREATE TABLE contacts (
|
||||
);
|
||||
CREATE TABLE organizations (id TEXT PRIMARY KEY, name TEXT, email TEXT);
|
||||
CREATE TABLE fundraising_investors (id TEXT PRIMARY KEY, investor_name TEXT);
|
||||
CREATE TABLE fundraising_contacts (id TEXT PRIMARY KEY, full_name TEXT, email TEXT, investor_id TEXT);
|
||||
CREATE TABLE fundraising_contacts (id TEXT PRIMARY KEY, full_name TEXT, email TEXT, investor_id TEXT, contact_id TEXT);
|
||||
CREATE TABLE lp_profiles (id TEXT PRIMARY KEY, contact_id TEXT, deleted_at TEXT);
|
||||
"""
|
||||
|
||||
@@ -72,15 +72,17 @@ def seed(db):
|
||||
("c1", "Alice", "Anderson", "alice@x.com", None), # email, no org
|
||||
("c2", "Bob", "Brown", None, None), # no email, no org
|
||||
("c3", "Dave", "Davis", None, "o1"), # no email, org = Acme
|
||||
("c4", "Frank", "Foster", "frank@x.com", None), # target of an explicit id link
|
||||
])
|
||||
c.executemany("INSERT INTO fundraising_investors (id, investor_name) VALUES (?,?)", [
|
||||
("i_acme", "Acme Capital"), ("i_beta", "Beta Family Office"),
|
||||
])
|
||||
c.executemany("INSERT INTO fundraising_contacts (id, full_name, email, investor_id) VALUES (?,?,?,?)", [
|
||||
("g_alice", "Alice Anderson", "alice@x.com", "i_beta"), # -> email match to c1
|
||||
("g_dave", "Dave Davis", None, "i_acme"), # -> name+investor match to c3
|
||||
("g_bob", "Bob Brown", None, "i_beta"), # -> MISS (c2 has no org) -> mint NOTHING
|
||||
("g_carol", "Carol Clark", None, "i_beta"), # -> MISS (no contact) -> mint NOTHING
|
||||
c.executemany("INSERT INTO fundraising_contacts (id, full_name, email, investor_id, contact_id) VALUES (?,?,?,?,?)", [
|
||||
("g_alice", "Alice Anderson", "alice@x.com", "i_beta", None), # -> email match to c1
|
||||
("g_dave", "Dave Davis", None, "i_acme", None), # -> name+investor match to c3
|
||||
("g_bob", "Bob Brown", None, "i_beta", None), # -> MISS (c2 has no org) -> mint NOTHING
|
||||
("g_carol", "Carol Clark", None, "i_beta", None), # -> MISS (no contact) -> mint NOTHING
|
||||
("g_link", "Totally Mismatched", None, "i_beta", "c4"), # -> explicit contact_id link wins over name/inv
|
||||
])
|
||||
# Stale grid-only "twin" (person, only a fundraising_contacts link, no enrichment) -> prune
|
||||
c.execute("INSERT INTO canonical_entities (id, entity_kind, display_name, source) VALUES "
|
||||
@@ -150,13 +152,15 @@ def main():
|
||||
counts1, _ = er.run(db)
|
||||
print(f"Run 1 counts: {counts1}")
|
||||
|
||||
# 3 contacts; grid rows either link back (g_alice, g_dave) or are skipped
|
||||
# (g_bob, g_carol). NO grid row mints a person -> count stays 3, not 5-7.
|
||||
check(resolved_persons(db) == 3, f"resolved persons == 3 (got {resolved_persons(db)}); old double-count would be 5-7")
|
||||
# 4 contacts; grid rows either link back (g_alice email, g_dave name+inv,
|
||||
# g_link explicit id) or are skipped (g_bob, g_carol). NO grid row mints a
|
||||
# person -> count stays 4, not 7-9.
|
||||
check(resolved_persons(db) == 4, f"resolved persons == 4 (got {resolved_persons(db)}); old double-count would be 7-9")
|
||||
check(minted_from_grid(db) == 0, f"zero persons minted from grid rows (got {minted_from_grid(db)})")
|
||||
|
||||
mk = grid_match_kinds(db)
|
||||
check(mk.get("grid_assoc", 0) == 2, f"two grid contacts matched back via grid_assoc (got {mk.get('grid_assoc',0)})")
|
||||
check(mk.get("grid_link", 0) == 1, f"one grid contact linked via explicit contact_id (grid_link==1, got {mk.get('grid_link',0)})")
|
||||
|
||||
# Targeted cleanup: stale grid-only twin + superseded 'lp' row tombstoned...
|
||||
check(deleted_at(db, "per_TWIN") is not None, "stale grid-only twin 'per_TWIN' tombstoned")
|
||||
@@ -174,7 +178,7 @@ def main():
|
||||
counts2, _ = er.run(db)
|
||||
print(f"Run 2 counts: {counts2}")
|
||||
check(deleted_at(db, alice) is None, "re-emitted contact-person is UN-tombstoned (no permanent burial)")
|
||||
check(resolved_persons(db) == 3, f"resolved persons stable at 3 on re-run (got {resolved_persons(db)})")
|
||||
check(resolved_persons(db) == 4, f"resolved persons stable at 4 on re-run (got {resolved_persons(db)})")
|
||||
check(counts2.get("pruned_stale", 0) == 0, f"nothing re-pruned on idempotent re-run (got {counts2.get('pruned_stale')})")
|
||||
|
||||
print()
|
||||
|
||||
Reference in New Issue
Block a user