From 2afed210cbe4c089988647224b86b3441e6bb9c0 Mon Sep 17 00:00:00 2001 From: Keysat Date: Fri, 5 Jun 2026 15:10:26 -0500 Subject: [PATCH] Grid/contacts unification step 1: real contact_id link + grid as front door (v0.1.0:52) Structural fix for the duplicate-people class of bug: instead of matching a grid contact "pill" to a contacts row heuristically by name/email (which drifted and caused the 1406 double-count), link them by id. Backend: - Migration 0004: fundraising_contacts.contact_id (additive, nullable, logical FK to contacts(id)) + index. Ships with a paired manual down migration (0004_grid_contact_link.down.sql). - sync_fundraising_relational now stores the id that _upsert_contact_from_fundraising already returns, so every grid contact carries its contacts-table id. - _backfill_grid_contact_ids: idempotent backfill checked on every startup (re-runs the grid sync whenever some row still lacks contact_id and is a no-op once every row is linked), so existing data links immediately. - entity_resolution: grid pass prefers the explicit contact_id link (match_kind 'grid_link') over heuristic email / name+investor, guarded by a PRAGMA check so older DBs without the column still work. Frontend: - Fundraising grid "+ Row" -> "+ Investor" (clear, single investor entry point). - Contacts page: the "+ Add Contact" trigger is replaced by a pointer to the grid; the page is now a read/search/edit view (ContactDetailPanel still edits all fields). New people are added from the grid. No contact data is removed. Tests: backend/ingest/test_entity_resolution.py extended (explicit-link case, 11/11) and a new backend/test_grid_contact_link.py integration test (init_db applies 0004, sync populates contact_id to the right contact, re-sync is idempotent). py_compile + frontend html.parser clean. 
Co-Authored-By: Claude Opus 4.8 --- backend/ingest/entity_resolution.py | 36 ++++----- backend/ingest/test_entity_resolution.py | 24 +++--- .../0004_grid_contact_link.down.sql | 7 ++ backend/migrations/0004_grid_contact_link.sql | 17 +++++ backend/server.py | 49 ++++++++++-- backend/test_grid_contact_link.py | 76 +++++++++++++++++++ frontend/index.html | 4 +- start9/0.4/startos/utils.ts | 5 +- start9/0.4/startos/versions/index.ts | 5 +- start9/0.4/startos/versions/v0.1.0.52.ts | 21 +++++ 10 files changed, 203 insertions(+), 41 deletions(-) create mode 100644 backend/migrations/0004_grid_contact_link.down.sql create mode 100644 backend/migrations/0004_grid_contact_link.sql create mode 100644 backend/test_grid_contact_link.py create mode 100644 start9/0.4/startos/versions/v0.1.0.52.ts diff --git a/backend/ingest/entity_resolution.py b/backend/ingest/entity_resolution.py index 158008f..1162c3b 100644 --- a/backend/ingest/entity_resolution.py +++ b/backend/ingest/entity_resolution.py @@ -220,27 +220,29 @@ def resolve_people(conn, org_canon_by_orgid, org_canon_by_fundinv, merge_map=Non if cid: contact_to_person[r["id"]] = cid - # 2. Grid contacts are associations, not new people: match to a contact-person - # (by email, else name within the same investor) and just add membership. - # Only create a person when there is genuinely no matching contact. - for r in conn.execute("SELECT id, full_name, email, investor_id FROM fundraising_contacts"): + # 2. Grid contacts are associations, not new people: link each to its + # contacts-table person and record membership. We prefer the EXPLICIT + # contact_id link (migration 0004 — the grid pill stores the id of the + # contact it was created from), and fall back to provable email / exact name + # within the same investor for rows not yet backfilled. 
On a miss we + # deliberately do NOT mint a person: the old else-branch mint is exactly what + # produced the people double-count, and guessing by name across firms risks + # binding two different same-named people — honest separation, never on a guess. + fc_cols = {row[1] for row in conn.execute("PRAGMA table_info(fundraising_contacts)")} + has_contact_id = "contact_id" in fc_cols + sel = ("SELECT id, full_name, email, investor_id" + + (", contact_id" if has_contact_id else "") + " FROM fundraising_contacts") + for r in conn.execute(sel): email = norm_email(r["email"]) name_norm = norm_text(r["full_name"] or "") inv_canon = org_canon_by_fundinv.get(r["investor_id"]) - # Match the grid contact to its contacts-table person by PROVABLE keys only: - # exact email, else exact name within the SAME canonical investor. The app - # keeps the grid and the contacts table in sync (_upsert_contact_from_ - # fundraising), so a grid contact IS an existing contact-person, never a new - # one. On a confident match, record the membership. On a miss we deliberately - # do NOT mint a person: the old else-branch mint is exactly what produced the - # people double-count (a grid row whose (name, investor) key didn't line up - # with its contact minted a duplicate), and guessing by name across firms - # risks binding two different same-named people. Unresolved grid rows are - # left for the explicit contact_id link planned in the grid/contacts - # unification — honest separation: never merge or mint on a guess. 
- cid = (by_email.get(email) if email else None) or by_name_inv.get((name_norm, inv_canon or "")) + link_cid = r["contact_id"] if has_contact_id else None + cid = (contact_to_person.get(link_cid) if link_cid else None) \ + or (by_email.get(email) if email else None) \ + or by_name_inv.get((name_norm, inv_canon or "")) if cid: - _link(conn, cid, "fundraising_contacts", r["id"], email or name_norm, "grid_assoc", 0.9) + mk = "grid_link" if (link_cid and contact_to_person.get(link_cid)) else "grid_assoc" + _link(conn, cid, "fundraising_contacts", r["id"], email or name_norm, mk, 0.95 if mk == "grid_link" else 0.9) _member_of(conn, cid, inv_canon) # lp_profiles -> the person entity of its contact diff --git a/backend/ingest/test_entity_resolution.py b/backend/ingest/test_entity_resolution.py index d50d59e..6707488 100644 --- a/backend/ingest/test_entity_resolution.py +++ b/backend/ingest/test_entity_resolution.py @@ -57,7 +57,7 @@ CREATE TABLE contacts ( ); CREATE TABLE organizations (id TEXT PRIMARY KEY, name TEXT, email TEXT); CREATE TABLE fundraising_investors (id TEXT PRIMARY KEY, investor_name TEXT); -CREATE TABLE fundraising_contacts (id TEXT PRIMARY KEY, full_name TEXT, email TEXT, investor_id TEXT); +CREATE TABLE fundraising_contacts (id TEXT PRIMARY KEY, full_name TEXT, email TEXT, investor_id TEXT, contact_id TEXT); CREATE TABLE lp_profiles (id TEXT PRIMARY KEY, contact_id TEXT, deleted_at TEXT); """ @@ -72,15 +72,17 @@ def seed(db): ("c1", "Alice", "Anderson", "alice@x.com", None), # email, no org ("c2", "Bob", "Brown", None, None), # no email, no org ("c3", "Dave", "Davis", None, "o1"), # no email, org = Acme + ("c4", "Frank", "Foster", "frank@x.com", None), # target of an explicit id link ]) c.executemany("INSERT INTO fundraising_investors (id, investor_name) VALUES (?,?)", [ ("i_acme", "Acme Capital"), ("i_beta", "Beta Family Office"), ]) - c.executemany("INSERT INTO fundraising_contacts (id, full_name, email, investor_id) VALUES (?,?,?,?)", [ - 
("g_alice", "Alice Anderson", "alice@x.com", "i_beta"), # -> email match to c1 - ("g_dave", "Dave Davis", None, "i_acme"), # -> name+investor match to c3 - ("g_bob", "Bob Brown", None, "i_beta"), # -> MISS (c2 has no org) -> mint NOTHING - ("g_carol", "Carol Clark", None, "i_beta"), # -> MISS (no contact) -> mint NOTHING + c.executemany("INSERT INTO fundraising_contacts (id, full_name, email, investor_id, contact_id) VALUES (?,?,?,?,?)", [ + ("g_alice", "Alice Anderson", "alice@x.com", "i_beta", None), # -> email match to c1 + ("g_dave", "Dave Davis", None, "i_acme", None), # -> name+investor match to c3 + ("g_bob", "Bob Brown", None, "i_beta", None), # -> MISS (c2 has no org) -> mint NOTHING + ("g_carol", "Carol Clark", None, "i_beta", None), # -> MISS (no contact) -> mint NOTHING + ("g_link", "Totally Mismatched", None, "i_beta", "c4"), # -> explicit contact_id link wins over name/inv ]) # Stale grid-only "twin" (person, only a fundraising_contacts link, no enrichment) -> prune c.execute("INSERT INTO canonical_entities (id, entity_kind, display_name, source) VALUES " @@ -150,13 +152,15 @@ def main(): counts1, _ = er.run(db) print(f"Run 1 counts: {counts1}") - # 3 contacts; grid rows either link back (g_alice, g_dave) or are skipped - # (g_bob, g_carol). NO grid row mints a person -> count stays 3, not 5-7. - check(resolved_persons(db) == 3, f"resolved persons == 3 (got {resolved_persons(db)}); old double-count would be 5-7") + # 4 contacts; grid rows either link back (g_alice email, g_dave name+inv, + # g_link explicit id) or are skipped (g_bob, g_carol). NO grid row mints a + # person -> count stays 4, not 7-9. 
+ check(resolved_persons(db) == 4, f"resolved persons == 4 (got {resolved_persons(db)}); old double-count would be 7-9") check(minted_from_grid(db) == 0, f"zero persons minted from grid rows (got {minted_from_grid(db)})") mk = grid_match_kinds(db) check(mk.get("grid_assoc", 0) == 2, f"two grid contacts matched back via grid_assoc (got {mk.get('grid_assoc',0)})") + check(mk.get("grid_link", 0) == 1, f"one grid contact linked via explicit contact_id (grid_link==1, got {mk.get('grid_link',0)})") # Targeted cleanup: stale grid-only twin + superseded 'lp' row tombstoned... check(deleted_at(db, "per_TWIN") is not None, "stale grid-only twin 'per_TWIN' tombstoned") @@ -174,7 +178,7 @@ def main(): counts2, _ = er.run(db) print(f"Run 2 counts: {counts2}") check(deleted_at(db, alice) is None, "re-emitted contact-person is UN-tombstoned (no permanent burial)") - check(resolved_persons(db) == 3, f"resolved persons stable at 3 on re-run (got {resolved_persons(db)})") + check(resolved_persons(db) == 4, f"resolved persons stable at 4 on re-run (got {resolved_persons(db)})") check(counts2.get("pruned_stale", 0) == 0, f"nothing re-pruned on idempotent re-run (got {counts2.get('pruned_stale')})") print() diff --git a/backend/migrations/0004_grid_contact_link.down.sql b/backend/migrations/0004_grid_contact_link.down.sql new file mode 100644 index 0000000..3f10e4e --- /dev/null +++ b/backend/migrations/0004_grid_contact_link.down.sql @@ -0,0 +1,7 @@ +-- Reversal of 0004_grid_contact_link.sql (manual; .down files are never auto-applied). +-- +-- SQLite < 3.35 cannot DROP COLUMN. The added column is nullable and ignored by +-- any code path predating it, so leaving it in place is harmless. The index can be +-- dropped freely. On SQLite >= 3.35 the column itself may also be dropped. 
+DROP INDEX IF EXISTS idx_fundraising_contacts_contact; +-- ALTER TABLE fundraising_contacts DROP COLUMN contact_id; -- SQLite >= 3.35 only diff --git a/backend/migrations/0004_grid_contact_link.sql b/backend/migrations/0004_grid_contact_link.sql new file mode 100644 index 0000000..b583450 --- /dev/null +++ b/backend/migrations/0004_grid_contact_link.sql @@ -0,0 +1,17 @@ +-- Grid/contacts unification — explicit link from a fundraising-grid contact +-- "pill" to its row in the contacts table. +-- +-- ADDITIVE + REVERSIBLE (CLAUDE.md guardrail #3): adds one nullable column. +-- Until now a grid contact was tied to its contact only by name/email matching, +-- which drifted and produced the people double-count. fundraising_contacts.contact_id +-- records the real link (populated by sync_fundraising_relational, which already +-- upserts the contact and now stores its id). entity_resolution prefers this link +-- over heuristic matching. +-- +-- contact_id is a LOGICAL foreign key to contacts(id). It is intentionally NOT a +-- declared SQLite FOREIGN KEY: contacts are soft-deleted (never hard-deleted), so +-- there is nothing to cascade, and SQLite enforces a REFERENCES column added via +-- ALTER TABLE ADD COLUMN only while PRAGMA foreign_keys is ON. Nullable so existing rows are valid until backfilled. +ALTER TABLE fundraising_contacts ADD COLUMN contact_id TEXT; + +CREATE INDEX IF NOT EXISTS idx_fundraising_contacts_contact ON fundraising_contacts(contact_id); diff --git a/backend/server.py b/backend/server.py index 404b6e6..d30e37a 100644 --- a/backend/server.py +++ b/backend/server.py @@ -462,6 +462,13 @@ def init_db(): except Exception as _e: print(f"[migrations] core migration warning: {_e}") + # One-time: populate the new fundraising_contacts.contact_id (migration 0004) + # by re-running the grid→relational sync. No-op once every row is linked. 
+ try: + _backfill_grid_contact_ids(conn) + except Exception as _e: + print(f"[backfill] grid contact_id backfill warning: {_e}") + conn.close() print(f"Database initialized at {DB_PATH}") @@ -977,6 +984,32 @@ def _sync_contact_to_fundraising_state(conn, contact_row, actor_user_id=None, re """, (json.dumps(grid), json.dumps(next_views), next_version, actor_user_id, now())) sync_fundraising_relational(conn, grid, next_views, actor_user_id=actor_user_id) +def _backfill_grid_contact_ids(conn): + """One-time backfill for migration 0004: populate fundraising_contacts.contact_id + by re-running the grid→relational sync once. Fires only when the column exists + AND some row still lacks a contact_id, so it runs once after the migration and is + a no-op thereafter. Safe + idempotent: the fundraising_* tables are derived and + rebuilt on every sync, and _upsert_contact_from_fundraising matches existing + contacts by email/name (never creates a duplicate on re-run).""" + try: + need = conn.execute("SELECT 1 FROM fundraising_contacts WHERE contact_id IS NULL LIMIT 1").fetchone() + except sqlite3.OperationalError: + return # contact_id column not present (migration 0004 not applied) + if not need: + return + row = conn.execute("SELECT grid_json, views_json FROM fundraising_state WHERE id = 'main'").fetchone() + if not row or not row[0]: + return + try: + grid = json.loads(row[0]) + views = json.loads(row[1]) if row[1] else [] + except Exception: + return + sync_fundraising_relational(conn, sanitize_fundraising_grid(grid), views) + conn.commit() + print("[backfill] populated fundraising_contacts.contact_id from grid sync") + + def sync_fundraising_relational(conn, grid, views, actor_user_id=None): columns = grid.get('columns', []) if isinstance(grid, dict) else [] rows = grid.get('rows', []) if isinstance(grid, dict) else [] @@ -1084,23 +1117,23 @@ def sync_fundraising_relational(conn, grid, views, actor_user_id=None): contact_payload = dict(c) if lead_source and not 
str(contact_payload.get('source') or '').strip(): contact_payload['source'] = lead_source - _upsert_contact_from_fundraising(conn, investor_name, contact_payload, actor_user_id=actor_user_id) + linked_contact_id = _upsert_contact_from_fundraising(conn, investor_name, contact_payload, actor_user_id=actor_user_id) conn.execute(""" INSERT INTO fundraising_contacts ( - id, investor_id, full_name, email, title, city, state, country, location_query, sort_order, updated_at - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + id, investor_id, full_name, email, title, city, state, country, location_query, sort_order, contact_id, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( generate_id(), investor_id, full_name, email, str(c.get('title') or ''), str(c.get('city') or ''), str(c.get('state') or ''), str(c.get('country') or ''), - str(c.get('location_query') or ''), i, now() + str(c.get('location_query') or ''), i, linked_contact_id, now() )) elif isinstance(contacts, str) and contacts.strip(): - _upsert_contact_from_fundraising(conn, investor_name, {"name": contacts.strip(), "email": "", "title": "", "source": lead_source}, actor_user_id=actor_user_id) + linked_contact_id = _upsert_contact_from_fundraising(conn, investor_name, {"name": contacts.strip(), "email": "", "title": "", "source": lead_source}, actor_user_id=actor_user_id) conn.execute(""" INSERT INTO fundraising_contacts ( - id, investor_id, full_name, email, title, city, state, country, location_query, sort_order, updated_at - ) VALUES (?, ?, ?, '', '', '', '', '', '', 0, ?) - """, (generate_id(), investor_id, contacts.strip(), now())) + id, investor_id, full_name, email, title, city, state, country, location_query, sort_order, contact_id, updated_at + ) VALUES (?, ?, ?, '', '', '', '', '', '', 0, ?, ?) 
+ """, (generate_id(), investor_id, contacts.strip(), linked_contact_id, now())) conn.execute("DELETE FROM fundraising_commitments WHERE investor_id = ?", (investor_id,)) for _, col in fund_columns: diff --git a/backend/test_grid_contact_link.py b/backend/test_grid_contact_link.py new file mode 100644 index 0000000..2503152 --- /dev/null +++ b/backend/test_grid_contact_link.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 +"""Integration test for the grid→contact id-link wiring (migration 0004 + sync). + +Imports the real server module against a throwaway DB, runs init_db (which applies +the 0004 migration adding fundraising_contacts.contact_id), then drives a grid +sync and asserts the grid contact is linked to the contacts-table row the app +created for it. Verifies the end-to-end backend wiring the entity resolver relies on. + +Run: cd backend && python3 test_grid_contact_link.py +""" +import os +import sqlite3 +import sys +import tempfile + +_tmp = tempfile.mkdtemp() +os.environ["CRM_DATA_DIR"] = _tmp +os.environ["CRM_DB_PATH"] = os.path.join(_tmp, "crm.db") +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +import server # noqa: E402 + +FAILS = [] + + +def check(cond, msg): + print((" PASS " if cond else " FAIL ") + msg) + if not cond: + FAILS.append(msg) + + +def main(): + server.init_db() + conn = sqlite3.connect(server.DB_PATH) + conn.row_factory = sqlite3.Row + + cols = {r[1] for r in conn.execute("PRAGMA table_info(fundraising_contacts)")} + check("contact_id" in cols, "migration 0004 added fundraising_contacts.contact_id") + + grid = { + "columns": [ + {"id": "investor_name", "label": "Investor Name", "type": "text"}, + {"id": "contacts", "label": "Contacts", "type": "contacts"}, + ], + "rows": [ + {"id": "row-test-1", "investor_name": "Testco Capital", + "contacts": [{"name": "Jane Doe", "email": "jane@testco.com", "title": "Partner"}]}, + ], + } + server.sync_fundraising_relational(conn, server.sanitize_fundraising_grid(grid), []) + 
conn.commit() + + fc = conn.execute("SELECT full_name, contact_id FROM fundraising_contacts WHERE full_name='Jane Doe'").fetchone() + check(bool(fc and fc["contact_id"]), f"grid contact_id populated by sync (got {dict(fc) if fc else None})") + + if fc and fc["contact_id"]: + ct = conn.execute("SELECT id, email FROM contacts WHERE id=?", (fc["contact_id"],)).fetchone() + check(bool(ct and ct["email"] == "jane@testco.com"), + f"link points to the correct contacts row (got {dict(ct) if ct else None})") + + # Re-sync is idempotent: still exactly one linked contact for Jane. + server.sync_fundraising_relational(conn, server.sanitize_fundraising_grid(grid), []) + conn.commit() + n = conn.execute("SELECT COUNT(*) FROM contacts WHERE lower(email)='jane@testco.com'").fetchone()[0] + check(n == 1, f"re-sync does not duplicate the contact (got {n})") + + conn.close() + print() + if FAILS: + print(f"FAILED ({len(FAILS)})") + sys.exit(1) + print("ALL PASS (grid contact_id link wiring)") + + +if __name__ == "__main__": + main() diff --git a/frontend/index.html b/frontend/index.html index 2641726..733148c 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -3534,7 +3534,7 @@ value={search} onChange={(e) => setSearch(e.target.value)} /> - + New people are added from the Fundraising Grid; click anyone here to view or edit. {loading ? 
( @@ -7126,7 +7126,7 @@ {teamMembers.map((m) => )} - + diff --git a/start9/0.4/startos/utils.ts b/start9/0.4/startos/utils.ts index ba48654..97d0ead 100644 --- a/start9/0.4/startos/utils.ts +++ b/start9/0.4/startos/utils.ts @@ -16,8 +16,9 @@ export const PACKAGE_TITLE = 'Ten31 Database' // * 0.1.0:48 (entity model: investors vs people; fixes double-count) // * 0.1.0:49 (Architect: Claude thesis generation + Thesis Workshop screen) // * 0.1.0:50 (Set Anthropic API Key UI action — no terminal needed) -// * Current: 0.1.0:51 (entity-resolution fix: people double-count + duplicate queue) -export const PACKAGE_VERSION = '0.1.0:51' +// * 0.1.0:51 (entity-resolution fix: people double-count + duplicate queue) +// * Current: 0.1.0:52 (grid/contacts unification: contact_id link + grid as front door) +export const PACKAGE_VERSION = '0.1.0:52' export const DATA_MOUNT_PATH = '/data' export const WEB_PORT = 8080 diff --git a/start9/0.4/startos/versions/index.ts b/start9/0.4/startos/versions/index.ts index d50a147..dd81fda 100644 --- a/start9/0.4/startos/versions/index.ts +++ b/start9/0.4/startos/versions/index.ts @@ -12,8 +12,9 @@ import { v_0_1_0_48 } from './v0.1.0.48' import { v_0_1_0_49 } from './v0.1.0.49' import { v_0_1_0_50 } from './v0.1.0.50' import { v_0_1_0_51 } from './v0.1.0.51' +import { v_0_1_0_52 } from './v0.1.0.52' export const versionGraph = VersionGraph.of({ - current: v_0_1_0_51, - other: [v_0_1_0_39, v_0_1_0_40, v_0_1_0_41, v_0_1_0_42, v_0_1_0_43, v_0_1_0_44, v_0_1_0_45, v_0_1_0_46, v_0_1_0_47, v_0_1_0_48, v_0_1_0_49, v_0_1_0_50], + current: v_0_1_0_52, + other: [v_0_1_0_39, v_0_1_0_40, v_0_1_0_41, v_0_1_0_42, v_0_1_0_43, v_0_1_0_44, v_0_1_0_45, v_0_1_0_46, v_0_1_0_47, v_0_1_0_48, v_0_1_0_49, v_0_1_0_50, v_0_1_0_51], }) diff --git a/start9/0.4/startos/versions/v0.1.0.52.ts b/start9/0.4/startos/versions/v0.1.0.52.ts new file mode 100644 index 0000000..151456d --- /dev/null +++ b/start9/0.4/startos/versions/v0.1.0.52.ts @@ -0,0 +1,21 @@ +import { VersionInfo 
} from '@start9labs/start-sdk' + +// Grid/contacts unification (step 1): the fundraising grid is the single front +// door. Adds a real link (fundraising_contacts.contact_id, migration 0004) from +// each grid contact "pill" to its contacts-table row — populated by the grid sync +// and backfilled once on upgrade — so entity resolution links by id instead of +// heuristic name/email matching (ending the duplicate-people class of bug). UI: +// the grid's "+ Row" is now "+ Investor"; the Contacts page is a read/search/edit +// view (new people are added from the grid). Additive migration; no data loss. +export const v_0_1_0_52 = VersionInfo.of({ + version: '0.1.0:52', + releaseNotes: { + en_US: [ + 'Grid/contacts unification: the Fundraising Grid is now the single place to', + 'add investors and contacts, each grid contact is linked to its contact record', + 'by a real id (no more name-matching), and the Contacts page becomes a', + 'view/search/edit list. Existing links are backfilled automatically on upgrade.', + ].join(' '), + }, + migrations: { up: async () => {}, down: async () => {} }, +})