Soft-delete + source-count diagnostics; thesis v4 (0.1.0:47)
- DELETE handlers soft-delete (set deleted_at) and cascade contact -> opps/comms/lp instead of hard-deleting (guardrail #3); list queries filter out deleted rows.
- ingest: chunking excludes soft-deleted records; qdrant delete-by-source-id; sync incrementally prunes soft-deleted records' vectors.
- /api/system/status returns raw source-record counts for sanity-checking.
- docs/thesis-seed-v4.md (no "bet" language, scarcity-forward, freedom-tech as a banner option, tightened pillars, reworked segments + edge).

Soft-delete verified via the running HTTP server (delete -> hidden + row kept).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -91,7 +91,7 @@ def build_chunks(conn):
|
||||
|
||||
# communications
|
||||
for r in conn.execute("""SELECT id, contact_id, type, subject, body, outcome, next_action, communication_date
|
||||
FROM communications"""):
|
||||
FROM communications WHERE deleted_at IS NULL"""):
|
||||
lp, lp_name, person = _contact_lp(r["contact_id"], person_canon, org_canon, name, contact_org)
|
||||
parts = [p for p in (r["subject"], r["body"], r["outcome"], r["next_action"]) if (p or "").strip()]
|
||||
chunks.append(_mk(f"communications:{r['id']}", lp, lp_name, person,
|
||||
@@ -99,21 +99,21 @@ def build_chunks(conn):
|
||||
"\n".join(parts), "communications", r["id"]))
|
||||
|
||||
# contacts.notes
|
||||
for r in conn.execute("SELECT id, notes, updated_at FROM contacts WHERE notes IS NOT NULL AND notes <> ''"):
|
||||
for r in conn.execute("SELECT id, notes, updated_at FROM contacts WHERE notes IS NOT NULL AND notes <> '' AND deleted_at IS NULL"):
|
||||
lp, lp_name, person = _contact_lp(r["id"], person_canon, org_canon, name, contact_org)
|
||||
chunks.append(_mk(f"contacts.notes:{r['id']}", lp, lp_name, person,
|
||||
"contact_note", to_epoch(r["updated_at"]), r["notes"], "contacts", r["id"]))
|
||||
|
||||
# lp_profiles.notes
|
||||
for r in conn.execute("""SELECT lp.id, lp.contact_id, lp.notes, lp.updated_at
|
||||
FROM lp_profiles lp WHERE lp.notes IS NOT NULL AND lp.notes <> ''"""):
|
||||
FROM lp_profiles lp WHERE lp.notes IS NOT NULL AND lp.notes <> '' AND lp.deleted_at IS NULL"""):
|
||||
lp, lp_name, person = _contact_lp(r["contact_id"], person_canon, org_canon, name, contact_org)
|
||||
chunks.append(_mk(f"lp_profiles.notes:{r['id']}", lp, lp_name, person,
|
||||
"lp_note", to_epoch(r["updated_at"]), r["notes"], "lp_profiles", r["id"]))
|
||||
|
||||
# opportunities (description + next_step)
|
||||
for r in conn.execute("""SELECT id, contact_id, name, description, next_step, updated_at
|
||||
FROM opportunities"""):
|
||||
FROM opportunities WHERE deleted_at IS NULL"""):
|
||||
lp, lp_name, person = _contact_lp(r["contact_id"], person_canon, org_canon, name, contact_org)
|
||||
parts = [p for p in (r["name"], r["description"], r["next_step"]) if (p or "").strip()]
|
||||
chunks.append(_mk(f"opportunities:{r['id']}", lp, lp_name, person,
|
||||
@@ -121,7 +121,7 @@ def build_chunks(conn):
|
||||
|
||||
# organizations.description
|
||||
for r in conn.execute("""SELECT id, description, updated_at FROM organizations
|
||||
WHERE description IS NOT NULL AND description <> ''"""):
|
||||
WHERE description IS NOT NULL AND description <> '' AND deleted_at IS NULL"""):
|
||||
lp = org_canon.get(r["id"])
|
||||
chunks.append(_mk(f"organizations.description:{r['id']}", lp, name.get(lp), None,
|
||||
"org_note", to_epoch(r["updated_at"]), r["description"], "organizations", r["id"]))
|
||||
|
||||
@@ -48,3 +48,16 @@ def upsert(points):
|
||||
def count():
    """Return the exact number of points stored in the ``COL`` collection.

    Sends a count request with ``exact=True``.

    Returns:
        The ``result.count`` value of the response, or ``None`` when the
        response body, its ``result``, or the ``count`` field is absent.
    """
    _status, data = _req("POST", f"/collections/{COL}/points/count", {"exact": True})
    # ``.get("result", {})`` only covers a *missing* key; a present-but-null
    # ``result`` (presumably what an error reply carries) would raise
    # AttributeError, so fall back to an empty dict in that case as well.
    result = (data or {}).get("result") or {}
    return result.get("count")
|
||||
|
||||
|
||||
def delete_by_source_ids(source_ids):
    """Delete every chunk whose payload ``source_id`` is one of *source_ids*.

    Used to prune soft-deleted CRM source records from the index.

    Args:
        source_ids: Iterable of source-record ids. A single ``str``/``bytes``
            value is treated as one id instead of being iterated element by
            element.

    Returns:
        ``None`` when there is nothing to delete (no request is sent);
        otherwise the ``data`` value returned by ``_req``.

    Raises:
        RuntimeError: If the response status is neither 200 nor 201.
    """
    if isinstance(source_ids, (str, bytes)):
        # list("abc") would yield ["a", "b", "c"] and build a filter that
        # deletes unrelated points; an empty string still means "nothing".
        source_ids = [source_ids] if source_ids else []
    # Drop duplicates while keeping first-seen order so the request body is
    # no larger than it needs to be.
    ids = list(dict.fromkeys(source_ids))
    if not ids:
        return None
    # NOTE(review): the filter matches on source_id alone. If ids are only
    # unique per source table, this also removes chunks of other tables'
    # records that share an id — confirm against the payload schema.
    status, data = _req(
        "POST",
        f"/collections/{COL}/points/delete?wait=true",
        {"filter": {"must": [{"key": "source_id", "match": {"any": ids}}]}},
    )
    if status not in (200, 201):
        raise RuntimeError(f"delete points -> {status}: {data}")
    return data
|
||||
|
||||
@@ -60,6 +60,18 @@ def _state_set(conn, key, value):
|
||||
(key, value, _now()))
|
||||
|
||||
|
||||
def _deleted_source_ids(conn, since):
|
||||
"""CRM records soft-deleted since the watermark — their chunks get pruned."""
|
||||
ids = set()
|
||||
for tbl in ("contacts", "organizations", "opportunities", "communications", "lp_profiles"):
|
||||
try:
|
||||
for r in conn.execute(f"SELECT id FROM {tbl} WHERE deleted_at IS NOT NULL AND deleted_at > ?", (since,)):
|
||||
ids.add(r["id"])
|
||||
except Exception:
|
||||
pass
|
||||
return ids
|
||||
|
||||
|
||||
def _changed_source_ids(conn, since):
|
||||
changed = set()
|
||||
for tbl, model in _CHANGE_TABLES:
|
||||
@@ -91,6 +103,10 @@ def run(db, recreate=False, fuzzy=False, batch=32):
|
||||
if last is None or recreate:
|
||||
mode, target = "full", all_chunks
|
||||
else:
|
||||
# Prune chunks of records soft-deleted since the last sync.
|
||||
deleted = _deleted_source_ids(conn, last)
|
||||
if deleted:
|
||||
qdrant_io.delete_by_source_ids(deleted)
|
||||
changed = _changed_source_ids(conn, last)
|
||||
mode, target = "incremental", [c for c in all_chunks
|
||||
if (c["source_model"], c["source_id"]) in changed]
|
||||
|
||||
+25
-8
@@ -2012,7 +2012,7 @@ class CRMHandler(BaseHTTPRequestHandler):
|
||||
(SELECT MAX(communication_date) FROM communications WHERE contact_id = c.id) as last_contact_date
|
||||
FROM contacts c
|
||||
LEFT JOIN organizations o ON c.organization_id = o.id
|
||||
WHERE 1=1
|
||||
WHERE 1=1 AND c.deleted_at IS NULL
|
||||
"""
|
||||
args = []
|
||||
|
||||
@@ -2197,7 +2197,13 @@ class CRMHandler(BaseHTTPRequestHandler):
|
||||
return self.send_error_json("Contact not found", 404)
|
||||
|
||||
_sync_contact_to_fundraising_state(conn, row_to_dict(existing), actor_user_id=user['user_id'], remove=True)
|
||||
conn.execute("DELETE FROM contacts WHERE id = ?", (contact_id,))
|
||||
# Soft-delete (guardrail #3 — never hard-delete): mark deleted_at and
|
||||
# cascade to the contact's opportunities, communications, and lp_profile.
|
||||
_ts = now()
|
||||
conn.execute("UPDATE contacts SET deleted_at = ?, updated_at = ? WHERE id = ?", (_ts, _ts, contact_id))
|
||||
conn.execute("UPDATE opportunities SET deleted_at = ? WHERE contact_id = ? AND deleted_at IS NULL", (_ts, contact_id))
|
||||
conn.execute("UPDATE communications SET deleted_at = ? WHERE contact_id = ? AND deleted_at IS NULL", (_ts, contact_id))
|
||||
conn.execute("UPDATE lp_profiles SET deleted_at = ? WHERE contact_id = ? AND deleted_at IS NULL", (_ts, contact_id))
|
||||
log_audit(conn, user['user_id'], 'contact', contact_id, 'delete')
|
||||
conn.commit()
|
||||
conn.close()
|
||||
@@ -2213,7 +2219,7 @@ class CRMHandler(BaseHTTPRequestHandler):
|
||||
SELECT o.*,
|
||||
(SELECT COUNT(*) FROM contacts WHERE organization_id = o.id) as contact_count,
|
||||
(SELECT COALESCE(SUM(commitment_amount), 0) FROM opportunities WHERE organization_id = o.id AND stage = 'funded') as total_funded
|
||||
FROM organizations o WHERE 1=1
|
||||
FROM organizations o WHERE 1=1 AND o.deleted_at IS NULL
|
||||
"""
|
||||
args = []
|
||||
if params.get('search'):
|
||||
@@ -2314,7 +2320,7 @@ class CRMHandler(BaseHTTPRequestHandler):
|
||||
return self.send_error_json("Organization not found", 404)
|
||||
|
||||
conn.execute("UPDATE contacts SET organization_id = NULL WHERE organization_id = ?", (org_id,))
|
||||
conn.execute("DELETE FROM organizations WHERE id = ?", (org_id,))
|
||||
conn.execute("UPDATE organizations SET deleted_at = ?, updated_at = ? WHERE id = ?", (now(), now(), org_id))
|
||||
log_audit(conn, user['user_id'], 'organization', org_id, 'delete')
|
||||
conn.commit()
|
||||
conn.close()
|
||||
@@ -2333,7 +2339,7 @@ class CRMHandler(BaseHTTPRequestHandler):
|
||||
LEFT JOIN contacts c ON op.contact_id = c.id
|
||||
LEFT JOIN organizations o ON op.organization_id = o.id
|
||||
LEFT JOIN users u ON op.owner_id = u.id
|
||||
WHERE 1=1
|
||||
WHERE 1=1 AND op.deleted_at IS NULL
|
||||
"""
|
||||
args = []
|
||||
if params.get('stage'):
|
||||
@@ -2524,7 +2530,7 @@ class CRMHandler(BaseHTTPRequestHandler):
|
||||
conn.close()
|
||||
return self.send_error_json("Opportunity not found", 404)
|
||||
|
||||
conn.execute("DELETE FROM opportunities WHERE id = ?", (opp_id,))
|
||||
conn.execute("UPDATE opportunities SET deleted_at = ?, updated_at = ? WHERE id = ?", (now(), now(), opp_id))
|
||||
log_audit(conn, user['user_id'], 'opportunity', opp_id, 'delete')
|
||||
conn.commit()
|
||||
conn.close()
|
||||
@@ -2541,7 +2547,7 @@ class CRMHandler(BaseHTTPRequestHandler):
|
||||
FROM communications cm
|
||||
LEFT JOIN contacts c ON cm.contact_id = c.id
|
||||
LEFT JOIN users u ON cm.created_by = u.id
|
||||
WHERE 1=1
|
||||
WHERE 1=1 AND cm.deleted_at IS NULL
|
||||
"""
|
||||
args = []
|
||||
if params.get('contact_id'):
|
||||
@@ -2810,7 +2816,7 @@ class CRMHandler(BaseHTTPRequestHandler):
|
||||
conn.close()
|
||||
return self.send_error_json("Communication not found", 404)
|
||||
|
||||
conn.execute("DELETE FROM communications WHERE id = ?", (comm_id,))
|
||||
conn.execute("UPDATE communications SET deleted_at = ?, updated_at = ? WHERE id = ?", (now(), now(), comm_id))
|
||||
log_audit(conn, user['user_id'], 'communication', comm_id, 'delete')
|
||||
conn.commit()
|
||||
conn.close()
|
||||
@@ -3492,6 +3498,17 @@ class CRMHandler(BaseHTTPRequestHandler):
|
||||
except Exception:
|
||||
out['pending_merge_candidates'] = None
|
||||
out['index_job'] = entity_jobs.get_status() if entity_jobs else None
|
||||
# Raw source-record counts, so the resolved canonical numbers can be
|
||||
# sanity-checked against what's actually in the CRM.
|
||||
try:
|
||||
out['source_counts'] = {
|
||||
'contacts': conn.execute("SELECT COUNT(*) FROM contacts WHERE deleted_at IS NULL").fetchone()[0],
|
||||
'organizations': conn.execute("SELECT COUNT(*) FROM organizations WHERE deleted_at IS NULL").fetchone()[0],
|
||||
'fundraising_investors': conn.execute("SELECT COUNT(*) FROM fundraising_investors").fetchone()[0],
|
||||
'fundraising_contacts': conn.execute("SELECT COUNT(*) FROM fundraising_contacts").fetchone()[0],
|
||||
}
|
||||
except Exception:
|
||||
out['source_counts'] = None
|
||||
conn.close()
|
||||
self.send_json({"data": out})
|
||||
|
||||
|
||||
Reference in New Issue
Block a user