Soft-delete + source-count diagnostics; thesis v4 (0.1.0:47)
- DELETE handlers soft-delete (set deleted_at) + cascade contact -> opps/comms/lp instead of hard-deleting (guardrail #3); list queries filter deleted rows.
- ingest: chunking excludes soft-deleted records; qdrant delete-by-source-id; sync prunes soft-deleted records' vectors incrementally.
- /api/system/status returns raw source-record counts for sanity-checking.
- docs/thesis-seed-v4.md (no "bet" language, scarcity-forward, freedom-tech as a banner option, tightened pillars, reworked segments + edge).

Soft-delete verified via the running HTTP server (delete -> hidden + row kept).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -60,6 +60,18 @@ def _state_set(conn, key, value):
|
||||
(key, value, _now()))
|
||||
|
||||
|
||||
def _deleted_source_ids(conn, since):
    """Return ids of CRM records soft-deleted since the watermark.

    Their chunks get pruned by the caller. Each of the CRM tables is
    queried for rows whose ``deleted_at`` is set and strictly greater than
    ``since``; the ``id`` of every matching row is collected.

    Args:
        conn: DB connection whose ``execute`` accepts ``?`` placeholders and
            yields rows indexable by column name (``row["id"]``).
        since: Watermark compared against ``deleted_at`` with ``>``.

    Returns:
        A set of record ids, merged across all scanned tables.

    The scan is best-effort per table: a failing query (for example a table
    that does not exist or has no ``deleted_at`` column) is logged and that
    table is skipped, so one bad table never aborts the scan.
    """
    import logging

    log = logging.getLogger(__name__)
    tables = ("contacts", "organizations", "opportunities",
              "communications", "lp_profiles")
    # NOTE(review): ids from every table land in one set, so the table of
    # origin is lost. This presumably relies on ids being unique across
    # tables; confirm, otherwise pruning could hit a live record's chunks.
    ids = set()
    for table in tables:
        # The table name comes from the fixed tuple above, never from user
        # input, so interpolating it into the SQL text is safe here.
        query = (f"SELECT id FROM {table} "
                 "WHERE deleted_at IS NOT NULL AND deleted_at > ?")
        try:
            ids.update(row["id"] for row in conn.execute(query, (since,)))
        except Exception as exc:
            # Stay best-effort, but surface the failure instead of hiding it.
            log.warning("soft-delete scan skipped for %s: %s", table, exc)
    return ids
|
||||
|
||||
|
||||
def _changed_source_ids(conn, since):
|
||||
changed = set()
|
||||
for tbl, model in _CHANGE_TABLES:
|
||||
@@ -91,6 +103,10 @@ def run(db, recreate=False, fuzzy=False, batch=32):
|
||||
if last is None or recreate:
|
||||
mode, target = "full", all_chunks
|
||||
else:
|
||||
# Prune chunks of records soft-deleted since the last sync.
|
||||
deleted = _deleted_source_ids(conn, last)
|
||||
if deleted:
|
||||
qdrant_io.delete_by_source_ids(deleted)
|
||||
changed = _changed_source_ids(conn, last)
|
||||
mode, target = "incremental", [c for c in all_chunks
|
||||
if (c["source_model"], c["source_id"]) in changed]
|
||||
|
||||
Reference in New Issue
Block a user