c7ce44d963
Workstream A–C substrate for the Ten31 agentic system: - A1: docs/crm-overview.md; CLAUDE.md conventions + guardrail #9 - A2: additive/reversible core migration (canonical_entities, entity_links, interaction_log, relationship_edges, soft-delete) + ledgered runner - B1/B3: chunking + deterministic entity resolution (backend/ingest) - B2: dense (bge-m3) + BM25 sparse ingest to Qdrant crm_chunks - C: CRM MCP server (reads, retrieval modes, logged writes) — no outbound tools - docs: redaction/re-hydration, Gmail enablement runbook - synthetic test data; .env.example; housekeeping (.gitignore, untrack crm.db, drop legacy files + start9/0.3.5) Verified end-to-end on synthetic data + live Sparks (hybrid > dense on entity queries). Real backfill runs on Ten31 infra; index holds synthetic data only. Branch snapshot also captures pre-existing working-tree changes. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
51 lines
1.6 KiB
Python
51 lines
1.6 KiB
Python
"""Minimal Qdrant REST client for the ingest pipeline (direct to QDRANT_URL).
|
|
|
|
Creates the crm_chunks collection per EMBEDDINGS.md: a named dense vector
|
|
(1024, Cosine) + a named sparse vector with modifier:idf, plus payload indexes.
|
|
"""
|
|
import config
|
|
import http_util
|
|
|
|
# Base URL of the Qdrant REST API; every request path is appended to it.
Q = config.QDRANT_URL
|
|
# Name of the Qdrant collection that all endpoints in this module target.
COL = config.COLLECTION
|
|
|
|
|
|
def _req(method, path, body=None):
    """Issue one request against the Qdrant endpoint through http_util.

    Args:
        method: HTTP verb, e.g. "GET", "PUT", "POST" or "DELETE".
        path: Path (including any query string) appended verbatim to Q.
        body: Optional request payload, handed to http_util.request as-is.

    Returns:
        Whatever http_util.request returns; callers in this module unpack it
        as a (status, data) pair.
    """
    url = f"{Q}{path}"
    # NOTE(review): verify=False presumably turns off TLS certificate
    # verification inside http_util -- confirm this is intentional.
    return http_util.request(method, url, body, verify=False)
|
|
|
|
|
|
def exists() -> bool:
    """Tell whether the configured collection answers with HTTP 200.

    Returns:
        True when GET /collections/{COL} yields status 200; False for any
        other status.
    """
    http_status, _payload = _req("GET", f"/collections/{COL}")
    found = http_status == 200
    return found
|
|
|
|
|
|
def create_collection(recreate=False, dim=config.DENSE_DIM):
    """Create the collection with one named dense and one named sparse vector.

    Args:
        recreate: When the collection already exists, delete it and build it
            again instead of leaving it untouched.
        dim: Size of the "dense" vector; defaults to config.DENSE_DIM.

    Returns:
        "exists" if the collection was already present and recreate is false,
        otherwise "created".

    Raises:
        RuntimeError: If the delete request (when recreating) or the create
            request does not answer with HTTP 200 or 201.
    """
    collection_path = f"/collections/{COL}"

    if exists():
        if not recreate:
            return "exists"
        # Check the delete outcome explicitly: otherwise a failed delete only
        # shows up later as a confusing "create collection" error.
        status, data = _req("DELETE", collection_path)
        if status not in (200, 201):
            raise RuntimeError(f"delete collection -> {status}: {data}")

    status, data = _req("PUT", collection_path, {
        "vectors": {"dense": {"size": dim, "distance": "Cosine"}},
        "sparse_vectors": {"sparse": {"modifier": "idf"}},
    })
    if status not in (200, 201):
        raise RuntimeError(f"create collection -> {status}: {data}")
    return "created"
|
|
|
|
|
|
def ensure_indexes():
    """Request payload indexes for lp_id, doc_type and date_ts.

    Every field is attempted even if an earlier one fails, so one bad field
    does not prevent the others from being indexed.

    Raises:
        RuntimeError: If any index request does not answer with HTTP 200 or
            201; the message lists every field that failed.
    """
    wanted = (
        ("lp_id", "keyword"),
        ("doc_type", "keyword"),
        ("date_ts", "integer"),
    )
    failures = []
    for field, schema in wanted:
        status, data = _req(
            "PUT",
            f"/collections/{COL}/index",
            {"field_name": field, "field_schema": schema},
        )
        # Same success criterion as create_collection() and upsert().
        if status not in (200, 201):
            failures.append(f"{field} -> {status}: {data}")
    if failures:
        raise RuntimeError("create index failed: " + "; ".join(failures))
|
|
|
|
|
|
def upsert(points):
    """Write points into the collection, waiting for the operation to apply.

    Args:
        points: Sent unchanged as the "points" value of the request body.

    Returns:
        The response data from the request.

    Raises:
        RuntimeError: If the response status is neither 200 nor 201.
    """
    endpoint = f"/collections/{COL}/points?wait=true"
    status, data = _req("PUT", endpoint, {"points": points})
    if status in (200, 201):
        return data
    raise RuntimeError(f"upsert -> {status}: {data}")
|
|
|
|
|
|
def count():
    """Return the exact number of points in the collection.

    Returns:
        The "count" value nested under "result" in the response, or None when
        the response carries no usable result (empty body, non-dict body, or
        a missing/null "result").
    """
    _status, data = _req(
        "POST", f"/collections/{COL}/points/count", {"exact": True}
    )
    # A present-but-null "result" or a non-dict body must yield None rather
    # than raise AttributeError on the chained lookup.
    result = data.get("result") if isinstance(data, dict) else None
    if not isinstance(result, dict):
        return None
    return result.get("count")
|