"""Minimal Qdrant REST client for the ingest pipeline (direct to QDRANT_URL). Creates the crm_chunks collection per EMBEDDINGS.md: a named dense vector (1024, Cosine) + a named sparse vector with modifier:idf, plus payload indexes. """ import config import http_util Q = config.QDRANT_URL COL = config.COLLECTION def _req(method, path, body=None): return http_util.request(method, f"{Q}{path}", body, verify=False) def exists() -> bool: status, _ = _req("GET", f"/collections/{COL}") return status == 200 def create_collection(recreate=False, dim=config.DENSE_DIM): if exists(): if not recreate: return "exists" _req("DELETE", f"/collections/{COL}") status, data = _req("PUT", f"/collections/{COL}", { "vectors": {"dense": {"size": dim, "distance": "Cosine"}}, "sparse_vectors": {"sparse": {"modifier": "idf"}}, }) if status not in (200, 201): raise RuntimeError(f"create collection -> {status}: {data}") return "created" def ensure_indexes(): for field, schema in (("lp_id", "keyword"), ("doc_type", "keyword"), ("date_ts", "integer")): _req("PUT", f"/collections/{COL}/index", {"field_name": field, "field_schema": schema}) def upsert(points): status, data = _req("PUT", f"/collections/{COL}/points?wait=true", {"points": points}) if status not in (200, 201): raise RuntimeError(f"upsert -> {status}: {data}") return data def count(): status, data = _req("POST", f"/collections/{COL}/points/count", {"exact": True}) return (data or {}).get("result", {}).get("count")