f357c23c75
- Fuzzy tier (backend/ingest/fuzzy_resolve.py + llm.py): local Qwen adjudicates the deterministic resolver's flagged name-variant candidates; merges are durable via entity_merges (deterministic re-runs respect them), losers soft-deleted, logged. Idempotent. - Incremental sync (backend/ingest/sync.py): re-embeds only rows changed since a watermark (ingest_sync_state); first run / --recreate = full. Tested full→0→1. - Start9 packaging (start9/0.4): Dockerfile bundles ingest+mcp + fastembed/mcp; "Build search index" action runs the init in a subcontainer; MCP shipped as a manual stdio server (not a daemon); version 0.1.0:44. INGEST_PACKAGING.md. - backfill.py: factored embed_and_upsert() shared with sync. Verified end-to-end on synthetic data + live Sparks/Qwen/Qdrant. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
40 lines
1.5 KiB
Python
40 lines
1.5 KiB
Python
"""Local Qwen chat client via Spark Control /v1/chat/completions.

Used for the privacy-sensitive, high-volume reasoning that must stay on Ten31
infra (entity-resolution adjudication, triage). Frontier reasoning still goes to
Claude; this is the local leg. Thinking is disabled for fast structured output.
"""
|
|
import json
|
|
import re
|
|
|
|
import config
|
|
import http_util
|
|
|
|
|
|
def chat(prompt, system=None, max_tokens=200, temperature=0.0):
    """Send a single-turn chat request and return the reply text.

    Posts to ``{config.SPARK_CONTROL_URL}/v1/chat/completions`` using the
    model named by ``config.CHAT_MODEL``. The request always carries
    ``chat_template_kwargs={"enable_thinking": False}``.

    Args:
        prompt: Content of the user message.
        system: Optional system message; omitted from the request when falsy.
        max_tokens: Value sent as ``max_tokens``.
        temperature: Value sent as ``temperature``.

    Returns:
        The ``content`` of the first choice's message with surrounding
        whitespace stripped, or ``""`` when that content is missing or empty.

    Raises:
        RuntimeError: If the returned status is anything other than 200.
    """
    user_turn = {"role": "user", "content": prompt}
    if system:
        conversation = [{"role": "system", "content": system}, user_turn]
    else:
        conversation = [user_turn]

    payload = {
        "model": config.CHAT_MODEL,
        "messages": conversation,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "chat_template_kwargs": {"enable_thinking": False},
    }
    endpoint = f"{config.SPARK_CONTROL_URL}/v1/chat/completions"

    # NOTE(review): http_util.request is assumed to hand back the decoded JSON
    # body as ``data`` -- confirm against http_util.
    status, data = http_util.request(
        "POST", endpoint, payload, verify=config.SPARK_VERIFY_TLS
    )
    if status != 200:
        raise RuntimeError(f"/v1/chat/completions -> {status}: {data}")

    reply = data["choices"][0]["message"].get("content")
    # A missing/None/empty content collapses to the empty string.
    return reply.strip() if reply else ""
|
|
|
|
|
|
def chat_json(prompt, system=None, max_tokens=200):
    """Chat and parse the first JSON object from the reply (tolerant of fences).

    The reply is obtained from ``chat`` (default temperature), Markdown code
    fence markers are stripped, and decoding starts at the first ``{``.

    Args:
        prompt: Content of the user message, passed through to ``chat``.
        system: Optional system message, passed through to ``chat``.
        max_tokens: Completion token limit, passed through to ``chat``.

    Returns:
        The decoded JSON object (a ``dict``), or ``None`` when the reply has
        no ``{`` or the text starting at the first ``{`` is not valid JSON
        (for example, output truncated by ``max_tokens``).

    Raises:
        Whatever ``chat`` raises; decoding failures are reported as ``None``.
    """
    raw = chat(prompt, system=system, max_tokens=max_tokens)
    # Remove a leading ``` / ```json at the start of any line and a closing
    # ``` at the end of any line, so fenced replies parse like bare ones.
    raw = re.sub(r"^```(json)?|```$", "", raw.strip(), flags=re.MULTILINE).strip()

    start = raw.find("{")
    if start == -1:
        return None
    try:
        # raw_decode stops at the end of the first complete JSON value, so
        # trailing prose or a second object after it no longer causes the
        # whole reply to be rejected (a greedy "{...}" match spanning to the
        # last "}" would). Only the first "{" is tried: falling back to later
        # braces could return a nested fragment of a malformed outer object.
        parsed, _end = json.JSONDecoder().raw_decode(raw, start)
    except json.JSONDecodeError:
        return None
    return parsed
|