Initial commit: Ten31 Signal Engine (ingest, scoring brain, corpus seeds)

This commit is contained in:
Keysat
2026-06-15 09:24:29 -05:00
commit a6aec77506
77 changed files with 6263 additions and 0 deletions
+242
View File
@@ -0,0 +1,242 @@
"""Spark Control HTTP client (handoff §13.2 endpoint table).
Enforces the two operational invariants from §4.1 / §13.4 (revised per infra guidance 2026-06-09):
1. AUDIO concurrency is CAPPED at 2 in-flight (hard ceiling 3), GLOBAL across both parakeet
endpoints (/v1/audio/transcriptions + /api/audio/diarize*) — they share ONE serial GPU. A
process-wide BoundedSemaphore enforces it. Going wider buys ZERO throughput (requests queue and
hold the GPU); 2 just keeps the GPU continuously fed with no idle gap = full throughput.
2. Transient unresponsiveness is NORMAL, not failure: when the GPU stays continuously busy the
/health and in-flight requests can briefly (1-4s) stop responding. Timeouts / 503s /
connection-resets are "busy, retry" — handled by short exponential backoff, never treated as work loss.
NOTE: request/response *shapes* for the non-OpenAI endpoints (/api/audio/*, /scrub,
/rehydrate, /api/search) are provisional and marked TODO(contract) — confirm against the
live gateway's /api/endpoints. The OpenAI-compatible routes (/v1/*) follow the standard.
"""
from __future__ import annotations
import logging
import threading
import time
from pathlib import Path
from typing import Any
import requests
# Module-level logger; `_post` uses it to report each retry.
log = logging.getLogger(__name__)
# Process-wide AUDIO in-flight cap, GLOBAL across both parakeet endpoints. Single serial GPU shared
# with the operator's production app → concurrency only deepens the queue + lengthens transient
# busy-blips; sit at 2 (full throughput, ~2-3s busy windows), hard ceiling 3.
# Upper bound that `_set_audio_concurrency` clamps any configured value to.
_AUDIO_MAX = 3
# Semaphore acquired by every audio method for the whole upload (retries included). Starts at the
# default of 2 permits; `_set_audio_concurrency` rebinds this name to a freshly sized semaphore.
_AUDIO_SEM = threading.BoundedSemaphore(2)
def _set_audio_concurrency(n: int) -> None:
    """Replace the global audio semaphore with one sized to `n`, clamped to [1, _AUDIO_MAX].

    `SparkControl.__init__` calls this with the configured concurrency. The module-level name is
    rebound to a brand-new semaphore, so callers are expected to invoke it before any worker
    threads start; a thread already holding the previous semaphore is not counted by the new one.
    """
    global _AUDIO_SEM
    requested = int(n)
    # Lower bound first, then the hard ceiling (same order as max-then-min).
    at_least_one = requested if requested > 1 else 1
    permits = at_least_one if at_least_one < _AUDIO_MAX else _AUDIO_MAX
    _AUDIO_SEM = threading.BoundedSemaphore(permits)
class SparkControlError(RuntimeError):
    """Error raised by the Spark Control client, e.g. a 503 reply or a POST that exhausted its retries."""
class SparkControl:
    """HTTP client for the Spark Control gateway.

    All POST traffic goes through `_post`, which retries with exponential backoff and raises
    `SparkControlError` once the retries are exhausted. The three audio methods additionally hold
    the module-level `_AUDIO_SEM` for the whole upload (retries included), which is what caps
    audio requests in flight across the process.
    """

    def __init__(
        self,
        base_url: str,
        *,
        verify_tls: bool = False,
        timeout: float = 120.0,
        llm_model: str = "",
        embed_model: str = "",
        transcribe_model: str = "",
        audio_concurrency: int = 2,
    ) -> None:
        """Store connection settings, size the global audio semaphore and open a session.

        Args:
            base_url: Gateway root URL; trailing slashes are stripped.
            verify_tls: Passed as `verify=` on every request.
            timeout: Per-request timeout in seconds, passed as `timeout=` on every request.
            llm_model: Model name sent by `chat`.
            embed_model: Model name sent by `embed`.
            transcribe_model: Model name sent by `transcribe`.
            audio_concurrency: Requested audio in-flight cap; clamped by `_set_audio_concurrency`.
        """
        self.base = base_url.rstrip("/")
        self.verify = verify_tls
        self.timeout = timeout
        self.llm_model = llm_model
        self.embed_model = embed_model
        self.transcribe_model = transcribe_model
        # Rebinds the module-level semaphore, so this affects every client in the process.
        _set_audio_concurrency(audio_concurrency)
        self._session = requests.Session()
        if not verify_tls:
            # same-LAN self-signed cert (§13): suppress the per-request InsecureRequestWarning noise.
            import urllib3
            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    # ---------- low-level ----------
    @staticmethod
    def _snapshot_uploads(files: Any) -> list[tuple[Any, int]]:
        """Return `(handle, offset)` for every seekable file object in a requests-style `files` value.

        Accepts the shapes `requests` accepts: a dict or a list of `(field, value)` pairs, where
        each value is either a file object or a `(filename, fileobj, ...)` tuple. Values that are
        not file objects (str/bytes) or whose position cannot be read are skipped.
        """
        if not files:
            return []
        if isinstance(files, dict):
            values = list(files.values())
        else:
            values = [pair[1] for pair in files if isinstance(pair, (tuple, list)) and len(pair) == 2]
        marks: list[tuple[Any, int]] = []
        for value in values:
            handle = value[1] if isinstance(value, (tuple, list)) and len(value) > 1 else value
            tell = getattr(handle, "tell", None)
            if not callable(tell) or not callable(getattr(handle, "seek", None)):
                continue
            try:
                marks.append((handle, tell()))
            except (OSError, ValueError):
                # Non-seekable stream or closed handle: nothing we can restore later.
                continue
        return marks

    @staticmethod
    def _rewind_uploads(marks: list[tuple[Any, int]]) -> None:
        """Seek every handle recorded by `_snapshot_uploads` back to its recorded offset."""
        for handle, offset in marks:
            handle.seek(offset)

    def _post(
        self,
        path: str,
        *,
        json: Any = None,
        files: Any = None,
        data: Any = None,
        retries: int = 4,
        backoff: float = 5.0,
    ) -> Any:
        """POST to `path` and return the decoded JSON body, retrying on failure.

        Makes up to `retries + 1` attempts. After a failed attempt number `attempt` (0-based) it
        sleeps `backoff * 2 ** attempt` seconds before trying again. A 503 reply and any
        `requests.RequestException` count as failures.

        Args:
            path: URL path appended to the base URL.
            json: JSON body, forwarded to `requests`.
            files: Multipart uploads, forwarded to `requests`.
            data: Form fields, forwarded to `requests`.
            retries: Number of retries after the first attempt.
            backoff: Base sleep in seconds for the exponential backoff.

        Raises:
            SparkControlError: When the last attempt also fails; the cause is chained.
        """
        url = f"{self.base}{path}"
        # requests read()s upload handles when it builds the multipart body, leaving them at EOF.
        # Remember where each handle started so a retry re-sends the real content instead of an
        # empty file.
        upload_marks = self._snapshot_uploads(files)
        for attempt in range(retries + 1):
            if attempt:
                self._rewind_uploads(upload_marks)
            try:
                r = self._session.post(
                    url, json=json, files=files, data=data,
                    timeout=self.timeout, verify=self.verify,
                )
                if r.status_code == 503:
                    raise SparkControlError("503 from Spark Control (GPU busy / cold start)")
                r.raise_for_status()
                return r.json()
            except (requests.RequestException, SparkControlError) as e:
                # NOTE(review): HTTPError from raise_for_status() lands here too, so permanent 4xx
                # replies are retried with the full backoff — confirm that is intended.
                if attempt < retries:
                    sleep = backoff * (2 ** attempt)
                    log.warning("Spark Control POST %s failed (%s); retry %d/%d in %.0fs",
                                path, e, attempt + 1, retries, sleep)
                    time.sleep(sleep)
                else:
                    raise SparkControlError(f"POST {path} failed after {retries} retries: {e}") from e

    def _get(self, path: str) -> Any:
        """GET `path` and return the decoded JSON body.

        Unlike `_post` there is no retry loop, and `requests` exceptions propagate unwrapped.
        """
        r = self._session.get(f"{self.base}{path}", timeout=self.timeout, verify=self.verify)
        r.raise_for_status()
        return r.json()

    # ---------- health / discovery (§13.2) ----------
    def status(self) -> Any:
        """Return the JSON body of GET /api/status."""
        return self._get("/api/status")

    def endpoints(self) -> Any:
        """Return the JSON body of GET /api/endpoints."""
        return self._get("/api/endpoints")

    # ---------- local LLM: extraction + scoring helpers (§4.2) ----------
    def chat(
        self,
        messages: list[dict[str, str]],
        *,
        json_object: bool = True,
        temperature: float = 0.0,
        enable_thinking: bool = False,
        max_tokens: int | None = None,
    ) -> Any:
        """Deterministic, no-chain-of-thought extraction per §4.2 (temp 0, thinking off,
        JSON mode for guaranteed-valid JSON).

        POSTs to /v1/chat/completions with `self.llm_model`. `response_format` is only sent when
        `json_object` is true, and `max_tokens` only when it is truthy (so 0 is treated as unset).
        """
        body: dict[str, Any] = {
            "model": self.llm_model,
            "messages": messages,
            "temperature": temperature,
            "chat_template_kwargs": {"enable_thinking": enable_thinking},
        }
        if json_object:
            body["response_format"] = {"type": "json_object"}
        if max_tokens:
            body["max_tokens"] = max_tokens
        return self._post("/v1/chat/completions", json=body)

    # ---------- embeddings / rerank / hybrid search (§4.3) ----------
    def embed(self, inputs: list[str]) -> Any:
        """Embed DISTILLED PROPOSITIONS, not raw chunks (§4.3).

        POSTs `inputs` to /v1/embeddings with `self.embed_model`.
        """
        return self._post("/v1/embeddings", json={"model": self.embed_model, "input": inputs})

    def rerank(self, query: str, documents: list[str], *, top_n: int | None = None) -> Any:
        """POST `query` and `documents` to /v1/rerank; `top_n` is only sent when truthy."""
        body: dict[str, Any] = {"query": query, "documents": documents}
        if top_n:
            body["top_n"] = top_n
        return self._post("/v1/rerank", json=body)

    def search(
        self,
        query: str,
        *,
        collection: str,
        top_k: int = 10,
        retrieve_n: int | None = None,
        rerank: bool = True,
        filter: dict[str, Any] | None = None,
        with_payload: bool = True,
        min_score: float | None = None,
        dense_vector_name: str = "bge_m3",
        sparse_vector_name: str = "bm25",
        text_field: str = "proposition",
    ) -> Any:
        """Hybrid dense+sparse retrieval (RRF) + optional rerank over a Qdrant collection (§4.3).
        The gateway defaults vector names to 'dense'/'sparse'; our `propositions` collection uses
        named vectors bge_m3/bm25, so they must be passed explicitly (confirmed live).

        `retrieve_n`, `filter` and `min_score` are omitted from the request body when left as None.
        """
        body: dict[str, Any] = {
            "query": query, "collection": collection, "top_k": top_k,
            "rerank": rerank, "with_payload": with_payload,
            "dense_vector_name": dense_vector_name,
            "sparse_vector_name": sparse_vector_name,
            "text_field": text_field,
        }
        if retrieve_n is not None:
            body["retrieve_n"] = retrieve_n
        if filter is not None:
            body["filter"] = filter
        if min_score is not None:
            body["min_score"] = min_score
        return self._post("/api/search", json=body)

    # ---------- audio: capped at 2 in-flight GLOBAL (semaphore), short busy-retry ----------
    # backoff=1.5 → ~1.5/3/6/12/24s: tuned to ride out the 1-4s busy-blips, not the old 5-40s.
    def transcribe(self, audio_path: str | Path, *, response_format: str = "verbose_json") -> Any:
        """Upload `audio_path` to /v1/audio/transcriptions and return the JSON reply.

        Holds `_AUDIO_SEM` and keeps the file open for the whole call, retries included.
        """
        with _AUDIO_SEM, open(audio_path, "rb") as f:
            return self._post(
                "/v1/audio/transcriptions",
                files={"file": f},
                data={"model": self.transcribe_model, "response_format": response_format},
                retries=5, backoff=1.5,
            )

    def diarize_chunk(self, audio_path: str | Path) -> Any:
        """Upload `audio_path` to /api/audio/diarize-chunk under the audio semaphore."""
        # TODO(contract): confirm /api/audio/diarize-chunk response shape (segments + 192-d voiceprint).
        with _AUDIO_SEM, open(audio_path, "rb") as f:
            return self._post("/api/audio/diarize-chunk", files={"file": f}, retries=5, backoff=1.5)

    def transcribe_with_speakers(self, audio_path: str | Path) -> Any:
        """Upload `audio_path` to /api/audio/transcribe-with-speakers under the audio semaphore."""
        with _AUDIO_SEM, open(audio_path, "rb") as f:
            return self._post("/api/audio/transcribe-with-speakers", files={"file": f}, retries=5, backoff=1.5)

    # ---------- frontier sovereignty boundary (§4.6) ----------
    # Confirmed contract (gateway /openapi.json):
    # /scrub: task_id*, items*, known_entities, actor, tier1_action, bucket, ner, map_handle
    # /rehydrate: task_id*, map_handle*, items*, actor, strict
    # De-identifies IDENTITIES into stable placeholders; the de-anon map stays on the box and is
    # referenced by `map_handle`. Exposure/position data must NEVER be sent here at all (§4.6).
    def scrub(
        self,
        items: list[Any],
        *,
        task_id: str,
        known_entities: dict[str, str] | None = None,
        actor: str | None = None,
        ner: bool = True,
    ) -> Any:
        """Returns the scrubbed items + a `map_handle` to pass to rehydrate. `known_entities` is the
        caller-supplied dictionary (Strike→[FUND_1]); `ner` toggles the local-Qwen NER backstop.

        `known_entities` and `actor` are omitted from the request body when left as None.
        """
        body: dict[str, Any] = {"task_id": task_id, "items": items, "ner": ner}
        if known_entities is not None:
            body["known_entities"] = known_entities
        if actor is not None:
            body["actor"] = actor
        return self._post("/scrub", json=body)

    def rehydrate(self, items: list[Any], *, task_id: str, map_handle: str, strict: bool = False) -> Any:
        """Restore real identities in the frontier's output locally, using the scrub `map_handle`."""
        return self._post("/rehydrate", json={
            "task_id": task_id, "map_handle": map_handle, "items": items, "strict": strict,
        })
def from_config(cfg: Any) -> SparkControl:
    """Build a `SparkControl` from a config object's attributes.

    Reads `spark_control_url`, `spark_verify_tls`, `spark_timeout_s`, `local_llm_model`,
    `embed_model` and `transcribe_model` from `cfg`; `audio_concurrency` is optional and
    falls back to 2 when the attribute is absent.
    """
    # Read the URL first so attribute access happens in the same order as the keyword list below.
    gateway_url = cfg.spark_control_url
    options: dict[str, Any] = {
        "verify_tls": cfg.spark_verify_tls,
        "timeout": cfg.spark_timeout_s,
        "llm_model": cfg.local_llm_model,
        "embed_model": cfg.embed_model,
        "transcribe_model": cfg.transcribe_model,
        "audio_concurrency": getattr(cfg, "audio_concurrency", 2),
    }
    return SparkControl(gateway_url, **options)