Files
ten31-signal-engine/signal_engine/extract/backends.py
T
Keysat 87b6b05d67 Add request timeout and retry to Gemini extraction backend
A timeout-less generate_content call hung the single-threaded extract worker for ~50 min mid-batch. Set an HTTP timeout (120s) plus 4 retries with backoff, mirroring SparkControl._post; transient 504/read-timeouts now self-heal instead of freezing the run.
2026-06-16 08:45:12 -05:00

82 lines
3.8 KiB
Python

"""Pluggable extraction backends (§scaling).
The §4.2 extractor calls a backend that turns chat messages into a JSON string. Default is the
LOCAL Qwen via Spark Control (the ~95%-local design). The Gemini backend is the documented
overflow/fallback for bulk back-cataloging at scale, or if the Sparks are unavailable — used for
the PUBLIC corpus only, never conviction/exposure data (sovereignty boundary, §4.6).
A backend exposes: complete_json(messages, max_tokens) -> str (a JSON object string).
"""
from __future__ import annotations
import logging
import time
log = logging.getLogger(__name__)
class LocalQwenBackend:
    """Default extraction backend: delegates to the local Qwen model through Spark Control.

    `sc` is the Spark Control client; only its `chat(...)` method is used here.
    """

    name = "local"

    def __init__(self, sc) -> None:
        self.sc = sc

    def complete_json(self, messages: list[dict], *, max_tokens: int = 4000) -> str:
        """Send `messages` to the local model and return the first choice's message content.

        The request asks for a JSON object, with temperature 0 and thinking disabled, capped at
        `max_tokens` output tokens.
        """
        chat_options = {
            "json_object": True,
            "temperature": 0,
            "enable_thinking": False,
            "max_tokens": max_tokens,
        }
        response = self.sc.chat(messages, **chat_options)
        first_choice = response["choices"][0]
        return first_choice["message"]["content"]
class GeminiBackend:
    """Gemini fallback/overflow backend, implemented against the `google-genai` SDK.

    NOTE: untested until a key is provided — validate end-to-end before relying on it for a real
    backfill. The async BATCH API is the eventual scale path; this synchronous form is the
    drop-in fallback.

    Every request is bounded by an HTTP timeout and retried with exponential backoff, so a
    transient timeout/5xx/429 does not freeze or abort the single-threaded extract worker.
    Permanent client errors (other 4xx) are raised immediately instead of being retried.
    """

    name = "gemini"

    # 4xx statuses that are transient (request timeout, rate limit) and so still worth a retry.
    # Any other 4xx (bad request, bad key, unknown model, ...) would fail identically on every
    # attempt, so retrying only burns the backoff time before surfacing the same error.
    _RETRYABLE_CLIENT_CODES = frozenset({408, 429})

    def __init__(self, api_key: str, model: str = "gemini-2.5-flash", *,
                 timeout_s: float = 120.0, retries: int = 4) -> None:
        """Build the SDK client.

        Args:
            api_key: Gemini API key.
            model: model name passed to every `generate_content` call.
            timeout_s: per-request HTTP timeout in seconds.
            retries: number of retries after the first attempt; negative values are treated as 0.
        """
        from google import genai  # guarded import; pip install google-genai
        from google.genai import types
        self._genai = genai
        self._types = types
        # http_options.timeout is in MILLISECONDS — without it a stalled request hangs the
        # (single-threaded) worker forever; one such hang froze a whole batch for ~50 min.
        self.client = genai.Client(api_key=api_key,
                                   http_options=types.HttpOptions(timeout=int(timeout_s * 1000)))
        self.model = model
        # A negative count would make the retry loop run zero times (no request at all).
        self.retries = max(0, retries)

    @classmethod
    def _is_retryable(cls, exc: Exception) -> bool:
        """Return False only for errors carrying a non-transient 4xx HTTP status.

        The SDK's API errors expose the HTTP status as an integer `code` attribute. Exceptions
        without one (read timeouts, connection failures) are treated as transient.
        """
        code = getattr(exc, "code", None)
        if not isinstance(code, int):
            return True
        if 400 <= code < 500:
            return code in cls._RETRYABLE_CLIENT_CODES
        return True

    def complete_json(self, messages: list[dict], *, max_tokens: int = 4000) -> str:
        """Run one extraction request and return the response text (a JSON object string).

        All `system` messages are joined into the system instruction; every other message is
        joined into a single user content string. Returns "{}" when the response has no text.

        Raises:
            Exception: the SDK/network error from the final attempt once retries are exhausted,
                or immediately when the error is a non-transient 4xx.
        """
        types = self._types
        system = "\n\n".join(m["content"] for m in messages if m["role"] == "system")
        user = "\n\n".join(m["content"] for m in messages if m["role"] != "system")
        cfg = types.GenerateContentConfig(
            system_instruction=system or None,
            temperature=0,
            max_output_tokens=max_tokens,
            response_mime_type="application/json",
            # Gemini 2.5 thinks by default and spends the output budget on reasoning tokens —
            # it hit MAX_TOKENS with ~3.8k thoughts and a truncated JSON body (0 claims parsed).
            # Extraction is deterministic, no-CoT (mirrors the local path's enable_thinking=False).
            thinking_config=types.ThinkingConfig(thinking_budget=0),
        )
        # Re-clamp here as well: `retries` is a plain attribute and may have been reassigned.
        max_retries = max(0, self.retries)
        for attempt in range(max_retries + 1):
            try:
                resp = self.client.models.generate_content(
                    model=self.model, contents=user, config=cfg)
                return resp.text or "{}"
            except Exception as e:  # noqa: BLE001 — timeout/5xx/429/network: back off and retry
                if attempt >= max_retries or not self._is_retryable(e):
                    raise
                sleep = 2.0 * (2 ** attempt)  # 2s, 4s, 8s, 16s, ...
                log.warning("Gemini call failed (%s); retry %d/%d in %.0fs",
                            e, attempt + 1, max_retries, sleep)
                time.sleep(sleep)
        # The loop always runs at least once and its last iteration returns or raises.
        raise AssertionError("unreachable: retry loop exited without returning or raising")
def from_config(cfg, sc) -> "LocalQwenBackend | GeminiBackend":
    """Pick the extraction backend named by `cfg.extraction_backend`.

    Returns a GeminiBackend (built from `cfg.gemini_api_key` and `cfg.gemini_model`) only when
    the backend is "gemini" and a key is set. Otherwise returns the local backend wrapping `sc`,
    logging a warning if Gemini was requested without a key.
    """
    if cfg.extraction_backend != "gemini":
        return LocalQwenBackend(sc)
    if cfg.gemini_api_key:
        return GeminiBackend(cfg.gemini_api_key, cfg.gemini_model)
    log.warning("EXTRACTION_BACKEND=gemini but GEMINI_API_KEY missing — falling back to local")
    return LocalQwenBackend(sc)