Add request timeout and retry to Gemini extraction backend
A generate_content call with no timeout hung the single-threaded extract worker for ~50 min mid-batch. Set a per-request HTTP timeout (120 s) and retry up to 4 times with exponential backoff (2 s, 4 s, 8 s, 16 s), mirroring SparkControl._post; transient 504s and read timeouts now self-heal instead of freezing the run.
This commit is contained in:
@@ -10,6 +10,7 @@ A backend exposes: complete_json(messages, max_tokens) -> str (a JSON object st
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -32,20 +33,24 @@ class GeminiBackend:
|
|||||||
API is the eventual scale path; this synchronous form is the drop-in fallback."""
|
API is the eventual scale path; this synchronous form is the drop-in fallback."""
|
||||||
name = "gemini"
|
name = "gemini"
|
||||||
|
|
||||||
def __init__(self, api_key: str, model: str = "gemini-2.5-flash") -> None:
|
def __init__(self, api_key: str, model: str = "gemini-2.5-flash", *,
|
||||||
|
timeout_s: float = 120.0, retries: int = 4) -> None:
|
||||||
from google import genai # guarded import; pip install google-genai
|
from google import genai # guarded import; pip install google-genai
|
||||||
|
from google.genai import types
|
||||||
self._genai = genai
|
self._genai = genai
|
||||||
self.client = genai.Client(api_key=api_key)
|
self._types = types
|
||||||
|
# http_options.timeout is in MILLISECONDS — without it a stalled request hangs the (single-
|
||||||
|
# threaded) worker forever; one such hang froze a whole batch for ~50 min before this fix.
|
||||||
|
self.client = genai.Client(api_key=api_key,
|
||||||
|
http_options=types.HttpOptions(timeout=int(timeout_s * 1000)))
|
||||||
self.model = model
|
self.model = model
|
||||||
|
self.retries = retries
|
||||||
|
|
||||||
def complete_json(self, messages: list[dict], *, max_tokens: int = 4000) -> str:
|
def complete_json(self, messages: list[dict], *, max_tokens: int = 4000) -> str:
|
||||||
from google.genai import types
|
types = self._types
|
||||||
system = "\n\n".join(m["content"] for m in messages if m["role"] == "system")
|
system = "\n\n".join(m["content"] for m in messages if m["role"] == "system")
|
||||||
user = "\n\n".join(m["content"] for m in messages if m["role"] != "system")
|
user = "\n\n".join(m["content"] for m in messages if m["role"] != "system")
|
||||||
resp = self.client.models.generate_content(
|
cfg = types.GenerateContentConfig(
|
||||||
model=self.model,
|
|
||||||
contents=user,
|
|
||||||
config=types.GenerateContentConfig(
|
|
||||||
system_instruction=system or None,
|
system_instruction=system or None,
|
||||||
temperature=0,
|
temperature=0,
|
||||||
max_output_tokens=max_tokens,
|
max_output_tokens=max_tokens,
|
||||||
@@ -54,9 +59,17 @@ class GeminiBackend:
|
|||||||
# it hit MAX_TOKENS with ~3.8k thoughts and a truncated JSON body (0 claims parsed).
|
# it hit MAX_TOKENS with ~3.8k thoughts and a truncated JSON body (0 claims parsed).
|
||||||
# Extraction is deterministic, no-CoT (mirrors the local path's enable_thinking=False).
|
# Extraction is deterministic, no-CoT (mirrors the local path's enable_thinking=False).
|
||||||
thinking_config=types.ThinkingConfig(thinking_budget=0),
|
thinking_config=types.ThinkingConfig(thinking_budget=0),
|
||||||
),
|
|
||||||
)
|
)
|
||||||
|
for attempt in range(self.retries + 1):
|
||||||
|
try:
|
||||||
|
resp = self.client.models.generate_content(model=self.model, contents=user, config=cfg)
|
||||||
return resp.text or "{}"
|
return resp.text or "{}"
|
||||||
|
except Exception as e: # noqa: BLE001 — timeout/5xx/429/network: back off and retry
|
||||||
|
if attempt >= self.retries:
|
||||||
|
raise
|
||||||
|
sleep = 2.0 * (2 ** attempt)
|
||||||
|
log.warning("Gemini call failed (%s); retry %d/%d in %.0fs", e, attempt + 1, self.retries, sleep)
|
||||||
|
time.sleep(sleep)
|
||||||
|
|
||||||
|
|
||||||
def from_config(cfg, sc) -> "LocalQwenBackend | GeminiBackend":
|
def from_config(cfg, sc) -> "LocalQwenBackend | GeminiBackend":
|
||||||
|
|||||||
Reference in New Issue
Block a user