From e8d50efdf4350559313166bea02eab240cd2941e Mon Sep 17 00:00:00 2001
From: Keysat <licensing@keysat.xyz>
Date: Mon, 15 Jun 2026 22:28:12 -0500
Subject: [PATCH] Disable Gemini thinking budget in extraction backend

gemini-2.5-flash thinks by default and spent ~3.8k of the 4k output-token budget on reasoning, so the response hit MAX_TOKENS with a truncated JSON body and 0 claims were parsed. Set thinking_budget=0 so the full budget goes to the answer (this mirrors the local path's enable_thinking=False). On the validation chunk, the number of parsed claims went from 0 to 11.
---
 signal_engine/extract/backends.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/signal_engine/extract/backends.py b/signal_engine/extract/backends.py
index b80b073..83ad2b2 100644
--- a/signal_engine/extract/backends.py
+++ b/signal_engine/extract/backends.py
@@ -50,6 +50,10 @@ class GeminiBackend:
                 temperature=0,
                 max_output_tokens=max_tokens,
                 response_mime_type="application/json",
+                # Gemini 2.5 thinks by default and spends the output budget on reasoning tokens —
+                # it hit MAX_TOKENS with ~3.8k thoughts and a truncated JSON body (0 claims parsed).
+                # Extraction is deterministic, no-CoT (mirrors the local path's enable_thinking=False).
+                thinking_config=types.ThinkingConfig(thinking_budget=0),
             ),
         )
         return resp.text or "{}"