v0.2.7 configurable Gemini models + per-pipeline backend preference

2026-05-12 00:15:07 -05:00
parent cd377683fb
commit 9af70302b1
11 changed files with 273 additions and 22 deletions
@@ -36,6 +36,33 @@ export const configFile = FileHelper.json(
    relay_parakeet_model: z.string().default('parakeet-tdt-0.6b-v3'),
    relay_gemma_model: z.string().default('gemma3:27b'),

+    // ── Gemini model selection ──
+    // Operator can pick which Gemini SKU is used per pipeline step
+    // without rebuilding the relay. Defaults match Google's typical
+    // recommendations: Flash for transcription (cheap, fast,
+    // multimodal-capable), Pro for analysis (higher quality on
+    // structured-JSON outputs). Operators can switch analysis to
+    // Flash when they want faster, cheaper runs at the cost of some
+    // section-boundary precision.
+    relay_gemini_transcription_model: z.string().default('gemini-3-flash-preview'),
+    relay_gemini_analysis_model: z.string().default('gemini-3.1-pro-preview'),
+
+    // ── Backend routing preference per pipeline ──
+    // Controls whether the relay tries Gemini first (current default —
+    // best quality, costs operator's Gemini API budget) or the
+    // operator-hardware backend first (saves Gemini budget, may be
+    // slower depending on the operator's hardware). One of:
+    //   - "gemini_first"   try Gemini until the per-tier cap, then hardware
+    //   - "hardware_first" try hardware first, fall back to Gemini
+    //   - "gemini_only"    Gemini only; fail when the cap is exceeded
+    //   - "hardware_only"  hardware only; fail when not configured
+    relay_transcribe_backend_preference: z
+      .enum(['gemini_first', 'hardware_first', 'gemini_only', 'hardware_only'])
+      .default('gemini_first'),
+    relay_analyze_backend_preference: z
+      .enum(['gemini_first', 'hardware_first', 'gemini_only', 'hardware_only'])
+      .default('gemini_first'),
+
    // ── License server ──
    // URL of the Keysat license server used for the cached online
    // license-validation check. Defaults to the public endpoint;