v0.2.7 configurable Gemini models + per-pipeline backend preference

This commit is contained in:
local
2026-05-12 00:15:07 -05:00
parent cd377683fb
commit 9af70302b1
11 changed files with 273 additions and 22 deletions
+27
View File
@@ -36,6 +36,33 @@ export const configFile = FileHelper.json(
relay_parakeet_model: z.string().default('parakeet-tdt-0.6b-v3'),
relay_gemma_model: z.string().default('gemma3:27b'),
// ── Gemini model selection ──
// Operator can pick which Gemini SKU is used per pipeline step
// without rebuilding the relay. Defaults match Google's typical
// recommendations: Flash for transcription (cheap, fast,
// multimodal-capable), Pro for analysis (higher quality on
// structured-JSON outputs). Operators can switch analysis to
// Flash when they want faster, cheaper runs at the cost of some
// section-boundary precision.
relay_gemini_transcription_model: z.string().default('gemini-3-flash-preview'),
relay_gemini_analysis_model: z.string().default('gemini-3.1-pro-preview'),
// ── Backend routing preference per pipeline ──
// Controls which backend the relay tries first, or whether it is
// restricted to a single backend. Gemini first is the current
// default (best quality, costs the operator's Gemini API budget);
// hardware first uses the operator-hardware backend (saves Gemini
// budget, may be slower depending on the operator's hardware).
// One of:
// - "gemini_first" try Gemini until per-tier cap, then hardware
// - "hardware_first" try hardware first, fall back to Gemini
// - "gemini_only" Gemini only, fail when cap is exceeded
// - "hardware_only" Hardware only, fail when not configured
relay_transcribe_backend_preference: z
.enum(['gemini_first', 'hardware_first', 'gemini_only', 'hardware_only'])
.default('gemini_first'),
relay_analyze_backend_preference: z
.enum(['gemini_first', 'hardware_first', 'gemini_only', 'hardware_only'])
.default('gemini_first'),
// ── License server ──
// URL of the Keysat license server used for the cached online
// license-validation check. Defaults to the public endpoint;