import { sdk } from '../sdk'
import { configFile } from '../file-models/config.json'

const { InputSpec, Value } = sdk

// ── Option tables ──
// Each table is the single source of truth for one select: it is passed
// to Value.select as `values` AND used to validate saved/submitted
// values, so the dropdown and the clamping logic cannot drift apart.

const TRANSCRIPTION_MODELS = {
  'gemini-3-flash-preview':
    'Gemini 3 Flash — latest, recommended (~$0.30/M in, $2.50/M out)',
  'gemini-2.5-flash': 'Gemini 2.5 Flash — prior gen (same pricing as 3-flash)',
  'gemini-2.0-flash':
    'Gemini 2.0 Flash — older + cheapest (~$0.10/M in, $0.40/M out)',
}
const DEFAULT_TRANSCRIPTION_MODEL = 'gemini-3-flash-preview'

const ANALYSIS_MODELS = {
  'gemini-3.1-pro-preview':
    'Gemini 3.1 Pro — best quality on structured-JSON output ($5/M in, $25/M out)',
  'gemini-3-pro-preview': 'Gemini 3 Pro — prior Pro gen (same pricing as 3.1)',
  'gemini-3-flash-preview':
    'Gemini 3 Flash — faster + ~20× cheaper than Pro; some loss of section-boundary precision on long transcripts',
  'gemini-2.5-flash': 'Gemini 2.5 Flash — prior Flash gen',
}
const DEFAULT_ANALYSIS_MODEL = 'gemini-3.1-pro-preview'

const TRANSCRIBE_BACKEND_PREFERENCES = {
  gemini_first: 'Gemini first → operator hardware (Parakeet) when cap exceeded',
  hardware_first: 'Operator hardware first → Gemini as fallback',
  gemini_only: 'Gemini only — fail when cap is exceeded',
  hardware_only: 'Hardware only — fail when no Parakeet endpoint is configured',
}

const ANALYZE_BACKEND_PREFERENCES = {
  gemini_first: 'Gemini first → operator hardware (Gemma) when cap exceeded',
  hardware_first: 'Operator hardware first → Gemini as fallback',
  gemini_only: 'Gemini only — fail when cap is exceeded',
  hardware_only: 'Hardware only — fail when no Gemma endpoint is configured',
}
const DEFAULT_BACKEND_PREFERENCE = 'gemini_first'

/**
 * Clamp an untrusted value to one of the keys of an option table.
 *
 * @param options  Option table (key → label) backing a select.
 * @param saved    Value read from config or form input; may be anything.
 * @param fallback Key returned when `saved` is not a valid option.
 * @returns `saved` (whitespace-trimmed) when it is an own key of
 *          `options`, otherwise `fallback`.
 */
function pickOption<T extends Record<string, string>>(
  options: T,
  saved: unknown,
  fallback: keyof T & string,
): keyof T & string {
  if (typeof saved !== 'string') return fallback
  const candidate = saved.trim()
  // Own-property check so inherited names like "toString" never count
  // as a valid option.
  return Object.prototype.hasOwnProperty.call(options, candidate)
    ? (candidate as keyof T & string)
    : fallback
}

// Lets the operator tune which backend gets tried first per pipeline
// step (transcribe vs analyze) AND which Gemini SKU is used when
// Gemini is the backend. All four knobs live-reload — change them
// via this action and the next relay request honors the new values
// without a daemon restart.
const inputSpec = InputSpec.of({
  // ── Gemini model selection ──
  // Both fields are radio-select with curated options. The relay's
  // Gemini backend automatically falls back to lower-tier models in
  // this same list when the chosen one returns a 503 / capacity /
  // rate-limit error — see server/backends/gemini.js for the
  // fallback-chain logic.
  relay_gemini_transcription_model: Value.select({
    name: 'Gemini Transcription Model',
    description:
      "Primary Gemini SKU used when a transcription request is routed to Gemini. On 503/capacity/rate-limit failure, the relay falls back to lower-tier models in order (e.g. 3-flash → 2.5-flash → 2.0-flash).",
    default: DEFAULT_TRANSCRIPTION_MODEL,
    values: TRANSCRIPTION_MODELS,
  }),
  relay_gemini_analysis_model: Value.select({
    name: 'Gemini Analysis Model',
    description:
      "Primary Gemini SKU used when an analysis request is routed to Gemini. On 503/capacity/rate-limit failure, the relay falls back to lower-tier models in order (e.g. 3.1-pro → 3-pro → 3-flash → 2.5-flash).",
    default: DEFAULT_ANALYSIS_MODEL,
    values: ANALYSIS_MODELS,
  }),

  // ── Backend routing preference per pipeline ──
  relay_transcribe_backend_preference: Value.select({
    name: 'Transcribe Backend Preference',
    description:
      'Routing strategy for transcription requests. The selected option controls the ORDER in which the relay tries each backend. The Gemini per-tier cap still applies regardless of this setting.',
    default: DEFAULT_BACKEND_PREFERENCE,
    values: TRANSCRIBE_BACKEND_PREFERENCES,
  }),
  relay_analyze_backend_preference: Value.select({
    name: 'Analyze Backend Preference',
    description:
      'Routing strategy for analysis requests. Same options as transcription but applies to the analyze step independently — you can route transcribe to hardware and analyze to Gemini, or vice versa.',
    default: DEFAULT_BACKEND_PREFERENCE,
    values: ANALYZE_BACKEND_PREFERENCES,
  }),
})

/**
 * Action that edits the relay's Gemini model selection and the
 * per-pipeline backend routing preference.
 *
 * The form is prefilled from the config file, and on submit the four
 * values are merged back into it. Both directions clamp every field to
 * a key of its option table, so a stale or hand-edited config value is
 * never shown as a selection and never written back.
 */
export const setBackendRouting = sdk.Action.withInput(
  'set-backend-routing',

  // Action metadata.
  async ({ effects }) => ({
    name: 'Set Backend Routing & Models',
    description:
      "Tune which Gemini SKUs the relay uses and the per-pipeline backend pecking order. Live-reloaded — changes take effect on the next request, no restart.",
    warning: null,
    allowedStatuses: 'any',
    group: 'AI Backends',
    visibility: 'enabled',
  }),

  inputSpec,

  // Prefill: read the current config and coerce each saved value to an
  // option that exists in its select. Older 0.2.7-era saved configs
  // could hold a free-text model name that's no longer in the dropdown
  // — clamp to a sensible default rather than presenting an invalid
  // radio. The backend preferences get the same treatment.
  async ({ effects }) => {
    const config = await configFile.read().once()

    return {
      relay_gemini_transcription_model: pickOption(
        TRANSCRIPTION_MODELS,
        config?.relay_gemini_transcription_model,
        DEFAULT_TRANSCRIPTION_MODEL,
      ),
      relay_gemini_analysis_model: pickOption(
        ANALYSIS_MODELS,
        config?.relay_gemini_analysis_model,
        DEFAULT_ANALYSIS_MODEL,
      ),
      relay_transcribe_backend_preference: pickOption(
        TRANSCRIBE_BACKEND_PREFERENCES,
        config?.relay_transcribe_backend_preference,
        DEFAULT_BACKEND_PREFERENCE,
      ),
      relay_analyze_backend_preference: pickOption(
        ANALYZE_BACKEND_PREFERENCES,
        config?.relay_analyze_backend_preference,
        DEFAULT_BACKEND_PREFERENCE,
      ),
    }
  },

  // Submit: merge the chosen values into the config file. Values are
  // clamped again so only valid option keys are ever written.
  async ({ effects, input }) => {
    await configFile.merge(effects, {
      relay_gemini_transcription_model: pickOption(
        TRANSCRIPTION_MODELS,
        input.relay_gemini_transcription_model,
        DEFAULT_TRANSCRIPTION_MODEL,
      ),
      relay_gemini_analysis_model: pickOption(
        ANALYSIS_MODELS,
        input.relay_gemini_analysis_model,
        DEFAULT_ANALYSIS_MODEL,
      ),
      relay_transcribe_backend_preference: pickOption(
        TRANSCRIBE_BACKEND_PREFERENCES,
        input.relay_transcribe_backend_preference,
        DEFAULT_BACKEND_PREFERENCE,
      ),
      relay_analyze_backend_preference: pickOption(
        ANALYZE_BACKEND_PREFERENCES,
        input.relay_analyze_backend_preference,
        DEFAULT_BACKEND_PREFERENCE,
      ),
    })
    return null
  },
)