v0.2.9 Gemini model selects + fallback chain

This commit is contained in:
local
2026-05-12 00:45:41 -05:00
parent 05ebeb5d51
commit 8ffc3ffb73
5 changed files with 209 additions and 72 deletions
+50 -12
View File
@@ -11,23 +11,40 @@ const { InputSpec, Value } = sdk
const inputSpec = InputSpec.of({
// ── Gemini model selection ──
relay_gemini_transcription_model: Value.text({
// Both fields are radio-select with curated options. The relay's
// Gemini backend automatically falls back to lower-tier models in
// this same list when the chosen one returns a 503 / capacity /
// rate-limit error — see server/backends/gemini.js for the
// fallback-chain logic.
relay_gemini_transcription_model: Value.select({
name: 'Gemini Transcription Model',
description:
"The Gemini SKU used when a transcription request is routed to Gemini. Flash is recommended (cheap, fast, multimodal). Examples: gemini-3-flash-preview (default), gemini-2.5-flash, gemini-2.0-flash, gemini-3-pro-preview (slower + pricier but higher quality on edge cases).",
required: true,
"Primary Gemini SKU used when a transcription request is routed to Gemini. On 503/capacity/rate-limit failure, the relay falls back to lower-tier models in order (e.g. 3-flash → 2.5-flash → 2.0-flash).",
default: 'gemini-3-flash-preview',
minLength: 1,
maxLength: 128,
values: {
'gemini-3-flash-preview':
'Gemini 3 Flash — latest, recommended (~$0.30/M in, $2.50/M out)',
'gemini-2.5-flash':
'Gemini 2.5 Flash — prior gen (same pricing as 3-flash)',
'gemini-2.0-flash':
'Gemini 2.0 Flash — older + cheapest (~$0.10/M in, $0.40/M out)',
},
}),
relay_gemini_analysis_model: Value.text({
relay_gemini_analysis_model: Value.select({
name: 'Gemini Analysis Model',
description:
"The Gemini SKU used when an analysis request is routed to Gemini. Pro is the default for higher-quality structured output. Swap to a flash SKU (e.g. gemini-3-flash-preview) for faster + cheaper analysis at some loss of section-boundary precision.",
required: true,
"Primary Gemini SKU used when an analysis request is routed to Gemini. On 503/capacity/rate-limit failure, the relay falls back to lower-tier models in order (e.g. 3.1-pro → 3-pro → 3-flash → 2.5-flash).",
default: 'gemini-3.1-pro-preview',
minLength: 1,
maxLength: 128,
values: {
'gemini-3.1-pro-preview':
'Gemini 3.1 Pro — best quality on structured-JSON output ($5/M in, $25/M out)',
'gemini-3-pro-preview':
'Gemini 3 Pro — prior Pro gen (same pricing as 3.1)',
'gemini-3-flash-preview':
'Gemini 3 Flash — faster + ~20× cheaper than Pro; some loss of section-boundary precision on long transcripts',
'gemini-2.5-flash':
'Gemini 2.5 Flash — prior Flash gen',
},
}),
// ── Backend routing preference per pipeline ──
@@ -78,11 +95,32 @@ export const setBackendRouting = sdk.Action.withInput(
async ({ effects }) => {
const config = await configFile.read().once()
// Coerce any previously-saved model name to a value in the new
// select's options. Older 0.2.7-era saved configs could hold a
// free-text value that's no longer in the dropdown — clamp to a
// sensible default rather than presenting an invalid radio.
//
// Allowed transcription model ids. These duplicate the keys of the
// `values` map on the relay_gemini_transcription_model select in
// inputSpec; the two lists must be kept in sync by hand.
const TX_OPTIONS = [
'gemini-3-flash-preview',
'gemini-2.5-flash',
'gemini-2.0-flash',
] as const
// Allowed analysis model ids. These duplicate the keys of the `values`
// map on the relay_gemini_analysis_model select in inputSpec; keep in
// sync by hand as well.
const AN_OPTIONS = [
'gemini-3.1-pro-preview',
'gemini-3-pro-preview',
'gemini-3-flash-preview',
'gemini-2.5-flash',
] as const
// NOTE(review): the `as` casts below only change the static type
// (presumably so that `includes()` on the readonly tuples type-checks);
// they validate nothing. At runtime `tx` / `an` may still hold any
// previously-saved string, or be undefined, so they must not be trusted
// until they have passed the `includes()` membership test applied where
// the returned values are built.
const tx = config?.relay_gemini_transcription_model as
| (typeof TX_OPTIONS)[number]
| undefined
const an = config?.relay_gemini_analysis_model as
| (typeof AN_OPTIONS)[number]
| undefined
return {
relay_gemini_transcription_model:
config?.relay_gemini_transcription_model || 'gemini-3-flash-preview',
tx && TX_OPTIONS.includes(tx) ? tx : 'gemini-3-flash-preview',
relay_gemini_analysis_model:
config?.relay_gemini_analysis_model || 'gemini-3.1-pro-preview',
an && AN_OPTIONS.includes(an) ? an : 'gemini-3.1-pro-preview',
relay_transcribe_backend_preference:
(config?.relay_transcribe_backend_preference as
| 'gemini_first'