v0.2.9 Gemini model selects + fallback chain

This commit is contained in:
local
2026-05-12 00:45:41 -05:00
parent 05ebeb5d51
commit 8ffc3ffb73
5 changed files with 209 additions and 72 deletions
+50 -12
View File
@@ -11,23 +11,40 @@ const { InputSpec, Value } = sdk
const inputSpec = InputSpec.of({
// ── Gemini model selection ──
relay_gemini_transcription_model: Value.text({
// Both fields are radio-select with curated options. The relay's
// Gemini backend automatically falls back to lower-tier models in
// this same list when the chosen one returns a 503 / capacity /
// rate-limit error — see server/backends/gemini.js for the
// fallback-chain logic.
relay_gemini_transcription_model: Value.select({
name: 'Gemini Transcription Model',
description:
"The Gemini SKU used when a transcription request is routed to Gemini. Flash is recommended (cheap, fast, multimodal). Examples: gemini-3-flash-preview (default), gemini-2.5-flash, gemini-2.0-flash, gemini-3-pro-preview (slower + pricier but higher quality on edge cases).",
required: true,
"Primary Gemini SKU used when a transcription request is routed to Gemini. On 503/capacity/rate-limit failure, the relay falls back to lower-tier models in order (e.g. 3-flash → 2.5-flash → 2.0-flash).",
default: 'gemini-3-flash-preview',
minLength: 1,
maxLength: 128,
values: {
'gemini-3-flash-preview':
'Gemini 3 Flash — latest, recommended (~$0.30/M in, $2.50/M out)',
'gemini-2.5-flash':
'Gemini 2.5 Flash — prior gen (same pricing as 3-flash)',
'gemini-2.0-flash':
'Gemini 2.0 Flash — older + cheapest (~$0.10/M in, $0.40/M out)',
},
}),
relay_gemini_analysis_model: Value.text({
relay_gemini_analysis_model: Value.select({
name: 'Gemini Analysis Model',
description:
"The Gemini SKU used when an analysis request is routed to Gemini. Pro is the default for higher-quality structured output. Swap to a flash SKU (e.g. gemini-3-flash-preview) for faster + cheaper analysis at some loss of section-boundary precision.",
required: true,
"Primary Gemini SKU used when an analysis request is routed to Gemini. On 503/capacity/rate-limit failure, the relay falls back to lower-tier models in order (e.g. 3.1-pro → 3-pro → 3-flash → 2.5-flash).",
default: 'gemini-3.1-pro-preview',
minLength: 1,
maxLength: 128,
values: {
'gemini-3.1-pro-preview':
'Gemini 3.1 Pro — best quality on structured-JSON output ($5/M in, $25/M out)',
'gemini-3-pro-preview':
'Gemini 3 Pro — prior Pro gen (same pricing as 3.1)',
'gemini-3-flash-preview':
'Gemini 3 Flash — faster + ~20× cheaper than Pro; some loss of section-boundary precision on long transcripts',
'gemini-2.5-flash':
'Gemini 2.5 Flash — prior Flash gen',
},
}),
// ── Backend routing preference per pipeline ──
@@ -78,11 +95,32 @@ export const setBackendRouting = sdk.Action.withInput(
async ({ effects }) => {
const config = await configFile.read().once()
// Coerce any previously-saved model name to one of the new select's
// options. Configs saved before 0.2.9 (when these fields were free
// text) may hold a value that is no longer in the dropdown — clamp it
// to the default rather than presenting an invalid radio selection.
const TX_OPTIONS = [
'gemini-3-flash-preview',
'gemini-2.5-flash',
'gemini-2.0-flash',
] as const
const AN_OPTIONS = [
'gemini-3.1-pro-preview',
'gemini-3-pro-preview',
'gemini-3-flash-preview',
'gemini-2.5-flash',
] as const
const tx = config?.relay_gemini_transcription_model as
| (typeof TX_OPTIONS)[number]
| undefined
const an = config?.relay_gemini_analysis_model as
| (typeof AN_OPTIONS)[number]
| undefined
return {
relay_gemini_transcription_model:
config?.relay_gemini_transcription_model || 'gemini-3-flash-preview',
tx && TX_OPTIONS.includes(tx) ? tx : 'gemini-3-flash-preview',
relay_gemini_analysis_model:
config?.relay_gemini_analysis_model || 'gemini-3.1-pro-preview',
an && AN_OPTIONS.includes(an) ? an : 'gemini-3.1-pro-preview',
relay_transcribe_backend_preference:
(config?.relay_transcribe_backend_preference as
| 'gemini_first'
+3 -2
View File
@@ -9,8 +9,9 @@ import { v_0_2_5 } from './v0.2.5'
import { v_0_2_6 } from './v0.2.6'
import { v_0_2_7 } from './v0.2.7'
import { v_0_2_8 } from './v0.2.8'
import { v_0_2_9 } from './v0.2.9'
export const versionGraph = VersionGraph.of({
current: v_0_2_8,
other: [v_0_2_7, v_0_2_6, v_0_2_5, v_0_2_4, v_0_2_3, v_0_2_2, v_0_2_1, v_0_2_0, v_0_1_0],
current: v_0_2_9,
other: [v_0_2_8, v_0_2_7, v_0_2_6, v_0_2_5, v_0_2_4, v_0_2_3, v_0_2_2, v_0_2_1, v_0_2_0, v_0_1_0],
})
+13
View File
@@ -0,0 +1,13 @@
import { VersionInfo } from '@start9labs/start-sdk'
// English release notes for 0.2.9, kept separate from the descriptor so the
// long user-facing text does not bury the version/migration structure.
const RELEASE_NOTES_EN_US =
  'Set Backend Routing & Models action: Gemini transcription and analysis fields are now radio-select dropdowns with curated options (transcribe: 3-flash, 2.5-flash, 2.0-flash; analyze: 3.1-pro, 3-pro, 3-flash, 2.5-flash). Gemini backend automatically falls back to lower-tier models in the same chain when the primary returns a 503/capacity/rate-limit error. Audit log records the model that actually served each call, so dashboard reflects fallback behavior accurately.'

/**
 * Version descriptor for release 0.2.9 (revision 0).
 *
 * Both migration hooks are no-ops: nothing is rewritten when moving up to
 * or down from this version.
 *
 * NOTE(review): this release turns the Gemini model fields into selects, and
 * the `up` hook does not rewrite model names already saved as free text —
 * confirm that handling those values outside the migration is sufficient.
 */
export const v_0_2_9 = VersionInfo.of({
  version: '0.2.9:0',
  releaseNotes: { en_US: RELEASE_NOTES_EN_US },
  migrations: {
    // Intentionally empty in both directions.
    up: async () => {},
    down: async () => {},
  },
})