v0.2.9 Gemini model selects + fallback chain
This commit is contained in:
@@ -11,23 +11,40 @@ const { InputSpec, Value } = sdk
|
||||
|
||||
const inputSpec = InputSpec.of({
|
||||
// ── Gemini model selection ──
|
||||
relay_gemini_transcription_model: Value.text({
|
||||
// Both fields are radio-select with curated options. The relay's
|
||||
// Gemini backend automatically falls back to lower-tier models in
|
||||
// this same list when the chosen one returns a 503 / capacity /
|
||||
// rate-limit error — see server/backends/gemini.js for the
|
||||
// fallback-chain logic.
|
||||
relay_gemini_transcription_model: Value.select({
|
||||
name: 'Gemini Transcription Model',
|
||||
description:
|
||||
"The Gemini SKU used when a transcription request is routed to Gemini. Flash is recommended (cheap, fast, multimodal). Examples: gemini-3-flash-preview (default), gemini-2.5-flash, gemini-2.0-flash, gemini-3-pro-preview (slower + pricier but higher quality on edge cases).",
|
||||
required: true,
|
||||
"Primary Gemini SKU used when a transcription request is routed to Gemini. On 503/capacity/rate-limit failure, the relay falls back to lower-tier models in order (e.g. 3-flash → 2.5-flash → 2.0-flash).",
|
||||
default: 'gemini-3-flash-preview',
|
||||
minLength: 1,
|
||||
maxLength: 128,
|
||||
values: {
|
||||
'gemini-3-flash-preview':
|
||||
'Gemini 3 Flash — latest, recommended (~$0.30/M in, $2.50/M out)',
|
||||
'gemini-2.5-flash':
|
||||
'Gemini 2.5 Flash — prior gen (same pricing as 3-flash)',
|
||||
'gemini-2.0-flash':
|
||||
'Gemini 2.0 Flash — older + cheapest (~$0.10/M in, $0.40/M out)',
|
||||
},
|
||||
}),
|
||||
relay_gemini_analysis_model: Value.text({
|
||||
relay_gemini_analysis_model: Value.select({
|
||||
name: 'Gemini Analysis Model',
|
||||
description:
|
||||
"The Gemini SKU used when an analysis request is routed to Gemini. Pro is the default for higher-quality structured output. Swap to a flash SKU (e.g. gemini-3-flash-preview) for faster + cheaper analysis at some loss of section-boundary precision.",
|
||||
required: true,
|
||||
"Primary Gemini SKU used when an analysis request is routed to Gemini. On 503/capacity/rate-limit failure, the relay falls back to lower-tier models in order (e.g. 3.1-pro → 3-pro → 3-flash → 2.5-flash).",
|
||||
default: 'gemini-3.1-pro-preview',
|
||||
minLength: 1,
|
||||
maxLength: 128,
|
||||
values: {
|
||||
'gemini-3.1-pro-preview':
|
||||
'Gemini 3.1 Pro — best quality on structured-JSON output ($5/M in, $25/M out)',
|
||||
'gemini-3-pro-preview':
|
||||
'Gemini 3 Pro — prior Pro gen (same pricing as 3.1)',
|
||||
'gemini-3-flash-preview':
|
||||
'Gemini 3 Flash — faster + ~20× cheaper than Pro; some loss of section-boundary precision on long transcripts',
|
||||
'gemini-2.5-flash':
|
||||
'Gemini 2.5 Flash — prior Flash gen',
|
||||
},
|
||||
}),
|
||||
|
||||
// ── Backend routing preference per pipeline ──
|
||||
@@ -78,11 +95,32 @@ export const setBackendRouting = sdk.Action.withInput(
|
||||
|
||||
async ({ effects }) => {
|
||||
const config = await configFile.read().once()
|
||||
// Coerce any previously-saved model name to a value in the new
|
||||
// select's options. Older 0.2.7-era saved configs could hold a
|
||||
// free-text value that's no longer in the dropdown — clamp to a
|
||||
// sensible default rather than presenting an invalid radio.
|
||||
const TX_OPTIONS = [
|
||||
'gemini-3-flash-preview',
|
||||
'gemini-2.5-flash',
|
||||
'gemini-2.0-flash',
|
||||
] as const
|
||||
const AN_OPTIONS = [
|
||||
'gemini-3.1-pro-preview',
|
||||
'gemini-3-pro-preview',
|
||||
'gemini-3-flash-preview',
|
||||
'gemini-2.5-flash',
|
||||
] as const
|
||||
const tx = config?.relay_gemini_transcription_model as
|
||||
| (typeof TX_OPTIONS)[number]
|
||||
| undefined
|
||||
const an = config?.relay_gemini_analysis_model as
|
||||
| (typeof AN_OPTIONS)[number]
|
||||
| undefined
|
||||
return {
|
||||
relay_gemini_transcription_model:
|
||||
config?.relay_gemini_transcription_model || 'gemini-3-flash-preview',
|
||||
tx && TX_OPTIONS.includes(tx) ? tx : 'gemini-3-flash-preview',
|
||||
relay_gemini_analysis_model:
|
||||
config?.relay_gemini_analysis_model || 'gemini-3.1-pro-preview',
|
||||
an && AN_OPTIONS.includes(an) ? an : 'gemini-3.1-pro-preview',
|
||||
relay_transcribe_backend_preference:
|
||||
(config?.relay_transcribe_backend_preference as
|
||||
| 'gemini_first'
|
||||
|
||||
Reference in New Issue
Block a user