v0.2.9 Gemini model selects + fallback chain

2026-05-12 00:45:41 -05:00
parent 05ebeb5d51
commit 8ffc3ffb73
5 changed files with 209 additions and 72 deletions
@@ -11,23 +11,40 @@ const { InputSpec, Value } = sdk

 const inputSpec = InputSpec.of({
  // ── Gemini model selection ──
-  relay_gemini_transcription_model: Value.text({
+  // Both fields are radio-select with curated options. The relay's
+  // Gemini backend automatically falls back to lower-tier models in
+  // this same list when the chosen one returns a 503 / capacity /
+  // rate-limit error — see server/backends/gemini.js for the
+  // fallback-chain logic.
+  relay_gemini_transcription_model: Value.select({
    name: 'Gemini Transcription Model',
    description:
-      "The Gemini SKU used when a transcription request is routed to Gemini. Flash is recommended (cheap, fast, multimodal). Examples: gemini-3-flash-preview (default), gemini-2.5-flash, gemini-2.0-flash, gemini-3-pro-preview (slower + pricier but higher quality on edge cases).",
-    required: true,
+      "Primary Gemini SKU used when a transcription request is routed to Gemini. On 503/capacity/rate-limit failure, the relay falls back to lower-tier models in order (e.g. 3-flash → 2.5-flash → 2.0-flash).",
    default: 'gemini-3-flash-preview',
-    minLength: 1,
-    maxLength: 128,
+    values: {
+      'gemini-3-flash-preview':
+        'Gemini 3 Flash — latest, recommended (~$0.30/M in, $2.50/M out)',
+      'gemini-2.5-flash':
+        'Gemini 2.5 Flash — prior gen (same pricing as 3-flash)',
+      'gemini-2.0-flash':
+        'Gemini 2.0 Flash — older + cheapest (~$0.10/M in, $0.40/M out)',
+    },
  }),
-  relay_gemini_analysis_model: Value.text({
+  relay_gemini_analysis_model: Value.select({
    name: 'Gemini Analysis Model',
    description:
-      "The Gemini SKU used when an analysis request is routed to Gemini. Pro is the default for higher-quality structured output. Swap to a flash SKU (e.g. gemini-3-flash-preview) for faster + cheaper analysis at some loss of section-boundary precision.",
-    required: true,
+      "Primary Gemini SKU used when an analysis request is routed to Gemini. On 503/capacity/rate-limit failure, the relay falls back to lower-tier models in order (e.g. 3.1-pro → 3-pro → 3-flash → 2.5-flash).",
    default: 'gemini-3.1-pro-preview',
-    minLength: 1,
-    maxLength: 128,
+    values: {
+      'gemini-3.1-pro-preview':
+        'Gemini 3.1 Pro — best quality on structured-JSON output ($5/M in, $25/M out)',
+      'gemini-3-pro-preview':
+        'Gemini 3 Pro — prior Pro gen (same pricing as 3.1)',
+      'gemini-3-flash-preview':
+        'Gemini 3 Flash — faster + ~20× cheaper than Pro; some loss of section-boundary precision on long transcripts',
+      'gemini-2.5-flash':
+        'Gemini 2.5 Flash — prior Flash gen',
+    },
  }),

  // ── Backend routing preference per pipeline ──
@@ -78,11 +95,32 @@ export const setBackendRouting = sdk.Action.withInput(

  async ({ effects }) => {
    const config = await configFile.read().once()
+    // Coerce any previously-saved model name to a value in the new
+    // select's options. Configs saved before 0.2.9, when these fields
+    // were free-text, can hold a value that is no longer offered — fall
+    // back to the default rather than presenting an invalid radio option.
+    const TX_OPTIONS = [
+      'gemini-3-flash-preview',
+      'gemini-2.5-flash',
+      'gemini-2.0-flash',
+    ] as const
+    const AN_OPTIONS = [
+      'gemini-3.1-pro-preview',
+      'gemini-3-pro-preview',
+      'gemini-3-flash-preview',
+      'gemini-2.5-flash',
+    ] as const
+    const tx = config?.relay_gemini_transcription_model as
+      | (typeof TX_OPTIONS)[number]
+      | undefined
+    const an = config?.relay_gemini_analysis_model as
+      | (typeof AN_OPTIONS)[number]
+      | undefined
    return {
      relay_gemini_transcription_model:
-        config?.relay_gemini_transcription_model || 'gemini-3-flash-preview',
+        tx && TX_OPTIONS.includes(tx) ? tx : 'gemini-3-flash-preview',
      relay_gemini_analysis_model:
-        config?.relay_gemini_analysis_model || 'gemini-3.1-pro-preview',
+        an && AN_OPTIONS.includes(an) ? an : 'gemini-3.1-pro-preview',
      relay_transcribe_backend_preference:
        (config?.relay_transcribe_backend_preference as
          | 'gemini_first'
@@ -9,8 +9,9 @@ import { v_0_2_5 } from './v0.2.5'
 import { v_0_2_6 } from './v0.2.6'
 import { v_0_2_7 } from './v0.2.7'
 import { v_0_2_8 } from './v0.2.8'
+import { v_0_2_9 } from './v0.2.9'

 export const versionGraph = VersionGraph.of({
-  current: v_0_2_8,
-  other: [v_0_2_7, v_0_2_6, v_0_2_5, v_0_2_4, v_0_2_3, v_0_2_2, v_0_2_1, v_0_2_0, v_0_1_0],
+  current: v_0_2_9, // newest version imported by this module
+  other: [v_0_2_8, v_0_2_7, v_0_2_6, v_0_2_5, v_0_2_4, v_0_2_3, v_0_2_2, v_0_2_1, v_0_2_0, v_0_1_0], // every earlier version, listed newest first
 })
@@ -0,0 +1,13 @@
+import { VersionInfo } from '@start9labs/start-sdk'
+
+export const v_0_2_9 = VersionInfo.of({ // version descriptor for release 0.2.9: version string, release notes, migrations
+  version: '0.2.9:0', // NOTE(review): the ':0' suffix is presumably a package/downstream revision — confirm against the SDK version format
+  releaseNotes: {
+    en_US:
+      'Set Backend Routing & Models action: Gemini transcription and analysis fields are now radio-select dropdowns with curated options (transcribe: 3-flash, 2.5-flash, 2.0-flash; analyze: 3.1-pro, 3-pro, 3-flash, 2.5-flash). Gemini backend automatically falls back to lower-tier models in the same chain when the primary returns a 503/capacity/rate-limit error. Audit log records the model that actually served each call, so dashboard reflects fallback behavior accurately.',
+  },
+  migrations: { // NOTE(review): both hooks are no-ops, so a stored model name outside the new select options is not rewritten by this version — confirm that is intended
+    up: async ({ effects }) => {}, // empty body: performs no work; `effects` is unused
+    down: async ({ effects }) => {}, // empty body: performs no work; `effects` is unused
+  },
+})