v0.2.7 configurable Gemini models + per-pipeline backend preference

2026-05-12 00:15:07 -05:00
parent cd377683fb
commit 9af70302b1
11 changed files with 273 additions and 22 deletions
@@ -5,11 +5,13 @@ import { setParakeetUrl } from './setParakeetUrl'
 import { setGemmaUrl } from './setGemmaUrl'
 import { setAdminPassword } from './setAdminPassword'
 import { adjustTierQuotas } from './adjustTierQuotas'
+import { setBackendRouting } from './setBackendRouting'

 export const actions = sdk.Actions.of()
  .addAction(setGeminiKey)
  .addAction(setKeysatBaseUrl)
  .addAction(setParakeetUrl)
  .addAction(setGemmaUrl)
+  .addAction(setBackendRouting) // per-pipeline backend order + Gemini model selection
  .addAction(setAdminPassword)
  .addAction(adjustTierQuotas)
@@ -0,0 +1,116 @@
+import { sdk } from '../sdk'
+import { configFile } from '../file-models/config.json'
+
+const { InputSpec, Value } = sdk
+
+// Lets the operator tune which backend gets tried first per pipeline
+// step (transcribe vs analyze) AND which Gemini SKU is used when
+// Gemini is the backend. All four knobs live-reload — change them
+// via this action and the next relay request honors the new values
+// without a daemon restart.
+
+// Length limits applied to both free-text Gemini model name fields.
+const MODEL_NAME_LENGTH = { minLength: 1, maxLength: 128 }
+
+/**
+ * Builds the option map used by a backend-preference select. Only the two
+ * labels that name the operator-hardware model differ between pipelines, so
+ * those are passed in; the remaining two labels are shared.
+ */
+const routingChoices = (
+  geminiFirstLabel: string,
+  hardwareOnlyLabel: string,
+) => ({
+  gemini_first: geminiFirstLabel,
+  hardware_first: 'Operator hardware first → Gemini as fallback',
+  gemini_only: 'Gemini only — fail when cap is exceeded',
+  hardware_only: hardwareOnlyLabel,
+})
+
+// Form schema for the action: two Gemini model names (free text) followed by
+// one routing-strategy select per pipeline step (transcribe, analyze).
+const inputSpec = InputSpec.of({
+  // Gemini model used for transcription requests.
+  relay_gemini_transcription_model: Value.text({
+    name: 'Gemini Transcription Model',
+    description:
+      "The Gemini SKU used when a transcription request is routed to Gemini. Flash is recommended (cheap, fast, multimodal). Examples: gemini-3-flash-preview (default), gemini-2.5-flash, gemini-2.0-flash, gemini-3-pro-preview (slower + pricier but higher quality on edge cases).",
+    required: true,
+    default: 'gemini-3-flash-preview',
+    ...MODEL_NAME_LENGTH,
+  }),
+
+  // Gemini model used for analysis requests.
+  relay_gemini_analysis_model: Value.text({
+    name: 'Gemini Analysis Model',
+    description:
+      "The Gemini SKU used when an analysis request is routed to Gemini. Pro is the default for higher-quality structured output. Swap to a flash SKU (e.g. gemini-3-flash-preview) for faster + cheaper analysis at some loss of section-boundary precision.",
+    required: true,
+    default: 'gemini-3.1-pro-preview',
+    ...MODEL_NAME_LENGTH,
+  }),
+
+  // Backend order for the transcribe step (operator hardware = Parakeet).
+  relay_transcribe_backend_preference: Value.select({
+    name: 'Transcribe Backend Preference',
+    description:
+      'Routing strategy for transcription requests. The selected option controls the ORDER in which the relay tries each backend. The Gemini per-tier cap still applies regardless of this setting.',
+    default: 'gemini_first',
+    values: routingChoices(
+      'Gemini first → operator hardware (Parakeet) when cap exceeded',
+      'Hardware only — fail when no Parakeet endpoint is configured',
+    ),
+  }),
+
+  // Backend order for the analyze step (operator hardware = Gemma).
+  relay_analyze_backend_preference: Value.select({
+    name: 'Analyze Backend Preference',
+    description:
+      'Routing strategy for analysis requests. Same options as transcription but applies to the analyze step independently — you can route transcribe to hardware and analyze to Gemini, or vice versa.',
+    default: 'gemini_first',
+    values: routingChoices(
+      'Gemini first → operator hardware (Gemma) when cap exceeded',
+      'Hardware only — fail when no Gemma endpoint is configured',
+    ),
+  }),
+})
+
+/** Routing strategies accepted by both backend-preference selects. */
+const BACKEND_PREFERENCES = [
+  'gemini_first',
+  'hardware_first',
+  'gemini_only',
+  'hardware_only',
+] as const
+type BackendPreference = (typeof BACKEND_PREFERENCES)[number]
+
+// Fallbacks used when the stored or submitted value is missing or unusable.
+// Keep these in sync with the `default`s declared on the input spec.
+const FALLBACK_TRANSCRIPTION_MODEL = 'gemini-3-flash-preview'
+const FALLBACK_ANALYSIS_MODEL = 'gemini-3.1-pro-preview'
+const FALLBACK_BACKEND_PREFERENCE: BackendPreference = 'gemini_first'
+
+/**
+ * Trims a model name and substitutes `fallback` when nothing usable remains.
+ *
+ * Trimming happens BEFORE the emptiness check: a whitespace-only value is
+ * truthy (and can pass a plain length check such as `minLength: 1`), so
+ * checking first and trimming afterwards would persist an empty model name.
+ */
+const cleanModelName = (
+  raw: string | null | undefined,
+  fallback: string,
+): string => raw?.trim() || fallback
+
+/**
+ * Narrows a stored value to one of the known routing strategies, returning
+ * 'gemini_first' for a missing or unrecognized value rather than trusting a
+ * type cast on whatever the config file happens to contain.
+ */
+const toBackendPreference = (value: unknown): BackendPreference =>
+  BACKEND_PREFERENCES.find((known) => known === value) ??
+  FALLBACK_BACKEND_PREFERENCE
+
+/**
+ * "Set Backend Routing & Models" action.
+ *
+ * Prefills the form from the config file model (substituting the fallbacks
+ * above for anything missing or invalid) and merges the submitted values back
+ * into it. Returns null on submit.
+ */
+export const setBackendRouting = sdk.Action.withInput(
+  'set-backend-routing',
+
+  // Action metadata.
+  async ({ effects }) => ({
+    name: 'Set Backend Routing & Models',
+    description:
+      "Tune which Gemini SKUs the relay uses and the per-pipeline backend pecking order. Live-reloaded — changes take effect on the next request, no restart.",
+    warning: null,
+    allowedStatuses: 'any',
+    group: 'AI Backends',
+    visibility: 'enabled',
+  }),
+
+  inputSpec,
+
+  // Prefill: show the currently stored values, or the fallbacks.
+  async ({ effects }) => {
+    const config = await configFile.read().once()
+    return {
+      relay_gemini_transcription_model: cleanModelName(
+        config?.relay_gemini_transcription_model,
+        FALLBACK_TRANSCRIPTION_MODEL,
+      ),
+      relay_gemini_analysis_model: cleanModelName(
+        config?.relay_gemini_analysis_model,
+        FALLBACK_ANALYSIS_MODEL,
+      ),
+      relay_transcribe_backend_preference: toBackendPreference(
+        config?.relay_transcribe_backend_preference,
+      ),
+      relay_analyze_backend_preference: toBackendPreference(
+        config?.relay_analyze_backend_preference,
+      ),
+    }
+  },
+
+  // Submit: persist the four settings; model names are trimmed first so a
+  // blank or whitespace-only entry is stored as the fallback, never as ''.
+  async ({ effects, input }) => {
+    await configFile.merge(effects, {
+      relay_gemini_transcription_model: cleanModelName(
+        input.relay_gemini_transcription_model,
+        FALLBACK_TRANSCRIPTION_MODEL,
+      ),
+      relay_gemini_analysis_model: cleanModelName(
+        input.relay_gemini_analysis_model,
+        FALLBACK_ANALYSIS_MODEL,
+      ),
+      relay_transcribe_backend_preference:
+        input.relay_transcribe_backend_preference,
+      relay_analyze_backend_preference: input.relay_analyze_backend_preference,
+    })
+    return null
+  },
+)