Files
recap-relay/startos/actions/setBackendRouting.ts
T
2026-05-12 00:45:41 -05:00

155 lines
6.2 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { sdk } from '../sdk'
import { configFile } from '../file-models/config.json'
// Local aliases for the SDK's form-spec builders used by the input spec below.
const InputSpec = sdk.InputSpec
const Value = sdk.Value
// Lets the operator tune which backend gets tried first per pipeline
// step (transcribe vs analyze) AND which Gemini SKU is used when
// Gemini is the backend. All four knobs live-reload — change them
// via this action and the next relay request honors the new values
// without a daemon restart.
// ── Gemini model selection ──
// Each model field is a select over a curated list of SKUs. Per the
// field descriptions, the relay's Gemini backend steps down to the
// lower-tier entries on 503 / capacity / rate-limit errors; the
// fallback chain itself lives in server/backends/gemini.js.

/** Select for the Gemini SKU used on the transcription step. */
const geminiTranscriptionModelField = Value.select({
  name: 'Gemini Transcription Model',
  description:
    "Primary Gemini SKU used when a transcription request is routed to Gemini. On 503/capacity/rate-limit failure, the relay falls back to lower-tier models in order (e.g. 3-flash → 2.5-flash → 2.0-flash).",
  default: 'gemini-3-flash-preview',
  values: {
    'gemini-3-flash-preview':
      'Gemini 3 Flash — latest, recommended (~$0.30/M in, $2.50/M out)',
    'gemini-2.5-flash':
      'Gemini 2.5 Flash — prior gen (same pricing as 3-flash)',
    'gemini-2.0-flash':
      'Gemini 2.0 Flash — older + cheapest (~$0.10/M in, $0.40/M out)',
  },
})

/** Select for the Gemini SKU used on the analysis step. */
const geminiAnalysisModelField = Value.select({
  name: 'Gemini Analysis Model',
  description:
    "Primary Gemini SKU used when an analysis request is routed to Gemini. On 503/capacity/rate-limit failure, the relay falls back to lower-tier models in order (e.g. 3.1-pro → 3-pro → 3-flash → 2.5-flash).",
  default: 'gemini-3.1-pro-preview',
  values: {
    'gemini-3.1-pro-preview':
      'Gemini 3.1 Pro — best quality on structured-JSON output ($5/M in, $25/M out)',
    'gemini-3-pro-preview':
      'Gemini 3 Pro — prior Pro gen (same pricing as 3.1)',
    'gemini-3-flash-preview':
      'Gemini 3 Flash — faster + ~20× cheaper than Pro; some loss of section-boundary precision on long transcripts',
    'gemini-2.5-flash':
      'Gemini 2.5 Flash — prior Flash gen',
  },
})

// ── Backend routing preference per pipeline ──
// Both selects expose the same four strategy keys; only the labels
// differ, naming the hardware backend for that step (Parakeet for
// transcription, Gemma for analysis).

/** Select for the backend ordering on the transcription step. */
const transcribeBackendPreferenceField = Value.select({
  name: 'Transcribe Backend Preference',
  description:
    'Routing strategy for transcription requests. The selected option controls the ORDER in which the relay tries each backend. The Gemini per-tier cap still applies regardless of this setting.',
  default: 'gemini_first',
  values: {
    gemini_first:
      'Gemini first → operator hardware (Parakeet) when cap exceeded',
    hardware_first: 'Operator hardware first → Gemini as fallback',
    gemini_only: 'Gemini only — fail when cap is exceeded',
    hardware_only:
      'Hardware only — fail when no Parakeet endpoint is configured',
  },
})

/** Select for the backend ordering on the analysis step. */
const analyzeBackendPreferenceField = Value.select({
  name: 'Analyze Backend Preference',
  description:
    'Routing strategy for analysis requests. Same options as transcription but applies to the analyze step independently — you can route transcribe to hardware and analyze to Gemini, or vice versa.',
  default: 'gemini_first',
  values: {
    gemini_first:
      'Gemini first → operator hardware (Gemma) when cap exceeded',
    hardware_first: 'Operator hardware first → Gemini as fallback',
    gemini_only: 'Gemini only — fail when cap is exceeded',
    hardware_only:
      'Hardware only — fail when no Gemma endpoint is configured',
  },
})

/**
 * Form shown by the "Set Backend Routing & Models" action. Keys are the
 * config-file field names; order here is the order the fields are declared
 * in the form spec.
 */
const inputSpec = InputSpec.of({
  relay_gemini_transcription_model: geminiTranscriptionModelField,
  relay_gemini_analysis_model: geminiAnalysisModelField,
  relay_transcribe_backend_preference: transcribeBackendPreferenceField,
  relay_analyze_backend_preference: analyzeBackendPreferenceField,
})
// Option keys accepted by each select in `inputSpec`. Keep these lists in
// sync with the `values` keys of the matching select; the first entry of
// each list is that select's `default`.
const TRANSCRIPTION_MODELS = [
  'gemini-3-flash-preview',
  'gemini-2.5-flash',
  'gemini-2.0-flash',
] as const
const ANALYSIS_MODELS = [
  'gemini-3.1-pro-preview',
  'gemini-3-pro-preview',
  'gemini-3-flash-preview',
  'gemini-2.5-flash',
] as const
// Shared by the transcribe and analyze routing selects, which expose the
// same four strategy keys.
const BACKEND_PREFERENCES = [
  'gemini_first',
  'hardware_first',
  'gemini_only',
  'hardware_only',
] as const

const [DEFAULT_TRANSCRIPTION_MODEL] = TRANSCRIPTION_MODELS
const [DEFAULT_ANALYSIS_MODEL] = ANALYSIS_MODELS
const [DEFAULT_BACKEND_PREFERENCE] = BACKEND_PREFERENCES

/**
 * Returns `candidate` when it is exactly one of `options`, otherwise
 * `fallback`.
 *
 * `candidate` is typed `unknown` on purpose: values read back from the
 * config file are not guaranteed to be a current option, so they are
 * checked at runtime instead of being asserted into the option type.
 *
 * @param options - The allowed option keys.
 * @param candidate - The value to validate (may be missing or stale).
 * @param fallback - The option to use when `candidate` is not allowed.
 * @returns A member of `options`.
 */
function clampToOption<T extends string>(
  options: readonly T[],
  candidate: unknown,
  fallback: T,
): T {
  return options.find((option) => option === candidate) ?? fallback
}

/**
 * Action `set-backend-routing`: edits the two Gemini model choices and the
 * two per-pipeline backend routing preferences stored in the config file.
 *
 * The form is pre-filled from the current config, with every field clamped
 * to an option its select actually offers. On submit the four values are
 * merged into the config file; the action returns `null` (no result
 * payload).
 */
export const setBackendRouting = sdk.Action.withInput(
  'set-backend-routing',
  async ({ effects }) => ({
    name: 'Set Backend Routing & Models',
    description:
      "Tune which Gemini SKUs the relay uses and the per-pipeline backend pecking order. Live-reloaded — changes take effect on the next request, no restart.",
    warning: null,
    allowedStatuses: 'any',
    group: 'AI Backends',
    visibility: 'enabled',
  }),
  inputSpec,
  async ({ effects }) => {
    const config = await configFile.read().once()
    // Coerce any previously-saved value to one the current selects offer.
    // Older 0.2.7-era configs could hold a free-text model name that is no
    // longer in the dropdown, and a stale or hand-edited routing value
    // would likewise pre-select an option that does not exist — fall back
    // to the select's default rather than presenting an invalid radio.
    return {
      relay_gemini_transcription_model: clampToOption(
        TRANSCRIPTION_MODELS,
        config?.relay_gemini_transcription_model,
        DEFAULT_TRANSCRIPTION_MODEL,
      ),
      relay_gemini_analysis_model: clampToOption(
        ANALYSIS_MODELS,
        config?.relay_gemini_analysis_model,
        DEFAULT_ANALYSIS_MODEL,
      ),
      relay_transcribe_backend_preference: clampToOption(
        BACKEND_PREFERENCES,
        config?.relay_transcribe_backend_preference,
        DEFAULT_BACKEND_PREFERENCE,
      ),
      relay_analyze_backend_preference: clampToOption(
        BACKEND_PREFERENCES,
        config?.relay_analyze_backend_preference,
        DEFAULT_BACKEND_PREFERENCE,
      ),
    }
  },
  async ({ effects, input }) => {
    await configFile.merge(effects, {
      // Model names come from a select, so there is no whitespace to trim;
      // validating against the option list keeps the default as a safety
      // net if the field ever arrives empty or unrecognized.
      relay_gemini_transcription_model: clampToOption(
        TRANSCRIPTION_MODELS,
        input.relay_gemini_transcription_model,
        DEFAULT_TRANSCRIPTION_MODEL,
      ),
      relay_gemini_analysis_model: clampToOption(
        ANALYSIS_MODELS,
        input.relay_gemini_analysis_model,
        DEFAULT_ANALYSIS_MODEL,
      ),
      relay_transcribe_backend_preference:
        input.relay_transcribe_backend_preference,
      relay_analyze_backend_preference: input.relay_analyze_backend_preference,
    })
    return null
  },
)