155 lines
6.2 KiB
TypeScript
155 lines
6.2 KiB
TypeScript
import { sdk } from '../sdk'
|
||
import { configFile } from '../file-models/config.json'
|
||
|
||
const { InputSpec, Value } = sdk
|
||
|
||
// Lets the operator tune, per pipeline step (transcribe vs. analyze), which
// backend is tried first, and which Gemini SKU is used when Gemini is the
// backend. All four settings live-reload: change them via this action and
// the next relay request honors the new values without a daemon restart.
|
||
|
||
// ── Gemini model selection ──
// Single-choice pickers over a curated SKU list. Per the field descriptions,
// the relay's Gemini backend steps down to lower-tier models when the chosen
// one reports a 503 / capacity / rate-limit error; the fallback chain itself
// lives in server/backends/gemini.js.

// Model used for transcription requests that are routed to Gemini.
const transcriptionModelField = Value.select({
  name: 'Gemini Transcription Model',
  description:
    "Primary Gemini SKU used when a transcription request is routed to Gemini. On 503/capacity/rate-limit failure, the relay falls back to lower-tier models in order (e.g. 3-flash → 2.5-flash → 2.0-flash).",
  default: 'gemini-3-flash-preview',
  values: {
    'gemini-3-flash-preview':
      'Gemini 3 Flash — latest, recommended (~$0.30/M in, $2.50/M out)',
    'gemini-2.5-flash':
      'Gemini 2.5 Flash — prior gen (same pricing as 3-flash)',
    'gemini-2.0-flash':
      'Gemini 2.0 Flash — older + cheapest (~$0.10/M in, $0.40/M out)',
  },
})

// Model used for analysis requests that are routed to Gemini.
const analysisModelField = Value.select({
  name: 'Gemini Analysis Model',
  description:
    "Primary Gemini SKU used when an analysis request is routed to Gemini. On 503/capacity/rate-limit failure, the relay falls back to lower-tier models in order (e.g. 3.1-pro → 3-pro → 3-flash → 2.5-flash).",
  default: 'gemini-3.1-pro-preview',
  values: {
    'gemini-3.1-pro-preview':
      'Gemini 3.1 Pro — best quality on structured-JSON output ($5/M in, $25/M out)',
    'gemini-3-pro-preview':
      'Gemini 3 Pro — prior Pro gen (same pricing as 3.1)',
    'gemini-3-flash-preview':
      'Gemini 3 Flash — faster + ~20× cheaper than Pro; some loss of section-boundary precision on long transcripts',
    'gemini-2.5-flash':
      'Gemini 2.5 Flash — prior Flash gen',
  },
})

// ── Backend routing preference per pipeline ──
// The two pipelines share the same four option keys but are configured
// independently; only the hardware backend named in the labels differs.

// Backend ordering for the transcribe step.
const transcribeRoutingField = Value.select({
  name: 'Transcribe Backend Preference',
  description:
    'Routing strategy for transcription requests. The selected option controls the ORDER in which the relay tries each backend. The Gemini per-tier cap still applies regardless of this setting.',
  default: 'gemini_first',
  values: {
    gemini_first:
      'Gemini first → operator hardware (Parakeet) when cap exceeded',
    hardware_first: 'Operator hardware first → Gemini as fallback',
    gemini_only: 'Gemini only — fail when cap is exceeded',
    hardware_only:
      'Hardware only — fail when no Parakeet endpoint is configured',
  },
})

// Backend ordering for the analyze step.
const analyzeRoutingField = Value.select({
  name: 'Analyze Backend Preference',
  description:
    'Routing strategy for analysis requests. Same options as transcription but applies to the analyze step independently — you can route transcribe to hardware and analyze to Gemini, or vice versa.',
  default: 'gemini_first',
  values: {
    gemini_first:
      'Gemini first → operator hardware (Gemma) when cap exceeded',
    hardware_first: 'Operator hardware first → Gemini as fallback',
    gemini_only: 'Gemini only — fail when cap is exceeded',
    hardware_only:
      'Hardware only — fail when no Gemma endpoint is configured',
  },
})

// Form shown by the routing action. Key order is kept as models first,
// then routing preferences.
const inputSpec = InputSpec.of({
  relay_gemini_transcription_model: transcriptionModelField,
  relay_gemini_analysis_model: analysisModelField,
  relay_transcribe_backend_preference: transcribeRoutingField,
  relay_analyze_backend_preference: analyzeRoutingField,
})
|
||
|
||
/** Option keys of the transcription-model select; keep in sync with `inputSpec`. */
const TRANSCRIPTION_MODELS = [
  'gemini-3-flash-preview',
  'gemini-2.5-flash',
  'gemini-2.0-flash',
] as const

/** Option keys of the analysis-model select; keep in sync with `inputSpec`. */
const ANALYSIS_MODELS = [
  'gemini-3.1-pro-preview',
  'gemini-3-pro-preview',
  'gemini-3-flash-preview',
  'gemini-2.5-flash',
] as const

/** Option keys shared by both backend-preference selects; keep in sync with `inputSpec`. */
const BACKEND_PREFERENCES = [
  'gemini_first',
  'hardware_first',
  'gemini_only',
  'hardware_only',
] as const

/**
 * Clamps a previously saved value to one of a select's option keys.
 *
 * @param saved - Raw value read from the config file; may be missing or stale.
 * @param options - The option keys the select currently offers.
 * @param fallback - Key to use when `saved` is not one of `options`.
 * @returns `saved` when it exactly equals one of `options`, otherwise `fallback`.
 */
function clampToOption<T extends string>(
  saved: unknown,
  options: readonly T[],
  fallback: T,
): T {
  return options.find((option) => option === saved) ?? fallback
}

/**
 * Action that lets the operator choose the Gemini models and the per-pipeline
 * backend order, and persists the choices by merging them into the config file.
 */
export const setBackendRouting = sdk.Action.withInput(
  'set-backend-routing',

  // Static action metadata.
  async () => ({
    name: 'Set Backend Routing & Models',
    description:
      "Tune which Gemini SKUs the relay uses and the per-pipeline backend pecking order. Live-reloaded — changes take effect on the next request, no restart.",
    warning: null,
    allowedStatuses: 'any',
    group: 'AI Backends',
    visibility: 'enabled',
  }),

  inputSpec,

  // Prefill: read the saved config once and clamp every field to a key its
  // select actually offers. Saved configs can hold values that are no longer
  // in the dropdown (the original note cites free-text model names from
  // 0.2.7-era configs); falling back to the default avoids prefilling a
  // select with an option it does not have. The routing preferences are
  // clamped the same way as the model names.
  async () => {
    const config = await configFile.read().once()

    return {
      relay_gemini_transcription_model: clampToOption(
        config?.relay_gemini_transcription_model,
        TRANSCRIPTION_MODELS,
        'gemini-3-flash-preview',
      ),
      relay_gemini_analysis_model: clampToOption(
        config?.relay_gemini_analysis_model,
        ANALYSIS_MODELS,
        'gemini-3.1-pro-preview',
      ),
      relay_transcribe_backend_preference: clampToOption(
        config?.relay_transcribe_backend_preference,
        BACKEND_PREFERENCES,
        'gemini_first',
      ),
      relay_analyze_backend_preference: clampToOption(
        config?.relay_analyze_backend_preference,
        BACKEND_PREFERENCES,
        'gemini_first',
      ),
    }
  },

  // Submit: merge the four chosen values into the config file. The default
  // fallback and trim() on the model names are kept as defensive
  // normalization so the persisted value is never empty or padded.
  async ({ effects, input }) => {
    await configFile.merge(effects, {
      relay_gemini_transcription_model: (
        input.relay_gemini_transcription_model || 'gemini-3-flash-preview'
      ).trim(),
      relay_gemini_analysis_model: (
        input.relay_gemini_analysis_model || 'gemini-3.1-pro-preview'
      ).trim(),
      relay_transcribe_backend_preference:
        input.relay_transcribe_backend_preference,
      relay_analyze_backend_preference: input.relay_analyze_backend_preference,
    })

    return null
  },
)
|