117 lines
4.6 KiB
TypeScript
117 lines
4.6 KiB
TypeScript
import { sdk } from '../sdk'
|
|
import { configFile } from '../file-models/config.json'
|
|
|
|
// Builders used below to declare the action's input form.
const InputSpec = sdk.InputSpec
const Value = sdk.Value
|
|
|
|
// Lets the operator tune which backend gets tried first per pipeline
// step (transcribe vs. analyze) and which Gemini SKU is used when
// Gemini is the backend. All four knobs live-reload: change them
// via this action and the next relay request honors the new values
// without a daemon restart.
|
|
|
|
// Builds a required free-text field for a Gemini model name (1-128 chars).
const geminiModelField = (
  name: string,
  description: string,
  defaultModel: string,
) =>
  Value.text({
    name,
    description,
    required: true,
    default: defaultModel,
    minLength: 1,
    maxLength: 128,
  })

// Builds a routing-strategy select. `hardwareName` is the operator-hardware
// model mentioned in the option labels; the option keys are the same for
// every pipeline step and the default is always `gemini_first`.
const backendPreferenceField = (
  name: string,
  description: string,
  hardwareName: string,
) =>
  Value.select({
    name,
    description,
    default: 'gemini_first',
    values: {
      gemini_first: `Gemini first → operator hardware (${hardwareName}) when cap exceeded`,
      hardware_first: 'Operator hardware first → Gemini as fallback',
      gemini_only: 'Gemini only — fail when cap is exceeded',
      hardware_only: `Hardware only — fail when no ${hardwareName} endpoint is configured`,
    },
  })

// Form shown by the action: two Gemini model names followed by one
// backend routing preference per pipeline step.
const inputSpec = InputSpec.of({
  // ── Gemini model selection ──
  relay_gemini_transcription_model: geminiModelField(
    'Gemini Transcription Model',
    'The Gemini SKU used when a transcription request is routed to Gemini. Flash is recommended (cheap, fast, multimodal). Examples: gemini-3-flash-preview (default), gemini-2.5-flash, gemini-2.0-flash, gemini-3-pro-preview (slower + pricier but higher quality on edge cases).',
    'gemini-3-flash-preview',
  ),
  relay_gemini_analysis_model: geminiModelField(
    'Gemini Analysis Model',
    'The Gemini SKU used when an analysis request is routed to Gemini. Pro is the default for higher-quality structured output. Swap to a flash SKU (e.g. gemini-3-flash-preview) for faster + cheaper analysis at some loss of section-boundary precision.',
    'gemini-3.1-pro-preview',
  ),

  // ── Backend routing preference per pipeline ──
  relay_transcribe_backend_preference: backendPreferenceField(
    'Transcribe Backend Preference',
    'Routing strategy for transcription requests. The selected option controls the ORDER in which the relay tries each backend. The Gemini per-tier cap still applies regardless of this setting.',
    'Parakeet',
  ),
  relay_analyze_backend_preference: backendPreferenceField(
    'Analyze Backend Preference',
    'Routing strategy for analysis requests. Same options as transcription but applies to the analyze step independently — you can route transcribe to hardware and analyze to Gemini, or vice versa.',
    'Gemma',
  ),
})
|
|
|
|
// Fallback model names used when the stored config or the submitted form
// has no usable value.
const DEFAULT_TRANSCRIPTION_MODEL = 'gemini-3-flash-preview'
const DEFAULT_ANALYSIS_MODEL = 'gemini-3.1-pro-preview'

// Routing strategies accepted by the two backend-preference fields.
const BACKEND_PREFERENCES = [
  'gemini_first',
  'hardware_first',
  'gemini_only',
  'hardware_only',
] as const
type BackendPreference = (typeof BACKEND_PREFERENCES)[number]
const DEFAULT_BACKEND_PREFERENCE: BackendPreference = 'gemini_first'

/**
 * Trims a model name and falls back to `fallback` when nothing usable is
 * left. Trimming happens BEFORE the fallback so a whitespace-only value
 * cannot be persisted as an empty string.
 */
const resolveModel = (
  value: string | null | undefined,
  fallback: string,
): string => value?.trim() || fallback

/**
 * Returns `value` when it is one of the known routing strategies, otherwise
 * the default. Guards the form against unknown strings in the config file.
 */
const resolvePreference = (value: unknown): BackendPreference =>
  BACKEND_PREFERENCES.find((known) => known === value) ??
  DEFAULT_BACKEND_PREFERENCE

/**
 * Action `set-backend-routing`: edits the Gemini model names and the
 * per-pipeline backend routing preferences stored in the config file.
 *
 * - Prefill: reads the config file once and substitutes defaults for any
 *   missing, blank, or unrecognized values.
 * - Run: merges the submitted values into the config file and returns null.
 */
export const setBackendRouting = sdk.Action.withInput(
  'set-backend-routing',

  // Action metadata shown to the operator.
  async () => ({
    name: 'Set Backend Routing & Models',
    description:
      "Tune which Gemini SKUs the relay uses and the per-pipeline backend pecking order. Live-reloaded — changes take effect on the next request, no restart.",
    warning: null,
    allowedStatuses: 'any',
    group: 'AI Backends',
    visibility: 'enabled',
  }),

  inputSpec,

  // Prefill the form from the current config.
  async () => {
    const config = await configFile.read().once()
    return {
      relay_gemini_transcription_model: resolveModel(
        config?.relay_gemini_transcription_model,
        DEFAULT_TRANSCRIPTION_MODEL,
      ),
      relay_gemini_analysis_model: resolveModel(
        config?.relay_gemini_analysis_model,
        DEFAULT_ANALYSIS_MODEL,
      ),
      relay_transcribe_backend_preference: resolvePreference(
        config?.relay_transcribe_backend_preference,
      ),
      relay_analyze_backend_preference: resolvePreference(
        config?.relay_analyze_backend_preference,
      ),
    }
  },

  // Persist the submitted values.
  async ({ effects, input }) => {
    await configFile.merge(effects, {
      relay_gemini_transcription_model: resolveModel(
        input.relay_gemini_transcription_model,
        DEFAULT_TRANSCRIPTION_MODEL,
      ),
      relay_gemini_analysis_model: resolveModel(
        input.relay_gemini_analysis_model,
        DEFAULT_ANALYSIS_MODEL,
      ),
      relay_transcribe_backend_preference:
        input.relay_transcribe_backend_preference,
      relay_analyze_backend_preference: input.relay_analyze_backend_preference,
    })
    return null
  },
)
|