v0.2.7 configurable Gemini models + per-pipeline backend preference
This commit is contained in:
@@ -5,11 +5,13 @@ import { setParakeetUrl } from './setParakeetUrl'
|
||||
import { setGemmaUrl } from './setGemmaUrl'
|
||||
import { setAdminPassword } from './setAdminPassword'
|
||||
import { adjustTierQuotas } from './adjustTierQuotas'
|
||||
import { setBackendRouting } from './setBackendRouting'
|
||||
|
||||
// Action registry for the package. Built in two stages purely for
// readability; the registration order is the same as a single chain:
// backend credentials/endpoints first, then routing, admin password and
// tier quotas.
const backendEndpointActions = sdk.Actions.of()
  .addAction(setGeminiKey)
  .addAction(setKeysatBaseUrl)
  .addAction(setParakeetUrl)
  .addAction(setGemmaUrl)

export const actions = backendEndpointActions
  .addAction(setBackendRouting)
  .addAction(setAdminPassword)
  .addAction(adjustTierQuotas)
|
||||
|
||||
@@ -0,0 +1,116 @@
|
||||
import { sdk } from '../sdk'
|
||||
import { configFile } from '../file-models/config.json'
|
||||
|
||||
const { InputSpec, Value } = sdk
|
||||
|
||||
// Lets the operator tune which backend gets tried first per pipeline
|
||||
// step (transcribe vs analyze) AND which Gemini SKU is used when
|
||||
// Gemini is the backend. All four knobs live-reload — change them
|
||||
// via this action and the next relay request honors the new values
|
||||
// without a daemon restart.
|
||||
|
||||
// ── Gemini model selection ──

// Free-text Gemini SKU for the transcription step (1–128 characters).
const transcriptionModelField = Value.text({
  name: 'Gemini Transcription Model',
  description:
    "The Gemini SKU used when a transcription request is routed to Gemini. Flash is recommended (cheap, fast, multimodal). Examples: gemini-3-flash-preview (default), gemini-2.5-flash, gemini-2.0-flash, gemini-3-pro-preview (slower + pricier but higher quality on edge cases).",
  required: true,
  default: 'gemini-3-flash-preview',
  minLength: 1,
  maxLength: 128,
})

// Free-text Gemini SKU for the analysis step (1–128 characters).
const analysisModelField = Value.text({
  name: 'Gemini Analysis Model',
  description:
    "The Gemini SKU used when an analysis request is routed to Gemini. Pro is the default for higher-quality structured output. Swap to a flash SKU (e.g. gemini-3-flash-preview) for faster + cheaper analysis at some loss of section-boundary precision.",
  required: true,
  default: 'gemini-3.1-pro-preview',
  minLength: 1,
  maxLength: 128,
})

// ── Backend routing preference per pipeline ──

// Backend ordering for transcription; the labels name Parakeet as the
// operator-hardware backend for this step.
const transcribePreferenceField = Value.select({
  name: 'Transcribe Backend Preference',
  description:
    'Routing strategy for transcription requests. The selected option controls the ORDER in which the relay tries each backend. The Gemini per-tier cap still applies regardless of this setting.',
  default: 'gemini_first',
  values: {
    gemini_first:
      'Gemini first → operator hardware (Parakeet) when cap exceeded',
    hardware_first: 'Operator hardware first → Gemini as fallback',
    gemini_only: 'Gemini only — fail when cap is exceeded',
    hardware_only:
      'Hardware only — fail when no Parakeet endpoint is configured',
  },
})

// Backend ordering for analysis; same option keys as transcription, with
// labels naming Gemma as the operator-hardware backend for this step.
const analyzePreferenceField = Value.select({
  name: 'Analyze Backend Preference',
  description:
    'Routing strategy for analysis requests. Same options as transcription but applies to the analyze step independently — you can route transcribe to hardware and analyze to Gemini, or vice versa.',
  default: 'gemini_first',
  values: {
    gemini_first:
      'Gemini first → operator hardware (Gemma) when cap exceeded',
    hardware_first: 'Operator hardware first → Gemini as fallback',
    gemini_only: 'Gemini only — fail when cap is exceeded',
    hardware_only:
      'Hardware only — fail when no Gemma endpoint is configured',
  },
})

// Form shown by the "Set Backend Routing & Models" action. The keys are the
// config.json field names the action reads and writes.
const inputSpec = InputSpec.of({
  relay_gemini_transcription_model: transcriptionModelField,
  relay_gemini_analysis_model: analysisModelField,
  relay_transcribe_backend_preference: transcribePreferenceField,
  relay_analyze_backend_preference: analyzePreferenceField,
})
|
||||
|
||||
// Fallbacks used when a stored or submitted value is missing or blank.
// They repeat the `default` values declared on the matching inputSpec fields.
const FALLBACK_TRANSCRIPTION_MODEL = 'gemini-3-flash-preview'
const FALLBACK_ANALYSIS_MODEL = 'gemini-3.1-pro-preview'
const FALLBACK_BACKEND_PREFERENCE = 'gemini_first'

// Option keys accepted by both backend-preference selects.
type BackendPreference =
  | 'gemini_first'
  | 'hardware_first'
  | 'gemini_only'
  | 'hardware_only'

/**
 * Normalises a Gemini model name: strips surrounding whitespace and, if
 * nothing is left (or the value was null/undefined), returns `fallback`.
 *
 * Trimming BEFORE the fallback matters: a whitespace-only value is truthy,
 * so `(value || fallback).trim()` would yield an empty model name.
 */
const modelOrFallback = (
  value: string | null | undefined,
  fallback: string,
): string => value?.trim() || fallback

/**
 * "Set Backend Routing & Models" action.
 *
 * Arguments to `withInput`, in order: action id, metadata factory, the form
 * spec, a prefill function that loads current values from config.json, and
 * the submit handler that merges the submitted values back into config.json.
 */
export const setBackendRouting = sdk.Action.withInput(
  'set-backend-routing',

  // Metadata shown for the action.
  async ({ effects }) => ({
    name: 'Set Backend Routing & Models',
    description:
      "Tune which Gemini SKUs the relay uses and the per-pipeline backend pecking order. Live-reloaded — changes take effect on the next request, no restart.",
    warning: null,
    allowedStatuses: 'any',
    group: 'AI Backends',
    visibility: 'enabled',
  }),

  inputSpec,

  // Prefill: read config.json once and substitute the fallback for any
  // field that is absent or blank.
  async ({ effects }) => {
    const config = await configFile.read().once()
    return {
      relay_gemini_transcription_model: modelOrFallback(
        config?.relay_gemini_transcription_model,
        FALLBACK_TRANSCRIPTION_MODEL,
      ),
      relay_gemini_analysis_model: modelOrFallback(
        config?.relay_gemini_analysis_model,
        FALLBACK_ANALYSIS_MODEL,
      ),
      relay_transcribe_backend_preference:
        (config?.relay_transcribe_backend_preference as
          | BackendPreference
          | undefined) || FALLBACK_BACKEND_PREFERENCE,
      relay_analyze_backend_preference:
        (config?.relay_analyze_backend_preference as
          | BackendPreference
          | undefined) || FALLBACK_BACKEND_PREFERENCE,
    }
  },

  // Submit: persist all four knobs. Model names are trimmed first so a
  // whitespace-only entry falls back instead of being stored as ''.
  async ({ effects, input }) => {
    await configFile.merge(effects, {
      relay_gemini_transcription_model: modelOrFallback(
        input.relay_gemini_transcription_model,
        FALLBACK_TRANSCRIPTION_MODEL,
      ),
      relay_gemini_analysis_model: modelOrFallback(
        input.relay_gemini_analysis_model,
        FALLBACK_ANALYSIS_MODEL,
      ),
      relay_transcribe_backend_preference:
        input.relay_transcribe_backend_preference,
      relay_analyze_backend_preference: input.relay_analyze_backend_preference,
    })
    return null
  },
)
|
||||
@@ -36,6 +36,33 @@ export const configFile = FileHelper.json(
|
||||
relay_parakeet_model: z.string().default('parakeet-tdt-0.6b-v3'),
|
||||
relay_gemma_model: z.string().default('gemma3:27b'),
|
||||
|
||||
// ── Gemini model selection ──
|
||||
// Operator can pick which Gemini SKU is used per pipeline step
|
||||
// without rebuilding the relay. Defaults match Google's typical
|
||||
// recommendations: Flash for transcription (cheap, fast,
|
||||
// multimodal-capable), Pro for analysis (higher quality on
|
||||
// structured-JSON outputs). Operators can swap to flash for
|
||||
// analysis when they want faster + cheaper at the cost of some
|
||||
// section-boundary precision.
|
||||
relay_gemini_transcription_model: z.string().default('gemini-3-flash-preview'),
|
||||
relay_gemini_analysis_model: z.string().default('gemini-3.1-pro-preview'),
|
||||
|
||||
// ── Backend routing preference per pipeline ──
|
||||
// Controls whether the relay tries Gemini first (current default —
|
||||
// best quality, costs operator's Gemini API budget) or the
|
||||
// operator-hardware backend first (saves Gemini budget, may be
|
||||
// slower depending on the operator's hardware). One of:
|
||||
// - "gemini_first" try Gemini until per-tier cap, then hardware
|
||||
// - "hardware_first" try hardware first, fall back to Gemini
|
||||
// - "gemini_only" Gemini only, fail when cap is exceeded
|
||||
// - "hardware_only" Hardware only, fail when not configured
|
||||
relay_transcribe_backend_preference: z
|
||||
.enum(['gemini_first', 'hardware_first', 'gemini_only', 'hardware_only'])
|
||||
.default('gemini_first'),
|
||||
relay_analyze_backend_preference: z
|
||||
.enum(['gemini_first', 'hardware_first', 'gemini_only', 'hardware_only'])
|
||||
.default('gemini_first'),
|
||||
|
||||
// ── License server ──
|
||||
// URL of the Keysat license server used for the cached online
|
||||
// license-validation check. Defaults to the public endpoint;
|
||||
|
||||
@@ -7,8 +7,9 @@ import { v_0_2_3 } from './v0.2.3'
|
||||
import { v_0_2_4 } from './v0.2.4'
|
||||
import { v_0_2_5 } from './v0.2.5'
|
||||
import { v_0_2_6 } from './v0.2.6'
|
||||
import { v_0_2_7 } from './v0.2.7'
|
||||
|
||||
// Registers v0.2.7 as the `current` version and lists every earlier
// version, newest first, under `other`. Each key appears exactly once:
// duplicate keys in an object literal are a TypeScript error, and in plain
// JavaScript only the last occurrence would take effect.
export const versionGraph = VersionGraph.of({
  current: v_0_2_7,
  other: [
    v_0_2_6,
    v_0_2_5,
    v_0_2_4,
    v_0_2_3,
    v_0_2_2,
    v_0_2_1,
    v_0_2_0,
    v_0_1_0,
  ],
})
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
import { VersionInfo } from '@start9labs/start-sdk'
|
||||
|
||||
// English release notes for 0.2.7, kept in a named constant so the
// VersionInfo literal below stays short.
const releaseNotesEnUs =
  'New "Set Backend Routing & Models" action exposes four operator knobs: Gemini transcription model, Gemini analysis model, transcribe backend preference (gemini_first / hardware_first / gemini_only / hardware_only), and analyze backend preference. Routing strategies are honored by planBackend per-pipeline, so the operator can route transcribe to Parakeet for speed AND analyze through Gemini Flash for faster + cheaper structured output, or any other combination.'

// Version descriptor for 0.2.7 (revision 0). Both migration hooks are
// no-ops.
export const v_0_2_7 = VersionInfo.of({
  version: '0.2.7:0',
  releaseNotes: { en_US: releaseNotesEnUs },
  migrations: {
    async up({ effects }) {},
    async down({ effects }) {},
  },
})
|
||||
Reference in New Issue
Block a user