v0.2.1 model names config-driven

This commit is contained in:
local
2026-05-11 20:27:19 -05:00
parent cccbee27e4
commit c9f051cd07
9 changed files with 75 additions and 28 deletions
+16 -4
View File
@@ -3,9 +3,10 @@ import { configFile } from '../file-models/config.json'
const { InputSpec, Value } = sdk
// Optional Gemma/Ollama endpoint for the operator-hardware analysis
// fallback. Counterpart to setParakeetUrl — Parakeet handles transcribe
// overflow, this handles analyze overflow.
// Operator's Gemma (or any OpenAI-compatible chat-completions) endpoint
// + which model to request. Both fields live-reload so the operator
// can pull a different Gemma SKU on Ollama and update the model name
// here without restarting the relay.
const inputSpec = InputSpec.of({
relay_gemma_base_url: Value.text({
name: 'Gemma Base URL',
@@ -22,6 +23,15 @@ const inputSpec = InputSpec.of({
},
],
}),
relay_gemma_model: Value.text({
name: 'Gemma Model Name',
description:
'The model identifier sent in upstream chat-completions requests. Match whatever name your Ollama / vLLM / llama.cpp deployment exposes (run `ollama list` to see what you have pulled). Example: gemma3:27b, gemma2:9b, llama3.1:70b',
required: true,
default: 'gemma3:27b',
minLength: 1,
maxLength: 128,
}),
})
export const setGemmaUrl = sdk.Action.withInput(
@@ -30,7 +40,7 @@ export const setGemmaUrl = sdk.Action.withInput(
async ({ effects }) => ({
name: 'Set Gemma URL',
description:
'Optional. Where the relay forwards analysis requests once a user exceeds their monthly Gemini cap. Leave empty to disable the fallback.',
'Optional. Where the relay forwards analysis requests once a user exceeds their monthly Gemini cap. Leave URL empty to disable the fallback.',
warning: null,
allowedStatuses: 'any',
group: null,
@@ -43,12 +53,14 @@ export const setGemmaUrl = sdk.Action.withInput(
const config = await configFile.read().once()
return {
relay_gemma_base_url: config?.relay_gemma_base_url || '',
relay_gemma_model: config?.relay_gemma_model || 'gemma3:27b',
}
},
async ({ effects, input }) => {
await configFile.merge(effects, {
relay_gemma_base_url: (input.relay_gemma_base_url || '').trim(),
relay_gemma_model: (input.relay_gemma_model || 'gemma3:27b').trim(),
})
return null
},
+17 -8
View File
@@ -3,13 +3,9 @@ import { configFile } from '../file-models/config.json'
const { InputSpec, Value } = sdk
// Optional Parakeet endpoint for the operator-hardware fallback path.
// When a Pro/Max user exceeds their Gemini monthly cap, the relay
// routes transcribe requests here instead. Empty disables the fallback
// — over-cap users get 402.
//
// In a typical setup this points at the operator's NVIDIA Spark or
// similar local GPU box running the NeMo / Parakeet HTTP wrapper.
// Operator's Parakeet endpoint + which model to request. Both fields
// live-reload — change them via this action and the next relay request
// picks up the new values without a daemon restart.
const inputSpec = InputSpec.of({
relay_parakeet_base_url: Value.text({
name: 'Parakeet Base URL',
@@ -26,6 +22,15 @@ const inputSpec = InputSpec.of({
},
],
}),
relay_parakeet_model: Value.text({
name: 'Parakeet Model Name',
description:
'The model identifier sent in upstream requests (the "model" field in the OpenAI Whisper API body). Match whatever name your Parakeet wrapper expects. Default: parakeet-tdt-0.6b-v3',
required: true,
default: 'parakeet-tdt-0.6b-v3',
minLength: 1,
maxLength: 128,
}),
})
export const setParakeetUrl = sdk.Action.withInput(
@@ -34,7 +39,7 @@ export const setParakeetUrl = sdk.Action.withInput(
async ({ effects }) => ({
name: 'Set Parakeet URL',
description:
"Optional. Where the relay forwards transcription requests once a user exceeds their monthly Gemini cap. Leave empty to disable the operator-hardware fallback.",
"Optional. Where the relay forwards transcription requests once a user exceeds their monthly Gemini cap. Leave URL empty to disable the operator-hardware fallback.",
warning: null,
allowedStatuses: 'any',
group: null,
@@ -47,12 +52,16 @@ export const setParakeetUrl = sdk.Action.withInput(
const config = await configFile.read().once()
return {
relay_parakeet_base_url: config?.relay_parakeet_base_url || '',
relay_parakeet_model:
config?.relay_parakeet_model || 'parakeet-tdt-0.6b-v3',
}
},
async ({ effects, input }) => {
await configFile.merge(effects, {
relay_parakeet_base_url: (input.relay_parakeet_base_url || '').trim(),
relay_parakeet_model:
(input.relay_parakeet_model || 'parakeet-tdt-0.6b-v3').trim(),
})
return null
},
+7
View File
@@ -28,6 +28,13 @@ export const configFile = FileHelper.json(
// and return 402 once exceeded (no fallback).
relay_parakeet_base_url: z.string().default(''),
relay_gemma_base_url: z.string().default(''),
// Model identifiers to send in the upstream request bodies. The
// operator's Ollama or Parakeet wrapper may serve different models
// depending on what's been pulled; making these config-driven
// means the operator can swap models without rebuilding the relay.
// Live-reloaded — change applies to the next request.
relay_parakeet_model: z.string().default('parakeet-tdt-0.6b-v3'),
relay_gemma_model: z.string().default('gemma3:27b'),
// ── License server ──
// URL of the Keysat license server used for the cached online
+3 -2
View File
@@ -1,8 +1,9 @@
import { VersionGraph } from '@start9labs/start-sdk'
import { v_0_1_0 } from './v0.1.0'
import { v_0_2_0 } from './v0.2.0'
import { v_0_2_1 } from './v0.2.1'
// Version graph for the package. v0.2.1 is the current version; the
// previously released v0.2.0 and v0.1.0 are listed under `other`.
//
// Each key appears exactly once: a repeated key in an object literal is
// silently overwritten by its last occurrence in JavaScript (and is a
// compile error in TypeScript), so only the effective pair is kept.
export const versionGraph = VersionGraph.of({
  current: v_0_2_1,
  other: [v_0_2_0, v_0_1_0],
})
+13
View File
@@ -0,0 +1,13 @@
import { VersionInfo } from '@start9labs/start-sdk'
// English release notes shown for the 0.2.1 release.
const releaseNotesEnUS =
  'Parakeet + Gemma model names are now config-driven (live-reloadable via the Set Parakeet URL / Set Gemma URL actions). Operators can swap which model their Ollama / Parakeet wrapper serves without rebuilding the relay.'

/**
 * Version descriptor for 0.2.1:0.
 *
 * Both migration hooks are intentionally empty: this version performs
 * no work when migrating up to it or down from it.
 */
export const v_0_2_1 = VersionInfo.of({
  version: '0.2.1:0',
  releaseNotes: { en_US: releaseNotesEnUS },
  migrations: {
    // Nothing to transform in either direction.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
})