diff --git a/server/backends/hardware.js b/server/backends/hardware.js index 51339b7..98d431d 100644 --- a/server/backends/hardware.js +++ b/server/backends/hardware.js @@ -18,26 +18,25 @@ const ANALYZE_MAX_TOKENS = 16000; // internet, so generous timeouts. Same scale as Recap's defaults. const DEFAULT_TIMEOUT_MS = 900_000; -// Pull the model identifier out of the prompt if the operator wants a -// specific Gemma SKU. We default to "gemma3:27b" which is the typical -// Ollama tag for the analysis-capable Gemma model. Operators with a -// different deployment can update this via a future StartOS action; -// for v0.2 it's hardcoded. -const HARDWARE_ANALYZE_MODEL = process.env.RELAY_GEMMA_MODEL || "gemma3:27b"; - -// Parakeet's typical model identifier. Mirrors what Recap's whisper.js -// sends when the operator points the relay at a NeMo Parakeet HTTP -// wrapper. Configurable via env var for non-default deployments. -const HARDWARE_TRANSCRIBE_MODEL = - process.env.RELAY_PARAKEET_MODEL || "parakeet-tdt-0.6b-v3"; +// Defaults used only when the route handler doesn't supply explicit +// model names (e.g. a unit test instantiating the backend directly). +// In production the model names come from relay-config.json via +// setParakeetUrl / setGemmaUrl, so the operator can swap models on +// their Ollama deployment without rebuilding the relay. +const DEFAULT_TRANSCRIBE_MODEL = "parakeet-tdt-0.6b-v3"; +const DEFAULT_ANALYZE_MODEL = "gemma3:27b"; export function createHardwareBackend({ parakeetBaseURL = "", gemmaBaseURL = "", + parakeetModel = DEFAULT_TRANSCRIBE_MODEL, + gemmaModel = DEFAULT_ANALYZE_MODEL, timeoutMs = DEFAULT_TIMEOUT_MS, } = {}) { const parakeet = parakeetBaseURL ? parakeetBaseURL.replace(/\/$/, "") : ""; const gemma = gemmaBaseURL ? 
gemmaBaseURL.replace(/\/$/, "") : ""; + const transcribeModel = parakeetModel || DEFAULT_TRANSCRIBE_MODEL; + const analyzeModel = gemmaModel || DEFAULT_ANALYZE_MODEL; return { hasTranscribe: !!parakeet, @@ -69,7 +68,7 @@ export function createHardwareBackend({ const form = new FormData(); const blob = new Blob([audio], { type: mimeType }); form.append("file", blob, "audio.bin"); - form.append("model", HARDWARE_TRANSCRIBE_MODEL); + form.append("model", transcribeModel); if (richMode) { form.append("response_format", "verbose_json"); form.append("timestamp_granularities[]", "segment"); @@ -171,7 +170,7 @@ export function createHardwareBackend({ method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ - model: HARDWARE_ANALYZE_MODEL, + model: analyzeModel, max_tokens: ANALYZE_MAX_TOKENS, messages: [{ role: "user", content: prompt }], stream: false, diff --git a/server/config.js b/server/config.js index fc09080..365b0d5 100644 --- a/server/config.js +++ b/server/config.js @@ -16,6 +16,8 @@ function defaultConfig() { relay_gemini_api_key: "", relay_parakeet_base_url: "", relay_gemma_base_url: "", + relay_parakeet_model: "parakeet-tdt-0.6b-v3", + relay_gemma_model: "gemma3:27b", relay_keysat_base_url: "https://keysat.xyz", relay_admin_username: "", relay_admin_password_hash: "", diff --git a/server/routes/analyze.js b/server/routes/analyze.js index 31eb9e0..517c037 100644 --- a/server/routes/analyze.js +++ b/server/routes/analyze.js @@ -84,6 +84,8 @@ export function analyzeRouter() { const backend = createHardwareBackend({ parakeetBaseURL: cfg.relay_parakeet_base_url, gemmaBaseURL: cfg.relay_gemma_base_url, + parakeetModel: cfg.relay_parakeet_model, + gemmaModel: cfg.relay_gemma_model, }); result = await backend.analyzeText({ prompt }); } diff --git a/server/routes/transcribe.js b/server/routes/transcribe.js index 8dea556..bb5b03c 100644 --- a/server/routes/transcribe.js +++ b/server/routes/transcribe.js @@ -110,6 +110,8 @@ export function 
transcribeRouter() { const backend = createHardwareBackend({ parakeetBaseURL: cfg.relay_parakeet_base_url, gemmaBaseURL: cfg.relay_gemma_base_url, + parakeetModel: cfg.relay_parakeet_model, + gemmaModel: cfg.relay_gemma_model, }); result = await backend.transcribeAudio({ audio: req.file.buffer, diff --git a/startos/actions/setGemmaUrl.ts b/startos/actions/setGemmaUrl.ts index 4706f09..311d5ab 100644 --- a/startos/actions/setGemmaUrl.ts +++ b/startos/actions/setGemmaUrl.ts @@ -3,9 +3,10 @@ import { configFile } from '../file-models/config.json' const { InputSpec, Value } = sdk -// Optional Gemma/Ollama endpoint for the operator-hardware analysis -// fallback. Counterpart to setParakeetUrl — Parakeet handles transcribe -// overflow, this handles analyze overflow. +// Operator's Gemma (or any OpenAI-compatible chat-completions) endpoint +// + which model to request. Both fields live-reload so the operator +// can pull a different Gemma SKU on Ollama and update the model name +// here without restarting the relay. const inputSpec = InputSpec.of({ relay_gemma_base_url: Value.text({ name: 'Gemma Base URL', @@ -22,6 +23,15 @@ const inputSpec = InputSpec.of({ }, ], }), + relay_gemma_model: Value.text({ + name: 'Gemma Model Name', + description: + 'The model identifier sent in upstream chat-completions requests. Match whatever name your Ollama / vLLM / llama.cpp deployment exposes (run `ollama list` to see what you have pulled). Example: gemma3:27b, gemma2:9b, llama3.1:70b', + required: true, + default: 'gemma3:27b', + minLength: 1, + maxLength: 128, + }), }) export const setGemmaUrl = sdk.Action.withInput( @@ -30,7 +40,7 @@ export const setGemmaUrl = sdk.Action.withInput( async ({ effects }) => ({ name: 'Set Gemma URL', description: - 'Optional. Where the relay forwards analysis requests once a user exceeds their monthly Gemini cap. Leave empty to disable the fallback.', + 'Optional. Where the relay forwards analysis requests once a user exceeds their monthly Gemini cap. 
Leave URL empty to disable the fallback.', warning: null, allowedStatuses: 'any', group: null, @@ -43,12 +53,14 @@ export const setGemmaUrl = sdk.Action.withInput( const config = await configFile.read().once() return { relay_gemma_base_url: config?.relay_gemma_base_url || '', + relay_gemma_model: config?.relay_gemma_model || 'gemma3:27b', } }, async ({ effects, input }) => { await configFile.merge(effects, { relay_gemma_base_url: (input.relay_gemma_base_url || '').trim(), + relay_gemma_model: (input.relay_gemma_model || '').trim() || 'gemma3:27b', // trim before defaulting so whitespace-only input is never stored as '' }) return null }, diff --git a/startos/actions/setParakeetUrl.ts b/startos/actions/setParakeetUrl.ts index ee3e182..84a2687 100644 --- a/startos/actions/setParakeetUrl.ts +++ b/startos/actions/setParakeetUrl.ts @@ -3,13 +3,9 @@ import { configFile } from '../file-models/config.json' const { InputSpec, Value } = sdk -// Optional Parakeet endpoint for the operator-hardware fallback path. -// When a Pro/Max user exceeds their Gemini monthly cap, the relay -// routes transcribe requests here instead. Empty disables the fallback -// — over-cap users get 402. -// -// In a typical setup this points at the operator's NVIDIA Spark or -// similar local GPU box running the NeMo / Parakeet HTTP wrapper. +// Operator's Parakeet endpoint + which model to request. Both fields +// live-reload — change them via this action and the next relay request +// picks up the new values without a daemon restart. const inputSpec = InputSpec.of({ relay_parakeet_base_url: Value.text({ name: 'Parakeet Base URL', @@ -26,6 +22,15 @@ const inputSpec = InputSpec.of({ }, ], }), + relay_parakeet_model: Value.text({ + name: 'Parakeet Model Name', + description: + 'The model identifier sent in upstream requests (the "model" field in the OpenAI Whisper API body). Match whatever name your Parakeet wrapper expects. 
Default: parakeet-tdt-0.6b-v3', + required: true, + default: 'parakeet-tdt-0.6b-v3', + minLength: 1, + maxLength: 128, + }), }) export const setParakeetUrl = sdk.Action.withInput( @@ -34,7 +39,7 @@ export const setParakeetUrl = sdk.Action.withInput( async ({ effects }) => ({ name: 'Set Parakeet URL', description: - "Optional. Where the relay forwards transcription requests once a user exceeds their monthly Gemini cap. Leave empty to disable the operator-hardware fallback.", + "Optional. Where the relay forwards transcription requests once a user exceeds their monthly Gemini cap. Leave URL empty to disable the operator-hardware fallback.", warning: null, allowedStatuses: 'any', group: null, @@ -47,12 +52,16 @@ export const setParakeetUrl = sdk.Action.withInput( const config = await configFile.read().once() return { relay_parakeet_base_url: config?.relay_parakeet_base_url || '', + relay_parakeet_model: + config?.relay_parakeet_model || 'parakeet-tdt-0.6b-v3', } }, async ({ effects, input }) => { await configFile.merge(effects, { relay_parakeet_base_url: (input.relay_parakeet_base_url || '').trim(), + relay_parakeet_model: + (input.relay_parakeet_model || '').trim() || 'parakeet-tdt-0.6b-v3', // trim before defaulting so whitespace-only input is never stored as '' }) return null }, diff --git a/startos/file-models/config.json.ts b/startos/file-models/config.json.ts index 94e2997..ec697c8 100644 --- a/startos/file-models/config.json.ts +++ b/startos/file-models/config.json.ts @@ -28,6 +28,13 @@ export const configFile = FileHelper.json( // and return 402 once exceeded (no fallback). relay_parakeet_base_url: z.string().default(''), relay_gemma_base_url: z.string().default(''), + // Model identifiers to send in the upstream request bodies. The + // operator's Ollama or Parakeet wrapper may serve different models + // depending on what's been pulled; making these config-driven + // means the operator can swap models without rebuilding the relay. 
+ // Live-reloaded — change applies to the next request. 
+ relay_parakeet_model: z.string().default('parakeet-tdt-0.6b-v3'), + relay_gemma_model: z.string().default('gemma3:27b'), // ── License server ── // URL of the Keysat license server used for the cached online diff --git a/startos/versions/index.ts b/startos/versions/index.ts index 2bd08ba..1157765 100644 --- a/startos/versions/index.ts +++ b/startos/versions/index.ts @@ -1,8 +1,9 @@ import { VersionGraph } from '@start9labs/start-sdk' import { v_0_1_0 } from './v0.1.0' import { v_0_2_0 } from './v0.2.0' +import { v_0_2_1 } from './v0.2.1' export const versionGraph = VersionGraph.of({ - current: v_0_2_0, - other: [v_0_1_0], + current: v_0_2_1, + other: [v_0_2_0, v_0_1_0], }) diff --git a/startos/versions/v0.2.1.ts b/startos/versions/v0.2.1.ts new file mode 100644 index 0000000..a30cabe --- /dev/null +++ b/startos/versions/v0.2.1.ts @@ -0,0 +1,13 @@ +import { VersionInfo } from '@start9labs/start-sdk' + +export const v_0_2_1 = VersionInfo.of({ + version: '0.2.1:0', + releaseNotes: { + en_US: + 'Parakeet + Gemma model names are now config-driven (live-reloadable via the Set Parakeet URL / Set Gemma URL actions). Operators can swap which model their Ollama / Parakeet wrapper serves without rebuilding the relay.', + }, + migrations: { + up: async ({ effects }) => {}, + down: async ({ effects }) => {}, + }, +})