v0.2.1 model names config-driven

This commit is contained in:
local
2026-05-11 20:27:19 -05:00
parent cccbee27e4
commit c9f051cd07
9 changed files with 75 additions and 28 deletions
+13 -14
View File
@@ -18,26 +18,25 @@ const ANALYZE_MAX_TOKENS = 16000;
// internet, so generous timeouts. Same scale as Recap's defaults. // internet, so generous timeouts. Same scale as Recap's defaults.
const DEFAULT_TIMEOUT_MS = 900_000; const DEFAULT_TIMEOUT_MS = 900_000;
// Pull the model identifier out of the prompt if the operator wants a // Defaults used only when the route handler doesn't supply explicit
// specific Gemma SKU. We default to "gemma3:27b" which is the typical // model names (e.g. a unit test instantiating the backend directly).
// Ollama tag for the analysis-capable Gemma model. Operators with a // In production the model names come from relay-config.json via
// different deployment can update this via a future StartOS action; // setParakeetUrl / setGemmaUrl, so the operator can swap models on
// for v0.2 it's hardcoded. // their Ollama deployment without rebuilding the relay.
const HARDWARE_ANALYZE_MODEL = process.env.RELAY_GEMMA_MODEL || "gemma3:27b"; const DEFAULT_TRANSCRIBE_MODEL = "parakeet-tdt-0.6b-v3";
const DEFAULT_ANALYZE_MODEL = "gemma3:27b";
// Parakeet's typical model identifier. Mirrors what Recap's whisper.js
// sends when the operator points the relay at a NeMo Parakeet HTTP
// wrapper. Configurable via env var for non-default deployments.
const HARDWARE_TRANSCRIBE_MODEL =
process.env.RELAY_PARAKEET_MODEL || "parakeet-tdt-0.6b-v3";
export function createHardwareBackend({ export function createHardwareBackend({
parakeetBaseURL = "", parakeetBaseURL = "",
gemmaBaseURL = "", gemmaBaseURL = "",
parakeetModel = DEFAULT_TRANSCRIBE_MODEL,
gemmaModel = DEFAULT_ANALYZE_MODEL,
timeoutMs = DEFAULT_TIMEOUT_MS, timeoutMs = DEFAULT_TIMEOUT_MS,
} = {}) { } = {}) {
const parakeet = parakeetBaseURL ? parakeetBaseURL.replace(/\/$/, "") : ""; const parakeet = parakeetBaseURL ? parakeetBaseURL.replace(/\/$/, "") : "";
const gemma = gemmaBaseURL ? gemmaBaseURL.replace(/\/$/, "") : ""; const gemma = gemmaBaseURL ? gemmaBaseURL.replace(/\/$/, "") : "";
const transcribeModel = parakeetModel || DEFAULT_TRANSCRIBE_MODEL;
const analyzeModel = gemmaModel || DEFAULT_ANALYZE_MODEL;
return { return {
hasTranscribe: !!parakeet, hasTranscribe: !!parakeet,
@@ -69,7 +68,7 @@ export function createHardwareBackend({
const form = new FormData(); const form = new FormData();
const blob = new Blob([audio], { type: mimeType }); const blob = new Blob([audio], { type: mimeType });
form.append("file", blob, "audio.bin"); form.append("file", blob, "audio.bin");
form.append("model", HARDWARE_TRANSCRIBE_MODEL); form.append("model", transcribeModel);
if (richMode) { if (richMode) {
form.append("response_format", "verbose_json"); form.append("response_format", "verbose_json");
form.append("timestamp_granularities[]", "segment"); form.append("timestamp_granularities[]", "segment");
@@ -171,7 +170,7 @@ export function createHardwareBackend({
method: "POST", method: "POST",
headers: { "Content-Type": "application/json" }, headers: { "Content-Type": "application/json" },
body: JSON.stringify({ body: JSON.stringify({
model: HARDWARE_ANALYZE_MODEL, model: analyzeModel,
max_tokens: ANALYZE_MAX_TOKENS, max_tokens: ANALYZE_MAX_TOKENS,
messages: [{ role: "user", content: prompt }], messages: [{ role: "user", content: prompt }],
stream: false, stream: false,
+2
View File
@@ -16,6 +16,8 @@ function defaultConfig() {
relay_gemini_api_key: "", relay_gemini_api_key: "",
relay_parakeet_base_url: "", relay_parakeet_base_url: "",
relay_gemma_base_url: "", relay_gemma_base_url: "",
relay_parakeet_model: "parakeet-tdt-0.6b-v3",
relay_gemma_model: "gemma3:27b",
relay_keysat_base_url: "https://keysat.xyz", relay_keysat_base_url: "https://keysat.xyz",
relay_admin_username: "", relay_admin_username: "",
relay_admin_password_hash: "", relay_admin_password_hash: "",
+2
View File
@@ -84,6 +84,8 @@ export function analyzeRouter() {
const backend = createHardwareBackend({ const backend = createHardwareBackend({
parakeetBaseURL: cfg.relay_parakeet_base_url, parakeetBaseURL: cfg.relay_parakeet_base_url,
gemmaBaseURL: cfg.relay_gemma_base_url, gemmaBaseURL: cfg.relay_gemma_base_url,
parakeetModel: cfg.relay_parakeet_model,
gemmaModel: cfg.relay_gemma_model,
}); });
result = await backend.analyzeText({ prompt }); result = await backend.analyzeText({ prompt });
} }
+2
View File
@@ -110,6 +110,8 @@ export function transcribeRouter() {
const backend = createHardwareBackend({ const backend = createHardwareBackend({
parakeetBaseURL: cfg.relay_parakeet_base_url, parakeetBaseURL: cfg.relay_parakeet_base_url,
gemmaBaseURL: cfg.relay_gemma_base_url, gemmaBaseURL: cfg.relay_gemma_base_url,
parakeetModel: cfg.relay_parakeet_model,
gemmaModel: cfg.relay_gemma_model,
}); });
result = await backend.transcribeAudio({ result = await backend.transcribeAudio({
audio: req.file.buffer, audio: req.file.buffer,
+16 -4
View File
@@ -3,9 +3,10 @@ import { configFile } from '../file-models/config.json'
const { InputSpec, Value } = sdk const { InputSpec, Value } = sdk
// Optional Gemma/Ollama endpoint for the operator-hardware analysis // Operator's Gemma (or any OpenAI-compatible chat-completions) endpoint
// fallback. Counterpart to setParakeetUrl — Parakeet handles transcribe // + which model to request. Both fields live-reload so the operator
// overflow, this handles analyze overflow. // can pull a different Gemma SKU on Ollama and update the model name
// here without restarting the relay.
const inputSpec = InputSpec.of({ const inputSpec = InputSpec.of({
relay_gemma_base_url: Value.text({ relay_gemma_base_url: Value.text({
name: 'Gemma Base URL', name: 'Gemma Base URL',
@@ -22,6 +23,15 @@ const inputSpec = InputSpec.of({
}, },
], ],
}), }),
relay_gemma_model: Value.text({
name: 'Gemma Model Name',
description:
'The model identifier sent in upstream chat-completions requests. Match whatever name your Ollama / vLLM / llama.cpp deployment exposes (run `ollama list` to see what you have pulled). Example: gemma3:27b, gemma2:9b, llama3.1:70b',
required: true,
default: 'gemma3:27b',
minLength: 1,
maxLength: 128,
}),
}) })
export const setGemmaUrl = sdk.Action.withInput( export const setGemmaUrl = sdk.Action.withInput(
@@ -30,7 +40,7 @@ export const setGemmaUrl = sdk.Action.withInput(
async ({ effects }) => ({ async ({ effects }) => ({
name: 'Set Gemma URL', name: 'Set Gemma URL',
description: description:
'Optional. Where the relay forwards analysis requests once a user exceeds their monthly Gemini cap. Leave empty to disable the fallback.', 'Optional. Where the relay forwards analysis requests once a user exceeds their monthly Gemini cap. Leave URL empty to disable the fallback.',
warning: null, warning: null,
allowedStatuses: 'any', allowedStatuses: 'any',
group: null, group: null,
@@ -43,12 +53,14 @@ export const setGemmaUrl = sdk.Action.withInput(
const config = await configFile.read().once() const config = await configFile.read().once()
return { return {
relay_gemma_base_url: config?.relay_gemma_base_url || '', relay_gemma_base_url: config?.relay_gemma_base_url || '',
relay_gemma_model: config?.relay_gemma_model || 'gemma3:27b',
} }
}, },
async ({ effects, input }) => { async ({ effects, input }) => {
await configFile.merge(effects, { await configFile.merge(effects, {
relay_gemma_base_url: (input.relay_gemma_base_url || '').trim(), relay_gemma_base_url: (input.relay_gemma_base_url || '').trim(),
relay_gemma_model: (input.relay_gemma_model || 'gemma3:27b').trim(),
}) })
return null return null
}, },
+17 -8
View File
@@ -3,13 +3,9 @@ import { configFile } from '../file-models/config.json'
const { InputSpec, Value } = sdk const { InputSpec, Value } = sdk
// Optional Parakeet endpoint for the operator-hardware fallback path. // Operator's Parakeet endpoint + which model to request. Both fields
// When a Pro/Max user exceeds their Gemini monthly cap, the relay // live-reload — change them via this action and the next relay request
// routes transcribe requests here instead. Empty disables the fallback // picks up the new values without a daemon restart.
// — over-cap users get 402.
//
// In a typical setup this points at the operator's NVIDIA Spark or
// similar local GPU box running the NeMo / Parakeet HTTP wrapper.
const inputSpec = InputSpec.of({ const inputSpec = InputSpec.of({
relay_parakeet_base_url: Value.text({ relay_parakeet_base_url: Value.text({
name: 'Parakeet Base URL', name: 'Parakeet Base URL',
@@ -26,6 +22,15 @@ const inputSpec = InputSpec.of({
}, },
], ],
}), }),
relay_parakeet_model: Value.text({
name: 'Parakeet Model Name',
description:
'The model identifier sent in upstream requests (the "model" field in the OpenAI Whisper API body). Match whatever name your Parakeet wrapper expects. Default: parakeet-tdt-0.6b-v3',
required: true,
default: 'parakeet-tdt-0.6b-v3',
minLength: 1,
maxLength: 128,
}),
}) })
export const setParakeetUrl = sdk.Action.withInput( export const setParakeetUrl = sdk.Action.withInput(
@@ -34,7 +39,7 @@ export const setParakeetUrl = sdk.Action.withInput(
async ({ effects }) => ({ async ({ effects }) => ({
name: 'Set Parakeet URL', name: 'Set Parakeet URL',
description: description:
"Optional. Where the relay forwards transcription requests once a user exceeds their monthly Gemini cap. Leave empty to disable the operator-hardware fallback.", "Optional. Where the relay forwards transcription requests once a user exceeds their monthly Gemini cap. Leave URL empty to disable the operator-hardware fallback.",
warning: null, warning: null,
allowedStatuses: 'any', allowedStatuses: 'any',
group: null, group: null,
@@ -47,12 +52,16 @@ export const setParakeetUrl = sdk.Action.withInput(
const config = await configFile.read().once() const config = await configFile.read().once()
return { return {
relay_parakeet_base_url: config?.relay_parakeet_base_url || '', relay_parakeet_base_url: config?.relay_parakeet_base_url || '',
relay_parakeet_model:
config?.relay_parakeet_model || 'parakeet-tdt-0.6b-v3',
} }
}, },
async ({ effects, input }) => { async ({ effects, input }) => {
await configFile.merge(effects, { await configFile.merge(effects, {
relay_parakeet_base_url: (input.relay_parakeet_base_url || '').trim(), relay_parakeet_base_url: (input.relay_parakeet_base_url || '').trim(),
relay_parakeet_model:
(input.relay_parakeet_model || 'parakeet-tdt-0.6b-v3').trim(),
}) })
return null return null
}, },
+7
View File
@@ -28,6 +28,13 @@ export const configFile = FileHelper.json(
// and return 402 once exceeded (no fallback). // and return 402 once exceeded (no fallback).
relay_parakeet_base_url: z.string().default(''), relay_parakeet_base_url: z.string().default(''),
relay_gemma_base_url: z.string().default(''), relay_gemma_base_url: z.string().default(''),
// Model identifiers to send in the upstream request bodies. The
// operator's Ollama or Parakeet wrapper may serve different models
// depending on what's been pulled; making these config-driven
// means the operator can swap models without rebuilding the relay.
// Live-reloaded — change applies to the next request.
relay_parakeet_model: z.string().default('parakeet-tdt-0.6b-v3'),
relay_gemma_model: z.string().default('gemma3:27b'),
// ── License server ── // ── License server ──
// URL of the Keysat license server used for the cached online // URL of the Keysat license server used for the cached online
+3 -2
View File
@@ -1,8 +1,9 @@
import { VersionGraph } from '@start9labs/start-sdk' import { VersionGraph } from '@start9labs/start-sdk'
import { v_0_1_0 } from './v0.1.0' import { v_0_1_0 } from './v0.1.0'
import { v_0_2_0 } from './v0.2.0' import { v_0_2_0 } from './v0.2.0'
import { v_0_2_1 } from './v0.2.1'
export const versionGraph = VersionGraph.of({ export const versionGraph = VersionGraph.of({
current: v_0_2_0, current: v_0_2_1,
other: [v_0_1_0], other: [v_0_2_0, v_0_1_0],
}) })
+13
View File
@@ -0,0 +1,13 @@
import { VersionInfo } from '@start9labs/start-sdk'
// English (en_US) release-notes text for version 0.2.1.
const RELEASE_NOTES_EN_US =
  'Parakeet + Gemma model names are now config-driven (live-reloadable via the Set Parakeet URL / Set Gemma URL actions). Operators can swap which model their Ollama / Parakeet wrapper serves without rebuilding the relay.'

/**
 * Version descriptor for 0.2.1.
 *
 * Carries the version string and the en_US release notes. Both migration
 * hooks are intentionally empty: this descriptor performs no work when
 * moving up to, or down from, this version.
 */
export const v_0_2_1 = VersionInfo.of({
  version: '0.2.1:0',
  releaseNotes: { en_US: RELEASE_NOTES_EN_US },
  migrations: {
    // No-op upgrade hook.
    async up({ effects }) {},
    // No-op downgrade hook.
    async down({ effects }) {},
  },
})