v0.2.1 model names config-driven
This commit is contained in:
+13
-14
@@ -18,26 +18,25 @@ const ANALYZE_MAX_TOKENS = 16000;
|
||||
// internet, so generous timeouts. Same scale as Recap's defaults.
|
||||
const DEFAULT_TIMEOUT_MS = 900_000;
|
||||
|
||||
// Read the model identifier from the environment if the operator wants a
|
||||
// specific Gemma SKU. We default to "gemma3:27b" which is the typical
|
||||
// Ollama tag for the analysis-capable Gemma model. Operators with a
|
||||
// different deployment can update this via a future StartOS action;
|
||||
// for v0.2 the only override is the RELAY_GEMMA_MODEL env var.
|
||||
const HARDWARE_ANALYZE_MODEL = process.env.RELAY_GEMMA_MODEL || "gemma3:27b";
|
||||
|
||||
// Parakeet's typical model identifier. Mirrors what Recap's whisper.js
|
||||
// sends when the operator points the relay at a NeMo Parakeet HTTP
|
||||
// wrapper. Configurable via env var for non-default deployments.
|
||||
const HARDWARE_TRANSCRIBE_MODEL =
|
||||
process.env.RELAY_PARAKEET_MODEL || "parakeet-tdt-0.6b-v3";
|
||||
// Defaults used only when the route handler doesn't supply explicit
|
||||
// model names (e.g. a unit test instantiating the backend directly).
|
||||
// In production the model names come from relay-config.json via
|
||||
// setParakeetUrl / setGemmaUrl, so the operator can swap models on
|
||||
// their Ollama deployment without rebuilding the relay.
|
||||
const DEFAULT_TRANSCRIBE_MODEL = "parakeet-tdt-0.6b-v3";
|
||||
const DEFAULT_ANALYZE_MODEL = "gemma3:27b";
|
||||
|
||||
export function createHardwareBackend({
|
||||
parakeetBaseURL = "",
|
||||
gemmaBaseURL = "",
|
||||
parakeetModel = DEFAULT_TRANSCRIBE_MODEL,
|
||||
gemmaModel = DEFAULT_ANALYZE_MODEL,
|
||||
timeoutMs = DEFAULT_TIMEOUT_MS,
|
||||
} = {}) {
|
||||
const parakeet = parakeetBaseURL ? parakeetBaseURL.replace(/\/$/, "") : "";
|
||||
const gemma = gemmaBaseURL ? gemmaBaseURL.replace(/\/$/, "") : "";
|
||||
const transcribeModel = parakeetModel || DEFAULT_TRANSCRIBE_MODEL;
|
||||
const analyzeModel = gemmaModel || DEFAULT_ANALYZE_MODEL;
|
||||
|
||||
return {
|
||||
hasTranscribe: !!parakeet,
|
||||
@@ -69,7 +68,7 @@ export function createHardwareBackend({
|
||||
const form = new FormData();
|
||||
const blob = new Blob([audio], { type: mimeType });
|
||||
form.append("file", blob, "audio.bin");
|
||||
form.append("model", HARDWARE_TRANSCRIBE_MODEL);
|
||||
form.append("model", transcribeModel);
|
||||
if (richMode) {
|
||||
form.append("response_format", "verbose_json");
|
||||
form.append("timestamp_granularities[]", "segment");
|
||||
@@ -171,7 +170,7 @@ export function createHardwareBackend({
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
model: HARDWARE_ANALYZE_MODEL,
|
||||
model: analyzeModel,
|
||||
max_tokens: ANALYZE_MAX_TOKENS,
|
||||
messages: [{ role: "user", content: prompt }],
|
||||
stream: false,
|
||||
|
||||
@@ -16,6 +16,8 @@ function defaultConfig() {
|
||||
relay_gemini_api_key: "",
|
||||
relay_parakeet_base_url: "",
|
||||
relay_gemma_base_url: "",
|
||||
relay_parakeet_model: "parakeet-tdt-0.6b-v3",
|
||||
relay_gemma_model: "gemma3:27b",
|
||||
relay_keysat_base_url: "https://keysat.xyz",
|
||||
relay_admin_username: "",
|
||||
relay_admin_password_hash: "",
|
||||
|
||||
@@ -84,6 +84,8 @@ export function analyzeRouter() {
|
||||
const backend = createHardwareBackend({
|
||||
parakeetBaseURL: cfg.relay_parakeet_base_url,
|
||||
gemmaBaseURL: cfg.relay_gemma_base_url,
|
||||
parakeetModel: cfg.relay_parakeet_model,
|
||||
gemmaModel: cfg.relay_gemma_model,
|
||||
});
|
||||
result = await backend.analyzeText({ prompt });
|
||||
}
|
||||
|
||||
@@ -110,6 +110,8 @@ export function transcribeRouter() {
|
||||
const backend = createHardwareBackend({
|
||||
parakeetBaseURL: cfg.relay_parakeet_base_url,
|
||||
gemmaBaseURL: cfg.relay_gemma_base_url,
|
||||
parakeetModel: cfg.relay_parakeet_model,
|
||||
gemmaModel: cfg.relay_gemma_model,
|
||||
});
|
||||
result = await backend.transcribeAudio({
|
||||
audio: req.file.buffer,
|
||||
|
||||
@@ -3,9 +3,10 @@ import { configFile } from '../file-models/config.json'
|
||||
|
||||
const { InputSpec, Value } = sdk
|
||||
|
||||
// Optional Gemma/Ollama endpoint for the operator-hardware analysis
|
||||
// fallback. Counterpart to setParakeetUrl — Parakeet handles transcribe
|
||||
// overflow, this handles analyze overflow.
|
||||
// Operator's Gemma (or any OpenAI-compatible chat-completions) endpoint
|
||||
// + which model to request. Both fields live-reload so the operator
|
||||
// can pull a different Gemma SKU on Ollama and update the model name
|
||||
// here without restarting the relay.
|
||||
const inputSpec = InputSpec.of({
|
||||
relay_gemma_base_url: Value.text({
|
||||
name: 'Gemma Base URL',
|
||||
@@ -22,6 +23,15 @@ const inputSpec = InputSpec.of({
|
||||
},
|
||||
],
|
||||
}),
|
||||
relay_gemma_model: Value.text({
|
||||
name: 'Gemma Model Name',
|
||||
description:
|
||||
'The model identifier sent in upstream chat-completions requests. Match whatever name your Ollama / vLLM / llama.cpp deployment exposes (run `ollama list` to see what you have pulled). Example: gemma3:27b, gemma2:9b, llama3.1:70b',
|
||||
required: true,
|
||||
default: 'gemma3:27b',
|
||||
minLength: 1,
|
||||
maxLength: 128,
|
||||
}),
|
||||
})
|
||||
|
||||
export const setGemmaUrl = sdk.Action.withInput(
|
||||
@@ -30,7 +40,7 @@ export const setGemmaUrl = sdk.Action.withInput(
|
||||
async ({ effects }) => ({
|
||||
name: 'Set Gemma URL',
|
||||
description:
|
||||
'Optional. Where the relay forwards analysis requests once a user exceeds their monthly Gemini cap. Leave empty to disable the fallback.',
|
||||
'Optional. Where the relay forwards analysis requests once a user exceeds their monthly Gemini cap. Leave URL empty to disable the fallback.',
|
||||
warning: null,
|
||||
allowedStatuses: 'any',
|
||||
group: null,
|
||||
@@ -43,12 +53,14 @@ export const setGemmaUrl = sdk.Action.withInput(
|
||||
const config = await configFile.read().once()
|
||||
return {
|
||||
relay_gemma_base_url: config?.relay_gemma_base_url || '',
|
||||
relay_gemma_model: config?.relay_gemma_model || 'gemma3:27b',
|
||||
}
|
||||
},
|
||||
|
||||
// Submit handler: persist the Gemma endpoint URL and model name into the
// relay config file.
//
// Both values are trimmed before being stored. The model name falls back
// to 'gemma3:27b' AFTER trimming, so a whitespace-only entry (which still
// satisfies the input spec's minLength of 1) cannot be saved as an empty
// model identifier. An empty URL is stored as-is.
async ({ effects, input }) => {
  const baseUrl = (input.relay_gemma_base_url || '').trim()
  const modelName = (input.relay_gemma_model || '').trim() || 'gemma3:27b'

  await configFile.merge(effects, {
    relay_gemma_base_url: baseUrl,
    relay_gemma_model: modelName,
  })
  return null
},
|
||||
|
||||
@@ -3,13 +3,9 @@ import { configFile } from '../file-models/config.json'
|
||||
|
||||
const { InputSpec, Value } = sdk
|
||||
|
||||
// Optional Parakeet endpoint for the operator-hardware fallback path.
|
||||
// When a Pro/Max user exceeds their Gemini monthly cap, the relay
|
||||
// routes transcribe requests here instead. Empty disables the fallback
|
||||
// — over-cap users get 402.
|
||||
//
|
||||
// In a typical setup this points at the operator's NVIDIA Spark or
|
||||
// similar local GPU box running the NeMo / Parakeet HTTP wrapper.
|
||||
// Operator's Parakeet endpoint + which model to request. Both fields
|
||||
// live-reload — change them via this action and the next relay request
|
||||
// picks up the new values without a daemon restart.
|
||||
const inputSpec = InputSpec.of({
|
||||
relay_parakeet_base_url: Value.text({
|
||||
name: 'Parakeet Base URL',
|
||||
@@ -26,6 +22,15 @@ const inputSpec = InputSpec.of({
|
||||
},
|
||||
],
|
||||
}),
|
||||
relay_parakeet_model: Value.text({
|
||||
name: 'Parakeet Model Name',
|
||||
description:
|
||||
'The model identifier sent in upstream requests (the "model" field in the OpenAI Whisper API body). Match whatever name your Parakeet wrapper expects. Default: parakeet-tdt-0.6b-v3',
|
||||
required: true,
|
||||
default: 'parakeet-tdt-0.6b-v3',
|
||||
minLength: 1,
|
||||
maxLength: 128,
|
||||
}),
|
||||
})
|
||||
|
||||
export const setParakeetUrl = sdk.Action.withInput(
|
||||
@@ -34,7 +39,7 @@ export const setParakeetUrl = sdk.Action.withInput(
|
||||
async ({ effects }) => ({
|
||||
name: 'Set Parakeet URL',
|
||||
description:
|
||||
"Optional. Where the relay forwards transcription requests once a user exceeds their monthly Gemini cap. Leave empty to disable the operator-hardware fallback.",
|
||||
"Optional. Where the relay forwards transcription requests once a user exceeds their monthly Gemini cap. Leave URL empty to disable the operator-hardware fallback.",
|
||||
warning: null,
|
||||
allowedStatuses: 'any',
|
||||
group: null,
|
||||
@@ -47,12 +52,16 @@ export const setParakeetUrl = sdk.Action.withInput(
|
||||
const config = await configFile.read().once()
|
||||
return {
|
||||
relay_parakeet_base_url: config?.relay_parakeet_base_url || '',
|
||||
relay_parakeet_model:
|
||||
config?.relay_parakeet_model || 'parakeet-tdt-0.6b-v3',
|
||||
}
|
||||
},
|
||||
|
||||
// Submit handler: persist the Parakeet endpoint URL and model name into
// the relay config file.
//
// Both values are trimmed before being stored. The model name falls back
// to 'parakeet-tdt-0.6b-v3' AFTER trimming, so a whitespace-only entry
// (which still satisfies the input spec's minLength of 1) cannot be saved
// as an empty model identifier. An empty URL is stored as-is.
async ({ effects, input }) => {
  const baseUrl = (input.relay_parakeet_base_url || '').trim()
  const modelName =
    (input.relay_parakeet_model || '').trim() || 'parakeet-tdt-0.6b-v3'

  await configFile.merge(effects, {
    relay_parakeet_base_url: baseUrl,
    relay_parakeet_model: modelName,
  })
  return null
},
|
||||
|
||||
@@ -28,6 +28,13 @@ export const configFile = FileHelper.json(
|
||||
// and return 402 once exceeded (no fallback).
|
||||
relay_parakeet_base_url: z.string().default(''),
|
||||
relay_gemma_base_url: z.string().default(''),
|
||||
// Model identifiers to send in the upstream request bodies. The
|
||||
// operator's Ollama or Parakeet wrapper may serve different models
|
||||
// depending on what's been pulled; making these config-driven
|
||||
// means the operator can swap models without rebuilding the relay.
|
||||
// Live-reloaded — change applies to the next request.
|
||||
relay_parakeet_model: z.string().default('parakeet-tdt-0.6b-v3'),
|
||||
relay_gemma_model: z.string().default('gemma3:27b'),
|
||||
|
||||
// ── License server ──
|
||||
// URL of the Keysat license server used for the cached online
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
import { VersionGraph } from '@start9labs/start-sdk'
|
||||
import { v_0_1_0 } from './v0.1.0'
|
||||
import { v_0_2_0 } from './v0.2.0'
|
||||
import { v_0_2_1 } from './v0.2.1'
|
||||
|
||||
export const versionGraph = VersionGraph.of({
|
||||
current: v_0_2_0,
|
||||
other: [v_0_1_0],
|
||||
current: v_0_2_1,
|
||||
other: [v_0_2_0, v_0_1_0],
|
||||
})
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
import { VersionInfo } from '@start9labs/start-sdk'
|
||||
|
||||
// Version entry for 0.2.1:0. Carries the English release notes for the
// config-driven model-name change. Both the `up` and `down` migrations
// are empty async functions, i.e. this version performs no migration work.
export const v_0_2_1 = VersionInfo.of({
|
||||
version: '0.2.1:0',
|
||||
releaseNotes: {
|
||||
en_US:
|
||||
'Parakeet + Gemma model names are now config-driven (live-reloadable via the Set Parakeet URL / Set Gemma URL actions). Operators can swap which model their Ollama / Parakeet wrapper serves without rebuilding the relay.',
|
||||
},
|
||||
migrations: {
|
||||
up: async ({ effects }) => {},
|
||||
|
|
||||
down: async ({ effects }) => {},
|
||||
},
|
||||
})
|
||||
Reference in New Issue
Block a user