v0.2.1 model names config-driven
This commit is contained in:
+13
-14
@@ -18,26 +18,25 @@ const ANALYZE_MAX_TOKENS = 16000;
|
|||||||
// internet, so generous timeouts. Same scale as Recap's defaults.
|
// internet, so generous timeouts. Same scale as Recap's defaults.
|
||||||
const DEFAULT_TIMEOUT_MS = 900_000;
|
const DEFAULT_TIMEOUT_MS = 900_000;
|
||||||
|
|
||||||
// Pull the model identifier out of the prompt if the operator wants a
|
// Defaults used only when the route handler doesn't supply explicit
|
||||||
// specific Gemma SKU. We default to "gemma3:27b" which is the typical
|
// model names (e.g. a unit test instantiating the backend directly).
|
||||||
// Ollama tag for the analysis-capable Gemma model. Operators with a
|
// In production the model names come from relay-config.json via
|
||||||
// different deployment can update this via a future StartOS action;
|
// setParakeetUrl / setGemmaUrl, so the operator can swap models on
|
||||||
// for v0.2 it's hardcoded.
|
// their Ollama deployment without rebuilding the relay.
|
||||||
const HARDWARE_ANALYZE_MODEL = process.env.RELAY_GEMMA_MODEL || "gemma3:27b";
|
const DEFAULT_TRANSCRIBE_MODEL = "parakeet-tdt-0.6b-v3";
|
||||||
|
const DEFAULT_ANALYZE_MODEL = "gemma3:27b";
|
||||||
// Parakeet's typical model identifier. Mirrors what Recap's whisper.js
|
|
||||||
// sends when the operator points the relay at a NeMo Parakeet HTTP
|
|
||||||
// wrapper. Configurable via env var for non-default deployments.
|
|
||||||
const HARDWARE_TRANSCRIBE_MODEL =
|
|
||||||
process.env.RELAY_PARAKEET_MODEL || "parakeet-tdt-0.6b-v3";
|
|
||||||
|
|
||||||
export function createHardwareBackend({
|
export function createHardwareBackend({
|
||||||
parakeetBaseURL = "",
|
parakeetBaseURL = "",
|
||||||
gemmaBaseURL = "",
|
gemmaBaseURL = "",
|
||||||
|
parakeetModel = DEFAULT_TRANSCRIBE_MODEL,
|
||||||
|
gemmaModel = DEFAULT_ANALYZE_MODEL,
|
||||||
timeoutMs = DEFAULT_TIMEOUT_MS,
|
timeoutMs = DEFAULT_TIMEOUT_MS,
|
||||||
} = {}) {
|
} = {}) {
|
||||||
const parakeet = parakeetBaseURL ? parakeetBaseURL.replace(/\/$/, "") : "";
|
const parakeet = parakeetBaseURL ? parakeetBaseURL.replace(/\/$/, "") : "";
|
||||||
const gemma = gemmaBaseURL ? gemmaBaseURL.replace(/\/$/, "") : "";
|
const gemma = gemmaBaseURL ? gemmaBaseURL.replace(/\/$/, "") : "";
|
||||||
|
const transcribeModel = parakeetModel || DEFAULT_TRANSCRIBE_MODEL;
|
||||||
|
const analyzeModel = gemmaModel || DEFAULT_ANALYZE_MODEL;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
hasTranscribe: !!parakeet,
|
hasTranscribe: !!parakeet,
|
||||||
@@ -69,7 +68,7 @@ export function createHardwareBackend({
|
|||||||
const form = new FormData();
|
const form = new FormData();
|
||||||
const blob = new Blob([audio], { type: mimeType });
|
const blob = new Blob([audio], { type: mimeType });
|
||||||
form.append("file", blob, "audio.bin");
|
form.append("file", blob, "audio.bin");
|
||||||
form.append("model", HARDWARE_TRANSCRIBE_MODEL);
|
form.append("model", transcribeModel);
|
||||||
if (richMode) {
|
if (richMode) {
|
||||||
form.append("response_format", "verbose_json");
|
form.append("response_format", "verbose_json");
|
||||||
form.append("timestamp_granularities[]", "segment");
|
form.append("timestamp_granularities[]", "segment");
|
||||||
@@ -171,7 +170,7 @@ export function createHardwareBackend({
|
|||||||
method: "POST",
|
method: "POST",
|
||||||
headers: { "Content-Type": "application/json" },
|
headers: { "Content-Type": "application/json" },
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
model: HARDWARE_ANALYZE_MODEL,
|
model: analyzeModel,
|
||||||
max_tokens: ANALYZE_MAX_TOKENS,
|
max_tokens: ANALYZE_MAX_TOKENS,
|
||||||
messages: [{ role: "user", content: prompt }],
|
messages: [{ role: "user", content: prompt }],
|
||||||
stream: false,
|
stream: false,
|
||||||
|
|||||||
@@ -16,6 +16,8 @@ function defaultConfig() {
|
|||||||
relay_gemini_api_key: "",
|
relay_gemini_api_key: "",
|
||||||
relay_parakeet_base_url: "",
|
relay_parakeet_base_url: "",
|
||||||
relay_gemma_base_url: "",
|
relay_gemma_base_url: "",
|
||||||
|
relay_parakeet_model: "parakeet-tdt-0.6b-v3",
|
||||||
|
relay_gemma_model: "gemma3:27b",
|
||||||
relay_keysat_base_url: "https://keysat.xyz",
|
relay_keysat_base_url: "https://keysat.xyz",
|
||||||
relay_admin_username: "",
|
relay_admin_username: "",
|
||||||
relay_admin_password_hash: "",
|
relay_admin_password_hash: "",
|
||||||
|
|||||||
@@ -84,6 +84,8 @@ export function analyzeRouter() {
|
|||||||
const backend = createHardwareBackend({
|
const backend = createHardwareBackend({
|
||||||
parakeetBaseURL: cfg.relay_parakeet_base_url,
|
parakeetBaseURL: cfg.relay_parakeet_base_url,
|
||||||
gemmaBaseURL: cfg.relay_gemma_base_url,
|
gemmaBaseURL: cfg.relay_gemma_base_url,
|
||||||
|
parakeetModel: cfg.relay_parakeet_model,
|
||||||
|
gemmaModel: cfg.relay_gemma_model,
|
||||||
});
|
});
|
||||||
result = await backend.analyzeText({ prompt });
|
result = await backend.analyzeText({ prompt });
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -110,6 +110,8 @@ export function transcribeRouter() {
|
|||||||
const backend = createHardwareBackend({
|
const backend = createHardwareBackend({
|
||||||
parakeetBaseURL: cfg.relay_parakeet_base_url,
|
parakeetBaseURL: cfg.relay_parakeet_base_url,
|
||||||
gemmaBaseURL: cfg.relay_gemma_base_url,
|
gemmaBaseURL: cfg.relay_gemma_base_url,
|
||||||
|
parakeetModel: cfg.relay_parakeet_model,
|
||||||
|
gemmaModel: cfg.relay_gemma_model,
|
||||||
});
|
});
|
||||||
result = await backend.transcribeAudio({
|
result = await backend.transcribeAudio({
|
||||||
audio: req.file.buffer,
|
audio: req.file.buffer,
|
||||||
|
|||||||
@@ -3,9 +3,10 @@ import { configFile } from '../file-models/config.json'
|
|||||||
|
|
||||||
const { InputSpec, Value } = sdk
|
const { InputSpec, Value } = sdk
|
||||||
|
|
||||||
// Optional Gemma/Ollama endpoint for the operator-hardware analysis
|
// Operator's Gemma (or any OpenAI-compatible chat-completions) endpoint
|
||||||
// fallback. Counterpart to setParakeetUrl — Parakeet handles transcribe
|
// + which model to request. Both fields live-reload so the operator
|
||||||
// overflow, this handles analyze overflow.
|
// can pull a different Gemma SKU on Ollama and update the model name
|
||||||
|
// here without restarting the relay.
|
||||||
const inputSpec = InputSpec.of({
|
const inputSpec = InputSpec.of({
|
||||||
relay_gemma_base_url: Value.text({
|
relay_gemma_base_url: Value.text({
|
||||||
name: 'Gemma Base URL',
|
name: 'Gemma Base URL',
|
||||||
@@ -22,6 +23,15 @@ const inputSpec = InputSpec.of({
|
|||||||
},
|
},
|
||||||
],
|
],
|
||||||
}),
|
}),
|
||||||
|
relay_gemma_model: Value.text({
|
||||||
|
name: 'Gemma Model Name',
|
||||||
|
description:
|
||||||
|
'The model identifier sent in upstream chat-completions requests. Match whatever name your Ollama / vLLM / llama.cpp deployment exposes (run `ollama list` to see what you have pulled). Example: gemma3:27b, gemma2:9b, llama3.1:70b',
|
||||||
|
required: true,
|
||||||
|
default: 'gemma3:27b',
|
||||||
|
minLength: 1,
|
||||||
|
maxLength: 128,
|
||||||
|
}),
|
||||||
})
|
})
|
||||||
|
|
||||||
export const setGemmaUrl = sdk.Action.withInput(
|
export const setGemmaUrl = sdk.Action.withInput(
|
||||||
@@ -30,7 +40,7 @@ export const setGemmaUrl = sdk.Action.withInput(
|
|||||||
async ({ effects }) => ({
|
async ({ effects }) => ({
|
||||||
name: 'Set Gemma URL',
|
name: 'Set Gemma URL',
|
||||||
description:
|
description:
|
||||||
'Optional. Where the relay forwards analysis requests once a user exceeds their monthly Gemini cap. Leave empty to disable the fallback.',
|
'Optional. Where the relay forwards analysis requests once a user exceeds their monthly Gemini cap. Leave URL empty to disable the fallback.',
|
||||||
warning: null,
|
warning: null,
|
||||||
allowedStatuses: 'any',
|
allowedStatuses: 'any',
|
||||||
group: null,
|
group: null,
|
||||||
@@ -43,12 +53,14 @@ export const setGemmaUrl = sdk.Action.withInput(
|
|||||||
const config = await configFile.read().once()
|
const config = await configFile.read().once()
|
||||||
return {
|
return {
|
||||||
relay_gemma_base_url: config?.relay_gemma_base_url || '',
|
relay_gemma_base_url: config?.relay_gemma_base_url || '',
|
||||||
|
relay_gemma_model: config?.relay_gemma_model || 'gemma3:27b',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
async ({ effects, input }) => {
|
async ({ effects, input }) => {
|
||||||
await configFile.merge(effects, {
|
await configFile.merge(effects, {
|
||||||
relay_gemma_base_url: (input.relay_gemma_base_url || '').trim(),
|
relay_gemma_base_url: (input.relay_gemma_base_url || '').trim(),
|
||||||
|
relay_gemma_model: (input.relay_gemma_model || 'gemma3:27b').trim(),
|
||||||
})
|
})
|
||||||
return null
|
return null
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -3,13 +3,9 @@ import { configFile } from '../file-models/config.json'
|
|||||||
|
|
||||||
const { InputSpec, Value } = sdk
|
const { InputSpec, Value } = sdk
|
||||||
|
|
||||||
// Optional Parakeet endpoint for the operator-hardware fallback path.
|
// Operator's Parakeet endpoint + which model to request. Both fields
|
||||||
// When a Pro/Max user exceeds their Gemini monthly cap, the relay
|
// live-reload — change them via this action and the next relay request
|
||||||
// routes transcribe requests here instead. Empty disables the fallback
|
// picks up the new values without a daemon restart.
|
||||||
// — over-cap users get 402.
|
|
||||||
//
|
|
||||||
// In a typical setup this points at the operator's NVIDIA Spark or
|
|
||||||
// similar local GPU box running the NeMo / Parakeet HTTP wrapper.
|
|
||||||
const inputSpec = InputSpec.of({
|
const inputSpec = InputSpec.of({
|
||||||
relay_parakeet_base_url: Value.text({
|
relay_parakeet_base_url: Value.text({
|
||||||
name: 'Parakeet Base URL',
|
name: 'Parakeet Base URL',
|
||||||
@@ -26,6 +22,15 @@ const inputSpec = InputSpec.of({
|
|||||||
},
|
},
|
||||||
],
|
],
|
||||||
}),
|
}),
|
||||||
|
relay_parakeet_model: Value.text({
|
||||||
|
name: 'Parakeet Model Name',
|
||||||
|
description:
|
||||||
|
'The model identifier sent in upstream requests (the "model" field in the OpenAI Whisper API body). Match whatever name your Parakeet wrapper expects. Default: parakeet-tdt-0.6b-v3',
|
||||||
|
required: true,
|
||||||
|
default: 'parakeet-tdt-0.6b-v3',
|
||||||
|
minLength: 1,
|
||||||
|
maxLength: 128,
|
||||||
|
}),
|
||||||
})
|
})
|
||||||
|
|
||||||
export const setParakeetUrl = sdk.Action.withInput(
|
export const setParakeetUrl = sdk.Action.withInput(
|
||||||
@@ -34,7 +39,7 @@ export const setParakeetUrl = sdk.Action.withInput(
|
|||||||
async ({ effects }) => ({
|
async ({ effects }) => ({
|
||||||
name: 'Set Parakeet URL',
|
name: 'Set Parakeet URL',
|
||||||
description:
|
description:
|
||||||
"Optional. Where the relay forwards transcription requests once a user exceeds their monthly Gemini cap. Leave empty to disable the operator-hardware fallback.",
|
"Optional. Where the relay forwards transcription requests once a user exceeds their monthly Gemini cap. Leave URL empty to disable the operator-hardware fallback.",
|
||||||
warning: null,
|
warning: null,
|
||||||
allowedStatuses: 'any',
|
allowedStatuses: 'any',
|
||||||
group: null,
|
group: null,
|
||||||
@@ -47,12 +52,16 @@ export const setParakeetUrl = sdk.Action.withInput(
|
|||||||
const config = await configFile.read().once()
|
const config = await configFile.read().once()
|
||||||
return {
|
return {
|
||||||
relay_parakeet_base_url: config?.relay_parakeet_base_url || '',
|
relay_parakeet_base_url: config?.relay_parakeet_base_url || '',
|
||||||
|
relay_parakeet_model:
|
||||||
|
config?.relay_parakeet_model || 'parakeet-tdt-0.6b-v3',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
async ({ effects, input }) => {
|
async ({ effects, input }) => {
|
||||||
await configFile.merge(effects, {
|
await configFile.merge(effects, {
|
||||||
relay_parakeet_base_url: (input.relay_parakeet_base_url || '').trim(),
|
relay_parakeet_base_url: (input.relay_parakeet_base_url || '').trim(),
|
||||||
|
relay_parakeet_model:
|
||||||
|
(input.relay_parakeet_model || 'parakeet-tdt-0.6b-v3').trim(),
|
||||||
})
|
})
|
||||||
return null
|
return null
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -28,6 +28,13 @@ export const configFile = FileHelper.json(
|
|||||||
// and return 402 once exceeded (no fallback).
|
// and return 402 once exceeded (no fallback).
|
||||||
relay_parakeet_base_url: z.string().default(''),
|
relay_parakeet_base_url: z.string().default(''),
|
||||||
relay_gemma_base_url: z.string().default(''),
|
relay_gemma_base_url: z.string().default(''),
|
||||||
|
// Model identifiers to send in the upstream request bodies. The
|
||||||
|
// operator's Ollama or Parakeet wrapper may serve different models
|
||||||
|
// depending on what's been pulled; making these config-driven
|
||||||
|
// means the operator can swap models without rebuilding the relay.
|
||||||
|
// Live-reloaded — change applies to the next request.
|
||||||
|
relay_parakeet_model: z.string().default('parakeet-tdt-0.6b-v3'),
|
||||||
|
relay_gemma_model: z.string().default('gemma3:27b'),
|
||||||
|
|
||||||
// ── License server ──
|
// ── License server ──
|
||||||
// URL of the Keysat license server used for the cached online
|
// URL of the Keysat license server used for the cached online
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
import { VersionGraph } from '@start9labs/start-sdk'
|
import { VersionGraph } from '@start9labs/start-sdk'
|
||||||
import { v_0_1_0 } from './v0.1.0'
|
import { v_0_1_0 } from './v0.1.0'
|
||||||
import { v_0_2_0 } from './v0.2.0'
|
import { v_0_2_0 } from './v0.2.0'
|
||||||
|
import { v_0_2_1 } from './v0.2.1'
|
||||||
|
|
||||||
export const versionGraph = VersionGraph.of({
|
export const versionGraph = VersionGraph.of({
|
||||||
current: v_0_2_0,
|
current: v_0_2_1,
|
||||||
other: [v_0_1_0],
|
other: [v_0_2_0, v_0_1_0],
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -0,0 +1,13 @@
|
|||||||
|
import { VersionInfo } from '@start9labs/start-sdk'
|
||||||
|
|
||||||
|
export const v_0_2_1 = VersionInfo.of({
|
||||||
|
version: '0.2.1:0',
|
||||||
|
releaseNotes: {
|
||||||
|
en_US:
|
||||||
|
'Parakeet + Gemma model names are now config-driven (live-reloadable via the Set Parakeet URL / Set Gemma URL actions). Operators can swap which model their Ollama / Parakeet wrapper serves without rebuilding the relay.',
|
||||||
|
},
|
||||||
|
migrations: {
|
||||||
|
up: async ({ effects }) => {},
|
||||||
|
down: async ({ effects }) => {},
|
||||||
|
},
|
||||||
|
})
|
||||||
Reference in New Issue
Block a user