v0.2.1 model names config-driven
This commit is contained in:
+13
-14
@@ -18,26 +18,25 @@ const ANALYZE_MAX_TOKENS = 16000;
|
||||
// internet, so generous timeouts. Same scale as Recap's defaults.
|
||||
const DEFAULT_TIMEOUT_MS = 900_000;
|
||||
|
||||
// Read the model identifier from the RELAY_GEMMA_MODEL env var if the
|
||||
// operator wants a specific Gemma SKU. We default to "gemma3:27b", which
|
||||
// is the typical Ollama tag for the analysis-capable Gemma model.
|
||||
// Operators with a different deployment can set the env var today; a
|
||||
// future StartOS action may expose this setting as well.
|
||||
const HARDWARE_ANALYZE_MODEL = process.env.RELAY_GEMMA_MODEL || "gemma3:27b";
|
||||
|
||||
// Parakeet's typical model identifier. Mirrors what Recap's whisper.js
|
||||
// sends when the operator points the relay at a NeMo Parakeet HTTP
|
||||
// wrapper. Configurable via env var for non-default deployments.
|
||||
const HARDWARE_TRANSCRIBE_MODEL =
|
||||
process.env.RELAY_PARAKEET_MODEL || "parakeet-tdt-0.6b-v3";
|
||||
// Defaults used only when the route handler doesn't supply explicit
|
||||
// model names (e.g. a unit test instantiating the backend directly).
|
||||
// In production the model names come from relay-config.json via
|
||||
// setParakeetUrl / setGemmaUrl, so the operator can swap models on
|
||||
// their Ollama deployment without rebuilding the relay.
|
||||
const DEFAULT_TRANSCRIBE_MODEL = "parakeet-tdt-0.6b-v3";
|
||||
const DEFAULT_ANALYZE_MODEL = "gemma3:27b";
|
||||
|
||||
export function createHardwareBackend({
|
||||
parakeetBaseURL = "",
|
||||
gemmaBaseURL = "",
|
||||
parakeetModel = DEFAULT_TRANSCRIBE_MODEL,
|
||||
gemmaModel = DEFAULT_ANALYZE_MODEL,
|
||||
timeoutMs = DEFAULT_TIMEOUT_MS,
|
||||
} = {}) {
|
||||
const parakeet = parakeetBaseURL ? parakeetBaseURL.replace(/\/$/, "") : "";
|
||||
const gemma = gemmaBaseURL ? gemmaBaseURL.replace(/\/$/, "") : "";
|
||||
const transcribeModel = parakeetModel || DEFAULT_TRANSCRIBE_MODEL;
|
||||
const analyzeModel = gemmaModel || DEFAULT_ANALYZE_MODEL;
|
||||
|
||||
return {
|
||||
hasTranscribe: !!parakeet,
|
||||
@@ -69,7 +68,7 @@ export function createHardwareBackend({
|
||||
const form = new FormData();
|
||||
const blob = new Blob([audio], { type: mimeType });
|
||||
form.append("file", blob, "audio.bin");
|
||||
form.append("model", HARDWARE_TRANSCRIBE_MODEL);
|
||||
form.append("model", transcribeModel);
|
||||
if (richMode) {
|
||||
form.append("response_format", "verbose_json");
|
||||
form.append("timestamp_granularities[]", "segment");
|
||||
@@ -171,7 +170,7 @@ export function createHardwareBackend({
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
model: HARDWARE_ANALYZE_MODEL,
|
||||
model: analyzeModel,
|
||||
max_tokens: ANALYZE_MAX_TOKENS,
|
||||
messages: [{ role: "user", content: prompt }],
|
||||
stream: false,
|
||||
|
||||
Reference in New Issue
Block a user