// Operator-hardware fallback backend. Forwards transcribe requests to
// a Parakeet endpoint (or any Whisper-API-compatible server — same wire
// format) and analyze requests to a Gemma endpoint (or any
// OpenAI-compatible chat-completions server).
//
// Used when a Pro/Max user has exceeded their monthly Gemini cap.
// Returns the same shape gemini.js produces so route handlers don't
// need a backend-specific branch downstream:
//   transcribeAudio → { text, segments, duration_seconds }
//   analyzeText     → { text }
//
// Both endpoints are reached via plain fetch — no SDK dependency keeps
// the relay container slim and the upstream wire format is dead-simple
// for these two well-known shapes.

// Sent as `max_tokens` on every analyze (chat-completions) request.
const ANALYZE_MAX_TOKENS = 16_000;

// Gemma served locally tends to live on the host's LAN, not the public
// internet, so the timeout is generous: 15 minutes, expressed in
// milliseconds. Same scale as Recap's defaults.
const DEFAULT_TIMEOUT_MS = 15 * 60 * 1_000;

// Fallback model names, used only when the caller doesn't supply explicit
// ones (e.g. a unit test instantiating the backend directly).
// In production the model names come from relay-config.json via
// setParakeetUrl / setGemmaUrl, so the operator can swap models on
// their Ollama deployment without rebuilding the relay.
const DEFAULT_TRANSCRIBE_MODEL = "parakeet-tdt-0.6b-v3";
const DEFAULT_ANALYZE_MODEL = "gemma3:27b";

// Normalize an OpenAI-API-compatible base URL so that `/v1/...` can be
// appended to it safely:
//   - surrounding whitespace is trimmed;
//   - ALL trailing slashes are removed (not just one), so `http://h//`
//     cannot produce `http://h//v1/...`;
//   - one trailing `/v1` segment is removed if the operator pasted it,
//     because the request paths always start with `/v1/...`. Without
//     this, a base URL of `http://192.168.1.87:8000/v1` would produce
//     `http://192.168.1.87:8000/v1/v1/audio/transcriptions` → 404;
//   - any slashes exposed by removing `/v1` are removed as well.
//
// Falsy input (undefined, null, "") yields "". Non-string truthy input
// (e.g. a URL object) is stringified instead of throwing on `.trim()`.
// Returns the cleaned base URL string.
function normalizeApiBase(url) {
  const raw = url ? String(url) : "";
  return raw
    .trim()
    .replace(/\/+$/, "")
    .replace(/\/v1$/, "")
    .replace(/\/+$/, "");
}

// Build the operator-hardware backend.
//
// Options (all optional):
//   parakeetBaseURL  base URL of the transcription server; empty disables
//                    transcribeAudio (it then throws with status 503)
//   gemmaBaseURL     base URL of the chat-completions server; empty
//                    disables analyzeText (it then throws with status 503)
//   parakeetModel    model name sent with transcription requests
//   gemmaModel       model name sent with chat-completions requests
//   timeoutMs        per-request timeout handed to AbortSignal.timeout
//
// Returns { hasTranscribe, hasAnalyze, transcribeAudio, analyzeText }.
// Every error thrown by the two methods carries a numeric `status`:
// 503 when the endpoint is not configured, 502 for network failures and
// for undecodable (non-JSON) success bodies, otherwise the upstream
// HTTP status.
export function createHardwareBackend({
  parakeetBaseURL = "",
  gemmaBaseURL = "",
  parakeetModel = DEFAULT_TRANSCRIBE_MODEL,
  gemmaModel = DEFAULT_ANALYZE_MODEL,
  timeoutMs = DEFAULT_TIMEOUT_MS,
} = {}) {
  const parakeet = normalizeApiBase(parakeetBaseURL);
  const gemma = normalizeApiBase(gemmaBaseURL);
  const transcribeModel = parakeetModel || DEFAULT_TRANSCRIBE_MODEL;
  const analyzeModel = gemmaModel || DEFAULT_ANALYZE_MODEL;

  // Create an Error tagged with the HTTP status the caller should report.
  // The underlying error, when there is one, is preserved as `cause`.
  const httpError = (message, status, cause) => {
    const e =
      cause === undefined ? new Error(message) : new Error(message, { cause });
    e.status = status;
    return e;
  };

  // POST to `${base}${path}` for each candidate path in order. Only a 404
  // moves on to the next candidate — other status codes (rate-limit, 500)
  // shouldn't trigger a different path retry. A response body can be read
  // only once, so the text of the most recent 404 is captured here and
  // returned as `notFoundBody` for the caller's final error message.
  async function postWithPathFallback(base, paths, buildInit, label) {
    let res = null;
    let url = null;
    let notFoundBody = "";
    for (const path of paths) {
      url = `${base}${path}`;
      try {
        res = await fetch(url, {
          ...buildInit(),
          signal: AbortSignal.timeout(timeoutMs),
        });
      } catch (err) {
        throw httpError(
          `${label} network error at ${url}: ${err?.message || err}`,
          502,
          err
        );
      }
      if (res.status !== 404) break;
      notFoundBody = await safeBody(res);
      console.warn(`[hardware] 404 at ${url} — trying next path candidate`);
    }
    return { res, url, notFoundBody };
  }

  // Decode a successful response as JSON. A body that is not valid JSON
  // becomes a 502 instead of an untagged SyntaxError.
  async function readJson(res, label, url) {
    try {
      return await res.json();
    } catch (err) {
      throw httpError(
        `${label} returned a non-JSON body at ${url}: ${err?.message || err}`,
        502,
        err
      );
    }
  }

  return {
    hasTranscribe: !!parakeet,
    hasAnalyze: !!gemma,

    // POST <parakeet>/v1/audio/transcriptions with the OpenAI Whisper
    // multipart shape. Parakeet wrappers (NeMo + the patched one Recap
    // already talks to) honor this format and return segments with
    // per-segment timestamps when timestamp_granularities=segment is
    // requested. Falls back to a bare request if the rich shape 4xx/5xxs.
    //
    // Parameters: `audio` (wrapped in a Blob for upload), `mimeType` of
    // that audio, and `offsetSeconds` added to every segment timestamp.
    // Resolves to { text, segments, duration_seconds, usage, model }.
    async transcribeAudio({
      audio,
      mimeType = "application/octet-stream",
      offsetSeconds = 0,
    }) {
      if (!parakeet) {
        throw httpError(
          "operator-hardware transcribe is not configured (relay_parakeet_base_url is empty)",
          503
        );
      }

      // FormData/Blob globals are available in Node 20+. Wrap the
      // received Buffer in a Blob so the multipart body is properly
      // chunked instead of falling back to base64. A fresh form is built
      // for every attempt.
      const buildForm = (richMode) => {
        const form = new FormData();
        const blob = new Blob([audio], { type: mimeType });
        form.append("file", blob, "audio.bin");
        form.append("model", transcribeModel);
        if (richMode) {
          form.append("response_format", "verbose_json");
          form.append("timestamp_granularities[]", "segment");
        }
        return form;
      };

      // The OpenAI Whisper standard is `/v1/audio/transcriptions`; some
      // self-hosted wrappers expose the endpoint at
      // `/audio/transcriptions` instead, so that is tried second.
      const pathCandidates = [
        "/v1/audio/transcriptions",
        "/audio/transcriptions",
      ];

      // Try the rich request first (verbose_json + segment timestamps).
      const attempt = await postWithPathFallback(
        parakeet,
        pathCandidates,
        () => ({ method: "POST", body: buildForm(true) }),
        "Parakeet transcribe"
      );
      const lastUrl = attempt.url;
      let res = attempt.res;

      // If the wrapper rejects the rich params (any 4xx/5xx other than
      // the 404s already exhausted above), retry with bare-bones at the
      // same URL.
      if (
        !res.ok &&
        res.status !== 404 &&
        res.status >= 400 &&
        res.status < 600
      ) {
        const richBody = await safeBody(res);
        console.warn(
          `[hardware] rich Parakeet request to ${lastUrl} returned ${res.status}: ${richBody.slice(0, 200)} — retrying bare`
        );
        try {
          res = await fetch(lastUrl, {
            method: "POST",
            body: buildForm(false),
            signal: AbortSignal.timeout(timeoutMs),
          });
        } catch (err) {
          throw httpError(
            `Parakeet transcribe network error (fallback) at ${lastUrl}: ${err?.message || err}`,
            502,
            err
          );
        }
      }

      if (!res.ok) {
        // A 404 body was already consumed while probing paths; reuse the
        // captured text rather than re-reading (which would yield "").
        const body = res.bodyUsed
          ? attempt.notFoundBody
          : await safeBody(res);
        const hint =
          res.status === 404
            ? ` (tried ${pathCandidates.join(" and ")} on base ${parakeet} — wrapper may expose the endpoint at a different path; check the Parakeet URL or container logs)`
            : "";
        throw httpError(
          `Parakeet transcribe ${res.status} at ${lastUrl}: ${body.slice(0, 300)}${hint}`,
          res.status
        );
      }

      const data = await readJson(res, "Parakeet transcribe", lastUrl);
      const segments = Array.isArray(data?.segments) ? data.segments : [];

      // Offset support: when the relay caller is processing a chunked
      // audio file, it asks for transcripts at a non-zero base time.
      // Parakeet returns timestamps relative to the chunk; shift them
      // up by offsetSeconds so the combined transcript downstream
      // lines up with the real video timeline. Values are coerced to
      // numbers first so a string offset adds instead of concatenating.
      const offset = Number(offsetSeconds) || 0;
      const shifted = segments.map((s) => ({
        start: (Number(s?.start) || 0) + offset,
        end: (Number(s?.end) || 0) + offset,
        text: String(s?.text || "").trim(),
      }));

      // Build the [MM:SS] text format Recap's parseTimestampedTranscript
      // already speaks. When there are no segments, the whole text is
      // emitted as one line stamped with the chunk's offset (which is
      // [0:00] for an unshifted request).
      const lines = shifted.length
        ? shifted.map((s) => `[${formatMmSs(s.start)}] ${s.text}`)
        : [`[${formatMmSs(offset)}] ${String(data?.text || "").trim()}`];

      return {
        text: lines.join("\n"),
        segments: shifted,
        duration_seconds: data?.duration || 0,
        usage: null, // hardware backend doesn't expose token counts
        model: transcribeModel,
      };
    },

    // POST <gemma>/v1/chat/completions with the OpenAI shape. Ollama's
    // server, vLLM, llama.cpp's HTTP server, and most other OSS LLM
    // runners support this wire format — so we don't lock the relay
    // to one specific Gemma deployment.
    //
    // `prompt` is sent as a single user message. Resolves to
    // { text, usage, model }; `text` is "" when the response carries no
    // first-choice message content.
    async analyzeText({ prompt }) {
      if (!gemma) {
        throw httpError(
          "operator-hardware analyze is not configured (relay_gemma_base_url is empty)",
          503
        );
      }

      // Same path-fallback shape as Parakeet transcribe. Standard
      // OpenAI-compatible path is /v1/chat/completions; some Ollama
      // versions also expose it at /chat/completions without the /v1.
      const pathCandidates = ["/v1/chat/completions", "/chat/completions"];

      // The payload is identical for every candidate, so serialize once.
      const payload = JSON.stringify({
        model: analyzeModel,
        max_tokens: ANALYZE_MAX_TOKENS,
        messages: [{ role: "user", content: prompt }],
        stream: false,
      });

      const { res, url: lastUrl, notFoundBody } = await postWithPathFallback(
        gemma,
        pathCandidates,
        () => ({
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: payload,
        }),
        "Gemma analyze"
      );

      if (!res.ok) {
        // See transcribeAudio: a probed 404 body has already been read.
        const body = res.bodyUsed ? notFoundBody : await safeBody(res);
        const hint =
          res.status === 404
            ? ` (tried ${pathCandidates.join(" and ")} on base ${gemma} — check the Gemma/Ollama URL)`
            : "";
        throw httpError(
          `Gemma analyze ${res.status} at ${lastUrl}: ${body.slice(0, 300)}${hint}`,
          res.status
        );
      }

      const data = await readJson(res, "Gemma analyze", lastUrl);
      const text = data?.choices?.[0]?.message?.content || "";
      return {
        text,
        usage: null,
        model: analyzeModel,
      };
    },
  };
}

// Format a duration in seconds as `M:SS`, or `H:MM:SS` once it reaches
// one hour. Fractional seconds are floored and negative values clamp to
// zero. Anything that is not a finite number (NaN, undefined, Infinity)
// is rendered as "0:00" rather than leaking "NaN:NaN" into a transcript.
function formatMmSs(seconds) {
  const numeric = Number(seconds);
  const total = Number.isFinite(numeric)
    ? Math.max(0, Math.floor(numeric))
    : 0;
  const hours = Math.floor(total / 3600);
  const minutes = Math.floor((total % 3600) / 60);
  const secs = String(total % 60).padStart(2, "0");
  if (hours > 0) {
    return `${hours}:${String(minutes).padStart(2, "0")}:${secs}`;
  }
  return `${minutes}:${secs}`;
}

// Best-effort read of a response body as text. Any failure while reading
// (including `res` not being a usable response object) resolves to an
// empty string instead of rejecting, so callers can always `.slice()` the
// result when composing log or error messages.
async function safeBody(res) {
  let bodyText = "";
  try {
    bodyText = await res.text();
  } catch {
    // Deliberately ignored: the body is only used for diagnostics.
  }
  return bodyText;
}