Files
recap-relay/server/backends/hardware.js
T
2026-05-11 20:27:19 -05:00

219 lines
7.5 KiB
JavaScript

// Operator-hardware fallback backend. Forwards transcribe requests to
// a Parakeet endpoint (or any Whisper-API-compatible server — same wire
// format) and analyze requests to a Gemma endpoint (or any
// OpenAI-compatible chat-completions server).
//
// Used when a Pro/Max user has exceeded their monthly Gemini cap.
// Returns the same shape gemini.js produces so route handlers don't
// need a backend-specific branch downstream:
// transcribeAudio → { text, segments, duration_seconds }
// analyzeText → { text }
//
// Both endpoints are reached via plain fetch. Having no SDK dependency
// keeps the relay container slim, and the upstream wire format for
// these two well-known shapes is dead-simple.
// Sent as `max_tokens` on every analyze (chat-completions) request.
const ANALYZE_MAX_TOKENS = 16_000;

// 15 minutes. Gemma served locally tends to live on the host's LAN
// rather than the public internet, so the timeout is generous; it is
// on the same scale as Recap's defaults.
const DEFAULT_TIMEOUT_MS = 15 * 60 * 1000;

// Fallback model names, used only when the caller does not pass
// explicit ones (e.g. a unit test instantiating the backend directly).
// In production the names come from relay-config.json via
// setParakeetUrl / setGemmaUrl, which lets the operator swap models on
// their Ollama deployment without rebuilding the relay.
const DEFAULT_TRANSCRIBE_MODEL = "parakeet-tdt-0.6b-v3";
const DEFAULT_ANALYZE_MODEL = "gemma3:27b";
/**
 * Create the operator-hardware fallback backend.
 *
 * The returned object forwards transcribe requests to a
 * Whisper-API-compatible server and analyze requests to an
 * OpenAI-compatible chat-completions server, both via global `fetch`.
 *
 * @param {object} [options]
 * @param {string} [options.parakeetBaseURL=""] Base URL of the transcribe
 *   server. One trailing slash is stripped. Empty disables transcribe.
 * @param {string} [options.gemmaBaseURL=""] Base URL of the analyze
 *   server. One trailing slash is stripped. Empty disables analyze.
 * @param {string} [options.parakeetModel] Model name sent with transcribe
 *   requests; a falsy value falls back to DEFAULT_TRANSCRIBE_MODEL.
 * @param {string} [options.gemmaModel] Model name sent with analyze
 *   requests; a falsy value falls back to DEFAULT_ANALYZE_MODEL.
 * @param {number} [options.timeoutMs] Per-request abort timeout in ms.
 * @returns {{
 *   hasTranscribe: boolean,
 *   hasAnalyze: boolean,
 *   transcribeAudio: Function,
 *   analyzeText: Function,
 * }}
 */
export function createHardwareBackend({
  parakeetBaseURL = "",
  gemmaBaseURL = "",
  parakeetModel = DEFAULT_TRANSCRIBE_MODEL,
  gemmaModel = DEFAULT_ANALYZE_MODEL,
  timeoutMs = DEFAULT_TIMEOUT_MS,
} = {}) {
  const parakeet = parakeetBaseURL ? parakeetBaseURL.replace(/\/$/, "") : "";
  const gemma = gemmaBaseURL ? gemmaBaseURL.replace(/\/$/, "") : "";
  const transcribeModel = parakeetModel || DEFAULT_TRANSCRIBE_MODEL;
  const analyzeModel = gemmaModel || DEFAULT_ANALYZE_MODEL;

  // Build an Error carrying the HTTP status this backend attaches to
  // every failure, keeping the underlying error (if any) as `cause`.
  const upstreamError = (message, status, cause) => {
    const err =
      cause === undefined ? new Error(message) : new Error(message, { cause });
    err.status = status;
    return err;
  };

  // Parse a successful response as JSON. A 2xx with an unreadable or
  // non-JSON body is an upstream fault, so it is reported as a 502 with
  // a status attached instead of leaking a raw SyntaxError.
  const parseJsonBody = async (res, label) => {
    try {
      return await res.json();
    } catch (err) {
      throw upstreamError(
        `${label} returned an unreadable JSON body: ${err?.message || err}`,
        502,
        err
      );
    }
  };

  return {
    hasTranscribe: !!parakeet,
    hasAnalyze: !!gemma,

    /**
     * POST <parakeet>/v1/audio/transcriptions with the OpenAI Whisper
     * multipart shape. The first attempt asks for verbose_json with
     * segment timestamps; if that attempt comes back 4xx/5xx, one bare
     * request (file + model only) is sent instead.
     *
     * @param {object} args
     * @param {*} args.audio Audio bytes; anything the Blob constructor
     *   accepts as a part (presumably a Buffer; verify against caller).
     * @param {string} [args.mimeType="application/octet-stream"]
     * @param {number} [args.offsetSeconds=0] Added to every segment
     *   timestamp so a chunk lines up with the full timeline.
     * @returns {Promise<{text: string, segments: Array<{start: number,
     *   end: number, text: string}>, duration_seconds: number}>}
     * @throws {Error} With `status` 503 when unconfigured, 502 on network
     *   failure or an unusable response body, or the upstream status on
     *   a non-2xx response.
     */
    async transcribeAudio({
      audio,
      mimeType = "application/octet-stream",
      offsetSeconds = 0,
    }) {
      if (!parakeet) {
        throw upstreamError(
          "operator-hardware transcribe is not configured (relay_parakeet_base_url is empty)",
          503
        );
      }

      // Coerce once so a numeric string offset cannot turn the additions
      // below into string concatenation; anything non-finite means "no
      // offset".
      const parsedOffset = Number(offsetSeconds);
      const baseOffset = Number.isFinite(parsedOffset) ? parsedOffset : 0;

      // FormData/Blob globals are available in Node 20+. The audio is
      // wrapped in a Blob so FormData sends it as a binary file part.
      const buildForm = (richMode) => {
        const form = new FormData();
        const blob = new Blob([audio], { type: mimeType });
        form.append("file", blob, "audio.bin");
        form.append("model", transcribeModel);
        if (richMode) {
          form.append("response_format", "verbose_json");
          form.append("timestamp_granularities[]", "segment");
        }
        return form;
      };

      const url = `${parakeet}/v1/audio/transcriptions`;

      // One POST attempt; a rejected fetch (network error, timeout)
      // becomes a 502 prefixed with `failurePrefix`.
      const postForm = async (richMode, failurePrefix) => {
        try {
          return await fetch(url, {
            method: "POST",
            body: buildForm(richMode),
            signal: AbortSignal.timeout(timeoutMs),
          });
        } catch (err) {
          throw upstreamError(
            `${failurePrefix}: ${err?.message || err}`,
            502,
            err
          );
        }
      };

      let res = await postForm(true, "Parakeet transcribe network error");

      // If the wrapper rejects the rich params, retry with bare-bones.
      if (!res.ok && res.status >= 400 && res.status < 600) {
        const richBody = await safeBody(res);
        console.warn(
          `[hardware] rich Parakeet request returned ${res.status}: ${richBody.slice(0, 200)} — retrying bare`
        );
        res = await postForm(
          false,
          "Parakeet transcribe network error (fallback)"
        );
      }

      if (!res.ok) {
        const body = await safeBody(res);
        throw upstreamError(
          `Parakeet transcribe ${res.status}: ${body.slice(0, 300)}`,
          res.status
        );
      }

      const data = await parseJsonBody(res, "Parakeet transcribe");
      if (data === null || typeof data !== "object") {
        throw upstreamError(
          "Parakeet transcribe returned a non-object JSON body",
          502
        );
      }

      const segments = Array.isArray(data.segments) ? data.segments : [];

      // Upstream timestamps are relative to the chunk; shift them by the
      // offset so the combined transcript downstream lines up with the
      // real timeline. Missing or malformed fields degrade to 0 / "".
      const shifted = segments.map((s) => ({
        start: (Number(s?.start) || 0) + baseOffset,
        end: (Number(s?.end) || 0) + baseOffset,
        text: typeof s?.text === "string" ? s.text.trim() : "",
      }));

      // Build the [MM:SS] text format Recap's parseTimestampedTranscript
      // already speaks. Without segments (e.g. after the bare retry) the
      // whole text becomes a single line stamped with the chunk's offset
      // rather than a hard-coded 0:00, so chunked audio stays in order.
      const plainText = typeof data.text === "string" ? data.text.trim() : "";
      const lines = shifted.length
        ? shifted.map((s) => `[${formatMmSs(s.start)}] ${s.text}`)
        : [`[${formatMmSs(baseOffset)}] ${plainText}`];

      return {
        text: lines.join("\n"),
        segments: shifted,
        duration_seconds: data.duration || 0,
      };
    },

    /**
     * POST <gemma>/v1/chat/completions with the OpenAI shape, sending the
     * prompt as a single non-streaming user message.
     *
     * @param {object} args
     * @param {string} args.prompt
     * @returns {Promise<{text: string}>} The first choice's message
     *   content, or "" when the response carries none.
     * @throws {Error} With `status` 503 when unconfigured, 502 on network
     *   failure or an unreadable response body, or the upstream status on
     *   a non-2xx response.
     */
    async analyzeText({ prompt }) {
      if (!gemma) {
        throw upstreamError(
          "operator-hardware analyze is not configured (relay_gemma_base_url is empty)",
          503
        );
      }

      const url = `${gemma}/v1/chat/completions`;
      let res;
      try {
        res = await fetch(url, {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify({
            model: analyzeModel,
            max_tokens: ANALYZE_MAX_TOKENS,
            messages: [{ role: "user", content: prompt }],
            stream: false,
          }),
          signal: AbortSignal.timeout(timeoutMs),
        });
      } catch (err) {
        throw upstreamError(
          `Gemma analyze network error: ${err?.message || err}`,
          502,
          err
        );
      }

      if (!res.ok) {
        const body = await safeBody(res);
        throw upstreamError(
          `Gemma analyze ${res.status}: ${body.slice(0, 300)}`,
          res.status
        );
      }

      const data = await parseJsonBody(res, "Gemma analyze");
      const text = data?.choices?.[0]?.message?.content || "";
      return { text };
    },
  };
}
/**
 * Format a duration in seconds as "M:SS", or "H:MM:SS" once it reaches
 * an hour. Fractions are floored and negative values clamp to zero.
 * Non-finite input (NaN, undefined, ±Infinity) is treated as zero so a
 * bad timestamp cannot emit "NaN:NaN" into the transcript.
 *
 * @param {number} seconds
 * @returns {string}
 */
function formatMmSs(seconds) {
  const numeric = Number(seconds);
  const total = Number.isFinite(numeric) ? Math.max(0, Math.floor(numeric)) : 0;
  const hours = Math.floor(total / 3600);
  const minutes = Math.floor((total % 3600) / 60);
  const secs = String(total % 60).padStart(2, "0");
  if (hours > 0) {
    return `${hours}:${String(minutes).padStart(2, "0")}:${secs}`;
  }
  return `${minutes}:${secs}`;
}
/**
 * Best-effort read of a response body as text, used when building
 * error and log messages.
 *
 * @param {{ text: Function }} res Object exposing a `text()` method.
 * @returns {Promise<string>} The body text, or "" if reading it fails.
 */
async function safeBody(res) {
  let bodyText = "";
  try {
    bodyText = await res.text();
  } catch {
    // Deliberately ignored: callers only use the body for diagnostics.
  }
  return bodyText;
}