v0.2 hardware backend
This commit is contained in:
+194
-30
@@ -1,55 +1,219 @@
|
||||
// Operator-hardware fallback backend. Forwards transcribe requests to
// a Parakeet endpoint (or any Whisper-API-compatible server — same wire
// format) and analyze requests to a Gemma endpoint (or any
// OpenAI-compatible chat-completions server).
|
||||
//
|
||||
// History: v0.1 shipped this backend as a stub — the endpoints were
// wired up, but no operator had pointed a real Parakeet/Gemma at the
// relay yet. It returned a 503 "hardware fallback not yet wired" so the
// credits.js routing logic still applied but users got a clear message
// instead of a silent failure. v0.2 replaces the stub with real requests.
|
||||
// Used when a Pro/Max user has exceeded their monthly Gemini cap.
|
||||
// Returns the same shape gemini.js produces so route handlers don't
|
||||
// need a backend-specific branch downstream:
|
||||
// transcribeAudio → { text, segments, duration_seconds }
|
||||
// analyzeText → { text }
|
||||
//
|
||||
// Both endpoints are reached via plain fetch — no SDK dependency keeps
|
||||
// the relay container slim and the upstream wire format is dead-simple
|
||||
// for these two well-known shapes.
|
||||
|
||||
// Upper bound on completion tokens requested from the analyze endpoint.
const ANALYZE_MAX_TOKENS = 16000;

// Gemma served locally tends to live on the host's LAN, not the public
// internet, so generous timeouts (15 minutes). Same scale as Recap's
// defaults.
const DEFAULT_TIMEOUT_MS = 900_000;

// Model identifier sent to the analyze endpoint. Operators who want a
// specific Gemma SKU set RELAY_GEMMA_MODEL; otherwise we default to
// "gemma3:27b", the typical Ollama tag for the analysis-capable Gemma
// model. A future StartOS action may expose this setting; for v0.2 the
// env var is the only override. The value is trimmed so a blank or
// whitespace-only env var falls back to the default instead of being
// sent upstream as a bogus model name.
const HARDWARE_ANALYZE_MODEL =
  process.env.RELAY_GEMMA_MODEL?.trim() || "gemma3:27b";

// Parakeet's typical model identifier. Mirrors what Recap's whisper.js
// sends when the operator points the relay at a NeMo Parakeet HTTP
// wrapper. Configurable via RELAY_PARAKEET_MODEL for non-default
// deployments; trimmed for the same reason as the analyze model above.
const HARDWARE_TRANSCRIBE_MODEL =
  process.env.RELAY_PARAKEET_MODEL?.trim() || "parakeet-tdt-0.6b-v3";
|
||||
|
||||
/**
 * Create the operator-hardware fallback backend.
 *
 * @param {object} [options]
 * @param {string} [options.parakeetBaseURL=""] Base URL of the
 *   Whisper-API-compatible transcription server. Empty disables transcribe.
 * @param {string} [options.gemmaBaseURL=""] Base URL of the
 *   OpenAI-compatible chat-completions server. Empty disables analyze.
 * @param {number} [options.timeoutMs=DEFAULT_TIMEOUT_MS] Per-request timeout
 *   in milliseconds; each upstream request gets a fresh timeout.
 * @returns {{
 *   hasTranscribe: boolean,
 *   hasAnalyze: boolean,
 *   transcribeAudio: Function,
 *   analyzeText: Function,
 * }} Backend object. Both methods reject with an Error carrying a numeric
 *   `status`: 503 when the path is not configured, 502 on a network failure
 *   or unparseable upstream body, or the upstream HTTP status otherwise.
 */
export function createHardwareBackend({
  parakeetBaseURL = "",
  gemmaBaseURL = "",
  timeoutMs = DEFAULT_TIMEOUT_MS,
} = {}) {
  // Strip every trailing slash so the joins below never yield "//v1/...".
  const parakeet = parakeetBaseURL ? parakeetBaseURL.replace(/\/+$/, "") : "";
  const gemma = gemmaBaseURL ? gemmaBaseURL.replace(/\/+$/, "") : "";

  // Build an Error tagged with the HTTP status the route handler should
  // answer with, keeping the underlying failure reachable via `cause`.
  const statusError = (message, status, cause) => {
    const err =
      cause === undefined ? new Error(message) : new Error(message, { cause });
    err.status = status;
    return err;
  };

  // POST with a fresh timeout; transport-level failures become a 502.
  const post = async (url, init, failureLabel) => {
    try {
      return await fetch(url, {
        ...init,
        method: "POST",
        signal: AbortSignal.timeout(timeoutMs),
      });
    } catch (err) {
      throw statusError(`${failureLabel}: ${err?.message || err}`, 502, err);
    }
  };

  // Parse a successful response as JSON; a malformed body becomes a 502
  // instead of an untagged SyntaxError.
  const readJson = async (res, label) => {
    try {
      return await res.json();
    } catch (err) {
      throw statusError(
        `${label} returned an unparseable JSON body: ${err?.message || err}`,
        502,
        err
      );
    }
  };

  // Coerce a timestamp-like value to a finite number so string inputs add
  // arithmetically instead of concatenating.
  const toSeconds = (value) => {
    const n = Number(value);
    return Number.isFinite(n) ? n : 0;
  };

  return {
    hasTranscribe: !!parakeet,
    hasAnalyze: !!gemma,

    // POST <parakeet>/v1/audio/transcriptions with the OpenAI Whisper
    // multipart shape. Parakeet wrappers (NeMo + the patched one Recap
    // already talks to) honor this format and return segments with
    // per-segment timestamps when timestamp_granularities=segment is
    // requested. Falls back to a bare request if the rich shape 4xx/5xxs.
    //
    // Resolves to { text, segments, duration_seconds } where `text` is the
    // "[M:SS] line" transcript and `segments` is [{ start, end, text }]
    // with start/end already shifted by offsetSeconds.
    async transcribeAudio({
      audio,
      mimeType = "application/octet-stream",
      offsetSeconds = 0,
    } = {}) {
      if (!parakeet) {
        throw statusError(
          "operator-hardware transcribe is not configured (relay_parakeet_base_url is empty)",
          503
        );
      }

      // FormData/Blob globals are available in Node 20+. Wrap the
      // received Buffer in a Blob so the multipart body is properly
      // chunked instead of falling back to base64. A form body can only be
      // consumed once, hence a builder rather than a shared instance.
      const buildForm = (richMode) => {
        const form = new FormData();
        const blob = new Blob([audio], { type: mimeType });
        form.append("file", blob, "audio.bin");
        form.append("model", HARDWARE_TRANSCRIBE_MODEL);
        if (richMode) {
          form.append("response_format", "verbose_json");
          form.append("timestamp_granularities[]", "segment");
        }
        return form;
      };

      const url = `${parakeet}/v1/audio/transcriptions`;

      // Try the rich request first (verbose_json + segment timestamps).
      let res = await post(
        url,
        { body: buildForm(true) },
        "Parakeet transcribe network error"
      );

      // If the wrapper rejects the rich params, retry with bare-bones.
      if (!res.ok && res.status >= 400 && res.status < 600) {
        const richBody = await safeBody(res);
        console.warn(
          `[hardware] rich Parakeet request returned ${res.status}: ${richBody.slice(0, 200)} — retrying bare`
        );
        res = await post(
          url,
          { body: buildForm(false) },
          "Parakeet transcribe network error (fallback)"
        );
      }

      if (!res.ok) {
        const body = await safeBody(res);
        throw statusError(
          `Parakeet transcribe ${res.status}: ${body.slice(0, 300)}`,
          res.status
        );
      }

      const data = await readJson(res, "Parakeet transcribe");
      const segments = Array.isArray(data?.segments) ? data.segments : [];

      // Offset support: when the relay caller is processing a chunked
      // audio file, it asks for transcripts at a non-zero base time.
      // Parakeet returns timestamps relative to the chunk; shift them
      // up by offsetSeconds so the combined transcript downstream
      // lines up with the real video timeline.
      const offset = toSeconds(offsetSeconds);
      const shifted = segments.map((s) => ({
        start: toSeconds(s?.start) + offset,
        end: toSeconds(s?.end) + offset,
        text: String(s?.text ?? "").trim(),
      }));

      // Build the [MM:SS] text format Recap's parseTimestampedTranscript
      // already speaks. When the server returned no segments, stamp the
      // whole text with the chunk's offset (not a fixed 0:00) so chunked
      // transcripts still land at the right place on the timeline.
      const lines = shifted.length
        ? shifted.map((s) => `[${formatMmSs(s.start)}] ${s.text}`)
        : [`[${formatMmSs(offset)}] ${String(data?.text ?? "").trim()}`];

      return {
        text: lines.join("\n"),
        segments: shifted,
        duration_seconds: toSeconds(data?.duration),
      };
    },

    // POST <gemma>/v1/chat/completions with the OpenAI shape. Ollama's
    // server, vLLM, llama.cpp's HTTP server, and most other OSS LLM
    // runners support this wire format — so we don't lock the relay
    // to one specific Gemma deployment.
    //
    // Resolves to { text }: the first choice's message content, or "" when
    // the response carries none.
    async analyzeText({ prompt } = {}) {
      if (!gemma) {
        throw statusError(
          "operator-hardware analyze is not configured (relay_gemma_base_url is empty)",
          503
        );
      }

      const res = await post(
        `${gemma}/v1/chat/completions`,
        {
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify({
            model: HARDWARE_ANALYZE_MODEL,
            max_tokens: ANALYZE_MAX_TOKENS,
            messages: [{ role: "user", content: prompt }],
            stream: false,
          }),
        },
        "Gemma analyze network error"
      );

      if (!res.ok) {
        const body = await safeBody(res);
        throw statusError(
          `Gemma analyze ${res.status}: ${body.slice(0, 300)}`,
          res.status
        );
      }

      const data = await readJson(res, "Gemma analyze");
      const text = data?.choices?.[0]?.message?.content || "";
      return { text };
    },
  };
}
|
||||
|
||||
/**
 * Format a duration in seconds as a transcript timestamp.
 *
 * Produces "M:SS" below one hour and "H:MM:SS" from one hour up. Fractional
 * seconds are truncated and negative values clamp to zero. Input that is not
 * a finite number (NaN, undefined, Infinity) is treated as zero so the
 * output never contains "NaN" or "Infinity".
 *
 * @param {number} seconds Duration in seconds.
 * @returns {string} Formatted timestamp.
 */
function formatMmSs(seconds) {
  const numeric = Number(seconds);
  const total = Number.isFinite(numeric) ? Math.max(0, Math.floor(numeric)) : 0;
  const hours = Math.floor(total / 3600);
  const minutes = Math.floor((total % 3600) / 60);
  const secs = String(total % 60).padStart(2, "0");
  if (hours > 0) {
    return `${hours}:${String(minutes).padStart(2, "0")}:${secs}`;
  }
  return `${minutes}:${secs}`;
}
|
||||
|
||||
/**
 * Best-effort read of a response body as text, for use in error messages.
 *
 * Never rejects: any failure while reading (including a missing response
 * object) yields "". The result is always a string, so callers can call
 * `.slice()` on it without further checks.
 *
 * @param {{ text: () => Promise<string> }} res Response-like object.
 * @returns {Promise<string>} The body text, or "" when it cannot be read.
 */
async function safeBody(res) {
  try {
    const body = await res.text();
    return typeof body === "string" ? body : "";
  } catch {
    // Deliberate swallow: the body is only diagnostic detail for an error
    // that is already being reported.
    return "";
  }
}
|
||||
|
||||
Reference in New Issue
Block a user