// Operator-hardware fallback backend. Forwards transcribe requests to
// a Parakeet endpoint (or any Whisper-API-compatible server — same wire
// format) and analyze requests to a Gemma endpoint (or any
// OpenAI-compatible chat-completions server).
//
// Used when a Pro/Max user has exceeded their monthly Gemini cap.
// Returns the same shape gemini.js produces so route handlers don't
// need a backend-specific branch downstream:
//   transcribeAudio → { text, segments, duration_seconds }
//   analyzeText     → { text }
//
// Both endpoints are reached via plain fetch — no SDK dependency keeps
// the relay container slim and the upstream wire format is dead-simple
// for these two well-known shapes.

const ANALYZE_MAX_TOKENS = 16000;

// Gemma served locally tends to live on the host's LAN, not the public
// internet, so generous timeouts. Same scale as Recap's defaults.
const DEFAULT_TIMEOUT_MS = 900_000;

// Model identifier sent to the Gemma endpoint. Defaults to "gemma3:27b",
// which is the typical Ollama tag for the analysis-capable Gemma model.
// Operators with a different deployment can override it with the
// RELAY_GEMMA_MODEL environment variable (a StartOS action may replace
// this in the future).
const HARDWARE_ANALYZE_MODEL = process.env.RELAY_GEMMA_MODEL || "gemma3:27b";

// Parakeet's typical model identifier. Mirrors what Recap's whisper.js
// sends when the operator points the relay at a NeMo Parakeet HTTP
// wrapper. Configurable via env var for non-default deployments.
const HARDWARE_TRANSCRIBE_MODEL =
  process.env.RELAY_PARAKEET_MODEL || "parakeet-tdt-0.6b-v3";

/**
 * Build the operator-hardware backend.
 *
 * @param {object} [options]
 * @param {string} [options.parakeetBaseURL=""] Base URL of the
 *   Whisper-API-compatible transcription server; empty disables transcribe.
 * @param {string} [options.gemmaBaseURL=""] Base URL of the
 *   OpenAI-compatible chat-completions server; empty disables analyze.
 * @param {number} [options.timeoutMs=DEFAULT_TIMEOUT_MS] Per-request timeout.
 * @returns {{
 *   hasTranscribe: boolean,
 *   hasAnalyze: boolean,
 *   transcribeAudio: Function,
 *   analyzeText: Function,
 * }} Backend object. Both methods reject with an Error carrying a numeric
 *   `status`: 503 when the endpoint is not configured, 502 on network
 *   failure or an unparseable upstream body, otherwise the upstream status.
 */
export function createHardwareBackend({
  parakeetBaseURL = "",
  gemmaBaseURL = "",
  timeoutMs = DEFAULT_TIMEOUT_MS,
} = {}) {
  const parakeet = stripTrailingSlashes(parakeetBaseURL);
  const gemma = stripTrailingSlashes(gemmaBaseURL);

  return {
    hasTranscribe: !!parakeet,
    hasAnalyze: !!gemma,

    // POST /v1/audio/transcriptions with the OpenAI Whisper
    // multipart shape. Parakeet wrappers (NeMo + the patched one Recap
    // already talks to) honor this format and return segments with
    // per-segment timestamps when timestamp_granularities=segment is
    // requested. Falls back to a bare request if the rich shape 4xx/5xxs.
    async transcribeAudio({
      audio,
      mimeType = "application/octet-stream",
      offsetSeconds = 0,
    }) {
      if (!parakeet) {
        throw upstreamError(
          "operator-hardware transcribe is not configured (relay_parakeet_base_url is empty)",
          503
        );
      }

      // FormData/Blob globals are available in Node 20+. Wrap the
      // received Buffer in a Blob so the multipart body is properly
      // chunked instead of falling back to base64.
      const buildForm = (richMode) => {
        const form = new FormData();
        const blob = new Blob([audio], { type: mimeType });
        form.append("file", blob, "audio.bin");
        form.append("model", HARDWARE_TRANSCRIBE_MODEL);
        if (richMode) {
          form.append("response_format", "verbose_json");
          form.append("timestamp_granularities[]", "segment");
        }
        return form;
      };

      const url = `${parakeet}/v1/audio/transcriptions`;

      // A fresh form and a fresh timeout signal are built for every attempt.
      const postForm = async (richMode, failureLabel) => {
        try {
          return await fetch(url, {
            method: "POST",
            body: buildForm(richMode),
            signal: AbortSignal.timeout(timeoutMs),
          });
        } catch (err) {
          throw upstreamError(`${failureLabel}: ${err?.message || err}`, 502, err);
        }
      };

      // Try the rich request first (verbose_json + segment timestamps).
      let res = await postForm(true, "Parakeet transcribe network error");

      // If the wrapper rejects the rich params, retry with bare-bones.
      if (!res.ok && res.status >= 400 && res.status < 600) {
        const richBody = await safeBody(res);
        console.warn(
          `[hardware] rich Parakeet request returned ${res.status}: ${richBody.slice(0, 200)} — retrying bare`
        );
        res = await postForm(
          false,
          "Parakeet transcribe network error (fallback)"
        );
      }

      if (!res.ok) {
        const body = await safeBody(res);
        throw upstreamError(
          `Parakeet transcribe ${res.status}: ${body.slice(0, 300)}`,
          res.status
        );
      }

      // A literal `null` payload is treated as an empty result.
      const data = (await readJson(res, "Parakeet transcribe")) ?? {};
      const segments = Array.isArray(data.segments) ? data.segments : [];

      // Offset support: when the relay caller is processing a chunked
      // audio file, it asks for transcripts at a non-zero base time.
      // Parakeet returns timestamps relative to the chunk; shift them
      // up by offsetSeconds so the combined transcript downstream
      // lines up with the real video timeline. The offset is coerced to
      // a number first: a string such as "60" would otherwise be
      // concatenated onto the timestamp instead of added to it.
      const offset = toSeconds(offsetSeconds);
      const shifted = segments.map((s) => ({
        start: toSeconds(s?.start) + offset,
        end: toSeconds(s?.end) + offset,
        text: String(s?.text ?? "").trim(),
      }));

      // Build the [MM:SS] text format Recap's parseTimestampedTranscript
      // already speaks. The route handler will pass this straight back
      // to Recap, which parses it on the client side. When the server
      // returned no segments, the whole text is stamped with the chunk
      // offset so it still lands at the right place on the timeline.
      const lines = shifted.length
        ? shifted.map((s) => `[${formatMmSs(s.start)}] ${s.text}`)
        : [`[${formatMmSs(offset)}] ${String(data.text ?? "").trim()}`];

      return {
        text: lines.join("\n"),
        segments: shifted,
        duration_seconds: toSeconds(data.duration),
      };
    },

    // POST /v1/chat/completions with the OpenAI shape. Ollama's
    // server, vLLM, llama.cpp's HTTP server, and most other OSS LLM
    // runners support this wire format — so we don't lock the relay
    // to one specific Gemma deployment.
    async analyzeText({ prompt }) {
      if (!gemma) {
        throw upstreamError(
          "operator-hardware analyze is not configured (relay_gemma_base_url is empty)",
          503
        );
      }

      const url = `${gemma}/v1/chat/completions`;
      let res;
      try {
        res = await fetch(url, {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify({
            model: HARDWARE_ANALYZE_MODEL,
            max_tokens: ANALYZE_MAX_TOKENS,
            messages: [{ role: "user", content: prompt }],
            stream: false,
          }),
          signal: AbortSignal.timeout(timeoutMs),
        });
      } catch (err) {
        throw upstreamError(
          `Gemma analyze network error: ${err?.message || err}`,
          502,
          err
        );
      }

      if (!res.ok) {
        const body = await safeBody(res);
        throw upstreamError(
          `Gemma analyze ${res.status}: ${body.slice(0, 300)}`,
          res.status
        );
      }

      const data = await readJson(res, "Gemma analyze");
      const text = data?.choices?.[0]?.message?.content || "";
      return { text };
    },
  };
}

// Normalize a configured base URL: falsy → "", otherwise drop every
// trailing slash so `${base}/v1/...` never contains a double slash.
function stripTrailingSlashes(baseURL) {
  return baseURL ? String(baseURL).replace(/\/+$/, "") : "";
}

// Build an Error carrying the HTTP status the route handler should relay,
// keeping the underlying exception (if any) reachable through `cause`.
function upstreamError(message, status, cause) {
  const err =
    cause === undefined ? new Error(message) : new Error(message, { cause });
  err.status = status;
  return err;
}

// Parse a successful upstream response as JSON; a body that cannot be
// parsed is reported as a 502 rather than a status-less SyntaxError.
async function readJson(res, label) {
  try {
    return await res.json();
  } catch (err) {
    throw upstreamError(
      `${label} returned invalid JSON: ${err?.message || err}`,
      502,
      err
    );
  }
}

// Coerce a timestamp-like value to a finite number of seconds (else 0).
function toSeconds(value) {
  const n = Number(value);
  return Number.isFinite(n) ? n : 0;
}

// Render seconds as "M:SS", or "H:MM:SS" from one hour upwards.
// Negative or non-finite input is clamped to 0.
function formatMmSs(seconds) {
  const n = Number(seconds);
  const s = Number.isFinite(n) ? Math.max(0, Math.floor(n)) : 0;
  const h = Math.floor(s / 3600);
  const m = Math.floor((s % 3600) / 60);
  const sec = s % 60;
  if (h > 0) {
    return `${h}:${String(m).padStart(2, "0")}:${String(sec).padStart(2, "0")}`;
  }
  return `${m}:${String(sec).padStart(2, "0")}`;
}

// Best-effort read of a response body for error messages. A failure to
// read is deliberately mapped to "" so it never masks the original error.
async function safeBody(res) {
  try {
    return await res.text();
  } catch {
    return "";
  }
}