// Relay provider — proxies transcription + analysis calls through the
// operator's relay backend (operator-side service, not in this repo).
// The relay handles billing/credit accounting, picks the actual backing
// model (Gemini, the operator's Parakeet+Gemma, etc.), and returns a
// uniform response with the user's remaining credit balance.
//
// Auth shape:
//   - `X-Recap-Install-Id` header on every call (required; identifies
//     the credit owner). Comes from ./install-id.js.
//   - `Authorization: Bearer <license-key>` header when a license is
//     present. Absent = treat as Core (free) tier.
//   - `X-Recap-Job-Id` header on every call, common across the
//     transcribe + analyze pair that make up one full summary. The
//     relay decrements credits on the FIRST call with a given job_id;
//     subsequent calls with the same id are free (until job_id
//     expires, ~1h). This means "1 full summary = 1 credit" regardless
//     of whether one or both pipeline steps go through the relay.
//
// Response envelope (success and error both):
//   {
//     "result": { ...endpoint-specific payload... },
//     "credits_remaining": <number>,
//     "tier": "core" | "pro" | "max",
//     "credit_charged": <charged-for-this-call>
//   }
// (The angle-bracket placeholders above were reconstructed; confirm the
// exact type of `credit_charged` against the relay API spec.)
//
// What this provider does NOT do:
//   - Validate the user's license. Relay does that server-side.
//   - Track historical credit usage. Relay's DB owns the ledger.
//   - Choose which backing model the relay uses. Operator's call.

import { createReadStream } from "fs";
import { retryAPI, formatTime } from "../util.js";
import { zeroCost } from "./cost.js";
import { updateRelayState, recordRelayError } from "../relay-state.js";

// Provider name shown in logs + chunk pagination labels. "relay" rather
// than e.g. "keysat-relay" because operators may run their own relay
// using a different backend brand — the name should describe the
// architecture, not the operator.
const NAME = "relay";

// Models exposed to the picker. The relay decides what actually runs —
// these labels are placeholders so the picker can show something.
const RELAY_TRANSCRIPTION_MODELS = ["relay-default"];
const RELAY_ANALYSIS_MODELS = ["relay-default"];

/**
 * Create a provider object that sends transcription and analysis requests
 * to a relay backend over HTTP.
 *
 * @param {object} [options]
 * @param {string} options.baseURL - Relay origin; trailing slashes are trimmed.
 * @param {string} options.installId - Sent as `X-Recap-Install-Id` on every call.
 * @param {string} [options.licenseKey] - When set, sent as a Bearer token.
 * @param {number} [options.timeoutMs=900000] - Deadline applied to each
 *   POST attempt (request + body read). A non-finite or non-positive value
 *   disables the deadline.
 * @returns {object} Provider with `name`, `capabilities`, model listers,
 *   `transcribeAudio`, `pingBalance` and `analyzeText`.
 * @throws {Error} When `baseURL` or `installId` is missing.
 */
export function createRelayProvider({
  baseURL,
  installId,
  licenseKey,
  timeoutMs = 900_000,
} = {}) {
  if (!baseURL) {
    throw new Error(
      "createRelayProvider: baseURL is required (e.g. https://relay.keysat.xyz)"
    );
  }
  if (!installId) {
    throw new Error(
      "createRelayProvider: installId is required (boot must initInstallId first)"
    );
  }

  // Trim every trailing slash so `${base}${path}` never contains "//".
  const base = baseURL.replace(/\/+$/, "");

  // Build the auth/identity headers attached to every relay call.
  // job_id is optional but the orchestration layer should always pass
  // one — without it the relay can't bundle the transcribe + analyze
  // pair into a single credit charge.
  function buildHeaders({ extra = {}, jobId } = {}) {
    const h = {
      "X-Recap-Install-Id": installId,
      ...extra,
    };
    if (licenseKey) h["Authorization"] = `Bearer ${licenseKey}`;
    if (jobId) h["X-Recap-Job-Id"] = jobId;
    return h;
  }

  // Arm an optional deadline and merge it with the caller's abort signal.
  // `timedOut()` reports whether the deadline (not the caller) fired.
  function startDeadline(signal, ms) {
    const ac = new AbortController();
    // setTimeout misbehaves on NaN/Infinity, so those mean "no deadline".
    const armed = Number.isFinite(ms) && ms > 0;
    const timer = armed ? setTimeout(() => ac.abort(), ms) : null;
    return {
      signal: signal ? AbortSignal.any([signal, ac.signal]) : ac.signal,
      timedOut: () => ac.signal.aborted && !signal?.aborted,
      clear: () => {
        if (timer !== null) clearTimeout(timer);
      },
    };
  }

  // Read a response body, parse it as JSON when possible, and feed any
  // envelope fields (credits_remaining / tier) to the shared relay state.
  async function readEnvelope(res) {
    const text = await res.text();
    let parsed = null;
    try {
      parsed = text ? JSON.parse(text) : null;
    } catch {
      // Deliberate best-effort: a non-JSON body (e.g. a proxy error page)
      // is reported through `text` instead.
    }
    const isEnvelope =
      parsed !== null &&
      typeof parsed === "object" &&
      (typeof parsed.credits_remaining === "number" || Boolean(parsed.tier));
    if (isEnvelope) updateRelayState(parsed);
    return { text, parsed, isEnvelope };
  }

  // Pick the most useful human-readable failure text from a response.
  function describeFailure(parsed, text, status) {
    const detail = parsed?.error || parsed?.message;
    if (typeof detail === "string") return detail;
    if (detail) {
      // Structured error objects would otherwise print "[object Object]".
      if (typeof detail.message === "string" && detail.message) {
        return detail.message;
      }
      return JSON.stringify(detail).slice(0, 300);
    }
    return text?.slice(0, 300) || `HTTP ${status}`;
  }

  // Token usage reported for relay calls; always zero because the relay
  // bills in credits rather than tokens.
  function zeroUsage() {
    return { inputTokens: 0, outputTokens: 0, thinkingTokens: 0, totalTokens: 0 };
  }

  // Common POST wrapper. The relay's contract is that ANY response
  // (success or failure) carries the standard envelope so Recap can keep
  // its balance display accurate even on errors, so error bodies are
  // parsed too. Throws an Error carrying `status` (and `envelope` for
  // HTTP failures); resolves with the parsed envelope object on success.
  async function postRelay({ path, body, headers, signal }) {
    const deadline = startDeadline(signal, timeoutMs);
    let res;
    let reply;
    try {
      res = await fetch(`${base}${path}`, {
        method: "POST",
        headers,
        body,
        signal: deadline.signal,
      });
      reply = await readEnvelope(res);
    } catch (err) {
      if (deadline.timedOut()) {
        const timeoutErr = new Error(
          `Relay ${path} timed out after ${timeoutMs}ms`,
          { cause: err }
        );
        timeoutErr.name = "TimeoutError";
        recordRelayError(timeoutErr.message);
        throw timeoutErr;
      }
      recordRelayError(err?.message || String(err));
      throw err;
    } finally {
      deadline.clear();
    }

    if (!res.ok) {
      const msg = describeFailure(reply.parsed, reply.text, res.status);
      const err = new Error(`Relay ${path} ${res.status}: ${msg}`);
      err.status = res.status;
      err.envelope = reply.parsed;
      // When the body carried envelope fields the relay state was already
      // updated above; otherwise record the failure explicitly.
      if (!reply.isEnvelope) recordRelayError(msg);
      throw err;
    }

    const envelope = reply.parsed;
    if (envelope === null || typeof envelope !== "object" || Array.isArray(envelope)) {
      // A 2xx with an empty or non-JSON body would otherwise surface as a
      // TypeError in the callers when they read envelope fields.
      const err = new Error(
        `Relay ${path} ${res.status}: response body is not a JSON envelope`
      );
      err.status = res.status;
      recordRelayError(err.message);
      throw err;
    }
    return envelope;
  }

  return {
    name: NAME,
    capabilities: {
      transcribe: true,
      analyze: true,
      // The relay's model catalog is internal — Recap doesn't pick.
      // listModels: false signals the picker to skip the dropdown.
      listModels: false,
    },

    listAnalysisModels() {
      return [...RELAY_ANALYSIS_MODELS];
    },

    listTranscriptionModels() {
      return [...RELAY_TRANSCRIPTION_MODELS];
    },

    /**
     * Upload one audio file (or chunk) to `/relay/transcribe` and return
     * the transcript as `[timestamp] text` lines.
     *
     * @returns {Promise<object>} `{ text, usage, cost, finishReason,
     *   blockReason, raw }` where `raw` is the relay envelope.
     */
    async transcribeAudio({
      filePath,
      mimeType,
      titleHint,
      channelHint = "",
      descriptionHint = "",
      chaptersHint = [],
      offsetSeconds = 0,
      onProgress = () => {},
      signal,
      jobId,
    }) {
      onProgress(
        `Uploading audio${offsetSeconds > 0 ? ` (offset ${formatTime(offsetSeconds)})` : ""} to relay (${base})...`
      );
      const start = Date.now();

      // Multipart form encoding avoids base64-inflating the audio binary.
      // Note the file is read fully into memory by fileToBlob; the form
      // is built once and reused for every retry attempt.
      const form = new FormData();
      const blob = await fileToBlob(filePath, mimeType);
      form.append("audio", blob, "audio.bin");
      form.append("mime_type", mimeType || "application/octet-stream");
      if (titleHint) form.append("title", titleHint);
      if (channelHint) form.append("channel", channelHint);
      if (descriptionHint) form.append("description", descriptionHint);
      if (Array.isArray(chaptersHint) && chaptersHint.length > 0) {
        form.append("chapters", JSON.stringify(chaptersHint));
      }
      form.append("offset_seconds", String(offsetSeconds));

      const envelope = await retryAPI(
        () =>
          postRelay({
            path: "/relay/transcribe",
            body: form,
            // No Content-Type here: fetch sets the multipart boundary.
            headers: buildHeaders({ jobId }),
            signal,
          }),
        {
          retries: 2,
          delayMs: 5000,
          label: `Relay transcribe${offsetSeconds > 0 ? ` (chunk@${formatTime(offsetSeconds)})` : ""}`,
          log: (msg) => onProgress(msg),
        }
      );

      const elapsed = ((Date.now() - start) / 1000).toFixed(1);
      const remaining =
        typeof envelope.credits_remaining === "number"
          ? `, ${envelope.credits_remaining} credits left`
          : "";
      onProgress(`Relay transcribe complete in ${elapsed}s${remaining}`);

      // Relay's transcribe result shape: { text, segments?: [{start,
      // end, text}], duration? }. We don't fabricate segment timestamps
      // here — the orchestration layer's synthesizeEntriesFromText
      // handles single-segment-only responses.
      const result = envelope.result || {};
      const segments = Array.isArray(result.segments) ? result.segments : [];
      const lines = segments.length
        ? segments.map(
            // Optional chaining tolerates null entries in the array.
            (s) => `[${formatTime(s?.start || 0)}] ${(s?.text || "").trim()}`
          )
        : [`[0:00] ${(result.text || "").trim()}`];
      const text = lines.join("\n");

      // Cost from Recap's POV is always zero — credits are the unit,
      // and they're tracked separately. The orchestration layer's
      // cost-summing code keeps working unchanged.
      const cost = zeroCost({
        inputTokens: 0,
        outputTokens: 0,
        thinkingTokens: 0,
      });

      return {
        text,
        usage: zeroUsage(),
        cost,
        finishReason: null,
        blockReason: "none",
        raw: envelope,
      };
    },

    /**
     * Peek at the install's current credit balance via GET
     * `/relay/balance`. Short default timeout so an unreachable relay
     * makes the UI fall back quickly instead of hanging.
     *
     * @param {object} [options]
     * @param {number} [options.timeoutMs=5000] - Deadline for this call;
     *   a non-finite or non-positive value disables it.
     * @param {AbortSignal} [options.signal] - Caller cancellation.
     * @returns {Promise<object|null>} Parsed body, or null when the body
     *   is empty or not JSON.
     */
    async pingBalance({ timeoutMs = 5000, signal } = {}) {
      const deadline = startDeadline(signal, timeoutMs);
      try {
        const res = await fetch(`${base}/relay/balance`, {
          method: "GET",
          headers: buildHeaders({}),
          signal: deadline.signal,
        });
        const { text, parsed } = await readEnvelope(res);
        if (!res.ok) {
          const msg = describeFailure(parsed, text, res.status);
          recordRelayError(msg);
          const err = new Error(`Relay /balance ${res.status}: ${msg}`);
          err.status = res.status;
          throw err;
        }
        return parsed;
      } catch (err) {
        if (err?.name === "AbortError" && deadline.timedOut()) {
          recordRelayError(`balance ping timed out after ${timeoutMs}ms`);
        } else if (!err?.status) {
          // HTTP failures (which carry `status`) were recorded above.
          recordRelayError(err?.message || String(err));
        }
        throw err;
      } finally {
        deadline.clear();
      }
    },

    /**
     * Send a prompt to `/relay/analyze` and return the relay's text.
     *
     * @returns {Promise<object>} `{ text, usage, cost, finishReason, raw }`
     *   where `raw` is the relay envelope.
     */
    async analyzeText({
      prompt,
      onProgress = () => {},
      retries = 2,
      signal,
      jobId,
    }) {
      const start = Date.now();
      const headers = buildHeaders({
        extra: { "Content-Type": "application/json" },
        jobId,
      });
      const envelope = await retryAPI(
        () =>
          postRelay({
            path: "/relay/analyze",
            body: JSON.stringify({ prompt }),
            headers,
            signal,
          }),
        {
          retries,
          delayMs: 5000,
          label: "Relay analyze",
          log: (msg) => onProgress(msg),
        }
      );

      const elapsed = ((Date.now() - start) / 1000).toFixed(1);
      const remaining =
        typeof envelope.credits_remaining === "number"
          ? `, ${envelope.credits_remaining} credits left`
          : "";
      onProgress(`Relay analyze complete in ${elapsed}s${remaining}`);

      const result = envelope.result || {};
      const text = typeof result.text === "string" ? result.text : "";
      const cost = zeroCost({
        inputTokens: 0,
        outputTokens: 0,
        thinkingTokens: 0,
      });

      return {
        text,
        usage: zeroUsage(),
        cost,
        finishReason: null,
        raw: envelope,
      };
    },
  };
}

/**
 * Read a file from disk into a Blob with the given MIME type for FormData
 * upload. The whole file is buffered in memory (no streaming), which is
 * acceptable for typical podcast chunk sizes (~50 MB at most after the
 * orchestration layer's 45-min split).
 *
 * @param {string} filePath - Path of the file to read.
 * @param {string} [mimeType] - Blob type; defaults to
 *   "application/octet-stream".
 * @returns {Promise<Blob>}
 */
async function fileToBlob(filePath, mimeType) {
  const { readFile } = await import("node:fs/promises");
  const buf = await readFile(filePath);
  return new Blob([buf], { type: mimeType || "application/octet-stream" });
}