// POST /relay/transcribe — forwards an audio payload to the chosen
// backend (Gemini first, operator hardware as overflow) and returns
// the standard envelope.
//
// Request shape: multipart/form-data
//   audio:          binary audio file (required)
//   mime_type:      string (default application/octet-stream)
//   title:          string (optional, used by Gemini prompt)
//   channel:        string (optional)
//   description:    string (optional)
//   chapters:       JSON-stringified array (optional)
//   offset_seconds: number string (optional, for chunked audio)
//
// Headers:
//   X-Recap-Install-Id   (required)
//   X-Recap-Job-Id       (optional but expected — pairs with /analyze)
//   Authorization        (optional Bearer LIC1-... for licensed tiers)
//
// Response (standard envelope):
//   {
//     result: { text: "[MM:SS] ...", segments: [], duration_seconds: 0 },
//     credits_remaining, tier, credit_charged
//   }
//
// Every outcome (success / quota-refused / backend-error) writes one
// row to the audit log so the admin dashboard can compute cost,
// margin, and speed metrics.
import express from "express";
import multer from "multer";
import { resolveIdentity, identityTier } from "../identity.js";
import {
  getOrCreateRow,
  planBackend,
  commitCredit,
  licenseFingerprint,
} from "../credits.js";
import { lookupJob, markJobCharged, refundJob } from "../job-credits.js";
import { getConfigSnapshot, getTierQuotas } from "../config.js";
import { createGeminiBackend } from "../backends/gemini.js";
import { createHardwareBackend } from "../backends/hardware.js";
import { envelope, errorEnvelope } from "./envelope.js";
import { recordCall } from "../audit-log.js";
import { calcGeminiCost } from "../pricing.js";
import { getAudioDurationSecondsFromBuffer } from "../audio-meta.js";
import { resolveHardwareConfig } from "../hardware-config.js";
import { reportHealthEvent } from "../spark-control-events.js";

// Uploads are buffered in memory (the handler reads req.file.buffer).
const upload = multer({
  storage: multer.memoryStorage(),
  limits: { fileSize: 200 * 1024 * 1024 }, // 200 MB per request
});

/**
 * Builds the router exposing POST /transcribe.
 *
 * The handler resolves the caller's identity, plans a backend
 * ("gemini" or hardware) via planBackend, runs the transcription,
 * commits a credit unless the job was already known to lookupJob, and
 * writes one audit row per outcome (refused / error / success).
 *
 * @returns {import("express").Router} router with the /transcribe route mounted
 */
export function transcribeRouter() {
  const router = express.Router();

  router.post("/transcribe", upload.single("audio"), async (req, res) => {
    const t0 = Date.now();
    const jobId = req.header("X-Recap-Job-Id") || null;

    let identity;
    try {
      identity = await resolveIdentity(req);
    } catch (err) {
      const e = await errorEnvelope({
        error: err?.message || "auth_error",
        statusHint: err?.status || 401,
      });
      return res.status(e.statusHint || 401).json(e.body);
    }

    if (identity.kind === "license" && !identity.installId) {
      const e = await errorEnvelope({
        error: "missing X-Recap-Install-Id header",
        statusHint: 400,
      });
      return res.status(400).json(e.body);
    }

    const { creditKey, installId, license } = identity;

    if (!req.file) {
      const e = await errorEnvelope({
        error: "missing audio file",
        creditKey,
        installId,
        statusHint: 400,
      });
      return res.status(400).json(e.body);
    }

    const row = await getOrCreateRow({ creditKey, installId, license });
    const tier = identityTier(identity, row);
    row.tier_snapshot = tier;
    const licenseFp =
      identity.kind === "cloud" ? null : licenseFingerprint(license);
    // Audit key: install id when present, otherwise the identity's
    // userId. Used for EVERY audit row below so all outcomes of one
    // caller are keyed identically.
    const auditInstall = installId || identity.userId || null;

    // Probe audio duration BEFORE the backend call so we can record
    // it on every audit row (success and error alike). Used by the
    // dashboard to normalize wall-clock time to "ms per minute of
    // audio" — a backend-agnostic speed benchmark.
    const audioSeconds = await getAudioDurationSecondsFromBuffer(
      req.file.buffer
    );

    // Billing vs. routing are decoupled — see analyze.js for the
    // full reasoning. Look up job to decide whether to charge a
    // credit, but always run planBackend fresh so transcribe's
    // routing decision respects relay_transcribe_backend_preference.
    // NOTE(review): lookupJob is called without await; if it is async,
    // reusedJob would always be true — confirm it is synchronous.
    const reusedJob = !!lookupJob({ creditKey, installId, license, jobId });

    const cfg = await getConfigSnapshot();
    const hw = await resolveHardwareConfig(cfg);

    // Operator-only diagnostic — see the matching comment in
    // summarize-url.js for the full reasoning. We don't 503 here on
    // blocked_reason because doing so pre-empts planBackend and
    // surfaces operator-internal wording to clients even when
    // Gemini was the configured preference.
    if (hw.transcribe.blocked_reason) {
      console.warn(
        `[transcribe] hardware transcribe currently blocked (planBackend will route to Gemini if available): ${hw.transcribe.blocked_reason}`,
      );
    }

    const hasHardware = !!hw.transcribe.url;
    const quota = await getTierQuotas();
    const preference =
      cfg.relay_transcribe_backend_preference || "gemini_first";
    const plan = planBackend(row, quota, { hasHardware, preference });

    if (!plan.allowed) {
      await recordCall({
        install_id: auditInstall,
        license_fingerprint: licenseFp,
        tier,
        pipeline: "transcribe",
        backend: null,
        model: null,
        status: "refused",
        credit_charged: 0,
        duration_ms: Date.now() - t0,
        audio_seconds: audioSeconds,
        cost_usd: 0,
        job_id: jobId,
        error: plan.reason,
      });
      const e = await errorEnvelope({
        error: plan.reason,
        creditKey,
        installId,
        tier,
        statusHint: 402,
      });
      return res.status(402).json(e.body);
    }

    const chosenBackend = plan.backend;
    let result;
    try {
      if (chosenBackend === "gemini") {
        const backend = createGeminiBackend({
          apiKey: cfg.relay_gemini_api_key,
          transcriptionModel: cfg.relay_gemini_transcription_model,
          analysisModel: cfg.relay_gemini_analysis_model,
          txChunkSeconds: (cfg.relay_gemini_tx_chunk_minutes || 30) * 60,
          txConcurrency: cfg.relay_gemini_tx_concurrency || 12,
        });
        result = await backend.transcribeAudio({
          audio: req.file.buffer,
          mimeType:
            req.body?.mime_type ||
            req.file.mimetype ||
            "application/octet-stream",
          title: req.body?.title || "",
          channel: req.body?.channel || "",
          description: req.body?.description || "",
          chapters: parseChaptersField(req.body?.chapters),
          offsetSeconds: Number(req.body?.offset_seconds) || 0,
        });
      } else {
        const backend = createHardwareBackend({
          parakeetBaseURL: hw.transcribe.url || "",
          gemmaBaseURL: hw.analyze.url || "",
          sparkControlBaseURL: hw.sparkBase || "",
          parakeetModel: hw.transcribe.model || "",
          gemmaModel: hw.analyze.model || "",
          txChunkSeconds: (cfg.relay_hardware_tx_chunk_minutes || 5) * 60,
          txChunkOverlapSeconds:
            cfg.relay_hardware_tx_chunk_overlap_seconds ?? 30,
          diarizationEnabled: !!cfg.relay_hardware_diarization_enabled,
          clusterThresholdPct:
            cfg.relay_hardware_voice_clustering_threshold ?? 70,
          anchorMinSpeakingSec:
            cfg.relay_hardware_anchor_min_speaking_sec ?? 30,
          smallClusterMaxSpeakingSec:
            cfg.relay_hardware_small_cluster_max_speaking_sec ?? 15,
          uncertainMarginPct: cfg.relay_hardware_uncertain_margin_pct ?? 10,
          txConcurrency: cfg.relay_hardware_tx_concurrency || 4,
        });
        result = await backend.transcribeAudio({
          audio: req.file.buffer,
          mimeType:
            req.body?.mime_type ||
            req.file.mimetype ||
            "application/octet-stream",
          offsetSeconds: Number(req.body?.offset_seconds) || 0,
        });
      }
    } catch (err) {
      if (reusedJob) await refundJob({ creditKey, installId, license, jobId });
      console.error(`[relay/transcribe] backend error: ${err?.message}`);

      // Fire-and-forget health report for hardware-served calls;
      // Gemini failures are a separate observability surface.
      if (chosenBackend === "hardware") {
        reportHealthEvent({
          service: "parakeet",
          ok: false,
          error: (err?.message || String(err)).slice(0, 280),
          ms: Date.now() - t0,
        });
      }

      await recordCall({
        install_id: auditInstall,
        license_fingerprint: licenseFp,
        tier,
        pipeline: "transcribe",
        backend: chosenBackend,
        model:
          chosenBackend === "gemini"
            ? cfg.relay_gemini_transcription_model
            : (hw.transcribe.model || "(auto)"),
        status: "error",
        credit_charged: 0,
        duration_ms: Date.now() - t0,
        audio_seconds: audioSeconds,
        cost_usd: 0,
        job_id: jobId,
        error: (err?.message || String(err)).slice(0, 200),
      });
      const e = await errorEnvelope({
        error: err?.message || "backend_error",
        creditKey,
        installId,
        tier,
        statusHint: err?.status || 502,
      });
      // Guard the status the same way the auth-error path does, so a
      // missing statusHint on the envelope cannot reach res.status().
      return res.status(e.statusHint || 502).json(e.body);
    }

    // Only charge when this job id was not already known; a reused job
    // rides on the credit recorded for it earlier.
    let creditCharged = 0;
    if (!reusedJob) {
      await commitCredit({
        creditKey,
        installId,
        license,
        backend: chosenBackend,
        tier,
      });
      await markJobCharged({
        creditKey,
        installId,
        license,
        jobId,
        backend: chosenBackend,
        tier,
      });
      creditCharged = 1;
    }

    // Success — write the audit row with cost details. Gemini's usage
    // metadata gives us token counts; calcGeminiCost translates that
    // into USD. Hardware-served calls have no token data and we
    // report cost_usd: 0 (operator's hardware is fixed-cost).
    const costDetails =
      chosenBackend === "gemini" && result?.usage
        ? calcGeminiCost(result.model, result.usage)
        : {
            input_tokens: 0,
            output_tokens: 0,
            thinking_tokens: 0,
            cost_usd: 0,
          };

    await recordCall({
      // auditInstall (not installId) so success rows share the key used
      // by the refused and error rows above.
      install_id: auditInstall,
      license_fingerprint: licenseFp,
      tier,
      pipeline: "transcribe",
      backend: chosenBackend,
      model: result?.model || null,
      status: "success",
      credit_charged: creditCharged,
      duration_ms: Date.now() - t0,
      audio_seconds: audioSeconds,
      job_id: jobId,
      attempts: result?.attempts || null,
      ...costDetails,
    });

    const body = await envelope({
      result,
      creditKey,
      installId,
      license,
      tier,
      creditCharged,
    });
    res.json(body);
  });

  return router;
}

/**
 * Parses the optional `chapters` form field.
 *
 * @param {string | undefined} raw - JSON-stringified array from the form body
 * @returns {Array} the parsed array, or [] when the field is missing,
 *   is not valid JSON, or does not decode to an array
 */
function parseChaptersField(raw) {
  if (!raw) return [];
  try {
    const arr = JSON.parse(raw);
    return Array.isArray(arr) ? arr : [];
  } catch {
    // Malformed chapters are treated as "no chapters" rather than
    // failing the whole request.
    return [];
  }
}