// GET /relay/capabilities — per-install metadata for Recap clients to
// plan their audio handling. Tells Recap whether to chunk a long
// audio file before sending it, based on which backend THIS install's
// next transcribe call will actually route to.
//
// The decision is install-specific because the relay's routing
// preference combined with the install's tier + current Gemini cap
// consumption determines the backend per request. In `gemini_first`
// mode, the same operator config will route a fresh install to
// Gemini (chunking required) but route a cap-exhausted install to
// hardware (no chunking needed) — so a global capabilities answer
// would be wrong half the time.
//
// Inputs:
//   X-Recap-Install-Id  (optional but strongly recommended)
//   Authorization       (optional Bearer license — affects tier lookup)
//
// Without an install_id (or when the install-aware lookup fails), the
// answer is picked from the operator-wide routing preference alone:
// hardware limits only when hardware is configured AND the preference
// is hardware_only / hardware_first; Gemini-safe limits in every other
// case (the chunking path always works; the no-chunking path only works
// when hardware actually serves the call).
//
// Output shape (the routing fields are unchanged from v1 — pure additive
// on the routing logic; the TTS fields are operator-wide, not
// install-specific):
//   {
//     max_audio_mb: number,
//     max_audio_minutes: number,
//     preferred_chunk_seconds: number | null,  // null = don't chunk
//     has_tts: boolean,                        // any TTS backend usable
//     tts_backend: "kokoro" | "elevenlabs" | null,
//     tts_default_voice: <operator-configured default voice> | null,
//     reason: string                           // human-readable
//   }

import express from "express";
import { getConfigSnapshot, getTierQuotas } from "../config.js";
import { resolveLicense } from "../keysat-client.js";
import { getOrCreateRow, planBackend } from "../credits.js";
import { resolveHardwareConfig } from "../hardware-config.js";

// Gemini File API can handle audio up to ~9.5 hours per generateContent
// call and files up to 2GB. The conservative 60-min/30-MB ceiling we
// shipped originally was sized for free-tier worries that no longer
// apply on paid Gemini. Bumped to 240 min / 200 MB so Recap hits the
// relay-URL fast-path for content up to 4 hours instead of falling
// back to client-side chunked uploads (which lose the buyer-bandwidth
// savings and serialize the calls).
// Limits reported when the call is expected to be served by Gemini (and
// as the safe default whenever the backend cannot be determined).
const GEMINI_LIMITS = Object.freeze({
  max_audio_mb: 200,
  max_audio_minutes: 240,
  // 45 min — server-side chunking still kicks in for stability on the
  // longest files, but only on the actual call; doesn't gate
  // client-side chunking.
  preferred_chunk_seconds: 2700,
});

// Limits reported when the call is expected to be served by hardware.
const HARDWARE_LIMITS = Object.freeze({
  // Effectively unbounded — Parakeet wrappers commonly handle 2+ hour
  // audio in one shot. Set high but finite ceilings so a 24-hour file
  // doesn't OOM the operator's GPU box silently.
  max_audio_mb: 500,
  max_audio_minutes: 240,
  // null = don't chunk.
  preferred_chunk_seconds: null,
});

/**
 * Pick the TTS backend name for the operator's preference.
 *
 * @param {string} ttsPref - "hardware_only" | "cloud_only" | "cloud_first";
 *   any other value is treated as "hardware_first" (the default).
 * @param {boolean} kokoroReady - whether a hardware TTS URL is configured.
 * @param {boolean} elevenConfigured - whether both the ElevenLabs API key
 *   and voice id are configured.
 * @returns {"kokoro" | "elevenlabs" | null} null when no backend qualifies.
 */
function resolveTtsBackend(ttsPref, kokoroReady, elevenConfigured) {
  const kokoro = kokoroReady ? "kokoro" : null;
  const eleven = elevenConfigured ? "elevenlabs" : null;
  switch (ttsPref) {
    case "hardware_only":
      return kokoro;
    case "cloud_only":
      return eleven;
    case "cloud_first":
      return eleven ?? kokoro;
    default:
      // hardware_first (default)
      return kokoro ?? eleven;
  }
}

/**
 * Run the routing plan for one install and translate it into limits.
 *
 * Rejects/throws when the license lookup, row read or quota read fails;
 * the caller decides how to degrade.
 *
 * @param {{ installId: string, auth: string | null, hasHardware: boolean, txPref: string }} ctx
 * @returns {Promise<{ limits: object, reason: string }>}
 */
async function resolveInstallLimits({ installId, auth, hasHardware, txPref }) {
  const license = await resolveLicense(auth);
  const row = await getOrCreateRow({ installId, license });
  // Plan against the tier of the license presented on THIS request.
  row.tier_snapshot = license.tier;
  const quota = await getTierQuotas();
  const plan = planBackend(row, quota, { hasHardware, preference: txPref });

  if (plan.allowed && plan.backend === "hardware") {
    return {
      limits: HARDWARE_LIMITS,
      reason: `routing this install to hardware (pref=${txPref}, tier=${license.tier})`,
    };
  }
  if (plan.allowed && plan.backend === "gemini") {
    return {
      limits: GEMINI_LIMITS,
      reason: `routing this install to Gemini (pref=${txPref}, tier=${license.tier})`,
    };
  }
  // planBackend refused entirely (out of credits / no backend
  // configured). Return Gemini-safe defaults so the client still
  // chunks defensively and gets a clean 402 from the real
  // transcribe call rather than a confusing transport failure.
  return {
    limits: GEMINI_LIMITS,
    reason: `routing refused for this install (${plan.reason || "unknown"}) — returning Gemini-safe defaults`,
  };
}

/**
 * Pick limits from the operator-wide routing preference alone.
 *
 *   hardware_only / hardware_first → hardware limits (provided hardware
 *                                    is configured)
 *   anything else                  → Gemini-safe limits (chunking works
 *                                    for both backends)
 *
 * When `hardware_first` is set but Parakeet isn't actually configured,
 * the relay will fall back to Gemini — so Gemini-safe limits are
 * reported in that case.
 *
 * @param {boolean} hasHardware - whether a hardware transcribe URL is configured.
 * @param {string} txPref - operator transcribe backend preference.
 * @returns {{ limits: object, reason: string }}
 */
function resolveOperatorLimits(hasHardware, txPref) {
  const hardwareCapable =
    hasHardware && (txPref === "hardware_only" || txPref === "hardware_first");
  if (hardwareCapable) {
    return {
      limits: HARDWARE_LIMITS,
      reason: `hardware-capable backend preference (${txPref})`,
    };
  }
  return {
    limits: GEMINI_LIMITS,
    reason: `Gemini-backed preference (${txPref})`,
  };
}

/**
 * Build the router that serves GET /capabilities.
 *
 * The response is `{ ...limits, has_tts, tts_backend, tts_default_voice,
 * reason }`, where `limits` is either HARDWARE_LIMITS or GEMINI_LIMITS.
 *
 * @returns {object} the router created by `express.Router()`.
 */
export function capabilitiesRouter() {
  const router = express.Router();

  router.get("/capabilities", async (req, res, next) => {
    try {
      const cfg = await getConfigSnapshot();
      const txPref = cfg.relay_transcribe_backend_preference || "gemini_first";
      const hw = await resolveHardwareConfig(cfg);
      const hasHardware = !!hw.transcribe.url;
      const installId = req.header("X-Recap-Install-Id") || null;
      const auth = req.header("Authorization") || null;

      // ── TTS availability (audio-first "walking mode") ──
      // Operator-wide, not install-specific: whether ANY TTS backend can
      // serve a /relay/tts call given the operator's config. The Recap app
      // uses has_tts to decide whether to show the "Listen" button at all
      // (it additionally gates the feature to Max users on its own side).
      const ttsBackend = resolveTtsBackend(
        cfg.relay_tts_backend_preference || "hardware_first",
        !!hw.tts?.url,
        !!(cfg.relay_elevenlabs_api_key && cfg.relay_elevenlabs_voice_id),
      );
      const ttsCaps = {
        has_tts: !!ttsBackend,
        tts_backend: ttsBackend, // "kokoro" | "elevenlabs" | null
        tts_default_voice: cfg.relay_tts_default_voice || null,
      };

      // If we have an install_id, run the same routing logic the actual
      // transcribe route uses so the chunking decision matches the
      // backend that will actually serve the call.
      let answer = null;
      if (installId) {
        try {
          answer = await resolveInstallLimits({
            installId,
            auth,
            hasHardware,
            txPref,
          });
        } catch (err) {
          // License lookup or row read failed — fall through to the
          // anonymous path so the client at least gets safe defaults.
          console.warn(
            `[capabilities] install-aware resolve failed for ${installId}: ${err?.message || err} — falling back to operator-wide defaults`
          );
        }
      }

      // Anonymous (no install_id) or install-aware path failed.
      if (!answer) {
        answer = resolveOperatorLimits(hasHardware, txPref);
      }

      return res.json({
        ...answer.limits,
        ...ttsCaps,
        reason: answer.reason,
      });
    } catch (err) {
      // Config / hardware resolution failed. Forward to the error
      // middleware explicitly: without this, a rejected async handler
      // leaves the request hanging on Express 4.
      return next(err);
    }
  });

  return router;
}