Files
recap-relay/server/routes/capabilities.js
T

187 lines
7.5 KiB
JavaScript

// GET /relay/capabilities — per-install metadata for Recap clients to
// plan their audio handling. Tells Recap whether to chunk a long
// audio file before sending it, based on which backend THIS install's
// next transcribe call will actually route to.
//
// The decision is install-specific because the relay's routing
// preference combined with the install's tier + current Gemini cap
// consumption determines the backend per request. In `gemini_first`
// mode, the same operator config will route a fresh install to
// Gemini (chunking required) but route a cap-exhausted install to
// hardware (no chunking needed) — so a global capabilities answer
// would be wrong half the time.
//
// Inputs:
// X-Recap-Install-Id (optional but strongly recommended)
// Authorization (optional Bearer license — affects tier lookup)
//
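// Without an install_id (or when the install-aware lookup fails), the
// answer comes from the operator-wide routing preference alone:
// hardware limits when the preference is hardware_only / hardware_first
// and a hardware transcribe URL is configured, Gemini-safe limits
// otherwise (the chunking path always works; the no-chunking path only
// works when hardware actually serves the call).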
//
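// Output shape (the audio-limit fields are unchanged from v1; the TTS
// fields are purely additive):
// {
// max_audio_mb: number,
// max_audio_minutes: number,
// preferred_chunk_seconds: number | null, // null = don't chunk
// has_tts: boolean, // true when a TTS backend is usable
// tts_backend: "kokoro" | "elevenlabs" | null,
// tts_default_voice: configured default voice, or null
// reason: string // human-readable
// }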
import express from "express";
import { getConfigSnapshot, getTierQuotas } from "../config.js";
import { resolveLicense } from "../keysat-client.js";
import { getOrCreateRow, planBackend } from "../credits.js";
import { resolveHardwareConfig } from "../hardware-config.js";
// Gemini File API can handle audio up to ~9.5 hours per generateContent
// call and files up to 2GB. The conservative 60-min/30-MB ceiling we
// shipped originally was sized for free-tier worries that no longer
// apply on paid Gemini. Bumped to 240 min / 200 MB so Recap hits the
// relay-URL fast-path for content up to 4 hours instead of falling
// back to client-side chunked uploads (which lose the buyer-bandwidth
// savings and serialize the calls).
// Limits advertised to the client when Gemini is the backend it should
// plan for. Frozen so route handlers can spread it into responses without
// any risk of mutating the shared object.
const GEMINI_LIMITS = Object.freeze({
  // Largest upload, in megabytes, the client may send in one piece.
  max_audio_mb: 200,

  // Longest audio, in minutes, the client may send in one piece (4 hours).
  max_audio_minutes: 4 * 60,

  // 45 minutes. Server-side chunking still kicks in for stability on the
  // longest files, but only on the actual call; this value does not gate
  // client-side chunking.
  preferred_chunk_seconds: 45 * 60,
});
// Limits advertised to the client when the operator's hardware backend is
// the one it should plan for. Parakeet wrappers commonly handle 2+ hour
// audio in one shot, so these are effectively "unbounded" for normal use;
// the ceilings stay finite so that a 24-hour file cannot silently OOM the
// operator's GPU box.
const HARDWARE_LIMITS = Object.freeze({
  // Largest upload, in megabytes, the client may send in one piece.
  max_audio_mb: 500,

  // Longest audio, in minutes, the client may send in one piece (4 hours).
  max_audio_minutes: 4 * 60,

  // null tells the client not to chunk at all.
  preferred_chunk_seconds: null,
});
/**
 * Pick the TTS backend the operator's configuration can serve.
 *
 * @param {string} preference - One of "hardware_only", "cloud_only",
 *   "cloud_first" or "hardware_first". Any other value is treated as
 *   "hardware_first", the default.
 * @param {boolean} kokoroReady - True when a hardware TTS URL is configured.
 * @param {boolean} elevenConfigured - True when both the ElevenLabs API key
 *   and voice id are configured.
 * @returns {"kokoro" | "elevenlabs" | null} The chosen backend, or null when
 *   the preference cannot be satisfied by anything that is configured.
 */
function pickTtsBackend(preference, kokoroReady, elevenConfigured) {
  const kokoro = kokoroReady ? "kokoro" : null;
  const eleven = elevenConfigured ? "elevenlabs" : null;

  switch (preference) {
    case "hardware_only":
      return kokoro;
    case "cloud_only":
      return eleven;
    case "cloud_first":
      return eleven ?? kokoro;
    default:
      // hardware_first (default) and any unrecognised value.
      return kokoro ?? eleven;
  }
}

/**
 * Run the install-aware routing plan and translate it into the limits the
 * client should plan for.
 *
 * @param {object} args
 * @param {string} args.installId - Value of the X-Recap-Install-Id header.
 * @param {string | null} args.auth - Raw Authorization header, or null.
 * @param {boolean} args.hasHardware - Whether a hardware transcribe URL is
 *   configured.
 * @param {string} args.txPref - Operator transcribe backend preference.
 * @returns {Promise<{ limits: object, reason: string }>} The limits constant
 *   to report and a human-readable explanation.
 * @throws Whatever resolveLicense, getOrCreateRow, getTierQuotas or
 *   planBackend throw; the caller treats any failure as "fall back to the
 *   operator-wide defaults".
 */
async function planInstallCapabilities({ installId, auth, hasHardware, txPref }) {
  const license = await resolveLicense(auth);
  const row = await getOrCreateRow({ installId, license });
  // Overwrite the row's tier snapshot with the tier from the license that
  // was just resolved, before handing the row to planBackend.
  row.tier_snapshot = license.tier;
  const quota = await getTierQuotas();
  const plan = planBackend(row, quota, {
    hasHardware,
    preference: txPref,
  });

  if (plan.allowed && plan.backend === "hardware") {
    return {
      limits: HARDWARE_LIMITS,
      reason: `routing this install to hardware (pref=${txPref}, tier=${license.tier})`,
    };
  }
  if (plan.allowed && plan.backend === "gemini") {
    return {
      limits: GEMINI_LIMITS,
      reason: `routing this install to Gemini (pref=${txPref}, tier=${license.tier})`,
    };
  }
  // planBackend refused entirely (out of credits / no backend configured).
  // Report Gemini-safe defaults so the client still chunks defensively and
  // gets a clean 402 from the real transcribe call rather than a confusing
  // transport failure.
  return {
    limits: GEMINI_LIMITS,
    reason: `routing refused for this install (${plan.reason || "unknown"}) — returning Gemini-safe defaults`,
  };
}

/**
 * Build the router that serves GET /capabilities.
 *
 * The response tells a Recap client which audio limits to plan for
 * (HARDWARE_LIMITS or GEMINI_LIMITS), whether TTS is available, and a
 * human-readable `reason` for the decision.
 *
 * Request headers read:
 *   X-Recap-Install-Id - optional; enables the install-aware routing plan.
 *   Authorization      - optional; passed to the license lookup.
 *
 * Failures while loading the operator config or the hardware config are
 * forwarded to Express via next(err) instead of being left as an unhandled
 * promise rejection. Failures in the install-aware lookup are logged and
 * degrade to the operator-wide defaults.
 *
 * @returns {import("express").Router} Router with the /capabilities route.
 */
export function capabilitiesRouter() {
  const router = express.Router();

  router.get("/capabilities", async (req, res, next) => {
    try {
      const cfg = await getConfigSnapshot();
      const txPref = cfg.relay_transcribe_backend_preference || "gemini_first";
      const hw = await resolveHardwareConfig(cfg);
      // A hardware config without a transcribe section means "no hardware",
      // not a crash; guarded the same way as the TTS section below.
      const hasHardware = Boolean(hw?.transcribe?.url);
      const installId = req.header("X-Recap-Install-Id") || null;
      const auth = req.header("Authorization") || null;

      // ── TTS availability (audio-first "walking mode") ──
      // Operator-wide, not install-specific: whether ANY TTS backend can
      // serve a /relay/tts call given the operator's config. The Recap app
      // uses has_tts to decide whether to show the "Listen" button at all
      // (it additionally gates the feature to Max users on its own side).
      const ttsBackend = pickTtsBackend(
        cfg.relay_tts_backend_preference || "hardware_first",
        Boolean(hw?.tts?.url),
        Boolean(cfg.relay_elevenlabs_api_key && cfg.relay_elevenlabs_voice_id)
      );
      const ttsCaps = {
        has_tts: Boolean(ttsBackend),
        tts_backend: ttsBackend, // "kokoro" | "elevenlabs" | null
        tts_default_voice: cfg.relay_tts_default_voice || null,
      };

      // Every response has the same layout: limits, then TTS caps, then reason.
      const reply = (limits, reason) =>
        res.json({ ...limits, ...ttsCaps, reason });

      // With an install_id, run the same routing logic the actual transcribe
      // route uses so the chunking decision matches the backend that will
      // actually serve the call.
      if (installId) {
        let decision = null;
        try {
          decision = await planInstallCapabilities({
            installId,
            auth,
            hasHardware,
            txPref,
          });
        } catch (err) {
          // License lookup or row read failed — fall through to the
          // anonymous path so the client at least gets safe defaults.
          console.warn(
            `[capabilities] install-aware resolve failed for ${installId}: ${err?.message || err} — falling back to operator-wide defaults`
          );
        }
        if (decision) {
          return reply(decision.limits, decision.reason);
        }
      }

      // Anonymous (no install_id) or install-aware path failed. Pick
      // capabilities from the operator-wide routing preference alone:
      //   hardware_only / hardware_first → hardware limits, provided hardware
      //     is configured
      //   anything else                  → Gemini-safe limits (chunking works
      //     for both backends, so this is always a safe answer)
      //
      // When `hardware_first` is set but no hardware transcribe URL is
      // configured, the relay will fall back to Gemini — so report
      // Gemini-safe limits in that case.
      const prefersHardware =
        txPref === "hardware_only" || txPref === "hardware_first";
      if (hasHardware && prefersHardware) {
        return reply(
          HARDWARE_LIMITS,
          `hardware-capable backend preference (${txPref})`
        );
      }
      return reply(GEMINI_LIMITS, `Gemini-backed preference (${txPref})`);
    } catch (err) {
      // Express 4 does not observe a rejected promise returned by a handler;
      // hand the error to the error middleware explicitly.
      return next(err);
    }
  });

  return router;
}