187 lines
7.5 KiB
JavaScript
187 lines
7.5 KiB
JavaScript
// GET /relay/capabilities — per-install metadata for Recap clients to
|
|
// plan their audio handling. Tells Recap whether to chunk a long
|
|
// audio file before sending it, based on which backend THIS install's
|
|
// next transcribe call will actually route to.
|
|
//
|
|
// The decision is install-specific because the relay's routing
|
|
// preference combined with the install's tier + current Gemini cap
|
|
// consumption determines the backend per request. In `gemini_first`
|
|
// mode, the same operator config will route a fresh install to
|
|
// Gemini (chunking required) but route a cap-exhausted install to
|
|
// hardware (no chunking needed) — so a global capabilities answer
|
|
// would be wrong half the time.
|
|
//
|
|
// Inputs:
|
|
// X-Recap-Install-Id (optional but strongly recommended)
|
|
// Authorization (optional Bearer license — affects tier lookup)
|
|
//
|
|
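// Without an install_id (or when the install-aware lookup throws), the
// answer is derived from the operator-wide routing preference alone:
// hardware limits only when the preference is hardware_only /
// hardware_first AND a hardware transcribe URL is configured; otherwise
// Gemini-safe limits (the chunking path always works; the no-chunking
// path only works when hardware actually serves the call).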
|
|
//
|
|
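// Output shape (transcribe fields unchanged from v1 — pure additive on
// the routing logic; the TTS fields are included in every response):
//   {
//     max_audio_mb: number,
//     max_audio_minutes: number,
//     preferred_chunk_seconds: number | null,  // null = don't chunk
//     has_tts: boolean,
//     tts_backend: "kokoro" | "elevenlabs" | null,
//     tts_default_voice: <configured value> | null,
//     reason: string                           // human-readable
//   }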
|
|
|
|
import express from "express";
|
|
import { getConfigSnapshot, getTierQuotas } from "../config.js";
|
|
import { resolveLicense } from "../keysat-client.js";
|
|
import { getOrCreateRow, planBackend } from "../credits.js";
|
|
import { resolveHardwareConfig } from "../hardware-config.js";
|
|
|
|
// Limits reported when the Gemini backend is expected to serve the call.
//
// Gemini File API can handle audio up to ~9.5 hours per generateContent
// call and files up to 2GB. The conservative 60-min/30-MB ceiling we
// shipped originally was sized for free-tier worries that no longer
// apply on paid Gemini. Bumped to 240 min / 200 MB so Recap hits the
// relay-URL fast-path for content up to 4 hours instead of falling
// back to client-side chunked uploads (which lose the buyer-bandwidth
// savings and serialize the calls).
const GEMINI_LIMITS = Object.freeze({
  max_audio_mb: 200,
  max_audio_minutes: 240,
  // 45 min — server-side chunking still kicks in for stability on the
  // longest files, but only on the actual call; doesn't gate client-side
  // chunking.
  preferred_chunk_seconds: 2700,
});
|
|
|
|
// Limits reported when the operator's hardware backend is expected to
// serve the call. `preferred_chunk_seconds: null` tells the client not to
// chunk.
const HARDWARE_LIMITS = Object.freeze({
  // Effectively unbounded — Parakeet wrappers commonly handle 2+ hour
  // audio in one shot. Set high but finite ceilings so a 24-hour file
  // doesn't OOM the operator's GPU box silently.
  max_audio_mb: 500,
  max_audio_minutes: 240,
  preferred_chunk_seconds: null,
});
|
|
|
|
/**
 * Pick the TTS backend name implied by the operator's preference and by
 * which backends are usable.
 *
 * @param {string} preference - "hardware_only" | "cloud_only" |
 *   "cloud_first"; any other value is treated as "hardware_first".
 * @param {boolean} kokoroReady - true when a hardware TTS URL is configured.
 * @param {boolean} elevenConfigured - true when both the ElevenLabs API key
 *   and voice id are configured.
 * @returns {"kokoro" | "elevenlabs" | null} null when no backend qualifies.
 */
function selectTtsBackend(preference, kokoroReady, elevenConfigured) {
  const kokoro = kokoroReady ? "kokoro" : null;
  const eleven = elevenConfigured ? "elevenlabs" : null;
  switch (preference) {
    case "hardware_only":
      return kokoro;
    case "cloud_only":
      return eleven;
    case "cloud_first":
      return eleven ?? kokoro;
    default:
      // hardware_first (default) — also covers unrecognized values.
      return kokoro ?? eleven;
  }
}

/**
 * Build the router exposing GET /capabilities.
 *
 * The handler answers with one of the two limit sets (HARDWARE_LIMITS or
 * GEMINI_LIMITS) merged with the TTS availability fields and a
 * human-readable `reason`. It reads the optional `X-Recap-Install-Id` and
 * `Authorization` request headers.
 *
 * Failures while loading the config snapshot or hardware config are passed
 * to `next(err)` so Express' error handling produces the response instead
 * of the rejection going unhandled.
 *
 * @returns {import("express").Router}
 */
export function capabilitiesRouter() {
  const router = express.Router();

  router.get("/capabilities", async (req, res, next) => {
    try {
      const cfg = await getConfigSnapshot();
      const txPref =
        cfg.relay_transcribe_backend_preference || "gemini_first";
      const hw = await resolveHardwareConfig(cfg);
      const hasHardware = !!hw.transcribe.url;
      const installId = req.header("X-Recap-Install-Id") || null;
      const auth = req.header("Authorization") || null;

      // ── TTS availability (audio-first "walking mode") ──
      // Operator-wide, not install-specific: whether ANY TTS backend can
      // serve a /relay/tts call given the operator's config. The Recap app
      // uses has_tts to decide whether to show the "Listen" button at all
      // (it additionally gates the feature to Max users on its own side).
      const ttsPref = cfg.relay_tts_backend_preference || "hardware_first";
      const kokoroReady = !!hw.tts?.url;
      const elevenConfigured = !!(
        cfg.relay_elevenlabs_api_key && cfg.relay_elevenlabs_voice_id
      );
      const ttsBackend = selectTtsBackend(
        ttsPref,
        kokoroReady,
        elevenConfigured,
      );
      const ttsCaps = {
        has_tts: !!ttsBackend,
        tts_backend: ttsBackend, // "kokoro" | "elevenlabs" | null
        tts_default_voice: cfg.relay_tts_default_voice || null,
      };

      // Every response has the same layout: limits, then TTS fields, then
      // the reason.
      const reply = (limits, reason) =>
        res.json({ ...limits, ...ttsCaps, reason });

      // If we have an install_id, run the same routing logic the actual
      // transcribe route uses so the chunking decision matches the
      // backend that will actually serve the call.
      if (installId) {
        try {
          const license = await resolveLicense(auth);
          const row = await getOrCreateRow({ installId, license });
          // planBackend is given the row carrying the tier of the license
          // resolved just above. NOTE(review): whether this write is
          // persisted depends on getOrCreateRow — confirm.
          row.tier_snapshot = license.tier;
          const quota = await getTierQuotas();
          const plan = planBackend(row, quota, {
            hasHardware,
            preference: txPref,
          });

          if (plan.allowed && plan.backend === "hardware") {
            return reply(
              HARDWARE_LIMITS,
              `routing this install to hardware (pref=${txPref}, tier=${license.tier})`,
            );
          }
          if (plan.allowed && plan.backend === "gemini") {
            return reply(
              GEMINI_LIMITS,
              `routing this install to Gemini (pref=${txPref}, tier=${license.tier})`,
            );
          }

          // planBackend refused entirely (out of credits / no backend
          // configured). Return Gemini-safe defaults so the client still
          // chunks defensively and gets a clean 402 from the real
          // transcribe call rather than a confusing transport failure.
          return reply(
            GEMINI_LIMITS,
            `routing refused for this install (${plan.reason || "unknown"}) — returning Gemini-safe defaults`,
          );
        } catch (err) {
          // License lookup or row read failed — fall through to the
          // anonymous path so the client at least gets safe defaults.
          console.warn(
            `[capabilities] install-aware resolve failed for ${installId}: ${err?.message || err} — falling back to operator-wide defaults`
          );
        }
      }

      // Anonymous (no install_id) or install-aware path failed. Pick
      // capabilities from the operator-wide routing preference alone:
      //   hardware_only / hardware_first → hardware-safe limits (provided
      //                                    hardware is configured)
      //   gemini_only / gemini_first     → Gemini-safe (will always work
      //                                    for the first attempt; in
      //                                    gemini_first the eventual
      //                                    overflow to hardware can
      //                                    handle bigger files too, but
      //                                    chunking still works for both)
      //
      // When `hardware_first` is set but Parakeet isn't actually
      // configured, the relay will fall back to Gemini — so report
      // Gemini-safe limits in that case.
      const hardwareCapable =
        hasHardware &&
        (txPref === "hardware_only" || txPref === "hardware_first");
      if (hardwareCapable) {
        return reply(
          HARDWARE_LIMITS,
          `hardware-capable backend preference (${txPref})`,
        );
      }
      return reply(GEMINI_LIMITS, `Gemini-backed preference (${txPref})`);
    } catch (err) {
      // Config/hardware resolution (or anything else above) failed. Express 4
      // does not catch rejections from async handlers, so hand the error to
      // the error-handling chain explicitly.
      return next(err);
    }
  });

  return router;
}
|