Files
recap-relay/server/routes/capabilities.js
T

71 lines
2.9 KiB
JavaScript

// GET /relay/capabilities — operator-aware metadata for Recap clients
// to plan their audio handling. Returns the upper bounds the relay's
// CURRENT routing config can comfortably accept, so Recap can decide
// whether to chunk a long video before sending it.
//
// Today's logic:
// - When the operator's transcribe_backend_preference routes through
// Gemini at all (gemini_first / gemini_only), we report Gemini-safe
// limits (60 min / 30 MB / 2700 s chunks). Even with hardware as
// overflow, the FIRST attempt is Gemini, which needs the chunk
// budget.
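// - When the operator's preference is hardware-only (or hardware-
// first with overflow to Gemini disabled in spirit) AND a Parakeet
// base URL is configured, we report effectively "unbounded" limits
// (high but finite: 500 MB / 240 min, no preferred chunk size) —
// the operator's Parakeet wrapper can typically ingest 2+ hour
// podcasts in a single shot, so chunking just adds extra inference
// passes and timestamp-stitching overhead. Without a Parakeet URL
// we fall back to the Gemini-safe limits above.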
//
// Recap reads this once on boot + on policy refresh; when its
// transcriptionProvider is "relay", it honors these limits instead of
// its own hardcoded thresholds. For non-relay providers, Recap's
// internal per-provider thresholds apply.
import express from "express";
import { getConfigSnapshot } from "../config.js";
/**
 * Builds the router that serves `GET /capabilities`.
 *
 * The handler reads the current config snapshot and replies with the
 * audio limits that match the operator's transcription routing:
 *
 * - hardware-capable (preference is `hardware_only` or `hardware_first`
 *   AND `relay_parakeet_base_url` is set): 500 MB / 240 min, with no
 *   preferred chunk size (`preferred_chunk_seconds: null`).
 * - anything else: Gemini-safe limits of 30 MB / 60 min with 2700 s
 *   preferred chunks.
 *
 * Every response also carries a human-readable `reason` string naming
 * the preference that selected the limit shape.
 *
 * If reading the config (or sending the response) fails, the error is
 * forwarded to `next(err)` so the app's error-handling middleware can
 * answer the request instead of leaving it hanging.
 *
 * @returns {object} The router created by `express.Router()` with the
 *   `/capabilities` route registered.
 */
export function capabilitiesRouter() {
  const router = express.Router();

  router.get("/capabilities", async (_req, res, next) => {
    try {
      const cfg = await getConfigSnapshot();

      // `||` (not `??`) on purpose: an empty-string preference should
      // fall back to the default just like a missing one.
      const txPref =
        cfg.relay_transcribe_backend_preference || "gemini_first";
      const hasParakeet = !!cfg.relay_parakeet_base_url;

      // Conservative default: Gemini-safe limits unless the operator has
      // explicitly said "use hardware (only or first) and I've got a
      // Parakeet endpoint wired up". Without the Parakeet endpoint we
      // can't make use of larger inputs — Gemini's the only path —
      // so we'd just be lying to the client.
      const hardwareCapable =
        hasParakeet &&
        (txPref === "hardware_only" || txPref === "hardware_first");

      if (hardwareCapable) {
        res.json({
          // Effectively unbounded — Parakeet wrappers commonly handle
          // 2+ hour audio in one shot. Set high but finite ceilings so
          // a 24-hour file doesn't OOM the operator's GPU box silently.
          max_audio_mb: 500,
          max_audio_minutes: 240,
          preferred_chunk_seconds: null,
          // Diagnostic — Recap doesn't need this but the dashboard / a
          // curious operator might want to know which limit shape they
          // returned and why.
          reason: `hardware-capable backend preference (${txPref})`,
        });
        return;
      }

      res.json({
        // Gemini File-API + practical reliability limits. Matches
        // Recap's pre-relay defaults so existing chunking behavior
        // is preserved.
        max_audio_mb: 30,
        max_audio_minutes: 60,
        preferred_chunk_seconds: 2700, // 45 min chunks
        reason: `Gemini-backed preference (${txPref})`,
      });
    } catch (err) {
      // Express 4 does not catch rejections from async handlers; without
      // this the request would hang and the rejection would go unhandled.
      next(err);
    }
  });

  return router;
}