Wire new routes: identity, summarize-url, dashboard, admin
This commit is contained in:
+166
-50
@@ -1,69 +1,185 @@
|
||||
// GET /relay/capabilities — operator-aware metadata for Recap clients
|
||||
// to plan their audio handling. Returns the upper bounds the relay's
|
||||
// CURRENT routing config can comfortably accept, so Recap can decide
|
||||
// whether to chunk a long video before sending it.
|
||||
// GET /relay/capabilities — per-install metadata for Recap clients to
|
||||
// plan their audio handling. Tells Recap whether to chunk a long
|
||||
// audio file before sending it, based on which backend THIS install's
|
||||
// next transcribe call will actually route to.
|
||||
//
|
||||
// Today's logic:
|
||||
// - When the operator's transcribe_backend_preference routes through
|
||||
// Gemini at all (gemini_first / gemini_only), we report Gemini-safe
|
||||
// limits (60 min / 30 MB / 2700 s chunks). Even with hardware as
|
||||
// overflow, the FIRST attempt is Gemini, which needs the chunk
|
||||
// budget.
|
||||
// - When the operator's preference is hardware-only (or hardware-
|
||||
// first with overflow to Gemini disabled in spirit), we report
|
||||
// "unbounded" — the operator's Parakeet wrapper can typically
|
||||
// ingest 2+ hour podcasts in a single shot, so chunking just adds
|
||||
// extra inference passes and timestamp-stitching overhead.
|
||||
// The decision is install-specific because the relay's routing
|
||||
// preference combined with the install's tier + current Gemini cap
|
||||
// consumption determines the backend per request. In `gemini_first`
|
||||
// mode, the same operator config will route a fresh install to
|
||||
// Gemini (chunking required) but route a cap-exhausted install to
|
||||
// hardware (no chunking needed) — so a global capabilities answer
|
||||
// would be wrong half the time.
|
||||
//
|
||||
// Recap reads this once on boot + on policy refresh; when its
|
||||
// transcriptionProvider is "relay", it honors these limits instead of
|
||||
// its own hardcoded thresholds. For non-relay providers, Recap's
|
||||
// internal per-provider thresholds apply.
|
||||
// Inputs:
|
||||
// X-Recap-Install-Id (optional but strongly recommended)
|
||||
// Authorization (optional Bearer license — affects tier lookup)
|
||||
//
|
||||
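// Without an install_id (or when the install-aware lookup fails), the
|
||||
// limits come from the operator-wide routing preference alone: hardware
// limits when hardware is configured and the preference is hardware_only
// or hardware_first, Gemini-safe limits otherwise
|
||||
// (the chunking path always works; the no-chunking path only works
// when hardware actually serves the call).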
|
||||
//
|
||||
// Output shape (unchanged from v1 — purely additive on the routing
|
||||
// logic):
|
||||
// {
|
||||
// max_audio_mb: number,
|
||||
// max_audio_minutes: number,
|
||||
// preferred_chunk_seconds: number | null, // null = don't chunk
|
||||
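// reason: string, // human-readable
// has_tts: boolean, // true when a TTS backend resolved from operator config
// tts_backend: "kokoro" | "elevenlabs" | null,
// tts_default_voice: <operator-configured value> | null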
|
||||
// }
|
||||
|
||||
import express from "express";
|
||||
import { getConfigSnapshot } from "../config.js";
|
||||
import { getConfigSnapshot, getTierQuotas } from "../config.js";
|
||||
import { resolveLicense } from "../keysat-client.js";
|
||||
import { getOrCreateRow, planBackend } from "../credits.js";
|
||||
import { resolveHardwareConfig } from "../hardware-config.js";
|
||||
|
||||
// Gemini File API can handle audio up to ~9.5 hours per generateContent
|
||||
// call and files up to 2GB. The conservative 60-min/30-MB ceiling we
|
||||
// shipped originally was sized for free-tier worries that no longer
|
||||
// apply on paid Gemini. Bumped to 240 min / 200 MB so Recap hits the
|
||||
// relay-URL fast-path for content up to 4 hours instead of falling
|
||||
// back to client-side chunked uploads (which lose the buyer-bandwidth
|
||||
// savings and serialize the calls).
|
||||
// Limits spread into the /capabilities response when the planned backend is
// Gemini, when routing is refused for the install, and on the operator-wide
// fallback for Gemini-backed preferences. Frozen so the shared object is not
// mutated between requests.
const GEMINI_LIMITS = Object.freeze({
|
||||
max_audio_mb: 200,
|
||||
max_audio_minutes: 240,
|
||||
preferred_chunk_seconds: 2700, // 45 min — server-side chunking still
|
||||
// kicks in for stability on the longest
|
||||
// files, but only on the actual call;
|
||||
// doesn't gate client-side chunking.
|
||||
});
|
||||
|
||||
// Limits spread into the /capabilities response when the planned backend is
// hardware, and on the operator-wide fallback for hardware-capable
// preferences. preferred_chunk_seconds is null, i.e. "don't chunk".
// NOTE(review): max_audio_minutes (240) equals the Gemini ceiling; only the MB
// ceiling and the null chunk preference differ — confirm that is intended
// given the "effectively unbounded" wording below.
const HARDWARE_LIMITS = Object.freeze({
|
||||
// Effectively unbounded — Parakeet wrappers commonly handle 2+ hour
|
||||
// audio in one shot. Set high but finite ceilings so a 24-hour file
|
||||
// doesn't OOM the operator's GPU box silently.
|
||||
max_audio_mb: 500,
|
||||
max_audio_minutes: 240,
|
||||
preferred_chunk_seconds: null,
|
||||
});
|
||||
|
||||
export function capabilitiesRouter() {
|
||||
const router = express.Router();
|
||||
|
||||
router.get("/capabilities", async (_req, res) => {
|
||||
router.get("/capabilities", async (req, res) => {
|
||||
const cfg = await getConfigSnapshot();
|
||||
const txPref =
|
||||
cfg.relay_transcribe_backend_preference || "gemini_first";
|
||||
const hasParakeet = !!cfg.relay_parakeet_base_url;
|
||||
const hw = await resolveHardwareConfig(cfg);
|
||||
const hasHardware = !!hw.transcribe.url;
|
||||
const installId = req.header("X-Recap-Install-Id") || null;
|
||||
const auth = req.header("Authorization") || null;
|
||||
|
||||
// Conservative default: Gemini-safe limits unless the operator has
|
||||
// explicitly said "use hardware (only or first) and I've got a
|
||||
// Parakeet endpoint wired up". Without the Parakeet endpoint we
|
||||
// can't make use of larger inputs — Gemini's the only path —
|
||||
// so we'd just be lying to the client.
|
||||
// ── TTS availability (audio-first "walking mode") ──
|
||||
// Operator-wide, not install-specific: whether ANY TTS backend can
|
||||
// serve a /relay/tts call given the operator's config. The Recap app
|
||||
// uses has_tts to decide whether to show the "Listen" button at all
|
||||
// (it additionally gates the feature to Max users on its own side).
|
||||
const ttsPref = cfg.relay_tts_backend_preference || "hardware_first";
|
||||
const kokoroReady = !!hw.tts?.url;
|
||||
const elevenConfigured = !!(
|
||||
cfg.relay_elevenlabs_api_key && cfg.relay_elevenlabs_voice_id
|
||||
);
|
||||
const ttsBackend =
|
||||
ttsPref === "hardware_only"
|
||||
? kokoroReady
|
||||
? "kokoro"
|
||||
: null
|
||||
: ttsPref === "cloud_only"
|
||||
? elevenConfigured
|
||||
? "elevenlabs"
|
||||
: null
|
||||
: ttsPref === "cloud_first"
|
||||
? elevenConfigured
|
||||
? "elevenlabs"
|
||||
: kokoroReady
|
||||
? "kokoro"
|
||||
: null
|
||||
: kokoroReady // hardware_first (default)
|
||||
? "kokoro"
|
||||
: elevenConfigured
|
||||
? "elevenlabs"
|
||||
: null;
|
||||
const ttsCaps = {
|
||||
has_tts: !!ttsBackend,
|
||||
tts_backend: ttsBackend, // "kokoro" | "elevenlabs" | null
|
||||
tts_default_voice: cfg.relay_tts_default_voice || null,
|
||||
};
|
||||
|
||||
// If we have an install_id, run the same routing logic the actual
|
||||
// transcribe route uses so the chunking decision matches the
|
||||
// backend that will actually serve the call.
|
||||
if (installId) {
|
||||
try {
|
||||
const license = await resolveLicense(auth);
|
||||
const row = await getOrCreateRow({ installId, license });
|
||||
row.tier_snapshot = license.tier;
|
||||
const quota = await getTierQuotas();
|
||||
const plan = planBackend(row, quota, {
|
||||
hasHardware,
|
||||
preference: txPref,
|
||||
});
|
||||
if (plan.allowed && plan.backend === "hardware") {
|
||||
return res.json({
|
||||
...HARDWARE_LIMITS,
|
||||
...ttsCaps,
|
||||
reason: `routing this install to hardware (pref=${txPref}, tier=${license.tier})`,
|
||||
});
|
||||
}
|
||||
if (plan.allowed && plan.backend === "gemini") {
|
||||
return res.json({
|
||||
...GEMINI_LIMITS,
|
||||
...ttsCaps,
|
||||
reason: `routing this install to Gemini (pref=${txPref}, tier=${license.tier})`,
|
||||
});
|
||||
}
|
||||
// planBackend refused entirely (out of credits / no backend
|
||||
// configured). Return Gemini-safe defaults so the client still
|
||||
// chunks defensively and gets a clean 402 from the real
|
||||
// transcribe call rather than a confusing transport failure.
|
||||
return res.json({
|
||||
...GEMINI_LIMITS,
|
||||
...ttsCaps,
|
||||
reason: `routing refused for this install (${plan.reason || "unknown"}) — returning Gemini-safe defaults`,
|
||||
});
|
||||
} catch (err) {
|
||||
// License lookup or row read failed — fall through to the
|
||||
// anonymous path so the client at least gets safe defaults.
|
||||
console.warn(
|
||||
`[capabilities] install-aware resolve failed for ${installId}: ${err?.message || err} — falling back to operator-wide defaults`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Anonymous (no install_id) or install-aware path failed. Pick
|
||||
// capabilities from the operator-wide routing preference alone:
|
||||
// hardware_only / hardware_first → hardware-safe limits (provided
|
||||
// hardware is configured)
|
||||
// gemini_only / gemini_first → Gemini-safe (will always work
|
||||
// for the first attempt; in
|
||||
// gemini_first the eventual
|
||||
// overflow to hardware can
|
||||
// handle bigger files too, but
|
||||
// chunking still works for both)
|
||||
//
|
||||
// When `hardware_first` is set but Parakeet isn't actually
|
||||
// configured, the relay will fall back to Gemini — so report
|
||||
// Gemini-safe limits in that case.
|
||||
const hardwareCapable =
|
||||
hasParakeet && (txPref === "hardware_only" || txPref === "hardware_first");
|
||||
|
||||
hasHardware && (txPref === "hardware_only" || txPref === "hardware_first");
|
||||
if (hardwareCapable) {
|
||||
res.json({
|
||||
// Effectively unbounded — Parakeet wrappers commonly handle 2+
|
||||
// hour audio in one shot. Set high but finite ceilings so a
|
||||
// 24-hour file doesn't OOM the operator's GPU box silently.
|
||||
max_audio_mb: 500,
|
||||
max_audio_minutes: 240,
|
||||
preferred_chunk_seconds: null,
|
||||
// Diagnostic — Recap doesn't need this but the dashboard / a
|
||||
// curious operator might want to know which limit shape they
|
||||
// returned and why.
|
||||
reason: "hardware-capable backend preference (" + txPref + ")",
|
||||
});
|
||||
} else {
|
||||
res.json({
|
||||
// Gemini File-API + practical reliability limits. Matches
|
||||
// Recap's pre-relay defaults so existing chunking behavior
|
||||
// is preserved.
|
||||
max_audio_mb: 30,
|
||||
max_audio_minutes: 60,
|
||||
preferred_chunk_seconds: 2700, // 45 min chunks
|
||||
reason: "Gemini-backed preference (" + txPref + ")",
|
||||
return res.json({
|
||||
...HARDWARE_LIMITS,
|
||||
...ttsCaps,
|
||||
reason: `hardware-capable backend preference (${txPref})`,
|
||||
});
|
||||
}
|
||||
return res.json({
|
||||
...GEMINI_LIMITS,
|
||||
...ttsCaps,
|
||||
reason: `Gemini-backed preference (${txPref})`,
|
||||
});
|
||||
});
|
||||
|
||||
return router;
|
||||
|
||||
Reference in New Issue
Block a user