Wire new routes; identity, summarize-url, dashboard, admin

2026-06-13 13:36:30 -05:00
parent 04dcf86fa4
commit 318c6c4b81
20 changed files with 12407 additions and 499 deletions
@@ -1,69 +1,185 @@
-// GET /relay/capabilities — operator-aware metadata for Recap clients
-// to plan their audio handling. Returns the upper bounds the relay's
-// CURRENT routing config can comfortably accept, so Recap can decide
-// whether to chunk a long video before sending it.
+// GET /relay/capabilities — per-install metadata for Recap clients to
+// plan their audio handling. Tells Recap whether to chunk a long
+// audio file before sending it, based on which backend THIS install's
+// next transcribe call will actually route to.
 //
-// Today's logic:
-//   - When the operator's transcribe_backend_preference routes through
-//     Gemini at all (gemini_first / gemini_only), we report Gemini-safe
-//     limits (60 min / 30 MB / 2700 s chunks). Even with hardware as
-//     overflow, the FIRST attempt is Gemini, which needs the chunk
-//     budget.
-//   - When the operator's preference is hardware-only (or hardware-
-//     first with overflow to Gemini disabled in spirit), we report
-//     "unbounded" — the operator's Parakeet wrapper can typically
-//     ingest 2+ hour podcasts in a single shot, so chunking just adds
-//     extra inference passes and timestamp-stitching overhead.
+// The decision is install-specific because the relay's routing
+// preference combined with the install's tier + current Gemini cap
+// consumption determines the backend per request. In `gemini_first`
+// mode, the same operator config will route a fresh install to
+// Gemini (chunking required) but route a cap-exhausted install to
+// hardware (no chunking needed) — so a global capabilities answer
+// would be wrong half the time.
 //
-// Recap reads this once on boot + on policy refresh; when its
-// transcriptionProvider is "relay", it honors these limits instead of
-// its own hardcoded thresholds. For non-relay providers, Recap's
-// internal per-provider thresholds apply.
+// Inputs:
+//   X-Recap-Install-Id  (optional but strongly recommended)
+//   Authorization       (optional Bearer license — affects tier lookup)
+//
+// Without an install_id, falls back to the operator-wide preference:
+// hardware limits if hardware is configured and preferred, otherwise
+// Gemini-safe limits (the chunking path always works on either backend).
+//
+// Output shape (v1 fields unchanged — purely additive; every response
+// also carries has_tts, tts_backend and tts_default_voice):
+//   {
+//     max_audio_mb:           number,
+//     max_audio_minutes:      number,
+//     preferred_chunk_seconds: number | null,  // null = don't chunk
+//     reason:                 string           // human-readable
+//   }

 import express from "express";
-import { getConfigSnapshot } from "../config.js";
+import { getConfigSnapshot, getTierQuotas } from "../config.js";
+import { resolveLicense } from "../keysat-client.js";
+import { getOrCreateRow, planBackend } from "../credits.js";
+import { resolveHardwareConfig } from "../hardware-config.js";
+
+// Gemini File API can handle audio up to ~9.5 hours per generateContent
+// call and files up to 2GB. The conservative 60-min/30-MB ceiling we
+// shipped originally was sized for free-tier worries that no longer
+// apply on paid Gemini. Bumped to 240 min / 200 MB so Recap hits the
+// relay-URL fast-path for content up to 4 hours instead of falling
+// back to client-side chunked uploads (which lose the buyer-bandwidth
+// savings and serialize the calls).
+const GEMINI_LIMITS = Object.freeze({
+  max_audio_mb: 200,
+  max_audio_minutes: 240,
+  preferred_chunk_seconds: 2700, // 45 min — server-side chunking still
+                                 // kicks in for stability on the longest
+                                 // files, but only on the actual call;
+                                 // doesn't gate client-side chunking.
+});
+
+const HARDWARE_LIMITS = Object.freeze({
+  // No client-side chunking — Parakeet wrappers commonly handle 2+ hour
+  // audio in one shot. Ceilings stay high but finite so a 24-hour file
+  // doesn't OOM the operator's GPU box silently.
+  max_audio_mb: 500,
+  max_audio_minutes: 240,
+  preferred_chunk_seconds: null,
+});

 export function capabilitiesRouter() {
  const router = express.Router();

-  router.get("/capabilities", async (_req, res) => {
+  router.get("/capabilities", async (req, res) => {
    const cfg = await getConfigSnapshot();
    const txPref =
      cfg.relay_transcribe_backend_preference || "gemini_first";
-    const hasParakeet = !!cfg.relay_parakeet_base_url;
+    const hw = await resolveHardwareConfig(cfg);
+    const hasHardware = !!hw.transcribe.url;
+    const installId = req.header("X-Recap-Install-Id") || null;
+    const auth = req.header("Authorization") || null;

-    // Conservative default: Gemini-safe limits unless the operator has
-    // explicitly said "use hardware (only or first) and I've got a
-    // Parakeet endpoint wired up". Without the Parakeet endpoint we
-    // can't make use of larger inputs — Gemini's the only path —
-    // so we'd just be lying to the client.
+    // ── TTS availability (audio-first "walking mode") ──
+    // Operator-wide, not install-specific: whether ANY TTS backend can
+    // serve a /relay/tts call given the operator's config. The Recap app
+    // uses has_tts to decide whether to show the "Listen" button at all
+    // (it additionally gates the feature to Max users on its own side).
+    const ttsPref = cfg.relay_tts_backend_preference || "hardware_first";
+    const kokoroReady = !!hw.tts?.url;
+    const elevenConfigured = !!(
+      cfg.relay_elevenlabs_api_key && cfg.relay_elevenlabs_voice_id
+    );
+    const ttsBackend =
+      ttsPref === "hardware_only"
+        ? kokoroReady
+          ? "kokoro"
+          : null
+        : ttsPref === "cloud_only"
+        ? elevenConfigured
+          ? "elevenlabs"
+          : null
+        : ttsPref === "cloud_first"
+        ? elevenConfigured
+          ? "elevenlabs"
+          : kokoroReady
+          ? "kokoro"
+          : null
+        : kokoroReady // hardware_first (default)
+        ? "kokoro"
+        : elevenConfigured
+        ? "elevenlabs"
+        : null;
+    const ttsCaps = {
+      has_tts: !!ttsBackend,
+      tts_backend: ttsBackend, // "kokoro" | "elevenlabs" | null
+      tts_default_voice: cfg.relay_tts_default_voice || null,
+    };
+
+    // If we have an install_id, run the same routing logic the actual
+    // transcribe route uses so the chunking decision matches the
+    // backend that will actually serve the call.
+    if (installId) {
+      try {
+        const license = await resolveLicense(auth);
+        const row = await getOrCreateRow({ installId, license });
+        row.tier_snapshot = license.tier;
+        const quota = await getTierQuotas();
+        const plan = planBackend(row, quota, {
+          hasHardware,
+          preference: txPref,
+        });
+        if (plan.allowed && plan.backend === "hardware") {
+          return res.json({
+            ...HARDWARE_LIMITS,
+            ...ttsCaps,
+            reason: `routing this install to hardware (pref=${txPref}, tier=${license.tier})`,
+          });
+        }
+        if (plan.allowed && plan.backend === "gemini") {
+          return res.json({
+            ...GEMINI_LIMITS,
+            ...ttsCaps,
+            reason: `routing this install to Gemini (pref=${txPref}, tier=${license.tier})`,
+          });
+        }
+        // planBackend refused entirely (out of credits / no backend
+        // configured). Return Gemini-safe defaults so the client still
+        // chunks defensively and gets a clean 402 from the real
+        // transcribe call rather than a confusing transport failure.
+        return res.json({
+          ...GEMINI_LIMITS,
+          ...ttsCaps,
+          reason: `routing refused for this install (${plan.reason || "unknown"}) — returning Gemini-safe defaults`,
+        });
+      } catch (err) {
+        // License lookup or row read failed — fall through to the
+        // anonymous path so the client at least gets safe defaults.
+        console.warn(
+          `[capabilities] install-aware resolve failed for ${installId}: ${err?.message || err} — falling back to operator-wide defaults`
+        );
+      }
+    }
+
+    // Anonymous (no install_id) or install-aware path failed. Pick
+    // capabilities from the operator-wide routing preference alone:
+    //   hardware_only / hardware_first → hardware-safe limits (provided
+    //                                    hardware is configured)
+    //   gemini_only / gemini_first     → Gemini-safe (will always work
+    //                                    for the first attempt; in
+    //                                    gemini_first the eventual
+    //                                    overflow to hardware can
+    //                                    handle bigger files too, but
+    //                                    chunking still works for both)
+    //
+    // When `hardware_first` / `hardware_only` is set but no hardware
+    // transcribe endpoint is actually configured, report Gemini-safe
+    // limits (in `hardware_first` the relay falls back to Gemini).
    const hardwareCapable =
-      hasParakeet && (txPref === "hardware_only" || txPref === "hardware_first");
-
+      hasHardware && (txPref === "hardware_only" || txPref === "hardware_first");
    if (hardwareCapable) {
-      res.json({
-        // Effective unbounded — Parakeet wrappers commonly handle 2+
-        // hour audio in one shot. Set high but finite ceilings so a
-        // 24-hour file doesn't OOM the operator's GPU box silently.
-        max_audio_mb: 500,
-        max_audio_minutes: 240,
-        preferred_chunk_seconds: null,
-        // Diagnostic — Recap doesn't need this but the dashboard / a
-        // curious operator might want to know which limit shape they
-        // returned and why.
-        reason: "hardware-capable backend preference (" + txPref + ")",
-      });
-    } else {
-      res.json({
-        // Gemini File-API + practical reliability limits. Matches
-        // Recap's pre-relay defaults so existing chunking behavior
-        // is preserved.
-        max_audio_mb: 30,
-        max_audio_minutes: 60,
-        preferred_chunk_seconds: 2700, // 45 min chunks
-        reason: "Gemini-backed preference (" + txPref + ")",
+      return res.json({
+        ...HARDWARE_LIMITS,
+        ...ttsCaps,
+        reason: `hardware-capable backend preference (${txPref})`,
      });
    }
+    return res.json({
+      ...GEMINI_LIMITS,
+      ...ttsCaps,
+      reason: `Gemini-backed preference (${txPref})`,
+    });
  });

  return router;