// recap-relay/server/routes/capabilities.js

// GET /relay/capabilities — per-install metadata for Recap clients to
// plan their audio handling. Tells Recap whether to chunk a long
// audio file before sending it, based on which backend THIS install's
// next transcribe call will actually route to.
//
// The decision is install-specific because the relay's routing
// preference combined with the install's tier + current Gemini cap
// consumption determines the backend per request. In `gemini_first`
// mode, the same operator config will route a fresh install to
// Gemini (chunking required) but route a cap-exhausted install to
// hardware (no chunking needed) — so a global capabilities answer
// would be wrong half the time.
//
// Inputs:
//   X-Recap-Install-Id  (optional but strongly recommended)
//   Authorization       (optional Bearer license — affects tier lookup)
//
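// Without an install_id (or when the install-aware lookup fails), the
// answer is derived from the operator-wide routing preference alone:
// hardware limits when hardware is configured and the preference is
// hardware_only / hardware_first, Gemini-safe limits otherwise (the
// chunking path always works; the no-chunking path only works when
// hardware actually serves the call).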
//
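// Output shape (audio fields unchanged from v1 — pure additive on the
// routing logic; the operator-wide TTS fields are included in every
// response):
//   {
//     max_audio_mb:           number,
//     max_audio_minutes:      number,
//     preferred_chunk_seconds: number | null,  // null = don't chunk
//     has_tts:                boolean,         // any TTS backend usable
//     tts_backend:            "kokoro" | "elevenlabs" | null,
//     tts_default_voice:      configured default voice, or null
//     reason:                 string           // human-readable
//   }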

import express from "express";
import { getConfigSnapshot, getTierQuotas } from "../config.js";
import { resolveLicense } from "../keysat-client.js";
import { getOrCreateRow, planBackend } from "../credits.js";
import { resolveHardwareConfig } from "../hardware-config.js";

/**
 * Limits advertised when the call is expected to be served by Gemini.
 *
 * The Gemini File API can take audio up to ~9.5 hours per
 * generateContent call and files up to 2GB. The original 60-min / 30-MB
 * ceiling was sized for free-tier worries that no longer apply on paid
 * Gemini, so it was raised to 240 min / 200 MB: Recap then hits the
 * relay-URL fast-path for content up to 4 hours instead of falling back
 * to client-side chunked uploads (which lose the buyer-bandwidth savings
 * and serialize the calls).
 *
 * preferred_chunk_seconds is 45 minutes — server-side chunking still
 * kicks in for stability on the longest files, but only on the actual
 * call; it doesn't gate client-side chunking.
 */
const GEMINI_LIMITS = Object.freeze({
  max_audio_mb: 200,
  max_audio_minutes: 240,
  preferred_chunk_seconds: 45 * 60,
});

/**
 * Limits advertised when the call is expected to be served by the
 * operator's own hardware.
 *
 * Effectively unbounded — Parakeet wrappers commonly handle 2+ hour
 * audio in one shot. The ceilings are set high but finite so a 24-hour
 * file doesn't OOM the operator's GPU box silently. A null
 * preferred_chunk_seconds tells the client not to chunk.
 */
const HARDWARE_LIMITS = Object.freeze({
  max_audio_mb: 500,
  max_audio_minutes: 4 * 60,
  preferred_chunk_seconds: null,
});

/**
 * Pick the TTS backend that would serve a /relay/tts call, given the
 * operator's preference and which backends are actually usable.
 *
 * @param {string} preference - "hardware_only" | "cloud_only" |
 *   "cloud_first"; any other value is treated as "hardware_first".
 * @param {boolean} kokoroReady - true when a hardware TTS URL is configured.
 * @param {boolean} elevenConfigured - true when both the ElevenLabs API
 *   key and voice id are configured.
 * @returns {"kokoro" | "elevenlabs" | null} the chosen backend, or null
 *   when nothing usable matches the preference.
 */
function selectTtsBackend(preference, kokoroReady, elevenConfigured) {
  const kokoro = kokoroReady ? "kokoro" : null;
  const eleven = elevenConfigured ? "elevenlabs" : null;
  switch (preference) {
    case "hardware_only":
      return kokoro;
    case "cloud_only":
      return eleven;
    case "cloud_first":
      return eleven ?? kokoro;
    default:
      // hardware_first (default)
      return kokoro ?? eleven;
  }
}

/**
 * Build the router that serves GET /capabilities.
 *
 * Every response is the chosen audio limits (GEMINI_LIMITS or
 * HARDWARE_LIMITS) merged with the operator-wide TTS capability fields
 * and a human-readable `reason`.
 *
 * @returns {import("express").Router} router with the /capabilities route.
 */
export function capabilitiesRouter() {
  const router = express.Router();

  router.get("/capabilities", async (req, res, next) => {
    // The whole handler is guarded: the config / hardware lookups below
    // can reject, and a rejected async handler is not forwarded to error
    // middleware on Express 4 — the request would hang. Forwarding via
    // next(err) behaves correctly on both Express 4 and 5.
    try {
      const cfg = await getConfigSnapshot();
      const txPref =
        cfg.relay_transcribe_backend_preference || "gemini_first";
      const hw = await resolveHardwareConfig(cfg);
      const hasHardware = !!hw.transcribe.url;
      const installId = req.header("X-Recap-Install-Id") || null;
      const auth = req.header("Authorization") || null;

      // ── TTS availability (audio-first "walking mode") ──
      // Operator-wide, not install-specific: whether ANY TTS backend can
      // serve a /relay/tts call given the operator's config. The Recap
      // app uses has_tts to decide whether to show the "Listen" button at
      // all (it additionally gates the feature to Max users on its own
      // side).
      const ttsBackend = selectTtsBackend(
        cfg.relay_tts_backend_preference || "hardware_first",
        !!hw.tts?.url,
        !!(cfg.relay_elevenlabs_api_key && cfg.relay_elevenlabs_voice_id)
      );
      const ttsCaps = {
        has_tts: !!ttsBackend,
        tts_backend: ttsBackend, // "kokoro" | "elevenlabs" | null
        tts_default_voice: cfg.relay_tts_default_voice || null,
      };

      // Single place that shapes the response so key order stays
      // limits → TTS fields → reason on every path.
      const reply = (limits, reason) =>
        res.json({ ...limits, ...ttsCaps, reason });

      // If we have an install_id, run the same routing logic the actual
      // transcribe route uses so the chunking decision matches the
      // backend that will actually serve the call.
      if (installId) {
        try {
          const license = await resolveLicense(auth);
          const row = await getOrCreateRow({ installId, license });
          row.tier_snapshot = license.tier;
          const quota = await getTierQuotas();
          const plan = planBackend(row, quota, {
            hasHardware,
            preference: txPref,
          });
          if (plan.allowed && plan.backend === "hardware") {
            return reply(
              HARDWARE_LIMITS,
              `routing this install to hardware (pref=${txPref}, tier=${license.tier})`
            );
          }
          if (plan.allowed && plan.backend === "gemini") {
            return reply(
              GEMINI_LIMITS,
              `routing this install to Gemini (pref=${txPref}, tier=${license.tier})`
            );
          }
          // planBackend refused entirely (out of credits / no backend
          // configured). Return Gemini-safe defaults so the client still
          // chunks defensively and gets a clean 402 from the real
          // transcribe call rather than a confusing transport failure.
          return reply(
            GEMINI_LIMITS,
            `routing refused for this install (${plan.reason || "unknown"}) — returning Gemini-safe defaults`
          );
        } catch (err) {
          // License lookup or row read failed — deliberately best-effort:
          // fall through to the anonymous path so the client still gets
          // an answer.
          console.warn(
            `[capabilities] install-aware resolve failed for ${installId}: ${err?.message || err} — falling back to operator-wide defaults`
          );
        }
      }

      // Anonymous (no install_id) or install-aware path failed. Pick
      // capabilities from the operator-wide routing preference alone:
      //   hardware_only / hardware_first → hardware limits, provided
      //                                    hardware is configured
      //   anything else                  → Gemini-safe limits (chunking
      //                                    works for both backends)
      //
      // When `hardware_first` is set but Parakeet isn't actually
      // configured, the relay will fall back to Gemini — so report
      // Gemini-safe limits in that case.
      const hardwareCapable =
        hasHardware &&
        (txPref === "hardware_only" || txPref === "hardware_first");
      if (hardwareCapable) {
        return reply(
          HARDWARE_LIMITS,
          `hardware-capable backend preference (${txPref})`
        );
      }
      return reply(GEMINI_LIMITS, `Gemini-backed preference (${txPref})`);
    } catch (err) {
      return next(err);
    }
  });

  return router;
}