Add multi-tenant cloud mode: self-serve purchase, credit metering, core-decoupling

Introduces RECAP_MODE=multi alongside single-mode self-host: - Tenant auth + accounts (magic-link via System SMTP), per-tenant credit pool, anonymous trial minting with per-IP/-64 caps - Self-serve Pro/Max purchase: inline Lightning (BTCPay) + card (Zaprite), prepaid 30-day periods, expiry-reminder emails - Core-decoupling: relay owns cloud tier/expiry keyed by Recaps user-id - SQLite (better-sqlite3) schema for multi-mode; filesystem unchanged for single - StartOS actions/versions through 0.2.155
2026-06-13 14:25:05 -05:00
parent db580abad7
commit 0ae59f3550
176 changed files with 23823 additions and 803 deletions
@@ -0,0 +1,258 @@
+// Audio-first ("walking mode") TTS routes. Turns a saved recap's per-topic
+// summaries into spoken MP3 clips (via the relay's /relay/tts → Kokoro),
+// caches them next to the session JSON, and serves them to the player.
+//
+// Endpoints (all scope-isolated to the requesting user's library):
+//   GET  /api/tts/availability         — { has_tts, tts_backend, default_voice, allowed }
+//   POST /api/tts/generate/:id/:index  — synthesize + cache ONE topic clip
+//                                        (idempotent, retried, deduped by job
+//                                        id). The player calls this on demand
+//                                        as it reaches each topic + prefetches
+//                                        the next, so clips are generated when
+//                                        needed and retried until they succeed
+//                                        rather than skipped.
+//   GET  /api/tts/status/:id           — { total, ready:[idx...], done, allowed, voice }
+//   GET  /api/tts/audio/:id/:index     — serve a cached topic clip (mp3)
+//
+// Access policy (the "Max gate"):
+//   - single mode: the operator owns the box AND the TTS hardware, so no
+//     tier gate — TTS is available whenever the relay advertises has_tts.
+//   - multi mode admin: the operator; allowed.
+//   - multi-tenant cloud users: any paid subscription (Pro or Max). The
+//     operator can tighten this to Max-only here if shared TTS hardware
+//     throughput becomes a constraint.
+//
+// Billing: all of a recap's topics share ONE relay job id (`tts:<id>`), so
+// the relay charges at most 1 credit to voice an entire recap.
+
+import fs from "fs/promises";
+import path from "path";
+
+import {
+  scopeForRequest,
+  sessionAudioDir,
+  loadSession,
+  patchSession,
+} from "./history.js";
+import { getProvider, resolveProviderOpts } from "./providers/index.js";
+import { getRelayCapabilities } from "./relay-capabilities.js";
+
+// Audio format requested from the relay's TTS call and recorded in the
+// session's summaryAudio metadata.
+const CLIP_FORMAT = "mp3";
+// File extension of cached clips on disk; used to build and to recognise
+// clip file names.
+const CLIP_EXT = "mp3";
+
+// Whether THIS request's user may generate TTS. See the policy note above.
+export function userHasTtsAccess(req) {
+  // Single mode (or no request context): operator owns the hardware.
+  if (!req || req.recapMode !== "multi") return true;
+  // Multi-mode admin = the operator.
+  if (req.user && req.user.is_admin) return true;
+  // Multi-tenant cloud user: Pro or Max. Core-decoupling — the tier is the
+  // relay-owned subscription tier, cached on the Recaps account
+  // (req.user.tier), kept in sync by the operator grant flow.
+  const tier = req.user?.tier;
+  return tier === "pro" || tier === "max";
+}
+
+// The text we speak for a topic: its title as a lead-in, then the summary,
+// so an eyes-free listener hears what the topic is before its recap.
+export function chunkSpeechText(chunk) {
+  const title = (chunk?.title || "").trim();
+  const summary = (chunk?.summary || "").trim();
+  if (title && summary) return `${title}. ${summary}`;
+  return summary || title || "";
+}
+
+function clipFileName(index) {
+  return `topic-${index}.${CLIP_EXT}`;
+}
+
+// Server-side retries per clip on a transient (5xx/network) relay failure,
+// on top of any retry the relay itself does.
+const GEN_RETRIES = 2;
+
+// Generate + cache ONE topic clip. Idempotent: returns {cached:true} if the
+// file already exists. Retries transient failures; a 4xx (e.g. bad voice) or
+// empty summary is permanent (no retry). Returns
+// { ok, cached?, empty?, error?, voice? }.
+async function generateClip({ scope, id, index, chunk, provider, jobId, voice }) {
+  const dir = sessionAudioDir(scope, id);
+  const file = path.join(dir, clipFileName(index));
+  try {
+    await fs.access(file);
+    return { ok: true, cached: true };
+  } catch {}
+  const text = chunkSpeechText(chunk);
+  if (!text) return { ok: false, empty: true, error: "empty_summary" };
+  await fs.mkdir(dir, { recursive: true }).catch(() => {});
+  let lastErr = null;
+  for (let attempt = 1; attempt <= GEN_RETRIES + 1; attempt++) {
+    try {
+      const r = await provider.tts({ text, voice, format: CLIP_FORMAT, jobId });
+      await fs.writeFile(file, r.audio);
+      return { ok: true, voice: r.voice, backend: r.backend };
+    } catch (err) {
+      lastErr = err;
+      const status = err?.status || 0;
+      console.warn(
+        `[tts] clip ${index} attempt ${attempt}/${GEN_RETRIES + 1} failed (${status || "net"}): ${err?.message || err}`,
+      );
+      if (status >= 400 && status < 500) break; // client error → permanent
+      if (attempt <= GEN_RETRIES) await new Promise((r2) => setTimeout(r2, 600));
+    }
+  }
+  return { ok: false, error: (lastErr?.message || "tts_failed").slice(0, 200) };
+}
+
+function resolveScope(req, res) {
+  try {
+    return scopeForRequest(req);
+  } catch {
+    res.status(401).json({ error: "no_scope" });
+    return null;
+  }
+}
+
+export function setupTtsRoutes(app) {
+  // Lightweight probe for the frontend: should it show the "Listen"
+  // affordance, and what's the default voice?
+  app.get("/api/tts/availability", (req, res) => {
+    const caps = getRelayCapabilities();
+    res.json({
+      has_tts: !!caps.has_tts,
+      tts_backend: caps.tts_backend || null,
+      default_voice: caps.tts_default_voice || null,
+      allowed: userHasTtsAccess(req) && !!caps.has_tts,
+    });
+  });
+
+  // Generate (or return cached) the audio for ONE topic. The player calls
+  // this on demand as it reaches each topic — and prefetches the next — so a
+  // clip is generated when needed and RETRIED until it succeeds, rather than
+  // skipped. Idempotent + deduped by the shared job id (≤1 credit/recap).
+  //
+  // Responses:
+  //   200 { ok:true, index, cached }   — clip is ready to play
+  //   200 { ok:false, empty:true }     — topic has no summary text (permanent;
+  //                                      client should not retry)
+  //   502 { ok:false, error }          — transient failure; client retries
+  app.post("/api/tts/generate/:id/:index", async (req, res) => {
+    const scope = resolveScope(req, res);
+    if (!scope) return;
+    if (!userHasTtsAccess(req)) {
+      return res.status(403).json({
+        error: "tts_requires_subscription",
+        message: "Audio recaps are available to Pro and Max subscribers.",
+      });
+    }
+    const caps = getRelayCapabilities();
+    if (!caps.has_tts) {
+      return res.status(503).json({
+        error: "tts_unavailable",
+        message: "Text-to-speech isn't available on this relay right now.",
+      });
+    }
+    const id = req.params.id;
+    const index = parseInt(req.params.index, 10);
+    const session = await loadSession(scope, id);
+    if (!session) return res.status(404).json({ error: "session_not_found" });
+    const chunks = Array.isArray(session.chunks) ? session.chunks : [];
+    if (!Number.isInteger(index) || index < 0 || index >= chunks.length) {
+      return res.status(400).json({ error: "bad_index" });
+    }
+    let provider;
+    try {
+      provider = getProvider("relay", resolveProviderOpts("relay", { req }));
+    } catch (err) {
+      return res.status(503).json({
+        error: "relay_unavailable",
+        message: err?.message || "Relay is not configured.",
+      });
+    }
+    const voice =
+      typeof req.query.voice === "string" && req.query.voice.trim()
+        ? req.query.voice.trim()
+        : undefined;
+    const result = await generateClip({
+      scope,
+      id,
+      index,
+      chunk: chunks[index],
+      provider,
+      jobId: `tts:${id}`, // one credit for the whole recap
+      voice,
+    });
+    if (result.ok) {
+      patchSession(scope, id, {
+        summaryAudio: {
+          ready: true,
+          total: chunks.length,
+          voice: result.voice || caps.tts_default_voice || null,
+          format: CLIP_FORMAT,
+          updatedAt: new Date().toISOString(),
+        },
+      }).catch(() => {});
+      return res.json({ ok: true, index, cached: !!result.cached, voice: result.voice || null });
+    }
+    if (result.empty) {
+      return res.json({ ok: false, index, empty: true, error: "empty_summary" });
+    }
+    return res.status(502).json({ ok: false, index, error: result.error || "tts_failed" });
+  });
+
+  // Which topics are already synthesized for a recap.
+  app.get("/api/tts/status/:id", async (req, res) => {
+    const scope = resolveScope(req, res);
+    if (!scope) return;
+    const session = await loadSession(scope, req.params.id);
+    if (!session) return res.status(404).json({ error: "session_not_found" });
+    const total = Array.isArray(session.chunks) ? session.chunks.length : 0;
+    const dir = sessionAudioDir(scope, req.params.id);
+    let files = [];
+    try {
+      files = await fs.readdir(dir);
+    } catch {}
+    const ready = files
+      .map((f) => {
+        const m = new RegExp(`^topic-(\\d+)\\.${CLIP_EXT}$`).exec(f);
+        return m ? Number(m[1]) : null;
+      })
+      .filter((n) => n !== null)
+      .sort((a, b) => a - b);
+    const caps = getRelayCapabilities();
+    res.json({
+      total,
+      ready,
+      done: total > 0 && ready.length >= total,
+      allowed: userHasTtsAccess(req) && !!caps.has_tts,
+      voice: session.summaryAudio?.voice || caps.tts_default_voice || null,
+    });
+  });
+
+  // Serve one cached topic clip. sendFile handles Range requests (so the
+  // <audio> element can seek) and 404s cleanly when the clip isn't ready.
+  app.get("/api/tts/audio/:id/:index", async (req, res) => {
+    const scope = resolveScope(req, res);
+    if (!scope) return;
+    const idx = parseInt(req.params.index, 10);
+    if (!Number.isInteger(idx) || idx < 0) {
+      return res.status(400).json({ error: "bad_index" });
+    }
+    const file = path.join(sessionAudioDir(scope, req.params.id), clipFileName(idx));
+    res.sendFile(
+      file,
+      {
+        headers: {
+          "Content-Type": "audio/mpeg",
+          // Content is immutable for a given (session, topic) — safe to
+          // cache hard, which also primes the Phase 4 offline service worker.
+          "Cache-Control": "private, max-age=31536000, immutable",
+        },
+      },
+      (err) => {
+        if (err && !res.headersSent) {
+          res.status(404).json({ error: "clip_not_ready" });
+        }
+      }
+    );
+  });
+}