Add multi-tenant cloud mode: self-serve purchase, credit metering, core-decoupling
Introduces RECAP_MODE=multi alongside single-mode self-host:
- Tenant auth + accounts (magic-link via System SMTP), per-tenant credit pool, anonymous trial minting with per-IP and per-/64 caps
- Self-serve Pro/Max purchase: inline Lightning (BTCPay) + card (Zaprite), prepaid 30-day periods, expiry-reminder emails
- Core-decoupling: relay owns cloud tier/expiry keyed by Recaps user-id
- SQLite (better-sqlite3) schema for multi-mode; filesystem unchanged for single
- StartOS actions/versions through 0.2.155
This commit is contained in:
@@ -0,0 +1,258 @@
|
||||
// Audio-first ("walking mode") TTS routes. Turns a saved recap's per-topic
// summaries into spoken MP3 clips (via the relay's /relay/tts → Kokoro),
// caches them next to the session JSON, and serves them to the player.
//
// Endpoints (all scope-isolated to the requesting user's library):
//   GET  /api/tts/availability        — { has_tts, tts_backend, default_voice,
//                                         allowed }
//   POST /api/tts/generate/:id/:index — synthesize + cache ONE topic clip
//                                       (idempotent, retried, deduped by job
//                                       id). The player calls this on demand
//                                       as it reaches each topic + prefetches
//                                       the next, so clips are generated when
//                                       needed and retried until they succeed
//                                       rather than skipped.
//   GET  /api/tts/status/:id          — { total, ready:[idx...], done,
//                                         allowed, voice }
//   GET  /api/tts/audio/:id/:index    — serve a cached topic clip (mp3)
//
// Access policy (the "Max gate"):
//   - single mode: the operator owns the box AND the TTS hardware, so no
//     tier gate — TTS is available whenever the relay advertises has_tts.
//   - multi mode admin: the operator; allowed.
//   - multi-tenant cloud users: any paid subscription (Pro or Max). The
//     operator can tighten this to Max-only here if shared TTS hardware
//     throughput becomes a constraint.
//
// Billing: all of a recap's topics share ONE relay job id (`tts:<id>`), so
// the relay charges at most 1 credit to voice an entire recap.
|
||||
|
||||
import fs from "fs/promises";
|
||||
import path from "path";
|
||||
|
||||
import {
|
||||
scopeForRequest,
|
||||
sessionAudioDir,
|
||||
loadSession,
|
||||
patchSession,
|
||||
} from "./history.js";
|
||||
import { getProvider, resolveProviderOpts } from "./providers/index.js";
|
||||
import { getRelayCapabilities } from "./relay-capabilities.js";
|
||||
|
||||
// Audio format requested from the relay's TTS endpoint for every clip.
const CLIP_FORMAT = "mp3";
// File extension used for cached clips on disk; keep in step with CLIP_FORMAT.
const CLIP_EXT = "mp3";
|
||||
|
||||
/**
 * Whether the user behind THIS request may generate TTS audio.
 *
 * Policy:
 *   - no request context, or any mode other than "multi": always allowed
 *     (single mode — the operator owns the hardware);
 *   - multi mode, `req.user.is_admin` truthy: allowed (the operator);
 *   - multi mode, anyone else: allowed only when `req.user.tier` is exactly
 *     "pro" or "max". Per the original design note, that tier is the
 *     relay-owned subscription tier cached on the Recaps account.
 *
 * @param {object} [req] - Request carrying `recapMode` and, in multi mode, `user`.
 * @returns {boolean} True when TTS generation is permitted for this request.
 */
export function userHasTtsAccess(req) {
  // Single mode (or no request context): no tier gate.
  if (!req || req.recapMode !== "multi") return true;

  // Multi-mode admin = the operator.
  if (req.user?.is_admin) return true;

  // Multi-tenant cloud user: any paid tier. A missing user or tier is denied.
  const tier = req.user?.tier;
  return tier === "pro" || tier === "max";
}
|
||||
|
||||
/**
 * Build the text to speak for one topic: its title as a lead-in, then the
 * summary, so an eyes-free listener hears what the topic is before its recap.
 *
 * Only string fields are used. A non-string `title` or `summary` (for
 * example a number or object in a malformed session) is treated as absent
 * instead of throwing on `.trim()`.
 *
 * When both parts exist they are joined with ". ", unless the title already
 * ends in sentence punctuation (. ! ? : ; …), in which case a single space is
 * used so the spoken text does not contain sequences like "?." or "..".
 *
 * @param {{ title?: unknown, summary?: unknown } | null | undefined} chunk
 * @returns {string} Text to synthesize, or "" when there is nothing to say.
 */
export function chunkSpeechText(chunk) {
  const asText = (value) => (typeof value === "string" ? value.trim() : "");
  const title = asText(chunk?.title);
  const summary = asText(chunk?.summary);

  // With at most one part present, return whichever exists ("" if neither).
  if (!title || !summary) return summary || title;

  const separator = /[.!?:;…]$/.test(title) ? " " : ". ";
  return `${title}${separator}${summary}`;
}
|
||||
|
||||
/**
 * File name (no directory) of the cached clip for a topic.
 *
 * @param {number} index - Zero-based topic index within the recap.
 * @returns {string} `topic-<index>.<CLIP_EXT>`.
 */
function clipFileName(index) {
  return `topic-${index}.${CLIP_EXT}`;
}
|
||||
|
||||
// Extra server-side attempts per clip after a transient (5xx/network) relay
// failure, on top of any retry the relay itself does.
const GEN_RETRIES = 2;

// Pause between attempts, in milliseconds.
const GEN_RETRY_DELAY_MS = 600;

/**
 * Write `audio` to `file` without ever exposing a partially written clip.
 *
 * The bytes go to a uniquely named sibling temp file first and are then
 * renamed over the final name. This matters because the final file's mere
 * existence is what generateClip treats as "cached", and the player may
 * request the same clip twice at once (on-demand + prefetch).
 *
 * @param {string} file - Final clip path.
 * @param {*} audio - Payload accepted by `fs.writeFile` (presumably a Buffer
 *   from the relay provider — confirm against the provider).
 * @returns {Promise<void>}
 * @throws Re-throws the write/rename error after removing the temp file.
 */
async function writeClipAtomically(file, audio) {
  const unique = `${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}`;
  const tmp = `${file}.${unique}.tmp`;
  try {
    await fs.writeFile(tmp, audio);
    await fs.rename(tmp, file);
  } catch (err) {
    // Best-effort cleanup; the original error is what the caller needs.
    await fs.unlink(tmp).catch(() => {});
    throw err;
  }
}

/**
 * Generate + cache ONE topic clip.
 *
 * Idempotent: returns `{ ok: true, cached: true }` without calling the relay
 * when the clip file already exists. Otherwise it synthesizes the topic text
 * through `provider.tts` and stores the result.
 *
 * Failures without a 4xx status (5xx, network, local write errors) are
 * retried up to GEN_RETRIES more times. A 4xx (e.g. bad voice) or an empty
 * topic text is permanent and not retried. An empty audio payload counts as
 * a failure, so a zero-byte clip is never cached.
 *
 * @param {object} args
 * @param {*} args.scope - Library scope, passed through to `sessionAudioDir`.
 * @param {string} args.id - Session (recap) id.
 * @param {number} args.index - Zero-based topic index.
 * @param {object} args.chunk - Topic whose title/summary is spoken.
 * @param {{ tts: Function }} args.provider - Provider exposing `tts()`.
 * @param {string} args.jobId - Relay job id sent with every attempt.
 * @param {string} [args.voice] - Requested voice; undefined lets the relay choose.
 * @returns {Promise<{ ok: boolean, cached?: boolean, empty?: boolean,
 *   error?: string, voice?: *, backend?: * }>}
 */
async function generateClip({ scope, id, index, chunk, provider, jobId, voice }) {
  const dir = sessionAudioDir(scope, id);
  const file = path.join(dir, clipFileName(index));

  try {
    await fs.access(file);
    return { ok: true, cached: true };
  } catch {
    // Not cached yet — fall through and synthesize it.
  }

  const text = chunkSpeechText(chunk);
  if (!text) return { ok: false, empty: true, error: "empty_summary" };

  try {
    await fs.mkdir(dir, { recursive: true });
  } catch (err) {
    // Still best-effort: if the directory really is unusable, the write
    // below fails and is reported; this just leaves a trace of the cause.
    console.warn(`[tts] could not create audio dir for clip ${index}: ${err?.message || err}`);
  }

  const maxAttempts = GEN_RETRIES + 1;
  let lastErr = null;
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      const r = await provider.tts({ text, voice, format: CLIP_FORMAT, jobId });
      const size = r?.audio ? (r.audio.byteLength ?? r.audio.length) : 0;
      if (size === 0) throw new Error("empty_audio");
      await writeClipAtomically(file, r.audio);
      return { ok: true, voice: r.voice, backend: r.backend };
    } catch (err) {
      lastErr = err;
      const status = err?.status || 0;
      console.warn(
        `[tts] clip ${index} attempt ${attempt}/${maxAttempts} failed (${status || "net"}): ${err?.message || err}`,
      );
      if (status >= 400 && status < 500) break; // client error → permanent
      if (attempt < maxAttempts) {
        await new Promise((resolve) => setTimeout(resolve, GEN_RETRY_DELAY_MS));
      }
    }
  }

  // String() guards against a non-string `message` on a thrown value.
  return { ok: false, error: String(lastErr?.message || "tts_failed").slice(0, 200) };
}
|
||||
|
||||
/**
 * Resolve the library scope for this request.
 *
 * If `scopeForRequest` throws, a `401 { error: "no_scope" }` response is
 * sent and null is returned, so callers only need `if (!scope) return;`.
 *
 * @param {object} req - Incoming request.
 * @param {object} res - Response used for the 401 on failure.
 * @returns {*} Whatever `scopeForRequest` returns, or null after a 401.
 */
function resolveScope(req, res) {
  try {
    return scopeForRequest(req);
  } catch {
    res.status(401).json({ error: "no_scope" });
    return null;
  }
}
|
||||
|
||||
/**
 * Parse a `:index` route parameter as a non-negative topic index.
 *
 * Only plain decimal digits are accepted, so inputs such as "1abc" or "1.5"
 * (which `parseInt` would silently truncate to 1) are rejected.
 *
 * @param {unknown} raw - Raw route parameter.
 * @returns {number|null} The index, or null when `raw` is not a valid index.
 */
function parseTopicIndex(raw) {
  if (typeof raw !== "string" || !/^\d{1,9}$/.test(raw)) return null;
  return Number(raw);
}

/**
 * Defense-in-depth check on a `:id` route parameter before it is used to
 * build a filesystem path. Rejects empty ids, "." / "..", and anything
 * containing a path separator or NUL (an encoded "%2F" can reach the handler
 * decoded as "/").
 *
 * @param {unknown} id - Raw route parameter.
 * @returns {boolean} True when the id is a single, plain path segment.
 */
function isSafeSessionId(id) {
  return (
    typeof id === "string" &&
    id.length > 0 &&
    id !== "." &&
    id !== ".." &&
    !/[\\/\0]/.test(id)
  );
}

/**
 * Wrap an async route handler so a rejection (or synchronous throw) is
 * forwarded to Express via `next(err)` instead of becoming an unhandled
 * promise rejection. Express 5 does this natively; Express 4 does not.
 *
 * @param {(req: object, res: object, next: Function) => Promise<unknown>} handler
 * @returns {(req: object, res: object, next: Function) => Promise<void>}
 */
function forwardAsyncErrors(handler) {
  return async (req, res, next) => {
    try {
      await handler(req, res, next);
    } catch (err) {
      next(err);
    }
  };
}

/**
 * Register the TTS endpoints on an Express-style `app`:
 *
 *   GET  /api/tts/availability
 *   POST /api/tts/generate/:id/:index
 *   GET  /api/tts/status/:id
 *   GET  /api/tts/audio/:id/:index
 *
 * @param {{ get: Function, post: Function }} app - Express application/router.
 * @returns {void}
 */
export function setupTtsRoutes(app) {
  // Lightweight probe for the frontend: should it show the "Listen"
  // affordance, and what's the default voice?
  app.get("/api/tts/availability", (req, res) => {
    const caps = getRelayCapabilities();
    res.json({
      has_tts: !!caps.has_tts,
      tts_backend: caps.tts_backend || null,
      default_voice: caps.tts_default_voice || null,
      allowed: userHasTtsAccess(req) && !!caps.has_tts,
    });
  });

  // Generate (or return cached) the audio for ONE topic. The player calls
  // this on demand as it reaches each topic — and prefetches the next — so a
  // clip is generated when needed and RETRIED until it succeeds, rather than
  // skipped. Every topic of a recap shares the job id `tts:<id>`.
  //
  // Responses:
  //   200 { ok:true, index, cached, voice } — clip is ready to play
  //   200 { ok:false, empty:true }          — topic has no text (permanent;
  //                                           client should not retry)
  //   400 { error:"bad_index" }             — index missing/out of range
  //   403 / 503                             — not subscribed / TTS or relay
  //                                           unavailable
  //   404 { error:"session_not_found" }
  //   502 { ok:false, error }               — generation failed; client retries
  app.post(
    "/api/tts/generate/:id/:index",
    forwardAsyncErrors(async (req, res) => {
      const scope = resolveScope(req, res);
      if (!scope) return;

      if (!userHasTtsAccess(req)) {
        return res.status(403).json({
          error: "tts_requires_subscription",
          message: "Audio recaps are available to Pro and Max subscribers.",
        });
      }

      const caps = getRelayCapabilities();
      if (!caps.has_tts) {
        return res.status(503).json({
          error: "tts_unavailable",
          message: "Text-to-speech isn't available on this relay right now.",
        });
      }

      const { id } = req.params;
      const index = parseTopicIndex(req.params.index);

      // An id that is not a plain path segment cannot name a session.
      const session = isSafeSessionId(id) ? await loadSession(scope, id) : null;
      if (!session) return res.status(404).json({ error: "session_not_found" });

      const chunks = Array.isArray(session.chunks) ? session.chunks : [];
      if (index === null || index >= chunks.length) {
        return res.status(400).json({ error: "bad_index" });
      }

      let provider;
      try {
        provider = getProvider("relay", resolveProviderOpts("relay", { req }));
      } catch (err) {
        return res.status(503).json({
          error: "relay_unavailable",
          message: err?.message || "Relay is not configured.",
        });
      }

      const requestedVoice =
        typeof req.query.voice === "string" ? req.query.voice.trim() : "";

      const result = await generateClip({
        scope,
        id,
        index,
        chunk: chunks[index],
        provider,
        jobId: `tts:${id}`, // one credit for the whole recap
        voice: requestedVoice || undefined,
      });

      if (result.ok) {
        // A cached hit carries no voice of its own, so report the voice
        // recorded when the recap was first voiced rather than nothing.
        const storedVoice = session.summaryAudio?.voice || null;
        const clipVoice = result.cached ? storedVoice : result.voice || null;

        // Only touch the session when something changed: a new clip was
        // written, or the session has no ready marker yet. This avoids
        // rewriting the session on every replay and overwriting the recorded
        // voice with the relay default.
        if (!result.cached || !session.summaryAudio?.ready) {
          patchSession(scope, id, {
            summaryAudio: {
              ready: true,
              total: chunks.length,
              voice: clipVoice || caps.tts_default_voice || null,
              format: CLIP_FORMAT,
              updatedAt: new Date().toISOString(),
            },
          }).catch((err) => {
            // Metadata only — the clip itself is already on disk.
            console.warn(`[tts] could not record summaryAudio for ${id}: ${err?.message || err}`);
          });
        }

        return res.json({ ok: true, index, cached: !!result.cached, voice: clipVoice });
      }

      if (result.empty) {
        return res.json({ ok: false, index, empty: true, error: "empty_summary" });
      }

      return res.status(502).json({ ok: false, index, error: result.error || "tts_failed" });
    }),
  );

  // Which topics are already synthesized for a recap.
  app.get(
    "/api/tts/status/:id",
    forwardAsyncErrors(async (req, res) => {
      const scope = resolveScope(req, res);
      if (!scope) return;

      const { id } = req.params;
      const session = isSafeSessionId(id) ? await loadSession(scope, id) : null;
      if (!session) return res.status(404).json({ error: "session_not_found" });

      const total = Array.isArray(session.chunks) ? session.chunks.length : 0;

      let files = [];
      try {
        files = await fs.readdir(sessionAudioDir(scope, id));
      } catch {
        // No audio directory yet means no clips are ready.
      }

      // Built once per request. Temp files from in-flight writes and any
      // other stray names do not match and are ignored.
      const clipPattern = new RegExp(`^topic-(\\d+)\\.${CLIP_EXT}$`);
      const indices = files
        .map((name) => clipPattern.exec(name))
        .filter(Boolean)
        .map((match) => Number(match[1]))
        // Ignore clips beyond the current topic count so `done` is not
        // satisfied by stale files.
        .filter((n) => n < total);
      const ready = [...new Set(indices)].sort((a, b) => a - b);

      const caps = getRelayCapabilities();
      res.json({
        total,
        ready,
        done: total > 0 && ready.length >= total,
        allowed: userHasTtsAccess(req) && !!caps.has_tts,
        voice: session.summaryAudio?.voice || caps.tts_default_voice || null,
      });
    }),
  );

  // Serve one cached topic clip. sendFile handles Range requests (so the
  // <audio> element can seek) and 404s cleanly when the clip isn't ready.
  app.get(
    "/api/tts/audio/:id/:index",
    forwardAsyncErrors(async (req, res) => {
      const scope = resolveScope(req, res);
      if (!scope) return;

      const idx = parseTopicIndex(req.params.index);
      if (idx === null) {
        return res.status(400).json({ error: "bad_index" });
      }
      if (!isSafeSessionId(req.params.id)) {
        return res.status(404).json({ error: "clip_not_ready" });
      }

      res.sendFile(
        clipFileName(idx),
        {
          // Resolve the clip relative to the session's audio directory so the
          // lookup stays inside that directory.
          root: sessionAudioDir(scope, req.params.id),
          headers: {
            "Content-Type": "audio/mpeg",
            // Content is immutable for a given (session, topic) — safe to
            // cache hard, which also primes the Phase 4 offline service worker.
            "Cache-Control": "private, max-age=31536000, immutable",
          },
        },
        (err) => {
          if (err && !res.headersSent) {
            res.status(404).json({ error: "clip_not_ready" });
          }
        },
      );
    }),
  );
}
|
||||
Reference in New Issue
Block a user