Wire new routes; identity, summarize-url, dashboard, admin

This commit is contained in:
Keysat
2026-06-13 13:36:30 -05:00
parent 04dcf86fa4
commit 318c6c4b81
20 changed files with 12407 additions and 499 deletions
+6257 -14
View File
File diff suppressed because it is too large Load Diff
+9 -1
View File
@@ -6,6 +6,7 @@
// access to the relay's /admin endpoints. // access to the relay's /admin endpoints.
import { scryptSync, timingSafeEqual, createHmac } from "crypto"; import { scryptSync, timingSafeEqual, createHmac } from "crypto";
import express from "express";
import { getConfigSnapshot } from "./config.js"; import { getConfigSnapshot } from "./config.js";
const SCRYPT_KEYLEN = 64; const SCRYPT_KEYLEN = 64;
@@ -60,6 +61,13 @@ export function setupAdminAuthMiddleware(app) {
if (!req.path.startsWith(ADMIN_PREFIX)) return next(); if (!req.path.startsWith(ADMIN_PREFIX)) return next();
// /admin/login is reachable without auth. // /admin/login is reachable without auth.
if (req.path === "/admin/login" || req.path === "/admin/status") return next(); if (req.path === "/admin/login" || req.path === "/admin/status") return next();
// /admin/btcpay/callback is hit via a POST-redirect from BTCPay
// after the operator clicks "Approve" in their authorize page.
// The cookie may not flow on cross-site POST (SameSite=Lax), so
// we exempt this path and validate via a state token instead —
// /admin/btcpay/start stashes a random token in setup-context,
// and the callback rejects requests without a matching one.
if (req.path === "/admin/btcpay/callback") return next();
const cfg = await getConfigSnapshot(); const cfg = await getConfigSnapshot();
if (!cfg.relay_admin_password_hash) { if (!cfg.relay_admin_password_hash) {
// No password set — admin endpoints are disabled entirely. // No password set — admin endpoints are disabled entirely.
@@ -82,7 +90,7 @@ export function setupAdminAuthRoutes(app) {
}); });
}); });
app.post("/admin/login", async (req, res) => { app.post("/admin/login", express.json(), async (req, res) => {
const cfg = await getConfigSnapshot(); const cfg = await getConfigSnapshot();
if (!cfg.relay_admin_password_hash) { if (!cfg.relay_admin_password_hash) {
return res.status(400).json({ error: "admin_disabled" }); return res.status(400).json({ error: "admin_disabled" });
+403 -1
View File
@@ -10,6 +10,13 @@
// { // {
// ts: ms-epoch when the request landed // ts: ms-epoch when the request landed
// install_id: X-Recap-Install-Id (truncated for log readability) // install_id: X-Recap-Install-Id (truncated for log readability)
// license_fingerprint: stable 16-hex hash of the licenseUuid for
// paid-tier calls; null for anonymous/Core. Added
// in the license-keyed-credits refactor so spend can
// be aggregated by license-pool (since one license
// may span multiple installs). install_id is STILL
// logged on every entry — license_fingerprint is
// additive forensic visibility, not a replacement.
// tier: "core" | "pro" | "max" // tier: "core" | "pro" | "max"
// pipeline: "transcribe" | "analyze" // pipeline: "transcribe" | "analyze"
// backend: "gemini" | "hardware" // backend: "gemini" | "hardware"
@@ -36,9 +43,16 @@ import path from "path";
let dataDir = "/data"; let dataDir = "/data";
let logPath = "/data/relay-calls.ndjson"; let logPath = "/data/relay-calls.ndjson";
// Size at which we rotate the live ndjson to a dated archive. Picked
// to roughly match a year of high-volume relay traffic — a typical
// entry is ~400 bytes, so 50MB ≈ 130k entries. Rotation runs once at
// boot; the operator can also rotate manually any time.
const ROTATION_THRESHOLD_BYTES = 50 * 1024 * 1024;
export async function initAuditLog({ dataDir: dd }) { export async function initAuditLog({ dataDir: dd }) {
if (dd) dataDir = dd; if (dd) dataDir = dd;
logPath = path.join(dataDir, "relay-calls.ndjson"); logPath = path.join(dataDir, "relay-calls.ndjson");
await maybeRotateLog();
// Ensure the file exists so the streaming read path doesn't trip. // Ensure the file exists so the streaming read path doesn't trip.
try { try {
await fs.access(logPath); await fs.access(logPath);
@@ -48,6 +62,46 @@ export async function initAuditLog({ dataDir: dd }) {
console.log(`[audit-log] writing to ${logPath}`); console.log(`[audit-log] writing to ${logPath}`);
} }
// Rotate the live ndjson to a dated archive when it grows past
// ROTATION_THRESHOLD_BYTES. Called from initAuditLog before the live
// file is (re)created. The dashboard's `readEntries` always reads the
// live file only — archived entries fall out of the rolling 30-day
// window naturally and are kept around as raw files for ad-hoc
// analysis or long-term storage / CSV export.
//
// Archive naming: `relay-calls-YYYY-MM-DD.ndjson`, then
// `relay-calls-YYYY-MM-DD.<n>.ndjson` for n = 1..99 when a same-day
// archive already exists. Every candidate (including `.99`) is probed;
// if all are taken the rotation is skipped WITH a warning rather than
// silently, so the operator can see why the live file keeps growing.
//
// Never throws: a missing live file is a no-op and rename/write
// failures are logged as warnings.
async function maybeRotateLog() {
  let stat;
  try {
    stat = await fs.stat(logPath);
  } catch {
    return; // No file yet — nothing to rotate.
  }
  if (stat.size < ROTATION_THRESHOLD_BYTES) return;

  const MAX_SUFFIX = 99;
  const ymd = new Date().toISOString().slice(0, 10);
  const candidateNames = [
    `relay-calls-${ymd}.ndjson`,
    ...Array.from(
      { length: MAX_SUFFIX },
      (_, i) => `relay-calls-${ymd}.${i + 1}.ndjson`
    ),
  ];

  // Pick the first candidate that fs.access cannot reach (i.e. a free name).
  let archive = null;
  for (const name of candidateNames) {
    const candidate = path.join(dataDir, name);
    try {
      await fs.access(candidate);
    } catch {
      archive = candidate;
      break;
    }
  }
  if (archive === null) {
    // Pathological — every same-day archive name is taken.
    console.warn(
      `[audit-log] rotation skipped: all ${candidateNames.length} archive names for ${ymd} are in use`
    );
    return;
  }

  try {
    await fs.rename(logPath, archive);
    // Recreate an empty live file so appends and reads keep working.
    await fs.writeFile(logPath, "", { mode: 0o600 });
    console.log(
      `[audit-log] rotated ${(stat.size / 1024 / 1024).toFixed(1)}MB → ${archive}`
    );
  } catch (err) {
    console.warn(`[audit-log] rotation failed: ${err?.message || err}`);
  }
}
// Best-effort append. Errors are logged but never rethrown — losing // Best-effort append. Errors are logged but never rethrown — losing
// an audit line shouldn't fail the relay call that caused it. // an audit line shouldn't fail the relay call that caused it.
export async function recordCall(entry) { export async function recordCall(entry) {
@@ -59,6 +113,55 @@ export async function recordCall(entry) {
} }
} }
/**
 * Empty the live audit log in place. Backs the dashboard's "Delete all"
 * button (pre-launch cleanup, or wiping a run of bad-data test entries).
 * Destructive — there is no undo.
 *
 * Never throws: a write failure is reported through the return value.
 *
 * @returns {Promise<{ok: true} | {ok: false, error: string}>}
 */
export async function clearAllAuditEntries() {
  const outcome = { ok: true };
  try {
    await fs.writeFile(logPath, "", { mode: 0o600 });
  } catch (failure) {
    outcome.ok = false;
    outcome.error = failure?.message || String(failure);
  }
  return outcome;
}
/**
 * Delete audit rows whose `job_id` is in `jobIds`.
 *
 * Streams the live log line by line, keeps every line that does not
 * belong to a target job, and writes the remainder back. O(N) in the
 * file size, which is bounded in practice because the log is rotated
 * once it passes ROTATION_THRESHOLD_BYTES (50MB).
 *
 * Unparseable lines are preserved rather than dropped, and blank lines
 * are discarded. When nothing matched, the file is left untouched — a
 * full truncate-and-rewrite would be pure risk with no benefit.
 * NOTE(review): presumably recordCall appends to this same file while
 * this runs; lines appended between the read and the rewrite would be
 * lost — confirm whether callers need a lock.
 *
 * @param {Array<string>} jobIds - job_id values to remove; a non-array
 *   or empty array is a no-op.
 * @returns {Promise<{deleted: number}>} count of rows removed.
 * @throws rethrows any read error other than ENOENT, and any write error.
 */
export async function deleteAuditRowsByJobIds(jobIds) {
  if (!Array.isArray(jobIds) || jobIds.length === 0) return { deleted: 0 };
  const idSet = new Set(jobIds);
  const kept = [];
  let deleted = 0;
  try {
    const stream = createReadStream(logPath, { encoding: "utf8" });
    const rl = readline.createInterface({ input: stream, crlfDelay: Infinity });
    for await (const line of rl) {
      if (!line.trim()) continue;
      let row;
      try {
        row = JSON.parse(line);
      } catch {
        // Bad line — preserve it rather than dropping; matches the
        // skip-and-continue behavior of readEntries.
        kept.push(line);
        continue;
      }
      if (row.job_id && idSet.has(row.job_id)) {
        deleted += 1;
      } else {
        kept.push(line);
      }
    }
  } catch (err) {
    // A missing log simply means there is nothing to delete.
    if (err?.code !== "ENOENT") throw err;
  }
  // Nothing matched: skip the rewrite entirely.
  if (deleted === 0) return { deleted };
  await fs.writeFile(logPath, kept.join("\n") + (kept.length ? "\n" : ""), {
    mode: 0o600,
  });
  return { deleted };
}
// Read all entries since `sinceMs` (default: 30 days). Streamed // Read all entries since `sinceMs` (default: 30 days). Streamed
// line-by-line so the whole file doesn't sit in memory at once. // line-by-line so the whole file doesn't sit in memory at once.
// Returned array is newest-first. // Returned array is newest-first.
@@ -114,13 +217,21 @@ export function aggregate(entries) {
} }
// ── By tier ── // ── By tier ──
// unique_installs is install-count for Core (no license to dedup against)
// and distinct-license-count for paid tiers (so a Pro license active
// on two installs counts ONCE here, matching the post-refactor credit
// model where they share one monthly pool). Falls back to install_id
// for paid entries that predate the license_fingerprint field.
const byTier = groupBy(entries, (e) => e.tier || "unknown"); const byTier = groupBy(entries, (e) => e.tier || "unknown");
const tierRows = Object.entries(byTier).map(([tier, list]) => ({ const tierRows = Object.entries(byTier).map(([tier, list]) => ({
tier, tier,
calls: list.length, calls: list.length,
cost_usd: sumBy(list, "cost_usd"), cost_usd: sumBy(list, "cost_usd"),
avg_duration_ms: avgBy(list, "duration_ms"), avg_duration_ms: avgBy(list, "duration_ms"),
unique_installs: new Set(list.map((e) => e.install_id)).size, unique_installs:
tier === "core" || tier === "unknown"
? new Set(list.map((e) => e.install_id)).size
: new Set(list.map((e) => e.license_fingerprint || e.install_id)).size,
})); }));
// ── By model ── // ── By model ──
@@ -170,6 +281,30 @@ export function aggregate(entries) {
.sort((a, b) => b.cost_usd - a.cost_usd) .sort((a, b) => b.cost_usd - a.cost_usd)
.slice(0, 20); .slice(0, 20);
// ── By license fingerprint (top 20 by spend, paid tiers only) ──
// One license may span multiple installs (cloud account + self-host),
// and the post-refactor credit ledger aggregates their spend onto a
// single pool. This view mirrors that — operators get a "by paid
// user" rollup that doesn't double-count multi-install Pros, plus an
// install-count column to see distribution per license.
const byLicense = groupBy(
entries.filter((e) => e.license_fingerprint),
(e) => e.license_fingerprint
);
const licenseRows = Object.entries(byLicense)
.map(([fp, list]) => ({
license_fingerprint: fp,
tier_snapshot: list[0]?.tier || "core",
calls: list.length,
cost_usd: sumBy(list, "cost_usd"),
summaries: new Set(list.map((e) => e.job_id).filter(Boolean)).size,
unique_installs: new Set(list.map((e) => e.install_id).filter(Boolean)).size,
avg_duration_ms: avgBy(list, "duration_ms"),
last_active_at: Math.max(...list.map((e) => e.ts || 0)),
}))
.sort((a, b) => b.cost_usd - a.cost_usd)
.slice(0, 20);
// ── By hour-of-day (for traffic-pattern view) ── // ── By hour-of-day (for traffic-pattern view) ──
const byHour = groupBy(entries, (e) => new Date(e.ts).getUTCHours()); const byHour = groupBy(entries, (e) => new Date(e.ts).getUTCHours());
const hourRows = Array.from({ length: 24 }, (_, h) => { const hourRows = Array.from({ length: 24 }, (_, h) => {
@@ -193,6 +328,198 @@ export function aggregate(entries) {
})) }))
.sort((a, b) => a.avg_duration_ms - b.avg_duration_ms); .sort((a, b) => a.avg_duration_ms - b.avg_duration_ms);
// ── Per-summary rollup (collapse transcribe + analyze pairs) ──
// Every "summarize" produces 2 audit entries — one transcribe, one
// analyze — sharing a job_id. The dashboard's call-level views show
// them separately, which is useful for backend-vs-pipeline tuning but
// confusing as "how many summaries did I serve". Group by job_id so
// operators see one row per summary with combined cost/duration.
// Entries without a job_id (older relay versions, or balance pings)
// are bucketed into their own "no-jobid" row at the bottom.
const byJob = groupBy(entries, (e) => e.job_id || "__no_jobid__");
const summaryRows = Object.entries(byJob)
.filter(([k]) => k !== "__no_jobid__")
.map(([jobId, list]) => {
const transcribe = list.find((e) => e.pipeline === "transcribe");
const analyze = list.find((e) => e.pipeline === "analyze");
return {
job_id: jobId,
install_id: list[0]?.install_id || null,
tier: list[0]?.tier || null,
started_at: Math.min(...list.map((e) => e.ts || Infinity)),
completed_at: Math.max(...list.map((e) => e.ts || 0)),
transcribe_backend: transcribe?.backend || null,
transcribe_model: transcribe?.model || null,
analyze_backend: analyze?.backend || null,
analyze_model: analyze?.model || null,
total_cost_usd: sumBy(list, "cost_usd"),
total_duration_ms: sumBy(list, "duration_ms"),
status:
list.every((e) => e.status === "success")
? "success"
: list.some((e) => e.status === "error")
? "error"
: "partial",
had_transcribe: !!transcribe,
had_analyze: !!analyze,
};
})
.sort((a, b) => b.completed_at - a.completed_at);
// ── Recent errors (newest 50) ──
// Quick triage view — when something is failing, the operator needs
// to see the offending error strings without scrolling the full
// call log.
// Surface any audit row carrying an error message — that catches
// status="error" (true backend failures) AND status="partial"
// (e.g. transcribe-with-truncated-chunks, which records the
// missing-speech message in the error field). Operators rely on
// this view to triage all degraded behavior, not just outright
// 5xx-class failures, so the broader filter is the right default.
const errorRows = entries
.filter((e) => e.error)
.slice(0, 50)
.map((e) => ({
ts: e.ts,
install_id: e.install_id || null,
tier: e.tier || null,
pipeline: e.pipeline || null,
backend: e.backend || null,
model: e.model || null,
duration_ms: e.duration_ms || 0,
error: (e.error || "").slice(0, 280),
attempts: Array.isArray(e.attempts) ? e.attempts : null,
}));
// ── Per-(pipeline, model) performance + failure tables ──
// Normalizes raw duration_ms by audio_seconds so different models
// can be compared on a backend-agnostic benchmark: how many ms of
// wall-clock time does this model take per minute of audio? Analyze
// calls don't have audio (they consume the transcript text), so we
// report ms-per-1k-input-tokens for those instead.
//
// Failure rate is computed against `attempted` (success + error)
// and excludes `refused` calls — refused requests never reached the
// backend, so they shouldn't count against the model's reliability.
const byPipelineModel = {};
for (const e of entries) {
const pipeline = e.pipeline || "unknown";
const model = e.model || "unknown";
if (model === "unknown" && e.status === "refused") continue; // refused entries often have no model
const key = `${pipeline}::${model}`;
if (!byPipelineModel[key]) {
byPipelineModel[key] = {
pipeline,
model,
calls: 0,
success: 0,
errors: 0,
refused: 0,
partials: 0,
sum_duration_ms: 0,
sum_audio_seconds: 0,
sum_input_tokens: 0,
sum_output_tokens: 0,
error_counts: {}, // { error_signature: count }
};
}
const row = byPipelineModel[key];
row.calls += 1;
if (e.status === "success") row.success += 1;
if (e.status === "error") row.errors += 1;
if (e.status === "refused") row.refused += 1;
if (e.status === "partial") row.partials += 1;
row.sum_duration_ms += e.duration_ms || 0;
if (typeof e.audio_seconds === "number" && e.audio_seconds > 0) {
row.sum_audio_seconds += e.audio_seconds;
}
row.sum_input_tokens += e.input_tokens || 0;
row.sum_output_tokens += e.output_tokens || 0;
// Aggregate the top-error counts off ANY row that has a populated
// error message — not just status="error" rows. Partial (truncated
// transcribe) and refused (out-of-credits, capacity-gated) rows
// also carry useful error strings the operator wants to see in
// the "Top failure modes" table. The old gate `status === "error"`
// hid all truncations because they're recorded as status="partial".
if (e.error) {
const sig = errorSignature(e.error);
row.error_counts[sig] = (row.error_counts[sig] || 0) + 1;
}
}
const perfByModel = Object.values(byPipelineModel).map((r) => {
const attempted = r.success + r.errors;
const successRate = attempted > 0 ? r.success / attempted : null;
const audioMin = r.sum_audio_seconds / 60;
const msPerAudioMin = audioMin > 0 ? r.sum_duration_ms / audioMin : null;
const msPer1kInputTokens =
r.sum_input_tokens > 0
? r.sum_duration_ms / (r.sum_input_tokens / 1000)
: null;
// Top 3 error signatures by frequency for this model.
const topErrors = Object.entries(r.error_counts)
.map(([signature, count]) => ({ signature, count }))
.sort((a, b) => b.count - a.count)
.slice(0, 3);
return {
pipeline: r.pipeline,
model: r.model,
calls: r.calls,
success: r.success,
errors: r.errors,
refused: r.refused,
partials: r.partials,
// "failures" = total signal worth surfacing in failure tables.
// Includes partials so a TX that lost minutes of speech via a
// truncated chunk is counted as a failure mode, not silently
// tucked away under a "success" pipe. The errors-by-model
// dashboard table reads this; the per-call "errors" field stays
// available for stricter computations.
failures: r.errors + r.partials,
success_rate: successRate,
// Speed benchmark fields. Either or both may be null when there
// wasn't enough successful-with-metadata data to compute them.
ms_per_audio_minute: msPerAudioMin,
ms_per_1k_input_tokens: msPer1kInputTokens,
total_audio_minutes: audioMin > 0 ? audioMin : null,
top_errors: topErrors,
};
});
// ── Revenue / margin (requires tier prices supplied by caller) ──
// Distinct paying USERS in the window × the operator's per-tier
// monthly price. For Core (free) we count distinct installs — that's
// still the right grain for free-tier "active users", since Core has
// no license to dedup against. For Pro/Max we count distinct license
// fingerprints so a single Pro license activated on two installs
// (cloud + self-host) counts ONCE toward monthly revenue, matching
// the post-refactor credit model where they share one monthly pool.
// Falls back to install_id for paid entries missing a fingerprint
// (legacy pre-refactor audit rows) so historical ranges stay
// approximately correct rather than dropping to zero.
//
// Strictly an *estimate* — the relay doesn't know if a Pro user
// actually paid this month, just that they touched a request.
// Underestimates churned customers (who paid but didn't call) and
// overestimates trial users (who haven't paid yet). Hooked in by
// the dashboard route, not here, so tests can pass an empty prices
// map and get zero.
const tierActiveInstalls = {
core: new Set(),
pro: new Set(),
max: new Set(),
};
for (const e of entries) {
const t = e.tier || "core";
if (!tierActiveInstalls[t]) continue;
if (t === "core") {
if (e.install_id) tierActiveInstalls.core.add(e.install_id);
} else {
// Paid: prefer fingerprint, fall back to install_id for legacy rows.
const id = e.license_fingerprint || e.install_id;
if (id) tierActiveInstalls[t].add(id);
}
}
return { return {
summary: { summary: {
calls, calls,
@@ -206,14 +533,89 @@ export function aggregate(entries) {
total_input_tokens: totalInputTokens, total_input_tokens: totalInputTokens,
total_output_tokens: totalOutputTokens, total_output_tokens: totalOutputTokens,
total_thinking_tokens: totalThinkingTokens, total_thinking_tokens: totalThinkingTokens,
total_summaries: summaryRows.length,
// active_installs_by_tier name retained for dashboard compatibility,
// but the paid-tier counts here are actually DISTINCT LICENSES,
// not distinct installs (see the comment on tierActiveInstalls
// above). Core remains install-based. The dashboard label is
// "Active users by tier" which fits either grain.
active_installs_by_tier: {
core: tierActiveInstalls.core.size,
pro: tierActiveInstalls.pro.size,
max: tierActiveInstalls.max.size,
},
}, },
by_tier: tierRows, by_tier: tierRows,
by_model: modelRows, by_model: modelRows,
by_pipeline: pipelineRows, by_pipeline: pipelineRows,
by_backend: backendRows, by_backend: backendRows,
by_install: installRows, by_install: installRows,
by_license: licenseRows,
by_hour_utc: hourRows, by_hour_utc: hourRows,
cost_vs_speed: costSpeedRows, cost_vs_speed: costSpeedRows,
by_summary: summaryRows,
errors: errorRows,
perf_by_model: perfByModel,
};
}
/**
 * Collapse a raw error message into a coarse, stable bucket key so that
 * near-identical messages (differing only in a timestamp, id, URL, port
 * or large number) are counted as one error class.
 *
 * Rewrites are applied in this order, each over the result of the last:
 *   1. ISO-style date-times (optional fraction / Z / UTC offset) → <ts>
 *   2. runs of 32+ hex characters                                → <hex>
 *   3. 8-4-4-4-12 hex UUIDs                                      → <uuid>
 *   4. http(s) URLs                                              → <url>
 *   5. ":" followed by 2–5 digits                                → :<port>
 *   6. standalone numbers of 4+ digits                           → <n>
 * The result is trimmed and cut to 120 characters.
 *
 * @param {*} raw - error text; any falsy value yields "(unknown)".
 * @returns {string} normalized signature.
 */
function errorSignature(raw) {
  if (!raw) return "(unknown)";
  const rewrites = [
    [/\b\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:?\d{2})?\b/g, "<ts>"],
    [/\b[0-9a-f]{32,}\b/gi, "<hex>"],
    [/\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b/gi, "<uuid>"],
    [/https?:\/\/[^\s)"']+/g, "<url>"],
    [/:\d{2,5}\b/g, ":<port>"],
    [/\b\d{4,}\b/g, "<n>"],
  ];
  const normalized = rewrites.reduce(
    (acc, [pattern, placeholder]) => acc.replace(pattern, placeholder),
    String(raw)
  );
  return normalized.trim().slice(0, 120);
}
/**
 * Derived revenue/margin numbers. Kept out of aggregate() because it
 * needs prices the operator sets in config — the core aggregator stays
 * config-agnostic.
 *
 * Inputs are sanitized so a bad config value can never poison the
 * dashboard with NaN: counts and prices that are missing, non-numeric,
 * non-finite or negative are treated as 0, and a missing or non-numeric
 * cost is treated as 0.
 *
 * @param {object} [args]
 * @param {{core?: number, pro?: number, max?: number}} [args.activeInstallsByTier]
 *   per-tier active counts, as produced in the aggregate summary's
 *   `active_installs_by_tier`.
 * @param {{core?: number, pro?: number, max?: number}} [args.prices]
 *   USD-per-month price per tier.
 * @param {number} [args.geminiCostInRange] total_cost_usd from the summary.
 * @returns {{
 *   monthly_revenue_usd: number,
 *   gemini_cost_usd_in_range: number,
 *   margin_usd: number,
 *   by_tier_revenue: Array<{tier: string, active_installs: number, price_usd: number, revenue_usd: number}>
 * }} revenue is the sum over tiers of count × price; margin is
 *   revenue − cost (approximate).
 */
export function computeRevenue({ activeInstallsByTier, prices, geminiCostInRange } = {}) {
  // Coerce to a finite, non-negative number; anything else becomes 0.
  const toNonNegative = (value) => {
    const n = Number(value ?? 0);
    return Number.isFinite(n) && n > 0 ? n : 0;
  };
  const rawCost = Number(geminiCostInRange ?? 0);
  const cost = Number.isFinite(rawCost) ? rawCost : 0;

  const byTier = ["core", "pro", "max"].map((tier) => {
    const installs = toNonNegative(activeInstallsByTier?.[tier]);
    const price = toNonNegative(prices?.[tier]);
    return {
      tier,
      active_installs: installs,
      price_usd: price,
      revenue_usd: installs * price,
    };
  });
  const revenue = byTier.reduce((sum, row) => sum + row.revenue_usd, 0);
  return {
    monthly_revenue_usd: revenue,
    gemini_cost_usd_in_range: cost,
    margin_usd: revenue - cost,
    by_tier_revenue: byTier,
  };
}
+676 -109
View File
@@ -17,6 +17,33 @@ import { GoogleGenAI } from "@google/genai";
import fs from "fs/promises"; import fs from "fs/promises";
import os from "os"; import os from "os";
import path from "path"; import path from "path";
import { splitAudioFile, getAudioDurationSeconds } from "../audio-meta.js";
// Chunking knobs are passed into createGeminiBackend() from
// /data/config/relay-config.json (live-reloaded). The previous
// hardcoded constants (30 min / 6 concurrency) were removed in
// v0.2.32 so all chunking decisions flow from one canonical config
// source — edited via the dashboard's Settings tab.
/**
 * Pick a file extension for an audio MIME type.
 *
 * ffmpeg infers the output container/codec from the FILE EXTENSION, not
 * the input format: writing the master audio as "audio.bin" and asking
 * for "chunk_0.bin" fails with "Unable to find a suitable output format
 * for ...bin". So the extension is derived from the mimeType the caller
 * supplies; the audio bytes themselves are unchanged.
 *
 * Matching is a case-insensitive substring test, checked in the order
 * listed below; the first hit wins. Anything unrecognized (including a
 * missing mimeType) falls back to "mp3".
 *
 * @param {string} [mimeType]
 * @returns {string} extension without a leading dot.
 */
function extForMime(mimeType) {
  const lowered = (mimeType || "").toLowerCase();
  // [substrings to look for, extension] — order is significant.
  const table = [
    [["mp4", "m4a"], "m4a"],
    [["ogg"], "ogg"],
    [["opus"], "opus"],
    [["wav"], "wav"],
    [["webm"], "webm"],
    [["flac"], "flac"],
    [["aac"], "aac"],
  ];
  const hit = table.find(([needles]) =>
    needles.some((needle) => lowered.includes(needle))
  );
  return hit ? hit[1] : "mp3";
}
// Defaults used only when the caller doesn't supply explicit model // Defaults used only when the caller doesn't supply explicit model
// names. Production callers should pass models pulled from // names. Production callers should pass models pulled from
@@ -27,33 +54,49 @@ const DEFAULT_TRANSCRIPTION_MODEL = "gemini-3-flash-preview";
const DEFAULT_ANALYSIS_MODEL = "gemini-3.1-pro-preview"; const DEFAULT_ANALYSIS_MODEL = "gemini-3.1-pro-preview";
const EMPTY_RETRIES = 3; const EMPTY_RETRIES = 3;
// Per-pipeline fallback chains, ordered newest/most-expensive → // Per-pipeline fallback chains. All entries are verified valid as of
// older/cheaper. When the operator-selected primary model returns a // 2026-05 against ai.google.dev/gemini-api/docs/models. Retired model
// retryable error (503 capacity, 429 rate limit, etc.) the relay // IDs (gemini-3-pro-preview shut down 2026-03-09, gemini-2.0-flash
// walks DOWN this list — never up, since the operator's choice // deprecated, gemini-3.1-flash-* never existed) are NOT in the chain.
// reflects their preferred price/quality point. The chain is sliced //
// from the primary forward, so picking 2.5-flash falls back to only // Ordering rationale per pipeline:
// 2.0-flash, never back up to 3-flash. // transcribe — Flash first (audio is Flash's natural fit), Pro
// only as last-resort because Pro on audio is wildly expensive
// analyze — Pro first (structured JSON benefits from reasoning),
// then Flash for the cheap+fast path
const TRANSCRIPTION_FALLBACK_CHAIN = [ const TRANSCRIPTION_FALLBACK_CHAIN = [
"gemini-3-flash-preview", "gemini-3-flash-preview",
"gemini-2.5-flash", "gemini-2.5-flash",
"gemini-2.0-flash", "gemini-3.1-flash-lite",
"gemini-2.5-pro",
"gemini-3.1-pro-preview",
]; ];
const ANALYSIS_FALLBACK_CHAIN = [ const ANALYSIS_FALLBACK_CHAIN = [
"gemini-3.1-pro-preview", "gemini-3.1-pro-preview",
"gemini-3-pro-preview", "gemini-2.5-pro",
"gemini-3-flash-preview", "gemini-3-flash-preview",
"gemini-2.5-flash", "gemini-2.5-flash",
"gemini-3.1-flash-lite",
]; ];
// Slice the chain starting at the primary model. If the primary isn't // Build the fallback chain starting at the operator-selected primary.
// in the chain (unknown / typo), return just the primary — no // Walks the primary first, then everything below it (cheaper / older
// fallback possible. Returns a fresh array so callers can iterate // fallbacks — preserves the "operator picked their price point"
// safely. // intent), then everything above it as a last-resort layer. The
// last-resort layer matters when the primary sits at the BOTTOM of
// the chain (e.g. operator picked flash-lite for cost): if Google
// returns 503 capacity on flash-lite there's nothing strictly below
// to try, so the call would fail. Walking upward as a final tier
// lets the user's job complete on a pricier model rather than fail
// entirely — operators see the cost in the dashboard and can adjust
// their primary if the fallback-to-up is too costly in practice.
function fallbackChain(chain, primary) { function fallbackChain(chain, primary) {
const idx = chain.indexOf(primary); const idx = chain.indexOf(primary);
if (idx < 0) return [primary]; if (idx < 0) return [primary];
return chain.slice(idx); return [
...chain.slice(idx), // primary + everything strictly below
...chain.slice(0, idx), // everything above primary, last-resort
];
} }
// Detect errors that warrant trying the next model in the chain. // Detect errors that warrant trying the next model in the chain.
@@ -63,10 +106,78 @@ function isFallbackEligibleError(err) {
const status = err?.status || err?.httpStatusCode || 0; const status = err?.status || err?.httpStatusCode || 0;
const msg = err?.message || String(err); const msg = err?.message || String(err);
if (status === 503 || status === 429 || status === 529) return true; if (status === 503 || status === 429 || status === 529) return true;
// 404 when the MODEL doesn't exist — Google retires preview model
// names regularly, and the next model in the chain is exactly the
// recovery we want. Match the specific shape of the response so a
// generic 404 from a URL typo (config issue) doesn't get masked.
if (status === 404 && /models?\/[^ ]+ is not found|not supported for gen|not supported for this method/i.test(msg)) return true;
// 400 with "Thinking level is not supported" — we already gate
// thinkingConfig on Gemini 3.x Flash, but if a future model
// unexpectedly rejects it, walking to a different model is a
// valid recovery path.
if (status === 400 && /thinking level is not supported/i.test(msg)) return true;
if (/overloaded|unavailable|capacity|high demand|rate limit|fetch failed|ECONNRESET|ETIMEDOUT|socket hang up|network/i.test(msg)) return true; if (/overloaded|unavailable|capacity|high demand|rate limit|fetch failed|ECONNRESET|ETIMEDOUT|socket hang up|network/i.test(msg)) return true;
return false; return false;
} }
/**
 * Produce a short, single-line rendering of an error for cascade
 * messages in the activity log — enough signal to tell "high demand"
 * from "rate limit" from "auth failure".
 *
 * Steps: take `err.message` (or String(err) when there is none), drop
 * any "Please refer to <url>" pointer, collapse whitespace runs to one
 * space, trim, and cap at 120 characters.
 *
 * @param {*} err - Error, error-like object, or any other value.
 * @returns {string}
 */
function shortenError(err) {
  const raw = err?.message || String(err);
  const withoutDocsPointer = raw.replace(/Please refer to https?:\/\/[^\s]+/g, "");
  const singleLine = withoutDocsPointer.replace(/\s+/g, " ").trim();
  return singleLine.slice(0, 120);
}
/**
 * Shift every [M:SS] / [H:MM:SS] timestamp token in a transcript by a
 * number of seconds. Used by the chunked transcribe path so each chunk's
 * local timestamps (the model emits [0:00] at the start of its audio
 * slice) translate to global timestamps relative to the full audio.
 *
 * Token grammar: "[" digits ":" 2 digits (":" 2 digits)? "]". With three
 * fields the first is hours; with two fields the first is minutes.
 * Output uses [H:MM:SS] once the shifted time reaches one hour and
 * [M:SS] otherwise.
 *
 * The offset is coerced to a number and the shifted total is rounded to
 * whole seconds, so a fractional chunk offset (e.g. one derived from a
 * probed duration) cannot produce malformed tokens like "[1:40.4]", and
 * a numeric string offset is honored instead of silently ignored.
 *
 * @param {string} text - transcript text; returned as-is when falsy.
 * @param {number|string} offsetSec - seconds to add. Zero, falsy or
 *   non-finite values make this a no-op.
 * @returns {string} text with shifted tokens. A token whose shifted
 *   time would be negative is left unchanged.
 */
function shiftTimestamps(text, offsetSec) {
  if (!text || !offsetSec) return text;
  const offset = Number(offsetSec);
  if (!Number.isFinite(offset)) return text;
  const pad = (n) => n.toString().padStart(2, "0");
  return text.replace(
    /\[(\d+):(\d{2})(?::(\d{2}))?\]/g,
    (match, p1, p2, p3) => {
      const localSec =
        p3 !== undefined
          ? Number.parseInt(p1, 10) * 3600 +
            Number.parseInt(p2, 10) * 60 +
            Number.parseInt(p3, 10)
          : Number.parseInt(p1, 10) * 60 + Number.parseInt(p2, 10);
      const shifted = localSec + offset;
      if (!Number.isFinite(shifted) || shifted < 0) return match;
      // Whole seconds only — keeps the emitted token well-formed.
      const total = Math.round(shifted);
      const h = Math.floor(total / 3600);
      const m = Math.floor((total % 3600) / 60);
      const s = total % 60;
      return h > 0 ? `[${h}:${pad(m)}:${pad(s)}]` : `[${m}:${pad(s)}]`;
    }
  );
}
/**
 * Render a model-fallback attempts list as one line for the activity
 * log, e.g. "primary (503: high demand) → fallback". Shared by the
 * transcribe and analyze paths so both use the same format.
 *
 * A successful attempt is shown as the bare model name; any other
 * attempt is shown as "model (status: error)", with "?" standing in for
 * a missing error string. Entries are joined with " → ".
 *
 * @param {Array<{model: string, status: *, error?: string}>} attempts
 * @returns {string} empty string for an empty list.
 */
function formatCascade(attempts) {
  const segments = [];
  for (const attempt of attempts) {
    if (attempt.status === "success") {
      segments.push(`${attempt.model}`);
    } else {
      segments.push(`${attempt.model} (${attempt.status}: ${attempt.error || "?"})`);
    }
  }
  return segments.join(" → ");
}
const TRANSCRIPTION_SAFETY = [ const TRANSCRIPTION_SAFETY = [
{ category: "HARM_CATEGORY_HARASSMENT", threshold: "BLOCK_NONE" }, { category: "HARM_CATEGORY_HARASSMENT", threshold: "BLOCK_NONE" },
{ category: "HARM_CATEGORY_HATE_SPEECH", threshold: "BLOCK_NONE" }, { category: "HARM_CATEGORY_HATE_SPEECH", threshold: "BLOCK_NONE" },
@@ -79,6 +190,23 @@ export function createGeminiBackend({
transcriptionModel = DEFAULT_TRANSCRIPTION_MODEL, transcriptionModel = DEFAULT_TRANSCRIPTION_MODEL,
analysisModel = DEFAULT_ANALYSIS_MODEL, analysisModel = DEFAULT_ANALYSIS_MODEL,
timeoutMs = 900_000, timeoutMs = 900_000,
// Chunking knobs — caller MUST source from relay-config.json. We
// accept defaults here ONLY for unit-test ergonomics. Production
// callers (admin-test-run.js, transcribe-url.js) always pass the
// values explicitly so the operator's Settings-tab edits flow
// through.
txChunkSeconds = 30 * 60,
txConcurrency = 12,
// Operator-editable prompt override (Settings tab). Empty string
// falls back to DEFAULT_TRANSCRIBE_PROMPT_BODY at request time.
transcribePromptOverride = "",
// Output-token caps. Defaults are the same values that used to be
// hardcoded in this file (65536 for TX, 8192 for AN — was implicit
// Google default for AN). Operator overrides via Settings tab so
// they can trade robustness for cost. Test-run / unit-test callers
// can keep the defaults.
txMaxOutputTokens = 65536,
anMaxOutputTokens = 8192,
} = {}) { } = {}) {
if (!apiKey) { if (!apiKey) {
throw new Error("createGeminiBackend: apiKey is required"); throw new Error("createGeminiBackend: apiKey is required");
@@ -95,6 +223,127 @@ export function createGeminiBackend({
const txChain = fallbackChain(TRANSCRIPTION_FALLBACK_CHAIN, transcriptionModel); const txChain = fallbackChain(TRANSCRIPTION_FALLBACK_CHAIN, transcriptionModel);
const anChain = fallbackChain(ANALYSIS_FALLBACK_CHAIN, analysisModel); const anChain = fallbackChain(ANALYSIS_FALLBACK_CHAIN, analysisModel);
// Single-file transcribe: upload one audio file to the Gemini File API,
// poll until it leaves the PROCESSING state, then run generateContent
// through the transcription fallback chain (txChain). Used both for the
// short-content single-call path and as the per-chunk primitive for
// long content.
//
// Parameters:
//   filePath           - path on disk of the audio file to upload
//   mimeType           - MIME type sent with the upload and the request
//   prompt             - full transcription prompt text
//   timestampOffsetSec - seconds added to every [MM:SS] / [H:MM:SS]
//                        token in the result so chunks after the first
//                        stitch correctly into a global timeline
//   chunkLabel         - optional label used only in log/error prefixes
//
// Resolves to { text, usage, model, attempts } where `model` is the
// chain entry that actually served the request and `attempts` records
// every model tried. Rejects with the underlying error (message
// rewritten to the full cascade, `attempts` attached) when no model in
// the chain succeeds, or with a plain Error when file processing times
// out or fails.
//
// The uploaded File API artifact is removed in a single `finally`, so
// it is cleaned up on EVERY exit path — including the processing
// timeout, the FAILED state and an empty fallback chain — and it stays
// available for the whole fallback loop.
async function transcribeFromFile({
  filePath,
  mimeType,
  prompt,
  timestampOffsetSec = 0,
  chunkLabel = "",
}) {
  const tag = chunkLabel ? `[gemini ${chunkLabel}] ` : "[gemini] ";
  const uploaded = await ai.files.upload({
    file: filePath,
    config: { mimeType },
  });
  try {
    let f = uploaded;
    const pStart = Date.now();
    while (f.state === "PROCESSING") {
      if (Date.now() - pStart > 5 * 60 * 1000) {
        throw new Error(`${tag}Gemini file processing exceeded 5 min`);
      }
      await new Promise((r) => setTimeout(r, 3000));
      f = await ai.files.get({ name: f.name });
    }
    if (f.state === "FAILED") {
      throw new Error(`${tag}Gemini failed to process audio file`);
    }

    const attempts = [];
    let lastErr;
    for (let modelIdx = 0; modelIdx < txChain.length; modelIdx++) {
      const model = txChain[modelIdx];
      // Both Gemini 3.x AND 2.5 families support thinking, but with
      // DIFFERENT parameter shapes:
      //   - Gemini 3.x → thinkingConfig: { thinkingLevel }
      //   - Gemini 2.5 → thinkingConfig: { thinkingBudget }
      // Any other model gets no thinkingConfig at all.
      const is3x = /^gemini-3(\.\d+)?-(?:pro|flash)/i.test(model);
      const is25Flash = /^gemini-2\.5-flash/i.test(model);
      const is25Pro = /^gemini-2\.5-pro/i.test(model);
      const thinkingConfig = is3x
        ? { thinkingLevel: "minimal" }
        : is25Flash
          ? { thinkingBudget: 0 }
          : is25Pro
            ? { thinkingBudget: 128 }
            : null;
      try {
        let result;
        // Empty-response retries: SDK occasionally returns 200 with
        // no text for audio inputs; retry up to EMPTY_RETRIES times
        // with the same model. NOTE(review): if every retry comes back
        // empty this still returns success with empty text rather than
        // moving to the next model — confirm that is intended.
        for (let attempt = 0; attempt < EMPTY_RETRIES; attempt++) {
          result = await ai.models.generateContent({
            model,
            config: {
              ...(thinkingConfig ? { thinkingConfig } : {}),
              safetySettings: TRANSCRIPTION_SAFETY,
              // Audio transcripts are output-token-bound. Gemini's
              // default (~8192) silently truncates long-chunk
              // transcripts mid-stream — observed on a 45-min chunk
              // that returned 31:05 worth of speech and stopped. Set
              // high so the model can emit the full transcript;
              // models with smaller caps clamp internally.
              // Configurable via Settings → `relay_gemini_tx_max_output_tokens`.
              maxOutputTokens: txMaxOutputTokens,
            },
            contents: [
              {
                role: "user",
                parts: [
                  { fileData: { fileUri: f.uri, mimeType } },
                  { text: prompt },
                ],
              },
            ],
          });
          if (safeText(result)) break;
        }
        const rawText = safeText(result) || "";
        const text = shiftTimestamps(rawText, timestampOffsetSec);
        attempts.push({ model, status: "success" });
        return {
          text,
          usage: result?.usageMetadata || null,
          model,
          attempts,
        };
      } catch (err) {
        lastErr = err;
        const status = err?.status || err?.httpStatusCode || 0;
        attempts.push({
          model,
          status: status || "error",
          error: shortenError(err),
        });
        const canFallback =
          isFallbackEligibleError(err) && modelIdx < txChain.length - 1;
        console.warn(
          `${tag}transcribe with ${model} failed (${status || "?"}): ${err?.message || err}${canFallback ? ` — falling back to ${txChain[modelIdx + 1]}` : ""}`
        );
        if (!canFallback) {
          // Surface the whole cascade on the error so the operator sees
          // every model that was tried, not just the last failure.
          err.attempts = attempts;
          err.message =
            `transcribe: all attempts failed — ${formatCascade(attempts)}`;
          throw err;
        }
      }
    }
    // Only reachable when the chain is empty (the last chain entry
    // always takes the !canFallback branch above).
    const finalErr =
      lastErr || new Error("transcribe: all models in fallback chain failed");
    finalErr.attempts = attempts;
    finalErr.message =
      `transcribe: all attempts failed — ${formatCascade(attempts)}`;
    throw finalErr;
  } finally {
    // Best-effort cleanup of the uploaded File API artifact. A failed
    // delete is deliberately ignored: it must never mask the transcript
    // or the real error being propagated.
    try { await ai.files.delete({ name: uploaded.name }); } catch {}
  }
}
async function transcribeAudio({ async function transcribeAudio({
audio, audio,
mimeType, mimeType,
@@ -104,109 +353,261 @@ export function createGeminiBackend({
chapters = [], chapters = [],
offsetSeconds = 0, offsetSeconds = 0,
}) { }) {
// The Files API requires a path on disk; write to a temp file.
const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "relay-tx-")); const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "relay-tx-"));
const tmpPath = path.join(tmpDir, "audio.bin"); const masterPath = path.join(tmpDir, `audio.${extForMime(mimeType)}`);
await fs.writeFile(tmpPath, audio); await fs.writeFile(masterPath, audio);
try { try {
const uploaded = await ai.files.upload({ const prompt = buildTranscriptionPrompt({
file: tmpPath, title, channel, description, chapters,
config: { mimeType }, promptOverride: transcribePromptOverride,
}); });
let f = uploaded; const duration = await getAudioDurationSeconds(masterPath);
const pStart = Date.now();
while (f.state === "PROCESSING") { // Short content: single-call path. Same behavior as before the
if (Date.now() - pStart > 5 * 60 * 1000) { // chunking refactor — minimizes overhead for the common case.
throw new Error("Gemini file processing exceeded 5 min"); if (!duration || duration <= txChunkSeconds) {
} const singleShotStart = Date.now();
await new Promise((r) => setTimeout(r, 3000)); const r = await transcribeFromFile({
f = await ai.files.get({ name: f.name }); filePath: masterPath,
} mimeType,
if (f.state === "FAILED") { prompt,
throw new Error("Gemini failed to process audio file"); timestampOffsetSec: offsetSeconds,
});
return {
// Sort even single-shot output: Gemini sometimes emits
// entries within a single call in non-chronological order
// (rare but observed). sortAndDedupeTranscript is a no-op
// when entries are already monotonic. Also coalesces
// too-short adjacent entries into more readable chunks.
text: mergeShortEntries(sortAndDedupeTranscript(r.text)),
segments: [],
duration_seconds: duration || 0,
usage: r.usage,
model: r.model,
attempts: r.attempts,
chunk_count: 1,
chunk_durations_ms: [Date.now() - singleShotStart],
};
} }
const prompt = buildTranscriptionPrompt({ title, channel, description, chapters }); // Long content: split with ffmpeg, fire chunks in parallel,
// stitch the transcripts with timestamp offsets applied per
// chunk. txChunkSeconds and txConcurrency come from the
// operator's relay-config.json (Settings tab).
const chunks = await splitAudioFile({
inputPath: masterPath,
outputDir: tmpDir,
chunkSeconds: txChunkSeconds,
});
console.log(
`[gemini] chunked transcribe: ${Math.round(duration)}s audio → ${chunks.length} chunks of up to ${txChunkSeconds}s, ${Math.min(chunks.length, txConcurrency)} in parallel`
);
// Walk the fallback chain: try the primary model first; on a // ffmpeg preserves the input extension; use the input mimeType
// retryable error (capacity / 503 / rate-limit), try the next // for chunk uploads. Falls back to mp3 if mimeType is empty.
// model in the chain. Non-retryable errors bubble up to the const chunkMime = mimeType || "audio/mpeg";
// caller — they'd just fail the same way on every model. const results = new Array(chunks.length);
let lastErr; // Per-chunk wall-time tracking. Each entry is the duration in
for (let modelIdx = 0; modelIdx < txChain.length; modelIdx++) { // ms from when the chunk's API call started to when it
const model = txChain[modelIdx]; // completed. Summed by the caller into `transcribe_ms_sum`
const isFlash = /flash/i.test(model); // (total backend compute) — the parent's `duration_ms` is the
try { // outer parallel-fan-out wall-time, which is much smaller when
let result; // concurrency > 1.
// Empty-response retries: when the SDK returns 200 with no const chunkDurationsMs = new Array(chunks.length).fill(null);
// text (which happens periodically with audio inputs), let nextIdx = 0;
// retry up to N times with the SAME model before falling const worker = async () => {
// back. while (true) {
for (let attempt = 0; attempt < EMPTY_RETRIES; attempt++) { const i = nextIdx++;
result = await ai.models.generateContent({ if (i >= chunks.length) return;
model, const c = chunks[i];
config: { const chunkStart = Date.now();
// thinkingLevel: "minimal" is only valid for Flash. try {
// Pro models reject it. Skip when the chain hop const r = await transcribeFromFile({
// landed on a Pro model. filePath: c.filePath,
...(isFlash ? { thinkingConfig: { thinkingLevel: "minimal" } } : {}), mimeType: chunkMime,
safetySettings: TRANSCRIPTION_SAFETY, prompt,
}, timestampOffsetSec: offsetSeconds + (c.startSeconds || 0),
contents: [ chunkLabel: `chunk ${i + 1}/${chunks.length}`,
{
role: "user",
parts: [
{ fileData: { fileUri: f.uri, mimeType } },
{ text: prompt },
],
},
],
}); });
if (safeText(result)) break; chunkDurationsMs[i] = Date.now() - chunkStart;
results[i] = { ok: true, ...r };
} catch (err) {
chunkDurationsMs[i] = Date.now() - chunkStart;
console.warn(
`[gemini] chunk ${i + 1}/${chunks.length} failed: ${err?.message || err}`
);
results[i] = { ok: false, error: err };
} }
}
};
const workerPromises = Array.from(
{ length: Math.min(chunks.length, txConcurrency) },
worker
);
await Promise.all(workerPromises);
// Best-effort cleanup of the uploaded File API artifact. const succeeded = results.filter((r) => r && r.ok);
try { await ai.files.delete({ name: f.name }); } catch {} const failed = results.filter((r) => r && !r.ok);
if (succeeded.length === 0) {
const first = failed[0]?.error;
const e = new Error(
`transcribe: all ${results.length} chunks failed. First error: ${first?.message || "unknown"}`
);
e.status = first?.status || 502;
throw e;
}
if (failed.length > 0) {
console.warn(
`[gemini] chunked transcribe: ${failed.length}/${results.length} chunks failed — proceeding with ${succeeded.length} successful chunks`
);
}
const text = safeText(result) || ""; // Stitch in chunk order. Each chunk's text already has its
return { // timestamps shifted by its startSeconds offset. In a perfect
text, // world the join would produce a chronological transcript —
segments: [], // but Gemini (especially flash variants) sometimes emits
duration_seconds: 0, // entries within a single chunk in non-chronological order
usage: result?.usageMetadata || null, // (observed pattern: 4:27 → 4:40 → 4:56 → 0:00 → 0:18 → ...,
// Return the model that ACTUALLY served the request — so // as if the model treated the audio as multiple "thoughts" and
// the audit log records what was used, not just what was // reset its mental clock between them). Without a post-stitch
// requested. Lets the operator see "this call fell back // sort the downstream analyzer sees those out-of-order entries
// from 3-flash to 2.5-flash" via the dashboard. // and the rendered transcript shows the timestamps jumping
model, // backward in the middle of a section. Solution: parse the
}; // joined text, sort entries by absolute offset, dedupe near-
} catch (err) { // duplicates (same offset + similar leading text), re-emit.
lastErr = err; const naiveStitched = results
const canFallback = .filter((r) => r && r.ok)
isFallbackEligibleError(err) && modelIdx < txChain.length - 1; .map((r) => r.text)
.join("\n");
const stitchedText = mergeShortEntries(sortAndDedupeTranscript(naiveStitched));
// Truncation detection: compare the LAST timestamp emitted in
// each chunk to that chunk's expected end. Some Gemini models
// (notably 2.5-flash and flash-lite) still hit output-token
// caps on long dense chunks even with maxOutputTokens=65536 —
// they silently emit a partial transcript and the chunk
// appears to "succeed." Without this check, the operator's
// Jobs table shows status=SUCCESS for a job that lost real
// minutes of speech. We collect coverage warnings here and
// expose them via the return envelope so the worker route can
// mark the job partial + surface in the errors column.
const truncatedChunks = [];
const LAST_TS_RE = /\[(\d+):(\d{2})(?::(\d{2}))?\][^\[]*$/;
for (let i = 0; i < results.length; i++) {
const r = results[i];
if (!r || !r.ok || !r.text) continue;
const chunk = chunks[i];
const chunkEndAbs = (chunk.startSeconds || 0) + (chunk.durationSec || chunk.durationSeconds || 0);
// Pull the last [H:MM:SS]/[MM:SS] from this chunk's text.
const m = r.text.match(LAST_TS_RE);
if (!m) continue;
const h = m[3] !== undefined ? parseInt(m[1], 10) : 0;
const mm = m[3] !== undefined ? parseInt(m[2], 10) : parseInt(m[1], 10);
const ss = m[3] !== undefined ? parseInt(m[3], 10) : parseInt(m[2], 10);
const lastAbs = h * 3600 + mm * 60 + ss;
const expectedSec = chunk.durationSec || chunk.durationSeconds || 0;
if (expectedSec < 60) continue; // too short to meaningfully truncate
const coverage = expectedSec > 0
? (lastAbs - (chunk.startSeconds || 0)) / expectedSec
: 1;
if (coverage < 0.8) {
truncatedChunks.push({
index: i,
coverage: Math.round(coverage * 100),
lastTs: lastAbs,
expectedEnd: chunkEndAbs,
missingSec: expectedSec - (lastAbs - (chunk.startSeconds || 0)),
});
console.warn( console.warn(
`[gemini] transcribe with ${model} failed (${err?.status || "?"}): ${err?.message || err}${canFallback ? ` — falling back to ${txChain[modelIdx + 1]}` : ""}` `[gemini] chunk ${i + 1} appears TRUNCATED — coverage ${Math.round(coverage * 100)}% (last ts ${lastAbs}s, expected ~${chunkEndAbs}s). Likely hit maxOutputTokens.`
); );
if (!canFallback) {
try { await ai.files.delete({ name: f.name }); } catch {}
throw err;
}
// loop continues with next model
} }
} }
throw lastErr || new Error("transcribe: all models in fallback chain failed");
// Aggregate metadata: pick the most-used successful model,
// sum token usage across chunks, collect every attempt.
const modelCounts = new Map();
let totalIn = 0, totalOut = 0, totalThink = 0, totalAll = 0;
const allAttempts = [];
for (const r of results) {
if (!r || !r.ok) continue;
modelCounts.set(r.model, (modelCounts.get(r.model) || 0) + 1);
const u = r.usage || {};
totalIn += u.promptTokenCount || 0;
totalOut += u.candidatesTokenCount || 0;
totalThink += u.thoughtsTokenCount || 0;
totalAll += u.totalTokenCount || 0;
if (Array.isArray(r.attempts)) allAttempts.push(...r.attempts);
}
const dominantModel =
[...modelCounts.entries()].sort((a, b) => b[1] - a[1])[0]?.[0] || null;
return {
text: stitchedText,
segments: [],
duration_seconds: duration,
usage: {
promptTokenCount: totalIn,
candidatesTokenCount: totalOut,
thoughtsTokenCount: totalThink,
totalTokenCount:
totalAll || totalIn + totalOut + totalThink,
},
model: dominantModel,
attempts: allAttempts,
chunk_count: chunks.length,
// Per-chunk wall-time (ms). Summed by callers into the
// "transcribe_ms_sum" audit field — represents total backend
// compute across all chunks, distinct from the outer
// duration_ms which is the parallel-fan-out wall-time.
chunk_durations_ms: chunkDurationsMs,
// Truncated-chunk warnings. Empty array if every chunk
// emitted ≥80% of its expected audio duration; populated
// when one or more chunks hit a silent output-token cap.
// Worker routes pass this through to recordCall so the
// Jobs-table status flips from SUCCESS to PARTIAL and the
// errors column shows what went missing.
truncated_chunks: truncatedChunks,
};
} finally { } finally {
try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {} try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
} }
} }
async function analyzeText({ prompt }) { async function analyzeText({ prompt }) {
// Accumulate every attempt so the final error / success can
// surface the cascade. Without this, the operator only sees the
// last model's failure and doesn't know whether the system even
// tried the cheaper/older fallbacks.
const attempts = [];
let lastErr; let lastErr;
for (let modelIdx = 0; modelIdx < anChain.length; modelIdx++) { for (let modelIdx = 0; modelIdx < anChain.length; modelIdx++) {
const model = anChain[modelIdx]; const model = anChain[modelIdx];
try { try {
const result = await ai.models.generateContent({ const result = await ai.models.generateContent({
model, model,
config: {
// Explicit cap so an over-thinking model can't emit an
// unbounded reasoning preamble that crowds out the JSON
// sections array. Default 8192 is plenty for the typical
// 1-3 section payload; configurable via Settings →
// `relay_gemini_an_max_output_tokens`.
maxOutputTokens: anMaxOutputTokens,
// JSON mode — Gemini guarantees the response body is
// valid JSON when this is set. Eliminates the entire
// class of "invalid JSON in window response" failures
// that came from the model occasionally wrapping its
// sections array in a prose preamble, a markdown fence,
// or truncating the closing brace. The prompt itself
// already asks for JSON; this turns that into a hard
// server-enforced constraint on the model\'s decoder.
// Doesn\'t replace responseSchema (which would also
// enforce shape) — kept lighter-weight because the
// post-parse stitcher already clamps + dedupes out-of-
// range indices, so a structural deviation doesn\'t
// crash anything.
responseMimeType: "application/json",
},
contents: [ contents: [
{ {
role: "user", role: "user",
@@ -214,24 +615,43 @@ export function createGeminiBackend({
}, },
], ],
}); });
attempts.push({ model, status: "success" });
return { return {
text: safeText(result) || "", text: safeText(result) || "",
usage: result?.usageMetadata || null, usage: result?.usageMetadata || null,
model, model,
attempts,
}; };
} catch (err) { } catch (err) {
lastErr = err; lastErr = err;
const status = err?.status || err?.httpStatusCode || 0;
attempts.push({
model,
status: status || "error",
error: shortenError(err),
});
const canFallback = const canFallback =
isFallbackEligibleError(err) && modelIdx < anChain.length - 1; isFallbackEligibleError(err) && modelIdx < anChain.length - 1;
console.warn( console.warn(
`[gemini] analyze with ${model} failed (${err?.status || "?"}): ${err?.message || err}${canFallback ? ` — falling back to ${anChain[modelIdx + 1]}` : ""}` `[gemini] analyze with ${model} failed (${status || "?"}): ${err?.message || err}${canFallback ? ` — falling back to ${anChain[modelIdx + 1]}` : ""}`
); );
if (!canFallback) throw err; if (!canFallback) {
err.attempts = attempts;
err.message =
`analyze: all attempts failed — ${formatCascade(attempts)}`;
throw err;
}
} }
} }
throw lastErr || new Error("analyze: all models in fallback chain failed"); const finalErr =
lastErr || new Error("analyze: all models in fallback chain failed");
finalErr.attempts = attempts;
finalErr.message =
`analyze: all attempts failed — ${formatCascade(attempts)}`;
throw finalErr;
} }
return { transcribeAudio, analyzeText }; return { transcribeAudio, analyzeText };
} }
@@ -246,7 +666,31 @@ function safeText(r) {
return ""; return "";
} }
function buildTranscriptionPrompt({ title, channel, description, chapters } = {}) { // Default transcribe-prompt INSTRUCTION body. Exported so the
// dashboard's Settings tab can show it as the "current default"
// alongside any operator override. The auto-generated metadata
// block (title / channel / description / chapters) is prepended
// at request time and is NOT part of the editable prompt — the
// operator only edits the instruction portion below.
// Used by buildTranscriptionPrompt whenever the operator override is
// missing or whitespace-only. The [MM:SS] / [H:MM:SS] line format this
// prompt asks for is the same shape the timestamp regexes in
// mergeShortEntries and sortAndDedupeTranscript parse, so a change to
// the requested format here needs a matching change in those parsers.
export const DEFAULT_TRANSCRIBE_PROMPT_BODY = `Transcribe this audio completely and verbatim. Group the transcript into substantive thoughts, each spanning roughly 30-60 seconds (or a complete idea, whichever is natural).
Format each line as:
[MM:SS] The spoken text here, spanning a complete thought or several connected sentences...
Rules:
- Transcribe EVERY word spoken, do not skip or summarize anything.
- Use [MM:SS] or [H:MM:SS] timestamp format at the start of each line.
- AIM FOR ~30-60 SECONDS between timestamps. Do NOT emit one entry per breath or one entry per sentence — that makes the transcript unreadable. Each entry should contain a complete thought, typically 2-5 sentences, ~50-200 words. Short interjections ("Yeah.", "Right.") only get their own line when they're a meaningful exchange between speakers.
- TIMESTAMPS MUST INCREASE MONOTONICALLY across the entire output. Never reset to a smaller time and never go backwards. If the audio length is over 60 minutes, use [H:MM:SS] format consistently.
- Include filler words (um, uh, you know) for accuracy.
- Speaker identification: FIRST consult the metadata above — descriptions and chapter titles usually name the host(s) and guest(s) explicitly, and the channel name is often the host's name. Match those names to the voices in the audio (introductions, "I'm Dax", "this is Will", first-person references) and use them as speaker labels. Format as: [MM:SS] Name: text. Only fall back to "Host"/"Guest" if no names appear in the metadata AND nobody is introduced by name in the audio.
Return ONLY the timestamped transcript, nothing else.`;
// Builds the auto-prepended metadata context (title / channel /
// description / chapters). Returns "" when nothing is present —
// avoids leading whitespace in that case.
function buildTranscriptionContextBlock({ title, channel, description, chapters } = {}) {
let ctx = ""; let ctx = "";
if (title) ctx += `Video title: "${title}"\n`; if (title) ctx += `Video title: "${title}"\n`;
if (channel) ctx += `Channel: ${channel}\n`; if (channel) ctx += `Channel: ${channel}\n`;
@@ -267,18 +711,141 @@ function buildTranscriptionPrompt({ title, channel, description, chapters } = {}
ctx += `Chapter markers:\n${lines}\n`; ctx += `Chapter markers:\n${lines}\n`;
} }
if (ctx) ctx += "\n"; if (ctx) ctx += "\n";
return ctx;
return `${ctx}Transcribe this audio completely and verbatim. Include timestamps at regular intervals (every 15-30 seconds or at natural pauses). }
Format each line as: // Resolve the transcribe prompt: operator override (from config)
[MM:SS] The spoken text here... // when present + non-empty, else the hardcoded default body.
// Always prepends the auto-generated context block.
Rules: function buildTranscriptionPrompt({ title, channel, description, chapters, promptOverride } = {}) {
- Transcribe EVERY word spoken, do not skip or summarize anything. const ctx = buildTranscriptionContextBlock({ title, channel, description, chapters });
- Use [MM:SS] or [H:MM:SS] timestamp format at the start of each line. const body = (typeof promptOverride === "string" && promptOverride.trim())
- Start a new timestamped line every 15-30 seconds or at natural speech pauses. ? promptOverride
- Include filler words (um, uh, you know) for accuracy. : DEFAULT_TRANSCRIBE_PROMPT_BODY;
- Speaker identification: FIRST consult the metadata above — descriptions and chapter titles usually name the host(s) and guest(s) explicitly. Format as: [MM:SS] Name: text. Only fall back to "Host"/"Guest" if no names appear. return ctx + body;
}
Return ONLY the timestamped transcript, nothing else.`;
// Merge adjacent entries that are too short (just a few words) into
// a single readable entry. Models occasionally emit one entry per
// breath/word ("► 4:05 um,", "► 4:07 that is", "► 4:09 usually based")
// which is unreadable. We coalesce entries that are <60 chars AND
// within ~10s of the previous entry's timestamp, until each entry is
// either ≥60 chars or sits at a real speech-pause boundary. The
// merge preserves the FIRST entry's timestamp (the start of the
// merged thought).
//
// Conservative thresholds: 60-char floor keeps very short utterances
// (like "Yeah." or "Right.") from being aggressively swept up, but
// merges fragmentary breath-by-breath entries the model sometimes
// produces. 10s gap ceiling avoids merging across an actual silence.
/**
 * Coalesce fragmentary timestamped transcript lines into readable entries.
 *
 * @param {string} text - Transcript whose entries start with [MM:SS] or
 *   [H:MM:SS]; non-timestamped lines are folded into the preceding entry.
 * @returns {string} Re-emitted transcript, one entry per line, stamps in
 *   canonical [M:SS] / [H:MM:SS] form. The input is returned untouched
 *   when it is empty, not a string, or has fewer than two entries.
 */
export function mergeShortEntries(text) {
  if (typeof text !== "string" || text === "") return text;

  const STAMPED_LINE = /^\[(\d{1,2}):(\d{2})(?::(\d{2}))?\]\s*(.*)$/;
  const MIN_CHARS = 60;
  const MAX_GAP_SEC = 10;
  const toInt = (digits) => parseInt(digits, 10);
  const twoDigits = (n) => n.toString().padStart(2, "0");

  // Pass 1: parse every line into { offset, text } records.
  const parsed = [];
  for (const rawLine of text.split(/\r?\n/)) {
    const hit = rawLine.match(STAMPED_LINE);
    if (!hit) {
      // Continuation of the previous entry; text before the first
      // timestamp has nothing to attach to and is ignored.
      const extra = rawLine.trim();
      if (parsed.length > 0 && extra) {
        parsed[parsed.length - 1].text += " " + extra;
      }
      continue;
    }
    const [, first, second, third, body] = hit;
    const offset =
      third === undefined
        ? toInt(first) * 60 + toInt(second)
        : toInt(first) * 3600 + toInt(second) * 60 + toInt(third);
    parsed.push({ offset, text: (body || "").trim() });
  }
  if (parsed.length < 2) return text;

  // Pass 2: fold an entry into its predecessor while the predecessor is
  // still short and the entry starts within MAX_GAP_SEC of the
  // predecessor's (start-of-thought) timestamp.
  const groups = [];
  for (const { offset, text: body } of parsed) {
    const tail = groups[groups.length - 1];
    const absorb =
      tail !== undefined &&
      tail.text.length < MIN_CHARS &&
      offset - tail.offset <= MAX_GAP_SEC;
    if (absorb) {
      tail.text = `${tail.text} ${body}`.trim();
    } else {
      groups.push({ offset, text: body });
    }
  }

  // Pass 3: re-emit with canonical stamps.
  return groups
    .map(({ offset, text: body }) => {
      const hours = Math.floor(offset / 3600);
      const minutes = Math.floor((offset % 3600) / 60);
      const seconds = offset % 60;
      const stamp =
        hours > 0
          ? `${hours}:${twoDigits(minutes)}:${twoDigits(seconds)}`
          : `${minutes}:${twoDigits(seconds)}`;
      return `[${stamp}] ${body}`;
    })
    .join("\n");
}
// Parse a bracketed-timestamp transcript, sort entries by absolute
// offset, drop near-duplicates, and re-emit. Used as a defensive
// post-stitch step in the chunked transcribe path — Gemini flash
// variants occasionally emit entries within a chunk in non-
// chronological order. Without this sort the downstream analyzer
// sees out-of-order entries and the user-facing transcript renders
// with timestamps jumping backward mid-section.
//
// Input that is empty, not a string, has fewer than two timestamped
// entries, or is already in non-decreasing order is returned
// UNCHANGED (no re-stamping and no dedupe) — callers rely on this
// being a no-op for well-formed transcripts.
//
// Dedup rule (applied only when a re-sort was needed): an entry whose
// offset is within 1 second of the previously KEPT entry AND whose
// leading 40 chars are identical is dropped (keeps the first).
// Conservative — won't merge entries that share an offset but have
// different content (e.g., two speakers at the same moment).
//
// Returns the transcript as "\n"-joined lines in canonical
// [M:SS] / [H:MM:SS] form.
export function sortAndDedupeTranscript(text) {
  if (!text || typeof text !== "string") return text;
  const tsRe = /^\[(\d{1,2}):(\d{2})(?::(\d{2}))?\]\s*(.*)$/;
  const entries = [];
  for (const line of text.split(/\r?\n/)) {
    const m = line.match(tsRe);
    if (m) {
      const offset =
        m[3] !== undefined
          ? parseInt(m[1], 10) * 3600 + parseInt(m[2], 10) * 60 + parseInt(m[3], 10)
          : parseInt(m[1], 10) * 60 + parseInt(m[2], 10);
      entries.push({ offset, text: (m[4] || "").trim() });
    } else if (entries.length > 0 && line.trim()) {
      // Continuation line — append to the previous entry's text so
      // multi-line entries don't get lost in the sort.
      entries[entries.length - 1].text += " " + line.trim();
    }
  }
  if (entries.length < 2) return text;

  // Most transcripts are already sorted; skip the rebuild when they are.
  const isSorted = entries.every(
    (e, i) => i === 0 || e.offset >= entries[i - 1].offset
  );
  if (isSorted) return text;

  console.warn(
    `[gemini] post-stitch sort: ${entries.length} entries were out of order — re-sorting by absolute offset`
  );
  // Stable sort by offset, then drop near-duplicates.
  entries.sort((a, b) => a.offset - b.offset);

  // Compare against the last kept ENTRY object. Comparing against the
  // already-formatted output string would read `.offset` / `.text` off
  // a string, yield NaN, and never detect a duplicate.
  const kept = [];
  for (const e of entries) {
    const prev = kept[kept.length - 1];
    const isDuplicate =
      prev !== undefined &&
      Math.abs(e.offset - prev.offset) <= 1 &&
      e.text.slice(0, 40) === prev.text.slice(0, 40);
    if (!isDuplicate) kept.push(e);
  }

  // Re-emit in canonical [H:MM:SS] / [MM:SS] form.
  const pad = (n) => n.toString().padStart(2, "0");
  return kept
    .map((e) => {
      const h = Math.floor(e.offset / 3600);
      const m = Math.floor((e.offset % 3600) / 60);
      const s = e.offset % 60;
      const stamp = h > 0 ? `${h}:${pad(m)}:${pad(s)}` : `${m}:${pad(s)}`;
      return `[${stamp}] ${e.text}`;
    })
    .join("\n");
}
+389 -4
View File
@@ -14,19 +14,94 @@ let cached = { mtimeMs: 0, snapshot: defaultConfig() };
function defaultConfig() { function defaultConfig() {
return { return {
relay_gemini_api_key: "", relay_gemini_api_key: "",
relay_parakeet_base_url: "", relay_spark_control_url: "",
relay_gemma_base_url: "", // Phase 2 — post-cluster polish pass. After diarization +
relay_parakeet_model: "parakeet-tdt-0.6b-v3", // clustering produce global speaker IDs and after analyze
relay_gemma_model: "gemma3:27b", // produces section objects, run a two-stage LLM pass that:
// 1. infers speaker names from the labeled transcript +
// episode metadata
// 2. rewrites section summaries to attribute statements to
// specific speakers ("Matt Hill explains..." instead of
// "the discussion centers around...")
// Cost: ~15-25s wall time at the end of the pipeline. Skipped
// automatically when fewer than 2 speakers detected (nothing
// to attribute). Default ON; operator can disable via the
// Settings tab.
relay_post_cluster_polish_enabled: true,
// Operator-editable polish prompts (same three-layer override
// pattern as relay_transcribe_prompt / relay_analyze_prompt).
// Empty string at this layer = fall through to the hardcoded
// defaults in post-cluster-polish.js. The dashboard "Set as new
// default" button promotes a current override into the
// *_default companion fields.
relay_polish_name_inference_prompt: "",
relay_polish_summary_rewrite_prompt: "",
// Phase 2 of Path 2A — internal meeting extras analysis. Runs
// ONE additional LLM call after the polish pass to extract
// structured items (decisions / action items / open questions /
// key quotes) from the speaker-attributed transcript. Failure
// is non-fatal — rec.extras stays null and the UI hides the
// section. Cost: ~5-15s wall time. Default ON; operator can
// disable here if their hardware is slow or the LLM is unreliable
// for structured extraction. Only affects internal meetings —
// YouTube/podcast flows ignore this setting.
relay_meeting_extras_enabled: true,
// Operator-editable prompt override for the extras pass. Empty =
// fall through to DEFAULT_MEETING_EXTRAS_PROMPT_TEMPLATE in
// meeting-extras.js. Same three-layer convention as the other
// editable prompts.
relay_meeting_extras_prompt: "",
relay_gemini_transcription_model: "gemini-3-flash-preview", relay_gemini_transcription_model: "gemini-3-flash-preview",
relay_gemini_analysis_model: "gemini-3.1-pro-preview", relay_gemini_analysis_model: "gemini-3.1-pro-preview",
relay_transcribe_backend_preference: "gemini_first", relay_transcribe_backend_preference: "gemini_first",
relay_analyze_backend_preference: "gemini_first", relay_analyze_backend_preference: "gemini_first",
// ── Text-to-speech (audio-first Recaps "walking mode") ──
// TTS turns each topic-summary into a spoken clip so the Recap app
// can play a recap back-to-back like a podcast. Two backends, same
// swap model as transcribe/analyze: operator-hardware Kokoro (via
// Spark Control's /v1/audio/speech) is the default; ElevenLabs is
// the cloud alternative. Gated to Max users on the Recap side.
//
// Preference values mirror the transcribe/analyze knobs:
// hardware_first → Kokoro if ready, else ElevenLabs (if keyed)
// hardware_only → Kokoro only (error if not ready)
// cloud_first → ElevenLabs if keyed, else Kokoro
// cloud_only → ElevenLabs only
// ("gemini_*" naming is intentionally NOT reused — TTS's cloud
// backend is ElevenLabs, not Gemini.)
relay_tts_backend_preference: "hardware_first",
// Default voice when the client doesn't specify one. Kokoro voice ids
// follow <lang_gender>_<name> (e.g. bm_george, bf_emma, am_michael,
// af_heart — the four curated for narration). Enumerate live voices
// for a picker via /v1/models (kind:tts; curated ones first).
relay_tts_default_voice: "bm_george",
// Output format. Kokoro emits wav | mp3 | opus | flac directly, so we
// default to mp3 (small + universally playable for the mobile/offline
// player) and avoid any client-side transcode. The Recap client may
// override per request.
relay_tts_format: "mp3",
// ElevenLabs cloud TTS (untested until a key is supplied). Empty
// api key = ElevenLabs unavailable; relay falls back to Kokoro under
// hardware_first / cloud_first, or errors under cloud_only.
relay_elevenlabs_api_key: "",
relay_elevenlabs_voice_id: "",
relay_elevenlabs_model: "eleven_turbo_v2_5",
relay_keysat_base_url: "https://keysat.xyz", relay_keysat_base_url: "https://keysat.xyz",
// ── Cloud operator key (core-decoupling) ──
// Shared secret that authenticates the operator's cloud Recaps server
// (recaps.cc) to this relay. When a request carries X-Recap-User-Id,
// the relay trusts it (and keys the credit pool by user:<id>, with the
// tier the relay stores for that user) ONLY if X-Recap-Operator-Key
// matches this value. Empty = cloud user-id requests are rejected
// (relay still serves the existing license/install path). Set the same
// value here and in the Recaps server's recap_relay_operator_key.
relay_cloud_operator_key: "",
relay_admin_username: "", relay_admin_username: "",
relay_admin_password_hash: "", relay_admin_password_hash: "",
relay_admin_password_salt: "", relay_admin_password_salt: "",
relay_admin_session_secret: "", relay_admin_session_secret: "",
relay_save_user_outputs: false,
relay_tier_quotas_json: JSON.stringify({ relay_tier_quotas_json: JSON.stringify({
core: { core: {
lifetime: 10, lifetime: 10,
@@ -37,9 +112,319 @@ function defaultConfig() {
pro: { lifetime: null, monthly: 50, geminiCapMonthly: 25 }, pro: { lifetime: null, monthly: 50, geminiCapMonthly: 25 },
max: { lifetime: null, monthly: null, geminiCapMonthly: 50 }, max: { lifetime: null, monthly: null, geminiCapMonthly: 50 },
}), }),
relay_tier_prices_usd_json: JSON.stringify({ core: 0, pro: 5, max: 15 }),
// Self-serve subscription prices in SATS per 30-day period. Bitcoin-
// native (the BTCPay rail invoices these directly). The Zaprite card
// rail converts to fiat at purchase time.
relay_tier_prices_sats_json: JSON.stringify({ pro: 21000, max: 42000 }),
// Self-serve subscription prices for the CARD (Zaprite) rail, in the
// smallest unit of relay_zaprite_currency (cents for USD). This is the
// amount actually charged to a card buyer — kept explicit (cents) and
// separate from relay_tier_prices_usd_json (which is a whole-dollar
// figure used only by the dashboard's revenue/margin tile). Default is
// parity with the sat prices (≈$21 / $42); the operator can set a card
// premium here to cover processing fees.
relay_tier_prices_fiat_cents_json: JSON.stringify({ pro: 2100, max: 4200 }),
// Prepaid subscription period length in days.
relay_subscription_period_days: 30,
relay_btcpay_base_url: "",
relay_btcpay_internal_url: "",
relay_btcpay_public_url: "",
relay_btcpay_store_id: "",
relay_btcpay_api_key: "",
relay_btcpay_webhook_secret: "",
// ── Zaprite (card rail) ──
// Hosted-checkout API for card payments. The operator sets the API key
// via the StartOS "Set Zaprite Connection" action. base_url defaults to
// Zaprite's public API; currency is the fiat the card is charged in
// (amount comes from relay_tier_prices_fiat_cents_json, in that
// currency's smallest unit). No webhook secret: the webhook is verified
// by re-fetching the order from Zaprite's authenticated API.
relay_zaprite_base_url: "https://api.zaprite.com",
relay_zaprite_api_key: "",
relay_zaprite_currency: "USD",
relay_credit_packages_json: JSON.stringify([
{ credits: 5, sats: 4000 },
{ credits: 10, sats: 6000 },
{ credits: 20, sats: 10000 },
]),
// ── Chunking / concurrency knobs ──
// ONE canonical default per knob, defined here exactly once. All
// backend code reads from the live-reloaded snapshot at request
// time — no hardcoded fallbacks anywhere else, no per-test-run
// overrides. The Settings tab in the dashboard edits these values
// via PUT /admin/settings.
//
// Gemini backend (relay → Google Gemini API):
// 30-min TX chunks at 12-way concurrency is well under Gemini's
// paid Tier 1 RPM cap (1000 for flash, 150 for pro). 18-min
// analyze windows match the recap-app value. 12 analyze windows
// in flight saturates most operator workloads.
relay_gemini_tx_chunk_minutes: 30,
relay_gemini_tx_concurrency: 12,
relay_gemini_analyze_window_minutes: 18,
relay_gemini_analyze_overlap_minutes: 2,
relay_gemini_analyze_concurrency: 12,
// Hardware backend (relay → operator's Parakeet + Gemma/vLLM):
// 5-min TX chunks at 4-way concurrency for Parakeet OOM headroom
// (operator hardware is typically 1 GPU; spark-control dev
// confirmed 4 concurrent is safe). 18-min analyze windows match
// gemini's value for cross-backend benchmark parity. 8 analyze
// windows in flight is the vLLM-on-single-Spark sweet spot for
// our prompt size (12 starts to queue on the GPU).
// Speaker diarization on the operator-hardware transcribe path.
// When enabled, each audio chunk gets sent to TWO Spark Control
// endpoints in parallel: /v1/audio/transcriptions (Parakeet) AND
// /api/audio/diarize-chunk (Sortformer + TitaNet). The relay
// collects per-chunk voice fingerprints, clusters them across
// chunks using cosine similarity, and re-labels diarization
// segments with globally-consistent speaker IDs (Speaker_0,
// Speaker_1, …). Each transcript entry then carries an optional
// speaker label. Diarization is hardware-path-only — the Gemini
// path uses prompt-based speaker labeling instead. Default OFF;
// operator opts in via the dashboard toggle.
relay_hardware_diarization_enabled: false,
// Cosine-similarity threshold for the cross-chunk speaker
// clustering step. Stored as integer percentage (70 = 0.70
// cosine similarity) because the slider system is integer-only.
// Two fingerprints with similarity >= threshold are merged
// into the same global speaker; below threshold = different
// speakers. NeMo's recommended default for TitaNet embeddings
// is 0.70 — clean audio with distinct voices tolerates lower
// (more aggressive merging); panel audio with similar voices
// benefits from higher (more conservative).
relay_hardware_voice_clustering_threshold: 70,
relay_hardware_anchor_min_speaking_sec: 30,
relay_hardware_small_cluster_max_speaking_sec: 15,
relay_hardware_uncertain_margin_pct: 10,
relay_hardware_tx_chunk_minutes: 5,
// Overlap (in seconds) between consecutive audio chunks on the
// hardware transcribe path. Each chunk N+1 starts `overlap`
// seconds before chunk N ends, so the same audio is covered
// twice at chunk boundaries. The relay dedupes by dropping
// words/segments in chunk N+1 whose timestamps fall within the
// shared region. Two reasons this matters:
// 1. Diarization: TitaNet needs ~3+ seconds of clean speech
// to produce a reliable voice fingerprint. A speaker who
// only talks at the very end of chunk N may get a thin
// fingerprint; the overlap means chunk N+1 also captures
// that audio, giving a better fingerprint for clustering.
// 2. Word boundaries: words straddling a chunk boundary get
// cleanly transcribed in one chunk or the other rather
// than clipped. Applies even when diarization is OFF.
// Default 30s is conservative — enough for fingerprint quality
// without making the overlap region a meaningful fraction of
// a 5-minute chunk.
relay_hardware_tx_chunk_overlap_seconds: 30,
relay_hardware_tx_concurrency: 4,
relay_hardware_analyze_window_minutes: 18,
relay_hardware_analyze_overlap_minutes: 2,
relay_hardware_analyze_concurrency: 8,
// Below this audio duration the chunked-analyze planner emits a
// single window covering the whole transcript (single-shot fast
// path). Below this duration TX chunking is also skipped — both
// backends' transcribeAudio short-circuit when audio is shorter
// than their chunk size, but this is the floor.
relay_analyze_cutoff_minutes: 25,
// ── Output-token caps per backend / pipeline ──
// Gemini's transcribe path needs a high cap because dense long
// chunks emit a lot of timestamped lines. Lower values trade
// robustness against truncation for cost / speed. The model's
// internal per-call cap may be lower than our request — see the
// truncation-detector in gemini.js for the post-hoc check that
// surfaces this as a "partial" status on the Jobs table.
relay_gemini_tx_max_output_tokens: 65536,
// Gemini analyze emits a small JSON sections array — defaulting
// explicit to 8192 (was implicit / Google default before). Bump
// higher if you observe truncated section JSON in audit rows.
relay_gemini_an_max_output_tokens: 8192,
// Hardware analyze max_tokens passed to the operator's vLLM /
// Ollama endpoint via OpenAI-compatible chat-completion shape.
// Smaller models (3B-7B) may produce better JSON with a lower
// cap that forces concision; larger models can use more.
relay_hardware_an_max_tokens: 16000,
// ── Analyze prompt section-count targets ──
// Per-VIDEO-DURATION targets that the relay uses to compute a
// per-window section count at request time. The operator sets how
// many TOTAL sections they want for a video of each duration; the
// relay divides by (total_audio / window_body) to get the average
// sections-per-window, splices the result into {{targetSections}}
// in the analyze prompt.
//
// Replaces the prior 3-bucket per-window model (short/medium/long
// by window duration). Reason: section count should scale with
// video length, not window length — a 30-min single-window
// podcast and a 3-hour 6-window podcast have very different
// segmentation needs even when their window duration is identical.
// Defaults are calibrated so a typical 90-min podcast at the
// default 18-min AN window body produces ~1-2 sections per window
// (about 9 total), matching what felt right in operator testing.
//
// Buckets are video total duration in minutes:
// under_30 → audio_sec < 30 * 60
// 30_60 → 30 * 60 <= audio_sec < 60 * 60
// 60_90 → 60 * 60 <= audio_sec < 90 * 60
// 90_120 → 90 * 60 <= audio_sec < 120 * 60
// 120_150 → 120 * 60 <= audio_sec < 150 * 60
// 150_180 → 150 * 60 <= audio_sec < 180 * 60
// over_180 → audio_sec >= 180 * 60
//
// Each value is target TOTAL sections for that video. The
// per-window emit value = round(total * window_body_sec /
// total_audio_sec).
relay_analyze_total_sections_under_30: 6,
relay_analyze_total_sections_30_60: 8,
relay_analyze_total_sections_60_90: 9,
relay_analyze_total_sections_90_120: 10,
relay_analyze_total_sections_120_150: 11,
relay_analyze_total_sections_150_180: 12,
relay_analyze_total_sections_over_180: 12,
// ── Editable LLM prompts (Settings tab) ──
// Empty string = use the hardcoded default body in
// server/backends/gemini.js (transcribe) or
// server/chunked-analyze.js (analyze). Operator can override via
// the dashboard's Settings tab textareas; PUT /admin/settings
// writes the override here. Cleared (empty) to revert to default
// without operators having to copy the default text exactly.
//
// Transcribe prompt: applies to the Gemini path only. The
// operator-hardware (Parakeet) path is a pure STT model with no
// prompt input, so the override is ignored there.
relay_transcribe_prompt: "",
// Analyze prompt: applies to BOTH Gemini and operator-hardware
// (Gemma) analyze paths. Template variables {{transcript}},
// {{windowMin}}, {{targetSections}} are interpolated at request
// time. PUT /admin/settings validates that the override still
// contains the JSON-output instruction and the {{transcript}}
// variable so an accidental edit can't silently break the
// pipeline.
relay_analyze_prompt: "",
// Operator-promoted defaults. Three-layer resolution at request
// time: override (relay_transcribe_prompt) → operator default
// (relay_transcribe_prompt_default) → hardcoded code default
// (DEFAULT_TRANSCRIBE_PROMPT_BODY in gemini.js). The "Set as new
// default" button in the dashboard moves the override content
// into this field + clears the override, letting operators
// evolve their defaults over time without code redeploys. Empty
// = use the code-side default.
relay_transcribe_prompt_default: "",
relay_analyze_prompt_default: "",
}; };
} }
// Parsed view of the credit-package menu stored in
// relay_credit_packages_json. Resolves to an array of { credits, sats }
// pairs in configured (display) order. Used by the purchase-modal
// endpoint to render package choices AND by the purchase route to
// validate that the requested package matches a configured option (so
// the buyer can't ask for arbitrary credits-for-cheap pricing).
//
// Entries whose credits or sats do not coerce to a finite number > 0
// are dropped. If the stored value is not valid JSON, or is valid JSON
// but not an array, the built-in DEFAULT_PACKAGES menu is returned
// instead. A valid array whose entries are all rejected yields [].
export async function getCreditPackages() {
  const snapshot = await getConfigSnapshot();
  const isPositive = (value) => Number.isFinite(value) && value > 0;
  try {
    const menu = JSON.parse(snapshot.relay_credit_packages_json);
    if (!Array.isArray(menu)) {
      return DEFAULT_PACKAGES;
    }
    const packages = [];
    for (const entry of menu) {
      // Optional chaining tolerates null / primitive entries in the array.
      const credits = Number(entry?.credits);
      const sats = Number(entry?.sats);
      if (isPositive(credits) && isPositive(sats)) {
        packages.push({ credits, sats });
      }
    }
    return packages;
  } catch {
    // Unparseable (or missing) JSON — fall back to the built-in menu.
    return DEFAULT_PACKAGES;
  }
}
// Built-in credit-package menu. getCreditPackages() returns this array
// when relay_credit_packages_json cannot be parsed or does not hold a
// JSON array. Each entry pairs a credit count with its price in sats.
const DEFAULT_PACKAGES = [
{ credits: 5, sats: 4000 },
{ credits: 10, sats: 6000 },
{ credits: 20, sats: 10000 },
];
// Parsed view of relay_tier_prices_usd_json. Resolves to a
// { core, pro, max } object of USD-per-month numbers. Used by the
// dashboard's revenue/margin tile.
//
// Each tier is passed through numOrZero(), so a value that is missing,
// non-numeric or negative is replaced by that tier's built-in default
// (core 0, pro 5, max 15). Unparseable JSON yields the full default set.
export async function getTierPrices() {
  const defaults = { core: 0, pro: 5, max: 15 };
  const snapshot = await getConfigSnapshot();
  try {
    const stored = JSON.parse(snapshot.relay_tier_prices_usd_json);
    const prices = {};
    for (const [tier, defaultUsd] of Object.entries(defaults)) {
      prices[tier] = numOrZero(stored?.[tier], defaultUsd);
    }
    return prices;
  } catch {
    return defaults;
  }
}
// Self-serve subscription pricing in SATS per period, parsed from
// relay_tier_prices_sats_json. Used by the BTCPay tier-invoice flow.
//
// Always resolves to an object with exactly the keys { pro, max } — it
// never returns null, and any other tier name is simply absent from the
// result. Each value goes through numOrZero(): a missing, non-numeric or
// negative price falls back to the built-in default (pro 21000,
// max 42000). Unparseable JSON yields both defaults.
export async function getTierPricesSats() {
  const defaults = { pro: 21000, max: 42000 };
  const snapshot = await getConfigSnapshot();
  let stored;
  try {
    stored = JSON.parse(snapshot.relay_tier_prices_sats_json);
  } catch {
    return defaults;
  }
  const pro = numOrZero(stored?.pro, defaults.pro);
  const max = numOrZero(stored?.max, defaults.max);
  return { pro, max };
}
// The configured prepaid period length in whole days (default 30).
//
// relay_subscription_period_days is coerced to a number and floored.
// Anything that does not floor to a finite integer >= 1 — missing,
// non-numeric, zero, negative, infinite, or a fraction below 1 — falls
// back to 30. Flooring BEFORE the range check matters: a value such as
// 0.5 is "> 0" but floors to 0, which would otherwise yield a zero-day
// subscription period.
export async function getSubscriptionPeriodDays() {
  const cfg = await getConfigSnapshot();
  const days = Math.floor(Number(cfg.relay_subscription_period_days));
  return Number.isFinite(days) && days >= 1 ? days : 30;
}
// Card-rail (Zaprite) subscription prices in the smallest unit of the
// configured currency (cents for USD), parsed from
// relay_tier_prices_fiat_cents_json. Used to set the Zaprite order
// `amount`.
//
// Always resolves to an object with exactly the keys { pro, max } — it
// never returns null, and any other tier name is simply absent from the
// result. Each value goes through numOrZero(): a missing, non-numeric or
// negative price falls back to the built-in default (pro 2100,
// max 4200). Unparseable JSON yields both defaults.
export async function getTierPricesFiatCents() {
  const defaults = { pro: 2100, max: 4200 };
  const snapshot = await getConfigSnapshot();
  let stored;
  try {
    stored = JSON.parse(snapshot.relay_tier_prices_fiat_cents_json);
  } catch {
    return defaults;
  }
  const pro = numOrZero(stored?.pro, defaults.pro);
  const max = numOrZero(stored?.max, defaults.max);
  return { pro, max };
}
// Zaprite (card rail) connection config, resolved from the live config
// snapshot as { baseUrl, apiKey, currency }.
//   baseUrl  — relay_zaprite_base_url (default https://api.zaprite.com)
//              with a single trailing "/" removed.
//   apiKey   — relay_zaprite_api_key, or "" when unset. Empty = card
//              rail not configured (callers should 503).
//   currency — relay_zaprite_currency upper-cased (default "USD").
export async function getZapriteConfig() {
  const snapshot = await getConfigSnapshot();
  const configuredBase = snapshot.relay_zaprite_base_url || "https://api.zaprite.com";
  const configuredCurrency = snapshot.relay_zaprite_currency || "USD";
  return {
    baseUrl: configuredBase.replace(/\/$/, ""),
    apiKey: snapshot.relay_zaprite_api_key || "",
    currency: configuredCurrency.toUpperCase(),
  };
}
// Coerce `v` with Number() and return it when the result is a finite,
// non-negative number; otherwise return `fallback`.
//
// Note the coercion rules: undefined and non-numeric strings become NaN
// (→ fallback), but null and "" coerce to 0, which IS accepted — so a
// stored `null` yields 0, not the fallback.
function numOrZero(v, fallback) {
  const candidate = Number(v);
  const unusable = !Number.isFinite(candidate) || candidate < 0;
  return unusable ? fallback : candidate;
}
function configPath() { function configPath() {
return path.join(dataDir, "config", "relay-config.json"); return path.join(dataDir, "config", "relay-config.json");
} }
+109
View File
@@ -0,0 +1,109 @@
// Request identity resolution (core-decoupling).
//
// Two kinds of caller authenticate to the relay:
//
// "cloud" — the operator's cloud Recaps server (recaps.cc) acting on
// behalf of one of its signed-in users. It authenticates
// ONCE with a shared operator key (X-Recap-Operator-Key) and
// names the user via X-Recap-User-Id. The credit pool is
// keyed by `user:<id>`, and the tier is whatever the relay
// has STORED for that user (operator-set) — NOT read from a
// license. This is the path that removes the per-user Keysat
// license from cloud requests.
//
// "license" — a license bearer (a self-hosted install using the
// operator's relay, or the operator's own single-mode app).
// Unchanged legacy behavior: tier + pool come from the
// resolved Keysat license / install id.
//
// resolveIdentity(req) returns a uniform shape the routes thread into the
// credits/job-credits/envelope helpers:
// { kind, creditKey, userId|null, installId|null, license|null }
//
// A route bills against `creditKey`; for tier it uses the stored row tier
// (cloud) or `license.tier` (license) — see identityTier().
import { createHash, timingSafeEqual } from "crypto";
import { getConfigSnapshot } from "./config.js";
import { resolveLicense } from "./keysat-client.js";
import { getCreditKey } from "./credits.js";
// user-ids come from Recaps (base64url / hex account ids). Constrain the
// charset so a header can't smuggle a path-ish or oversized key into the
// ledger. Accepts 1–128 characters drawn from A–Z, a–z, 0–9, "_" and
// "-"; anything else (including ":" and "/") is rejected.
const USER_ID_RE = /^[A-Za-z0-9_-]{1,128}$/;
// Resolve who is calling, from the request headers.
//
// Returns { kind, creditKey, userId, installId, license }:
//   kind "cloud"   — X-Recap-User-Id was present. creditKey is
//                    `user:<id>`; installId and license are null.
//   kind "license" — no X-Recap-User-Id. license comes from
//                    resolveLicense(Authorization); creditKey comes from
//                    getCreditKey() or is null if that throws.
//
// Throws an Error carrying a numeric `.status`:
//   401 — X-Recap-User-Id sent without a valid X-Recap-Operator-Key
//   400 — X-Recap-User-Id fails USER_ID_RE
// The operator key is checked BEFORE the user-id format so an
// unauthenticated caller learns nothing about id validation.
export async function resolveIdentity(req) {
  const userId = (req.header("X-Recap-User-Id") || "").trim();

  if (userId) {
    // Cloud path — must present a valid operator key. Delegates to
    // verifyOperatorKey() so the secret comparison lives in one place
    // instead of being duplicated here.
    const operatorOk = await verifyOperatorKey(req);
    if (!operatorOk) {
      throw Object.assign(
        new Error("X-Recap-User-Id requires a valid X-Recap-Operator-Key"),
        { status: 401 }
      );
    }
    if (!USER_ID_RE.test(userId)) {
      throw Object.assign(new Error("invalid X-Recap-User-Id"), {
        status: 400,
      });
    }
    return {
      kind: "cloud",
      creditKey: `user:${userId}`,
      userId,
      installId: null,
      license: null,
    };
  }

  // License / install path — unchanged legacy behavior.
  const installId = req.header("X-Recap-Install-Id") || null;
  const license = await resolveLicense(req.header("Authorization"));

  // getCreditKey throws only when there's neither a license fingerprint
  // nor an installId; routes guard "missing install id" themselves, so
  // tolerate it here and leave creditKey null.
  let creditKey = null;
  try {
    creditKey = getCreditKey({ installId, license });
  } catch {
    // Deliberate best-effort: creditKey stays null (see note above).
  }

  return { kind: "license", creditKey, userId: null, installId, license };
}
// Is this request the operator's cloud server (valid operator key)?
// Used by operator-only endpoints (e.g. setting a user's tier) that
// aren't tied to a specific X-Recap-User-Id.
//
// Resolves to true only when relay_cloud_operator_key is configured
// (non-empty after trimming), the X-Recap-Operator-Key header is
// present, and the two trimmed values are equal. An empty configured
// key therefore rejects every request.
//
// The comparison is done on SHA-256 digests with crypto.timingSafeEqual
// rather than `===`: a plain string compare can return sooner the
// earlier the first mismatching character is, which lets a remote
// caller probe the secret. Hashing first gives timingSafeEqual the
// equal-length buffers it requires, whatever the input lengths.
export async function verifyOperatorKey(req) {
  const cfg = await getConfigSnapshot();
  const expected = (cfg.relay_cloud_operator_key || "").trim();
  const presented = (req.header("X-Recap-Operator-Key") || "").trim();
  if (!expected || !presented) return false;
  const digest = (secret) =>
    createHash("sha256").update(secret, "utf8").digest();
  return timingSafeEqual(digest(presented), digest(expected));
}
// The tier to bill/quota at for a resolved identity.
//   license → the resolved license tier, or "core" when there is no
//             license / no tier on it
//   cloud   → the relay's stored row tier (row.tier_snapshot,
//             operator-set; default "core"), unless the paid period
//             has lapsed
export function identityTier(identity, row) {
  if (identity.kind !== "cloud") {
    return identity.license?.tier || "core";
  }

  // Prepaid-period enforcement (self-serve subscriptions): once the paid
  // period lapses, the user is effectively Core until they renew. An
  // operator comp grant leaves subscription_expires_at null, so this is a
  // no-op for those — only dated purchases expire.
  const lapsed = isSubscriptionExpired(row);
  return lapsed ? "core" : (row?.tier_snapshot || "core");
}
// True when a cloud user's paid period has a set expiry that's in the past.
// Null/absent expiry = never expires (e.g. an operator comp grant). An
// expiry value that does not parse to a valid date is also treated as
// "not expired".
export function isSubscriptionExpired(row) {
  const rawExpiry = row?.subscription_expires_at;
  if (!rawExpiry) {
    return false;
  }
  const expiresAtMs = new Date(rawExpiry).getTime();
  // getTime() yields NaN for an unparseable date.
  if (Number.isNaN(expiresAtMs)) {
    return false;
  }
  return expiresAtMs < Date.now();
}
+35 -8
View File
@@ -15,18 +15,28 @@ import { fileURLToPath } from "url";
import { initConfig } from "./config.js"; import { initConfig } from "./config.js";
import { initCredits } from "./credits.js"; import { initCredits } from "./credits.js";
import { initAuditLog } from "./audit-log.js"; import { initAuditLog } from "./audit-log.js";
import { initJobCredits } from "./job-credits.js";
import { initOutputStore } from "./output-store.js";
import { import {
setupAdminAuthMiddleware, setupAdminAuthMiddleware,
setupAdminAuthRoutes, setupAdminAuthRoutes,
} from "./admin-auth.js"; } from "./admin-auth.js";
import { transcribeRouter } from "./routes/transcribe.js"; import { transcribeRouter } from "./routes/transcribe.js";
import { transcribeUrlRouter } from "./routes/transcribe-url.js"; import { transcribeUrlRouter } from "./routes/transcribe-url.js";
import { summarizeUrlRouter } from "./routes/summarize-url.js";
import { analyzeRouter } from "./routes/analyze.js"; import { analyzeRouter } from "./routes/analyze.js";
import { ttsRouter } from "./routes/tts.js";
import { userTierRouter } from "./routes/user-tier.js";
import { healthRouter } from "./routes/health.js"; import { healthRouter } from "./routes/health.js";
import { balanceRouter } from "./routes/balance.js"; import { balanceRouter } from "./routes/balance.js";
import { policyRouter } from "./routes/policy.js"; import { policyRouter } from "./routes/policy.js";
import { capabilitiesRouter } from "./routes/capabilities.js"; import { capabilitiesRouter } from "./routes/capabilities.js";
import { creditsRouter } from "./routes/credits.js";
import { zapriteWebhookRouter } from "./routes/zaprite-webhook.js";
import { adminRouter } from "./routes/admin.js"; import { adminRouter } from "./routes/admin.js";
import { internalMeetingsRouter } from "./routes/internal-meetings.js";
import { adminTestRunRouter } from "./routes/admin-test-run.js";
import { btcpaySetupRouter } from "./routes/btcpay-setup.js";
const __dirname = path.dirname(fileURLToPath(import.meta.url)); const __dirname = path.dirname(fileURLToPath(import.meta.url));
@@ -36,7 +46,9 @@ const PORT = parseInt(process.env.PORT || "3002", 10);
await initConfig({ dataDir: DATA_DIR }); await initConfig({ dataDir: DATA_DIR });
await initCredits({ dataDir: DATA_DIR }); await initCredits({ dataDir: DATA_DIR });
await initJobCredits({ dataDir: DATA_DIR });
await initAuditLog({ dataDir: DATA_DIR }); await initAuditLog({ dataDir: DATA_DIR });
await initOutputStore({ dataDir: DATA_DIR });
const app = express(); const app = express();
app.use(cors()); app.use(cors());
@@ -57,25 +69,40 @@ app.use("/relay", capabilitiesRouter());
app.use("/relay", balanceRouter()); app.use("/relay", balanceRouter());
app.use("/relay", transcribeRouter()); app.use("/relay", transcribeRouter());
app.use("/relay", transcribeUrlRouter()); app.use("/relay", transcribeUrlRouter());
app.use("/relay", summarizeUrlRouter());
app.use("/relay", analyzeRouter()); app.use("/relay", analyzeRouter());
app.use("/relay", ttsRouter());
app.use("/relay", userTierRouter());
app.use("/relay", creditsRouter());
app.use("/relay", zapriteWebhookRouter());
// Admin dashboard endpoints (cookie-gated). // Admin dashboard endpoints (cookie-gated).
app.use("/admin", adminRouter({ dataDir: DATA_DIR })); app.use("/admin", adminRouter({ dataDir: DATA_DIR }));
app.use("/admin", adminTestRunRouter());
// One-click BTCPay setup wizard (uses admin auth for the start +
// stores + finalize endpoints; callback is admin-exempt and uses a
// state token instead — see btcpay-setup.js for the design notes).
app.use("/admin/btcpay", btcpaySetupRouter({ dataDir: DATA_DIR }));
// Internal team meeting upload + analysis (Path 2A — operator-only,
// admin-auth gated by the parent /admin prefix's middleware).
app.use(
"/admin/internal-meetings",
internalMeetingsRouter({ dataDir: DATA_DIR })
);
// Static admin UI (v0.2 will flesh out public/admin.html). For v0.1 // Static admin UI (v0.2 will flesh out public/admin.html). For v0.1
// the dashboard is JSON-only; serve any static assets dropped into // the dashboard is JSON-only; serve any static assets dropped into
// public/ but don't error if the directory is empty. // public/ but don't error if the directory is empty.
app.use(express.static(path.join(__dirname, "..", "public"))); app.use(express.static(path.join(__dirname, "..", "public")));
// Root: redirect to /admin/ for operator convenience, or show a tiny // Root: send operators straight to the dashboard so the StartOS
// placeholder for Recap clients that hit the root by mistake. // "Launch UI" button (which points at `/`) lands on something useful.
// The dashboard's JS handles the admin-auth gate — first hit shows the
// login form when an admin password is configured, then the dashboard
// itself. Recap clients only ever hit /relay/* paths, so this redirect
// doesn't affect them.
app.get("/", (_req, res) => { app.get("/", (_req, res) => {
res.type("text/plain").send( res.redirect("/dashboard.html");
"Recap Relay\n" +
"===========\n" +
"Public endpoints: POST /relay/transcribe, POST /relay/analyze, GET /relay/health\n" +
"Operator dashboard: /admin/\n"
);
}); });
const HOSTNAME = process.env.HOSTNAME || "0.0.0.0"; const HOSTNAME = process.env.HOSTNAME || "0.0.0.0";
+34 -8
View File
@@ -14,6 +14,8 @@
// entitlements: string[], // entitlements: string[],
// reason: string | null } // reason: string | null }
import fs from "fs";
import path from "path";
import { getConfigSnapshot } from "./config.js"; import { getConfigSnapshot } from "./config.js";
const KEYSAT_CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour const KEYSAT_CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour
@@ -23,6 +25,19 @@ const cache = new Map();
// Dynamically import the licensing client so it doesn't block boot // Dynamically import the licensing client so it doesn't block boot
// if vendor/keysat-licensing-client is missing in dev environments. // if vendor/keysat-licensing-client is missing in dev environments.
// Construct the verifier on first use with the issuer public key
// embedded in the image at assets/issuer.pub — same PEM file Recap-app
// ships, so a license that validates in Recap also validates here.
//
// IMPORTANT: this used to call `mod.createVerifier()` (no args), which
// silently no-op'd because the licensing-client exports a `Verifier`
// CLASS, not a `createVerifier` factory. The result was that the
// verifier instance was always null → offline verification was skipped
// → entitlements always came back empty → tierFromEntitlements()
// always returned "core" regardless of what the license actually
// contained. Symptom: a real Pro license activated cleanly in Recap
// (showing the PRO badge) still saw Core credits on the relay. Fixed
// by using the documented Verifier + PublicKey.fromPem(pem) shape.
let verifierLoaded = false; let verifierLoaded = false;
let verifier = null; let verifier = null;
async function loadVerifier() { async function loadVerifier() {
@@ -30,17 +45,28 @@ async function loadVerifier() {
verifierLoaded = true; verifierLoaded = true;
try { try {
const mod = await import("@keysat/licensing-client"); const mod = await import("@keysat/licensing-client");
// Same pattern Recap uses: build a verifier with the embedded if (!mod?.Verifier || !mod?.PublicKey) {
// public key. Recap's license.js shows the exact call; copy it console.warn(
// here. For v0.1 we only need offline verification — if the "[keysat] @keysat/licensing-client missing Verifier/PublicKey exports — leaving verifier null (all licenses will resolve to Core)"
// vendor module signature differs across Recap versions we can );
// tweak this. return null;
if (mod?.createVerifier) {
verifier = mod.createVerifier();
} }
// Locate the embedded issuer PEM. The Docker image copies
// assets/ to /app/assets/, so when this module runs from
// /app/server/keysat-client.js, the PEM is at ../assets/issuer.pub.
// Allow KEYSAT_ISSUER_PEM_PATH env override for local dev / tests.
const __dirname = path.dirname(new URL(import.meta.url).pathname);
const pemPath =
process.env.KEYSAT_ISSUER_PEM_PATH ||
path.join(__dirname, "..", "assets", "issuer.pub");
const pem = fs.readFileSync(pemPath, "utf8");
verifier = new mod.Verifier(mod.PublicKey.fromPem(pem));
console.log(
`[keysat] verifier ready (issuer PEM loaded from ${pemPath})`
);
} catch (err) { } catch (err) {
console.warn( console.warn(
`[keysat] failed to load @keysat/licensing-client (${err.message}) — will treat all licenses as anonymous` `[keysat] failed to initialize verifier (${err.message}) — all licenses will resolve to Core until this is fixed`
); );
} }
return verifier; return verifier;
+5 -1
View File
@@ -3,12 +3,16 @@
"version": "0.2.11", "version": "0.2.11",
"type": "module", "type": "module",
"private": true, "private": true,
"scripts": {
"test": "node --test test/*.test.js"
},
"dependencies": { "dependencies": {
"@google/genai": "^1.0.0", "@google/genai": "^1.0.0",
"@keysat/licensing-client": "file:../vendor/keysat-licensing-client", "@keysat/licensing-client": "file:../vendor/keysat-licensing-client",
"cors": "^2.8.5", "cors": "^2.8.5",
"cookie-parser": "^1.4.6", "cookie-parser": "^1.4.6",
"express": "^4.21.0", "express": "^4.21.0",
"multer": "^1.4.5-lts.1" "multer": "^1.4.5-lts.1",
"undici": "^6.21.0"
} }
} }
+902
View File
@@ -0,0 +1,902 @@
// POST /admin/test-run — operator-side benchmarking flow.
//
// Same end-to-end pipeline as /relay/transcribe-url, but with two
// key differences:
// 1. The operator can OVERRIDE backend + model per call, bypassing
// planBackend's tier/preference logic. Used by the dashboard's
// benchmark suite to test specific permutations.
// 2. The audit row is tagged with batch_id + source="admin-test"
// so test runs are clearly distinguishable from real user
// traffic in the Jobs tab (and filterable / hideable from view).
//
// Request body (admin-auth-gated by virtue of being under /admin/*):
// {
// media_url: string, required
// type?: "youtube" | "podcast"
// title?: string
// transcribe_backend: "gemini" | "hardware", required
// transcribe_model?: string (gemini model id; ignored when hardware)
// analyze_backend: "gemini" | "hardware", required
// analyze_model?: string
// batch_id?: string — groups multiple test runs into one suite
// }
//
// Response (immediate; job runs in background):
// { result: { job_id, status: "queued", batch_id } }
// Poll GET /admin/jobs/:id (existing) for status; final transcript +
// analyze result lands in the Jobs table once complete.
import express from "express";
import fs from "fs/promises";
import os from "os";
import path from "path";
import { randomUUID } from "crypto";
import { getConfigSnapshot } from "../config.js";
import { createGeminiBackend } from "../backends/gemini.js";
import { createHardwareBackend } from "../backends/hardware.js";
import { resolveHardwareConfig } from "../hardware-config.js";
import { recordCall } from "../audit-log.js";
import { calcGeminiCost } from "../pricing.js";
import { getAudioDurationSeconds } from "../audio-meta.js";
import {
createJob,
markRunning,
setProgress,
markComplete,
markFailed,
} from "../jobs.js";
import {
looksLikeYouTube,
downloadDirect,
downloadYouTube,
} from "./transcribe-url.js";
import { fetchYouTubeCaptions } from "../youtube-captions.js";
import { saveJobOutput } from "../output-store.js";
import { runChunkedAnalysis } from "../chunked-analyze.js";
// Synthetic install_id used for all test-run audit rows. Keeps them
// out of any real-user aggregations + makes them filterable in the
// Jobs tab via the existing install-id filter.
const TEST_INSTALL_ID = "admin-test";
// ── TX-sharing cache ────────────────────────────────────────────
// The benchmark suite has paired permutations that use the SAME
// transcribe config but differ in their analyze backend:
// pair 1+6: TX = gemini-3.1-flash-lite (then AN gemini vs hardware)
// pair 4+5: TX = hardware (then AN hardware vs gemini)
// pair 7+8: TX = captions (then AN gemini vs hardware)
// Without sharing, running both members of a pair re-transcribes
// the same audio twice — wasteful (cost + wall time).
//
// Implementation: an in-memory Map keyed on (mediaUrl, txConfig)
// whose values are PROMISES for the transcript. The first request
// in the pair inserts a pending Promise; subsequent requests with
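// the same key await that Promise. Entries stay in the cache for
// ~10 minutes after they are inserted (the TTL is stamped when the
// transcription starts, not when it completes) so a "rerun last"
// benchmark within that window also dedupes. Failed computations are
// evicted immediately; other entries auto-expire to bound memory.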
//
// The cache is process-local (single relay process); a relay
// restart clears it. That's fine — benchmark suites are operator-
// initiated and short-lived.
const TX_CACHE_TTL_MS = 10 * 60 * 1000;
const txCache = new Map(); // key → { promise, expiresAt }
function txCacheKey({ mediaUrl, captionsMode, txBackend, txModel }) {
if (captionsMode === "use") return `captions:${mediaUrl}`;
return `tx:${txBackend}:${txModel || "(default)"}:${mediaUrl}`;
}
function getOrComputeTx(key, computeFn) {
const now = Date.now();
// Evict expired entries opportunistically.
for (const [k, v] of txCache) {
if (v.expiresAt < now) txCache.delete(k);
}
const existing = txCache.get(key);
if (existing && existing.expiresAt > now) {
return {
promise: existing.promise,
cached: true,
startedAt: existing.startedAt,
};
}
const startedAt = Date.now();
const promise = computeFn();
txCache.set(key, { promise, expiresAt: now + TX_CACHE_TTL_MS, startedAt });
// If the compute fails, evict the entry so the next attempt
// gets a fresh try (don't cache failures).
promise.catch(() => txCache.delete(key));
return { promise, cached: false, startedAt };
}
/**
 * Parse an analyze response into the `{ sections: [...] }` object that
 * Recap's render expects.
 *
 * If the text contains a Markdown code fence (optionally tagged
 * `json`), only the contents of the first fence are parsed; otherwise
 * the whole trimmed text is parsed.
 *
 * @param {unknown} text - Raw model output.
 * @returns {object|null} The parsed object when it carries an array
 *   `sections` property; null for non-string/empty input, invalid JSON,
 *   or JSON of any other shape — so the caller can store the raw text
 *   for forensic review instead.
 */
function safeParseSections(text) {
  if (typeof text !== "string" || text.length === 0) return null;
  const trimmed = text.trim();
  const fenced = /```(?:json)?\s*([\s\S]*?)```/.exec(trimmed);
  const candidate = fenced ? fenced[1].trim() : trimmed;
  let parsed;
  try {
    parsed = JSON.parse(candidate);
  } catch {
    return null;
  }
  if (!parsed || !Array.isArray(parsed.sections)) return null;
  return parsed;
}
/**
 * Build the Express router for the operator benchmark endpoints.
 *
 * Routes:
 *   POST /test-run        — run one transcribe/analyze permutation.
 *   POST /test-run-suite  — run an array of permutations in phases.
 *
 * Both routes mint their job record(s), answer immediately with the
 * job id(s), and then run the pipeline in the background, so the
 * HTTP response never waits on the actual work.
 *
 * @returns {import("express").Router}
 */
export function adminTestRunRouter() {
  const router = express.Router();

  const KNOWN_BACKENDS = ["gemini", "hardware"];
  const isKnownBackend = (name) => KNOWN_BACKENDS.includes(name);

  // Mint the job record for one permutation. `extraMetadata` is
  // appended after the common metadata fields (the suite route uses
  // it for suite_position).
  const mintJob = (ctx, extraMetadata = {}) =>
    createJob({
      kind: "admin-test-run",
      installId: TEST_INSTALL_ID,
      metadata: {
        media_url: ctx.mediaUrl,
        title: ctx.title,
        transcribe_backend: ctx.useCaptions ? "captions" : ctx.txBackend,
        analyze_backend: ctx.anBackend,
        batch_id: ctx.batchId,
        captions_mode: ctx.captionsMode || null,
        ...extraMetadata,
      },
    });

  // ── POST /admin/test-run ────────────────────────────────────
  router.post("/test-run", express.json({ limit: "1mb" }), async (req, res) => {
    const body = req.body || {};
    const mediaUrl = body.media_url;
    if (!mediaUrl || typeof mediaUrl !== "string") {
      return res.status(400).json({ error: "missing or non-string media_url" });
    }
    // When captions_mode === "use", the relay fetches YouTube
    // captions via yt-dlp instead of downloading+transcribing the
    // audio. Transcribe-backend/model are ignored in that case;
    // the captions text feeds straight into analyze. Only works
    // for YouTube URLs (no captions for podcast .mp3 enclosures).
    const useCaptions = body.captions_mode === "use";
    if (!useCaptions && !isKnownBackend(body.transcribe_backend)) {
      return res.status(400).json({ error: "transcribe_backend must be 'gemini' or 'hardware' (unless captions_mode='use')" });
    }
    if (!isKnownBackend(body.analyze_backend)) {
      return res.status(400).json({ error: "analyze_backend must be 'gemini' or 'hardware'" });
    }

    // Everything the worker needs travels in one ctx object so the
    // same worker serves this endpoint AND /test-run-suite.
    const ctx = {
      mediaUrl,
      type: body.type,
      title: body.title,
      txBackend: body.transcribe_backend,
      txModel: body.transcribe_model,
      anBackend: body.analyze_backend,
      anModel: body.analyze_model,
      batchId: body.batch_id || randomUUID(),
      captionsMode: body.captions_mode,
      useCaptions,
    };
    const job = mintJob(ctx);

    // Hand back the job_id immediately; the dashboard polls for status.
    res.json({
      result: {
        job_id: job.id,
        status: "queued",
        batch_id: ctx.batchId,
        kind: "admin-test-run",
      },
    });

    executeTestRunWorker(job, ctx).catch((err) => {
      markFailed(job.id, "worker_crashed: " + (err?.message || String(err)));
      console.error(`[admin/test-run ${job.id.slice(0, 8)}] worker crashed:`, err);
    });
  });

  // ── POST /admin/test-run-suite ──────────────────────────────
  // Server-side benchmark runner. Accepts an ARRAY of permutations,
  // mints jobs for all of them upfront (so the client can show the
  // table immediately), and runs the phase-based concurrent
  // execution server-side. Key property: the suite KEEPS RUNNING
  // even if the operator's browser closes / phone sleeps / tab
  // refreshes — the work is in a background loop on the relay
  // process, not in the dashboard's JavaScript.
  //
  // Phases are grouped by TX fingerprint so paired permutations
  // (1+6, 4+5, 7+8) fire concurrently and share TX via the existing
  // in-memory inflight-promise cache.
  router.post("/test-run-suite", express.json({ limit: "10mb" }), async (req, res) => {
    const body = req.body || {};
    const mediaUrl = body.media_url;
    const permutations = body.permutations;
    if (!mediaUrl || typeof mediaUrl !== "string") {
      return res.status(400).json({ error: "missing or non-string media_url" });
    }
    if (!Array.isArray(permutations) || permutations.length === 0) {
      return res.status(400).json({ error: "permutations must be a non-empty array" });
    }

    const batchId = randomUUID();
    const items = permutations.map((entry, index) => {
      const perm = entry || {};
      const position = index + 1;
      const ctx = {
        mediaUrl,
        type: perm.type,
        title: perm.title || `permutation ${position}`,
        txBackend: perm.transcribe_backend,
        txModel: perm.transcribe_model,
        anBackend: perm.analyze_backend,
        anModel: perm.analyze_model,
        batchId,
        captionsMode: perm.captions_mode,
        useCaptions: perm.captions_mode === "use",
      };
      // Validate per-perm — a bad permutation must not poison the
      // whole batch. Its job is still minted; the background runner
      // fails it and writes the audit row.
      let validationError = null;
      if (!ctx.useCaptions && !isKnownBackend(ctx.txBackend)) {
        validationError = `permutation ${position}: transcribe_backend must be 'gemini' or 'hardware'`;
      } else if (!isKnownBackend(ctx.anBackend)) {
        validationError = `permutation ${position}: analyze_backend must be 'gemini' or 'hardware'`;
      }
      const job = mintJob(ctx, { suite_position: position });
      return { job, ctx, validationError };
    });

    // Respond immediately with the planned IDs so the dashboard can
    // start polling /admin/jobs-history?batch_id=<batchId> without
    // blocking on the actual work.
    res.json({
      result: {
        batch_id: batchId,
        status: "queued",
        job_ids: items.map((it) => it.job.id),
        total: items.length,
        kind: "admin-test-run-suite",
      },
    });

    // Run one suite item: either record its validation failure or
    // hand it to the shared worker. Worker crashes are contained so
    // one permutation cannot abort its phase.
    const runItem = async ({ job, ctx, validationError }) => {
      if (validationError) {
        markFailed(job.id, validationError);
        await recordCall({
          install_id: TEST_INSTALL_ID,
          tier: "core",
          pipeline: "transcribe",
          backend: null,
          model: null,
          status: "error",
          duration_ms: 0,
          cost_usd: 0,
          job_id: job.id,
          batch_id: batchId,
          source: "admin-test",
          media_url: ctx.mediaUrl,
          title: ctx.title,
          error: validationError,
        });
        return;
      }
      try {
        await executeTestRunWorker(job, ctx);
      } catch (err) {
        markFailed(job.id, "worker_crashed: " + (err?.message || String(err)));
        console.error(
          `[admin/test-run-suite ${job.id.slice(0, 8)}] worker crashed:`,
          err
        );
      }
    };

    // ── Background phase runner ──
    // Permutations within a phase fire concurrently (their
    // underlying TX dedupes via the cache); phases themselves run
    // sequentially so we don't overload the transcribe backends.
    // Failures don't abort the suite.
    const runSuite = async () => {
      try {
        const phases = groupItemsByTxFingerprint(items);
        console.log(
          `[admin/test-run-suite] batch=${batchId.slice(0, 8)} ${items.length} perms in ${phases.length} phases`
        );
        let phaseNumber = 0;
        for (const phase of phases) {
          phaseNumber += 1;
          console.log(
            `[admin/test-run-suite] batch=${batchId.slice(0, 8)} phase ${phaseNumber}/${phases.length}: firing ${phase.length} perm${phase.length === 1 ? "" : "s"}`
          );
          await Promise.allSettled(phase.map((item) => runItem(item)));
        }
        console.log(`[admin/test-run-suite] batch=${batchId.slice(0, 8)} complete`);
      } catch (err) {
        console.error(`[admin/test-run-suite] batch=${batchId.slice(0, 8)} runner crashed:`, err);
      }
    };
    setImmediate(runSuite);
  });

  return router;
}
/**
 * Partition `{ job, ctx }` items into phases by TX fingerprint.
 *
 * Items whose ctx describes the same transcription (captions for the
 * same URL, or the same backend + model + URL) land in one phase so
 * they hit the TX-share cache together. Phases are ordered by the
 * first appearance of each fingerprint, and items keep their input
 * order within a phase.
 *
 * @param {Array<{ ctx: object }>} items
 * @returns {Array<Array<object>>} One array of items per phase.
 */
function groupItemsByTxFingerprint(items) {
  // Map iteration follows insertion order, which gives the
  // "first appearance wins" phase ordering for free.
  const phaseByFingerprint = new Map();
  for (const item of items) {
    const { useCaptions, mediaUrl, txBackend, txModel } = item.ctx;
    const fingerprint = useCaptions
      ? `captions:${mediaUrl}`
      : `tx:${txBackend}:${txModel || ""}:${mediaUrl}`;
    const phase = phaseByFingerprint.get(fingerprint);
    if (phase) {
      phase.push(item);
    } else {
      phaseByFingerprint.set(fingerprint, [item]);
    }
  }
  return [...phaseByFingerprint.values()];
}
/**
 * Run the full download / transcribe / analyze pipeline for ONE
 * benchmark permutation. Used by both /admin/test-run (one
 * permutation) and /admin/test-run-suite (many permutations
 * orchestrated server-side in phases).
 *
 * Expected failures (bad captions request, download failure,
 * transcribe failure) are written to the audit log via recordCall and
 * the job is marked failed; the function then returns normally. An
 * analyze failure is only logged — the job still completes because
 * the transcript was produced. Unexpected errors propagate to the
 * caller, which marks the job "worker_crashed".
 *
 * The per-run temp directory is removed in a `finally`, so it is
 * cleaned up on every exit path, including unexpected throws.
 *
 * @param {{ id: string }} job - Job record minted by createJob.
 * @param {object} ctx - Worker inputs.
 * @param {string} ctx.mediaUrl - Media to benchmark.
 * @param {string} [ctx.type] - "youtube" forces the YouTube path; when
 *   absent the URL is sniffed with looksLikeYouTube.
 * @param {string} [ctx.title]
 * @param {string} [ctx.txBackend] - "gemini" or "hardware".
 * @param {string} [ctx.txModel]
 * @param {string} ctx.anBackend - "gemini" or "hardware".
 * @param {string} [ctx.anModel]
 * @param {string} ctx.batchId
 * @param {boolean} ctx.useCaptions - Use YouTube captions instead of
 *   downloading and transcribing audio.
 * @returns {Promise<void>}
 */
async function executeTestRunWorker(job, ctx) {
  const {
    mediaUrl, type, title,
    txBackend, txModel, anBackend, anModel,
    batchId: effectiveBatchId,
    useCaptions,
  } = ctx;
  const workerT0 = Date.now();
  markRunning(job.id);
  const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "admin-tr-"));
  try {
    const isYT = type === "youtube" || (!type && looksLikeYouTube(mediaUrl));

    // ── Captions fast-path branch ──
    // For YouTube URLs with captions_mode="use", fetch caption track
    // via yt-dlp and skip audio download + Gemini transcribe
    // entirely. The captions text feeds straight into analyze.
    if (useCaptions) {
      if (!isYT) {
        await recordCall({
          install_id: TEST_INSTALL_ID,
          tier: "core",
          pipeline: "transcribe",
          backend: "captions",
          model: null,
          status: "error",
          duration_ms: 0,
          cost_usd: 0,
          job_id: job.id,
          batch_id: effectiveBatchId,
          source: "admin-test",
          media_url: mediaUrl,
          title: title || null,
          error: "captions_mode='use' requires a YouTube URL (no captions for podcast audio)",
        });
        markFailed(job.id, "captions_mode requires YouTube URL");
        return;
      }
      setProgress(job.id, "fetching captions…");
      const capStart = Date.now();
      let cap;
      let capFromCache = false;
      let capSharedStartedAt = capStart;
      const capKey = txCacheKey({ mediaUrl, captionsMode: "use" });
      try {
        const { promise, cached, startedAt: sharedStartedAt } = getOrComputeTx(capKey, () =>
          fetchYouTubeCaptions({ url: mediaUrl, tmpDir })
        );
        capFromCache = cached;
        capSharedStartedAt = sharedStartedAt || capStart;
        if (cached) setProgress(job.id, "reusing shared captions from paired permutation…");
        cap = await promise;
      } catch (err) {
        await recordCall({
          install_id: TEST_INSTALL_ID,
          tier: "core",
          pipeline: "transcribe",
          backend: "captions",
          model: null,
          status: "error",
          duration_ms: Date.now() - capStart,
          audio_seconds: null,
          cost_usd: 0,
          job_id: job.id,
          batch_id: effectiveBatchId,
          source: "admin-test",
          media_url: mediaUrl,
          title: title || null,
          error: (err?.message || String(err)).slice(0, 300),
        });
        markFailed(job.id, "captions_fetch_failed: " + (err?.message || err));
        return;
      }
      // Record the captions "transcribe" row. backend="captions"
      // so the dashboard can filter / display it distinctly.
      // When this permutation reused a paired sibling's captions
      // fetch, the wall-time we attribute is measured from the cache
      // entry's startedAt — so the dashboard's per-row TX-rate
      // columns show real numbers on BOTH paired rows, not "—" on
      // the sibling. The `source` flag "admin-test-shared-tx" lets
      // aggregate analytics dedupe.
      // NOTE(review): for a hit on an entry that had already
      // finished (e.g. a rerun), this measures from the original
      // start until now and may overstate the fetch time — confirm.
      await recordCall({
        install_id: TEST_INSTALL_ID,
        tier: "core",
        pipeline: "transcribe",
        backend: "captions",
        model: cap.captions_source === "auto" ? "youtube-auto" : "youtube-manual",
        status: "success",
        duration_ms: Date.now() - capSharedStartedAt,
        audio_seconds: cap.duration_seconds || null,
        audio_bytes: null, // no audio downloaded
        download_ms: null, // n/a
        chunk_count: 1,
        cost_usd: 0,
        job_id: job.id,
        batch_id: effectiveBatchId,
        source: capFromCache ? "admin-test-shared-tx" : "admin-test",
        media_url: mediaUrl,
        title: title || null,
      });
      setProgress(job.id, "analyzing topics…");
      const captionsCfg = await getConfigSnapshot();
      const captionsHw = await resolveHardwareConfig(captionsCfg);
      let anResultForCaptions = null;
      try {
        anResultForCaptions = await runAnalyzeForTestRun({
          transcriptText: cap.text || "",
          anBackend,
          anModel,
          cfg: captionsCfg,
          hw: captionsHw,
          jobId: job.id,
          batchId: effectiveBatchId,
          mediaUrl,
          title,
          audioSeconds: cap.duration_seconds || null,
          audioBytes: null,
        });
      } catch (err) {
        // Deliberate best-effort: the job still completes with the
        // captions transcript even when analyze fails.
        console.warn(`[admin/test-run ${job.id.slice(0, 8)}] analyze failed (captions): ${err?.message || err}`);
      }
      // Save output (test-runs always persist regardless of the
      // save-user-outputs flag).
      await saveJobOutput(job.id, {
        batch_id: effectiveBatchId,
        source: "admin-test",
        transcript: cap.text || "",
        analysis: anResultForCaptions ? safeParseSections(anResultForCaptions.text) : null,
        analysis_raw_text: anResultForCaptions?.text || null,
        meta: {
          title: title || null,
          media_url: mediaUrl,
          audio_seconds: cap.duration_seconds || null,
          audio_bytes: null,
          captions_mode: "use",
          captions_source: cap.captions_source || null,
          transcribe_backend: "captions",
          transcribe_model: cap.captions_source === "auto" ? "youtube-auto" : "youtube-manual",
          analyze_backend: anBackend,
          analyze_model: anResultForCaptions?.model || null,
        },
      });
      markComplete(job.id, {
        result: { transcribe_model: "captions", batch_id: effectiveBatchId },
      });
      return;
    }

    // ── Audio download path (no captions) ──
    setProgress(job.id, "downloading media…");
    let audio;
    let downloadMs = 0;
    try {
      const dlStart = Date.now();
      audio = isYT
        ? await downloadYouTube(mediaUrl, tmpDir)
        : await downloadDirect(mediaUrl, tmpDir);
      downloadMs = Date.now() - dlStart;
      audio.seconds = await getAudioDurationSeconds(audio.filePath);
      setProgress(job.id, `transcribing ${Math.round((audio.seconds || 0) / 60)} min audio…`);
    } catch (err) {
      const msg = (err?.message || String(err)).slice(0, 300);
      await recordCall({
        install_id: TEST_INSTALL_ID,
        tier: "core",
        pipeline: "transcribe",
        backend: txBackend,
        model: null,
        status: "error",
        credit_charged: 0,
        duration_ms: Date.now() - workerT0,
        download_ms: Date.now() - workerT0,
        audio_seconds: null,
        cost_usd: 0,
        job_id: job.id,
        batch_id: effectiveBatchId,
        source: "admin-test",
        media_url: mediaUrl,
        title: title || null,
        error: "download_failed: " + msg,
      });
      markFailed(job.id, "download_failed: " + msg);
      return;
    }

    // ── Transcription with the operator's chosen backend ──
    // Uses the TX-sharing cache so that paired benchmark
    // permutations (e.g. 1+6 both transcribe with gemini-3.1-flash-
    // lite) only invoke the underlying backend ONCE — the second
    // permutation awaits the first's in-flight promise and reuses
    // its transcript. Cache entries linger ~10 min so a fast
    // "Rerun last" also dedupes.
    const cfg = await getConfigSnapshot();
    const hw = await resolveHardwareConfig(cfg);
    let txResult;
    let txFromCache = false;
    const txStartedAt = Date.now();
    let txSharedStartedAt = txStartedAt;
    const cacheKey = txCacheKey({ mediaUrl, captionsMode: null, txBackend, txModel });
    try {
      const { promise, cached, startedAt: sharedStartedAt } = getOrComputeTx(cacheKey, async () => {
        // The audio is read here, inside the compute callback, so a
        // permutation that reuses a cached transcript never loads
        // the whole file into memory.
        const audioBuf = await fs.readFile(audio.filePath);
        if (txBackend === "gemini") {
          const backend = createGeminiBackend({
            apiKey: cfg.relay_gemini_api_key,
            transcriptionModel: txModel || cfg.relay_gemini_transcription_model,
            analysisModel: cfg.relay_gemini_analysis_model,
            txChunkSeconds: (cfg.relay_gemini_tx_chunk_minutes || 30) * 60,
            txConcurrency: cfg.relay_gemini_tx_concurrency || 12,
            transcribePromptOverride: cfg.relay_transcribe_prompt || "",
          });
          return await backend.transcribeAudio({
            audio: audioBuf,
            mimeType: audio.mimeType || "audio/mpeg",
            title: title || "",
            offsetSeconds: 0,
          });
        }
        if (!hw.transcribe.url) {
          throw new Error("hardware transcribe URL not configured");
        }
        const backend = createHardwareBackend({
          parakeetBaseURL: hw.transcribe.url,
          gemmaBaseURL: hw.analyze.url || "",
          sparkControlBaseURL: hw.sparkBase || "",
          parakeetModel: hw.transcribe.model || "",
          gemmaModel: hw.analyze.model || "",
          txChunkSeconds: (cfg.relay_hardware_tx_chunk_minutes || 5) * 60,
          txChunkOverlapSeconds: cfg.relay_hardware_tx_chunk_overlap_seconds ?? 30,
          diarizationEnabled: !!cfg.relay_hardware_diarization_enabled,
          clusterThresholdPct: cfg.relay_hardware_voice_clustering_threshold ?? 70,
          anchorMinSpeakingSec: cfg.relay_hardware_anchor_min_speaking_sec ?? 30,
          smallClusterMaxSpeakingSec: cfg.relay_hardware_small_cluster_max_speaking_sec ?? 15,
          uncertainMarginPct: cfg.relay_hardware_uncertain_margin_pct ?? 10,
          txConcurrency: cfg.relay_hardware_tx_concurrency || 4,
        });
        return await backend.transcribeAudio({
          audio: audioBuf,
          mimeType: audio.mimeType || "audio/mpeg",
          offsetSeconds: 0,
        });
      });
      txFromCache = cached;
      txSharedStartedAt = sharedStartedAt || txStartedAt;
      if (cached) {
        setProgress(job.id, "reusing shared TX from paired permutation…");
      }
      txResult = await promise;
    } catch (err) {
      const msg = (err?.message || String(err)).slice(0, 400);
      await recordCall({
        install_id: TEST_INSTALL_ID,
        tier: "core",
        pipeline: "transcribe",
        backend: txBackend,
        model: txBackend === "gemini" ? (txModel || cfg.relay_gemini_transcription_model) : (hw.transcribe.model || "(auto)"),
        status: "error",
        credit_charged: 0,
        duration_ms: Date.now() - workerT0,
        download_ms: downloadMs,
        audio_seconds: audio?.seconds || null,
        audio_bytes: audio?.bytes || null,
        cost_usd: 0,
        job_id: job.id,
        batch_id: effectiveBatchId,
        source: "admin-test",
        media_url: mediaUrl,
        title: title || null,
        error: msg,
      });
      markFailed(job.id, "transcribe_failed: " + msg);
      return;
    }

    // Audit the successful transcribe.
    const txCostDetails =
      txBackend === "gemini" && txResult.usage
        ? calcGeminiCost(txResult.model, txResult.usage)
        : { input_tokens: 0, output_tokens: 0, thinking_tokens: 0, cost_usd: 0 };
    // Truncation detection — same as the production routes. When
    // any chunk emitted < 80% of its expected audio, mark the
    // benchmark row partial so the operator doesn't compare a
    // truncated TX run against a clean one.
    const txTruncatedChunks = Array.isArray(txResult?.truncated_chunks)
      ? txResult.truncated_chunks
      : [];
    const txWasTruncated = txTruncatedChunks.length > 0;
    const txTruncationError = txWasTruncated
      ? `transcribe: ${txTruncatedChunks.length} chunk(s) truncated — missing ~${txTruncatedChunks.reduce((s, c) => s + (c.missingSec || 0), 0)}s of speech (model: ${txResult.model || "unknown"})`
      : null;
    await recordCall({
      install_id: TEST_INSTALL_ID,
      tier: "core",
      pipeline: "transcribe",
      backend: txBackend,
      model: txResult.model || null,
      status: txWasTruncated ? "partial" : "success",
      credit_charged: 0,
      truncated_chunks: txWasTruncated ? txTruncatedChunks : null,
      error: txTruncationError,
      // When this permutation reused a paired sibling's TX, the
      // attributed duration is measured from when the originating
      // permutation kicked the TX off — so the per-row TX rate
      // columns in the Jobs table show real numbers on BOTH paired
      // rows, not "—" on the sibling. Cost is still zero on the
      // sibling (only the originator pays). The "admin-test-shared-tx"
      // source flag lets aggregate analytics dedupe across pairs.
      duration_ms: Date.now() - txSharedStartedAt,
      download_ms: downloadMs,
      audio_bytes: audio.bytes,
      audio_seconds: audio.seconds || null,
      job_id: job.id,
      batch_id: effectiveBatchId,
      source: txFromCache ? "admin-test-shared-tx" : "admin-test",
      media_url: mediaUrl,
      title: title || null,
      attempts: txResult.attempts || null,
      chunk_count: txResult.chunk_count ?? null,
      // Per-chunk wall-times (ms). Aggregator sums this into
      // transcribe_ms_sum so the Jobs table shows BOTH wall-time
      // (from duration_ms) and total backend compute (from sum).
      chunk_durations_ms: txResult.chunk_durations_ms || null,
      ...(txFromCache
        ? { input_tokens: 0, output_tokens: 0, thinking_tokens: 0, cost_usd: 0 }
        : txCostDetails),
    });

    // ── Analyze with the operator's chosen backend ──
    // For benchmarking purposes we run the chunked-analyze flow
    // directly here (mirroring Recap's behavior) so the per-window
    // performance is captured in the Jobs table.
    setProgress(job.id, "analyzing topics…");
    let anResult = null;
    try {
      anResult = await runAnalyzeForTestRun({
        transcriptText: txResult.text || "",
        anBackend,
        anModel,
        cfg,
        hw,
        jobId: job.id,
        batchId: effectiveBatchId,
        mediaUrl,
        title,
        audioSeconds: audio.seconds || null,
        audioBytes: audio.bytes,
      });
    } catch (err) {
      // Analyze failure is recorded (inside runAnalyzeForTestRun);
      // we still mark the job complete since transcribe succeeded.
      console.warn(`[admin/test-run ${job.id.slice(0, 8)}] analyze failed: ${err?.message || err}`);
    }

    // Save the transcript + analysis JSON to disk for the
    // dashboard's "View output" feature. Test-run jobs always
    // persist regardless of the save-user-outputs config flag.
    await saveJobOutput(job.id, {
      batch_id: effectiveBatchId,
      source: "admin-test",
      transcript: txResult.text || "",
      analysis: anResult ? safeParseSections(anResult.text) : null,
      analysis_raw_text: anResult?.text || null,
      meta: {
        title: title || null,
        media_url: mediaUrl,
        audio_seconds: audio.seconds || null,
        audio_bytes: audio.bytes,
        captions_mode: null,
        transcribe_backend: txBackend,
        transcribe_model: txResult.model || null,
        analyze_backend: anBackend,
        analyze_model: anResult?.model || null,
      },
    });
    markComplete(job.id, {
      result: {
        transcribe_model: txResult.model,
        batch_id: effectiveBatchId,
      },
    });
  } finally {
    // Single cleanup point for every exit path. Best-effort: a
    // cleanup failure must never change the job's outcome.
    try {
      await fs.rm(tmpDir, { recursive: true, force: true });
    } catch (cleanupErr) {
      console.warn(
        `[admin/test-run ${job.id.slice(0, 8)}] tmp dir cleanup failed: ${cleanupErr?.message || cleanupErr}`
      );
    }
  }
}
/**
 * Run chunked analyze over a just-produced transcript for one
 * benchmark permutation.
 *
 * Builds the requested backend, then delegates to runChunkedAnalysis
 * with the windowing tunables read from config (window body, overlap,
 * concurrency, cutoff) and the per-duration section-count targets.
 * Per the file's own notes, runChunkedAnalysis writes one audit row
 * per window; this function itself writes an audit row only when the
 * backend cannot be constructed (e.g. no hardware analyze URL), and
 * then rethrows that error.
 *
 * @param {object} args
 * @param {string} args.transcriptText - Transcript to analyze.
 * @param {string} args.anBackend - "gemini" selects Gemini; any other
 *   value selects the hardware backend.
 * @param {string} [args.anModel] - Gemini analysis model override.
 * @param {object} args.cfg - Config snapshot.
 * @param {object} args.hw - Resolved hardware config.
 * @param {string} args.jobId
 * @param {string} args.batchId
 * @param {string} args.mediaUrl
 * @param {string} [args.title]
 * @param {number|null} [args.audioSeconds] - Total audio length; passed
 *   on as totalAudioSec (0 when falsy).
 * @param {number|null} [args.audioBytes] - Accepted but not used here.
 * @returns {Promise<{ text: string, model: any, attempts: any }>}
 * @throws Rethrows backend-construction errors and anything thrown by
 *   runChunkedAnalysis.
 */
async function runAnalyzeForTestRun({
  transcriptText,
  anBackend,
  anModel,
  cfg,
  hw,
  jobId,
  batchId,
  mediaUrl,
  title,
  audioSeconds,
  audioBytes,
}) {
  const usesGemini = anBackend === "gemini";
  const zeroCost = () => ({
    input_tokens: 0, output_tokens: 0, thinking_tokens: 0, cost_usd: 0,
  });

  // Construct the backend for the requested pipeline. Throws when the
  // hardware analyze URL is not configured (or the factory throws).
  const buildBackend = () => {
    if (usesGemini) {
      return {
        backend: createGeminiBackend({
          apiKey: cfg.relay_gemini_api_key,
          transcriptionModel: cfg.relay_gemini_transcription_model,
          analysisModel: anModel || cfg.relay_gemini_analysis_model,
          // tx knobs are passed for consistency with the transcribe
          // path's factory call.
          txChunkSeconds: (cfg.relay_gemini_tx_chunk_minutes || 30) * 60,
          txConcurrency: cfg.relay_gemini_tx_concurrency || 12,
        }),
        resolvedModel: anModel || cfg.relay_gemini_analysis_model,
        computeCostDetails: (model, usage) =>
          usage ? calcGeminiCost(model, usage) : zeroCost(),
      };
    }
    if (!hw.analyze.url) {
      throw new Error("hardware analyze URL not configured");
    }
    return {
      backend: createHardwareBackend({
        parakeetBaseURL: hw.transcribe.url || "",
        gemmaBaseURL: hw.analyze.url,
        sparkControlBaseURL: hw.sparkBase || "",
        parakeetModel: hw.transcribe.model || "",
        gemmaModel: hw.analyze.model || "",
        txChunkSeconds: (cfg.relay_hardware_tx_chunk_minutes || 5) * 60,
        txChunkOverlapSeconds: cfg.relay_hardware_tx_chunk_overlap_seconds ?? 30,
        diarizationEnabled: !!cfg.relay_hardware_diarization_enabled,
        clusterThresholdPct: cfg.relay_hardware_voice_clustering_threshold ?? 70,
        anchorMinSpeakingSec: cfg.relay_hardware_anchor_min_speaking_sec ?? 30,
        smallClusterMaxSpeakingSec: cfg.relay_hardware_small_cluster_max_speaking_sec ?? 15,
        uncertainMarginPct: cfg.relay_hardware_uncertain_margin_pct ?? 10,
        txConcurrency: cfg.relay_hardware_tx_concurrency || 4,
      }),
      resolvedModel: hw.analyze.model || null,
      // Hardware runs are always recorded at zero cost.
      computeCostDetails: () => zeroCost(),
    };
  };

  let built;
  try {
    built = buildBackend();
  } catch (err) {
    // Construction errors are audited as a single failed analyze row
    // so the Jobs table shows what happened, then rethrown.
    await recordCall({
      install_id: TEST_INSTALL_ID,
      tier: "core",
      pipeline: "analyze",
      backend: anBackend,
      model: anBackend === "gemini"
        ? (anModel || cfg.relay_gemini_analysis_model)
        : (hw.analyze.model || "(auto)"),
      status: "error",
      duration_ms: 0,
      audio_seconds: 0,
      cost_usd: 0,
      job_id: jobId,
      batch_id: batchId,
      source: "admin-test",
      media_url: mediaUrl,
      title: title || null,
      error: (err?.message || String(err)).slice(0, 400),
      window_idx: 0,
      window_count: 1,
    });
    throw err;
  }
  const { backend, resolvedModel, computeCostDetails } = built;

  // Windowing tunables come from config (Settings tab); each backend
  // has its own knobs and defaults.
  const windowing = usesGemini
    ? {
        bodyMin: cfg.relay_gemini_analyze_window_minutes || 18,
        overlapMin: cfg.relay_gemini_analyze_overlap_minutes || 2,
        concurrency: cfg.relay_gemini_analyze_concurrency || 12,
      }
    : {
        bodyMin: cfg.relay_hardware_analyze_window_minutes || 18,
        overlapMin: cfg.relay_hardware_analyze_overlap_minutes || 2,
        concurrency: cfg.relay_hardware_analyze_concurrency || 8,
      };
  const cutoffMin = cfg.relay_analyze_cutoff_minutes || 25;

  const result = await runChunkedAnalysis({
    transcriptText,
    backend,
    pipelineBackend: anBackend,
    jobId,
    batchId,
    mediaUrl,
    title,
    installId: TEST_INSTALL_ID,
    source: "admin-test",
    computeCostDetails,
    bodySeconds: windowing.bodyMin * 60,
    overlapSeconds: windowing.overlapMin * 60,
    concurrency: windowing.concurrency,
    cutoffSeconds: cutoffMin * 60,
    analyzePromptOverride: cfg.relay_analyze_prompt || "",
    // Section-count target wiring (matches the summarize-url path),
    // so test-run benchmarks reflect production segmentation density.
    totalAudioSec: audioSeconds || 0,
    targetTotalsByBucket: {
      under_30: cfg.relay_analyze_total_sections_under_30,
      "30_60": cfg.relay_analyze_total_sections_30_60,
      "60_90": cfg.relay_analyze_total_sections_60_90,
      "90_120": cfg.relay_analyze_total_sections_90_120,
      "120_150": cfg.relay_analyze_total_sections_120_150,
      "150_180": cfg.relay_analyze_total_sections_150_180,
      over_180: cfg.relay_analyze_total_sections_over_180,
    },
  });

  return {
    text: result.text || "",
    model: result.model || resolvedModel,
    attempts: result.attempts,
  };
}
+1106 -5
View File
File diff suppressed because it is too large Load Diff
+114 -58
View File
@@ -16,14 +16,21 @@
// margin, and speed metrics. // margin, and speed metrics.
import express from "express"; import express from "express";
import { resolveLicense } from "../keysat-client.js"; import { resolveIdentity, identityTier } from "../identity.js";
import { getOrCreateRow, planBackend, commitCredit } from "../credits.js"; import {
getOrCreateRow,
planBackend,
commitCredit,
licenseFingerprint,
} from "../credits.js";
import { lookupJob, markJobCharged, refundJob } from "../job-credits.js"; import { lookupJob, markJobCharged, refundJob } from "../job-credits.js";
import { getConfigSnapshot, getTierQuotas } from "../config.js"; import { getConfigSnapshot, getTierQuotas } from "../config.js";
import { createGeminiBackend } from "../backends/gemini.js"; import { createGeminiBackend } from "../backends/gemini.js";
import { createHardwareBackend } from "../backends/hardware.js"; import { createHardwareBackend } from "../backends/hardware.js";
import { envelope, errorEnvelope } from "./envelope.js"; import { envelope, errorEnvelope } from "./envelope.js";
import { recordCall } from "../audit-log.js"; import { recordCall } from "../audit-log.js";
import { resolveHardwareConfig } from "../hardware-config.js";
import { reportHealthEvent } from "../spark-control-events.js";
import { calcGeminiCost } from "../pricing.js"; import { calcGeminiCost } from "../pricing.js";
export function analyzeRouter() { export function analyzeRouter() {
@@ -31,72 +38,100 @@ export function analyzeRouter() {
router.post("/analyze", express.json({ limit: "10mb" }), async (req, res) => { router.post("/analyze", express.json({ limit: "10mb" }), async (req, res) => {
const t0 = Date.now(); const t0 = Date.now();
const installId = req.header("X-Recap-Install-Id");
const jobId = req.header("X-Recap-Job-Id") || null; const jobId = req.header("X-Recap-Job-Id") || null;
const auth = req.header("Authorization");
if (!installId) { let identity;
try {
identity = await resolveIdentity(req);
} catch (err) {
const e = await errorEnvelope({
error: err?.message || "auth_error",
statusHint: err?.status || 401,
});
return res.status(e.statusHint || 401).json(e.body);
}
if (identity.kind === "license" && !identity.installId) {
const e = await errorEnvelope({ const e = await errorEnvelope({
error: "missing X-Recap-Install-Id header", error: "missing X-Recap-Install-Id header",
statusHint: 400, statusHint: 400,
}); });
return res.status(400).json(e.body); return res.status(400).json(e.body);
} }
const { creditKey, installId, license } = identity;
const prompt = req.body?.prompt; const prompt = req.body?.prompt;
if (!prompt || typeof prompt !== "string") { if (!prompt || typeof prompt !== "string") {
const e = await errorEnvelope({ const e = await errorEnvelope({
error: "missing or non-string body.prompt", error: "missing or non-string body.prompt",
creditKey,
installId, installId,
statusHint: 400, statusHint: 400,
}); });
return res.status(400).json(e.body); return res.status(400).json(e.body);
} }
const license = await resolveLicense(auth); const row = await getOrCreateRow({ creditKey, installId, license });
const tier = license.tier; const tier = identityTier(identity, row);
const row = await getOrCreateRow(installId);
row.tier_snapshot = tier; row.tier_snapshot = tier;
const licenseFp = identity.kind === "cloud" ? null : licenseFingerprint(license);
const auditInstall = installId || identity.userId || null;
let reusedJob = false; // Two separate decisions on every call:
let chosenBackend = null; // 1. Billing: did we already charge a credit for this job? (look
const existingJob = lookupJob(installId, jobId); // up by job_id; reused → don't charge again.)
if (existingJob) { // 2. Routing: which backend serves THIS pipeline step's request?
reusedJob = true; // (always per-pipeline preference + planBackend, even when
chosenBackend = existingJob.backend; // the job has a prior transcribe call that routed elsewhere.)
} else { //
const cfg = await getConfigSnapshot(); // The old code conflated the two — it copied `backend` from the
const hasHardware = !!cfg.relay_gemma_base_url; // existing job, which meant analyze would silently inherit
const quota = await getTierQuotas(); // transcribe's backend choice even when the operator's analyze
const preference = // preference said something different. Fixed: routing is decided
cfg.relay_analyze_backend_preference || "gemini_first"; // fresh per pipeline step, regardless of job history.
const plan = planBackend(row, quota, { hasHardware, preference }); const reusedJob = !!lookupJob({ creditKey, installId, license, jobId });
if (!plan.allowed) {
await recordCall({
install_id: installId,
tier,
pipeline: "analyze",
backend: null,
model: null,
status: "refused",
credit_charged: 0,
duration_ms: Date.now() - t0,
cost_usd: 0,
job_id: jobId,
error: plan.reason,
});
const e = await errorEnvelope({
error: plan.reason,
installId,
tier,
statusHint: 402,
});
return res.status(402).json(e.body);
}
chosenBackend = plan.backend;
}
const cfg = await getConfigSnapshot(); const cfg = await getConfigSnapshot();
const hw = await resolveHardwareConfig(cfg);
// Operator-only diagnostic — see summarize-url.js for the full
// reasoning. We don't 503 here on blocked_reason because doing
// so pre-empts planBackend and would surface operator-internal
// Spark Control / vLLM wording to clients even when Gemini was
// the configured preference. planBackend correctly routes around
// an unavailable hardware path via hasHardware = false.
if (hw.analyze.blocked_reason) {
console.warn(
`[analyze] hardware analyze currently blocked (planBackend will route to Gemini if available): ${hw.analyze.blocked_reason}`,
);
}
const hasHardware = !!hw.analyze.url;
const quota = await getTierQuotas();
const preference =
cfg.relay_analyze_backend_preference || "gemini_first";
const plan = planBackend(row, quota, { hasHardware, preference });
if (!plan.allowed) {
await recordCall({
install_id: auditInstall,
license_fingerprint: licenseFp,
tier,
pipeline: "analyze",
backend: null,
model: null,
status: "refused",
credit_charged: 0,
duration_ms: Date.now() - t0,
cost_usd: 0,
job_id: jobId,
error: plan.reason,
});
const e = await errorEnvelope({
error: plan.reason,
creditKey,
installId,
tier,
statusHint: 402,
});
return res.status(402).json(e.body);
}
const chosenBackend = plan.backend;
let result; let result;
try { try {
if (chosenBackend === "gemini") { if (chosenBackend === "gemini") {
@@ -108,24 +143,39 @@ export function analyzeRouter() {
result = await backend.analyzeText({ prompt }); result = await backend.analyzeText({ prompt });
} else { } else {
const backend = createHardwareBackend({ const backend = createHardwareBackend({
parakeetBaseURL: cfg.relay_parakeet_base_url, parakeetBaseURL: hw.transcribe.url || "",
gemmaBaseURL: cfg.relay_gemma_base_url, gemmaBaseURL: hw.analyze.url || "",
parakeetModel: cfg.relay_parakeet_model, sparkControlBaseURL: hw.sparkBase || "",
gemmaModel: cfg.relay_gemma_model, parakeetModel: hw.transcribe.model || "",
gemmaModel: hw.analyze.model || "",
}); });
result = await backend.analyzeText({ prompt }); result = await backend.analyzeText({ prompt });
} }
} catch (err) { } catch (err) {
if (reusedJob) refundJob(installId, jobId); if (reusedJob) await refundJob({ creditKey, installId, license, jobId });
console.error(`[relay/analyze] backend error: ${err?.message}`); console.error(`[relay/analyze] backend error: ${err?.message}`);
// Passive health-event report to Spark Control so the
// operator's dashboard surfaces the failure immediately
// (without waiting for its own polling cycle to catch it).
// Only fired for hardware-side calls — Gemini failures are a
// separate observability surface (Google's API health).
if (chosenBackend === "hardware") {
reportHealthEvent({
service: "vllm",
ok: false,
error: (err?.message || String(err)).slice(0, 280),
ms: Date.now() - t0,
});
}
await recordCall({ await recordCall({
install_id: installId, install_id: auditInstall,
license_fingerprint: licenseFp,
tier, tier,
pipeline: "analyze", pipeline: "analyze",
backend: chosenBackend, backend: chosenBackend,
model: chosenBackend === "gemini" model: chosenBackend === "gemini"
? cfg.relay_gemini_analysis_model ? cfg.relay_gemini_analysis_model
: cfg.relay_gemma_model, : hw.analyze.model || "(auto)",
status: "error", status: "error",
credit_charged: 0, credit_charged: 0,
duration_ms: Date.now() - t0, duration_ms: Date.now() - t0,
@@ -135,6 +185,7 @@ export function analyzeRouter() {
}); });
const e = await errorEnvelope({ const e = await errorEnvelope({
error: err?.message || "backend_error", error: err?.message || "backend_error",
creditKey,
installId, installId,
tier, tier,
statusHint: err?.status || 502, statusHint: err?.status || 502,
@@ -144,8 +195,8 @@ export function analyzeRouter() {
let creditCharged = 0; let creditCharged = 0;
if (!reusedJob) { if (!reusedJob) {
await commitCredit(installId, { backend: chosenBackend, tier }); await commitCredit({ creditKey, installId, license, backend: chosenBackend, tier });
markJobCharged(installId, jobId, { backend: chosenBackend, tier }); await markJobCharged({ creditKey, installId, license, jobId, backend: chosenBackend, tier });
creditCharged = 1; creditCharged = 1;
} }
@@ -159,7 +210,8 @@ export function analyzeRouter() {
cost_usd: 0, cost_usd: 0,
}; };
await recordCall({ await recordCall({
install_id: installId, install_id: auditInstall,
license_fingerprint: licenseFp,
tier, tier,
pipeline: "analyze", pipeline: "analyze",
backend: chosenBackend, backend: chosenBackend,
@@ -168,10 +220,14 @@ export function analyzeRouter() {
credit_charged: creditCharged, credit_charged: creditCharged,
duration_ms: Date.now() - t0, duration_ms: Date.now() - t0,
job_id: jobId, job_id: jobId,
// Surface the cascade so the dashboard can show "served by
// 2.5-flash after 3-flash 503'd" — Gemini backend returns this;
// hardware backend doesn't (no per-model fallback there).
attempts: result?.attempts || null,
...costDetails, ...costDetails,
}); });
const body = await envelope({ result, installId, tier, creditCharged }); const body = await envelope({ result, creditKey, installId, license, tier, creditCharged });
res.json(body); res.json(body);
}); });
+166 -50
View File
@@ -1,69 +1,185 @@
// GET /relay/capabilities — operator-aware metadata for Recap clients // GET /relay/capabilities — per-install metadata for Recap clients to
// to plan their audio handling. Returns the upper bounds the relay's // plan their audio handling. Tells Recap whether to chunk a long
// CURRENT routing config can comfortably accept, so Recap can decide // audio file before sending it, based on which backend THIS install's
// whether to chunk a long video before sending it. // next transcribe call will actually route to.
// //
// Today's logic: // The decision is install-specific because the relay's routing
// - When the operator's transcribe_backend_preference routes through // preference combined with the install's tier + current Gemini cap
// Gemini at all (gemini_first / gemini_only), we report Gemini-safe // consumption determines the backend per request. In `gemini_first`
// limits (60 min / 30 MB / 2700 s chunks). Even with hardware as // mode, the same operator config will route a fresh install to
// overflow, the FIRST attempt is Gemini, which needs the chunk // Gemini (chunking required) but route a cap-exhausted install to
// budget. // hardware (no chunking needed) — so a global capabilities answer
// - When the operator's preference is hardware-only (or hardware- // would be wrong half the time.
// first with overflow to Gemini disabled in spirit), we report
// "unbounded" — the operator's Parakeet wrapper can typically
// ingest 2+ hour podcasts in a single shot, so chunking just adds
// extra inference passes and timestamp-stitching overhead.
// //
// Recap reads this once on boot + on policy refresh; when its // Inputs:
// transcriptionProvider is "relay", it honors these limits instead of // X-Recap-Install-Id (optional but strongly recommended)
// its own hardcoded thresholds. For non-relay providers, Recap's // Authorization (optional Bearer license — affects tier lookup)
// internal per-provider thresholds apply. //
// Without an install_id, returns Gemini-safe limits conservatively
// (the chunking path always works; the no-chunking path only works
// when hardware actually serves the call).
//
// Output shape (unchanged from v1 — pure additive on the routing
// logic):
// {
// max_audio_mb: number,
// max_audio_minutes: number,
// preferred_chunk_seconds: number | null, // null = don't chunk
// reason: string // human-readable
// }
import express from "express"; import express from "express";
import { getConfigSnapshot } from "../config.js"; import { getConfigSnapshot, getTierQuotas } from "../config.js";
import { resolveLicense } from "../keysat-client.js";
import { getOrCreateRow, planBackend } from "../credits.js";
import { resolveHardwareConfig } from "../hardware-config.js";
// Gemini File API can handle audio up to ~9.5 hours per generateContent
// call and files up to 2GB. The conservative 60-min/30-MB ceiling we
// shipped originally was sized for free-tier worries that no longer
// apply on paid Gemini. Bumped to 240 min / 200 MB so Recap hits the
// relay-URL fast-path for content up to 4 hours instead of falling
// back to client-side chunked uploads (which lose the buyer-bandwidth
// savings and serialize the calls).
// Limits reported by /relay/capabilities when the call is expected to be
// served by Gemini, and also used as the conservative fallback when routing
// is refused or cannot be resolved. Frozen so the shared object cannot be
// mutated by a handler that spreads it into a response.
const GEMINI_LIMITS = Object.freeze({
// Largest audio payload, in megabytes, advertised to the client.
max_audio_mb: 200,
// Longest audio, in minutes, advertised to the client (4 hours).
max_audio_minutes: 240,
// 45 min. Server-side chunking still kicks in for stability on the
// longest files, but only on the actual call; this value doesn't gate
// client-side chunking.
preferred_chunk_seconds: 2700,
});
// Limits reported by /relay/capabilities when the call is expected to be
// served by the operator's hardware backend. Frozen so the shared object
// cannot be mutated by a handler that spreads it into a response.
const HARDWARE_LIMITS = Object.freeze({
// Effectively unbounded — Parakeet wrappers commonly handle 2+ hour
// audio in one shot. Set high but finite ceilings so a 24-hour file
// doesn't OOM the operator's GPU box silently.
max_audio_mb: 500,
// NOTE(review): this is the same 240-minute ceiling as GEMINI_LIMITS, so
// the hardware path differs only in max_audio_mb and in not chunking —
// confirm that is intended given the "effectively unbounded" wording.
max_audio_minutes: 240,
// null = the client should not chunk the audio before sending it.
preferred_chunk_seconds: null,
});
export function capabilitiesRouter() { export function capabilitiesRouter() {
const router = express.Router(); const router = express.Router();
router.get("/capabilities", async (_req, res) => { router.get("/capabilities", async (req, res) => {
const cfg = await getConfigSnapshot(); const cfg = await getConfigSnapshot();
const txPref = const txPref =
cfg.relay_transcribe_backend_preference || "gemini_first"; cfg.relay_transcribe_backend_preference || "gemini_first";
const hasParakeet = !!cfg.relay_parakeet_base_url; const hw = await resolveHardwareConfig(cfg);
const hasHardware = !!hw.transcribe.url;
const installId = req.header("X-Recap-Install-Id") || null;
const auth = req.header("Authorization") || null;
// Conservative default: Gemini-safe limits unless the operator has // ── TTS availability (audio-first "walking mode") ──
// explicitly said "use hardware (only or first) and I've got a // Operator-wide, not install-specific: whether ANY TTS backend can
// Parakeet endpoint wired up". Without the Parakeet endpoint we // serve a /relay/tts call given the operator's config. The Recap app
// can't make use of larger inputs — Gemini's the only path — // uses has_tts to decide whether to show the "Listen" button at all
// so we'd just be lying to the client. // (it additionally gates the feature to Max users on its own side).
const ttsPref = cfg.relay_tts_backend_preference || "hardware_first";
const kokoroReady = !!hw.tts?.url;
const elevenConfigured = !!(
cfg.relay_elevenlabs_api_key && cfg.relay_elevenlabs_voice_id
);
const ttsBackend =
ttsPref === "hardware_only"
? kokoroReady
? "kokoro"
: null
: ttsPref === "cloud_only"
? elevenConfigured
? "elevenlabs"
: null
: ttsPref === "cloud_first"
? elevenConfigured
? "elevenlabs"
: kokoroReady
? "kokoro"
: null
: kokoroReady // hardware_first (default)
? "kokoro"
: elevenConfigured
? "elevenlabs"
: null;
const ttsCaps = {
has_tts: !!ttsBackend,
tts_backend: ttsBackend, // "kokoro" | "elevenlabs" | null
tts_default_voice: cfg.relay_tts_default_voice || null,
};
// If we have an install_id, run the same routing logic the actual
// transcribe route uses so the chunking decision matches the
// backend that will actually serve the call.
if (installId) {
try {
const license = await resolveLicense(auth);
const row = await getOrCreateRow({ installId, license });
row.tier_snapshot = license.tier;
const quota = await getTierQuotas();
const plan = planBackend(row, quota, {
hasHardware,
preference: txPref,
});
if (plan.allowed && plan.backend === "hardware") {
return res.json({
...HARDWARE_LIMITS,
...ttsCaps,
reason: `routing this install to hardware (pref=${txPref}, tier=${license.tier})`,
});
}
if (plan.allowed && plan.backend === "gemini") {
return res.json({
...GEMINI_LIMITS,
...ttsCaps,
reason: `routing this install to Gemini (pref=${txPref}, tier=${license.tier})`,
});
}
// planBackend refused entirely (out of credits / no backend
// configured). Return Gemini-safe defaults so the client still
// chunks defensively and gets a clean 402 from the real
// transcribe call rather than a confusing transport failure.
return res.json({
...GEMINI_LIMITS,
...ttsCaps,
reason: `routing refused for this install (${plan.reason || "unknown"}) — returning Gemini-safe defaults`,
});
} catch (err) {
// License lookup or row read failed — fall through to the
// anonymous path so the client at least gets safe defaults.
console.warn(
`[capabilities] install-aware resolve failed for ${installId}: ${err?.message || err} — falling back to operator-wide defaults`
);
}
}
// Anonymous (no install_id) or install-aware path failed. Pick
// capabilities from the operator-wide routing preference alone:
// hardware_only / hardware_first → hardware-safe limits (provided
// hardware is configured)
// gemini_only / gemini_first → Gemini-safe (will always work
// for the first attempt; in
// gemini_first the eventual
// overflow to hardware can
// handle bigger files too, but
// chunking still works for both)
//
// When `hardware_first` is set but Parakeet isn't actually
// configured, the relay will fall back to Gemini — so report
// Gemini-safe limits in that case.
const hardwareCapable = const hardwareCapable =
hasParakeet && (txPref === "hardware_only" || txPref === "hardware_first"); hasHardware && (txPref === "hardware_only" || txPref === "hardware_first");
if (hardwareCapable) { if (hardwareCapable) {
res.json({ return res.json({
// Effective unbounded — Parakeet wrappers commonly handle 2+ ...HARDWARE_LIMITS,
// hour audio in one shot. Set high but finite ceilings so a ...ttsCaps,
// 24-hour file doesn't OOM the operator's GPU box silently. reason: `hardware-capable backend preference (${txPref})`,
max_audio_mb: 500,
max_audio_minutes: 240,
preferred_chunk_seconds: null,
// Diagnostic — Recap doesn't need this but the dashboard / a
// curious operator might want to know which limit shape they
// returned and why.
reason: "hardware-capable backend preference (" + txPref + ")",
});
} else {
res.json({
// Gemini File-API + practical reliability limits. Matches
// Recap's pre-relay defaults so existing chunking behavior
// is preserved.
max_audio_mb: 30,
max_audio_minutes: 60,
preferred_chunk_seconds: 2700, // 45 min chunks
reason: "Gemini-backed preference (" + txPref + ")",
}); });
} }
return res.json({
...GEMINI_LIMITS,
...ttsCaps,
reason: `Gemini-backed preference (${txPref})`,
});
}); });
return router; return router;
+32 -4
View File
@@ -11,18 +11,34 @@ import { getTierQuotas } from "../config.js";
export async function envelope({ export async function envelope({
result = null, result = null,
installId, installId,
// License is optional but recommended — without it, balance lookups
// route to the install-keyed row even for paid users, which would
// briefly underreport their balance after a commitCredit landed on
// their license-keyed row. Routes pass it through from resolveLicense.
license = null,
// Explicit ledger key override (cloud `user:<id>` path). Takes
// precedence over (installId, license) when present.
creditKey = null,
tier, tier,
creditCharged = 0, creditCharged = 0,
}) { }) {
const quota = await getTierQuotas(); const quota = await getTierQuotas();
const row = await getOrCreateRow(installId); const row = await getOrCreateRow({ installId, license, creditKey });
// tier_snapshot on the row was just updated by commitCredit; if no // tier_snapshot on the row was just updated by commitCredit; if no
// credit was committed (free reuse via job_id) it still reflects // credit was committed (free reuse via job_id) it still reflects
// the last-known tier for this install, which is fine. // the last-known tier for this install, which is fine.
const balance = computeRemaining(row, quota); const balance = computeRemaining(row, quota);
return { return {
result, result,
credits_remaining: balance.remaining, // null = unlimited (Max) // `total` = tier allotment + purchased top-up. Recap renders this
// as the headline number on its credits pill. `remaining` alone
// wouldn't reflect purchased credits at all — so a buyer who
// just bought 5 credits and had 0 tier credits left would still
// see "0 relay credits" until their tier renewed.
credits_remaining: balance.total, // null = unlimited (Max)
// Breakdown for clients that want to display it.
tier_remaining: balance.remaining,
purchased_balance: balance.purchased,
tier, tier,
credit_charged: creditCharged, credit_charged: creditCharged,
}; };
@@ -35,15 +51,25 @@ export async function envelope({
export async function errorEnvelope({ export async function errorEnvelope({
error, error,
installId, installId,
license = null,
creditKey = null,
tier = "core", tier = "core",
statusHint = 500, statusHint = 500,
}) { }) {
let creditsRemaining = null; let creditsRemaining = null;
let tierRemaining = null;
let purchased = 0;
try { try {
const quota = await getTierQuotas(); const quota = await getTierQuotas();
const row = await getOrCreateRow(installId || "unknown"); const row = await getOrCreateRow({
installId: creditKey ? null : installId || "unknown",
license,
creditKey,
});
const balance = computeRemaining(row, quota); const balance = computeRemaining(row, quota);
creditsRemaining = balance.remaining; creditsRemaining = balance.total;
tierRemaining = balance.remaining;
purchased = balance.purchased;
} catch {} } catch {}
return { return {
statusHint, statusHint,
@@ -51,6 +77,8 @@ export async function errorEnvelope({
result: null, result: null,
error: typeof error === "string" ? error : error?.message || "unknown_error", error: typeof error === "string" ? error : error?.message || "unknown_error",
credits_remaining: creditsRemaining, credits_remaining: creditsRemaining,
tier_remaining: tierRemaining,
purchased_balance: purchased,
tier, tier,
credit_charged: 0, credit_charged: 0,
}, },
+7 -2
View File
@@ -35,8 +35,13 @@ export function healthRouter() {
version: VERSION, version: VERSION,
backends: { backends: {
gemini: !!cfg.relay_gemini_api_key, gemini: !!cfg.relay_gemini_api_key,
parakeet: !!cfg.relay_parakeet_base_url, // Whether the operator-hardware path is wired up at all.
gemma: !!cfg.relay_gemma_base_url, // Hardware backends are now sourced from Spark Control
// discovery — see hardware-config.js. Empty discovery URL
// means no hardware path; downstream details (which model is
// ready, transcribe vs analyze availability) are surfaced via
// /admin/config's effective_* fields.
hardware: !!cfg.relay_spark_control_url,
}, },
admin_enabled: !!cfg.relay_admin_password_hash, admin_enabled: !!cfg.relay_admin_password_hash,
}); });
File diff suppressed because it is too large Load Diff
+514 -177
View File
@@ -33,8 +33,13 @@ import { execFile } from "child_process";
import { promisify } from "util"; import { promisify } from "util";
import { Readable } from "stream"; import { Readable } from "stream";
import { pipeline } from "stream/promises"; import { pipeline } from "stream/promises";
import { resolveLicense } from "../keysat-client.js"; import { resolveIdentity, identityTier } from "../identity.js";
import { getOrCreateRow, planBackend, commitCredit } from "../credits.js"; import {
getOrCreateRow,
planBackend,
commitCredit,
licenseFingerprint,
} from "../credits.js";
import { lookupJob, markJobCharged, refundJob } from "../job-credits.js"; import { lookupJob, markJobCharged, refundJob } from "../job-credits.js";
import { getConfigSnapshot, getTierQuotas } from "../config.js"; import { getConfigSnapshot, getTierQuotas } from "../config.js";
import { createGeminiBackend } from "../backends/gemini.js"; import { createGeminiBackend } from "../backends/gemini.js";
@@ -42,6 +47,18 @@ import { createHardwareBackend } from "../backends/hardware.js";
import { envelope, errorEnvelope } from "./envelope.js"; import { envelope, errorEnvelope } from "./envelope.js";
import { recordCall } from "../audit-log.js"; import { recordCall } from "../audit-log.js";
import { calcGeminiCost } from "../pricing.js"; import { calcGeminiCost } from "../pricing.js";
import { getAudioDurationSeconds } from "../audio-meta.js";
import { resolveHardwareConfig } from "../hardware-config.js";
import { reportHealthEvent } from "../spark-control-events.js";
import {
createJob,
markRunning,
setProgress,
markComplete,
markFailed,
getJob,
} from "../jobs.js";
import { saveJobOutput } from "../output-store.js";
const execFileAsync = promisify(execFile); const execFileAsync = promisify(execFile);
@@ -54,7 +71,7 @@ const MAX_DOWNLOAD_BYTES = 500 * 1024 * 1024;
// rate-limits; a hard ceiling avoids holding the request open forever. // rate-limits; a hard ceiling avoids holding the request open forever.
const DOWNLOAD_TIMEOUT_MS = 10 * 60 * 1000; const DOWNLOAD_TIMEOUT_MS = 10 * 60 * 1000;
/**
 * Report whether a media URL points at YouTube: youtube.com, any subdomain
 * of it (www., m., music., ...), or the youtu.be short-link host.
 *
 * The host is taken from the parsed URL rather than pattern-matched against
 * the raw string. Matching the raw string required the host to sit at the
 * start of the string or directly after a ".", so `https://youtu.be/<id>`
 * and `https://youtube.com/...` (host preceded by "//") were never
 * recognised, while a "youtube.com" appearing in a path, a query string, or
 * as the prefix of an unrelated host was.
 *
 * @param {string} url - Absolute URL, or a scheme-less "host/path" string.
 * @returns {boolean} True when the URL's host is a YouTube host; false for
 *   empty, non-string, or non-YouTube input.
 */
export function looksLikeYouTube(url) {
  if (!url || typeof url !== "string") return false;

  let host = "";
  try {
    host = new URL(url).hostname;
  } catch {
    // Not an absolute URL (e.g. "youtu.be/ID"); the fallback below handles it.
  }

  if (host) {
    // Anchored at the end so "youtube.com.evil.example" does not qualify;
    // the optional "." tolerates a fully-qualified host with a trailing dot.
    return /(?:^|\.)(?:youtube\.com|youtu\.be)\.?$/i.test(host);
  }

  // Scheme-less input has no parseable host: keep the legacy raw-string check
  // so bare "youtube.com/watch?v=..." strings are still recognised.
  return /(?:^|\.)(youtube\.com|youtu\.be)\b/i.test(url);
}
@@ -79,7 +96,7 @@ function guessMimeFromExt(filePath) {
// Download an HTTP(S) audio URL to a temp file. Stops if the file // Download an HTTP(S) audio URL to a temp file. Stops if the file
// would exceed MAX_DOWNLOAD_BYTES. Returns { filePath, bytes, // would exceed MAX_DOWNLOAD_BYTES. Returns { filePath, bytes,
// mimeType }. // mimeType }.
async function downloadDirect(url, tmpDir) { export async function downloadDirect(url, tmpDir) {
const res = await fetch(url, { const res = await fetch(url, {
redirect: "follow", redirect: "follow",
signal: AbortSignal.timeout(DOWNLOAD_TIMEOUT_MS), signal: AbortSignal.timeout(DOWNLOAD_TIMEOUT_MS),
@@ -143,7 +160,11 @@ async function downloadDirect(url, tmpDir) {
// Download a YouTube URL via yt-dlp. Picks the audio-only m4a/mp3. // Download a YouTube URL via yt-dlp. Picks the audio-only m4a/mp3.
// Logs the chosen path back as the file. Caller manages tmpDir. // Logs the chosen path back as the file. Caller manages tmpDir.
async function downloadYouTube(url, tmpDir) { // Captures the video title via `--print "%(title)s"` so callers (the
// summarize-url / transcribe-url workers) can stamp the Jobs table
// with the real title instead of "Untitled" when the client didn't
// pre-fetch metadata.
export async function downloadYouTube(url, tmpDir) {
const outTemplate = path.join(tmpDir, "audio.%(ext)s"); const outTemplate = path.join(tmpDir, "audio.%(ext)s");
const args = [ const args = [
"-x", // extract audio "-x", // extract audio
@@ -156,18 +177,93 @@ async function downloadYouTube(url, tmpDir) {
"--no-playlist", "--no-playlist",
"--no-simulate", "--no-simulate",
"--no-warnings", "--no-warnings",
// Emit a JSON dict containing the full metadata we care about for
// the transcribe prompt's speaker-identification cues. Using
// `before_dl:` so we get the metadata even if the download itself
// later fails partway. The `.{field1,field2}j` template prints
// just the named fields as a JSON object (yt-dlp escapes embedded
// newlines inside description values, so single-line stdout parses
// cleanly). Title comes from the same dict — no second --print
// needed.
//
// Why these four fields specifically: they're exactly what the
// recap-app's fetchYouTubeMetadata() pulls and feeds into its
// direct-to-Gemini transcribe prompt. With these populated, the
// model can correctly assign speaker labels (host name from
// channel, guest name from description, chapter titles often name
// both). Without them, every transcript falls back to unlabeled
// dialogue regardless of how detailed the prompt's
// speaker-identification rule is.
"--print",
"before_dl:%(.{title,channel,description,chapters})j",
url, url,
]; ];
let extractedMetadata = {
title: null,
channel: null,
description: null,
chapters: [],
};
try { try {
await execFileAsync("yt-dlp", args, { const { stdout } = await execFileAsync("yt-dlp", args, {
timeout: DOWNLOAD_TIMEOUT_MS, timeout: DOWNLOAD_TIMEOUT_MS,
maxBuffer: 10 * 1024 * 1024, maxBuffer: 10 * 1024 * 1024,
}); });
// The JSON dict is the first non-empty line that starts with `{`.
// yt-dlp may print other progress / warning lines before or after
// depending on version; filter to the JSON line specifically.
const firstJsonLine = (stdout || "")
.split(/\r?\n/)
.map((l) => l.trim())
.find((l) => l.length > 0 && l.startsWith("{"));
if (firstJsonLine) {
try {
const parsed = JSON.parse(firstJsonLine);
extractedMetadata = {
title:
typeof parsed.title === "string" && parsed.title.trim()
? parsed.title.trim().slice(0, 300)
: null,
channel:
typeof parsed.channel === "string" && parsed.channel.trim()
? parsed.channel.trim().slice(0, 200)
: null,
// Cap at 2000 chars — recap-app uses the same cap. Long
// descriptions with release-notes / sponsor blocks otherwise
// bloat the prompt and crowd out the speaker-naming signal.
description:
typeof parsed.description === "string" && parsed.description.trim()
? parsed.description.trim().slice(0, 2000)
: null,
// Each chapter is { start_time: seconds, end_time, title }.
// We only use start_time + title in the prompt; pass the full
// array through so callers see what yt-dlp returned.
chapters: Array.isArray(parsed.chapters) ? parsed.chapters : [],
};
} catch (parseErr) {
// Malformed JSON from yt-dlp. Fall back to title-only via a
// best-effort regex on the line. Better than nothing.
const m = firstJsonLine.match(/"title"\s*:\s*"([^"]+)"/);
if (m) extractedMetadata.title = m[1].slice(0, 300);
console.warn(
`[yt-dlp] metadata JSON parse failed: ${parseErr?.message || parseErr} — falling back to title-only`
);
}
} else if (stdout) {
// No JSON line but stdout has something — older yt-dlp versions
// or some videos may emit a bare title line. Use it as title-only
// so we at least preserve the existing v0.2.56 behavior.
const firstLine = stdout
.split(/\r?\n/)
.map((l) => l.trim())
.find((l) => l.length > 0);
if (firstLine) extractedMetadata.title = firstLine.slice(0, 300);
}
} catch (err) { } catch (err) {
const stderr = (err?.stderr || "").toString(); const stderr = (err?.stderr || "").toString();
const stdout = (err?.stdout || "").toString(); const stdoutStr = (err?.stdout || "").toString();
throw new Error( throw new Error(
`yt-dlp failed: ${stderr.trim() || stdout.trim() || err?.message}` `yt-dlp failed: ${stderr.trim() || stdoutStr.trim() || err?.message}`
); );
} }
// Find the produced file — yt-dlp's audio-format=mp3 means it ends // Find the produced file — yt-dlp's audio-format=mp3 means it ends
@@ -189,225 +285,466 @@ async function downloadYouTube(url, tmpDir) {
filePath, filePath,
bytes: stat.size, bytes: stat.size,
mimeType: guessMimeFromExt(filePath), mimeType: guessMimeFromExt(filePath),
title: extractedMetadata.title,
channel: extractedMetadata.channel,
description: extractedMetadata.description,
chapters: extractedMetadata.chapters,
}; };
} }
export function transcribeUrlRouter() { export function transcribeUrlRouter() {
const router = express.Router(); const router = express.Router();
// POST /relay/transcribe-url — kicks off a background transcribe
// job and returns immediately with { job_id }. The client polls
// GET /relay/jobs/:id to find out when it's done.
//
// Why async: a synchronous response over HTTP can't reliably
// survive multi-minute work — proxies, load balancers, and NATs
// along the path will drop the connection on long-running idle
// requests (we observed a 5-minute cut on a 1h45m transcribe).
// The poll requests are short and cheap, so they never trip
// timeouts.
router.post("/transcribe-url", express.json({ limit: "1mb" }), async (req, res) => { router.post("/transcribe-url", express.json({ limit: "1mb" }), async (req, res) => {
const t0 = Date.now(); const summaryJobId = req.header("X-Recap-Job-Id") || null;
const installId = req.header("X-Recap-Install-Id");
const jobId = req.header("X-Recap-Job-Id") || null;
const auth = req.header("Authorization");
if (!installId) { let identity;
try {
identity = await resolveIdentity(req);
} catch (err) {
const e = await errorEnvelope({
error: err?.message || "auth_error",
statusHint: err?.status || 401,
});
return res.status(e.statusHint || 401).json(e.body);
}
if (identity.kind === "license" && !identity.installId) {
const e = await errorEnvelope({ const e = await errorEnvelope({
error: "missing X-Recap-Install-Id header", error: "missing X-Recap-Install-Id header",
statusHint: 400, statusHint: 400,
}); });
return res.status(400).json(e.body); return res.status(400).json(e.body);
} }
const { creditKey, installId, license } = identity;
// `title` is `let` rather than `const` because the worker may
// backfill it from yt-dlp metadata after the download completes
// (when the client didn't pre-fetch the title).
let title;
const { const {
media_url: mediaUrl, media_url: mediaUrl,
type, type,
mime_type: bodyMime, mime_type: bodyMime,
title, title: bodyTitle,
channel, channel,
description, description,
chapters, chapters,
} = req.body || {}; } = req.body || {};
title = bodyTitle;
if (!mediaUrl || typeof mediaUrl !== "string") { if (!mediaUrl || typeof mediaUrl !== "string") {
const e = await errorEnvelope({ const e = await errorEnvelope({
error: "missing or non-string body.media_url", error: "missing or non-string body.media_url",
creditKey,
installId, installId,
statusHint: 400, statusHint: 400,
}); });
return res.status(400).json(e.body); return res.status(400).json(e.body);
} }
const license = await resolveLicense(auth); const row = await getOrCreateRow({ creditKey, installId, license });
const tier = license.tier; const tier = identityTier(identity, row);
const row = await getOrCreateRow(installId);
row.tier_snapshot = tier; row.tier_snapshot = tier;
const licenseFp = identity.kind === "cloud" ? null : licenseFingerprint(license);
const auditInstall = installId || identity.userId || null;
// Quota check + backend choice. Same as /relay/transcribe. // Billing vs. routing decoupled — see analyze.js for reasoning.
let reusedJob = false; const reusedSummaryJob = !!lookupJob({ creditKey, installId, license, jobId: summaryJobId });
let chosenBackend = null; const cfgPlan = await getConfigSnapshot();
const existingJob = lookupJob(installId, jobId); const hw = await resolveHardwareConfig(cfgPlan);
if (existingJob) { // Operator-only diagnostic — see summarize-url.js for the full
reusedJob = true; // reasoning. We don't 503 here on blocked_reason because doing
chosenBackend = existingJob.backend; // so pre-empts planBackend and would surface operator-internal
} else { // Spark Control / parakeet wording to clients even when Gemini
const cfg = await getConfigSnapshot(); // was the configured preference.
const hasHardware = !!cfg.relay_parakeet_base_url; if (hw.transcribe.blocked_reason) {
const quota = await getTierQuotas(); console.warn(
const preference = `[transcribe-url] hardware transcribe currently blocked (planBackend will route to Gemini if available): ${hw.transcribe.blocked_reason}`,
cfg.relay_transcribe_backend_preference || "gemini_first";
const plan = planBackend(row, quota, { hasHardware, preference });
if (!plan.allowed) {
await recordCall({
install_id: installId,
tier,
pipeline: "transcribe",
backend: null,
model: null,
status: "refused",
credit_charged: 0,
duration_ms: Date.now() - t0,
cost_usd: 0,
job_id: jobId,
error: plan.reason,
});
const e = await errorEnvelope({
error: plan.reason,
installId,
tier,
statusHint: 402,
});
return res.status(402).json(e.body);
}
chosenBackend = plan.backend;
}
// ── Download phase ─────────────────────────────────────────────
const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "relay-dl-"));
const isYT = type === "youtube" || (!type && looksLikeYouTube(mediaUrl));
const dlStart = Date.now();
let audio;
let downloadMs = 0;
try {
audio = isYT
? await downloadYouTube(mediaUrl, tmpDir)
: await downloadDirect(mediaUrl, tmpDir);
downloadMs = Date.now() - dlStart;
console.log(
`[transcribe-url] downloaded ${audio.bytes} bytes from ${isYT ? "youtube" : "direct"} in ${downloadMs}ms (${mediaUrl.slice(0, 80)})`
); );
} catch (err) { }
try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {} const hasHardware = !!hw.transcribe.url;
console.error(`[transcribe-url] download failed: ${err?.message || err}`); const quota = await getTierQuotas();
const preference =
cfgPlan.relay_transcribe_backend_preference || "gemini_first";
const plan = planBackend(row, quota, { hasHardware, preference });
if (!plan.allowed) {
await recordCall({ await recordCall({
install_id: installId, install_id: auditInstall,
license_fingerprint: licenseFp,
tier, tier,
pipeline: "transcribe", pipeline: "transcribe",
backend: chosenBackend, backend: null,
model: null, model: null,
status: "error", status: "refused",
credit_charged: 0, credit_charged: 0,
duration_ms: Date.now() - t0, duration_ms: 0,
download_ms: Date.now() - dlStart,
cost_usd: 0, cost_usd: 0,
job_id: jobId, job_id: summaryJobId,
error: ("download_failed: " + (err?.message || String(err))).slice(0, 200), media_url: mediaUrl || null,
title: title || null,
error: plan.reason,
}); });
const e = await errorEnvelope({ const e = await errorEnvelope({
error: "download_failed: " + (err?.message || String(err)).slice(0, 200), error: plan.reason,
installId, installId,
license,
tier, tier,
statusHint: 502, statusHint: 402,
}); });
return res.status(502).json(e.body); return res.status(402).json(e.body);
} }
const chosenBackend = plan.backend;
// ── Transcription phase ──────────────────────────────────────── // Mint the background job + RESPOND IMMEDIATELY.
const cfg = await getConfigSnapshot(); const job = createJob({
let result; kind: "transcribe-url",
try { installId: auditInstall,
const audioBuf = await fs.readFile(audio.filePath); metadata: {
const mimeType = bodyMime || audio.mimeType; owner: creditKey, // authorizes the /jobs/:id poll (per-identity)
if (chosenBackend === "gemini") { media_url: mediaUrl,
const backend = createGeminiBackend({
apiKey: cfg.relay_gemini_api_key,
transcriptionModel: cfg.relay_gemini_transcription_model,
analysisModel: cfg.relay_gemini_analysis_model,
});
result = await backend.transcribeAudio({
audio: audioBuf,
mimeType,
title: title || "",
channel: channel || "",
description: description || "",
chapters: Array.isArray(chapters) ? chapters : [],
offsetSeconds: 0,
});
} else {
const backend = createHardwareBackend({
parakeetBaseURL: cfg.relay_parakeet_base_url,
gemmaBaseURL: cfg.relay_gemma_base_url,
parakeetModel: cfg.relay_parakeet_model,
gemmaModel: cfg.relay_gemma_model,
});
result = await backend.transcribeAudio({
audio: audioBuf,
mimeType,
offsetSeconds: 0,
});
}
} catch (err) {
try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
if (reusedJob) refundJob(installId, jobId);
console.error(`[transcribe-url] transcribe failed: ${err?.message}`);
await recordCall({
install_id: installId,
tier,
pipeline: "transcribe",
backend: chosenBackend, backend: chosenBackend,
model: summary_job_id: summaryJobId,
chosenBackend === "gemini" },
? cfg.relay_gemini_transcription_model
: cfg.relay_parakeet_model,
status: "error",
credit_charged: 0,
duration_ms: Date.now() - t0,
download_ms: downloadMs,
cost_usd: 0,
job_id: jobId,
error: (err?.message || String(err)).slice(0, 200),
});
const e = await errorEnvelope({
error: err?.message || "backend_error",
installId,
tier,
statusHint: err?.status || 502,
});
return res.status(e.statusHint).json(e.body);
} finally {
try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
}
// ── Commit + audit ─────────────────────────────────────────────
let creditCharged = 0;
if (!reusedJob) {
await commitCredit(installId, { backend: chosenBackend, tier });
markJobCharged(installId, jobId, { backend: chosenBackend, tier });
creditCharged = 1;
}
const costDetails =
chosenBackend === "gemini" && result.usage
? calcGeminiCost(result.model, result.usage)
: {
input_tokens: 0,
output_tokens: 0,
thinking_tokens: 0,
cost_usd: 0,
};
await recordCall({
install_id: installId,
tier,
pipeline: "transcribe",
backend: chosenBackend,
model: result?.model || null,
status: "success",
credit_charged: creditCharged,
duration_ms: Date.now() - t0,
download_ms: downloadMs,
audio_bytes: audio.bytes,
job_id: jobId,
...costDetails,
}); });
const body = await envelope({ result, installId, tier, creditCharged }); // Background worker — runs after this handler has returned.
// Errors are captured into the job record; nothing thrown here
// can crash the route process.
(async () => {
const workerT0 = Date.now();
markRunning(job.id);
setProgress(job.id, "downloading media…");
const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "relay-dl-"));
const isYT = type === "youtube" || (!type && looksLikeYouTube(mediaUrl));
let audio;
let downloadMs = 0;
try {
const dlStart = Date.now();
audio = isYT
? await downloadYouTube(mediaUrl, tmpDir)
: await downloadDirect(mediaUrl, tmpDir);
downloadMs = Date.now() - dlStart;
console.log(
`[transcribe-url ${job.id.slice(0, 8)}] downloaded ${audio.bytes} bytes from ${isYT ? "youtube" : "direct"} in ${downloadMs}ms`
);
audio.seconds = await getAudioDurationSeconds(audio.filePath);
if (!title && audio.title) {
// yt-dlp captured the title during download; use it when
// the client didn't pass one.
title = audio.title;
}
setProgress(job.id, `transcribing ${Math.round((audio.seconds || 0) / 60)} min audio…`);
} catch (err) {
try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
const msg = (err?.message || String(err)).slice(0, 300);
console.error(`[transcribe-url ${job.id.slice(0, 8)}] download failed: ${msg}`);
await recordCall({
install_id: auditInstall,
license_fingerprint: licenseFp,
tier,
pipeline: "transcribe",
backend: chosenBackend,
model: null,
status: "error",
credit_charged: 0,
duration_ms: Date.now() - workerT0,
download_ms: Date.now() - workerT0,
audio_seconds: null,
cost_usd: 0,
job_id: summaryJobId,
media_url: mediaUrl || null,
title: title || null,
error: "download_failed: " + msg,
});
markFailed(job.id, "download_failed: " + msg);
return;
}
// Transcription phase
const cfg = await getConfigSnapshot();
let result;
// Stamp the moment transcribe is about to start (AFTER download
// finished). Used for duration_ms on the audit row so the
// "TX wall time" column reflects ONLY the transcribe phase.
const txPhaseStart = Date.now();
try {
const audioBuf = await fs.readFile(audio.filePath);
const mimeType = bodyMime || audio.mimeType;
if (chosenBackend === "gemini") {
const backend = createGeminiBackend({
apiKey: cfg.relay_gemini_api_key,
transcriptionModel: cfg.relay_gemini_transcription_model,
analysisModel: cfg.relay_gemini_analysis_model,
txChunkSeconds: (cfg.relay_gemini_tx_chunk_minutes || 30) * 60,
txConcurrency: cfg.relay_gemini_tx_concurrency || 12,
transcribePromptOverride: cfg.relay_transcribe_prompt || "",
});
result = await backend.transcribeAudio({
audio: audioBuf,
mimeType,
title: title || "",
channel: channel || "",
description: description || "",
chapters: Array.isArray(chapters) ? chapters : [],
offsetSeconds: 0,
});
} else {
const backend = createHardwareBackend({
parakeetBaseURL: hw.transcribe.url || "",
gemmaBaseURL: hw.analyze.url || "",
sparkControlBaseURL: hw.sparkBase || "",
parakeetModel: hw.transcribe.model || "",
gemmaModel: hw.analyze.model || "",
txChunkSeconds: (cfg.relay_hardware_tx_chunk_minutes || 5) * 60,
txChunkOverlapSeconds: cfg.relay_hardware_tx_chunk_overlap_seconds ?? 30,
diarizationEnabled: !!cfg.relay_hardware_diarization_enabled,
clusterThresholdPct: cfg.relay_hardware_voice_clustering_threshold ?? 70,
anchorMinSpeakingSec: cfg.relay_hardware_anchor_min_speaking_sec ?? 30,
smallClusterMaxSpeakingSec: cfg.relay_hardware_small_cluster_max_speaking_sec ?? 15,
uncertainMarginPct: cfg.relay_hardware_uncertain_margin_pct ?? 10,
txConcurrency: cfg.relay_hardware_tx_concurrency || 4,
});
result = await backend.transcribeAudio({
audio: audioBuf,
mimeType,
offsetSeconds: 0,
});
}
} catch (err) {
try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
if (reusedSummaryJob) await refundJob({ creditKey, installId, license, jobId: summaryJobId });
const msg = (err?.message || String(err)).slice(0, 400);
console.error(`[transcribe-url ${job.id.slice(0, 8)}] transcribe failed: ${msg}`);
if (chosenBackend === "hardware") {
reportHealthEvent({
service: "parakeet",
ok: false,
error: msg.slice(0, 280),
ms: Date.now() - workerT0,
});
}
await recordCall({
install_id: auditInstall,
license_fingerprint: licenseFp,
tier,
pipeline: "transcribe",
backend: chosenBackend,
model:
chosenBackend === "gemini"
? cfg.relay_gemini_transcription_model
: hw.transcribe.model || "(auto)",
status: "error",
credit_charged: 0,
duration_ms: Date.now() - txPhaseStart,
download_ms: downloadMs,
audio_seconds: audio?.seconds || null,
audio_bytes: audio?.bytes || null,
cost_usd: 0,
job_id: summaryJobId,
media_url: mediaUrl || null,
title: title || null,
error: msg,
});
markFailed(job.id, msg);
return;
} finally {
try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
}
// Success — commit credit (once per summary job_id), audit, mark done.
let creditCharged = 0;
if (!reusedSummaryJob) {
await commitCredit({ creditKey, installId, license, backend: chosenBackend, tier });
await markJobCharged({ creditKey, installId, license, jobId: summaryJobId, backend: chosenBackend, tier });
creditCharged = 1;
}
const costDetails =
chosenBackend === "gemini" && result.usage
? calcGeminiCost(result.model, result.usage)
: {
input_tokens: 0,
output_tokens: 0,
thinking_tokens: 0,
cost_usd: 0,
};
// Truncation detection — mark partial when any chunk hit
// the silent output-token cap and emitted < 80% of its
// expected audio. See gemini.js for the actual coverage
// computation; here we just propagate to the audit row.
const truncatedChunks = Array.isArray(result?.truncated_chunks)
? result.truncated_chunks
: [];
const wasTruncated = truncatedChunks.length > 0;
const truncationError = wasTruncated
? `transcribe: ${truncatedChunks.length} chunk(s) truncated — missing ~${truncatedChunks.reduce((s, c) => s + (c.missingSec || 0), 0)}s of speech (model: ${result.model || "unknown"}). Likely hit maxOutputTokens.`
: null;
await recordCall({
install_id: auditInstall,
license_fingerprint: licenseFp,
tier,
pipeline: "transcribe",
backend: chosenBackend,
model: result?.model || null,
status: wasTruncated ? "partial" : "success",
credit_charged: creditCharged,
duration_ms: Date.now() - txPhaseStart,
download_ms: downloadMs,
audio_bytes: audio.bytes,
audio_seconds: audio.seconds || null,
job_id: summaryJobId,
attempts: result?.attempts || null,
// Per-job context for the operator dashboard's per-video table.
// media_url + title let the dashboard show what was being
// processed; chunk_count exposes the new server-side chunking
// (1 for short audio, N for ≥30 min audio split by the Gemini
// backend or by the hardware backend's Parakeet chunker).
media_url: mediaUrl || null,
title: title || null,
chunk_count: result?.chunk_count ?? null,
chunk_durations_ms: result?.chunk_durations_ms || null,
truncated_chunks: wasTruncated ? truncatedChunks : null,
error: truncationError,
...costDetails,
});
markComplete(job.id, {
result,
credit_charged: creditCharged,
tier,
});
console.log(
`[transcribe-url ${job.id.slice(0, 8)}] complete in ${((Date.now() - workerT0) / 1000).toFixed(1)}s`
);
// Optional: persist transcript output for the operator's
// "View output" dashboard feature. Only when the config flag
// is set (default false) — saving real-user transcripts is an
// opt-in operator decision, not a default. Note that we only
// have the transcript here (analyze runs as a separate
// /relay/analyze call in the Recap flow); the analyze row will
// overwrite this file later with the full transcript+analysis
// payload when it lands. Best-effort, errors ignored.
if (cfg.relay_save_user_outputs) {
await saveJobOutput(summaryJobId || job.id, {
batch_id: null,
source: null,
transcript: result?.text || "",
analysis: null,
analysis_raw_text: null,
meta: {
title: title || null,
media_url: mediaUrl,
audio_seconds: audio.seconds || null,
audio_bytes: audio.bytes,
captions_mode: null,
transcribe_backend: chosenBackend,
transcribe_model: result?.model || null,
analyze_backend: null,
analyze_model: null,
},
});
}
})().catch((err) => {
// Top-level catch — should be unreachable since the worker
// handles its own try/catch, but defends against unexpected
// throws so the job doesn't sit in "running" forever.
markFailed(job.id, "worker_crashed: " + (err?.message || String(err)));
console.error(`[transcribe-url ${job.id.slice(0, 8)}] worker crashed:`, err);
});
// Hand back the job_id immediately. Client will poll for status.
const body = await envelope({
result: {
job_id: job.id,
status: "queued",
kind: "transcribe-url",
},
creditKey,
installId,
license,
tier,
});
res.json(body);
});
// GET /relay/jobs/:id — poll loop's friend. Owner-scoped: new jobs are
// matched on metadata.owner (the caller's creditKey), older jobs on
// install_id, so one identity can't read another identity's job.
// Returns the running status + (once complete) the full transcribe
// result envelope.
router.get("/jobs/:id", async (req, res) => {
  // Resolve the caller first; an identity failure is answered with the
  // status carried on the thrown error (401 when it has none).
  let identity;
  try {
    identity = await resolveIdentity(req);
  } catch (err) {
    const e = await errorEnvelope({ error: err?.message || "auth_error", statusHint: err?.status || 401 });
    return res.status(e.statusHint || 401).json(e.body);
  }
  // License-kind identities must also send an install id.
  if (identity.kind === "license" && !identity.installId) {
    const e = await errorEnvelope({
      error: "missing X-Recap-Install-Id header",
      statusHint: 400,
    });
    return res.status(400).json(e.body);
  }
  const { creditKey, installId, license } = identity;
  // The credit row is only needed here to derive the tier that every
  // envelope below reports.
  const ownerRow = await getOrCreateRow({ creditKey, installId, license });
  const tier = identityTier(identity, ownerRow);
  const jobId = (req.params.id || "").trim();
  const job = getJob(jobId);
  if (!job) {
    const e = await errorEnvelope({
      error: "job_not_found",
      creditKey,
      installId,
      tier,
      statusHint: 404,
    });
    return res.status(404).json(e.body);
  }
  // New jobs carry metadata.owner = creditKey; older jobs only carry
  // install_id. Authorize by whichever the job has.
  const ownerOk = job.metadata?.owner
    ? job.metadata.owner === creditKey
    : identity.installId && job.install_id === identity.installId;
  if (!ownerOk) {
    const e = await errorEnvelope({
      error: "job_belongs_to_different_owner",
      creditKey,
      installId,
      tier,
      statusHint: 403,
    });
    return res.status(403).json(e.body);
  }
  const body = await envelope({
    result: {
      job_id: job.id,
      kind: job.kind,
      status: job.status,
      progress: job.progress,
      started_at: job.started_at,
      updated_at: job.updated_at,
      completed_at: job.completed_at,
      // Include the FULL transcribe-result on completion so the
      // client doesn't need a second round-trip.
      result: job.status === "complete" ? job.result?.result : null,
      credit_charged:
        job.status === "complete" ? job.result?.credit_charged || 0 : 0,
      error: job.error,
    },
    creditKey,
    installId,
    license,
    tier,
  });
  res.json(body);
});
+118 -57
View File
@@ -28,8 +28,13 @@
import express from "express"; import express from "express";
import multer from "multer"; import multer from "multer";
import { resolveLicense } from "../keysat-client.js"; import { resolveIdentity, identityTier } from "../identity.js";
import { getOrCreateRow, planBackend, commitCredit } from "../credits.js"; import {
getOrCreateRow,
planBackend,
commitCredit,
licenseFingerprint,
} from "../credits.js";
import { lookupJob, markJobCharged, refundJob } from "../job-credits.js"; import { lookupJob, markJobCharged, refundJob } from "../job-credits.js";
import { getConfigSnapshot, getTierQuotas } from "../config.js"; import { getConfigSnapshot, getTierQuotas } from "../config.js";
import { createGeminiBackend } from "../backends/gemini.js"; import { createGeminiBackend } from "../backends/gemini.js";
@@ -37,6 +42,9 @@ import { createHardwareBackend } from "../backends/hardware.js";
import { envelope, errorEnvelope } from "./envelope.js"; import { envelope, errorEnvelope } from "./envelope.js";
import { recordCall } from "../audit-log.js"; import { recordCall } from "../audit-log.js";
import { calcGeminiCost } from "../pricing.js"; import { calcGeminiCost } from "../pricing.js";
import { getAudioDurationSecondsFromBuffer } from "../audio-meta.js";
import { resolveHardwareConfig } from "../hardware-config.js";
import { reportHealthEvent } from "../spark-control-events.js";
const upload = multer({ const upload = multer({
storage: multer.memoryStorage(), storage: multer.memoryStorage(),
@@ -48,67 +56,93 @@ export function transcribeRouter() {
router.post("/transcribe", upload.single("audio"), async (req, res) => { router.post("/transcribe", upload.single("audio"), async (req, res) => {
const t0 = Date.now(); const t0 = Date.now();
const installId = req.header("X-Recap-Install-Id");
const jobId = req.header("X-Recap-Job-Id") || null; const jobId = req.header("X-Recap-Job-Id") || null;
const auth = req.header("Authorization");
if (!installId) { let identity;
try {
identity = await resolveIdentity(req);
} catch (err) {
const e = await errorEnvelope({
error: err?.message || "auth_error",
statusHint: err?.status || 401,
});
return res.status(e.statusHint || 401).json(e.body);
}
if (identity.kind === "license" && !identity.installId) {
const e = await errorEnvelope({ const e = await errorEnvelope({
error: "missing X-Recap-Install-Id header", error: "missing X-Recap-Install-Id header",
statusHint: 400, statusHint: 400,
}); });
return res.status(400).json(e.body); return res.status(400).json(e.body);
} }
const { creditKey, installId, license } = identity;
if (!req.file) { if (!req.file) {
const e = await errorEnvelope({ error: "missing audio file", installId, statusHint: 400 }); const e = await errorEnvelope({ error: "missing audio file", creditKey, installId, statusHint: 400 });
return res.status(400).json(e.body); return res.status(400).json(e.body);
} }
const license = await resolveLicense(auth); const row = await getOrCreateRow({ creditKey, installId, license });
const tier = license.tier; const tier = identityTier(identity, row);
const row = await getOrCreateRow(installId);
row.tier_snapshot = tier; row.tier_snapshot = tier;
const licenseFp = identity.kind === "cloud" ? null : licenseFingerprint(license);
const auditInstall = installId || identity.userId || null;
let reusedJob = false; // Probe audio duration BEFORE the backend call so we can record
let chosenBackend = null; // it on every audit row (success and error alike). Used by the
const existingJob = lookupJob(installId, jobId); // dashboard to normalize wall-clock time to "ms per minute of
if (existingJob) { // audio" — a backend-agnostic speed benchmark.
reusedJob = true; const audioSeconds = await getAudioDurationSecondsFromBuffer(
chosenBackend = existingJob.backend; req.file?.buffer
} else { );
const cfg = await getConfigSnapshot();
const hasHardware = !!cfg.relay_parakeet_base_url;
const quota = await getTierQuotas();
const preference =
cfg.relay_transcribe_backend_preference || "gemini_first";
const plan = planBackend(row, quota, { hasHardware, preference });
if (!plan.allowed) {
await recordCall({
install_id: installId,
tier,
pipeline: "transcribe",
backend: null,
model: null,
status: "refused",
credit_charged: 0,
duration_ms: Date.now() - t0,
cost_usd: 0,
job_id: jobId,
error: plan.reason,
});
const e = await errorEnvelope({
error: plan.reason,
installId,
tier,
statusHint: 402,
});
return res.status(402).json(e.body);
}
chosenBackend = plan.backend;
}
// Billing vs. routing are decoupled — see analyze.js for the
// full reasoning. Look up job to decide whether to charge a
// credit, but always run planBackend fresh so transcribe's
// routing decision respects relay_transcribe_backend_preference.
const reusedJob = !!lookupJob({ creditKey, installId, license, jobId });
const cfg = await getConfigSnapshot(); const cfg = await getConfigSnapshot();
const hw = await resolveHardwareConfig(cfg);
// Operator-only diagnostic — see the matching comment in
// summarize-url.js for the full reasoning. We don't 503 here on
// blocked_reason because doing so pre-empts planBackend and
// surfaces operator-internal wording to clients even when
// Gemini was the configured preference.
if (hw.transcribe.blocked_reason) {
console.warn(
`[transcribe] hardware transcribe currently blocked (planBackend will route to Gemini if available): ${hw.transcribe.blocked_reason}`,
);
}
const hasHardware = !!hw.transcribe.url;
const quota = await getTierQuotas();
const preference =
cfg.relay_transcribe_backend_preference || "gemini_first";
const plan = planBackend(row, quota, { hasHardware, preference });
if (!plan.allowed) {
await recordCall({
install_id: auditInstall,
license_fingerprint: licenseFp,
tier,
pipeline: "transcribe",
backend: null,
model: null,
status: "refused",
credit_charged: 0,
duration_ms: Date.now() - t0,
audio_seconds: audioSeconds,
cost_usd: 0,
job_id: jobId,
error: plan.reason,
});
const e = await errorEnvelope({
error: plan.reason,
creditKey,
installId,
tier,
statusHint: 402,
});
return res.status(402).json(e.body);
}
const chosenBackend = plan.backend;
let result; let result;
try { try {
if (chosenBackend === "gemini") { if (chosenBackend === "gemini") {
@@ -116,6 +150,8 @@ export function transcribeRouter() {
apiKey: cfg.relay_gemini_api_key, apiKey: cfg.relay_gemini_api_key,
transcriptionModel: cfg.relay_gemini_transcription_model, transcriptionModel: cfg.relay_gemini_transcription_model,
analysisModel: cfg.relay_gemini_analysis_model, analysisModel: cfg.relay_gemini_analysis_model,
txChunkSeconds: (cfg.relay_gemini_tx_chunk_minutes || 30) * 60,
txConcurrency: cfg.relay_gemini_tx_concurrency || 12,
}); });
result = await backend.transcribeAudio({ result = await backend.transcribeAudio({
audio: req.file.buffer, audio: req.file.buffer,
@@ -128,10 +164,19 @@ export function transcribeRouter() {
}); });
} else { } else {
const backend = createHardwareBackend({ const backend = createHardwareBackend({
parakeetBaseURL: cfg.relay_parakeet_base_url, parakeetBaseURL: hw.transcribe.url || "",
gemmaBaseURL: cfg.relay_gemma_base_url, gemmaBaseURL: hw.analyze.url || "",
parakeetModel: cfg.relay_parakeet_model, sparkControlBaseURL: hw.sparkBase || "",
gemmaModel: cfg.relay_gemma_model, parakeetModel: hw.transcribe.model || "",
gemmaModel: hw.analyze.model || "",
txChunkSeconds: (cfg.relay_hardware_tx_chunk_minutes || 5) * 60,
txChunkOverlapSeconds: cfg.relay_hardware_tx_chunk_overlap_seconds ?? 30,
diarizationEnabled: !!cfg.relay_hardware_diarization_enabled,
clusterThresholdPct: cfg.relay_hardware_voice_clustering_threshold ?? 70,
anchorMinSpeakingSec: cfg.relay_hardware_anchor_min_speaking_sec ?? 30,
smallClusterMaxSpeakingSec: cfg.relay_hardware_small_cluster_max_speaking_sec ?? 15,
uncertainMarginPct: cfg.relay_hardware_uncertain_margin_pct ?? 10,
txConcurrency: cfg.relay_hardware_tx_concurrency || 4,
}); });
result = await backend.transcribeAudio({ result = await backend.transcribeAudio({
audio: req.file.buffer, audio: req.file.buffer,
@@ -140,25 +185,38 @@ export function transcribeRouter() {
}); });
} }
} catch (err) { } catch (err) {
if (reusedJob) refundJob(installId, jobId); if (reusedJob) await refundJob({ creditKey, installId, license, jobId });
console.error(`[relay/transcribe] backend error: ${err?.message}`); console.error(`[relay/transcribe] backend error: ${err?.message}`);
// Fire-and-forget health report for hardware-served calls;
// Gemini failures are a separate observability surface.
if (chosenBackend === "hardware") {
reportHealthEvent({
service: "parakeet",
ok: false,
error: (err?.message || String(err)).slice(0, 280),
ms: Date.now() - t0,
});
}
await recordCall({ await recordCall({
install_id: installId, install_id: auditInstall,
license_fingerprint: licenseFp,
tier, tier,
pipeline: "transcribe", pipeline: "transcribe",
backend: chosenBackend, backend: chosenBackend,
model: chosenBackend === "gemini" model: chosenBackend === "gemini"
? cfg.relay_gemini_transcription_model ? cfg.relay_gemini_transcription_model
: cfg.relay_parakeet_model, : hw.transcribe.model || "(auto)",
status: "error", status: "error",
credit_charged: 0, credit_charged: 0,
duration_ms: Date.now() - t0, duration_ms: Date.now() - t0,
audio_seconds: audioSeconds,
cost_usd: 0, cost_usd: 0,
job_id: jobId, job_id: jobId,
error: (err?.message || String(err)).slice(0, 200), error: (err?.message || String(err)).slice(0, 200),
}); });
const e = await errorEnvelope({ const e = await errorEnvelope({
error: err?.message || "backend_error", error: err?.message || "backend_error",
creditKey,
installId, installId,
tier, tier,
statusHint: err?.status || 502, statusHint: err?.status || 502,
@@ -168,8 +226,8 @@ export function transcribeRouter() {
let creditCharged = 0; let creditCharged = 0;
if (!reusedJob) { if (!reusedJob) {
await commitCredit(installId, { backend: chosenBackend, tier }); await commitCredit({ creditKey, installId, license, backend: chosenBackend, tier });
markJobCharged(installId, jobId, { backend: chosenBackend, tier }); await markJobCharged({ creditKey, installId, license, jobId, backend: chosenBackend, tier });
creditCharged = 1; creditCharged = 1;
} }
@@ -188,6 +246,7 @@ export function transcribeRouter() {
}; };
await recordCall({ await recordCall({
install_id: installId, install_id: installId,
license_fingerprint: licenseFp,
tier, tier,
pipeline: "transcribe", pipeline: "transcribe",
backend: chosenBackend, backend: chosenBackend,
@@ -195,11 +254,13 @@ export function transcribeRouter() {
status: "success", status: "success",
credit_charged: creditCharged, credit_charged: creditCharged,
duration_ms: Date.now() - t0, duration_ms: Date.now() - t0,
audio_seconds: audioSeconds,
job_id: jobId, job_id: jobId,
attempts: result?.attempts || null,
...costDetails, ...costDetails,
}); });
const body = await envelope({ result, installId, tier, creditCharged }); const body = await envelope({ result, creditKey, installId, license, tier, creditCharged });
res.json(body); res.json(body);
}); });
+67
View File
@@ -0,0 +1,67 @@
// Strips operator-internal implementation detail from error messages
// before they're surfaced to public clients (Recap app, Recaps cloud,
// any other SDK consumer). The relay's hardware backend wraps
// Spark Control delegate names + internal IPs + wire-level details
// into its thrown errors, which is great for the operator's relay
// logs and audit table but those tokens shouldn't leak to whoever's
// summarizing a YouTube link.
//
// Applies a two-stage scrub:
// 1. Token replacement — known operator-private terms swap for
// generic equivalents ("Parakeet" → "the transcribe service",
// "Spark Control" → "the operator hardware", etc.). Stays
// grammatical and still readable.
// 2. Network-detail redaction — local IPs and internal URLs
// collapse to "(internal)" so a client never sees an operator's
// LAN topology. Public hostnames + Gemini's googleapis.com URLs
// stay intact (they're not operator-private).
//
// The ORIGINAL message stays available in the relay's recordCall
// audit row + console logs — only the client-facing surface gets
// sanitized.
// Ordered [pattern, replacement] pairs for operator-private product
// names. Every pattern carries the `g` flag so all occurrences in a
// message are rewritten, not only the first.
const TOKEN_MAP = [
  // Spark Control + its delegates
  [/Spark Control/gi, "the operator hardware"],
  [/spark-control(?:'s)?/gi, "the operator hardware"],
  [/sparkcontrol/gi, "the operator hardware"],
  // Parakeet (NVIDIA STT model wrapper)
  [/Parakeet/gi, "the transcribe service"],
  // vLLM / Gemma / other LLM runners on operator hardware
  [/\bvLLM\b/gi, "the analyze service"],
  // NOTE(review): case-sensitive, unlike the other entries — a
  // lowercase "gemma" (e.g. inside a model id) passes through
  // unchanged. Confirm that is intended.
  [/\bGemma\b/g, "the analyze service"],
  // Diarization stack
  [/Sortformer/gi, "the diarization service"],
  [/TitaNet/gi, "the diarization service"],
];

// Dotted-quad source for a private or loopback IPv4 host: 10/8, 127/8,
// 192.168/16 and 172.16-31. The 10 and 127 prefixes fix only the first
// octet, so they need one more octet than the two-octet prefixes to
// make a full four-octet address. Without it "10.0.0.5" matched only
// as "10.0.0" (leaking ".5" and any port) and a version string such as
// "10.5.3" was redacted by mistake. Shared by both patterns below so
// they cannot drift apart.
const PRIVATE_IPV4_SRC =
  String.raw`(?:(?:10|127)\.\d{1,3}|192\.168|172\.(?:1[6-9]|2\d|3[01]))\.\d{1,3}\.\d{1,3}`;

// Match a bare private/loopback IPv4 address with an optional :port.
// Public IPv4s are deliberately not redacted in case a public-facing
// error genuinely references one (rare for the relay but possible).
const PRIVATE_IP_RE = new RegExp(
  String.raw`\b${PRIVATE_IPV4_SRC}(?::\d+)?`,
  "g",
);

// Match an http(s) URL whose host is a private IP OR is *.local.
// Both are operator-LAN-only. Public hostnames stay readable so a
// Gemini-side error referencing generativelanguage.googleapis.com
// keeps its diagnostic value. The trailing class swallows the path up
// to whitespace, a closing paren or a quote.
const PRIVATE_URL_RE = new RegExp(
  String.raw`https?://(?:${PRIVATE_IPV4_SRC}(?::\d+)?|[A-Za-z0-9-]+\.local(?::\d+)?)[^\s)'"]*`,
  "g",
);

/**
 * Produce a client-safe version of an error message.
 *
 * @param {unknown} input - A message string, an Error-like object (its
 *   `message` is used when non-empty), or any other value (stringified).
 * @returns {string} The message with private URLs and IPs replaced by
 *   "(internal)", operator-private product names replaced by generic
 *   wording, and whitespace runs collapsed. Returns "" for
 *   null/undefined.
 */
export function sanitizeErrorForClient(input) {
  if (input == null) return "";
  let s = typeof input === "string" ? input : (input.message || String(input));
  // URL-shaped private hosts go first so the IP regex doesn't chew up
  // half of the URL before the full URL pattern fires.
  s = s.replace(PRIVATE_URL_RE, "(internal)");
  s = s.replace(PRIVATE_IP_RE, "(internal)");
  for (const [pattern, replacement] of TOKEN_MAP) {
    s = s.replace(pattern, replacement);
  }
  // Collapse whitespace runs left behind by the substitutions (this
  // also folds newlines into single spaces).
  s = s.replace(/\s{2,}/g, " ").trim();
  return s;
}
+246
View File
@@ -0,0 +1,246 @@
// YouTube captions fast-path. Uses yt-dlp to fetch auto-generated
// (or manual) subtitle tracks WITHOUT downloading the video audio.
// For YouTube videos that have captions, this is dramatically
// faster than running audio through Gemini transcribe — captions
// download is typically 2-5 seconds vs 60-300+ seconds for audio.
//
// We then transform the .vtt subtitle file into a bracketed [MM:SS]
// transcript matching the shape Gemini's transcribe path produces,
// so the analyze step receives the same input format regardless of
// which source provided the transcript.
//
// Public entry point: fetchYouTubeCaptions({ url, tmpDir }) → {
// text: bracketed transcript string
// segments: [] (parity with transcribe; analyze doesn't use)
// duration_seconds: audio duration in seconds
// captions_source: "manual" | "auto" (which track yt-dlp picked)
// }
import fs from "fs/promises";
import path from "path";
import { execFile } from "child_process";
import { promisify } from "util";
const execFileAsync = promisify(execFile);
// Hard cap on the captions fetch — yt-dlp can occasionally hang on
// YouTube rate-limit pages; better to fail fast than wait forever.
const CAPTIONS_TIMEOUT_MS = 90_000;
/**
 * Download a video's subtitles with yt-dlp and convert the best
 * available track into a bracketed "[M:SS] text" transcript.
 *
 * Two-pass strategy:
 *
 *   Pass 1: --sub-langs "en.*" (covers en, en-US, en-GB, en-orig,
 *           en-auto, etc.). Handles the common case fast. A yt-dlp
 *           failure here is fatal.
 *
 *   Pass 2: only if pass 1 wrote no .vtt — retry with --sub-langs all.
 *           A yt-dlp failure here is NOT fatal; its stderr is kept for
 *           the final diagnostic.
 *
 * If there is still no .vtt, a diagnostic `yt-dlp --list-subs` call is
 * made and its (truncated) output is embedded in the thrown error so
 * the operator can tell "video has no captions" apart from "extractor
 * is being blocked".
 *
 * @param {object} params
 * @param {string} params.url    Video URL handed to yt-dlp.
 * @param {string} params.tmpDir Existing directory yt-dlp writes the
 *   subtitle files into; every *.vtt found there is a candidate.
 * @returns {Promise<{text: string, segments: Array,
 *   duration_seconds: (number|null), captions_source: ("manual"|"auto")}>}
 *   `segments` is always empty. `duration_seconds` is null when yt-dlp
 *   printed no parseable duration. `captions_source` is a filename
 *   heuristic: "auto" when the chosen file name contains "auto" or
 *   "orig", otherwise "manual".
 * @throws {Error} When pass 1 fails, or when no .vtt file was produced
 *   by either pass.
 */
export async function fetchYouTubeCaptions({ url, tmpDir }) {
  const outTemplate = path.join(tmpDir, "captions.%(ext)s");
  const baseArgs = [
    "--write-subs",
    "--write-auto-subs",
    "--convert-subs",
    "vtt",
    "--skip-download",
    "--no-warnings",
    "-o",
    outTemplate,
    "--print",
    "duration",
  ];
  // "--" ends option parsing, so a url that begins with "-" is always
  // treated as a positional URL and can never be read by yt-dlp as a
  // command-line option.
  const argsFor = (subLangs) => [...baseArgs, "--sub-langs", subLangs, "--", url];
  const listVttFiles = async () =>
    (await fs.readdir(tmpDir)).filter((f) => f.endsWith(".vtt"));

  let durationSec = null;
  let lastStderr = "";

  // Pass 1: english-only.
  let firstPass;
  try {
    firstPass = await runYtDlp(argsFor("en.*"));
  } catch (err) {
    lastStderr = (err?.stderr || "").toString();
    throw new Error(
      `yt-dlp captions fetch failed: ${lastStderr.slice(0, 200) || err?.message}`,
      { cause: err }
    );
  }
  lastStderr = firstPass.stderr;
  durationSec = parseDuration(firstPass.stdout);

  let vttFiles = await listVttFiles();

  // Pass 2: fall back to "all" if english-only produced nothing.
  // Some videos carry captions whose language tag does not match
  // "en.*" even though the text is usable.
  if (vttFiles.length === 0) {
    console.warn(
      `[captions] pass 1 (en.*) produced no .vtt for ${url} — retrying with --sub-langs all`
    );
    try {
      const secondPass = await runYtDlp(argsFor("all"));
      lastStderr = secondPass.stderr;
      // Keep the pass-1 duration only when pass 2 printed none.
      durationSec = parseDuration(secondPass.stdout) ?? durationSec;
    } catch (err) {
      lastStderr = (err?.stderr || "").toString();
      // Don't throw; fall through to the "still no .vtt" diagnostic.
    }
    vttFiles = await listVttFiles();
  }

  if (vttFiles.length === 0) {
    // Last-resort diagnostic: ask yt-dlp what subtitles it CAN see.
    // Surfaces in the error message so the operator can tell whether
    // the video genuinely lacks captions or whether yt-dlp's
    // extractor is being blocked (rate limit, geo, sign-in wall).
    let listOut = "";
    try {
      const { stdout: lsOut } = await execFileAsync(
        "yt-dlp",
        ["--list-subs", "--skip-download", "--no-warnings", "--", url],
        { timeout: CAPTIONS_TIMEOUT_MS, maxBuffer: 4 * 1024 * 1024 }
      );
      listOut = lsOut || "";
    } catch (lsErr) {
      listOut = (lsErr?.stderr || lsErr?.message || "").toString();
    }
    // Drop blank lines and "[extractor] ..." progress lines, then cap
    // both the line count and the total length of the summary.
    const summary = listOut
      .split("\n")
      .filter((l) => l.trim() && !/^\[/.test(l))
      .slice(0, 12)
      .join(" | ")
      .slice(0, 400);
    throw new Error(
      `yt-dlp produced no .vtt subtitle file. ` +
        `yt-dlp --list-subs output: ${summary || "(empty)"}. ` +
        `Last stderr: ${lastStderr.slice(0, 200)}`
    );
  }

  // Preference order for picking among multiple .vtt files:
  //   1. English manual subs (".en." / ".en-" in the name, not auto)
  //   2. Any english (auto-generated, "en-orig", etc.)
  //   3. Any other language (translation is better than nothing for
  //      benchmarking; analyze just needs text)
  const isEnglish = (name) => /\.en[.-]/i.test(name);
  const isAuto = (name) => /auto|orig/i.test(name);
  vttFiles.sort((a, b) => {
    const aEn = isEnglish(a);
    const bEn = isEnglish(b);
    if (aEn !== bEn) return aEn ? -1 : 1;
    const aAuto = isAuto(a);
    const bAuto = isAuto(b);
    if (aAuto !== bAuto) return aAuto ? 1 : -1;
    return a.localeCompare(b);
  });

  const [chosenName] = vttFiles;
  const vtt = await fs.readFile(path.join(tmpDir, chosenName), "utf8");
  return {
    text: vttToBracketedTranscript(vtt),
    segments: [],
    duration_seconds: durationSec,
    captions_source: isAuto(chosenName) ? "auto" : "manual",
  };
}
// Spawn yt-dlp with `args` and resolve to its captured output as
// { stdout, stderr }, each defaulting to "" when falsy. The promise
// rejects when the underlying execFile call fails; the caller reads
// `stderr` off that rejection for its diagnostic messages. The run is
// bounded by CAPTIONS_TIMEOUT_MS and a 4 MiB output buffer.
async function runYtDlp(args) {
  const { stdout, stderr } = await execFileAsync("yt-dlp", args, {
    maxBuffer: 4 * 1024 * 1024,
    timeout: CAPTIONS_TIMEOUT_MS,
  });
  return { stdout: stdout || "", stderr: stderr || "" };
}
// Extract the duration (in seconds) that yt-dlp writes to stdout when
// invoked with `--print duration`. Only the LAST whitespace-separated
// token is examined, and it is read with parseFloat, so a numeric
// prefix is enough ("12abc" → 12). Returns null for empty input or
// when that token does not yield a finite number.
function parseDuration(stdout) {
  if (!stdout) {
    return null;
  }
  const tokens = stdout.trim().split(/\s+/);
  const seconds = parseFloat(tokens[tokens.length - 1]);
  if (!Number.isFinite(seconds)) {
    return null;
  }
  return seconds;
}
// Parse a .vtt subtitle file and produce a "[M:SS] line\n[M:SS] line"
// transcript. We drop overlapping / duplicate caption blocks (YouTube
// auto-captions emit each line twice: once incremental, once final).
//
// VTT format:
//   WEBVTT
//
//   00:00:01.000 --> 00:00:03.500
//   <c>First caption line.</c>
//
//   00:00:03.500 --> 00:00:06.000
//   Second caption line.
//
// We strip the timing arrow + any inline cue tags, dedupe consecutive
// overlapping blocks, and prefix each block with its start time.
//
// WebVTT makes the hours field of a timestamp optional ("MM:SS.mmm")
// and allows it to be longer than two digits, so both forms are
// accepted here; a missing hours field is treated as 0.
//
// @param {string} vtt  Raw WebVTT file contents.
// @returns {string}    One "[M:SS] text" (or "[H:MM:SS] text" once
//                      hours > 0) entry per kept cue, joined by "\n";
//                      "" when no cue with text is found.
export function vttToBracketedTranscript(vtt) {
  // Cue timing line: "[HH:]MM:SS.mmm --> [HH:]MM:SS.mmm". Only the
  // start time is captured; anything after the end time (cue settings
  // such as "align:start") is ignored because the pattern is not
  // anchored at the end.
  const timingRe =
    /^(?:(\d{2,}):)?(\d{2}):(\d{2})\.\d{3}\s+-->\s+(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}/;
  const pad = (n) => n.toString().padStart(2, "0");

  const lines = vtt.split(/\r?\n/);
  const out = [];
  let lastEmitted = "";
  let i = 0;

  while (i < lines.length) {
    const m = lines[i].match(timingRe);
    i++;
    // Header, NOTE blocks, cue identifiers, stray text: not a cue start.
    if (!m) continue;

    // Collect subsequent text lines until a blank line / end of file.
    const textLines = [];
    while (i < lines.length && lines[i].trim() !== "") {
      // Strip inline tags <c.colorXXX>...</c>, <00:00:01.234>, etc.
      const cleaned = lines[i]
        .replace(/<\d{2}:\d{2}:\d{2}\.\d{3}>/g, "")
        .replace(/<\/?[^>]+>/g, "")
        .trim();
      if (cleaned) textLines.push(cleaned);
      i++;
    }
    const blockText = textLines.join(" ").trim();
    if (!blockText) continue;

    // De-dupe: YouTube auto-captions emit progressive + final lines
    // that overlap.
    if (lastEmitted) {
      if (blockText.startsWith(lastEmitted)) {
        // Same text or an extension of it: replace the previous
        // emission with this more-complete version.
        out.pop();
      } else if (lastEmitted.startsWith(blockText)) {
        // Already emitted a more-complete version; skip this one.
        continue;
      }
    }

    // Start time → "[M:SS]", or "[H:MM:SS]" once hours > 0, for our
    // downstream parser. Fractional seconds are dropped.
    const hours = m[1] ? parseInt(m[1], 10) : 0;
    const mins = parseInt(m[2], 10);
    const secs = parseInt(m[3], 10);
    const stamp =
      hours > 0
        ? `[${hours}:${pad(mins)}:${pad(secs)}]`
        : `[${mins}:${pad(secs)}]`;

    out.push(`${stamp} ${blockText}`);
    lastEmitted = blockText;
  }
  return out.join("\n");
}