Wire new routes; identity, summarize-url, dashboard, admin
This commit is contained in:
@@ -0,0 +1,902 @@
|
||||
// POST /admin/test-run — operator-side benchmarking flow.
|
||||
//
|
||||
// Same end-to-end pipeline as /relay/transcribe-url, but with two
|
||||
// key differences:
|
||||
// 1. The operator can OVERRIDE backend + model per call, bypassing
|
||||
// planBackend's tier/preference logic. Used by the dashboard's
|
||||
// benchmark suite to test specific permutations.
|
||||
// 2. The audit row is tagged with batch_id + source="admin-test"
|
||||
// so test runs are clearly distinguishable from real user
|
||||
// traffic in the Jobs tab (and filterable / hideable from view).
|
||||
//
|
||||
// Request body (admin-auth-gated by virtue of being under /admin/*):
|
||||
// {
|
||||
// media_url: string, required
|
||||
// type?: "youtube" | "podcast"
|
||||
// title?: string
|
||||
// transcribe_backend: "gemini" | "hardware", required unless captions_mode is "use"
|
||||
// transcribe_model?: string (gemini model id; ignored when hardware)
|
||||
// analyze_backend: "gemini" | "hardware", required
|
||||
// analyze_model?: string
|
||||
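// batch_id?: string — groups multiple test runs into one suite
// captions_mode?: "use" — fetch YouTube captions instead of downloading + transcribing audio (YouTube URLs only)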
|
||||
// }
|
||||
//
|
||||
// Response (immediate; job runs in background):
|
||||
// { result: { job_id, status: "queued", batch_id, kind: "admin-test-run" } }
|
||||
// Poll GET /admin/jobs/:id (existing) for status; final transcript +
|
||||
// analyze result lands in the Jobs table once complete.
|
||||
|
||||
import express from "express";
|
||||
import fs from "fs/promises";
|
||||
import os from "os";
|
||||
import path from "path";
|
||||
import { randomUUID } from "crypto";
|
||||
import { getConfigSnapshot } from "../config.js";
|
||||
import { createGeminiBackend } from "../backends/gemini.js";
|
||||
import { createHardwareBackend } from "../backends/hardware.js";
|
||||
import { resolveHardwareConfig } from "../hardware-config.js";
|
||||
import { recordCall } from "../audit-log.js";
|
||||
import { calcGeminiCost } from "../pricing.js";
|
||||
import { getAudioDurationSeconds } from "../audio-meta.js";
|
||||
import {
|
||||
createJob,
|
||||
markRunning,
|
||||
setProgress,
|
||||
markComplete,
|
||||
markFailed,
|
||||
} from "../jobs.js";
|
||||
import {
|
||||
looksLikeYouTube,
|
||||
downloadDirect,
|
||||
downloadYouTube,
|
||||
} from "./transcribe-url.js";
|
||||
import { fetchYouTubeCaptions } from "../youtube-captions.js";
|
||||
import { saveJobOutput } from "../output-store.js";
|
||||
import { runChunkedAnalysis } from "../chunked-analyze.js";
|
||||
|
||||
// Synthetic install_id used for all test-run audit rows. Keeps them
|
||||
// out of any real-user aggregations + makes them filterable in the
|
||||
// Jobs tab via the existing install-id filter.
|
||||
// Passed as `installId` to createJob and as `install_id` to recordCall
// for every job and audit row this module produces.
const TEST_INSTALL_ID = "admin-test";
|
||||
|
||||
// ── TX-sharing cache ────────────────────────────────────────────
|
||||
// The benchmark suite has paired permutations that use the SAME
|
||||
// transcribe config but differ in their analyze backend:
|
||||
// pair 1+6: TX = gemini-3.1-flash-lite (then AN gemini vs hardware)
|
||||
// pair 4+5: TX = hardware (then AN hardware vs gemini)
|
||||
// pair 7+8: TX = captions (then AN gemini vs hardware)
|
||||
// Without sharing, running both members of a pair re-transcribes
|
||||
// the same audio twice — wasteful (cost + wall time).
|
||||
//
|
||||
// Implementation: an in-memory Map keyed on (mediaUrl, txConfig)
|
||||
// whose values are PROMISES for the transcript. The first request
|
||||
// in the pair inserts a pending Promise; subsequent requests with
|
||||
// the same key await that Promise. Completed entries linger in the
|
||||
// cache for ~10 minutes so a "rerun last" benchmark within that
|
||||
// window also dedupes. Cache entries auto-expire to bound memory.
|
||||
//
|
||||
// The cache is process-local (single relay process); a relay
|
||||
// restart clears it. That's fine — benchmark suites are operator-
|
||||
// initiated and short-lived.
|
||||
// How long a cache entry stays reusable. For an in-flight entry the
// clock starts when the compute starts; once the compute fulfills the
// clock is restarted, so a finished transcript lingers for the full TTL.
const TX_CACHE_TTL_MS = 10 * 60 * 1000;

// key → { promise, expiresAt, startedAt }
const txCache = new Map();

/**
 * Build the cache key identifying one transcription configuration.
 *
 * Captions runs are keyed on the URL alone. Audio runs are keyed on
 * backend + model + URL; the hardware path never uses `txModel` (the
 * model comes from the resolved hardware config), so it is left out of
 * the key there — otherwise two identical hardware transcriptions that
 * merely carry different ignored model strings would not dedupe.
 *
 * @param {object} params
 * @param {string} params.mediaUrl
 * @param {string|null} [params.captionsMode] - "use" selects the captions key.
 * @param {string} [params.txBackend] - "gemini" or "hardware".
 * @param {string} [params.txModel] - Gemini model id; ignored for hardware.
 * @returns {string}
 */
function txCacheKey({ mediaUrl, captionsMode, txBackend, txModel }) {
  if (captionsMode === "use") return `captions:${mediaUrl}`;
  const effectiveModel = txBackend === "hardware" ? "" : txModel;
  return `tx:${txBackend}:${effectiveModel || "(default)"}:${mediaUrl}`;
}

/**
 * Return the shared transcript promise for `key`, starting the compute
 * if no live entry exists.
 *
 * @param {string} key - Value produced by txCacheKey.
 * @param {() => Promise<any>} computeFn - Starts the transcription; only
 *   invoked on a cache miss.
 * @returns {{ promise: Promise<any>, cached: boolean, startedAt: number }}
 *   `cached` is true when an existing entry was reused; `startedAt` is
 *   the epoch-ms at which the underlying compute was kicked off.
 */
function getOrComputeTx(key, computeFn) {
  const now = Date.now();

  // Evict expired entries opportunistically to bound memory.
  for (const [cachedKey, cachedEntry] of txCache) {
    if (cachedEntry.expiresAt < now) txCache.delete(cachedKey);
  }

  const existing = txCache.get(key);
  if (existing && existing.expiresAt > now) {
    return {
      promise: existing.promise,
      cached: true,
      startedAt: existing.startedAt,
    };
  }

  const startedAt = Date.now();
  const promise = computeFn();
  const entry = { promise, expiresAt: now + TX_CACHE_TTL_MS, startedAt };
  txCache.set(key, entry);

  promise.then(
    () => {
      // Restart the TTL at completion so a finished transcript is
      // reusable for the full window regardless of how long the compute
      // took. Re-insert if the entry expired while in flight, but never
      // clobber a newer entry someone else created for the same key.
      const current = txCache.get(key);
      if (current === undefined || current === entry) {
        entry.expiresAt = Date.now() + TX_CACHE_TTL_MS;
        txCache.set(key, entry);
      }
    },
    () => {
      // Don't cache failures — but only evict our own entry, not a
      // newer one that replaced it.
      if (txCache.get(key) === entry) txCache.delete(key);
    }
  );

  return { promise, cached: false, startedAt };
}
|
||||
|
||||
// Strip code fences + parse a JSON-formatted analyze response into
|
||||
// the { sections: [...] } shape Recap's render expects. Returns
|
||||
// null on parse failure so the saved output can store the raw text
|
||||
// for forensic review.
|
||||
/**
 * Parse an analyze response into the `{ sections: [...] }` object.
 *
 * If the text contains a ``` / ```json fenced block, the contents of the
 * first such block are parsed; otherwise the whole trimmed text is.
 *
 * @param {unknown} text - Raw analyze output.
 * @returns {object|null} The parsed object when it has an array
 *   `sections` property; null for non-string/empty input, invalid JSON,
 *   or a parsed value without a `sections` array.
 */
function safeParseSections(text) {
  if (typeof text !== "string" || !text) return null;

  const trimmed = text.trim();
  const fenced = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/);
  const candidate = fenced ? fenced[1].trim() : trimmed;

  let parsed;
  try {
    parsed = JSON.parse(candidate);
  } catch {
    return null;
  }

  if (parsed && Array.isArray(parsed.sections)) return parsed;
  return null;
}
|
||||
|
||||
// Backends an operator may select for either pipeline stage.
const TEST_RUN_BACKENDS = ["gemini", "hardware"];

// True when `value` is one of the selectable backend ids.
function isTestRunBackend(value) {
  return TEST_RUN_BACKENDS.includes(value);
}

// Mint the job row for one permutation. Both endpoints write the same
// metadata shape; the suite adds `suite_position` via `extraMetadata`.
function createTestRunJob(ctx, extraMetadata = {}) {
  return createJob({
    kind: "admin-test-run",
    installId: TEST_INSTALL_ID,
    metadata: {
      media_url: ctx.mediaUrl,
      title: ctx.title,
      transcribe_backend: ctx.useCaptions ? "captions" : ctx.txBackend,
      analyze_backend: ctx.anBackend,
      batch_id: ctx.batchId,
      captions_mode: ctx.captionsMode || null,
      ...extraMetadata,
    },
  });
}

// Mark a job failed after its worker threw, and log the crash. Never
// throws itself: it runs inside fire-and-forget promise handlers, where
// a throw would surface as an unhandled rejection.
function reportWorkerCrash(logTag, job, err) {
  const shortId = job.id.slice(0, 8);
  try {
    markFailed(job.id, `worker_crashed: ${err?.message || String(err)}`);
  } catch (markErr) {
    console.error(`[${logTag} ${shortId}] could not mark job failed:`, markErr);
  }
  console.error(`[${logTag} ${shortId}] worker crashed:`, err);
}

// Run one suite permutation: either record its upfront validation
// failure or execute the worker. Worker crashes are reported, not thrown.
async function runSuiteItem({ job, ctx }, batchId) {
  if (ctx._validationError) {
    markFailed(job.id, ctx._validationError);
    try {
      await recordCall({
        install_id: TEST_INSTALL_ID,
        tier: "core",
        pipeline: "transcribe",
        backend: null,
        model: null,
        status: "error",
        duration_ms: 0,
        cost_usd: 0,
        job_id: job.id,
        batch_id: batchId,
        source: "admin-test",
        media_url: ctx.mediaUrl,
        title: ctx.title,
        error: ctx._validationError,
      });
    } catch (auditErr) {
      // The job is already marked failed; surface the audit problem
      // instead of letting Promise.allSettled swallow it silently.
      console.error(
        `[admin/test-run-suite ${job.id.slice(0, 8)}] could not audit validation failure:`,
        auditErr
      );
    }
    return;
  }
  try {
    await executeTestRunWorker(job, ctx);
  } catch (err) {
    reportWorkerCrash("admin/test-run-suite", job, err);
  }
}

// Background phase runner. Items are grouped by TX fingerprint;
// permutations within a phase fire concurrently (their TX dedupes via
// the cache) while phases run sequentially so the transcribe backends
// aren't overloaded. A failing permutation never aborts the suite.
async function runSuiteInPhases(batchId, items) {
  const shortBatch = batchId.slice(0, 8);
  try {
    const phases = groupItemsByTxFingerprint(items);
    console.log(
      `[admin/test-run-suite] batch=${shortBatch} ${items.length} perms in ${phases.length} phases`
    );
    for (let pi = 0; pi < phases.length; pi++) {
      const phase = phases[pi];
      console.log(
        `[admin/test-run-suite] batch=${shortBatch} phase ${pi + 1}/${phases.length}: firing ${phase.length} perm${phase.length === 1 ? "" : "s"}`
      );
      await Promise.allSettled(phase.map((item) => runSuiteItem(item, batchId)));
    }
    console.log(`[admin/test-run-suite] batch=${shortBatch} complete`);
  } catch (err) {
    console.error(`[admin/test-run-suite] batch=${shortBatch} runner crashed:`, err);
  }
}

/**
 * Build the router exposing the operator benchmarking endpoints:
 *   POST /test-run        — one permutation
 *   POST /test-run-suite  — an array of permutations run in phases
 *
 * Both respond immediately with the minted job id(s); the pipeline runs
 * in the background on this process. The handlers are deliberately NOT
 * async: they contain no awaits, and a synchronous throw (e.g. from
 * createJob) then reaches Express's error handling instead of becoming
 * an unhandled promise rejection.
 *
 * @returns {import("express").Router}
 */
export function adminTestRunRouter() {
  const router = express.Router();

  router.post("/test-run", express.json({ limit: "1mb" }), (req, res) => {
    const {
      media_url: mediaUrl,
      type,
      title,
      transcribe_backend: txBackend,
      transcribe_model: txModel,
      analyze_backend: anBackend,
      analyze_model: anModel,
      batch_id: batchId,
      // When captions_mode === "use", the relay fetches YouTube captions
      // instead of downloading + transcribing the audio, and the
      // transcribe backend/model are ignored. YouTube URLs only.
      captions_mode: captionsMode,
    } = req.body || {};

    if (!mediaUrl || typeof mediaUrl !== "string") {
      return res.status(400).json({ error: "missing or non-string media_url" });
    }
    const useCaptions = captionsMode === "use";
    if (!useCaptions && !isTestRunBackend(txBackend)) {
      return res.status(400).json({
        error: "transcribe_backend must be 'gemini' or 'hardware' (unless captions_mode='use')",
      });
    }
    if (!isTestRunBackend(anBackend)) {
      return res.status(400).json({ error: "analyze_backend must be 'gemini' or 'hardware'" });
    }

    // Worker inputs are bundled into a ctx object so the same worker
    // serves this endpoint and /test-run-suite.
    const ctx = {
      mediaUrl,
      type,
      title,
      txBackend,
      txModel,
      anBackend,
      anModel,
      batchId: batchId || randomUUID(),
      captionsMode,
      useCaptions,
    };
    const job = createTestRunJob(ctx);

    // Hand back the job_id immediately; the dashboard polls for status.
    res.json({
      result: {
        job_id: job.id,
        status: "queued",
        batch_id: ctx.batchId,
        kind: "admin-test-run",
      },
    });

    executeTestRunWorker(job, ctx).catch((err) => {
      reportWorkerCrash("admin/test-run", job, err);
    });
  });

  // ── POST /admin/test-run-suite ──────────────────────────────
  // Server-side benchmark runner. Jobs for all permutations are minted
  // upfront (so the client can render its table immediately) and then
  // executed in a background loop on the relay process, so the suite
  // keeps running even if the operator's browser goes away.
  router.post("/test-run-suite", express.json({ limit: "10mb" }), (req, res) => {
    const { media_url: mediaUrl, permutations } = req.body || {};
    if (!mediaUrl || typeof mediaUrl !== "string") {
      return res.status(400).json({ error: "missing or non-string media_url" });
    }
    if (!Array.isArray(permutations) || permutations.length === 0) {
      return res.status(400).json({ error: "permutations must be a non-empty array" });
    }

    const batchId = randomUUID();
    const items = permutations.map((permutation, index) => {
      const p = permutation || {};
      const position = index + 1;
      const ctx = {
        mediaUrl,
        type: p.type,
        title: p.title || `permutation ${position}`,
        txBackend: p.transcribe_backend,
        txModel: p.transcribe_model,
        anBackend: p.analyze_backend,
        anModel: p.analyze_model,
        batchId,
        captionsMode: p.captions_mode,
        useCaptions: p.captions_mode === "use",
      };
      // Validate per permutation — one bad entry shouldn't poison the
      // batch; the runner records the error when its phase comes up.
      if (!ctx.useCaptions && !isTestRunBackend(ctx.txBackend)) {
        ctx._validationError = `permutation ${position}: transcribe_backend must be 'gemini' or 'hardware'`;
      } else if (!isTestRunBackend(ctx.anBackend)) {
        ctx._validationError = `permutation ${position}: analyze_backend must be 'gemini' or 'hardware'`;
      }
      const job = createTestRunJob(ctx, { suite_position: position });
      return { job, ctx };
    });

    // Respond immediately with the planned ids so the dashboard can
    // start polling by batch id without blocking on the actual work.
    res.json({
      result: {
        batch_id: batchId,
        status: "queued",
        job_ids: items.map((it) => it.job.id),
        total: items.length,
        kind: "admin-test-run-suite",
      },
    });

    // runSuiteInPhases catches and logs everything itself, so the
    // floating promise cannot reject.
    setImmediate(() => {
      void runSuiteInPhases(batchId, items);
    });
  });

  return router;
}
|
||||
|
||||
// Group { job, ctx } items by their TX fingerprint into phases.
|
||||
// Items with the same fingerprint share a phase so they hit the
|
||||
// TX-share cache. Phase order is preserved from the input array
|
||||
// (first appearance of a fingerprint wins).
|
||||
/**
 * Partition `{ job, ctx }` items into phases by transcription fingerprint.
 *
 * Captions items are fingerprinted on the media URL alone; all others on
 * backend + model (empty string when unset) + media URL. Items sharing a
 * fingerprint land in the same phase; phases are ordered by the first
 * appearance of their fingerprint, and items keep input order within one.
 *
 * @param {Array<{ job: object, ctx: object }>} items
 * @returns {Array<Array<{ job: object, ctx: object }>>}
 */
function groupItemsByTxFingerprint(items) {
  const phaseIndexByFingerprint = new Map();
  const phases = [];

  for (const item of items) {
    const { useCaptions, mediaUrl, txBackend, txModel } = item.ctx;
    const fingerprint = useCaptions
      ? `captions:${mediaUrl}`
      : `tx:${txBackend}:${txModel || ""}:${mediaUrl}`;

    const phaseIndex = phaseIndexByFingerprint.get(fingerprint);
    if (phaseIndex === undefined) {
      phaseIndexByFingerprint.set(fingerprint, phases.length);
      phases.push([item]);
    } else {
      phases[phaseIndex].push(item);
    }
  }

  return phases;
}
|
||||
|
||||
// Extracted worker — runs the full download / transcribe / analyze
|
||||
// pipeline for one permutation. Used by both /admin/test-run (one
|
||||
// permutation) and /admin/test-run-suite (many permutations
|
||||
// orchestrated server-side in phases). Body is the same flow the
|
||||
// inline IIFE used previously; ctx replaces what were closure refs.
|
||||
/**
 * Run the full pipeline for one benchmark permutation: fetch captions
 * or download + transcribe, then analyze, then persist the output.
 * Used by both /admin/test-run and /admin/test-run-suite.
 *
 * Expected failures (non-YouTube captions request, captions fetch,
 * download, transcribe) are audited via recordCall and end the job with
 * markFailed; unexpected errors propagate to the caller. The temp
 * directory is removed on every exit path.
 *
 * @param {{ id: string }} job - Job row minted by createJob.
 * @param {object} ctx - Worker inputs: mediaUrl, type, title, txBackend,
 *   txModel, anBackend, anModel, batchId, useCaptions.
 * @returns {Promise<void>}
 */
async function executeTestRunWorker(job, ctx) {
  const workerT0 = Date.now();
  markRunning(job.id);

  const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "admin-tr-"));
  try {
    const { mediaUrl, type } = ctx;
    const isYT = type === "youtube" || (!type && looksLikeYouTube(mediaUrl));
    if (ctx.useCaptions) {
      await runCaptionsTestRun(job, ctx, tmpDir, isYT);
    } else {
      await runAudioTestRun(job, ctx, tmpDir, isYT, workerT0);
    }
  } finally {
    // Best-effort cleanup: a failed removal must not mask the job outcome.
    try {
      await fs.rm(tmpDir, { recursive: true, force: true });
    } catch {}
  }
}

// Captions fast-path: fetch the YouTube caption track and feed it
// straight into analyze, skipping audio download and transcription.
async function runCaptionsTestRun(job, ctx, tmpDir, isYT) {
  const { mediaUrl, title, anBackend, anModel, batchId } = ctx;

  if (!isYT) {
    await recordCall({
      install_id: TEST_INSTALL_ID,
      tier: "core",
      pipeline: "transcribe",
      backend: "captions",
      model: null,
      status: "error",
      duration_ms: 0,
      cost_usd: 0,
      job_id: job.id,
      batch_id: batchId,
      source: "admin-test",
      media_url: mediaUrl,
      title: title || null,
      error: "captions_mode='use' requires a YouTube URL (no captions for podcast audio)",
    });
    markFailed(job.id, "captions_mode requires YouTube URL");
    return;
  }

  setProgress(job.id, "fetching captions…");
  const capStart = Date.now();
  let cap;
  let capFromCache = false;
  let capSharedStartedAt = capStart;
  const capKey = txCacheKey({ mediaUrl, captionsMode: "use" });
  try {
    const { promise, cached, startedAt } = getOrComputeTx(capKey, () =>
      fetchYouTubeCaptions({ url: mediaUrl, tmpDir })
    );
    capFromCache = cached;
    capSharedStartedAt = startedAt || capStart;
    if (cached) setProgress(job.id, "reusing shared captions from paired permutation…");
    cap = await promise;
  } catch (err) {
    await recordCall({
      install_id: TEST_INSTALL_ID,
      tier: "core",
      pipeline: "transcribe",
      backend: "captions",
      model: null,
      status: "error",
      duration_ms: Date.now() - capStart,
      audio_seconds: null,
      cost_usd: 0,
      job_id: job.id,
      batch_id: batchId,
      source: "admin-test",
      media_url: mediaUrl,
      title: title || null,
      error: (err?.message || String(err)).slice(0, 300),
    });
    markFailed(job.id, `captions_fetch_failed: ${err?.message || err}`);
    return;
  }

  const captionsModel = cap.captions_source === "auto" ? "youtube-auto" : "youtube-manual";

  // Captions "transcribe" row. When the fetch was shared with a paired
  // permutation, the duration is measured from the shared fetch's start
  // so both rows show a real number; source "admin-test-shared-tx"
  // marks the reusing row so aggregates can dedupe.
  await recordCall({
    install_id: TEST_INSTALL_ID,
    tier: "core",
    pipeline: "transcribe",
    backend: "captions",
    model: captionsModel,
    status: "success",
    duration_ms: Date.now() - capSharedStartedAt,
    audio_seconds: cap.duration_seconds || null,
    audio_bytes: null, // no audio downloaded
    download_ms: null, // n/a
    chunk_count: 1,
    cost_usd: 0,
    job_id: job.id,
    batch_id: batchId,
    source: capFromCache ? "admin-test-shared-tx" : "admin-test",
    media_url: mediaUrl,
    title: title || null,
  });

  setProgress(job.id, "analyzing topics…");
  const cfg = await getConfigSnapshot();
  const hw = await resolveHardwareConfig(cfg);
  let anResult = null;
  try {
    anResult = await runAnalyzeForTestRun({
      transcriptText: cap.text || "",
      anBackend,
      anModel,
      cfg,
      hw,
      jobId: job.id,
      batchId,
      mediaUrl,
      title,
      audioSeconds: cap.duration_seconds || null,
      audioBytes: null,
    });
  } catch (err) {
    // An analyze failure does not fail the job: the captions succeeded.
    console.warn(`[admin/test-run ${job.id.slice(0, 8)}] analyze failed (captions): ${err?.message || err}`);
  }

  // Test runs always persist their output.
  await saveJobOutput(job.id, {
    batch_id: batchId,
    source: "admin-test",
    transcript: cap.text || "",
    analysis: anResult ? safeParseSections(anResult.text) : null,
    analysis_raw_text: anResult?.text || null,
    meta: {
      title: title || null,
      media_url: mediaUrl,
      audio_seconds: cap.duration_seconds || null,
      audio_bytes: null,
      captions_mode: "use",
      captions_source: cap.captions_source || null,
      transcribe_backend: "captions",
      transcribe_model: captionsModel,
      analyze_backend: anBackend,
      analyze_model: anResult?.model || null,
    },
  });

  markComplete(job.id, {
    result: { transcribe_model: "captions", batch_id: batchId },
  });
}

// Transcribe one audio buffer with the operator-selected backend.
// `txModel` only applies to Gemini; the hardware model comes from `hw`.
async function transcribeForTestRun({ txBackend, txModel, cfg, hw, audioBuf, mimeType, title }) {
  if (txBackend === "gemini") {
    const gemini = createGeminiBackend({
      apiKey: cfg.relay_gemini_api_key,
      transcriptionModel: txModel || cfg.relay_gemini_transcription_model,
      analysisModel: cfg.relay_gemini_analysis_model,
      txChunkSeconds: (cfg.relay_gemini_tx_chunk_minutes || 30) * 60,
      txConcurrency: cfg.relay_gemini_tx_concurrency || 12,
      transcribePromptOverride: cfg.relay_transcribe_prompt || "",
    });
    return gemini.transcribeAudio({
      audio: audioBuf,
      mimeType,
      title: title || "",
      offsetSeconds: 0,
    });
  }

  if (!hw.transcribe.url) {
    throw new Error("hardware transcribe URL not configured");
  }
  const hardware = createHardwareBackend({
    parakeetBaseURL: hw.transcribe.url,
    gemmaBaseURL: hw.analyze.url || "",
    sparkControlBaseURL: hw.sparkBase || "",
    parakeetModel: hw.transcribe.model || "",
    gemmaModel: hw.analyze.model || "",
    txChunkSeconds: (cfg.relay_hardware_tx_chunk_minutes || 5) * 60,
    txChunkOverlapSeconds: cfg.relay_hardware_tx_chunk_overlap_seconds ?? 30,
    diarizationEnabled: !!cfg.relay_hardware_diarization_enabled,
    clusterThresholdPct: cfg.relay_hardware_voice_clustering_threshold ?? 70,
    anchorMinSpeakingSec: cfg.relay_hardware_anchor_min_speaking_sec ?? 30,
    smallClusterMaxSpeakingSec: cfg.relay_hardware_small_cluster_max_speaking_sec ?? 15,
    uncertainMarginPct: cfg.relay_hardware_uncertain_margin_pct ?? 10,
    txConcurrency: cfg.relay_hardware_tx_concurrency || 4,
  });
  return hardware.transcribeAudio({
    audio: audioBuf,
    mimeType,
    offsetSeconds: 0,
  });
}

// Audio path: download the media, transcribe it (sharing the result
// with paired permutations via the TX cache), analyze, persist.
async function runAudioTestRun(job, ctx, tmpDir, isYT, workerT0) {
  const { mediaUrl, title, txBackend, txModel, anBackend, anModel, batchId } = ctx;

  setProgress(job.id, "downloading media…");
  let audio;
  let downloadMs = 0;
  try {
    const dlStart = Date.now();
    audio = isYT
      ? await downloadYouTube(mediaUrl, tmpDir)
      : await downloadDirect(mediaUrl, tmpDir);
    downloadMs = Date.now() - dlStart;
    audio.seconds = await getAudioDurationSeconds(audio.filePath);
    setProgress(job.id, `transcribing ${Math.round((audio.seconds || 0) / 60)} min audio…`);
  } catch (err) {
    const msg = (err?.message || String(err)).slice(0, 300);
    await recordCall({
      install_id: TEST_INSTALL_ID,
      tier: "core",
      pipeline: "transcribe",
      backend: txBackend,
      model: null,
      status: "error",
      credit_charged: 0,
      duration_ms: Date.now() - workerT0,
      download_ms: Date.now() - workerT0,
      audio_seconds: null,
      cost_usd: 0,
      job_id: job.id,
      batch_id: batchId,
      source: "admin-test",
      media_url: mediaUrl,
      title: title || null,
      error: "download_failed: " + msg,
    });
    markFailed(job.id, "download_failed: " + msg);
    return;
  }

  // ── Transcription with the operator's chosen backend ──
  // Goes through the TX-sharing cache so paired permutations invoke the
  // underlying backend once; the second awaits the first's promise.
  const cfg = await getConfigSnapshot();
  const hw = await resolveHardwareConfig(cfg);
  let txResult;
  let txFromCache = false;
  const txStartedAt = Date.now();
  let txSharedStartedAt = txStartedAt;
  const cacheKey = txCacheKey({ mediaUrl, captionsMode: null, txBackend, txModel });
  try {
    const audioBuf = await fs.readFile(audio.filePath);
    const { promise, cached, startedAt } = getOrComputeTx(cacheKey, () =>
      transcribeForTestRun({
        txBackend,
        txModel,
        cfg,
        hw,
        audioBuf,
        mimeType: audio.mimeType || "audio/mpeg",
        title,
      })
    );
    txFromCache = cached;
    txSharedStartedAt = startedAt || txStartedAt;
    if (cached) {
      setProgress(job.id, "reusing shared TX from paired permutation…");
    }
    txResult = await promise;
  } catch (err) {
    const msg = (err?.message || String(err)).slice(0, 400);
    await recordCall({
      install_id: TEST_INSTALL_ID,
      tier: "core",
      pipeline: "transcribe",
      backend: txBackend,
      model:
        txBackend === "gemini"
          ? (txModel || cfg.relay_gemini_transcription_model)
          : (hw.transcribe.model || "(auto)"),
      status: "error",
      credit_charged: 0,
      duration_ms: Date.now() - workerT0,
      download_ms: downloadMs,
      audio_seconds: audio?.seconds || null,
      audio_bytes: audio?.bytes || null,
      cost_usd: 0,
      job_id: job.id,
      batch_id: batchId,
      source: "admin-test",
      media_url: mediaUrl,
      title: title || null,
      error: msg,
    });
    markFailed(job.id, "transcribe_failed: " + msg);
    return;
  }

  // Audit the successful transcribe.
  const txCostDetails =
    txBackend === "gemini" && txResult.usage
      ? calcGeminiCost(txResult.model, txResult.usage)
      : { input_tokens: 0, output_tokens: 0, thinking_tokens: 0, cost_usd: 0 };

  // Truncation detection: when the backend reports truncated chunks the
  // row is marked "partial" so a truncated run isn't compared against a
  // clean one.
  const txTruncatedChunks = Array.isArray(txResult?.truncated_chunks)
    ? txResult.truncated_chunks
    : [];
  const txWasTruncated = txTruncatedChunks.length > 0;
  const txTruncationError = txWasTruncated
    ? `transcribe: ${txTruncatedChunks.length} chunk(s) truncated — missing ~${txTruncatedChunks.reduce((s, c) => s + (c.missingSec || 0), 0)}s of speech (model: ${txResult.model || "unknown"})`
    : null;

  await recordCall({
    install_id: TEST_INSTALL_ID,
    tier: "core",
    pipeline: "transcribe",
    backend: txBackend,
    model: txResult.model || null,
    status: txWasTruncated ? "partial" : "success",
    credit_charged: 0,
    truncated_chunks: txWasTruncated ? txTruncatedChunks : null,
    error: txTruncationError,
    // For a shared TX the duration is measured from when the
    // originating permutation started it, so both paired rows show a
    // real number. Cost is zeroed on the reusing row (only the
    // originator pays) and source "admin-test-shared-tx" lets aggregate
    // analytics dedupe across pairs.
    duration_ms: Date.now() - txSharedStartedAt,
    download_ms: downloadMs,
    audio_bytes: audio.bytes,
    audio_seconds: audio.seconds || null,
    job_id: job.id,
    batch_id: batchId,
    source: txFromCache ? "admin-test-shared-tx" : "admin-test",
    media_url: mediaUrl,
    title: title || null,
    attempts: txResult.attempts || null,
    chunk_count: txResult.chunk_count ?? null,
    // Per-chunk wall-times (ms) as reported by the backend.
    chunk_durations_ms: txResult.chunk_durations_ms || null,
    ...(txFromCache
      ? { input_tokens: 0, output_tokens: 0, thinking_tokens: 0, cost_usd: 0 }
      : txCostDetails),
  });

  // ── Analyze with the operator's chosen backend ──
  setProgress(job.id, "analyzing topics…");
  let anResult = null;
  try {
    anResult = await runAnalyzeForTestRun({
      transcriptText: txResult.text || "",
      anBackend,
      anModel,
      cfg,
      hw,
      jobId: job.id,
      batchId,
      mediaUrl,
      title,
      audioSeconds: audio.seconds || null,
      audioBytes: audio.bytes,
    });
  } catch (err) {
    // The job is still marked complete since transcribe succeeded.
    console.warn(`[admin/test-run ${job.id.slice(0, 8)}] analyze failed: ${err?.message || err}`);
  }

  // Persist transcript + analysis for the dashboard. Test-run jobs
  // always persist their output.
  await saveJobOutput(job.id, {
    batch_id: batchId,
    source: "admin-test",
    transcript: txResult.text || "",
    analysis: anResult ? safeParseSections(anResult.text) : null,
    analysis_raw_text: anResult?.text || null,
    meta: {
      title: title || null,
      media_url: mediaUrl,
      audio_seconds: audio.seconds || null,
      audio_bytes: audio.bytes,
      captions_mode: null,
      transcribe_backend: txBackend,
      transcribe_model: txResult.model || null,
      analyze_backend: anBackend,
      analyze_model: anResult?.model || null,
    },
  });

  markComplete(job.id, {
    result: {
      transcribe_model: txResult.model,
      batch_id: batchId,
    },
  });
}
|
||||
|
||||
// Run chunked analyze over the just-transcribed text using the same
|
||||
// windowing strategy Recap's client uses (~18 min window body, 2 min
|
||||
// overlap, N windows in flight). Each window emits its own audit row
|
||||
// via recordCall (handled inside runChunkedAnalysis), so the Jobs
|
||||
// table sees:
|
||||
// - analyze_windows_total = N
|
||||
// - analyze_ms = sum of per-window duration_ms (total backend work)
|
||||
// - wall_time_ms = elapsed from first window start → last window end
|
||||
// (computed by job-stats.js from row timestamps)
|
||||
// Per-window `audio_seconds` is the window body length (not total audio),
|
||||
// so per-row rate columns (s/audio-min) divide by the right denominator.
|
||||
/**
 * Run the analyze step of an admin test run over an already-transcribed text.
 *
 * Builds the backend the operator asked for (Gemini or operator hardware),
 * then hands off to runChunkedAnalysis, which handles per-window prompt
 * building, parallelism, audit logging, and stitching. If the backend cannot
 * be constructed, a single failed "analyze" audit row is written (tagged
 * source="admin-test") before the error is rethrown, because
 * runChunkedAnalysis only writes rows once it has a backend to call.
 *
 * @param {object} args
 * @param {string} args.transcriptText - Transcript to analyze.
 * @param {"gemini"|"hardware"} args.anBackend - Backend override for analyze.
 * @param {string} [args.anModel] - Gemini model override; ignored for hardware.
 * @param {object} args.cfg - Config snapshot (relay_* settings).
 * @param {object} args.hw - Resolved hardware config ({ analyze, transcribe, sparkBase }).
 * @param {string} args.jobId - Job id stamped on audit rows.
 * @param {string} args.batchId - Batch id stamped on audit rows.
 * @param {string} args.mediaUrl - Source media URL stamped on audit rows.
 * @param {string} [args.title] - Media title stamped on audit rows.
 * @param {number} [args.audioSeconds] - Total audio length; forwarded as totalAudioSec.
 * @param {number} [args.audioBytes] - Not used by this function.
 * @returns {Promise<{text: string, model: (string|null), attempts: *}>}
 * @throws Rethrows backend-construction errors (after auditing them) and
 *   anything runChunkedAnalysis throws.
 */
async function runAnalyzeForTestRun({
  transcriptText,
  anBackend,
  anModel,
  cfg,
  hw,
  jobId,
  batchId,
  mediaUrl,
  title,
  audioSeconds, // total audio length — forwarded as totalAudioSec below
  audioBytes, // unused here; kept so the call signature stays unchanged
}) {
  const isGemini = anBackend === "gemini";
  // Read the hardware sections defensively so a missing/partial hardware
  // config surfaces as the explicit "not configured" error (and still gets
  // audited) instead of a TypeError thrown from inside the catch handler.
  const hwAnalyze = hw?.analyze ?? {};
  const hwTranscribe = hw?.transcribe ?? {};
  const zeroCost = () => ({
    input_tokens: 0, output_tokens: 0, thinking_tokens: 0, cost_usd: 0,
  });

  let backend;
  let resolvedModel;
  let computeCostDetails;
  try {
    if (isGemini) {
      backend = createGeminiBackend({
        apiKey: cfg.relay_gemini_api_key,
        transcriptionModel: cfg.relay_gemini_transcription_model,
        analysisModel: anModel || cfg.relay_gemini_analysis_model,
        // tx knobs are unused on the analyze path but the factory
        // accepts them anyway — pass for consistency.
        txChunkSeconds: (cfg.relay_gemini_tx_chunk_minutes || 30) * 60,
        txConcurrency: cfg.relay_gemini_tx_concurrency || 12,
      });
      resolvedModel = anModel || cfg.relay_gemini_analysis_model;
      computeCostDetails = (model, usage) =>
        (usage ? calcGeminiCost(model, usage) : zeroCost());
    } else {
      if (!hwAnalyze.url) {
        throw new Error("hardware analyze URL not configured");
      }
      backend = createHardwareBackend({
        parakeetBaseURL: hwTranscribe.url || "",
        gemmaBaseURL: hwAnalyze.url,
        sparkControlBaseURL: hw?.sparkBase || "",
        parakeetModel: hwTranscribe.model || "",
        gemmaModel: hwAnalyze.model || "",
        txChunkSeconds: (cfg.relay_hardware_tx_chunk_minutes || 5) * 60,
        txChunkOverlapSeconds: cfg.relay_hardware_tx_chunk_overlap_seconds ?? 30,
        diarizationEnabled: !!cfg.relay_hardware_diarization_enabled,
        clusterThresholdPct: cfg.relay_hardware_voice_clustering_threshold ?? 70,
        anchorMinSpeakingSec: cfg.relay_hardware_anchor_min_speaking_sec ?? 30,
        smallClusterMaxSpeakingSec: cfg.relay_hardware_small_cluster_max_speaking_sec ?? 15,
        uncertainMarginPct: cfg.relay_hardware_uncertain_margin_pct ?? 10,
        txConcurrency: cfg.relay_hardware_tx_concurrency || 4,
      });
      resolvedModel = hwAnalyze.model || null;
      // Hardware calls are not metered, so every window costs zero.
      computeCostDetails = zeroCost;
    }
  } catch (err) {
    // Construction failed before any window ran: write one failed analyze
    // row so the Jobs table shows what happened, then propagate.
    await recordCall({
      install_id: TEST_INSTALL_ID,
      tier: "core",
      pipeline: "analyze",
      backend: anBackend,
      model: isGemini
        ? (anModel || cfg.relay_gemini_analysis_model)
        : (hwAnalyze.model || "(auto)"),
      status: "error",
      duration_ms: 0,
      audio_seconds: 0,
      cost_usd: 0,
      job_id: jobId,
      batch_id: batchId,
      source: "admin-test",
      media_url: mediaUrl,
      title: title || null,
      error: (err?.message || String(err)).slice(0, 400),
      window_idx: 0,
      window_count: 1,
    });
    throw err;
  }

  // Pull windowing tunables from config (Settings tab). Window length and
  // concurrency keep `||` because 0 is not a usable value for either;
  // overlap uses `??` so an explicit 0 (no overlap) is honored rather than
  // silently replaced by the 2-minute default.
  const bodyMin = isGemini
    ? (cfg.relay_gemini_analyze_window_minutes || 18)
    : (cfg.relay_hardware_analyze_window_minutes || 18);
  const overlapMin = isGemini
    ? (cfg.relay_gemini_analyze_overlap_minutes ?? 2)
    : (cfg.relay_hardware_analyze_overlap_minutes ?? 2);
  const concurrency = isGemini
    ? (cfg.relay_gemini_analyze_concurrency || 12)
    : (cfg.relay_hardware_analyze_concurrency || 8);
  const cutoffMin = cfg.relay_analyze_cutoff_minutes || 25;

  const result = await runChunkedAnalysis({
    transcriptText,
    backend,
    pipelineBackend: anBackend,
    jobId,
    batchId,
    mediaUrl,
    title,
    installId: TEST_INSTALL_ID,
    source: "admin-test",
    computeCostDetails,
    bodySeconds: bodyMin * 60,
    overlapSeconds: overlapMin * 60,
    concurrency,
    cutoffSeconds: cutoffMin * 60,
    analyzePromptOverride: cfg.relay_analyze_prompt || "",
    // Section-count target wiring (matches the summarize-url path).
    // Without these, buildWindowPrompt falls back to "1 section" —
    // works defensively but means test-run benchmarks don't reflect
    // production segmentation density.
    totalAudioSec: audioSeconds || 0,
    targetTotalsByBucket: {
      under_30: cfg.relay_analyze_total_sections_under_30,
      "30_60": cfg.relay_analyze_total_sections_30_60,
      "60_90": cfg.relay_analyze_total_sections_60_90,
      "90_120": cfg.relay_analyze_total_sections_90_120,
      "120_150": cfg.relay_analyze_total_sections_120_150,
      "150_180": cfg.relay_analyze_total_sections_150_180,
      over_180: cfg.relay_analyze_total_sections_over_180,
    },
  });

  return {
    text: result.text || "",
    model: result.model || resolvedModel,
    attempts: result.attempts,
  };
}
|
||||
+1106
-5
File diff suppressed because it is too large
Load Diff
+114
-58
@@ -16,14 +16,21 @@
|
||||
// margin, and speed metrics.
|
||||
|
||||
import express from "express";
|
||||
import { resolveLicense } from "../keysat-client.js";
|
||||
import { getOrCreateRow, planBackend, commitCredit } from "../credits.js";
|
||||
import { resolveIdentity, identityTier } from "../identity.js";
|
||||
import {
|
||||
getOrCreateRow,
|
||||
planBackend,
|
||||
commitCredit,
|
||||
licenseFingerprint,
|
||||
} from "../credits.js";
|
||||
import { lookupJob, markJobCharged, refundJob } from "../job-credits.js";
|
||||
import { getConfigSnapshot, getTierQuotas } from "../config.js";
|
||||
import { createGeminiBackend } from "../backends/gemini.js";
|
||||
import { createHardwareBackend } from "../backends/hardware.js";
|
||||
import { envelope, errorEnvelope } from "./envelope.js";
|
||||
import { recordCall } from "../audit-log.js";
|
||||
import { resolveHardwareConfig } from "../hardware-config.js";
|
||||
import { reportHealthEvent } from "../spark-control-events.js";
|
||||
import { calcGeminiCost } from "../pricing.js";
|
||||
|
||||
export function analyzeRouter() {
|
||||
@@ -31,72 +38,100 @@ export function analyzeRouter() {
|
||||
|
||||
router.post("/analyze", express.json({ limit: "10mb" }), async (req, res) => {
|
||||
const t0 = Date.now();
|
||||
const installId = req.header("X-Recap-Install-Id");
|
||||
const jobId = req.header("X-Recap-Job-Id") || null;
|
||||
const auth = req.header("Authorization");
|
||||
|
||||
if (!installId) {
|
||||
let identity;
|
||||
try {
|
||||
identity = await resolveIdentity(req);
|
||||
} catch (err) {
|
||||
const e = await errorEnvelope({
|
||||
error: err?.message || "auth_error",
|
||||
statusHint: err?.status || 401,
|
||||
});
|
||||
return res.status(e.statusHint || 401).json(e.body);
|
||||
}
|
||||
if (identity.kind === "license" && !identity.installId) {
|
||||
const e = await errorEnvelope({
|
||||
error: "missing X-Recap-Install-Id header",
|
||||
statusHint: 400,
|
||||
});
|
||||
return res.status(400).json(e.body);
|
||||
}
|
||||
const { creditKey, installId, license } = identity;
|
||||
const prompt = req.body?.prompt;
|
||||
if (!prompt || typeof prompt !== "string") {
|
||||
const e = await errorEnvelope({
|
||||
error: "missing or non-string body.prompt",
|
||||
creditKey,
|
||||
installId,
|
||||
statusHint: 400,
|
||||
});
|
||||
return res.status(400).json(e.body);
|
||||
}
|
||||
|
||||
const license = await resolveLicense(auth);
|
||||
const tier = license.tier;
|
||||
|
||||
const row = await getOrCreateRow(installId);
|
||||
const row = await getOrCreateRow({ creditKey, installId, license });
|
||||
const tier = identityTier(identity, row);
|
||||
row.tier_snapshot = tier;
|
||||
const licenseFp = identity.kind === "cloud" ? null : licenseFingerprint(license);
|
||||
const auditInstall = installId || identity.userId || null;
|
||||
|
||||
let reusedJob = false;
|
||||
let chosenBackend = null;
|
||||
const existingJob = lookupJob(installId, jobId);
|
||||
if (existingJob) {
|
||||
reusedJob = true;
|
||||
chosenBackend = existingJob.backend;
|
||||
} else {
|
||||
const cfg = await getConfigSnapshot();
|
||||
const hasHardware = !!cfg.relay_gemma_base_url;
|
||||
const quota = await getTierQuotas();
|
||||
const preference =
|
||||
cfg.relay_analyze_backend_preference || "gemini_first";
|
||||
const plan = planBackend(row, quota, { hasHardware, preference });
|
||||
if (!plan.allowed) {
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
tier,
|
||||
pipeline: "analyze",
|
||||
backend: null,
|
||||
model: null,
|
||||
status: "refused",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - t0,
|
||||
cost_usd: 0,
|
||||
job_id: jobId,
|
||||
error: plan.reason,
|
||||
});
|
||||
const e = await errorEnvelope({
|
||||
error: plan.reason,
|
||||
installId,
|
||||
tier,
|
||||
statusHint: 402,
|
||||
});
|
||||
return res.status(402).json(e.body);
|
||||
}
|
||||
chosenBackend = plan.backend;
|
||||
}
|
||||
|
||||
// Two separate decisions on every call:
|
||||
// 1. Billing: did we already charge a credit for this job? (look
|
||||
// up by job_id; reused → don't charge again.)
|
||||
// 2. Routing: which backend serves THIS pipeline step's request?
|
||||
// (always per-pipeline preference + planBackend, even when
|
||||
// the job has a prior transcribe call that routed elsewhere.)
|
||||
//
|
||||
// The old code conflated the two — it copied `backend` from the
|
||||
// existing job, which meant analyze would silently inherit
|
||||
// transcribe's backend choice even when the operator's analyze
|
||||
// preference said something different. Fixed: routing is decided
|
||||
// fresh per pipeline step, regardless of job history.
|
||||
const reusedJob = !!lookupJob({ creditKey, installId, license, jobId });
|
||||
const cfg = await getConfigSnapshot();
|
||||
const hw = await resolveHardwareConfig(cfg);
|
||||
// Operator-only diagnostic — see summarize-url.js for the full
|
||||
// reasoning. We don't 503 here on blocked_reason because doing
|
||||
// so pre-empts planBackend and would surface operator-internal
|
||||
// Spark Control / vLLM wording to clients even when Gemini was
|
||||
// the configured preference. planBackend correctly routes around
|
||||
// an unavailable hardware path via hasHardware = false.
|
||||
if (hw.analyze.blocked_reason) {
|
||||
console.warn(
|
||||
`[analyze] hardware analyze currently blocked (planBackend will route to Gemini if available): ${hw.analyze.blocked_reason}`,
|
||||
);
|
||||
}
|
||||
const hasHardware = !!hw.analyze.url;
|
||||
const quota = await getTierQuotas();
|
||||
const preference =
|
||||
cfg.relay_analyze_backend_preference || "gemini_first";
|
||||
const plan = planBackend(row, quota, { hasHardware, preference });
|
||||
if (!plan.allowed) {
|
||||
await recordCall({
|
||||
install_id: auditInstall,
|
||||
license_fingerprint: licenseFp,
|
||||
tier,
|
||||
pipeline: "analyze",
|
||||
backend: null,
|
||||
model: null,
|
||||
status: "refused",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - t0,
|
||||
cost_usd: 0,
|
||||
job_id: jobId,
|
||||
error: plan.reason,
|
||||
});
|
||||
const e = await errorEnvelope({
|
||||
error: plan.reason,
|
||||
creditKey,
|
||||
installId,
|
||||
tier,
|
||||
statusHint: 402,
|
||||
});
|
||||
return res.status(402).json(e.body);
|
||||
}
|
||||
const chosenBackend = plan.backend;
|
||||
|
||||
let result;
|
||||
try {
|
||||
if (chosenBackend === "gemini") {
|
||||
@@ -108,24 +143,39 @@ export function analyzeRouter() {
|
||||
result = await backend.analyzeText({ prompt });
|
||||
} else {
|
||||
const backend = createHardwareBackend({
|
||||
parakeetBaseURL: cfg.relay_parakeet_base_url,
|
||||
gemmaBaseURL: cfg.relay_gemma_base_url,
|
||||
parakeetModel: cfg.relay_parakeet_model,
|
||||
gemmaModel: cfg.relay_gemma_model,
|
||||
parakeetBaseURL: hw.transcribe.url || "",
|
||||
gemmaBaseURL: hw.analyze.url || "",
|
||||
sparkControlBaseURL: hw.sparkBase || "",
|
||||
parakeetModel: hw.transcribe.model || "",
|
||||
gemmaModel: hw.analyze.model || "",
|
||||
});
|
||||
result = await backend.analyzeText({ prompt });
|
||||
}
|
||||
} catch (err) {
|
||||
if (reusedJob) refundJob(installId, jobId);
|
||||
if (reusedJob) await refundJob({ creditKey, installId, license, jobId });
|
||||
console.error(`[relay/analyze] backend error: ${err?.message}`);
|
||||
// Passive health-event report to Spark Control so the
|
||||
// operator's dashboard surfaces the failure immediately
|
||||
// (without waiting for its own polling cycle to catch it).
|
||||
// Only fired for hardware-side calls — Gemini failures are a
|
||||
// separate observability surface (Google's API health).
|
||||
if (chosenBackend === "hardware") {
|
||||
reportHealthEvent({
|
||||
service: "vllm",
|
||||
ok: false,
|
||||
error: (err?.message || String(err)).slice(0, 280),
|
||||
ms: Date.now() - t0,
|
||||
});
|
||||
}
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
install_id: auditInstall,
|
||||
license_fingerprint: licenseFp,
|
||||
tier,
|
||||
pipeline: "analyze",
|
||||
backend: chosenBackend,
|
||||
model: chosenBackend === "gemini"
|
||||
? cfg.relay_gemini_analysis_model
|
||||
: cfg.relay_gemma_model,
|
||||
: hw.analyze.model || "(auto)",
|
||||
status: "error",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - t0,
|
||||
@@ -135,6 +185,7 @@ export function analyzeRouter() {
|
||||
});
|
||||
const e = await errorEnvelope({
|
||||
error: err?.message || "backend_error",
|
||||
creditKey,
|
||||
installId,
|
||||
tier,
|
||||
statusHint: err?.status || 502,
|
||||
@@ -144,8 +195,8 @@ export function analyzeRouter() {
|
||||
|
||||
let creditCharged = 0;
|
||||
if (!reusedJob) {
|
||||
await commitCredit(installId, { backend: chosenBackend, tier });
|
||||
markJobCharged(installId, jobId, { backend: chosenBackend, tier });
|
||||
await commitCredit({ creditKey, installId, license, backend: chosenBackend, tier });
|
||||
await markJobCharged({ creditKey, installId, license, jobId, backend: chosenBackend, tier });
|
||||
creditCharged = 1;
|
||||
}
|
||||
|
||||
@@ -159,7 +210,8 @@ export function analyzeRouter() {
|
||||
cost_usd: 0,
|
||||
};
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
install_id: auditInstall,
|
||||
license_fingerprint: licenseFp,
|
||||
tier,
|
||||
pipeline: "analyze",
|
||||
backend: chosenBackend,
|
||||
@@ -168,10 +220,14 @@ export function analyzeRouter() {
|
||||
credit_charged: creditCharged,
|
||||
duration_ms: Date.now() - t0,
|
||||
job_id: jobId,
|
||||
// Surface the cascade so the dashboard can show "served by
|
||||
// 2.5-flash after 3-flash 503'd" — Gemini backend returns this;
|
||||
// hardware backend doesn't (no per-model fallback there).
|
||||
attempts: result?.attempts || null,
|
||||
...costDetails,
|
||||
});
|
||||
|
||||
const body = await envelope({ result, installId, tier, creditCharged });
|
||||
const body = await envelope({ result, creditKey, installId, license, tier, creditCharged });
|
||||
res.json(body);
|
||||
});
|
||||
|
||||
|
||||
+166
-50
@@ -1,69 +1,185 @@
|
||||
// GET /relay/capabilities — operator-aware metadata for Recap clients
|
||||
// to plan their audio handling. Returns the upper bounds the relay's
|
||||
// CURRENT routing config can comfortably accept, so Recap can decide
|
||||
// whether to chunk a long video before sending it.
|
||||
// GET /relay/capabilities — per-install metadata for Recap clients to
|
||||
// plan their audio handling. Tells Recap whether to chunk a long
|
||||
// audio file before sending it, based on which backend THIS install's
|
||||
// next transcribe call will actually route to.
|
||||
//
|
||||
// Today's logic:
|
||||
// - When the operator's transcribe_backend_preference routes through
|
||||
// Gemini at all (gemini_first / gemini_only), we report Gemini-safe
|
||||
// limits (60 min / 30 MB / 2700 s chunks). Even with hardware as
|
||||
// overflow, the FIRST attempt is Gemini, which needs the chunk
|
||||
// budget.
|
||||
// - When the operator's preference is hardware-only (or hardware-
|
||||
// first with overflow to Gemini disabled in spirit), we report
|
||||
// "unbounded" — the operator's Parakeet wrapper can typically
|
||||
// ingest 2+ hour podcasts in a single shot, so chunking just adds
|
||||
// extra inference passes and timestamp-stitching overhead.
|
||||
// The decision is install-specific because the relay's routing
|
||||
// preference combined with the install's tier + current Gemini cap
|
||||
// consumption determines the backend per request. In `gemini_first`
|
||||
// mode, the same operator config will route a fresh install to
|
||||
// Gemini (chunking required) but route a cap-exhausted install to
|
||||
// hardware (no chunking needed) — so a global capabilities answer
|
||||
// would be wrong half the time.
|
||||
//
|
||||
// Recap reads this once on boot + on policy refresh; when its
|
||||
// transcriptionProvider is "relay", it honors these limits instead of
|
||||
// its own hardcoded thresholds. For non-relay providers, Recap's
|
||||
// internal per-provider thresholds apply.
|
||||
// Inputs:
|
||||
// X-Recap-Install-Id (optional but strongly recommended)
|
||||
// Authorization (optional Bearer license — affects tier lookup)
|
||||
//
|
||||
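// Without an install_id (or when the install-aware lookup fails), the
// answer comes from the operator-wide preference alone: hardware limits
// only when hardware is configured and the preference is hardware_only
// or hardware_first; Gemini-safe limits otherwise (the chunking path
// always works; the no-chunking path only works when hardware actually
// serves the call).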
|
||||
//
|
||||
// Output shape (unchanged from v1 — pure additive on the routing
|
||||
// logic):
|
||||
// {
|
||||
// max_audio_mb: number,
|
||||
// max_audio_minutes: number,
|
||||
// preferred_chunk_seconds: number | null, // null = don't chunk
|
||||
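// reason: string, // human-readable
// has_tts: boolean, // true when a TTS backend is available
// tts_backend: "kokoro" | "elevenlabs" | null,
// tts_default_voice: string | null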
|
||||
// }
|
||||
|
||||
import express from "express";
|
||||
import { getConfigSnapshot } from "../config.js";
|
||||
import { getConfigSnapshot, getTierQuotas } from "../config.js";
|
||||
import { resolveLicense } from "../keysat-client.js";
|
||||
import { getOrCreateRow, planBackend } from "../credits.js";
|
||||
import { resolveHardwareConfig } from "../hardware-config.js";
|
||||
|
||||
// Gemini File API can handle audio up to ~9.5 hours per generateContent
|
||||
// call and files up to 2GB. The conservative 60-min/30-MB ceiling we
|
||||
// shipped originally was sized for free-tier worries that no longer
|
||||
// apply on paid Gemini. Bumped to 240 min / 200 MB so Recap hits the
|
||||
// relay-URL fast-path for content up to 4 hours instead of falling
|
||||
// back to client-side chunked uploads (which lose the buyer-bandwidth
|
||||
// savings and serialize the calls).
|
||||
const GEMINI_LIMITS = Object.freeze({
|
||||
max_audio_mb: 200,
|
||||
max_audio_minutes: 240,
|
||||
preferred_chunk_seconds: 2700, // 45 min — server-side chunking still
|
||||
// kicks in for stability on the longest
|
||||
// files, but only on the actual call;
|
||||
// doesn't gate client-side chunking.
|
||||
});
|
||||
|
||||
const HARDWARE_LIMITS = Object.freeze({
|
||||
// Effectively unbounded — Parakeet wrappers commonly handle 2+ hour
|
||||
// audio in one shot. Set high but finite ceilings so a 24-hour file
|
||||
// doesn't OOM the operator's GPU box silently.
|
||||
max_audio_mb: 500,
|
||||
max_audio_minutes: 240,
|
||||
preferred_chunk_seconds: null,
|
||||
});
|
||||
|
||||
export function capabilitiesRouter() {
|
||||
const router = express.Router();
|
||||
|
||||
router.get("/capabilities", async (_req, res) => {
|
||||
router.get("/capabilities", async (req, res) => {
|
||||
const cfg = await getConfigSnapshot();
|
||||
const txPref =
|
||||
cfg.relay_transcribe_backend_preference || "gemini_first";
|
||||
const hasParakeet = !!cfg.relay_parakeet_base_url;
|
||||
const hw = await resolveHardwareConfig(cfg);
|
||||
const hasHardware = !!hw.transcribe.url;
|
||||
const installId = req.header("X-Recap-Install-Id") || null;
|
||||
const auth = req.header("Authorization") || null;
|
||||
|
||||
// Conservative default: Gemini-safe limits unless the operator has
|
||||
// explicitly said "use hardware (only or first) and I've got a
|
||||
// Parakeet endpoint wired up". Without the Parakeet endpoint we
|
||||
// can't make use of larger inputs — Gemini's the only path —
|
||||
// so we'd just be lying to the client.
|
||||
// ── TTS availability (audio-first "walking mode") ──
|
||||
// Operator-wide, not install-specific: whether ANY TTS backend can
|
||||
// serve a /relay/tts call given the operator's config. The Recap app
|
||||
// uses has_tts to decide whether to show the "Listen" button at all
|
||||
// (it additionally gates the feature to Max users on its own side).
|
||||
const ttsPref = cfg.relay_tts_backend_preference || "hardware_first";
|
||||
const kokoroReady = !!hw.tts?.url;
|
||||
const elevenConfigured = !!(
|
||||
cfg.relay_elevenlabs_api_key && cfg.relay_elevenlabs_voice_id
|
||||
);
|
||||
const ttsBackend =
|
||||
ttsPref === "hardware_only"
|
||||
? kokoroReady
|
||||
? "kokoro"
|
||||
: null
|
||||
: ttsPref === "cloud_only"
|
||||
? elevenConfigured
|
||||
? "elevenlabs"
|
||||
: null
|
||||
: ttsPref === "cloud_first"
|
||||
? elevenConfigured
|
||||
? "elevenlabs"
|
||||
: kokoroReady
|
||||
? "kokoro"
|
||||
: null
|
||||
: kokoroReady // hardware_first (default)
|
||||
? "kokoro"
|
||||
: elevenConfigured
|
||||
? "elevenlabs"
|
||||
: null;
|
||||
const ttsCaps = {
|
||||
has_tts: !!ttsBackend,
|
||||
tts_backend: ttsBackend, // "kokoro" | "elevenlabs" | null
|
||||
tts_default_voice: cfg.relay_tts_default_voice || null,
|
||||
};
|
||||
|
||||
// If we have an install_id, run the same routing logic the actual
|
||||
// transcribe route uses so the chunking decision matches the
|
||||
// backend that will actually serve the call.
|
||||
if (installId) {
|
||||
try {
|
||||
const license = await resolveLicense(auth);
|
||||
const row = await getOrCreateRow({ installId, license });
|
||||
row.tier_snapshot = license.tier;
|
||||
const quota = await getTierQuotas();
|
||||
const plan = planBackend(row, quota, {
|
||||
hasHardware,
|
||||
preference: txPref,
|
||||
});
|
||||
if (plan.allowed && plan.backend === "hardware") {
|
||||
return res.json({
|
||||
...HARDWARE_LIMITS,
|
||||
...ttsCaps,
|
||||
reason: `routing this install to hardware (pref=${txPref}, tier=${license.tier})`,
|
||||
});
|
||||
}
|
||||
if (plan.allowed && plan.backend === "gemini") {
|
||||
return res.json({
|
||||
...GEMINI_LIMITS,
|
||||
...ttsCaps,
|
||||
reason: `routing this install to Gemini (pref=${txPref}, tier=${license.tier})`,
|
||||
});
|
||||
}
|
||||
// planBackend refused entirely (out of credits / no backend
|
||||
// configured). Return Gemini-safe defaults so the client still
|
||||
// chunks defensively and gets a clean 402 from the real
|
||||
// transcribe call rather than a confusing transport failure.
|
||||
return res.json({
|
||||
...GEMINI_LIMITS,
|
||||
...ttsCaps,
|
||||
reason: `routing refused for this install (${plan.reason || "unknown"}) — returning Gemini-safe defaults`,
|
||||
});
|
||||
} catch (err) {
|
||||
// License lookup or row read failed — fall through to the
|
||||
// anonymous path so the client at least gets safe defaults.
|
||||
console.warn(
|
||||
`[capabilities] install-aware resolve failed for ${installId}: ${err?.message || err} — falling back to operator-wide defaults`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Anonymous (no install_id) or install-aware path failed. Pick
|
||||
// capabilities from the operator-wide routing preference alone:
|
||||
// hardware_only / hardware_first → hardware-safe limits (provided
|
||||
// hardware is configured)
|
||||
// gemini_only / gemini_first → Gemini-safe (will always work
|
||||
// for the first attempt; in
|
||||
// gemini_first the eventual
|
||||
// overflow to hardware can
|
||||
// handle bigger files too, but
|
||||
// chunking still works for both)
|
||||
//
|
||||
// When `hardware_first` is set but Parakeet isn't actually
|
||||
// configured, the relay will fall back to Gemini — so report
|
||||
// Gemini-safe limits in that case.
|
||||
const hardwareCapable =
|
||||
hasParakeet && (txPref === "hardware_only" || txPref === "hardware_first");
|
||||
|
||||
hasHardware && (txPref === "hardware_only" || txPref === "hardware_first");
|
||||
if (hardwareCapable) {
|
||||
res.json({
|
||||
// Effective unbounded — Parakeet wrappers commonly handle 2+
|
||||
// hour audio in one shot. Set high but finite ceilings so a
|
||||
// 24-hour file doesn't OOM the operator's GPU box silently.
|
||||
max_audio_mb: 500,
|
||||
max_audio_minutes: 240,
|
||||
preferred_chunk_seconds: null,
|
||||
// Diagnostic — Recap doesn't need this but the dashboard / a
|
||||
// curious operator might want to know which limit shape they
|
||||
// returned and why.
|
||||
reason: "hardware-capable backend preference (" + txPref + ")",
|
||||
});
|
||||
} else {
|
||||
res.json({
|
||||
// Gemini File-API + practical reliability limits. Matches
|
||||
// Recap's pre-relay defaults so existing chunking behavior
|
||||
// is preserved.
|
||||
max_audio_mb: 30,
|
||||
max_audio_minutes: 60,
|
||||
preferred_chunk_seconds: 2700, // 45 min chunks
|
||||
reason: "Gemini-backed preference (" + txPref + ")",
|
||||
return res.json({
|
||||
...HARDWARE_LIMITS,
|
||||
...ttsCaps,
|
||||
reason: `hardware-capable backend preference (${txPref})`,
|
||||
});
|
||||
}
|
||||
return res.json({
|
||||
...GEMINI_LIMITS,
|
||||
...ttsCaps,
|
||||
reason: `Gemini-backed preference (${txPref})`,
|
||||
});
|
||||
});
|
||||
|
||||
return router;
|
||||
|
||||
@@ -11,18 +11,34 @@ import { getTierQuotas } from "../config.js";
|
||||
export async function envelope({
|
||||
result = null,
|
||||
installId,
|
||||
// License is optional but recommended — without it, balance lookups
|
||||
// route to the install-keyed row even for paid users, which would
|
||||
// briefly underreport their balance after a commitCredit landed on
|
||||
// their license-keyed row. Routes pass it through from resolveLicense.
|
||||
license = null,
|
||||
// Explicit ledger key override (cloud `user:<id>` path). Takes
|
||||
// precedence over (installId, license) when present.
|
||||
creditKey = null,
|
||||
tier,
|
||||
creditCharged = 0,
|
||||
}) {
|
||||
const quota = await getTierQuotas();
|
||||
const row = await getOrCreateRow(installId);
|
||||
const row = await getOrCreateRow({ installId, license, creditKey });
|
||||
// tier_snapshot on the row was just updated by commitCredit; if no
|
||||
// credit was committed (free reuse via job_id) it still reflects
|
||||
// the last-known tier for this install, which is fine.
|
||||
const balance = computeRemaining(row, quota);
|
||||
return {
|
||||
result,
|
||||
credits_remaining: balance.remaining, // null = unlimited (Max)
|
||||
// `total` = tier allotment + purchased top-up. Recap renders this
|
||||
// as the headline number on its credits pill. `remaining` alone
|
||||
// wouldn't reflect purchased credits at all — so a buyer who
|
||||
// just bought 5 credits and had 0 tier credits left would still
|
||||
// see "0 relay credits" until their tier renewed.
|
||||
credits_remaining: balance.total, // null = unlimited (Max)
|
||||
// Breakdown for clients that want to display it.
|
||||
tier_remaining: balance.remaining,
|
||||
purchased_balance: balance.purchased,
|
||||
tier,
|
||||
credit_charged: creditCharged,
|
||||
};
|
||||
@@ -35,15 +51,25 @@ export async function envelope({
|
||||
export async function errorEnvelope({
|
||||
error,
|
||||
installId,
|
||||
license = null,
|
||||
creditKey = null,
|
||||
tier = "core",
|
||||
statusHint = 500,
|
||||
}) {
|
||||
let creditsRemaining = null;
|
||||
let tierRemaining = null;
|
||||
let purchased = 0;
|
||||
try {
|
||||
const quota = await getTierQuotas();
|
||||
const row = await getOrCreateRow(installId || "unknown");
|
||||
const row = await getOrCreateRow({
|
||||
installId: creditKey ? null : installId || "unknown",
|
||||
license,
|
||||
creditKey,
|
||||
});
|
||||
const balance = computeRemaining(row, quota);
|
||||
creditsRemaining = balance.remaining;
|
||||
creditsRemaining = balance.total;
|
||||
tierRemaining = balance.remaining;
|
||||
purchased = balance.purchased;
|
||||
} catch {}
|
||||
return {
|
||||
statusHint,
|
||||
@@ -51,6 +77,8 @@ export async function errorEnvelope({
|
||||
result: null,
|
||||
error: typeof error === "string" ? error : error?.message || "unknown_error",
|
||||
credits_remaining: creditsRemaining,
|
||||
tier_remaining: tierRemaining,
|
||||
purchased_balance: purchased,
|
||||
tier,
|
||||
credit_charged: 0,
|
||||
},
|
||||
|
||||
@@ -35,8 +35,13 @@ export function healthRouter() {
|
||||
version: VERSION,
|
||||
backends: {
|
||||
gemini: !!cfg.relay_gemini_api_key,
|
||||
parakeet: !!cfg.relay_parakeet_base_url,
|
||||
gemma: !!cfg.relay_gemma_base_url,
|
||||
// Whether the operator-hardware path is wired up at all.
|
||||
// Hardware backends are now sourced from Spark Control
|
||||
// discovery — see hardware-config.js. Empty discovery URL
|
||||
// means no hardware path; downstream details (which model is
|
||||
// ready, transcribe vs analyze availability) are surfaced via
|
||||
// /admin/config's effective_* fields.
|
||||
hardware: !!cfg.relay_spark_control_url,
|
||||
},
|
||||
admin_enabled: !!cfg.relay_admin_password_hash,
|
||||
});
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
+514
-177
@@ -33,8 +33,13 @@ import { execFile } from "child_process";
|
||||
import { promisify } from "util";
|
||||
import { Readable } from "stream";
|
||||
import { pipeline } from "stream/promises";
|
||||
import { resolveLicense } from "../keysat-client.js";
|
||||
import { getOrCreateRow, planBackend, commitCredit } from "../credits.js";
|
||||
import { resolveIdentity, identityTier } from "../identity.js";
|
||||
import {
|
||||
getOrCreateRow,
|
||||
planBackend,
|
||||
commitCredit,
|
||||
licenseFingerprint,
|
||||
} from "../credits.js";
|
||||
import { lookupJob, markJobCharged, refundJob } from "../job-credits.js";
|
||||
import { getConfigSnapshot, getTierQuotas } from "../config.js";
|
||||
import { createGeminiBackend } from "../backends/gemini.js";
|
||||
@@ -42,6 +47,18 @@ import { createHardwareBackend } from "../backends/hardware.js";
|
||||
import { envelope, errorEnvelope } from "./envelope.js";
|
||||
import { recordCall } from "../audit-log.js";
|
||||
import { calcGeminiCost } from "../pricing.js";
|
||||
import { getAudioDurationSeconds } from "../audio-meta.js";
|
||||
import { resolveHardwareConfig } from "../hardware-config.js";
|
||||
import { reportHealthEvent } from "../spark-control-events.js";
|
||||
import {
|
||||
createJob,
|
||||
markRunning,
|
||||
setProgress,
|
||||
markComplete,
|
||||
markFailed,
|
||||
getJob,
|
||||
} from "../jobs.js";
|
||||
import { saveJobOutput } from "../output-store.js";
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
@@ -54,7 +71,7 @@ const MAX_DOWNLOAD_BYTES = 500 * 1024 * 1024;
|
||||
// rate-limits; a hard ceiling avoids holding the request open forever.
|
||||
const DOWNLOAD_TIMEOUT_MS = 10 * 60 * 1000;
|
||||
|
||||
function looksLikeYouTube(url) {
|
||||
/**
 * Report whether a media URL points at YouTube.
 *
 * The decision is made on the parsed hostname rather than a regex over the
 * whole string, so bare-domain links ("https://youtu.be/<id>",
 * "https://youtube.com/watch?v=...") are recognized, and a YouTube domain
 * that merely appears in another site's path or query string is not.
 * Scheme-less input ("youtube.com/watch?v=...") is retried with an
 * "https://" prefix.
 *
 * @param {string} url - Candidate media URL.
 * @returns {boolean} true when the host is youtube.com, youtu.be, or a
 *   subdomain of either; false for empty or unparseable input.
 */
export function looksLikeYouTube(url) {
  if (!url) return false;
  const youTubeDomains = ["youtube.com", "youtu.be"];
  const raw = String(url).trim();
  for (const candidate of [raw, `https://${raw}`]) {
    let host;
    try {
      host = new URL(candidate).hostname;
    } catch {
      continue; // not parseable in this form — try the next candidate
    }
    // Normalize case and a trailing root dot ("youtube.com.").
    host = host.toLowerCase().replace(/\.$/, "");
    // An empty hostname means the string parsed as an opaque URL
    // (e.g. "host:port/path" read as a scheme); try the prefixed form.
    if (!host) continue;
    return youTubeDomains.some(
      (domain) => host === domain || host.endsWith(`.${domain}`),
    );
  }
  return false;
}
|
||||
@@ -79,7 +96,7 @@ function guessMimeFromExt(filePath) {
|
||||
// Download an HTTP(S) audio URL to a temp file. Stops if the file
|
||||
// would exceed MAX_DOWNLOAD_BYTES. Returns { filePath, bytes,
|
||||
// mimeType }.
|
||||
async function downloadDirect(url, tmpDir) {
|
||||
export async function downloadDirect(url, tmpDir) {
|
||||
const res = await fetch(url, {
|
||||
redirect: "follow",
|
||||
signal: AbortSignal.timeout(DOWNLOAD_TIMEOUT_MS),
|
||||
@@ -143,7 +160,11 @@ async function downloadDirect(url, tmpDir) {
|
||||
|
||||
// Download a YouTube URL via yt-dlp. Picks the audio-only m4a/mp3.
|
||||
// Logs the chosen path back as the file. Caller manages tmpDir.
|
||||
async function downloadYouTube(url, tmpDir) {
|
||||
// Captures the video title (plus channel, description and chapters) via a single `--print` JSON template so callers (the
|
||||
// summarize-url / transcribe-url workers) can stamp the Jobs table
|
||||
// with the real title instead of "Untitled" when the client didn't
|
||||
// pre-fetch metadata.
|
||||
export async function downloadYouTube(url, tmpDir) {
|
||||
const outTemplate = path.join(tmpDir, "audio.%(ext)s");
|
||||
const args = [
|
||||
"-x", // extract audio
|
||||
@@ -156,18 +177,93 @@ async function downloadYouTube(url, tmpDir) {
|
||||
"--no-playlist",
|
||||
"--no-simulate",
|
||||
"--no-warnings",
|
||||
// Emit a JSON dict containing the full metadata we care about for
|
||||
// the transcribe prompt's speaker-identification cues. Using
|
||||
// `before_dl:` so we get the metadata even if the download itself
|
||||
// later fails partway. The `.{field1,field2}j` template prints
|
||||
// just the named fields as a JSON object (yt-dlp escapes embedded
|
||||
// newlines inside description values, so single-line stdout parses
|
||||
// cleanly). Title comes from the same dict — no second --print
|
||||
// needed.
|
||||
//
|
||||
// Why these four fields specifically: they're exactly what the
|
||||
// recap-app's fetchYouTubeMetadata() pulls and feeds into its
|
||||
// direct-to-Gemini transcribe prompt. With these populated, the
|
||||
// model can correctly assign speaker labels (host name from
|
||||
// channel, guest name from description, chapter titles often name
|
||||
// both). Without them, every transcript falls back to unlabeled
|
||||
// dialogue regardless of how detailed the prompt's
|
||||
// speaker-identification rule is.
|
||||
"--print",
|
||||
"before_dl:%(.{title,channel,description,chapters})j",
|
||||
url,
|
||||
];
|
||||
let extractedMetadata = {
|
||||
title: null,
|
||||
channel: null,
|
||||
description: null,
|
||||
chapters: [],
|
||||
};
|
||||
try {
|
||||
await execFileAsync("yt-dlp", args, {
|
||||
const { stdout } = await execFileAsync("yt-dlp", args, {
|
||||
timeout: DOWNLOAD_TIMEOUT_MS,
|
||||
maxBuffer: 10 * 1024 * 1024,
|
||||
});
|
||||
// The JSON dict is the first non-empty line that starts with `{`.
|
||||
// yt-dlp may print other progress / warning lines before or after
|
||||
// depending on version; filter to the JSON line specifically.
|
||||
const firstJsonLine = (stdout || "")
|
||||
.split(/\r?\n/)
|
||||
.map((l) => l.trim())
|
||||
.find((l) => l.length > 0 && l.startsWith("{"));
|
||||
if (firstJsonLine) {
|
||||
try {
|
||||
const parsed = JSON.parse(firstJsonLine);
|
||||
extractedMetadata = {
|
||||
title:
|
||||
typeof parsed.title === "string" && parsed.title.trim()
|
||||
? parsed.title.trim().slice(0, 300)
|
||||
: null,
|
||||
channel:
|
||||
typeof parsed.channel === "string" && parsed.channel.trim()
|
||||
? parsed.channel.trim().slice(0, 200)
|
||||
: null,
|
||||
// Cap at 2000 chars — recap-app uses the same cap. Long
|
||||
// descriptions with release-notes / sponsor blocks otherwise
|
||||
// bloat the prompt and crowd out the speaker-naming signal.
|
||||
description:
|
||||
typeof parsed.description === "string" && parsed.description.trim()
|
||||
? parsed.description.trim().slice(0, 2000)
|
||||
: null,
|
||||
// Each chapter is { start_time: seconds, end_time, title }.
|
||||
// We only use start_time + title in the prompt; pass the full
|
||||
// array through so callers see what yt-dlp returned.
|
||||
chapters: Array.isArray(parsed.chapters) ? parsed.chapters : [],
|
||||
};
|
||||
} catch (parseErr) {
|
||||
// Malformed JSON from yt-dlp. Fall back to title-only via a
|
||||
// best-effort regex on the line. Better than nothing.
|
||||
const m = firstJsonLine.match(/"title"\s*:\s*"([^"]+)"/);
|
||||
if (m) extractedMetadata.title = m[1].slice(0, 300);
|
||||
console.warn(
|
||||
`[yt-dlp] metadata JSON parse failed: ${parseErr?.message || parseErr} — falling back to title-only`
|
||||
);
|
||||
}
|
||||
} else if (stdout) {
|
||||
// No JSON line but stdout has something — older yt-dlp versions
|
||||
// or some videos may emit a bare title line. Use it as title-only
|
||||
// so we at least preserve the existing v0.2.56 behavior.
|
||||
const firstLine = stdout
|
||||
.split(/\r?\n/)
|
||||
.map((l) => l.trim())
|
||||
.find((l) => l.length > 0);
|
||||
if (firstLine) extractedMetadata.title = firstLine.slice(0, 300);
|
||||
}
|
||||
} catch (err) {
|
||||
const stderr = (err?.stderr || "").toString();
|
||||
const stdout = (err?.stdout || "").toString();
|
||||
const stdoutStr = (err?.stdout || "").toString();
|
||||
throw new Error(
|
||||
`yt-dlp failed: ${stderr.trim() || stdout.trim() || err?.message}`
|
||||
`yt-dlp failed: ${stderr.trim() || stdoutStr.trim() || err?.message}`
|
||||
);
|
||||
}
|
||||
// Find the produced file — yt-dlp's audio-format=mp3 means it ends
|
||||
@@ -189,225 +285,466 @@ async function downloadYouTube(url, tmpDir) {
|
||||
filePath,
|
||||
bytes: stat.size,
|
||||
mimeType: guessMimeFromExt(filePath),
|
||||
title: extractedMetadata.title,
|
||||
channel: extractedMetadata.channel,
|
||||
description: extractedMetadata.description,
|
||||
chapters: extractedMetadata.chapters,
|
||||
};
|
||||
}
|
||||
|
||||
export function transcribeUrlRouter() {
|
||||
const router = express.Router();
|
||||
|
||||
// POST /relay/transcribe-url — kicks off a background transcribe
|
||||
// job and returns immediately with { job_id }. The client polls
|
||||
// GET /relay/jobs/:id to find out when it's done.
|
||||
//
|
||||
// Why async: a synchronous response over HTTP can't reliably
|
||||
// survive multi-minute work — proxies, load balancers, and NATs
|
||||
// along the path will drop the connection on long-running idle
|
||||
// requests (we observed a 5-minute cut on a 1h45m transcribe).
|
||||
// The poll requests are short and cheap, so they never trip
|
||||
// timeouts.
|
||||
router.post("/transcribe-url", express.json({ limit: "1mb" }), async (req, res) => {
|
||||
const t0 = Date.now();
|
||||
const installId = req.header("X-Recap-Install-Id");
|
||||
const jobId = req.header("X-Recap-Job-Id") || null;
|
||||
const auth = req.header("Authorization");
|
||||
const summaryJobId = req.header("X-Recap-Job-Id") || null;
|
||||
|
||||
if (!installId) {
|
||||
let identity;
|
||||
try {
|
||||
identity = await resolveIdentity(req);
|
||||
} catch (err) {
|
||||
const e = await errorEnvelope({
|
||||
error: err?.message || "auth_error",
|
||||
statusHint: err?.status || 401,
|
||||
});
|
||||
return res.status(e.statusHint || 401).json(e.body);
|
||||
}
|
||||
if (identity.kind === "license" && !identity.installId) {
|
||||
const e = await errorEnvelope({
|
||||
error: "missing X-Recap-Install-Id header",
|
||||
statusHint: 400,
|
||||
});
|
||||
return res.status(400).json(e.body);
|
||||
}
|
||||
const { creditKey, installId, license } = identity;
|
||||
// `title` is `let` rather than `const` because the worker may
|
||||
// backfill it from yt-dlp metadata after the download completes
|
||||
// (when the client didn't pre-fetch the title).
|
||||
let title;
|
||||
const {
|
||||
media_url: mediaUrl,
|
||||
type,
|
||||
mime_type: bodyMime,
|
||||
title,
|
||||
title: bodyTitle,
|
||||
channel,
|
||||
description,
|
||||
chapters,
|
||||
} = req.body || {};
|
||||
title = bodyTitle;
|
||||
if (!mediaUrl || typeof mediaUrl !== "string") {
|
||||
const e = await errorEnvelope({
|
||||
error: "missing or non-string body.media_url",
|
||||
creditKey,
|
||||
installId,
|
||||
statusHint: 400,
|
||||
});
|
||||
return res.status(400).json(e.body);
|
||||
}
|
||||
|
||||
const license = await resolveLicense(auth);
|
||||
const tier = license.tier;
|
||||
const row = await getOrCreateRow(installId);
|
||||
const row = await getOrCreateRow({ creditKey, installId, license });
|
||||
const tier = identityTier(identity, row);
|
||||
row.tier_snapshot = tier;
|
||||
const licenseFp = identity.kind === "cloud" ? null : licenseFingerprint(license);
|
||||
const auditInstall = installId || identity.userId || null;
|
||||
|
||||
// Quota check + backend choice. Same as /relay/transcribe.
|
||||
let reusedJob = false;
|
||||
let chosenBackend = null;
|
||||
const existingJob = lookupJob(installId, jobId);
|
||||
if (existingJob) {
|
||||
reusedJob = true;
|
||||
chosenBackend = existingJob.backend;
|
||||
} else {
|
||||
const cfg = await getConfigSnapshot();
|
||||
const hasHardware = !!cfg.relay_parakeet_base_url;
|
||||
const quota = await getTierQuotas();
|
||||
const preference =
|
||||
cfg.relay_transcribe_backend_preference || "gemini_first";
|
||||
const plan = planBackend(row, quota, { hasHardware, preference });
|
||||
if (!plan.allowed) {
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
tier,
|
||||
pipeline: "transcribe",
|
||||
backend: null,
|
||||
model: null,
|
||||
status: "refused",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - t0,
|
||||
cost_usd: 0,
|
||||
job_id: jobId,
|
||||
error: plan.reason,
|
||||
});
|
||||
const e = await errorEnvelope({
|
||||
error: plan.reason,
|
||||
installId,
|
||||
tier,
|
||||
statusHint: 402,
|
||||
});
|
||||
return res.status(402).json(e.body);
|
||||
}
|
||||
chosenBackend = plan.backend;
|
||||
}
|
||||
|
||||
// ── Download phase ─────────────────────────────────────────────
|
||||
const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "relay-dl-"));
|
||||
const isYT = type === "youtube" || (!type && looksLikeYouTube(mediaUrl));
|
||||
const dlStart = Date.now();
|
||||
let audio;
|
||||
let downloadMs = 0;
|
||||
try {
|
||||
audio = isYT
|
||||
? await downloadYouTube(mediaUrl, tmpDir)
|
||||
: await downloadDirect(mediaUrl, tmpDir);
|
||||
downloadMs = Date.now() - dlStart;
|
||||
console.log(
|
||||
`[transcribe-url] downloaded ${audio.bytes} bytes from ${isYT ? "youtube" : "direct"} in ${downloadMs}ms (${mediaUrl.slice(0, 80)})`
|
||||
// Billing vs. routing decoupled — see analyze.js for reasoning.
|
||||
const reusedSummaryJob = !!lookupJob({ creditKey, installId, license, jobId: summaryJobId });
|
||||
const cfgPlan = await getConfigSnapshot();
|
||||
const hw = await resolveHardwareConfig(cfgPlan);
|
||||
// Operator-only diagnostic — see summarize-url.js for the full
|
||||
// reasoning. We don't 503 here on blocked_reason because doing
|
||||
// so pre-empts planBackend and would surface operator-internal
|
||||
// Spark Control / parakeet wording to clients even when Gemini
|
||||
// was the configured preference.
|
||||
if (hw.transcribe.blocked_reason) {
|
||||
console.warn(
|
||||
`[transcribe-url] hardware transcribe currently blocked (planBackend will route to Gemini if available): ${hw.transcribe.blocked_reason}`,
|
||||
);
|
||||
} catch (err) {
|
||||
try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
|
||||
console.error(`[transcribe-url] download failed: ${err?.message || err}`);
|
||||
}
|
||||
const hasHardware = !!hw.transcribe.url;
|
||||
const quota = await getTierQuotas();
|
||||
const preference =
|
||||
cfgPlan.relay_transcribe_backend_preference || "gemini_first";
|
||||
const plan = planBackend(row, quota, { hasHardware, preference });
|
||||
if (!plan.allowed) {
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
install_id: auditInstall,
|
||||
license_fingerprint: licenseFp,
|
||||
tier,
|
||||
pipeline: "transcribe",
|
||||
backend: chosenBackend,
|
||||
backend: null,
|
||||
model: null,
|
||||
status: "error",
|
||||
status: "refused",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - t0,
|
||||
download_ms: Date.now() - dlStart,
|
||||
duration_ms: 0,
|
||||
cost_usd: 0,
|
||||
job_id: jobId,
|
||||
error: ("download_failed: " + (err?.message || String(err))).slice(0, 200),
|
||||
job_id: summaryJobId,
|
||||
media_url: mediaUrl || null,
|
||||
title: title || null,
|
||||
error: plan.reason,
|
||||
});
|
||||
const e = await errorEnvelope({
|
||||
error: "download_failed: " + (err?.message || String(err)).slice(0, 200),
|
||||
error: plan.reason,
|
||||
installId,
|
||||
license,
|
||||
tier,
|
||||
statusHint: 502,
|
||||
statusHint: 402,
|
||||
});
|
||||
return res.status(502).json(e.body);
|
||||
return res.status(402).json(e.body);
|
||||
}
|
||||
const chosenBackend = plan.backend;
|
||||
|
||||
// ── Transcription phase ────────────────────────────────────────
|
||||
const cfg = await getConfigSnapshot();
|
||||
let result;
|
||||
try {
|
||||
const audioBuf = await fs.readFile(audio.filePath);
|
||||
const mimeType = bodyMime || audio.mimeType;
|
||||
if (chosenBackend === "gemini") {
|
||||
const backend = createGeminiBackend({
|
||||
apiKey: cfg.relay_gemini_api_key,
|
||||
transcriptionModel: cfg.relay_gemini_transcription_model,
|
||||
analysisModel: cfg.relay_gemini_analysis_model,
|
||||
});
|
||||
result = await backend.transcribeAudio({
|
||||
audio: audioBuf,
|
||||
mimeType,
|
||||
title: title || "",
|
||||
channel: channel || "",
|
||||
description: description || "",
|
||||
chapters: Array.isArray(chapters) ? chapters : [],
|
||||
offsetSeconds: 0,
|
||||
});
|
||||
} else {
|
||||
const backend = createHardwareBackend({
|
||||
parakeetBaseURL: cfg.relay_parakeet_base_url,
|
||||
gemmaBaseURL: cfg.relay_gemma_base_url,
|
||||
parakeetModel: cfg.relay_parakeet_model,
|
||||
gemmaModel: cfg.relay_gemma_model,
|
||||
});
|
||||
result = await backend.transcribeAudio({
|
||||
audio: audioBuf,
|
||||
mimeType,
|
||||
offsetSeconds: 0,
|
||||
});
|
||||
}
|
||||
} catch (err) {
|
||||
try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
|
||||
if (reusedJob) refundJob(installId, jobId);
|
||||
console.error(`[transcribe-url] transcribe failed: ${err?.message}`);
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
tier,
|
||||
pipeline: "transcribe",
|
||||
// Mint the background job + RESPOND IMMEDIATELY.
|
||||
const job = createJob({
|
||||
kind: "transcribe-url",
|
||||
installId: auditInstall,
|
||||
metadata: {
|
||||
owner: creditKey, // authorizes the /jobs/:id poll (per-identity)
|
||||
media_url: mediaUrl,
|
||||
backend: chosenBackend,
|
||||
model:
|
||||
chosenBackend === "gemini"
|
||||
? cfg.relay_gemini_transcription_model
|
||||
: cfg.relay_parakeet_model,
|
||||
status: "error",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - t0,
|
||||
download_ms: downloadMs,
|
||||
cost_usd: 0,
|
||||
job_id: jobId,
|
||||
error: (err?.message || String(err)).slice(0, 200),
|
||||
});
|
||||
const e = await errorEnvelope({
|
||||
error: err?.message || "backend_error",
|
||||
installId,
|
||||
tier,
|
||||
statusHint: err?.status || 502,
|
||||
});
|
||||
return res.status(e.statusHint).json(e.body);
|
||||
} finally {
|
||||
try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
|
||||
}
|
||||
|
||||
// ── Commit + audit ─────────────────────────────────────────────
|
||||
let creditCharged = 0;
|
||||
if (!reusedJob) {
|
||||
await commitCredit(installId, { backend: chosenBackend, tier });
|
||||
markJobCharged(installId, jobId, { backend: chosenBackend, tier });
|
||||
creditCharged = 1;
|
||||
}
|
||||
const costDetails =
|
||||
chosenBackend === "gemini" && result.usage
|
||||
? calcGeminiCost(result.model, result.usage)
|
||||
: {
|
||||
input_tokens: 0,
|
||||
output_tokens: 0,
|
||||
thinking_tokens: 0,
|
||||
cost_usd: 0,
|
||||
};
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
tier,
|
||||
pipeline: "transcribe",
|
||||
backend: chosenBackend,
|
||||
model: result?.model || null,
|
||||
status: "success",
|
||||
credit_charged: creditCharged,
|
||||
duration_ms: Date.now() - t0,
|
||||
download_ms: downloadMs,
|
||||
audio_bytes: audio.bytes,
|
||||
job_id: jobId,
|
||||
...costDetails,
|
||||
summary_job_id: summaryJobId,
|
||||
},
|
||||
});
|
||||
|
||||
const body = await envelope({ result, installId, tier, creditCharged });
|
||||
// Background worker — runs after this handler has returned.
|
||||
// Errors are captured into the job record; nothing thrown here
|
||||
// can crash the route process.
|
||||
(async () => {
|
||||
const workerT0 = Date.now();
|
||||
markRunning(job.id);
|
||||
setProgress(job.id, "downloading media…");
|
||||
|
||||
const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "relay-dl-"));
|
||||
const isYT = type === "youtube" || (!type && looksLikeYouTube(mediaUrl));
|
||||
let audio;
|
||||
let downloadMs = 0;
|
||||
try {
|
||||
const dlStart = Date.now();
|
||||
audio = isYT
|
||||
? await downloadYouTube(mediaUrl, tmpDir)
|
||||
: await downloadDirect(mediaUrl, tmpDir);
|
||||
downloadMs = Date.now() - dlStart;
|
||||
console.log(
|
||||
`[transcribe-url ${job.id.slice(0, 8)}] downloaded ${audio.bytes} bytes from ${isYT ? "youtube" : "direct"} in ${downloadMs}ms`
|
||||
);
|
||||
audio.seconds = await getAudioDurationSeconds(audio.filePath);
|
||||
if (!title && audio.title) {
|
||||
// yt-dlp captured the title during download; use it when
|
||||
// the client didn't pass one.
|
||||
title = audio.title;
|
||||
}
|
||||
setProgress(job.id, `transcribing ${Math.round((audio.seconds || 0) / 60)} min audio…`);
|
||||
} catch (err) {
|
||||
try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
|
||||
const msg = (err?.message || String(err)).slice(0, 300);
|
||||
console.error(`[transcribe-url ${job.id.slice(0, 8)}] download failed: ${msg}`);
|
||||
await recordCall({
|
||||
install_id: auditInstall,
|
||||
license_fingerprint: licenseFp,
|
||||
tier,
|
||||
pipeline: "transcribe",
|
||||
backend: chosenBackend,
|
||||
model: null,
|
||||
status: "error",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - workerT0,
|
||||
download_ms: Date.now() - workerT0,
|
||||
audio_seconds: null,
|
||||
cost_usd: 0,
|
||||
job_id: summaryJobId,
|
||||
media_url: mediaUrl || null,
|
||||
title: title || null,
|
||||
error: "download_failed: " + msg,
|
||||
});
|
||||
markFailed(job.id, "download_failed: " + msg);
|
||||
return;
|
||||
}
|
||||
|
||||
// Transcription phase
|
||||
const cfg = await getConfigSnapshot();
|
||||
let result;
|
||||
// Stamp the moment transcribe is about to start (AFTER download
|
||||
// finished). Used for duration_ms on the audit row so the
|
||||
// "TX wall time" column reflects ONLY the transcribe phase.
|
||||
const txPhaseStart = Date.now();
|
||||
try {
|
||||
const audioBuf = await fs.readFile(audio.filePath);
|
||||
const mimeType = bodyMime || audio.mimeType;
|
||||
if (chosenBackend === "gemini") {
|
||||
const backend = createGeminiBackend({
|
||||
apiKey: cfg.relay_gemini_api_key,
|
||||
transcriptionModel: cfg.relay_gemini_transcription_model,
|
||||
analysisModel: cfg.relay_gemini_analysis_model,
|
||||
txChunkSeconds: (cfg.relay_gemini_tx_chunk_minutes || 30) * 60,
|
||||
txConcurrency: cfg.relay_gemini_tx_concurrency || 12,
|
||||
transcribePromptOverride: cfg.relay_transcribe_prompt || "",
|
||||
});
|
||||
result = await backend.transcribeAudio({
|
||||
audio: audioBuf,
|
||||
mimeType,
|
||||
title: title || "",
|
||||
channel: channel || "",
|
||||
description: description || "",
|
||||
chapters: Array.isArray(chapters) ? chapters : [],
|
||||
offsetSeconds: 0,
|
||||
});
|
||||
} else {
|
||||
const backend = createHardwareBackend({
|
||||
parakeetBaseURL: hw.transcribe.url || "",
|
||||
gemmaBaseURL: hw.analyze.url || "",
|
||||
sparkControlBaseURL: hw.sparkBase || "",
|
||||
parakeetModel: hw.transcribe.model || "",
|
||||
gemmaModel: hw.analyze.model || "",
|
||||
txChunkSeconds: (cfg.relay_hardware_tx_chunk_minutes || 5) * 60,
|
||||
txChunkOverlapSeconds: cfg.relay_hardware_tx_chunk_overlap_seconds ?? 30,
|
||||
diarizationEnabled: !!cfg.relay_hardware_diarization_enabled,
|
||||
clusterThresholdPct: cfg.relay_hardware_voice_clustering_threshold ?? 70,
|
||||
anchorMinSpeakingSec: cfg.relay_hardware_anchor_min_speaking_sec ?? 30,
|
||||
smallClusterMaxSpeakingSec: cfg.relay_hardware_small_cluster_max_speaking_sec ?? 15,
|
||||
uncertainMarginPct: cfg.relay_hardware_uncertain_margin_pct ?? 10,
|
||||
txConcurrency: cfg.relay_hardware_tx_concurrency || 4,
|
||||
});
|
||||
result = await backend.transcribeAudio({
|
||||
audio: audioBuf,
|
||||
mimeType,
|
||||
offsetSeconds: 0,
|
||||
});
|
||||
}
|
||||
} catch (err) {
|
||||
try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
|
||||
if (reusedSummaryJob) await refundJob({ creditKey, installId, license, jobId: summaryJobId });
|
||||
const msg = (err?.message || String(err)).slice(0, 400);
|
||||
console.error(`[transcribe-url ${job.id.slice(0, 8)}] transcribe failed: ${msg}`);
|
||||
if (chosenBackend === "hardware") {
|
||||
reportHealthEvent({
|
||||
service: "parakeet",
|
||||
ok: false,
|
||||
error: msg.slice(0, 280),
|
||||
ms: Date.now() - workerT0,
|
||||
});
|
||||
}
|
||||
await recordCall({
|
||||
install_id: auditInstall,
|
||||
license_fingerprint: licenseFp,
|
||||
tier,
|
||||
pipeline: "transcribe",
|
||||
backend: chosenBackend,
|
||||
model:
|
||||
chosenBackend === "gemini"
|
||||
? cfg.relay_gemini_transcription_model
|
||||
: hw.transcribe.model || "(auto)",
|
||||
status: "error",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - txPhaseStart,
|
||||
download_ms: downloadMs,
|
||||
audio_seconds: audio?.seconds || null,
|
||||
audio_bytes: audio?.bytes || null,
|
||||
cost_usd: 0,
|
||||
job_id: summaryJobId,
|
||||
media_url: mediaUrl || null,
|
||||
title: title || null,
|
||||
error: msg,
|
||||
});
|
||||
markFailed(job.id, msg);
|
||||
return;
|
||||
} finally {
|
||||
try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
|
||||
}
|
||||
|
||||
// Success — commit credit (once per summary job_id), audit, mark done.
|
||||
let creditCharged = 0;
|
||||
if (!reusedSummaryJob) {
|
||||
await commitCredit({ creditKey, installId, license, backend: chosenBackend, tier });
|
||||
await markJobCharged({ creditKey, installId, license, jobId: summaryJobId, backend: chosenBackend, tier });
|
||||
creditCharged = 1;
|
||||
}
|
||||
const costDetails =
|
||||
chosenBackend === "gemini" && result.usage
|
||||
? calcGeminiCost(result.model, result.usage)
|
||||
: {
|
||||
input_tokens: 0,
|
||||
output_tokens: 0,
|
||||
thinking_tokens: 0,
|
||||
cost_usd: 0,
|
||||
};
|
||||
// Truncation detection — mark partial when any chunk hit
|
||||
// the silent output-token cap and emitted < 80% of its
|
||||
// expected audio. See gemini.js for the actual coverage
|
||||
// computation; here we just propagate to the audit row.
|
||||
const truncatedChunks = Array.isArray(result?.truncated_chunks)
|
||||
? result.truncated_chunks
|
||||
: [];
|
||||
const wasTruncated = truncatedChunks.length > 0;
|
||||
const truncationError = wasTruncated
|
||||
? `transcribe: ${truncatedChunks.length} chunk(s) truncated — missing ~${truncatedChunks.reduce((s, c) => s + (c.missingSec || 0), 0)}s of speech (model: ${result.model || "unknown"}). Likely hit maxOutputTokens.`
|
||||
: null;
|
||||
await recordCall({
|
||||
install_id: auditInstall,
|
||||
license_fingerprint: licenseFp,
|
||||
tier,
|
||||
pipeline: "transcribe",
|
||||
backend: chosenBackend,
|
||||
model: result?.model || null,
|
||||
status: wasTruncated ? "partial" : "success",
|
||||
credit_charged: creditCharged,
|
||||
duration_ms: Date.now() - txPhaseStart,
|
||||
download_ms: downloadMs,
|
||||
audio_bytes: audio.bytes,
|
||||
audio_seconds: audio.seconds || null,
|
||||
job_id: summaryJobId,
|
||||
attempts: result?.attempts || null,
|
||||
// Per-job context for the operator dashboard's per-video table.
|
||||
// media_url + title let the dashboard show what was being
|
||||
// processed; chunk_count exposes the new server-side chunking
|
||||
// (1 for short audio, N for ≥30 min audio split by the Gemini
|
||||
// backend or by the hardware backend's Parakeet chunker).
|
||||
media_url: mediaUrl || null,
|
||||
title: title || null,
|
||||
chunk_count: result?.chunk_count ?? null,
|
||||
chunk_durations_ms: result?.chunk_durations_ms || null,
|
||||
truncated_chunks: wasTruncated ? truncatedChunks : null,
|
||||
error: truncationError,
|
||||
...costDetails,
|
||||
});
|
||||
markComplete(job.id, {
|
||||
result,
|
||||
credit_charged: creditCharged,
|
||||
tier,
|
||||
});
|
||||
console.log(
|
||||
`[transcribe-url ${job.id.slice(0, 8)}] complete in ${((Date.now() - workerT0) / 1000).toFixed(1)}s`
|
||||
);
|
||||
// Optional: persist transcript output for the operator's
|
||||
// "View output" dashboard feature. Only when the config flag
|
||||
// is set (default false) — saving real-user transcripts is an
|
||||
// opt-in operator decision, not a default. Note that we only
|
||||
// have the transcript here (analyze runs as a separate
|
||||
// /relay/analyze call in the Recap flow); the analyze row will
|
||||
// overwrite this file later with the full transcript+analysis
|
||||
// payload when it lands. Best-effort, errors ignored.
|
||||
if (cfg.relay_save_user_outputs) {
|
||||
await saveJobOutput(summaryJobId || job.id, {
|
||||
batch_id: null,
|
||||
source: null,
|
||||
transcript: result?.text || "",
|
||||
analysis: null,
|
||||
analysis_raw_text: null,
|
||||
meta: {
|
||||
title: title || null,
|
||||
media_url: mediaUrl,
|
||||
audio_seconds: audio.seconds || null,
|
||||
audio_bytes: audio.bytes,
|
||||
captions_mode: null,
|
||||
transcribe_backend: chosenBackend,
|
||||
transcribe_model: result?.model || null,
|
||||
analyze_backend: null,
|
||||
analyze_model: null,
|
||||
},
|
||||
});
|
||||
}
|
||||
})().catch((err) => {
|
||||
// Top-level catch — should be unreachable since the worker
|
||||
// handles its own try/catch, but defends against unexpected
|
||||
// throws so the job doesn't sit in "running" forever.
|
||||
markFailed(job.id, "worker_crashed: " + (err?.message || String(err)));
|
||||
console.error(`[transcribe-url ${job.id.slice(0, 8)}] worker crashed:`, err);
|
||||
});
|
||||
|
||||
// Hand back the job_id immediately. Client will poll for status.
|
||||
const body = await envelope({
|
||||
result: {
|
||||
job_id: job.id,
|
||||
status: "queued",
|
||||
kind: "transcribe-url",
|
||||
},
|
||||
creditKey,
|
||||
installId,
|
||||
license,
|
||||
tier,
|
||||
});
|
||||
res.json(body);
|
||||
});
|
||||
|
||||
// GET /relay/jobs/:id — status endpoint polled by the client after
// POST /relay/transcribe-url has handed back a job_id.
//
// Access is owner-scoped so job ids can't be read across identities:
//   - jobs that carry metadata.owner are authorized by comparing it to
//     the caller's creditKey;
//   - jobs without metadata.owner are authorized by matching the job's
//     install_id against the caller's install id.
//
// Responses:
//   401 (or err.status) — identity could not be resolved
//   400 — license identity without an X-Recap-Install-Id header
//   404 — no job with that id
//   403 — job exists but belongs to a different owner
//   200 — envelope with status/progress; the transcribe result and
//         credit_charged are only populated once status is "complete".
router.get("/jobs/:id", async (req, res) => {
  let identity;
  try {
    identity = await resolveIdentity(req);
  } catch (err) {
    const e = await errorEnvelope({
      error: err?.message || "auth_error",
      statusHint: err?.status || 401,
    });
    return res.status(e.statusHint || 401).json(e.body);
  }
  if (identity.kind === "license" && !identity.installId) {
    const e = await errorEnvelope({
      error: "missing X-Recap-Install-Id header",
      statusHint: 400,
    });
    return res.status(400).json(e.body);
  }

  const { creditKey, installId, license } = identity;
  const ownerRow = await getOrCreateRow({ creditKey, installId, license });
  const tier = identityTier(identity, ownerRow);

  const jobId = (req.params.id || "").trim();
  const job = getJob(jobId);
  if (!job) {
    const e = await errorEnvelope({
      error: "job_not_found",
      creditKey,
      installId,
      tier,
      statusHint: 404,
    });
    return res.status(404).json(e.body);
  }

  // Prefer the per-identity owner stamp when the job has one; otherwise
  // fall back to the install_id recorded on the job.
  const ownerOk = job.metadata?.owner
    ? job.metadata.owner === creditKey
    : identity.installId && job.install_id === identity.installId;
  if (!ownerOk) {
    const e = await errorEnvelope({
      error: "job_belongs_to_different_owner",
      creditKey,
      installId,
      tier,
      statusHint: 403,
    });
    return res.status(403).json(e.body);
  }

  const isComplete = job.status === "complete";
  const body = await envelope({
    result: {
      job_id: job.id,
      kind: job.kind,
      status: job.status,
      progress: job.progress,
      started_at: job.started_at,
      updated_at: job.updated_at,
      completed_at: job.completed_at,
      // Include the FULL transcribe-result on completion so the
      // client doesn't need a second round-trip.
      result: isComplete ? job.result?.result : null,
      credit_charged: isComplete ? job.result?.credit_charged || 0 : 0,
      error: job.error,
    },
    creditKey,
    installId,
    license,
    tier,
  });
  res.json(body);
});
|
||||
|
||||
|
||||
+118
-57
@@ -28,8 +28,13 @@
|
||||
|
||||
import express from "express";
|
||||
import multer from "multer";
|
||||
import { resolveLicense } from "../keysat-client.js";
|
||||
import { getOrCreateRow, planBackend, commitCredit } from "../credits.js";
|
||||
import { resolveIdentity, identityTier } from "../identity.js";
|
||||
import {
|
||||
getOrCreateRow,
|
||||
planBackend,
|
||||
commitCredit,
|
||||
licenseFingerprint,
|
||||
} from "../credits.js";
|
||||
import { lookupJob, markJobCharged, refundJob } from "../job-credits.js";
|
||||
import { getConfigSnapshot, getTierQuotas } from "../config.js";
|
||||
import { createGeminiBackend } from "../backends/gemini.js";
|
||||
@@ -37,6 +42,9 @@ import { createHardwareBackend } from "../backends/hardware.js";
|
||||
import { envelope, errorEnvelope } from "./envelope.js";
|
||||
import { recordCall } from "../audit-log.js";
|
||||
import { calcGeminiCost } from "../pricing.js";
|
||||
import { getAudioDurationSecondsFromBuffer } from "../audio-meta.js";
|
||||
import { resolveHardwareConfig } from "../hardware-config.js";
|
||||
import { reportHealthEvent } from "../spark-control-events.js";
|
||||
|
||||
const upload = multer({
|
||||
storage: multer.memoryStorage(),
|
||||
@@ -48,67 +56,93 @@ export function transcribeRouter() {
|
||||
|
||||
router.post("/transcribe", upload.single("audio"), async (req, res) => {
|
||||
const t0 = Date.now();
|
||||
const installId = req.header("X-Recap-Install-Id");
|
||||
const jobId = req.header("X-Recap-Job-Id") || null;
|
||||
const auth = req.header("Authorization");
|
||||
|
||||
if (!installId) {
|
||||
let identity;
|
||||
try {
|
||||
identity = await resolveIdentity(req);
|
||||
} catch (err) {
|
||||
const e = await errorEnvelope({
|
||||
error: err?.message || "auth_error",
|
||||
statusHint: err?.status || 401,
|
||||
});
|
||||
return res.status(e.statusHint || 401).json(e.body);
|
||||
}
|
||||
if (identity.kind === "license" && !identity.installId) {
|
||||
const e = await errorEnvelope({
|
||||
error: "missing X-Recap-Install-Id header",
|
||||
statusHint: 400,
|
||||
});
|
||||
return res.status(400).json(e.body);
|
||||
}
|
||||
const { creditKey, installId, license } = identity;
|
||||
if (!req.file) {
|
||||
const e = await errorEnvelope({ error: "missing audio file", installId, statusHint: 400 });
|
||||
const e = await errorEnvelope({ error: "missing audio file", creditKey, installId, statusHint: 400 });
|
||||
return res.status(400).json(e.body);
|
||||
}
|
||||
|
||||
const license = await resolveLicense(auth);
|
||||
const tier = license.tier;
|
||||
|
||||
const row = await getOrCreateRow(installId);
|
||||
const row = await getOrCreateRow({ creditKey, installId, license });
|
||||
const tier = identityTier(identity, row);
|
||||
row.tier_snapshot = tier;
|
||||
const licenseFp = identity.kind === "cloud" ? null : licenseFingerprint(license);
|
||||
const auditInstall = installId || identity.userId || null;
|
||||
|
||||
let reusedJob = false;
|
||||
let chosenBackend = null;
|
||||
const existingJob = lookupJob(installId, jobId);
|
||||
if (existingJob) {
|
||||
reusedJob = true;
|
||||
chosenBackend = existingJob.backend;
|
||||
} else {
|
||||
const cfg = await getConfigSnapshot();
|
||||
const hasHardware = !!cfg.relay_parakeet_base_url;
|
||||
const quota = await getTierQuotas();
|
||||
const preference =
|
||||
cfg.relay_transcribe_backend_preference || "gemini_first";
|
||||
const plan = planBackend(row, quota, { hasHardware, preference });
|
||||
if (!plan.allowed) {
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
tier,
|
||||
pipeline: "transcribe",
|
||||
backend: null,
|
||||
model: null,
|
||||
status: "refused",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - t0,
|
||||
cost_usd: 0,
|
||||
job_id: jobId,
|
||||
error: plan.reason,
|
||||
});
|
||||
const e = await errorEnvelope({
|
||||
error: plan.reason,
|
||||
installId,
|
||||
tier,
|
||||
statusHint: 402,
|
||||
});
|
||||
return res.status(402).json(e.body);
|
||||
}
|
||||
chosenBackend = plan.backend;
|
||||
}
|
||||
// Probe audio duration BEFORE the backend call so we can record
|
||||
// it on every audit row (success and error alike). Used by the
|
||||
// dashboard to normalize wall-clock time to "ms per minute of
|
||||
// audio" — a backend-agnostic speed benchmark.
|
||||
const audioSeconds = await getAudioDurationSecondsFromBuffer(
|
||||
req.file?.buffer
|
||||
);
|
||||
|
||||
// Billing vs. routing are decoupled — see analyze.js for the
|
||||
// full reasoning. Look up job to decide whether to charge a
|
||||
// credit, but always run planBackend fresh so transcribe's
|
||||
// routing decision respects relay_transcribe_backend_preference.
|
||||
const reusedJob = !!lookupJob({ creditKey, installId, license, jobId });
|
||||
const cfg = await getConfigSnapshot();
|
||||
const hw = await resolveHardwareConfig(cfg);
|
||||
// Operator-only diagnostic — see the matching comment in
|
||||
// summarize-url.js for the full reasoning. We don't 503 here on
|
||||
// blocked_reason because doing so pre-empts planBackend and
|
||||
// surfaces operator-internal wording to clients even when
|
||||
// Gemini was the configured preference.
|
||||
if (hw.transcribe.blocked_reason) {
|
||||
console.warn(
|
||||
`[transcribe] hardware transcribe currently blocked (planBackend will route to Gemini if available): ${hw.transcribe.blocked_reason}`,
|
||||
);
|
||||
}
|
||||
const hasHardware = !!hw.transcribe.url;
|
||||
const quota = await getTierQuotas();
|
||||
const preference =
|
||||
cfg.relay_transcribe_backend_preference || "gemini_first";
|
||||
const plan = planBackend(row, quota, { hasHardware, preference });
|
||||
if (!plan.allowed) {
|
||||
await recordCall({
|
||||
install_id: auditInstall,
|
||||
license_fingerprint: licenseFp,
|
||||
tier,
|
||||
pipeline: "transcribe",
|
||||
backend: null,
|
||||
model: null,
|
||||
status: "refused",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - t0,
|
||||
audio_seconds: audioSeconds,
|
||||
cost_usd: 0,
|
||||
job_id: jobId,
|
||||
error: plan.reason,
|
||||
});
|
||||
const e = await errorEnvelope({
|
||||
error: plan.reason,
|
||||
creditKey,
|
||||
installId,
|
||||
tier,
|
||||
statusHint: 402,
|
||||
});
|
||||
return res.status(402).json(e.body);
|
||||
}
|
||||
const chosenBackend = plan.backend;
|
||||
let result;
|
||||
try {
|
||||
if (chosenBackend === "gemini") {
|
||||
@@ -116,6 +150,8 @@ export function transcribeRouter() {
|
||||
apiKey: cfg.relay_gemini_api_key,
|
||||
transcriptionModel: cfg.relay_gemini_transcription_model,
|
||||
analysisModel: cfg.relay_gemini_analysis_model,
|
||||
txChunkSeconds: (cfg.relay_gemini_tx_chunk_minutes || 30) * 60,
|
||||
txConcurrency: cfg.relay_gemini_tx_concurrency || 12,
|
||||
});
|
||||
result = await backend.transcribeAudio({
|
||||
audio: req.file.buffer,
|
||||
@@ -128,10 +164,19 @@ export function transcribeRouter() {
|
||||
});
|
||||
} else {
|
||||
const backend = createHardwareBackend({
|
||||
parakeetBaseURL: cfg.relay_parakeet_base_url,
|
||||
gemmaBaseURL: cfg.relay_gemma_base_url,
|
||||
parakeetModel: cfg.relay_parakeet_model,
|
||||
gemmaModel: cfg.relay_gemma_model,
|
||||
parakeetBaseURL: hw.transcribe.url || "",
|
||||
gemmaBaseURL: hw.analyze.url || "",
|
||||
sparkControlBaseURL: hw.sparkBase || "",
|
||||
parakeetModel: hw.transcribe.model || "",
|
||||
gemmaModel: hw.analyze.model || "",
|
||||
txChunkSeconds: (cfg.relay_hardware_tx_chunk_minutes || 5) * 60,
|
||||
txChunkOverlapSeconds: cfg.relay_hardware_tx_chunk_overlap_seconds ?? 30,
|
||||
diarizationEnabled: !!cfg.relay_hardware_diarization_enabled,
|
||||
clusterThresholdPct: cfg.relay_hardware_voice_clustering_threshold ?? 70,
|
||||
anchorMinSpeakingSec: cfg.relay_hardware_anchor_min_speaking_sec ?? 30,
|
||||
smallClusterMaxSpeakingSec: cfg.relay_hardware_small_cluster_max_speaking_sec ?? 15,
|
||||
uncertainMarginPct: cfg.relay_hardware_uncertain_margin_pct ?? 10,
|
||||
txConcurrency: cfg.relay_hardware_tx_concurrency || 4,
|
||||
});
|
||||
result = await backend.transcribeAudio({
|
||||
audio: req.file.buffer,
|
||||
@@ -140,25 +185,38 @@ export function transcribeRouter() {
|
||||
});
|
||||
}
|
||||
} catch (err) {
|
||||
if (reusedJob) refundJob(installId, jobId);
|
||||
if (reusedJob) await refundJob({ creditKey, installId, license, jobId });
|
||||
console.error(`[relay/transcribe] backend error: ${err?.message}`);
|
||||
// Fire-and-forget health report for hardware-served calls;
|
||||
// Gemini failures are a separate observability surface.
|
||||
if (chosenBackend === "hardware") {
|
||||
reportHealthEvent({
|
||||
service: "parakeet",
|
||||
ok: false,
|
||||
error: (err?.message || String(err)).slice(0, 280),
|
||||
ms: Date.now() - t0,
|
||||
});
|
||||
}
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
install_id: auditInstall,
|
||||
license_fingerprint: licenseFp,
|
||||
tier,
|
||||
pipeline: "transcribe",
|
||||
backend: chosenBackend,
|
||||
model: chosenBackend === "gemini"
|
||||
? cfg.relay_gemini_transcription_model
|
||||
: cfg.relay_parakeet_model,
|
||||
: hw.transcribe.model || "(auto)",
|
||||
status: "error",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - t0,
|
||||
audio_seconds: audioSeconds,
|
||||
cost_usd: 0,
|
||||
job_id: jobId,
|
||||
error: (err?.message || String(err)).slice(0, 200),
|
||||
});
|
||||
const e = await errorEnvelope({
|
||||
error: err?.message || "backend_error",
|
||||
creditKey,
|
||||
installId,
|
||||
tier,
|
||||
statusHint: err?.status || 502,
|
||||
@@ -168,8 +226,8 @@ export function transcribeRouter() {
|
||||
|
||||
let creditCharged = 0;
|
||||
if (!reusedJob) {
|
||||
await commitCredit(installId, { backend: chosenBackend, tier });
|
||||
markJobCharged(installId, jobId, { backend: chosenBackend, tier });
|
||||
await commitCredit({ creditKey, installId, license, backend: chosenBackend, tier });
|
||||
await markJobCharged({ creditKey, installId, license, jobId, backend: chosenBackend, tier });
|
||||
creditCharged = 1;
|
||||
}
|
||||
|
||||
@@ -188,6 +246,7 @@ export function transcribeRouter() {
|
||||
};
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
license_fingerprint: licenseFp,
|
||||
tier,
|
||||
pipeline: "transcribe",
|
||||
backend: chosenBackend,
|
||||
@@ -195,11 +254,13 @@ export function transcribeRouter() {
|
||||
status: "success",
|
||||
credit_charged: creditCharged,
|
||||
duration_ms: Date.now() - t0,
|
||||
audio_seconds: audioSeconds,
|
||||
job_id: jobId,
|
||||
attempts: result?.attempts || null,
|
||||
...costDetails,
|
||||
});
|
||||
|
||||
const body = await envelope({ result, installId, tier, creditCharged });
|
||||
const body = await envelope({ result, creditKey, installId, license, tier, creditCharged });
|
||||
res.json(body);
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user