Wire new routes; identity, summarize-url, dashboard, admin
This commit is contained in:
+514
-177
@@ -33,8 +33,13 @@ import { execFile } from "child_process";
|
||||
import { promisify } from "util";
|
||||
import { Readable } from "stream";
|
||||
import { pipeline } from "stream/promises";
|
||||
import { resolveLicense } from "../keysat-client.js";
|
||||
import { getOrCreateRow, planBackend, commitCredit } from "../credits.js";
|
||||
import { resolveIdentity, identityTier } from "../identity.js";
|
||||
import {
|
||||
getOrCreateRow,
|
||||
planBackend,
|
||||
commitCredit,
|
||||
licenseFingerprint,
|
||||
} from "../credits.js";
|
||||
import { lookupJob, markJobCharged, refundJob } from "../job-credits.js";
|
||||
import { getConfigSnapshot, getTierQuotas } from "../config.js";
|
||||
import { createGeminiBackend } from "../backends/gemini.js";
|
||||
@@ -42,6 +47,18 @@ import { createHardwareBackend } from "../backends/hardware.js";
|
||||
import { envelope, errorEnvelope } from "./envelope.js";
|
||||
import { recordCall } from "../audit-log.js";
|
||||
import { calcGeminiCost } from "../pricing.js";
|
||||
import { getAudioDurationSeconds } from "../audio-meta.js";
|
||||
import { resolveHardwareConfig } from "../hardware-config.js";
|
||||
import { reportHealthEvent } from "../spark-control-events.js";
|
||||
import {
|
||||
createJob,
|
||||
markRunning,
|
||||
setProgress,
|
||||
markComplete,
|
||||
markFailed,
|
||||
getJob,
|
||||
} from "../jobs.js";
|
||||
import { saveJobOutput } from "../output-store.js";
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
@@ -54,7 +71,7 @@ const MAX_DOWNLOAD_BYTES = 500 * 1024 * 1024;
|
||||
// rate-limits; a hard ceiling avoids holding the request open forever.
|
||||
const DOWNLOAD_TIMEOUT_MS = 10 * 60 * 1000;
|
||||
|
||||
function looksLikeYouTube(url) {
|
||||
export function looksLikeYouTube(url) {
|
||||
if (!url) return false;
|
||||
return /(?:^|\.)(youtube\.com|youtu\.be)\b/i.test(url);
|
||||
}
|
||||
@@ -79,7 +96,7 @@ function guessMimeFromExt(filePath) {
|
||||
// Download an HTTP(S) audio URL to a temp file. Stops if the file
|
||||
// would exceed MAX_DOWNLOAD_BYTES. Returns { filePath, bytes,
|
||||
// mimeType }.
|
||||
async function downloadDirect(url, tmpDir) {
|
||||
export async function downloadDirect(url, tmpDir) {
|
||||
const res = await fetch(url, {
|
||||
redirect: "follow",
|
||||
signal: AbortSignal.timeout(DOWNLOAD_TIMEOUT_MS),
|
||||
@@ -143,7 +160,11 @@ async function downloadDirect(url, tmpDir) {
|
||||
|
||||
// Download a YouTube URL via yt-dlp. Picks the audio-only m4a/mp3.
|
||||
// Logs the chosen path back as the file. Caller manages tmpDir.
|
||||
async function downloadYouTube(url, tmpDir) {
|
||||
// Captures the video title via `--print "%(title)s"` so callers (the
|
||||
// summarize-url / transcribe-url workers) can stamp the Jobs table
|
||||
// with the real title instead of "Untitled" when the client didn't
|
||||
// pre-fetch metadata.
|
||||
export async function downloadYouTube(url, tmpDir) {
|
||||
const outTemplate = path.join(tmpDir, "audio.%(ext)s");
|
||||
const args = [
|
||||
"-x", // extract audio
|
||||
@@ -156,18 +177,93 @@ async function downloadYouTube(url, tmpDir) {
|
||||
"--no-playlist",
|
||||
"--no-simulate",
|
||||
"--no-warnings",
|
||||
// Emit a JSON dict containing the full metadata we care about for
|
||||
// the transcribe prompt's speaker-identification cues. Using
|
||||
// `before_dl:` so we get the metadata even if the download itself
|
||||
// later fails partway. The `.{field1,field2}j` template prints
|
||||
// just the named fields as a JSON object (yt-dlp escapes embedded
|
||||
// newlines inside description values, so single-line stdout parses
|
||||
// cleanly). Title comes from the same dict — no second --print
|
||||
// needed.
|
||||
//
|
||||
// Why these four fields specifically: they're exactly what the
|
||||
// recap-app's fetchYouTubeMetadata() pulls and feeds into its
|
||||
// direct-to-Gemini transcribe prompt. With these populated, the
|
||||
// model can correctly assign speaker labels (host name from
|
||||
// channel, guest name from description, chapter titles often name
|
||||
// both). Without them, every transcript falls back to unlabeled
|
||||
// dialogue regardless of how detailed the prompt's
|
||||
// speaker-identification rule is.
|
||||
"--print",
|
||||
"before_dl:%(.{title,channel,description,chapters})j",
|
||||
url,
|
||||
];
|
||||
let extractedMetadata = {
|
||||
title: null,
|
||||
channel: null,
|
||||
description: null,
|
||||
chapters: [],
|
||||
};
|
||||
try {
|
||||
await execFileAsync("yt-dlp", args, {
|
||||
const { stdout } = await execFileAsync("yt-dlp", args, {
|
||||
timeout: DOWNLOAD_TIMEOUT_MS,
|
||||
maxBuffer: 10 * 1024 * 1024,
|
||||
});
|
||||
// The JSON dict is the first non-empty line that starts with `{`.
|
||||
// yt-dlp may print other progress / warning lines before or after
|
||||
// depending on version; filter to the JSON line specifically.
|
||||
const firstJsonLine = (stdout || "")
|
||||
.split(/\r?\n/)
|
||||
.map((l) => l.trim())
|
||||
.find((l) => l.length > 0 && l.startsWith("{"));
|
||||
if (firstJsonLine) {
|
||||
try {
|
||||
const parsed = JSON.parse(firstJsonLine);
|
||||
extractedMetadata = {
|
||||
title:
|
||||
typeof parsed.title === "string" && parsed.title.trim()
|
||||
? parsed.title.trim().slice(0, 300)
|
||||
: null,
|
||||
channel:
|
||||
typeof parsed.channel === "string" && parsed.channel.trim()
|
||||
? parsed.channel.trim().slice(0, 200)
|
||||
: null,
|
||||
// Cap at 2000 chars — recap-app uses the same cap. Long
|
||||
// descriptions with release-notes / sponsor blocks otherwise
|
||||
// bloat the prompt and crowd out the speaker-naming signal.
|
||||
description:
|
||||
typeof parsed.description === "string" && parsed.description.trim()
|
||||
? parsed.description.trim().slice(0, 2000)
|
||||
: null,
|
||||
// Each chapter is { start_time: seconds, end_time, title }.
|
||||
// We only use start_time + title in the prompt; pass the full
|
||||
// array through so callers see what yt-dlp returned.
|
||||
chapters: Array.isArray(parsed.chapters) ? parsed.chapters : [],
|
||||
};
|
||||
} catch (parseErr) {
|
||||
// Malformed JSON from yt-dlp. Fall back to title-only via a
|
||||
// best-effort regex on the line. Better than nothing.
|
||||
const m = firstJsonLine.match(/"title"\s*:\s*"([^"]+)"/);
|
||||
if (m) extractedMetadata.title = m[1].slice(0, 300);
|
||||
console.warn(
|
||||
`[yt-dlp] metadata JSON parse failed: ${parseErr?.message || parseErr} — falling back to title-only`
|
||||
);
|
||||
}
|
||||
} else if (stdout) {
|
||||
// No JSON line but stdout has something — older yt-dlp versions
|
||||
// or some videos may emit a bare title line. Use it as title-only
|
||||
// so we at least preserve the existing v0.2.56 behavior.
|
||||
const firstLine = stdout
|
||||
.split(/\r?\n/)
|
||||
.map((l) => l.trim())
|
||||
.find((l) => l.length > 0);
|
||||
if (firstLine) extractedMetadata.title = firstLine.slice(0, 300);
|
||||
}
|
||||
} catch (err) {
|
||||
const stderr = (err?.stderr || "").toString();
|
||||
const stdout = (err?.stdout || "").toString();
|
||||
const stdoutStr = (err?.stdout || "").toString();
|
||||
throw new Error(
|
||||
`yt-dlp failed: ${stderr.trim() || stdout.trim() || err?.message}`
|
||||
`yt-dlp failed: ${stderr.trim() || stdoutStr.trim() || err?.message}`
|
||||
);
|
||||
}
|
||||
// Find the produced file — yt-dlp's audio-format=mp3 means it ends
|
||||
@@ -189,225 +285,466 @@ async function downloadYouTube(url, tmpDir) {
|
||||
filePath,
|
||||
bytes: stat.size,
|
||||
mimeType: guessMimeFromExt(filePath),
|
||||
title: extractedMetadata.title,
|
||||
channel: extractedMetadata.channel,
|
||||
description: extractedMetadata.description,
|
||||
chapters: extractedMetadata.chapters,
|
||||
};
|
||||
}
|
||||
|
||||
export function transcribeUrlRouter() {
|
||||
const router = express.Router();
|
||||
|
||||
// POST /relay/transcribe-url — kicks off a background transcribe
|
||||
// job and returns immediately with { job_id }. The client polls
|
||||
// GET /relay/jobs/:id to find out when it's done.
|
||||
//
|
||||
// Why async: a synchronous response over HTTP can't reliably
|
||||
// survive multi-minute work — proxies, load balancers, and NATs
|
||||
// along the path will drop the connection on long-running idle
|
||||
// requests (we observed a 5-minute cut on a 1h45m transcribe).
|
||||
// The poll requests are short and cheap, so they never trip
|
||||
// timeouts.
|
||||
router.post("/transcribe-url", express.json({ limit: "1mb" }), async (req, res) => {
|
||||
const t0 = Date.now();
|
||||
const installId = req.header("X-Recap-Install-Id");
|
||||
const jobId = req.header("X-Recap-Job-Id") || null;
|
||||
const auth = req.header("Authorization");
|
||||
const summaryJobId = req.header("X-Recap-Job-Id") || null;
|
||||
|
||||
if (!installId) {
|
||||
let identity;
|
||||
try {
|
||||
identity = await resolveIdentity(req);
|
||||
} catch (err) {
|
||||
const e = await errorEnvelope({
|
||||
error: err?.message || "auth_error",
|
||||
statusHint: err?.status || 401,
|
||||
});
|
||||
return res.status(e.statusHint || 401).json(e.body);
|
||||
}
|
||||
if (identity.kind === "license" && !identity.installId) {
|
||||
const e = await errorEnvelope({
|
||||
error: "missing X-Recap-Install-Id header",
|
||||
statusHint: 400,
|
||||
});
|
||||
return res.status(400).json(e.body);
|
||||
}
|
||||
const { creditKey, installId, license } = identity;
|
||||
// `title` is `let` rather than `const` because the worker may
|
||||
// backfill it from yt-dlp metadata after the download completes
|
||||
// (when the client didn't pre-fetch the title).
|
||||
let title;
|
||||
const {
|
||||
media_url: mediaUrl,
|
||||
type,
|
||||
mime_type: bodyMime,
|
||||
title,
|
||||
title: bodyTitle,
|
||||
channel,
|
||||
description,
|
||||
chapters,
|
||||
} = req.body || {};
|
||||
title = bodyTitle;
|
||||
if (!mediaUrl || typeof mediaUrl !== "string") {
|
||||
const e = await errorEnvelope({
|
||||
error: "missing or non-string body.media_url",
|
||||
creditKey,
|
||||
installId,
|
||||
statusHint: 400,
|
||||
});
|
||||
return res.status(400).json(e.body);
|
||||
}
|
||||
|
||||
const license = await resolveLicense(auth);
|
||||
const tier = license.tier;
|
||||
const row = await getOrCreateRow(installId);
|
||||
const row = await getOrCreateRow({ creditKey, installId, license });
|
||||
const tier = identityTier(identity, row);
|
||||
row.tier_snapshot = tier;
|
||||
const licenseFp = identity.kind === "cloud" ? null : licenseFingerprint(license);
|
||||
const auditInstall = installId || identity.userId || null;
|
||||
|
||||
// Quota check + backend choice. Same as /relay/transcribe.
|
||||
let reusedJob = false;
|
||||
let chosenBackend = null;
|
||||
const existingJob = lookupJob(installId, jobId);
|
||||
if (existingJob) {
|
||||
reusedJob = true;
|
||||
chosenBackend = existingJob.backend;
|
||||
} else {
|
||||
const cfg = await getConfigSnapshot();
|
||||
const hasHardware = !!cfg.relay_parakeet_base_url;
|
||||
const quota = await getTierQuotas();
|
||||
const preference =
|
||||
cfg.relay_transcribe_backend_preference || "gemini_first";
|
||||
const plan = planBackend(row, quota, { hasHardware, preference });
|
||||
if (!plan.allowed) {
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
tier,
|
||||
pipeline: "transcribe",
|
||||
backend: null,
|
||||
model: null,
|
||||
status: "refused",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - t0,
|
||||
cost_usd: 0,
|
||||
job_id: jobId,
|
||||
error: plan.reason,
|
||||
});
|
||||
const e = await errorEnvelope({
|
||||
error: plan.reason,
|
||||
installId,
|
||||
tier,
|
||||
statusHint: 402,
|
||||
});
|
||||
return res.status(402).json(e.body);
|
||||
}
|
||||
chosenBackend = plan.backend;
|
||||
}
|
||||
|
||||
// ── Download phase ─────────────────────────────────────────────
|
||||
const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "relay-dl-"));
|
||||
const isYT = type === "youtube" || (!type && looksLikeYouTube(mediaUrl));
|
||||
const dlStart = Date.now();
|
||||
let audio;
|
||||
let downloadMs = 0;
|
||||
try {
|
||||
audio = isYT
|
||||
? await downloadYouTube(mediaUrl, tmpDir)
|
||||
: await downloadDirect(mediaUrl, tmpDir);
|
||||
downloadMs = Date.now() - dlStart;
|
||||
console.log(
|
||||
`[transcribe-url] downloaded ${audio.bytes} bytes from ${isYT ? "youtube" : "direct"} in ${downloadMs}ms (${mediaUrl.slice(0, 80)})`
|
||||
// Billing vs. routing decoupled — see analyze.js for reasoning.
|
||||
const reusedSummaryJob = !!lookupJob({ creditKey, installId, license, jobId: summaryJobId });
|
||||
const cfgPlan = await getConfigSnapshot();
|
||||
const hw = await resolveHardwareConfig(cfgPlan);
|
||||
// Operator-only diagnostic — see summarize-url.js for the full
|
||||
// reasoning. We don't 503 here on blocked_reason because doing
|
||||
// so pre-empts planBackend and would surface operator-internal
|
||||
// Spark Control / parakeet wording to clients even when Gemini
|
||||
// was the configured preference.
|
||||
if (hw.transcribe.blocked_reason) {
|
||||
console.warn(
|
||||
`[transcribe-url] hardware transcribe currently blocked (planBackend will route to Gemini if available): ${hw.transcribe.blocked_reason}`,
|
||||
);
|
||||
} catch (err) {
|
||||
try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
|
||||
console.error(`[transcribe-url] download failed: ${err?.message || err}`);
|
||||
}
|
||||
const hasHardware = !!hw.transcribe.url;
|
||||
const quota = await getTierQuotas();
|
||||
const preference =
|
||||
cfgPlan.relay_transcribe_backend_preference || "gemini_first";
|
||||
const plan = planBackend(row, quota, { hasHardware, preference });
|
||||
if (!plan.allowed) {
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
install_id: auditInstall,
|
||||
license_fingerprint: licenseFp,
|
||||
tier,
|
||||
pipeline: "transcribe",
|
||||
backend: chosenBackend,
|
||||
backend: null,
|
||||
model: null,
|
||||
status: "error",
|
||||
status: "refused",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - t0,
|
||||
download_ms: Date.now() - dlStart,
|
||||
duration_ms: 0,
|
||||
cost_usd: 0,
|
||||
job_id: jobId,
|
||||
error: ("download_failed: " + (err?.message || String(err))).slice(0, 200),
|
||||
job_id: summaryJobId,
|
||||
media_url: mediaUrl || null,
|
||||
title: title || null,
|
||||
error: plan.reason,
|
||||
});
|
||||
const e = await errorEnvelope({
|
||||
error: "download_failed: " + (err?.message || String(err)).slice(0, 200),
|
||||
error: plan.reason,
|
||||
installId,
|
||||
license,
|
||||
tier,
|
||||
statusHint: 502,
|
||||
statusHint: 402,
|
||||
});
|
||||
return res.status(502).json(e.body);
|
||||
return res.status(402).json(e.body);
|
||||
}
|
||||
const chosenBackend = plan.backend;
|
||||
|
||||
// ── Transcription phase ────────────────────────────────────────
|
||||
const cfg = await getConfigSnapshot();
|
||||
let result;
|
||||
try {
|
||||
const audioBuf = await fs.readFile(audio.filePath);
|
||||
const mimeType = bodyMime || audio.mimeType;
|
||||
if (chosenBackend === "gemini") {
|
||||
const backend = createGeminiBackend({
|
||||
apiKey: cfg.relay_gemini_api_key,
|
||||
transcriptionModel: cfg.relay_gemini_transcription_model,
|
||||
analysisModel: cfg.relay_gemini_analysis_model,
|
||||
});
|
||||
result = await backend.transcribeAudio({
|
||||
audio: audioBuf,
|
||||
mimeType,
|
||||
title: title || "",
|
||||
channel: channel || "",
|
||||
description: description || "",
|
||||
chapters: Array.isArray(chapters) ? chapters : [],
|
||||
offsetSeconds: 0,
|
||||
});
|
||||
} else {
|
||||
const backend = createHardwareBackend({
|
||||
parakeetBaseURL: cfg.relay_parakeet_base_url,
|
||||
gemmaBaseURL: cfg.relay_gemma_base_url,
|
||||
parakeetModel: cfg.relay_parakeet_model,
|
||||
gemmaModel: cfg.relay_gemma_model,
|
||||
});
|
||||
result = await backend.transcribeAudio({
|
||||
audio: audioBuf,
|
||||
mimeType,
|
||||
offsetSeconds: 0,
|
||||
});
|
||||
}
|
||||
} catch (err) {
|
||||
try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
|
||||
if (reusedJob) refundJob(installId, jobId);
|
||||
console.error(`[transcribe-url] transcribe failed: ${err?.message}`);
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
tier,
|
||||
pipeline: "transcribe",
|
||||
// Mint the background job + RESPOND IMMEDIATELY.
|
||||
const job = createJob({
|
||||
kind: "transcribe-url",
|
||||
installId: auditInstall,
|
||||
metadata: {
|
||||
owner: creditKey, // authorizes the /jobs/:id poll (per-identity)
|
||||
media_url: mediaUrl,
|
||||
backend: chosenBackend,
|
||||
model:
|
||||
chosenBackend === "gemini"
|
||||
? cfg.relay_gemini_transcription_model
|
||||
: cfg.relay_parakeet_model,
|
||||
status: "error",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - t0,
|
||||
download_ms: downloadMs,
|
||||
cost_usd: 0,
|
||||
job_id: jobId,
|
||||
error: (err?.message || String(err)).slice(0, 200),
|
||||
});
|
||||
const e = await errorEnvelope({
|
||||
error: err?.message || "backend_error",
|
||||
installId,
|
||||
tier,
|
||||
statusHint: err?.status || 502,
|
||||
});
|
||||
return res.status(e.statusHint).json(e.body);
|
||||
} finally {
|
||||
try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
|
||||
}
|
||||
|
||||
// ── Commit + audit ─────────────────────────────────────────────
|
||||
let creditCharged = 0;
|
||||
if (!reusedJob) {
|
||||
await commitCredit(installId, { backend: chosenBackend, tier });
|
||||
markJobCharged(installId, jobId, { backend: chosenBackend, tier });
|
||||
creditCharged = 1;
|
||||
}
|
||||
const costDetails =
|
||||
chosenBackend === "gemini" && result.usage
|
||||
? calcGeminiCost(result.model, result.usage)
|
||||
: {
|
||||
input_tokens: 0,
|
||||
output_tokens: 0,
|
||||
thinking_tokens: 0,
|
||||
cost_usd: 0,
|
||||
};
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
tier,
|
||||
pipeline: "transcribe",
|
||||
backend: chosenBackend,
|
||||
model: result?.model || null,
|
||||
status: "success",
|
||||
credit_charged: creditCharged,
|
||||
duration_ms: Date.now() - t0,
|
||||
download_ms: downloadMs,
|
||||
audio_bytes: audio.bytes,
|
||||
job_id: jobId,
|
||||
...costDetails,
|
||||
summary_job_id: summaryJobId,
|
||||
},
|
||||
});
|
||||
|
||||
const body = await envelope({ result, installId, tier, creditCharged });
|
||||
// Background worker — runs after this handler has returned.
|
||||
// Errors are captured into the job record; nothing thrown here
|
||||
// can crash the route process.
|
||||
(async () => {
|
||||
const workerT0 = Date.now();
|
||||
markRunning(job.id);
|
||||
setProgress(job.id, "downloading media…");
|
||||
|
||||
const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "relay-dl-"));
|
||||
const isYT = type === "youtube" || (!type && looksLikeYouTube(mediaUrl));
|
||||
let audio;
|
||||
let downloadMs = 0;
|
||||
try {
|
||||
const dlStart = Date.now();
|
||||
audio = isYT
|
||||
? await downloadYouTube(mediaUrl, tmpDir)
|
||||
: await downloadDirect(mediaUrl, tmpDir);
|
||||
downloadMs = Date.now() - dlStart;
|
||||
console.log(
|
||||
`[transcribe-url ${job.id.slice(0, 8)}] downloaded ${audio.bytes} bytes from ${isYT ? "youtube" : "direct"} in ${downloadMs}ms`
|
||||
);
|
||||
audio.seconds = await getAudioDurationSeconds(audio.filePath);
|
||||
if (!title && audio.title) {
|
||||
// yt-dlp captured the title during download; use it when
|
||||
// the client didn't pass one.
|
||||
title = audio.title;
|
||||
}
|
||||
setProgress(job.id, `transcribing ${Math.round((audio.seconds || 0) / 60)} min audio…`);
|
||||
} catch (err) {
|
||||
try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
|
||||
const msg = (err?.message || String(err)).slice(0, 300);
|
||||
console.error(`[transcribe-url ${job.id.slice(0, 8)}] download failed: ${msg}`);
|
||||
await recordCall({
|
||||
install_id: auditInstall,
|
||||
license_fingerprint: licenseFp,
|
||||
tier,
|
||||
pipeline: "transcribe",
|
||||
backend: chosenBackend,
|
||||
model: null,
|
||||
status: "error",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - workerT0,
|
||||
download_ms: Date.now() - workerT0,
|
||||
audio_seconds: null,
|
||||
cost_usd: 0,
|
||||
job_id: summaryJobId,
|
||||
media_url: mediaUrl || null,
|
||||
title: title || null,
|
||||
error: "download_failed: " + msg,
|
||||
});
|
||||
markFailed(job.id, "download_failed: " + msg);
|
||||
return;
|
||||
}
|
||||
|
||||
// Transcription phase
|
||||
const cfg = await getConfigSnapshot();
|
||||
let result;
|
||||
// Stamp the moment transcribe is about to start (AFTER download
|
||||
// finished). Used for duration_ms on the audit row so the
|
||||
// "TX wall time" column reflects ONLY the transcribe phase.
|
||||
const txPhaseStart = Date.now();
|
||||
try {
|
||||
const audioBuf = await fs.readFile(audio.filePath);
|
||||
const mimeType = bodyMime || audio.mimeType;
|
||||
if (chosenBackend === "gemini") {
|
||||
const backend = createGeminiBackend({
|
||||
apiKey: cfg.relay_gemini_api_key,
|
||||
transcriptionModel: cfg.relay_gemini_transcription_model,
|
||||
analysisModel: cfg.relay_gemini_analysis_model,
|
||||
txChunkSeconds: (cfg.relay_gemini_tx_chunk_minutes || 30) * 60,
|
||||
txConcurrency: cfg.relay_gemini_tx_concurrency || 12,
|
||||
transcribePromptOverride: cfg.relay_transcribe_prompt || "",
|
||||
});
|
||||
result = await backend.transcribeAudio({
|
||||
audio: audioBuf,
|
||||
mimeType,
|
||||
title: title || "",
|
||||
channel: channel || "",
|
||||
description: description || "",
|
||||
chapters: Array.isArray(chapters) ? chapters : [],
|
||||
offsetSeconds: 0,
|
||||
});
|
||||
} else {
|
||||
const backend = createHardwareBackend({
|
||||
parakeetBaseURL: hw.transcribe.url || "",
|
||||
gemmaBaseURL: hw.analyze.url || "",
|
||||
sparkControlBaseURL: hw.sparkBase || "",
|
||||
parakeetModel: hw.transcribe.model || "",
|
||||
gemmaModel: hw.analyze.model || "",
|
||||
txChunkSeconds: (cfg.relay_hardware_tx_chunk_minutes || 5) * 60,
|
||||
txChunkOverlapSeconds: cfg.relay_hardware_tx_chunk_overlap_seconds ?? 30,
|
||||
diarizationEnabled: !!cfg.relay_hardware_diarization_enabled,
|
||||
clusterThresholdPct: cfg.relay_hardware_voice_clustering_threshold ?? 70,
|
||||
anchorMinSpeakingSec: cfg.relay_hardware_anchor_min_speaking_sec ?? 30,
|
||||
smallClusterMaxSpeakingSec: cfg.relay_hardware_small_cluster_max_speaking_sec ?? 15,
|
||||
uncertainMarginPct: cfg.relay_hardware_uncertain_margin_pct ?? 10,
|
||||
txConcurrency: cfg.relay_hardware_tx_concurrency || 4,
|
||||
});
|
||||
result = await backend.transcribeAudio({
|
||||
audio: audioBuf,
|
||||
mimeType,
|
||||
offsetSeconds: 0,
|
||||
});
|
||||
}
|
||||
} catch (err) {
|
||||
try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
|
||||
if (reusedSummaryJob) await refundJob({ creditKey, installId, license, jobId: summaryJobId });
|
||||
const msg = (err?.message || String(err)).slice(0, 400);
|
||||
console.error(`[transcribe-url ${job.id.slice(0, 8)}] transcribe failed: ${msg}`);
|
||||
if (chosenBackend === "hardware") {
|
||||
reportHealthEvent({
|
||||
service: "parakeet",
|
||||
ok: false,
|
||||
error: msg.slice(0, 280),
|
||||
ms: Date.now() - workerT0,
|
||||
});
|
||||
}
|
||||
await recordCall({
|
||||
install_id: auditInstall,
|
||||
license_fingerprint: licenseFp,
|
||||
tier,
|
||||
pipeline: "transcribe",
|
||||
backend: chosenBackend,
|
||||
model:
|
||||
chosenBackend === "gemini"
|
||||
? cfg.relay_gemini_transcription_model
|
||||
: hw.transcribe.model || "(auto)",
|
||||
status: "error",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - txPhaseStart,
|
||||
download_ms: downloadMs,
|
||||
audio_seconds: audio?.seconds || null,
|
||||
audio_bytes: audio?.bytes || null,
|
||||
cost_usd: 0,
|
||||
job_id: summaryJobId,
|
||||
media_url: mediaUrl || null,
|
||||
title: title || null,
|
||||
error: msg,
|
||||
});
|
||||
markFailed(job.id, msg);
|
||||
return;
|
||||
} finally {
|
||||
try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
|
||||
}
|
||||
|
||||
// Success — commit credit (once per summary job_id), audit, mark done.
|
||||
let creditCharged = 0;
|
||||
if (!reusedSummaryJob) {
|
||||
await commitCredit({ creditKey, installId, license, backend: chosenBackend, tier });
|
||||
await markJobCharged({ creditKey, installId, license, jobId: summaryJobId, backend: chosenBackend, tier });
|
||||
creditCharged = 1;
|
||||
}
|
||||
const costDetails =
|
||||
chosenBackend === "gemini" && result.usage
|
||||
? calcGeminiCost(result.model, result.usage)
|
||||
: {
|
||||
input_tokens: 0,
|
||||
output_tokens: 0,
|
||||
thinking_tokens: 0,
|
||||
cost_usd: 0,
|
||||
};
|
||||
// Truncation detection — mark partial when any chunk hit
|
||||
// the silent output-token cap and emitted < 80% of its
|
||||
// expected audio. See gemini.js for the actual coverage
|
||||
// computation; here we just propagate to the audit row.
|
||||
const truncatedChunks = Array.isArray(result?.truncated_chunks)
|
||||
? result.truncated_chunks
|
||||
: [];
|
||||
const wasTruncated = truncatedChunks.length > 0;
|
||||
const truncationError = wasTruncated
|
||||
? `transcribe: ${truncatedChunks.length} chunk(s) truncated — missing ~${truncatedChunks.reduce((s, c) => s + (c.missingSec || 0), 0)}s of speech (model: ${result.model || "unknown"}). Likely hit maxOutputTokens.`
|
||||
: null;
|
||||
await recordCall({
|
||||
install_id: auditInstall,
|
||||
license_fingerprint: licenseFp,
|
||||
tier,
|
||||
pipeline: "transcribe",
|
||||
backend: chosenBackend,
|
||||
model: result?.model || null,
|
||||
status: wasTruncated ? "partial" : "success",
|
||||
credit_charged: creditCharged,
|
||||
duration_ms: Date.now() - txPhaseStart,
|
||||
download_ms: downloadMs,
|
||||
audio_bytes: audio.bytes,
|
||||
audio_seconds: audio.seconds || null,
|
||||
job_id: summaryJobId,
|
||||
attempts: result?.attempts || null,
|
||||
// Per-job context for the operator dashboard's per-video table.
|
||||
// media_url + title let the dashboard show what was being
|
||||
// processed; chunk_count exposes the new server-side chunking
|
||||
// (1 for short audio, N for ≥30 min audio split by the Gemini
|
||||
// backend or by the hardware backend's Parakeet chunker).
|
||||
media_url: mediaUrl || null,
|
||||
title: title || null,
|
||||
chunk_count: result?.chunk_count ?? null,
|
||||
chunk_durations_ms: result?.chunk_durations_ms || null,
|
||||
truncated_chunks: wasTruncated ? truncatedChunks : null,
|
||||
error: truncationError,
|
||||
...costDetails,
|
||||
});
|
||||
markComplete(job.id, {
|
||||
result,
|
||||
credit_charged: creditCharged,
|
||||
tier,
|
||||
});
|
||||
console.log(
|
||||
`[transcribe-url ${job.id.slice(0, 8)}] complete in ${((Date.now() - workerT0) / 1000).toFixed(1)}s`
|
||||
);
|
||||
// Optional: persist transcript output for the operator's
|
||||
// "View output" dashboard feature. Only when the config flag
|
||||
// is set (default false) — saving real-user transcripts is an
|
||||
// opt-in operator decision, not a default. Note that we only
|
||||
// have the transcript here (analyze runs as a separate
|
||||
// /relay/analyze call in the Recap flow); the analyze row will
|
||||
// overwrite this file later with the full transcript+analysis
|
||||
// payload when it lands. Best-effort, errors ignored.
|
||||
if (cfg.relay_save_user_outputs) {
|
||||
await saveJobOutput(summaryJobId || job.id, {
|
||||
batch_id: null,
|
||||
source: null,
|
||||
transcript: result?.text || "",
|
||||
analysis: null,
|
||||
analysis_raw_text: null,
|
||||
meta: {
|
||||
title: title || null,
|
||||
media_url: mediaUrl,
|
||||
audio_seconds: audio.seconds || null,
|
||||
audio_bytes: audio.bytes,
|
||||
captions_mode: null,
|
||||
transcribe_backend: chosenBackend,
|
||||
transcribe_model: result?.model || null,
|
||||
analyze_backend: null,
|
||||
analyze_model: null,
|
||||
},
|
||||
});
|
||||
}
|
||||
})().catch((err) => {
|
||||
// Top-level catch — should be unreachable since the worker
|
||||
// handles its own try/catch, but defends against unexpected
|
||||
// throws so the job doesn't sit in "running" forever.
|
||||
markFailed(job.id, "worker_crashed: " + (err?.message || String(err)));
|
||||
console.error(`[transcribe-url ${job.id.slice(0, 8)}] worker crashed:`, err);
|
||||
});
|
||||
|
||||
// Hand back the job_id immediately. Client will poll for status.
|
||||
const body = await envelope({
|
||||
result: {
|
||||
job_id: job.id,
|
||||
status: "queued",
|
||||
kind: "transcribe-url",
|
||||
},
|
||||
creditKey,
|
||||
installId,
|
||||
license,
|
||||
tier,
|
||||
});
|
||||
res.json(body);
|
||||
});
|
||||
|
||||
// GET /relay/jobs/:id — poll loop's friend. Owner-scoped (the job's
|
||||
// metadata.owner must equal the caller's creditKey; older jobs fall
|
||||
// back to install id). Returns the status + (once complete) the full transcribe result.
|
||||
router.get("/jobs/:id", async (req, res) => {
|
||||
let identity;
|
||||
try {
|
||||
identity = await resolveIdentity(req);
|
||||
} catch (err) {
|
||||
const e = await errorEnvelope({ error: err?.message || "auth_error", statusHint: err?.status || 401 });
|
||||
return res.status(e.statusHint || 401).json(e.body);
|
||||
}
|
||||
if (identity.kind === "license" && !identity.installId) {
|
||||
const e = await errorEnvelope({
|
||||
error: "missing X-Recap-Install-Id header",
|
||||
statusHint: 400,
|
||||
});
|
||||
return res.status(400).json(e.body);
|
||||
}
|
||||
const { creditKey, installId, license } = identity;
|
||||
const ownerRow = await getOrCreateRow({ creditKey, installId, license });
|
||||
const tier = identityTier(identity, ownerRow);
|
||||
const jobId = (req.params.id || "").trim();
|
||||
const job = getJob(jobId);
|
||||
if (!job) {
|
||||
const e = await errorEnvelope({
|
||||
error: "job_not_found",
|
||||
creditKey,
|
||||
creditKey,
|
||||
installId,
|
||||
tier,
|
||||
statusHint: 404,
|
||||
});
|
||||
return res.status(404).json(e.body);
|
||||
}
|
||||
// New jobs carry metadata.owner = creditKey; older jobs only carry
|
||||
// install_id. Authorize by whichever the job has.
|
||||
const ownerOk = job.metadata?.owner
|
||||
? job.metadata.owner === creditKey
|
||||
: identity.installId && job.install_id === identity.installId;
|
||||
if (!ownerOk) {
|
||||
const e = await errorEnvelope({
|
||||
error: "job_belongs_to_different_owner",
|
||||
creditKey,
|
||||
creditKey,
|
||||
installId,
|
||||
tier,
|
||||
statusHint: 403,
|
||||
});
|
||||
return res.status(403).json(e.body);
|
||||
}
|
||||
const body = await envelope({
|
||||
result: {
|
||||
job_id: job.id,
|
||||
kind: job.kind,
|
||||
status: job.status,
|
||||
progress: job.progress,
|
||||
started_at: job.started_at,
|
||||
updated_at: job.updated_at,
|
||||
completed_at: job.completed_at,
|
||||
// Include the FULL transcribe-result on completion so the
|
||||
// client doesn't need a second round-trip.
|
||||
result: job.status === "complete" ? job.result?.result : null,
|
||||
credit_charged:
|
||||
job.status === "complete" ? job.result?.credit_charged || 0 : 0,
|
||||
error: job.error,
|
||||
},
|
||||
creditKey,
|
||||
installId,
|
||||
license,
|
||||
tier,
|
||||
});
|
||||
res.json(body);
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user