Wire new routes; identity, summarize-url, dashboard, admin

2026-06-13 13:36:30 -05:00
parent 04dcf86fa4
commit 318c6c4b81
20 changed files with 12407 additions and 499 deletions
@@ -33,8 +33,13 @@ import { execFile } from "child_process";
 import { promisify } from "util";
 import { Readable } from "stream";
 import { pipeline } from "stream/promises";
-import { resolveLicense } from "../keysat-client.js";
-import { getOrCreateRow, planBackend, commitCredit } from "../credits.js";
+import { resolveIdentity, identityTier } from "../identity.js";
+import {
+  getOrCreateRow,
+  planBackend,
+  commitCredit,
+  licenseFingerprint,
+} from "../credits.js";
 import { lookupJob, markJobCharged, refundJob } from "../job-credits.js";
 import { getConfigSnapshot, getTierQuotas } from "../config.js";
 import { createGeminiBackend } from "../backends/gemini.js";
@@ -42,6 +47,18 @@ import { createHardwareBackend } from "../backends/hardware.js";
 import { envelope, errorEnvelope } from "./envelope.js";
 import { recordCall } from "../audit-log.js";
 import { calcGeminiCost } from "../pricing.js";
+import { getAudioDurationSeconds } from "../audio-meta.js";
+import { resolveHardwareConfig } from "../hardware-config.js";
+import { reportHealthEvent } from "../spark-control-events.js";
+import {
+  createJob,
+  markRunning,
+  setProgress,
+  markComplete,
+  markFailed,
+  getJob,
+} from "../jobs.js";
+import { saveJobOutput } from "../output-store.js";

 const execFileAsync = promisify(execFile);

@@ -54,7 +71,7 @@ const MAX_DOWNLOAD_BYTES = 500 * 1024 * 1024;
 // rate-limits; a hard ceiling avoids holding the request open forever.
 const DOWNLOAD_TIMEOUT_MS = 10 * 60 * 1000;

-function looksLikeYouTube(url) {
/**
 * Report whether `url` points at YouTube.
 *
 * The decision is made on the parsed hostname rather than by searching
 * the raw string, so:
 *   - scheme-prefixed bare hosts (`https://youtu.be/ID`,
 *     `https://youtube.com/watch?v=ID`) are recognised, and
 *   - a YouTube host name that merely appears in the path or query of
 *     some other site's URL is not.
 *
 * Scheme-less input such as `youtube.com/watch?v=ID` is still accepted
 * by assuming https.
 *
 * @param {string} url - Candidate media URL (may be empty / nullish).
 * @returns {boolean} true when the host is youtube.com, youtu.be, or a
 *   subdomain of either.
 */
export function looksLikeYouTube(url) {
  if (typeof url !== "string") return false;
  const candidate = url.trim();
  if (candidate === "") return false;

  // `new URL` needs a scheme; supply one for inputs like "youtu.be/ID".
  const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(candidate);
  let host;
  try {
    host = new URL(hasScheme ? candidate : `https://${candidate}`).hostname;
  } catch {
    return false;
  }
  // Tolerate a fully-qualified trailing dot ("youtu.be.").
  if (host.endsWith(".")) host = host.slice(0, -1);

  return ["youtube.com", "youtu.be"].some(
    (base) => host === base || host.endsWith(`.${base}`),
  );
}
@@ -79,7 +96,7 @@ function guessMimeFromExt(filePath) {
 // Download an HTTP(S) audio URL to a temp file. Stops if the file
 // would exceed MAX_DOWNLOAD_BYTES. Returns { filePath, bytes,
 // mimeType }.
-async function downloadDirect(url, tmpDir) {
+export async function downloadDirect(url, tmpDir) {
  const res = await fetch(url, {
    redirect: "follow",
    signal: AbortSignal.timeout(DOWNLOAD_TIMEOUT_MS),
@@ -143,7 +160,11 @@ async function downloadDirect(url, tmpDir) {

 // Download a YouTube URL via yt-dlp. Picks the audio-only m4a/mp3.
 // Logs the chosen path back as the file. Caller manages tmpDir.
-async function downloadYouTube(url, tmpDir) {
+// Captures title/channel/description/chapters via one `--print` JSON
+// line so callers (the summarize-url / transcribe-url workers) can
+// stamp the Jobs table with the real title instead of "Untitled"
+// when the client didn't pre-fetch metadata.
+export async function downloadYouTube(url, tmpDir) {
  const outTemplate = path.join(tmpDir, "audio.%(ext)s");
  const args = [
    "-x", // extract audio
@@ -156,18 +177,93 @@ async function downloadYouTube(url, tmpDir) {
    "--no-playlist",
    "--no-simulate",
    "--no-warnings",
+    // Emit a JSON dict containing the full metadata we care about for
+    // the transcribe prompt's speaker-identification cues. Using
+    // `before_dl:` so we get the metadata even if the download itself
+    // later fails partway. The `.{field1,field2}j` template prints
+    // just the named fields as a JSON object (yt-dlp escapes embedded
+    // newlines inside description values, so single-line stdout parses
+    // cleanly). Title comes from the same dict — no second --print
+    // needed.
+    //
+    // Why these four fields specifically: they're exactly what the
+    // recap-app's fetchYouTubeMetadata() pulls and feeds into its
+    // direct-to-Gemini transcribe prompt. With these populated, the
+    // model can correctly assign speaker labels (host name from
+    // channel, guest name from description, chapter titles often name
+    // both). Without them, every transcript falls back to unlabeled
+    // dialogue regardless of how detailed the prompt's
+    // speaker-identification rule is.
+    "--print",
+    "before_dl:%(.{title,channel,description,chapters})j",
    url,
  ];
+  let extractedMetadata = {
+    title: null,
+    channel: null,
+    description: null,
+    chapters: [],
+  };
  try {
-    await execFileAsync("yt-dlp", args, {
+    const { stdout } = await execFileAsync("yt-dlp", args, {
      timeout: DOWNLOAD_TIMEOUT_MS,
      maxBuffer: 10 * 1024 * 1024,
    });
+    // The JSON dict is the first non-empty line that starts with `{`.
+    // yt-dlp may print other progress / warning lines before or after
+    // depending on version; filter to the JSON line specifically.
+    const firstJsonLine = (stdout || "")
+      .split(/\r?\n/)
+      .map((l) => l.trim())
+      .find((l) => l.length > 0 && l.startsWith("{"));
+    if (firstJsonLine) {
+      try {
+        const parsed = JSON.parse(firstJsonLine);
+        extractedMetadata = {
+          title:
+            typeof parsed.title === "string" && parsed.title.trim()
+              ? parsed.title.trim().slice(0, 300)
+              : null,
+          channel:
+            typeof parsed.channel === "string" && parsed.channel.trim()
+              ? parsed.channel.trim().slice(0, 200)
+              : null,
+          // Cap at 2000 chars — recap-app uses the same cap. Long
+          // descriptions with release-notes / sponsor blocks otherwise
+          // bloat the prompt and crowd out the speaker-naming signal.
+          description:
+            typeof parsed.description === "string" && parsed.description.trim()
+              ? parsed.description.trim().slice(0, 2000)
+              : null,
+          // Each chapter is { start_time: seconds, end_time, title }.
+          // We only use start_time + title in the prompt; pass the full
+          // array through so callers see what yt-dlp returned.
+          chapters: Array.isArray(parsed.chapters) ? parsed.chapters : [],
+        };
+      } catch (parseErr) {
+        // Malformed JSON from yt-dlp. Fall back to title-only via a
+        // best-effort regex on the line. Better than nothing.
+        const m = firstJsonLine.match(/"title"\s*:\s*"([^"]+)"/);
+        if (m) extractedMetadata.title = m[1].slice(0, 300);
+        console.warn(
+          `[yt-dlp] metadata JSON parse failed: ${parseErr?.message || parseErr} — falling back to title-only`
+        );
+      }
+    } else if (stdout) {
+      // No JSON line but stdout has something — older yt-dlp versions
+      // or some videos may emit a bare title line. Use it as title-only
+      // so we at least preserve the existing v0.2.56 behavior.
+      const firstLine = stdout
+        .split(/\r?\n/)
+        .map((l) => l.trim())
+        .find((l) => l.length > 0);
+      if (firstLine) extractedMetadata.title = firstLine.slice(0, 300);
+    }
  } catch (err) {
    const stderr = (err?.stderr || "").toString();
-    const stdout = (err?.stdout || "").toString();
+    const stdoutStr = (err?.stdout || "").toString();
    throw new Error(
-      `yt-dlp failed: ${stderr.trim() || stdout.trim() || err?.message}`
+      `yt-dlp failed: ${stderr.trim() || stdoutStr.trim() || err?.message}`
    );
  }
  // Find the produced file — yt-dlp's audio-format=mp3 means it ends
@@ -189,225 +285,466 @@ async function downloadYouTube(url, tmpDir) {
    filePath,
    bytes: stat.size,
    mimeType: guessMimeFromExt(filePath),
+    title: extractedMetadata.title,
+    channel: extractedMetadata.channel,
+    description: extractedMetadata.description,
+    chapters: extractedMetadata.chapters,
  };
 }

 export function transcribeUrlRouter() {
  const router = express.Router();

+  // POST /relay/transcribe-url — kicks off a background transcribe
+  // job and returns immediately with { job_id }. The client polls
+  // GET /relay/jobs/:id to find out when it's done.
+  //
+  // Why async: a synchronous response over HTTP can't reliably
+  // survive multi-minute work — proxies, load balancers, and NATs
+  // along the path will drop the connection on long-running idle
+  // requests (we observed a 5-minute cut on a 1h45m transcribe).
+  // The poll requests are short and cheap, so they never trip
+  // timeouts.
  router.post("/transcribe-url", express.json({ limit: "1mb" }), async (req, res) => {
-    const t0 = Date.now();
-    const installId = req.header("X-Recap-Install-Id");
-    const jobId = req.header("X-Recap-Job-Id") || null;
-    const auth = req.header("Authorization");
+    const summaryJobId = req.header("X-Recap-Job-Id") || null;

-    if (!installId) {
+    let identity;
+    try {
+      identity = await resolveIdentity(req);
+    } catch (err) {
+      const e = await errorEnvelope({
+        error: err?.message || "auth_error",
+        statusHint: err?.status || 401,
+      });
+      return res.status(e.statusHint || 401).json(e.body);
+    }
+    if (identity.kind === "license" && !identity.installId) {
      const e = await errorEnvelope({
        error: "missing X-Recap-Install-Id header",
        statusHint: 400,
      });
      return res.status(400).json(e.body);
    }
+    const { creditKey, installId, license } = identity;
+    // `title` is `let` rather than `const` because the worker may
+    // backfill it from yt-dlp metadata after the download completes
+    // (when the client didn't pre-fetch the title).
+    let title;
    const {
      media_url: mediaUrl,
      type,
      mime_type: bodyMime,
-      title,
+      title: bodyTitle,
      channel,
      description,
      chapters,
    } = req.body || {};
+    title = bodyTitle;
    if (!mediaUrl || typeof mediaUrl !== "string") {
      const e = await errorEnvelope({
        error: "missing or non-string body.media_url",
+        creditKey,
        installId,
        statusHint: 400,
      });
      return res.status(400).json(e.body);
    }

-    const license = await resolveLicense(auth);
-    const tier = license.tier;
-    const row = await getOrCreateRow(installId);
+    const row = await getOrCreateRow({ creditKey, installId, license });
+    const tier = identityTier(identity, row);
    row.tier_snapshot = tier;
+    const licenseFp = identity.kind === "cloud" ? null : licenseFingerprint(license);
+    const auditInstall = installId || identity.userId || null;

-    // Quota check + backend choice. Same as /relay/transcribe.
-    let reusedJob = false;
-    let chosenBackend = null;
-    const existingJob = lookupJob(installId, jobId);
-    if (existingJob) {
-      reusedJob = true;
-      chosenBackend = existingJob.backend;
-    } else {
-      const cfg = await getConfigSnapshot();
-      const hasHardware = !!cfg.relay_parakeet_base_url;
-      const quota = await getTierQuotas();
-      const preference =
-        cfg.relay_transcribe_backend_preference || "gemini_first";
-      const plan = planBackend(row, quota, { hasHardware, preference });
-      if (!plan.allowed) {
-        await recordCall({
-          install_id: installId,
-          tier,
-          pipeline: "transcribe",
-          backend: null,
-          model: null,
-          status: "refused",
-          credit_charged: 0,
-          duration_ms: Date.now() - t0,
-          cost_usd: 0,
-          job_id: jobId,
-          error: plan.reason,
-        });
-        const e = await errorEnvelope({
-          error: plan.reason,
-          installId,
-          tier,
-          statusHint: 402,
-        });
-        return res.status(402).json(e.body);
-      }
-      chosenBackend = plan.backend;
-    }
-
-    // ── Download phase ─────────────────────────────────────────────
-    const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "relay-dl-"));
-    const isYT = type === "youtube" || (!type && looksLikeYouTube(mediaUrl));
-    const dlStart = Date.now();
-    let audio;
-    let downloadMs = 0;
-    try {
-      audio = isYT
-        ? await downloadYouTube(mediaUrl, tmpDir)
-        : await downloadDirect(mediaUrl, tmpDir);
-      downloadMs = Date.now() - dlStart;
-      console.log(
-        `[transcribe-url] downloaded ${audio.bytes} bytes from ${isYT ? "youtube" : "direct"} in ${downloadMs}ms (${mediaUrl.slice(0, 80)})`
+    // Billing vs. routing decoupled — see analyze.js for reasoning.
+    const reusedSummaryJob = !!lookupJob({ creditKey, installId, license, jobId: summaryJobId });
+    const cfgPlan = await getConfigSnapshot();
+    const hw = await resolveHardwareConfig(cfgPlan);
+    // Operator-only diagnostic — see summarize-url.js for the full
+    // reasoning. We don't 503 here on blocked_reason because doing
+    // so pre-empts planBackend and would surface operator-internal
+    // Spark Control / parakeet wording to clients even when Gemini
+    // was the configured preference.
+    if (hw.transcribe.blocked_reason) {
+      console.warn(
+        `[transcribe-url] hardware transcribe currently blocked (planBackend will route to Gemini if available): ${hw.transcribe.blocked_reason}`,
      );
-    } catch (err) {
-      try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
-      console.error(`[transcribe-url] download failed: ${err?.message || err}`);
+    }
+    const hasHardware = !!hw.transcribe.url;
+    const quota = await getTierQuotas();
+    const preference =
+      cfgPlan.relay_transcribe_backend_preference || "gemini_first";
+    const plan = planBackend(row, quota, { hasHardware, preference });
+    if (!plan.allowed) {
      await recordCall({
-        install_id: installId,
+        install_id: auditInstall,
+        license_fingerprint: licenseFp,
        tier,
        pipeline: "transcribe",
-        backend: chosenBackend,
+        backend: null,
        model: null,
-        status: "error",
+        status: "refused",
        credit_charged: 0,
-        duration_ms: Date.now() - t0,
-        download_ms: Date.now() - dlStart,
+        duration_ms: 0,
        cost_usd: 0,
-        job_id: jobId,
-        error: ("download_failed: " + (err?.message || String(err))).slice(0, 200),
+        job_id: summaryJobId,
+        media_url: mediaUrl || null,
+        title: title || null,
+        error: plan.reason,
      });
      const e = await errorEnvelope({
-        error: "download_failed: " + (err?.message || String(err)).slice(0, 200),
+        error: plan.reason,
        installId,
+        license,
        tier,
-        statusHint: 502,
+        statusHint: 402,
      });
-      return res.status(502).json(e.body);
+      return res.status(402).json(e.body);
    }
+    const chosenBackend = plan.backend;

-    // ── Transcription phase ────────────────────────────────────────
-    const cfg = await getConfigSnapshot();
-    let result;
-    try {
-      const audioBuf = await fs.readFile(audio.filePath);
-      const mimeType = bodyMime || audio.mimeType;
-      if (chosenBackend === "gemini") {
-        const backend = createGeminiBackend({
-          apiKey: cfg.relay_gemini_api_key,
-          transcriptionModel: cfg.relay_gemini_transcription_model,
-          analysisModel: cfg.relay_gemini_analysis_model,
-        });
-        result = await backend.transcribeAudio({
-          audio: audioBuf,
-          mimeType,
-          title: title || "",
-          channel: channel || "",
-          description: description || "",
-          chapters: Array.isArray(chapters) ? chapters : [],
-          offsetSeconds: 0,
-        });
-      } else {
-        const backend = createHardwareBackend({
-          parakeetBaseURL: cfg.relay_parakeet_base_url,
-          gemmaBaseURL: cfg.relay_gemma_base_url,
-          parakeetModel: cfg.relay_parakeet_model,
-          gemmaModel: cfg.relay_gemma_model,
-        });
-        result = await backend.transcribeAudio({
-          audio: audioBuf,
-          mimeType,
-          offsetSeconds: 0,
-        });
-      }
-    } catch (err) {
-      try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
-      if (reusedJob) refundJob(installId, jobId);
-      console.error(`[transcribe-url] transcribe failed: ${err?.message}`);
-      await recordCall({
-        install_id: installId,
-        tier,
-        pipeline: "transcribe",
+    // Mint the background job + RESPOND IMMEDIATELY.
+    const job = createJob({
+      kind: "transcribe-url",
+      installId: auditInstall,
+      metadata: {
+        owner: creditKey, // authorizes the /jobs/:id poll (per-identity)
+        media_url: mediaUrl,
        backend: chosenBackend,
-        model:
-          chosenBackend === "gemini"
-            ? cfg.relay_gemini_transcription_model
-            : cfg.relay_parakeet_model,
-        status: "error",
-        credit_charged: 0,
-        duration_ms: Date.now() - t0,
-        download_ms: downloadMs,
-        cost_usd: 0,
-        job_id: jobId,
-        error: (err?.message || String(err)).slice(0, 200),
-      });
-      const e = await errorEnvelope({
-        error: err?.message || "backend_error",
-        installId,
-        tier,
-        statusHint: err?.status || 502,
-      });
-      return res.status(e.statusHint).json(e.body);
-    } finally {
-      try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
-    }
-
-    // ── Commit + audit ─────────────────────────────────────────────
-    let creditCharged = 0;
-    if (!reusedJob) {
-      await commitCredit(installId, { backend: chosenBackend, tier });
-      markJobCharged(installId, jobId, { backend: chosenBackend, tier });
-      creditCharged = 1;
-    }
-    const costDetails =
-      chosenBackend === "gemini" && result.usage
-        ? calcGeminiCost(result.model, result.usage)
-        : {
-            input_tokens: 0,
-            output_tokens: 0,
-            thinking_tokens: 0,
-            cost_usd: 0,
-          };
-    await recordCall({
-      install_id: installId,
-      tier,
-      pipeline: "transcribe",
-      backend: chosenBackend,
-      model: result?.model || null,
-      status: "success",
-      credit_charged: creditCharged,
-      duration_ms: Date.now() - t0,
-      download_ms: downloadMs,
-      audio_bytes: audio.bytes,
-      job_id: jobId,
-      ...costDetails,
+        summary_job_id: summaryJobId,
+      },
    });

-    const body = await envelope({ result, installId, tier, creditCharged });
+    // Background worker — not awaited: it starts here and continues
+    // after the response below is sent. Errors are captured into the
+    // job record; nothing thrown here can crash the route process.
+    (async () => {
+      const workerT0 = Date.now();
+      markRunning(job.id);
+      setProgress(job.id, "downloading media…");
+
+      const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "relay-dl-"));
+      const isYT = type === "youtube" || (!type && looksLikeYouTube(mediaUrl));
+      let audio;
+      let downloadMs = 0;
+      try {
+        const dlStart = Date.now();
+        audio = isYT
+          ? await downloadYouTube(mediaUrl, tmpDir)
+          : await downloadDirect(mediaUrl, tmpDir);
+        downloadMs = Date.now() - dlStart;
+        console.log(
+          `[transcribe-url ${job.id.slice(0, 8)}] downloaded ${audio.bytes} bytes from ${isYT ? "youtube" : "direct"} in ${downloadMs}ms`
+        );
+        audio.seconds = await getAudioDurationSeconds(audio.filePath);
+        if (!title && audio.title) {
+          // yt-dlp captured the title during download; use it when
+          // the client didn't pass one.
+          title = audio.title;
+        }
+        setProgress(job.id, `transcribing ${Math.round((audio.seconds || 0) / 60)} min audio…`);
+      } catch (err) {
+        try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
+        const msg = (err?.message || String(err)).slice(0, 300);
+        console.error(`[transcribe-url ${job.id.slice(0, 8)}] download failed: ${msg}`);
+        await recordCall({
+          install_id: auditInstall,
+          license_fingerprint: licenseFp,
+          tier,
+          pipeline: "transcribe",
+          backend: chosenBackend,
+          model: null,
+          status: "error",
+          credit_charged: 0,
+          duration_ms: Date.now() - workerT0,
+          download_ms: Date.now() - workerT0,
+          audio_seconds: null,
+          cost_usd: 0,
+          job_id: summaryJobId,
+          media_url: mediaUrl || null,
+          title: title || null,
+          error: "download_failed: " + msg,
+        });
+        markFailed(job.id, "download_failed: " + msg);
+        return;
+      }
+
+      // Transcription phase
+      const cfg = await getConfigSnapshot();
+      let result;
+      // Stamp the moment transcribe is about to start (AFTER download
+      // finished). Used for duration_ms on the audit row so the
+      // "TX wall time" column reflects ONLY the transcribe phase.
+      const txPhaseStart = Date.now();
+      try {
+        const audioBuf = await fs.readFile(audio.filePath);
+        const mimeType = bodyMime || audio.mimeType;
+        if (chosenBackend === "gemini") {
+          const backend = createGeminiBackend({
+            apiKey: cfg.relay_gemini_api_key,
+            transcriptionModel: cfg.relay_gemini_transcription_model,
+            analysisModel: cfg.relay_gemini_analysis_model,
+            txChunkSeconds: (cfg.relay_gemini_tx_chunk_minutes || 30) * 60,
+            txConcurrency: cfg.relay_gemini_tx_concurrency || 12,
+            transcribePromptOverride: cfg.relay_transcribe_prompt || "",
+          });
+          result = await backend.transcribeAudio({
+            audio: audioBuf,
+            mimeType,
+            title: title || "",
+            channel: channel || "",
+            description: description || "",
+            chapters: Array.isArray(chapters) ? chapters : [],
+            offsetSeconds: 0,
+          });
+        } else {
+          const backend = createHardwareBackend({
+            parakeetBaseURL: hw.transcribe.url || "",
+            gemmaBaseURL: hw.analyze.url || "",
+            sparkControlBaseURL: hw.sparkBase || "",
+            parakeetModel: hw.transcribe.model || "",
+            gemmaModel: hw.analyze.model || "",
+            txChunkSeconds: (cfg.relay_hardware_tx_chunk_minutes || 5) * 60,
+            txChunkOverlapSeconds: cfg.relay_hardware_tx_chunk_overlap_seconds ?? 30,
+            diarizationEnabled: !!cfg.relay_hardware_diarization_enabled,
+            clusterThresholdPct: cfg.relay_hardware_voice_clustering_threshold ?? 70,
+            anchorMinSpeakingSec: cfg.relay_hardware_anchor_min_speaking_sec ?? 30,
+            smallClusterMaxSpeakingSec: cfg.relay_hardware_small_cluster_max_speaking_sec ?? 15,
+            uncertainMarginPct: cfg.relay_hardware_uncertain_margin_pct ?? 10,
+            txConcurrency: cfg.relay_hardware_tx_concurrency || 4,
+          });
+          result = await backend.transcribeAudio({
+            audio: audioBuf,
+            mimeType,
+            offsetSeconds: 0,
+          });
+        }
+      } catch (err) {
+        try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
+        if (reusedSummaryJob) await refundJob({ creditKey, installId, license, jobId: summaryJobId });
+        const msg = (err?.message || String(err)).slice(0, 400);
+        console.error(`[transcribe-url ${job.id.slice(0, 8)}] transcribe failed: ${msg}`);
+        if (chosenBackend === "hardware") {
+          reportHealthEvent({
+            service: "parakeet",
+            ok: false,
+            error: msg.slice(0, 280),
+            ms: Date.now() - workerT0,
+          });
+        }
+        await recordCall({
+          install_id: auditInstall,
+          license_fingerprint: licenseFp,
+          tier,
+          pipeline: "transcribe",
+          backend: chosenBackend,
+          model:
+            chosenBackend === "gemini"
+              ? cfg.relay_gemini_transcription_model
+              : hw.transcribe.model || "(auto)",
+          status: "error",
+          credit_charged: 0,
+          duration_ms: Date.now() - txPhaseStart,
+          download_ms: downloadMs,
+          audio_seconds: audio?.seconds || null,
+          audio_bytes: audio?.bytes || null,
+          cost_usd: 0,
+          job_id: summaryJobId,
+          media_url: mediaUrl || null,
+          title: title || null,
+          error: msg,
+        });
+        markFailed(job.id, msg);
+        return;
+      } finally {
+        try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
+      }
+
+      // Success — commit credit (once per summary job_id), audit, mark done.
+      let creditCharged = 0;
+      if (!reusedSummaryJob) {
+        await commitCredit({ creditKey, installId, license, backend: chosenBackend, tier });
+        await markJobCharged({ creditKey, installId, license, jobId: summaryJobId, backend: chosenBackend, tier });
+        creditCharged = 1;
+      }
+      const costDetails =
+        chosenBackend === "gemini" && result.usage
+          ? calcGeminiCost(result.model, result.usage)
+          : {
+              input_tokens: 0,
+              output_tokens: 0,
+              thinking_tokens: 0,
+              cost_usd: 0,
+            };
+      // Truncation detection — mark partial when any chunk hit
+      // the silent output-token cap and emitted < 80% of its
+      // expected audio. See gemini.js for the actual coverage
+      // computation; here we just propagate to the audit row.
+      const truncatedChunks = Array.isArray(result?.truncated_chunks)
+        ? result.truncated_chunks
+        : [];
+      const wasTruncated = truncatedChunks.length > 0;
+      const truncationError = wasTruncated
+        ? `transcribe: ${truncatedChunks.length} chunk(s) truncated — missing ~${truncatedChunks.reduce((s, c) => s + (c.missingSec || 0), 0)}s of speech (model: ${result.model || "unknown"}). Likely hit maxOutputTokens.`
+        : null;
+      await recordCall({
+        install_id: auditInstall,
+        license_fingerprint: licenseFp,
+        tier,
+        pipeline: "transcribe",
+        backend: chosenBackend,
+        model: result?.model || null,
+        status: wasTruncated ? "partial" : "success",
+        credit_charged: creditCharged,
+        duration_ms: Date.now() - txPhaseStart,
+        download_ms: downloadMs,
+        audio_bytes: audio.bytes,
+        audio_seconds: audio.seconds || null,
+        job_id: summaryJobId,
+        attempts: result?.attempts || null,
+        // Per-job context for the operator dashboard's per-video table.
+        // media_url + title let the dashboard show what was being
+        // processed; chunk_count exposes the new server-side chunking
+        // (1 for short audio, N for ≥30 min audio split by the Gemini
+        // backend or by the hardware backend's Parakeet chunker).
+        media_url: mediaUrl || null,
+        title: title || null,
+        chunk_count: result?.chunk_count ?? null,
+        chunk_durations_ms: result?.chunk_durations_ms || null,
+        truncated_chunks: wasTruncated ? truncatedChunks : null,
+        error: truncationError,
+        ...costDetails,
+      });
+      markComplete(job.id, {
+        result,
+        credit_charged: creditCharged,
+        tier,
+      });
+      console.log(
+        `[transcribe-url ${job.id.slice(0, 8)}] complete in ${((Date.now() - workerT0) / 1000).toFixed(1)}s`
+      );
+      // Optional: persist transcript output for the operator's "View
+      // output" dashboard feature. Only when the config flag is set
+      // (default false) — saving real-user transcripts is an opt-in
+      // operator decision, not a default. We only have the transcript
+      // here (analyze runs as a separate /relay/analyze call in the
+      // Recap flow); the analyze row overwrites this file later with
+      // the full transcript+analysis payload. NOTE(review): meant as
+      // best-effort, but a rejection here reaches the outer .catch.
+      if (cfg.relay_save_user_outputs) {
+        await saveJobOutput(summaryJobId || job.id, {
+          batch_id: null,
+          source: null,
+          transcript: result?.text || "",
+          analysis: null,
+          analysis_raw_text: null,
+          meta: {
+            title: title || null,
+            media_url: mediaUrl,
+            audio_seconds: audio.seconds || null,
+            audio_bytes: audio.bytes,
+            captions_mode: null,
+            transcribe_backend: chosenBackend,
+            transcribe_model: result?.model || null,
+            analyze_backend: null,
+            analyze_model: null,
+          },
+        });
+      }
+    })().catch((err) => {
+      // Top-level catch — reached by throws outside the worker's two
+      // try blocks (mkdtemp, config load, credit commit, audit write,
+      // output save) so the job doesn't sit in "running" forever.
+      markFailed(job.id, "worker_crashed: " + (err?.message || String(err)));
+      console.error(`[transcribe-url ${job.id.slice(0, 8)}] worker crashed:`, err);
+    });
+
+    // Hand back the job_id immediately. Client will poll for status.
+    const body = await envelope({
+      result: {
+        job_id: job.id,
+        status: "queued",
+        kind: "transcribe-url",
+      },
+      creditKey,
+      installId,
+      license,
+      tier,
+    });
+    res.json(body);
+  });
+
// GET /relay/jobs/:id — poll endpoint for background jobs.
//
// Owner-scoped: a job is only returned to the identity that created
// it. Jobs that carry metadata.owner are matched against the caller's
// creditKey; jobs without it are matched on install_id instead.
//
// Responses (all wrapped in the standard envelope):
//   401/err.status  identity could not be resolved
//   400             license identity without X-Recap-Install-Id
//   404             no job with that id
//   403             job exists but belongs to a different owner
//   200             status/progress, plus the full transcribe result
//                   and credit_charged once status === "complete"
router.get("/jobs/:id", async (req, res) => {
  let identity;
  try {
    identity = await resolveIdentity(req);
  } catch (err) {
    const e = await errorEnvelope({
      error: err?.message || "auth_error",
      statusHint: err?.status || 401,
    });
    return res.status(e.statusHint || 401).json(e.body);
  }
  if (identity.kind === "license" && !identity.installId) {
    const e = await errorEnvelope({
      error: "missing X-Recap-Install-Id header",
      statusHint: 400,
    });
    return res.status(400).json(e.body);
  }

  const { creditKey, installId, license } = identity;
  const ownerRow = await getOrCreateRow({ creditKey, installId, license });
  const tier = identityTier(identity, ownerRow);

  const jobId = (req.params.id || "").trim();
  const job = getJob(jobId);
  if (!job) {
    const e = await errorEnvelope({
      error: "job_not_found",
      creditKey,
      installId,
      tier,
      statusHint: 404,
    });
    return res.status(404).json(e.body);
  }

  // New jobs carry metadata.owner = creditKey; older jobs only carry
  // install_id. Authorize by whichever the job has.
  const ownerOk = job.metadata?.owner
    ? job.metadata.owner === creditKey
    : identity.installId && job.install_id === identity.installId;
  if (!ownerOk) {
    const e = await errorEnvelope({
      error: "job_belongs_to_different_owner",
      creditKey,
      installId,
      tier,
      statusHint: 403,
    });
    return res.status(403).json(e.body);
  }

  const isComplete = job.status === "complete";
  const body = await envelope({
    result: {
      job_id: job.id,
      kind: job.kind,
      status: job.status,
      progress: job.progress,
      started_at: job.started_at,
      updated_at: job.updated_at,
      completed_at: job.completed_at,
      // Include the FULL transcribe-result on completion so the
      // client doesn't need a second round-trip.
      result: isComplete ? job.result?.result : null,
      credit_charged: isComplete ? job.result?.credit_charged || 0 : 0,
      error: job.error,
    },
    creditKey,
    installId,
    license,
    tier,
  });
  res.json(body);
});