Wire new routes; identity, summarize-url, dashboard, admin

2026-06-13 13:36:30 -05:00
parent 04dcf86fa4
commit 318c6c4b81
20 changed files with 12407 additions and 499 deletions
@@ -0,0 +1,902 @@
+// POST /admin/test-run — operator-side benchmarking flow.
+//
+// Same end-to-end pipeline as /relay/transcribe-url, but with two
+// key differences:
+//   1. The operator can OVERRIDE backend + model per call, bypassing
+//      planBackend's tier/preference logic. Used by the dashboard's
+//      benchmark suite to test specific permutations.
+//   2. The audit row is tagged with batch_id + source="admin-test"
+//      so test runs are clearly distinguishable from real user
+//      traffic in the Jobs tab (and filterable / hideable from view).
+//
+// Request body (admin-auth-gated by virtue of being under /admin/*):
+//   {
+//     media_url:           string, required
+//     type?:               "youtube" | "podcast"
+//     title?:              string
+//     transcribe_backend:  "gemini" | "hardware", required unless
+//                          captions_mode is "use"
+//     transcribe_model?:   string (gemini model id; ignored when hardware)
+//     analyze_backend:     "gemini" | "hardware", required
+//     analyze_model?:      string
+//     batch_id?:           string — groups multiple test runs into one suite
+//                          (a fresh UUID is generated when omitted)
+//     captions_mode?:      "use" — fetch YouTube captions instead of
+//                          downloading + transcribing the audio
+//   }
+//
+// Response (immediate; job runs in background):
+//   { result: { job_id, status: "queued", batch_id, kind: "admin-test-run" } }
+// Poll GET /admin/jobs/:id (existing) for status; final transcript +
+// analyze result lands in the Jobs table once complete.
+
+import express from "express";
+import fs from "fs/promises";
+import os from "os";
+import path from "path";
+import { randomUUID } from "crypto";
+import { getConfigSnapshot } from "../config.js";
+import { createGeminiBackend } from "../backends/gemini.js";
+import { createHardwareBackend } from "../backends/hardware.js";
+import { resolveHardwareConfig } from "../hardware-config.js";
+import { recordCall } from "../audit-log.js";
+import { calcGeminiCost } from "../pricing.js";
+import { getAudioDurationSeconds } from "../audio-meta.js";
+import {
+  createJob,
+  markRunning,
+  setProgress,
+  markComplete,
+  markFailed,
+} from "../jobs.js";
+import {
+  looksLikeYouTube,
+  downloadDirect,
+  downloadYouTube,
+} from "./transcribe-url.js";
+import { fetchYouTubeCaptions } from "../youtube-captions.js";
+import { saveJobOutput } from "../output-store.js";
+import { runChunkedAnalysis } from "../chunked-analyze.js";
+
+// Synthetic install_id used for all test-run audit rows. Keeps them
+// out of any real-user aggregations + makes them filterable in the
+// Jobs tab via the existing install-id filter.
+// Passed as `installId` when minting jobs (createJob) and as
+// `install_id` on the recordCall audit rows written directly by the
+// routes and worker in this file.
+const TEST_INSTALL_ID = "admin-test";
+
+// ── TX-sharing cache ────────────────────────────────────────────
+// The benchmark suite has paired permutations that use the SAME
+// transcribe config but differ in their analyze backend:
+//   pair 1+6: TX = gemini-3.1-flash-lite (then AN gemini vs hardware)
+//   pair 4+5: TX = hardware              (then AN hardware vs gemini)
+//   pair 7+8: TX = captions              (then AN gemini vs hardware)
+// Without sharing, running both members of a pair re-transcribes
+// the same audio twice — wasteful (cost + wall time).
+//
+// Implementation: an in-memory Map keyed on (mediaUrl, txConfig)
+// whose values wrap a PROMISE for the transcript (together with its
+// start time and expiry). The first request in the pair inserts a
+// pending Promise; subsequent requests with the same key await that
+// Promise. Entries linger in the cache for ~10 minutes, measured
+// from when the compute STARTED (not from when it completed), so a
+// "rerun last" benchmark within that window also dedupes. Failed
+// computes are evicted as soon as they reject; expired entries are
+// swept on the next lookup to bound memory.
+//
+// The cache is process-local (single relay process); a relay
+// restart clears it. That's fine — benchmark suites are operator-
+// initiated and short-lived.
+// Lifetime of a cache entry in milliseconds (10 minutes). The expiry
+// is stamped when the entry is inserted, i.e. when its compute starts.
+const TX_CACHE_TTL_MS = 10 * 60 * 1000;
+const txCache = new Map(); // key (see txCacheKey) → { promise, expiresAt, startedAt }
+
+/**
+ * Build the TX-sharing cache key for one permutation.
+ *
+ * Captions runs are keyed on the media URL alone; audio runs are keyed
+ * on backend + model + media URL, with "(default)" standing in for an
+ * empty/missing model.
+ *
+ * @param {{mediaUrl: string, captionsMode?: string|null, txBackend?: string, txModel?: string}} fields
+ * @returns {string} cache key
+ */
+function txCacheKey({ mediaUrl, captionsMode, txBackend, txModel }) {
+  if (captionsMode !== "use") {
+    const modelLabel = txModel || "(default)";
+    return `tx:${txBackend}:${modelLabel}:${mediaUrl}`;
+  }
+  return `captions:${mediaUrl}`;
+}
+
+/**
+ * Return the shared transcript promise for `key`, starting the compute
+ * if no live cache entry exists.
+ *
+ * @param {string} key - cache key (see txCacheKey).
+ * @param {() => Promise<any>} computeFn - starts the underlying work;
+ *   invoked only on a cache miss.
+ * @returns {{promise: Promise<any>, cached: boolean, startedAt: number}}
+ *   `cached` is true when an existing entry was reused; `startedAt` is
+ *   the epoch-ms timestamp at which the underlying compute was started.
+ */
+function getOrComputeTx(key, computeFn) {
+  const now = Date.now();
+  // Evict expired entries opportunistically.
+  for (const [k, v] of txCache) {
+    if (v.expiresAt < now) txCache.delete(k);
+  }
+  const existing = txCache.get(key);
+  if (existing && existing.expiresAt > now) {
+    return {
+      promise: existing.promise,
+      cached: true,
+      startedAt: existing.startedAt,
+    };
+  }
+  const startedAt = Date.now();
+  const promise = computeFn();
+  // The expiry is counted from insertion, not from completion.
+  const entry = { promise, expiresAt: now + TX_CACHE_TTL_MS, startedAt };
+  txCache.set(key, entry);
+  // If the compute fails, evict the entry so the next attempt gets a
+  // fresh try (don't cache failures). Only evict when the stored entry
+  // is still OURS: if this entry expired while in flight and a newer
+  // compute took over the key, a late failure here must not throw away
+  // that newer entry.
+  promise.catch(() => {
+    if (txCache.get(key) === entry) txCache.delete(key);
+  });
+  return { promise, cached: false, startedAt };
+}
+
+/**
+ * Parse an analyze response into the `{ sections: [...] }` shape that
+ * Recap's render expects.
+ *
+ * If the text contains a fenced code block (optionally tagged `json`),
+ * only the contents of the first such block are parsed; otherwise the
+ * whole trimmed text is parsed.
+ *
+ * @param {unknown} text - raw analyze output.
+ * @returns {object|null} the parsed object when it has an array
+ *   `sections` property; null for non-string/empty input, invalid JSON,
+ *   or a missing/non-array `sections` — so the caller can keep the raw
+ *   text for forensic review.
+ */
+function safeParseSections(text) {
+  if (typeof text !== "string" || text === "") return null;
+
+  const trimmed = text.trim();
+  const fenced = /```(?:json)?\s*([\s\S]*?)```/.exec(trimmed);
+  const candidate = fenced ? fenced[1].trim() : trimmed;
+
+  let parsed;
+  try {
+    parsed = JSON.parse(candidate);
+  } catch {
+    return null;
+  }
+  if (!parsed) return null;
+  return Array.isArray(parsed.sections) ? parsed : null;
+}
+
+/**
+ * Build the Express router for the operator benchmarking endpoints.
+ *
+ * Routes registered:
+ *   POST /test-run        one permutation; replies with the job id and
+ *                         then runs the worker in the background.
+ *   POST /test-run-suite  many permutations against one media_url; mints
+ *                         every job upfront, replies, then runs the
+ *                         phases one after another in the background.
+ *
+ * @returns {import("express").Router}
+ */
+export function adminTestRunRouter() {
+  const router = express.Router();
+
+  // Only these two backends may be chosen for transcribe / analyze.
+  const isKnownBackend = (name) => ["gemini", "hardware"].includes(name);
+
+  // Metadata stored on the job record. `extra` carries route-specific
+  // fields (the suite adds suite_position).
+  const jobMetadata = (ctx, extra = {}) => ({
+    media_url: ctx.mediaUrl,
+    title: ctx.title,
+    transcribe_backend: ctx.useCaptions ? "captions" : ctx.txBackend,
+    analyze_backend: ctx.anBackend,
+    batch_id: ctx.batchId,
+    captions_mode: ctx.captionsMode || null,
+    ...extra,
+  });
+
+  const mintJob = (ctx, extra) =>
+    createJob({
+      kind: "admin-test-run",
+      installId: TEST_INSTALL_ID,
+      metadata: jobMetadata(ctx, extra),
+    });
+
+  // ── POST /admin/test-run ────────────────────────────────────
+  router.post("/test-run", express.json({ limit: "1mb" }), async (req, res) => {
+    const body = req.body || {};
+    const mediaUrl = body.media_url;
+
+    if (!mediaUrl || typeof mediaUrl !== "string") {
+      return res.status(400).json({ error: "missing or non-string media_url" });
+    }
+    // When captions_mode === "use", the relay fetches YouTube captions
+    // instead of downloading + transcribing the audio, so the
+    // transcribe backend/model are not required (and are ignored).
+    const useCaptions = body.captions_mode === "use";
+    if (!useCaptions && !isKnownBackend(body.transcribe_backend)) {
+      return res.status(400).json({ error: "transcribe_backend must be 'gemini' or 'hardware' (unless captions_mode='use')" });
+    }
+    if (!isKnownBackend(body.analyze_backend)) {
+      return res.status(400).json({ error: "analyze_backend must be 'gemini' or 'hardware'" });
+    }
+
+    // Everything the worker needs, bundled so the same worker can be
+    // driven from this endpoint and from /test-run-suite.
+    const ctx = {
+      mediaUrl,
+      type: body.type,
+      title: body.title,
+      txBackend: body.transcribe_backend,
+      txModel: body.transcribe_model,
+      anBackend: body.analyze_backend,
+      anModel: body.analyze_model,
+      batchId: body.batch_id || randomUUID(),
+      captionsMode: body.captions_mode,
+      useCaptions,
+    };
+    const job = mintJob(ctx);
+
+    // Hand back the job_id immediately; the dashboard polls for status.
+    res.json({
+      result: {
+        job_id: job.id,
+        status: "queued",
+        batch_id: ctx.batchId,
+        kind: "admin-test-run",
+      },
+    });
+
+    // Fire-and-forget: a rejection from the worker marks the job failed.
+    executeTestRunWorker(job, ctx).catch((err) => {
+      markFailed(job.id, "worker_crashed: " + (err?.message || String(err)));
+      console.error(`[admin/test-run ${job.id.slice(0, 8)}] worker crashed:`, err);
+    });
+  });
+
+  // ── POST /admin/test-run-suite ──────────────────────────────
+  // Server-side benchmark runner. Accepts an ARRAY of permutations,
+  // mints jobs for all of them upfront (so the client can show the
+  // table immediately), then runs them in phases in the background.
+  // The work lives in this process, so the suite keeps running after
+  // the response has been sent, independent of the client.
+  //
+  // Phases are grouped by TX fingerprint so paired permutations fire
+  // concurrently and share TX via the in-memory promise cache.
+  router.post("/test-run-suite", express.json({ limit: "10mb" }), async (req, res) => {
+    const { media_url: mediaUrl, permutations } = req.body || {};
+    if (!mediaUrl || typeof mediaUrl !== "string") {
+      return res.status(400).json({ error: "missing or non-string media_url" });
+    }
+    if (!Array.isArray(permutations) || permutations.length === 0) {
+      return res.status(400).json({ error: "permutations must be a non-empty array" });
+    }
+
+    const batchId = randomUUID();
+    const items = Array.from(permutations, (raw, idx) => {
+      const p = raw || {};
+      const position = idx + 1;
+      const ctx = {
+        mediaUrl,
+        type: p.type,
+        title: p.title || `permutation ${position}`,
+        txBackend: p.transcribe_backend,
+        txModel: p.transcribe_model,
+        anBackend: p.analyze_backend,
+        anModel: p.analyze_model,
+        batchId,
+        captionsMode: p.captions_mode,
+        useCaptions: p.captions_mode === "use",
+      };
+      // Validate per-perm — a bad permutation must not poison the
+      // whole batch; flag it so the runner records the error later.
+      if (!ctx.useCaptions && !isKnownBackend(ctx.txBackend)) {
+        ctx._validationError = `permutation ${position}: transcribe_backend must be 'gemini' or 'hardware'`;
+      } else if (!isKnownBackend(ctx.anBackend)) {
+        ctx._validationError = `permutation ${position}: analyze_backend must be 'gemini' or 'hardware'`;
+      }
+      return { job: mintJob(ctx, { suite_position: position }), ctx };
+    });
+
+    // Respond immediately with the planned IDs so the dashboard can
+    // start polling without blocking on the actual work.
+    res.json({
+      result: {
+        batch_id: batchId,
+        status: "queued",
+        job_ids: items.map((it) => it.job.id),
+        total: items.length,
+        kind: "admin-test-run-suite",
+      },
+    });
+
+    const shortBatch = batchId.slice(0, 8);
+
+    // Run one permutation. Invalid ones are failed + audited without
+    // touching the worker; worker rejections are contained here so
+    // they never abort the rest of the suite.
+    const runItem = async (item) => {
+      const { job, ctx } = item;
+      if (ctx._validationError) {
+        markFailed(job.id, ctx._validationError);
+        await recordCall({
+          install_id: TEST_INSTALL_ID,
+          tier: "core",
+          pipeline: "transcribe",
+          backend: null,
+          model: null,
+          status: "error",
+          duration_ms: 0,
+          cost_usd: 0,
+          job_id: job.id,
+          batch_id: batchId,
+          source: "admin-test",
+          media_url: ctx.mediaUrl,
+          title: ctx.title,
+          error: ctx._validationError,
+        });
+        return;
+      }
+      try {
+        await executeTestRunWorker(job, ctx);
+      } catch (err) {
+        markFailed(job.id, "worker_crashed: " + (err?.message || String(err)));
+        console.error(
+          `[admin/test-run-suite ${job.id.slice(0, 8)}] worker crashed:`,
+          err
+        );
+      }
+    };
+
+    // ── Background phase runner ──
+    // Permutations within a phase fire concurrently (their TX dedupes
+    // via the cache); phases themselves run sequentially so the
+    // transcribe backends are not overloaded.
+    setImmediate(async () => {
+      try {
+        const phases = groupItemsByTxFingerprint(items);
+        console.log(
+          `[admin/test-run-suite] batch=${shortBatch} ${items.length} perms in ${phases.length} phases`
+        );
+        for (const [phaseIdx, phase] of phases.entries()) {
+          const plural = phase.length === 1 ? "" : "s";
+          console.log(
+            `[admin/test-run-suite] batch=${shortBatch} phase ${phaseIdx + 1}/${phases.length}: firing ${phase.length} perm${plural}`
+          );
+          await Promise.allSettled(phase.map((item) => runItem(item)));
+        }
+        console.log(`[admin/test-run-suite] batch=${shortBatch} complete`);
+      } catch (err) {
+        console.error(`[admin/test-run-suite] batch=${shortBatch} runner crashed:`, err);
+      }
+    });
+  });
+
+  return router;
+}
+
+/**
+ * Partition `{ job, ctx }` items into phases by TX fingerprint.
+ *
+ * Items whose ctx yields the same fingerprint (captions + media URL,
+ * or backend + model + media URL) land in the same phase. Phases are
+ * ordered by the first appearance of their fingerprint in `items`,
+ * and items keep their input order inside a phase.
+ *
+ * @param {Array<{job: object, ctx: object}>} items
+ * @returns {Array<Array<{job: object, ctx: object}>>} phases
+ */
+function groupItemsByTxFingerprint(items) {
+  // Map iteration follows insertion order, which gives the
+  // "first appearance wins" phase ordering for free.
+  const phaseByFingerprint = new Map();
+  for (const item of items) {
+    const { useCaptions, mediaUrl, txBackend, txModel } = item.ctx;
+    const fingerprint = useCaptions
+      ? `captions:${mediaUrl}`
+      : `tx:${txBackend}:${txModel || ""}:${mediaUrl}`;
+    const phase = phaseByFingerprint.get(fingerprint);
+    if (phase === undefined) {
+      phaseByFingerprint.set(fingerprint, [item]);
+    } else {
+      phase.push(item);
+    }
+  }
+  return [...phaseByFingerprint.values()];
+}
+
+// Extracted worker — runs the full download / transcribe / analyze
+// pipeline for one permutation. Used by both /admin/test-run (one
+// permutation) and /admin/test-run-suite (many permutations
+// orchestrated server-side in phases).
+//
+// Parameters:
+//   job — job record returned by createJob (only `job.id` is read).
+//   ctx — { mediaUrl, type, title, txBackend, txModel, anBackend,
+//           anModel, batchId, useCaptions }.
+//
+// Outcome: resolves once the job has been marked complete or failed.
+// Expected failures (captions fetch, download, transcribe) are
+// audited via recordCall and reported via markFailed here; an analyze
+// failure is only logged and the job still completes. Anything else
+// rejects, and the callers mark the job "worker_crashed".
+//
+// The scratch directory is removed in a single `finally`, so it is
+// cleaned up on every exit path — including unexpected throws from
+// config loading, audit writes, or output persistence.
+async function executeTestRunWorker(job, ctx) {
+  const {
+    mediaUrl, type, title,
+    txBackend, txModel, anBackend, anModel,
+    batchId: effectiveBatchId,
+    useCaptions,
+  } = ctx;
+  const workerT0 = Date.now();
+  markRunning(job.id);
+
+  // Identity fields shared by every transcribe audit row written
+  // below. Rows that reuse a sibling's TX override `source`.
+  const auditBase = {
+    install_id: TEST_INSTALL_ID,
+    tier: "core",
+    pipeline: "transcribe",
+    job_id: job.id,
+    batch_id: effectiveBatchId,
+    source: "admin-test",
+    media_url: mediaUrl,
+    title: title || null,
+  };
+
+  const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "admin-tr-"));
+  try {
+    const isYT = type === "youtube" || (!type && looksLikeYouTube(mediaUrl));
+
+    // ── Captions fast-path branch ──
+    // For YouTube URLs with captions_mode="use", fetch the caption
+    // track and skip audio download + transcribe entirely. The
+    // captions text feeds straight into analyze.
+    if (useCaptions) {
+      if (!isYT) {
+        await recordCall({
+          ...auditBase,
+          backend: "captions",
+          model: null,
+          status: "error",
+          duration_ms: 0,
+          cost_usd: 0,
+          error: "captions_mode='use' requires a YouTube URL (no captions for podcast audio)",
+        });
+        markFailed(job.id, "captions_mode requires YouTube URL");
+        return;
+      }
+      setProgress(job.id, "fetching captions…");
+      const capStart = Date.now();
+      let cap;
+      let capFromCache = false;
+      let capSharedStartedAt = capStart;
+      const capKey = txCacheKey({ mediaUrl, captionsMode: "use" });
+      try {
+        const { promise, cached, startedAt: sharedStartedAt } = getOrComputeTx(capKey, () =>
+          fetchYouTubeCaptions({ url: mediaUrl, tmpDir })
+        );
+        capFromCache = cached;
+        capSharedStartedAt = sharedStartedAt || capStart;
+        if (cached) setProgress(job.id, "reusing shared captions from paired permutation…");
+        cap = await promise;
+      } catch (err) {
+        await recordCall({
+          ...auditBase,
+          backend: "captions",
+          model: null,
+          status: "error",
+          duration_ms: Date.now() - capStart,
+          audio_seconds: null,
+          cost_usd: 0,
+          error: (err?.message || String(err)).slice(0, 300),
+        });
+        markFailed(job.id, "captions_fetch_failed: " + (err?.message || err));
+        return;
+      }
+      const captionsModel = cap.captions_source === "auto" ? "youtube-auto" : "youtube-manual";
+      // Record the captions "transcribe" row. backend="captions" so
+      // the dashboard can filter / display it distinctly. When this
+      // permutation reused a paired sibling's captions fetch, the
+      // attributed wall-time is measured from the cache entry's
+      // startedAt, so BOTH paired rows show real TX-rate numbers.
+      // The "admin-test-shared-tx" source flag lets aggregate
+      // analytics dedupe.
+      await recordCall({
+        ...auditBase,
+        backend: "captions",
+        model: captionsModel,
+        status: "success",
+        duration_ms: Date.now() - capSharedStartedAt,
+        audio_seconds: cap.duration_seconds || null,
+        audio_bytes: null,        // no audio downloaded
+        download_ms: null,        // n/a
+        chunk_count: 1,
+        cost_usd: 0,
+        source: capFromCache ? "admin-test-shared-tx" : "admin-test",
+      });
+      setProgress(job.id, "analyzing topics…");
+      const cfg2 = await getConfigSnapshot();
+      const hw2 = await resolveHardwareConfig(cfg2);
+      let anResultForCaptions = null;
+      try {
+        anResultForCaptions = await runAnalyzeForTestRun({
+          transcriptText: cap.text || "",
+          anBackend,
+          anModel,
+          cfg: cfg2,
+          hw: hw2,
+          jobId: job.id,
+          batchId: effectiveBatchId,
+          mediaUrl,
+          title,
+          audioSeconds: cap.duration_seconds || null,
+          audioBytes: null,
+        });
+      } catch (err) {
+        // Analyze failure does not fail the job: captions succeeded.
+        console.warn(`[admin/test-run ${job.id.slice(0, 8)}] analyze failed (captions): ${err?.message || err}`);
+      }
+      // Save output (test-runs always persist regardless of the
+      // save-user-outputs flag).
+      await saveJobOutput(job.id, {
+        batch_id: effectiveBatchId,
+        source: "admin-test",
+        transcript: cap.text || "",
+        analysis: anResultForCaptions ? safeParseSections(anResultForCaptions.text) : null,
+        analysis_raw_text: anResultForCaptions?.text || null,
+        meta: {
+          title: title || null,
+          media_url: mediaUrl,
+          audio_seconds: cap.duration_seconds || null,
+          audio_bytes: null,
+          captions_mode: "use",
+          captions_source: cap.captions_source || null,
+          transcribe_backend: "captions",
+          transcribe_model: captionsModel,
+          analyze_backend: anBackend,
+          analyze_model: anResultForCaptions?.model || null,
+        },
+      });
+      markComplete(job.id, {
+        result: { transcribe_model: "captions", batch_id: effectiveBatchId },
+      });
+      return;
+    }
+
+    // ── Audio download path (no captions) ──
+    setProgress(job.id, "downloading media…");
+    let audio;
+    let downloadMs = 0;
+    try {
+      const dlStart = Date.now();
+      audio = isYT
+        ? await downloadYouTube(mediaUrl, tmpDir)
+        : await downloadDirect(mediaUrl, tmpDir);
+      downloadMs = Date.now() - dlStart;
+      audio.seconds = await getAudioDurationSeconds(audio.filePath);
+      setProgress(job.id, `transcribing ${Math.round((audio.seconds || 0) / 60)} min audio…`);
+    } catch (err) {
+      const msg = (err?.message || String(err)).slice(0, 300);
+      await recordCall({
+        ...auditBase,
+        backend: txBackend,
+        model: null,
+        status: "error",
+        credit_charged: 0,
+        duration_ms: Date.now() - workerT0,
+        download_ms: Date.now() - workerT0,
+        audio_seconds: null,
+        cost_usd: 0,
+        error: "download_failed: " + msg,
+      });
+      markFailed(job.id, "download_failed: " + msg);
+      return;
+    }
+
+    // ── Transcription with the operator's chosen backend ──
+    // Uses the TX-sharing cache so that paired benchmark permutations
+    // only invoke the underlying backend ONCE — the second
+    // permutation awaits the first's in-flight promise and reuses its
+    // transcript.
+    const cfg = await getConfigSnapshot();
+    const hw = await resolveHardwareConfig(cfg);
+    let txResult;
+    let txFromCache = false;
+    const txStartedAt = Date.now();
+    let txSharedStartedAt = txStartedAt;
+    const cacheKey = txCacheKey({ mediaUrl, captionsMode: null, txBackend, txModel });
+    try {
+      const audioBuf = await fs.readFile(audio.filePath);
+      const { promise, cached, startedAt: sharedStartedAt } = getOrComputeTx(cacheKey, async () => {
+        if (txBackend === "gemini") {
+          const backend = createGeminiBackend({
+            apiKey: cfg.relay_gemini_api_key,
+            transcriptionModel: txModel || cfg.relay_gemini_transcription_model,
+            analysisModel: cfg.relay_gemini_analysis_model,
+            txChunkSeconds: (cfg.relay_gemini_tx_chunk_minutes || 30) * 60,
+            txConcurrency: cfg.relay_gemini_tx_concurrency || 12,
+            transcribePromptOverride: cfg.relay_transcribe_prompt || "",
+          });
+          return await backend.transcribeAudio({
+            audio: audioBuf,
+            mimeType: audio.mimeType || "audio/mpeg",
+            title: title || "",
+            offsetSeconds: 0,
+          });
+        }
+        if (!hw.transcribe.url) {
+          throw new Error("hardware transcribe URL not configured");
+        }
+        const backend = createHardwareBackend({
+          parakeetBaseURL: hw.transcribe.url,
+          gemmaBaseURL: hw.analyze.url || "",
+          sparkControlBaseURL: hw.sparkBase || "",
+          parakeetModel: hw.transcribe.model || "",
+          gemmaModel: hw.analyze.model || "",
+          txChunkSeconds: (cfg.relay_hardware_tx_chunk_minutes || 5) * 60,
+          txChunkOverlapSeconds: cfg.relay_hardware_tx_chunk_overlap_seconds ?? 30,
+          diarizationEnabled: !!cfg.relay_hardware_diarization_enabled,
+          clusterThresholdPct: cfg.relay_hardware_voice_clustering_threshold ?? 70,
+          anchorMinSpeakingSec: cfg.relay_hardware_anchor_min_speaking_sec ?? 30,
+          smallClusterMaxSpeakingSec: cfg.relay_hardware_small_cluster_max_speaking_sec ?? 15,
+          uncertainMarginPct: cfg.relay_hardware_uncertain_margin_pct ?? 10,
+          txConcurrency: cfg.relay_hardware_tx_concurrency || 4,
+        });
+        return await backend.transcribeAudio({
+          audio: audioBuf,
+          mimeType: audio.mimeType || "audio/mpeg",
+          offsetSeconds: 0,
+        });
+      });
+      txFromCache = cached;
+      txSharedStartedAt = sharedStartedAt || txStartedAt;
+      if (cached) {
+        setProgress(job.id, "reusing shared TX from paired permutation…");
+      }
+      txResult = await promise;
+    } catch (err) {
+      const msg = (err?.message || String(err)).slice(0, 400);
+      await recordCall({
+        ...auditBase,
+        backend: txBackend,
+        model: txBackend === "gemini" ? (txModel || cfg.relay_gemini_transcription_model) : (hw.transcribe.model || "(auto)"),
+        status: "error",
+        credit_charged: 0,
+        duration_ms: Date.now() - workerT0,
+        download_ms: downloadMs,
+        audio_seconds: audio?.seconds || null,
+        audio_bytes: audio?.bytes || null,
+        cost_usd: 0,
+        error: msg,
+      });
+      markFailed(job.id, "transcribe_failed: " + msg);
+      return;
+    }
+
+    // Audit the successful transcribe.
+    const txCostDetails =
+      txBackend === "gemini" && txResult.usage
+        ? calcGeminiCost(txResult.model, txResult.usage)
+        : { input_tokens: 0, output_tokens: 0, thinking_tokens: 0, cost_usd: 0 };
+    // Truncation detection: when the backend reports truncated
+    // chunks, mark the benchmark row partial so the operator doesn't
+    // compare a truncated TX run against a clean one.
+    const txTruncatedChunks = Array.isArray(txResult?.truncated_chunks)
+      ? txResult.truncated_chunks
+      : [];
+    const txWasTruncated = txTruncatedChunks.length > 0;
+    const txTruncationError = txWasTruncated
+      ? `transcribe: ${txTruncatedChunks.length} chunk(s) truncated — missing ~${txTruncatedChunks.reduce((s, c) => s + (c.missingSec || 0), 0)}s of speech (model: ${txResult.model || "unknown"})`
+      : null;
+    await recordCall({
+      ...auditBase,
+      backend: txBackend,
+      model: txResult.model || null,
+      status: txWasTruncated ? "partial" : "success",
+      credit_charged: 0,
+      truncated_chunks: txWasTruncated ? txTruncatedChunks : null,
+      error: txTruncationError,
+      // When this permutation reused a paired sibling's TX, the
+      // attributed duration is measured from when the ORIGINATING
+      // permutation started the TX, so both paired rows show real
+      // TX-rate numbers. Cost is zeroed on the sibling (only the
+      // originator pays); the "admin-test-shared-tx" source flag lets
+      // aggregate analytics dedupe across pairs.
+      duration_ms: Date.now() - txSharedStartedAt,
+      download_ms: downloadMs,
+      audio_bytes: audio.bytes,
+      audio_seconds: audio.seconds || null,
+      source: txFromCache ? "admin-test-shared-tx" : "admin-test",
+      attempts: txResult.attempts || null,
+      chunk_count: txResult.chunk_count ?? null,
+      // Per-chunk wall-times (ms), passed through from the backend.
+      chunk_durations_ms: txResult.chunk_durations_ms || null,
+      ...(txFromCache
+        ? { input_tokens: 0, output_tokens: 0, thinking_tokens: 0, cost_usd: 0 }
+        : txCostDetails),
+    });
+
+    // ── Analyze with the operator's chosen backend ──
+    setProgress(job.id, "analyzing topics…");
+    let anResult = null;
+    try {
+      anResult = await runAnalyzeForTestRun({
+        transcriptText: txResult.text || "",
+        anBackend,
+        anModel,
+        cfg,
+        hw,
+        jobId: job.id,
+        batchId: effectiveBatchId,
+        mediaUrl,
+        title,
+        audioSeconds: audio.seconds || null,
+        audioBytes: audio.bytes,
+      });
+    } catch (err) {
+      // We still mark the job complete since transcribe succeeded.
+      console.warn(`[admin/test-run ${job.id.slice(0, 8)}] analyze failed: ${err?.message || err}`);
+    }
+
+    // Save the transcript + analysis for the dashboard's "View
+    // output" feature. Test-run jobs always persist regardless of the
+    // save-user-outputs config flag.
+    await saveJobOutput(job.id, {
+      batch_id: effectiveBatchId,
+      source: "admin-test",
+      transcript: txResult.text || "",
+      analysis: anResult ? safeParseSections(anResult.text) : null,
+      analysis_raw_text: anResult?.text || null,
+      meta: {
+        title: title || null,
+        media_url: mediaUrl,
+        audio_seconds: audio.seconds || null,
+        audio_bytes: audio.bytes,
+        captions_mode: null,
+        transcribe_backend: txBackend,
+        transcribe_model: txResult.model || null,
+        analyze_backend: anBackend,
+        analyze_model: anResult?.model || null,
+      },
+    });
+
+    markComplete(job.id, {
+      result: {
+        transcribe_model: txResult.model,
+        batch_id: effectiveBatchId,
+      },
+    });
+  } finally {
+    // Best-effort scratch cleanup: a failure to remove the temp dir
+    // must never change the job's outcome, so the error is ignored.
+    try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
+  }
+}
+
+// Run the analyze step of an admin test run over the just-transcribed
+// text. Builds the requested backend, then hands off to
+// runChunkedAnalysis using the same windowing strategy Recap's client
+// uses (~18 min window body, 2 min overlap, N windows in flight).
+// Each window emits its own audit row via recordCall (handled inside
+// runChunkedAnalysis), so the Jobs table sees:
+//   - analyze_windows_total = N
+//   - analyze_ms            = sum of per-window duration_ms (total backend work)
+//   - wall_time_ms          = elapsed from first window start → last window end
+//                              (computed by job-stats.js from row timestamps)
+// Per-window `audio_seconds` is the window body length (not total audio),
+// so per-row rate columns (s/audio-min) divide by the right denominator.
+//
+// Parameters (single options object):
+//   transcriptText  transcript text to analyze
+//   anBackend       "gemini" selects the Gemini backend; any other value
+//                   takes the hardware path
+//   anModel         optional Gemini model override; falls back to
+//                   cfg.relay_gemini_analysis_model (not read on hardware)
+//   cfg             relay config object (keys, windowing tunables, prompt)
+//   hw              resolved hardware config (analyze / transcribe / sparkBase)
+//   jobId, batchId, mediaUrl, title
+//                   stamped onto the audit rows
+//   audioSeconds    total audio length; forwarded as totalAudioSec so the
+//                   section-count target matches the summarize-url path
+//   audioBytes      accepted for call-site symmetry; not read here
+//
+// Returns { text, model, attempts } from the chunked run. Throws when the
+// backend cannot be constructed (after attempting to write one failed
+// analyze audit row) or when runChunkedAnalysis rejects.
+async function runAnalyzeForTestRun({
+  transcriptText,
+  anBackend,
+  anModel,
+  cfg,
+  hw,
+  jobId,
+  batchId,
+  mediaUrl,
+  title,
+  audioSeconds, // total audio length → totalAudioSec (section-count target)
+  audioBytes,   // unused
+}) {
+  const useGemini = anBackend === "gemini";
+  // Resolved once so the backend, the returned model, and the failure
+  // audit row can never disagree about which Gemini model was requested.
+  const geminiModel = useGemini
+    ? (anModel || cfg.relay_gemini_analysis_model)
+    : null;
+  // Cost placeholder for calls that carry no billable usage.
+  const zeroCost = () => ({
+    input_tokens: 0, output_tokens: 0, thinking_tokens: 0, cost_usd: 0,
+  });
+
+  // Build the right backend, then hand to runChunkedAnalysis which
+  // handles per-window prompt building, parallelism, audit logging,
+  // and stitching. Construction errors (missing apiKey, missing
+  // hardware URL) are audited as a single failed analyze row so the
+  // Jobs table shows what happened — runChunkedAnalysis only writes
+  // rows once it has a backend to call.
+  let backend;
+  let resolvedModel;
+  let computeCostDetails;
+  try {
+    if (useGemini) {
+      backend = createGeminiBackend({
+        apiKey: cfg.relay_gemini_api_key,
+        transcriptionModel: cfg.relay_gemini_transcription_model,
+        analysisModel: geminiModel,
+        // tx knobs are unused on the analyze path but the factory
+        // accepts them anyway — pass for consistency.
+        txChunkSeconds: (cfg.relay_gemini_tx_chunk_minutes || 30) * 60,
+        txConcurrency: cfg.relay_gemini_tx_concurrency || 12,
+      });
+      resolvedModel = geminiModel;
+      computeCostDetails = (model, usage) =>
+        usage ? calcGeminiCost(model, usage) : zeroCost();
+    } else {
+      if (!hw.analyze.url) {
+        throw new Error("hardware analyze URL not configured");
+      }
+      backend = createHardwareBackend({
+        parakeetBaseURL: hw.transcribe.url || "",
+        gemmaBaseURL: hw.analyze.url,
+        sparkControlBaseURL: hw.sparkBase || "",
+        parakeetModel: hw.transcribe.model || "",
+        gemmaModel: hw.analyze.model || "",
+        txChunkSeconds: (cfg.relay_hardware_tx_chunk_minutes || 5) * 60,
+        txChunkOverlapSeconds: cfg.relay_hardware_tx_chunk_overlap_seconds ?? 30,
+        diarizationEnabled: !!cfg.relay_hardware_diarization_enabled,
+        clusterThresholdPct: cfg.relay_hardware_voice_clustering_threshold ?? 70,
+        anchorMinSpeakingSec: cfg.relay_hardware_anchor_min_speaking_sec ?? 30,
+        smallClusterMaxSpeakingSec: cfg.relay_hardware_small_cluster_max_speaking_sec ?? 15,
+        uncertainMarginPct: cfg.relay_hardware_uncertain_margin_pct ?? 10,
+        txConcurrency: cfg.relay_hardware_tx_concurrency || 4,
+      });
+      resolvedModel = hw.analyze.model || null;
+      // Operator hardware is not billed per token.
+      computeCostDetails = zeroCost;
+    }
+  } catch (err) {
+    // Best-effort audit row: a failure to write it must not replace the
+    // construction error, which is what the caller logs.
+    try {
+      await recordCall({
+        install_id: TEST_INSTALL_ID,
+        tier: "core",
+        pipeline: "analyze",
+        backend: anBackend,
+        model: useGemini ? geminiModel : (hw.analyze.model || "(auto)"),
+        status: "error",
+        duration_ms: 0,
+        audio_seconds: 0,
+        cost_usd: 0,
+        job_id: jobId,
+        batch_id: batchId,
+        source: "admin-test",
+        media_url: mediaUrl,
+        title: title || null,
+        error: (err?.message || String(err)).slice(0, 400),
+        window_idx: 0,
+        window_count: 1,
+      });
+    } catch (auditErr) {
+      console.warn(
+        `[admin/test-run ${String(jobId).slice(0, 8)}] could not audit analyze setup failure: ${auditErr?.message || auditErr}`,
+      );
+    }
+    throw err;
+  }
+
+  // Pull windowing tunables from config (Settings tab).
+  const bodyMin = useGemini
+    ? (cfg.relay_gemini_analyze_window_minutes || 18)
+    : (cfg.relay_hardware_analyze_window_minutes || 18);
+  const overlapMin = useGemini
+    ? (cfg.relay_gemini_analyze_overlap_minutes || 2)
+    : (cfg.relay_hardware_analyze_overlap_minutes || 2);
+  const concurrency = useGemini
+    ? (cfg.relay_gemini_analyze_concurrency || 12)
+    : (cfg.relay_hardware_analyze_concurrency || 8);
+  const cutoffMin = cfg.relay_analyze_cutoff_minutes || 25;
+
+  const result = await runChunkedAnalysis({
+    transcriptText,
+    backend,
+    pipelineBackend: anBackend,
+    jobId,
+    batchId,
+    mediaUrl,
+    title,
+    installId: TEST_INSTALL_ID,
+    source: "admin-test",
+    computeCostDetails,
+    bodySeconds: bodyMin * 60,
+    overlapSeconds: overlapMin * 60,
+    concurrency,
+    cutoffSeconds: cutoffMin * 60,
+    analyzePromptOverride: cfg.relay_analyze_prompt || "",
+    // Section-count target wiring (matches the summarize-url path).
+    // Without these, buildWindowPrompt falls back to "1 section" —
+    // works defensively but means test-run benchmarks don't reflect
+    // production segmentation density.
+    totalAudioSec: audioSeconds || 0,
+    targetTotalsByBucket: {
+      under_30: cfg.relay_analyze_total_sections_under_30,
+      "30_60": cfg.relay_analyze_total_sections_30_60,
+      "60_90": cfg.relay_analyze_total_sections_60_90,
+      "90_120": cfg.relay_analyze_total_sections_90_120,
+      "120_150": cfg.relay_analyze_total_sections_120_150,
+      "150_180": cfg.relay_analyze_total_sections_150_180,
+      over_180: cfg.relay_analyze_total_sections_over_180,
+    },
+  });
+
+  return {
+    text: result.text || "",
+    model: result.model || resolvedModel,
+    attempts: result.attempts,
+  };
+}
@@ -16,14 +16,21 @@
 // margin, and speed metrics.

 import express from "express";
-import { resolveLicense } from "../keysat-client.js";
-import { getOrCreateRow, planBackend, commitCredit } from "../credits.js";
+import { resolveIdentity, identityTier } from "../identity.js";
+import {
+  getOrCreateRow,
+  planBackend,
+  commitCredit,
+  licenseFingerprint,
+} from "../credits.js";
 import { lookupJob, markJobCharged, refundJob } from "../job-credits.js";
 import { getConfigSnapshot, getTierQuotas } from "../config.js";
 import { createGeminiBackend } from "../backends/gemini.js";
 import { createHardwareBackend } from "../backends/hardware.js";
 import { envelope, errorEnvelope } from "./envelope.js";
 import { recordCall } from "../audit-log.js";
+import { resolveHardwareConfig } from "../hardware-config.js";
+import { reportHealthEvent } from "../spark-control-events.js";
 import { calcGeminiCost } from "../pricing.js";

 export function analyzeRouter() {
@@ -31,72 +38,100 @@ export function analyzeRouter() {

  router.post("/analyze", express.json({ limit: "10mb" }), async (req, res) => {
    const t0 = Date.now();
-    const installId = req.header("X-Recap-Install-Id");
    const jobId = req.header("X-Recap-Job-Id") || null;
-    const auth = req.header("Authorization");

-    if (!installId) {
+    let identity;
+    try {
+      identity = await resolveIdentity(req);
+    } catch (err) {
+      const e = await errorEnvelope({
+        error: err?.message || "auth_error",
+        statusHint: err?.status || 401,
+      });
+      return res.status(e.statusHint || 401).json(e.body);
+    }
+    if (identity.kind === "license" && !identity.installId) {
      const e = await errorEnvelope({
        error: "missing X-Recap-Install-Id header",
        statusHint: 400,
      });
      return res.status(400).json(e.body);
    }
+    const { creditKey, installId, license } = identity;
    const prompt = req.body?.prompt;
    if (!prompt || typeof prompt !== "string") {
      const e = await errorEnvelope({
        error: "missing or non-string body.prompt",
+        creditKey,
        installId,
        statusHint: 400,
      });
      return res.status(400).json(e.body);
    }

-    const license = await resolveLicense(auth);
-    const tier = license.tier;
-
-    const row = await getOrCreateRow(installId);
+    const row = await getOrCreateRow({ creditKey, installId, license });
+    const tier = identityTier(identity, row);
    row.tier_snapshot = tier;
+    const licenseFp = identity.kind === "cloud" ? null : licenseFingerprint(license);
+    const auditInstall = installId || identity.userId || null;

-    let reusedJob = false;
-    let chosenBackend = null;
-    const existingJob = lookupJob(installId, jobId);
-    if (existingJob) {
-      reusedJob = true;
-      chosenBackend = existingJob.backend;
-    } else {
-      const cfg = await getConfigSnapshot();
-      const hasHardware = !!cfg.relay_gemma_base_url;
-      const quota = await getTierQuotas();
-      const preference =
-        cfg.relay_analyze_backend_preference || "gemini_first";
-      const plan = planBackend(row, quota, { hasHardware, preference });
-      if (!plan.allowed) {
-        await recordCall({
-          install_id: installId,
-          tier,
-          pipeline: "analyze",
-          backend: null,
-          model: null,
-          status: "refused",
-          credit_charged: 0,
-          duration_ms: Date.now() - t0,
-          cost_usd: 0,
-          job_id: jobId,
-          error: plan.reason,
-        });
-        const e = await errorEnvelope({
-          error: plan.reason,
-          installId,
-          tier,
-          statusHint: 402,
-        });
-        return res.status(402).json(e.body);
-      }
-      chosenBackend = plan.backend;
-    }
-
+    // Two separate decisions on every call:
+    //   1. Billing: did we already charge a credit for this job? (look
+    //      up by job_id; reused → don't charge again.)
+    //   2. Routing: which backend serves THIS pipeline step's request?
+    //      (always per-pipeline preference + planBackend, even when
+    //      the job has a prior transcribe call that routed elsewhere.)
+    //
+    // The old code conflated the two — it copied `backend` from the
+    // existing job, which meant analyze would silently inherit
+    // transcribe's backend choice even when the operator's analyze
+    // preference said something different. Fixed: routing is decided
+    // fresh per pipeline step, regardless of job history.
+    const reusedJob = !!lookupJob({ creditKey, installId, license, jobId });
    const cfg = await getConfigSnapshot();
+    const hw = await resolveHardwareConfig(cfg);
+    // Operator-only diagnostic — see summarize-url.js for the full
+    // reasoning. We don't 503 here on blocked_reason because doing
+    // so pre-empts planBackend and would surface operator-internal
+    // Spark Control / vLLM wording to clients even when Gemini was
+    // the configured preference. planBackend correctly routes around
+    // an unavailable hardware path via hasHardware = false.
+    if (hw.analyze.blocked_reason) {
+      console.warn(
+        `[analyze] hardware analyze currently blocked (planBackend will route to Gemini if available): ${hw.analyze.blocked_reason}`,
+      );
+    }
+    const hasHardware = !!hw.analyze.url;
+    const quota = await getTierQuotas();
+    const preference =
+      cfg.relay_analyze_backend_preference || "gemini_first";
+    const plan = planBackend(row, quota, { hasHardware, preference });
+    if (!plan.allowed) {
+      await recordCall({
+        install_id: auditInstall,
+        license_fingerprint: licenseFp,
+        tier,
+        pipeline: "analyze",
+        backend: null,
+        model: null,
+        status: "refused",
+        credit_charged: 0,
+        duration_ms: Date.now() - t0,
+        cost_usd: 0,
+        job_id: jobId,
+        error: plan.reason,
+      });
+      const e = await errorEnvelope({
+        error: plan.reason,
+        creditKey,
+        installId,
+        tier,
+        statusHint: 402,
+      });
+      return res.status(402).json(e.body);
+    }
+    const chosenBackend = plan.backend;
+
    let result;
    try {
      if (chosenBackend === "gemini") {
@@ -108,24 +143,39 @@ export function analyzeRouter() {
        result = await backend.analyzeText({ prompt });
      } else {
        const backend = createHardwareBackend({
-          parakeetBaseURL: cfg.relay_parakeet_base_url,
-          gemmaBaseURL: cfg.relay_gemma_base_url,
-          parakeetModel: cfg.relay_parakeet_model,
-          gemmaModel: cfg.relay_gemma_model,
+          parakeetBaseURL: hw.transcribe.url || "",
+          gemmaBaseURL: hw.analyze.url || "",
+          sparkControlBaseURL: hw.sparkBase || "",
+          parakeetModel: hw.transcribe.model || "",
+          gemmaModel: hw.analyze.model || "",
        });
        result = await backend.analyzeText({ prompt });
      }
    } catch (err) {
-      if (reusedJob) refundJob(installId, jobId);
+      if (reusedJob) await refundJob({ creditKey, installId, license, jobId });
      console.error(`[relay/analyze] backend error: ${err?.message}`);
+      // Passive health-event report to Spark Control so the
+      // operator's dashboard surfaces the failure immediately
+      // (without waiting for its own polling cycle to catch it).
+      // Only fired for hardware-side calls — Gemini failures are a
+      // separate observability surface (Google's API health).
+      if (chosenBackend === "hardware") {
+        reportHealthEvent({
+          service: "vllm",
+          ok: false,
+          error: (err?.message || String(err)).slice(0, 280),
+          ms: Date.now() - t0,
+        });
+      }
      await recordCall({
-        install_id: installId,
+        install_id: auditInstall,
+        license_fingerprint: licenseFp,
        tier,
        pipeline: "analyze",
        backend: chosenBackend,
        model: chosenBackend === "gemini"
          ? cfg.relay_gemini_analysis_model
-          : cfg.relay_gemma_model,
+          : hw.analyze.model || "(auto)",
        status: "error",
        credit_charged: 0,
        duration_ms: Date.now() - t0,
@@ -135,6 +185,7 @@ export function analyzeRouter() {
      });
      const e = await errorEnvelope({
        error: err?.message || "backend_error",
+        creditKey,
        installId,
        tier,
        statusHint: err?.status || 502,
@@ -144,8 +195,8 @@ export function analyzeRouter() {

    let creditCharged = 0;
    if (!reusedJob) {
-      await commitCredit(installId, { backend: chosenBackend, tier });
-      markJobCharged(installId, jobId, { backend: chosenBackend, tier });
+      await commitCredit({ creditKey, installId, license, backend: chosenBackend, tier });
+      await markJobCharged({ creditKey, installId, license, jobId, backend: chosenBackend, tier });
      creditCharged = 1;
    }

@@ -159,7 +210,8 @@ export function analyzeRouter() {
            cost_usd: 0,
          };
    await recordCall({
-      install_id: installId,
+      install_id: auditInstall,
+      license_fingerprint: licenseFp,
      tier,
      pipeline: "analyze",
      backend: chosenBackend,
@@ -168,10 +220,14 @@ export function analyzeRouter() {
      credit_charged: creditCharged,
      duration_ms: Date.now() - t0,
      job_id: jobId,
+      // Surface the cascade so the dashboard can show "served by
+      // 2.5-flash after 3-flash 503'd" — Gemini backend returns this;
+      // hardware backend doesn't (no per-model fallback there).
+      attempts: result?.attempts || null,
      ...costDetails,
    });

-    const body = await envelope({ result, installId, tier, creditCharged });
+    const body = await envelope({ result, creditKey, installId, license, tier, creditCharged });
    res.json(body);
  });

@@ -1,69 +1,185 @@
-// GET /relay/capabilities — operator-aware metadata for Recap clients
-// to plan their audio handling. Returns the upper bounds the relay's
-// CURRENT routing config can comfortably accept, so Recap can decide
-// whether to chunk a long video before sending it.
+// GET /relay/capabilities — per-install metadata for Recap clients to
+// plan their audio handling. Tells Recap whether to chunk a long
+// audio file before sending it, based on which backend THIS install's
+// next transcribe call will actually route to.
 //
-// Today's logic:
-//   - When the operator's transcribe_backend_preference routes through
-//     Gemini at all (gemini_first / gemini_only), we report Gemini-safe
-//     limits (60 min / 30 MB / 2700 s chunks). Even with hardware as
-//     overflow, the FIRST attempt is Gemini, which needs the chunk
-//     budget.
-//   - When the operator's preference is hardware-only (or hardware-
-//     first with overflow to Gemini disabled in spirit), we report
-//     "unbounded" — the operator's Parakeet wrapper can typically
-//     ingest 2+ hour podcasts in a single shot, so chunking just adds
-//     extra inference passes and timestamp-stitching overhead.
+// The decision is install-specific because the relay's routing
+// preference combined with the install's tier + current Gemini cap
+// consumption determines the backend per request. In `gemini_first`
+// mode, the same operator config will route a fresh install to
+// Gemini (chunking required) but route a cap-exhausted install to
+// hardware (no chunking needed) — so a global capabilities answer
+// would be wrong half the time.
 //
-// Recap reads this once on boot + on policy refresh; when its
-// transcriptionProvider is "relay", it honors these limits instead of
-// its own hardcoded thresholds. For non-relay providers, Recap's
-// internal per-provider thresholds apply.
+// Inputs:
+//   X-Recap-Install-Id  (optional but strongly recommended)
+//   Authorization       (optional Bearer license — affects tier lookup)
+//
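+// Without an install_id (or when the install-aware lookup throws), the
+// answer comes from the operator-wide preference alone: hardware limits
+// when the preference is hardware_only / hardware_first and a hardware
+// transcribe URL is available, Gemini-safe limits otherwise (the
+// chunking path always works; the no-chunking path only works when
+// hardware actually serves the call).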
+//
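+// Output shape (the limit fields are unchanged from v1; the TTS
+// fields are purely additive):
+//   {
+//     max_audio_mb:           number,
+//     max_audio_minutes:      number,
+//     preferred_chunk_seconds: number | null,  // null = don't chunk
+//     has_tts:                boolean,         // some TTS backend is usable
+//     tts_backend:            "kokoro" | "elevenlabs" | null,
+//     tts_default_voice:      string | null,
+//     reason:                 string           // human-readable
+//   }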

 import express from "express";
-import { getConfigSnapshot } from "../config.js";
+import { getConfigSnapshot, getTierQuotas } from "../config.js";
+import { resolveLicense } from "../keysat-client.js";
+import { getOrCreateRow, planBackend } from "../credits.js";
+import { resolveHardwareConfig } from "../hardware-config.js";
+
+// Limits reported when this install's next transcribe call is expected
+// to route to Gemini. Also used as the safe default when routing is
+// refused, and for anonymous callers unless a hardware-first/only
+// preference with a resolved hardware URL applies.
+//
+// Gemini File API can handle audio up to ~9.5 hours per generateContent
+// call and files up to 2GB. The conservative 60-min/30-MB ceiling we
+// shipped originally was sized for free-tier worries that no longer
+// apply on paid Gemini. Bumped to 240 min / 200 MB so Recap hits the
+// relay-URL fast-path for content up to 4 hours instead of falling
+// back to client-side chunked uploads (which lose the buyer-bandwidth
+// savings and serialize the calls).
+const GEMINI_LIMITS = Object.freeze({
+  max_audio_mb: 200,
+  max_audio_minutes: 240,
+  preferred_chunk_seconds: 2700, // 45 min — server-side chunking still
+                                 // kicks in for stability on the longest
+                                 // files, but only on the actual call;
+                                 // doesn't gate client-side chunking.
+});
+
+// Limits reported when the transcribe call is expected to be served by
+// the operator's hardware backend. preferred_chunk_seconds is null,
+// which the response contract in this file's header defines as
+// "don't chunk".
+const HARDWARE_LIMITS = Object.freeze({
+  // Effectively unbounded — Parakeet wrappers commonly handle 2+ hour
+  // audio in one shot. Set high but finite ceilings so a 24-hour file
+  // doesn't OOM the operator's GPU box silently.
+  max_audio_mb: 500,
+  max_audio_minutes: 240,
+  preferred_chunk_seconds: null,
+});

 export function capabilitiesRouter() {
  const router = express.Router();

-  router.get("/capabilities", async (_req, res) => {
+  router.get("/capabilities", async (req, res) => {
    const cfg = await getConfigSnapshot();
    const txPref =
      cfg.relay_transcribe_backend_preference || "gemini_first";
-    const hasParakeet = !!cfg.relay_parakeet_base_url;
+    const hw = await resolveHardwareConfig(cfg);
+    const hasHardware = !!hw.transcribe.url;
+    const installId = req.header("X-Recap-Install-Id") || null;
+    const auth = req.header("Authorization") || null;

-    // Conservative default: Gemini-safe limits unless the operator has
-    // explicitly said "use hardware (only or first) and I've got a
-    // Parakeet endpoint wired up". Without the Parakeet endpoint we
-    // can't make use of larger inputs — Gemini's the only path —
-    // so we'd just be lying to the client.
+    // ── TTS availability (audio-first "walking mode") ──
+    // Operator-wide, not install-specific: whether ANY TTS backend can
+    // serve a /relay/tts call given the operator's config. The Recap app
+    // uses has_tts to decide whether to show the "Listen" button at all
+    // (it additionally gates the feature to Max users on its own side).
+    const ttsPref = cfg.relay_tts_backend_preference || "hardware_first";
+    const kokoroReady = !!hw.tts?.url;
+    const elevenConfigured = !!(
+      cfg.relay_elevenlabs_api_key && cfg.relay_elevenlabs_voice_id
+    );
+    const ttsBackend =
+      ttsPref === "hardware_only"
+        ? kokoroReady
+          ? "kokoro"
+          : null
+        : ttsPref === "cloud_only"
+        ? elevenConfigured
+          ? "elevenlabs"
+          : null
+        : ttsPref === "cloud_first"
+        ? elevenConfigured
+          ? "elevenlabs"
+          : kokoroReady
+          ? "kokoro"
+          : null
+        : kokoroReady // hardware_first (default)
+        ? "kokoro"
+        : elevenConfigured
+        ? "elevenlabs"
+        : null;
+    const ttsCaps = {
+      has_tts: !!ttsBackend,
+      tts_backend: ttsBackend, // "kokoro" | "elevenlabs" | null
+      tts_default_voice: cfg.relay_tts_default_voice || null,
+    };
+
+    // If we have an install_id, run the same routing logic the actual
+    // transcribe route uses so the chunking decision matches the
+    // backend that will actually serve the call.
+    if (installId) {
+      try {
+        const license = await resolveLicense(auth);
+        const row = await getOrCreateRow({ installId, license });
+        row.tier_snapshot = license.tier;
+        const quota = await getTierQuotas();
+        const plan = planBackend(row, quota, {
+          hasHardware,
+          preference: txPref,
+        });
+        if (plan.allowed && plan.backend === "hardware") {
+          return res.json({
+            ...HARDWARE_LIMITS,
+            ...ttsCaps,
+            reason: `routing this install to hardware (pref=${txPref}, tier=${license.tier})`,
+          });
+        }
+        if (plan.allowed && plan.backend === "gemini") {
+          return res.json({
+            ...GEMINI_LIMITS,
+            ...ttsCaps,
+            reason: `routing this install to Gemini (pref=${txPref}, tier=${license.tier})`,
+          });
+        }
+        // planBackend refused entirely (out of credits / no backend
+        // configured). Return Gemini-safe defaults so the client still
+        // chunks defensively and gets a clean 402 from the real
+        // transcribe call rather than a confusing transport failure.
+        return res.json({
+          ...GEMINI_LIMITS,
+          ...ttsCaps,
+          reason: `routing refused for this install (${plan.reason || "unknown"}) — returning Gemini-safe defaults`,
+        });
+      } catch (err) {
+        // License lookup or row read failed — fall through to the
+        // anonymous path so the client at least gets safe defaults.
+        console.warn(
+          `[capabilities] install-aware resolve failed for ${installId}: ${err?.message || err} — falling back to operator-wide defaults`
+        );
+      }
+    }
+
+    // Anonymous (no install_id) or install-aware path failed. Pick
+    // capabilities from the operator-wide routing preference alone:
+    //   hardware_only / hardware_first → hardware-safe limits (provided
+    //                                    hardware is configured)
+    //   gemini_only / gemini_first     → Gemini-safe (will always work
+    //                                    for the first attempt; in
+    //                                    gemini_first the eventual
+    //                                    overflow to hardware can
+    //                                    handle bigger files too, but
+    //                                    chunking still works for both)
+    //
+    // When `hardware_first` is set but Parakeet isn't actually
+    // configured, the relay will fall back to Gemini — so report
+    // Gemini-safe limits in that case.
    const hardwareCapable =
-      hasParakeet && (txPref === "hardware_only" || txPref === "hardware_first");
-
+      hasHardware && (txPref === "hardware_only" || txPref === "hardware_first");
    if (hardwareCapable) {
-      res.json({
-        // Effective unbounded — Parakeet wrappers commonly handle 2+
-        // hour audio in one shot. Set high but finite ceilings so a
-        // 24-hour file doesn't OOM the operator's GPU box silently.
-        max_audio_mb: 500,
-        max_audio_minutes: 240,
-        preferred_chunk_seconds: null,
-        // Diagnostic — Recap doesn't need this but the dashboard / a
-        // curious operator might want to know which limit shape they
-        // returned and why.
-        reason: "hardware-capable backend preference (" + txPref + ")",
-      });
-    } else {
-      res.json({
-        // Gemini File-API + practical reliability limits. Matches
-        // Recap's pre-relay defaults so existing chunking behavior
-        // is preserved.
-        max_audio_mb: 30,
-        max_audio_minutes: 60,
-        preferred_chunk_seconds: 2700, // 45 min chunks
-        reason: "Gemini-backed preference (" + txPref + ")",
+      return res.json({
+        ...HARDWARE_LIMITS,
+        ...ttsCaps,
+        reason: `hardware-capable backend preference (${txPref})`,
      });
    }
+    return res.json({
+      ...GEMINI_LIMITS,
+      ...ttsCaps,
+      reason: `Gemini-backed preference (${txPref})`,
+    });
  });

  return router;
@@ -11,18 +11,34 @@ import { getTierQuotas } from "../config.js";
 export async function envelope({
  result = null,
  installId,
+  // License is optional but recommended — without it, balance lookups
+  // route to the install-keyed row even for paid users, which would
+  // briefly underreport their balance after a commitCredit landed on
+  // their license-keyed row. Routes pass it through from resolveLicense.
+  license = null,
+  // Explicit ledger key override (cloud `user:<id>` path). Takes
+  // precedence over (installId, license) when present.
+  creditKey = null,
  tier,
  creditCharged = 0,
 }) {
  const quota = await getTierQuotas();
-  const row = await getOrCreateRow(installId);
+  const row = await getOrCreateRow({ installId, license, creditKey });
  // tier_snapshot on the row was just updated by commitCredit; if no
  // credit was committed (free reuse via job_id) it still reflects
  // the last-known tier for this install, which is fine.
  const balance = computeRemaining(row, quota);
  return {
    result,
-    credits_remaining: balance.remaining, // null = unlimited (Max)
+    // `total` = tier allotment + purchased top-up. Recap renders this
+    // as the headline number on its credits pill. `remaining` alone
+    // wouldn't reflect purchased credits at all — so a buyer who
+    // just bought 5 credits and had 0 tier credits left would still
+    // see "0 relay credits" until their tier renewed.
+    credits_remaining: balance.total, // null = unlimited (Max)
+    // Breakdown for clients that want to display it.
+    tier_remaining: balance.remaining,
+    purchased_balance: balance.purchased,
    tier,
    credit_charged: creditCharged,
  };
@@ -35,15 +51,25 @@ export async function envelope({
 export async function errorEnvelope({
  error,
  installId,
+  license = null,
+  creditKey = null,
  tier = "core",
  statusHint = 500,
 }) {
  let creditsRemaining = null;
+  let tierRemaining = null;
+  let purchased = 0;
  try {
    const quota = await getTierQuotas();
-    const row = await getOrCreateRow(installId || "unknown");
+    const row = await getOrCreateRow({
+      installId: creditKey ? null : installId || "unknown",
+      license,
+      creditKey,
+    });
    const balance = computeRemaining(row, quota);
-    creditsRemaining = balance.remaining;
+    creditsRemaining = balance.total;
+    tierRemaining = balance.remaining;
+    purchased = balance.purchased;
  } catch {}
  return {
    statusHint,
@@ -51,6 +77,8 @@ export async function errorEnvelope({
      result: null,
      error: typeof error === "string" ? error : error?.message || "unknown_error",
      credits_remaining: creditsRemaining,
+      tier_remaining: tierRemaining,
+      purchased_balance: purchased,
      tier,
      credit_charged: 0,
    },
@@ -35,8 +35,13 @@ export function healthRouter() {
      version: VERSION,
      backends: {
        gemini: !!cfg.relay_gemini_api_key,
-        parakeet: !!cfg.relay_parakeet_base_url,
-        gemma: !!cfg.relay_gemma_base_url,
+        // Whether the operator-hardware path is wired up at all.
+        // Hardware backends are now sourced from Spark Control
+        // discovery — see hardware-config.js. Empty discovery URL
+        // means no hardware path; downstream details (which model is
+        // ready, transcribe vs analyze availability) are surfaced via
+        // /admin/config's effective_* fields.
+        hardware: !!cfg.relay_spark_control_url,
      },
      admin_enabled: !!cfg.relay_admin_password_hash,
    });
@@ -33,8 +33,13 @@ import { execFile } from "child_process";
 import { promisify } from "util";
 import { Readable } from "stream";
 import { pipeline } from "stream/promises";
-import { resolveLicense } from "../keysat-client.js";
-import { getOrCreateRow, planBackend, commitCredit } from "../credits.js";
+import { resolveIdentity, identityTier } from "../identity.js";
+import {
+  getOrCreateRow,
+  planBackend,
+  commitCredit,
+  licenseFingerprint,
+} from "../credits.js";
 import { lookupJob, markJobCharged, refundJob } from "../job-credits.js";
 import { getConfigSnapshot, getTierQuotas } from "../config.js";
 import { createGeminiBackend } from "../backends/gemini.js";
@@ -42,6 +47,18 @@ import { createHardwareBackend } from "../backends/hardware.js";
 import { envelope, errorEnvelope } from "./envelope.js";
 import { recordCall } from "../audit-log.js";
 import { calcGeminiCost } from "../pricing.js";
+import { getAudioDurationSeconds } from "../audio-meta.js";
+import { resolveHardwareConfig } from "../hardware-config.js";
+import { reportHealthEvent } from "../spark-control-events.js";
+import {
+  createJob,
+  markRunning,
+  setProgress,
+  markComplete,
+  markFailed,
+  getJob,
+} from "../jobs.js";
+import { saveJobOutput } from "../output-store.js";

 const execFileAsync = promisify(execFile);

@@ -54,7 +71,7 @@ const MAX_DOWNLOAD_BYTES = 500 * 1024 * 1024;
 // rate-limits; a hard ceiling avoids holding the request open forever.
 const DOWNLOAD_TIMEOUT_MS = 10 * 60 * 1000;

-function looksLikeYouTube(url) {
-  if (!url) return false;
-  return /(?:^|\.)(youtube\.com|youtu\.be)\b/i.test(url);
-}
+// Report whether `url` points at YouTube: youtube.com, youtu.be, or any
+// subdomain of either (www., m., music., …).
+//
+// The decision is made on the parsed hostname rather than by scanning
+// the whole string. Scanning required a "." or start-of-string right
+// before the domain, so ordinary links such as "https://youtu.be/<id>"
+// and "https://youtube.com/watch?v=<id>" (domain preceded by "//") were
+// rejected, while any URL that merely mentioned ".youtube.com" in its
+// path or query was accepted.
+//
+// Accepts full URLs and scheme-less input ("www.youtube.com/watch?v=…").
+// Returns false for empty / nullish / unparseable input.
+export function looksLikeYouTube(url) {
+  if (!url) return false;
+  const raw = String(url).trim();
+  // Try the input as given first, then as a scheme-less host/path.
+  for (const candidate of [raw, `https://${raw}`]) {
+    let host;
+    try {
+      host = new URL(candidate).hostname;
+    } catch {
+      continue; // not parseable in this form — try the next candidate
+    }
+    if (!host) continue; // e.g. "youtu.be:443/x" parsed as a bare scheme
+    // Optional trailing dot covers fully-qualified host names.
+    return /(?:^|\.)(?:youtube\.com|youtu\.be)\.?$/i.test(host);
+  }
+  return false;
+}
@@ -79,7 +96,7 @@ function guessMimeFromExt(filePath) {
 // Download an HTTP(S) audio URL to a temp file. Stops if the file
 // would exceed MAX_DOWNLOAD_BYTES. Returns { filePath, bytes,
 // mimeType }.
-async function downloadDirect(url, tmpDir) {
+export async function downloadDirect(url, tmpDir) {
  const res = await fetch(url, {
    redirect: "follow",
    signal: AbortSignal.timeout(DOWNLOAD_TIMEOUT_MS),
@@ -143,7 +160,11 @@ async function downloadDirect(url, tmpDir) {

 // Download a YouTube URL via yt-dlp. Picks the audio-only m4a/mp3.
 // Logs the chosen path back as the file. Caller manages tmpDir.
-async function downloadYouTube(url, tmpDir) {
+// Also captures title, channel, description and chapters via a
+// `--print` JSON template so callers (the summarize-url /
+// transcribe-url workers) can stamp the Jobs table with the real
+// title instead of "Untitled" when the client didn't pre-fetch metadata.
+export async function downloadYouTube(url, tmpDir) {
  const outTemplate = path.join(tmpDir, "audio.%(ext)s");
  const args = [
    "-x", // extract audio
@@ -156,18 +177,93 @@ async function downloadYouTube(url, tmpDir) {
    "--no-playlist",
    "--no-simulate",
    "--no-warnings",
+    // Emit a JSON dict containing the full metadata we care about for
+    // the transcribe prompt's speaker-identification cues. `before_dl:`
+    // prints it before the download starts (if the download then fails
+    // we throw below and the metadata is dropped). The
+    // `.{field1,field2}j` template prints just the named fields as a
+    // JSON object (yt-dlp escapes embedded newlines inside description
+    // values, so single-line stdout parses cleanly). Title comes from
+    // the same dict — no second --print needed.
+    //
+    // Why these four fields specifically: they're exactly what the
+    // recap-app's fetchYouTubeMetadata() pulls and feeds into its
+    // direct-to-Gemini transcribe prompt. With these populated, the
+    // model can correctly assign speaker labels (host name from
+    // channel, guest name from description, chapter titles often name
+    // both). Without them, every transcript falls back to unlabeled
+    // dialogue regardless of how detailed the prompt's
+    // speaker-identification rule is.
+    "--print",
+    "before_dl:%(.{title,channel,description,chapters})j",
    url,
  ];
+  let extractedMetadata = {
+    title: null,
+    channel: null,
+    description: null,
+    chapters: [],
+  };
  try {
-    await execFileAsync("yt-dlp", args, {
+    const { stdout } = await execFileAsync("yt-dlp", args, {
      timeout: DOWNLOAD_TIMEOUT_MS,
      maxBuffer: 10 * 1024 * 1024,
    });
+    // The JSON dict is the first non-empty line that starts with `{`.
+    // yt-dlp may print other progress / warning lines before or after
+    // depending on version; filter to the JSON line specifically.
+    const firstJsonLine = (stdout || "")
+      .split(/\r?\n/)
+      .map((l) => l.trim())
+      .find((l) => l.length > 0 && l.startsWith("{"));
+    if (firstJsonLine) {
+      try {
+        const parsed = JSON.parse(firstJsonLine);
+        extractedMetadata = {
+          title:
+            typeof parsed.title === "string" && parsed.title.trim()
+              ? parsed.title.trim().slice(0, 300)
+              : null,
+          channel:
+            typeof parsed.channel === "string" && parsed.channel.trim()
+              ? parsed.channel.trim().slice(0, 200)
+              : null,
+          // Cap at 2000 chars — recap-app uses the same cap. Long
+          // descriptions with release-notes / sponsor blocks otherwise
+          // bloat the prompt and crowd out the speaker-naming signal.
+          description:
+            typeof parsed.description === "string" && parsed.description.trim()
+              ? parsed.description.trim().slice(0, 2000)
+              : null,
+          // Each chapter is { start_time: seconds, end_time, title }.
+          // We only use start_time + title in the prompt; pass the full
+          // array through so callers see what yt-dlp returned.
+          chapters: Array.isArray(parsed.chapters) ? parsed.chapters : [],
+        };
+      } catch (parseErr) {
+        // Malformed JSON from yt-dlp. Fall back to title-only via a
+        // best-effort regex on the line. Better than nothing.
+        const m = firstJsonLine.match(/"title"\s*:\s*"([^"]+)"/);
+        if (m) extractedMetadata.title = m[1].slice(0, 300);
+        console.warn(
+          `[yt-dlp] metadata JSON parse failed: ${parseErr?.message || parseErr} — falling back to title-only`
+        );
+      }
+    } else if (stdout) {
+      // No JSON line but stdout has something — older yt-dlp versions
+      // or some videos may emit a bare title line. Use it as title-only
+      // so we at least preserve the existing v0.2.56 behavior.
+      const firstLine = stdout
+        .split(/\r?\n/)
+        .map((l) => l.trim())
+        .find((l) => l.length > 0);
+      if (firstLine) extractedMetadata.title = firstLine.slice(0, 300);
+    }
  } catch (err) {
    const stderr = (err?.stderr || "").toString();
-    const stdout = (err?.stdout || "").toString();
+    const stdoutStr = (err?.stdout || "").toString();
    throw new Error(
-      `yt-dlp failed: ${stderr.trim() || stdout.trim() || err?.message}`
+      `yt-dlp failed: ${stderr.trim() || stdoutStr.trim() || err?.message}`
    );
  }
  // Find the produced file — yt-dlp's audio-format=mp3 means it ends
@@ -189,225 +285,466 @@ async function downloadYouTube(url, tmpDir) {
    filePath,
    bytes: stat.size,
    mimeType: guessMimeFromExt(filePath),
+    title: extractedMetadata.title,
+    channel: extractedMetadata.channel,
+    description: extractedMetadata.description,
+    chapters: extractedMetadata.chapters,
  };
 }

 export function transcribeUrlRouter() {
  const router = express.Router();

+  // POST /relay/transcribe-url — kicks off a background transcribe
+  // job and returns immediately with { job_id }. The client polls
+  // GET /relay/jobs/:id to find out when it's done.
+  //
+  // Why async: a synchronous response over HTTP can't reliably
+  // survive multi-minute work — proxies, load balancers, and NATs
+  // along the path will drop the connection on long-running idle
+  // requests (we observed a 5-minute cut on a 1h45m transcribe).
+  // The poll requests are short and cheap, so they never trip
+  // timeouts.
  router.post("/transcribe-url", express.json({ limit: "1mb" }), async (req, res) => {
-    const t0 = Date.now();
-    const installId = req.header("X-Recap-Install-Id");
-    const jobId = req.header("X-Recap-Job-Id") || null;
-    const auth = req.header("Authorization");
+    const summaryJobId = req.header("X-Recap-Job-Id") || null;

-    if (!installId) {
+    let identity;
+    try {
+      identity = await resolveIdentity(req);
+    } catch (err) {
+      const e = await errorEnvelope({
+        error: err?.message || "auth_error",
+        statusHint: err?.status || 401,
+      });
+      return res.status(e.statusHint || 401).json(e.body);
+    }
+    if (identity.kind === "license" && !identity.installId) {
      const e = await errorEnvelope({
        error: "missing X-Recap-Install-Id header",
        statusHint: 400,
      });
      return res.status(400).json(e.body);
    }
+    const { creditKey, installId, license } = identity;
+    // `title` is `let` rather than `const` because the worker may
+    // backfill it from yt-dlp metadata after the download completes
+    // (when the client didn't pre-fetch the title).
+    let title;
    const {
      media_url: mediaUrl,
      type,
      mime_type: bodyMime,
-      title,
+      title: bodyTitle,
      channel,
      description,
      chapters,
    } = req.body || {};
+    title = bodyTitle;
    if (!mediaUrl || typeof mediaUrl !== "string") {
      const e = await errorEnvelope({
        error: "missing or non-string body.media_url",
+        creditKey,
        installId,
        statusHint: 400,
      });
      return res.status(400).json(e.body);
    }

-    const license = await resolveLicense(auth);
-    const tier = license.tier;
-    const row = await getOrCreateRow(installId);
+    const row = await getOrCreateRow({ creditKey, installId, license });
+    const tier = identityTier(identity, row);
    row.tier_snapshot = tier;
+    const licenseFp = identity.kind === "cloud" ? null : licenseFingerprint(license);
+    const auditInstall = installId || identity.userId || null;

-    // Quota check + backend choice. Same as /relay/transcribe.
-    let reusedJob = false;
-    let chosenBackend = null;
-    const existingJob = lookupJob(installId, jobId);
-    if (existingJob) {
-      reusedJob = true;
-      chosenBackend = existingJob.backend;
-    } else {
-      const cfg = await getConfigSnapshot();
-      const hasHardware = !!cfg.relay_parakeet_base_url;
-      const quota = await getTierQuotas();
-      const preference =
-        cfg.relay_transcribe_backend_preference || "gemini_first";
-      const plan = planBackend(row, quota, { hasHardware, preference });
-      if (!plan.allowed) {
-        await recordCall({
-          install_id: installId,
-          tier,
-          pipeline: "transcribe",
-          backend: null,
-          model: null,
-          status: "refused",
-          credit_charged: 0,
-          duration_ms: Date.now() - t0,
-          cost_usd: 0,
-          job_id: jobId,
-          error: plan.reason,
-        });
-        const e = await errorEnvelope({
-          error: plan.reason,
-          installId,
-          tier,
-          statusHint: 402,
-        });
-        return res.status(402).json(e.body);
-      }
-      chosenBackend = plan.backend;
-    }
-
-    // ── Download phase ─────────────────────────────────────────────
-    const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "relay-dl-"));
-    const isYT = type === "youtube" || (!type && looksLikeYouTube(mediaUrl));
-    const dlStart = Date.now();
-    let audio;
-    let downloadMs = 0;
-    try {
-      audio = isYT
-        ? await downloadYouTube(mediaUrl, tmpDir)
-        : await downloadDirect(mediaUrl, tmpDir);
-      downloadMs = Date.now() - dlStart;
-      console.log(
-        `[transcribe-url] downloaded ${audio.bytes} bytes from ${isYT ? "youtube" : "direct"} in ${downloadMs}ms (${mediaUrl.slice(0, 80)})`
+    // Billing vs. routing decoupled — see analyze.js for reasoning.
+    const reusedSummaryJob = !!lookupJob({ creditKey, installId, license, jobId: summaryJobId });
+    const cfgPlan = await getConfigSnapshot();
+    const hw = await resolveHardwareConfig(cfgPlan);
+    // Operator-only diagnostic — see summarize-url.js for the full
+    // reasoning. We don't 503 here on blocked_reason because doing
+    // so pre-empts planBackend and would surface operator-internal
+    // Spark Control / parakeet wording to clients even when Gemini
+    // was the configured preference.
+    if (hw.transcribe.blocked_reason) {
+      console.warn(
+        `[transcribe-url] hardware transcribe currently blocked (planBackend will route to Gemini if available): ${hw.transcribe.blocked_reason}`,
      );
-    } catch (err) {
-      try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
-      console.error(`[transcribe-url] download failed: ${err?.message || err}`);
+    }
+    const hasHardware = !!hw.transcribe.url;
+    const quota = await getTierQuotas();
+    const preference =
+      cfgPlan.relay_transcribe_backend_preference || "gemini_first";
+    const plan = planBackend(row, quota, { hasHardware, preference });
+    if (!plan.allowed) {
      await recordCall({
-        install_id: installId,
+        install_id: auditInstall,
+        license_fingerprint: licenseFp,
        tier,
        pipeline: "transcribe",
-        backend: chosenBackend,
+        backend: null,
        model: null,
-        status: "error",
+        status: "refused",
        credit_charged: 0,
-        duration_ms: Date.now() - t0,
-        download_ms: Date.now() - dlStart,
+        duration_ms: 0,
        cost_usd: 0,
-        job_id: jobId,
-        error: ("download_failed: " + (err?.message || String(err))).slice(0, 200),
+        job_id: summaryJobId,
+        media_url: mediaUrl || null,
+        title: title || null,
+        error: plan.reason,
      });
      const e = await errorEnvelope({
-        error: "download_failed: " + (err?.message || String(err)).slice(0, 200),
+        error: plan.reason,
        installId,
+        license,
        tier,
-        statusHint: 502,
+        statusHint: 402,
      });
-      return res.status(502).json(e.body);
+      return res.status(402).json(e.body);
    }
+    const chosenBackend = plan.backend;

-    // ── Transcription phase ────────────────────────────────────────
-    const cfg = await getConfigSnapshot();
-    let result;
-    try {
-      const audioBuf = await fs.readFile(audio.filePath);
-      const mimeType = bodyMime || audio.mimeType;
-      if (chosenBackend === "gemini") {
-        const backend = createGeminiBackend({
-          apiKey: cfg.relay_gemini_api_key,
-          transcriptionModel: cfg.relay_gemini_transcription_model,
-          analysisModel: cfg.relay_gemini_analysis_model,
-        });
-        result = await backend.transcribeAudio({
-          audio: audioBuf,
-          mimeType,
-          title: title || "",
-          channel: channel || "",
-          description: description || "",
-          chapters: Array.isArray(chapters) ? chapters : [],
-          offsetSeconds: 0,
-        });
-      } else {
-        const backend = createHardwareBackend({
-          parakeetBaseURL: cfg.relay_parakeet_base_url,
-          gemmaBaseURL: cfg.relay_gemma_base_url,
-          parakeetModel: cfg.relay_parakeet_model,
-          gemmaModel: cfg.relay_gemma_model,
-        });
-        result = await backend.transcribeAudio({
-          audio: audioBuf,
-          mimeType,
-          offsetSeconds: 0,
-        });
-      }
-    } catch (err) {
-      try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
-      if (reusedJob) refundJob(installId, jobId);
-      console.error(`[transcribe-url] transcribe failed: ${err?.message}`);
-      await recordCall({
-        install_id: installId,
-        tier,
-        pipeline: "transcribe",
+    // Mint the background job + RESPOND IMMEDIATELY.
+    const job = createJob({
+      kind: "transcribe-url",
+      installId: auditInstall,
+      metadata: {
+        owner: creditKey, // authorizes the /jobs/:id poll (per-identity)
+        media_url: mediaUrl,
        backend: chosenBackend,
-        model:
-          chosenBackend === "gemini"
-            ? cfg.relay_gemini_transcription_model
-            : cfg.relay_parakeet_model,
-        status: "error",
-        credit_charged: 0,
-        duration_ms: Date.now() - t0,
-        download_ms: downloadMs,
-        cost_usd: 0,
-        job_id: jobId,
-        error: (err?.message || String(err)).slice(0, 200),
-      });
-      const e = await errorEnvelope({
-        error: err?.message || "backend_error",
-        installId,
-        tier,
-        statusHint: err?.status || 502,
-      });
-      return res.status(e.statusHint).json(e.body);
-    } finally {
-      try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
-    }
-
-    // ── Commit + audit ─────────────────────────────────────────────
-    let creditCharged = 0;
-    if (!reusedJob) {
-      await commitCredit(installId, { backend: chosenBackend, tier });
-      markJobCharged(installId, jobId, { backend: chosenBackend, tier });
-      creditCharged = 1;
-    }
-    const costDetails =
-      chosenBackend === "gemini" && result.usage
-        ? calcGeminiCost(result.model, result.usage)
-        : {
-            input_tokens: 0,
-            output_tokens: 0,
-            thinking_tokens: 0,
-            cost_usd: 0,
-          };
-    await recordCall({
-      install_id: installId,
-      tier,
-      pipeline: "transcribe",
-      backend: chosenBackend,
-      model: result?.model || null,
-      status: "success",
-      credit_charged: creditCharged,
-      duration_ms: Date.now() - t0,
-      download_ms: downloadMs,
-      audio_bytes: audio.bytes,
-      job_id: jobId,
-      ...costDetails,
+        summary_job_id: summaryJobId,
+      },
    });

-    const body = await envelope({ result, installId, tier, creditCharged });
+    // Background worker — runs after this handler has returned.
+    // Errors are captured into the job record; nothing thrown here
+    // can crash the route process.
+    (async () => {
+      const workerT0 = Date.now();
+      markRunning(job.id);
+      setProgress(job.id, "downloading media…");
+
+      const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "relay-dl-"));
+      const isYT = type === "youtube" || (!type && looksLikeYouTube(mediaUrl));
+      let audio;
+      let downloadMs = 0;
+      try {
+        const dlStart = Date.now();
+        audio = isYT
+          ? await downloadYouTube(mediaUrl, tmpDir)
+          : await downloadDirect(mediaUrl, tmpDir);
+        downloadMs = Date.now() - dlStart;
+        console.log(
+          `[transcribe-url ${job.id.slice(0, 8)}] downloaded ${audio.bytes} bytes from ${isYT ? "youtube" : "direct"} in ${downloadMs}ms`
+        );
+        audio.seconds = await getAudioDurationSeconds(audio.filePath);
+        if (!title && audio.title) {
+          // yt-dlp captured the title during download; use it when
+          // the client didn't pass one.
+          title = audio.title;
+        }
+        setProgress(job.id, `transcribing ${Math.round((audio.seconds || 0) / 60)} min audio…`);
+      } catch (err) {
+        try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
+        const msg = (err?.message || String(err)).slice(0, 300);
+        console.error(`[transcribe-url ${job.id.slice(0, 8)}] download failed: ${msg}`);
+        await recordCall({
+          install_id: auditInstall,
+          license_fingerprint: licenseFp,
+          tier,
+          pipeline: "transcribe",
+          backend: chosenBackend,
+          model: null,
+          status: "error",
+          credit_charged: 0,
+          duration_ms: Date.now() - workerT0,
+          download_ms: Date.now() - workerT0,
+          audio_seconds: null,
+          cost_usd: 0,
+          job_id: summaryJobId,
+          media_url: mediaUrl || null,
+          title: title || null,
+          error: "download_failed: " + msg,
+        });
+        markFailed(job.id, "download_failed: " + msg);
+        return;
+      }
+
+      // Transcription phase
+      const cfg = await getConfigSnapshot();
+      let result;
+      // Stamp the moment transcribe is about to start (AFTER download
+      // finished). Used for duration_ms on the audit row so the
+      // "TX wall time" column reflects ONLY the transcribe phase.
+      const txPhaseStart = Date.now();
+      try {
+        const audioBuf = await fs.readFile(audio.filePath);
+        const mimeType = bodyMime || audio.mimeType;
+        if (chosenBackend === "gemini") {
+          const backend = createGeminiBackend({
+            apiKey: cfg.relay_gemini_api_key,
+            transcriptionModel: cfg.relay_gemini_transcription_model,
+            analysisModel: cfg.relay_gemini_analysis_model,
+            txChunkSeconds: (cfg.relay_gemini_tx_chunk_minutes || 30) * 60,
+            txConcurrency: cfg.relay_gemini_tx_concurrency || 12,
+            transcribePromptOverride: cfg.relay_transcribe_prompt || "",
+          });
+          result = await backend.transcribeAudio({
+            audio: audioBuf,
+            mimeType,
+            title: title || "",
+            channel: channel || "",
+            description: description || "",
+            chapters: Array.isArray(chapters) ? chapters : [],
+            offsetSeconds: 0,
+          });
+        } else {
+          const backend = createHardwareBackend({
+            parakeetBaseURL: hw.transcribe.url || "",
+            gemmaBaseURL: hw.analyze.url || "",
+            sparkControlBaseURL: hw.sparkBase || "",
+            parakeetModel: hw.transcribe.model || "",
+            gemmaModel: hw.analyze.model || "",
+            txChunkSeconds: (cfg.relay_hardware_tx_chunk_minutes || 5) * 60,
+            txChunkOverlapSeconds: cfg.relay_hardware_tx_chunk_overlap_seconds ?? 30,
+            diarizationEnabled: !!cfg.relay_hardware_diarization_enabled,
+            clusterThresholdPct: cfg.relay_hardware_voice_clustering_threshold ?? 70,
+            anchorMinSpeakingSec: cfg.relay_hardware_anchor_min_speaking_sec ?? 30,
+            smallClusterMaxSpeakingSec: cfg.relay_hardware_small_cluster_max_speaking_sec ?? 15,
+            uncertainMarginPct: cfg.relay_hardware_uncertain_margin_pct ?? 10,
+            txConcurrency: cfg.relay_hardware_tx_concurrency || 4,
+          });
+          result = await backend.transcribeAudio({
+            audio: audioBuf,
+            mimeType,
+            offsetSeconds: 0,
+          });
+        }
+      } catch (err) {
+        try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
+        if (reusedSummaryJob) await refundJob({ creditKey, installId, license, jobId: summaryJobId });
+        const msg = (err?.message || String(err)).slice(0, 400);
+        console.error(`[transcribe-url ${job.id.slice(0, 8)}] transcribe failed: ${msg}`);
+        if (chosenBackend === "hardware") {
+          reportHealthEvent({
+            service: "parakeet",
+            ok: false,
+            error: msg.slice(0, 280),
+            ms: Date.now() - workerT0,
+          });
+        }
+        await recordCall({
+          install_id: auditInstall,
+          license_fingerprint: licenseFp,
+          tier,
+          pipeline: "transcribe",
+          backend: chosenBackend,
+          model:
+            chosenBackend === "gemini"
+              ? cfg.relay_gemini_transcription_model
+              : hw.transcribe.model || "(auto)",
+          status: "error",
+          credit_charged: 0,
+          duration_ms: Date.now() - txPhaseStart,
+          download_ms: downloadMs,
+          audio_seconds: audio?.seconds || null,
+          audio_bytes: audio?.bytes || null,
+          cost_usd: 0,
+          job_id: summaryJobId,
+          media_url: mediaUrl || null,
+          title: title || null,
+          error: msg,
+        });
+        markFailed(job.id, msg);
+        return;
+      } finally {
+        try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
+      }
+
+      // Success — commit credit (once per summary job_id), audit, mark done.
+      let creditCharged = 0;
+      if (!reusedSummaryJob) {
+        await commitCredit({ creditKey, installId, license, backend: chosenBackend, tier });
+        await markJobCharged({ creditKey, installId, license, jobId: summaryJobId, backend: chosenBackend, tier });
+        creditCharged = 1;
+      }
+      const costDetails =
+        chosenBackend === "gemini" && result.usage
+          ? calcGeminiCost(result.model, result.usage)
+          : {
+              input_tokens: 0,
+              output_tokens: 0,
+              thinking_tokens: 0,
+              cost_usd: 0,
+            };
+      // Truncation detection — mark partial when any chunk hit
+      // the silent output-token cap and emitted < 80% of its
+      // expected audio. See gemini.js for the actual coverage
+      // computation; here we just propagate to the audit row.
+      const truncatedChunks = Array.isArray(result?.truncated_chunks)
+        ? result.truncated_chunks
+        : [];
+      const wasTruncated = truncatedChunks.length > 0;
+      const truncationError = wasTruncated
+        ? `transcribe: ${truncatedChunks.length} chunk(s) truncated — missing ~${truncatedChunks.reduce((s, c) => s + (c.missingSec || 0), 0)}s of speech (model: ${result.model || "unknown"}). Likely hit maxOutputTokens.`
+        : null;
+      await recordCall({
+        install_id: auditInstall,
+        license_fingerprint: licenseFp,
+        tier,
+        pipeline: "transcribe",
+        backend: chosenBackend,
+        model: result?.model || null,
+        status: wasTruncated ? "partial" : "success",
+        credit_charged: creditCharged,
+        duration_ms: Date.now() - txPhaseStart,
+        download_ms: downloadMs,
+        audio_bytes: audio.bytes,
+        audio_seconds: audio.seconds || null,
+        job_id: summaryJobId,
+        attempts: result?.attempts || null,
+        // Per-job context for the operator dashboard's per-video table.
+        // media_url + title let the dashboard show what was being
+        // processed; chunk_count exposes the new server-side chunking
+        // (1 for short audio, N for ≥30 min audio split by the Gemini
+        // backend or by the hardware backend's Parakeet chunker).
+        media_url: mediaUrl || null,
+        title: title || null,
+        chunk_count: result?.chunk_count ?? null,
+        chunk_durations_ms: result?.chunk_durations_ms || null,
+        truncated_chunks: wasTruncated ? truncatedChunks : null,
+        error: truncationError,
+        ...costDetails,
+      });
+      markComplete(job.id, {
+        result,
+        credit_charged: creditCharged,
+        tier,
+      });
+      console.log(
+        `[transcribe-url ${job.id.slice(0, 8)}] complete in ${((Date.now() - workerT0) / 1000).toFixed(1)}s`
+      );
+      // Optional: persist transcript output for the operator's "View
+      // output" dashboard feature. Only when the config flag is set
+      // (default false) — saving real-user transcripts is an opt-in
+      // operator decision. We only have the transcript here (analyze
+      // runs as a separate /relay/analyze call in the Recap flow); the
+      // analyze row later overwrites this file with the full payload.
+      // Intended as best-effort. NOTE(review): this await has no local
+      // try/catch — confirm saveJobOutput swallows its own errors.
+      if (cfg.relay_save_user_outputs) {
+        await saveJobOutput(summaryJobId || job.id, {
+          batch_id: null,
+          source: null,
+          transcript: result?.text || "",
+          analysis: null,
+          analysis_raw_text: null,
+          meta: {
+            title: title || null,
+            media_url: mediaUrl,
+            audio_seconds: audio.seconds || null,
+            audio_bytes: audio.bytes,
+            captions_mode: null,
+            transcribe_backend: chosenBackend,
+            transcribe_model: result?.model || null,
+            analyze_backend: null,
+            analyze_model: null,
+          },
+        });
+      }
+    })().catch((err) => {
+      // Top-level catch — the download and transcribe phases handle
+      // their own errors, but awaits outside those try blocks can still
+      // reject; this keeps the job from sitting in "running" forever.
+      markFailed(job.id, "worker_crashed: " + (err?.message || String(err)));
+      console.error(`[transcribe-url ${job.id.slice(0, 8)}] worker crashed:`, err);
+    });
+
+    // Hand back the job_id immediately. Client will poll for status.
+    const body = await envelope({
+      result: {
+        job_id: job.id,
+        status: "queued",
+        kind: "transcribe-url",
+      },
+      creditKey,
+      installId,
+      license,
+      tier,
+    });
+    res.json(body);
+  });
+
+  // GET /relay/jobs/:id — status endpoint for the client's poll loop.
+  // Owner-scoped: a job is returned only when metadata.owner matches
+  // the caller's creditKey (or, for jobs without an owner, install_id
+  // matches), so a job id alone isn't enough to read someone else's job.
+  // Returns the status and, once complete, the full transcribe result.
+  router.get("/jobs/:id", async (req, res) => {
+    let identity;
+    try {
+      identity = await resolveIdentity(req);
+    } catch (err) {
+      const e = await errorEnvelope({ error: err?.message || "auth_error", statusHint: err?.status || 401 });
+      return res.status(e.statusHint || 401).json(e.body);
+    }
+    if (identity.kind === "license" && !identity.installId) {
+      const e = await errorEnvelope({
+        error: "missing X-Recap-Install-Id header",
+        statusHint: 400,
+      });
+      return res.status(400).json(e.body);
+    }
+    const { creditKey, installId, license } = identity;
+    const ownerRow = await getOrCreateRow({ creditKey, installId, license });
+    const tier = identityTier(identity, ownerRow);
+    const jobId = (req.params.id || "").trim();
+    const job = getJob(jobId);
+    if (!job) {
+      const e = await errorEnvelope({
+        error: "job_not_found",
+        creditKey,
+        installId,
+        tier,
+        statusHint: 404,
+      });
+      return res.status(404).json(e.body);
+    }
+    // New jobs carry metadata.owner = creditKey; older jobs only carry
+    // install_id. Authorize by whichever the job has.
+    const ownerOk = job.metadata?.owner
+      ? job.metadata.owner === creditKey
+      : identity.installId && job.install_id === identity.installId;
+    if (!ownerOk) {
+      const e = await errorEnvelope({
+        error: "job_belongs_to_different_owner",
+        creditKey,
+        installId,
+        tier,
+        statusHint: 403,
+      });
+      return res.status(403).json(e.body);
+    }
+    const body = await envelope({
+      result: {
+        job_id: job.id,
+        kind: job.kind,
+        status: job.status,
+        progress: job.progress,
+        started_at: job.started_at,
+        updated_at: job.updated_at,
+        completed_at: job.completed_at,
+        // Include the FULL transcribe-result on completion so the
+        // client doesn't need a second round-trip.
+        result: job.status === "complete" ? job.result?.result : null,
+        credit_charged:
+          job.status === "complete" ? job.result?.credit_charged || 0 : 0,
+        error: job.error,
+      },
+      creditKey,
+      installId,
+      license,
+      tier,
+    });
    res.json(body);
  });

@@ -28,8 +28,13 @@

 import express from "express";
 import multer from "multer";
-import { resolveLicense } from "../keysat-client.js";
-import { getOrCreateRow, planBackend, commitCredit } from "../credits.js";
+import { resolveIdentity, identityTier } from "../identity.js";
+import {
+  getOrCreateRow,
+  planBackend,
+  commitCredit,
+  licenseFingerprint,
+} from "../credits.js";
 import { lookupJob, markJobCharged, refundJob } from "../job-credits.js";
 import { getConfigSnapshot, getTierQuotas } from "../config.js";
 import { createGeminiBackend } from "../backends/gemini.js";
@@ -37,6 +42,9 @@ import { createHardwareBackend } from "../backends/hardware.js";
 import { envelope, errorEnvelope } from "./envelope.js";
 import { recordCall } from "../audit-log.js";
 import { calcGeminiCost } from "../pricing.js";
+import { getAudioDurationSecondsFromBuffer } from "../audio-meta.js";
+import { resolveHardwareConfig } from "../hardware-config.js";
+import { reportHealthEvent } from "../spark-control-events.js";

 const upload = multer({
  storage: multer.memoryStorage(),
@@ -48,67 +56,93 @@ export function transcribeRouter() {

  router.post("/transcribe", upload.single("audio"), async (req, res) => {
    const t0 = Date.now();
-    const installId = req.header("X-Recap-Install-Id");
    const jobId = req.header("X-Recap-Job-Id") || null;
-    const auth = req.header("Authorization");

-    if (!installId) {
+    let identity;
+    try {
+      identity = await resolveIdentity(req);
+    } catch (err) {
+      const e = await errorEnvelope({
+        error: err?.message || "auth_error",
+        statusHint: err?.status || 401,
+      });
+      return res.status(e.statusHint || 401).json(e.body);
+    }
+    if (identity.kind === "license" && !identity.installId) {
      const e = await errorEnvelope({
        error: "missing X-Recap-Install-Id header",
        statusHint: 400,
      });
      return res.status(400).json(e.body);
    }
+    const { creditKey, installId, license } = identity;
    if (!req.file) {
-      const e = await errorEnvelope({ error: "missing audio file", installId, statusHint: 400 });
+      const e = await errorEnvelope({ error: "missing audio file", creditKey, installId, statusHint: 400 });
      return res.status(400).json(e.body);
    }

-    const license = await resolveLicense(auth);
-    const tier = license.tier;
-
-    const row = await getOrCreateRow(installId);
+    const row = await getOrCreateRow({ creditKey, installId, license });
+    const tier = identityTier(identity, row);
    row.tier_snapshot = tier;
+    const licenseFp = identity.kind === "cloud" ? null : licenseFingerprint(license);
+    const auditInstall = installId || identity.userId || null;

-    let reusedJob = false;
-    let chosenBackend = null;
-    const existingJob = lookupJob(installId, jobId);
-    if (existingJob) {
-      reusedJob = true;
-      chosenBackend = existingJob.backend;
-    } else {
-      const cfg = await getConfigSnapshot();
-      const hasHardware = !!cfg.relay_parakeet_base_url;
-      const quota = await getTierQuotas();
-      const preference =
-        cfg.relay_transcribe_backend_preference || "gemini_first";
-      const plan = planBackend(row, quota, { hasHardware, preference });
-      if (!plan.allowed) {
-        await recordCall({
-          install_id: installId,
-          tier,
-          pipeline: "transcribe",
-          backend: null,
-          model: null,
-          status: "refused",
-          credit_charged: 0,
-          duration_ms: Date.now() - t0,
-          cost_usd: 0,
-          job_id: jobId,
-          error: plan.reason,
-        });
-        const e = await errorEnvelope({
-          error: plan.reason,
-          installId,
-          tier,
-          statusHint: 402,
-        });
-        return res.status(402).json(e.body);
-      }
-      chosenBackend = plan.backend;
-    }
+    // Probe audio duration BEFORE the backend call so we can record
+    // it on every audit row (success and error alike). Used by the
+    // dashboard to normalize wall-clock time to "ms per minute of
+    // audio" — a backend-agnostic speed benchmark.
+    const audioSeconds = await getAudioDurationSecondsFromBuffer(
+      req.file?.buffer
+    );

+    // Billing vs. routing are decoupled — see analyze.js for the
+    // full reasoning. Look up job to decide whether to charge a
+    // credit, but always run planBackend fresh so transcribe's
+    // routing decision respects relay_transcribe_backend_preference.
+    const reusedJob = !!lookupJob({ creditKey, installId, license, jobId });
    const cfg = await getConfigSnapshot();
+    const hw = await resolveHardwareConfig(cfg);
+    // Operator-only diagnostic — see the matching comment in
+    // summarize-url.js for the full reasoning. We don't 503 here on
+    // blocked_reason because doing so pre-empts planBackend and
+    // surfaces operator-internal wording to clients even when
+    // Gemini was the configured preference.
+    if (hw.transcribe.blocked_reason) {
+      console.warn(
+        `[transcribe] hardware transcribe currently blocked (planBackend will route to Gemini if available): ${hw.transcribe.blocked_reason}`,
+      );
+    }
+    const hasHardware = !!hw.transcribe.url;
+    const quota = await getTierQuotas();
+    const preference =
+      cfg.relay_transcribe_backend_preference || "gemini_first";
+    const plan = planBackend(row, quota, { hasHardware, preference });
+    if (!plan.allowed) {
+      await recordCall({
+        install_id: auditInstall,
+        license_fingerprint: licenseFp,
+        tier,
+        pipeline: "transcribe",
+        backend: null,
+        model: null,
+        status: "refused",
+        credit_charged: 0,
+        duration_ms: Date.now() - t0,
+        audio_seconds: audioSeconds,
+        cost_usd: 0,
+        job_id: jobId,
+        error: plan.reason,
+      });
+      const e = await errorEnvelope({
+        error: plan.reason,
+        creditKey,
+        installId,
+        tier,
+        statusHint: 402,
+      });
+      return res.status(402).json(e.body);
+    }
+    const chosenBackend = plan.backend;
    let result;
    try {
      if (chosenBackend === "gemini") {
@@ -116,6 +150,8 @@ export function transcribeRouter() {
          apiKey: cfg.relay_gemini_api_key,
          transcriptionModel: cfg.relay_gemini_transcription_model,
          analysisModel: cfg.relay_gemini_analysis_model,
+          txChunkSeconds: (cfg.relay_gemini_tx_chunk_minutes || 30) * 60,
+          txConcurrency: cfg.relay_gemini_tx_concurrency || 12,
        });
        result = await backend.transcribeAudio({
          audio: req.file.buffer,
@@ -128,10 +164,19 @@ export function transcribeRouter() {
        });
      } else {
        const backend = createHardwareBackend({
-          parakeetBaseURL: cfg.relay_parakeet_base_url,
-          gemmaBaseURL: cfg.relay_gemma_base_url,
-          parakeetModel: cfg.relay_parakeet_model,
-          gemmaModel: cfg.relay_gemma_model,
+          parakeetBaseURL: hw.transcribe.url || "",
+          gemmaBaseURL: hw.analyze.url || "",
+          sparkControlBaseURL: hw.sparkBase || "",
+          parakeetModel: hw.transcribe.model || "",
+          gemmaModel: hw.analyze.model || "",
+          txChunkSeconds: (cfg.relay_hardware_tx_chunk_minutes || 5) * 60,
+          txChunkOverlapSeconds: cfg.relay_hardware_tx_chunk_overlap_seconds ?? 30,
+          diarizationEnabled: !!cfg.relay_hardware_diarization_enabled,
+          clusterThresholdPct: cfg.relay_hardware_voice_clustering_threshold ?? 70,
+          anchorMinSpeakingSec: cfg.relay_hardware_anchor_min_speaking_sec ?? 30,
+          smallClusterMaxSpeakingSec: cfg.relay_hardware_small_cluster_max_speaking_sec ?? 15,
+          uncertainMarginPct: cfg.relay_hardware_uncertain_margin_pct ?? 10,
+          txConcurrency: cfg.relay_hardware_tx_concurrency || 4,
        });
        result = await backend.transcribeAudio({
          audio: req.file.buffer,
@@ -140,25 +185,38 @@ export function transcribeRouter() {
        });
      }
    } catch (err) {
-      if (reusedJob) refundJob(installId, jobId);
+      if (reusedJob) await refundJob({ creditKey, installId, license, jobId });
      console.error(`[relay/transcribe] backend error: ${err?.message}`);
+      // Fire-and-forget health report for hardware-served calls;
+      // Gemini failures are a separate observability surface.
+      if (chosenBackend === "hardware") {
+        reportHealthEvent({
+          service: "parakeet",
+          ok: false,
+          error: (err?.message || String(err)).slice(0, 280),
+          ms: Date.now() - t0,
+        });
+      }
      await recordCall({
-        install_id: installId,
+        install_id: auditInstall,
+        license_fingerprint: licenseFp,
        tier,
        pipeline: "transcribe",
        backend: chosenBackend,
        model: chosenBackend === "gemini"
          ? cfg.relay_gemini_transcription_model
-          : cfg.relay_parakeet_model,
+          : hw.transcribe.model || "(auto)",
        status: "error",
        credit_charged: 0,
        duration_ms: Date.now() - t0,
+        audio_seconds: audioSeconds,
        cost_usd: 0,
        job_id: jobId,
        error: (err?.message || String(err)).slice(0, 200),
      });
      const e = await errorEnvelope({
        error: err?.message || "backend_error",
+        creditKey,
        installId,
        tier,
        statusHint: err?.status || 502,
@@ -168,8 +226,8 @@ export function transcribeRouter() {

    let creditCharged = 0;
    if (!reusedJob) {
-      await commitCredit(installId, { backend: chosenBackend, tier });
-      markJobCharged(installId, jobId, { backend: chosenBackend, tier });
+      await commitCredit({ creditKey, installId, license, backend: chosenBackend, tier });
+      await markJobCharged({ creditKey, installId, license, jobId, backend: chosenBackend, tier });
      creditCharged = 1;
    }

@@ -188,6 +246,7 @@ export function transcribeRouter() {
          };
    await recordCall({
      install_id: installId,
+      license_fingerprint: licenseFp,
      tier,
      pipeline: "transcribe",
      backend: chosenBackend,
@@ -195,11 +254,13 @@ export function transcribeRouter() {
      status: "success",
      credit_charged: creditCharged,
      duration_ms: Date.now() - t0,
+      audio_seconds: audioSeconds,
      job_id: jobId,
+      attempts: result?.attempts || null,
      ...costDetails,
    });

-    const body = await envelope({ result, installId, tier, creditCharged });
+    const body = await envelope({ result, creditKey, installId, license, tier, creditCharged });
    res.json(body);
  });