// recap-relay/server/routes/transcribe.js

// POST /relay/transcribe — forwards an audio payload to the chosen
// backend (Gemini first, operator hardware as overflow) and returns
// the standard envelope.
//
// Request shape: multipart/form-data
//   audio:        binary audio file (required)
//   mime_type:    string (default application/octet-stream)
//   title:        string (optional, used by Gemini prompt)
//   channel:      string (optional)
//   description:  string (optional)
//   chapters:     JSON-stringified array (optional)
//   offset_seconds: number string (optional, for chunked audio)
//
// Headers:
//   X-Recap-Install-Id  (required)
//   X-Recap-Job-Id      (optional but expected — pairs with /analyze)
//   Authorization       (optional Bearer LIC1-... for licensed tiers)
//
// Response (standard envelope):
//   {
//     result: { text: "[MM:SS] ...", segments: [], duration_seconds: 0 },
//     credits_remaining, tier, credit_charged
//   }
//
// Every outcome (success / quota-refused / backend-error) writes one
// row to the audit log so the admin dashboard can compute cost,
// margin, and speed metrics.

import express from "express";
import multer from "multer";
import { resolveIdentity, identityTier } from "../identity.js";
import {
  getOrCreateRow,
  planBackend,
  commitCredit,
  licenseFingerprint,
} from "../credits.js";
import { lookupJob, markJobCharged, refundJob } from "../job-credits.js";
import { getConfigSnapshot, getTierQuotas } from "../config.js";
import { createGeminiBackend } from "../backends/gemini.js";
import { createHardwareBackend } from "../backends/hardware.js";
import { envelope, errorEnvelope } from "./envelope.js";
import { recordCall } from "../audit-log.js";
import { calcGeminiCost } from "../pricing.js";
import { getAudioDurationSecondsFromBuffer } from "../audio-meta.js";
import { resolveHardwareConfig } from "../hardware-config.js";
import { reportHealthEvent } from "../spark-control-events.js";

// Upper bound on a single uploaded audio file.
const MAX_AUDIO_UPLOAD_BYTES = 200 * 1024 * 1024; // 200 MB per request

// Multipart parser for the audio upload. Memory storage is used because
// the route handler reads the payload from `req.file.buffer`.
const upload = multer({
  storage: multer.memoryStorage(),
  limits: { fileSize: MAX_AUDIO_UPLOAD_BYTES },
});

/**
 * Build the Express router that serves `POST /transcribe`.
 *
 * The handler resolves the caller's identity, plans which backend
 * ("gemini" or hardware) serves the request, runs the transcription,
 * charges a credit unless the job was already charged, and writes one
 * audit row per outcome (refused / error / success).
 *
 * Responses produced by the handler:
 *   - 401 (or `err.status`) when identity resolution throws
 *   - 400 when a license identity has no install id, or no audio file
 *     was uploaded
 *   - 402 when `planBackend` refuses the request
 *   - `err.status` or 502 when the chosen backend throws
 *   - 200 with the standard envelope on success
 *
 * @returns {import("express").Router} router with the transcribe route mounted
 */
export function transcribeRouter() {
  const router = express.Router();

  router.post("/transcribe", upload.single("audio"), async (req, res) => {
    const t0 = Date.now();
    const jobId = req.header("X-Recap-Job-Id") || null;

    let identity;
    try {
      identity = await resolveIdentity(req);
    } catch (err) {
      const e = await errorEnvelope({
        error: err?.message || "auth_error",
        statusHint: err?.status || 401,
      });
      return res.status(e.statusHint || 401).json(e.body);
    }
    if (identity.kind === "license" && !identity.installId) {
      const e = await errorEnvelope({
        error: "missing X-Recap-Install-Id header",
        statusHint: 400,
      });
      return res.status(400).json(e.body);
    }
    const { creditKey, installId, license } = identity;
    if (!req.file) {
      const e = await errorEnvelope({ error: "missing audio file", creditKey, installId, statusHint: 400 });
      return res.status(400).json(e.body);
    }

    const row = await getOrCreateRow({ creditKey, installId, license });
    const tier = identityTier(identity, row);
    row.tier_snapshot = tier;
    const licenseFp = identity.kind === "cloud" ? null : licenseFingerprint(license);
    // Identifier written to every audit row. Falls back to the user id
    // so identities without an install id are still attributable.
    const auditInstall = installId || identity.userId || null;

    // Probe audio duration BEFORE the backend call so we can record
    // it on every audit row (success and error alike). Used by the
    // dashboard to normalize wall-clock time to "ms per minute of
    // audio" — a backend-agnostic speed benchmark.
    const audioSeconds = await getAudioDurationSecondsFromBuffer(
      req.file.buffer
    );

    // Billing vs. routing are decoupled — see analyze.js for the
    // full reasoning. Look up job to decide whether to charge a
    // credit, but always run planBackend fresh so transcribe's
    // routing decision respects relay_transcribe_backend_preference.
    const reusedJob = !!lookupJob({ creditKey, installId, license, jobId });
    const cfg = await getConfigSnapshot();
    const hw = await resolveHardwareConfig(cfg);
    // Operator-only diagnostic — see the matching comment in
    // summarize-url.js for the full reasoning. We don't 503 here on
    // blocked_reason because doing so pre-empts planBackend and
    // surfaces operator-internal wording to clients even when
    // Gemini was the configured preference.
    if (hw.transcribe.blocked_reason) {
      console.warn(
        `[transcribe] hardware transcribe currently blocked (planBackend will route to Gemini if available): ${hw.transcribe.blocked_reason}`,
      );
    }
    const hasHardware = !!hw.transcribe.url;
    const quota = await getTierQuotas();
    const preference =
      cfg.relay_transcribe_backend_preference || "gemini_first";
    const plan = planBackend(row, quota, { hasHardware, preference });
    if (!plan.allowed) {
      await recordCall({
        install_id: auditInstall,
        license_fingerprint: licenseFp,
        tier,
        pipeline: "transcribe",
        backend: null,
        model: null,
        status: "refused",
        credit_charged: 0,
        duration_ms: Date.now() - t0,
        audio_seconds: audioSeconds,
        cost_usd: 0,
        job_id: jobId,
        error: plan.reason,
      });
      const e = await errorEnvelope({
        error: plan.reason,
        creditKey,
        installId,
        tier,
        statusHint: 402,
      });
      return res.status(402).json(e.body);
    }
    const chosenBackend = plan.backend;

    // Request fields shared by both backends. A non-numeric
    // offset_seconds yields NaN, which collapses to 0.
    const mimeType =
      req.body?.mime_type || req.file.mimetype || "application/octet-stream";
    const offsetSeconds = Number(req.body?.offset_seconds) || 0;

    let result;
    try {
      if (chosenBackend === "gemini") {
        const backend = createGeminiBackend({
          apiKey: cfg.relay_gemini_api_key,
          transcriptionModel: cfg.relay_gemini_transcription_model,
          analysisModel: cfg.relay_gemini_analysis_model,
          txChunkSeconds: (cfg.relay_gemini_tx_chunk_minutes || 30) * 60,
          txConcurrency: cfg.relay_gemini_tx_concurrency || 12,
        });
        result = await backend.transcribeAudio({
          audio: req.file.buffer,
          mimeType,
          title: req.body?.title || "",
          channel: req.body?.channel || "",
          description: req.body?.description || "",
          chapters: parseChaptersField(req.body?.chapters),
          offsetSeconds,
        });
      } else {
        const backend = createHardwareBackend({
          parakeetBaseURL: hw.transcribe.url || "",
          gemmaBaseURL: hw.analyze.url || "",
          sparkControlBaseURL: hw.sparkBase || "",
          parakeetModel: hw.transcribe.model || "",
          gemmaModel: hw.analyze.model || "",
          txChunkSeconds: (cfg.relay_hardware_tx_chunk_minutes || 5) * 60,
          txChunkOverlapSeconds: cfg.relay_hardware_tx_chunk_overlap_seconds ?? 30,
          diarizationEnabled: !!cfg.relay_hardware_diarization_enabled,
          clusterThresholdPct: cfg.relay_hardware_voice_clustering_threshold ?? 70,
          anchorMinSpeakingSec: cfg.relay_hardware_anchor_min_speaking_sec ?? 30,
          smallClusterMaxSpeakingSec: cfg.relay_hardware_small_cluster_max_speaking_sec ?? 15,
          uncertainMarginPct: cfg.relay_hardware_uncertain_margin_pct ?? 10,
          txConcurrency: cfg.relay_hardware_tx_concurrency || 4,
        });
        result = await backend.transcribeAudio({
          audio: req.file.buffer,
          mimeType,
          offsetSeconds,
        });
      }
    } catch (err) {
      // The job was already charged by an earlier call; hand the
      // credit back because this leg failed.
      if (reusedJob) await refundJob({ creditKey, installId, license, jobId });
      console.error(`[relay/transcribe] backend error: ${err?.message}`);
      // Fire-and-forget health report for hardware-served calls;
      // Gemini failures are a separate observability surface.
      if (chosenBackend === "hardware") {
        reportHealthEvent({
          service: "parakeet",
          ok: false,
          error: (err?.message || String(err)).slice(0, 280),
          ms: Date.now() - t0,
        });
      }
      await recordCall({
        install_id: auditInstall,
        license_fingerprint: licenseFp,
        tier,
        pipeline: "transcribe",
        backend: chosenBackend,
        model: chosenBackend === "gemini"
          ? cfg.relay_gemini_transcription_model
          : hw.transcribe.model || "(auto)",
        status: "error",
        credit_charged: 0,
        duration_ms: Date.now() - t0,
        audio_seconds: audioSeconds,
        cost_usd: 0,
        job_id: jobId,
        error: (err?.message || String(err)).slice(0, 200),
      });
      const e = await errorEnvelope({
        error: err?.message || "backend_error",
        creditKey,
        installId,
        tier,
        statusHint: err?.status || 502,
      });
      // Fall back to 502 if the envelope carries no usable status,
      // mirroring the fallback used on the auth-error path.
      return res.status(e.statusHint || 502).json(e.body);
    }

    // Charge exactly once per job: skip when an earlier call on the
    // same job id already recorded the charge.
    let creditCharged = 0;
    if (!reusedJob) {
      await commitCredit({ creditKey, installId, license, backend: chosenBackend, tier });
      await markJobCharged({ creditKey, installId, license, jobId, backend: chosenBackend, tier });
      creditCharged = 1;
    }

    // Success — write the audit row with cost details. Gemini's usage
    // metadata gives us token counts; calcGeminiCost translates that
    // into USD. Hardware-served calls have no token data and we
    // report cost_usd: 0 (operator's hardware is fixed-cost).
    // `result` is read defensively here, as it is for model/attempts
    // below, so a nullish backend result cannot throw after the
    // credit has been committed.
    const costDetails =
      chosenBackend === "gemini" && result?.usage
        ? calcGeminiCost(result.model, result.usage)
        : {
            input_tokens: 0,
            output_tokens: 0,
            thinking_tokens: 0,
            cost_usd: 0,
          };
    await recordCall({
      // Same identifier as the refused/error rows so every outcome of
      // a caller is attributed to the same audit key.
      install_id: auditInstall,
      license_fingerprint: licenseFp,
      tier,
      pipeline: "transcribe",
      backend: chosenBackend,
      model: result?.model || null,
      status: "success",
      credit_charged: creditCharged,
      duration_ms: Date.now() - t0,
      audio_seconds: audioSeconds,
      job_id: jobId,
      attempts: result?.attempts || null,
      ...costDetails,
    });

    const body = await envelope({ result, creditKey, installId, license, tier, creditCharged });
    res.json(body);
  });

  return router;
}

/**
 * Decode the optional `chapters` form field.
 *
 * @param {string | undefined | null} raw - JSON-stringified array, if supplied
 * @returns {Array} the parsed array, or an empty array when the field is
 *   absent/empty, is not valid JSON, or decodes to something other than
 *   an array
 */
function parseChaptersField(raw) {
  if (raw) {
    let decoded;
    try {
      decoded = JSON.parse(raw);
    } catch {
      // Malformed JSON is treated the same as "no chapters".
      return [];
    }
    if (Array.isArray(decoded)) {
      return decoded;
    }
  }
  return [];
}