// recap-relay/server/audio-meta.js

// Wrapper around ffprobe for getting the playable duration of an
// audio file. Used by the transcribe routes to record audio_seconds
// alongside each audit entry, so the dashboard can normalize wall-
// clock duration to "ms per minute of audio" — a backend-agnostic
// speed benchmark.
//
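// The duration helpers return the duration in seconds (float), or null
// if ffprobe fails or the file isn't probeable. They never throw —
// best-effort metadata shouldn't break the request that needs it.
// splitAudioFile is the exception: it throws on an invalid chunk size
// or when ffmpeg cannot produce a chunk.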

import { execFile } from "child_process";
import { promisify } from "util";
import fs from "fs/promises";
import os from "os";
import path from "path";

const execFileAsync = promisify(execFile);

// NOTE: there is intentionally NO default chunk size export here.
// The canonical default lives in server/config.js
// (`relay_hardware_tx_chunk_minutes` and `relay_gemini_tx_chunk_minutes`)
// and flows down through createHardwareBackend / createGeminiBackend
// to splitAudioFile. Removed in v0.2.32 so there's exactly one place
// to change the default — the Settings tab in the dashboard.

// Probe `filePath` with ffprobe and report its format-level duration.
// Resolves to the duration in seconds as a positive float, or to null
// when the path is falsy, when ffprobe fails (including hitting the
// 10 s timeout), or when its output isn't a positive finite number.
// Failures are swallowed on purpose: this is best-effort metadata.
export async function getAudioDurationSeconds(filePath) {
  if (!filePath) {
    return null;
  }
  const probeArgs = [
    // Silence everything except hard errors.
    "-v",
    "error",
    // Ask only for the duration entry of the format section.
    "-show_entries",
    "format=duration",
    // Print the bare number: no section wrappers, no "duration=" key.
    "-of",
    "default=noprint_wrappers=1:nokey=1",
    filePath,
  ];
  try {
    const probe = await execFileAsync("ffprobe", probeArgs, {
      timeout: 10_000,
    });
    const parsed = parseFloat(probe.stdout.trim());
    const isUsable = Number.isFinite(parsed) && parsed > 0;
    return isUsable ? parsed : null;
  } catch {
    return null;
  }
}

// Split an audio file into fixed-length chunks via ffmpeg.
//
// Parameters (single options object):
//   inputPath      - path of the audio file to split.
//   outputDir      - directory the chunk files are written into, as
//                    chunk_<index>.<ext>. The extension is taken from
//                    inputPath, falling back to "mp3" when it has none.
//   chunkSeconds   - maximum length of each chunk. Required; must be a
//                    positive finite number.
//   overlapSeconds - seconds shared between consecutive chunks. Anything
//                    that is not a finite value in [0, chunkSeconds) is
//                    treated as 0 (no overlap).
//
// Resolves to an array ordered by startSeconds of
//   { filePath, startSeconds, durationSeconds, overlapBoundarySec, index }
// or to an empty array when the duration cannot be probed or the audio
// is no longer than chunkSeconds — the caller should just send the
// original file in that case.
//
// Each cut is first attempted with -acodec copy (lossless and fast, no
// re-encoding pass); if that ffmpeg run fails, the cut is retried with
// re-encoding.
//
// Throws if chunkSeconds is invalid, and rejects with the ffmpeg error
// if the re-encoding retry fails as well.
//
// Used by the hardware backend to keep Parakeet calls within memory
// limits on long audio. The relay's audit log later records audio_seconds
// for the WHOLE file (not per-chunk) so the dashboard's
// "ms per minute of audio" benchmark stays meaningful.
export async function splitAudioFile({
  inputPath,
  outputDir,
  chunkSeconds,
  overlapSeconds = 0,
}) {
  if (!Number.isFinite(chunkSeconds) || chunkSeconds <= 0) {
    throw new Error("splitAudioFile: chunkSeconds is required (no default — pass an explicit value from config)");
  }
  // Overlap must be smaller than chunk size or the loop never
  // advances. 0 is fine (no overlap, original behavior).
  const overlap =
    Number.isFinite(overlapSeconds) &&
    overlapSeconds >= 0 &&
    overlapSeconds < chunkSeconds
      ? overlapSeconds
      : 0;

  const duration = await getAudioDurationSeconds(inputPath);
  if (!duration || duration <= chunkSeconds) return [];

  const ext = path.extname(inputPath).replace(/^\./, "") || "mp3";
  // Advance step = chunkSeconds - overlap. Each chunk still has
  // length up to chunkSeconds; consecutive chunks share `overlap`
  // seconds at their boundary. The caller's stitching code dedupes
  // by dropping the overlapping prefix from chunk N+1 (and all
  // subsequent chunks).
  const advanceStep = chunkSeconds - overlap;
  const chunks = [];
  let startSec = 0;
  for (let i = 0; startSec < duration; i++) {
    const remaining = duration - startSec;
    const segLen = Math.min(chunkSeconds, remaining);
    const chunkPath = path.join(outputDir, `chunk_${i}.${ext}`);
    const cutArgs = [
      "-y",
      "-i",
      inputPath,
      "-ss",
      String(startSec),
      "-t",
      String(segLen),
    ];
    try {
      await execFileAsync(
        "ffmpeg",
        [...cutArgs, "-acodec", "copy", chunkPath],
        { timeout: 120_000 }
      );
    } catch {
      // `-acodec copy` fails on some containers/streams that don't
      // start on a keyframe at the cut point. Retry with re-encoding,
      // which always works at the cost of CPU time.
      await execFileAsync("ffmpeg", [...cutArgs, chunkPath], {
        timeout: 180_000,
      });
    }
    chunks.push({
      filePath: chunkPath,
      startSeconds: startSec,
      durationSeconds: segLen,
      // Boundary marker: timestamps strictly less than this value
      // are duplicates of the prior chunk's tail (overlap region).
      // Caller dedupes by dropping output before this boundary.
      // For chunk 0 this equals startSec (no prior chunk), so the
      // boundary check is a no-op.
      overlapBoundarySec: i === 0 ? startSec : startSec + overlap,
      index: i,
    });
    // This chunk already runs to the end of the audio. With a non-zero
    // overlap the next start could still be < duration, but that chunk
    // would lie entirely inside this one (its overlap boundary would be
    // at or past the end), so producing it would only waste an ffmpeg
    // run and a downstream call.
    if (remaining <= chunkSeconds) break;
    startSec += advanceStep;
  }
  return chunks;
}

// In-memory variant of getAudioDurationSeconds, for callers that hold
// the audio as a buffer (the /relay/transcribe route receives multipart
// uploads that way). The bytes are spilled to a uniquely named scratch
// file in the OS temp directory, probed from there, and the scratch
// file is removed afterwards. Going through a file is preferred over
// piping into ffprobe's stdin, which doesn't handle every format
// reliably.
//
// Resolves to the duration in seconds, or null for an empty/missing
// buffer or when writing or probing fails.
export async function getAudioDurationSecondsFromBuffer(buffer) {
  if (!buffer?.length) {
    return null;
  }
  const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
  const scratchPath = path.join(os.tmpdir(), `relay-probe-${uniqueSuffix}`);
  let seconds = null;
  try {
    await fs.writeFile(scratchPath, buffer);
    seconds = await getAudioDurationSeconds(scratchPath);
  } catch {
    seconds = null;
  } finally {
    // Fire-and-forget cleanup: the result doesn't wait on the delete,
    // and a failed delete (e.g. the write never happened) is ignored.
    fs.unlink(scratchPath).catch(() => {});
  }
  return seconds;
}