recap/server/audio.js

// Audio I/O helpers — ffprobe for metadata, ffmpeg for splitting, plus
// HTTP downloading for podcast episodes. Pure module: no state, no
// Express, only takes paths/URLs and returns data.

import { execFile } from "child_process";
import { promisify } from "util";
import path from "path";
import http from "http";
import https from "https";
import { createWriteStream } from "fs";

const execFileAsync = promisify(execFile);

// ── Audio duration via ffprobe ──────────────────────────────────────────────
// Asks ffprobe for the container-level `duration` field of `filePath` and
// resolves with it as a number of seconds. Resolves with null — never
// rejects — when ffprobe fails for any reason (non-zero exit, 15 s timeout,
// binary not found) or when its output does not parse as a number. Callers
// are expected to treat null as "duration unknown".
export async function getAudioDuration(filePath) {
  // Print only the bare duration value: no section wrappers, no key name.
  const probeArgs = [
    "-v", "error",
    "-show_entries", "format=duration",
    "-of", "default=noprint_wrappers=1:nokey=1",
    filePath,
  ];

  try {
    const result = await execFileAsync("ffprobe", probeArgs, { timeout: 15000 });
    const seconds = Number.parseFloat(result.stdout.trim());
    if (Number.isNaN(seconds)) {
      return null;
    }
    return seconds;
  } catch {
    // Deliberate best-effort: any failure is reported as "unknown".
    return null;
  }
}

// ── Split a long audio file into chunks ─────────────────────────────────────
// Used when a video is too long for a single Gemini transcription call.
// Resolves with null if no split is needed (duration unknown, or the audio
// fits in one chunk); otherwise with an array of
// `{ path, startOffset, durationSec, index }`, one entry per chunk, written
// to `outputDir` as `chunk_<index>.mp3`. Uses `-acodec copy` so it's fast
// and lossless — no re-encoding.
//
// Rejects with RangeError when `chunkSeconds` is not a positive number
// (0, negative, NaN, null, or a non-number): such a value would otherwise
// never advance the start offset and loop forever spawning ffmpeg.
// Rejects with the underlying execFile error if any ffmpeg run fails.
export async function splitAudioFile(inputPath, outputDir, chunkSeconds = 2700) {
  // `!(x > 0)` rather than `x <= 0` so that NaN is rejected too. Infinity is
  // allowed on purpose: it simply means "never split" and yields null below.
  if (typeof chunkSeconds !== "number" || !(chunkSeconds > 0)) {
    throw new RangeError(
      `chunkSeconds must be a positive number, got ${String(chunkSeconds)}`,
    );
  }

  const duration = await getAudioDuration(inputPath);
  if (!duration || duration <= chunkSeconds) return null;

  const chunks = [];
  // Offsets are derived from the index rather than accumulated, so a
  // fractional chunkSeconds cannot drift through repeated addition.
  for (let index = 0; index * chunkSeconds < duration; index++) {
    const startSec = index * chunkSeconds;
    const chunkPath = path.join(outputDir, `chunk_${index}.mp3`);
    const segLen = Math.min(chunkSeconds, duration - startSec);

    // Chunks are cut one at a time; each ffmpeg run gets up to 2 minutes.
    await execFileAsync("ffmpeg", [
      "-y", "-i", inputPath,
      "-ss", String(startSec),
      "-t", String(segLen),
      "-acodec", "copy",
      chunkPath,
    ], { timeout: 120000 });

    chunks.push({
      path: chunkPath,
      startOffset: startSec,
      // Actual seconds in THIS chunk (the last chunk is usually
      // shorter than chunkSeconds). Carried downstream so the
      // transcribe-stitching code can sanity-cap timestamps each
      // chunk's model emits — some models hallucinate offsets
      // way past the chunk's audio (observed: gemini-3.1-flash-lite
      // emitting [10:12:44] on a 45-min chunk).
      durationSec: segLen,
      index,
    });
  }
  return chunks;
}

// ── Download a podcast episode by URL ───────────────────────────────────────
// Streams the HTTP(S) response body straight to `destPath` and resolves
// (with no value) once the file has been flushed and closed. Used by
// /api/process when the input URL is a podcast episode rather than a
// YouTube video.
//
// Redirects (3xx with a Location header) are followed, up to 10 hops;
// relative Location values are resolved against the URL that issued them.
// Rejects on: an unparsable or non-http(s) URL, a request or response
// stream error, a file write error, too many redirects, or any final
// status other than 200.
export function downloadPodcastAudio(audioUrl, destPath) {
  const MAX_REDIRECTS = 10;

  return new Promise((resolve, reject) => {
    const handleResponse = (res, currentUrl, redirectsLeft) => {
      const { statusCode, headers } = res;

      if (statusCode >= 300 && statusCode < 400 && headers.location) {
        // Drain the unused body so the socket is released.
        res.resume();
        if (redirectsLeft <= 0) {
          reject(new Error("Too many redirects downloading podcast audio"));
          return;
        }
        doFetch(headers.location, currentUrl, redirectsLeft - 1);
        return;
      }

      if (statusCode !== 200) {
        res.resume();
        reject(new Error(`HTTP ${statusCode} downloading podcast audio`));
        return;
      }

      const fileStream = createWriteStream(destPath);
      // pipe() does not forward errors in either direction, so wire both
      // sides up explicitly; otherwise a connection dropped mid-download
      // would leave this promise pending forever.
      res.on("error", (err) => {
        fileStream.destroy();
        reject(err);
      });
      fileStream.on("error", (err) => {
        res.destroy();
        reject(err);
      });
      fileStream.on("finish", () => fileStream.close(() => resolve()));
      res.pipe(fileStream);
    };

    const doFetch = (rawUrl, baseUrl, redirectsLeft) => {
      let request;
      try {
        // `baseUrl` is undefined for the initial request and the previous
        // hop's URL for redirects, which makes relative Locations work.
        const target = new URL(rawUrl, baseUrl);
        const getter = target.protocol === "https:" ? https : http;
        request = getter.get(target, (res) => handleResponse(res, target, redirectsLeft));
      } catch (err) {
        // new URL() / get() throw synchronously on bad input. On a redirect
        // hop we are inside a response callback, so convert the throw into
        // a rejection instead of an uncaught exception.
        reject(err);
        return;
      }
      request.on("error", reject);
    };

    doFetch(audioUrl, undefined, MAX_REDIRECTS);
  });
}