// Audio I/O helpers — ffprobe for metadata, ffmpeg for splitting, plus
// HTTP downloading for podcast episodes. Pure module: no state, no
// Express, only takes paths/URLs and returns data.

import { execFile } from "child_process";
import { promisify } from "util";
import path from "path";
import http from "http";
import https from "https";
import { createWriteStream } from "fs";

// Promise-returning execFile: resolves with { stdout, stderr } and rejects
// on spawn failure, non-zero exit, or timeout.
const execFileAsync = promisify(execFile);

// ── Audio duration via ffprobe ──────────────────────────────────────────────
// Returns the duration in seconds, or null if ffprobe can't read the file
// (corrupt, missing, unsupported format). Caller decides what to do with
// null — most call sites treat it as "unknown" and skip duration-dependent
// branches.
/**
 * Probe an audio file's duration with ffprobe.
 *
 * @param {string} filePath - Path handed to ffprobe as its input argument.
 * @returns {Promise<number|null>} Duration in seconds, or null when ffprobe
 *   fails, times out (15 s), or prints something that is not a finite number.
 */
export async function getAudioDuration(filePath) {
  const probeArgs = [
    "-v", "error",
    "-show_entries", "format=duration",
    "-of", "default=noprint_wrappers=1:nokey=1",
    filePath,
  ];

  try {
    const { stdout } = await execFileAsync("ffprobe", probeArgs, { timeout: 15000 });
    const seconds = Number.parseFloat(stdout.trim());
    // Number.isFinite rejects NaN (e.g. empty or non-numeric output) and
    // also guards against a non-finite parse result.
    return Number.isFinite(seconds) ? seconds : null;
  } catch {
    // Deliberate best-effort: any ffprobe failure is reported as "unknown".
    return null;
  }
}

// ── Split a long audio file into chunks ─────────────────────────────────────
// Used when a video is too long for a single Gemini transcription call.
// Returns null if no split is needed (audio fits in one chunk), otherwise
// an array of `{ path, startOffset, index }`. Uses `-acodec copy` so it's
// fast and lossless — no re-encoding.
/**
 * Split an audio file into consecutive chunks of at most `chunkSeconds`.
 *
 * @param {string} inputPath - Source audio file.
 * @param {string} outputDir - Directory that receives `chunk_<n>.mp3` files.
 * @param {number} [chunkSeconds=2700] - Maximum chunk length in seconds.
 * @returns {Promise<Array<{path: string, startOffset: number, index: number}>|null>}
 *   null when the duration is unknown/zero or the audio already fits in one
 *   chunk; otherwise one entry per chunk written, in order.
 * @throws {RangeError} If `chunkSeconds` is not a positive number.
 * @throws {Error} If an ffmpeg invocation fails or exceeds its 120 s timeout.
 */
export async function splitAudioFile(inputPath, outputDir, chunkSeconds = 2700) {
  // A zero/negative step would never advance the loop below, and a string
  // step would turn `+=` into concatenation — normalise and validate first.
  const chunkLen = Number(chunkSeconds);
  if (!(chunkLen > 0)) {
    throw new RangeError(`chunkSeconds must be a positive number, got ${String(chunkSeconds)}`);
  }

  const duration = await getAudioDuration(inputPath);
  if (!duration || duration <= chunkLen) return null;

  const chunks = [];
  for (let startSec = 0; startSec < duration; startSec += chunkLen) {
    const index = chunks.length;
    const chunkPath = path.join(outputDir, `chunk_${index}.mp3`);
    // The final chunk is clipped to whatever audio remains.
    const segLen = Math.min(chunkLen, duration - startSec);

    // Sequential on purpose: each ffmpeg run finishes before the next starts.
    await execFileAsync("ffmpeg", [
      "-y",
      "-i", inputPath,
      "-ss", String(startSec),
      "-t", String(segLen),
      "-acodec", "copy",
      chunkPath,
    ], { timeout: 120000 });

    chunks.push({ path: chunkPath, startOffset: startSec, index });
  }
  return chunks;
}

// ── Download a podcast episode by URL ───────────────────────────────────────
// Streams the HTTP response straight to disk. Follows redirects (absolute
// or relative, up to 10 hops). Rejects on any non-200 final status. Used by
// /api/process when the input URL is a podcast episode rather than a
// YouTube video.
/**
 * Download `audioUrl` to `destPath`.
 *
 * @param {string} audioUrl - Absolute http(s) URL of the episode audio.
 * @param {string} destPath - File path the response body is written to.
 * @returns {Promise<void>} Resolves once the file has been flushed and closed.
 *   Rejects on an invalid/unsupported URL, request or write errors, too many
 *   redirects, a non-200 final status, or a connection that closes before
 *   the body is complete.
 */
export function downloadPodcastAudio(audioUrl, destPath) {
  const maxRedirects = 10;

  return new Promise((resolve, reject) => {
    const saveBody = (res) => {
      const fileStream = createWriteStream(destPath);
      const fail = (err) => {
        res.destroy();
        fileStream.destroy();
        reject(err);
      };

      res.on("error", fail);
      // pipe() does not end the destination when the source dies early, so
      // detect a truncated response explicitly instead of hanging forever.
      res.on("close", () => {
        if (!res.complete) {
          fail(new Error("Connection closed before podcast audio download completed"));
        }
      });
      fileStream.on("error", fail);
      fileStream.on("finish", () => fileStream.close(resolve));
      res.pipe(fileStream);
    };

    const fetchUrl = (url, redirectsLeft) => {
      let request;
      try {
        const target = new URL(url);
        const client = target.protocol === "https:" ? https : http;
        request = client.get(target, (res) => {
          const { statusCode, headers } = res;

          if (statusCode >= 300 && statusCode < 400 && headers.location) {
            res.resume(); // drain the redirect body so the socket is released
            if (redirectsLeft <= 0) {
              reject(new Error("Too many redirects downloading podcast audio"));
              return;
            }
            let nextUrl;
            try {
              // Location may be relative; resolve it against the current URL.
              nextUrl = new URL(headers.location, target).href;
            } catch (err) {
              reject(err);
              return;
            }
            fetchUrl(nextUrl, redirectsLeft - 1);
            return;
          }

          if (statusCode !== 200) {
            res.resume();
            reject(new Error(`HTTP ${statusCode} downloading podcast audio`));
            return;
          }

          saveBody(res);
        });
      } catch (err) {
        // new URL() / get() throw synchronously on malformed URLs or
        // unsupported protocols; surface that as a rejection, not a crash.
        reject(err);
        return;
      }
      request.on("error", reject);
    };

    fetchUrl(audioUrl, maxRedirects);
  });
}