// Wrapper around ffprobe/ffmpeg for measuring and splitting audio files.
//
// The duration helpers are used by the transcribe routes to record
// audio_seconds alongside each audit entry, so the dashboard can
// normalize wall-clock duration to "ms per minute of audio" — a
// backend-agnostic speed benchmark.
//
// The duration helpers return seconds (float), or null if ffprobe fails
// or the file isn't probeable. They never throw — best-effort metadata
// shouldn't break the request that needs it. splitAudioFile, by
// contrast, throws on invalid arguments or when ffmpeg cannot cut a chunk.

import { execFile } from "child_process";
import { promisify } from "util";
import fs from "fs/promises";
import os from "os";
import path from "path";

const execFileAsync = promisify(execFile);

// NOTE: there is intentionally NO default chunk size export here.
// The canonical default lives in server/config.js
// (`relay_hardware_tx_chunk_minutes` and `relay_gemini_tx_chunk_minutes`)
// and flows down through createHardwareBackend / createGeminiBackend
// to splitAudioFile. Removed in v0.2.32 so there's exactly one place
// to change the default — the Settings tab in the dashboard.

/**
 * Runs ffprobe on a file path and reports the container duration.
 *
 * @param {string} filePath - Path of the audio file to probe.
 * @returns {Promise<number|null>} Duration in seconds, or null when the
 *   path is empty, ffprobe fails / times out, or the reported value is
 *   not a positive finite number.
 */
export async function getAudioDurationSeconds(filePath) {
  if (!filePath) return null;
  try {
    // -v error: silence everything except hard errors
    // -show_entries format=duration: just the duration float
    // -of default=noprint_wrappers=1:nokey=1: bare number, no labels
    const { stdout } = await execFileAsync(
      "ffprobe",
      [
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        filePath,
      ],
      { timeout: 10_000 }
    );
    const seconds = Number.parseFloat(stdout.trim());
    if (!Number.isFinite(seconds) || seconds <= 0) return null;
    return seconds;
  } catch {
    // Best-effort: a missing binary, timeout, or unreadable file all
    // collapse to "duration unknown".
    return null;
  }
}

// Cuts [startSec, startSec + segLen) out of inputPath into chunkPath.
// Tries a stream copy first and falls back to re-encoding if that fails.
async function cutSegment({ inputPath, startSec, segLen, chunkPath }) {
  const window = ["-y", "-i", inputPath, "-ss", String(startSec), "-t", String(segLen)];
  try {
    await execFileAsync("ffmpeg", [...window, "-acodec", "copy", chunkPath], {
      timeout: 120_000,
    });
  } catch {
    // `-acodec copy` fails on some containers/streams that don't
    // start on a keyframe at the cut point. Retry with re-encoding,
    // at the cost of CPU time. A failure here propagates to the caller.
    await execFileAsync("ffmpeg", [...window, chunkPath], { timeout: 180_000 });
  }
}

/**
 * Split an audio file into fixed-length chunks via ffmpeg.
 *
 * Chunks are written to `outputDir` as `chunk_<index>.<ext>`, where the
 * extension is taken from `inputPath` (falling back to "mp3"). The first
 * attempt per chunk uses `-acodec copy` (no re-encoding pass); if that
 * fails the chunk is re-encoded instead.
 *
 * Used by the hardware backend to keep Parakeet calls within memory
 * limits on long audio. The relay's audit log later records audio_seconds
 * for the WHOLE file (not per-chunk) so the dashboard's
 * "ms per minute of audio" benchmark stays meaningful.
 *
 * @param {object} options
 * @param {string} options.inputPath - Audio file to split.
 * @param {string} options.outputDir - Directory that receives the chunk files.
 * @param {number} options.chunkSeconds - Maximum chunk length; must be a
 *   positive finite number (there is deliberately no default).
 * @param {number} [options.overlapSeconds=0] - Seconds shared between
 *   consecutive chunks. Values that are not finite, negative, or
 *   >= chunkSeconds are treated as 0.
 * @returns {Promise<Array<{filePath: string, startSeconds: number,
 *   durationSeconds: number, overlapBoundarySec: number, index: number}>>}
 *   Chunks ordered by startSeconds. Empty when the duration cannot be
 *   probed or the audio is no longer than chunkSeconds — the caller
 *   should just send the original file in that case.
 * @throws {Error} If chunkSeconds is invalid, or if ffmpeg fails for a
 *   chunk even after the re-encode retry.
 */
export async function splitAudioFile({
  inputPath,
  outputDir,
  chunkSeconds,
  overlapSeconds = 0,
}) {
  if (!Number.isFinite(chunkSeconds) || chunkSeconds <= 0) {
    throw new Error("splitAudioFile: chunkSeconds is required (no default — pass an explicit value from config)");
  }

  // Overlap must be smaller than chunk size or the loop never
  // advances. 0 is fine (no overlap, original behavior).
  const overlapIsUsable =
    Number.isFinite(overlapSeconds) &&
    overlapSeconds >= 0 &&
    overlapSeconds < chunkSeconds;
  const overlap = overlapIsUsable ? overlapSeconds : 0;

  const duration = await getAudioDurationSeconds(inputPath);
  if (!duration || duration <= chunkSeconds) return [];

  const ext = path.extname(inputPath).replace(/^\./, "") || "mp3";

  // Advance step = chunkSeconds - overlap. Each chunk still has
  // length up to chunkSeconds; consecutive chunks share `overlap`
  // seconds at their boundary. The caller's stitching code dedupes
  // by dropping the overlapping prefix from chunk N+1 (and all
  // subsequent chunks).
  const advanceStep = chunkSeconds - overlap;

  const chunks = [];
  let reachedEnd = false;
  for (let index = 0; !reachedEnd; index++) {
    // Derive the offset from the index so it does not accumulate
    // floating-point error across many additions.
    const startSec = index * advanceStep;
    const remaining = duration - startSec;
    const segLen = Math.min(chunkSeconds, remaining);
    const chunkPath = path.join(outputDir, `chunk_${index}.${ext}`);

    await cutSegment({ inputPath, startSec, segLen, chunkPath });

    chunks.push({
      filePath: chunkPath,
      startSeconds: startSec,
      durationSeconds: segLen,
      // Boundary marker: timestamps strictly less than this value
      // are duplicates of the prior chunk's tail (overlap region).
      // Caller dedupes by dropping output before this boundary.
      // For chunk 0 this equals startSec (no prior chunk), so the
      // boundary check is a no-op.
      overlapBoundarySec: index === 0 ? startSec : startSec + overlap,
      index,
    });

    // Once a chunk runs to the end of the file, stop. With a non-zero
    // overlap the next start offset can still be < duration, but that
    // chunk would lie entirely inside the overlap region of this one,
    // so everything in it would be discarded by the caller.
    reachedEnd = remaining <= chunkSeconds;
  }
  return chunks;
}

/**
 * Convenience wrapper for callers holding the audio in memory (the
 * /relay/transcribe route receives multipart uploads as buffers).
 * Writes a temp file, probes it, and removes it again. Cheaper than
 * re-streaming through ffprobe's stdin which doesn't always handle
 * every format reliably.
 *
 * @param {Buffer|Uint8Array} buffer - Raw audio bytes.
 * @returns {Promise<number|null>} Duration in seconds, or null when the
 *   buffer is empty/missing, the temp file cannot be written, or the
 *   probe fails.
 */
export async function getAudioDurationSecondsFromBuffer(buffer) {
  if (!buffer || !buffer.length) return null;
  const tmpFile = path.join(
    os.tmpdir(),
    `relay-probe-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
  );
  try {
    await fs.writeFile(tmpFile, buffer);
    return await getAudioDurationSeconds(tmpFile);
  } catch {
    return null;
  } finally {
    // Cleanup is best-effort: the file may never have been created.
    await fs.unlink(tmpFile).catch(() => {});
  }
}