Files

172 lines
5.7 KiB
JavaScript

// Wrapper around ffprobe for getting the playable duration of an
// audio file. Used by the transcribe routes to record audio_seconds
// alongside each audit entry, so the dashboard can normalize wall-
// clock duration to "ms per minute of audio" — a backend-agnostic
// speed benchmark.
//
// Returns the duration in seconds (float), or null if ffprobe fails
// or the file isn't probeable. Never throws — best-effort metadata
// shouldn't break the request that needs it.
import { execFile } from "child_process";
import { promisify } from "util";
import fs from "fs/promises";
import os from "os";
import path from "path";
// Promise-returning form of child_process.execFile, so the ffprobe and
// ffmpeg invocations below can be awaited. Resolves with
// { stdout, stderr } and rejects when the child process fails.
const execFileAsync = promisify(execFile);
// NOTE: there is intentionally NO default chunk size export here.
// The canonical default lives in server/config.js
// (`relay_hardware_tx_chunk_minutes` and `relay_gemini_tx_chunk_minutes`)
// and flows down through createHardwareBackend / createGeminiBackend
// to splitAudioFile. Removed in v0.2.32 so there's exactly one place
// to change the default — the Settings tab in the dashboard.
// Runs ffprobe on a file path and returns the playable duration.
//
// filePath: path of the audio file to probe.
//
// Returns the duration in seconds (float), or null when filePath is
// falsy, when ffprobe fails or exceeds the 10 s timeout, or when the
// reported value is not a finite positive number. Never throws: every
// failure is deliberately collapsed to null because this is best-effort
// metadata.
export async function getAudioDurationSeconds(filePath) {
  if (!filePath) return null;
  try {
    const { stdout } = await execFileAsync(
      "ffprobe",
      [
        // Silence everything except hard errors.
        "-v",
        "error",
        // Emit just the container-level duration field.
        "-show_entries",
        "format=duration",
        // Bare number on stdout: no section wrappers, no "duration=" key.
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        // Hand the input over with an explicit -i so a path that starts
        // with "-" is treated as the input, not parsed as an option.
        "-i",
        filePath,
      ],
      { timeout: 10_000 }
    );
    const seconds = Number.parseFloat(stdout.trim());
    // Non-numeric output parses to NaN; zero or negative durations are
    // treated as "not probeable" as well.
    if (!Number.isFinite(seconds) || seconds <= 0) return null;
    return seconds;
  } catch {
    return null;
  }
}
// Split an audio file into fixed-length chunks via ffmpeg.
//
// Parameters (single options object):
//   inputPath      - path of the audio file to split.
//   outputDir      - directory the chunk files are written into, as
//                    chunk_<index>.<ext> (ext is taken from inputPath,
//                    "mp3" if it has none).
//   chunkSeconds   - maximum chunk length; must be a finite number > 0.
//   overlapSeconds - seconds shared between consecutive chunks
//                    (default 0). A value that is non-finite, negative,
//                    or >= chunkSeconds is silently treated as 0, since
//                    the start position could otherwise never advance.
//
// Returns an array ordered by startSeconds of
//   { filePath, startSeconds, durationSeconds, overlapBoundarySec, index }
// or an empty array when the duration cannot be probed or the audio is
// no longer than chunkSeconds — the caller should just send the
// original file in that case.
//
// Throws when chunkSeconds is invalid, or when both the stream-copy cut
// and the re-encoding retry fail for a chunk.
//
// Each cut is first attempted with -acodec copy (no re-encoding pass).
//
// Used by the hardware backend to keep Parakeet calls within memory
// limits on long audio. The relay's audit log later records audio_seconds
// for the WHOLE file (not per-chunk) so the dashboard's
// "ms per minute of audio" benchmark stays meaningful.
export async function splitAudioFile({
  inputPath,
  outputDir,
  chunkSeconds,
  overlapSeconds = 0,
}) {
  if (!Number.isFinite(chunkSeconds) || chunkSeconds <= 0) {
    throw new Error("splitAudioFile: chunkSeconds is required (no default — pass an explicit value from config)");
  }

  // Overlap must be smaller than the chunk size or the start position
  // never advances. 0 is fine (no overlap).
  const overlapIsUsable =
    Number.isFinite(overlapSeconds) &&
    overlapSeconds >= 0 &&
    overlapSeconds < chunkSeconds;
  const overlap = overlapIsUsable ? overlapSeconds : 0;

  const duration = await getAudioDurationSeconds(inputPath);
  if (!duration || duration <= chunkSeconds) return [];

  const ext = path.extname(inputPath).replace(/^\./, "") || "mp3";

  // Advance step = chunkSeconds - overlap. Each chunk still has length
  // up to chunkSeconds; consecutive chunks share `overlap` seconds at
  // their boundary, which the caller's stitching code dedupes.
  const advanceStep = chunkSeconds - overlap;

  const chunks = [];
  for (let index = 0; ; index++) {
    // Derive the start from the index instead of accumulating it, so
    // repeated float addition cannot drift the chunk boundaries.
    const startSec = index * advanceStep;
    const remaining = duration - startSec;
    const segLen = Math.min(chunkSeconds, remaining);
    const chunkPath = path.join(outputDir, `chunk_${index}.${ext}`);

    const cutArgs = [
      "-y",
      "-i",
      inputPath,
      "-ss",
      String(startSec),
      "-t",
      String(segLen),
    ];
    try {
      await execFileAsync("ffmpeg", [...cutArgs, "-acodec", "copy", chunkPath], {
        timeout: 120_000,
      });
    } catch {
      // `-acodec copy` fails on some containers/streams that don't
      // start on a keyframe at the cut point. Retry with re-encoding,
      // at the cost of CPU time. A failure here propagates to the caller.
      await execFileAsync("ffmpeg", [...cutArgs, chunkPath], {
        timeout: 180_000,
      });
    }

    chunks.push({
      filePath: chunkPath,
      startSeconds: startSec,
      durationSeconds: segLen,
      // Boundary marker: timestamps strictly less than this value are
      // duplicates of the prior chunk's tail (overlap region). For
      // chunk 0 this equals startSec (no prior chunk), so the caller's
      // boundary check is a no-op.
      overlapBoundarySec: index === 0 ? startSec : startSec + overlap,
      index,
    });

    // This chunk already reaches the end of the audio. Any further
    // chunk would start inside it and end at the same point, i.e. lie
    // entirely within the overlap region the caller discards, so stop.
    if (remaining <= chunkSeconds) break;
  }
  return chunks;
}
// Convenience wrapper for callers holding the audio in memory (the
// /relay/transcribe route receives multipart uploads as buffers).
// Writes the buffer to a temp file, probes it, and cleans up. Cheaper
// than re-streaming through ffprobe's stdin which doesn't always
// handle every format reliably.
//
// buffer: the audio bytes; anything falsy or with zero length yields
//         null without touching the disk.
//
// Returns the duration in seconds, or null on any failure (temp dir
// creation, write, or probe). Never throws.
export async function getAudioDurationSecondsFromBuffer(buffer) {
  if (!buffer || !buffer.length) return null;

  let scratchDir = null;
  try {
    // A fresh mkdtemp directory gives every call its own uniquely named
    // location, so concurrent requests can never write to — or delete —
    // each other's probe file, and the file name is not guessable in
    // the shared tmpdir.
    scratchDir = await fs.mkdtemp(path.join(os.tmpdir(), "relay-probe-"));
    const tmpFile = path.join(scratchDir, "audio");
    // Uploaded audio may be sensitive: owner-only permissions.
    await fs.writeFile(tmpFile, buffer, { mode: 0o600 });
    return await getAudioDurationSeconds(tmpFile);
  } catch {
    return null;
  } finally {
    if (scratchDir !== null) {
      // Await the removal so nothing is left behind once the caller has
      // the result; a cleanup failure must not break the request.
      await fs.rm(scratchDir, { recursive: true, force: true }).catch(() => {});
    }
  }
}