172 lines
5.7 KiB
JavaScript
172 lines
5.7 KiB
JavaScript
// Wrappers around ffprobe/ffmpeg for audio metadata and chunking.
//
// The duration helpers report the playable duration of an audio
// file. They are used by the transcribe routes to record
// audio_seconds alongside each audit entry, so the dashboard can
// normalize wall-clock duration to "ms per minute of audio" — a
// backend-agnostic speed benchmark.
//
// The duration helpers return the duration in seconds (float), or
// null if ffprobe fails or the file isn't probeable. They never
// throw — best-effort metadata shouldn't break the request that
// needs it.
//
// This file also exports splitAudioFile, which cuts long audio into
// chunks via ffmpeg. Unlike the duration helpers, it can throw.
|
|
|
|
import { execFile } from "child_process";
|
|
import { promisify } from "util";
|
|
import fs from "fs/promises";
|
|
import os from "os";
|
|
import path from "path";
|
|
|
|
const execFileAsync = promisify(execFile);
|
|
|
|
// NOTE: there is intentionally NO default chunk size export here.
// The canonical default lives in server/config.js
// (`relay_hardware_tx_chunk_minutes` and `relay_gemini_tx_chunk_minutes`)
// and flows down through createHardwareBackend / createGeminiBackend
// to splitAudioFile. The former default export was removed in v0.2.32
// so there's exactly one place to change the default — the Settings
// tab in the dashboard.
|
|
|
|
// Ask ffprobe for the container-level duration of the file at
// `filePath`.
//
// Resolves to the duration in seconds (a positive float). Resolves to
// null — and never rejects — when `filePath` is falsy, when ffprobe
// cannot be run or exits with an error, when it exceeds the 10 s
// timeout, or when its output is not a finite number greater than 0.
export async function getAudioDurationSeconds(filePath) {
  if (!filePath) {
    return null;
  }

  const probeArgs = [
    // Silence everything except hard errors.
    "-v",
    "error",
    // Print only the duration field of the format section.
    "-show_entries",
    "format=duration",
    // Bare number on stdout: no section wrappers, no "duration=" key.
    "-of",
    "default=noprint_wrappers=1:nokey=1",
    filePath,
  ];

  try {
    const probeResult = await execFileAsync("ffprobe", probeArgs, {
      timeout: 10_000,
    });
    const parsed = parseFloat(probeResult.stdout.trim());
    const isUsable = Number.isFinite(parsed) && parsed > 0;
    return isUsable ? parsed : null;
  } catch {
    // Best-effort metadata: any failure is reported as "unknown".
    return null;
  }
}
|
|
|
|
// Split an audio file into fixed-length chunks via ffmpeg.
//
// Parameters (single options object):
//   inputPath      - path of the audio file to split.
//   outputDir      - directory the chunk files are written into, as
//                    `chunk_<index>.<ext>`; <ext> is taken from
//                    inputPath, falling back to "mp3".
//   chunkSeconds   - maximum length of each chunk; required, must be a
//                    finite number > 0.
//   overlapSeconds - seconds shared between consecutive chunks
//                    (default 0). Values that are not finite, negative,
//                    or >= chunkSeconds are treated as 0, because the
//                    cut position would otherwise never advance.
//
// Resolves to an array of
//   { filePath, startSeconds, durationSeconds, overlapBoundarySec, index }
// ordered by startSeconds. Resolves to an empty array if the duration
// cannot be probed or the audio is not longer than chunkSeconds —
// the caller should just send the original file in that case.
//
// Each chunk is first cut with `-acodec copy` (lossless and fast, no
// re-encoding pass); if that ffmpeg run fails, the same cut is retried
// with re-encoding.
//
// Throws if chunkSeconds is invalid. Rejects with the ffmpeg error if
// both the copy attempt and the re-encode retry fail for a chunk.
//
// Used by the hardware backend to keep Parakeet calls within memory
// limits on long audio. The relay's audit log later records audio_seconds
// for the WHOLE file (not per-chunk) so the dashboard's
// "ms per minute of audio" benchmark stays meaningful.
export async function splitAudioFile({
  inputPath,
  outputDir,
  chunkSeconds,
  overlapSeconds = 0,
}) {
  if (!Number.isFinite(chunkSeconds) || chunkSeconds <= 0) {
    throw new Error("splitAudioFile: chunkSeconds is required (no default — pass an explicit value from config)");
  }

  // Overlap must be smaller than chunk size or the cut position never
  // advances. 0 is fine (no overlap).
  const isOverlapValid =
    Number.isFinite(overlapSeconds) &&
    overlapSeconds >= 0 &&
    overlapSeconds < chunkSeconds;
  const overlap = isOverlapValid ? overlapSeconds : 0;

  const duration = await getAudioDurationSeconds(inputPath);
  if (!duration || duration <= chunkSeconds) return [];

  const ext = path.extname(inputPath).replace(/^\./, "") || "mp3";

  // Advance step = chunkSeconds - overlap. Each chunk still has
  // length up to chunkSeconds; consecutive chunks share `overlap`
  // seconds at their boundary. The caller's stitching code dedupes
  // by dropping the overlapping prefix from chunk N+1 (and all
  // subsequent chunks).
  const advanceStep = chunkSeconds - overlap;

  const chunks = [];
  let startSec = 0;
  for (let i = 0; startSec < duration; i++) {
    const remaining = duration - startSec;
    const reachesEnd = remaining <= chunkSeconds;
    const segLen = reachesEnd ? remaining : chunkSeconds;
    const chunkPath = path.join(outputDir, `chunk_${i}.${ext}`);

    // Arguments shared by the stream-copy attempt and the re-encode retry.
    const cutArgs = [
      "-y",
      "-i",
      inputPath,
      "-ss",
      String(startSec),
      "-t",
      String(segLen),
    ];

    try {
      await execFileAsync(
        "ffmpeg",
        [...cutArgs, "-acodec", "copy", chunkPath],
        { timeout: 120_000 }
      );
    } catch {
      // `-acodec copy` fails on some containers/streams that don't
      // start on a keyframe at the cut point. Retry with re-encoding,
      // at the cost of CPU time. A failure of the retry propagates.
      await execFileAsync("ffmpeg", [...cutArgs, chunkPath], {
        timeout: 180_000,
      });
    }

    chunks.push({
      filePath: chunkPath,
      startSeconds: startSec,
      durationSeconds: segLen,
      // Boundary marker: timestamps strictly less than this value
      // are duplicates of the prior chunk's tail (overlap region).
      // Caller dedupes by dropping output before this boundary.
      // For chunk 0 this equals startSec (no prior chunk), so the
      // boundary check is a no-op.
      overlapBoundarySec: i === 0 ? startSec : startSec + overlap,
      index: i,
    });

    // Once a chunk reaches the end of the file we are done. Advancing
    // again would (when overlap > 0) cut one more chunk that lies
    // entirely inside this chunk's tail: pure duplicate audio whose
    // overlap boundary falls past its own end.
    if (reachesEnd) break;

    startSec += advanceStep;
  }

  return chunks;
}
|
|
|
|
// Duration probe for callers that hold the audio in memory rather
// than on disk (the /relay/transcribe route receives multipart
// uploads as buffers).
//
// The buffer is written to a uniquely named scratch file in the OS
// temp directory, probed with getAudioDurationSeconds, and the scratch
// file is removed afterwards. A temp file is used instead of piping
// into ffprobe's stdin, which doesn't always handle every format
// reliably.
//
// Resolves to the duration in seconds, or null when the buffer is
// missing/empty, the scratch file cannot be written, or the probe
// yields nothing. Never rejects.
export async function getAudioDurationSecondsFromBuffer(buffer) {
  const hasBytes = Boolean(buffer) && Boolean(buffer.length);
  if (!hasBytes) {
    return null;
  }

  const stamp = Date.now();
  const randomTag = Math.random().toString(36).slice(2, 8);
  const scratchPath = path.join(os.tmpdir(), `relay-probe-${stamp}-${randomTag}`);

  try {
    await fs.writeFile(scratchPath, buffer);
    const seconds = await getAudioDurationSeconds(scratchPath);
    return seconds;
  } catch {
    return null;
  } finally {
    // Fire-and-forget cleanup: not awaited, and a failed unlink is
    // deliberately ignored.
    fs.unlink(scratchPath).catch(() => {});
  }
}
|