Module split: extract audio I/O helpers to server/audio.js
• getAudioDuration(path) — ffprobe wrapper, returns seconds | null
• splitAudioFile(in, dir, secs) — ffmpeg -acodec copy chunking
• downloadPodcastAudio(url, dst) — streams HTTP audio to disk

Also moved fetchUrl into util.js (alongside the other stateless helpers) — it's a generic HTTP-GET-with-redirects used by RSS parsing and channel discovery, not strictly audio.

server/index.js: 2758 → 2694 lines.

Smoke tested: server boots; /api/license-status, /api/health, / respond. No behavior change.
This commit is contained in:
@@ -0,0 +1,86 @@
|
||||
// Audio I/O helpers — ffprobe for metadata, ffmpeg for splitting, plus
|
||||
// HTTP downloading for podcast episodes. Pure module: no state, no
|
||||
// Express, only takes paths/URLs and returns data.
|
||||
|
||||
import { execFile } from "child_process";
|
||||
import { promisify } from "util";
|
||||
import path from "path";
|
||||
import http from "http";
|
||||
import https from "https";
|
||||
import { createWriteStream } from "fs";
|
||||
|
||||
// Promise-returning form of child_process.execFile, so the ffprobe/ffmpeg
// invocations in this module can be awaited.
const execFileAsync = promisify(execFile);
|
||||
|
||||
// ── Audio duration via ffprobe ──────────────────────────────────────────────
// Asks ffprobe for the `format=duration` entry of `filePath` and returns it as
// a number of seconds. Returns null when no usable duration is available:
// ffprobe failed or exceeded the 15 s timeout, or its output did not parse to
// a finite number. The caller decides what to do with null.
export async function getAudioDuration(filePath) {
  try {
    const { stdout } = await execFileAsync(
      "ffprobe",
      [
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        filePath,
      ],
      { timeout: 15000 },
    );
    const seconds = Number.parseFloat(stdout.trim());
    // Number.isFinite rejects NaN *and* ±Infinity; a non-finite "duration"
    // would make duration-driven loops in callers run forever.
    return Number.isFinite(seconds) ? seconds : null;
  } catch {
    // Deliberately best-effort: any ffprobe failure is reported as "unknown".
    return null;
  }
}
|
||||
|
||||
// ── Split a long audio file into chunks ─────────────────────────────────────
// Used when a video is too long for a single Gemini transcription call.
// Cuts `inputPath` into consecutive segments of at most `chunkSeconds`
// seconds, written to `outputDir` as chunk_0.mp3, chunk_1.mp3, …
//
// Returns null if no split is needed (duration unknown, or the audio fits in
// one chunk), otherwise an array of `{ path, startOffset, index }` where
// `startOffset` is the chunk's start time in seconds within the input.
// Uses `-acodec copy` so it's fast and lossless — no re-encoding.
//
// Throws RangeError if `chunkSeconds` is not a positive number (the loop
// below could never advance). Rejects if any ffmpeg invocation fails or
// exceeds its 120 s timeout.
export async function splitAudioFile(inputPath, outputDir, chunkSeconds = 2700) {
  // `!(x > 0)` also catches NaN, which a plain `x <= 0` check would miss.
  if (!(chunkSeconds > 0)) {
    throw new RangeError(`chunkSeconds must be a positive number, got ${chunkSeconds}`);
  }

  const duration = await getAudioDuration(inputPath);
  // A non-finite duration would make the loop below unbounded.
  if (!duration || !Number.isFinite(duration) || duration <= chunkSeconds) {
    return null;
  }

  const chunks = [];
  for (let index = 0, startSec = 0; startSec < duration; index++, startSec += chunkSeconds) {
    const chunkPath = path.join(outputDir, `chunk_${index}.mp3`);
    // The final chunk is usually shorter than chunkSeconds.
    const segLen = Math.min(chunkSeconds, duration - startSec);
    // Chunks are cut one at a time, in order.
    await execFileAsync(
      "ffmpeg",
      [
        "-y", "-i", inputPath,
        "-ss", String(startSec),
        "-t", String(segLen),
        "-acodec", "copy",
        chunkPath,
      ],
      { timeout: 120000 },
    );
    chunks.push({ path: chunkPath, startOffset: startSec, index });
  }
  return chunks;
}
|
||||
|
||||
// ── Download a podcast episode by URL ───────────────────────────────────────
// Streams the HTTP(S) response for `audioUrl` straight to `destPath` and
// resolves (with no value) once the file has been flushed and closed.
//
// Redirects (3xx with a Location header) are followed for up to 10 hops;
// relative Location values are resolved against the URL that produced them.
// Rejects on any non-200 final status, on too many redirects, and on request,
// response-stream or file-write errors. A partially written file may be left
// at `destPath` when the download fails midway.
// Used by /api/process when the input URL is a podcast episode rather than a
// YouTube video.
export function downloadPodcastAudio(audioUrl, destPath) {
  const MAX_REDIRECTS = 10;

  return new Promise((resolve, reject) => {
    const handleResponse = (res, url, redirectsLeft) => {
      const { statusCode, headers } = res;

      if (statusCode >= 300 && statusCode < 400 && headers.location) {
        // Drain the redirect body so the socket is released.
        res.resume();
        if (redirectsLeft <= 0) {
          reject(new Error("Too many redirects downloading podcast audio"));
          return;
        }
        let nextUrl;
        try {
          // Location may be relative ("/media/ep.mp3"); resolve it first.
          nextUrl = new URL(headers.location, url).toString();
        } catch (err) {
          reject(err);
          return;
        }
        doFetch(nextUrl, redirectsLeft - 1);
        return;
      }

      if (statusCode !== 200) {
        res.resume();
        reject(new Error(`HTTP ${statusCode} downloading podcast audio`));
        return;
      }

      const fileStream = createWriteStream(destPath);
      // pipe() does not forward source errors; without this handler a
      // response that errors mid-stream would leave the promise pending.
      res.on("error", (err) => {
        fileStream.destroy();
        reject(err);
      });
      fileStream.on("error", (err) => {
        res.destroy();
        reject(err);
      });
      fileStream.on("finish", () => {
        fileStream.close((err) => (err ? reject(err) : resolve()));
      });
      res.pipe(fileStream);
    };

    const doFetch = (url, redirectsLeft) => {
      const getter = url.startsWith("https") ? https : http;
      let req;
      try {
        req = getter.get(url, (res) => handleResponse(res, url, redirectsLeft));
      } catch (err) {
        // get() throws synchronously for malformed URLs / unsupported
        // protocols (e.g. a redirect to a non-HTTP scheme).
        reject(err);
        return;
      }
      req.on("error", reject);
    };

    doFetch(audioUrl, MAX_REDIRECTS);
  });
}
|
||||
+9
-73
@@ -17,8 +17,14 @@ import {
|
||||
parseTimestampedTranscript,
|
||||
safeText,
|
||||
retryGemini,
|
||||
fetchUrl,
|
||||
} from "./util.js";
|
||||
import { calcCost, buildAnalysisPrompt } from "./gemini-helpers.js";
|
||||
import {
|
||||
getAudioDuration,
|
||||
splitAudioFile,
|
||||
downloadPodcastAudio,
|
||||
} from "./audio.js";
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
const app = express();
|
||||
@@ -949,19 +955,7 @@ async function fetchUploadDates(videoIds) {
|
||||
// ── RSS-based date fetching (bypasses bot detection) ─────────────────────
|
||||
|
||||
// Fetch a URL over HTTPS and return the response body as a UTF-8 string.
// 3xx responses carrying a Location header are followed for up to 10 hops;
// relative Location values are resolved against the URL that produced them.
// The status code is otherwise not checked: the body of any non-redirect
// response is returned. Rejects on request/response errors or when the
// redirect limit is exceeded.
function fetchUrl(url) {
  const MAX_REDIRECTS = 10;

  return new Promise((resolve, reject) => {
    const request = (currentUrl, redirectsLeft) => {
      try {
        https.get(currentUrl, (res) => {
          const { statusCode, headers } = res;

          if (statusCode >= 300 && statusCode < 400 && headers.location) {
            // Drain the redirect body so the socket is released.
            res.resume();
            if (redirectsLeft <= 0) {
              reject(new Error(`Too many redirects fetching ${url}`));
              return;
            }
            let nextUrl;
            try {
              nextUrl = new URL(headers.location, currentUrl).toString();
            } catch (err) {
              reject(err);
              return;
            }
            request(nextUrl, redirectsLeft - 1);
            return;
          }

          // Decode as a stream so multi-byte characters split across chunk
          // boundaries are not corrupted by per-chunk Buffer→string conversion.
          res.setEncoding("utf8");
          let data = "";
          res.on("data", (chunk) => {
            data += chunk;
          });
          res.on("end", () => resolve(data));
          res.on("error", reject);
        }).on("error", reject);
      } catch (err) {
        // https.get throws synchronously for malformed URLs / non-https protocols.
        reject(err);
      }
    };

    request(url, MAX_REDIRECTS);
  });
}
|
||||
// fetchUrl moved to ./util.js
|
||||
|
||||
// Get channel_id from a YouTube channel/playlist URL using yt-dlp
|
||||
async function getChannelId(url) {
|
||||
@@ -1086,26 +1080,7 @@ async function parsePodcastRSS(feedUrl, limit = 200) {
|
||||
}
|
||||
|
||||
// Download a podcast episode audio file via HTTP(S) to a local path.
// Streams the response for `audioUrl` to `destPath` and resolves (with no
// value) once the file has been flushed and closed. Redirects (3xx with a
// Location header) are followed for up to 10 hops; relative Location values
// are resolved against the URL that produced them. Rejects on any non-200
// final status, on too many redirects, and on request, response-stream or
// file-write errors.
function downloadPodcastAudio(audioUrl, destPath) {
  const MAX_REDIRECTS = 10;

  return new Promise((resolve, reject) => {
    const handleResponse = (res, url, redirectsLeft) => {
      const { statusCode, headers } = res;

      if (statusCode >= 300 && statusCode < 400 && headers.location) {
        // Drain the redirect body so the socket is released.
        res.resume();
        if (redirectsLeft <= 0) {
          reject(new Error("Too many redirects downloading podcast audio"));
          return;
        }
        let nextUrl;
        try {
          // Location may be relative ("/media/ep.mp3"); resolve it first.
          nextUrl = new URL(headers.location, url).toString();
        } catch (err) {
          reject(err);
          return;
        }
        doFetch(nextUrl, redirectsLeft - 1);
        return;
      }

      if (statusCode !== 200) {
        res.resume();
        reject(new Error(`HTTP ${statusCode} downloading podcast audio`));
        return;
      }

      const fileStream = createWriteStream(destPath);
      // pipe() does not forward source errors; without this handler a
      // response that errors mid-stream would leave the promise pending.
      res.on("error", (err) => {
        fileStream.destroy();
        reject(err);
      });
      fileStream.on("error", (err) => {
        res.destroy();
        reject(err);
      });
      fileStream.on("finish", () => {
        fileStream.close((err) => (err ? reject(err) : resolve()));
      });
      res.pipe(fileStream);
    };

    const doFetch = (url, redirectsLeft) => {
      const getter = url.startsWith("https") ? https : http;
      let req;
      try {
        req = getter.get(url, (res) => handleResponse(res, url, redirectsLeft));
      } catch (err) {
        // get() throws synchronously for malformed URLs / unsupported protocols.
        reject(err);
        return;
      }
      req.on("error", reject);
    };

    doFetch(audioUrl, MAX_REDIRECTS);
  });
}
|
||||
// downloadPodcastAudio moved to ./audio.js
|
||||
|
||||
// Get channel name from URL
|
||||
async function fetchChannelName(url) {
|
||||
@@ -2638,46 +2613,7 @@ Return ONLY the timestamped transcript, nothing else.`;
|
||||
|
||||
// ── Helpers ────────────────────────────────────────────────────────────────
|
||||
|
||||
// ── Audio duration helper (ffprobe) ─────────────────────────────────────
// Asks ffprobe for the `format=duration` entry of `filePath` and returns it as
// a number of seconds, or null when ffprobe fails / exceeds the 15 s timeout
// or its output does not parse to a finite number.
async function getAudioDuration(filePath) {
  try {
    const { stdout } = await execFileAsync(
      "ffprobe",
      [
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        filePath,
      ],
      { timeout: 15000 },
    );
    const seconds = Number.parseFloat(stdout.trim());
    // Number.isFinite rejects NaN *and* ±Infinity; a non-finite "duration"
    // would make duration-driven loops in callers run forever.
    return Number.isFinite(seconds) ? seconds : null;
  } catch {
    // Deliberately best-effort: any ffprobe failure is reported as "unknown".
    return null;
  }
}
|
||||
|
||||
// ── Split audio into chunks with ffmpeg ─────────────────────────────────
// Cuts `inputPath` into consecutive segments of at most `chunkSeconds`
// seconds, written to `outputDir` as chunk_0.mp3, chunk_1.mp3, … using
// `-acodec copy` (no re-encoding).
//
// Returns null when no split is needed (duration unknown, or the audio fits
// in one chunk), otherwise an array of `{ path, startOffset, index }`.
// Throws RangeError if `chunkSeconds` is not a positive number (the loop
// below could never advance). Rejects if an ffmpeg invocation fails or
// exceeds its 120 s timeout.
async function splitAudioFile(inputPath, outputDir, chunkSeconds = 2700) {
  // `!(x > 0)` also catches NaN, which a plain `x <= 0` check would miss.
  if (!(chunkSeconds > 0)) {
    throw new RangeError(`chunkSeconds must be a positive number, got ${chunkSeconds}`);
  }

  const duration = await getAudioDuration(inputPath);
  // A non-finite duration would make the loop below unbounded.
  if (!duration || !Number.isFinite(duration) || duration <= chunkSeconds) {
    return null; // no split needed
  }

  const chunks = [];
  for (let index = 0, startSec = 0; startSec < duration; index++, startSec += chunkSeconds) {
    const chunkPath = path.join(outputDir, `chunk_${index}.mp3`);
    // The final chunk is usually shorter than chunkSeconds.
    const segLen = Math.min(chunkSeconds, duration - startSec);
    await execFileAsync(
      "ffmpeg",
      [
        "-y", "-i", inputPath,
        "-ss", String(startSec),
        "-t", String(segLen),
        "-acodec", "copy",
        chunkPath,
      ],
      { timeout: 120000 },
    );
    chunks.push({ path: chunkPath, startOffset: startSec, index });
  }
  return chunks;
}
|
||||
// getAudioDuration + splitAudioFile moved to ./audio.js
|
||||
|
||||
// sendEvent / extractVideoId / formatTime / parseTimestampedTranscript moved to ./util.js
|
||||
|
||||
|
||||
+27
-3
@@ -1,6 +1,9 @@
|
||||
// Pure helpers — no module-scoped state, no Express, no I/O effects.
|
||||
// Anything in here is safe to import from any other module without
|
||||
// worrying about ordering or initialization side effects.
|
||||
// Stateless helpers — no module-scoped state, no Express, no
|
||||
// initialization side effects. Anything in here is safe to import from
|
||||
// any other module without worrying about ordering. A few helpers do
|
||||
// I/O (fetchUrl) but only when called.
|
||||
|
||||
import https from "https";
|
||||
|
||||
// ── SSE helper ──────────────────────────────────────────────────────────────
|
||||
// Writes a single Server-Sent Events frame: `event: X\ndata: Y\n\n`.
|
||||
@@ -89,6 +92,27 @@ export function safeText(result) {
|
||||
return "";
|
||||
}
|
||||
|
||||
// ── HTTP GET with redirect following ────────────────────────────────────────
// Fetches `url` over HTTPS and resolves with the response body decoded as a
// UTF-8 string. 3xx responses carrying a Location header are followed for up
// to 10 hops; relative Location values are resolved against the URL that
// produced them. The status code is otherwise not checked: the body of any
// non-redirect response is returned. Rejects on request/response errors or
// when the redirect limit is exceeded.
// Used for fetching RSS feeds, channel pages, etc.
//
// For binary downloads (e.g. podcast audio), use audio.downloadPodcastAudio
// — it streams to disk instead of buffering in memory.
export function fetchUrl(url) {
  const MAX_REDIRECTS = 10;

  return new Promise((resolve, reject) => {
    const request = (currentUrl, redirectsLeft) => {
      try {
        https.get(currentUrl, (res) => {
          const { statusCode, headers } = res;

          if (statusCode >= 300 && statusCode < 400 && headers.location) {
            // Drain the redirect body so the socket is released.
            res.resume();
            if (redirectsLeft <= 0) {
              reject(new Error(`Too many redirects fetching ${url}`));
              return;
            }
            let nextUrl;
            try {
              nextUrl = new URL(headers.location, currentUrl).toString();
            } catch (err) {
              reject(err);
              return;
            }
            request(nextUrl, redirectsLeft - 1);
            return;
          }

          // Decode as a stream so multi-byte characters split across chunk
          // boundaries are not corrupted by per-chunk Buffer→string conversion.
          res.setEncoding("utf8");
          let data = "";
          res.on("data", (chunk) => {
            data += chunk;
          });
          res.on("end", () => resolve(data));
          res.on("error", reject);
        }).on("error", reject);
      } catch (err) {
        // https.get throws synchronously for malformed URLs / non-https protocols.
        reject(err);
      }
    };

    request(url, MAX_REDIRECTS);
  });
}
|
||||
|
||||
// ── Retry helper for transient Gemini API errors ────────────────────────────
|
||||
// Retries on 503/429 and on common transient network errors. Linear backoff
|
||||
// (delayMs * attempt). The optional `log` callback receives a one-line
|
||||
|
||||
Reference in New Issue
Block a user