// Audio I/O helpers — ffprobe for metadata, ffmpeg for splitting, plus
// HTTP downloading for podcast episodes. Pure module: no state, no
// Express, only takes paths/URLs and returns data.

import { execFile } from "child_process";
import { promisify } from "util";
import path from "path";
import http from "http";
import https from "https";
import dns from "dns";
import net from "net";
import { createWriteStream } from "fs";

const execFileAsync = promisify(execFile);

// ── Audio duration via ffprobe ──────────────────────────────────────────────
// Returns the duration in seconds, or null if ffprobe can't read the file
// (corrupt, missing, unsupported format). Caller decides what to do with
// null — most call sites treat it as "unknown" and skip duration-dependent
// branches.

/**
 * Ask ffprobe for the container-level duration of an audio file.
 *
 * @param {string} filePath - Path of the file to probe.
 * @returns {Promise<number|null>} Duration in seconds, or null when ffprobe
 *   fails, times out (15 s), or prints something that is not a finite,
 *   non-negative number (e.g. "N/A").
 */
export async function getAudioDuration(filePath) {
  try {
    const { stdout } = await execFileAsync(
      "ffprobe",
      [
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        // Pass the input through -i so a path that begins with "-" is read as
        // a file name rather than parsed as an ffprobe option.
        "-i", filePath,
      ],
      { timeout: 15000 },
    );
    const seconds = Number.parseFloat(stdout.trim());
    // Reject NaN, ±Infinity and negatives: callers use the value as a loop
    // bound, so anything but a real length must read as "unknown".
    return Number.isFinite(seconds) && seconds >= 0 ? seconds : null;
  } catch {
    // Deliberate best-effort: any ffprobe failure means "duration unknown".
    return null;
  }
}

// ── Split a long audio file into chunks ─────────────────────────────────────
// Used when a video is too long for a single Gemini transcription call.
// Returns null if no split is needed (audio fits in one chunk), otherwise
// an array of `{ path, startOffset, durationSec, index }`. Uses
// `-acodec copy` so it's fast and lossless — no re-encoding.
/**
 * Cut `inputPath` into consecutive pieces of at most `chunkSeconds` seconds,
 * written as `chunk_<index>.mp3` inside `outputDir`. ffmpeg runs once per
 * chunk, one after another, with a 120 s timeout per invocation.
 *
 * NOTE(review): the `.mp3` extension is fixed while the stream is copied
 * without re-encoding — presumably inputs are always MP3; confirm with callers.
 *
 * @param {string} inputPath - Source audio file.
 * @param {string} outputDir - Existing directory that receives the chunks.
 * @param {number} [chunkSeconds=2700] - Maximum chunk length in seconds.
 * @returns {Promise<Array<{path: string, startOffset: number, durationSec: number, index: number}>|null>}
 *   The chunks in order, or null when the duration is unknown or already
 *   fits in a single chunk.
 * @throws {RangeError} If `chunkSeconds` is not a positive finite number.
 * @throws {Error} If an ffmpeg invocation fails or times out.
 */
export async function splitAudioFile(inputPath, outputDir, chunkSeconds = 2700) {
  // A zero chunk length never advances the loop below, and negative or NaN
  // values yield nonsensical -t arguments; fail fast instead.
  if (!Number.isFinite(chunkSeconds) || chunkSeconds <= 0) {
    throw new RangeError(`chunkSeconds must be a positive number, got ${chunkSeconds}`);
  }

  const duration = await getAudioDuration(inputPath);
  // Unknown (null/0), non-finite, or short enough: nothing to split. The
  // finiteness check keeps the loop below bounded.
  if (!duration || !Number.isFinite(duration) || duration <= chunkSeconds) return null;

  const chunks = [];
  let startSec = 0;
  let i = 0;
  while (startSec < duration) {
    const chunkPath = path.join(outputDir, `chunk_${i}.mp3`);
    const segLen = Math.min(chunkSeconds, duration - startSec);
    await execFileAsync(
      "ffmpeg",
      [
        "-y",
        "-i", inputPath,
        "-ss", String(startSec),
        "-t", String(segLen),
        "-acodec", "copy",
        chunkPath,
      ],
      { timeout: 120000 },
    );
    chunks.push({
      path: chunkPath,
      startOffset: startSec,
      // Actual seconds in THIS chunk (the last chunk is usually
      // shorter than chunkSeconds). Carried downstream so the
      // transcribe-stitching code can sanity-cap timestamps each
      // chunk's model emits — some models hallucinate offsets
      // way past the chunk's audio (observed: gemini-3.1-flash-lite
      // emitting [10:12:44] on a 45-min chunk).
      durationSec: segLen,
      index: i,
    });
    startSec += chunkSeconds;
    i++;
  }
  return chunks;
}

// ── SSRF guard for outbound podcast fetches ─────────────────────────────────
// downloadPodcastAudio fetches a fully user-controlled URL, so without a
// guard a caller could point it at internal services (cloud metadata at
// 169.254.169.254, LAN hosts, localhost) and read the response back through
// the transcript. isBlockedAddress rejects loopback / private / link-local /
// reserved / multicast targets for IPv4, IPv6, and IPv4-mapped IPv6.

// True when a syntactically valid dotted-quad IPv4 address lies in a range
// that must never be fetched.
function isBlockedIPv4(addr) {
  const [a, b] = addr.split(".").map(Number);
  if (a === 0) return true; // 0.0.0.0/8 "this network"
  if (a === 10) return true; // private
  if (a === 127) return true; // loopback
  if (a === 169 && b === 254) return true; // link-local (cloud metadata)
  if (a === 172 && b >= 16 && b <= 31) return true; // private
  if (a === 192 && b === 168) return true; // private
  if (a === 100 && b >= 64 && b <= 127) return true; // CGNAT (100.64.0.0/10)
  if (a >= 224) return true; // multicast + reserved (224.0.0.0+)
  return false;
}

// Expand an IPv6 literal (any spelling: compressed, zero-padded, dotted IPv4
// tail, optional %zone) into its eight 16-bit groups, or null if it does not
// describe exactly eight groups.
function expandIPv6(address) {
  let text = address.toLowerCase();
  const zoneStart = text.indexOf("%");
  if (zoneStart !== -1) text = text.slice(0, zoneStart);

  // A trailing dotted quad occupies the last two groups; rewrite it as hex.
  const lastColon = text.lastIndexOf(":");
  const tail = text.slice(lastColon + 1);
  if (tail.includes(".")) {
    if (!net.isIPv4(tail)) return null;
    const [o1, o2, o3, o4] = tail.split(".").map(Number);
    const high = (o1 * 256 + o2).toString(16);
    const low = (o3 * 256 + o4).toString(16);
    text = `${text.slice(0, lastColon + 1)}${high}:${low}`;
  }

  const halves = text.split("::");
  if (halves.length > 2) return null;
  const toGroups = (part) =>
    part === "" ? [] : part.split(":").map((hextet) => Number.parseInt(hextet, 16));
  const head = toGroups(halves[0]);
  const rest = halves.length === 2 ? toGroups(halves[1]) : [];
  // "::" stands for however many zero groups are needed to reach eight.
  const zeroRun = halves.length === 2 ? 8 - head.length - rest.length : 0;
  if (zeroRun < 0) return null;

  const groups = [...head, ...Array.from({ length: zeroRun }, () => 0), ...rest];
  const isValid =
    groups.length === 8 &&
    groups.every((group) => Number.isInteger(group) && group >= 0 && group <= 0xffff);
  return isValid ? groups : null;
}

/**
 * Decide whether an IP address is off-limits for outbound fetches.
 *
 * IPv4 is matched against the loopback / private / link-local / CGNAT /
 * multicast / reserved ranges. IPv6 is parsed into numeric groups first, so
 * every spelling of an address gets the same verdict (`0:0:0:0:0:0:0:1` is
 * `::1`), then allowed only when it lies in global unicast space (2000::/3)
 * and is not 6to4.
 *
 * @param {string} ip - IPv4 or IPv6 literal, without URL brackets.
 * @returns {boolean} true when the address must not be contacted. Fails
 *   closed: empty, non-string, and unparseable input all return true.
 */
export function isBlockedAddress(ip) {
  if (!ip || typeof ip !== "string") return true;
  if (net.isIPv4(ip)) return isBlockedIPv4(ip);
  if (!net.isIPv6(ip)) return true; // unrecognized → block

  const groups = expandIPv6(ip);
  if (groups === null) return true; // could not normalize → block

  // IPv4-mapped IPv6 written with a dotted tail (::ffff:1.2.3.4, in any
  // zero-compression spelling) — judge by the embedded IPv4. Hex-encoded
  // mapped forms, and anything carrying a %zone, fall through and are
  // rejected by the range rule below.
  const dottedTail = /:(\d+\.\d+\.\d+\.\d+)$/.exec(ip);
  const hasMappedPrefix =
    groups.slice(0, 5).every((group) => group === 0) && groups[5] === 0xffff;
  if (dottedTail && hasMappedPrefix) return isBlockedIPv4(dottedTail[1]);

  const first = groups[0];
  // Only 2000::/3 is global unicast space. Everything outside it is special
  // or unassigned: loopback / unspecified / IPv4-mapped / -compatible / SIIT /
  // NAT64 64:ff9b:: (all under 0000::/8), ULA fc00::/7, link-local fe80::/10,
  // multicast ff00::/8. None is ever a real podcast host, so block the lot
  // rather than decode any embedded address.
  if ((first & 0xe000) !== 0x2000) return true;
  // 6to4 (2002::/16) sits inside 2000::/3 but embeds an arbitrary IPv4
  // address, which could be a private one.
  if (first === 0x2002) return true;
  return false;
}

// dns.lookup wrapper that fails the connection if the host resolves to a
// blocked address. Passed as the `lookup` option to http(s).get, so the
// check runs at connect time on every attempt — including each redirect
// hop — which also closes the DNS-rebinding window (the address we validate
// is the address the socket connects to).
/**
 * Drop-in replacement for `dns.lookup` that refuses blocked addresses.
 *
 * @param {string} hostname - Host being resolved.
 * @param {object|Function} options - `dns.lookup` options, or the callback
 *   when options are omitted.
 * @param {Function} [callback] - Node-style `(err, address, family)`; with
 *   `options.all` the address argument is an array of `{ address, family }`.
 */
function guardedLookup(hostname, options, callback) {
  const done = typeof options === "function" ? options : callback;
  const lookupOptions = typeof options === "function" ? {} : options;

  dns.lookup(hostname, lookupOptions, (err, address, family) => {
    if (err) return done(err);
    // Normalize the single-result and `all: true` shapes so one loop covers both.
    const candidates = Array.isArray(address) ? address : [{ address, family }];
    // One blocked candidate poisons the whole answer: the socket layer may
    // pick any of the returned addresses.
    for (const candidate of candidates) {
      if (isBlockedAddress(candidate.address)) {
        return done(
          new Error(`refusing to fetch podcast audio from disallowed address ${candidate.address}`),
        );
      }
    }
    done(null, address, family);
  });
}

// ── Download a podcast episode by URL ───────────────────────────────────────
// Streams the HTTP response straight to disk. Follows up to MAX_PODCAST_REDIRECTS
// redirects (resolving relative Location headers), rejects on any non-200 final
// status, and refuses non-HTTP(S) schemes and internal addresses (see the SSRF
// guard above). Used by /api/process when the input is a podcast episode.
const MAX_PODCAST_REDIRECTS = 5;

/**
 * Download `audioUrl` to `destPath`.
 *
 * NOTE(review): no request/idle timeout is set here, so a server that stalls
 * without closing the connection keeps the promise pending — consider one.
 *
 * @param {string} audioUrl - Absolute http(s) URL of the episode audio.
 * @param {string} destPath - File to create or overwrite with the body.
 * @returns {Promise<void>} Resolves once the file has been written and closed.
 *   Rejects with an Error on an invalid URL or redirect target, a non-HTTP(S)
 *   scheme, a disallowed address, too many redirects, a non-200 final status,
 *   a connection/write error, or a response cut off before completion. A
 *   partially written file may remain at `destPath` after a failure.
 */
export function downloadPodcastAudio(audioUrl, destPath) {
  return new Promise((resolve, reject) => {
    // Stream a 200 response to disk, settling the promise on every outcome.
    const saveBody = (res) => {
      const fileStream = createWriteStream(destPath);
      // Tear down both ends so neither the socket nor the file handle leaks.
      const fail = (err) => {
        res.destroy();
        fileStream.destroy();
        reject(err);
      };
      // pipe() does not forward source failures: without these listeners a
      // response aborted mid-body would leave the promise pending forever.
      res.on("error", fail);
      res.on("close", () => {
        if (!res.complete) {
          fail(new Error("connection closed before podcast audio finished downloading"));
        }
      });
      fileStream.on("error", fail);
      fileStream.on("finish", () => {
        fileStream.close((err) => (err ? reject(err) : resolve()));
      });
      res.pipe(fileStream);
    };

    const doFetch = (rawUrl, redirectsLeft) => {
      let url;
      try {
        url = new URL(rawUrl);
      } catch {
        return reject(new Error("invalid podcast audio URL"));
      }
      if (url.protocol !== "http:" && url.protocol !== "https:") {
        return reject(new Error(`refusing non-HTTP podcast URL (${url.protocol})`));
      }

      // IP-literal hosts (e.g. http://169.254.169.254) never hit the DNS
      // `lookup` hook — the socket connects to the literal directly — so they
      // must be checked here. guardedLookup below covers hostnames that
      // *resolve* to a blocked address (and the DNS-rebinding case).
      const host = url.hostname.replace(/^\[|\]$/g, ""); // strip IPv6 brackets
      if (net.isIP(host) && isBlockedAddress(host)) {
        return reject(
          new Error(`refusing to fetch podcast audio from disallowed address ${host}`),
        );
      }

      const onResponse = (res) => {
        const { statusCode } = res;
        if (statusCode >= 300 && statusCode < 400 && res.headers.location) {
          res.resume(); // drain so the socket is freed
          if (redirectsLeft <= 0) {
            return reject(new Error("too many redirects downloading podcast audio"));
          }
          // The Location header is attacker-controlled; a malformed value
          // must reject rather than throw out of this event callback.
          let next;
          try {
            next = new URL(res.headers.location, url).toString();
          } catch {
            return reject(new Error("invalid redirect location downloading podcast audio"));
          }
          return doFetch(next, redirectsLeft - 1);
        }
        if (statusCode !== 200) {
          res.resume();
          return reject(new Error(`HTTP ${statusCode} downloading podcast audio`));
        }
        saveBody(res);
      };

      const getter = url.protocol === "https:" ? https : http;
      try {
        getter.get(url, { lookup: guardedLookup }, onResponse).on("error", reject);
      } catch (err) {
        // get() can throw synchronously; on a redirect hop we are inside an
        // event callback, so convert that into a rejection as well.
        reject(err);
      }
    };

    doFetch(audioUrl, MAX_PODCAST_REDIRECTS);
  });
}