d0e98424c1
- Arbitrary file write (P0): validate import keys in /api/library/import via a now-exported safeFilename(); a ../../ key is skipped, not written out of the scope dir. - SSRF (P0): guard downloadPodcastAudio — reject non-HTTP(S) schemes, block IP-literal and DNS-resolved private/link-local/loopback/reserved/multicast and embedded-IPv4 IPv6 targets (closes DNS rebinding), cap + resolve redirects. - ESM require (P1): top-level import of randomBytes in license-purchase.js (the inner require threw on the anon purchase-settle path). - Concurrency lock (P1): skip the process-global free-tier slot in multi-mode so it no longer serializes every cloud tenant onto one job. - X-Forwarded-For bypass (P1): set Express trust proxy from RECAP_TRUSTED_PROXY_HOPS (default 1); getClientIp now reads req.ip instead of a client-spoofable XFF entry. Tests added for safeFilename, the SSRF guard, and getClientIp (119 pass). Registry blockers deferred (ROADMAP); leaked-key history purge queued.
198 lines
8.7 KiB
JavaScript
198 lines
8.7 KiB
JavaScript
// Audio I/O helpers — ffprobe for metadata, ffmpeg for splitting, plus
|
|
// HTTP downloading for podcast episodes. Pure module: no state, no
|
|
// Express, only takes paths/URLs and returns data.
|
|
|
|
import { execFile } from "child_process";
import { promisify } from "util";
import path from "path";
import http from "http";
import https from "https";
import dns from "dns";
import net from "net";
import { createWriteStream } from "fs";
import { pipeline } from "stream";
|
|
|
|
// Promise-returning wrapper around execFile so the ffprobe / ffmpeg
// invocations in this module can be awaited.
const execFileAsync = promisify(execFile);
|
|
|
|
// ── Audio duration via ffprobe ──────────────────────────────────────────────
// Asks ffprobe for the container-level duration of `filePath` and resolves
// to it in seconds. Resolves to null — never rejects — when ffprobe fails,
// exceeds the 15 s timeout, or prints something that does not parse as a
// number; what "unknown duration" means is left to the caller.
export async function getAudioDuration(filePath) {
  const probeArgs = [
    "-v", "error",
    "-show_entries", "format=duration",
    "-of", "default=noprint_wrappers=1:nokey=1",
    filePath,
  ];
  try {
    const probe = await execFileAsync("ffprobe", probeArgs, { timeout: 15000 });
    const seconds = Number.parseFloat(probe.stdout.trim());
    if (Number.isNaN(seconds)) {
      return null;
    }
    return seconds;
  } catch {
    // Any failure (missing binary, unreadable file, timeout) means "unknown".
    return null;
  }
}
|
|
|
|
// ── Split a long audio file into chunks ─────────────────────────────────────
// Cuts `inputPath` into consecutive pieces of at most `chunkSeconds` seconds,
// written to `outputDir` as chunk_0.mp3, chunk_1.mp3, … Uses `-acodec copy`,
// so the audio is copied rather than re-encoded.
//
// Resolves to null when no split is needed (the duration is unknown, zero, or
// already fits in one chunk); otherwise to an array of
// `{ path, startOffset, durationSec, index }`, one entry per chunk in order.
//
// Throws RangeError when `chunkSeconds` is not greater than zero: a zero,
// negative, or null step would never advance the loop below (spawning ffmpeg
// forever), and NaN would produce a single bogus `-t NaN` chunk.
// Rejects with the ffmpeg error if any chunk fails or exceeds its 120 s timeout.
export async function splitAudioFile(inputPath, outputDir, chunkSeconds = 2700) {
  if (!(chunkSeconds > 0)) {
    throw new RangeError(`chunkSeconds must be greater than 0 (got ${chunkSeconds})`);
  }

  const duration = await getAudioDuration(inputPath);
  if (!duration || duration <= chunkSeconds) return null;

  const chunks = [];
  let startSec = 0;
  let i = 0;
  while (startSec < duration) {
    const chunkPath = path.join(outputDir, `chunk_${i}.mp3`);
    const segLen = Math.min(chunkSeconds, duration - startSec);
    await execFileAsync("ffmpeg", [
      "-y", "-i", inputPath,
      "-ss", String(startSec),
      "-t", String(segLen),
      "-acodec", "copy",
      chunkPath,
    ], { timeout: 120000 });
    chunks.push({
      path: chunkPath,
      startOffset: startSec,
      // Actual seconds in THIS chunk (the last chunk is usually shorter than
      // chunkSeconds). Carried downstream so the transcribe-stitching code
      // can sanity-cap the timestamps each chunk's model emits — some models
      // hallucinate offsets way past the chunk's audio (observed:
      // gemini-3.1-flash-lite emitting [10:12:44] on a 45-min chunk).
      durationSec: segLen,
      index: i,
    });
    startSec += chunkSeconds;
    i++;
  }
  return chunks;
}
|
|
|
|
// ── SSRF guard for outbound podcast fetches ─────────────────────────────────
// downloadPodcastAudio fetches a fully user-controlled URL, so without a
// guard a caller could point it at internal services (cloud metadata at
// 169.254.169.254, LAN hosts, localhost) and read the response back through
// the transcript.
//
// isBlockedAddress(ip) returns true when `ip` must NOT be connected to:
//   • anything that is not a string holding a valid IPv4/IPv6 literal;
//   • IPv4 loopback / private / link-local / CGNAT / benchmarking /
//     documentation / multicast / reserved ranges;
//   • every IPv6 address outside global unicast (2000::/3), plus 6to4
//     (2002::/16), which embeds an arbitrary IPv4 address.
// The dotted IPv4-mapped form (::ffff:1.2.3.4) is judged by its embedded IPv4.
//
// The IPv6 decision is made on the numeric value of the first 16-bit group,
// not on the text, so equivalent spellings cannot slip past: "0:0:0:0:0:0:0:1",
// "0:0:0:0:0:ffff:127.0.0.1" and "0064:ff9b::7f00:1" are blocked exactly like
// "::1", "::ffff:7f00:1" and "64:ff9b::7f00:1". Loopback, unspecified,
// IPv4-mapped/-compatible, SIIT and NAT64 all live in 0000::/8; ULA
// (fc00::/7), link-local (fe80::/10), site-local (fec0::/10) and multicast
// (ff00::/8) live above 3fff:… — all fall outside 2000::/3.
export function isBlockedAddress(ip) {
  if (!ip || typeof ip !== "string") return true;

  // IPv4-mapped IPv6 in dotted form — judge by the embedded IPv4.
  const mapped = ip.match(/^::ffff:(\d+\.\d+\.\d+\.\d+)$/i);
  const addr = mapped ? mapped[1] : ip;

  if (net.isIPv4(addr)) {
    const [a, b, c] = addr.split(".").map(Number);
    if (a === 0) return true; // 0.0.0.0/8 "this network"
    if (a === 10) return true; // private
    if (a === 127) return true; // loopback
    if (a === 169 && b === 254) return true; // link-local (cloud metadata)
    if (a === 172 && b >= 16 && b <= 31) return true; // private
    if (a === 192 && b === 168) return true; // private
    if (a === 100 && b >= 64 && b <= 127) return true; // CGNAT (100.64.0.0/10)
    if (a === 192 && b === 0 && (c === 0 || c === 2)) return true; // IETF protocol assignments / TEST-NET-1
    if (a === 198 && (b === 18 || b === 19)) return true; // benchmarking (198.18.0.0/15)
    if (a === 198 && b === 51 && c === 100) return true; // TEST-NET-2
    if (a === 203 && b === 0 && c === 113) return true; // TEST-NET-3
    if (a >= 224) return true; // multicast + reserved (224.0.0.0+)
    return false;
  }

  if (net.isIPv6(addr)) {
    const lower = addr.toLowerCase();
    // A valid IPv6 literal only starts with ":" when it starts with "::",
    // i.e. its leading group(s) are zero.
    const firstHextet = lower.startsWith(":")
      ? 0
      : Number.parseInt(lower.slice(0, lower.indexOf(":")), 16);
    // Top three bits must be 001 → 2000::/3 (global unicast). A NaN from an
    // unexpected shape coerces to 0 here and is therefore blocked.
    if ((firstHextet & 0xe000) !== 0x2000) return true;
    if (firstHextet === 0x2002) return true; // 6to4 (embeds an IPv4 address)
    return false;
  }

  return true; // unrecognized → block
}
|
|
|
|
// Drop-in replacement for dns.lookup that refuses blocked targets. Accepts
// both call shapes, (hostname, callback) and (hostname, options, callback),
// resolves through dns.lookup, and hands the callback an Error instead of
// the result if ANY resolved address is rejected by isBlockedAddress.
// It is passed as the `lookup` option to http(s).get, so the check runs at
// connect time on every attempt — including each redirect hop — which also
// closes the DNS-rebinding window (the address validated is the address the
// socket connects to).
function guardedLookup(hostname, options, callback) {
  const optionsOmitted = typeof options === "function";
  const done = optionsOmitted ? options : callback;
  const lookupOptions = optionsOmitted ? {} : options;

  dns.lookup(hostname, lookupOptions, (err, address, family) => {
    if (err) {
      done(err);
      return;
    }
    // With `all: true` dns.lookup yields an array of { address, family };
    // otherwise a single address string. Normalize to the array shape.
    const resolved = Array.isArray(address) ? address : [{ address, family }];
    const offender = resolved.find((entry) => isBlockedAddress(entry.address));
    if (offender) {
      done(
        new Error(`refusing to fetch podcast audio from disallowed address ${offender.address}`),
      );
      return;
    }
    done(null, address, family);
  });
}
|
|
|
|
// ── Download a podcast episode by URL ───────────────────────────────────────
// Streams the HTTP response body of `audioUrl` straight to `destPath`.
// Follows up to MAX_PODCAST_REDIRECTS redirects (resolving relative Location
// headers against the current URL), rejects on any non-200 final status, and
// refuses non-HTTP(S) schemes and internal addresses (via isBlockedAddress
// for IP-literal hosts and guardedLookup for resolved hostnames).
//
// Returns a Promise that resolves (with no value) once the body has been
// fully written, and rejects with an Error when:
//   • the URL — or a redirect's Location header — cannot be parsed;
//   • the scheme is not http: or https:;
//   • the host is, or resolves to, a blocked address;
//   • the redirect budget is exhausted;
//   • the final status is not 200;
//   • the request, the response stream, or the file write fails.
// A partially written file may remain at `destPath` after a failure.
const MAX_PODCAST_REDIRECTS = 5;

export function downloadPodcastAudio(audioUrl, destPath) {
  return new Promise((resolve, reject) => {
    const doFetch = (rawUrl, redirectsLeft) => {
      let url;
      try {
        url = new URL(rawUrl);
      } catch {
        return reject(new Error("invalid podcast audio URL"));
      }
      if (url.protocol !== "http:" && url.protocol !== "https:") {
        return reject(new Error(`refusing non-HTTP podcast URL (${url.protocol})`));
      }

      // IP-literal hosts (e.g. http://169.254.169.254) never hit the DNS
      // `lookup` hook — the socket connects to the literal directly — so they
      // must be checked here. guardedLookup below covers hostnames that
      // *resolve* to a blocked address (and the DNS-rebinding case).
      const host = url.hostname.replace(/^\[|\]$/g, ""); // strip IPv6 brackets
      if (net.isIP(host) && isBlockedAddress(host)) {
        return reject(
          new Error(`refusing to fetch podcast audio from disallowed address ${host}`),
        );
      }

      const getter = url.protocol === "https:" ? https : http;
      getter
        .get(url, { lookup: guardedLookup }, (res) => {
          const { statusCode, headers } = res;

          if (statusCode >= 300 && statusCode < 400 && headers.location) {
            res.resume(); // drain so the socket is freed
            if (redirectsLeft <= 0) {
              return reject(new Error("too many redirects downloading podcast audio"));
            }
            // The Location header is controlled by the remote server. A
            // malformed value makes `new URL` throw, and a throw inside this
            // response callback would surface as an uncaught exception rather
            // than a rejection — so convert it explicitly.
            let next;
            try {
              next = new URL(headers.location, url).toString();
            } catch {
              return reject(new Error("invalid redirect location downloading podcast audio"));
            }
            return doFetch(next, redirectsLeft - 1);
          }

          if (statusCode !== 200) {
            res.resume();
            return reject(new Error(`HTTP ${statusCode} downloading podcast audio`));
          }

          // pipeline (unlike res.pipe) reports a failure on EITHER side —
          // including a connection dropped mid-body — and destroys both
          // streams, so the promise always settles and the file descriptor
          // is not leaked.
          pipeline(res, createWriteStream(destPath), (err) => {
            if (err) {
              reject(err);
            } else {
              resolve();
            }
          });
        })
        .on("error", reject);
    };

    doFetch(audioUrl, MAX_PODCAST_REDIRECTS);
  });
}
|