Block SSRF on media_url downloads (transcribe-url/summarize-url)

downloadDirect fetched any caller-supplied media_url with redirect-follow
and no host/scheme validation; the route is reachable via a self-chosen
X-Recap-Install-Id, so a caller could probe the operator's LAN or cloud
metadata (169.254.169.254). Add safe-url.js: assertPublicHttpUrl rejects
non-http(s) schemes and hosts resolving to private/loopback/link-local/
reserved ranges, and safeFetch follows redirects manually, re-validating
each hop. Route downloadDirect through it (covers transcribe-url,
summarize-url, and admin-test-run).
This commit is contained in:
Keysat
2026-06-13 16:23:26 -05:00
parent 0b90120b72
commit 8ad7c54da4
3 changed files with 255 additions and 2 deletions
+158
View File
@@ -0,0 +1,158 @@
// SSRF guard for user-supplied media URLs.
//
// /relay/transcribe-url and /relay/summarize-url download whatever
// `media_url` the caller passes, and the route is reachable by anyone
// presenting a self-chosen X-Recap-Install-Id, so an unguarded fetch
// lets a caller probe the operator's LAN (Spark Control, BTCPay, other
// StartOS services) or cloud metadata at 169.254.169.254. This module
// rejects non-http(s) schemes and any hostname that resolves to a
// private / loopback / link-local / reserved address, and follows
// redirects MANUALLY so every hop is re-validated — a public URL can
// 302 to an internal one after the first check passes.
//
// LAN calls to the operator's OWN hardware go through lan-fetch.js
// instead: those URLs are config-set, not caller-set, and intentionally
// reach private hosts.
import dns from "node:dns/promises";
import net from "node:net";
// Error raised when a caller-supplied URL is refused by the SSRF guard.
// Handlers can match on `name` or on the stable `code` value.
export class BlockedUrlError extends Error {
  name = "BlockedUrlError";
  code = "BLOCKED_URL";

  // Only the message is forwarded to Error; extra arguments are ignored.
  constructor(message) {
    super(message);
  }
}
// Convert a dotted-quad IPv4 literal to an unsigned 32-bit integer.
// Returns null unless the input is exactly four groups of one to three
// decimal digits, each no larger than 255.
function ipv4ToInt(ip) {
  const octets = ip.split(".");
  if (octets.length !== 4) return null;

  const isOctet = (text) => /^\d{1,3}$/.test(text) && Number(text) <= 255;
  if (!octets.every(isOctet)) return null;

  // Multiply instead of shifting so the running total never goes negative;
  // the final >>> 0 pins the result to the unsigned 32-bit range.
  return octets.reduce((total, text) => total * 256 + Number(text), 0) >>> 0;
}
// True when the 32-bit address `n` shares its top `bits` bits with
// `base`, i.e. `n` lies inside the CIDR block base/bits.
function inV4Range(n, base, bits) {
  // JS shift counts wrap modulo 32, so a /0 prefix needs an explicit
  // all-zero mask rather than a shift by 32.
  const hostBits = 32 - bits;
  const mask = bits === 0 ? 0 : (0xffffffff << hostBits) >>> 0;
  // The addresses match on the network part when their XOR has no bit
  // set under the mask.
  return ((n ^ base) & mask) === 0;
}
// IPv4 CIDR blocks that a caller-supplied URL may never reach, stored as
// [network address, prefix length] pairs.
const BLOCKED_V4 = [
  ["0.0.0.0", 8], // "this host"
  ["10.0.0.0", 8], // private
  ["100.64.0.0", 10], // CGNAT
  ["127.0.0.0", 8], // loopback
  ["169.254.0.0", 16], // link-local (incl. 169.254.169.254 cloud metadata)
  ["172.16.0.0", 12], // private
  ["192.0.0.0", 24], // IETF protocol assignments
  ["192.0.2.0", 24], // TEST-NET-1
  ["192.168.0.0", 16], // private
  ["198.18.0.0", 15], // benchmarking
  ["198.51.100.0", 24], // TEST-NET-2
  ["203.0.113.0", 24], // TEST-NET-3
  ["224.0.0.0", 4], // multicast
  ["240.0.0.0", 4], // reserved (incl. 255.255.255.255 broadcast)
];

// True when `ip` is a dotted-quad that falls inside any BLOCKED_V4 range.
// Input that ipv4ToInt cannot parse is reported as not blocked.
function isBlockedV4(ip) {
  const value = ipv4ToInt(ip);
  if (value === null) return false;
  return BLOCKED_V4.some(([network, prefix]) =>
    inV4Range(value, ipv4ToInt(network), prefix),
  );
}
// Expand a lowercase, zone-free, colon-hex IPv6 literal into its eight
// 16-bit groups, or return null if it is not well-formed.
function expandV6Hextets(addr) {
  const halves = addr.split("::");
  if (halves.length > 2) return null;
  const toGroups = (part) => (part === "" ? [] : part.split(":"));
  const head = toGroups(halves[0]);
  let groups = head;
  if (halves.length === 2) {
    const rest = toGroups(halves[1]);
    const missing = 8 - head.length - rest.length;
    if (missing < 1) return null; // "::" must stand for at least one group
    groups = [...head, ...new Array(missing).fill("0"), ...rest];
  }
  if (groups.length !== 8) return null;
  if (!groups.every((g) => /^[0-9a-f]{1,4}$/.test(g))) return null;
  return groups.map((g) => Number.parseInt(g, 16));
}

// Classify the reserved/private IPv6 ranges we block.
//
// The address is expanded to its eight 16-bit groups and tested
// numerically, so every spelling of the same address is treated alike
// ("::1" and "0:0:0:0:0:0:0:1"). This matters because the URL parser
// re-serialises "[::ffff:127.0.0.1]" as "[::ffff:7f00:1]": a check that
// only looks for a dotted-quad tail misses the hex form and lets an
// IPv4-mapped loopback or metadata address through.
//
// Blocked: unspecified (::), loopback (::1), link-local (fe80::/10),
// unique-local (fc00::/7), multicast (ff00::/8), and any IPv4-mapped
// (::ffff:0:0/96), IPv4-compatible (::/96) or NAT64 (64:ff9b::/96)
// address whose embedded IPv4 address isBlockedV4 rejects. A literal
// that cannot be parsed is treated as blocked (fail closed).
function isBlockedV6(ip) {
  let addr = ip.toLowerCase();
  const zoneAt = addr.indexOf("%"); // strip zone id (fe80::1%eth0)
  if (zoneAt !== -1) addr = addr.slice(0, zoneAt);

  // Dotted-quad tail (::ffff:192.168.0.1, ::192.168.0.1): refuse a
  // blocked IPv4 address under any prefix, then fold the tail into two
  // hex groups so the numeric checks below see a uniform form.
  const lastColon = addr.lastIndexOf(":");
  const tail = addr.slice(lastColon + 1);
  if (tail.includes(".")) {
    if (isBlockedV4(tail)) return true;
    const octets = tail.split(".");
    const wellFormed =
      octets.length === 4 &&
      octets.every((o) => /^\d{1,3}$/.test(o) && Number(o) <= 255);
    if (!wellFormed) return true;
    const [a, b, c, d] = octets.map(Number);
    const hi = ((a << 8) | b).toString(16);
    const lo = ((c << 8) | d).toString(16);
    addr = `${addr.slice(0, lastColon + 1)}${hi}:${lo}`;
  }

  const words = expandV6Hextets(addr);
  if (words === null) return true;

  const leadingZeros = (count) => words.slice(0, count).every((w) => w === 0);
  const [first, second] = words;

  if (leadingZeros(7) && words[7] <= 1) return true; // :: and ::1
  // fe80::/10 link-local spans fe80 through febf.
  if ((first & 0xffc0) === 0xfe80) return true;
  if ((first & 0xfe00) === 0xfc00) return true; // fc00::/7 unique-local
  if ((first & 0xff00) === 0xff00) return true; // ff00::/8 multicast

  // Prefixes that carry an IPv4 address in the low 32 bits.
  const isMapped = leadingZeros(5) && words[5] === 0xffff;
  const isCompatible = leadingZeros(6);
  const isNat64 =
    first === 0x64 &&
    second === 0xff9b &&
    words.slice(2, 6).every((w) => w === 0);
  if (isMapped || isCompatible || isNat64) {
    const [hi, lo] = [words[6], words[7]];
    return isBlockedV4(`${hi >> 8}.${hi & 0xff}.${lo >> 8}.${lo & 0xff}`);
  }
  return false;
}
// Decide whether an IP literal falls in a range we refuse to fetch from.
// Dispatches on the address family reported by net.isIP; anything that
// is not an IP literal (a hostname, say) yields false, because the
// caller is expected to resolve DNS before asking.
export function isBlockedAddress(ip) {
  switch (net.isIP(ip)) {
    case 4:
      return isBlockedV4(ip);
    case 6:
      return isBlockedV6(ip);
    default:
      return false;
  }
}
// Check that `urlStr` is an http(s) URL whose host does not point at a
// private/reserved address, and hand back the parsed URL.
//
// An IP-literal host is checked directly; a hostname is resolved with
// dns.lookup and EVERY returned address must pass. Throws BlockedUrlError
// when the string is not a URL, the scheme is not http/https, the host
// does not resolve, or any resolved address is blocked.
export async function assertPublicHttpUrl(urlStr) {
  let parsed;
  try {
    parsed = new URL(urlStr);
  } catch {
    throw new BlockedUrlError("media_url is not a valid URL");
  }

  const { protocol } = parsed;
  if (!["http:", "https:"].includes(protocol)) {
    throw new BlockedUrlError(`media_url scheme "${protocol}" is not allowed`);
  }

  // URL.hostname keeps the square brackets around IPv6 literals.
  const host = parsed.hostname.replace(/^\[|\]$/g, "");
  const unresolved = () =>
    new BlockedUrlError(`media_url host "${host}" did not resolve`);

  let candidates;
  if (net.isIP(host) !== 0) {
    candidates = [host];
  } else {
    let records;
    try {
      records = await dns.lookup(host, { all: true });
    } catch {
      throw unresolved();
    }
    candidates = records.map(({ address }) => address);
  }

  if (candidates.length === 0) throw unresolved();

  if (candidates.some((address) => isBlockedAddress(address))) {
    throw new BlockedUrlError(
      `media_url host "${host}" resolves to a blocked address`,
    );
  }
  return parsed;
}
// Credential-bearing headers that must not follow a redirect to another
// origin. fetch's own redirect:"follow" drops these; following manually
// means we have to do it ourselves.
const CROSS_ORIGIN_DROPPED_HEADERS = ["authorization", "cookie", "proxy-authorization"];

// fetch() wrapper that re-validates the URL on every redirect hop. Node
// fetch's redirect:"follow" would jump to an internal host AFTER the
// initial check passed, so we follow manually with redirect:"manual"
// and re-run assertPublicHttpUrl on each Location.
//
// Options: `signal` and `headers` are passed to fetch; `maxRedirects`
// (default 5) caps how many redirects are followed. Returns the first
// response that is not a 3xx-with-Location. Throws BlockedUrlError when
// a hop fails validation or the redirect limit is exceeded; fetch errors
// propagate unchanged.
//
// NOTE(review): the host is resolved once by assertPublicHttpUrl and
// again inside fetch(), so a hostname whose DNS answer changes between
// the two lookups is not covered by this check — confirm whether
// connection-level IP pinning is needed.
export async function safeFetch(urlStr, { signal, headers, maxRedirects = 5 } = {}) {
  let current = urlStr;
  let hopHeaders = headers;
  for (let hop = 0; hop <= maxRedirects; hop++) {
    await assertPublicHttpUrl(current);
    const res = await fetch(current, { redirect: "manual", signal, headers: hopHeaders });
    const location = res.headers.get("location");
    const isRedirect = res.status >= 300 && res.status < 400 && location;
    if (!isRedirect) return res;

    // Discard the redirect response's body so its connection is released
    // instead of lingering until garbage collection.
    await res.body?.cancel();

    const next = new URL(location, current); // resolve relative redirects
    if (hopHeaders != null && next.origin !== new URL(current).origin) {
      // Leaving the origin: stop sending credentials. Copy first so the
      // caller's headers object is never mutated.
      const stripped = new Headers(hopHeaders);
      for (const name of CROSS_ORIGIN_DROPPED_HEADERS) stripped.delete(name);
      hopHeaders = stripped;
    }
    current = next.toString();
  }
  throw new BlockedUrlError("media_url exceeded the redirect limit");
}