Block SSRF on media_url downloads (transcribe-url/summarize-url)
downloadDirect fetched any caller-supplied media_url with redirect-follow and no host/scheme validation; the route is reachable via a self-chosen X-Recap-Install-Id, so a caller could probe the operator's LAN or cloud metadata (169.254.169.254). Add safe-url.js: assertPublicHttpUrl rejects non-http(s) schemes and hosts resolving to private/loopback/link-local/reserved ranges, and safeFetch follows redirects manually, re-validating each hop. Route downloadDirect through it (covers transcribe-url, summarize-url, and admin-test-run).
This commit is contained in:
@@ -0,0 +1,158 @@
|
||||
// SSRF guard for user-supplied media URLs.
//
// /relay/transcribe-url and /relay/summarize-url download whatever
// `media_url` the caller passes, and the route is reachable by anyone
// presenting a self-chosen X-Recap-Install-Id, so an unguarded fetch
// lets a caller probe the operator's LAN (Spark Control, BTCPay, other
// StartOS services) or cloud metadata at 169.254.169.254. This module
// rejects non-http(s) schemes and any hostname that resolves to a
// private / loopback / link-local / reserved address, and follows
// redirects MANUALLY so every hop is re-validated — a public URL can
// 302 to an internal one after the first check passes.
//
// LAN calls to the operator's OWN hardware go through lan-fetch.js
// instead: those URLs are config-set, not caller-set, and intentionally
// reach private hosts.
|
||||
|
||||
import dns from "node:dns/promises";
|
||||
import net from "node:net";
|
||||
|
||||
// Error raised when a URL is refused by the SSRF guard. Besides the
// message handed to the constructor, every instance carries
// `name === "BlockedUrlError"` and a stable `code === "BLOCKED_URL"`
// that callers can match on.
export class BlockedUrlError extends Error {
  constructor(message) {
    super(message);
    Object.assign(this, { name: "BlockedUrlError", code: "BLOCKED_URL" });
  }
}
|
||||
|
||||
// Convert a dotted-quad IPv4 string to its unsigned 32-bit value.
// Returns null unless the input is exactly four dot-separated fields,
// each made of one to three decimal digits and no larger than 255.
function ipv4ToInt(ip) {
  const fields = ip.split(".");
  if (fields.length !== 4) return null;

  const isOctet = (field) => /^\d{1,3}$/.test(field) && Number(field) <= 255;
  if (!fields.every(isOctet)) return null;

  // Fold the octets most-significant first; `>>> 0` keeps the result unsigned.
  return fields.reduce((acc, field) => acc * 256 + Number(field), 0) >>> 0;
}
|
||||
|
||||
// Report whether the 32-bit address `n` shares its leading `bits` bits
// with `base`, i.e. whether it lies inside the CIDR block base/bits.
// A prefix length of 0 matches every address.
function inV4Range(n, base, bits) {
  // `<<` only uses the shift count modulo 32, so /0 needs its own branch.
  const prefixMask = bits === 0 ? 0 : (-1 << (32 - bits)) >>> 0;
  // The two values agree on the prefix exactly when their XOR has no
  // masked bit set.
  return ((n ^ base) & prefixMask) === 0;
}
|
||||
|
||||
// IPv4 blocks that a user-supplied URL must never be fetched from.
// Written in CIDR notation and expanded once, at module load, into
// [base address, prefix length] pairs.
const BLOCKED_V4 = [
  "0.0.0.0/8", // "this host"
  "10.0.0.0/8", // private
  "100.64.0.0/10", // CGNAT
  "127.0.0.0/8", // loopback
  "169.254.0.0/16", // link-local (incl. 169.254.169.254 cloud metadata)
  "172.16.0.0/12", // private
  "192.0.0.0/24", // IETF protocol assignments
  "192.0.2.0/24", // TEST-NET-1
  "192.168.0.0/16", // private
  "198.18.0.0/15", // benchmarking
  "198.51.100.0/24", // TEST-NET-2
  "203.0.113.0/24", // TEST-NET-3
  "224.0.0.0/4", // multicast
  "240.0.0.0/4", // reserved (incl. 255.255.255.255 broadcast)
].map((cidr) => {
  const [base, prefixLength] = cidr.split("/");
  return [base, Number(prefixLength)];
});
|
||||
|
||||
// Decide whether a dotted-quad string lies inside any BLOCKED_V4 range.
// Input that ipv4ToInt cannot parse is reported as not blocked.
function isBlockedV4(ip) {
  const value = ipv4ToInt(ip);
  if (value === null) return false;
  return BLOCKED_V4.some(([base, bits]) => inV4Range(value, ipv4ToInt(base), bits));
}
|
||||
|
||||
// Expand a lower-cased IPv6 literal (zone id already removed) into its
// eight 16-bit groups. Understands `::` compression and a trailing
// dotted quad. Returns null when the text is not a well-formed address.
function expandV6Groups(addr) {
  let text = addr;

  // A trailing dotted quad (::ffff:1.2.3.4) stands for the last two groups;
  // rewrite it as hex so the rest of the parser sees one notation only.
  const cut = text.lastIndexOf(":") + 1;
  const tail = text.slice(cut);
  if (tail.includes(".")) {
    const octets = tail.split(".");
    const wellFormed =
      octets.length === 4 &&
      octets.every((o) => /^\d{1,3}$/.test(o) && Number(o) <= 255);
    if (!wellFormed) return null;
    const [a, b, c, d] = octets.map(Number);
    text = `${text.slice(0, cut)}${(a * 256 + b).toString(16)}:${(c * 256 + d).toString(16)}`;
  }

  const halves = text.split("::");
  if (halves.length > 2) return null; // "::" may appear at most once
  const compressed = halves.length === 2;

  const toGroups = (part) => (part === "" ? [] : part.split(":"));
  const head = toGroups(halves[0]);
  const rest = compressed ? toGroups(halves[1]) : [];

  // "::" must stand in for at least one group; without it all eight
  // groups have to be written out.
  const gap = 8 - head.length - rest.length;
  if (compressed ? gap < 1 : gap !== 0) return null;

  const hex = [...head, ...Array(gap).fill("0"), ...rest];
  if (!hex.every((g) => /^[0-9a-f]{1,4}$/.test(g))) return null;
  return hex.map((g) => Number.parseInt(g, 16));
}

// Decide whether an IPv6 literal is in a range we refuse to fetch from:
// unspecified (::), loopback (::1), link-local (fe80::/10), unique-local
// (fc00::/7), multicast (ff00::/8), or an address that embeds a blocked
// IPv4 address (IPv4-mapped ::ffff:0:0/96, IPv4-compatible ::/96, NAT64
// 64:ff9b::/96), which is delegated to isBlockedV4.
//
// The address is parsed into numeric groups rather than matched as text
// because the same address has many spellings. In particular the URL
// parser serialises `[::ffff:127.0.0.1]` as `[::ffff:7f00:1]`, which a
// "does the tail contain a dot" test never sees, and `0:0:0:0:0:0:0:1`
// is loopback even though it is not the string "::1".
//
// Returns true when blocked. Text that cannot be parsed as IPv6 is also
// reported as blocked (fail closed).
function isBlockedV6(ip) {
  let addr = ip.toLowerCase();
  const pct = addr.indexOf("%"); // strip zone id (fe80::1%eth0)
  if (pct !== -1) addr = addr.slice(0, pct);

  // A dotted-quad tail naming a blocked IPv4 address is refused whatever
  // prefix precedes it.
  const tail = addr.slice(addr.lastIndexOf(":") + 1);
  if (tail.includes(".") && isBlockedV4(tail)) return true;

  const groups = expandV6Groups(addr);
  if (groups === null) return true;

  const firstNonZero = groups.findIndex((g) => g !== 0);
  if (firstNonZero === -1) return true; // :: unspecified
  if (firstNonZero === 7 && groups[7] === 1) return true; // ::1 loopback

  // Hex-encoded embedded IPv4: rebuild the dotted quad from the low 32 bits.
  const [hi, lo] = groups.slice(6);
  const embeddedV4 = `${hi >> 8}.${hi & 0xff}.${lo >> 8}.${lo & 0xff}`;
  const isMapped = firstNonZero === 5 && groups[5] === 0xffff; // ::ffff:0:0/96
  const isCompatible = firstNonZero >= 6; // ::/96
  const isNat64 =
    groups[0] === 0x64 &&
    groups[1] === 0xff9b &&
    groups.slice(2, 6).every((g) => g === 0); // 64:ff9b::/96
  if ((isMapped || isCompatible || isNat64) && isBlockedV4(embeddedV4)) return true;

  const first = groups[0];
  if ((first & 0xffc0) === 0xfe80) return true; // fe80::/10 link-local
  if ((first & 0xfe00) === 0xfc00) return true; // fc00::/7 unique-local
  if ((first & 0xff00) === 0xff00) return true; // ff00::/8 multicast
  return false;
}
|
||||
|
||||
// Tell whether the IP literal `ip` belongs to a range we refuse to fetch
// from, dispatching on the address family reported by net.isIP.
// Anything that is not an IP literal yields false; callers are expected
// to resolve hostnames first.
export function isBlockedAddress(ip) {
  switch (net.isIP(ip)) {
    case 4:
      return isBlockedV4(ip);
    case 6:
      return isBlockedV6(ip);
    default:
      return false;
  }
}
|
||||
|
||||
// Collect the addresses `host` stands for: the host itself when it is
// already an IP literal, otherwise every address dns.lookup returns.
// Returns null when the lookup fails.
async function addressesForHost(host) {
  if (net.isIP(host)) return [host];
  let records;
  try {
    records = await dns.lookup(host, { all: true });
  } catch {
    return null;
  }
  return records.map(({ address }) => address);
}

// Check that `urlStr` parses as an http(s) URL and that none of the
// addresses its host resolves to is rejected by isBlockedAddress.
// Resolves to the parsed URL object; throws BlockedUrlError when the
// URL is malformed, uses another scheme, does not resolve, or resolves
// to at least one blocked address.
export async function assertPublicHttpUrl(urlStr) {
  let parsed;
  try {
    parsed = new URL(urlStr);
  } catch {
    throw new BlockedUrlError("media_url is not a valid URL");
  }

  const { protocol } = parsed;
  if (!(protocol === "http:" || protocol === "https:")) {
    throw new BlockedUrlError(`media_url scheme "${protocol}" is not allowed`);
  }

  // URL keeps the square brackets around IPv6 literals; drop them so the
  // host can be handed to net.isIP / dns.lookup.
  const host = parsed.hostname.replace(/^\[|\]$/g, "");

  const addresses = await addressesForHost(host);
  if (addresses === null || addresses.length === 0) {
    throw new BlockedUrlError(`media_url host "${host}" did not resolve`);
  }

  // A single blocked address is enough to refuse the whole host.
  if (addresses.some((addr) => isBlockedAddress(addr))) {
    throw new BlockedUrlError(
      `media_url host "${host}" resolves to a blocked address`,
    );
  }
  return parsed;
}
|
||||
|
||||
// Request headers that must not follow a redirect to a different origin.
const CROSS_ORIGIN_STRIPPED_HEADERS = ["authorization", "proxy-authorization", "cookie"];

// Copy `headers` (any form fetch accepts) without the credential-bearing
// entries listed above.
function stripCredentialHeaders(headers) {
  const copy = new Headers(headers);
  for (const name of CROSS_ORIGIN_STRIPPED_HEADERS) copy.delete(name);
  return copy;
}

// fetch() wrapper that re-validates the URL on every redirect hop.
// fetch's redirect:"follow" would jump to an internal host AFTER the
// initial check passed, so redirects are followed by hand with
// redirect:"manual" and assertPublicHttpUrl is re-run on each Location.
//
// Options: `signal` and `headers` are passed to fetch; `maxRedirects`
// (default 5) is how many redirects may be followed.
// Resolves to the first response that is not a 3xx carrying a Location
// header. Throws BlockedUrlError when a hop fails validation, when a
// Location cannot be parsed, or when the redirect limit is exceeded.
//
// Following redirects by hand also means doing what "follow" mode does
// automatically:
//   - the body of each redirect response is cancelled so its connection
//     is released instead of waiting on an unread stream;
//   - Authorization / Proxy-Authorization / Cookie are dropped from the
//     caller's headers once a redirect leaves the current origin.
//
// NOTE(review): assertPublicHttpUrl and fetch each resolve the hostname
// on their own, so a host whose DNS answer changes between the two
// lookups is not caught by this function.
export async function safeFetch(urlStr, { signal, headers, maxRedirects = 5 } = {}) {
  let current = urlStr;
  let hopHeaders = headers;

  for (let hop = 0; hop <= maxRedirects; hop++) {
    await assertPublicHttpUrl(current);
    const res = await fetch(current, { redirect: "manual", signal, headers: hopHeaders });

    const location = res.headers.get("location");
    const isRedirect = res.status >= 300 && res.status < 400 && location;
    if (!isRedirect) return res;

    // Best effort: a failed cancel must not mask the redirect handling.
    await res.body?.cancel().catch(() => {});

    let next;
    try {
      next = new URL(location, current); // resolve relative redirects
    } catch {
      throw new BlockedUrlError("media_url redirect target is not a valid URL");
    }

    if (hopHeaders != null && next.origin !== new URL(current).origin) {
      hopHeaders = stripCredentialHeaders(hopHeaders);
    }
    current = next.toString();
  }

  throw new BlockedUrlError("media_url exceeded the redirect limit");
}
|
||||
Reference in New Issue
Block a user