Files
Keysat 8ad7c54da4 Block SSRF on media_url downloads (transcribe-url/summarize-url)
downloadDirect fetched any caller-supplied media_url with redirect-follow
and no host/scheme validation; the route is reachable via a self-chosen
X-Recap-Install-Id, so a caller could probe the operator's LAN or cloud
metadata (169.254.169.254). Add safe-url.js: assertPublicHttpUrl rejects
non-http(s) schemes and hosts resolving to private/loopback/link-local/
reserved ranges, and safeFetch follows redirects manually, re-validating
each hop. Route downloadDirect through it (covers transcribe-url,
summarize-url, and admin-test-run).
2026-06-13 16:23:26 -05:00

159 lines
5.5 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// SSRF guard for user-supplied media URLs.
//
// /relay/transcribe-url and /relay/summarize-url download whatever
// `media_url` the caller passes, and the route is reachable by anyone
// presenting a self-chosen X-Recap-Install-Id, so an unguarded fetch
// lets a caller probe the operator's LAN (Spark Control, BTCPay, other
// StartOS services) or cloud metadata at 169.254.169.254. This module
// rejects non-http(s) schemes and any hostname that resolves to a
// private / loopback / link-local / reserved address, and follows
// redirects MANUALLY so every hop is re-validated — a public URL can
// 302 to an internal one after the first check passes.
//
// LAN calls to the operator's OWN hardware go through lan-fetch.js
// instead: those URLs are config-set, not caller-set, and intentionally
// reach private hosts.
import dns from "node:dns/promises";
import net from "node:net";
// Error thrown when a caller-supplied URL is refused by this module.
// Callers can recognise it by `instanceof`, by `name`, or by the stable
// machine-readable `code`.
export class BlockedUrlError extends Error {
  name = "BlockedUrlError";
  code = "BLOCKED_URL";

  // `message` is the human-readable reason the URL was refused.
  constructor(message) {
    super(message);
  }
}
// Convert a dotted-quad IPv4 literal ("a.b.c.d") to its unsigned 32-bit
// integer value. Returns null when the input is not exactly four
// dot-separated fields of 1-3 decimal digits, each no larger than 255.
function ipv4ToInt(ip) {
  const octets = ip.split(".");
  if (octets.length !== 4) return null;

  // -1 marks a field that is not 1-3 ASCII digits.
  const values = octets.map((text) => (/^\d{1,3}$/.test(text) ? Number(text) : -1));
  if (values.some((value) => value < 0 || value > 255)) return null;

  // Multiply instead of shifting so the running total never goes negative;
  // the final >>> 0 pins the result to an unsigned 32-bit integer.
  return values.reduce((total, value) => total * 256 + value, 0) >>> 0;
}
// True when the 32-bit address `n` falls inside the CIDR block
// `base`/`bits`, i.e. the two values agree on their top `bits` bits.
function inV4Range(n, base, bits) {
  // A /0 prefix matches everything. It is special-cased because JS shift
  // counts wrap modulo 32, so a shift by 32 would behave like a shift by 0.
  if (bits === 0) return true;
  const netmask = -1 << (32 - bits); // top `bits` bits set
  // XOR leaves a 1 wherever the two addresses differ; the address is in
  // range when none of those differences fall under the mask.
  return ((n ^ base) & netmask) === 0;
}
// IPv4 CIDR blocks that a user-supplied URL must never be allowed to
// reach. Stored as [baseAddress, prefixLength] pairs; the table is
// written in CIDR notation and split into pairs once at module load.
const BLOCKED_V4 = [
  "0.0.0.0/8", // "this host"
  "10.0.0.0/8", // private
  "100.64.0.0/10", // CGNAT
  "127.0.0.0/8", // loopback
  "169.254.0.0/16", // link-local (incl. 169.254.169.254 cloud metadata)
  "172.16.0.0/12", // private
  "192.0.0.0/24", // IETF protocol assignments
  "192.0.2.0/24", // TEST-NET-1
  "192.168.0.0/16", // private
  "198.18.0.0/15", // benchmarking
  "198.51.100.0/24", // TEST-NET-2
  "203.0.113.0/24", // TEST-NET-3
  "224.0.0.0/4", // multicast
  "240.0.0.0/4", // reserved (incl. 255.255.255.255 broadcast)
].map((cidr) => {
  const [base, prefix] = cidr.split("/");
  return [base, Number(prefix)];
});
// True when `ip` is a well-formed IPv4 literal that lies inside any
// BLOCKED_V4 range. Strings that ipv4ToInt cannot parse yield false.
function isBlockedV4(ip) {
  const value = ipv4ToInt(ip);
  if (value === null) return false;
  return BLOCKED_V4.some(([base, bits]) => inV4Range(value, ipv4ToInt(base), bits));
}
// Expand an IPv6 literal (lower-case, zone id already removed) into its
// eight 16-bit groups. Accepts "::" compression and a trailing dotted
// quad. Returns null when the text is not a well-formed IPv6 address.
function parseV6Groups(addr) {
  let text = addr;

  // Rewrite a trailing dotted quad (::ffff:1.2.3.4) as two hex groups so
  // the rest of the parser only has to deal with hex.
  const lastColon = text.lastIndexOf(":");
  const tail = text.slice(lastColon + 1);
  if (tail.includes(".")) {
    const quad = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(tail);
    if (quad === null || lastColon === -1) return null;
    const [a, b, c, d] = quad.slice(1).map(Number);
    if (a > 255 || b > 255 || c > 255 || d > 255) return null;
    const hi = ((a << 8) | b).toString(16);
    const lo = ((c << 8) | d).toString(16);
    text = `${text.slice(0, lastColon + 1)}${hi}:${lo}`;
  }

  const halves = text.split("::");
  if (halves.length > 2) return null; // "::" may appear at most once
  const compressed = halves.length === 2;
  const head = halves[0] === "" ? [] : halves[0].split(":");
  const rest = !compressed || halves[1] === "" ? [] : halves[1].split(":");
  const explicit = head.length + rest.length;
  // "::" stands for at least one zero group; without it all 8 are required.
  if (compressed ? explicit > 7 : explicit !== 8) return null;

  const hex = [...head, ...new Array(8 - explicit).fill("0"), ...rest];
  if (!hex.every((group) => /^[0-9a-f]{1,4}$/.test(group))) return null;
  return hex.map((group) => Number.parseInt(group, 16));
}

// Classify the reserved/private IPv6 ranges we block. Returns true when
// `ip` (an IPv6 literal, optionally with a %zone suffix) must not be
// fetched.
//
// The address is parsed into numeric groups rather than matched by
// string prefix because the same address has many spellings. In
// particular the WHATWG URL parser serializes [::ffff:127.0.0.1] as
// [::ffff:7f00:1], so an IPv4-mapped loopback or metadata address
// arrives here in HEX form and a dotted-tail check alone never sees it.
//
// Addresses that embed an IPv4 address (IPv4-mapped, IPv4-compatible,
// NAT64 well-known prefix, 6to4) are blocked when the embedded IPv4
// address is blocked, by delegating to isBlockedV4.
function isBlockedV6(ip) {
  let addr = ip.toLowerCase();
  const pct = addr.indexOf("%"); // strip zone id (fe80::1%eth0)
  if (pct !== -1) addr = addr.slice(0, pct);

  // Any address written with a dotted tail whose IPv4 part is blocked,
  // whatever the prefix in front of it.
  const tail = addr.slice(addr.lastIndexOf(":") + 1);
  if (tail.includes(".") && isBlockedV4(tail)) return true;

  const groups = parseV6Groups(addr);
  // Fail closed: the only caller passes strings net.isIP() already
  // classified as IPv6, so an unparseable value means something is off.
  if (groups === null) return true;

  const leadingZeros = (count) => groups.slice(0, count).every((g) => g === 0);
  const embeddedV4 = (hi, lo) => `${hi >> 8}.${hi & 0xff}.${lo >> 8}.${lo & 0xff}`;
  const first = groups[0];

  // :: (unspecified) and ::1 (loopback), in any spelling.
  if (leadingZeros(7) && groups[7] <= 1) return true;

  // IPv4-mapped ::ffff:0:0/96 and deprecated IPv4-compatible ::/96.
  if (
    leadingZeros(5) &&
    (groups[5] === 0xffff || groups[5] === 0) &&
    isBlockedV4(embeddedV4(groups[6], groups[7]))
  ) {
    return true;
  }

  // NAT64 well-known prefix 64:ff9b::/96 — IPv4 in the last 32 bits.
  if (
    first === 0x64 &&
    groups[1] === 0xff9b &&
    groups.slice(2, 6).every((g) => g === 0) &&
    isBlockedV4(embeddedV4(groups[6], groups[7]))
  ) {
    return true;
  }

  // 6to4 2002::/16 — IPv4 in bits 16-47.
  if (first === 0x2002 && isBlockedV4(embeddedV4(groups[1], groups[2]))) return true;

  if ((first & 0xffc0) === 0xfe80) return true; // fe80::/10 link-local (fe80–febf)
  if ((first & 0xffc0) === 0xfec0) return true; // fec0::/10 site-local (deprecated)
  if ((first & 0xfe00) === 0xfc00) return true; // fc00::/7 unique-local
  if ((first & 0xff00) === 0xff00) return true; // ff00::/8 multicast
  if (first === 0x2001 && groups[1] === 0x0db8) return true; // 2001:db8::/32 documentation
  return false;
}
// Decide whether `ip`, given as an IP literal, lies in a range this
// module refuses to fetch from. Dispatches on the address family that
// net.isIP reports; anything that is not an IP literal yields false,
// because hostnames are expected to be resolved by the caller first.
export function isBlockedAddress(ip) {
  switch (net.isIP(ip)) {
    case 4:
      return isBlockedV4(ip);
    case 6:
      return isBlockedV6(ip);
    default:
      return false;
  }
}
// Check that `urlStr` parses as an http(s) URL and that its host — an
// IP literal, or every address the hostname resolves to — is outside
// the blocked ranges. Resolves to the parsed URL object on success and
// throws BlockedUrlError for an unparseable URL, a disallowed scheme, a
// host that does not resolve, or a host with any blocked address.
//
// NOTE(review): this validates the addresses returned by its own DNS
// lookup; a later fetch() of the same URL presumably resolves the name
// again, so a host whose DNS answer changes in between is not covered
// here — confirm whether that matters for this deployment.
export async function assertPublicHttpUrl(urlStr) {
  let parsed;
  try {
    parsed = new URL(urlStr);
  } catch {
    throw new BlockedUrlError("media_url is not a valid URL");
  }

  const scheme = parsed.protocol;
  if (!(scheme === "http:" || scheme === "https:")) {
    throw new BlockedUrlError(`media_url scheme "${scheme}" is not allowed`);
  }

  // URL.hostname keeps the [] around IPv6 literals; drop them.
  const host = parsed.hostname.replace(/^\[|\]$/g, "");

  let candidates;
  if (net.isIP(host) !== 0) {
    // Already an IP literal: nothing to resolve.
    candidates = [host];
  } else {
    let records;
    try {
      // all:true so every A/AAAA answer is checked, not just the first.
      records = await dns.lookup(host, { all: true });
    } catch {
      throw new BlockedUrlError(`media_url host "${host}" did not resolve`);
    }
    candidates = records.map(({ address }) => address);
  }

  if (candidates.length === 0) {
    throw new BlockedUrlError(`media_url host "${host}" did not resolve`);
  }

  // One blocked answer is enough to refuse the whole host.
  if (candidates.some((candidate) => isBlockedAddress(candidate))) {
    throw new BlockedUrlError(
      `media_url host "${host}" resolves to a blocked address`,
    );
  }
  return parsed;
}
// fetch() wrapper that re-validates the URL on every redirect hop. Node
// fetch's redirect:"follow" would jump to an internal host AFTER the
// initial check passed, so we follow manually with redirect:"manual"
// and re-run assertPublicHttpUrl on each Location.
//
// Parameters:
//   urlStr       - the caller-supplied URL to GET.
//   signal       - optional AbortSignal passed to every hop.
//   headers      - optional request headers (any form fetch accepts).
//   maxRedirects - how many redirects may be followed (default 5).
// Returns the first non-redirect Response. Throws BlockedUrlError when
// any hop fails validation, a Location header is not a usable URL, or
// the redirect limit is exceeded; errors from fetch() itself propagate.
export async function safeFetch(urlStr, { signal, headers, maxRedirects = 5 } = {}) {
  // Credential-bearing headers that must not follow a redirect to a
  // different origin. Following redirects by hand bypasses the stripping
  // that fetch's own redirect handling would have applied.
  const credentialHeaders = ["authorization", "cookie", "proxy-authorization"];

  let current = urlStr;
  let currentHeaders = headers;
  for (let hop = 0; hop <= maxRedirects; hop++) {
    const validated = await assertPublicHttpUrl(current);
    const res = await fetch(current, {
      redirect: "manual",
      signal,
      headers: currentHeaders,
    });
    const location = res.headers.get("location");
    const isRedirect = res.status >= 300 && res.status < 400 && Boolean(location);
    if (!isRedirect) return res;

    // We are not going to read this response; cancel its body so the
    // underlying connection is released instead of waiting on GC.
    try {
      await res.body?.cancel();
    } catch {
      // Best-effort cleanup only: a failed cancel must not abort the
      // redirect chain.
    }

    let next;
    try {
      next = new URL(location, current); // resolve relative redirects
    } catch {
      throw new BlockedUrlError("media_url redirected to an invalid URL");
    }

    // Once the chain leaves the origin the caller addressed, drop
    // credentials for the rest of the chain.
    if (currentHeaders != null && next.origin !== validated.origin) {
      const stripped = new Headers(currentHeaders);
      for (const name of credentialHeaders) stripped.delete(name);
      currentHeaders = stripped;
    }
    current = next.toString();
  }
  throw new BlockedUrlError("media_url exceeded the redirect limit");
}