// SSRF guard for user-supplied media URLs.
//
// /relay/transcribe-url and /relay/summarize-url download whatever
// `media_url` the caller passes, and the route is reachable by anyone
// presenting a self-chosen X-Recap-Install-Id, so an unguarded fetch
// lets a caller probe the operator's LAN (Spark Control, BTCPay, other
// StartOS services) or cloud metadata at 169.254.169.254. This module
// rejects non-http(s) schemes and any hostname that resolves to a
// private / loopback / link-local / reserved address, and follows
// redirects MANUALLY so every hop is re-validated — a public URL can
// 302 to an internal one after the first check passes.
//
// LAN calls to the operator's OWN hardware go through lan-fetch.js
// instead: those URLs are config-set, not caller-set, and intentionally
// reach private hosts.

import dns from "node:dns/promises";
import net from "node:net";

// Error raised for every URL this module refuses to fetch. Callers can
// match on `instanceof BlockedUrlError` or on `code === "BLOCKED_URL"`.
// `options` is forwarded to Error, so `{ cause }` may be attached.
export class BlockedUrlError extends Error {
  constructor(message, options) {
    super(message, options);
    this.name = "BlockedUrlError";
    this.code = "BLOCKED_URL";
  }
}

// Parse an IPv4 dotted-quad into its unsigned 32-bit integer, or null if
// it isn't a well-formed IPv4 literal (four decimal octets, each 0-255).
function ipv4ToInt(ip) {
  const parts = ip.split(".");
  if (parts.length !== 4) return null;
  let n = 0;
  for (const p of parts) {
    if (!/^\d{1,3}$/.test(p)) return null;
    const v = Number(p);
    if (v > 255) return null;
    n = n * 256 + v; // multiply instead of << so the value never goes negative
  }
  return n >>> 0;
}

// True if the 32-bit address `n` lies inside `base`/`bits`.
function inV4Range(n, base, bits) {
  // `1 << 32` wraps to 1 in JS, so a /0 prefix needs its own mask.
  const mask = bits === 0 ? 0 : (~((1 << (32 - bits)) - 1)) >>> 0;
  return (n & mask) === (base & mask);
}

// IPv4 ranges that must never be fetched from a user-supplied URL.
const BLOCKED_V4 = [
  ["0.0.0.0", 8], // "this host"
  ["10.0.0.0", 8], // private
  ["100.64.0.0", 10], // CGNAT
  ["127.0.0.0", 8], // loopback
  ["169.254.0.0", 16], // link-local (incl. 169.254.169.254 cloud metadata)
  ["172.16.0.0", 12], // private
  ["192.0.0.0", 24], // IETF protocol assignments
  ["192.0.2.0", 24], // TEST-NET-1
  ["192.168.0.0", 16], // private
  ["198.18.0.0", 15], // benchmarking
  ["198.51.100.0", 24], // TEST-NET-2
  ["203.0.113.0", 24], // TEST-NET-3
  ["224.0.0.0", 4], // multicast
  ["240.0.0.0", 4], // reserved (incl. 255.255.255.255 broadcast)
];

// Same table with each base pre-converted to an integer, so the
// per-request check does no string parsing of the constants.
const BLOCKED_V4_RANGES = BLOCKED_V4.map(([base, bits]) => [ipv4ToInt(base), bits]);

// True if the unsigned 32-bit IPv4 address `n` is in a blocked range.
function isBlockedV4Int(n) {
  return BLOCKED_V4_RANGES.some(([base, bits]) => inV4Range(n, base, bits));
}

// True if the dotted-quad string `ip` is in a blocked range. Strings
// that are not well-formed IPv4 literals return false.
function isBlockedV4(ip) {
  const n = ipv4ToInt(ip);
  return n !== null && isBlockedV4Int(n);
}

// Expand a lower-case, zone-free IPv6 literal into its eight 16-bit
// groups, or return null if it cannot be parsed. A trailing dotted quad
// (::ffff:1.2.3.4) is folded into the last two groups.
function parseV6Groups(addr) {
  let text = addr;
  const lastColon = text.lastIndexOf(":");
  const tail = text.slice(lastColon + 1);
  if (tail.includes(".")) {
    const v4 = ipv4ToInt(tail);
    if (v4 === null) return null;
    const hi = (v4 >>> 16).toString(16);
    const lo = (v4 & 0xffff).toString(16);
    text = `${text.slice(0, lastColon + 1)}${hi}:${lo}`;
  }

  const halves = text.split("::");
  if (halves.length > 2) return null; // "::" may appear at most once

  const toGroups = (part) =>
    part === ""
      ? []
      : part.split(":").map((g) => (/^[0-9a-f]{1,4}$/.test(g) ? Number.parseInt(g, 16) : NaN));

  const head = toGroups(halves[0]);
  const rest = halves.length === 2 ? toGroups(halves[1]) : [];
  const missing = 8 - head.length - rest.length;
  // With "::" at least one group is elided; without it all 8 must be present.
  if (halves.length === 2 ? missing < 1 : missing !== 0) return null;

  const groups = [...head, ...new Array(missing).fill(0), ...rest];
  return groups.some(Number.isNaN) ? null : groups;
}

// Classify the reserved/private IPv6 ranges we block.
//
// The address is parsed into numeric groups rather than matched as text:
// the WHATWG URL parser serialises `[::ffff:127.0.0.1]` as
// `[::ffff:7f00:1]`, so a check that only looks for a dotted tail lets
// IPv4-mapped loopback/private/metadata addresses straight through, and
// an exact-string test for "::1" misses `0:0:0:0:0:0:0:1`.
//
// Forms that embed an IPv4 address are judged by the IPv4 blocklist.
// Anything that cannot be parsed is treated as blocked (fail closed).
function isBlockedV6(ip) {
  let addr = ip.toLowerCase();
  const pct = addr.indexOf("%"); // strip zone id (fe80::1%eth0)
  if (pct !== -1) addr = addr.slice(0, pct);

  const groups = parseV6Groups(addr);
  if (groups === null) return true;

  const [g0, g1, g2, , g4, g5, g6, g7] = groups;
  const allZero = (from, to) => groups.slice(from, to).every((g) => g === 0);
  const low32 = ((g6 << 16) | g7) >>> 0;

  if ((g0 & 0xff00) === 0) {
    // 0000::/8 is reserved; it holds "::", "::1" and the IPv4-embedding
    // forms. Only the embedding forms can denote a public destination.
    const mapped = allZero(0, 5) && g5 === 0xffff; // ::ffff:a.b.c.d
    const compatible = allZero(0, 6); // ::a.b.c.d (covers :: and ::1 via 0.0.0.0/8)
    const translated = allZero(0, 4) && g4 === 0xffff && g5 === 0; // ::ffff:0:a.b.c.d
    const nat64 = g0 === 0x64 && g1 === 0xff9b && allZero(2, 6); // 64:ff9b::a.b.c.d
    if (mapped || compatible || translated || nat64) return isBlockedV4Int(low32);
    return true;
  }

  // 6to4 (2002::/16) carries the IPv4 address in groups 1-2.
  if (g0 === 0x2002) return isBlockedV4Int(((g1 << 16) | g2) >>> 0);

  if (g0 === 0x2001 && g1 === 0x0db8) return true; // 2001:db8::/32 documentation
  if ((g0 & 0xffc0) === 0xfe80) return true; // fe80::/10 link-local
  if ((g0 & 0xffc0) === 0xfec0) return true; // fec0::/10 site-local (deprecated)
  if ((g0 & 0xfe00) === 0xfc00) return true; // fc00::/7 unique-local
  if ((g0 & 0xff00) === 0xff00) return true; // ff00::/8 multicast
  return false;
}

// True if `ip` (an IP literal) is in a range we refuse to fetch from.
// Returns false for non-IP strings — the caller resolves DNS first.
export function isBlockedAddress(ip) {
  const kind = net.isIP(ip);
  if (kind === 4) return isBlockedV4(ip);
  if (kind === 6) return isBlockedV6(ip);
  return false;
}

// Validate that `urlStr` is an http(s) URL whose host does NOT resolve
// to a private/reserved address. Throws BlockedUrlError otherwise;
// returns the parsed URL on success.
export async function assertPublicHttpUrl(urlStr) {
  let u;
  try {
    u = new URL(urlStr);
  } catch {
    throw new BlockedUrlError("media_url is not a valid URL");
  }
  if (u.protocol !== "http:" && u.protocol !== "https:") {
    throw new BlockedUrlError(`media_url scheme "${u.protocol}" is not allowed`);
  }

  const host = u.hostname.replace(/^\[|\]$/g, ""); // strip IPv6 brackets

  let addresses;
  if (net.isIP(host)) {
    // IP literal: nothing to resolve, classify it directly.
    addresses = [host];
  } else {
    let looked;
    try {
      // all:true returns every A/AAAA record so one private record
      // among several public ones is still caught.
      looked = await dns.lookup(host, { all: true });
    } catch {
      throw new BlockedUrlError(`media_url host "${host}" did not resolve`);
    }
    addresses = looked.map(({ address }) => address);
  }
  if (!addresses.length) {
    throw new BlockedUrlError(`media_url host "${host}" did not resolve`);
  }

  // NOTE(review): the caller's subsequent fetch() resolves the name
  // again, so a DNS answer that changes between this check and the
  // connection (rebinding) is not covered here. Closing that gap needs
  // the connection pinned to the address validated above.
  for (const addr of addresses) {
    if (isBlockedAddress(addr)) {
      throw new BlockedUrlError(
        `media_url host "${host}" resolves to a blocked address`,
      );
    }
  }
  return u;
}

// Credential-bearing request headers that are dropped once a redirect
// leaves the origin they were supplied for.
const CROSS_ORIGIN_STRIPPED_HEADERS = ["authorization", "cookie", "proxy-authorization"];

// fetch() wrapper that re-validates the URL on every redirect hop. Node
// fetch's redirect:"follow" would jump to an internal host AFTER the
// initial check passed, so we follow manually with redirect:"manual"
// and re-run assertPublicHttpUrl on each Location.
//
// Parameters:
//   urlStr        URL to fetch (validated before any request is made).
//   signal        optional AbortSignal passed to every hop.
//   headers       optional request headers (anything `new Headers()` accepts).
//   maxRedirects  number of redirects followed before giving up (default 5).
// Returns the first non-redirect Response (a 3xx without a Location
// header is returned as-is).
// Throws BlockedUrlError when any hop fails validation, when a Location
// header cannot be parsed, or when the redirect limit is exceeded;
// errors from fetch() itself propagate unchanged.
export async function safeFetch(urlStr, { signal, headers, maxRedirects = 5 } = {}) {
  let current = urlStr;
  let hopHeaders = headers;

  for (let hop = 0; hop <= maxRedirects; hop++) {
    const checked = await assertPublicHttpUrl(current);
    const res = await fetch(current, { redirect: "manual", signal, headers: hopHeaders });

    const location = res.headers.get("location");
    const isRedirect = res.status >= 300 && res.status < 400 && location;
    if (!isRedirect) return res;

    // The redirect body is never read; cancel it so the underlying
    // connection is released instead of lingering until GC.
    try {
      await res.body?.cancel();
    } catch {
      // Best-effort cleanup: a body that already errored or closed
      // must not stop the redirect from being followed.
    }

    let next;
    try {
      next = new URL(location, current); // resolve relative redirects
    } catch {
      throw new BlockedUrlError("media_url redirect location is not a valid URL");
    }

    // Don't replay the caller's credentials to a different origin.
    if (hopHeaders != null && next.origin !== checked.origin) {
      const trimmed = new Headers(hopHeaders);
      for (const name of CROSS_ORIGIN_STRIPPED_HEADERS) trimmed.delete(name);
      hopHeaders = trimmed;
    }

    current = next.toString();
  }
  throw new BlockedUrlError("media_url exceeded the redirect limit");
}