diff --git a/server/routes/transcribe-url.js b/server/routes/transcribe-url.js index e35db54..a7839e7 100644 --- a/server/routes/transcribe-url.js +++ b/server/routes/transcribe-url.js @@ -50,6 +50,7 @@ import { calcGeminiCost } from "../pricing.js"; import { getAudioDurationSeconds } from "../audio-meta.js"; import { resolveHardwareConfig } from "../hardware-config.js"; import { reportHealthEvent } from "../spark-control-events.js"; +import { safeFetch } from "../safe-url.js"; import { createJob, markRunning, @@ -97,8 +98,12 @@ function guessMimeFromExt(filePath) { // would exceed MAX_DOWNLOAD_BYTES. Returns { filePath, bytes, // mimeType }. export async function downloadDirect(url, tmpDir) { - const res = await fetch(url, { - redirect: "follow", + // safeFetch is the SSRF choke point: it rejects non-http(s) schemes + // and hosts resolving to private/reserved ranges, and re-validates + // every redirect hop. downloadDirect is the single download path for + // transcribe-url / summarize-url / admin-test-run, so guarding it + // here covers all three. + const res = await safeFetch(url, { signal: AbortSignal.timeout(DOWNLOAD_TIMEOUT_MS), }); if (!res.ok) { diff --git a/server/safe-url.js b/server/safe-url.js new file mode 100644 index 0000000..e11ffe6 --- /dev/null +++ b/server/safe-url.js @@ -0,0 +1,158 @@ +// SSRF guard for user-supplied media URLs. +// +// /relay/transcribe-url and /relay/summarize-url download whatever +// `media_url` the caller passes, and the route is reachable by anyone +// presenting a self-chosen X-Recap-Install-Id, so an unguarded fetch +// lets a caller probe the operator's LAN (Spark Control, BTCPay, other +// StartOS services) or cloud metadata at 169.254.169.254. This module +// rejects non-http(s) schemes and any hostname that resolves to a +// private / loopback / link-local / reserved address, and follows +// redirects MANUALLY so every hop is re-validated — a public URL can +// 302 to an internal one after the first check passes. 
+//
+// LAN calls to the operator's OWN hardware go through lan-fetch.js
+// instead: those URLs are config-set, not caller-set, and intentionally
+// reach private hosts.
+
+import dns from "node:dns/promises";
+import net from "node:net";
+
+// Error thrown when this guard refuses a URL. `name` and `code` are
+// fixed so callers can recognise it without relying on the message.
+export class BlockedUrlError extends Error {
+  name = "BlockedUrlError";
+  code = "BLOCKED_URL";
+
+  constructor(message) {
+    super(message);
+  }
+}
+
+// Convert a dotted-quad IPv4 literal to its unsigned 32-bit value.
+// Returns null unless the input is exactly four decimal octets of one
+// to three digits each, none greater than 255.
+function ipv4ToInt(ip) {
+  const octets = ip.split(".");
+  if (octets.length !== 4) return null;
+  const isOctet = (text) => /^\d{1,3}$/.test(text) && Number(text) <= 255;
+  if (!octets.every(isOctet)) return null;
+  return octets.reduce((acc, text) => acc * 256 + Number(text), 0) >>> 0;
+}
+
+// True when `n` and `base` share their top `bits` bits, i.e. `n` lies
+// inside base/bits. A zero-length prefix matches every address.
+function inV4Range(n, base, bits) {
+  if (bits === 0) return true;
+  const shift = 32 - bits;
+  return (n >>> shift) === (base >>> shift);
+}
+
+// [network, prefix length] pairs for every IPv4 block that a
+// user-supplied URL is not allowed to reach.
+const BLOCKED_V4 = [
+  ["0.0.0.0", 8], // "this host"
+  ["10.0.0.0", 8], // private
+  ["100.64.0.0", 10], // CGNAT
+  ["127.0.0.0", 8], // loopback
+  ["169.254.0.0", 16], // link-local (incl. 169.254.169.254 cloud metadata)
+  ["172.16.0.0", 12], // private
+  ["192.0.0.0", 24], // IETF protocol assignments
+  ["192.0.2.0", 24], // TEST-NET-1
+  ["192.168.0.0", 16], // private
+  ["198.18.0.0", 15], // benchmarking
+  ["198.51.100.0", 24], // TEST-NET-2
+  ["203.0.113.0", 24], // TEST-NET-3
+  ["224.0.0.0", 4], // multicast
+  ["240.0.0.0", 4], // reserved (incl. 255.255.255.255 broadcast)
+];
+
+// True when `ip` is a well-formed IPv4 literal inside any BLOCKED_V4
+// range; anything that does not parse as IPv4 yields false.
+function isBlockedV4(ip) {
+  const n = ipv4ToInt(ip);
+  if (n === null) return false;
+  return BLOCKED_V4.some(([base, bits]) => inV4Range(n, ipv4ToInt(base), bits));
+}
+
+// Classify the reserved/private IPv6 ranges we block. Handles
+// IPv4-mapped (::ffff:a.b.c.d) by delegating to the v4 check.
+function isBlockedV6(ip) {
+  // Compare numerically, not textually. The WHATWG URL parser rewrites
+  // "[::ffff:127.0.0.1]" as "[::ffff:7f00:1]", so a check that only
+  // recognises a dotted tail lets mapped loopback / metadata addresses
+  // through, and prefix tests on the string miss non-canonical
+  // spellings such as "0:0:0:0:0:0:0:1".
+  const addr = ip.toLowerCase().split("%")[0]; // drop zone id (fe80::1%eth0)
+  const g = parseV6Groups(addr);
+  // Not reachable via isBlockedAddress (net.isIP gates it); if some
+  // other caller passes junk, refuse rather than guess.
+  if (g === null) return true;
+
+  const leadingZeros = (count) => g.slice(0, count).every((x) => x === 0);
+  // Re-express two 16-bit groups as a dotted quad for the v4 check.
+  const embeddedV4Blocked = (hi, lo) =>
+    isBlockedV4(`${hi >> 8}.${hi & 0xff}.${lo >> 8}.${lo & 0xff}`);
+
+  if (leadingZeros(7) && g[7] <= 1) return true; // :: unspecified, ::1 loopback
+  // IPv4-mapped ::ffff:a.b.c.d, in dotted or hex spelling.
+  if (leadingZeros(5) && g[5] === 0xffff) return embeddedV4Blocked(g[6], g[7]);
+  // Deprecated IPv4-compatible ::a.b.c.d.
+  if (leadingZeros(6)) return embeddedV4Blocked(g[6], g[7]);
+  // NAT64 well-known prefix 64:ff9b::/96 carries an IPv4 address in the
+  // low 32 bits.
+  if (g[0] === 0x64 && g[1] === 0xff9b && g.slice(2, 6).every((x) => x === 0)) {
+    return embeddedV4Blocked(g[6], g[7]);
+  }
+  // 6to4 2002:AABB:CCDD::/48 carries an IPv4 address in groups 1–2.
+  if (g[0] === 0x2002) return embeddedV4Blocked(g[1], g[2]);
+
+  if ((g[0] & 0xffc0) === 0xfe80) return true; // fe80::/10 link-local
+  if ((g[0] & 0xffc0) === 0xfec0) return true; // fec0::/10 site-local (deprecated)
+  if ((g[0] & 0xfe00) === 0xfc00) return true; // fc00::/7 unique-local
+  if ((g[0] & 0xff00) === 0xff00) return true; // ff00::/8 multicast
+  return false;
+}
+
+// Expand a lower-case IPv6 literal (no zone id) into its eight 16-bit
+// groups. Accepts "::" compression and a trailing dotted quad. Returns
+// null when the text is not a well-formed IPv6 address.
+function parseV6Groups(addr) {
+  let text = addr;
+  const tailStart = text.lastIndexOf(":") + 1;
+  const tail = text.slice(tailStart);
+  if (tail.includes(".")) {
+    // Fold a dotted-quad tail into two hex groups so the rest of the
+    // parser only ever sees hex.
+    const octets = tail.split(".");
+    const wellFormed =
+      octets.length === 4 &&
+      octets.every((o) => /^\d{1,3}$/.test(o) && Number(o) <= 255);
+    if (!wellFormed) return null;
+    const [a, b, c, d] = octets.map(Number);
+    const hi = (a * 256 + b).toString(16);
+    const lo = (c * 256 + d).toString(16);
+    text = `${text.slice(0, tailStart)}${hi}:${lo}`;
+  }
+  const halves = text.split("::");
+  if (halves.length > 2) return null; // "::" may appear at most once
+  const toGroups = (part) => (part === "" ? [] : part.split(":"));
+  const head = toGroups(halves[0]);
+  const rest = halves.length === 2 ? toGroups(halves[1]) : [];
+  const missing = 8 - head.length - rest.length;
+  // "::" must stand for at least one group; without it all eight
+  // groups must be spelled out.
+  if (halves.length === 2 ? missing < 1 : missing !== 0) return null;
+  const groups = [...head, ...new Array(missing).fill("0"), ...rest];
+  if (!groups.every((group) => /^[0-9a-f]{1,4}$/.test(group))) return null;
+  return groups.map((group) => Number.parseInt(group, 16));
+}
+
+// True if `ip` (an IP literal) is in a range we refuse to fetch from.
+// Returns false for non-IP strings — the caller resolves DNS first.
+export function isBlockedAddress(ip) {
+  const kind = net.isIP(ip); // 4, 6, or 0 when `ip` is not an IP literal
+  if (kind === 4) return isBlockedV4(ip);
+  if (kind === 6) return isBlockedV6(ip);
+  return false;
+}
+
+// Validate that `urlStr` is an http(s) URL whose host does NOT resolve
+// to a private/reserved address. Throws BlockedUrlError otherwise;
+// returns the parsed URL on success.
+export async function assertPublicHttpUrl(urlStr) {
+  // NOTE(review): safeFetch hands fetch() the URL string, so the host is
+  // resolved a second time for the actual request. A DNS answer that
+  // changes between this check and that request is not covered here —
+  // confirm whether that window is acceptable for this deployment.
+  let u;
+  try {
+    u = new URL(urlStr);
+  } catch (err) {
+    throw blockedBecause("media_url is not a valid URL", err);
+  }
+  if (u.protocol !== "http:" && u.protocol !== "https:") {
+    throw new BlockedUrlError(`media_url scheme "${u.protocol}" is not allowed`);
+  }
+  // URL.hostname keeps the brackets on IPv6 literals; net.isIP and
+  // dns.lookup need the bare address.
+  const host = u.hostname.replace(/^\[|\]$/g, "");
+  let addresses;
+  if (net.isIP(host)) {
+    addresses = [host]; // literal IP: nothing to resolve
+  } else {
+    let looked;
+    try {
+      // all:true returns every address for the host, so each one is
+      // checked below rather than only the first.
+      looked = await dns.lookup(host, { all: true });
+    } catch (err) {
+      throw blockedBecause(`media_url host "${host}" did not resolve`, err);
+    }
+    addresses = looked.map((a) => a.address);
+  }
+  if (!addresses.length) {
+    throw new BlockedUrlError(`media_url host "${host}" did not resolve`);
+  }
+  // One blocked address is enough to refuse the whole host.
+  for (const addr of addresses) {
+    if (isBlockedAddress(addr)) {
+      throw new BlockedUrlError(
+        `media_url host "${host}" resolves to a blocked address`,
+      );
+    }
+  }
+  return u;
+}
+
+// Build a BlockedUrlError that keeps the underlying failure on `.cause`,
+// so logs can tell ENOTFOUND from EAI_AGAIN or a URL parse error while
+// callers still see a single error type.
+function blockedBecause(message, cause) {
+  const err = new BlockedUrlError(message);
+  err.cause = cause;
+  return err;
+}
+
+// fetch() wrapper that re-validates the URL on every redirect hop. Node
+// fetch's redirect:"follow" would jump to an internal host AFTER the
+// initial check passed, so we follow manually with redirect:"manual"
+// and re-run assertPublicHttpUrl on each Location.
+export async function safeFetch(urlStr, { signal, headers, maxRedirects = 5 } = {}) {
+  // Options: `signal` and `headers` are passed to fetch(); `maxRedirects`
+  // is how many redirects may be followed. Resolves with the first
+  // response that is not a 3xx-with-Location. Throws BlockedUrlError
+  // when any hop fails validation, a Location cannot be parsed, or the
+  // redirect limit is exceeded.
+  let current = urlStr;
+  let hopHeaders = headers;
+  for (let hop = 0; hop <= maxRedirects; hop++) {
+    const checked = await assertPublicHttpUrl(current);
+    const res = await fetch(current, {
+      redirect: "manual",
+      signal,
+      headers: hopHeaders,
+    });
+    const location = res.headers.get("location");
+    const isRedirect = res.status >= 300 && res.status < 400 && location;
+    if (!isRedirect) return res;
+
+    // The redirect's own body is never read; cancel it so it is not
+    // left pending while the next hop runs.
+    try {
+      await res.body?.cancel();
+    } catch {
+      // Best effort: a failed cancel must not turn a redirect into an error.
+    }
+
+    let next;
+    try {
+      next = new URL(location, current); // resolve relative redirects
+    } catch {
+      throw new BlockedUrlError("media_url redirect target is not a valid URL");
+    }
+    // redirect:"follow" strips credentials when a redirect leaves the
+    // origin; following manually has to do the same or the caller's
+    // headers are replayed to whatever host the redirect names.
+    if (hopHeaders != null && next.origin !== checked.origin) {
+      hopHeaders = new Headers(hopHeaders);
+      for (const name of ["authorization", "cookie", "proxy-authorization"]) {
+        hopHeaders.delete(name);
+      }
+    }
+    current = next.toString();
+  }
+  throw new BlockedUrlError("media_url exceeded the redirect limit");
+}
diff --git a/server/test/safe-url.test.js b/server/test/safe-url.test.js
new file mode 100644
index 0000000..b28aab9
--- /dev/null
+++ b/server/test/safe-url.test.js
@@ -0,0 +1,135 @@
+// SSRF guard for user-supplied media URLs (safe-url.js). Uses literal
+// IPs so the address checks need no DNS / network.
+
+import { test, describe } from "node:test";
+import assert from "node:assert/strict";
+import {
+  isBlockedAddress,
+  assertPublicHttpUrl,
+  safeFetch,
+  BlockedUrlError,
+} from "../safe-url.js";
+
+describe("isBlockedAddress", () => {
+  test("blocks private / loopback / link-local / reserved IPv4", () => {
+    for (const ip of [
+      "127.0.0.1",
+      "10.0.0.5",
+      "172.16.0.1",
+      "172.31.255.255",
+      "192.168.1.1",
+      "169.254.169.254", // cloud metadata
+      "100.64.0.1",
+      "0.0.0.0",
+      "198.18.0.1",
+      "224.0.0.1",
+      "255.255.255.255",
+    ]) {
+      assert.equal(isBlockedAddress(ip), true, `${ip} should be blocked`);
+    }
+  });
+
+  test("allows public IPv4 (incl. the /12 boundaries around 172.16/12)", () => {
+    for (const ip of ["8.8.8.8", "1.1.1.1", "172.15.0.1", "172.32.0.1", "93.184.216.34"]) {
+      assert.equal(isBlockedAddress(ip), false, `${ip} should be allowed`);
+    }
+  });
+
+  test("blocks loopback / ULA / link-local / IPv4-mapped IPv6", () => {
+    for (const ip of [
+      "::1",
+      "::",
+      "fe80::1",
+      "febf::1",
+      "fc00::1",
+      "fd12:3456::1",
+      "ff02::1",
+      "::ffff:127.0.0.1",
+      "::ffff:192.168.0.1",
+    ]) {
+      assert.equal(isBlockedAddress(ip), true, `${ip} should be blocked`);
+    }
+  });
+
+  test("allows public IPv6", () => {
+    assert.equal(isBlockedAddress("2606:4700:4700::1111"), false);
+  });
+});
+
+describe("assertPublicHttpUrl", () => {
+  test("rejects non-http(s) schemes", async () => {
+    for (const u of [
+      "file:///etc/passwd",
+      "gopher://x/_",
+      "ftp://h/f",
+      "data:text/plain,hi",
+    ]) {
+      await assert.rejects(() => assertPublicHttpUrl(u), BlockedUrlError);
+    }
+  });
+
+  test("rejects literal private / metadata IP hosts (no DNS needed)", async () => {
+    for (const u of [
+      "http://127.0.0.1/x",
+      "http://169.254.169.254/latest/meta-data/",
+      "http://[::1]/x",
+      "http://192.168.0.10:9000/a",
+      "https://10.1.2.3/audio.mp3",
+    ]) {
+      await assert.rejects(() => assertPublicHttpUrl(u), BlockedUrlError);
+    }
+  });
+
+  test("rejects malformed URLs", async () => {
+    await assert.rejects(() => assertPublicHttpUrl("not a url"), BlockedUrlError);
+  });
+
+  test("allows a public literal IP host", async () => {
+    const u = await assertPublicHttpUrl("https://8.8.8.8/audio.mp3");
+    assert.equal(u.hostname, "8.8.8.8");
+  });
+});
+
+describe("safeFetch", () => {
+  // Bare redirect response pointing at `location`; fetch is stubbed, so no network.
+  const redirectTo = (location) =>
+    new Response(null, { status: 302, headers: { location } });
+
+  test("rejects a blocked host before any request is made", async (t) => {
+    const fetchMock = t.mock.method(globalThis, "fetch", async () => new Response("ok"));
+    await assert.rejects(() => safeFetch("http://127.0.0.1/x"), BlockedUrlError);
+    assert.equal(fetchMock.mock.calls.length, 0);
+  });
+
+  test("re-validates the target of every redirect", async (t) => {
+    const fetchMock = t.mock.method(globalThis, "fetch", async () =>
+      redirectTo("http://169.254.169.254/latest/meta-data/"),
+    );
+    await assert.rejects(() => safeFetch("https://8.8.8.8/a.mp3"), BlockedUrlError);
+    assert.equal(fetchMock.mock.calls.length, 1);
+  });
+
+  test("gives up once maxRedirects is exceeded", async (t) => {
+    const fetchMock = t.mock.method(globalThis, "fetch", async () => redirectTo("/again"));
+    await assert.rejects(
+      () => safeFetch("https://8.8.8.8/a.mp3", { maxRedirects: 2 }),
+      BlockedUrlError,
+    );
+    assert.equal(fetchMock.mock.calls.length, 3);
+  });
+
+  test("drops credential headers when a redirect changes origin", async (t) => {
+    const seen = [];
+    t.mock.method(globalThis, "fetch", async (url, init) => {
+      seen.push(new Headers(init.headers));
+      return seen.length === 1 ? redirectTo("https://1.1.1.1/b.mp3") : new Response("ok");
+    });
+    const res = await safeFetch("https://8.8.8.8/a.mp3", {
+      headers: { authorization: "Bearer t", accept: "audio/*" },
+    });
+    assert.equal(res.status, 200);
+    assert.equal(seen[0].get("authorization"), "Bearer t");
+    assert.equal(seen[1].has("authorization"), false);
+    assert.equal(seen[1].get("accept"), "audio/*");
+  });
+});