Block SSRF on media_url downloads (transcribe-url/summarize-url)
downloadDirect fetched any caller-supplied media_url with redirect-follow and no host/scheme validation; the route is reachable via a self-chosen X-Recap-Install-Id, so a caller could probe the operator's LAN or cloud metadata (169.254.169.254). Add safe-url.js: assertPublicHttpUrl rejects non-http(s) schemes and hosts resolving to private/loopback/link-local/reserved ranges, and safeFetch follows redirects manually, re-validating each hop. Route downloadDirect through it (covers transcribe-url, summarize-url, and admin-test-run).
This commit is contained in:
@@ -50,6 +50,7 @@ import { calcGeminiCost } from "../pricing.js";
|
|||||||
import { getAudioDurationSeconds } from "../audio-meta.js";
|
import { getAudioDurationSeconds } from "../audio-meta.js";
|
||||||
import { resolveHardwareConfig } from "../hardware-config.js";
|
import { resolveHardwareConfig } from "../hardware-config.js";
|
||||||
import { reportHealthEvent } from "../spark-control-events.js";
|
import { reportHealthEvent } from "../spark-control-events.js";
|
||||||
|
import { safeFetch } from "../safe-url.js";
|
||||||
import {
|
import {
|
||||||
createJob,
|
createJob,
|
||||||
markRunning,
|
markRunning,
|
||||||
@@ -97,8 +98,12 @@ function guessMimeFromExt(filePath) {
|
|||||||
// would exceed MAX_DOWNLOAD_BYTES. Returns { filePath, bytes,
|
// would exceed MAX_DOWNLOAD_BYTES. Returns { filePath, bytes,
|
||||||
// mimeType }.
|
// mimeType }.
|
||||||
export async function downloadDirect(url, tmpDir) {
|
export async function downloadDirect(url, tmpDir) {
|
||||||
const res = await fetch(url, {
|
// safeFetch is the SSRF choke point: it rejects non-http(s) schemes
|
||||||
redirect: "follow",
|
// and hosts resolving to private/reserved ranges, and re-validates
|
||||||
|
// every redirect hop. downloadDirect is the single download path for
|
||||||
|
// transcribe-url / summarize-url / admin-test-run, so guarding it
|
||||||
|
// here covers all three.
|
||||||
|
const res = await safeFetch(url, {
|
||||||
signal: AbortSignal.timeout(DOWNLOAD_TIMEOUT_MS),
|
signal: AbortSignal.timeout(DOWNLOAD_TIMEOUT_MS),
|
||||||
});
|
});
|
||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
|
|||||||
@@ -0,0 +1,158 @@
|
|||||||
|
// SSRF guard for user-supplied media URLs.
|
||||||
|
//
|
||||||
|
// /relay/transcribe-url and /relay/summarize-url download whatever
|
||||||
|
// `media_url` the caller passes, and the route is reachable by anyone
|
||||||
|
// presenting a self-chosen X-Recap-Install-Id, so an unguarded fetch
|
||||||
|
// lets a caller probe the operator's LAN (Spark Control, BTCPay, other
|
||||||
|
// StartOS services) or cloud metadata at 169.254.169.254. This module
|
||||||
|
// rejects non-http(s) schemes and any hostname that resolves to a
|
||||||
|
// private / loopback / link-local / reserved address, and follows
|
||||||
|
// redirects MANUALLY so every hop is re-validated — a public URL can
|
||||||
|
// 302 to an internal one after the first check passes.
|
||||||
|
//
|
||||||
|
// LAN calls to the operator's OWN hardware go through lan-fetch.js
|
||||||
|
// instead: those URLs are config-set, not caller-set, and intentionally
|
||||||
|
// reach private hosts.
|
||||||
|
|
||||||
|
import dns from "node:dns/promises";
|
||||||
|
import net from "node:net";
|
||||||
|
|
||||||
|
// Error raised when a caller-supplied URL is refused by the SSRF guard.
//
// `name` is "BlockedUrlError" and `code` is "BLOCKED_URL" so callers can
// branch on either without importing the class. The optional second
// argument is forwarded to Error unchanged, which lets a thrower attach
// the underlying failure as `{ cause }`; omitting it behaves exactly as
// before.
export class BlockedUrlError extends Error {
  constructor(message, options) {
    super(message, options);
    this.name = "BlockedUrlError";
    this.code = "BLOCKED_URL";
  }
}
|
||||||
|
|
||||||
|
// Convert a dotted-quad IPv4 literal to its unsigned 32-bit value.
// Returns null unless the input is exactly four dot-separated fields of
// one to three decimal digits, each no greater than 255.
function ipv4ToInt(ip) {
  const octets = ip.split(".");
  if (octets.length !== 4) return null;

  const isOctet = (field) => /^\d{1,3}$/.test(field) && Number(field) <= 255;
  if (!octets.every(isOctet)) return null;

  // Fold with multiplication rather than `<<` so the running value is
  // never truncated to a signed 32-bit integer; `>>> 0` makes the result
  // an unsigned 32-bit number.
  return octets.reduce((acc, field) => acc * 256 + Number(field), 0) >>> 0;
}
|
||||||
|
|
||||||
|
// True when the 32-bit address `n` shares its top `bits` bits with
// `base`, i.e. `n` lies inside the CIDR block base/bits. A prefix length
// of 0 matches every address.
function inV4Range(n, base, bits) {
  // -1 is all ones as a 32-bit integer; shifting left by the number of
  // host bits clears them, leaving only the network bits set.
  const netmask = bits === 0 ? 0 : (-1 << (32 - bits)) >>> 0;
  // The two addresses agree on the network bits iff their XOR has none
  // of those bits set.
  return ((n ^ base) & netmask) === 0;
}
|
||||||
|
|
||||||
|
// IPv4 CIDR blocks that a user-supplied URL must never reach. Each row
// is [network address as a dotted quad, prefix length].
const BLOCKED_V4 = [
  ["0.0.0.0", 8], // "this host"
  ["10.0.0.0", 8], // private
  ["100.64.0.0", 10], // CGNAT
  ["127.0.0.0", 8], // loopback
  ["169.254.0.0", 16], // link-local (incl. 169.254.169.254 cloud metadata)
  ["172.16.0.0", 12], // private
  ["192.0.0.0", 24], // IETF protocol assignments
  ["192.0.2.0", 24], // TEST-NET-1
  ["192.168.0.0", 16], // private
  ["198.18.0.0", 15], // benchmarking
  ["198.51.100.0", 24], // TEST-NET-2
  ["203.0.113.0", 24], // TEST-NET-3
  ["224.0.0.0", 4], // multicast
  ["240.0.0.0", 4], // reserved (incl. 255.255.255.255 broadcast)
];

// True when `ip` is a well-formed dotted quad that falls inside any row
// of BLOCKED_V4. Anything ipv4ToInt cannot parse yields false.
function isBlockedV4(ip) {
  const candidate = ipv4ToInt(ip);
  if (candidate === null) return false;
  return BLOCKED_V4.some(([network, prefixLength]) =>
    inV4Range(candidate, ipv4ToInt(network), prefixLength),
  );
}
|
||||||
|
|
||||||
|
// Expand a lowercase, zone-free IPv6 literal into its eight 16-bit
// groups, or return null when the text is not a well-formed address.
// Accepts "::" compression and a trailing dotted-quad (a.b.c.d), which is
// folded into the last two groups.
function parseV6Groups(addr) {
  let text = addr;
  const lastColon = text.lastIndexOf(":");
  const tail = text.slice(lastColon + 1);
  if (tail.includes(".")) {
    const quad = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(tail);
    if (quad === null) return null;
    const [a, b, c, d] = quad.slice(1).map(Number);
    if (a > 255 || b > 255 || c > 255 || d > 255) return null;
    const high = ((a << 8) | b).toString(16);
    const low = ((c << 8) | d).toString(16);
    text = `${text.slice(0, lastColon + 1)}${high}:${low}`;
  }

  const halves = text.split("::");
  if (halves.length > 2) return null; // "::" may appear at most once
  const compressed = halves.length === 2;
  const toGroups = (part) => (part === "" ? [] : part.split(":"));
  const head = toGroups(halves[0]);
  const rest = compressed ? toGroups(halves[1]) : [];
  const missing = 8 - head.length - rest.length;
  // "::" must stand for at least one group; without it all 8 are required.
  if (compressed ? missing < 1 : missing !== 0) return null;

  const groups = [...head, ...new Array(compressed ? missing : 0).fill("0"), ...rest];
  if (!groups.every((g) => /^[0-9a-f]{1,4}$/.test(g))) return null;
  return groups.map((g) => Number.parseInt(g, 16));
}

// Classify the reserved/private IPv6 ranges we block.
//
// The address is parsed into numeric groups rather than matched as text,
// because the same address has many spellings. In particular the WHATWG
// URL parser serialises "[::ffff:127.0.0.1]" as "[::ffff:7f00:1]", so a
// check that only looks for a dotted tail lets IPv4-mapped loopback,
// private and metadata addresses through. Addresses that embed an IPv4
// address (IPv4-mapped ::ffff:0:0/96, IPv4-compatible ::/96, NAT64
// 64:ff9b::/96) are judged by delegating the embedded address to
// isBlockedV4. Text that cannot be parsed is treated as blocked (fail
// closed).
function isBlockedV6(ip) {
  let addr = ip.toLowerCase();
  const pct = addr.indexOf("%"); // strip zone id (fe80::1%eth0)
  if (pct !== -1) addr = addr.slice(0, pct);

  const groups = parseV6Groups(addr);
  if (groups === null) return true;

  const [g0, g1, g2, g3, g4, g5, g6, g7] = groups;
  const embeddedV4 = `${g6 >> 8}.${g6 & 0xff}.${g7 >> 8}.${g7 & 0xff}`;
  const zeroPrefix = (count) => groups.slice(0, count).every((g) => g === 0);

  // Any address spelled with a dotted tail is judged on that tail
  // regardless of prefix (kept from the previous behaviour).
  if (addr.includes(".") && isBlockedV4(embeddedV4)) return true;

  if (zeroPrefix(7) && g7 <= 1) return true; // :: unspecified, ::1 loopback
  if (zeroPrefix(6)) return isBlockedV4(embeddedV4); // ::a.b.c.d IPv4-compatible
  if (zeroPrefix(5) && g5 === 0xffff) return isBlockedV4(embeddedV4); // ::ffff:a.b.c.d IPv4-mapped
  if (g0 === 0x64 && g1 === 0xff9b && g2 === 0 && g3 === 0 && g4 === 0 && g5 === 0) {
    return isBlockedV4(embeddedV4); // 64:ff9b::/96 NAT64 well-known prefix
  }

  // Prefix tests use masks on the first group, so short groups such as
  // "fe8::" or "fc::" (0x0fe8, 0x00fc) are not mistaken for these ranges.
  if ((g0 & 0xffc0) === 0xfe80) return true; // fe80::/10 link-local
  if ((g0 & 0xfe00) === 0xfc00) return true; // fc00::/7 unique-local
  if ((g0 & 0xff00) === 0xff00) return true; // ff00::/8 multicast
  return false;
}
|
||||||
|
|
||||||
|
// Decide whether an IP literal lies in a range we refuse to fetch from.
// net.isIP classifies the string; IPv4 and IPv6 literals are handed to
// their family-specific checks. Anything that is not an IP literal
// (hostnames, empty strings) returns false; the caller is expected to
// resolve DNS first and pass the resulting addresses here.
export function isBlockedAddress(ip) {
  switch (net.isIP(ip)) {
    case 4:
      return isBlockedV4(ip);
    case 6:
      return isBlockedV6(ip);
    default:
      return false;
  }
}
|
||||||
|
|
||||||
|
// Build a BlockedUrlError that carries the underlying failure as
// `cause`. The property is attached here (non-enumerable, like a native
// cause) so the original error is not lost when it is wrapped.
function blockedWithCause(message, cause) {
  const err = new BlockedUrlError(message);
  Object.defineProperty(err, "cause", {
    value: cause,
    writable: true,
    configurable: true,
    enumerable: false,
  });
  return err;
}

// Validate that `urlStr` is an http(s) URL whose host does NOT resolve
// to a private/reserved address.
//
// Returns the parsed URL on success. Throws BlockedUrlError when the
// string is not a URL, the scheme is not http/https, the host does not
// resolve, or ANY resolved address is blocked; one bad record is enough,
// since the connection may use any of them. Parse and DNS failures are
// preserved on the thrown error's `cause`.
export async function assertPublicHttpUrl(urlStr) {
  let u;
  try {
    u = new URL(urlStr);
  } catch (cause) {
    throw blockedWithCause("media_url is not a valid URL", cause);
  }
  if (u.protocol !== "http:" && u.protocol !== "https:") {
    throw new BlockedUrlError(`media_url scheme "${u.protocol}" is not allowed`);
  }

  const host = u.hostname.replace(/^\[|\]$/g, ""); // strip IPv6 brackets
  let addresses;
  if (net.isIP(host)) {
    // Literal IP host: nothing to resolve, check it directly.
    addresses = [host];
  } else {
    let looked;
    try {
      looked = await dns.lookup(host, { all: true });
    } catch (cause) {
      throw blockedWithCause(`media_url host "${host}" did not resolve`, cause);
    }
    addresses = looked.map((record) => record.address);
  }

  if (addresses.length === 0) {
    throw new BlockedUrlError(`media_url host "${host}" did not resolve`);
  }
  if (addresses.some((addr) => isBlockedAddress(addr))) {
    throw new BlockedUrlError(
      `media_url host "${host}" resolves to a blocked address`,
    );
  }
  return u;
}
|
||||||
|
|
||||||
|
// Credential-bearing headers that must not follow a redirect to a
// different origin.
const CROSS_ORIGIN_STRIPPED_HEADERS = ["authorization", "cookie", "proxy-authorization"];

// fetch() wrapper that re-validates the URL on every redirect hop. Node
// fetch's redirect:"follow" would jump to an internal host AFTER the
// initial check passed, so we follow manually with redirect:"manual"
// and re-run assertPublicHttpUrl on each Location.
//
// Options: `signal` and `headers` are passed to fetch; `maxRedirects`
// (default 5) bounds the number of redirects followed. Returns the first
// non-redirect response. Throws BlockedUrlError when any hop fails
// validation, a Location header cannot be parsed, or the redirect limit
// is exceeded; other fetch errors propagate unchanged.
//
// Because following is manual, this function also does what the
// built-in follower would have done: it releases each redirect response
// body, and drops credential headers when a hop changes origin.
//
// NOTE(review): validation and fetch each resolve DNS independently, so
// a host that changes its answer between the two (DNS rebinding) is not
// caught here. Closing that gap needs connection-level address pinning.
export async function safeFetch(urlStr, { signal, headers, maxRedirects = 5 } = {}) {
  let current = urlStr;
  let hopHeaders = headers;
  for (let hop = 0; hop <= maxRedirects; hop++) {
    const validated = await assertPublicHttpUrl(current);
    const res = await fetch(current, { redirect: "manual", signal, headers: hopHeaders });
    const location = res.headers.get("location");
    const isRedirect = res.status >= 300 && res.status < 400 && location;
    if (!isRedirect) return res;

    // Best-effort: discard the redirect body so its connection is
    // released now. A failure here must not mask the redirect itself.
    try {
      await res.body?.cancel();
    } catch {
      // Body was already consumed or errored; nothing left to release.
    }

    let next;
    try {
      next = new URL(location, current); // resolve relative redirects
    } catch (cause) {
      const err = new BlockedUrlError("media_url redirected to an invalid Location");
      err.cause = cause;
      throw err;
    }

    if (hopHeaders != null && next.origin !== validated.origin) {
      const stripped = new Headers(hopHeaders);
      for (const name of CROSS_ORIGIN_STRIPPED_HEADERS) stripped.delete(name);
      hopHeaders = stripped;
    }
    current = next.toString();
  }
  throw new BlockedUrlError("media_url exceeded the redirect limit");
}
|
||||||
@@ -0,0 +1,90 @@
|
|||||||
|
// SSRF guard for user-supplied media URLs (safe-url.js). Uses literal
|
||||||
|
// IPs so the address checks need no DNS / network.
|
||||||
|
|
||||||
|
import { test, describe } from "node:test";
|
||||||
|
import assert from "node:assert/strict";
|
||||||
|
import {
|
||||||
|
isBlockedAddress,
|
||||||
|
assertPublicHttpUrl,
|
||||||
|
BlockedUrlError,
|
||||||
|
} from "../safe-url.js";
|
||||||
|
|
||||||
|
// Literal-address coverage for isBlockedAddress. Every case is an IP
// literal, so none of these tests touch DNS or the network.
describe("isBlockedAddress", () => {
  test("blocks private / loopback / link-local / reserved IPv4", () => {
    const blockedV4 = [
      "127.0.0.1",
      "10.0.0.5",
      "172.16.0.1",
      "172.31.255.255",
      "192.168.1.1",
      "169.254.169.254", // cloud metadata
      "100.64.0.1",
      "0.0.0.0",
      "198.18.0.1",
      "224.0.0.1",
      "255.255.255.255",
    ];
    blockedV4.forEach((ip) => {
      assert.equal(isBlockedAddress(ip), true, `${ip} should be blocked`);
    });
  });

  test("allows public IPv4 (incl. the /12 boundaries around 172.16/12)", () => {
    // 172.15.x and 172.32.x sit just outside 172.16.0.0/12 on either side.
    const publicV4 = ["8.8.8.8", "1.1.1.1", "172.15.0.1", "172.32.0.1", "93.184.216.34"];
    publicV4.forEach((ip) => {
      assert.equal(isBlockedAddress(ip), false, `${ip} should be allowed`);
    });
  });

  test("blocks loopback / ULA / link-local / IPv4-mapped IPv6", () => {
    const blockedV6 = [
      "::1",
      "::",
      "fe80::1",
      "febf::1",
      "fc00::1",
      "fd12:3456::1",
      "ff02::1",
      "::ffff:127.0.0.1",
      "::ffff:192.168.0.1",
    ];
    blockedV6.forEach((ip) => {
      assert.equal(isBlockedAddress(ip), true, `${ip} should be blocked`);
    });
  });

  test("allows public IPv6", () => {
    const verdict = isBlockedAddress("2606:4700:4700::1111");
    assert.equal(verdict, false);
  });
});
|
||||||
|
|
||||||
|
// URL-level coverage for assertPublicHttpUrl. Hosts are IP literals (or
// the URL is rejected before resolution), so no DNS lookup is needed.
describe("assertPublicHttpUrl", () => {
  // Assert, one URL at a time, that each is rejected with BlockedUrlError.
  const expectAllBlocked = async (urls) => {
    for (const candidate of urls) {
      await assert.rejects(() => assertPublicHttpUrl(candidate), BlockedUrlError);
    }
  };

  test("rejects non-http(s) schemes", async () => {
    await expectAllBlocked([
      "file:///etc/passwd",
      "gopher://x/_",
      "ftp://h/f",
      "data:text/plain,hi",
    ]);
  });

  test("rejects literal private / metadata IP hosts (no DNS needed)", async () => {
    await expectAllBlocked([
      "http://127.0.0.1/x",
      "http://169.254.169.254/latest/meta-data/",
      "http://[::1]/x",
      "http://192.168.0.10:9000/a",
      "https://10.1.2.3/audio.mp3",
    ]);
  });

  test("rejects malformed URLs", async () => {
    await expectAllBlocked(["not a url"]);
  });

  test("allows a public literal IP host", async () => {
    const parsed = await assertPublicHttpUrl("https://8.8.8.8/audio.mp3");
    assert.equal(parsed.hostname, "8.8.8.8");
  });
});
|
||||||
Reference in New Issue
Block a user