Block SSRF on media_url downloads (transcribe-url/summarize-url)

Previously, downloadDirect fetched any caller-supplied media_url with
`redirect: "follow"` and no scheme or host validation. Because the route
is reachable via a self-chosen X-Recap-Install-Id, a caller could use it
to probe the operator's LAN or the cloud metadata endpoint
(169.254.169.254).

Add safe-url.js:
- assertPublicHttpUrl rejects non-http(s) schemes and hosts that resolve
  to private, loopback, link-local, or reserved ranges.
- safeFetch follows redirects manually and re-validates each hop.

Route downloadDirect through safeFetch, which covers transcribe-url,
summarize-url, and admin-test-run.
This commit is contained in:
Keysat
2026-06-13 16:23:26 -05:00
parent 0b90120b72
commit 8ad7c54da4
3 changed files with 255 additions and 2 deletions
+7 -2
View File
@@ -50,6 +50,7 @@ import { calcGeminiCost } from "../pricing.js";
import { getAudioDurationSeconds } from "../audio-meta.js";
import { resolveHardwareConfig } from "../hardware-config.js";
import { reportHealthEvent } from "../spark-control-events.js";
import { safeFetch } from "../safe-url.js";
import {
createJob,
markRunning,
@@ -97,8 +98,12 @@ function guessMimeFromExt(filePath) {
// would exceed MAX_DOWNLOAD_BYTES. Returns { filePath, bytes,
// mimeType }.
export async function downloadDirect(url, tmpDir) {
const res = await fetch(url, {
redirect: "follow",
// safeFetch is the SSRF choke point: it rejects non-http(s) schemes
// and hosts resolving to private/reserved ranges, and re-validates
// every redirect hop. downloadDirect is the single download path for
// transcribe-url / summarize-url / admin-test-run, so guarding it
// here covers all three.
const res = await safeFetch(url, {
signal: AbortSignal.timeout(DOWNLOAD_TIMEOUT_MS),
});
if (!res.ok) {