Fix five P0/P1 security & correctness findings from the full-eval

- Arbitrary file write (P0): validate import keys in /api/library/import via
  a now-exported safeFilename(); a ../../ key is skipped, not written out of
  the scope dir.
- SSRF (P0): guard downloadPodcastAudio — reject non-HTTP(S) schemes, block
  IP-literal and DNS-resolved private/link-local/loopback/reserved/multicast
  and embedded-IPv4 IPv6 targets (closes DNS rebinding), cap + resolve redirects.
- ESM require (P1): top-level import of randomBytes in license-purchase.js
  (the inner require threw on the anon purchase-settle path).
- Concurrency lock (P1): skip the process-global free-tier slot in multi-mode
  so it no longer serializes every cloud tenant onto one job.
- X-Forwarded-For bypass (P1): set Express trust proxy from
  RECAP_TRUSTED_PROXY_HOPS (default 1); getClientIp now reads req.ip instead
  of a client-spoofable XFF entry.

Tests added for safeFilename, the SSRF guard, and getClientIp (119 pass).
Registry blockers deferred (ROADMAP); leaked-key history purge queued.
This commit is contained in:
Keysat
2026-06-15 13:36:40 -05:00
parent 755b100f00
commit d0e98424c1
11 changed files with 317 additions and 49 deletions
+8 -11
View File
@@ -42,18 +42,15 @@ async function getTrialConfig() {
};
}
// Crude IPv4-or-IPv6 string extraction. Trusts the X-Forwarded-For
// header's first hop because Recap sits behind StartOS's tunnel — the
// header is set by the operator's infrastructure, not by clients
// directly. If you ever expose the server without a trusted proxy,
// revisit this.
// Resolve the real client IP. We rely on Express's `trust proxy` setting
// (configured in index.js to the number of trusted proxies in front of the
// app) so req.ip is the address the trusted proxy observed — NOT a value the
// client can spoof by sending their own X-Forwarded-For. This previously took
// the first XFF entry verbatim, which a client could forge to mint unlimited
// trials. Falls back to the raw socket address if req.ip isn't populated.
export function getClientIp(req) {
const xff = req.headers?.["x-forwarded-for"];
if (xff) {
const first = String(xff).split(",")[0].trim();
if (first) return first;
}
return (req.socket?.remoteAddress || "").replace(/^::ffff:/, "");
const ip = req.ip || req.socket?.remoteAddress || "";
return ip.replace(/^::ffff:/, "");
}
// Expand an IPv6 string to its full 8-group :-separated form with
+122 -22
View File
@@ -7,6 +7,8 @@ import { promisify } from "util";
import path from "path";
import http from "http";
import https from "https";
import dns from "dns";
import net from "net";
import { createWriteStream } from "fs";
const execFileAsync = promisify(execFile);
@@ -71,27 +73,125 @@ export async function splitAudioFile(inputPath, outputDir, chunkSeconds = 2700)
return chunks;
}
// ── Download a podcast episode by URL ───────────────────────────────────────
// Streams the HTTP response straight to disk. Follows redirects. Rejects
// on any non-200 final status. Used by /api/process when the input URL is
// a podcast episode rather than a YouTube video.
export function downloadPodcastAudio(audioUrl, destPath) {
return new Promise((resolve, reject) => {
const doFetch = (url) => {
const getter = url.startsWith("https") ? https : http;
getter.get(url, (res) => {
if (res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) {
return doFetch(res.headers.location);
}
if (res.statusCode !== 200) {
return reject(new Error(`HTTP ${res.statusCode} downloading podcast audio`));
}
const fileStream = createWriteStream(destPath);
res.pipe(fileStream);
fileStream.on("finish", () => fileStream.close(resolve));
fileStream.on("error", reject);
}).on("error", reject);
};
doFetch(audioUrl);
// ── SSRF guard for outbound podcast fetches ─────────────────────────────────
// downloadPodcastAudio fetches a fully user-controlled URL, so without a
// guard a caller could point it at internal services (cloud metadata at
// 169.254.169.254, LAN hosts, localhost) and read the response back through
// the transcript. isBlockedAddress rejects loopback / private / link-local /
// reserved / multicast targets for IPv4, IPv6, and IPv4-mapped IPv6.
export function isBlockedAddress(ip) {
if (!ip || typeof ip !== "string") return true;
// IPv4-mapped IPv6 in dotted form (::ffff:1.2.3.4) — judge by the embedded
// IPv4. (The hex-encoded forms are caught in the IPv6 branch below.)
const mapped = ip.match(/^::ffff:(\d+\.\d+\.\d+\.\d+)$/i);
const addr = mapped ? mapped[1] : ip;
if (net.isIPv4(addr)) {
const [a, b] = addr.split(".").map(Number);
if (a === 0) return true; // 0.0.0.0/8 "this network"
if (a === 10) return true; // private
if (a === 127) return true; // loopback
if (a === 169 && b === 254) return true; // link-local (cloud metadata)
if (a === 172 && b >= 16 && b <= 31) return true; // private
if (a === 192 && b === 168) return true; // private
if (a === 100 && b >= 64 && b <= 127) return true; // CGNAT (100.64.0.0/10)
if (a >= 224) return true; // multicast + reserved (224.0.0.0+)
return false;
}
if (net.isIPv6(addr)) {
const a = addr.toLowerCase();
if (a === "::1" || a === "::") return true; // loopback / unspecified
if (a.startsWith("fc") || a.startsWith("fd")) return true; // fc00::/7 ULA
if (/^fe[89ab]/.test(a)) return true; // fe80::/10 link-local
if (a.startsWith("ff")) return true; // ff00::/8 multicast
// Translation / embedded-IPv4 prefixes can smuggle a private IPv4 past the
// rules above (the dotted ::ffff:1.2.3.4 form is normalized to IPv4 at the
// top; these catch the hex-encoded forms: IPv4-mapped/-compatible, SIIT,
// NAT64, 6to4). None is ever a real podcast host, so block the whole
// prefix rather than decode the embedded address.
if (/^::[0-9a-f]/.test(a)) return true; // ::/96 mapped / compat / SIIT (hex)
if (a.startsWith("64:ff9b:")) return true; // NAT64 well-known (RFC 6052)
if (a.startsWith("2002:")) return true; // 6to4
return false;
}
return true; // unrecognized → block
}
// dns.lookup wrapper that fails the connection if the host resolves to a
// blocked address. Passed as the `lookup` option to http(s).get, so the
// check runs at connect time on every attempt — including each redirect
// hop — which also closes the DNS-rebinding window (the address we validate
// is the address the socket connects to).
function guardedLookup(hostname, options, callback) {
if (typeof options === "function") {
callback = options;
options = {};
}
dns.lookup(hostname, options, (err, address, family) => {
if (err) return callback(err);
const addrs = Array.isArray(address) ? address : [{ address, family }];
for (const a of addrs) {
if (isBlockedAddress(a.address)) {
return callback(
new Error(`refusing to fetch podcast audio from disallowed address ${a.address}`),
);
}
}
callback(null, address, family);
});
}
// ── Download a podcast episode by URL ───────────────────────────────────────
// Streams the HTTP response straight to disk. Follows up to MAX_PODCAST_REDIRECTS
// redirects (resolving relative Location headers), rejects on any non-200 final
// status, and refuses non-HTTP(S) schemes and internal addresses (see the SSRF
// guard above). Used by /api/process when the input is a podcast episode.
const MAX_PODCAST_REDIRECTS = 5;
export function downloadPodcastAudio(audioUrl, destPath) {
return new Promise((resolve, reject) => {
const doFetch = (rawUrl, redirectsLeft) => {
let url;
try {
url = new URL(rawUrl);
} catch {
return reject(new Error("invalid podcast audio URL"));
}
if (url.protocol !== "http:" && url.protocol !== "https:") {
return reject(new Error(`refusing non-HTTP podcast URL (${url.protocol})`));
}
// IP-literal hosts (e.g. http://169.254.169.254) never hit the DNS
// `lookup` hook — the socket connects to the literal directly — so they
// must be checked here. guardedLookup below covers hostnames that
// *resolve* to a blocked address (and the DNS-rebinding case).
const host = url.hostname.replace(/^\[|\]$/g, ""); // strip IPv6 brackets
if (net.isIP(host) && isBlockedAddress(host)) {
return reject(
new Error(`refusing to fetch podcast audio from disallowed address ${host}`),
);
}
const getter = url.protocol === "https:" ? https : http;
getter
.get(url, { lookup: guardedLookup }, (res) => {
if (res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) {
res.resume(); // drain so the socket is freed
if (redirectsLeft <= 0) {
return reject(new Error("too many redirects downloading podcast audio"));
}
const next = new URL(res.headers.location, url).toString();
return doFetch(next, redirectsLeft - 1);
}
if (res.statusCode !== 200) {
res.resume();
return reject(new Error(`HTTP ${res.statusCode} downloading podcast audio`));
}
const fileStream = createWriteStream(destPath);
res.pipe(fileStream);
fileStream.on("finish", () => fileStream.close(resolve));
fileStream.on("error", reject);
})
.on("error", reject);
};
doFetch(audioUrl, MAX_PODCAST_REDIRECTS);
});
}
+1 -1
View File
@@ -653,7 +653,7 @@ export function setupHistoryRoutes(app, { addToSkipList } = {}) {
// Allow the same character set as scope components for session ids.
// Belt-and-suspenders against ../../ in :id; ids generated by
// saveToHistory always match.
function safeFilename(s) {
export function safeFilename(s) {
if (typeof s !== "string" || !/^[A-Za-z0-9_-]+$/.test(s)) {
throw new Error("invalid_session_id");
}
+17 -1
View File
@@ -116,6 +116,18 @@ import { buildTenantAuthMiddleware } from "./tenant-auth.js";
const execFileAsync = promisify(execFile);
const app = express();
// Trust the operator's reverse proxy (StartOS / StartTunnel, or a cloud proxy)
// so req.ip is the real client address rather than a client-spoofable
// X-Forwarded-For entry. The value is how many trusted proxies sit in front of
// this process — default 1 (the StartOS/StartTunnel hop). Erring low is safe
// (it can only over-count clients onto one IP, hitting the trial cap sooner);
// erring high would re-open the trial-cap bypass. Override via
// RECAP_TRUSTED_PROXY_HOPS (0 = no proxy in front; use the socket address only).
const hopsParsed = parseInt(process.env.RECAP_TRUSTED_PROXY_HOPS, 10);
const trustedProxyHops =
Number.isInteger(hopsParsed) && hopsParsed >= 0 ? hopsParsed : 1;
app.set("trust proxy", trustedProxyHops);
const PORT = process.env.PORT || 3001;
// ── Multi-tenant mode toggle ────────────────────────────────────────────
@@ -2618,7 +2630,11 @@ app.post("/api/process", async (req, res) => {
// through the relay. Non-relay providers ignore this opt.
const jobId = randomUUID();
const isFree = isFreeUser();
// The free-tier single-flight lock is a single-mode concept (one operator,
// BYO key, one job at a time). In multi mode, per-tenant credit metering is
// the resource control, so a process-global lock would wrongly serialize
// every tenant onto one job at a time — never apply it there.
const isFree = req.recapMode !== "multi" && isFreeUser();
if (isFree) {
if (!tryAcquireFreeSlot({ url, title: itemTitle, abortController })) {
const current = getCurrentFreeJob();
+12 -1
View File
@@ -25,6 +25,7 @@ import {
loadMeta,
saveMeta,
scopeForRequest,
safeFilename,
ROOT_SIDECARS,
} from "./history.js";
@@ -129,7 +130,17 @@ export function setupLibraryRoutes(app) {
// Sessions — skip if already present.
for (const [id, session] of Object.entries(data.sessions)) {
const filePath = path.join(scopeDir, `${id}.json`);
// The import file is fully attacker-controlled; validate the key
// before using it as a filename. A "../../" id would otherwise
// escape the scope dir and write anywhere the process can reach.
let safeId;
try {
safeId = safeFilename(id);
} catch {
skipped++;
continue;
}
const filePath = path.join(scopeDir, `${safeId}.json`);
try {
await fs.access(filePath);
skipped++;
+1 -3
View File
@@ -18,6 +18,7 @@
import { Client } from "@keysat/licensing-client";
import * as license from "./license.js";
import { randomBytes } from "crypto";
const KEYSAT_BASE_URL = license.KEYSAT_BASE_URL;
const PRODUCT_SLUG = license.PRODUCT_SLUG;
@@ -416,10 +417,7 @@ async function maybeApplyPendingSignup(invoiceId, licenseKey, req) {
}
// Local UUID helper — same shape we use in auth-routes for new users.
// Avoids a hard import dep just for this one call.
function randomUuid() {
// Same crypto.randomBytes(16).toString("hex") pattern used elsewhere.
// eslint-disable-next-line global-require
const { randomBytes } = require("crypto");
return randomBytes(16).toString("hex");
}
+37
View File
@@ -0,0 +1,37 @@
// Tests for server/anon-trial.js — focused on getClientIp, which underpins
// the per-IP trial cap. The DB-backed minting paths need a multi-mode SQLite
// handle and are exercised by integration tests; here we lock down that the
// client IP is taken from Express's trust-proxy-resolved req.ip, never from a
// raw client-supplied X-Forwarded-For header.
import { test, describe } from "node:test";
import { strict as assert } from "node:assert";
import { getClientIp } from "../anon-trial.js";
describe("getClientIp", () => {
test("uses req.ip (Express's trust-proxy-resolved client address)", () => {
assert.equal(getClientIp({ ip: "203.0.113.7" }), "203.0.113.7");
});
test("strips the IPv4-mapped IPv6 prefix", () => {
assert.equal(getClientIp({ ip: "::ffff:203.0.113.7" }), "203.0.113.7");
});
test("falls back to the socket address when req.ip is absent", () => {
assert.equal(
getClientIp({ socket: { remoteAddress: "::ffff:198.51.100.9" } }),
"198.51.100.9",
);
});
test("does NOT trust a raw client-supplied X-Forwarded-For header", () => {
// Express, not getClientIp, decides the client IP from trust proxy. A
// header Express hasn't blessed must be ignored — so with no req.ip we
// fall through to the socket address, never the spoofed header value.
const spoofed = {
headers: { "x-forwarded-for": "1.2.3.4" },
socket: { remoteAddress: "203.0.113.7" },
};
assert.equal(getClientIp(spoofed), "203.0.113.7");
});
});
+88
View File
@@ -0,0 +1,88 @@
// Tests for server/audio.js — focused on the SSRF guard around
// downloadPodcastAudio (a fully user-controlled outbound fetch). The
// ffprobe/ffmpeg helpers need real media + binaries and aren't covered here.
import { test, describe } from "node:test";
import { strict as assert } from "node:assert";
import { isBlockedAddress, downloadPodcastAudio } from "../audio.js";
const SINK = "/tmp/recap-audio-test-should-not-be-written";
describe("isBlockedAddress", () => {
test("blocks IPv4 loopback / private / link-local / reserved", () => {
for (const ip of [
"127.0.0.1", "127.1.2.3",
"10.0.0.1", "172.16.0.1", "172.31.255.255", "192.168.1.1",
"169.254.169.254", // cloud metadata
"100.64.0.1", // CGNAT
"0.0.0.0",
"224.0.0.1", "255.255.255.255",
]) {
assert.equal(isBlockedAddress(ip), true, `${ip} should be blocked`);
}
});
test("allows ordinary public IPv4 (incl. 172.x boundaries)", () => {
for (const ip of ["8.8.8.8", "1.1.1.1", "93.184.216.34", "172.15.0.1", "172.32.0.1"]) {
assert.equal(isBlockedAddress(ip), false, `${ip} should be allowed`);
}
});
test("blocks IPv6 loopback / ULA / link-local / multicast + IPv4-mapped privates", () => {
for (const ip of [
"::1", "::", "fc00::1", "fd12:3456::1", "fe80::1", "ff02::1",
"::ffff:127.0.0.1", "::ffff:169.254.169.254",
]) {
assert.equal(isBlockedAddress(ip), true, `${ip} should be blocked`);
}
});
test("blocks hex-encoded embedded-IPv4 IPv6 forms (mapped/SIIT/NAT64/6to4)", () => {
for (const ip of [
"::ffff:7f00:1", // IPv4-mapped 127.0.0.1, hex form
"::ffff:0:7f00:1", // SIIT 127.0.0.1
"64:ff9b::7f00:1", // NAT64 well-known prefix of 127.0.0.1
"2002:7f00:1::", // 6to4 of 127.0.0.1
]) {
assert.equal(isBlockedAddress(ip), true, `${ip} should be blocked`);
}
});
test("allows ordinary public IPv6", () => {
assert.equal(isBlockedAddress("2606:4700:4700::1111"), false);
assert.equal(isBlockedAddress("::ffff:8.8.8.8"), false);
});
test("blocks junk / empty / non-strings", () => {
assert.equal(isBlockedAddress(""), true);
assert.equal(isBlockedAddress(null), true);
assert.equal(isBlockedAddress("not-an-ip"), true);
});
});
describe("downloadPodcastAudio SSRF guard", () => {
test("rejects non-HTTP(S) schemes", async () => {
await assert.rejects(downloadPodcastAudio("file:///etc/passwd", SINK), /non-HTTP/);
await assert.rejects(downloadPodcastAudio("ftp://example.com/x", SINK), /non-HTTP/);
});
test("rejects internal / private destinations before connecting", async () => {
// 127.0.0.1:1 would refuse instantly if we connected; we must reject at
// the DNS-guard step instead (proving the guard fires before connect).
await assert.rejects(
downloadPodcastAudio("http://127.0.0.1:1/x", SINK),
/disallowed address/,
);
await assert.rejects(
downloadPodcastAudio("http://169.254.169.254/latest/meta-data/", SINK),
/disallowed address/,
);
});
test("rejects a malformed URL", async () => {
await assert.rejects(
downloadPodcastAudio("not a url at all", SINK),
/invalid podcast audio URL/,
);
});
});
+21
View File
@@ -138,3 +138,24 @@ describe("loadMeta + saveMeta", () => {
assert.deepEqual(loaded.uncategorized, []);
});
});
describe("safeFilename", () => {
// Exported so callers writing user content to disk (e.g. library import)
// can share the one guard instead of rolling their own.
test("returns valid ids unchanged", () => {
for (const id of ["abc123", "a_b-c", "VIDEOid123", "0"]) {
assert.equal(history.safeFilename(id), id);
}
});
test("rejects traversal, separators, and other unsafe chars", () => {
for (const bad of ["../../evil", "..", "a/b", "a\\b", "a.json", "foo bar", "", "a:b"]) {
assert.throws(() => history.safeFilename(bad), /invalid_session_id/);
}
});
test("rejects non-strings", () => {
assert.throws(() => history.safeFilename(123), /invalid_session_id/);
assert.throws(() => history.safeFilename(null), /invalid_session_id/);
});
});