Files
recap-relay/server/sanitize-error.js

68 lines
3.0 KiB
JavaScript

// Strips operator-internal implementation detail from error messages
// before they're surfaced to public clients (Recap app, Recaps cloud,
// any other SDK consumer). The relay's hardware backend wraps
// Spark Control delegate names + internal IPs + wire-level details
// into its thrown errors, which is great for the operator's relay
// logs and audit table but those tokens shouldn't leak to whoever's
// summarizing a YouTube link.
//
// Applies a two-stage scrub:
// 1. Token replacement — known operator-private terms swap for
// generic equivalents ("Parakeet" → "the transcribe service",
// "Spark Control" → "the operator hardware", etc.). Stays
// grammatical and still readable.
// 2. Network-detail redaction — local IPs and internal URLs
// collapse to "(internal)" so a client never sees an operator's
// LAN topology. Public hostnames + Gemini's googleapis.com URLs
// stay intact (they're not operator-private).
//
// The ORIGINAL message stays available in the relay's recordCall
// audit row + console logs — only the client-facing surface gets
// sanitized.
// Ordered [pattern, replacement] pairs used to swap operator-private
// component names for generic, still-grammatical equivalents.
// Every pattern is global (replace all occurrences) and
// case-insensitive, so one entry covers every casing of a name —
// including the lowercase model ids that runtimes tend to echo back.
const TOKEN_MAP = [
  // Spark Control + its delegates
  [/Spark Control/gi, "the operator hardware"],
  // Possessive form is swallowed so the replacement stays grammatical.
  [/spark-control(?:'s)?/gi, "the operator hardware"],
  [/sparkcontrol/gi, "the operator hardware"],
  // Parakeet (NVIDIA STT model wrapper)
  [/Parakeet/gi, "the transcribe service"],
  // vLLM / Gemma / other LLM runners on operator hardware.
  // Word boundaries keep these short names from matching inside
  // unrelated longer words.
  [/\bvLLM\b/gi, "the analyze service"],
  [/\bGemma\b/gi, "the analyze service"],
  // Diarization stack
  [/Sortformer/gi, "the diarization service"],
  [/TitaNet/gi, "the diarization service"],
];
// Match an IPv4 address — local-network (192.168/16, 10/8, 172.16-31)
// and 127/8 (loopback) — plus an optional ":port" suffix. We don't
// redact public IPv4s in case a public-facing error genuinely
// references one (rare for the relay but possible).
//
// Each alternative below spells out the first TWO octets so that the
// shared ".d.d" tail always completes a full four-octet address. With a
// one-octet prefix for 10/8 and 127/8 the pattern would stop after
// three octets and leave the last octet and port in the output.
const PRIVATE_IP_RE =
  /\b(?:192\.168|10\.\d{1,3}|172\.(?:1[6-9]|2\d|3[01])|127\.\d{1,3})\.\d{1,3}\.\d{1,3}(?::\d+)?/g;
// Match an http(s) URL whose host is a private IP OR is *.local.
// Both are operator-LAN-only. Public hostnames stay readable so a
// Gemini-side error referencing generativelanguage.googleapis.com
// keeps its diagnostic value.
//
// Shape: scheme, host, optional ":port", then the rest of the URL up to
// whitespace or a closing paren/quote.
//  - IP hosts: every alternative covers two octets so the shared tail
//    completes a full dotted quad (192.168/16, 10/8, 172.16-31, 127/8).
//  - .local hosts: one or more dot-separated labels followed by "local",
//    so nested names like "gpu.spark.local" are covered too.
//  - The negative lookahead requires the host to END there, so a public
//    host whose label merely starts with "local" (e.g. "localytics") is
//    not treated as internal.
const PRIVATE_URL_RE =
  /https?:\/\/(?:(?:192\.168|10\.\d{1,3}|172\.(?:1[6-9]|2\d|3[01])|127\.\d{1,3})\.\d{1,3}\.\d{1,3}|(?:[A-Za-z0-9-]+\.)+local)(?![A-Za-z0-9-])(?::\d+)?[^\s)'"]*/g;
/**
 * Produce a client-safe version of an error message.
 *
 * Redacts private-network URLs and IPs, then swaps operator-private
 * component names for generic equivalents, and finally normalizes
 * whitespace. The caller's original value is not modified.
 *
 * @param {unknown} input - An error message string, an Error (or any
 *   object with a `message`), or any other value; `null`/`undefined`
 *   yield an empty string.
 * @returns {string} The sanitized, trimmed message.
 */
export function sanitizeErrorForClient(input) {
  if (input == null) return "";
  const raw = typeof input === "string" ? input : input.message || String(input);
  // `message` is not guaranteed to be a string (thrown plain objects can
  // carry numbers, arrays, ...). Coerce so the `.replace` calls below
  // cannot throw from inside an error-reporting path.
  let s = typeof raw === "string" ? raw : String(raw);
  // URL-shaped private hosts go first so the IP regex doesn't chew up
  // half of the URL before the full URL pattern fires.
  s = s.replace(PRIVATE_URL_RE, "(internal)");
  s = s.replace(PRIVATE_IP_RE, "(internal)");
  for (const [pattern, replacement] of TOKEN_MAP) {
    s = s.replace(pattern, replacement);
  }
  // Collapse any runs of whitespace left behind by the substitutions.
  return s.replace(/\s{2,}/g, " ").trim();
}