// Passive error reporting to Spark Control's /api/health-event
// endpoint. When a hardware-backed transcribe or analyze call fails
// (Parakeet 503, vLLM model not found, network timeout to Gemma,
// etc.), the relay fires a small POST so the operator's Spark
// Control dashboard can surface the failure in real time — without
// waiting for its own 5s health-check poll to catch the outage.
//
// Fire-and-forget: the request is kicked off but NOT awaited by the
// caller's error path, so a failure to report to Spark Control never
// adds latency to the user's transcribe-failed response.
//
// Reuses the operator's `relay_spark_control_url` config field
// (already used for /api/endpoints discovery). The relay just swaps
// the path for /api/health-event, keeping the operator config
// surface to one URL.
|
|
import { getConfigSnapshot } from "./config.js";
|
|
|
|
// Upper bound, in milliseconds, on a single health-event POST before
// it is aborted via AbortSignal.timeout.
const REPORT_TIMEOUT_MS = 3_000;

/**
 * Report the outcome of a hardware-backed call to Spark Control's
 * /api/health-event endpoint.
 *
 * Fire-and-forget: the function returns immediately and the actual
 * work is deferred with setImmediate, so the caller's error path is
 * never delayed. Every failure (missing or invalid config, network
 * error, timeout) is swallowed on purpose — observability hiccups
 * must not bleed into the relay's own error handling.
 *
 * @param {object} event
 * @param {string} event.service - Service identifier, e.g. "parakeet",
 *   "vllm", "gemma"; forwarded as-is in the payload.
 * @param {boolean} [event.ok=false] - Whether the call succeeded.
 * @param {string} [event.error] - Error text; truncated to 280
 *   characters. Non-string values are sent as null.
 * @param {number} [event.ms] - Call duration. Non-number values are
 *   sent as null.
 * @returns {void}
 */
export function reportHealthEvent({ service, ok = false, error, ms }) {
  // Longest error string forwarded in the payload.
  const maxErrorLength = 280;

  // Defer the work so nothing below runs on the caller's stack.
  setImmediate(async () => {
    try {
      const cfg = await getConfigSnapshot();
      const base = (cfg.relay_spark_control_url || "").trim();
      if (!base) return; // not configured — silent no-op

      // Strip any path the operator may have configured (they
      // typically set the /api/endpoints URL) and append health-event.
      let origin;
      try {
        origin = new URL(base).origin;
      } catch {
        return; // unparseable URL — nothing sensible to report to
      }
      const url = `${origin}/api/health-event`;

      const response = await fetch(url, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          service,
          ok,
          source: "recap-relay",
          error: typeof error === "string" ? error.slice(0, maxErrorLength) : null,
          ms: typeof ms === "number" ? ms : null,
        }),
        signal: AbortSignal.timeout(REPORT_TIMEOUT_MS),
      });

      // The reply is never read. Cancel the body so the underlying
      // connection is released right away instead of lingering until
      // the response object is garbage-collected.
      await response.body?.cancel();
    } catch {
      // Best-effort — swallow.
    }
  });
}
|