// Optional service-discovery client. When the operator points // relay_spark_control_url at a Spark Control (or compatible) endpoint // that serves a JSON document describing the local LLM/STT/TTS // services, the relay uses that to fill in any URL + model fields // the operator left blank in their per-backend config. // // Expected JSON shape (Spark Control's /api/endpoints): // { // "vllm": { ready: bool, base_url: "...", model: "...", openai_compat: bool }, // "parakeet": { ready: bool, base_url: "...", kind: "stt", model: "..." }, // "kokoro": { ready: bool, base_url: "...", kind: "tts" } // } // // Cached for SHORT_CACHE_TTL_MS to keep relay responses snappy while // still picking up model swaps on the operator's GPU box within a // minute. Unreachable / failing endpoint falls back to operator-typed // values silently — no boot-time hard dep. import { lanFetch } from "./lan-fetch.js"; const SHORT_CACHE_TTL_MS = 60 * 1000; // 60s const FETCH_TIMEOUT_MS = 3000; let cached = { fetched_at: 0, url: null, discovery: null }; // Last error from a discovery fetch attempt — surfaced in the // dashboard so the operator can see when discovery is silently // failing (the alternative is "operator-typed values silently win" // which produces confusing "fetch failed" errors downstream when the // override URL is also broken). Cleared on each successful fetch. let lastError = { at: 0, message: null }; // Fire-and-forget background refresh while serving from cache — // callers never block on the network. The first call after the cache // expires returns the stale snapshot but kicks off a refresh in the // background, so the next call gets the fresh data without ever // having paid a round-trip in the critical path. let inflight = null; // Operator-visible discovery snapshot used by the admin dashboard. 
// Returns: { configured, url, lastFetched, lastError, services }
//   - configured:  whether the operator set a discovery URL at all
//   - url:         the configured URL (or null)
//   - lastFetched: epoch ms of last successful fetch (or 0)
//   - lastError:   { at, message } of last failed fetch (cleared on success)
//   - services:    parsed map of { parakeet: {...}, vllm: {...}, ... }
//                  when the last fetch succeeded; null otherwise.
//
// `configured` reflects the operator's config; the other fields
// reflect what actually happened on the network. Read-only — never
// triggers a fetch, just reports the cached state. The dashboard's
// Settings tab renders this so the operator can spot a silently-
// failing discovery (typo, unreachable host, TLS cert that the
// container doesn't trust, etc.) without grepping container logs.
export function getSparkDiscoveryStatus(configuredUrl) {
  const url = (configuredUrl || "").trim();
  // Everything except `configured`/`url` is only meaningful when the
  // cached snapshot belongs to the URL being asked about.
  const isCurrent = cached.url === url;
  const failedSinceLastSuccess = isCurrent && lastError.at > cached.fetched_at;

  return {
    configured: !!url,
    url: url || null,
    lastFetched: isCurrent ? cached.fetched_at : 0,
    lastError: failedSinceLastSuccess
      ? { at: lastError.at, message: lastError.message }
      : null,
    services:
      isCurrent && cached.discovery
        ? Object.fromEntries(
            Object.entries(cached.discovery).map(([name, service]) => [
              name,
              summarizeService(service),
            ])
          )
        : null,
  };
}

// Reduce one raw discovery entry to the normalized fields shown on the
// dashboard. Missing or falsy fields become null (false for `ready`).
function summarizeService(service) {
  return {
    ready: !!service?.ready,
    base_url: service?.base_url ? String(service.base_url) : null,
    model: service?.model ? String(service.model) : null,
    kind: service?.kind ? String(service.kind) : null,
  };
}

// URL the `inflight` refresh was started for. Lets a caller notice that
// the pending refresh targets a URL the operator has since replaced.
let inflightUrl = null;

// Return the discovery document for `url`, or null when no URL is
// configured or nothing could be fetched.
//
//   - Fresh cache (younger than SHORT_CACHE_TTL_MS): returned as-is.
//   - Stale cache: returned immediately; a background refresh is started.
//   - Cold start (no snapshot): waits up to FETCH_TIMEOUT_MS + 500 ms for
//     the first fetch so a live Spark Control is not reported as absent.
//
// Never rejects: fetch failures are recorded in `lastError` and logged.
export async function getSparkControlDiscovery(url) {
  // Trim so the cache key matches what getSparkDiscoveryStatus looks up.
  const endpoint = typeof url === "string" ? url.trim() : url;
  if (!endpoint) return null;

  // If the URL changed (operator updated config), invalidate.
  if (cached.url !== endpoint) {
    cached = { fetched_at: 0, url: endpoint, discovery: null };
  }

  const isFresh = Date.now() - cached.fetched_at < SHORT_CACHE_TTL_MS;
  if (isFresh && cached.discovery) {
    return cached.discovery;
  }

  // Stale (or never fetched). Start a refresh unless one is already
  // running for this same URL. A refresh still pending for a previous
  // URL must not stand in for this one.
  if (!inflight || inflightUrl !== endpoint) {
    refreshDiscovery(endpoint);
  }

  // On cold start, wait briefly for the first fetch so we don't serve
  // a request with null discovery if Spark Control is alive.
  if (!cached.discovery) {
    await waitAtMost(inflight, FETCH_TIMEOUT_MS + 500);
  }

  // The operator may have switched URLs while we were waiting; never hand
  // back a document that belongs to a different endpoint.
  return cached.url === endpoint ? cached.discovery : null;
}

// Start a refresh for `url` and register it as the in-flight attempt.
// The returned promise never rejects.
function refreshDiscovery(url) {
  const attempt = fetchDiscovery(url)
    .then((discovery) => {
      // Discard results for a URL that is no longer the configured one;
      // committing them would overwrite the newer URL's cache entry.
      if (cached.url !== url) return;
      cached = { fetched_at: Date.now(), url, discovery };
      lastError = { at: 0, message: null };
    })
    .catch((err) => {
      const msg = err?.message || String(err);
      // Only surface the failure on the dashboard if it concerns the
      // currently configured URL.
      if (cached.url === url) {
        lastError = { at: Date.now(), message: msg };
      }
      console.warn(`[spark-control] discovery fetch failed for ${url}: ${msg}`);
    })
    .finally(() => {
      // A newer refresh may have replaced this one; leave it registered.
      if (inflight === attempt) {
        inflight = null;
        inflightUrl = null;
      }
    });

  inflight = attempt;
  inflightUrl = url;
  return attempt;
}

// Wait for `promise` to settle, but no longer than `ms` milliseconds.
// The timer is always cleared so it cannot linger after an early settle.
async function waitAtMost(promise, ms) {
  let timer;
  const timeout = new Promise((resolve) => {
    timer = setTimeout(resolve, ms);
  });
  try {
    await Promise.race([promise, timeout]);
  } catch {
    // Deliberately ignored: the refresh chain records and logs its own
    // failures, and discovery is best-effort by design.
  } finally {
    clearTimeout(timer);
  }
}

// Fetch and parse the discovery document. Throws on a non-2xx status,
// on a body that is not valid JSON, or on JSON that is not an object.
async function fetchDiscovery(url) {
  // lanFetch uses an undici Agent with rejectUnauthorized:false so
  // that Spark Control's StartOS Local Intermediate CA cert (the
  // standard Start9 LAN setup) doesn't fail TLS validation in the
  // relay container. Plain-http LAN URLs work through the same path
  // without TLS overhead. Public-internet calls (Gemini, Keysat,
  // BTCPay) keep using the global fetch with full cert validation
  // — see lan-fetch.js for the scope rationale.
  const res = await lanFetch(url, {
    signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
    redirect: "follow",
  });
  if (!res.ok) {
    throw new Error(`HTTP ${res.status}`);
  }

  const body = await res.json();
  // The rest of the module indexes the document by service name, so
  // anything other than a JSON object is treated as a failed fetch.
  if (body === null || typeof body !== "object" || Array.isArray(body)) {
    throw new Error("discovery response is not a JSON object");
  }
  return body;
}

// Given a Spark Control URL and a "kind" hint, return the live
// service state. Three outcomes:
//
//   { status: "ready", base_url, model }
//     Service is up + healthy. Use these values.
//
//   { status: "not_ready", reason }
//     Service is listed in discovery but ready=false. A model swap
//     might be in progress; or the operator hasn't loaded a model
//     yet; or the wrapper crashed. Route handlers should fail fast
//     with this message so the user knows what to fix on Spark
//     Control instead of seeing a generic 500.
//
//   { status: "unknown" }
//     Discovery URL not configured, not reachable, or the requested
//     service isn't in the response.
//     Caller should fall back to operator-typed config.
//
// An entry that explicitly reports ready=false is "not_ready" even if it
// carries no base_url, so the caller gets the actionable message instead
// of a silent fallback. An entry without a base_url that does not say
// ready=false is "unknown" because there is nothing usable in it.
//
// kind: "transcribe" → uses .parakeet (any STT-shaped entry)
// kind: "analyze"    → uses .vllm     (any OpenAI-compat LLM entry)
// kind: "tts"        → uses .kokoro   (Kokoro TTS entry; no `model`
//                      field — voice is chosen per-request by the
//                      caller, so a ready kokoro with a base_url is
//                      enough)
//
// Never rejects: a failing discovery lookup yields { status: "unknown" }.
export async function getSparkServiceState(url, kind) {
  if (!url) return { status: "unknown" };

  let discovery;
  try {
    discovery = await getSparkControlDiscovery(url);
  } catch {
    // Discovery is optional; any failure means "no information".
    return { status: "unknown" };
  }
  if (!discovery) return { status: "unknown" };

  const service = describeSparkService(kind);
  const entry = service ? discovery[service.key] : null;
  if (!entry) return { status: "unknown" };

  // Check readiness before base_url: an offline service may be listed
  // without an address and must still be reported as not ready.
  if (entry.ready === false) {
    return { status: "not_ready", reason: service.offlineReason };
  }
  if (!entry.base_url) return { status: "unknown" };

  return {
    status: "ready",
    base_url: entry.base_url,
    model: entry.model || null,
  };
}

// Map a caller-facing `kind` to the discovery key that serves it and the
// operator-facing message used when that service reports ready=false.
// Returns null for an unrecognized kind.
function describeSparkService(kind) {
  switch (kind) {
    case "transcribe":
      return {
        key: "parakeet",
        offlineReason:
          "Spark Control reports STT (parakeet) is offline. Check spark-control — a model swap may be in progress, or the wrapper needs attention.",
      };
    case "analyze":
      return {
        key: "vllm",
        offlineReason:
          "Spark Control reports the LLM (vllm) is offline. Check spark-control — load a model via the dashboard or wait for an in-progress swap to finish.",
      };
    case "tts":
      return {
        key: "kokoro",
        offlineReason:
          "Spark Control reports TTS (kokoro) is offline. Check spark-control — the Kokoro container may be down or restarting.",
      };
    default:
      return null;
  }
}

// Kept for backward compatibility — same signature as before, returns
// null when the service isn't ready. New code should call
// getSparkServiceState directly so it can distinguish "not configured"
// from "configured but not ready".
export async function getDiscoveredEndpoint(url, kind) {
  const state = await getSparkServiceState(url, kind);
  if (state.status !== "ready") return null;
  return { base_url: state.base_url, model: state.model };
}