208 lines
8.2 KiB
JavaScript
208 lines
8.2 KiB
JavaScript
// Optional service-discovery client. When the operator points
// relay_spark_control_url at a Spark Control (or compatible) endpoint
// that serves a JSON document describing the local LLM/STT/TTS
// services, the relay uses that to fill in any URL + model fields
// the operator left blank in their per-backend config.
//
// Expected JSON shape (Spark Control's /api/endpoints):
// {
// "vllm": { ready: bool, base_url: "...", model: "...", openai_compat: bool },
// "parakeet": { ready: bool, base_url: "...", kind: "stt", model: "..." },
// "kokoro": { ready: bool, base_url: "...", kind: "tts" }
// }
//
// Cached for SHORT_CACHE_TTL_MS to keep relay responses snappy while
// still picking up model swaps on the operator's GPU box within a
// minute. Unreachable / failing endpoint falls back to operator-typed
// values silently — no boot-time hard dep.
|
|
|
|
import { lanFetch } from "./lan-fetch.js";
|
|
|
|
// Lifetime of a successful discovery snapshot before a refresh is
// attempted: one minute, in milliseconds.
const SHORT_CACHE_TTL_MS = 60_000;

// Abort budget for a single discovery HTTP request, in milliseconds.
const FETCH_TIMEOUT_MS = 3_000;

// Most recent successful snapshot:
//   fetched_at - epoch ms of the fetch that produced it (0 = never)
//   url        - the discovery URL the snapshot belongs to
//   discovery  - the parsed JSON document, or null before the first
//                successful fetch
let cached = { fetched_at: 0, url: null, discovery: null };

// Outcome of the latest failed discovery fetch, reset to
// { at: 0, message: null } by every successful one. The dashboard
// reads it so the operator can tell when discovery is failing; without
// it the operator-typed values silently win, and a broken override URL
// then shows up only as a confusing "fetch failed" further downstream.
let lastError = { at: 0, message: null };

// Tracks the discovery refresh that is currently running (null when
// none is). While a stale snapshot exists, callers are answered from it
// and the refresh completes in the background, so the following call
// sees fresh data without having paid a round-trip itself. Only a cold
// start, with no snapshot at all, waits briefly on this request.
let inflight = null;
|
|
|
|
// Condenses one raw discovery entry into the fixed shape the dashboard
// renders. Missing or falsy fields become false / null; anything else
// is coerced to a boolean (ready) or a string (the other three).
function summarizeDiscoveredService(entry) {
  return {
    ready: Boolean(entry?.ready),
    base_url: entry?.base_url ? String(entry.base_url) : null,
    model: entry?.model ? String(entry.model) : null,
    kind: entry?.kind ? String(entry.kind) : null,
  };
}

// Operator-visible discovery snapshot used by the admin dashboard.
//
// Parameter:
//   configuredUrl - the discovery URL from the operator's config; may
//                   be empty, null or undefined when discovery is off.
//
// Returns: { configured, url, lastFetched, lastError, services }
//   - configured:  whether the operator set a discovery URL at all
//   - url:         the configured URL, trimmed (or null)
//   - lastFetched: epoch ms of last successful fetch (or 0)
//   - lastError:   { at, message } of a failed fetch that is newer than
//                  the last successful one; null otherwise
//   - services:    map of { parakeet: {...}, vllm: {...}, ... } built
//                  from the cached document; null when there is none
//                  or it is not a JSON object
//
// `configured` reflects the operator's config; the other fields
// reflect what actually happened on the network. Read-only — never
// triggers a fetch, just reports the cached state, so the operator can
// spot a silently-failing discovery (typo, unreachable host, untrusted
// TLS cert, etc.) without grepping container logs.
export function getSparkDiscoveryStatus(configuredUrl) {
  // String() keeps a non-string truthy value from throwing on .trim().
  const url = String(configuredUrl || "").trim();

  // The fetch path stores the URL exactly as it was handed in, so a
  // config value carrying stray whitespace would never equal the
  // trimmed form used here. Compare both sides trimmed, otherwise a
  // working discovery is reported as "never fetched".
  const cachedUrl =
    typeof cached.url === "string" ? cached.url.trim() : cached.url;
  const isCurrent = url !== "" && cachedUrl === url;

  const hasNewerError = isCurrent && lastError.at > cached.fetched_at;

  // Only a plain JSON object can be walked as a service map; a string
  // or array would yield meaningless per-character / per-index rows.
  const discovery = isCurrent ? cached.discovery : null;
  const isServiceMap =
    discovery != null &&
    typeof discovery === "object" &&
    !Array.isArray(discovery);

  return {
    configured: url !== "",
    url: url || null,
    lastFetched: isCurrent ? cached.fetched_at : 0,
    lastError: hasNewerError
      ? { at: lastError.at, message: lastError.message }
      : null,
    services: isServiceMap
      ? Object.fromEntries(
          Object.entries(discovery).map(([name, entry]) => [
            name,
            summarizeDiscoveredService(entry),
          ]),
        )
      : null,
  };
}
|
|
|
|
// Returns the parsed discovery document for `url`, or null when no
// document is available (no URL, endpoint unreachable, cold start that
// timed out). Never rejects: fetch failures are recorded in lastError
// and logged with console.warn.
//
// Behavior by cache state:
//   - fresh snapshot  → returned immediately, no network.
//   - stale snapshot  → returned immediately; a refresh is started in
//                       the background if none is running.
//   - no snapshot     → a refresh is started and the caller waits for
//                       it, bounded by FETCH_TIMEOUT_MS + 500 ms.
//
// Parameter:
//   url - discovery endpoint; falsy means discovery is not configured.
export async function getSparkControlDiscovery(url) {
  if (!url) return null;

  // If the URL changed (operator updated config), invalidate. Any
  // request still running belongs to the previous URL: detach it so the
  // new URL gets its own fetch. The detached request's handlers below
  // notice that cached.url moved on and leave the state alone.
  if (cached.url !== url) {
    cached = { fetched_at: 0, url, discovery: null };
    inflight = null;
  }

  const isFresh = Date.now() - cached.fetched_at < SHORT_CACHE_TTL_MS;
  if (isFresh && cached.discovery) {
    return cached.discovery;
  }

  // Stale (or never fetched). Make sure exactly one refresh is running.
  if (!inflight) {
    const request = fetchDiscovery(url)
      .then((discovery) => {
        // A response for a URL that is no longer configured must not
        // overwrite the snapshot of the current one.
        if (cached.url !== url) return;
        cached = { fetched_at: Date.now(), url, discovery };
        lastError = { at: 0, message: null };
      })
      .catch((err) => {
        const msg = err?.message || String(err);
        // Likewise, only a failure of the current URL is shown to the
        // operator; a superseded request failing is just logged.
        if (cached.url === url) {
          lastError = { at: Date.now(), message: msg };
        }
        console.warn(`[spark-control] discovery fetch failed for ${url}: ${msg}`);
      })
      .finally(() => {
        // Do not clear a newer request that replaced this one.
        if (inflight === request) inflight = null;
      });
    inflight = request;
  }

  // On cold start, wait briefly for the first fetch so we don't serve
  // a request with null discovery if Spark Control is alive.
  if (!cached.discovery) {
    let timer;
    const deadline = new Promise((resolve) => {
      timer = setTimeout(resolve, FETCH_TIMEOUT_MS + 500);
    });
    try {
      await Promise.race([inflight, deadline]);
    } catch {
      // The refresh chain ends in its own .catch, so this is not
      // expected to run; kept so this function can never reject.
    } finally {
      // Without this the timer stays pending for the full interval
      // after every cold call, even when the fetch answered at once.
      clearTimeout(timer);
    }
  }

  // The URL may have been switched by another caller while this one was
  // waiting; never hand out a different endpoint's document.
  return cached.url === url ? cached.discovery : null;
}
|
|
|
|
// Performs one discovery request and returns the parsed JSON document.
//
// Parameter:
//   url - the discovery endpoint to GET.
// Returns: the response body as a plain object (service name → entry).
// Throws:  Error("HTTP <status>") on a non-2xx response, an Error when
//          the body is valid JSON but not an object, and whatever
//          lanFetch / res.json() raise (timeout abort, network error,
//          malformed JSON).
//
// lanFetch uses an undici Agent with rejectUnauthorized:false so
// that Spark Control's StartOS Local Intermediate CA cert (the
// standard Start9 LAN setup) doesn't fail TLS validation in the
// relay container. Plain-http LAN URLs work through the same path
// without TLS overhead. Public-internet calls (Gemini, Keysat,
// BTCPay) keep using the global fetch with full cert validation
// — see lan-fetch.js for the scope rationale.
async function fetchDiscovery(url) {
  const res = await lanFetch(url, {
    signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
    redirect: "follow",
  });
  if (!res.ok) {
    throw new Error(`HTTP ${res.status}`);
  }

  const body = await res.json();
  // The document is a map of service entries. Anything else (null, a
  // scalar, an array) would otherwise be recorded as a successful fetch
  // with no usable services, hiding the problem from the operator.
  if (body === null || typeof body !== "object" || Array.isArray(body)) {
    throw new Error("discovery response is not a JSON object");
  }
  return body;
}
|
|
|
|
// Resolves the live state of one service from the Spark Control
// discovery document.
//
// Parameters:
//   url  - discovery endpoint; falsy means discovery is not configured.
//   kind - which service to look up:
//            "transcribe" → the .parakeet entry (STT)
//            "analyze"    → the .vllm entry (OpenAI-compat LLM)
//            "tts"        → the .kokoro entry (Kokoro TTS; it carries
//                           no `model` field — voice is chosen
//                           per-request by the caller, so a ready
//                           kokoro with a base_url is enough)
//
// Resolves to one of three shapes:
//
//   { status: "ready", base_url, model }
//     The entry has a base_url and is not flagged ready:false. `model`
//     is null when the entry has none.
//
//   { status: "not_ready", reason }
//     The entry is listed but has ready === false (model swap in
//     progress, no model loaded yet, wrapper crashed, ...). Route
//     handlers should fail fast with `reason` so the user knows what to
//     fix on Spark Control instead of seeing a generic 500.
//
//   { status: "unknown" }
//     No URL, no discovery document, an unrecognised kind, or an entry
//     that is missing or has no base_url. Caller should fall back to
//     operator-typed config.
export async function getSparkServiceState(url, kind) {
  const unknown = () => ({ status: "unknown" });

  if (!url) return unknown();

  let discovery;
  try {
    discovery = await getSparkControlDiscovery(url);
  } catch {
    return unknown();
  }
  if (!discovery) return unknown();

  let entry;
  switch (kind) {
    case "transcribe":
      entry = discovery.parakeet;
      break;
    case "analyze":
      entry = discovery.vllm;
      break;
    case "tts":
      entry = discovery.kokoro;
      break;
    default:
      entry = null;
  }
  if (!entry || !entry.base_url) return unknown();

  // Only an explicit ready:false counts as down; a missing flag is
  // treated as ready.
  if (entry.ready !== false) {
    return {
      status: "ready",
      base_url: entry.base_url,
      model: entry.model || null,
    };
  }

  let reason;
  if (kind === "transcribe") {
    reason =
      "Spark Control reports STT (parakeet) is offline. Check spark-control — a model swap may be in progress, or the wrapper needs attention.";
  } else if (kind === "tts") {
    reason =
      "Spark Control reports TTS (kokoro) is offline. Check spark-control — the Kokoro container may be down or restarting.";
  } else {
    reason =
      "Spark Control reports the LLM (vllm) is offline. Check spark-control — load a model via the dashboard or wait for an in-progress swap to finish.";
  }
  return { status: "not_ready", reason };
}
|
|
|
|
// Backward-compatible wrapper around getSparkServiceState with the
// signature older callers rely on. Resolves to { base_url, model } when
// the service state is "ready" and to null for every other status, so
// it cannot tell "not configured" apart from "configured but not
// ready". New code should call getSparkServiceState directly.
export async function getDiscoveredEndpoint(url, kind) {
  const { status, base_url, model } = await getSparkServiceState(url, kind);
  return status === "ready" ? { base_url, model } : null;
}
|