// recap-relay/server/spark-control.js

// Optional service-discovery client. When the operator points
// relay_spark_control_url at a Spark Control (or compatible) endpoint
// that serves a JSON document describing the local LLM/STT/TTS
// services, the relay uses that to fill in any URL + model fields
// the operator left blank in their per-backend config.
//
// Expected JSON shape (Spark Control's /api/endpoints):
//   {
//     "vllm":     { ready: bool, base_url: "...", model: "...", openai_compat: bool },
//     "parakeet": { ready: bool, base_url: "...", kind: "stt", model: "..." },
//     "kokoro":   { ready: bool, base_url: "...", kind: "tts" }
//   }
//
// Cached for SHORT_CACHE_TTL_MS to keep relay responses snappy while
// still picking up model swaps on the operator's GPU box within a
// minute. Unreachable / failing endpoint falls back to operator-typed
// values silently — no boot-time hard dep.

import { lanFetch } from "./lan-fetch.js";

// How long a successfully fetched discovery snapshot counts as fresh.
// Once it is older than this, the next lookup triggers a refresh.
const SHORT_CACHE_TTL_MS = 60 * 1000; // 60s
// Abort deadline for a single discovery HTTP request. A cold-start
// lookup waits at most this long plus 500 ms for the first fetch.
const FETCH_TIMEOUT_MS = 3000;

// Single-slot cache: the most recent discovery document, the URL it
// belongs to, and the epoch-ms time it was stored. fetched_at stays 0
// and discovery stays null until a fetch for `url` has succeeded.
let cached = { fetched_at: 0, url: null, discovery: null };

// Last error from a discovery fetch attempt — surfaced in the
// dashboard so the operator can see when discovery is silently
// failing (the alternative is "operator-typed values silently win"
// which produces confusing "fetch failed" errors downstream when the
// override URL is also broken). Cleared on each successful fetch.
let lastError = { at: 0, message: null };

// Promise for the refresh currently in progress, or null when idle;
// it keeps concurrent lookups from each starting their own request.
// When a snapshot already exists, a lookup after expiry returns the
// stale snapshot immediately and lets the refresh finish in the
// background, so the next lookup gets fresh data without a round-trip
// in the critical path. Only a lookup with no snapshot at all waits
// (bounded) for the refresh to settle.
let inflight = null;

// Discovery status report for the admin dashboard. Purely a view over
// the module's cached state: it never starts a fetch.
//
// Returns { configured, url, lastFetched, lastError, services }:
//   - configured:  true when the (trimmed) configured URL is non-empty
//   - url:         the trimmed configured URL, or null when empty
//   - lastFetched: epoch ms at which the cached snapshot was stored,
//                  or 0 when the cache does not belong to this URL
//   - lastError:   { at, message } of the most recent failed fetch,
//                  reported only when it is newer than the cached
//                  snapshot; null otherwise
//   - services:    one summary per entry of the cached discovery
//                  document ({ ready, base_url, model, kind }), or
//                  null when nothing is cached for this URL
//
// Everything except `configured` and `url` is reported only when the
// cache was populated for exactly this URL, so the operator can spot a
// failing discovery (typo, unreachable host, untrusted TLS cert, ...)
// from the Settings tab instead of grepping container logs.
export function getSparkDiscoveryStatus(configuredUrl) {
  const url = (configuredUrl || "").trim();
  const matchesCache = cached.url === url;
  const textOrNull = (value) => (value ? String(value) : null);

  // Only surface an error that happened after the snapshot was stored.
  let failure = null;
  if (matchesCache && lastError.at > cached.fetched_at) {
    failure = { at: lastError.at, message: lastError.message };
  }

  // Reduce each discovered entry to the four fields the dashboard shows.
  let services = null;
  if (matchesCache && cached.discovery) {
    const summaries = Object.entries(cached.discovery).map(([name, entry]) => [
      name,
      {
        ready: Boolean(entry?.ready),
        base_url: textOrNull(entry?.base_url),
        model: textOrNull(entry?.model),
        kind: textOrNull(entry?.kind),
      },
    ]);
    services = Object.fromEntries(summaries);
  }

  return {
    configured: Boolean(url),
    url: url || null,
    lastFetched: matchesCache ? cached.fetched_at : 0,
    lastError: failure,
    services,
  };
}

// URL that the promise stored in `inflight` is fetching. Lets a lookup
// tell a refresh for the current URL apart from one that was started
// for a URL the operator has since replaced.
let inflightUrl = null;

// Return the discovery document served at `url`, going through the
// module-level single-slot cache.
//
//   - Empty / missing URL        → null, nothing is fetched.
//   - Fresh snapshot cached      → returned immediately.
//   - Stale snapshot cached      → returned immediately; a refresh is
//                                  started in the background.
//   - Nothing cached (cold)      → waits for the refresh to settle, but
//                                  never longer than FETCH_TIMEOUT_MS +
//                                  500 ms; resolves to null if the
//                                  fetch failed or did not finish.
//
// Never rejects: fetch failures are recorded in `lastError` and logged.
// The URL is trimmed so the cache key matches the trimmed URL that
// getSparkDiscoveryStatus compares against.
export async function getSparkControlDiscovery(url) {
  const target = typeof url === "string" ? url.trim() : url;
  if (!target) return null;

  // If the URL changed (operator updated config), invalidate.
  if (cached.url !== target) {
    cached = { fetched_at: 0, url: target, discovery: null };
  }
  const fresh = Date.now() - cached.fetched_at < SHORT_CACHE_TTL_MS;
  if (fresh && cached.discovery) {
    return cached.discovery;
  }

  // Stale or never fetched: make sure a refresh for THIS url is running.
  // A refresh still in flight for a previous URL does not count — reusing
  // it would hand that URL's document to this caller.
  if (!inflight || inflightUrl !== target) {
    const refresh = fetchDiscovery(target)
      .then((discovery) => {
        // Commit only if the operator has not switched URLs meanwhile.
        if (cached.url === target) {
          cached = { fetched_at: Date.now(), url: target, discovery };
          lastError = { at: 0, message: null };
        }
        return discovery;
      })
      .catch((err) => {
        const msg = err?.message || String(err);
        // Same guard: an error for a replaced URL must not be reported
        // against the current one.
        if (cached.url === target) {
          lastError = { at: Date.now(), message: msg };
        }
        console.warn(`[spark-control] discovery fetch failed for ${target}: ${msg}`);
        return null;
      })
      .finally(() => {
        // Release the slot only if a newer refresh has not taken it over.
        if (inflight === refresh) {
          inflight = null;
          inflightUrl = null;
        }
      });
    inflight = refresh;
    inflightUrl = target;
  }

  // A stale snapshot is good enough; the refresh finishes in the background.
  if (cached.discovery) {
    return cached.discovery;
  }

  // On cold start, wait briefly for the first fetch so we don't serve
  // a request with null discovery if Spark Control is alive.
  const pending = inflight;
  let timer;
  let settled = null;
  try {
    settled = await Promise.race([
      pending,
      new Promise((resolve) => {
        timer = setTimeout(resolve, FETCH_TIMEOUT_MS + 500, null);
      }),
    ]);
  } catch {
    // The refresh chain handles its own failures; this only keeps the
    // function's never-rejects guarantee if that handler itself threw.
  } finally {
    // Don't leave the grace timer pending once the race is decided.
    clearTimeout(timer);
  }

  if (cached.url === target) {
    return cached.discovery;
  }
  // The cache was re-pointed at another URL while we waited; hand back
  // what our own fetch produced rather than the other URL's data.
  return settled ?? null;
}

// Perform one discovery request against `url` and resolve to the
// parsed JSON body. Rejects with Error("HTTP <status>") when the
// response is not ok; rejections from lanFetch or from JSON parsing
// propagate unchanged. The request is aborted after FETCH_TIMEOUT_MS.
async function fetchDiscovery(url) {
  // lanFetch (rather than the global fetch) is used because, per the
  // notes in lan-fetch.js, it goes through an undici Agent with
  // rejectUnauthorized:false, so Spark Control's StartOS Local
  // Intermediate CA cert (the standard Start9 LAN setup) doesn't fail
  // TLS validation inside the relay container. Plain-http LAN URLs
  // take the same path without TLS overhead. Public-internet calls
  // (Gemini, Keysat, BTCPay) keep using the global fetch with full
  // cert validation.
  const requestOptions = {
    signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
    redirect: "follow",
  };
  const response = await lanFetch(url, requestOptions);
  if (response.ok) {
    const body = await response.json();
    return body;
  }
  throw new Error(`HTTP ${response.status}`);
}

// Request kind → the discovery entry that backs it, plus the message
// reported when that entry is listed with ready=false.
//
//   "transcribe" → .parakeet (STT)
//   "analyze"    → .vllm     (OpenAI-compatible LLM)
//   "tts"        → .kokoro   (Kokoro TTS; the entry carries no `model`
//                  field — voice is chosen per-request by the caller,
//                  so a ready kokoro with a base_url is enough)
const SPARK_SERVICE_BY_KIND = new Map([
  [
    "transcribe",
    {
      key: "parakeet",
      offline:
        "Spark Control reports STT (parakeet) is offline. Check spark-control — a model swap may be in progress, or the wrapper needs attention.",
    },
  ],
  [
    "analyze",
    {
      key: "vllm",
      offline:
        "Spark Control reports the LLM (vllm) is offline. Check spark-control — load a model via the dashboard or wait for an in-progress swap to finish.",
    },
  ],
  [
    "tts",
    {
      key: "kokoro",
      offline:
        "Spark Control reports TTS (kokoro) is offline. Check spark-control — the Kokoro container may be down or restarting.",
    },
  ],
]);

// Resolve the live state of one service from the Spark Control
// discovery document at `url`. Always resolves (never rejects) to one
// of three shapes:
//
//   { status: "ready", base_url, model }
//     The entry has a base_url and is not flagged ready=false. `model`
//     is null when the entry does not name one.
//
//   { status: "not_ready", reason }
//     The entry is listed with a base_url but ready is exactly false
//     (model swap in progress, no model loaded, wrapper down, ...).
//     Route handlers should fail fast with `reason` so the user knows
//     what to fix on Spark Control instead of seeing a generic 500.
//
//   { status: "unknown" }
//     No URL, no discovery document available, unrecognised `kind`,
//     or the entry is missing or has no base_url. The caller should
//     fall back to operator-typed config.
export async function getSparkServiceState(url, kind) {
  const unknown = () => ({ status: "unknown" });
  if (!url) return unknown();

  let discovery;
  try {
    discovery = await getSparkControlDiscovery(url);
  } catch {
    return unknown();
  }
  if (!discovery) return unknown();

  const service = SPARK_SERVICE_BY_KIND.get(kind);
  const entry = service ? discovery[service.key] : null;
  if (!entry || !entry.base_url) return unknown();

  // Only an explicit `false` counts as offline.
  if (entry.ready === false) {
    return { status: "not_ready", reason: service.offline };
  }

  const { base_url, model } = entry;
  return { status: "ready", base_url, model: model || null };
}

// Backward-compatible wrapper around getSparkServiceState with the
// older, two-outcome contract: resolves to { base_url, model } when the
// service state is "ready" and to null for every other state. Because
// "not configured" and "configured but not ready" both collapse to
// null here, new code should call getSparkServiceState directly.
export async function getDiscoveredEndpoint(url, kind) {
  const { status, base_url, model } = await getSparkServiceState(url, kind);
  return status === "ready" ? { base_url, model } : null;
}