// recap-relay/server/hardware-config.js

// Resolves which (URL, model) the relay should use for each operator-
// hardware pipeline. Spark Control is the SINGLE host the relay
// talks to — it owns transcribe (/v1/audio/transcriptions),
// diarize-chunk (/api/audio/diarize-chunk), AND analyze
// (/v1/chat/completions, OpenAI-compatible) on the same port. The
// discovery JSON Spark Control serves at /api/endpoints lists the
// backing services SC delegates to internally (e.g. a Parakeet
// wrapper, a vLLM instance) along with their ready state + currently-
// loaded model name; the relay reads it ONLY to learn (a) whether
// each service is ready, and (b) which model name to send in the
// upstream request body. The per-service base URLs in the discovery
// JSON are informational — clients hit Spark Control directly, not
// those internal URLs.
//
// Resolution outcomes, per pipeline (transcribe, analyze, tts):
//
//   1. Spark Control says ready  → use the SC base URL + the model
//                                  name discovery reports for that
//                                  service.
//
//   2. SC says not_ready         → return null URL + a blocked_reason
//                                  string. Route handlers surface
//                                  this to the user instead of trying
//                                  a doomed call.
//
//   3. SC unreachable or no URL  → null URL; planBackend treats the
//                                  hardware path as not configured.
//
// History:
//   - pre-v0.2.84: operator-typed override URLs (relay_parakeet_*,
//     relay_gemma_*) won over discovery. Removed.
//   - pre-v0.2.85: discovery's per-service base_urls were used
//     directly, so transcribe went to a different host (the Parakeet
//     wrapper) than diarize (Spark Control). That meant the relay
//     was talking to two hosts for one logical operation, and the
//     transcribe wrapper didn't have the diarize endpoint. Now: SC
//     is the single host. The per-service base_urls in discovery
//     are informational — used only for the operator's awareness in
//     the dashboard's Service Discovery health line.

import { getSparkServiceState } from "./spark-control.js";

/**
 * Resolve the (URL, model) descriptor for each operator-hardware
 * pipeline: transcribe, analyze and tts.
 *
 * @param {object} [cfg] - Relay configuration. Only
 *   `relay_spark_control_url` is read. A missing config object, or a
 *   value that is not a string, is treated like an empty URL so the
 *   hardware path resolves as "not configured" instead of throwing.
 * @returns {Promise<{transcribe: object, analyze: object, tts: object,
 *   sparkBase: string}>} One resolveOne() result per pipeline plus the
 *   normalised Spark Control base URL ("" when no URL is configured).
 */
export async function resolveHardwareConfig(cfg) {
  const rawUrl = cfg == null ? undefined : cfg.relay_spark_control_url;
  // Discovery URL exactly as configured (minus surrounding whitespace);
  // this is what gets handed to the discovery lookup.
  const sparkUrl = typeof rawUrl === "string" ? rawUrl.trim() : "";

  // Spark Control base URL: the configured URL with the /api/endpoints
  // suffix removed. Trailing slashes are stripped both before the
  // suffix test (so ".../api/endpoints//" still matches) and after it
  // (so "...//api/endpoints" does not leave a dangling "/").
  const sparkBase = sparkUrl
    .replace(/\/+$/, "")
    .replace(/\/api\/endpoints$/, "")
    .replace(/\/+$/, "");

  // NOTE(review): the three lookups are independent but intentionally
  // kept sequential. Whether getSparkServiceState de-duplicates
  // concurrent discovery fetches is not visible from this file —
  // confirm before switching to Promise.all.
  const transcribe = await resolveOne({ sparkUrl, sparkBase, kind: "transcribe" });
  const analyze = await resolveOne({ sparkUrl, sparkBase, kind: "analyze" });
  const tts = await resolveOne({ sparkUrl, sparkBase, kind: "tts" });

  return {
    transcribe,
    analyze,
    tts,
    sparkBase,
  };
}

/**
 * Resolve one pipeline's descriptor from Spark Control's discovery
 * state.
 *
 * @param {object} args
 * @param {string} args.sparkUrl - Configured discovery URL, passed to
 *   getSparkServiceState unchanged.
 * @param {string} args.sparkBase - Spark Control base URL, returned as
 *   `url` when the service is ready.
 * @param {string} args.kind - Service kind to look up in discovery.
 * @returns {Promise<object>} `{ url, model, source }`, plus
 *   `blocked_reason` when the service reports "not_ready".
 */
async function resolveOne({ sparkUrl, sparkBase, kind }) {
  // Shape shared by "nothing configured" and "status not recognised".
  const unresolved = () => ({ url: null, model: null, source: null });

  // Without both the discovery URL and the base there is nothing to
  // ask, so skip the discovery lookup entirely.
  if (!(sparkUrl && sparkBase)) {
    return unresolved();
  }

  const discovered = await getSparkServiceState(sparkUrl, kind);

  switch (discovered.status) {
    case "ready":
      // The URL is always the Spark Control base, never a delegate's
      // own base_url; only the model name is taken from discovery so
      // the upstream request carries the right `model` field.
      return {
        url: sparkBase,
        model: discovered.model,
        source: "spark-control",
      };

    case "not_ready":
      // Hand the reason back so the caller can report it rather than
      // attempt a call that cannot succeed.
      return {
        url: null,
        model: null,
        source: "spark-control",
        blocked_reason: discovered.reason,
      };

    default:
      // Any other status (e.g. "unknown": discovery unreachable or the
      // service is not listed) resolves to a null URL.
      return unresolved();
  }
}