// Resolves which (URL, model) the relay should use for each operator-
// hardware pipeline. Spark Control is the SINGLE host the relay
// talks to — it owns transcribe (/v1/audio/transcriptions),
// diarize-chunk (/api/audio/diarize-chunk), AND analyze
// (/v1/chat/completions, OpenAI-compatible) on the same port. The
// discovery JSON Spark Control serves at /api/endpoints lists the
// backing services SC delegates to internally (e.g. a Parakeet
// wrapper, a vLLM instance) along with their ready state + currently-
// loaded model name; the relay reads it ONLY to learn (a) whether
// each service is ready, and (b) which model name to send in the
// upstream request body. The per-service base URLs in the discovery
// JSON are informational — clients hit Spark Control directly, not
// those internal URLs.
//
// Priority order, per pipeline:
//
//   1. Spark Control says ready     → use the SC base URL + the model
//                                     name discovery reports for that
//                                     service.
//
//   2. SC says not_ready            → return null URL + a blocked_reason
//                                     string. Route handlers surface
//                                     this to the user instead of trying
//                                     a doomed call.
//
//   3. SC unreachable or no URL     → null URL; planBackend treats the
//                                     hardware path as not configured.
//
// History:
//   - pre-v0.2.84: operator-typed override URLs (relay_parakeet_*,
//     relay_gemma_*) won over discovery. Removed.
//   - pre-v0.2.85: discovery's per-service base_urls were used
//     directly, so transcribe went to a different host (the Parakeet
//     wrapper) than diarize (Spark Control). That meant the relay
//     was talking to two hosts for one logical operation, and the
//     transcribe wrapper didn't have the diarize endpoint. Now: SC
//     is the single host. The per-service base_urls in discovery
//     are informational — used only for the operator's awareness in
//     the dashboard's Service Discovery health line.

import { getSparkServiceState } from "./spark-control.js";
|
|
|
|
/**
 * Resolve the operator-hardware target for every pipeline stage.
 *
 * Reads `cfg.relay_spark_control_url`, derives the Spark Control base URL
 * from it, and resolves the `transcribe`, `analyze` and `tts` services
 * against that single host.
 *
 * @param {object} cfg - Relay configuration. Only
 *   `relay_spark_control_url` is read here; a missing or empty value means
 *   no Spark Control URL is configured.
 * @returns {Promise<{transcribe: object, analyze: object, tts: object, sparkBase: string}>}
 *   One `resolveOne` result per service, plus the normalised base URL
 *   (`""` when nothing is configured).
 */
export async function resolveHardwareConfig(cfg) {
  const sparkUrl = (cfg.relay_spark_control_url || "").trim();

  // Spark Control base URL: the configured URL with any run of trailing
  // slashes and a trailing /api/endpoints suffix removed. Slashes are
  // stripped again after the suffix so inputs such as
  // "http://host//api/endpoints" do not leave a dangling "/" that would
  // turn into "//" once a request path is appended. Used as the URL for
  // every operator-hardware call: transcribe, diarize-chunk, analyze.
  const sparkBase = sparkUrl
    .replace(/\/+$/, "")
    .replace(/\/api\/endpoints$/, "")
    .replace(/\/+$/, "");

  // Resolved one after another, in this fixed order.
  // NOTE(review): the three lookups look independent; running them with
  // Promise.all may be possible if getSparkServiceState tolerates
  // concurrent calls for the same URL — confirm before changing.
  const transcribe = await resolveOne({ sparkUrl, sparkBase, kind: "transcribe" });
  const analyze = await resolveOne({ sparkUrl, sparkBase, kind: "analyze" });
  const tts = await resolveOne({ sparkUrl, sparkBase, kind: "tts" });

  return {
    transcribe,
    analyze,
    tts,
    sparkBase,
  };
}
|
|
|
|
/**
 * Resolve a single operator-hardware service against Spark Control.
 *
 * @param {object} args
 * @param {string} args.sparkUrl - Configured Spark Control URL, passed
 *   unchanged to `getSparkServiceState`.
 * @param {string} args.sparkBase - Normalised Spark Control base URL that
 *   is returned as `url` when the service is ready.
 * @param {string} args.kind - Service to look up (the caller in this file
 *   passes "transcribe", "analyze" or "tts").
 * @returns {Promise<{url: (string|null), model: (string|null), source: (string|null), blocked_reason?: string}>}
 *   - ready:     `url` is `sparkBase`, `model` is the discovery-reported
 *                model (or `null` when none is reported).
 *   - not_ready: null `url`/`model` plus a non-empty `blocked_reason`.
 *   - otherwise: all-null result (not configured / unknown).
 */
async function resolveOne({ sparkUrl, sparkBase, kind }) {
  // No URL configured (or nothing left after normalisation): skip the
  // discovery lookup entirely.
  if (!sparkUrl || !sparkBase) {
    return { url: null, model: null, source: null };
  }

  // Tolerate a missing state object so it follows the "unknown" path
  // below instead of raising a TypeError on property access.
  const { status, model, reason } = (await getSparkServiceState(sparkUrl, kind)) || {};

  switch (status) {
    case "ready":
      return {
        // ALWAYS Spark Control's base URL — never the delegate's
        // base_url. SC owns the wire-facing endpoint; the delegate
        // URL is just where SC routes the request internally.
        url: sparkBase,
        // Model name comes from discovery so we send the right
        // `model` field upstream. Normalised to null when discovery
        // reports none, matching the shape of the other branches.
        model: model == null ? null : model,
        source: "spark-control",
      };

    case "not_ready":
      return {
        url: null,
        model: null,
        source: "spark-control",
        // Always a non-empty string so route handlers have something to
        // surface even when discovery gives no reason.
        blocked_reason: reason || `Spark Control reports the ${kind} service is not ready`,
      };

    default:
      // status === "unknown" (or anything unrecognised) → discovery
      // unreachable or that service isn't listed. Return null URL; the
      // route handler surfaces the error to the user.
      return { url: null, model: null, source: null };
  }
}
|