v0.24.0:0 - configurable cluster topology (vllm container name, hide services, second-vllm monitor)

Make the cluster topology configurable so that an adopter whose cluster is
wired differently (vLLM on both Sparks, port 8000, a different container
name, no Parakeet) can monitor it without forking. Covers the OpenClaw
report P4/P5/#6.

- VLLM_CONTAINER override (default vllm_node), validated at the boundary
  and quote_arg-quoted into the swap log-tail + pre-flight validator exec.
- DISABLED_SERVICES list: hidden services show no tile and are skipped by
  status/deep-health/connectivity probes (kills the Parakeet-on-8000
  collision).
- kind: vllm custom service monitors a second Spark's vLLM via the shared
  probe_vllm_endpoint; /api/endpoints gains a disabled flag.

The swap mechanism is intentionally not generalized to raw docker run (that's
coordination, roadmap item 4).
This commit is contained in:
Keysat
2026-06-17 23:03:33 -05:00
parent 90394f891b
commit 26070eb191
17 changed files with 304 additions and 26 deletions
+20 -8
View File
@@ -20,7 +20,7 @@ from .llm_proxy import build_router as build_llm_router
from .embeddings_proxy import build_router as build_embeddings_router
from .redaction_gateway import build_router as build_redaction_router, MapStore
from .hardware import HardwareProbe
from .health import check_kokoro, check_parakeet, check_vllm, check_embeddings, check_qdrant
from .health import check_kokoro, check_parakeet, check_vllm, check_embeddings, check_qdrant, probe_vllm_endpoint
from .matrix_bridge import MatrixBridgeManager
from .models import ModelDef, load_catalog
from .nim import SUGGESTED_NIMS, CATALOG_URL, NimManager
@@ -500,6 +500,10 @@ async def get_services() -> dict:
http = await check_embeddings(settings)
elif name == "qdrant":
http = await check_qdrant(settings)
elif svc.kind == "vllm":
# An extra vLLM monitored on another Spark (registered as a custom
# service). Probe its own host/port, not the primary Spark 1 one.
http = await probe_vllm_endpoint(svc.host, svc.port)
elif svc.kind == "bot":
# No HTTP health endpoint (host networking, no port) — judged purely
# by docker state. http_ready stays None so the badge isn't pinned
@@ -521,7 +525,7 @@ async def get_services() -> dict:
# Prefer the check fn's own top-level model key (embeddings reports
# it there); fall back to a model field inside detail for services
# whose /health embeds it (parakeet).
"model": http.get("model") or ((http.get("detail") or {}).get("model") if isinstance(http.get("detail"), dict) else None),
"model": http.get("model") or http.get("current_model") or ((http.get("detail") or {}).get("model") if isinstance(http.get("detail"), dict) else None),
"docker_state": docker.get("state"),
"restart_count": docker.get("restart_count"),
"started_at": docker.get("started_at"),
@@ -799,17 +803,20 @@ async def get_endpoints() -> dict:
"base_url": vllm.get("base_url"),
"model": vllm.get("current_model"),
"openai_compat": True,
"disabled": bool(vllm.get("disabled")),
},
"parakeet": {
"ready": bool(parakeet.get("ok")),
"base_url": parakeet.get("base_url"),
"kind": "stt",
"model": (parakeet.get("detail") or {}).get("model") if isinstance(parakeet.get("detail"), dict) else None,
"disabled": bool(parakeet.get("disabled")),
},
"kokoro": {
"ready": bool(kokoro.get("ok")),
"base_url": kokoro.get("base_url"),
"kind": "tts",
"disabled": bool(kokoro.get("disabled")),
},
"embeddings": {
"ready": bool(embeddings.get("ok")),
@@ -818,12 +825,14 @@ async def get_endpoints() -> dict:
"model": embeddings.get("model"),
# The proxied OpenAI-compatible endpoints live on Spark Control itself.
"openai_endpoints": ["/v1/embeddings", "/v1/rerank", "/api/search"],
"disabled": bool(embeddings.get("disabled")),
},
"qdrant": {
"ready": bool(qdrant.get("ok")),
"base_url": qdrant.get("base_url"),
"kind": "vectordb",
"collection": settings.qdrant_collection or None,
"disabled": bool(qdrant.get("disabled")),
},
}
@@ -837,12 +846,15 @@ async def get_status() -> dict:
check_embeddings(settings),
check_qdrant(settings),
)
# Feed health into the connectivity log (deduped — only logs on transition)
record_state("vllm", bool(vllm.get("ok")))
record_state("parakeet", bool(parakeet.get("ok")))
record_state("kokoro", bool(kokoro.get("ok")))
record_state("embeddings", bool(embeddings.get("ok")))
record_state("qdrant", bool(qdrant.get("ok")))
# Feed health into the connectivity log (deduped — only logs on transition).
# Skip services switched off via DISABLED_SERVICES — they'd otherwise log as
# perpetually down.
for _name, _r in (
("vllm", vllm), ("parakeet", parakeet), ("kokoro", kokoro),
("embeddings", embeddings), ("qdrant", qdrant),
):
if not _r.get("disabled"):
record_state(_name, bool(_r.get("ok")))
current_key = _identify_current_model(vllm.get("current_model"))
return {
"configured": settings.configured,