v0.24.0:0 - configurable cluster topology (vllm container name, hide services, second-vllm monitor)
Make the cluster topology configurable so that an adopter wired differently (vLLM on both Sparks, port 8000, a different container name, no Parakeet) can monitor without forking. Covers the OpenClaw report P4/P5/#6.

- `VLLM_CONTAINER` override (default `vllm_node`), validated at the boundary and `quote_arg`-quoted into the swap log-tail and the pre-flight validator exec.
- `DISABLED_SERVICES` list: hidden services show no tile and are skipped by the status, deep-health, and connectivity probes (kills the Parakeet-on-8000 collision).
- `kind: vllm` custom service: monitors a second Spark's vLLM via the shared `probe_vllm_endpoint`; each `/api/endpoints` entry gains a `disabled` flag.

The swap mechanism is intentionally not generalized to raw `docker run` (that's coordination, roadmap item 4).
This commit is contained in:
+20
-8
@@ -20,7 +20,7 @@ from .llm_proxy import build_router as build_llm_router
|
||||
from .embeddings_proxy import build_router as build_embeddings_router
|
||||
from .redaction_gateway import build_router as build_redaction_router, MapStore
|
||||
from .hardware import HardwareProbe
|
||||
from .health import check_kokoro, check_parakeet, check_vllm, check_embeddings, check_qdrant
|
||||
from .health import check_kokoro, check_parakeet, check_vllm, check_embeddings, check_qdrant, probe_vllm_endpoint
|
||||
from .matrix_bridge import MatrixBridgeManager
|
||||
from .models import ModelDef, load_catalog
|
||||
from .nim import SUGGESTED_NIMS, CATALOG_URL, NimManager
|
||||
@@ -500,6 +500,10 @@ async def get_services() -> dict:
|
||||
http = await check_embeddings(settings)
|
||||
elif name == "qdrant":
|
||||
http = await check_qdrant(settings)
|
||||
elif svc.kind == "vllm":
|
||||
# An extra vLLM monitored on another Spark (registered as a custom
|
||||
# service). Probe its own host/port, not the primary Spark 1 one.
|
||||
http = await probe_vllm_endpoint(svc.host, svc.port)
|
||||
elif svc.kind == "bot":
|
||||
# No HTTP health endpoint (host networking, no port) — judged purely
|
||||
# by docker state. http_ready stays None so the badge isn't pinned
|
||||
@@ -521,7 +525,7 @@ async def get_services() -> dict:
|
||||
# Prefer the check fn's own top-level model key (embeddings reports
|
||||
# it there); fall back to a model field inside detail for services
|
||||
# whose /health embeds it (parakeet).
|
||||
"model": http.get("model") or ((http.get("detail") or {}).get("model") if isinstance(http.get("detail"), dict) else None),
|
||||
"model": http.get("model") or http.get("current_model") or ((http.get("detail") or {}).get("model") if isinstance(http.get("detail"), dict) else None),
|
||||
"docker_state": docker.get("state"),
|
||||
"restart_count": docker.get("restart_count"),
|
||||
"started_at": docker.get("started_at"),
|
||||
@@ -799,17 +803,20 @@ async def get_endpoints() -> dict:
|
||||
"base_url": vllm.get("base_url"),
|
||||
"model": vllm.get("current_model"),
|
||||
"openai_compat": True,
|
||||
"disabled": bool(vllm.get("disabled")),
|
||||
},
|
||||
"parakeet": {
|
||||
"ready": bool(parakeet.get("ok")),
|
||||
"base_url": parakeet.get("base_url"),
|
||||
"kind": "stt",
|
||||
"model": (parakeet.get("detail") or {}).get("model") if isinstance(parakeet.get("detail"), dict) else None,
|
||||
"disabled": bool(parakeet.get("disabled")),
|
||||
},
|
||||
"kokoro": {
|
||||
"ready": bool(kokoro.get("ok")),
|
||||
"base_url": kokoro.get("base_url"),
|
||||
"kind": "tts",
|
||||
"disabled": bool(kokoro.get("disabled")),
|
||||
},
|
||||
"embeddings": {
|
||||
"ready": bool(embeddings.get("ok")),
|
||||
@@ -818,12 +825,14 @@ async def get_endpoints() -> dict:
|
||||
"model": embeddings.get("model"),
|
||||
# The proxied OpenAI-compatible endpoints live on Spark Control itself.
|
||||
"openai_endpoints": ["/v1/embeddings", "/v1/rerank", "/api/search"],
|
||||
"disabled": bool(embeddings.get("disabled")),
|
||||
},
|
||||
"qdrant": {
|
||||
"ready": bool(qdrant.get("ok")),
|
||||
"base_url": qdrant.get("base_url"),
|
||||
"kind": "vectordb",
|
||||
"collection": settings.qdrant_collection or None,
|
||||
"disabled": bool(qdrant.get("disabled")),
|
||||
},
|
||||
}
|
||||
|
||||
@@ -837,12 +846,15 @@ async def get_status() -> dict:
|
||||
check_embeddings(settings),
|
||||
check_qdrant(settings),
|
||||
)
|
||||
# Feed health into the connectivity log (deduped — only logs on transition)
|
||||
record_state("vllm", bool(vllm.get("ok")))
|
||||
record_state("parakeet", bool(parakeet.get("ok")))
|
||||
record_state("kokoro", bool(kokoro.get("ok")))
|
||||
record_state("embeddings", bool(embeddings.get("ok")))
|
||||
record_state("qdrant", bool(qdrant.get("ok")))
|
||||
# Feed health into the connectivity log (deduped — only logs on transition).
|
||||
# Skip services switched off via DISABLED_SERVICES — they'd otherwise log as
|
||||
# perpetually down.
|
||||
for _name, _r in (
|
||||
("vllm", vllm), ("parakeet", parakeet), ("kokoro", kokoro),
|
||||
("embeddings", embeddings), ("qdrant", qdrant),
|
||||
):
|
||||
if not _r.get("disabled"):
|
||||
record_state(_name, bool(_r.get("ok")))
|
||||
current_key = _identify_current_model(vllm.get("current_model"))
|
||||
return {
|
||||
"configured": settings.configured,
|
||||
|
||||
Reference in New Issue
Block a user