v0.6.0 - Service-level connectivity tracking + passive failure-report endpoint
connectivity.py:
- Generalized 'spark' subject to any string; renamed 'spark' field to 'subject'
- Legacy v0.5 events with the old 'spark' field are migrated transparently on read (kind defaults to 'transition')
- New record_report(subject, ok, source, detail, latency_ms): always appends an event with kind='report'; does NOT mutate the current state (only active polling is authoritative)
- summary() returns events normalized to the new schema
Wiring:
- /api/status now calls record_state for vllm/parakeet/magpie (deduplicated: an event is logged only when the state changes)
- /api/services calls record_state for each service after its http check
- Result: dashboard observes service-level transitions automatically with no extra polling
Passive endpoint:
- POST /api/health-event with {service, ok, source?, error?, ms?}
- Useful for external apps (e.g. Open WebUI) to surface failures shorter than the polling interval, which the dashboard would otherwise miss
UI:
- Connectivity dialog groups events by subject (hosts ordered first, then services)
- Per-subject summary shows transition count, down count, report count, failed-report count
- Transitions and reports render inline with distinct styling; reports show source app + error + latency
- Legacy v0.5 events render unchanged
Docs:
- README documents /api/health-event with a curl example
Package: bump to 0.6.0:0
This commit is contained in:
+38
-1
@@ -10,7 +10,7 @@ from pydantic import BaseModel
|
||||
from typing import Literal
|
||||
|
||||
from .config import Settings
|
||||
from .connectivity import get_mac, summary as connectivity_summary
|
||||
from .connectivity import get_mac, record_report, record_state, summary as connectivity_summary
|
||||
from .custom_services import add_custom_service, delete_custom_service
|
||||
from .download import DownloadManager
|
||||
from .hardware import HardwareProbe
|
||||
@@ -136,6 +136,37 @@ async def get_connectivity() -> dict:
|
||||
return connectivity_summary()
|
||||
|
||||
|
||||
class HealthEventBody(BaseModel):
    # Request payload accepted by POST /api/health-event.

    # Service the report is about, e.g. "parakeet", "magpie", "vllm".
    service: str
    # True when the caller's request succeeded, False when it failed.
    ok: bool
    # App that sent the report (e.g. "open-webui"); optional.
    source: str | None = None
    # Optional error detail describing the failure.
    error: str | None = None
    # Optional latency; forwarded to record_report as latency_ms.
    ms: int | None = None
|
||||
|
||||
|
||||
@app.post("/api/health-event")
async def post_health_event(body: HealthEventBody) -> dict:
    """Record a passive health report sent by an external app.

    Any LAN app can POST here when its call to one of our services succeeds
    or (more usefully) fails. The report is logged into the connectivity
    history so a brief blip that polling misses still surfaces.

    Args:
        body: Parsed request payload. ``service`` and ``ok`` are required;
            ``source``, ``error`` and ``ms`` are optional.

    Returns:
        ``{"ok": True, "recorded": event}`` where ``event`` is the value
        returned by ``record_report``.

    Raises:
        HTTPException: 400 when ``service`` is empty or whitespace-only.

    Example:
        curl -X POST http://<dashboard>/api/health-event \\
          -H 'content-type: application/json' \\
          -d '{"service":"parakeet","ok":false,"error":"503","source":"open-webui","ms":420}'
    """
    # Normalise once so the validation and the recorded value cannot diverge.
    service = body.service.strip()
    if not service:
        raise HTTPException(400, "service is required")

    # Strip BEFORE applying the fallback: a whitespace-only source should be
    # recorded as "external", not as an empty string.
    source = (body.source or "").strip() or "external"

    event = record_report(
        service,
        ok=body.ok,
        source=source,
        detail=(body.error or "").strip(),
        latency_ms=body.ms,
    )
    return {"ok": True, "recorded": event}
|
||||
|
||||
|
||||
@app.post("/api/spark/{name}/wake")
|
||||
async def wake_spark(name: str) -> dict:
|
||||
"""Send a Wake-on-LAN magic packet for the named Spark.
|
||||
@@ -216,6 +247,8 @@ async def get_services() -> dict:
|
||||
results = await asyncio.gather(*[one(n) for n in services.keys()])
|
||||
for name, info in results:
|
||||
out[name] = info
|
||||
# Feed http reachability into the connectivity log (transition-only)
|
||||
record_state(name, bool(info.get("http_ready")))
|
||||
return out
|
||||
|
||||
|
||||
@@ -372,6 +405,10 @@ async def get_status() -> dict:
|
||||
check_parakeet(settings),
|
||||
check_magpie(settings),
|
||||
)
|
||||
# Feed health into the connectivity log (deduped — only logs on transition)
|
||||
record_state("vllm", bool(vllm.get("ok")))
|
||||
record_state("parakeet", bool(parakeet.get("ok")))
|
||||
record_state("magpie", bool(magpie.get("ok")))
|
||||
current_key = _identify_current_model(vllm.get("current_model"))
|
||||
return {
|
||||
"configured": settings.configured,
|
||||
|
||||
Reference in New Issue
Block a user