v0.2.0 - Always-on services panel with per-service host config
Dashboard:
- New 'Always-on services' section with cards for Parakeet and Magpie
- Each card: host:port, model loaded, status pill (Healthy/Unhealthy/Starting/Not configured)
- Start, Restart, and Stop buttons; each button is disabled when it does not apply to the current state
- Restart counter shown when > 1 (would have surfaced the old magpie crash loop)
Backend:
- New GET /api/services: docker container state + HTTP health for each support service
- New POST /api/services/{name}/{action} for start | stop | restart
- services.py module: docker_state, run_action via SSH
- config.py: PARAKEET_HOST/USER/CONTAINER and MAGPIE_* env vars, default to spark2_*
- health.py: use per-service hosts (no longer hard-wired to spark2_host)
Package:
- sparkConfig.yaml.ts: add 6 new optional fields
- configureSparks action: optional 'Parakeet host', 'Parakeet container', 'Magpie host', 'Magpie container' fields; descriptions explain they default to Spark 2 when blank
- Handler normalizes nulls to empty strings before merge
- main.ts: pass new env vars to container
- bump to 0.2.0:0
This commit is contained in:
+18
-3
@@ -29,6 +29,12 @@ class Settings:
|
||||
spark1_user: str
|
||||
spark2_host: str
|
||||
spark2_user: str
|
||||
parakeet_host: str
|
||||
parakeet_user: str
|
||||
parakeet_container: str
|
||||
magpie_host: str
|
||||
magpie_user: str
|
||||
magpie_container: str
|
||||
ssh_key_path: str
|
||||
ssh_known_hosts: str
|
||||
models_yaml: str
|
||||
@@ -39,11 +45,20 @@ class Settings:
|
||||
|
||||
@classmethod
|
||||
def from_env(cls) -> "Settings":
|
||||
spark2_host = _env("SPARK2_HOST")
|
||||
spark2_user = _env("SPARK2_USER")
|
||||
# Parakeet and Magpie default to Spark 2 unless explicitly overridden.
|
||||
return cls(
|
||||
spark1_host=_env("SPARK1_HOST"),
|
||||
spark1_user=_env("SPARK1_USER", "<spark-user>"),
|
||||
spark2_host=_env("SPARK2_HOST"),
|
||||
spark2_user=_env("SPARK2_USER", "<spark-user>"),
|
||||
spark1_user=_env("SPARK1_USER"),
|
||||
spark2_host=spark2_host,
|
||||
spark2_user=spark2_user,
|
||||
parakeet_host=_env("PARAKEET_HOST") or spark2_host,
|
||||
parakeet_user=_env("PARAKEET_USER") or spark2_user,
|
||||
parakeet_container=_env("PARAKEET_CONTAINER", "parakeet-asr"),
|
||||
magpie_host=_env("MAGPIE_HOST") or spark2_host,
|
||||
magpie_user=_env("MAGPIE_USER") or spark2_user,
|
||||
magpie_container=_env("MAGPIE_CONTAINER", "magpie-tts"),
|
||||
ssh_key_path=_env("SSH_KEY_PATH"),
|
||||
ssh_known_hosts=_env("SSH_KNOWN_HOSTS"),
|
||||
models_yaml=_resolve_models_yaml(),
|
||||
|
||||
+10
-10
@@ -31,15 +31,15 @@ async def check_vllm(settings: Settings) -> dict:
|
||||
|
||||
async def check_parakeet(settings: Settings) -> dict:
|
||||
base_url = (
|
||||
f"http://{settings.spark2_host}:{settings.parakeet_port}"
|
||||
if settings.spark2_host
|
||||
f"http://{settings.parakeet_host}:{settings.parakeet_port}"
|
||||
if settings.parakeet_host
|
||||
else None
|
||||
)
|
||||
if not settings.spark2_host:
|
||||
return {"ok": False, "error": "spark2 not configured", "base_url": base_url}
|
||||
if not settings.parakeet_host:
|
||||
return {"ok": False, "error": "parakeet host not configured", "base_url": base_url}
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=_TIMEOUT) as c:
|
||||
r = await c.get(f"http://{settings.spark2_host}:{settings.parakeet_port}/health")
|
||||
r = await c.get(f"http://{settings.parakeet_host}:{settings.parakeet_port}/health")
|
||||
r.raise_for_status()
|
||||
return {"ok": True, "detail": r.json(), "base_url": base_url}
|
||||
except Exception as e:
|
||||
@@ -48,15 +48,15 @@ async def check_parakeet(settings: Settings) -> dict:
|
||||
|
||||
async def check_magpie(settings: Settings) -> dict:
|
||||
base_url = (
|
||||
f"http://{settings.spark2_host}:{settings.magpie_port}"
|
||||
if settings.spark2_host
|
||||
f"http://{settings.magpie_host}:{settings.magpie_port}"
|
||||
if settings.magpie_host
|
||||
else None
|
||||
)
|
||||
if not settings.spark2_host:
|
||||
return {"ok": False, "error": "spark2 not configured", "base_url": base_url}
|
||||
if not settings.magpie_host:
|
||||
return {"ok": False, "error": "magpie host not configured", "base_url": base_url}
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=_TIMEOUT) as c:
|
||||
r = await c.get(f"http://{settings.spark2_host}:{settings.magpie_port}/v1/health/ready")
|
||||
r = await c.get(f"http://{settings.magpie_host}:{settings.magpie_port}/v1/health/ready")
|
||||
r.raise_for_status()
|
||||
return {
|
||||
"ok": True,
|
||||
|
||||
@@ -11,6 +11,7 @@ from pydantic import BaseModel
|
||||
from .config import Settings
|
||||
from .health import check_magpie, check_parakeet, check_vllm
|
||||
from .models import load_catalog
|
||||
from .services import docker_state, run_action, services_from_settings
|
||||
from .ssh import ssh_run
|
||||
from .swap import SwapManager
|
||||
|
||||
@@ -48,6 +49,64 @@ async def get_models() -> dict:
|
||||
}
|
||||
|
||||
|
||||
@app.get("/api/services")
async def get_services() -> dict:
    """Report lifecycle state of the always-on support services (Parakeet, Magpie, …).

    Returns a dict keyed by service name. Each value contains:
      - host / user / port / container / kind: the configured service definition
      - base_url: URL reported by the HTTP health check
      - http_ready: True when the HTTP health check reported ok
      - model: the "model" entry of the health detail, when the detail is a dict
      - docker_state: container status as reported by ``docker_state``
      - restart_count / started_at / exit_code / error: taken from ``docker_state``
      - detail: the raw health-check detail
    """
    registry = services_from_settings(settings)

    async def probe(name: str):
        svc = registry[name]
        docker = await docker_state(settings, svc)
        # Any service other than parakeet is checked with the magpie probe.
        checker = check_parakeet if name == "parakeet" else check_magpie
        http = await checker(settings)
        detail = http.get("detail")
        model = detail.get("model") if isinstance(detail, dict) else None
        info = {
            "host": svc.host,
            "user": svc.user,
            "port": svc.port,
            "container": svc.container,
            "kind": svc.kind,
            "base_url": http.get("base_url"),
            "http_ready": bool(http.get("ok")),
            "model": model,
            "docker_state": docker.get("state"),
            "restart_count": docker.get("restart_count"),
            "started_at": docker.get("started_at"),
            "exit_code": docker.get("exit_code"),
            "error": docker.get("error"),
            "detail": detail,
        }
        return name, info

    # Services are probed concurrently; gather preserves registry order.
    pairs = await asyncio.gather(*(probe(n) for n in registry))
    return dict(pairs)
|
||||
|
||||
|
||||
@app.post("/api/services/{name}/{action}")
async def service_action(name: str, action: str) -> dict:
    """Run a lifecycle action (start | stop | restart) on a support service.

    Raises:
        HTTPException(404): ``name`` is not a known service.
        HTTPException(400): ``action`` is not start, stop or restart.
        HTTPException(500): the action ran but did not report ok; the detail
            is its stderr, else its error, else "action failed".
    """
    registry = services_from_settings(settings)
    if name not in registry:
        raise HTTPException(404, f"unknown service: {name}")

    allowed = ("start", "stop", "restart")
    if action not in allowed:
        raise HTTPException(400, f"unknown action: {action}")

    outcome = await run_action(settings, registry[name], action)  # type: ignore[arg-type]
    if outcome["ok"]:
        return {"name": name, "action": action, **outcome}

    failure = outcome.get("stderr") or outcome.get("error") or "action failed"
    raise HTTPException(500, failure)
|
||||
|
||||
|
||||
@app.get("/api/endpoints")
|
||||
async def get_endpoints() -> dict:
|
||||
"""Service-discovery summary. Stable shape; other apps on the LAN can poll this
|
||||
|
||||
@@ -0,0 +1,88 @@
|
||||
"""Lifecycle controls for support-service containers (Parakeet, Magpie, etc.).
|
||||
|
||||
These are independent always-on containers that don't go through the LLM-swap
|
||||
machinery. We just run `docker start|stop|restart <container>` via SSH on the
|
||||
appropriate host.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from typing import Literal, Optional
|
||||
|
||||
from .config import Settings
|
||||
from .ssh import ssh_run
|
||||
|
||||
|
||||
ServiceName = Literal["parakeet", "magpie"]
|
||||
ServiceAction = Literal["start", "stop", "restart"]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ServiceDef:
    """Immutable description of one support-service container."""

    # Service key, e.g. "parakeet" or "magpie".
    name: str
    # Service category: 'stt' | 'tts' | …
    kind: str
    # Host the docker commands are run on (via SSH).
    host: str
    # SSH user used on that host.
    user: str
    # Docker container name passed to `docker inspect/start/stop/restart`.
    container: str
    # Port configured for the service.
    port: int
|
||||
|
||||
|
||||
def services_from_settings(s: Settings) -> dict[str, ServiceDef]:
    """Build the support-service registry from the application settings.

    Returns a dict with the keys "parakeet" (kind "stt") and "magpie"
    (kind "tts"), each mapped to a ServiceDef filled from the matching
    ``<service>_host/_user/_container/_port`` settings.
    """
    parakeet = ServiceDef(
        name="parakeet",
        kind="stt",
        host=s.parakeet_host,
        user=s.parakeet_user,
        container=s.parakeet_container,
        port=s.parakeet_port,
    )
    magpie = ServiceDef(
        name="magpie",
        kind="tts",
        host=s.magpie_host,
        user=s.magpie_user,
        container=s.magpie_container,
        port=s.magpie_port,
    )
    return {"parakeet": parakeet, "magpie": magpie}
|
||||
|
||||
|
||||
async def docker_state(settings: Settings, svc: ServiceDef) -> dict:
    """Get docker state (running, exited, restarting, etc.) + restart count.

    Runs ``docker inspect`` for ``svc.container`` over SSH on ``svc.host``.

    Returns a dict whose "state" is:
      - "unconfigured": ``svc.host`` or ``svc.user`` is empty (no SSH is attempted)
      - "missing": the SSH command returned non-zero, or docker reported
        that no such object exists ("raw" holds the output)
      - "unknown": the output did not have the expected fields ("raw" holds it)
      - otherwise docker's own status string, together with "started_at",
        "restart_count", "exit_code" and "error".
    """
    import shlex  # local import: only needed to quote the container name

    if not svc.host or not svc.user:
        return {"state": "unconfigured", "restart_count": None, "uptime": None}

    # Go-template for `docker inspect --format`; fields are '|'-separated.
    fmt = (
        "{{.State.Status}}|{{.State.StartedAt}}|{{.RestartCount}}"
        "|{{.State.ExitCode}}|{{.State.Error}}"
    )
    # Quote the container name so an unusual configured value cannot be
    # interpreted by the remote shell. Ordinary names are left unchanged.
    cmd = (
        f"docker inspect {shlex.quote(svc.container)} "
        f"--format '{fmt}' "
        f"2>&1 || echo 'NOT_FOUND'"
    )
    rc, out, _ = await ssh_run(svc.host, svc.user, cmd, settings, timeout=10)
    out = out.strip()

    # stderr is merged into stdout ahead of the NOT_FOUND sentinel, so a
    # missing container normally shows up as docker's "no such object" text.
    no_such_object = "Error" in out and "no such object" in out.lower()
    if rc != 0 or out.startswith("NOT_FOUND") or no_such_object:
        return {"state": "missing", "restart_count": None, "uptime": None, "raw": out}

    # Split at most 4 times: .State.Error is the last field and may itself
    # contain '|', which must stay part of the error text.
    parts = out.split("|", 4)
    if len(parts) < 4:
        return {"state": "unknown", "raw": out}

    status, started_at, restart_count, exit_code = parts[:4]
    error = parts[4] if len(parts) > 4 else ""
    return {
        "state": status,
        "started_at": started_at,
        "restart_count": int(restart_count) if restart_count.isdigit() else None,
        "exit_code": int(exit_code) if exit_code.lstrip("-").isdigit() else None,
        "error": error or None,
    }
|
||||
|
||||
|
||||
async def run_action(settings: Settings, svc: ServiceDef, action: ServiceAction) -> dict:
    """Run docker start/stop/restart on the target host.

    Returns ``{"ok": False, "error": ...}`` without attempting SSH when the
    service has no host or user configured. Otherwise returns "ok" (True when
    the command's return code is 0), "rc", and the stripped "stdout" and
    "stderr" of the command.
    """
    import shlex  # local import: only needed to quote the container name

    if not svc.host or not svc.user:
        return {"ok": False, "error": "service host not configured"}

    # Quote the container name so an unusual configured value cannot be
    # interpreted by the remote shell. Ordinary names are left unchanged.
    cmd = f"docker {action} {shlex.quote(svc.container)}"
    rc, out, err = await ssh_run(svc.host, svc.user, cmd, settings, timeout=30)
    return {
        "ok": rc == 0,
        "rc": rc,
        "stdout": out.strip(),
        "stderr": err.strip(),
    }
|
||||
@@ -11,6 +11,8 @@ const state = {
|
||||
swap_phase: 'Starting…',
|
||||
swap_phase_detail: '',
|
||||
swap_progress: 0, // 0–1
|
||||
services: {},
|
||||
service_action_in_flight: null, // e.g. "parakeet:restart"
|
||||
configured: true,
|
||||
timer_handle: null,
|
||||
};
|
||||
@@ -83,6 +85,107 @@ function renderCurrent(status) {
|
||||
c.innerHTML = `<strong>${label}</strong>`;
|
||||
}
|
||||
|
||||
/**
 * Map a service entry from /api/services to a display class.
 *
 * Returns 'unconfigured' when no host is set; otherwise 'missing',
 * 'unhealthy' (restarting or exited), 'starting' (running but HTTP not
 * ready) or 'running'. Any other docker state is returned as-is, or
 * 'unknown' when there is none.
 */
function classifyService(s) {
  if (!s.host) return 'unconfigured';
  switch (s.docker_state) {
    case 'missing':
      return 'missing';
    case 'restarting':
    case 'exited':
      return 'unhealthy';
    case 'running':
      return s.http_ready ? 'running' : 'starting';
    default:
      return s.docker_state || 'unknown';
  }
}
|
||||
|
||||
/**
 * Human-readable label for a class returned by classifyService.
 * Classes without a label are returned unchanged.
 */
function statusLabel(cls) {
  const labels = {
    running: 'Healthy',
    unhealthy: 'Unhealthy',
    starting: 'Starting…',
    missing: 'Not installed',
    unconfigured: 'Not configured',
    unknown: 'Unknown',
  };
  const label = labels[cls];
  return label ? label : cls;
}
|
||||
|
||||
/**
 * Render the "Always-on services" panel from state.services.
 *
 * When state.services is empty, it is first fetched from /api/services; a
 * failed fetch is logged and the panel is left untouched. With no services
 * the panel is hidden. Otherwise the grid is rebuilt with one card per
 * service and click handlers are wired to onServiceAction.
 */
async function renderServices() {
  let services = state.services;
  // First render: fetch.
  if (!services || Object.keys(services).length === 0) {
    try {
      services = await fetchJSON('/api/services');
      state.services = services;
    } catch (e) {
      console.error('services fetch failed', e);
      return;
    }
  }

  const panel = el('#services-panel');
  const grid = el('#services-grid');
  // Tolerate a null/undefined payload instead of throwing in Object.entries.
  const entries = Object.entries(services || {});
  if (entries.length === 0) {
    panel.classList.add('hidden');
    return;
  }
  panel.classList.remove('hidden');
  grid.innerHTML = '';

  for (const [name, s] of entries) {
    const cls = classifyService(s);
    const card = document.createElement('div');
    card.className = `service-card ${cls}`;

    const inFlight = state.service_action_in_flight
      && state.service_action_in_flight.startsWith(name + ':');

    // Disable buttons that don't make sense for the current state.
    const disable = (action) => {
      if (inFlight) return true;
      if (cls === 'unconfigured' || cls === 'missing') return true;
      if (action === 'start' && (cls === 'running' || cls === 'starting')) return true;
      if (action === 'stop' && cls !== 'running' && cls !== 'starting' && cls !== 'unhealthy') return true;
      return false;
    };
    const disabledAttr = (action) => (disable(action) ? 'disabled' : '');

    // Everything interpolated into innerHTML below goes through escapeHtml,
    // including values the original inserted raw (name inside the data
    // attributes, port, status label, restart count).
    // NOTE(review): attribute safety assumes escapeHtml also escapes double
    // quotes — confirm against its definition.
    const safeName = escapeHtml(name);

    const hostRow = s.host
      ? `<div class="row"><span class="k">Host</span><span class="v">${escapeHtml(s.host)}:${escapeHtml(String(s.port))}</span></div>`
      : `<div class="row"><span class="k">Host</span><span class="v muted-v">not configured</span></div>`;
    const modelRow = s.model
      ? `<div class="row"><span class="k">Model</span><span class="v">${escapeHtml(s.model)}</span></div>`
      : '';
    // Only shown above 1 restart.
    const restartsRow = s.restart_count != null && s.restart_count > 1
      ? `<div class="row"><span class="k">Restarts</span><span class="v">${escapeHtml(String(s.restart_count))}</span></div>`
      : '';

    card.innerHTML = `
      <div class="head">
        <span class="name">${safeName}</span>
        <span class="kind">${escapeHtml(s.kind || '')}</span>
        <span class="status">${escapeHtml(String(statusLabel(cls)))}</span>
      </div>
      ${hostRow}
      ${modelRow}
      ${restartsRow}
      <div class="service-actions">
        <button class="btn" data-svc-action="${safeName}:start" ${disabledAttr('start')}>Start</button>
        <button class="btn" data-svc-action="${safeName}:restart" ${disabledAttr('restart')}>Restart</button>
        <button class="btn danger" data-svc-action="${safeName}:stop" ${disabledAttr('stop')}>Stop</button>
      </div>
    `;
    grid.appendChild(card);
  }

  for (const btn of grid.querySelectorAll('.btn[data-svc-action]')) {
    btn.addEventListener('click', () => onServiceAction(btn.dataset.svcAction));
  }
}
|
||||
|
||||
/**
 * Handle a click on a service button.
 *
 * `key` has the form "<service>:<action>", e.g. "parakeet:restart". Only one
 * action may be in flight at a time; further clicks are ignored until it
 * finishes. A failed action is reported with alert(). Afterwards the service
 * list is re-fetched and re-rendered, and pollStatus() is triggered.
 */
async function onServiceAction(key) {
  if (state.service_action_in_flight) return;
  const [name, action] = key.split(':');
  state.service_action_in_flight = key;
  // Re-render immediately so the buttons show as disabled while in flight.
  renderServices();
  try {
    await fetchJSON(`/api/services/${name}/${action}`, { method: 'POST' });
  } catch (e) {
    alert(`${action} ${name} failed: ${e.message}`);
  } finally {
    state.service_action_in_flight = null;
    // Refresh services state. This is best-effort: a failure keeps the
    // previous state, but is logged rather than silently discarded.
    try {
      state.services = await fetchJSON('/api/services');
    } catch (e) {
      console.error('services refresh failed', e);
    }
    renderServices();
    pollStatus();
  }
}
|
||||
|
||||
function renderEndpoint(status) {
|
||||
const v = status.vllm || {};
|
||||
const panel = el('#endpoint-panel');
|
||||
@@ -269,6 +372,11 @@ async function pollStatus() {
|
||||
renderCurrent(status);
|
||||
renderEndpoint(status);
|
||||
renderHealth(status);
|
||||
// Refresh services state lazily — every 5s poll triggers this too.
|
||||
try {
|
||||
state.services = await fetchJSON('/api/services');
|
||||
renderServices();
|
||||
} catch {}
|
||||
if (status.current_swap_job && status.current_swap_job !== state.swap_job_id) {
|
||||
attachToSwap(status.current_swap_job, /*needsBackfill=*/true);
|
||||
} else if (!status.current_swap_job && state.swap_job_id && !state.swap_eventsource) {
|
||||
@@ -392,6 +500,7 @@ async function init() {
|
||||
setupCopyButtons();
|
||||
await loadModels();
|
||||
await pollStatus();
|
||||
await renderServices();
|
||||
setInterval(pollStatus, 5000);
|
||||
}
|
||||
|
||||
|
||||
@@ -63,7 +63,15 @@
|
||||
</details>
|
||||
</section>
|
||||
|
||||
<section id="cards" class="cards"></section>
|
||||
<section id="services-panel" class="services hidden">
|
||||
<h2 class="section-title">Always-on services</h2>
|
||||
<div id="services-grid" class="services-grid"></div>
|
||||
</section>
|
||||
|
||||
<section id="models-section">
|
||||
<h2 class="section-title">LLM swap</h2>
|
||||
<section id="cards" class="cards"></section>
|
||||
</section>
|
||||
|
||||
<footer class="footer">
|
||||
<div class="health">
|
||||
|
||||
@@ -217,6 +217,79 @@ main {
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
/* ===== Section titles ===== */

/* Small uppercase heading placed above each dashboard section. */
.section-title {
  margin: 24px 0 12px;
  color: var(--muted);
  font-size: 13px;
  font-weight: 500;
  letter-spacing: 0.06em;
  text-transform: uppercase;
}

.section-title:first-child {
  margin-top: 0;
}

/* ===== Services panel ===== */

/* Responsive grid: as many 280px-minimum columns as fit. */
.services-grid {
  display: grid;
  grid-template-columns: repeat(auto-fill, minmax(280px, 1fr));
  gap: 14px;
}

.service-card {
  display: flex;
  flex-direction: column;
  gap: 10px;
  padding: 14px 16px;
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: var(--radius);
}

/* Card border colour follows the class computed by classifyService. */
.service-card.running {
  border-color: rgba(74, 222, 128, 0.45);
}

.service-card.unhealthy {
  border-color: rgba(239, 68, 68, 0.55);
}

.service-card.missing,
.service-card.unconfigured {
  border-color: rgba(245, 158, 11, 0.45);
}

/* Card header: name, kind, and the status pill pushed to the right. */
.service-card .head {
  display: flex;
  align-items: center;
  gap: 8px;
}

.service-card .head .name {
  font-size: 15px;
  font-weight: 600;
}

.service-card .head .kind {
  color: var(--muted);
  font-size: 11px;
  letter-spacing: 0.06em;
  text-transform: uppercase;
}

.service-card .head .status {
  margin-left: auto;
  padding: 2px 8px;
  color: var(--muted);
  font-size: 12px;
  background: var(--surface-2);
  border: 1px solid var(--border);
  border-radius: 999px;
}

/* These state overrides have the same specificity as the base pill rule
   above, so they must stay after it in source order to take effect. */
.service-card.running .status {
  color: var(--accent);
  border-color: rgba(74, 222, 128, 0.4);
}

.service-card.unhealthy .status {
  color: var(--error);
  border-color: rgba(239, 68, 68, 0.4);
}

.service-card.missing .status,
.service-card.unconfigured .status {
  color: var(--warn);
  border-color: rgba(245, 158, 11, 0.4);
}

/* Key/value rows (Host, Model, Restarts). */
.service-card .row {
  display: flex;
  gap: 6px;
  color: var(--muted);
  font-size: 12px;
}

.service-card .row .k {
  flex-shrink: 0;
  width: 60px;
}

.service-card .row .v {
  color: var(--text);
  font-family: ui-monospace, SFMono-Regular, "SF Mono", Menlo, monospace;
  word-break: break-all;
}

.service-card .row .v.muted-v {
  color: var(--muted);
  font-family: inherit;
}

/* Start / Restart / Stop button row; buttons share the width equally. */
.service-actions {
  display: flex;
  gap: 6px;
  margin-top: 4px;
}

.service-actions .btn {
  flex: 1;
  padding: 6px 12px;
  font-size: 12px;
}

.service-actions .btn.danger {
  color: var(--error);
  border-color: rgba(239, 68, 68, 0.3);
}

.service-actions .btn.danger:hover:not(:disabled) {
  background: rgba(239, 68, 68, 0.08);
  border-color: var(--error);
}
|
||||
|
||||
/* ===== Cards ===== */
|
||||
|
||||
.cards {
|
||||
|
||||
Reference in New Issue
Block a user