v0.2.0 - Always-on services panel with per-service host config

Dashboard:
- New 'Always-on services' section with cards for Parakeet and Magpie
- Each card: host:port, model loaded, status pill (Healthy/Unhealthy/Starting…/Not installed/Not configured/Unknown)
- Start, Restart, and Stop buttons; each button is disabled when it does not apply to the current state
- Restart counter shown when > 1 (would have surfaced the old magpie crash loop)

Backend:
- New /api/services GET: docker container state + http health for each support service
- New POST /api/services/{name}/{action} for start | stop | restart
- services.py module: docker_state, run_action via SSH
- config.py: PARAKEET_HOST/USER/CONTAINER and MAGPIE_* env vars, default to spark2_*
- health.py: use per-service hosts (no longer hard-wired to spark2_host)

Package:
- sparkConfig.yaml.ts: add 6 new optional fields
- configureSparks action: optional 'Parakeet host', 'Parakeet container', 'Magpie host', 'Magpie container' fields; descriptions explain they default to Spark 2 when blank
- Handler normalizes nulls to empty strings before merge
- main.ts: pass new env vars to container
- bump to 0.2.0:0
This commit is contained in:
Grant
2026-05-12 11:21:15 -05:00
parent ed54f85442
commit 27699a2469
11 changed files with 428 additions and 17 deletions
+18 -3
View File
@@ -29,6 +29,12 @@ class Settings:
spark1_user: str spark1_user: str
spark2_host: str spark2_host: str
spark2_user: str spark2_user: str
parakeet_host: str
parakeet_user: str
parakeet_container: str
magpie_host: str
magpie_user: str
magpie_container: str
ssh_key_path: str ssh_key_path: str
ssh_known_hosts: str ssh_known_hosts: str
models_yaml: str models_yaml: str
@@ -39,11 +45,20 @@ class Settings:
@classmethod @classmethod
def from_env(cls) -> "Settings": def from_env(cls) -> "Settings":
spark2_host = _env("SPARK2_HOST")
spark2_user = _env("SPARK2_USER")
# Parakeet and Magpie default to Spark 2 unless explicitly overridden.
return cls( return cls(
spark1_host=_env("SPARK1_HOST"), spark1_host=_env("SPARK1_HOST"),
spark1_user=_env("SPARK1_USER", "<spark-user>"), spark1_user=_env("SPARK1_USER"),
spark2_host=_env("SPARK2_HOST"), spark2_host=spark2_host,
spark2_user=_env("SPARK2_USER", "<spark-user>"), spark2_user=spark2_user,
parakeet_host=_env("PARAKEET_HOST") or spark2_host,
parakeet_user=_env("PARAKEET_USER") or spark2_user,
parakeet_container=_env("PARAKEET_CONTAINER", "parakeet-asr"),
magpie_host=_env("MAGPIE_HOST") or spark2_host,
magpie_user=_env("MAGPIE_USER") or spark2_user,
magpie_container=_env("MAGPIE_CONTAINER", "magpie-tts"),
ssh_key_path=_env("SSH_KEY_PATH"), ssh_key_path=_env("SSH_KEY_PATH"),
ssh_known_hosts=_env("SSH_KNOWN_HOSTS"), ssh_known_hosts=_env("SSH_KNOWN_HOSTS"),
models_yaml=_resolve_models_yaml(), models_yaml=_resolve_models_yaml(),
+10 -10
View File
@@ -31,15 +31,15 @@ async def check_vllm(settings: Settings) -> dict:
async def check_parakeet(settings: Settings) -> dict: async def check_parakeet(settings: Settings) -> dict:
base_url = ( base_url = (
f"http://{settings.spark2_host}:{settings.parakeet_port}" f"http://{settings.parakeet_host}:{settings.parakeet_port}"
if settings.spark2_host if settings.parakeet_host
else None else None
) )
if not settings.spark2_host: if not settings.parakeet_host:
return {"ok": False, "error": "spark2 not configured", "base_url": base_url} return {"ok": False, "error": "parakeet host not configured", "base_url": base_url}
try: try:
async with httpx.AsyncClient(timeout=_TIMEOUT) as c: async with httpx.AsyncClient(timeout=_TIMEOUT) as c:
r = await c.get(f"http://{settings.spark2_host}:{settings.parakeet_port}/health") r = await c.get(f"http://{settings.parakeet_host}:{settings.parakeet_port}/health")
r.raise_for_status() r.raise_for_status()
return {"ok": True, "detail": r.json(), "base_url": base_url} return {"ok": True, "detail": r.json(), "base_url": base_url}
except Exception as e: except Exception as e:
@@ -48,15 +48,15 @@ async def check_parakeet(settings: Settings) -> dict:
async def check_magpie(settings: Settings) -> dict: async def check_magpie(settings: Settings) -> dict:
base_url = ( base_url = (
f"http://{settings.spark2_host}:{settings.magpie_port}" f"http://{settings.magpie_host}:{settings.magpie_port}"
if settings.spark2_host if settings.magpie_host
else None else None
) )
if not settings.spark2_host: if not settings.magpie_host:
return {"ok": False, "error": "spark2 not configured", "base_url": base_url} return {"ok": False, "error": "magpie host not configured", "base_url": base_url}
try: try:
async with httpx.AsyncClient(timeout=_TIMEOUT) as c: async with httpx.AsyncClient(timeout=_TIMEOUT) as c:
r = await c.get(f"http://{settings.spark2_host}:{settings.magpie_port}/v1/health/ready") r = await c.get(f"http://{settings.magpie_host}:{settings.magpie_port}/v1/health/ready")
r.raise_for_status() r.raise_for_status()
return { return {
"ok": True, "ok": True,
+59
View File
@@ -11,6 +11,7 @@ from pydantic import BaseModel
from .config import Settings from .config import Settings
from .health import check_magpie, check_parakeet, check_vllm from .health import check_magpie, check_parakeet, check_vllm
from .models import load_catalog from .models import load_catalog
from .services import docker_state, run_action, services_from_settings
from .ssh import ssh_run from .ssh import ssh_run
from .swap import SwapManager from .swap import SwapManager
@@ -48,6 +49,64 @@ async def get_models() -> dict:
} }
@app.get("/api/services")
async def get_services() -> dict:
    """Report the lifecycle state of the always-on support services.

    Returns a mapping of service name to a dict with these keys:

    - ``host`` / ``user`` / ``port`` / ``container`` / ``kind``: configured values
    - ``docker_state``: the ``state`` value returned by ``docker_state``
    - ``restart_count`` / ``started_at`` / ``exit_code`` / ``error``: taken
      from the same docker probe (``None`` when the probe did not report them)
    - ``http_ready``: whether the service's HTTP health check reported ``ok``
    - ``base_url`` / ``detail``: taken from the HTTP health check
    - ``model``: ``detail["model"]`` when ``detail`` is a dict, else ``None``
    """
    services = services_from_settings(settings)

    async def probe(name: str):
        svc = services[name]
        # Every service other than "parakeet" is checked with the Magpie probe.
        check_http = check_parakeet if name == "parakeet" else check_magpie
        # The SSH docker probe and the HTTP probe are independent of each
        # other, so run them concurrently instead of back to back.
        docker, http = await asyncio.gather(
            docker_state(settings, svc),
            check_http(settings),
        )
        detail = http.get("detail")
        return name, {
            "host": svc.host,
            "user": svc.user,
            "port": svc.port,
            "container": svc.container,
            "kind": svc.kind,
            "base_url": http.get("base_url"),
            "http_ready": bool(http.get("ok")),
            "model": detail.get("model") if isinstance(detail, dict) else None,
            "docker_state": docker.get("state"),
            "restart_count": docker.get("restart_count"),
            "started_at": docker.get("started_at"),
            "exit_code": docker.get("exit_code"),
            "error": docker.get("error"),
            "detail": detail,
        }

    # gather() preserves argument order, so the response keeps the order of
    # the configured services.
    results = await asyncio.gather(*(probe(name) for name in services))
    return dict(results)
@app.post("/api/services/{name}/{action}")
async def service_action(name: str, action: str) -> dict:
    """Start, stop or restart one support service.

    Raises:
        HTTPException: 404 for an unknown service name, 400 for an unknown
            action, 500 when ``run_action`` reports a failure.
    """
    known = services_from_settings(settings)
    svc = known.get(name)
    if svc is None:
        raise HTTPException(404, f"unknown service: {name}")
    if action not in {"start", "stop", "restart"}:
        raise HTTPException(400, f"unknown action: {action}")

    outcome = await run_action(settings, svc, action)  # type: ignore[arg-type]
    if outcome["ok"]:
        return {"name": name, "action": action, **outcome}
    # Prefer the command's stderr, then the helper's own error message.
    raise HTTPException(500, outcome.get("stderr") or outcome.get("error") or "action failed")
@app.get("/api/endpoints") @app.get("/api/endpoints")
async def get_endpoints() -> dict: async def get_endpoints() -> dict:
"""Service-discovery summary. Stable shape; other apps on the LAN can poll this """Service-discovery summary. Stable shape; other apps on the LAN can poll this
+88
View File
@@ -0,0 +1,88 @@
"""Lifecycle controls for support-service containers (Parakeet, Magpie, etc.).
These are independent always-on containers that don't go through the LLM-swap
machinery. We just run `docker start|stop|restart <container>` via SSH on the
appropriate host.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Literal, Optional
from .config import Settings
from .ssh import ssh_run
ServiceName = Literal["parakeet", "magpie"]
ServiceAction = Literal["start", "stop", "restart"]
@dataclass(frozen=True)
class ServiceDef:
    """Immutable description of one support-service container and where it runs."""

    # Service key, e.g. "parakeet" or "magpie".
    name: str
    # Service category: 'stt' | 'tts' | …
    kind: str
    # Host passed to ssh_run when inspecting or controlling the container.
    host: str
    # User passed to ssh_run alongside the host.
    user: str
    # Docker container name used in the docker commands.
    container: str
    # Port the service is configured on.
    port: int
def services_from_settings(s: Settings) -> dict[str, ServiceDef]:
    """Build the name -> ServiceDef table from the per-service settings fields."""
    parakeet = ServiceDef(
        name="parakeet",
        kind="stt",
        host=s.parakeet_host,
        user=s.parakeet_user,
        container=s.parakeet_container,
        port=s.parakeet_port,
    )
    magpie = ServiceDef(
        name="magpie",
        kind="tts",
        host=s.magpie_host,
        user=s.magpie_user,
        container=s.magpie_container,
        port=s.magpie_port,
    )
    # Insertion order (parakeet, then magpie) is the order callers iterate in.
    return {"parakeet": parakeet, "magpie": magpie}
async def docker_state(settings: Settings, svc: ServiceDef) -> dict:
    """Query the docker state of ``svc``'s container over SSH.

    Runs ``docker inspect`` on ``svc.host`` as ``svc.user`` and parses a
    ``|``-separated line of status, start time, restart count, exit code and
    error text.

    Returns a dict whose ``state`` is one of:

    - ``"unconfigured"``: host or user is empty; no SSH call is made
    - ``"missing"``: the SSH command failed or docker reported no such object
    - ``"unknown"``: the output could not be parsed (raw text is in ``raw``)
    - otherwise the container status string reported by docker, together with
      ``started_at``, ``restart_count``, ``exit_code`` and ``error``
    """
    # Function-scope import keeps this block self-contained.
    import shlex

    if not svc.host or not svc.user:
        return {"state": "unconfigured", "restart_count": None, "uptime": None}

    fmt = (
        "{{.State.Status}}|{{.State.StartedAt}}|{{.RestartCount}}"
        "|{{.State.ExitCode}}|{{.State.Error}}"
    )
    # The container name comes from configuration; quote it so that it cannot
    # be interpreted by the remote shell.
    cmd = (
        f"docker inspect {shlex.quote(svc.container)} "
        f"--format '{fmt}' "
        "2>&1 || echo 'NOT_FOUND'"
    )
    rc, out, _ = await ssh_run(svc.host, svc.user, cmd, settings, timeout=10)
    out = out.strip()

    # stderr is merged into stdout, so docker's own error text can precede the
    # NOT_FOUND marker; check for both forms.
    no_such_object = "Error" in out and "no such object" in out.lower()
    if rc != 0 or out.startswith("NOT_FOUND") or no_such_object:
        return {"state": "missing", "restart_count": None, "uptime": None, "raw": out}

    # Split at most four times: the last field (State.Error) is free text and
    # may itself contain '|'.
    parts = out.split("|", 4)
    if len(parts) < 4:
        return {"state": "unknown", "restart_count": None, "raw": out}

    status, started_at, restart_count, exit_code = parts[:4]
    error = parts[4] if len(parts) > 4 else ""
    return {
        "state": status,
        "started_at": started_at,
        "restart_count": int(restart_count) if restart_count.isdigit() else None,
        # Allow a leading '-' so a negative exit code still parses.
        "exit_code": int(exit_code) if exit_code.lstrip("-").isdigit() else None,
        "error": error or None,
    }
async def run_action(settings: Settings, svc: ServiceDef, action: ServiceAction) -> dict:
    """Run ``docker start|stop|restart <container>`` on the service's host.

    Returns ``{"ok": False, "error": ...}`` without contacting the host when
    the service has no host/user configured or ``action`` is not one of the
    supported verbs. Otherwise returns ``ok`` (exit code was 0), ``rc``,
    ``stdout`` and ``stderr`` from the SSH command.
    """
    # Function-scope import keeps this block self-contained.
    import shlex

    if not svc.host or not svc.user:
        return {"ok": False, "error": "service host not configured"}
    # `action` is interpolated into a shell command below, so enforce the
    # allowed verbs here as well rather than relying on every caller.
    if action not in ("start", "stop", "restart"):
        return {"ok": False, "error": f"unsupported action: {action}"}

    # The container name comes from configuration; quote it for the remote shell.
    cmd = f"docker {action} {shlex.quote(svc.container)}"
    rc, out, err = await ssh_run(svc.host, svc.user, cmd, settings, timeout=30)
    return {
        "ok": rc == 0,
        "rc": rc,
        "stdout": out.strip(),
        "stderr": err.strip(),
    }
+109
View File
@@ -11,6 +11,8 @@ const state = {
swap_phase: 'Starting…', swap_phase: 'Starting…',
swap_phase_detail: '', swap_phase_detail: '',
swap_progress: 0, // 01 swap_progress: 0, // 01
services: {},
service_action_in_flight: null, // e.g. "parakeet:restart"
configured: true, configured: true,
timer_handle: null, timer_handle: null,
}; };
@@ -83,6 +85,107 @@ function renderCurrent(status) {
c.innerHTML = `<strong>${label}</strong>`; c.innerHTML = `<strong>${label}</strong>`;
} }
function classifyService(s) {
  // Maps a service entry to a UI class. Returns 'unconfigured', 'missing',
  // 'unhealthy', 'starting' or 'running'; any other docker state is passed
  // through as-is, and 'unknown' is returned when there is no docker state.
  if (!s.host) return 'unconfigured';
  switch (s.docker_state) {
    case 'missing':
      return 'missing';
    case 'restarting':
    case 'exited':
      return 'unhealthy';
    case 'running':
      // Container is up; it only counts as healthy once HTTP responds.
      return s.http_ready ? 'running' : 'starting';
    default:
      return s.docker_state || 'unknown';
  }
}
function statusLabel(cls) {
  // Human-readable label for a class returned by classifyService.
  // Classes without a label are returned unchanged.
  const labels = {
    running: 'Healthy',
    unhealthy: 'Unhealthy',
    starting: 'Starting…',
    missing: 'Not installed',
    unconfigured: 'Not configured',
    unknown: 'Unknown',
  };
  // Own-property check: a plain lookup would also resolve inherited keys such
  // as 'constructor' or 'toString' and return a function instead of a label.
  return Object.prototype.hasOwnProperty.call(labels, cls) ? labels[cls] : cls;
}
async function renderServices() {
  // Renders one card per service into #services-grid from state.services.
  // Fetches /api/services only when there is no cached snapshot yet.
  let services = state.services;
  if (!services || Object.keys(services).length === 0) {
    try {
      services = await fetchJSON('/api/services');
      state.services = services;
    } catch (e) { console.error('services fetch failed', e); return; }
  }
  const panel = el('#services-panel');
  const grid = el('#services-grid');
  // Tolerate a null/undefined payload instead of throwing in Object.entries.
  const entries = Object.entries(services || {});
  if (entries.length === 0) { panel.classList.add('hidden'); return; }
  panel.classList.remove('hidden');
  grid.innerHTML = '';

  const pending = state.service_action_in_flight;
  for (const [name, s] of entries) {
    const cls = classifyService(s);
    const card = document.createElement('div');
    card.className = `service-card ${cls}`;
    const inFlight = Boolean(pending) && pending.startsWith(name + ':');
    const disable = (action) => {
      // Disable buttons that don't make sense for the current state
      if (inFlight) return true;
      if (cls === 'unconfigured' || cls === 'missing') return true;
      if (action === 'start' && (cls === 'running' || cls === 'starting')) return true;
      if (action === 'stop' && cls !== 'running' && cls !== 'starting' && cls !== 'unhealthy') return true;
      return false;
    };

    // Everything interpolated into innerHTML below is server-provided, so it
    // is escaped (text) or coerced to a number first.
    const hostRow = s.host
      ? `<div class="row"><span class="k">Host</span><span class="v">${escapeHtml(s.host)}:${escapeHtml(String(s.port))}</span></div>`
      : `<div class="row"><span class="k">Host</span><span class="v muted-v">not configured</span></div>`;
    const modelRow = s.model
      ? `<div class="row"><span class="k">Model</span><span class="v">${escapeHtml(s.model)}</span></div>`
      : '';
    const restarts = Number(s.restart_count);
    const restartsRow = s.restart_count != null && restarts > 1
      ? `<div class="row"><span class="k">Restarts</span><span class="v">${restarts}</span></div>`
      : '';
    const button = (action, label, extraClass) =>
      `<button class="btn${extraClass}" data-svc-verb="${action}" ${disable(action) ? 'disabled' : ''}>${label}</button>`;

    card.innerHTML = `
      <div class="head">
        <span class="name">${escapeHtml(name)}</span>
        <span class="kind">${escapeHtml(s.kind || '')}</span>
        <span class="status">${escapeHtml(String(statusLabel(cls)))}</span>
      </div>
      ${hostRow}
      ${modelRow}
      ${restartsRow}
      <div class="service-actions">
        ${button('start', 'Start', '')}
        ${button('restart', 'Restart', '')}
        ${button('stop', 'Stop', ' danger')}
      </div>
    `;

    // The service name is attached through the DOM API rather than being
    // interpolated into an HTML attribute, so it needs no attribute escaping.
    for (const btn of card.querySelectorAll('.btn[data-svc-verb]')) {
      const key = `${name}:${btn.dataset.svcVerb}`;
      btn.dataset.svcAction = key;
      btn.addEventListener('click', () => onServiceAction(key));
    }
    grid.appendChild(card);
  }
}
async function onServiceAction(key) {
  // Handles a click on a service button. `key` is "<service>:<action>".
  // Only one action may be in flight at a time; extra clicks are ignored.
  if (state.service_action_in_flight) return;
  // Split on the last ':' — the action never contains one.
  const sep = key.lastIndexOf(':');
  const name = key.slice(0, sep);
  const action = key.slice(sep + 1);
  state.service_action_in_flight = key;
  try {
    // Re-render first so the buttons show as disabled while the request runs.
    // Doing it inside the try means a render error cannot leave the in-flight
    // flag stuck.
    await renderServices();
    await fetchJSON(
      `/api/services/${encodeURIComponent(name)}/${encodeURIComponent(action)}`,
      { method: 'POST' },
    );
  } catch (e) {
    alert(`${action} ${name} failed: ${e.message}`);
  } finally {
    state.service_action_in_flight = null;
    // Refresh services state
    try {
      state.services = await fetchJSON('/api/services');
    } catch (e) {
      // Best effort: keep the previous snapshot, but leave a trace.
      console.warn('services refresh failed', e);
    }
    renderServices();
    pollStatus();
  }
}
function renderEndpoint(status) { function renderEndpoint(status) {
const v = status.vllm || {}; const v = status.vllm || {};
const panel = el('#endpoint-panel'); const panel = el('#endpoint-panel');
@@ -269,6 +372,11 @@ async function pollStatus() {
renderCurrent(status); renderCurrent(status);
renderEndpoint(status); renderEndpoint(status);
renderHealth(status); renderHealth(status);
// Refresh services state lazily — every 5s poll triggers this too.
try {
state.services = await fetchJSON('/api/services');
renderServices();
} catch {}
if (status.current_swap_job && status.current_swap_job !== state.swap_job_id) { if (status.current_swap_job && status.current_swap_job !== state.swap_job_id) {
attachToSwap(status.current_swap_job, /*needsBackfill=*/true); attachToSwap(status.current_swap_job, /*needsBackfill=*/true);
} else if (!status.current_swap_job && state.swap_job_id && !state.swap_eventsource) { } else if (!status.current_swap_job && state.swap_job_id && !state.swap_eventsource) {
@@ -392,6 +500,7 @@ async function init() {
setupCopyButtons(); setupCopyButtons();
await loadModels(); await loadModels();
await pollStatus(); await pollStatus();
await renderServices();
setInterval(pollStatus, 5000); setInterval(pollStatus, 5000);
} }
+8
View File
@@ -63,7 +63,15 @@
</details> </details>
</section> </section>
<section id="services-panel" class="services hidden">
<h2 class="section-title">Always-on services</h2>
<div id="services-grid" class="services-grid"></div>
</section>
<section id="models-section">
<h2 class="section-title">LLM swap</h2>
<section id="cards" class="cards"></section> <section id="cards" class="cards"></section>
</section>
<footer class="footer"> <footer class="footer">
<div class="health"> <div class="health">
+73
View File
@@ -217,6 +217,79 @@ main {
word-break: break-word; word-break: break-word;
} }
/* ===== Section titles ===== */
.section-title {
font-size: 13px;
font-weight: 500;
color: var(--muted);
margin: 24px 0 12px;
letter-spacing: 0.06em;
text-transform: uppercase;
}
.section-title:first-child { margin-top: 0; }
/* ===== Services panel ===== */
.services-grid {
display: grid;
gap: 14px;
grid-template-columns: repeat(auto-fill, minmax(280px, 1fr));
}
.service-card {
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius);
padding: 14px 16px;
display: flex;
flex-direction: column;
gap: 10px;
}
.service-card.running { border-color: rgba(74, 222, 128, 0.45); }
.service-card.unhealthy { border-color: rgba(239, 68, 68, 0.55); }
.service-card.missing,
.service-card.unconfigured { border-color: rgba(245, 158, 11, 0.45); }
.service-card .head {
display: flex;
align-items: center;
gap: 8px;
}
.service-card .head .name { font-weight: 600; font-size: 15px; }
.service-card .head .kind { font-size: 11px; color: var(--muted); text-transform: uppercase; letter-spacing: 0.06em; }
.service-card .head .status {
margin-left: auto;
font-size: 12px;
padding: 2px 8px;
border-radius: 999px;
background: var(--surface-2);
border: 1px solid var(--border);
color: var(--muted);
}
.service-card.running .status { color: var(--accent); border-color: rgba(74, 222, 128, 0.4); }
.service-card.unhealthy .status { color: var(--error); border-color: rgba(239, 68, 68, 0.4); }
.service-card.missing .status,
.service-card.unconfigured .status { color: var(--warn); border-color: rgba(245, 158, 11, 0.4); }
.service-card .row {
display: flex;
font-size: 12px;
color: var(--muted);
gap: 6px;
}
.service-card .row .k { width: 60px; flex-shrink: 0; }
.service-card .row .v { color: var(--text); font-family: ui-monospace, SFMono-Regular, "SF Mono", Menlo, monospace; word-break: break-all; }
.service-card .row .v.muted-v { color: var(--muted); font-family: inherit; }
.service-actions {
display: flex;
gap: 6px;
margin-top: 4px;
}
.service-actions .btn { padding: 6px 12px; font-size: 12px; flex: 1; }
.service-actions .btn.danger { color: var(--error); border-color: rgba(239, 68, 68, 0.3); }
.service-actions .btn.danger:hover:not(:disabled) { background: rgba(239, 68, 68, 0.08); border-color: var(--error); }
/* ===== Cards ===== */ /* ===== Cards ===== */
.cards { .cards {
+41 -1
View File
@@ -40,6 +40,42 @@ const inputSpec = InputSpec.of({
placeholder: 'your SSH username', placeholder: 'your SSH username',
masked: false, masked: false,
}), }),
parakeet_host: Value.text({
name: 'Parakeet host (optional)',
description:
'Override the host running the Parakeet STT container. Leave blank if Parakeet runs on Spark 2 — that\'s the default. Set this if you run Parakeet on Spark 1 or a different machine.',
required: false,
default: null,
placeholder: 'leave blank to use Spark 2',
masked: false,
}),
parakeet_container: Value.text({
name: 'Parakeet container name (optional)',
description:
'Docker container name for Parakeet. Defaults to "parakeet-asr" — change only if you named yours something else.',
required: false,
default: null,
placeholder: 'parakeet-asr',
masked: false,
}),
magpie_host: Value.text({
name: 'Magpie host (optional)',
description:
'Override the host running the Magpie TTS container. Leave blank if Magpie runs on Spark 2.',
required: false,
default: null,
placeholder: 'leave blank to use Spark 2',
masked: false,
}),
magpie_container: Value.text({
name: 'Magpie container name (optional)',
description:
'Docker container name for Magpie. Defaults to "magpie-tts".',
required: false,
default: null,
placeholder: 'magpie-tts',
masked: false,
}),
}) })
export const configureSparks = sdk.Action.withInput( export const configureSparks = sdk.Action.withInput(
@@ -58,7 +94,11 @@ export const configureSparks = sdk.Action.withInput(
return cfg ?? null return cfg ?? null
}, },
async ({ effects, input }) => { async ({ effects, input }) => {
await sparkConfigYaml.merge(effects, input) // Optional fields come through as `null`; coerce to empty string for the schema.
const normalized = Object.fromEntries(
Object.entries(input).map(([k, v]) => [k, v ?? '']),
) as Record<string, string>
await sparkConfigYaml.merge(effects, normalized)
return null return null
}, },
) )
@@ -7,6 +7,13 @@ export const sparkConfigSchema = z.object({
spark1_user: z.string().catch(''), spark1_user: z.string().catch(''),
spark2_host: z.string().catch(''), spark2_host: z.string().catch(''),
spark2_user: z.string().catch(''), spark2_user: z.string().catch(''),
// Optional per-service overrides. Blank => use spark2_host / spark2_user.
parakeet_host: z.string().catch(''),
parakeet_user: z.string().catch(''),
parakeet_container: z.string().catch(''),
magpie_host: z.string().catch(''),
magpie_user: z.string().catch(''),
magpie_container: z.string().catch(''),
}) })
export type SparkConfig = z.infer<typeof sparkConfigSchema> export type SparkConfig = z.infer<typeof sparkConfigSchema>
+12
View File
@@ -13,6 +13,12 @@ export const main = sdk.setupMain(async ({ effects }) => {
spark1_user: '', spark1_user: '',
spark2_host: '', spark2_host: '',
spark2_user: '', spark2_user: '',
parakeet_host: '',
parakeet_user: '',
parakeet_container: '',
magpie_host: '',
magpie_user: '',
magpie_container: '',
} }
return sdk.Daemons.of(effects).addDaemon('primary', { return sdk.Daemons.of(effects).addDaemon('primary', {
@@ -34,6 +40,12 @@ export const main = sdk.setupMain(async ({ effects }) => {
SPARK1_USER: cfg.spark1_user, SPARK1_USER: cfg.spark1_user,
SPARK2_HOST: cfg.spark2_host, SPARK2_HOST: cfg.spark2_host,
SPARK2_USER: cfg.spark2_user, SPARK2_USER: cfg.spark2_user,
PARAKEET_HOST: cfg.parakeet_host,
PARAKEET_USER: cfg.parakeet_user,
PARAKEET_CONTAINER: cfg.parakeet_container,
MAGPIE_HOST: cfg.magpie_host,
MAGPIE_USER: cfg.magpie_user,
MAGPIE_CONTAINER: cfg.magpie_container,
BIND_PORT: String(uiPort), BIND_PORT: String(uiPort),
}, },
}, },
+2 -2
View File
@@ -1,10 +1,10 @@
import { VersionInfo, IMPOSSIBLE } from '@start9labs/start-sdk' import { VersionInfo, IMPOSSIBLE } from '@start9labs/start-sdk'
export const v0_1_0 = VersionInfo.of({ export const v0_1_0 = VersionInfo.of({
version: '0.1.0:4', version: '0.2.0:0',
releaseNotes: { releaseNotes: {
en_US: en_US:
'Expose /api/endpoints as a separate StartOS service interface (type: api) so it appears alongside Web UI in the dashboard and gets its own discoverable URL.', 'Always-on services panel: dashboard now has cards for Parakeet and Magpie with Start/Stop/Restart buttons and live container state (uptime, restart count). Configure Sparks adds optional per-service host/container fields so Parakeet or Magpie can live on Spark 1 (or anywhere) instead of being hard-wired to Spark 2.',
}, },
migrations: { migrations: {
up: async ({ effects }) => {}, up: async ({ effects }) => {},