v0.21.0:0 - matrix-bridge bot tile (status, update, restart, logs)
This commit is contained in:
@@ -42,6 +42,11 @@ class Settings:
|
||||
qdrant_user: str
|
||||
qdrant_container: str
|
||||
qdrant_collection: str
|
||||
matrix_bridge_host: str
|
||||
matrix_bridge_user: str
|
||||
matrix_bridge_container: str
|
||||
matrix_bridge_dir: str
|
||||
matrix_bridge_branch: str
|
||||
redaction_map_db: str
|
||||
redaction_map_ttl: int
|
||||
ssh_key_path: str
|
||||
@@ -81,6 +86,19 @@ class Settings:
|
||||
qdrant_user=_env("QDRANT_USER") or spark2_user,
|
||||
qdrant_container=_env("QDRANT_CONTAINER") or "qdrant",
|
||||
qdrant_collection=_env("QDRANT_COLLECTION", ""),
|
||||
# matrix-bridge bot container, driven as its own SSH user (the owner
|
||||
# of the ~/matrix-bridge git clone) so git/docker run unprivileged.
|
||||
# The user is BLANK by default and set via the "Configure Sparks"
|
||||
# action; leaving it blank reports the service as unconfigured, which
|
||||
# hides the tile. That keeps the shared package portable — a
|
||||
# deployment without the bot never shows a stray tile or a hardcoded
|
||||
# username. Host defaults to Spark 2 (same box); container/dir/branch
|
||||
# are sensible defaults. All are env-overridable.
|
||||
matrix_bridge_host=_env("MATRIX_BRIDGE_HOST") or spark2_host,
|
||||
matrix_bridge_user=_env("MATRIX_BRIDGE_USER"),
|
||||
matrix_bridge_container=_env("MATRIX_BRIDGE_CONTAINER") or "matrix-bridge",
|
||||
matrix_bridge_dir=_env("MATRIX_BRIDGE_DIR") or "~/matrix-bridge",
|
||||
matrix_bridge_branch=_env("MATRIX_BRIDGE_BRANCH") or "master",
|
||||
# Redaction gateway pseudonym-map store (server-held de-anon key).
|
||||
redaction_map_db=_env("REDACTION_MAP_DB", "/data/redaction_maps.db"),
|
||||
redaction_map_ttl=int(_env("REDACTION_MAP_TTL", "7200")),
|
||||
|
||||
@@ -0,0 +1,186 @@
|
||||
"""Update + logs for the matrix-bridge bot container on the Spark.
|
||||
|
||||
matrix-bridge is a single Docker container managed by docker compose out of a
|
||||
git clone at `~matrix_bridge_user/matrix-bridge`. Status (the badge) and
|
||||
start/stop/restart ride the generic service machinery in `services.py`
|
||||
(`docker_state` / `run_action`). The two things that don't fit that mould live
|
||||
here:
|
||||
|
||||
- **Update** — `git fetch && git reset --hard origin/<branch> && docker
|
||||
compose up -d --build`. Long-running (docker build), so it streams like the
|
||||
vLLM `UpdateManager`: fire-and-forget job, SSE stream, fail-loud rc.
|
||||
- **Logs** — a one-shot `docker logs --tail N` for diagnosing a red badge.
|
||||
|
||||
We connect **directly as the configured user** (`modelo` — the repo owner), so
|
||||
git never trips its dubious-ownership guard and docker runs via the user's
|
||||
docker-group membership. We deliberately do NOT `sudo -iu modelo`: this Spark
|
||||
has no passwordless sudo, so a sudo wrap would hang in SSH BatchMode.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import asyncio
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
from .config import Settings
|
||||
from .shellsafe import quote_arg
|
||||
from .ssh import ssh_run, ssh_stream, StreamHandle
|
||||
|
||||
# Hard ceiling on a single update. A first build after a base-image bump is
|
||||
# slow (minutes); the cache makes later ones quick. 25 min is generous headroom
|
||||
# without letting a genuinely wedged build spin forever.
|
||||
_UPDATE_TIMEOUT_S = 1500
|
||||
|
||||
|
||||
def build_update_command(directory: str, branch: str) -> str:
|
||||
"""The update one-liner, run from the bot's git clone as its owner.
|
||||
|
||||
`directory` and `branch` come from operator config (not request input), so
|
||||
they're interpolated directly — same trust model as the Spark hostnames in
|
||||
`health`/`updates`. `directory` may be `~/...`, which must stay unquoted so
|
||||
the remote login shell expands it; quoting would defeat that.
|
||||
"""
|
||||
return (
|
||||
f"cd {directory} && "
|
||||
f"git fetch origin && "
|
||||
f"git reset --hard origin/{branch} && "
|
||||
f"docker compose up -d --build"
|
||||
)
|
||||
|
||||
|
||||
def _phase_for(line: str) -> Optional[str]:
|
||||
"""Map a streamed output line to a human-readable phase, or None to keep
|
||||
the current phase. Kept loose — compose/buildkit output varies by version."""
|
||||
low = line.lower()
|
||||
if "git reset" in low or "head is now at" in low:
|
||||
return "Resetting to the latest release…"
|
||||
if "docker compose" in low or "buildkit" in low or low.startswith("step ") or "=> " in line or "building " in low:
|
||||
return "Building the bot image…"
|
||||
if "recreate" in low or "starting" in low or "started" in low or "container matrix-bridge" in low:
|
||||
return "Recreating the container…"
|
||||
if "already up to date" in low:
|
||||
return "No new code; rebuilding…"
|
||||
return None
|
||||
|
||||
|
||||
@dataclass
|
||||
class UpdateJob:
|
||||
id: str
|
||||
started_at: str
|
||||
state: str = "starting"
|
||||
lines: list[str] = field(default_factory=list)
|
||||
returncode: Optional[int] = None
|
||||
finished_at: Optional[str] = None
|
||||
phase: str = "Starting…"
|
||||
|
||||
def append(self, line: str) -> None:
|
||||
self.lines.append(line)
|
||||
if len(self.lines) > 1000:
|
||||
del self.lines[: len(self.lines) - 1000]
|
||||
|
||||
|
||||
class MatrixBridgeManager:
|
||||
def __init__(self, settings: Settings) -> None:
|
||||
self.settings = settings
|
||||
self.lock = asyncio.Lock()
|
||||
self.jobs: dict[str, UpdateJob] = {}
|
||||
self.current_job_id: Optional[str] = None
|
||||
|
||||
def _configured(self) -> bool:
|
||||
s = self.settings
|
||||
return bool(s.matrix_bridge_host and s.matrix_bridge_user)
|
||||
|
||||
def get(self, job_id: str) -> UpdateJob | None:
|
||||
return self.jobs.get(job_id)
|
||||
|
||||
async def fetch_logs(self, tail: int = 100) -> dict:
|
||||
"""One-shot `docker logs --tail N <container>` (stderr merged in)."""
|
||||
s = self.settings
|
||||
if not self._configured():
|
||||
return {"ok": False, "error": "matrix-bridge host not configured"}
|
||||
tail = max(1, min(int(tail), 1000))
|
||||
# tail is already int-clamped, but quote at the sink anyway so the
|
||||
# shellsafe convention (no raw interpolation into an SSH command) holds
|
||||
# regardless of caller.
|
||||
cmd = f"docker logs --tail {quote_arg(str(tail))} {quote_arg(s.matrix_bridge_container)} 2>&1"
|
||||
rc, out, err = await ssh_run(
|
||||
s.matrix_bridge_host, s.matrix_bridge_user, cmd, s, timeout=20
|
||||
)
|
||||
return {
|
||||
"ok": rc == 0,
|
||||
"rc": rc,
|
||||
"container": s.matrix_bridge_container,
|
||||
"output": (out or err).strip(),
|
||||
}
|
||||
|
||||
async def trigger_update(self) -> UpdateJob:
|
||||
if not self._configured():
|
||||
raise RuntimeError("matrix-bridge host not configured")
|
||||
if self.lock.locked():
|
||||
raise RuntimeError("An update is already in progress")
|
||||
job = UpdateJob(
|
||||
id=uuid.uuid4().hex[:8],
|
||||
started_at=datetime.now(timezone.utc).isoformat(),
|
||||
)
|
||||
self.jobs[job.id] = job
|
||||
self.current_job_id = job.id
|
||||
asyncio.create_task(self._run(job))
|
||||
return job
|
||||
|
||||
async def _run(self, job: UpdateJob) -> None:
|
||||
async with self.lock:
|
||||
try:
|
||||
await self._do(job)
|
||||
if job.state != "failed":
|
||||
job.state = "done"
|
||||
job.returncode = 0
|
||||
job.phase = "Done"
|
||||
except asyncio.TimeoutError:
|
||||
job.append(f"[error] update timed out after {_UPDATE_TIMEOUT_S}s")
|
||||
job.state = "failed"
|
||||
job.returncode = 124
|
||||
job.phase = "Timed out"
|
||||
except Exception as e:
|
||||
job.append(f"[error] {type(e).__name__}: {e}")
|
||||
job.state = "failed"
|
||||
if job.returncode is None:
|
||||
job.returncode = 1
|
||||
finally:
|
||||
job.finished_at = datetime.now(timezone.utc).isoformat()
|
||||
if self.current_job_id == job.id:
|
||||
self.current_job_id = None
|
||||
|
||||
async def _do(self, job: UpdateJob) -> None:
|
||||
s = self.settings
|
||||
cmd = build_update_command(s.matrix_bridge_dir, s.matrix_bridge_branch)
|
||||
job.append(f"$ {cmd}")
|
||||
job.state = "running"
|
||||
job.phase = "Fetching latest code…"
|
||||
|
||||
handle = StreamHandle()
|
||||
gen = ssh_stream(s.matrix_bridge_host, s.matrix_bridge_user, cmd, s, handle=handle)
|
||||
deadline = time.monotonic() + _UPDATE_TIMEOUT_S
|
||||
try:
|
||||
while True:
|
||||
remaining = deadline - time.monotonic()
|
||||
if remaining <= 0:
|
||||
raise asyncio.TimeoutError
|
||||
try:
|
||||
line = await asyncio.wait_for(gen.__anext__(), timeout=remaining)
|
||||
except StopAsyncIteration:
|
||||
break
|
||||
job.append(line)
|
||||
phase = _phase_for(line)
|
||||
if phase:
|
||||
job.phase = phase
|
||||
finally:
|
||||
# Closing the generator terminates the underlying ssh process and
|
||||
# populates handle.returncode via ssh_stream's finally block.
|
||||
await gen.aclose()
|
||||
|
||||
rc = handle.returncode or 0
|
||||
if rc != 0:
|
||||
job.state = "failed"
|
||||
job.returncode = rc
|
||||
+92
-5
@@ -3,7 +3,7 @@ import asyncio
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi import FastAPI, HTTPException, Query, Request
|
||||
from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from pydantic import BaseModel
|
||||
@@ -21,6 +21,7 @@ from .embeddings_proxy import build_router as build_embeddings_router
|
||||
from .redaction_gateway import build_router as build_redaction_router, MapStore
|
||||
from .hardware import HardwareProbe
|
||||
from .health import check_kokoro, check_parakeet, check_vllm, check_embeddings, check_qdrant
|
||||
from .matrix_bridge import MatrixBridgeManager
|
||||
from .models import load_catalog
|
||||
from .nim import SUGGESTED_NIMS, CATALOG_URL, NimManager
|
||||
from .overrides import add_custom, delete_custom, extract_knobs_from_args, load_overrides, set_knobs
|
||||
@@ -43,6 +44,7 @@ hardware_probe = HardwareProbe(settings)
|
||||
nim_manager = NimManager(settings)
|
||||
deep_health = DeepHealth(settings)
|
||||
speech_models = SpeechModelsManager(settings)
|
||||
matrix_bridge = MatrixBridgeManager(settings)
|
||||
|
||||
app = FastAPI(title="spark-control", version="0.1.0")
|
||||
|
||||
@@ -474,6 +476,11 @@ async def get_services() -> dict:
|
||||
http = await check_embeddings(settings)
|
||||
elif name == "qdrant":
|
||||
http = await check_qdrant(settings)
|
||||
elif svc.kind == "bot":
|
||||
# No HTTP health endpoint (host networking, no port) — judged purely
|
||||
# by docker state. http_ready stays None so the badge isn't pinned
|
||||
# to a "Starting…" verdict that can never clear.
|
||||
http = {"ok": None, "base_url": None}
|
||||
else:
|
||||
# Custom services expose a /health endpoint by convention.
|
||||
http = await check_kokoro(settings) if svc.kind == "tts" else {"ok": None, "base_url": svc.host and f"http://{svc.host}:{svc.port}"}
|
||||
@@ -484,7 +491,9 @@ async def get_services() -> dict:
|
||||
"container": svc.container,
|
||||
"kind": svc.kind,
|
||||
"base_url": http.get("base_url"),
|
||||
"http_ready": bool(http.get("ok")),
|
||||
# None (not False) for services with no HTTP surface (the bot), so
|
||||
# the UI judges them by docker state alone instead of "Starting…".
|
||||
"http_ready": None if svc.kind == "bot" else bool(http.get("ok")),
|
||||
# Prefer the check fn's own top-level model key (embeddings reports
|
||||
# it there); fall back to a model field inside detail for services
|
||||
# whose /health embeds it (parakeet).
|
||||
@@ -500,8 +509,11 @@ async def get_services() -> dict:
|
||||
results = await asyncio.gather(*[one(n) for n in services.keys()])
|
||||
for name, info in results:
|
||||
out[name] = info
|
||||
# Feed http reachability into the connectivity log (transition-only)
|
||||
record_state(name, bool(info.get("http_ready")))
|
||||
# Feed http reachability into the connectivity log (transition-only).
|
||||
# Skip services with no HTTP surface (http_ready is None) — they'd
|
||||
# otherwise register as perpetually "down".
|
||||
if info.get("http_ready") is not None:
|
||||
record_state(name, bool(info.get("http_ready")))
|
||||
return out
|
||||
|
||||
|
||||
@@ -606,7 +618,7 @@ async def stream_nim_install(job_id: str):
|
||||
@app.delete("/api/services/{name}")
|
||||
async def del_service(name: str) -> dict:
|
||||
# Only allow deleting custom services (not the bundled built-in keys)
|
||||
if name in ("parakeet", "kokoro", "embeddings", "qdrant"):
|
||||
if name in ("parakeet", "kokoro", "embeddings", "qdrant", "matrix-bridge"):
|
||||
raise HTTPException(400, "built-in service; cannot delete (use Configure Sparks to point at a different host)")
|
||||
delete_custom_service(name)
|
||||
return {"ok": True, "name": name}
|
||||
@@ -625,6 +637,81 @@ async def service_action(name: str, action: str) -> dict:
|
||||
return {"name": name, "action": action, **result}
|
||||
|
||||
|
||||
# ---- matrix-bridge bot: update (git pull + rebuild) + logs ----
|
||||
# Status badge + start/stop/restart ride the generic /api/services machinery
|
||||
# above (the bot is a registered ServiceDef). Only the long-running Update and
|
||||
# the logs view need bespoke endpoints.
|
||||
|
||||
def _serialize_mb_update(job) -> dict:
|
||||
return {
|
||||
"id": job.id,
|
||||
"state": job.state,
|
||||
"phase": job.phase,
|
||||
"started_at": job.started_at,
|
||||
"finished_at": job.finished_at,
|
||||
"returncode": job.returncode,
|
||||
"lines": job.lines,
|
||||
}
|
||||
|
||||
|
||||
@app.post("/api/matrix-bridge/update")
|
||||
async def post_matrix_bridge_update() -> dict:
|
||||
"""Pull latest code, rebuild, and recreate the bot container. Long-running
|
||||
(docker build) — returns a job id to stream."""
|
||||
try:
|
||||
job = await matrix_bridge.trigger_update()
|
||||
except RuntimeError as e:
|
||||
raise HTTPException(409 if "in progress" in str(e) else 503, str(e))
|
||||
return {"job_id": job.id, "state": job.state}
|
||||
|
||||
|
||||
@app.get("/api/matrix-bridge/update/{job_id}")
|
||||
async def get_matrix_bridge_update(job_id: str) -> dict:
|
||||
job = matrix_bridge.get(job_id)
|
||||
if job is None:
|
||||
raise HTTPException(404, "no such job")
|
||||
return _serialize_mb_update(job)
|
||||
|
||||
|
||||
@app.get("/api/matrix-bridge/update/{job_id}/stream")
|
||||
async def stream_matrix_bridge_update(job_id: str, request: Request):
|
||||
job = matrix_bridge.get(job_id)
|
||||
if job is None:
|
||||
raise HTTPException(404, "no such job")
|
||||
|
||||
async def gen():
|
||||
sent = 0
|
||||
last_phase = None
|
||||
while True:
|
||||
# An update can run for minutes; bail promptly if the client is gone
|
||||
# rather than spinning the poll loop until the job's 25-min ceiling.
|
||||
if await request.is_disconnected():
|
||||
return
|
||||
n = len(job.lines)
|
||||
if n > sent:
|
||||
for line in job.lines[sent:n]:
|
||||
yield f"data: {json.dumps({'line': line})}\n\n"
|
||||
sent = n
|
||||
if job.phase != last_phase:
|
||||
yield f"event: phase\ndata: {json.dumps({'state': job.state, 'phase': job.phase})}\n\n"
|
||||
last_phase = job.phase
|
||||
if job.returncode is not None and sent >= len(job.lines):
|
||||
yield f"event: done\ndata: {json.dumps({'state': job.state, 'returncode': job.returncode})}\n\n"
|
||||
return
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
return StreamingResponse(gen(), media_type="text/event-stream")
|
||||
|
||||
|
||||
@app.get("/api/matrix-bridge/logs")
|
||||
async def get_matrix_bridge_logs(tail: int = Query(100, ge=1, le=1000)) -> dict:
|
||||
"""Last N lines of `docker logs` for the bot container (stderr merged)."""
|
||||
result = await matrix_bridge.fetch_logs(tail=tail)
|
||||
if not result.get("ok"):
|
||||
raise HTTPException(502, result.get("output") or result.get("error") or "could not read logs")
|
||||
return result
|
||||
|
||||
|
||||
# ---- Speech model patch management ----
|
||||
|
||||
@app.get("/api/speech-models")
|
||||
|
||||
@@ -89,6 +89,17 @@ def services_from_settings(s: Settings) -> dict[str, ServiceDef]:
|
||||
container=s.qdrant_container,
|
||||
port=s.qdrant_port,
|
||||
),
|
||||
# matrix-bridge Matrix bot. No HTTP port to probe (host networking, no
|
||||
# health endpoint) — judged purely by docker state. Driven as its own
|
||||
# SSH user (modelo, the repo owner) so git/docker run unprivileged.
|
||||
"matrix-bridge": ServiceDef(
|
||||
name="matrix-bridge",
|
||||
kind="bot",
|
||||
host=s.matrix_bridge_host,
|
||||
user=s.matrix_bridge_user,
|
||||
container=s.matrix_bridge_container,
|
||||
port=0,
|
||||
),
|
||||
}
|
||||
for entry in load_custom_services():
|
||||
key = entry.get("key")
|
||||
|
||||
+142
-2
@@ -13,6 +13,7 @@ const state = {
|
||||
swap_progress: 0, // 0–1
|
||||
services: {},
|
||||
service_action_in_flight: null, // e.g. "parakeet:restart"
|
||||
mb_update_in_flight: false, // matrix-bridge update job running
|
||||
hardware: {},
|
||||
config: {},
|
||||
configured: true,
|
||||
@@ -438,8 +439,13 @@ function classifyService(s) {
|
||||
if (s.docker_state === 'missing') return 'missing';
|
||||
if (s.docker_state === 'restarting') return 'unhealthy';
|
||||
if (s.docker_state === 'exited') return 'unhealthy';
|
||||
if (s.docker_state === 'running' && !s.http_ready) return 'starting';
|
||||
if (s.docker_state === 'running' && s.http_ready) return 'running';
|
||||
if (s.docker_state === 'running') {
|
||||
// http_ready === false means an HTTP probe is expected but failing → still
|
||||
// warming up. null means the service has no HTTP surface (e.g. the bot), so
|
||||
// a running container is simply healthy.
|
||||
if (s.http_ready === false) return 'starting';
|
||||
return 'running';
|
||||
}
|
||||
return s.docker_state || 'unknown';
|
||||
}
|
||||
|
||||
@@ -471,6 +477,11 @@ async function renderServices() {
|
||||
grid.innerHTML = '';
|
||||
for (const [name, s] of entries) {
|
||||
const cls = classifyService(s);
|
||||
const isBot = s.kind === 'bot';
|
||||
// The bot tile is opt-in: it only belongs to deployments that actually run
|
||||
// matrix-bridge. When the container is absent (missing) or the host isn't
|
||||
// configured, hide the tile entirely rather than show a stray red card.
|
||||
if (isBot && (cls === 'missing' || cls === 'unconfigured')) continue;
|
||||
const card = document.createElement('div');
|
||||
card.className = `service-card ${cls}`;
|
||||
const inFlight = state.service_action_in_flight && state.service_action_in_flight.startsWith(name + ':');
|
||||
@@ -537,9 +548,11 @@ async function renderServices() {
|
||||
${restartsRow}
|
||||
${deepRow}
|
||||
<div class="service-actions">
|
||||
${isBot ? `<button class="btn primary" data-mb-update title="Pull latest code, rebuild, and recreate the bot" ${inFlight || state.mb_update_in_flight ? 'disabled' : ''}>Update</button>` : ''}
|
||||
<button class="btn" data-svc-action="${name}:start" ${disable('start') ? 'disabled' : ''}>Start</button>
|
||||
<button class="btn" data-svc-action="${name}:restart" ${disable('restart') ? 'disabled' : ''}>Restart</button>
|
||||
<button class="btn danger" data-svc-action="${name}:stop" ${disable('stop') ? 'disabled' : ''}>Stop</button>
|
||||
${isBot ? `<button class="btn" data-mb-logs title="Show the last 100 log lines">View logs</button>` : ''}
|
||||
</div>
|
||||
`;
|
||||
grid.appendChild(card);
|
||||
@@ -547,6 +560,10 @@ async function renderServices() {
|
||||
for (const btn of grid.querySelectorAll('.btn[data-svc-action]')) {
|
||||
btn.addEventListener('click', () => onServiceAction(btn.dataset.svcAction));
|
||||
}
|
||||
const mbUpdateBtn = grid.querySelector('[data-mb-update]');
|
||||
if (mbUpdateBtn) mbUpdateBtn.addEventListener('click', onMatrixBridgeUpdate);
|
||||
const mbLogsBtn = grid.querySelector('[data-mb-logs]');
|
||||
if (mbLogsBtn) mbLogsBtn.addEventListener('click', openMatrixBridgeLogs);
|
||||
for (const btn of grid.querySelectorAll('[data-dh-run]')) {
|
||||
btn.addEventListener('click', () => onDeepHealthRun(btn.dataset.dhRun, btn));
|
||||
}
|
||||
@@ -725,6 +742,118 @@ async function onServiceAction(key) {
|
||||
}
|
||||
}
|
||||
|
||||
// ===================== matrix-bridge bot (update + logs) =====================
|
||||
|
||||
const mbState = { job_id: null, eventsource: null, timer: null, started_at: null };
|
||||
|
||||
function mbTimerStart(at) {
|
||||
mbState.started_at = at;
|
||||
if (mbState.timer) clearInterval(mbState.timer);
|
||||
const tick = () => {
|
||||
if (!mbState.started_at) return;
|
||||
const sec = Math.max(0, Math.floor((Date.now() - mbState.started_at) / 1000));
|
||||
el('#mb-update-elapsed').textContent = `${Math.floor(sec / 60)}:${(sec % 60).toString().padStart(2, '0')}`;
|
||||
};
|
||||
tick();
|
||||
mbState.timer = setInterval(tick, 500);
|
||||
}
|
||||
|
||||
async function onMatrixBridgeUpdate() {
|
||||
if (state.mb_update_in_flight) return;
|
||||
if (!confirm('Update the matrix-bridge bot?\n\nThis pulls the latest code, rebuilds the container image, and recreates the container. The first build after a base-image change can take several minutes. The bot is briefly offline while it restarts.')) return;
|
||||
state.mb_update_in_flight = true;
|
||||
renderServices();
|
||||
try {
|
||||
const r = await fetchJSON('/api/matrix-bridge/update', { method: 'POST' });
|
||||
attachMbUpdateProgress(r.job_id);
|
||||
} catch (e) {
|
||||
state.mb_update_in_flight = false;
|
||||
renderServices();
|
||||
alert('Update failed to start: ' + e.message);
|
||||
}
|
||||
}
|
||||
|
||||
async function attachMbUpdateProgress(jobId) {
|
||||
mbState.job_id = jobId;
|
||||
el('#mb-update-log').textContent = '';
|
||||
el('#mb-update-title').textContent = 'Updating matrix-bridge…';
|
||||
el('#mb-update-phase').textContent = 'Starting…';
|
||||
el('#mb-update-dialog').showModal();
|
||||
try {
|
||||
const snap = await fetchJSON(`/api/matrix-bridge/update/${jobId}`);
|
||||
mbTimerStart(Date.parse(snap.started_at));
|
||||
el('#mb-update-phase').textContent = snap.phase || 'Working…';
|
||||
el('#mb-update-log').textContent = (snap.lines || []).join('\n');
|
||||
if (snap.returncode !== null) { onMbUpdateDone(snap); return; }
|
||||
} catch { mbTimerStart(Date.now()); }
|
||||
const es = new EventSource(`/api/matrix-bridge/update/${jobId}/stream`);
|
||||
mbState.eventsource = es;
|
||||
es.onmessage = ev => {
|
||||
try {
|
||||
const d = JSON.parse(ev.data);
|
||||
if (d.line !== undefined) {
|
||||
const log = el('#mb-update-log');
|
||||
log.textContent += d.line + '\n';
|
||||
log.scrollTop = log.scrollHeight;
|
||||
}
|
||||
} catch {}
|
||||
};
|
||||
es.addEventListener('phase', ev => {
|
||||
try { el('#mb-update-phase').textContent = JSON.parse(ev.data).phase; } catch {}
|
||||
});
|
||||
es.addEventListener('done', ev => {
|
||||
let d = {}; try { d = JSON.parse(ev.data); } catch {}
|
||||
onMbUpdateDone(d);
|
||||
});
|
||||
es.onerror = () => {
|
||||
// Don't leave the Update button wedged-disabled on a dropped stream. The
|
||||
// job keeps running server-side; re-clicking Update returns a clean 409.
|
||||
es.close();
|
||||
mbState.eventsource = null;
|
||||
state.mb_update_in_flight = false;
|
||||
el('#mb-update-phase').textContent = 'Lost connection to the update stream — reopen or check logs.';
|
||||
renderServices();
|
||||
};
|
||||
}
|
||||
|
||||
function onMbUpdateDone(d) {
|
||||
if (mbState.eventsource) { mbState.eventsource.close(); mbState.eventsource = null; }
|
||||
if (mbState.timer) { clearInterval(mbState.timer); mbState.timer = null; }
|
||||
state.mb_update_in_flight = false;
|
||||
if (d.state === 'failed') {
|
||||
el('#mb-update-title').textContent = `Update failed (rc=${d.returncode})`;
|
||||
el('#mb-update-phase').textContent = 'Failed — see the log above.';
|
||||
} else {
|
||||
el('#mb-update-title').textContent = 'Update complete';
|
||||
el('#mb-update-phase').textContent = 'Done ✓';
|
||||
}
|
||||
// Refresh the tile's badge.
|
||||
(async () => { try { state.services = await fetchJSON('/api/services'); } catch {} renderServices(); })();
|
||||
}
|
||||
|
||||
async function openMatrixBridgeLogs() {
|
||||
const pre = el('#mb-logs-pre');
|
||||
el('#mb-logs-title').textContent = 'matrix-bridge logs';
|
||||
pre.textContent = 'Loading…';
|
||||
el('#mb-logs-dialog').showModal();
|
||||
await loadMatrixBridgeLogs();
|
||||
}
|
||||
|
||||
async function loadMatrixBridgeLogs() {
|
||||
const pre = el('#mb-logs-pre');
|
||||
const btn = el('#mb-logs-refresh');
|
||||
if (btn) btn.disabled = true;
|
||||
try {
|
||||
const r = await fetchJSON('/api/matrix-bridge/logs?tail=100');
|
||||
pre.textContent = r.output || '(no output)';
|
||||
pre.scrollTop = pre.scrollHeight;
|
||||
} catch (e) {
|
||||
pre.textContent = 'Could not read logs: ' + e.message;
|
||||
} finally {
|
||||
if (btn) btn.disabled = false;
|
||||
}
|
||||
}
|
||||
|
||||
function renderEndpoint(status) {
|
||||
const v = status.vllm || {};
|
||||
const panel = el('#endpoint-panel');
|
||||
@@ -1883,6 +2012,17 @@ async function init() {
|
||||
el('#nim-cancel').addEventListener('click', () => el('#nim-dialog').close());
|
||||
el('#nim-form').addEventListener('submit', submitNim);
|
||||
el('#nim-prog-close').addEventListener('click', () => el('#nim-progress-dialog').close());
|
||||
el('#mb-update-close').addEventListener('click', () => el('#mb-update-dialog').close());
|
||||
// Dismissing the modal (Close or Esc) stops streaming; the job runs on
|
||||
// server-side and re-clicking Update returns a 409 if still in progress.
|
||||
el('#mb-update-dialog').addEventListener('close', () => {
|
||||
if (mbState.eventsource) { mbState.eventsource.close(); mbState.eventsource = null; }
|
||||
if (mbState.timer) { clearInterval(mbState.timer); mbState.timer = null; }
|
||||
state.mb_update_in_flight = false;
|
||||
renderServices();
|
||||
});
|
||||
el('#mb-logs-close').addEventListener('click', () => el('#mb-logs-dialog').close());
|
||||
el('#mb-logs-refresh').addEventListener('click', loadMatrixBridgeLogs);
|
||||
el('#open-connectivity').addEventListener('click', openConnectivityDialog);
|
||||
el('#connectivity-close').addEventListener('click', () => el('#connectivity-dialog').close());
|
||||
// Hardware-card buttons (Wake-on-LAN on unreachable cards; SSH-key copy on
|
||||
|
||||
@@ -164,6 +164,37 @@
|
||||
</div>
|
||||
</form>
|
||||
</dialog>
|
||||
|
||||
<dialog id="mb-update-dialog" class="modal">
|
||||
<form method="dialog" class="modal-form">
|
||||
<h3 id="mb-update-title">Updating matrix-bridge…</h3>
|
||||
<div class="phase-row">
|
||||
<div class="phase" id="mb-update-phase">Starting…</div>
|
||||
<span class="spacer"></span>
|
||||
<span class="timer" id="mb-update-elapsed">0:00</span>
|
||||
</div>
|
||||
<details open>
|
||||
<summary class="muted small">Log</summary>
|
||||
<pre id="mb-update-log" class="log"></pre>
|
||||
</details>
|
||||
<div class="modal-actions">
|
||||
<button type="button" id="mb-update-close" class="btn">Close</button>
|
||||
</div>
|
||||
</form>
|
||||
</dialog>
|
||||
|
||||
<dialog id="mb-logs-dialog" class="modal">
|
||||
<form method="dialog" class="modal-form">
|
||||
<h3 id="mb-logs-title">matrix-bridge logs</h3>
|
||||
<p class="muted small">Last 100 lines from <code>docker logs</code> on the Spark.</p>
|
||||
<pre id="mb-logs-pre" class="log"></pre>
|
||||
<div class="modal-actions">
|
||||
<button type="button" id="mb-logs-refresh" class="btn">Refresh</button>
|
||||
<span class="spacer"></span>
|
||||
<button type="button" id="mb-logs-close" class="btn">Close</button>
|
||||
</div>
|
||||
</form>
|
||||
</dialog>
|
||||
</section>
|
||||
|
||||
<section id="speech-models-panel" class="speech-models hidden">
|
||||
|
||||
@@ -526,10 +526,12 @@ main {
|
||||
#dl-log-details { margin-top: 12px; }
|
||||
#dl-log-details summary { cursor: pointer; padding: 4px 0; }
|
||||
|
||||
/* ===== NIM install dialog ===== */
|
||||
/* ===== NIM install + matrix-bridge dialogs ===== */
|
||||
|
||||
.modal#nim-dialog,
|
||||
.modal#nim-progress-dialog { max-width: 640px; }
|
||||
.modal#nim-progress-dialog,
|
||||
.modal#mb-update-dialog,
|
||||
.modal#mb-logs-dialog { max-width: 640px; }
|
||||
.nim-grid {
|
||||
display: grid;
|
||||
gap: 8px;
|
||||
|
||||
Reference in New Issue
Block a user