v0.21.0:0 - matrix-bridge bot tile (status, update, restart, logs)

This commit is contained in:
Keysat
2026-06-15 22:57:40 -05:00
parent e307a08f05
commit 39f8410623
13 changed files with 549 additions and 13 deletions
+92 -5
View File
@@ -3,7 +3,7 @@ import asyncio
import json
from pathlib import Path
from fastapi import FastAPI, HTTPException
from fastapi import FastAPI, HTTPException, Query, Request
from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
@@ -21,6 +21,7 @@ from .embeddings_proxy import build_router as build_embeddings_router
from .redaction_gateway import build_router as build_redaction_router, MapStore
from .hardware import HardwareProbe
from .health import check_kokoro, check_parakeet, check_vllm, check_embeddings, check_qdrant
from .matrix_bridge import MatrixBridgeManager
from .models import load_catalog
from .nim import SUGGESTED_NIMS, CATALOG_URL, NimManager
from .overrides import add_custom, delete_custom, extract_knobs_from_args, load_overrides, set_knobs
@@ -43,6 +44,7 @@ hardware_probe = HardwareProbe(settings)
nim_manager = NimManager(settings)
deep_health = DeepHealth(settings)
speech_models = SpeechModelsManager(settings)
matrix_bridge = MatrixBridgeManager(settings)
app = FastAPI(title="spark-control", version="0.1.0")
@@ -474,6 +476,11 @@ async def get_services() -> dict:
http = await check_embeddings(settings)
elif name == "qdrant":
http = await check_qdrant(settings)
elif svc.kind == "bot":
# No HTTP health endpoint (host networking, no port) — judged purely
# by docker state. http_ready stays None so the badge isn't pinned
# to a "Starting…" verdict that can never clear.
http = {"ok": None, "base_url": None}
else:
# Custom services expose a /health endpoint by convention.
http = await check_kokoro(settings) if svc.kind == "tts" else {"ok": None, "base_url": svc.host and f"http://{svc.host}:{svc.port}"}
@@ -484,7 +491,9 @@ async def get_services() -> dict:
"container": svc.container,
"kind": svc.kind,
"base_url": http.get("base_url"),
"http_ready": bool(http.get("ok")),
# None (not False) for services with no HTTP surface (the bot), so
# the UI judges them by docker state alone instead of "Starting…".
"http_ready": None if svc.kind == "bot" else bool(http.get("ok")),
# Prefer the check fn's own top-level model key (embeddings reports
# it there); fall back to a model field inside detail for services
# whose /health embeds it (parakeet).
@@ -500,8 +509,11 @@ async def get_services() -> dict:
results = await asyncio.gather(*[one(n) for n in services.keys()])
for name, info in results:
out[name] = info
# Feed http reachability into the connectivity log (transition-only)
record_state(name, bool(info.get("http_ready")))
# Feed http reachability into the connectivity log (transition-only).
# Skip services with no HTTP surface (http_ready is None) — they'd
# otherwise register as perpetually "down".
if info.get("http_ready") is not None:
record_state(name, bool(info.get("http_ready")))
return out
@@ -606,7 +618,7 @@ async def stream_nim_install(job_id: str):
@app.delete("/api/services/{name}")
async def del_service(name: str) -> dict:
# Delete a custom service by name. Built-in names are rejected with HTTP 400;
# otherwise delete_custom_service(name) is called and {"ok": True, "name": name}
# is returned.
# Only allow deleting custom services (not the bundled built-in keys)
# NOTE(review): the two `if` lines below appear to be the removed/added pair of
# this diff hunk (the second one adds "matrix-bridge" to the protected names) —
# confirm that only the second line exists in the committed file.
if name in ("parakeet", "kokoro", "embeddings", "qdrant"):
if name in ("parakeet", "kokoro", "embeddings", "qdrant", "matrix-bridge"):
raise HTTPException(400, "built-in service; cannot delete (use Configure Sparks to point at a different host)")
delete_custom_service(name)
return {"ok": True, "name": name}
@@ -625,6 +637,81 @@ async def service_action(name: str, action: str) -> dict:
return {"name": name, "action": action, **result}
# ---- matrix-bridge bot: update (git pull + rebuild) + logs ----
# Status badge + start/stop/restart ride the generic /api/services machinery
# above (the bot is a registered ServiceDef). Only the long-running Update and
# the logs view need bespoke endpoints.
def _serialize_mb_update(job) -> dict:
return {
"id": job.id,
"state": job.state,
"phase": job.phase,
"started_at": job.started_at,
"finished_at": job.finished_at,
"returncode": job.returncode,
"lines": job.lines,
}
@app.post("/api/matrix-bridge/update")
async def post_matrix_bridge_update() -> dict:
    """Pull latest code, rebuild, and recreate the bot container.

    Long-running (docker build), so this only starts the job and returns
    its id and initial state; progress is read from the job endpoints.

    Raises:
        HTTPException: 409 when the manager's RuntimeError message contains
            "in progress", 503 for any other RuntimeError.
    """
    try:
        job = await matrix_bridge.trigger_update()
    except RuntimeError as e:
        message = str(e)
        status = 409 if "in progress" in message else 503
        # Chain the cause so the original RuntimeError stays attached to the
        # HTTP error in tracebacks and logs.
        raise HTTPException(status, message) from e
    return {"job_id": job.id, "state": job.state}
@app.get("/api/matrix-bridge/update/{job_id}")
async def get_matrix_bridge_update(job_id: str) -> dict:
    """Return a serialized snapshot of one update job.

    Raises:
        HTTPException: 404 when no job with ``job_id`` is known.
    """
    found = matrix_bridge.get(job_id)
    if found is not None:
        return _serialize_mb_update(found)
    raise HTTPException(404, "no such job")
@app.get("/api/matrix-bridge/update/{job_id}/stream")
async def stream_matrix_bridge_update(job_id: str, request: Request):
    """Stream an update job's output as server-sent events.

    Emits one unnamed ``data:`` event per output line, a ``phase`` event
    whenever the job's phase changes, and a final ``done`` event once the
    job has a return code and every line has been sent.

    Raises:
        HTTPException: 404 when no job with ``job_id`` is known.
    """
    job = matrix_bridge.get(job_id)
    if job is None:
        raise HTTPException(404, "no such job")

    async def gen():
        sent = 0
        last_phase = None
        while True:
            # An update can run for minutes; bail promptly if the client is gone
            # rather than spinning the poll loop until the job's 25-min ceiling.
            if await request.is_disconnected():
                return

            n = len(job.lines)
            if n > sent:
                for line in job.lines[sent:n]:
                    yield f"data: {json.dumps({'line': line})}\n\n"
                sent = n

            # Snapshot phase/state once. The generator is suspended at the
            # yield below, and the job may advance meanwhile; re-reading
            # job.phase afterwards would mark a never-emitted phase as
            # already reported and silently skip its event.
            phase = job.phase
            if phase != last_phase:
                state = job.state
                last_phase = phase
                yield f"event: phase\ndata: {json.dumps({'state': state, 'phase': phase})}\n\n"

            if job.returncode is not None and sent >= len(job.lines):
                yield f"event: done\ndata: {json.dumps({'state': job.state, 'returncode': job.returncode})}\n\n"
                return

            await asyncio.sleep(0.5)

    return StreamingResponse(gen(), media_type="text/event-stream")
@app.get("/api/matrix-bridge/logs")
async def get_matrix_bridge_logs(tail: int = Query(100, ge=1, le=1000)) -> dict:
    """Last N lines of `docker logs` for the bot container (stderr merged).

    ``tail`` is limited to 1..1000 and defaults to 100.

    Raises:
        HTTPException: 502 when the fetch result is not ``ok``; the detail
            is the result's ``output``, else its ``error``, else a generic
            message.
    """
    result = await matrix_bridge.fetch_logs(tail=tail)
    if result.get("ok"):
        return result
    detail = result.get("output") or result.get("error") or "could not read logs"
    raise HTTPException(502, detail)
# ---- Speech model patch management ----
@app.get("/api/speech-models")