v0.21.0:0 - matrix-bridge bot tile (status, update, restart, logs)
This commit is contained in:
+92
-5
@@ -3,7 +3,7 @@ import asyncio
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi import FastAPI, HTTPException, Query, Request
|
||||
from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from pydantic import BaseModel
|
||||
@@ -21,6 +21,7 @@ from .embeddings_proxy import build_router as build_embeddings_router
|
||||
from .redaction_gateway import build_router as build_redaction_router, MapStore
|
||||
from .hardware import HardwareProbe
|
||||
from .health import check_kokoro, check_parakeet, check_vllm, check_embeddings, check_qdrant
|
||||
from .matrix_bridge import MatrixBridgeManager
|
||||
from .models import load_catalog
|
||||
from .nim import SUGGESTED_NIMS, CATALOG_URL, NimManager
|
||||
from .overrides import add_custom, delete_custom, extract_knobs_from_args, load_overrides, set_knobs
|
||||
@@ -43,6 +44,7 @@ hardware_probe = HardwareProbe(settings)
|
||||
nim_manager = NimManager(settings)
|
||||
deep_health = DeepHealth(settings)
|
||||
speech_models = SpeechModelsManager(settings)
|
||||
matrix_bridge = MatrixBridgeManager(settings)
|
||||
|
||||
app = FastAPI(title="spark-control", version="0.1.0")
|
||||
|
||||
@@ -474,6 +476,11 @@ async def get_services() -> dict:
|
||||
http = await check_embeddings(settings)
|
||||
elif name == "qdrant":
|
||||
http = await check_qdrant(settings)
|
||||
elif svc.kind == "bot":
|
||||
# No HTTP health endpoint (host networking, no port) — judged purely
|
||||
# by docker state. http_ready stays None so the badge isn't pinned
|
||||
# to a "Starting…" verdict that can never clear.
|
||||
http = {"ok": None, "base_url": None}
|
||||
else:
|
||||
# Custom services expose a /health endpoint by convention.
|
||||
http = await check_kokoro(settings) if svc.kind == "tts" else {"ok": None, "base_url": svc.host and f"http://{svc.host}:{svc.port}"}
|
||||
@@ -484,7 +491,9 @@ async def get_services() -> dict:
|
||||
"container": svc.container,
|
||||
"kind": svc.kind,
|
||||
"base_url": http.get("base_url"),
|
||||
"http_ready": bool(http.get("ok")),
|
||||
# None (not False) for services with no HTTP surface (the bot), so
|
||||
# the UI judges them by docker state alone instead of "Starting…".
|
||||
"http_ready": None if svc.kind == "bot" else bool(http.get("ok")),
|
||||
# Prefer the check fn's own top-level model key (embeddings reports
|
||||
# it there); fall back to a model field inside detail for services
|
||||
# whose /health embeds it (parakeet).
|
||||
@@ -500,8 +509,11 @@ async def get_services() -> dict:
|
||||
results = await asyncio.gather(*[one(n) for n in services.keys()])
|
||||
for name, info in results:
|
||||
out[name] = info
|
||||
# Feed http reachability into the connectivity log (transition-only)
|
||||
record_state(name, bool(info.get("http_ready")))
|
||||
# Feed http reachability into the connectivity log (transition-only).
|
||||
# Skip services with no HTTP surface (http_ready is None) — they'd
|
||||
# otherwise register as perpetually "down".
|
||||
if info.get("http_ready") is not None:
|
||||
record_state(name, bool(info.get("http_ready")))
|
||||
return out
|
||||
|
||||
|
||||
@@ -606,7 +618,7 @@ async def stream_nim_install(job_id: str):
|
||||
@app.delete("/api/services/{name}")
|
||||
async def del_service(name: str) -> dict:
|
||||
# Only allow deleting custom services (not the bundled built-in keys)
|
||||
if name in ("parakeet", "kokoro", "embeddings", "qdrant"):
|
||||
if name in ("parakeet", "kokoro", "embeddings", "qdrant", "matrix-bridge"):
|
||||
raise HTTPException(400, "built-in service; cannot delete (use Configure Sparks to point at a different host)")
|
||||
delete_custom_service(name)
|
||||
return {"ok": True, "name": name}
|
||||
@@ -625,6 +637,81 @@ async def service_action(name: str, action: str) -> dict:
|
||||
return {"name": name, "action": action, **result}
|
||||
|
||||
|
||||
# ---- matrix-bridge bot: update (git pull + rebuild) + logs ----
|
||||
# Status badge + start/stop/restart ride the generic /api/services machinery
|
||||
# above (the bot is a registered ServiceDef). Only the long-running Update and
|
||||
# the logs view need bespoke endpoints.
|
||||
|
||||
def _serialize_mb_update(job) -> dict:
|
||||
return {
|
||||
"id": job.id,
|
||||
"state": job.state,
|
||||
"phase": job.phase,
|
||||
"started_at": job.started_at,
|
||||
"finished_at": job.finished_at,
|
||||
"returncode": job.returncode,
|
||||
"lines": job.lines,
|
||||
}
|
||||
|
||||
|
||||
@app.post("/api/matrix-bridge/update")
|
||||
async def post_matrix_bridge_update() -> dict:
|
||||
"""Pull latest code, rebuild, and recreate the bot container. Long-running
|
||||
(docker build) — returns a job id to stream."""
|
||||
try:
|
||||
job = await matrix_bridge.trigger_update()
|
||||
except RuntimeError as e:
|
||||
raise HTTPException(409 if "in progress" in str(e) else 503, str(e))
|
||||
return {"job_id": job.id, "state": job.state}
|
||||
|
||||
|
||||
@app.get("/api/matrix-bridge/update/{job_id}")
|
||||
async def get_matrix_bridge_update(job_id: str) -> dict:
|
||||
job = matrix_bridge.get(job_id)
|
||||
if job is None:
|
||||
raise HTTPException(404, "no such job")
|
||||
return _serialize_mb_update(job)
|
||||
|
||||
|
||||
@app.get("/api/matrix-bridge/update/{job_id}/stream")
|
||||
async def stream_matrix_bridge_update(job_id: str, request: Request):
|
||||
job = matrix_bridge.get(job_id)
|
||||
if job is None:
|
||||
raise HTTPException(404, "no such job")
|
||||
|
||||
async def gen():
|
||||
sent = 0
|
||||
last_phase = None
|
||||
while True:
|
||||
# An update can run for minutes; bail promptly if the client is gone
|
||||
# rather than spinning the poll loop until the job's 25-min ceiling.
|
||||
if await request.is_disconnected():
|
||||
return
|
||||
n = len(job.lines)
|
||||
if n > sent:
|
||||
for line in job.lines[sent:n]:
|
||||
yield f"data: {json.dumps({'line': line})}\n\n"
|
||||
sent = n
|
||||
if job.phase != last_phase:
|
||||
yield f"event: phase\ndata: {json.dumps({'state': job.state, 'phase': job.phase})}\n\n"
|
||||
last_phase = job.phase
|
||||
if job.returncode is not None and sent >= len(job.lines):
|
||||
yield f"event: done\ndata: {json.dumps({'state': job.state, 'returncode': job.returncode})}\n\n"
|
||||
return
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
return StreamingResponse(gen(), media_type="text/event-stream")
|
||||
|
||||
|
||||
@app.get("/api/matrix-bridge/logs")
|
||||
async def get_matrix_bridge_logs(tail: int = Query(100, ge=1, le=1000)) -> dict:
|
||||
"""Last N lines of `docker logs` for the bot container (stderr merged)."""
|
||||
result = await matrix_bridge.fetch_logs(tail=tail)
|
||||
if not result.get("ok"):
|
||||
raise HTTPException(502, result.get("output") or result.get("error") or "could not read logs")
|
||||
return result
|
||||
|
||||
|
||||
# ---- Speech model patch management ----
|
||||
|
||||
@app.get("/api/speech-models")
|
||||
|
||||
Reference in New Issue
Block a user