v0.2.2 - spark-vllm-docker update checks + Apply Update

Backend:
- updates.py: get_update_status() runs git fetch + git rev-list --left-right --count HEAD...origin/main to learn ahead/behind/dirty, plus git log for pending commits
- UpdateManager class with asyncio.Lock; one update at a time
- POST /api/updates/apply triggers "git pull --ff-only && ./build-and-copy.sh -c" over SSH with streamed log + phase detection (Pulling / Building the vLLM container / Copying to peer Sparks)
- GET /api/updates returns {ok, behind, ahead, dirty, current, log[], branch}

Frontend:
- Persistent banner near footer: hidden when up-to-date, blue when N commits behind, warn (orange) when local dirty changes block update
- 'Show details' expands a list of pending commits
- 'Apply update' triggers the long-running build with phase + elapsed timer + collapsible logs
- Confirmation dialog explains the 5–40 min duration

Package: bump 0.2.2:0
This commit is contained in:
Grant
2026-05-12 11:26:55 -05:00
parent 9dde938348
commit 474417b458
6 changed files with 408 additions and 2 deletions
+65
View File
@@ -16,12 +16,14 @@ from .models import load_catalog
from .services import docker_state, run_action, services_from_settings
from .ssh import ssh_run
from .swap import SwapManager
from .updates import UpdateManager, get_update_status
settings = Settings.from_env()
catalog = load_catalog(settings.models_yaml)
swap_manager = SwapManager(settings, catalog)
download_manager = DownloadManager(settings)
update_manager = UpdateManager(settings)
app = FastAPI(title="spark-control", version="0.1.0")
@@ -307,6 +309,69 @@ async def stream_download(job_id: str):
return StreamingResponse(gen(), media_type="text/event-stream")
@app.get("/api/updates")
async def get_updates() -> dict:
return await get_update_status(settings)
class UpdateRequest(BaseModel):
mode: Literal["solo", "cluster"] = "cluster"
@app.post("/api/updates/apply")
async def post_update_apply(req: UpdateRequest) -> dict:
if not settings.configured:
raise HTTPException(503, "spark1 not configured")
try:
job = await update_manager.trigger(req.mode)
except RuntimeError as e:
raise HTTPException(409, str(e))
return {"job_id": job.id, "mode": job.mode, "state": job.state}
@app.get("/api/updates/{job_id}")
async def get_update_job(job_id: str) -> dict:
job = update_manager.get(job_id)
if job is None:
raise HTTPException(404, "no such job")
return {
"id": job.id,
"mode": job.mode,
"state": job.state,
"phase": job.phase,
"started_at": job.started_at,
"finished_at": job.finished_at,
"returncode": job.returncode,
"lines": job.lines,
}
@app.get("/api/updates/{job_id}/stream")
async def stream_update(job_id: str):
job = update_manager.get(job_id)
if job is None:
raise HTTPException(404, "no such job")
async def gen():
sent = 0
last_phase = None
while True:
n = len(job.lines)
if n > sent:
for line in job.lines[sent:n]:
yield f"data: {json.dumps({'line': line})}\n\n"
sent = n
if job.phase != last_phase:
yield f"event: phase\ndata: {json.dumps({'state': job.state, 'phase': job.phase})}\n\n"
last_phase = job.phase
if job.returncode is not None and sent >= len(job.lines):
yield f"event: done\ndata: {json.dumps({'state': job.state, 'returncode': job.returncode})}\n\n"
return
await asyncio.sleep(0.5)
return StreamingResponse(gen(), media_type="text/event-stream")
@app.post("/api/test-connection")
async def test_connection() -> dict:
"""Probe both Sparks with a `hostname` command. Useful for the StartOS setup flow."""