v0.2.2 - spark-vllm-docker update checks + Apply Update
Backend:
- updates.py: get_update_status() runs git fetch + git rev-list --left-right --count HEAD...origin/main to learn ahead/behind/dirty, plus git log for pending commits
- UpdateManager class with asyncio.Lock; one update at a time
- POST /api/updates/apply triggers "git pull --ff-only && ./build-and-copy.sh -c" over SSH with streamed log + phase detection (Pulling / Building the vLLM container / Copying to peer Sparks)
- GET /api/updates returns {ok, behind, ahead, dirty, current, log[], branch}
Frontend:
- Persistent banner near the footer: hidden when up to date, blue when N commits behind, orange warning when uncommitted (dirty) local changes block the update
- 'Show details' expands a list of pending commits
- 'Apply update' triggers the long-running build with phase + elapsed timer + collapsible logs
- Confirmation dialog explains the 5–40 min duration
Package: bump 0.2.2:0
This commit is contained in:
@@ -16,12 +16,14 @@ from .models import load_catalog
|
||||
from .services import docker_state, run_action, services_from_settings
|
||||
from .ssh import ssh_run
|
||||
from .swap import SwapManager
|
||||
from .updates import UpdateManager, get_update_status
|
||||
|
||||
|
||||
# Module-level objects created once at import time and shared by the route
# handlers in this file.
settings = Settings.from_env()
catalog = load_catalog(settings.models_yaml)
swap_manager = SwapManager(settings, catalog)
download_manager = DownloadManager(settings)
# Backs the /api/updates/apply and /api/updates/{job_id} endpoints.
update_manager = UpdateManager(settings)

# NOTE(review): confirm this version string is meant to stay at 0.1.0
# independently of the package version.
app = FastAPI(title="spark-control", version="0.1.0")
|
||||
|
||||
@@ -307,6 +309,69 @@ async def stream_download(job_id: str):
|
||||
return StreamingResponse(gen(), media_type="text/event-stream")
|
||||
|
||||
|
||||
@app.get("/api/updates")
async def get_updates() -> dict:
    """Return the current update status.

    Delegates to ``get_update_status`` with the module-level ``settings`` and
    hands its result back unchanged; the payload shape is defined there.
    """
    status = await get_update_status(settings)
    return status
|
||||
|
||||
|
||||
# Allowed values for the ``mode`` field of an update request.
_UpdateMode = Literal["solo", "cluster"]


class UpdateRequest(BaseModel):
    """Request body for ``POST /api/updates/apply``."""

    # Forwarded to ``update_manager.trigger``; "cluster" when omitted.
    mode: _UpdateMode = "cluster"
|
||||
|
||||
|
||||
@app.post("/api/updates/apply")
async def post_update_apply(req: UpdateRequest) -> dict:
    """Start an update job and return its identifying fields.

    Args:
        req: Request body; ``req.mode`` is forwarded to
            ``update_manager.trigger``.

    Returns:
        A dict with ``job_id``, ``mode`` and ``state`` taken from the job
        returned by ``update_manager.trigger``.

    Raises:
        HTTPException: 503 when ``settings.configured`` is falsy; 409 when
            ``update_manager.trigger`` raises ``RuntimeError`` (presumably
            because another update is already running -- confirm in
            updates.py).
    """
    if not settings.configured:
        raise HTTPException(503, "spark1 not configured")
    try:
        job = await update_manager.trigger(req.mode)
    except RuntimeError as e:
        # Chain the original error so the root cause stays attached to the
        # HTTP error in tracebacks and logs.
        raise HTTPException(409, str(e)) from e
    return {"job_id": job.id, "mode": job.mode, "state": job.state}
|
||||
|
||||
|
||||
@app.get("/api/updates/{job_id}")
async def get_update_job(job_id: str) -> dict:
    """Return a snapshot of one update job.

    Args:
        job_id: Identifier looked up through ``update_manager.get``.

    Returns:
        A dict exposing the job's ``id``, ``mode``, ``state``, ``phase``,
        ``started_at``, ``finished_at``, ``returncode`` and ``lines``.

    Raises:
        HTTPException: 404 when ``update_manager.get`` returns ``None``.
    """
    job = update_manager.get(job_id)
    if job is None:
        raise HTTPException(404, "no such job")

    # Response keys match the job attribute names one-to-one, in this order.
    exposed = (
        "id",
        "mode",
        "state",
        "phase",
        "started_at",
        "finished_at",
        "returncode",
        "lines",
    )
    return {name: getattr(job, name) for name in exposed}
|
||||
|
||||
|
||||
@app.get("/api/updates/{job_id}/stream")
async def stream_update(job_id: str):
    """Stream an update job's log lines and phase changes as server-sent events.

    Args:
        job_id: Identifier looked up through ``update_manager.get``.

    Returns:
        A ``StreamingResponse`` with media type ``text/event-stream``. It
        emits unnamed events carrying ``{"line": ...}``, ``phase`` events
        carrying ``state``/``phase`` whenever ``job.phase`` changes, and a
        final ``done`` event carrying ``state``/``returncode``.

    Raises:
        HTTPException: 404 when ``update_manager.get`` returns ``None``.
    """
    job = update_manager.get(job_id)
    if job is None:
        raise HTTPException(404, "no such job")

    def frame(payload: dict, event: str | None = None) -> str:
        # One SSE frame: optional "event:" header, JSON "data:" line, blank line.
        header = f"event: {event}\n" if event else ""
        return f"{header}data: {json.dumps(payload)}\n\n"

    async def gen():
        cursor = 0  # number of log lines already sent to the client
        seen_phase = None
        while True:
            # Snapshot the length first so the slice and the cursor agree even
            # if more lines are appended while we are yielding.
            total = len(job.lines)
            for text in job.lines[cursor:total]:
                yield frame({"line": text})
            cursor = max(cursor, total)

            current_phase = job.phase
            if current_phase != seen_phase:
                yield frame({"state": job.state, "phase": current_phase}, event="phase")
                seen_phase = current_phase

            # Finish only once the job has a return code and every line that
            # exists right now has been sent.
            if job.returncode is not None and cursor >= len(job.lines):
                yield frame({"state": job.state, "returncode": job.returncode}, event="done")
                return

            await asyncio.sleep(0.5)

    return StreamingResponse(gen(), media_type="text/event-stream")
|
||||
|
||||
|
||||
@app.post("/api/test-connection")
|
||||
async def test_connection() -> dict:
|
||||
"""Probe both Sparks with a `hostname` command. Useful for the StartOS setup flow."""
|
||||
|
||||
Reference in New Issue
Block a user