Initial scaffold: image/ FastAPI app, models.yaml, docs

- image/ FastAPI app: /api/status, /api/swap, /api/swap/{id}/stream, /api/test-connection
- models.yaml: 5-model catalog (qwen3-vl, gemma4, qwen36, qwen3-235b-fp8, qwen25-72b)
- README, runbook, known-issues
- Dry-run swap verified against live Spark 1 (gemma4 currently loaded)
This commit is contained in:
Grant
2026-05-12 09:29:13 -05:00
commit ae8efa1754
19 changed files with 1500 additions and 0 deletions
+43
View File
@@ -0,0 +1,43 @@
from __future__ import annotations
import httpx
from .config import Settings
# Timeout handed to every httpx.AsyncClient created by the health checks in
# this module (httpx interprets a bare float as seconds).
_TIMEOUT = 3.0
async def check_vllm(settings: Settings) -> dict:
    """Probe the vLLM endpoint on Spark 1 and report which models it lists.

    Sends ``GET http://{spark1_host}:{vllm_port}/v1/models`` and collects the
    ``id`` of every entry under the response's ``data`` key.

    Args:
        settings: Provides ``spark1_host`` and ``vllm_port``.

    Returns:
        ``{"ok": True, "current_model": <first id or None>, "all": [<ids>]}``
        when the request succeeds, otherwise ``{"ok": False, "error": <text>}``.
        Request, HTTP-status and parsing failures are reported in the result
        rather than raised.
    """
    if not settings.spark1_host:
        return {"ok": False, "error": "spark1 not configured"}
    try:
        async with httpx.AsyncClient(timeout=_TIMEOUT) as client:
            resp = await client.get(
                f"http://{settings.spark1_host}:{settings.vllm_port}/v1/models"
            )
            resp.raise_for_status()
            model_ids = [entry["id"] for entry in resp.json().get("data", [])]
    except Exception as exc:
        # Deliberately broad: this is a best-effort status probe.
        # Exceptions raised without a message stringify to "" (seen with
        # httpx timeouts), so fall back to the class name to keep the error
        # field informative.
        return {"ok": False, "error": str(exc) or type(exc).__name__}
    return {
        "ok": True,
        "current_model": model_ids[0] if model_ids else None,
        "all": model_ids,
    }
async def check_parakeet(settings: Settings) -> dict:
    """Probe the Parakeet health endpoint on Spark 2.

    Sends ``GET http://{spark2_host}:{parakeet_port}/health``.

    Args:
        settings: Provides ``spark2_host`` and ``parakeet_port``.

    Returns:
        ``{"ok": True, "detail": <parsed JSON body, or raw text if the body is
        not JSON>}`` when the endpoint answers with a success status, otherwise
        ``{"ok": False, "error": <text>}``. Request and HTTP-status failures
        are reported in the result rather than raised.
    """
    if not settings.spark2_host:
        return {"ok": False, "error": "spark2 not configured"}
    try:
        async with httpx.AsyncClient(timeout=_TIMEOUT) as client:
            resp = await client.get(
                f"http://{settings.spark2_host}:{settings.parakeet_port}/health"
            )
            resp.raise_for_status()
            try:
                detail = resp.json()
            except ValueError:
                # A success status already shows the service is up; a body
                # that is not JSON (e.g. plain "OK") must not turn a healthy
                # probe into a failure.
                detail = resp.text
    except Exception as exc:
        # Deliberately broad: this is a best-effort status probe.
        # Exceptions raised without a message stringify to "" (seen with
        # httpx timeouts), so fall back to the class name to keep the error
        # field informative.
        return {"ok": False, "error": str(exc) or type(exc).__name__}
    return {"ok": True, "detail": detail}
async def check_magpie(settings: Settings) -> dict:
    """Probe the Magpie readiness endpoint on Spark 2.

    Sends ``GET http://{spark2_host}:{magpie_port}/v1/health/ready``.

    Args:
        settings: Provides ``spark2_host`` and ``magpie_port``.

    Returns:
        ``{"ok": True, "detail": <body>}`` when the endpoint answers with a
        success status, where ``<body>`` is the parsed JSON if the response's
        ``content-type`` starts with ``application/json`` and the raw text
        otherwise. On failure returns ``{"ok": False, "error": <text>}``;
        request, HTTP-status and parsing failures are reported in the result
        rather than raised.
    """
    if not settings.spark2_host:
        return {"ok": False, "error": "spark2 not configured"}
    try:
        async with httpx.AsyncClient(timeout=_TIMEOUT) as client:
            resp = await client.get(
                f"http://{settings.spark2_host}:{settings.magpie_port}/v1/health/ready"
            )
            resp.raise_for_status()
            content_type = resp.headers.get("content-type", "")
            if content_type.startswith("application/json"):
                detail = resp.json()
            else:
                detail = resp.text
    except Exception as exc:
        # Deliberately broad: this is a best-effort status probe.
        # Exceptions raised without a message stringify to "" (seen with
        # httpx timeouts), so fall back to the class name to keep the error
        # field informative.
        return {"ok": False, "error": str(exc) or type(exc).__name__}
    return {"ok": True, "detail": detail}