Initial scaffold: image/ FastAPI app, models.yaml, docs
- image/ FastAPI app: /api/status, /api/swap, /api/swap/{id}/stream, /api/test-connection
- models.yaml: 5-model catalog (qwen3-vl, gemma4, qwen36, qwen3-235b-fp8, qwen25-72b)
- README, runbook, known-issues
- Dry-run swap verified against live Spark 1 (gemma4 currently loaded)
This commit is contained in:
@@ -0,0 +1,40 @@
|
||||
from __future__ import annotations

import shlex
from typing import Literal

import yaml
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ModelDef(BaseModel):
    # One catalog entry describing a model that can be served.

    # Label for the model; presumably what a UI shows -- confirm with callers.
    display_name: str

    # Passed verbatim as the model argument of `vllm serve` by
    # build_launch_command().
    repo: str

    # Presumably the on-disk / download size in gigabytes -- TODO confirm.
    size_gb: float

    # "solo" makes build_launch_command() add `--solo` to launch-cluster.sh;
    # "cluster" omits the flag.
    mode: Literal["solo", "cluster"]

    # Free-form capability tags; an omitted field yields a fresh empty list.
    capabilities: list[str] = Field(default_factory=list)

    # Defaults to 300. NOTE(review): looks like a readiness timeout/estimate
    # in seconds -- not consumed anywhere in this module.
    expected_ready_seconds: int = 300

    # Extra arguments appended after --port/--host on the `vllm serve`
    # command line; an omitted field yields a fresh empty list.
    vllm_args: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class Defaults(BaseModel):
    # Settings shared by every model's launch command.

    # Emitted as `--port=<port>` on the `vllm serve` command line.
    port: int = 8888

    # Emitted as `--host=<host>` on the `vllm serve` command line.
    host: str = "0.0.0.0"
|
||||
|
||||
|
||||
class Catalog(BaseModel):
    # Root object of the parsed catalog document.

    # Optional section; a default-constructed Defaults is used when absent.
    defaults: Defaults = Field(default_factory=Defaults)

    # Required mapping of catalog key -> model definition.
    models: dict[str, ModelDef]
|
||||
|
||||
|
||||
def load_catalog(path: str) -> Catalog:
    """Read the YAML catalog at `path` and validate it into a `Catalog`.

    Args:
        path: Filesystem path of the YAML catalog file.

    Returns:
        The validated `Catalog`.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the file is not well-formed YAML.
        pydantic.ValidationError: If the parsed document does not match the
            `Catalog` schema (this includes an empty file, which parses to
            `None`).
    """
    # Decode explicitly as UTF-8 so the result does not depend on the host
    # locale's default encoding.
    with open(path, encoding="utf-8") as stream:
        raw = yaml.safe_load(stream)
    return Catalog.model_validate(raw)
|
||||
|
||||
|
||||
def build_launch_command(key: str, model: ModelDef, defaults: Defaults) -> str:
    """Return the shell command to launch `model` on Spark 1.

    Assumes cwd will be `~/spark-vllm-docker` (we cd in the SSH wrapper).

    The repo, the port/host options and every entry of `model.vllm_args`
    are passed through `shlex.quote`, so each one reaches `vllm serve` as
    exactly one argument even when it contains spaces, quotes or shell
    metacharacters. Arguments made only of shell-safe characters are emitted
    unchanged.

    Args:
        key: Catalog key of the model. Not used when building the command;
            kept in the signature for callers.
        model: Model definition supplying `mode`, `repo` and `vllm_args`.
        defaults: Shared settings supplying `port` and `host`.

    Returns:
        A single command line of the form
        `./launch-cluster.sh [--solo] -d exec vllm serve <repo> <args...>`.
    """
    launcher = ["./launch-cluster.sh"]
    if model.mode == "solo":
        launcher.append("--solo")
    launcher += ["-d", "exec", "vllm", "serve"]

    serve_args = [
        model.repo,
        f"--port={defaults.port}",
        f"--host={defaults.host}",
        *model.vllm_args,
    ]
    # Only the data-driven pieces are quoted; the launcher words are fixed
    # literals and need no escaping.
    return " ".join([*launcher, *(shlex.quote(arg) for arg in serve_args)])
|
||||
Reference in New Issue
Block a user