from __future__ import annotations from typing import Literal import yaml from pydantic import BaseModel, Field class ModelDef(BaseModel): display_name: str repo: str size_gb: float mode: Literal["solo", "cluster"] capabilities: list[str] = Field(default_factory=list) expected_ready_seconds: int = 300 vllm_args: list[str] = Field(default_factory=list) class Defaults(BaseModel): port: int = 8888 host: str = "0.0.0.0" class Catalog(BaseModel): defaults: Defaults = Field(default_factory=Defaults) models: dict[str, ModelDef] def load_catalog(path: str) -> Catalog: with open(path) as f: data = yaml.safe_load(f) return Catalog.model_validate(data) def build_launch_command(key: str, model: ModelDef, defaults: Defaults) -> str: """Return the shell command to launch `model` on Spark 1. Assumes cwd will be `~/spark-vllm-docker` (we cd in the SSH wrapper). """ solo = "--solo " if model.mode == "solo" else "" args = [f"--port={defaults.port}", f"--host={defaults.host}", *model.vllm_args] return f"./launch-cluster.sh {solo}-d exec vllm serve {model.repo} {' '.join(args)}"