87334f85f0
- models.yaml: add 'description' field for all 5 models (generic, anyone-can-use) - ModelDef gains optional description: str | None field - UI: render description below meta tags; mute the repo line further - escapeHtml() for safety in case descriptions/names contain HTML chars - Update runbook: how to add a new model with description
42 lines
1.2 KiB
Python
42 lines
1.2 KiB
Python
from __future__ import annotations
|
|
from typing import Literal
|
|
import yaml
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
class ModelDef(BaseModel):
    """Catalog entry describing a single servable model."""

    # Human-readable name for the model.
    display_name: str
    # Passed verbatim as the positional argument of `vllm serve`.
    repo: str
    # Presumably the model's size in gigabytes -- confirm against the catalog.
    size_gb: float
    # "solo" makes build_launch_command add the `--solo` flag; "cluster" omits it.
    mode: Literal["solo", "cluster"]
    # Free-form capability tags; empty when omitted.
    capabilities: list[str] = Field(default_factory=list)
    # Presumably how long the model is expected to take to become ready, in
    # seconds -- confirm against the consumer of this field.
    expected_ready_seconds: int = 300
    # Extra arguments appended after --port/--host on the `vllm serve` command.
    vllm_args: list[str] = Field(default_factory=list)
    # Optional free-text description; None when the catalog omits it.
    description: str | None = None
|
|
|
|
|
|
class Defaults(BaseModel):
    """Catalog-wide settings applied to every launch command."""

    # Rendered as `--port=<port>` on the `vllm serve` command line.
    port: int = 8888
    # Rendered as `--host=<host>` on the `vllm serve` command line.
    host: str = "0.0.0.0"
|
|
|
|
|
|
class Catalog(BaseModel):
    """Top-level schema of the model catalog file."""

    # Falls back to a freshly constructed `Defaults()` when the section is absent.
    defaults: Defaults = Field(default_factory=Defaults)
    # Required mapping from a string key to that model's definition.
    models: dict[str, ModelDef]
|
|
|
|
|
|
def load_catalog(path: str) -> Catalog:
    """Read the YAML catalog at `path` and validate it into a `Catalog`.

    The file is decoded as UTF-8 explicitly instead of with the platform's
    locale encoding, so non-ASCII text in the catalog loads identically on
    every host.

    Args:
        path: Filesystem path of the YAML catalog file.

    Returns:
        The validated `Catalog`.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        yaml.YAMLError: If the content is not well-formed YAML.
        pydantic.ValidationError: If the parsed data does not match the
            `Catalog` schema (this includes an empty file, which parses
            to `None`).
    """
    with open(path, encoding="utf-8") as f:
        # safe_load only builds plain Python objects, never arbitrary ones.
        data = yaml.safe_load(f)
    return Catalog.model_validate(data)
|
|
|
|
|
|
def build_launch_command(key: str, model: ModelDef, defaults: Defaults) -> str:
    """Build the shell command line that launches `model` on Spark 1.

    The command is relative: it assumes the working directory is
    `~/spark-vllm-docker` (the SSH wrapper does the `cd`).

    `key` is accepted but not used when building the command. Tokens are
    joined with single spaces and are NOT shell-quoted, so `model.repo` and
    each entry of `model.vllm_args` reach the shell exactly as written in
    the catalog.
    """
    tokens = ["./launch-cluster.sh"]
    if model.mode == "solo":
        tokens.append("--solo")
    tokens += ["-d", "exec", "vllm", "serve", model.repo]
    # Catalog-wide defaults come first, then the model's own extra arguments.
    tokens.append(f"--port={defaults.port}")
    tokens.append(f"--host={defaults.host}")
    tokens.extend(model.vllm_args)
    return " ".join(tokens)
|