75fd0846b4
Backend:
- overrides.py: read/write /data/models-overrides.yaml (knobs + custom entries)
- apply_knobs_to_args(): strip matching flags from bundled vllm_args and append knob values, so knob changes properly override bundled defaults
- extract_knobs_from_args(): seed UI knob values from bundled args so the Advanced dialog has correct starting state
- models.py: load_catalog merges overrides on top of bundled yaml
- GET /api/models returns effective_knobs per model
- PUT /api/models/{key}/knobs persists knob changes
- POST /api/models adds a custom catalog entry
- DELETE /api/models/{key} removes a custom entry (bundled models cannot be deleted)
- swap_manager.reload_catalog() called after each mutation so swaps see latest
Frontend:
- New 'Advanced' button on every card opens a modal dialog: max-model-len input, gpu-memory-utilization slider, three optimization checkboxes (fastsafetensors, prefix caching, FP8 KV cache). Save persists; Cancel discards. Custom models also have a Delete button.
- After a successful download, the 'Add to catalog' dialog opens automatically, pre-filled with the repo and the same knob defaults; the user only has to enter a key and a display name, then click Save.
- Custom catalog entries are tagged with a blue 'custom' pill on the card.
Package: bump 0.2.3:0; main.ts sets MODELS_OVERRIDES=/data/models-overrides.yaml so overrides persist on the StartOS volume.
81 lines
2.8 KiB
Python
81 lines
2.8 KiB
Python
from __future__ import annotations
|
|
from typing import Literal, Optional
|
|
import yaml
|
|
from pydantic import BaseModel, Field
|
|
|
|
from .overrides import apply_knobs_to_args, load_overrides
|
|
|
|
|
|
class ModelDef(BaseModel):
    """One catalog entry describing a model that can be launched with vLLM."""

    # Human-readable label for the model.
    display_name: str
    # Model identifier passed to `vllm serve` in the launch command.
    repo: str
    # Size of the model in gigabytes.
    size_gb: float
    # "solo" adds the `--solo` flag to the launch command; "cluster" does not.
    mode: Literal["solo", "cluster"]
    # Free-form capability tags; empty when none are declared.
    capabilities: list[str] = Field(default_factory=list)
    # Presumably how long the model is expected to take to become ready
    # after launch -- TODO confirm against the swap manager.
    expected_ready_seconds: int = 300
    # Extra command-line arguments appended to `vllm serve`.
    vllm_args: list[str] = Field(default_factory=list)
    # Optional free-text description.
    description: str | None = None
    # User-customized knob values; merged into vllm_args at launch time.
    knobs: dict | None = None
    # True if this entry came from the /data overrides file.
    custom: bool = False
|
|
|
|
|
|
class Defaults(BaseModel):
    """Catalog-wide settings shared by every launch command."""

    # Emitted as `--port=<port>` in the launch command.
    port: int = 8888
    # Emitted as `--host=<host>` in the launch command.
    host: str = "0.0.0.0"
|
|
|
|
|
|
class Catalog(BaseModel):
    """The full model catalog: shared defaults plus the models keyed by name."""

    # Falls back to a default-constructed Defaults when the YAML omits it.
    defaults: Defaults = Field(default_factory=Defaults)
    # Catalog key -> model definition.
    models: dict[str, ModelDef]
|
|
|
|
|
|
def _field_or_default(entry: dict, field: str, default):
    """Return ``entry[field]``, treating a missing key and an explicit null alike."""
    value = entry.get(field)
    return default if value is None else value


def _merge_overrides(catalog: Catalog) -> Catalog:
    """Apply user overrides + custom entries from /data/models-overrides.yaml.

    Bundled models keep their definition and only gain the user's ``knobs``
    (when any are stored under their key). Custom entries are then added as
    new models flagged ``custom=True``; a custom entry whose key matches a
    bundled model replaces it.

    Malformed custom entries (not a mapping, or missing ``key`` / ``repo``)
    are skipped with a warning so that one bad entry in the overrides file
    cannot prevent the whole catalog from loading. Fields set to an explicit
    YAML null fall back to the same default as an absent field.

    Args:
        catalog: The bundled catalog to layer overrides on.

    Returns:
        A new ``Catalog`` with the same defaults and the merged models.
    """
    import logging

    log = logging.getLogger(__name__)

    ov = load_overrides() or {}
    knobs_by_key = ov.get("knobs") or {}
    custom_entries = ov.get("custom") or []

    new_models: dict[str, ModelDef] = {}
    for key, m in catalog.models.items():
        k = knobs_by_key.get(key)
        # Leave the bundled definition untouched when no knobs are stored.
        new_models[key] = m.model_copy(update={"knobs": k}) if k else m

    for entry in custom_entries:
        if not isinstance(entry, dict):
            log.warning("Skipping malformed custom catalog entry: %r", entry)
            continue
        key = entry.get("key")
        if not key:
            continue
        repo = entry.get("repo")
        if not repo:
            # Previously a KeyError here aborted the entire catalog load.
            log.warning("Skipping custom catalog entry %r: missing 'repo'", key)
            continue
        defaults_dump = {
            "display_name": _field_or_default(entry, "display_name", key),
            "repo": repo,
            "size_gb": float(_field_or_default(entry, "size_gb", 0)),
            "mode": _field_or_default(entry, "mode", "solo"),
            "capabilities": entry.get("capabilities") or [],
            "expected_ready_seconds": int(
                _field_or_default(entry, "expected_ready_seconds", 300)
            ),
            "vllm_args": entry.get("vllm_args") or [],
            "description": entry.get("description"),
            "knobs": entry.get("knobs"),
            "custom": True,
        }
        new_models[key] = ModelDef.model_validate(defaults_dump)

    return Catalog(defaults=catalog.defaults, models=new_models)
|
|
|
|
|
|
def load_catalog(path: str) -> Catalog:
    """Load the bundled catalog YAML at `path` and layer user overrides on top.

    Args:
        path: Filesystem path of the bundled catalog YAML file.

    Returns:
        The validated catalog with overrides and custom entries merged in.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML.
        pydantic.ValidationError: If the content does not match ``Catalog``.
    """
    # Decode explicitly as UTF-8 rather than the locale default, so
    # non-ASCII text in the catalog reads the same on every host.
    with open(path, encoding="utf-8") as f:
        data = yaml.safe_load(f)
    bundled = Catalog.model_validate(data)
    return _merge_overrides(bundled)
|
|
|
|
|
|
def build_launch_command(key: str, model: ModelDef, defaults: Defaults) -> str:
    """Return the shell command to launch `model` on Spark 1.

    User knobs (if any) override matching flags in the bundled vllm_args.
    Assumes cwd will be `~/spark-vllm-docker` (we cd in the SSH wrapper).

    Args:
        key: Catalog key of the model. Currently unused; kept for callers.
        model: The model definition to launch.
        defaults: Catalog-wide port and host passed to `vllm serve`.

    Returns:
        A single shell command line invoking `./launch-cluster.sh`.
    """
    import shlex

    solo = "--solo " if model.mode == "solo" else ""
    base_args = apply_knobs_to_args(list(model.vllm_args), model.knobs)
    # The repo and host are single shell words; quote them so that a value
    # containing whitespace or shell metacharacters (custom entries are
    # user-supplied) cannot split or inject into the command. Ordinary
    # values such as `org/model-name` or `0.0.0.0` are left unchanged.
    repo = shlex.quote(model.repo)
    host_flag = shlex.quote(f"--host={defaults.host}")
    # NOTE(review): vllm_args tokens are joined unquoted, as before, because
    # existing entries may rely on shell word-splitting or carry their own
    # quoting -- confirm before quoting them as well.
    args = [f"--port={defaults.port}", host_flag, *base_args]
    return f"./launch-cluster.sh {solo}-d exec vllm serve {repo} {' '.join(args)}"
|