v0.2.3 - Per-model Advanced settings + catalog-add for downloaded models

Backend:
- overrides.py: read/write /data/models-overrides.yaml (knobs + custom entries)
- apply_knobs_to_args(): strip matching flags from bundled vllm_args and append knob values, so knob changes properly override bundled defaults
- extract_knobs_from_args(): seed UI knob values from bundled args so the Advanced dialog has correct starting state
- models.py: load_catalog merges overrides on top of bundled yaml
- GET /api/models returns effective_knobs per model
- PUT /api/models/{key}/knobs persists knob changes
- POST /api/models adds a custom catalog entry
- DELETE /api/models/{key} removes a custom entry (bundled models cannot be deleted)
- swap_manager.reload_catalog() called after each mutation so swaps see latest

Frontend:
- New 'Advanced' button on every card opens a modal dialog: max-model-len input, gpu-memory-utilization slider, three optimization checkboxes (fastsafetensors, prefix caching, FP8 KV cache). Save persists; Cancel discards. Custom models also have a Delete button.
- After a successful download, automatically open the 'Add to catalog' dialog pre-filled with the repo, with the same knob defaults — user just enters key, display name, and clicks Save.
- Custom catalog entries are tagged with a blue 'custom' pill on the card.

Package: bump 0.2.3:0; main.ts sets MODELS_OVERRIDES=/data/models-overrides.yaml so overrides persist on the StartOS volume.
This commit is contained in:
Grant
2026-05-12 11:30:47 -05:00
parent 474417b458
commit 75fd0846b4
8 changed files with 490 additions and 12 deletions
+63 -1
View File
@@ -13,6 +13,7 @@ from .config import Settings
from .download import DownloadManager
from .health import check_magpie, check_parakeet, check_vllm
from .models import load_catalog
from .overrides import add_custom, delete_custom, extract_knobs_from_args, load_overrides, set_knobs
from .services import docker_state, run_action, services_from_settings
from .ssh import ssh_run
from .swap import SwapManager
@@ -46,14 +47,75 @@ async def get_config() -> dict:
}
def _reload_catalog() -> None:
global catalog
catalog = load_catalog(settings.models_yaml)
swap_manager.reload_catalog(catalog)
@app.get("/api/models")
async def get_models() -> dict:
out_models: dict[str, dict] = {}
for key, m in catalog.models.items():
d = m.model_dump()
# Always include effective knobs for the UI (defaults from base args + any overrides)
d["effective_knobs"] = {**extract_knobs_from_args(m.vllm_args), **(m.knobs or {})}
out_models[key] = d
return {
"defaults": catalog.defaults.model_dump(),
"models": {k: v.model_dump() for k, v in catalog.models.items()},
"models": out_models,
}
class KnobsBody(BaseModel):
knobs: dict
@app.put("/api/models/{key}/knobs")
async def put_model_knobs(key: str, body: KnobsBody) -> dict:
if key not in catalog.models:
raise HTTPException(404, f"unknown model: {key}")
# Strip empty/None values
clean = {k: v for k, v in body.knobs.items() if v not in (None, "")}
set_knobs(key, clean)
_reload_catalog()
return {"ok": True, "key": key, "knobs": clean}
class CustomModelBody(BaseModel):
key: str
display_name: str
repo: str
size_gb: float = 0
mode: Literal["solo", "cluster"] = "solo"
description: str | None = None
capabilities: list[str] = []
vllm_args: list[str] = []
knobs: dict | None = None
@app.post("/api/models")
async def post_model(body: CustomModelBody) -> dict:
if not body.key or not body.key.replace("-", "").replace("_", "").isalnum():
raise HTTPException(400, "key must be alphanumeric/-/_ only")
if body.key in catalog.models and not catalog.models[body.key].custom:
raise HTTPException(409, f"'{body.key}' is a bundled model — pick a different key")
add_custom(body.model_dump())
_reload_catalog()
return {"ok": True, "key": body.key}
@app.delete("/api/models/{key}")
async def del_model(key: str) -> dict:
if key not in catalog.models:
raise HTTPException(404, f"unknown model: {key}")
if not catalog.models[key].custom:
raise HTTPException(400, "cannot delete a bundled model; you may override its knobs instead")
delete_custom(key)
_reload_catalog()
return {"ok": True, "key": key}
@app.get("/api/services")
async def get_services() -> dict:
"""Lifecycle state of always-on support services (Parakeet, Magpie, …).