v0.2.3 - Per-model Advanced settings + catalog-add for downloaded models
Backend:
- overrides.py: read/write /data/models-overrides.yaml (knobs + custom entries)
- apply_knobs_to_args(): strip matching flags from bundled vllm_args and append knob values, so knob changes properly override bundled defaults
- extract_knobs_from_args(): seed UI knob values from bundled args so the Advanced dialog has correct starting state
- models.py: load_catalog merges overrides on top of bundled yaml
- GET /api/models returns effective_knobs per model
- PUT /api/models/{key}/knobs persists knob changes
- POST /api/models adds a custom catalog entry
- DELETE /api/models/{key} removes a custom entry (bundled models cannot be deleted)
- swap_manager.reload_catalog() is called after each mutation so swaps see the latest catalog
Frontend:
- New 'Advanced' button on every card opens a modal dialog: max-model-len input, gpu-memory-utilization slider, three optimization checkboxes (fastsafetensors, prefix caching, FP8 KV cache). Save persists; Cancel discards. Custom models also have a Delete button.
- After a successful download, automatically open the 'Add to catalog' dialog pre-filled with the repo and the same knob defaults — the user just enters a key and a display name, then clicks Save.
- Custom catalog entries are tagged with a blue 'custom' pill on the card.
Package: bump to 0.2.3:0; main.ts sets MODELS_OVERRIDES=/data/models-overrides.yaml so overrides persist on the StartOS volume.
This commit is contained in:
+63
-1
@@ -13,6 +13,7 @@ from .config import Settings
|
||||
from .download import DownloadManager
|
||||
from .health import check_magpie, check_parakeet, check_vllm
|
||||
from .models import load_catalog
|
||||
from .overrides import add_custom, delete_custom, extract_knobs_from_args, load_overrides, set_knobs
|
||||
from .services import docker_state, run_action, services_from_settings
|
||||
from .ssh import ssh_run
|
||||
from .swap import SwapManager
|
||||
@@ -46,14 +47,75 @@ async def get_config() -> dict:
|
||||
}
|
||||
|
||||
|
||||
def _reload_catalog() -> None:
    """Re-read the model catalog and hand the fresh copy to the swap manager.

    Rebinds the module-level ``catalog`` to the result of
    ``load_catalog(settings.models_yaml)`` and then passes that same object
    to ``swap_manager.reload_catalog`` so later swaps use the updated entries.
    """
    global catalog
    refreshed = load_catalog(settings.models_yaml)
    catalog = refreshed
    swap_manager.reload_catalog(refreshed)
|
||||
|
||||
|
||||
@app.get("/api/models")
async def get_models() -> dict:
    """Return the catalog defaults and every model, with effective knobs.

    Each model is serialized with ``model_dump()`` and gains an
    ``effective_knobs`` entry: the knob values extracted from the model's
    ``vllm_args``, overlaid with any explicit ``knobs`` stored on the model
    (explicit knobs win on conflict).

    Returns:
        A dict with ``"defaults"`` (the dumped catalog defaults) and
        ``"models"`` (model key -> dumped model including ``effective_knobs``).
    """
    out_models: dict[str, dict] = {}
    for key, m in catalog.models.items():
        d = m.model_dump()
        # Always include effective knobs for the UI (defaults from base args + any overrides)
        d["effective_knobs"] = {**extract_knobs_from_args(m.vllm_args), **(m.knobs or {})}
        out_models[key] = d
    # Only one "models" entry: the previous plain-dump value was a duplicate
    # key that got computed for every model and then silently overwritten.
    return {
        "defaults": catalog.defaults.model_dump(),
        "models": out_models,
    }
|
||||
|
||||
|
||||
class KnobsBody(BaseModel):
    """Request body for the per-model knobs update endpoint."""

    # Knob name -> value, as submitted by the client.
    knobs: dict
|
||||
|
||||
|
||||
@app.put("/api/models/{key}/knobs")
async def put_model_knobs(key: str, body: KnobsBody) -> dict:
    """Persist knob overrides for one catalog model and reload the catalog.

    Args:
        key: Catalog key of the model whose knobs are being set.
        body: Payload carrying the submitted knob mapping.

    Returns:
        ``{"ok": True, "key": key, "knobs": <stored knobs>}``.

    Raises:
        HTTPException: 404 when ``key`` is not in the current catalog.
    """
    if key not in catalog.models:
        raise HTTPException(404, f"unknown model: {key}")

    # Entries left blank by the client (None or empty string) are dropped
    # rather than stored.
    clean = {}
    for name, value in body.knobs.items():
        if value in (None, ""):
            continue
        clean[name] = value

    set_knobs(key, clean)
    _reload_catalog()
    return {"ok": True, "key": key, "knobs": clean}
|
||||
|
||||
|
||||
class CustomModelBody(BaseModel):
    """Request body for adding a custom entry to the model catalog."""

    # Required identification fields.
    key: str
    display_name: str
    repo: str

    # Optional metadata; each falls back to the default shown when omitted.
    size_gb: float = 0
    mode: Literal["solo", "cluster"] = "solo"
    description: str | None = None
    capabilities: list[str] = []

    # Launch arguments and optional knob overrides for the entry.
    vllm_args: list[str] = []
    knobs: dict | None = None
|
||||
|
||||
|
||||
@app.post("/api/models")
async def post_model(body: CustomModelBody) -> dict:
    """Add a custom catalog entry, then reload the catalog.

    Args:
        body: Description of the custom model to add.

    Returns:
        ``{"ok": True, "key": <new key>}``.

    Raises:
        HTTPException: 400 when the key is empty or contains characters
            other than alphanumerics, ``-`` and ``_``; 409 when the key
            belongs to an existing non-custom (bundled) model.
    """
    # Dashes and underscores are allowed, so remove them before the
    # alphanumeric check.
    bare_key = body.key.replace("-", "").replace("_", "")
    key_is_valid = bool(body.key) and bare_key.isalnum()
    if not key_is_valid:
        raise HTTPException(400, "key must be alphanumeric/-/_ only")

    # An existing custom entry with this key is not rejected; only bundled
    # entries are protected.
    if body.key in catalog.models:
        existing = catalog.models[body.key]
        if not existing.custom:
            raise HTTPException(409, f"'{body.key}' is a bundled model — pick a different key")

    add_custom(body.model_dump())
    _reload_catalog()
    return {"ok": True, "key": body.key}
|
||||
|
||||
|
||||
@app.delete("/api/models/{key}")
async def del_model(key: str) -> dict:
    """Remove a custom catalog entry, then reload the catalog.

    Args:
        key: Catalog key of the entry to delete.

    Returns:
        ``{"ok": True, "key": key}``.

    Raises:
        HTTPException: 404 when ``key`` is not in the catalog; 400 when the
            entry is not marked custom (bundled models cannot be deleted).
    """
    if key not in catalog.models:
        raise HTTPException(404, f"unknown model: {key}")

    entry = catalog.models[key]
    if not entry.custom:
        raise HTTPException(400, "cannot delete a bundled model; you may override its knobs instead")

    delete_custom(key)
    _reload_catalog()
    return {"ok": True, "key": key}
|
||||
|
||||
|
||||
@app.get("/api/services")
|
||||
async def get_services() -> dict:
|
||||
"""Lifecycle state of always-on support services (Parakeet, Magpie, …).
|
||||
|
||||
Reference in New Issue
Block a user