v0.2.3 - Per-model Advanced settings + catalog-add for downloaded models
Backend:
- overrides.py: read/write /data/models-overrides.yaml (knobs + custom entries)
- apply_knobs_to_args(): strip matching flags from bundled vllm_args and append knob values, so knob changes properly override bundled defaults
- extract_knobs_from_args(): seed UI knob values from bundled args so the Advanced dialog has correct starting state
- models.py: load_catalog merges overrides on top of bundled yaml
- GET /api/models returns effective_knobs per model
- PUT /api/models/{key}/knobs persists knob changes
- POST /api/models adds a custom catalog entry
- DELETE /api/models/{key} removes a custom entry (bundled models cannot be deleted)
- swap_manager.reload_catalog() is called after each mutation so swaps see the latest catalog
Frontend:
- New 'Advanced' button on every card opens a modal dialog: max-model-len input, gpu-memory-utilization slider, three optimization checkboxes (fastsafetensors, prefix caching, FP8 KV cache). Save persists; Cancel discards. Custom models also have a Delete button.
- After a successful download, automatically open the 'Add to catalog' dialog pre-filled with the repo, with the same knob defaults — user just enters key, display name, and clicks Save.
- Custom catalog entries are tagged with a blue 'custom' pill on the card.
Package: bump to 0.2.3:0; main.ts sets MODELS_OVERRIDES=/data/models-overrides.yaml so overrides persist on the StartOS volume.
This commit is contained in:
+42
-3
@@ -1,8 +1,10 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
from typing import Literal
|
from typing import Literal, Optional
|
||||||
import yaml
|
import yaml
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from .overrides import apply_knobs_to_args, load_overrides
|
||||||
|
|
||||||
|
|
||||||
class ModelDef(BaseModel):
|
class ModelDef(BaseModel):
|
||||||
display_name: str
|
display_name: str
|
||||||
@@ -13,6 +15,8 @@ class ModelDef(BaseModel):
|
|||||||
expected_ready_seconds: int = 300
|
expected_ready_seconds: int = 300
|
||||||
vllm_args: list[str] = Field(default_factory=list)
|
vllm_args: list[str] = Field(default_factory=list)
|
||||||
description: str | None = None
|
description: str | None = None
|
||||||
|
knobs: dict | None = None # user-customized; merged at launch time
|
||||||
|
custom: bool = False # True if this came from /data overrides
|
||||||
|
|
||||||
|
|
||||||
class Defaults(BaseModel):
|
class Defaults(BaseModel):
|
||||||
@@ -25,17 +29,52 @@ class Catalog(BaseModel):
|
|||||||
models: dict[str, ModelDef]
|
models: dict[str, ModelDef]
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_overrides(catalog: Catalog) -> Catalog:
    """Apply user overrides + custom entries from /data/models-overrides.yaml.

    Bundled models keep their definition and only gain the stored ``knobs``
    (when there are any for their key). Custom entries are then added on top
    with ``custom=True``; a custom entry whose key equals a bundled key
    replaces that model, as before.

    A malformed custom entry (not a mapping, missing ``repo``, non-numeric
    ``size_gb`` / ``expected_ready_seconds``, or anything ``ModelDef``
    rejects) is skipped with a warning instead of aborting the whole catalog
    load, so one bad record in the overrides file cannot take the catalog
    down with it.

    Args:
        catalog: The bundled catalog as parsed from the packaged YAML.

    Returns:
        A new ``Catalog`` with the same defaults and the merged model map.
    """
    import logging  # local import: this fix must not depend on the module's import block

    log = logging.getLogger(__name__)

    ov = load_overrides()
    knobs_by_key = ov.get("knobs") or {}
    if not isinstance(knobs_by_key, dict):
        knobs_by_key = {}
    custom_entries = ov.get("custom") or []

    new_models: dict[str, ModelDef] = {}
    for key, m in catalog.models.items():
        k = knobs_by_key.get(key)
        new_models[key] = m.model_copy(update={"knobs": k}) if k else m

    for entry in custom_entries:
        if not isinstance(entry, dict):
            log.warning("ignoring non-mapping custom catalog entry: %r", entry)
            continue
        key = entry.get("key")
        if not key:
            continue
        try:
            defaults_dump = {
                "display_name": entry.get("display_name") or key,
                "repo": entry["repo"],
                # `or` also covers an explicit null in the YAML
                "size_gb": float(entry.get("size_gb") or 0),
                "mode": entry.get("mode") or "solo",
                "capabilities": entry.get("capabilities") or [],
                "expected_ready_seconds": int(entry.get("expected_ready_seconds") or 300),
                "vllm_args": entry.get("vllm_args") or [],
                "description": entry.get("description"),
                "knobs": entry.get("knobs"),
                "custom": True,
            }
            new_models[key] = ModelDef.model_validate(defaults_dump)
        except (KeyError, TypeError, ValueError) as exc:
            # pydantic's ValidationError is a ValueError subclass
            log.warning("skipping invalid custom catalog entry %r: %s", key, exc)

    return Catalog(defaults=catalog.defaults, models=new_models)
|
||||||
|
|
||||||
|
|
||||||
def load_catalog(path: str) -> Catalog:
    """Parse the bundled catalog YAML at *path* and overlay the user's overrides."""
    with open(path) as stream:
        raw = yaml.safe_load(stream)
    return _merge_overrides(Catalog.model_validate(raw))
|
||||||
|
|
||||||
|
|
||||||
def build_launch_command(key: str, model: ModelDef, defaults: Defaults) -> str:
    """Return the shell command to launch `model` on Spark 1.

    User knobs (if any) override matching flags in the bundled vllm_args.
    Assumes cwd will be `~/spark-vllm-docker` (we cd in the SSH wrapper).

    The repo is shell-quoted because it can come from a user-created custom
    catalog entry; ordinary ``org/name`` repos are left unchanged by the
    quoting. ``vllm_args`` are deliberately passed through as written:
    an item may legitimately contain a space (``--flag value`` in one item)
    or its own quoting, and quoting it here would change its meaning.

    Args:
        key: Catalog key of the model (not used when building the command).
        model: Model definition supplying mode, repo, vllm_args and knobs.
        defaults: Supplies the ``--port`` / ``--host`` values.

    Returns:
        The ``./launch-cluster.sh ... vllm serve ...`` command line.
    """
    import shlex  # local import: keeps this fix independent of the module's import block

    solo = "--solo " if model.mode == "solo" else ""
    base_args = apply_knobs_to_args(list(model.vllm_args), model.knobs)
    args = [f"--port={defaults.port}", f"--host={defaults.host}", *base_args]
    repo = shlex.quote(model.repo)
    return f"./launch-cluster.sh {solo}-d exec vllm serve {repo} {' '.join(args)}"
|
||||||
|
|||||||
@@ -0,0 +1,145 @@
|
|||||||
|
"""User overrides + custom catalog entries, persisted on the package's main volume.
|
||||||
|
|
||||||
|
File: /data/models-overrides.yaml (only when something has been customized).
|
||||||
|
Survives package updates because /data is the StartOS volume.
|
||||||
|
|
||||||
|
Shape:
|
||||||
|
knobs:
|
||||||
|
qwen36:
|
||||||
|
max_model_len: 65536
|
||||||
|
gpu_memory_utilization: 0.85
|
||||||
|
fastsafetensors: true
|
||||||
|
prefix_caching: true
|
||||||
|
kv_cache_dtype: fp8 # one of: 'fp8' | 'auto' | null
|
||||||
|
custom:
|
||||||
|
- key: my-new-model
|
||||||
|
display_name: My New Model (from download)
|
||||||
|
repo: my-org/my-model
|
||||||
|
size_gb: 20
|
||||||
|
mode: solo
|
||||||
|
description: null
|
||||||
|
capabilities: []
|
||||||
|
knobs:
|
||||||
|
max_model_len: 32768
|
||||||
|
gpu_memory_utilization: 0.85
|
||||||
|
fastsafetensors: true
|
||||||
|
prefix_caching: true
|
||||||
|
kv_cache_dtype: fp8
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Optional
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
|
||||||
|
# Knob name (as stored in the overrides file) -> vLLM command-line flag, for
# the knobs that are rendered as ``<flag>=<value>``.
KNOWN_FLAG_MAP = {
    "max_model_len": "--max-model-len",
    "gpu_memory_utilization": "--gpu-memory-utilization",
}
|
||||||
|
|
||||||
|
|
||||||
|
def _path() -> str:
|
||||||
|
# Container: /data/models-overrides.yaml. Local dev: cwd or env override.
|
||||||
|
return os.environ.get("MODELS_OVERRIDES", "/data/models-overrides.yaml")
|
||||||
|
|
||||||
|
|
||||||
|
def load_overrides() -> dict:
    """Load the overrides file and return it in a normalized shape.

    The result always has ``"knobs"`` as a dict and ``"custom"`` as a list of
    dicts, whatever the file contains. A missing file, an empty file, a
    top-level value that is not a mapping, or ``knobs: null`` / ``custom:
    null`` all normalize to the empty structure, so callers can index and
    mutate the result without further checks. Entries of ``custom`` that are
    not mappings are dropped because no caller can use them.

    Returns:
        The parsed overrides mapping (any other top-level keys are kept).
    """
    p = _path()
    try:
        with open(p, encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except FileNotFoundError:
        data = None

    if not isinstance(data, dict):
        data = {}
    # setdefault() is not enough here: a key that is present with a null or
    # wrongly-typed value must be replaced too.
    if not isinstance(data.get("knobs"), dict):
        data["knobs"] = {}
    custom = data.get("custom")
    data["custom"] = [c for c in custom if isinstance(c, dict)] if isinstance(custom, list) else []
    return data
|
||||||
|
|
||||||
|
|
||||||
|
def save_overrides(data: dict) -> None:
    """Persist *data* to the overrides file, replacing it atomically.

    The YAML is written to a sibling temporary file first and then moved over
    the real file with ``os.replace``. If serialization fails or the process
    dies mid-write, the previous overrides file is left untouched instead of
    being truncated.

    Args:
        data: The full overrides mapping to store.

    Raises:
        OSError: If the directory or file cannot be written.
        yaml.YAMLError: If *data* contains values YAML cannot represent.
    """
    target = Path(_path())
    target.parent.mkdir(parents=True, exist_ok=True)
    tmp = target.with_name(target.name + ".tmp")
    try:
        with open(tmp, "w", encoding="utf-8") as f:
            yaml.safe_dump(data, f, sort_keys=False)
            f.flush()
            os.fsync(f.fileno())  # make sure the bytes are on disk before the rename
        os.replace(tmp, target)
    except BaseException:
        # never leave a half-written temp file behind
        tmp.unlink(missing_ok=True)
        raise
|
||||||
|
|
||||||
|
|
||||||
|
def set_knobs(key: str, knobs: dict) -> dict:
    """Store *knobs* for model *key*, or clear the stored knobs when *knobs* is empty.

    The change is written to the overrides file and the full overrides
    mapping is returned.
    """
    overrides = load_overrides()
    stored = overrides["knobs"]
    if knobs:
        stored[key] = knobs
    else:
        stored.pop(key, None)
    save_overrides(overrides)
    return overrides
|
||||||
|
|
||||||
|
|
||||||
|
def add_custom(entry: dict) -> dict:
    """Add *entry* to the custom catalog list, replacing any entry with the same key.

    The new entry always ends up last. The change is written to the overrides
    file and the full overrides mapping is returned.
    """
    overrides = load_overrides()
    kept = []
    for existing in overrides["custom"]:
        # drop the previous entry stored under the same key, if any
        if existing.get("key") != entry["key"]:
            kept.append(existing)
    kept.append(entry)
    overrides["custom"] = kept
    save_overrides(overrides)
    return overrides
|
||||||
|
|
||||||
|
|
||||||
|
def delete_custom(key: str) -> dict:
    """Remove the custom catalog entry *key* together with any knobs stored for it.

    The change is written to the overrides file and the full overrides
    mapping is returned.
    """
    overrides = load_overrides()
    survivors = []
    for existing in overrides["custom"]:
        if existing.get("key") != key:
            survivors.append(existing)
    overrides["custom"] = survivors
    overrides["knobs"].pop(key, None)
    save_overrides(overrides)
    return overrides
|
||||||
|
|
||||||
|
|
||||||
|
def _split_flag_token(arg: str) -> tuple:
    """Split one argument item into ``(flag, inline_value)``.

    ``inline_value`` is ``None`` for a bare flag; it is the text after the
    first ``=`` or space for ``--flag=value`` / ``"--flag value"`` items.
    """
    text = arg.strip()
    cuts = [pos for pos in (text.find("="), text.find(" ")) if pos != -1]
    if not cuts:
        return text, None
    cut = min(cuts)
    return text[:cut], text[cut + 1:].strip()


def _is_safe_token(value: object) -> bool:
    """Return True when *value* is a plain ASCII word (letters, digits, ``_ - .``)."""
    return (
        isinstance(value, str)
        and value.isascii()
        and value.replace("_", "").replace("-", "").replace(".", "").isalnum()
    )


def _render_knob_value(name: str, raw: object) -> Optional[str]:
    """Return the text to put after ``<flag>=`` for knob *name*, or None if *raw* is unusable."""
    if isinstance(raw, bool):
        # bool is an int subclass; True must not become "--max-model-len=1"
        return None
    try:
        if name == "max_model_len":
            number = int(raw)
            return str(number) if number > 0 else None
        if name == "gpu_memory_utilization":
            fraction = float(raw)
            return str(fraction) if 0.0 < fraction <= 1.0 else None
    except (TypeError, ValueError, OverflowError):
        return None
    text = str(raw)
    return text if _is_safe_token(text) else None


def apply_knobs_to_args(vllm_args: list[str], knobs: Optional[dict]) -> list[str]:
    """Return a new vllm_args list with knob values overriding matching flags.

    For every knob present in *knobs* the matching bundled flag is removed
    and, when the knob is enabled / has a value, the knob's own flag is
    appended at the end (value knobs first, then fastsafetensors, prefix
    caching and KV-cache dtype).

    Compared with a plain prefix filter this also:

    * removes the value item of a two-item ``["--flag", "value"]`` pair, so
      no orphaned value is left in the argument list;
    * leaves an unrelated ``--load-format=...`` alone when the
      ``fastsafetensors`` knob is off (only the fastsafetensors format is
      removed in that case);
    * ignores a knob whose value is not usable (non-numeric or out-of-range
      number, dtype that is not a plain word). The bundled flag is kept in
      that case, and nothing unvalidated reaches the command line.

    Args:
        vllm_args: The model's bundled argument list; it is not modified.
        knobs: User knob values, or None / empty for "no overrides".

    Returns:
        A new list of arguments.
    """
    if not knobs:
        return list(vllm_args)

    # flag -> None (strip whatever value it carries) or the single value to strip
    strip: dict[str, Optional[str]] = {}
    appended: list[str] = []

    for name, flag in KNOWN_FLAG_MAP.items():
        if name not in knobs:
            continue
        raw = knobs[name]
        if raw is None:
            strip[flag] = None  # explicit null: drop the bundled flag, add nothing
            continue
        rendered = _render_knob_value(name, raw)
        if rendered is None:
            continue  # unusable value: leave the bundled flag in place
        strip[flag] = None
        appended.append(f"{flag}={rendered}")

    # opt-in flag toggles
    if "fastsafetensors" in knobs:
        if knobs["fastsafetensors"]:
            strip["--load-format"] = None
            appended.append("--load-format=fastsafetensors")
        else:
            strip["--load-format"] = "fastsafetensors"

    if "prefix_caching" in knobs:
        strip["--enable-prefix-caching"] = None
        if knobs["prefix_caching"]:
            appended.append("--enable-prefix-caching")

    if "kv_cache_dtype" in knobs:
        dtype = knobs["kv_cache_dtype"]
        if not dtype or dtype == "auto":
            strip["--kv-cache-dtype"] = None
        elif _is_safe_token(dtype):
            strip["--kv-cache-dtype"] = None
            appended.append(f"--kv-cache-dtype={dtype}")

    takes_value = set(strip) - {"--enable-prefix-caching"}

    kept: list[str] = []
    total = len(vllm_args)
    index = 0
    while index < total:
        arg = vllm_args[index]
        flag, value = _split_flag_token(arg)
        consumed = 1
        if (
            value is None
            and flag in takes_value
            and index + 1 < total
            and not vllm_args[index + 1].startswith("-")
        ):
            # two-item spelling: the next item is this flag's value
            value = vllm_args[index + 1].strip()
            consumed = 2
        if flag in strip and (strip[flag] is None or strip[flag] == value):
            index += consumed
            continue
        kept.append(arg)
        index += 1

    return kept + appended
|
||||||
|
|
||||||
|
|
||||||
|
def extract_knobs_from_args(vllm_args: list[str]) -> dict:
    """Read default knob values out of a model's bundled ``vllm_args``.

    This gives the UI a starting point even before the user has saved any
    overrides. All three spellings of a value flag are understood:
    ``--flag=value``, ``"--flag value"`` in one item, and ``"--flag",
    "value"`` as two items. Values that do not parse (for example a
    non-numeric ``--max-model-len``) are skipped, and when a flag occurs
    more than once the last usable occurrence wins.

    Args:
        vllm_args: The argument list to inspect; it is not modified.

    Returns:
        A dict containing any of ``max_model_len`` (int),
        ``gpu_memory_utilization`` (float), ``fastsafetensors`` (True),
        ``prefix_caching`` (True) and ``kv_cache_dtype`` (str).
    """

    def _flag_and_value(arg: str):
        # Split at the first "=" or space; value is None for a bare flag.
        text = arg.strip()
        cuts = [pos for pos in (text.find("="), text.find(" ")) if pos != -1]
        if not cuts:
            return text, None
        cut = min(cuts)
        return text[:cut], text[cut + 1:].strip()

    value_flags = {
        "--max-model-len",
        "--gpu-memory-utilization",
        "--load-format",
        "--kv-cache-dtype",
    }
    knobs: dict[str, Any] = {}
    tokens = list(vllm_args)
    index = 0
    while index < len(tokens):
        flag, value = _flag_and_value(tokens[index])
        index += 1

        if flag == "--enable-prefix-caching":
            if value is None:
                knobs["prefix_caching"] = True
            continue
        if flag not in value_flags:
            continue

        if value is None and index < len(tokens) and not tokens[index].startswith("-"):
            # two-item spelling: the value is the next item
            value = tokens[index].strip()
            index += 1
        if not value:
            continue

        if flag == "--max-model-len":
            try:
                knobs["max_model_len"] = int(value)
            except ValueError:
                pass  # unparsable bundled value: leave the knob unset
        elif flag == "--gpu-memory-utilization":
            try:
                knobs["gpu_memory_utilization"] = float(value)
            except ValueError:
                pass  # unparsable bundled value: leave the knob unset
        elif flag == "--load-format":
            if value == "fastsafetensors":
                knobs["fastsafetensors"] = True
        else:  # --kv-cache-dtype
            knobs["kv_cache_dtype"] = value
    return knobs
|
||||||
+63
-1
@@ -13,6 +13,7 @@ from .config import Settings
|
|||||||
from .download import DownloadManager
|
from .download import DownloadManager
|
||||||
from .health import check_magpie, check_parakeet, check_vllm
|
from .health import check_magpie, check_parakeet, check_vllm
|
||||||
from .models import load_catalog
|
from .models import load_catalog
|
||||||
|
from .overrides import add_custom, delete_custom, extract_knobs_from_args, load_overrides, set_knobs
|
||||||
from .services import docker_state, run_action, services_from_settings
|
from .services import docker_state, run_action, services_from_settings
|
||||||
from .ssh import ssh_run
|
from .ssh import ssh_run
|
||||||
from .swap import SwapManager
|
from .swap import SwapManager
|
||||||
@@ -46,14 +47,75 @@ async def get_config() -> dict:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _reload_catalog() -> None:
    """Re-read the catalog from disk and hand the fresh copy to the swap manager."""
    global catalog
    fresh = load_catalog(settings.models_yaml)
    catalog = fresh
    swap_manager.reload_catalog(fresh)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/models")
async def get_models() -> dict:
    """Return the catalog defaults plus every model, each with its ``effective_knobs``.

    ``effective_knobs`` is what the UI should show: the values read from the
    model's own ``vllm_args``, overlaid with any stored knobs.
    """

    def _describe(model) -> dict:
        payload = model.model_dump()
        from_args = extract_knobs_from_args(model.vllm_args)
        payload["effective_knobs"] = {**from_args, **(model.knobs or {})}
        return payload

    models = {name: _describe(model) for name, model in catalog.models.items()}
    return {
        "defaults": catalog.defaults.model_dump(),
        "models": models,
    }
|
||||||
|
|
||||||
|
|
||||||
|
class KnobsBody(BaseModel):
    """Request body for PUT /api/models/{key}/knobs."""

    knobs: dict  # knob name -> value; None and "" values are dropped by the handler before saving
|
||||||
|
|
||||||
|
|
||||||
|
def _knob_problem(knobs: dict) -> str:
    """Return a description of what is wrong with *knobs*, or "" when they are acceptable.

    Only the five knobs the dashboard exposes are accepted, each with the
    type it is later rendered with.
    """
    allowed = {
        "max_model_len",
        "gpu_memory_utilization",
        "fastsafetensors",
        "prefix_caching",
        "kv_cache_dtype",
    }
    unknown = sorted(str(name) for name in knobs if name not in allowed)
    if unknown:
        return f"unknown knob(s): {', '.join(unknown)}"

    mml = knobs.get("max_model_len")
    # bool is an int subclass, so it has to be excluded explicitly
    if mml is not None and (isinstance(mml, bool) or not isinstance(mml, int) or mml <= 0):
        return "max_model_len must be a positive integer"

    gmu = knobs.get("gpu_memory_utilization")
    if gmu is not None and (
        isinstance(gmu, bool) or not isinstance(gmu, (int, float)) or not 0 < gmu <= 1
    ):
        return "gpu_memory_utilization must be a number greater than 0 and at most 1"

    for name in ("fastsafetensors", "prefix_caching"):
        if name in knobs and not isinstance(knobs[name], bool):
            return f"{name} must be true or false"

    kvd = knobs.get("kv_cache_dtype")
    if kvd is not None and not (
        isinstance(kvd, str) and kvd.isascii() and kvd.replace("_", "").isalnum()
    ):
        return "kv_cache_dtype must be a plain word such as 'fp8' or 'auto'"
    return ""


@app.put("/api/models/{key}/knobs")
async def put_model_knobs(key: str, body: KnobsBody) -> dict:
    """Persist the knob overrides for model *key* and reload the catalog.

    ``None`` and ``""`` values are dropped first. The remaining knobs are
    validated before anything is written, because they are stored in the
    overrides file and later turned into launch flags.

    Raises:
        HTTPException: 404 when *key* is not in the catalog, 400 when a knob
            name is unknown or a value has the wrong type or range.
    """
    if key not in catalog.models:
        raise HTTPException(404, f"unknown model: {key}")
    # Strip empty/None values
    clean = {k: v for k, v in body.knobs.items() if v not in (None, "")}
    problem = _knob_problem(clean)
    if problem:
        raise HTTPException(400, problem)
    set_knobs(key, clean)
    _reload_catalog()
    return {"ok": True, "key": key, "knobs": clean}
|
||||||
|
|
||||||
|
|
||||||
|
class CustomModelBody(BaseModel):
    """Request body for POST /api/models: one custom catalog entry."""

    key: str  # catalog id; checked by the handler before the entry is stored
    display_name: str
    repo: str
    size_gb: float = 0
    mode: Literal["solo", "cluster"] = "solo"
    description: str | None = None
    capabilities: list[str] = []
    vllm_args: list[str] = []
    knobs: dict | None = None  # stored with the entry alongside vllm_args
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/models")
async def post_model(body: CustomModelBody) -> dict:
    """Add (or replace) a custom catalog entry and reload the catalog.

    The key must be ASCII letters, digits, ``-`` or ``_``, matching the
    pattern the dialog enforces. The repo is restricted to the characters a
    repository id can contain, because it is stored and later used when the
    model is launched. A key that belongs to a bundled model is refused; a
    key that belongs to an existing custom entry replaces that entry.

    Raises:
        HTTPException: 400 for an invalid key or repo, 409 when the key is
            taken by a bundled model.
    """
    key = body.key
    if not key or not key.isascii() or not key.replace("-", "").replace("_", "").isalnum():
        raise HTTPException(400, "key must be alphanumeric/-/_ only")

    repo = body.repo
    repo_ok = (
        bool(repo)
        and repo.isascii()
        and not repo.startswith("-")  # must not be mistaken for an option
        and all(ch.isalnum() or ch in "-_./" for ch in repo)
    )
    if not repo_ok:
        raise HTTPException(400, "repo may only contain letters, digits, '-', '_', '.' and '/'")

    if key in catalog.models and not catalog.models[key].custom:
        raise HTTPException(409, f"'{key}' is a bundled model — pick a different key")
    add_custom(body.model_dump())
    _reload_catalog()
    return {"ok": True, "key": key}
|
||||||
|
|
||||||
|
|
||||||
|
@app.delete("/api/models/{key}")
async def del_model(key: str) -> dict:
    """Delete the custom catalog entry *key* and reload the catalog.

    Responds 404 for an unknown key and 400 for a bundled model, which can
    only have its knobs overridden.
    """
    model = catalog.models.get(key)
    if model is None:
        raise HTTPException(404, f"unknown model: {key}")
    if not model.custom:
        raise HTTPException(400, "cannot delete a bundled model; you may override its knobs instead")
    delete_custom(key)
    _reload_catalog()
    return {"ok": True, "key": key}
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/services")
|
@app.get("/api/services")
|
||||||
async def get_services() -> dict:
|
async def get_services() -> dict:
|
||||||
"""Lifecycle state of always-on support services (Parakeet, Magpie, …).
|
"""Lifecycle state of always-on support services (Parakeet, Magpie, …).
|
||||||
|
|||||||
+130
-4
@@ -53,24 +53,32 @@ function renderCards() {
|
|||||||
const desc = m.description
|
const desc = m.description
|
||||||
? `<div class="desc">${escapeHtml(m.description)}</div>`
|
? `<div class="desc">${escapeHtml(m.description)}</div>`
|
||||||
: '';
|
: '';
|
||||||
|
const customPill = m.custom ? `<span class="tag custom-pill">custom</span>` : '';
|
||||||
card.innerHTML = `
|
card.innerHTML = `
|
||||||
<div class="name">${escapeHtml(m.display_name)}</div>
|
<div class="name">${escapeHtml(m.display_name)}</div>
|
||||||
<div class="meta">
|
<div class="meta">
|
||||||
<span class="tag mode-${m.mode}">${m.mode}</span>
|
<span class="tag mode-${m.mode}">${m.mode}</span>
|
||||||
<span class="tag">${m.size_gb} GB</span>
|
<span class="tag">${m.size_gb} GB</span>
|
||||||
|
${customPill}
|
||||||
${(m.capabilities || []).map(c => `<span class="tag cap">${escapeHtml(c)}</span>`).join('')}
|
${(m.capabilities || []).map(c => `<span class="tag cap">${escapeHtml(c)}</span>`).join('')}
|
||||||
</div>
|
</div>
|
||||||
${desc}
|
${desc}
|
||||||
<div class="muted small repo">${escapeHtml(m.repo)}</div>
|
<div class="muted small repo">${escapeHtml(m.repo)}</div>
|
||||||
<div class="spacer"></div>
|
<div class="spacer"></div>
|
||||||
<button class="btn ${isActive ? '' : 'primary'}" data-key="${key}" ${isActive || isSwapping ? 'disabled' : ''}>
|
<div class="card-actions">
|
||||||
|
<button class="btn ${isActive ? '' : 'primary'}" data-swap-key="${key}" ${isActive || isSwapping ? 'disabled' : ''}>
|
||||||
${isActive ? 'Current' : 'Switch to this'}
|
${isActive ? 'Current' : 'Switch to this'}
|
||||||
</button>
|
</button>
|
||||||
|
<button class="btn adv-btn" data-adv-key="${key}" title="Advanced settings">Advanced</button>
|
||||||
|
</div>
|
||||||
`;
|
`;
|
||||||
root.appendChild(card);
|
root.appendChild(card);
|
||||||
}
|
}
|
||||||
for (const btn of $$('.card .btn')) {
|
for (const btn of root.querySelectorAll('[data-swap-key]')) {
|
||||||
btn.addEventListener('click', () => triggerSwap(btn.dataset.key));
|
btn.addEventListener('click', () => triggerSwap(btn.dataset.swapKey));
|
||||||
|
}
|
||||||
|
for (const btn of root.querySelectorAll('[data-adv-key]')) {
|
||||||
|
btn.addEventListener('click', () => openAdvanced(btn.dataset.advKey));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -544,6 +552,8 @@ async function startDownload() {
|
|||||||
alert('Enter a HuggingFace repo in the form "org/name", e.g. RedHatAI/Qwen3.6-35B-A3B-NVFP4');
|
alert('Enter a HuggingFace repo in the form "org/name", e.g. RedHatAI/Qwen3.6-35B-A3B-NVFP4');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
dlState.last_repo = repo;
|
||||||
|
dlState.last_mode = mode;
|
||||||
try {
|
try {
|
||||||
const r = await fetchJSON('/api/download', {
|
const r = await fetchJSON('/api/download', {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
@@ -623,12 +633,126 @@ function handleDownloadDone(d) {
|
|||||||
el('#dl-phase').textContent = 'Failed';
|
el('#dl-phase').textContent = 'Failed';
|
||||||
} else {
|
} else {
|
||||||
el('#dl-title').textContent = 'Done';
|
el('#dl-title').textContent = 'Done';
|
||||||
el('#dl-phase').textContent = 'Done ✓ — you can now add this model to the catalog and swap to it.';
|
el('#dl-phase').textContent = 'Done ✓';
|
||||||
el('#dl-progress-fill').style.width = '100%';
|
el('#dl-progress-fill').style.width = '100%';
|
||||||
|
// Offer to add to catalog
|
||||||
|
const repo = dlState.last_repo;
|
||||||
|
const mode = dlState.last_mode;
|
||||||
|
if (repo) {
|
||||||
|
setTimeout(() => openCatalogDialog(repo, mode), 600);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
dlState.job_id = null;
|
dlState.job_id = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ===================== Advanced / Add to catalog =====================
|
||||||
|
|
||||||
|
// Open the Advanced dialog for the model stored under `key`: fill the form
// from the model's effective knobs, wire Delete (custom models only) and
// Save, then show the dialog modally. Does nothing for an unknown key.
function openAdvanced(key) {
  const model = state.models[key];
  if (!model) return;

  const dialog = el('#advanced-dialog');
  const current = model.effective_knobs || {};
  const gmuInput = el('#adv-gmu');

  el('#adv-title').textContent = `Advanced — ${model.display_name}`;
  el('#adv-mml').value = current.max_model_len ?? '';
  gmuInput.value = current.gpu_memory_utilization ?? 0.85;
  el('#adv-gmu-out').value = parseFloat(gmuInput.value).toFixed(2);
  el('#adv-fst').checked = !!current.fastsafetensors;
  el('#adv-pcache').checked = !!current.prefix_caching;
  el('#adv-fp8').checked = current.kv_cache_dtype === 'fp8';

  // Close the dialog and refresh the cards after a successful request.
  const closeAndRefresh = async () => {
    dialog.close();
    await loadModels();
    pollStatus();
  };

  // Read the form back into a knobs object; an empty or invalid max
  // context is simply left out.
  const readKnobs = () => {
    const knobs = {};
    const maxLen = parseInt(el('#adv-mml').value, 10);
    if (Number.isFinite(maxLen) && maxLen > 0) knobs.max_model_len = maxLen;
    const gpuShare = parseFloat(el('#adv-gmu').value);
    if (Number.isFinite(gpuShare)) knobs.gpu_memory_utilization = gpuShare;
    knobs.fastsafetensors = el('#adv-fst').checked;
    knobs.prefix_caching = el('#adv-pcache').checked;
    knobs.kv_cache_dtype = el('#adv-fp8').checked ? 'fp8' : 'auto';
    return knobs;
  };

  const deleteBtn = el('#adv-delete');
  deleteBtn.classList.toggle('hidden', !model.custom);
  deleteBtn.onclick = async () => {
    const confirmed = confirm(`Delete "${model.display_name}" from the catalog? The model weights on disk are NOT deleted.`);
    if (!confirmed) return;
    try {
      await fetchJSON(`/api/models/${encodeURIComponent(key)}`, { method: 'DELETE' });
      await closeAndRefresh();
    } catch (err) {
      alert('Delete failed: ' + err.message);
    }
  };

  el('#advanced-form').onsubmit = async (event) => {
    event.preventDefault();
    const knobs = readKnobs();
    try {
      await fetchJSON(`/api/models/${encodeURIComponent(key)}/knobs`, {
        method: 'PUT',
        headers: { 'content-type': 'application/json' },
        body: JSON.stringify({ knobs }),
      });
      await closeAndRefresh();
    } catch (err) {
      alert('Save failed: ' + err.message);
    }
  };

  dialog.showModal();
}
|
||||||
|
|
||||||
|
// Show the "Add to catalog" dialog pre-filled for `repo`: the key and
// display name are derived from the last path segment of the repo, and the
// knob fields are reset to their defaults.
function openCatalogDialog(repo, mode) {
  const shortName = repo.split('/').pop();
  const suggestedKey = shortName.toLowerCase().replace(/[^a-z0-9_-]/g, '-');

  const values = {
    '#cd-key': suggestedKey,
    '#cd-name': shortName,
    '#cd-repo': repo,
    '#cd-size': '',
    '#cd-mode': mode || 'solo',
    '#cd-desc': '',
    '#cd-mml': 32768,
    '#cd-gmu': 0.85,
    '#cd-gmu-out': '0.85',
  };
  for (const [selector, value] of Object.entries(values)) {
    el(selector).value = value;
  }
  for (const selector of ['#cd-fst', '#cd-pcache', '#cd-fp8']) {
    el(selector).checked = true;
  }

  el('#catalog-dialog').showModal();
}
|
||||||
|
|
||||||
|
// One-time wiring for the "Add to catalog" dialog: Cancel closes it, the
// GPU slider mirrors its value into the output, and submit POSTs the new
// entry and refreshes the cards.
function setupCatalogDialog() {
  const dialog = () => el('#catalog-dialog');

  el('#cd-cancel').addEventListener('click', () => dialog().close());

  el('#cd-gmu').addEventListener('input', (event) => {
    el('#cd-gmu-out').value = parseFloat(event.target.value).toFixed(2);
  });

  // Collect the form fields into the request body for POST /api/models.
  const readForm = () => {
    const text = (selector) => el(selector).value.trim();
    return {
      key: text('#cd-key'),
      display_name: text('#cd-name'),
      repo: text('#cd-repo'),
      size_gb: parseFloat(el('#cd-size').value) || 0,
      mode: el('#cd-mode').value,
      description: text('#cd-desc') || null,
      vllm_args: [],
      knobs: {
        max_model_len: parseInt(el('#cd-mml').value, 10) || 32768,
        gpu_memory_utilization: parseFloat(el('#cd-gmu').value),
        fastsafetensors: el('#cd-fst').checked,
        prefix_caching: el('#cd-pcache').checked,
        kv_cache_dtype: el('#cd-fp8').checked ? 'fp8' : 'auto',
      },
    };
  };

  el('#catalog-form').addEventListener('submit', async (event) => {
    event.preventDefault();
    const body = readForm();
    try {
      await fetchJSON('/api/models', {
        method: 'POST',
        headers: { 'content-type': 'application/json' },
        body: JSON.stringify(body),
      });
      dialog().close();
      closeDownloadPanel();
      await loadModels();
      pollStatus();
    } catch (err) {
      alert('Add to catalog failed: ' + err.message);
    }
  });
}
|
||||||
|
|
||||||
|
// One-time wiring for the Advanced dialog: Cancel closes it and the GPU
// slider mirrors its value (two decimals) into the output next to it.
function setupAdvancedDialog() {
  const onCancel = () => el('#advanced-dialog').close();
  const onSlide = (event) => {
    el('#adv-gmu-out').value = parseFloat(event.target.value).toFixed(2);
  };
  el('#adv-cancel').addEventListener('click', onCancel);
  el('#adv-gmu').addEventListener('input', onSlide);
}
|
||||||
|
|
||||||
// ===================== updates (spark-vllm-docker) =====================
|
// ===================== updates (spark-vllm-docker) =====================
|
||||||
|
|
||||||
const updState = {
|
const updState = {
|
||||||
@@ -769,6 +893,8 @@ async function init() {
|
|||||||
list.open = !list.open;
|
list.open = !list.open;
|
||||||
});
|
});
|
||||||
el('#ub-apply').addEventListener('click', applyUpdate);
|
el('#ub-apply').addEventListener('click', applyUpdate);
|
||||||
|
setupCatalogDialog();
|
||||||
|
setupAdvancedDialog();
|
||||||
await loadModels();
|
await loadModels();
|
||||||
await pollStatus();
|
await pollStatus();
|
||||||
await renderServices();
|
await renderServices();
|
||||||
|
|||||||
@@ -74,6 +74,54 @@
|
|||||||
<button id="open-download" class="btn small-btn">+ Download a new model</button>
|
<button id="open-download" class="btn small-btn">+ Download a new model</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<dialog id="catalog-dialog" class="modal">
|
||||||
|
<form method="dialog" class="modal-form" id="catalog-form">
|
||||||
|
<h3>Add downloaded model to catalog</h3>
|
||||||
|
<p class="muted small">It will appear as a new card you can swap to. Knob values become its default launch flags — you can tweak later via the model's "Advanced" panel.</p>
|
||||||
|
<label class="modal-row"><span>Key (URL-safe id)</span><input type="text" id="cd-key" required pattern="[a-zA-Z0-9_-]+"></label>
|
||||||
|
<label class="modal-row"><span>Display name</span><input type="text" id="cd-name" required></label>
|
||||||
|
<label class="modal-row"><span>Repo (read-only)</span><input type="text" id="cd-repo" readonly></label>
|
||||||
|
<label class="modal-row"><span>Size (GB)</span><input type="number" id="cd-size" step="0.1" min="0"></label>
|
||||||
|
<label class="modal-row"><span>Mode</span>
|
||||||
|
<select id="cd-mode">
|
||||||
|
<option value="solo">solo (Spark 1 only)</option>
|
||||||
|
<option value="cluster">cluster (both Sparks via Ray)</option>
|
||||||
|
</select>
|
||||||
|
</label>
|
||||||
|
<label class="modal-row"><span>Description (optional)</span><textarea id="cd-desc" rows="3"></textarea></label>
|
||||||
|
<fieldset class="modal-fieldset">
|
||||||
|
<legend>Default launch knobs</legend>
|
||||||
|
<label class="modal-row"><span>Max context (tokens)</span><input type="number" id="cd-mml" step="1024" min="1024" value="32768"></label>
|
||||||
|
<label class="modal-row"><span>GPU memory %</span><input type="range" id="cd-gmu" min="0.5" max="0.95" step="0.01" value="0.85"> <output id="cd-gmu-out">0.85</output></label>
|
||||||
|
<label class="modal-row inline"><input type="checkbox" id="cd-fst" checked> Fast safetensors loading</label>
|
||||||
|
<label class="modal-row inline"><input type="checkbox" id="cd-pcache" checked> Prefix caching</label>
|
||||||
|
<label class="modal-row inline"><input type="checkbox" id="cd-fp8" checked> FP8 KV cache</label>
|
||||||
|
</fieldset>
|
||||||
|
<div class="modal-actions">
|
||||||
|
<button type="button" id="cd-cancel" class="btn">Cancel</button>
|
||||||
|
<button type="submit" class="btn primary">Add to catalog</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
</dialog>
|
||||||
|
|
||||||
|
<dialog id="advanced-dialog" class="modal">
|
||||||
|
<form method="dialog" class="modal-form" id="advanced-form">
|
||||||
|
<h3 id="adv-title">Advanced settings</h3>
|
||||||
|
<p class="muted small">Custom values are stored in the package volume and survive package updates. Empty fields fall back to defaults.</p>
|
||||||
|
<label class="modal-row"><span>Max context (tokens)</span><input type="number" id="adv-mml" step="1024" min="1024"></label>
|
||||||
|
<label class="modal-row"><span>GPU memory %</span><input type="range" id="adv-gmu" min="0.5" max="0.95" step="0.01"> <output id="adv-gmu-out"></output></label>
|
||||||
|
<label class="modal-row inline"><input type="checkbox" id="adv-fst"> Fast safetensors loading <span class="muted small">(faster cold start)</span></label>
|
||||||
|
<label class="modal-row inline"><input type="checkbox" id="adv-pcache"> Prefix caching <span class="muted small">(speeds up repeated prefixes)</span></label>
|
||||||
|
<label class="modal-row inline"><input type="checkbox" id="adv-fp8"> FP8 KV cache <span class="muted small">(halves context memory)</span></label>
|
||||||
|
<div class="modal-actions">
|
||||||
|
<button type="button" id="adv-delete" class="btn danger hidden">Delete model</button>
|
||||||
|
<span class="spacer"></span>
|
||||||
|
<button type="button" id="adv-cancel" class="btn">Cancel</button>
|
||||||
|
<button type="submit" class="btn primary">Save</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
</dialog>
|
||||||
|
|
||||||
<section id="download-panel" class="download-panel hidden">
|
<section id="download-panel" class="download-panel hidden">
|
||||||
<div class="download-form" id="download-form">
|
<div class="download-form" id="download-form">
|
||||||
<label class="dl-row">
|
<label class="dl-row">
|
||||||
|
|||||||
@@ -217,6 +217,57 @@ main {
|
|||||||
word-break: break-word;
|
word-break: break-word;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ===== Modal dialogs (Advanced / Add to catalog) ===== */
|
||||||
|
|
||||||
|
.modal {
|
||||||
|
background: var(--surface);
|
||||||
|
color: var(--text);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: var(--radius);
|
||||||
|
padding: 0;
|
||||||
|
max-width: 520px;
|
||||||
|
width: 92vw;
|
||||||
|
}
|
||||||
|
.modal::backdrop {
|
||||||
|
background: rgba(0, 0, 0, 0.6);
|
||||||
|
backdrop-filter: blur(2px);
|
||||||
|
}
|
||||||
|
.modal-form { padding: 22px 24px; display: flex; flex-direction: column; gap: 12px; }
|
||||||
|
.modal-form h3 { margin: 0; font-size: 17px; }
|
||||||
|
.modal-row {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 4px;
|
||||||
|
font-size: 13px;
|
||||||
|
color: var(--muted);
|
||||||
|
}
|
||||||
|
.modal-row.inline { flex-direction: row; align-items: center; gap: 8px; color: var(--text); font-size: 14px; }
|
||||||
|
.modal-row > span { color: var(--muted); font-size: 12px; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||||
|
.modal-row input[type='text'],
|
||||||
|
.modal-row input[type='number'],
|
||||||
|
.modal-row textarea,
|
||||||
|
.modal-row select {
|
||||||
|
background: var(--surface-2);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
color: var(--text);
|
||||||
|
padding: 7px 10px;
|
||||||
|
border-radius: 6px;
|
||||||
|
font: 13px ui-monospace, SFMono-Regular, "SF Mono", Menlo, monospace;
|
||||||
|
}
|
||||||
|
.modal-row textarea { font-family: inherit; resize: vertical; }
|
||||||
|
.modal-row input:focus, .modal-row textarea:focus, .modal-row select:focus { outline: 1px solid var(--info); border-color: var(--info); }
|
||||||
|
.modal-row input[type='range'] { padding: 0; flex: 1; }
|
||||||
|
.modal-fieldset {
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: 6px;
|
||||||
|
padding: 12px 14px 4px;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 10px;
|
||||||
|
}
|
||||||
|
.modal-fieldset legend { color: var(--muted); font-size: 11px; text-transform: uppercase; letter-spacing: 0.05em; padding: 0 6px; }
|
||||||
|
.modal-actions { display: flex; gap: 8px; justify-content: flex-end; margin-top: 8px; align-items: center; }
|
||||||
|
|
||||||
/* ===== Update banner ===== */
|
/* ===== Update banner ===== */
|
||||||
|
|
||||||
.update-banner {
|
.update-banner {
|
||||||
@@ -436,7 +487,13 @@ main {
|
|||||||
.btn.primary { background: var(--accent); color: #052e16; border-color: var(--accent); }
|
.btn.primary { background: var(--accent); color: #052e16; border-color: var(--accent); }
|
||||||
.btn.primary:hover:not(:disabled) { background: #6ee19a; }
|
.btn.primary:hover:not(:disabled) { background: #6ee19a; }
|
||||||
.btn:disabled { opacity: 0.45; cursor: not-allowed; }
|
.btn:disabled { opacity: 0.45; cursor: not-allowed; }
|
||||||
|
.btn.danger { color: var(--error); border-color: rgba(239, 68, 68, 0.3); }
|
||||||
|
.btn.danger:hover:not(:disabled) { background: rgba(239, 68, 68, 0.08); border-color: var(--error); }
|
||||||
.card.active .btn { background: rgba(74, 222, 128, 0.12); color: var(--accent); border-color: rgba(74, 222, 128, 0.4); }
|
.card.active .btn { background: rgba(74, 222, 128, 0.12); color: var(--accent); border-color: rgba(74, 222, 128, 0.4); }
|
||||||
|
.card-actions { display: flex; gap: 6px; }
|
||||||
|
.card-actions .btn.primary { flex: 1; }
|
||||||
|
.card .adv-btn { padding: 8px 12px; font-size: 12px; }
|
||||||
|
.card .custom-pill { color: var(--info); border-color: rgba(96, 165, 250, 0.4); }
|
||||||
|
|
||||||
.footer {
|
.footer {
|
||||||
margin-top: 28px;
|
margin-top: 28px;
|
||||||
|
|||||||
@@ -46,6 +46,7 @@ export const main = sdk.setupMain(async ({ effects }) => {
|
|||||||
MAGPIE_HOST: cfg.magpie_host,
|
MAGPIE_HOST: cfg.magpie_host,
|
||||||
MAGPIE_USER: cfg.magpie_user,
|
MAGPIE_USER: cfg.magpie_user,
|
||||||
MAGPIE_CONTAINER: cfg.magpie_container,
|
MAGPIE_CONTAINER: cfg.magpie_container,
|
||||||
|
MODELS_OVERRIDES: '/data/models-overrides.yaml',
|
||||||
BIND_PORT: String(uiPort),
|
BIND_PORT: String(uiPort),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
import { VersionInfo, IMPOSSIBLE } from '@start9labs/start-sdk'
|
import { VersionInfo, IMPOSSIBLE } from '@start9labs/start-sdk'
|
||||||
|
|
||||||
export const v0_1_0 = VersionInfo.of({
|
export const v0_1_0 = VersionInfo.of({
|
||||||
version: '0.2.2:0',
|
version: '0.2.3:0',
|
||||||
releaseNotes: {
|
releaseNotes: {
|
||||||
en_US:
|
en_US:
|
||||||
'Update checking for spark-vllm-docker. Dashboard footer shows "N commits behind upstream" when applicable; click for the commit log, then "Apply update" runs git pull + ./build-and-copy.sh -c on Spark 1 with a streamed log and elapsed timer. No auto-apply — you confirm each update.',
|
'Per-model Advanced settings + downloaded-model catalog flow. Each card now has an Advanced button: max context tokens, GPU memory %, and optimization toggles (fastsafetensors, prefix caching, FP8 KV cache). After a download finishes, a dialog appears to add the model to the catalog with those same knobs as launch defaults. Custom models can be deleted. Overrides persist in /data/models-overrides.yaml and survive package updates.',
|
||||||
},
|
},
|
||||||
migrations: {
|
migrations: {
|
||||||
up: async ({ effects }) => {},
|
up: async ({ effects }) => {},
|
||||||
|
|||||||
Reference in New Issue
Block a user