v0.2.3 - Per-model Advanced settings + catalog-add for downloaded models

Backend:
- overrides.py: read/write /data/models-overrides.yaml (knobs + custom entries)
- apply_knobs_to_args(): strip matching flags from bundled vllm_args and append knob values, so knob changes properly override bundled defaults
- extract_knobs_from_args(): seed UI knob values from bundled args so the Advanced dialog has correct starting state
- models.py: load_catalog merges overrides on top of bundled yaml
- GET /api/models returns effective_knobs per model
- PUT /api/models/{key}/knobs persists knob changes
- POST /api/models adds a custom catalog entry
- DELETE /api/models/{key} removes a custom entry (bundled models cannot be deleted)
- swap_manager.reload_catalog() called after each mutation so swaps see latest

Frontend:
- New 'Advanced' button on every card opens a modal dialog: max-model-len input, gpu-memory-utilization slider, three optimization checkboxes (fastsafetensors, prefix caching, FP8 KV cache). Save persists; Cancel discards. Custom models also have a Delete button.
- After a successful download, automatically open the 'Add to catalog' dialog pre-filled with the repo, with the same knob defaults — user just enters key, display name, and clicks Save.
- Custom catalog entries are tagged with a blue 'custom' pill on the card.

Package: bump 0.2.3:0; main.ts sets MODELS_OVERRIDES=/data/models-overrides.yaml so overrides persist on the StartOS volume.
This commit is contained in:
Grant
2026-05-12 11:30:47 -05:00
parent 474417b458
commit 75fd0846b4
8 changed files with 490 additions and 12 deletions
+132 -6
View File
@@ -53,24 +53,32 @@ function renderCards() {
const desc = m.description
? `<div class="desc">${escapeHtml(m.description)}</div>`
: '';
const customPill = m.custom ? `<span class="tag custom-pill">custom</span>` : '';
card.innerHTML = `
<div class="name">${escapeHtml(m.display_name)}</div>
<div class="meta">
<span class="tag mode-${m.mode}">${m.mode}</span>
<span class="tag">${m.size_gb} GB</span>
${customPill}
${(m.capabilities || []).map(c => `<span class="tag cap">${escapeHtml(c)}</span>`).join('')}
</div>
${desc}
<div class="muted small repo">${escapeHtml(m.repo)}</div>
<div class="spacer"></div>
<button class="btn ${isActive ? '' : 'primary'}" data-key="${key}" ${isActive || isSwapping ? 'disabled' : ''}>
${isActive ? 'Current' : 'Switch to this'}
</button>
<div class="card-actions">
<button class="btn ${isActive ? '' : 'primary'}" data-swap-key="${key}" ${isActive || isSwapping ? 'disabled' : ''}>
${isActive ? 'Current' : 'Switch to this'}
</button>
<button class="btn adv-btn" data-adv-key="${key}" title="Advanced settings">Advanced</button>
</div>
`;
root.appendChild(card);
}
for (const btn of $$('.card .btn')) {
btn.addEventListener('click', () => triggerSwap(btn.dataset.key));
for (const btn of root.querySelectorAll('[data-swap-key]')) {
btn.addEventListener('click', () => triggerSwap(btn.dataset.swapKey));
}
for (const btn of root.querySelectorAll('[data-adv-key]')) {
btn.addEventListener('click', () => openAdvanced(btn.dataset.advKey));
}
}
@@ -544,6 +552,8 @@ async function startDownload() {
alert('Enter a HuggingFace repo in the form "org/name", e.g. RedHatAI/Qwen3.6-35B-A3B-NVFP4');
return;
}
dlState.last_repo = repo;
dlState.last_mode = mode;
try {
const r = await fetchJSON('/api/download', {
method: 'POST',
@@ -623,12 +633,126 @@ function handleDownloadDone(d) {
el('#dl-phase').textContent = 'Failed';
} else {
el('#dl-title').textContent = 'Done';
el('#dl-phase').textContent = 'Done ✓ — you can now add this model to the catalog and swap to it.';
el('#dl-phase').textContent = 'Done ✓';
el('#dl-progress-fill').style.width = '100%';
// Offer to add to catalog
const repo = dlState.last_repo;
const mode = dlState.last_mode;
if (repo) {
setTimeout(() => openCatalogDialog(repo, mode), 600);
}
}
dlState.job_id = null;
}
// ===================== Advanced / Add to catalog =====================
function openAdvanced(key) {
const m = state.models[key];
if (!m) return;
const dlg = el('#advanced-dialog');
el('#adv-title').textContent = `Advanced — ${m.display_name}`;
const k = m.effective_knobs || {};
el('#adv-mml').value = k.max_model_len ?? '';
el('#adv-gmu').value = k.gpu_memory_utilization ?? 0.85;
el('#adv-gmu-out').value = parseFloat(el('#adv-gmu').value).toFixed(2);
el('#adv-fst').checked = !!k.fastsafetensors;
el('#adv-pcache').checked = !!k.prefix_caching;
el('#adv-fp8').checked = k.kv_cache_dtype === 'fp8';
const del = el('#adv-delete');
del.classList.toggle('hidden', !m.custom);
del.onclick = async () => {
if (!confirm(`Delete "${m.display_name}" from the catalog? The model weights on disk are NOT deleted.`)) return;
try {
await fetchJSON(`/api/models/${encodeURIComponent(key)}`, { method: 'DELETE' });
dlg.close();
await loadModels();
pollStatus();
} catch (e) { alert('Delete failed: ' + e.message); }
};
const form = el('#advanced-form');
form.onsubmit = async (e) => {
e.preventDefault();
const knobs = {};
const mml = parseInt(el('#adv-mml').value, 10);
if (Number.isFinite(mml) && mml > 0) knobs.max_model_len = mml;
const gmu = parseFloat(el('#adv-gmu').value);
if (Number.isFinite(gmu)) knobs.gpu_memory_utilization = gmu;
if (el('#adv-fst').checked) knobs.fastsafetensors = true; else knobs.fastsafetensors = false;
if (el('#adv-pcache').checked) knobs.prefix_caching = true; else knobs.prefix_caching = false;
knobs.kv_cache_dtype = el('#adv-fp8').checked ? 'fp8' : 'auto';
try {
await fetchJSON(`/api/models/${encodeURIComponent(key)}/knobs`, {
method: 'PUT',
headers: { 'content-type': 'application/json' },
body: JSON.stringify({ knobs }),
});
dlg.close();
await loadModels();
pollStatus();
} catch (e) { alert('Save failed: ' + e.message); }
};
dlg.showModal();
}
function openCatalogDialog(repo, mode) {
const dlg = el('#catalog-dialog');
const key = repo.split('/').pop().toLowerCase().replace(/[^a-z0-9_-]/g, '-');
el('#cd-key').value = key;
el('#cd-name').value = repo.split('/').pop();
el('#cd-repo').value = repo;
el('#cd-size').value = '';
el('#cd-mode').value = mode || 'solo';
el('#cd-desc').value = '';
el('#cd-mml').value = 32768;
el('#cd-gmu').value = 0.85;
el('#cd-gmu-out').value = '0.85';
el('#cd-fst').checked = true;
el('#cd-pcache').checked = true;
el('#cd-fp8').checked = true;
dlg.showModal();
}
function setupCatalogDialog() {
el('#cd-cancel').addEventListener('click', () => el('#catalog-dialog').close());
el('#cd-gmu').addEventListener('input', (e) => { el('#cd-gmu-out').value = parseFloat(e.target.value).toFixed(2); });
el('#catalog-form').addEventListener('submit', async (e) => {
e.preventDefault();
const body = {
key: el('#cd-key').value.trim(),
display_name: el('#cd-name').value.trim(),
repo: el('#cd-repo').value.trim(),
size_gb: parseFloat(el('#cd-size').value) || 0,
mode: el('#cd-mode').value,
description: el('#cd-desc').value.trim() || null,
vllm_args: [],
knobs: {
max_model_len: parseInt(el('#cd-mml').value, 10) || 32768,
gpu_memory_utilization: parseFloat(el('#cd-gmu').value),
fastsafetensors: el('#cd-fst').checked,
prefix_caching: el('#cd-pcache').checked,
kv_cache_dtype: el('#cd-fp8').checked ? 'fp8' : 'auto',
},
};
try {
await fetchJSON('/api/models', {
method: 'POST',
headers: { 'content-type': 'application/json' },
body: JSON.stringify(body),
});
el('#catalog-dialog').close();
closeDownloadPanel();
await loadModels();
pollStatus();
} catch (e) { alert('Add to catalog failed: ' + e.message); }
});
}
function setupAdvancedDialog() {
el('#adv-cancel').addEventListener('click', () => el('#advanced-dialog').close());
el('#adv-gmu').addEventListener('input', (e) => { el('#adv-gmu-out').value = parseFloat(e.target.value).toFixed(2); });
}
// ===================== updates (spark-vllm-docker) =====================
const updState = {
@@ -769,6 +893,8 @@ async function init() {
list.open = !list.open;
});
el('#ub-apply').addEventListener('click', applyUpdate);
setupCatalogDialog();
setupAdvancedDialog();
await loadModels();
await pollStatus();
await renderServices();