v0.3.0 - Hardware dashboard + knob context + Explain context + Open WebUI link
Hardware dashboard:
- New hardware.py module: SSH probes each Spark for hostname, uptime, load+cores, RAM, disk, GPU (name, util, temp, power) + per-process GPU memory sum
- DGX Spark uses unified memory (nvidia-smi memory.total returns N/A), so the probe falls back to the sum of per-process compute memory and computes the used fraction against system RAM. Such hosts are marked with gpu_unified_memory=true.
- 4s TTL cache in HardwareProbe to avoid hammering the Sparks with SSH probes
- /api/hardware returns per-Spark snapshot
- UI: 'Spark hardware' section at the top with per-Spark cards (CPU load, RAM, GPU mem (unified), GPU util + temp + power, disk) — bars with warn threshold styling
- Polls every 8s
Knob context (tied to live hardware):
- Each Advanced knob now shows plain-English help text
- 'GPU memory %' shows '~N GB allocated · ~M GB left for OS/buffers' computed from actual Spark RAM
- 'Max context' shows '~N pages of text'
- Toggles show tradeoff descriptions
Explain context:
- '✨ Explain context' button on the update banner
- /api/explain-updates POST: forwards pending commits to the loaded vLLM model and streams its response back as SSE
- Renders into an expandable 'Explained by the loaded LLM' section under Pending commits
- Reasoning tokens shown italicized when the model emits them
Open WebUI integration:
- New 'Open WebUI URL' optional field in Configure Sparks
- /api/config exposes it; UI shows 'Open chat ↗' button in the top bar if set
Downloads:
- Third radio option: Spark 1 only / Spark 2 only / Both Sparks
- Backend picks SSH target based on mode
- HF repo link icon next to the input
- Helper line about NVFP4 for Blackwell
Model cards:
- Repo name is now a clickable link to its Hugging Face page
Package: bump 0.3.0:0
This commit is contained in:
+243
-1
@@ -13,6 +13,8 @@ const state = {
|
||||
swap_progress: 0, // 0–1
|
||||
services: {},
|
||||
service_action_in_flight: null, // e.g. "parakeet:restart"
|
||||
hardware: {},
|
||||
config: {},
|
||||
configured: true,
|
||||
timer_handle: null,
|
||||
};
|
||||
@@ -63,7 +65,9 @@ function renderCards() {
|
||||
${(m.capabilities || []).map(c => `<span class="tag cap">${escapeHtml(c)}</span>`).join('')}
|
||||
</div>
|
||||
${desc}
|
||||
<div class="muted small repo">${escapeHtml(m.repo)}</div>
|
||||
<div class="muted small repo">
|
||||
<a href="https://huggingface.co/${encodeURIComponent(m.repo)}" target="_blank" rel="noopener" title="View on Hugging Face">${escapeHtml(m.repo)} <span class="hf-icon">↗</span></a>
|
||||
</div>
|
||||
<div class="spacer"></div>
|
||||
<div class="card-actions">
|
||||
<button class="btn ${isActive ? '' : 'primary'}" data-swap-key="${key}" ${isActive || isSwapping ? 'disabled' : ''}>
|
||||
@@ -93,6 +97,107 @@ function renderCurrent(status) {
|
||||
c.innerHTML = `<strong>${label}</strong>`;
|
||||
}
|
||||
|
||||
// ===================== hardware dashboard =====================
|
||||
|
||||
/**
 * Format a byte count as a short human-readable string using 1024-based units.
 *
 * @param {number|null|undefined} n - Size in bytes.
 * @returns {string} e.g. "1.5 KB" or "128 GB"; '—' when `n` is missing/NaN.
 */
function fmtBytes(n) {
  // Any falsy value other than 0 (null, undefined, NaN, '') means "no data".
  if (!n && n !== 0) return '—';
  const units = ['B', 'KB', 'MB', 'GB', 'TB'];
  const last = units.length - 1;
  let i = 0;
  let v = n;
  while (v >= 1024 && i < last) {
    v /= 1024;
    i++;
  }
  // Rounding could otherwise display "1024 KB" for values just below the
  // next unit; promote once more so it reads "1.0 MB" instead.
  if (i < last && Math.round(v) >= 1024) {
    v /= 1024;
    i++;
  }
  // One decimal for small magnitudes, whole numbers otherwise.
  return v < 10 ? `${v.toFixed(1)} ${units[i]}` : `${Math.round(v)} ${units[i]}`;
}
|
||||
/**
 * Convert a MiB quantity to a GB figure for display (no unit suffix).
 *
 * @param {number|null|undefined} n - Size in MiB.
 * @returns {string|null} One decimal below 10 GB, a whole number otherwise;
 *   null when `n` is missing or not a finite number, so callers can
 *   substitute their own placeholder.
 */
function fmtMiB(n) {
  // Falsy-but-not-zero (null, undefined, NaN, '') means "no data".
  if (!n && n !== 0) return null;
  const gb = Number(n) / 1024;
  // Guard against Infinity / non-numeric input rendering as "Infinity"/"NaN".
  if (!Number.isFinite(gb)) return null;
  return gb < 10 ? gb.toFixed(1) : Math.round(gb).toString();
}
|
||||
|
||||
/**
 * Build the HTML for a horizontal usage bar.
 *
 * @param {number} usedPct - Fill percentage; clamped to the range 2–100 so an
 *   empty bar still shows a sliver. Missing/NaN input is treated as 0.
 * @param {boolean} warn - When truthy, adds the `warn` class to the bar.
 * @returns {string} HTML markup for the bar.
 */
function bar(usedPct, warn) {
  const raw = Number(usedPct);
  // NaN would otherwise propagate through Math.min/Math.max into "width:NaN%".
  const safe = Number.isNaN(raw) ? 0 : raw;
  const pct = Math.max(2, Math.min(100, safe));
  return `<div class="bar ${warn ? 'warn' : ''}"><span style="width:${pct}%"></span></div>`;
}
|
||||
|
||||
/**
 * Fetch the latest hardware snapshot from `/api/hardware`, store it in
 * `state.hardware`, and re-render the hardware panel.
 *
 * Best-effort: a failed fetch or render is logged and the previous snapshot
 * is left in place, so a periodic caller keeps running.
 *
 * @returns {Promise<void>}
 */
async function pollHardware() {
  try {
    state.hardware = await fetchJSON('/api/hardware');
    renderHardware();
  } catch (e) {
    console.warn('hardware poll failed', e);
  }
}
|
||||
|
||||
/**
 * Render one card per entry of `state.hardware` into `#hardware-grid`.
 *
 * Entries with `configured === false` are skipped; when nothing remains the
 * whole `#hardware-panel` is hidden. Entries with a falsy `reachable` get a
 * compact "unreachable" card showing host and error. Reachable entries get
 * CPU, RAM, GPU memory, GPU utilisation and disk rows.
 *
 * Numeric fields are coerced defensively so that one malformed value (e.g. a
 * non-numeric power reading or an empty load array) cannot throw and abort
 * rendering of every card. Nothing is interpolated into the markup without
 * either numeric coercion or `escapeHtml`.
 */
function renderHardware() {
  const panel = el('#hardware-panel');
  const grid = el('#hardware-grid');
  const hw = state.hardware || {};
  const keys = Object.keys(hw).filter((k) => hw[k] && hw[k].configured !== false);
  if (keys.length === 0) {
    panel.classList.add('hidden');
    return;
  }
  panel.classList.remove('hidden');
  grid.innerHTML = '';

  // Finite number or null — never a string, so results are safe to interpolate.
  const num = (v) => {
    if (v == null || v === '') return null;
    const x = Number(v);
    return Number.isFinite(x) ? x : null;
  };
  // Percentage used; 0 when either side is missing or zero.
  const pctOf = (used, total) => (used && total ? (used / total) * 100 : 0);

  for (const key of keys) {
    const s = hw[key];
    const card = document.createElement('div');

    if (!s.reachable) {
      card.className = 'hw-card unreachable';
      card.innerHTML = `
        <div class="head">
          <span class="name">${escapeHtml(key)}</span>
          <span class="meta">unreachable</span>
        </div>
        <div class="muted small">${escapeHtml(s.host || '')} — ${escapeHtml(s.error || 'no response')}</div>
      `;
      grid.appendChild(card);
      continue;
    }

    const ramPct = pctOf(s.ram_used_bytes, s.ram_total_bytes);
    const diskPct = pctOf(s.disk_used_bytes, s.disk_total_bytes);

    const cores = num(s.cores);
    const load1 = Array.isArray(s.load) ? num(s.load[0]) : null;
    const loadPct = load1 != null && cores ? Math.min(100, (load1 / cores) * 100) : 0;

    // GPU memory: on unified-memory systems (DGX Spark) total is N/A, so use system RAM as the pool.
    const gpuMemTotalMiB = s.gpu_mem_total_mib
      || (s.gpu_unified_memory ? (s.ram_total_bytes / (1024 * 1024)) : null);
    const gpuMemUsedMiB = s.gpu_mem_used_mib ?? null;
    const gpuMemPct = (gpuMemTotalMiB && gpuMemUsedMiB != null)
      ? (gpuMemUsedMiB / gpuMemTotalMiB) * 100
      : 0;
    const gpuMemNote = s.gpu_unified_memory ? ' <span class="muted">(unified)</span>' : '';

    const gpuUtil = num(s.gpu_util_pct) ?? 0;
    const gpuTemp = num(s.gpu_temp_c);
    const gpuPower = num(s.gpu_power_w);
    const gpuExtras = [];
    if (gpuTemp != null) gpuExtras.push(`${gpuTemp}°C`);
    if (gpuPower != null) gpuExtras.push(`${gpuPower.toFixed(0)}W`);
    const gpuExtrasStr = gpuExtras.length ? ` · ${gpuExtras.join(' · ')}` : '';

    card.className = 'hw-card';
    card.innerHTML = `
      <div class="head">
        <span class="name">${escapeHtml(s.hostname || key)}</span>
        <span class="meta">${escapeHtml(key)} · ${escapeHtml(s.gpu_name || '')} · ${escapeHtml(s.uptime || '')}</span>
      </div>
      <div class="hw-metric">
        <span class="label">CPU</span>
        ${bar(loadPct, loadPct > 80)}
        <span class="val">${load1 != null ? load1.toFixed(2) : '—'} / ${cores || '?'} cores</span>
      </div>
      <div class="hw-metric">
        <span class="label">RAM</span>
        ${bar(ramPct, ramPct > 85)}
        <span class="val">${fmtBytes(s.ram_used_bytes)} / ${fmtBytes(s.ram_total_bytes)}</span>
      </div>
      <div class="hw-metric">
        <span class="label">GPU mem${gpuMemNote}</span>
        ${bar(gpuMemPct, gpuMemPct > 90)}
        <span class="val">${fmtMiB(gpuMemUsedMiB) || '—'} / ${fmtMiB(gpuMemTotalMiB) || '?'} GB</span>
      </div>
      <div class="hw-metric">
        <span class="label">GPU util</span>
        ${bar(gpuUtil, gpuUtil > 90)}
        <span class="val">${gpuUtil}%${gpuExtrasStr}</span>
      </div>
      <div class="hw-metric">
        <span class="label">Disk</span>
        ${bar(diskPct, diskPct > 85)}
        <span class="val">${fmtBytes(s.disk_used_bytes)} / ${fmtBytes(s.disk_total_bytes)}</span>
      </div>
    `;
    grid.appendChild(card);
  }
}
|
||||
|
||||
// ===================== service classification =====================
|
||||
|
||||
function classifyService(s) {
|
||||
// returns one of: running | unhealthy | missing | unconfigured | starting
|
||||
if (!s.host) return 'unconfigured';
|
||||
@@ -543,6 +648,18 @@ function openDownloadForm() {
|
||||
el('#download-form').classList.remove('hidden');
|
||||
el('#download-progress').classList.add('hidden');
|
||||
el('#dl-repo').focus();
|
||||
updateDlHfLink();
|
||||
}
|
||||
|
||||
/**
 * Show or hide the Hugging Face link next to the download repo input.
 *
 * The link is shown only when the trimmed input looks like a repo id, i.e.
 * it contains a `/` and every `/`-separated segment is non-empty. Each
 * segment is percent-encoded on its own so the separating `/` stays a real
 * path separator; encoding the whole id would turn it into `%2F`.
 */
function updateDlHfLink() {
  const repo = el('#dl-repo').value.trim();
  const link = el('#dl-hf-link');
  const segments = repo.split('/');
  const looksLikeRepo = segments.length >= 2 && segments.every((seg) => seg.length > 0);
  if (looksLikeRepo) {
    link.href = `https://huggingface.co/${segments.map(encodeURIComponent).join('/')}`;
    link.classList.remove('hidden');
  } else {
    link.classList.add('hidden');
  }
}
|
||||
|
||||
function closeDownloadPanel() {
|
||||
@@ -672,6 +789,47 @@ function handleDownloadDone(d) {
|
||||
|
||||
// ===================== Advanced / Add to catalog =====================
|
||||
|
||||
/**
 * Estimate the GPU memory pool (in GB) available to a model, based on the
 * `spark1` / `spark2` entries of `state.hardware`.
 *
 * A Spark's pool is its reported `gpu_mem_total_mib`. When that is missing
 * and the Spark is flagged `gpu_unified_memory`, system RAM
 * (`ram_total_bytes`) is used as the pool instead.
 *
 * @param {string} modelMode - `'cluster'` returns the smaller of the two
 *   Sparks' pools (loading is per-Spark, so the smaller one is the
 *   bottleneck); any other value returns the first pool that is known.
 * @returns {number|null} Pool size in GB, or null when nothing is known.
 */
function gpuTotalGB(modelMode) {
  const poolGB = (spark) => {
    if (!spark) return null;
    if (spark.gpu_mem_total_mib) return spark.gpu_mem_total_mib / 1024;
    if (spark.gpu_unified_memory && spark.ram_total_bytes) {
      return spark.ram_total_bytes / (1024 ** 3);
    }
    return null;
  };
  // Solo uses Spark 1's GPU only. Cluster shares across both — but loading is per-Spark.
  const g1 = poolGB(state.hardware?.spark1);
  const g2 = poolGB(state.hardware?.spark2);
  if (modelMode === 'cluster' && g1 && g2) return Math.min(g1, g2); // bottleneck
  return g1 || g2 || null;
}
|
||||
|
||||
/**
 * Produce the plain-English hint shown under an Advanced knob.
 *
 * @param {string} field - Knob identifier: `gpu_memory_utilization`,
 *   `max_model_len`, `fastsafetensors`, `prefix_caching` or `kv_cache_dtype`.
 * @param {*} value - Current knob value (fraction, token count, boolean, or
 *   dtype string depending on `field`).
 * @param {string} mode - Model mode, forwarded to `gpuTotalGB`.
 * @returns {string} Hint text, or '' when there is nothing useful to say
 *   (unknown field, unknown GPU size, or a non-numeric value).
 */
function knobContextHint(field, value, mode) {
  if (field === 'gpu_memory_utilization') {
    const gb = gpuTotalGB(mode);
    const fraction = Number(value);
    // A half-typed or empty input parses to NaN; show nothing rather than "~NaN GB".
    if (!gb || !Number.isFinite(fraction)) return '';
    const used = (fraction * gb).toFixed(0);
    const free = (gb - fraction * gb).toFixed(0);
    return `~${used} GB allocated to model + KV cache · ~${free} GB left for OS, buffers, other GPU workloads.`;
  }
  if (field === 'max_model_len') {
    if (!value) return '';
    const tokens = Number(value);
    if (!Number.isFinite(tokens)) return '';
    const pages = Math.round(tokens / 350); // ~350 tokens per page
    return `~${pages.toLocaleString()} pages of text (very rough). Larger context = more GPU memory reserved for KV cache.`;
  }
  if (field === 'fastsafetensors') {
    return value ? 'Faster cold-start weight loading.' : 'Standard safetensors loading.';
  }
  if (field === 'prefix_caching') {
    return value
      ? 'Reuses GPU state for repeated prompt prefixes (e.g. long system prompts).'
      : 'Off — every request re-processes the full prompt.';
  }
  if (field === 'kv_cache_dtype') {
    return value === 'fp8'
      ? 'Halves KV cache memory (fits ~2× more context). Quality cost is usually imperceptible.'
      : 'Default precision.';
  }
  return '';
}
|
||||
|
||||
/**
 * Return the hint element for a knob inside `rowEl`, creating and appending
 * it on first use.
 *
 * The hint is a `div.knob-hint.muted.small` tagged with `data-for="<id>"`.
 * Lookup compares `dataset.for` directly instead of building a CSS selector
 * from `id`, so ids containing quotes or other selector syntax cannot break
 * the query.
 *
 * @param {Element} rowEl - Container that holds (or will hold) the hint.
 * @param {string} id - Identifier stored in the hint's `data-for` attribute.
 * @returns {HTMLElement} The existing or newly created hint element.
 */
function ensureKnobHint(rowEl, id) {
  const wanted = String(id);
  const existing = Array.from(rowEl.querySelectorAll('.knob-hint'))
    .find((node) => node.dataset.for === wanted);
  if (existing) return existing;

  const hint = document.createElement('div');
  hint.className = 'knob-hint muted small';
  hint.dataset.for = id;
  rowEl.appendChild(hint);
  return hint;
}
|
||||
|
||||
function openAdvanced(key) {
|
||||
const m = state.models[key];
|
||||
if (!m) return;
|
||||
@@ -684,6 +842,23 @@ function openAdvanced(key) {
|
||||
el('#adv-fst').checked = !!k.fastsafetensors;
|
||||
el('#adv-pcache').checked = !!k.prefix_caching;
|
||||
el('#adv-fp8').checked = k.kv_cache_dtype === 'fp8';
|
||||
|
||||
// Wire up live knob hints
|
||||
const updateHints = () => {
|
||||
const mml = parseInt(el('#adv-mml').value, 10);
|
||||
const gmu = parseFloat(el('#adv-gmu').value);
|
||||
ensureKnobHint(el('#adv-mml').parentElement, 'mml').textContent = knobContextHint('max_model_len', mml, m.mode);
|
||||
ensureKnobHint(el('#adv-gmu').parentElement, 'gmu').textContent = knobContextHint('gpu_memory_utilization', gmu, m.mode);
|
||||
ensureKnobHint(el('#adv-fst').parentElement, 'fst').textContent = knobContextHint('fastsafetensors', el('#adv-fst').checked, m.mode);
|
||||
ensureKnobHint(el('#adv-pcache').parentElement, 'pcache').textContent = knobContextHint('prefix_caching', el('#adv-pcache').checked, m.mode);
|
||||
ensureKnobHint(el('#adv-fp8').parentElement, 'fp8').textContent = knobContextHint('kv_cache_dtype', el('#adv-fp8').checked ? 'fp8' : 'auto', m.mode);
|
||||
};
|
||||
updateHints();
|
||||
el('#adv-mml').oninput = updateHints;
|
||||
el('#adv-gmu').oninput = (e) => { el('#adv-gmu-out').value = parseFloat(e.target.value).toFixed(2); updateHints(); };
|
||||
el('#adv-fst').onchange = updateHints;
|
||||
el('#adv-pcache').onchange = updateHints;
|
||||
el('#adv-fp8').onchange = updateHints;
|
||||
const del = el('#adv-delete');
|
||||
del.classList.toggle('hidden', !m.custom);
|
||||
del.onclick = async () => {
|
||||
@@ -778,6 +953,56 @@ function setupAdvancedDialog() {
|
||||
el('#adv-gmu').addEventListener('input', (e) => { el('#adv-gmu-out').value = parseFloat(e.target.value).toFixed(2); });
|
||||
}
|
||||
|
||||
// ===================== Explain context (LLM commit summary) =====================
|
||||
|
||||
// Handle of the in-flight explanation stream, or null when none is open.
let explainEventSource = null;

/**
 * Stream an explanation from `/api/explain-updates` into the
 * `#ub-explain-section` / `#ub-explain-content` elements.
 *
 * Any stream already in flight is closed first. Each SSE message is a JSON
 * object carrying one of:
 *   - `error`     — shown in place of the output; the stream is then closed.
 *   - `content`   — appended to the answer text.
 *   - `reasoning` — appended to a separate, de-emphasised reasoning node.
 * A `done` event closes the stream.
 *
 * Answer and reasoning live in separate child nodes so that the first answer
 * chunk does not wipe reasoning that has already been shown. If the
 * connection fails before any chunk arrived, the placeholder is replaced
 * with an error note instead of being left on screen indefinitely.
 *
 * @returns {Promise<void>} Resolves once the stream has been set up (the
 *   function does not wait for the stream to finish).
 */
async function explainContext() {
  if (explainEventSource) {
    explainEventSource.close();
    explainEventSource = null;
  }
  const section = el('#ub-explain-section');
  const content = el('#ub-explain-content');
  section.classList.remove('hidden');
  section.open = true;
  content.innerHTML = '<span class="muted">Asking the loaded model…</span>';

  let text = '';
  let firstChunk = true;
  // NOTE(review): EventSource can only issue GET requests — confirm the
  // backend serves this route over GET.
  const es = new EventSource('/api/explain-updates');
  explainEventSource = es;

  // Close the stream and strip the reasoning-current marker.
  const finish = () => {
    es.close();
    if (explainEventSource === es) explainEventSource = null;
    const r = content.querySelector('.reasoning-current');
    if (r) r.classList.remove('reasoning-current');
  };
  // Close the stream and replace the output with an error note.
  const fail = (message) => {
    finish();
    content.innerHTML = `<span class="muted">Couldn't get explanation: ${escapeHtml(message)}</span>`;
  };

  es.onmessage = (ev) => {
    let d;
    try {
      d = JSON.parse(ev.data);
    } catch (err) {
      // One malformed frame should not kill the stream; skip it but leave a trace.
      console.warn('explain: ignoring malformed stream payload', err);
      return;
    }
    if (!d) return;
    if (d.error) {
      fail(String(d.error));
      return;
    }
    if (firstChunk) {
      content.innerHTML = '';
      firstChunk = false;
    }
    if (d.content) {
      text += d.content;
      let answer = content.querySelector('.explain-answer');
      if (!answer) {
        answer = document.createElement('div');
        answer.className = 'explain-answer';
        content.appendChild(answer);
      }
      answer.textContent = text;
      content.scrollTop = content.scrollHeight;
    } else if (d.reasoning) {
      // Show reasoning tokens but de-emphasized
      let r = content.querySelector('.reasoning-current');
      if (!r) {
        r = document.createElement('div');
        r.className = 'reasoning reasoning-current';
        r.textContent = '';
        content.appendChild(r);
      }
      r.textContent += d.reasoning;
    }
  };

  es.addEventListener('done', finish);

  es.onerror = () => {
    if (firstChunk) {
      fail('connection to the server failed');
    } else {
      // Keep whatever was streamed so far; just stop (and prevent auto-reconnect).
      finish();
    }
  };
}
|
||||
|
||||
// ===================== updates (spark-vllm-docker) =====================
|
||||
|
||||
const updState = {
|
||||
@@ -817,19 +1042,23 @@ function renderUpdateBanner() {
|
||||
banner.classList.toggle('up-to-date', behind === 0 && !dirty);
|
||||
banner.classList.toggle('warn', !!dirty);
|
||||
|
||||
const explain = el('#ub-explain');
|
||||
if (dirty > 0) {
|
||||
text.textContent = `${dirty} local change${dirty === 1 ? '' : 's'} in ~/spark-vllm-docker. Resolve before updating.`;
|
||||
details.classList.add('hidden');
|
||||
apply.classList.add('hidden');
|
||||
explain.classList.add('hidden');
|
||||
} else if (behind === 0) {
|
||||
text.textContent = `spark-vllm-docker is up to date (${info.current || ''})`;
|
||||
details.classList.add('hidden');
|
||||
apply.classList.add('hidden');
|
||||
list.classList.add('hidden');
|
||||
explain.classList.add('hidden');
|
||||
} else {
|
||||
text.textContent = `${behind} commit${behind === 1 ? '' : 's'} behind upstream`;
|
||||
details.classList.remove('hidden');
|
||||
apply.classList.remove('hidden');
|
||||
explain.classList.remove('hidden');
|
||||
log.textContent = (info.log || []).join('\n') || '(no log)';
|
||||
}
|
||||
}
|
||||
@@ -918,13 +1147,26 @@ async function init() {
|
||||
list.open = !list.open;
|
||||
});
|
||||
el('#ub-apply').addEventListener('click', applyUpdate);
|
||||
el('#ub-explain').addEventListener('click', explainContext);
|
||||
el('#dl-repo').addEventListener('input', updateDlHfLink);
|
||||
setupCatalogDialog();
|
||||
setupAdvancedDialog();
|
||||
// Open WebUI link from /api/config
|
||||
try {
|
||||
state.config = await fetchJSON('/api/config');
|
||||
if (state.config.open_webui_url) {
|
||||
const a = el('#open-webui-link');
|
||||
a.href = state.config.open_webui_url;
|
||||
a.classList.remove('hidden');
|
||||
}
|
||||
} catch {}
|
||||
await loadModels();
|
||||
await pollStatus();
|
||||
await renderServices();
|
||||
pollHardware();
|
||||
pollUpdates();
|
||||
setInterval(pollStatus, 5000);
|
||||
setInterval(pollHardware, 8000); // every 8s
|
||||
setInterval(pollUpdates, 300000); // every 5 min
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user