Files
spark-control/image/app/static/app.js
T
Grant ee8c2406b8 v0.6.0 - Service-level connectivity tracking + passive failure-report endpoint
connectivity.py:
- Generalized 'spark' subject to any string; renamed 'spark' field to 'subject'
- Legacy v0.5 events with the old 'spark' field are migrated transparently on read (kind defaults to 'transition')
- New record_report(subject, ok, source, detail, latency_ms): always appends an event with kind='report'; does NOT mutate the current state (only active polling is authoritative)
- summary() returns events normalized to the new schema

Wiring:
- /api/status now calls record_state for vllm/parakeet/magpie (dedup on no-change)
- /api/services calls record_state for each service after its http check
- Result: dashboard observes service-level transitions automatically with no extra polling

Passive endpoint:
- POST /api/health-event with {service, ok, source?, error?, ms?}
- Useful for external apps (e.g. Open WebUI) to surface sub-poll-interval failures the dashboard would otherwise miss

UI:
- Connectivity dialog groups events by subject (hosts ordered first, then services)
- Per-subject summary shows transition count, down count, report count, failed-report count
- Transitions and reports render inline with distinct styling; reports show source app + error + latency
- Legacy v0.5 events render unchanged

Docs:
- README documents /api/health-event with a curl example

Package: bump to 0.6.0:0
2026-05-12 13:19:27 -05:00

1448 lines
53 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// spark-control front-end
const state = {
models: {},
defaults: {},
current_model_key: null,
swap_job_id: null,
swap_eventsource: null,
swap_started_at: null,
swap_lines: [], // local accumulator for phase detection
swap_phase: 'Starting…',
swap_phase_detail: '',
swap_progress: 0, // 01
services: {},
service_action_in_flight: null, // e.g. "parakeet:restart"
hardware: {},
config: {},
configured: true,
timer_handle: null,
};
const el = (sel) => document.querySelector(sel);
const $$ = (sel) => document.querySelectorAll(sel);
function escapeHtml(s) {
if (s == null) return '';
return String(s)
.replaceAll('&', '&')
.replaceAll('<', '&lt;')
.replaceAll('>', '&gt;')
.replaceAll('"', '&quot;')
.replaceAll("'", '&#39;');
}
async function fetchJSON(url, opts) {
const r = await fetch(url, opts);
if (!r.ok) {
const text = await r.text().catch(() => '');
throw new Error(`${r.status} ${r.statusText}: ${text}`);
}
return r.json();
}
// ===================== rendering =====================
function renderCards() {
const root = el('#cards');
root.innerHTML = '';
const isSwapping = !!state.swap_job_id;
for (const key of Object.keys(state.models)) {
const m = state.models[key];
const isActive = key === state.current_model_key;
const card = document.createElement('div');
card.className = 'card' + (isActive ? ' active' : '');
const desc = m.description
? `<div class="desc">${escapeHtml(m.description)}</div>`
: '';
const customPill = m.custom ? `<span class="tag custom-pill">custom</span>` : '';
card.innerHTML = `
<div class="name">${escapeHtml(m.display_name)}</div>
<div class="meta">
<span class="tag mode-${m.mode}">${m.mode}</span>
<span class="tag">${m.size_gb} GB</span>
${customPill}
${(m.capabilities || []).map(c => `<span class="tag cap">${escapeHtml(c)}</span>`).join('')}
</div>
${desc}
<div class="muted small repo">
<a href="https://huggingface.co/${encodeURIComponent(m.repo)}" target="_blank" rel="noopener" title="View on Hugging Face">${escapeHtml(m.repo)} <span class="hf-icon">↗</span></a>
</div>
<div class="spacer"></div>
<div class="card-actions">
<button class="btn ${isActive ? '' : 'primary'}" data-swap-key="${key}" ${isActive || isSwapping ? 'disabled' : ''}>
${isActive ? 'Current' : 'Switch to this'}
</button>
<button class="btn adv-btn" data-adv-key="${key}" title="Advanced settings">Advanced</button>
</div>
`;
root.appendChild(card);
}
for (const btn of root.querySelectorAll('[data-swap-key]')) {
btn.addEventListener('click', () => triggerSwap(btn.dataset.swapKey));
}
for (const btn of root.querySelectorAll('[data-adv-key]')) {
btn.addEventListener('click', () => openAdvanced(btn.dataset.advKey));
}
}
function renderCurrent(status) {
const c = el('#current');
if (!status.configured) { c.innerHTML = `<span class="muted">not configured</span>`; return; }
if (status.current_swap_job) { c.innerHTML = `<span class="muted">swap in progress</span>`; return; }
const v = status.vllm || {};
if (!v.ok) { c.innerHTML = `<span class="muted">vLLM unreachable</span>`; return; }
const m = status.current_model_key ? state.models[status.current_model_key] : null;
const label = m ? m.display_name : (v.current_model || '(unknown)');
c.innerHTML = `<strong>${label}</strong>`;
}
// ===================== hardware dashboard =====================
function fmtBytes(n) {
if (!n && n !== 0) return '—';
const u = ['B', 'KB', 'MB', 'GB', 'TB'];
let i = 0; let v = n;
while (v >= 1024 && i < u.length - 1) { v /= 1024; i++; }
return v < 10 ? `${v.toFixed(1)} ${u[i]}` : `${Math.round(v)} ${u[i]}`;
}
function fmtMiB(n) {
if (!n && n !== 0) return null;
// n is in MiB; render in GB
const gb = n / 1024;
return gb < 10 ? gb.toFixed(1) : Math.round(gb).toString();
}
function bar(usedPct, warn) {
const pct = Math.max(2, Math.min(100, usedPct));
return `<div class="bar ${warn ? 'warn' : ''}"><span style="width:${pct}%"></span></div>`;
}
async function pollHardware() {
try {
state.hardware = await fetchJSON('/api/hardware');
try { state.connectivity = await fetchJSON('/api/connectivity'); } catch {}
renderHardware();
} catch (e) { console.warn('hardware poll failed', e); }
}
function fmtDuration(sec) {
if (sec == null) return '';
if (sec < 60) return `${Math.round(sec)}s`;
if (sec < 3600) return `${Math.round(sec / 60)}m`;
if (sec < 86400) {
const h = Math.floor(sec / 3600);
const m = Math.round((sec % 3600) / 60);
return m ? `${h}h ${m}m` : `${h}h`;
}
const d = Math.floor(sec / 86400);
const h = Math.round((sec % 86400) / 3600);
return h ? `${d}d ${h}h` : `${d}d`;
}
function openConnectivityDialog() {
const dlg = el('#connectivity-dialog');
const content = el('#connectivity-content');
const c = state.connectivity || {};
const events = c.events || [];
if (events.length === 0) {
content.innerHTML = '<div class="muted small">No events recorded yet. Once a Spark or service goes down and back up (or an external app reports a failure), entries appear here.</div>';
dlg.showModal();
return;
}
const bySubject = {};
for (const e of events) {
const subj = e.subject || e.spark || 'unknown'; // legacy fallback
(bySubject[subj] = bySubject[subj] || []).push(e);
}
// Sort subjects: hosts first, then services, alphabetical
const hostOrder = ['spark1', 'spark2'];
const subjects = Object.keys(bySubject).sort((a, b) => {
const ia = hostOrder.indexOf(a);
const ib = hostOrder.indexOf(b);
if (ia >= 0 && ib >= 0) return ia - ib;
if (ia >= 0) return -1;
if (ib >= 0) return 1;
return a.localeCompare(b);
});
const html = subjects.map((subj) => {
const evs = bySubject[subj];
const transitions = evs.filter(e => (e.kind || 'transition') === 'transition');
const reports = evs.filter(e => e.kind === 'report');
const downs = transitions.filter(e => e.transition === 'down').length;
const failedReports = reports.filter(e => !e.ok).length;
const mac = c.macs?.[subj];
const summaryParts = [];
if (transitions.length) summaryParts.push(`${transitions.length} probe transition${transitions.length===1?'':'s'} (${downs} down)`);
if (reports.length) summaryParts.push(`${reports.length} app report${reports.length===1?'':'s'} (${failedReports} failed)`);
const isHost = hostOrder.includes(subj);
return `
<div class="conn-spark">
<h4>${escapeHtml(subj)}${isHost ? ' <span class="muted small">[host]</span>' : ' <span class="muted small">[service]</span>'}${mac ? ` <span class="muted small">${escapeHtml(mac)}</span>` : ''}</h4>
<div class="conn-summary">${summaryParts.join(' · ') || 'no events'}</div>
${evs.slice(-30).reverse().map(e => renderConnEvent(e)).join('')}
</div>
`;
}).join('');
content.innerHTML = html;
dlg.showModal();
}
function renderConnEvent(e) {
const when = escapeHtml((e.at || '').replace('T', ' ').replace('Z', ''));
const kind = e.kind || 'transition';
if (kind === 'report') {
const ok = !!e.ok;
const source = escapeHtml(e.source || 'external');
const detail = e.detail ? `${escapeHtml(e.detail)}` : '';
const latency = e.latency_ms != null ? ` (${e.latency_ms} ms)` : '';
return `
<div class="conn-event ${ok ? 'up' : 'down'} report">
<span class="when">${when}</span>
<span class="what">${ok ? '◷ report: ok' : '◷ report: failed'} <span class="muted">from</span> ${source}${detail}</span>
<span class="dur">${latency}</span>
</div>
`;
}
const down = e.down_seconds != null ? `was down ${fmtDuration(e.down_seconds)}` : '';
const up = e.up_seconds != null ? `was up ${fmtDuration(e.up_seconds)}` : '';
return `
<div class="conn-event ${e.transition}">
<span class="when">${when}</span>
<span class="what">${e.transition === 'up' ? '↑ came back online' : '↓ dropped offline'}</span>
<span class="dur">${down}${up}</span>
</div>
`;
}
async function wakeSpark(name) {
try {
const r = await fetchJSON(`/api/spark/${name}/wake`, { method: 'POST' });
alert(`Wake-on-LAN sent to ${name} (MAC ${r.mac}, via ${r.delivered_via}). Give it ~30 seconds to wake; the card will go green when it comes back.`);
} catch (e) {
alert(`Wake failed: ${e.message}`);
}
}
function renderHardware() {
const panel = el('#hardware-panel');
const grid = el('#hardware-grid');
const hw = state.hardware || {};
const keys = Object.keys(hw).filter(k => hw[k] && (hw[k].configured !== false));
if (keys.length === 0) { panel.classList.add('hidden'); return; }
panel.classList.remove('hidden');
grid.innerHTML = '';
for (const key of keys) {
const s = hw[key];
const card = document.createElement('div');
if (!s.reachable) {
card.className = 'hw-card unreachable';
const mac = state.connectivity?.macs?.[key];
const wolRow = mac
? `<div class="wol-row">
<span class="mac-display">${escapeHtml(mac)}</span>
<span class="spacer"></span>
<button class="btn" data-wake="${escapeHtml(key)}">Wake (WoL)</button>
</div>`
: `<div class="muted small">MAC not yet known — once it's been up once with this dashboard installed, "Wake" will appear here.</div>`;
card.innerHTML = `
<div class="head">
<span class="name">${escapeHtml(key)}</span>
<span class="meta">unreachable</span>
</div>
<div class="muted small">${escapeHtml(s.host || '')}${escapeHtml(s.error || 'no response')}</div>
${wolRow}
<div class="muted small" style="line-height:1.5">
If Wake-on-LAN doesn't bring it back, manual steps:
<ol style="margin: 6px 0 0 18px; padding: 0;">
<li>Verify it's powered on (check the front LED).</li>
<li>Ping it from another LAN device.</li>
<li>Power-cycle it physically.</li>
<li>If it boots, this card will go green again automatically.</li>
</ol>
</div>
`;
grid.appendChild(card);
continue;
}
const ramPct = s.ram_used_bytes && s.ram_total_bytes ? (s.ram_used_bytes / s.ram_total_bytes) * 100 : 0;
const diskPct = s.disk_used_bytes && s.disk_total_bytes ? (s.disk_used_bytes / s.disk_total_bytes) * 100 : 0;
const loadPct = (s.load && s.cores) ? Math.min(100, (s.load[0] / s.cores) * 100) : 0;
// GPU memory: on unified-memory systems (DGX Spark) total is N/A, so use system RAM as the pool.
const gpuMemTotalMiB = s.gpu_mem_total_mib || (s.gpu_unified_memory ? (s.ram_total_bytes / (1024 * 1024)) : null);
const gpuMemUsedMiB = s.gpu_mem_used_mib ?? null;
const gpuMemPct = (gpuMemTotalMiB && gpuMemUsedMiB != null) ? (gpuMemUsedMiB / gpuMemTotalMiB) * 100 : 0;
const gpuMemNote = s.gpu_unified_memory ? ' <span class="muted">(unified)</span>' : '';
const gpuExtras = [];
if (s.gpu_temp_c != null) gpuExtras.push(`${s.gpu_temp_c}°C`);
if (s.gpu_power_w != null) gpuExtras.push(`${s.gpu_power_w.toFixed(0)}W`);
const gpuExtrasStr = gpuExtras.length ? ` · ${gpuExtras.join(' · ')}` : '';
card.className = 'hw-card';
card.innerHTML = `
<div class="head">
<span class="name">${escapeHtml(s.hostname || key)}</span>
<span class="meta">${escapeHtml(key)} · ${escapeHtml(s.gpu_name || '')} · ${escapeHtml(s.uptime || '')}</span>
</div>
<div class="hw-metric">
<span class="label">CPU</span>
${bar(loadPct, loadPct > 80)}
<span class="val">${s.load ? s.load[0].toFixed(2) : '—'} / ${s.cores || '?'} cores</span>
</div>
<div class="hw-metric">
<span class="label">RAM</span>
${bar(ramPct, ramPct > 85)}
<span class="val">${fmtBytes(s.ram_used_bytes)} / ${fmtBytes(s.ram_total_bytes)}</span>
</div>
<div class="hw-metric">
<span class="label">GPU mem${gpuMemNote}</span>
${bar(gpuMemPct, gpuMemPct > 90)}
<span class="val">${fmtMiB(gpuMemUsedMiB) || '—'} / ${fmtMiB(gpuMemTotalMiB) || '?'} GB</span>
</div>
<div class="hw-metric">
<span class="label">GPU util</span>
${bar(s.gpu_util_pct || 0, (s.gpu_util_pct || 0) > 90)}
<span class="val">${s.gpu_util_pct ?? 0}%${gpuExtrasStr}</span>
</div>
<div class="hw-metric">
<span class="label">Disk</span>
${bar(diskPct, diskPct > 85)}
<span class="val">${fmtBytes(s.disk_used_bytes)} / ${fmtBytes(s.disk_total_bytes)}</span>
</div>
`;
grid.appendChild(card);
}
}
// ===================== service classification =====================
function classifyService(s) {
// returns one of: running | unhealthy | missing | unconfigured | starting
if (!s.host) return 'unconfigured';
if (s.docker_state === 'missing') return 'missing';
if (s.docker_state === 'restarting') return 'unhealthy';
if (s.docker_state === 'exited') return 'unhealthy';
if (s.docker_state === 'running' && !s.http_ready) return 'starting';
if (s.docker_state === 'running' && s.http_ready) return 'running';
return s.docker_state || 'unknown';
}
function statusLabel(cls) {
return {
running: 'Healthy',
unhealthy: 'Unhealthy',
starting: 'Starting…',
missing: 'Not installed',
unconfigured: 'Not configured',
unknown: 'Unknown',
}[cls] || cls;
}
async function renderServices() {
let services = state.services;
// First render: fetch.
if (!services || Object.keys(services).length === 0) {
try {
services = await fetchJSON('/api/services');
state.services = services;
} catch (e) { console.error('services fetch failed', e); return; }
}
const panel = el('#services-panel');
const grid = el('#services-grid');
const entries = Object.entries(services);
if (entries.length === 0) { panel.classList.add('hidden'); return; }
panel.classList.remove('hidden');
grid.innerHTML = '';
for (const [name, s] of entries) {
const cls = classifyService(s);
const card = document.createElement('div');
card.className = `service-card ${cls}`;
const inFlight = state.service_action_in_flight && state.service_action_in_flight.startsWith(name + ':');
const disable = (action) => {
// Disable buttons that don't make sense for the current state
if (inFlight) return true;
if (cls === 'unconfigured' || cls === 'missing') return true;
if (action === 'start' && (cls === 'running' || cls === 'starting')) return true;
if (action === 'stop' && cls !== 'running' && cls !== 'starting' && cls !== 'unhealthy') return true;
return false;
};
const copyIcon = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2"/><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/></svg>`;
const hostStr = s.host ? `${s.host}:${s.port}` : '';
const hostRow = s.host
? `<div class="row"><span class="k">Host</span><span class="v copyable" data-copy-self title="Click to copy">${escapeHtml(hostStr)}</span><button class="icon-btn" data-copy-text="${escapeHtml(hostStr)}" title="Copy host" aria-label="Copy">${copyIcon}</button></div>`
: `<div class="row"><span class="k">Host</span><span class="v muted-v">not configured</span></div>`;
const urlRow = s.base_url
? `<div class="row"><span class="k">URL</span><span class="v copyable" data-copy-self title="Click to copy">${escapeHtml(s.base_url)}</span><button class="icon-btn" data-copy-text="${escapeHtml(s.base_url)}" title="Copy URL" aria-label="Copy">${copyIcon}</button></div>`
: '';
const modelRow = s.model
? `<div class="row"><span class="k">Model</span><span class="v copyable" data-copy-self title="Click to copy">${escapeHtml(s.model)}</span><button class="icon-btn" data-copy-text="${escapeHtml(s.model)}" title="Copy model" aria-label="Copy">${copyIcon}</button></div>`
: '';
const restartsRow = s.restart_count != null && s.restart_count > 1
? `<div class="row"><span class="k">Restarts</span><span class="v">${s.restart_count}</span></div>`
: '';
card.innerHTML = `
<div class="head">
<span class="name">${escapeHtml(name)}</span>
<span class="kind">${escapeHtml(s.kind || '')}</span>
<span class="status">${statusLabel(cls)}</span>
</div>
${hostRow}
${urlRow}
${modelRow}
${restartsRow}
<div class="service-actions">
<button class="btn" data-svc-action="${name}:start" ${disable('start') ? 'disabled' : ''}>Start</button>
<button class="btn" data-svc-action="${name}:restart" ${disable('restart') ? 'disabled' : ''}>Restart</button>
<button class="btn danger" data-svc-action="${name}:stop" ${disable('stop') ? 'disabled' : ''}>Stop</button>
</div>
`;
grid.appendChild(card);
}
for (const btn of grid.querySelectorAll('.btn[data-svc-action]')) {
btn.addEventListener('click', () => onServiceAction(btn.dataset.svcAction));
}
}
async function onServiceAction(key) {
if (state.service_action_in_flight) return;
const [name, action] = key.split(':');
state.service_action_in_flight = key;
renderServices();
try {
await fetchJSON(`/api/services/${name}/${action}`, { method: 'POST' });
} catch (e) {
alert(`${action} ${name} failed: ${e.message}`);
} finally {
state.service_action_in_flight = null;
// Refresh services state
try {
state.services = await fetchJSON('/api/services');
} catch {}
renderServices();
pollStatus();
}
}
function renderEndpoint(status) {
const v = status.vllm || {};
const panel = el('#endpoint-panel');
const ready = v.ok && v.current_model && v.base_url;
panel.classList.toggle('hidden', !ready);
if (!ready) return;
el('#ep-url').textContent = v.base_url;
el('#ep-model').textContent = v.current_model;
const snippet =
`curl -s ${v.base_url}/chat/completions \\
-H 'content-type: application/json' \\
-d '{
"model": "${v.current_model}",
"messages": [{"role": "user", "content": "Hello"}]
}'`;
el('#ep-curl-snippet').textContent = snippet;
}
async function copyText(text, indicatorEl) {
try {
await navigator.clipboard.writeText(text);
if (indicatorEl) {
indicatorEl.classList.add('copied');
setTimeout(() => indicatorEl.classList.remove('copied'), 1200);
}
return true;
} catch {
// Plain HTTP fallback: select the text so the user can ⌘C
if (indicatorEl) {
const range = document.createRange();
range.selectNode(indicatorEl);
window.getSelection().removeAllRanges();
window.getSelection().addRange(range);
}
return false;
}
}
function setupCopyButtons() {
document.body.addEventListener('click', async (e) => {
// Inline icon copy with literal text (used for dynamically-rendered service rows)
const litBtn = e.target.closest('[data-copy-text]');
if (litBtn) {
await copyText(litBtn.dataset.copyText, litBtn);
return;
}
// Copy buttons (with svg icon) referenced by data-copy="selector"
const btn = e.target.closest('[data-copy]');
if (btn) {
const target = el(btn.dataset.copy);
if (target) {
await copyText(target.textContent, btn);
target.classList.add('copied');
setTimeout(() => target.classList.remove('copied'), 1200);
}
return;
}
// Self-copy: clicking the text itself
const selfCopy = e.target.closest('[data-copy-self]');
if (selfCopy) {
await copyText(selfCopy.textContent, selfCopy);
}
});
}
function renderHealth(status) {
function setDot(id, ok, payload) {
const item = el(id);
if (!item) return;
const dot = item.querySelector('.dot');
dot.classList.remove('ok', 'bad', 'warn');
if (ok === true) dot.classList.add('ok');
else if (ok === false) dot.classList.add('bad');
else dot.classList.add('warn');
item.title = JSON.stringify(payload || {}, null, 2);
}
setDot('#h-vllm', status.vllm && status.vllm.ok, status.vllm);
setDot('#h-parakeet', status.parakeet && status.parakeet.ok, status.parakeet);
setDot('#h-magpie', status.magpie && status.magpie.ok, status.magpie);
el('#updated').textContent = `updated ${new Date().toLocaleTimeString()}`;
}
function renderBanner(status) {
el('#setup-banner').classList.toggle('hidden', !!status.configured);
}
function renderSwapPanel() {
el('#swap-phase').textContent = state.swap_phase;
el('#swap-phase-detail').textContent = state.swap_phase_detail;
el('#swap-phase-fill').style.width = `${Math.max(2, Math.round(state.swap_progress * 100))}%`;
}
// ===================== phase detection =====================
const PHASE_ORDER = [
['Stopping current model…', 0.08],
['Starting new model…', 0.16],
['Joining Ray cluster…', 0.22],
['Loading weights…', 0.30],
['Compiling kernels…', 0.78],
['Warming up…', 0.88],
['Starting API server…', 0.94],
['Ready ✓', 1.00],
['Failed', 1.00],
];
function phaseProgress(name) {
const found = PHASE_ORDER.find(([n]) => n === name);
return found ? found[1] : 0.05;
}
function deriveSwapPhase(serverState, lines) {
// Default phase from server state
let phase = ({
starting: 'Starting…',
stopping: 'Stopping current model…',
launching: 'Starting new model…',
tailing: 'Loading weights…',
ready: 'Ready ✓',
failed: 'Failed',
})[serverState] || 'Working…';
let detail = '';
// Refine from log content (search recent lines first)
const tail = lines.slice(-40);
for (let i = tail.length - 1; i >= 0; i--) {
const line = tail[i];
if (line.includes('Application startup complete')) {
phase = 'Ready ✓';
break;
}
if (line.includes('Started server process')) {
phase = 'Starting API server…';
break;
}
if (line.includes('Initial profiling/warmup') || line.includes('init engine (profile, create kv cache, warmup model)')) {
phase = 'Warming up…';
break;
}
if (line.match(/Capturing CUDA graphs|Compiling a graph|torch\.compile took|Graph capturing/)) {
phase = 'Compiling kernels…';
break;
}
const shard = line.match(/Loading safetensors checkpoint shards:\s+(\d+)%\s+Completed\s+\|\s+(\d+)\/(\d+)/);
if (shard) {
phase = 'Loading weights…';
detail = `${shard[2]} of ${shard[3]} shards (${shard[1]}%)`;
const innerProgress = parseInt(shard[2], 10) / parseInt(shard[3], 10);
// Map shard progress 0..1 into the 0.30..0.78 band
state.swap_progress = 0.30 + (0.78 - 0.30) * innerProgress;
state.swap_phase = phase;
state.swap_phase_detail = detail;
return;
}
if (line.includes('Connecting to existing Ray cluster')) {
phase = 'Joining Ray cluster…';
break;
}
if (line.includes('Resolved architecture') || line.match(/launch-cluster\.sh.*exec vllm serve/)) {
phase = 'Starting new model…';
break;
}
if (line.match(/launch-cluster\.sh stop/)) {
phase = 'Stopping current model…';
break;
}
}
state.swap_phase = phase;
state.swap_phase_detail = detail;
state.swap_progress = phaseProgress(phase);
}
// ===================== timer =====================
function startTimer(startedAtMillis) {
state.swap_started_at = startedAtMillis;
if (state.timer_handle) clearInterval(state.timer_handle);
const tick = () => {
if (!state.swap_started_at) return;
const sec = Math.max(0, Math.floor((Date.now() - state.swap_started_at) / 1000));
const m = Math.floor(sec / 60);
const s = sec % 60;
el('#swap-elapsed').textContent = `${m}:${s.toString().padStart(2, '0')}`;
};
tick();
state.timer_handle = setInterval(tick, 500);
}
function stopTimer() {
if (state.timer_handle) { clearInterval(state.timer_handle); state.timer_handle = null; }
}
// ===================== polling + SSE =====================
async function pollStatus() {
try {
const status = await fetchJSON('/api/status');
state.current_model_key = status.current_model_key;
state.configured = status.configured;
renderBanner(status);
renderCurrent(status);
renderEndpoint(status);
renderHealth(status);
// If models hasn't loaded yet (init may have hit a transient proxy timeout), retry.
if (!state.models || Object.keys(state.models).length === 0) {
try { await loadModels(); } catch {}
}
// Refresh services state lazily — every 5s poll triggers this too.
try {
state.services = await fetchJSON('/api/services');
renderServices();
} catch {}
if (status.current_swap_job && status.current_swap_job !== state.swap_job_id) {
attachToSwap(status.current_swap_job, /*needsBackfill=*/true);
} else if (!status.current_swap_job && state.swap_job_id && !state.swap_eventsource) {
// Foreign swap ended
detachSwap();
}
renderCards();
} catch (e) {
console.error('status poll failed', e);
}
}
async function loadModels() {
const data = await fetchJSON('/api/models');
state.defaults = data.defaults || {};
state.models = data.models || {};
}
async function triggerSwap(modelKey) {
if (state.swap_job_id) return;
try {
const r = await fetchJSON('/api/swap', {
method: 'POST',
headers: { 'content-type': 'application/json' },
body: JSON.stringify({ model_key: modelKey }),
});
attachToSwap(r.job_id, /*needsBackfill=*/false);
} catch (e) {
alert('Failed to start swap: ' + e.message);
}
}
async function attachToSwap(jobId, needsBackfill) {
if (state.swap_eventsource) {
state.swap_eventsource.close();
state.swap_eventsource = null;
}
state.swap_job_id = jobId;
state.swap_lines = [];
state.swap_phase = 'Starting…';
state.swap_phase_detail = '';
state.swap_progress = 0.05;
el('#swap-log').textContent = '';
el('#swap-panel').classList.remove('hidden');
renderSwapPanel();
// Backfill (if joining mid-swap) — fetch the snapshot so we have started_at + history
try {
const snap = await fetchJSON(`/api/swap/${jobId}`);
const ts = Date.parse(snap.started_at);
if (!isNaN(ts)) startTimer(ts);
state.swap_lines = snap.lines || [];
for (const line of state.swap_lines) appendLog(line);
deriveSwapPhase(snap.state, state.swap_lines);
renderSwapPanel();
if (snap.returncode !== null && snap.returncode !== undefined) {
// Already finished — close panel after a beat
handleSwapDone(snap);
return;
}
} catch (e) {
if (!needsBackfill) startTimer(Date.now());
console.warn('backfill failed', e);
}
const es = new EventSource(`/api/swap/${jobId}/stream`);
state.swap_eventsource = es;
es.onmessage = (ev) => {
try {
const d = JSON.parse(ev.data);
if (d.line !== undefined) {
state.swap_lines.push(d.line);
appendLog(d.line);
deriveSwapPhase(d.state, state.swap_lines);
renderSwapPanel();
} else if (d.state) {
deriveSwapPhase(d.state, state.swap_lines);
renderSwapPanel();
}
} catch {}
};
es.addEventListener('done', async (ev) => {
let d = {};
try { d = JSON.parse(ev.data); } catch {}
handleSwapDone(d);
});
es.onerror = () => {
// Tab backgrounded or network blip — close; status poll will reattach
es.close();
state.swap_eventsource = null;
};
renderCards();
}
function handleSwapDone(d) {
if (state.swap_eventsource) { state.swap_eventsource.close(); state.swap_eventsource = null; }
const finalState = d.state || 'ready';
state.swap_phase = finalState === 'failed' ? 'Failed' : 'Ready ✓';
state.swap_phase_detail = d.returncode !== undefined ? `exit code ${d.returncode}` : '';
state.swap_progress = 1.0;
renderSwapPanel();
setTimeout(() => detachSwap(), 4000);
pollStatus();
}
function detachSwap() {
state.swap_job_id = null;
if (state.swap_eventsource) { state.swap_eventsource.close(); state.swap_eventsource = null; }
stopTimer();
el('#swap-panel').classList.add('hidden');
renderCards();
}
function appendLog(line) {
const log = el('#swap-log');
log.textContent += line + '\n';
log.scrollTop = log.scrollHeight;
}
// ===================== model downloads =====================
const dlState = {
job_id: null,
eventsource: null,
started_at: null,
timer_handle: null,
};
function openDownloadForm() {
el('#download-panel').classList.remove('hidden');
el('#download-form').classList.remove('hidden');
el('#download-progress').classList.add('hidden');
el('#dl-repo').focus();
updateDlHfLink();
}
function updateDlHfLink() {
const repo = el('#dl-repo').value.trim();
const link = el('#dl-hf-link');
if (repo.includes('/')) {
link.href = `https://huggingface.co/${encodeURIComponent(repo)}`;
link.classList.remove('hidden');
} else {
link.classList.add('hidden');
}
}
function closeDownloadPanel() {
el('#download-panel').classList.add('hidden');
el('#download-form').classList.remove('hidden');
el('#download-progress').classList.add('hidden');
el('#dl-repo').value = '';
}
function dlTimerStart(startedAt) {
dlState.started_at = startedAt;
if (dlState.timer_handle) clearInterval(dlState.timer_handle);
const tick = () => {
if (!dlState.started_at) return;
const sec = Math.max(0, Math.floor((Date.now() - dlState.started_at) / 1000));
const m = Math.floor(sec / 60);
const s = sec % 60;
el('#dl-elapsed').textContent = `${m}:${s.toString().padStart(2, '0')}`;
};
tick();
dlState.timer_handle = setInterval(tick, 500);
}
function dlTimerStop() {
if (dlState.timer_handle) { clearInterval(dlState.timer_handle); dlState.timer_handle = null; }
}
async function startDownload() {
const repo = el('#dl-repo').value.trim();
const mode = document.querySelector('input[name="dl-mode"]:checked').value;
if (!repo || !repo.includes('/')) {
alert('Enter a HuggingFace repo in the form "org/name", e.g. RedHatAI/Qwen3.6-35B-A3B-NVFP4');
return;
}
dlState.last_repo = repo;
dlState.last_mode = mode;
try {
const r = await fetchJSON('/api/download', {
method: 'POST',
headers: { 'content-type': 'application/json' },
body: JSON.stringify({ repo, mode }),
});
attachToDownload(r.job_id);
} catch (e) {
alert('Failed to start download: ' + e.message);
}
}
function renderDownloadProgress(p) {
el('#dl-phase').textContent = p.phase || 'Working…';
const statsParts = [];
if (p.downloaded && p.total) statsParts.push(`${p.downloaded} / ${p.total}`);
if (p.rate) statsParts.push(p.rate);
if (p.eta) statsParts.push(`ETA ${p.eta}`);
el('#dl-stats').textContent = statsParts.join(' · ');
const pct = Math.max(2, Math.min(100, p.percent || 2));
el('#dl-progress-fill').style.width = `${pct}%`;
el('#dl-phase-detail').textContent = p.percent > 0 ? `${p.percent.toFixed(1)}%` : '';
}
function dlAppendLog(line) {
const log = el('#dl-log');
log.textContent += line + '\n';
log.scrollTop = log.scrollHeight;
}
async function attachToDownload(jobId) {
if (dlState.eventsource) { dlState.eventsource.close(); dlState.eventsource = null; }
dlState.job_id = jobId;
el('#download-form').classList.add('hidden');
el('#download-progress').classList.remove('hidden');
el('#dl-log').textContent = '';
el('#dl-title').textContent = 'Downloading…';
try {
const snap = await fetchJSON(`/api/download/${jobId}`);
dlTimerStart(Date.parse(snap.started_at));
for (const line of snap.lines || []) dlAppendLog(line);
renderDownloadProgress(snap.progress);
if (snap.returncode !== null && snap.returncode !== undefined) {
handleDownloadDone(snap);
return;
}
} catch (e) {
console.warn('download backfill failed', e);
dlTimerStart(Date.now());
}
const es = new EventSource(`/api/download/${jobId}/stream`);
dlState.eventsource = es;
es.onmessage = (ev) => {
try {
const d = JSON.parse(ev.data);
if (d.line !== undefined) dlAppendLog(d.line);
} catch {}
};
es.addEventListener('progress', (ev) => {
try { renderDownloadProgress(JSON.parse(ev.data)); } catch {}
});
es.addEventListener('done', (ev) => {
let d = {};
try { d = JSON.parse(ev.data); } catch {}
handleDownloadDone(d);
});
es.onerror = () => { es.close(); dlState.eventsource = null; };
}
function handleDownloadDone(d) {
if (dlState.eventsource) { dlState.eventsource.close(); dlState.eventsource = null; }
dlTimerStop();
if (d.state === 'failed') {
el('#dl-title').textContent = `Failed (rc=${d.returncode})`;
el('#dl-phase').textContent = 'Failed';
} else {
el('#dl-title').textContent = 'Done';
el('#dl-phase').textContent = 'Done ✓';
el('#dl-progress-fill').style.width = '100%';
// Offer to add to catalog
const repo = dlState.last_repo;
const mode = dlState.last_mode;
if (repo) {
setTimeout(() => openCatalogDialog(repo, mode), 600);
}
}
dlState.job_id = null;
}
// ===================== Advanced / Add to catalog =====================
function gpuTotalGB(modelMode) {
// Solo uses Spark 1's GPU only. Cluster shares across both — but loading is per-Spark.
const s1 = state.hardware?.spark1;
const s2 = state.hardware?.spark2;
const g1 = s1?.gpu_mem_total_mib ? s1.gpu_mem_total_mib / 1024 : null;
const g2 = s2?.gpu_mem_total_mib ? s2.gpu_mem_total_mib / 1024 : null;
if (modelMode === 'cluster' && g1 && g2) return Math.min(g1, g2); // bottleneck
return g1 || g2 || null;
}
function knobContextHint(field, value, mode) {
if (field === 'gpu_memory_utilization') {
const gb = gpuTotalGB(mode);
if (!gb) return '';
const used = (value * gb).toFixed(0);
const free = (gb - value * gb).toFixed(0);
return `~${used} GB allocated to model + KV cache · ~${free} GB left for OS, buffers, other GPU workloads.`;
}
if (field === 'max_model_len') {
if (!value) return '';
const pages = Math.round(value / 350); // ~350 tokens per page
const kvBytes = (value * 2 * 4 * 32 * 128); // rough fp16 KV cache size for typical 32-layer model
return `~${pages.toLocaleString()} pages of text (very rough). Larger context = more GPU memory reserved for KV cache.`;
}
if (field === 'fastsafetensors') return value ? 'Faster cold-start weight loading.' : 'Standard safetensors loading.';
if (field === 'prefix_caching') return value ? 'Reuses GPU state for repeated prompt prefixes (e.g. long system prompts).' : 'Off — every request re-processes the full prompt.';
if (field === 'kv_cache_dtype') return value === 'fp8' ? 'Halves KV cache memory (fits ~2× more context). Quality cost is usually imperceptible.' : 'Default precision.';
return '';
}
function ensureKnobHint(rowEl, id) {
let h = rowEl.querySelector(`.knob-hint[data-for="${id}"]`);
if (!h) {
h = document.createElement('div');
h.className = 'knob-hint muted small';
h.dataset.for = id;
rowEl.appendChild(h);
}
return h;
}
function openAdvanced(key) {
const m = state.models[key];
if (!m) return;
const dlg = el('#advanced-dialog');
el('#adv-title').textContent = `Advanced — ${m.display_name}`;
const k = m.effective_knobs || {};
el('#adv-mml').value = k.max_model_len ?? '';
el('#adv-gmu').value = k.gpu_memory_utilization ?? 0.85;
el('#adv-gmu-out').value = parseFloat(el('#adv-gmu').value).toFixed(2);
el('#adv-fst').checked = !!k.fastsafetensors;
el('#adv-pcache').checked = !!k.prefix_caching;
el('#adv-fp8').checked = k.kv_cache_dtype === 'fp8';
// Wire up live knob hints
const updateHints = () => {
const mml = parseInt(el('#adv-mml').value, 10);
const gmu = parseFloat(el('#adv-gmu').value);
ensureKnobHint(el('#adv-mml').parentElement, 'mml').textContent = knobContextHint('max_model_len', mml, m.mode);
ensureKnobHint(el('#adv-gmu').parentElement, 'gmu').textContent = knobContextHint('gpu_memory_utilization', gmu, m.mode);
ensureKnobHint(el('#adv-fst').parentElement, 'fst').textContent = knobContextHint('fastsafetensors', el('#adv-fst').checked, m.mode);
ensureKnobHint(el('#adv-pcache').parentElement, 'pcache').textContent = knobContextHint('prefix_caching', el('#adv-pcache').checked, m.mode);
ensureKnobHint(el('#adv-fp8').parentElement, 'fp8').textContent = knobContextHint('kv_cache_dtype', el('#adv-fp8').checked ? 'fp8' : 'auto', m.mode);
};
updateHints();
el('#adv-mml').oninput = updateHints;
el('#adv-gmu').oninput = (e) => { el('#adv-gmu-out').value = parseFloat(e.target.value).toFixed(2); updateHints(); };
el('#adv-fst').onchange = updateHints;
el('#adv-pcache').onchange = updateHints;
el('#adv-fp8').onchange = updateHints;
const del = el('#adv-delete');
del.classList.toggle('hidden', !m.custom);
del.onclick = async () => {
if (!confirm(`Delete "${m.display_name}" from the catalog? The model weights on disk are NOT deleted.`)) return;
try {
await fetchJSON(`/api/models/${encodeURIComponent(key)}`, { method: 'DELETE' });
dlg.close();
await loadModels();
pollStatus();
} catch (e) { alert('Delete failed: ' + e.message); }
};
const form = el('#advanced-form');
form.onsubmit = async (e) => {
e.preventDefault();
const knobs = {};
const mml = parseInt(el('#adv-mml').value, 10);
if (Number.isFinite(mml) && mml > 0) knobs.max_model_len = mml;
const gmu = parseFloat(el('#adv-gmu').value);
if (Number.isFinite(gmu)) knobs.gpu_memory_utilization = gmu;
if (el('#adv-fst').checked) knobs.fastsafetensors = true; else knobs.fastsafetensors = false;
if (el('#adv-pcache').checked) knobs.prefix_caching = true; else knobs.prefix_caching = false;
knobs.kv_cache_dtype = el('#adv-fp8').checked ? 'fp8' : 'auto';
try {
await fetchJSON(`/api/models/${encodeURIComponent(key)}/knobs`, {
method: 'PUT',
headers: { 'content-type': 'application/json' },
body: JSON.stringify({ knobs }),
});
dlg.close();
await loadModels();
pollStatus();
} catch (e) { alert('Save failed: ' + e.message); }
};
dlg.showModal();
}
function openCatalogDialog(repo, mode) {
const dlg = el('#catalog-dialog');
const key = repo.split('/').pop().toLowerCase().replace(/[^a-z0-9_-]/g, '-');
el('#cd-key').value = key;
el('#cd-name').value = repo.split('/').pop();
el('#cd-repo').value = repo;
el('#cd-size').value = '';
el('#cd-mode').value = mode || 'solo';
el('#cd-desc').value = '';
el('#cd-mml').value = 32768;
el('#cd-gmu').value = 0.85;
el('#cd-gmu-out').value = '0.85';
el('#cd-fst').checked = true;
el('#cd-pcache').checked = true;
el('#cd-fp8').checked = true;
dlg.showModal();
}
function setupCatalogDialog() {
el('#cd-cancel').addEventListener('click', () => el('#catalog-dialog').close());
el('#cd-gmu').addEventListener('input', (e) => { el('#cd-gmu-out').value = parseFloat(e.target.value).toFixed(2); });
el('#catalog-form').addEventListener('submit', async (e) => {
e.preventDefault();
const body = {
key: el('#cd-key').value.trim(),
display_name: el('#cd-name').value.trim(),
repo: el('#cd-repo').value.trim(),
size_gb: parseFloat(el('#cd-size').value) || 0,
mode: el('#cd-mode').value,
description: el('#cd-desc').value.trim() || null,
vllm_args: [],
knobs: {
max_model_len: parseInt(el('#cd-mml').value, 10) || 32768,
gpu_memory_utilization: parseFloat(el('#cd-gmu').value),
fastsafetensors: el('#cd-fst').checked,
prefix_caching: el('#cd-pcache').checked,
kv_cache_dtype: el('#cd-fp8').checked ? 'fp8' : 'auto',
},
};
try {
await fetchJSON('/api/models', {
method: 'POST',
headers: { 'content-type': 'application/json' },
body: JSON.stringify(body),
});
el('#catalog-dialog').close();
closeDownloadPanel();
await loadModels();
pollStatus();
} catch (e) { alert('Add to catalog failed: ' + e.message); }
});
}
function setupAdvancedDialog() {
el('#adv-cancel').addEventListener('click', () => el('#advanced-dialog').close());
el('#adv-gmu').addEventListener('input', (e) => { el('#adv-gmu-out').value = parseFloat(e.target.value).toFixed(2); });
}
// ===================== NIM installer =====================
const nimState = {
catalog: null,
job_id: null,
eventsource: null,
timer: null,
started_at: null,
};
async function loadNimCatalog() {
try {
nimState.catalog = await fetchJSON('/api/nim/catalog');
el('#nim-catalog-link').href = nimState.catalog.catalog_url;
const warn = el('#nim-key-warn');
if (!nimState.catalog.ngc_key_configured) {
warn.classList.add('nim-key-warn');
warn.innerHTML = '⚠️ NGC API key not set. Open <strong>Configure Sparks</strong> in StartOS and paste your NGC personal API key, otherwise installs will fail. <a href="https://ngc.nvidia.com/setup/personal-key" target="_blank" rel="noopener">Get a key</a>';
} else {
warn.classList.remove('nim-key-warn');
warn.textContent = '';
}
const grid = el('#nim-suggested');
grid.innerHTML = '';
for (const s of nimState.catalog.suggested || []) {
const card = document.createElement('div');
card.className = 'nim-card';
card.innerHTML = `
<div class="info">
<div class="name">${escapeHtml(s.name)} <span class="muted small">· ${escapeHtml(s.kind || 'nim')}</span></div>
<div class="desc">${escapeHtml(s.description || '')}</div>
<div class="img">${escapeHtml(s.image)}</div>
<div class="links">${s.homepage ? `<a href="${escapeHtml(s.homepage)}" target="_blank" rel="noopener">View on NGC ↗</a>` : ''}</div>
</div>
<button type="button" class="btn primary nim-pick" data-image="${escapeHtml(s.image)}" data-container="${escapeHtml(s.default_container)}" data-port="${s.default_port}" data-kind="${escapeHtml(s.kind)}">Pick</button>
`;
grid.appendChild(card);
}
grid.querySelectorAll('.nim-pick').forEach(btn => {
btn.addEventListener('click', () => {
el('#nim-image').value = btn.dataset.image;
el('#nim-container').value = btn.dataset.container;
el('#nim-port').value = btn.dataset.port;
el('#nim-kind').value = btn.dataset.kind || 'nim';
});
});
} catch (e) { console.warn('nim catalog failed', e); }
}
function openNimDialog() {
loadNimCatalog();
el('#nim-dialog').showModal();
}
async function submitNim(e) {
e.preventDefault();
const body = {
image: el('#nim-image').value.trim(),
container: el('#nim-container').value.trim(),
port: parseInt(el('#nim-port').value, 10),
host: el('#nim-host').value,
kind: el('#nim-kind').value,
};
if (!body.image || !body.container || !body.port) {
alert('Image, container name, and port are required.');
return;
}
try {
const r = await fetchJSON('/api/nim/install', {
method: 'POST',
headers: { 'content-type': 'application/json' },
body: JSON.stringify(body),
});
el('#nim-dialog').close();
attachNimProgress(r.job_id);
} catch (e) {
alert('Install failed: ' + e.message);
}
}
function nimTimerStart(at) {
nimState.started_at = at;
if (nimState.timer) clearInterval(nimState.timer);
const tick = () => {
if (!nimState.started_at) return;
const sec = Math.max(0, Math.floor((Date.now() - nimState.started_at) / 1000));
const m = Math.floor(sec / 60);
const s = sec % 60;
el('#nim-prog-elapsed').textContent = `${m}:${s.toString().padStart(2, '0')}`;
};
tick();
nimState.timer = setInterval(tick, 500);
}
async function attachNimProgress(jobId) {
nimState.job_id = jobId;
el('#nim-prog-log').textContent = '';
el('#nim-prog-title').textContent = 'Installing…';
el('#nim-progress-dialog').showModal();
try {
const snap = await fetchJSON(`/api/nim/install/${jobId}`);
nimTimerStart(Date.parse(snap.started_at));
el('#nim-prog-phase').textContent = snap.phase || 'Working…';
el('#nim-prog-log').textContent = (snap.lines || []).join('\n');
if (snap.returncode !== null) { onNimDone(snap); return; }
} catch { nimTimerStart(Date.now()); }
const es = new EventSource(`/api/nim/install/${jobId}/stream`);
nimState.eventsource = es;
es.onmessage = ev => {
try {
const d = JSON.parse(ev.data);
if (d.line !== undefined) {
const log = el('#nim-prog-log');
log.textContent += d.line + '\n';
log.scrollTop = log.scrollHeight;
}
} catch {}
};
es.addEventListener('phase', ev => {
try { el('#nim-prog-phase').textContent = JSON.parse(ev.data).phase; } catch {}
});
es.addEventListener('done', ev => {
let d = {}; try { d = JSON.parse(ev.data); } catch {}
onNimDone(d);
});
es.onerror = () => { es.close(); nimState.eventsource = null; };
}
function onNimDone(d) {
if (nimState.eventsource) { nimState.eventsource.close(); nimState.eventsource = null; }
if (nimState.timer) { clearInterval(nimState.timer); nimState.timer = null; }
if (d.state === 'failed') {
el('#nim-prog-title').textContent = `Failed (rc=${d.returncode})`;
el('#nim-prog-phase').textContent = 'Failed';
} else {
el('#nim-prog-title').textContent = 'Installed';
el('#nim-prog-phase').textContent = 'Done ✓ — service will appear when the container reports healthy.';
}
pollStatus();
}
// ===================== Explain context (LLM commit summary) =====================
let explainEventSource = null;
async function explainContext() {
if (explainEventSource) { explainEventSource.close(); explainEventSource = null; }
const section = el('#ub-explain-section');
const content = el('#ub-explain-content');
section.classList.remove('hidden');
section.open = true;
content.innerHTML = '<span class="muted">Asking the loaded model…</span>';
let text = '';
const es = new EventSource('/api/explain-updates');
explainEventSource = es;
let firstChunk = true;
es.onmessage = (ev) => {
try {
const d = JSON.parse(ev.data);
if (d.error) {
content.innerHTML = `<span class="muted">Couldn't get explanation: ${escapeHtml(d.error)}</span>`;
return;
}
if (firstChunk) { content.innerHTML = ''; firstChunk = false; }
if (d.content) {
text += d.content;
content.textContent = text;
content.scrollTop = content.scrollHeight;
} else if (d.reasoning) {
// Show reasoning tokens but de-emphasized
let r = content.querySelector('.reasoning-current');
if (!r) {
r = document.createElement('div');
r.className = 'reasoning reasoning-current';
r.textContent = '';
content.appendChild(r);
}
r.textContent += d.reasoning;
}
} catch {}
};
es.addEventListener('done', () => {
es.close();
explainEventSource = null;
// strip the reasoning-current marker
const r = content.querySelector('.reasoning-current');
if (r) r.classList.remove('reasoning-current');
});
es.onerror = () => { es.close(); explainEventSource = null; };
}
// ===================== updates (spark-vllm-docker) =====================
const updState = {
info: null,
job_id: null,
eventsource: null,
started_at: null,
timer_handle: null,
};
async function pollUpdates() {
try {
const info = await fetchJSON('/api/updates');
updState.info = info;
renderUpdateBanner();
} catch (e) {
console.warn('updates poll failed', e);
}
}
function renderUpdateBanner() {
const banner = el('#update-banner');
const info = updState.info;
const text = el('#ub-text');
const details = el('#ub-details');
const apply = el('#ub-apply');
const list = el('#ub-list');
const log = el('#ub-log');
if (!info || !info.ok) {
banner.classList.add('hidden');
return;
}
banner.classList.remove('hidden');
const behind = info.behind || 0;
const dirty = info.dirty || 0;
banner.classList.toggle('up-to-date', behind === 0 && !dirty);
banner.classList.toggle('warn', !!dirty);
const explain = el('#ub-explain');
if (dirty > 0) {
text.textContent = `${dirty} local change${dirty === 1 ? '' : 's'} in ~/spark-vllm-docker. Resolve before updating.`;
details.classList.add('hidden');
apply.classList.add('hidden');
explain.classList.add('hidden');
} else if (behind === 0) {
text.textContent = `spark-vllm-docker is up to date (${info.current || ''})`;
details.classList.add('hidden');
apply.classList.add('hidden');
list.classList.add('hidden');
explain.classList.add('hidden');
} else {
text.textContent = `${behind} commit${behind === 1 ? '' : 's'} behind upstream`;
details.classList.remove('hidden');
apply.classList.remove('hidden');
explain.classList.remove('hidden');
log.textContent = (info.log || []).join('\n') || '(no log)';
}
}
function ubTimerStart(startedAt) {
updState.started_at = startedAt;
if (updState.timer_handle) clearInterval(updState.timer_handle);
const tick = () => {
if (!updState.started_at) return;
const sec = Math.max(0, Math.floor((Date.now() - updState.started_at) / 1000));
const m = Math.floor(sec / 60);
const s = sec % 60;
el('#ub-elapsed').textContent = `${m}:${s.toString().padStart(2, '0')}`;
};
tick();
updState.timer_handle = setInterval(tick, 500);
}
async function applyUpdate() {
if (!confirm('This pulls the latest spark-vllm-docker and rebuilds the vLLM container. Can take 540 minutes; the cluster is unaffected until you swap to a different model. Continue?')) return;
try {
const r = await fetchJSON('/api/updates/apply', {
method: 'POST',
headers: { 'content-type': 'application/json' },
body: JSON.stringify({ mode: 'cluster' }),
});
attachToUpdate(r.job_id);
} catch (e) {
alert('Failed to start update: ' + e.message);
}
}
async function attachToUpdate(jobId) {
updState.job_id = jobId;
el('#ub-progress').classList.remove('hidden');
el('#ub-apply').classList.add('hidden');
el('#ub-stream').textContent = '';
el('#ub-phase').textContent = 'Starting…';
try {
const snap = await fetchJSON(`/api/updates/${jobId}`);
ubTimerStart(Date.parse(snap.started_at));
el('#ub-phase').textContent = snap.phase || 'Working…';
el('#ub-stream').textContent = (snap.lines || []).join('\n');
if (snap.returncode !== null) { handleUpdateDone(snap); return; }
} catch (e) {
ubTimerStart(Date.now());
}
const es = new EventSource(`/api/updates/${jobId}/stream`);
updState.eventsource = es;
es.onmessage = (ev) => {
try {
const d = JSON.parse(ev.data);
if (d.line !== undefined) {
const log = el('#ub-stream');
log.textContent += d.line + '\n';
log.scrollTop = log.scrollHeight;
}
} catch {}
};
es.addEventListener('phase', (ev) => {
try { el('#ub-phase').textContent = JSON.parse(ev.data).phase; } catch {}
});
es.addEventListener('done', (ev) => {
let d = {}; try { d = JSON.parse(ev.data); } catch {}
handleUpdateDone(d);
});
es.onerror = () => { es.close(); updState.eventsource = null; };
}
function handleUpdateDone(d) {
if (updState.eventsource) { updState.eventsource.close(); updState.eventsource = null; }
if (updState.timer_handle) { clearInterval(updState.timer_handle); updState.timer_handle = null; }
el('#ub-phase').textContent = d.state === 'failed' ? `Failed (rc=${d.returncode})` : 'Done ✓ — re-check from the banner.';
setTimeout(pollUpdates, 2000);
}
async function init() {
setupCopyButtons();
el('#open-download').addEventListener('click', openDownloadForm);
el('#dl-cancel').addEventListener('click', closeDownloadPanel);
el('#dl-start').addEventListener('click', startDownload);
el('#dl-repo').addEventListener('keydown', (e) => { if (e.key === 'Enter') startDownload(); });
el('#ub-details').addEventListener('click', () => {
const list = el('#ub-list');
list.classList.toggle('hidden');
list.open = !list.open;
});
el('#ub-apply').addEventListener('click', applyUpdate);
el('#ub-explain').addEventListener('click', explainContext);
el('#dl-repo').addEventListener('input', updateDlHfLink);
el('#open-nim').addEventListener('click', openNimDialog);
el('#nim-cancel').addEventListener('click', () => el('#nim-dialog').close());
el('#nim-form').addEventListener('submit', submitNim);
el('#nim-prog-close').addEventListener('click', () => el('#nim-progress-dialog').close());
el('#open-connectivity').addEventListener('click', openConnectivityDialog);
el('#connectivity-close').addEventListener('click', () => el('#connectivity-dialog').close());
// Wake-on-LAN buttons live on unreachable hardware cards; delegate.
el('#hardware-grid').addEventListener('click', (e) => {
const btn = e.target.closest('[data-wake]');
if (btn) wakeSpark(btn.dataset.wake);
});
setupCatalogDialog();
setupAdvancedDialog();
// Open WebUI link from /api/config
try {
state.config = await fetchJSON('/api/config');
if (state.config.open_webui_url) {
const a = el('#open-webui-link');
a.href = state.config.open_webui_url;
a.classList.remove('hidden');
}
} catch {}
await loadModels();
await pollStatus();
await renderServices();
pollHardware();
pollUpdates();
setInterval(pollStatus, 5000);
setInterval(pollHardware, 8000); // every 8s
setInterval(pollUpdates, 300000); // every 5 min
}
init();