Files
spark-control/image/app/static/app.js
T
Keysat 26070eb191 v0.24.0:0 - configurable cluster topology (vllm container name, hide services, second-vllm monitor)
Make the cluster topology configurable so an adopter wired differently
(vLLM on both Sparks, port 8000, different container name, no Parakeet)
can monitor without forking. Covers the OpenClaw report P4/P5/#6.

- VLLM_CONTAINER override (default vllm_node), validated at the boundary
  and quote_arg-quoted into the swap log-tail + pre-flight validator exec.
- DISABLED_SERVICES list: hidden services show no tile and are skipped by
  status/deep-health/connectivity probes (kills the Parakeet-on-8000
  collision).
- kind: vllm custom service monitors a second Spark's vLLM via the shared
  probe_vllm_endpoint; /api/endpoints gains a disabled flag.

Swap mechanism intentionally not generalized to raw docker run (that's
coordination, roadmap item 4).
2026-06-17 23:03:33 -05:00

2136 lines
84 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// spark-control front-end
// Single mutable store shared by the render*/poll* functions in this file.
// Note: `connectivity` is not declared here; pollHardware() attaches it after
// a successful /api/connectivity fetch, so readers must tolerate it missing.
const state = {
models: {},
defaults: {},
current_model_key: null,
swap_job_id: null,
swap_eventsource: null,
swap_started_at: null,
swap_lines: [], // local accumulator for phase detection
swap_phase: 'Starting…',
swap_phase_detail: '',
swap_progress: 0, // 0–1 (presumably a completion fraction — TODO confirm against the swap progress code)
services: {},
service_action_in_flight: null, // e.g. "parakeet:restart"
mb_update_in_flight: false, // matrix-bridge update job running
hardware: {},
config: {},
configured: true,
timer_handle: null,
deep_health: {},
disk_status: {}, // keyed by model key: { on_disk, total_bytes, per_host }
disk_status_loaded: false,
};
// DOM lookup shorthands: el() yields the first match for a CSS selector,
// $$() yields every match (a NodeList in the browser).
const el = (selector) => {
  return document.querySelector(selector);
};
const $$ = (selector) => {
  return document.querySelectorAll(selector);
};
// Escape a value for interpolation into HTML text or a quoted attribute.
// null/undefined become ''; anything else is stringified first.
function escapeHtml(s) {
  if (s == null) return '';
  return String(s)
    // '&' must be handled first so the entities produced below are not
    // themselves re-escaped.
    .replaceAll('&', '&amp;')
    .replaceAll('<', '&lt;')
    .replaceAll('>', '&gt;')
    .replaceAll('"', '&quot;')
    .replaceAll("'", '&#39;');
}
// fetch() wrapper that resolves to the parsed JSON body on a 2xx response and
// otherwise throws an Error whose message is "<status> <statusText>: <body>".
async function fetchJSON(url, opts) {
  const response = await fetch(url, opts);
  if (response.ok) {
    return response.json();
  }
  // Reading the body is best-effort; an unreadable body becomes ''.
  const body = await response.text().catch(() => '');
  throw new Error(`${response.status} ${response.statusText}: ${body}`);
}
// ===================== rendering =====================
// Rebuild the model-card grid inside #cards from state.models.
//
// Each card shows name, mode/size/capability tags, an optional disk-presence
// pill, the repo link (or local path), and its action buttons. Click handlers
// are wired up after all cards have been appended.
//
// Every server-provided value that lands in markup is escaped, including the
// model key used in data-* attributes (dataset reads decode it back, so the
// handlers still receive the raw key).
function renderCards() {
  const root = el('#cards');
  root.innerHTML = '';
  const isSwapping = !!state.swap_job_id;
  for (const key of Object.keys(state.models)) {
    const m = state.models[key];
    const keyAttr = escapeHtml(key);
    // String() keeps the previous rendering of a missing field ("undefined")
    // instead of silently collapsing it to ''.
    const modeHtml = escapeHtml(String(m.mode));
    const sizeHtml = escapeHtml(String(m.size_gb));
    const isActive = key === state.current_model_key;
    const card = document.createElement('div');
    card.className = 'card' + (isActive ? ' active' : '');
    const desc = m.description
      ? `<div class="desc">${escapeHtml(m.description)}</div>`
      : '';
    const customPill = m.custom ? `<span class="tag custom-pill">custom</span>` : '';
    const localPill = m.local_path ? `<span class="tag local-pill" title="Served from a directory on the Spark, not Hugging Face">local</span>` : '';
    // Disk-presence pill + trash button. Until /api/models/disk-status comes back,
    // we don't know — render a neutral placeholder.
    const disk = state.disk_status[key];
    let diskPill = '';
    if (state.disk_status_loaded) {
      if (disk && disk.on_disk) {
        const gb = (disk.total_bytes / 1e9);
        diskPill = `<span class="tag on-disk" title="Weights present on disk">on disk · ${gb.toFixed(1)} GB</span>`;
      } else {
        diskPill = `<span class="tag not-on-disk" title="Weights not downloaded">not downloaded</span>`;
      }
    }
    // Trash button — hidden if not on disk; disabled (with tooltip) if currently loaded.
    // Never offered for local models: their directory is hand-placed training output,
    // not a re-downloadable HF cache (the server refuses the delete too).
    let trashBtn = '';
    if (state.disk_status_loaded && disk && disk.on_disk && !m.local_path) {
      const disabled = isActive || isSwapping;
      const tip = isActive
        ? 'Currently loaded — switch to another model first'
        : isSwapping
          ? 'A swap is in progress'
          : 'Delete weights from disk';
      trashBtn = `<button class="icon-btn danger" data-disk-del-key="${keyAttr}" title="${escapeHtml(tip)}" aria-label="Delete from disk" ${disabled ? 'disabled' : ''}>${trashIcon}</button>`;
    }
    // Primary card action: "Switch to this" (green) when on disk; "Download" (blue) when not.
    // Before disk-status loads we render the swap button as a sensible default.
    const isOnDisk = !state.disk_status_loaded || (disk && disk.on_disk);
    // dlState is declared elsewhere in the file; the typeof guard keeps this
    // safe if it has not been initialised yet.
    const dlInFlight = !!(typeof dlState !== 'undefined' && dlState && dlState.job_id);
    let primaryBtn = '';
    if (isActive) {
      primaryBtn = `<button class="btn" disabled>Current</button>`;
    } else if (isOnDisk) {
      primaryBtn = `<button class="btn primary" data-swap-key="${keyAttr}" ${isSwapping ? 'disabled' : ''}>Switch to this</button>`;
    } else if (m.local_path) {
      // A local model can't be "downloaded" — its directory has to exist on the Spark.
      primaryBtn = `<button class="btn" disabled title="Directory not found on the Spark — create it there, then refresh">Not found on Spark</button>`;
    } else {
      const tip = dlInFlight ? 'A download is already in progress' : 'Download weights to the Spark(s)';
      primaryBtn = `<button class="btn info" data-download-key="${keyAttr}" title="${escapeHtml(tip)}" ${dlInFlight ? 'disabled' : ''}>Download</button>`;
    }
    const capsHtml = (m.capabilities || [])
      .map((c) => `<span class="tag cap">${escapeHtml(c)}</span>`)
      .join('');
    const repoHtml = m.local_path
      ? `<span class="local-path" title="Local model directory on the Spark">${escapeHtml(m.local_path)}</span>`
      : `<a href="https://huggingface.co/${encodeURIComponent(m.repo)}" target="_blank" rel="noopener" title="View on Hugging Face">${escapeHtml(m.repo)} <span class="hf-icon">↗</span></a>`;
    card.innerHTML = `
<div class="name">${escapeHtml(m.display_name)}</div>
<div class="meta">
<span class="tag mode-${modeHtml}">${modeHtml}</span>
<span class="tag">${sizeHtml} GB</span>
${customPill}
${localPill}
${diskPill}
${capsHtml}
</div>
${desc}
<div class="muted small repo">
${repoHtml}
</div>
<div class="spacer"></div>
<div class="card-actions">
${primaryBtn}
<button class="btn test-btn" data-test-key="${keyAttr}" title="Pre-flight check the launch command without starting the engine">Test</button>
<button class="btn adv-btn" data-adv-key="${keyAttr}" title="Advanced settings">Advanced</button>
${trashBtn}
</div>
<div class="test-result hidden" data-test-result-for="${keyAttr}"></div>
`;
    root.appendChild(card);
  }
  for (const btn of root.querySelectorAll('[data-swap-key]')) {
    btn.addEventListener('click', () => triggerSwap(btn.dataset.swapKey));
  }
  for (const btn of root.querySelectorAll('[data-download-key]')) {
    btn.addEventListener('click', () => triggerDownloadForKey(btn.dataset.downloadKey));
  }
  for (const btn of root.querySelectorAll('[data-adv-key]')) {
    btn.addEventListener('click', () => openAdvanced(btn.dataset.advKey));
  }
  for (const btn of root.querySelectorAll('[data-test-key]')) {
    btn.addEventListener('click', () => testLaunch(btn.dataset.testKey, btn));
  }
  for (const btn of root.querySelectorAll('[data-disk-del-key]')) {
    btn.addEventListener('click', () => openDiskDeleteDialog(btn.dataset.diskDelKey));
  }
}
// Inline SVG markup for the trash-can glyph; renderCards() places it inside
// each card's delete-from-disk button.
const trashIcon = '<svg viewBox="0 0 24 24" width="14" height="14" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><polyline points="3 6 5 6 21 6"></polyline><path d="M19 6l-1 14a2 2 0 0 1-2 2H8a2 2 0 0 1-2-2L5 6"></path><path d="M10 11v6"></path><path d="M14 11v6"></path><path d="M9 6V4a2 2 0 0 1 2-2h2a2 2 0 0 1 2 2v2"></path></svg>';
// Run the server-side pre-flight validation for one model and show the verdict
// in that card's result strip.
//
// key — model key; used both in the API path and to locate the result element.
// btn — the "Test" button that was clicked; disabled and relabelled while the
//       request is in flight, then restored.
async function testLaunch(key, btn) {
  // CSS.escape keeps the selector valid even if the key contains quotes or
  // backslashes, which would otherwise make querySelector throw.
  const resultEl = document.querySelector(`[data-test-result-for="${CSS.escape(key)}"]`);
  if (!resultEl) return;
  const originalText = btn.textContent;
  btn.disabled = true;
  btn.textContent = 'Testing…';
  resultEl.classList.remove('hidden', 'ok', 'fail');
  resultEl.innerHTML = '<span class="muted small">Checking launch args against vLLM\'s parser…</span>';
  try {
    const r = await fetchJSON(`/api/swap/${encodeURIComponent(key)}/validate`, { method: 'POST' });
    if (r.ok) {
      resultEl.classList.add('ok');
      resultEl.innerHTML = `<span class="ok-mark">✓</span> Launch args parse OK. <span class="muted small">(Doesn't guarantee runtime success — only catches argparse-level issues.)</span>`;
    } else {
      resultEl.classList.add('fail');
      const err = escapeHtml(r.error || 'unknown error');
      const stage = r.stage ? ` <span class="muted small">(${escapeHtml(r.stage)})</span>` : '';
      resultEl.innerHTML = `<span class="fail-mark">✗</span> Would fail: ${err}${stage}`;
    }
  } catch (e) {
    // Transport/HTTP failure (as opposed to a validation verdict).
    resultEl.classList.add('fail');
    resultEl.innerHTML = `<span class="fail-mark">✗</span> Test failed: ${escapeHtml(e.message)}`;
  } finally {
    btn.disabled = false;
    btn.textContent = originalText;
  }
}
// Update the #current header element from a status payload.
//
// Shows a muted placeholder when the app is unconfigured, a swap is running,
// or vLLM is unreachable; otherwise the active model's display name (falling
// back to the model id vLLM reports, then "(unknown)").
function renderCurrent(status) {
  const c = el('#current');
  if (!status.configured) { c.innerHTML = `<span class="muted">not configured</span>`; return; }
  if (status.current_swap_job) { c.innerHTML = `<span class="muted">swap in progress</span>`; return; }
  const v = status.vllm || {};
  if (!v.ok) { c.innerHTML = `<span class="muted">vLLM unreachable</span>`; return; }
  const m = status.current_model_key ? state.models[status.current_model_key] : null;
  const label = m ? m.display_name : (v.current_model || '(unknown)');
  // The label is server-provided text going into innerHTML — escape it like
  // every other display name in this file.
  c.innerHTML = `<strong>${escapeHtml(label)}</strong>`;
}
// ===================== hardware dashboard =====================
// Render a byte count with a binary (1024-based) unit from B up to TB.
// Values under 10 keep one decimal; larger ones are rounded to an integer.
// Anything falsy other than 0 (null, undefined, NaN, '') renders as an em dash.
function fmtBytes(n) {
  const isMissing = !n && n !== 0;
  if (isMissing) return '—';
  const units = ['B', 'KB', 'MB', 'GB', 'TB'];
  let value = n;
  let unitIndex = 0;
  // Step up one unit at a time, stopping at the largest unit we know.
  for (; value >= 1024 && unitIndex < units.length - 1; unitIndex += 1) {
    value /= 1024;
  }
  const shown = value < 10 ? value.toFixed(1) : String(Math.round(value));
  return `${shown} ${units[unitIndex]}`;
}
// Convert a MiB count to a bare GB figure (no unit suffix) as a string:
// one decimal below 10, rounded integer otherwise. Returns null when the
// input is missing (falsy but not 0) so callers can substitute a placeholder.
function fmtMiB(n) {
  if (n !== 0 && !n) return null;
  const gigabytes = n / 1024;
  if (gigabytes < 10) {
    return gigabytes.toFixed(1);
  }
  return String(Math.round(gigabytes));
}
// Build the markup for a horizontal usage bar.
// usedPct is clamped to 2..100 so a near-empty bar still shows a sliver;
// warn adds the "warn" class to the outer element.
function bar(usedPct, warn) {
  const width = Math.min(100, Math.max(2, usedPct));
  const warnClass = warn ? 'warn' : '';
  return `<div class="bar ${warnClass}"><span style="width:${width}%"></span></div>`;
}
// Refresh state.hardware (and, best-effort, state.connectivity) from the
// server, then redraw the hardware panel. Any failure of the hardware fetch
// or of the redraw is logged and otherwise ignored.
async function pollHardware() {
  try {
    const hardware = await fetchJSON('/api/hardware');
    state.hardware = hardware;
    try {
      // Connectivity is optional: on failure the previous value is kept.
      const connectivity = await fetchJSON('/api/connectivity');
      state.connectivity = connectivity;
    } catch {}
    renderHardware();
  } catch (err) {
    console.warn('hardware poll failed', err);
  }
}
// Format a duration in seconds as a compact label: "42s", "17m", "3h 5m",
// "2d 4h". Returns '' for null/undefined/non-numeric input.
//
// The value is rounded once, in the smallest unit the chosen tier displays,
// and then split into parts. Rounding the remainder on its own (as the
// previous version did) could emit "60s", "60m", "1h 60m" or "1d 24h" just
// below a unit boundary.
function fmtDuration(sec) {
  if (sec == null || Number.isNaN(Number(sec))) return '';
  const wholeSeconds = Math.round(sec);
  if (wholeSeconds < 60) return `${wholeSeconds}s`;
  const wholeMinutes = Math.round(sec / 60);
  if (wholeMinutes < 60) return `${wholeMinutes}m`;
  if (wholeMinutes < 24 * 60) {
    const h = Math.floor(wholeMinutes / 60);
    const m = wholeMinutes % 60;
    return m ? `${h}h ${m}m` : `${h}h`;
  }
  const wholeHours = Math.round(sec / 3600);
  const d = Math.floor(wholeHours / 24);
  const h = wholeHours % 24;
  return h ? `${d}d ${h}h` : `${d}d`;
}
// Fill and open the connectivity-history dialog from state.connectivity.
//
// Events are grouped by subject (legacy events carry `spark` instead of
// `subject`). Hosts spark1/spark2 are listed first, remaining subjects
// alphabetically. Each group shows a one-line summary and its 30 most recent
// events, newest first.
function openConnectivityDialog() {
  const dialog = el('#connectivity-dialog');
  const body = el('#connectivity-content');
  const conn = state.connectivity || {};
  const allEvents = conn.events || [];
  if (allEvents.length === 0) {
    body.innerHTML = '<div class="muted small">No events recorded yet. Once a Spark or service goes down and back up (or an external app reports a failure), entries appear here.</div>';
    dialog.showModal();
    return;
  }

  const grouped = {};
  allEvents.forEach((ev) => {
    const subject = ev.subject || ev.spark || 'unknown'; // legacy fallback
    if (!grouped[subject]) grouped[subject] = [];
    grouped[subject].push(ev);
  });

  // Sort subjects: hosts first, then services, alphabetical
  const hostOrder = ['spark1', 'spark2'];
  const compareSubjects = (a, b) => {
    const ia = hostOrder.indexOf(a);
    const ib = hostOrder.indexOf(b);
    const aIsHost = ia !== -1;
    const bIsHost = ib !== -1;
    if (aIsHost !== bIsHost) return aIsHost ? -1 : 1;
    return aIsHost ? ia - ib : a.localeCompare(b);
  };
  const plural = (count) => (count === 1 ? '' : 's');

  const sections = [];
  for (const subj of Object.keys(grouped).sort(compareSubjects)) {
    const evs = grouped[subj];
    let transitionCount = 0;
    let downCount = 0;
    let reportCount = 0;
    let failedCount = 0;
    for (const ev of evs) {
      // An event without a kind counts as a probe transition.
      if ((ev.kind || 'transition') === 'transition') {
        transitionCount += 1;
        if (ev.transition === 'down') downCount += 1;
      }
      if (ev.kind === 'report') {
        reportCount += 1;
        if (!ev.ok) failedCount += 1;
      }
    }
    const summaryParts = [];
    if (transitionCount) summaryParts.push(`${transitionCount} probe transition${plural(transitionCount)} (${downCount} down)`);
    if (reportCount) summaryParts.push(`${reportCount} app report${plural(reportCount)} (${failedCount} failed)`);
    const mac = conn.macs?.[subj];
    const roleTag = hostOrder.includes(subj)
      ? ' <span class="muted small">[host]</span>'
      : ' <span class="muted small">[service]</span>';
    const macTag = mac ? ` <span class="muted small">${escapeHtml(mac)}</span>` : '';
    const recentHtml = evs.slice(-30).reverse().map((ev) => renderConnEvent(ev)).join('');
    sections.push(`
<div class="conn-spark">
<h4>${escapeHtml(subj)}${roleTag}${macTag}</h4>
<div class="conn-summary">${summaryParts.join(' · ') || 'no events'}</div>
${recentHtml}
</div>
`);
  }
  body.innerHTML = sections.join('');
  dialog.showModal();
}
// Render one connectivity event as a .conn-event row.
//
// Two shapes are handled: `kind: 'report'` (an ok/failed report with a source,
// optional detail and latency) and everything else, which is treated as a
// probe transition ('up' or otherwise shown as dropped offline) with optional
// down/up durations.
//
// Every field of the event is escaped before it reaches the markup: the
// empty-state text in the dialog notes that reports can come from external
// apps, so none of the fields can be assumed to be well-formed.
function renderConnEvent(e) {
  const when = escapeHtml((e.at || '').replace('T', ' ').replace('Z', ''));
  const kind = e.kind || 'transition';
  if (kind === 'report') {
    const ok = !!e.ok;
    const source = escapeHtml(e.source || 'external');
    // Separate the detail from the source name so the two don't run together.
    const detail = e.detail ? ` — ${escapeHtml(e.detail)}` : '';
    const latency = e.latency_ms != null ? ` (${escapeHtml(e.latency_ms)} ms)` : '';
    return `
<div class="conn-event ${ok ? 'up' : 'down'} report">
<span class="when">${when}</span>
<span class="what">${ok ? '◷ report: ok' : '◷ report: failed'} <span class="muted">from</span> ${source}${detail}</span>
<span class="dur">${latency}</span>
</div>
`;
  }
  const down = e.down_seconds != null ? `was down ${fmtDuration(e.down_seconds)}` : '';
  const up = e.up_seconds != null ? `was up ${fmtDuration(e.up_seconds)}` : '';
  // Normally only one of the two is present; if both are, keep them readable.
  const duration = [down, up].filter(Boolean).join(' · ');
  return `
<div class="conn-event ${escapeHtml(e.transition)}">
<span class="when">${when}</span>
<span class="what">${e.transition === 'up' ? '↑ came back online' : '↓ dropped offline'}</span>
<span class="dur">${duration}</span>
</div>
`;
}
// Ask the server to send a Wake-on-LAN packet to the named Spark and report
// the outcome (success or failure) in an alert.
async function wakeSpark(name) {
  try {
    // Encode the path segment, matching the other per-name endpoints in this file.
    const r = await fetchJSON(`/api/spark/${encodeURIComponent(name)}/wake`, { method: 'POST' });
    alert(`Wake-on-LAN sent to ${name} (MAC ${r.mac}, via ${r.delivered_via}). Give it ~30 seconds to wake; the card will go green when it comes back.`);
  } catch (e) {
    alert(`Wake failed: ${e.message}`);
  }
}
// Generate-if-missing + copy this Spark's OUTBOUND ssh public key (the key the
// Spark uses to log in to other machines, e.g. the Mac). Distinct from the
// package's own key in the StartOS "Show Public Key" action.
//
// name — Spark identifier used in the API path and in the dialog text.
// btn  — optional button to disable while the request is in flight.
async function copySparkSshKey(name, btn) {
  if (btn) btn.disabled = true;
  try {
    const r = await fetchJSON(`/api/spark/${encodeURIComponent(name)}/ssh-key`, { method: 'POST' });
    // Best-effort clipboard copy; on plain-HTTP this no-ops, but the dialog
    // below always shows the key for manual selection.
    await copyText(r.pubkey, btn);
    const label = r.host ? `${name} (${r.host})` : name;
    el('#sshkey-title').textContent = `${name} — SSH public key`;
    el('#sshkey-intro').textContent = r.created
      ? `Generated a new SSH key on ${label} and copied it to your clipboard. This is the key ${name} uses to log in to OTHER machines.`
      : `${label} already had an SSH key; copied its public key to your clipboard. This is the key ${name} uses to log in to OTHER machines.`;
    el('#sshkey-value').textContent = r.pubkey;
    // The key's trailing comment field is free text and may contain an
    // apostrophe; close/escape/reopen the single-quoted shell string so the
    // command the user pastes stays one intact `echo` argument.
    const shellQuotedKey = `'${String(r.pubkey).replaceAll("'", "'\\''")}'`;
    el('#sshkey-install').textContent =
      `mkdir -p ~/.ssh && echo ${shellQuotedKey} >> ~/.ssh/authorized_keys && chmod 600 ~/.ssh/authorized_keys`;
    el('#sshkey-dialog').showModal();
  } catch (e) {
    alert(`Couldn't get the SSH key for ${name}: ${e.message}`);
  } finally {
    if (btn) btn.disabled = false;
  }
}
// Redraw the hardware panel (#hardware-panel / #hardware-grid) from
// state.hardware. Entries with `configured === false` are skipped; the panel
// is hidden when nothing remains.
//
// An unreachable Spark gets a recovery card (with a Wake-on-LAN button once
// its MAC is known from state.connectivity); a reachable one gets CPU, RAM,
// GPU memory, GPU utilisation and disk bars.
//
// This function only produces markup; the data-wake / data-ssh-key buttons it
// emits are not wired up here.
function renderHardware() {
  const panel = el('#hardware-panel');
  const grid = el('#hardware-grid');
  const hw = state.hardware || {};
  const keys = Object.keys(hw).filter(k => hw[k] && (hw[k].configured !== false));
  if (keys.length === 0) { panel.classList.add('hidden'); return; }
  panel.classList.remove('hidden');
  grid.innerHTML = '';
  for (const key of keys) {
    const s = hw[key];
    const card = document.createElement('div');
    if (!s.reachable) {
      card.className = 'hw-card unreachable';
      const mac = state.connectivity?.macs?.[key];
      const wolRow = mac
        ? `<div class="wol-row">
<span class="mac-display">${escapeHtml(mac)}</span>
<span class="spacer"></span>
<button class="btn" data-wake="${escapeHtml(key)}">Wake (WoL)</button>
</div>`
        : `<div class="muted small">MAC not yet known — once it's been up once with this dashboard installed, "Wake" will appear here.</div>`;
      // Keep the host and the error text apart; previously they were
      // concatenated with nothing in between.
      const unreachableNote = [s.host, s.error || 'no response'].filter(Boolean).join(' — ');
      card.innerHTML = `
<div class="head">
<span class="name">${escapeHtml(key)}</span>
<span class="meta">unreachable</span>
</div>
<div class="muted small">${escapeHtml(unreachableNote)}</div>
${wolRow}
<div class="muted small" style="line-height:1.5">
If Wake-on-LAN doesn't bring it back, manual steps:
<ol style="margin: 6px 0 0 18px; padding: 0;">
<li>Verify it's powered on (check the front LED).</li>
<li>Ping it from another LAN device.</li>
<li>Power-cycle it physically.</li>
<li>If it boots, this card will go green again automatically.</li>
</ol>
</div>
`;
      grid.appendChild(card);
      continue;
    }
    const ramPct = s.ram_used_bytes && s.ram_total_bytes ? (s.ram_used_bytes / s.ram_total_bytes) * 100 : 0;
    const diskPct = s.disk_used_bytes && s.disk_total_bytes ? (s.disk_used_bytes / s.disk_total_bytes) * 100 : 0;
    // Only trust the first load figure when it is actually a finite number;
    // an empty or malformed `load` array must not abort the whole redraw.
    const load1 = s.load && s.load.length ? Number(s.load[0]) : NaN;
    const hasLoad = Number.isFinite(load1);
    const loadPct = (hasLoad && s.cores) ? Math.min(100, (load1 / s.cores) * 100) : 0;
    // GPU memory: on unified-memory systems (DGX Spark) total is N/A, so use system RAM as the pool.
    const gpuMemTotalMiB = s.gpu_mem_total_mib || (s.gpu_unified_memory ? (s.ram_total_bytes / (1024 * 1024)) : null);
    const gpuMemUsedMiB = s.gpu_mem_used_mib ?? null;
    const gpuMemPct = (gpuMemTotalMiB && gpuMemUsedMiB != null) ? (gpuMemUsedMiB / gpuMemTotalMiB) * 100 : 0;
    const gpuMemNote = s.gpu_unified_memory ? ' <span class="muted">(unified)</span>' : '';
    const gpuExtras = [];
    if (s.gpu_temp_c != null) gpuExtras.push(`${escapeHtml(s.gpu_temp_c)}°C`);
    // Coerce before toFixed so a numeric string from the API cannot throw.
    const gpuPowerW = Number(s.gpu_power_w);
    if (s.gpu_power_w != null && Number.isFinite(gpuPowerW)) gpuExtras.push(`${gpuPowerW.toFixed(0)}W`);
    const gpuExtrasStr = gpuExtras.length ? ` · ${gpuExtras.join(' · ')}` : '';
    // Read-only WireGuard badge: shown only when the Spark has a wg interface up.
    // "VPN <ip>" means it's a peer on that tunnel (reachable off-LAN when the
    // tunnel is up); it reflects interface presence, not live peer reachability.
    const wgIp = s.wg_addr ? String(s.wg_addr).split('/')[0] : '';
    const wgBadge = s.wg_iface
      ? ` · <span class="wg-badge" title="On WireGuard tunnel '${escapeHtml(s.wg_iface)}'${wgIp ? ' as ' + escapeHtml(wgIp) : ''} — reachable off-LAN while the tunnel is up">VPN${wgIp ? ' ' + escapeHtml(wgIp) : ''}</span>`
      : '';
    card.className = 'hw-card';
    card.innerHTML = `
<div class="head">
<span class="name">${escapeHtml(s.hostname || key)}</span>
<span class="meta">${escapeHtml(key)} · ${escapeHtml(s.gpu_name || '')} · ${escapeHtml(s.uptime || '')}${wgBadge}</span>
<button class="icon-btn ssh-key-btn" data-ssh-key="${escapeHtml(key)}" title="Copy this Spark's SSH public key (creates one if it doesn't have one) — e.g. to let it log in to your Mac" aria-label="Copy SSH public key">
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2"/><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/></svg>
</button>
</div>
<div class="hw-metric">
<span class="label">CPU</span>
${bar(loadPct, loadPct > 80)}
<span class="val">${hasLoad ? load1.toFixed(2) : '—'} / ${escapeHtml(s.cores || '?')} cores</span>
</div>
<div class="hw-metric">
<span class="label">RAM</span>
${bar(ramPct, ramPct > 85)}
<span class="val">${fmtBytes(s.ram_used_bytes)} / ${fmtBytes(s.ram_total_bytes)}</span>
</div>
<div class="hw-metric">
<span class="label">GPU mem${gpuMemNote}</span>
${bar(gpuMemPct, gpuMemPct > 90)}
<span class="val">${fmtMiB(gpuMemUsedMiB) || '—'} / ${fmtMiB(gpuMemTotalMiB) || '?'} GB</span>
</div>
<div class="hw-metric">
<span class="label">GPU util</span>
${bar(s.gpu_util_pct || 0, (s.gpu_util_pct || 0) > 90)}
<span class="val">${escapeHtml(s.gpu_util_pct ?? 0)}%${gpuExtrasStr}</span>
</div>
<div class="hw-metric">
<span class="label">Disk</span>
${bar(diskPct, diskPct > 85)}
<span class="val">${fmtBytes(s.disk_used_bytes)} / ${fmtBytes(s.disk_total_bytes)}</span>
</div>
`;
    grid.appendChild(card);
  }
}
// ===================== service classification =====================
// Map a service record to a status class.
// Returns one of: running | unhealthy | missing | unconfigured | starting,
// or — for any other docker state — that state verbatim ('unknown' if absent).
function classifyService(s) {
  // Without a host there is nothing to probe, whatever docker reports.
  if (!s.host) return 'unconfigured';
  switch (s.docker_state) {
    case 'missing':
      return 'missing';
    case 'restarting':
    case 'exited':
      return 'unhealthy';
    case 'running':
      // http_ready === false means an HTTP probe is expected but failing → still
      // warming up. null means the service has no HTTP surface (e.g. the bot), so
      // a running container is simply healthy.
      return s.http_ready === false ? 'starting' : 'running';
    default:
      return s.docker_state || 'unknown';
  }
}
// Human-readable label for a status class from classifyService().
// Unrecognised classes are returned unchanged.
function statusLabel(cls) {
  const labels = {
    running: 'Healthy',
    unhealthy: 'Unhealthy',
    starting: 'Starting…',
    missing: 'Not installed',
    unconfigured: 'Not configured',
    unknown: 'Unknown',
  };
  // classifyService() can pass a raw docker state through, so only honour the
  // table's own keys — a plain lookup would return inherited members such as
  // `constructor` or `toString` instead of falling back to the input.
  return Object.prototype.hasOwnProperty.call(labels, cls) ? labels[cls] : cls;
}
// Redraw the services panel (#services-panel / #services-grid).
//
// Uses state.services, fetching /api/services first when that is still empty.
// Each service gets a card with host/URL/model rows, an optional restart
// count, the latest deep-health result (from state.deep_health) and
// Start/Restart/Stop buttons; bot services additionally get Update and
// View logs. Handlers are attached after all cards are built.
//
// Service names, status labels and numeric fields are escaped before they
// enter the markup; data-* values are decoded again by `dataset`, so the
// handlers still receive the raw name.
async function renderServices() {
  let services = state.services;
  // First render: fetch.
  if (!services || Object.keys(services).length === 0) {
    try {
      services = await fetchJSON('/api/services');
      state.services = services;
    } catch (e) { console.error('services fetch failed', e); return; }
  }
  const panel = el('#services-panel');
  const grid = el('#services-grid');
  const entries = Object.entries(services);
  if (entries.length === 0) { panel.classList.add('hidden'); return; }
  panel.classList.remove('hidden');
  grid.innerHTML = '';
  // Loop-invariant, so build it once rather than per card.
  const copyIcon = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2"/><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/></svg>`;
  for (const [name, s] of entries) {
    const cls = classifyService(s);
    const isBot = s.kind === 'bot';
    // The bot tile is opt-in: it only belongs to deployments that actually run
    // matrix-bridge. When the container is absent (missing) or the host isn't
    // configured, hide the tile entirely rather than show a stray red card.
    if (isBot && (cls === 'missing' || cls === 'unconfigured')) continue;
    const nameAttr = escapeHtml(name);
    const card = document.createElement('div');
    card.className = `service-card ${cls}`;
    const inFlight = state.service_action_in_flight && state.service_action_in_flight.startsWith(name + ':');
    const disable = (action) => {
      // Disable buttons that don't make sense for the current state
      if (inFlight) return true;
      if (cls === 'unconfigured' || cls === 'missing') return true;
      if (action === 'start' && (cls === 'running' || cls === 'starting')) return true;
      if (action === 'stop' && cls !== 'running' && cls !== 'starting' && cls !== 'unhealthy') return true;
      return false;
    };
    const hostStr = s.host ? (s.port ? `${s.host}:${s.port}` : s.host) : '';
    const hostRow = s.host
      ? `<div class="row"><span class="k">Host</span><span class="v copyable" data-copy-self title="Click to copy">${escapeHtml(hostStr)}</span><button class="icon-btn" data-copy-text="${escapeHtml(hostStr)}" title="Copy host" aria-label="Copy">${copyIcon}</button></div>`
      : `<div class="row"><span class="k">Host</span><span class="v muted-v">not configured</span></div>`;
    const urlRow = s.base_url
      ? `<div class="row"><span class="k">URL</span><span class="v copyable" data-copy-self title="Click to copy">${escapeHtml(s.base_url)}</span><button class="icon-btn" data-copy-text="${escapeHtml(s.base_url)}" title="Copy URL" aria-label="Copy">${copyIcon}</button></div>`
      : '';
    const modelRow = s.model
      ? `<div class="row"><span class="k">Model</span><span class="v copyable" data-copy-self title="Click to copy">${escapeHtml(s.model)}</span><button class="icon-btn" data-copy-text="${escapeHtml(s.model)}" title="Copy model" aria-label="Copy">${copyIcon}</button></div>`
      : '';
    const restartsRow = s.restart_count != null && s.restart_count > 1
      ? `<div class="row"><span class="k">Restarts</span><span class="v">${escapeHtml(s.restart_count)}</span></div>`
      : '';
    const dh = state.deep_health?.[name];
    let deepRow = '';
    if (dh && dh.last) {
      const last = dh.last;
      const when = (last.at || '').slice(11, 19); // HH:MM:SS
      const verdict = last.ok
        ? `<span class="dh-ok">deep check ok</span>`
        : `<span class="dh-fail">deep check FAILED</span>`;
      const lat = last.latency_ms != null ? ` <span class="muted">${escapeHtml(last.latency_ms)} ms</span>` : '';
      const restarts = dh.auto_restarts_window > 0
        ? ` <span class="muted">· ${escapeHtml(dh.auto_restarts_window)} auto-restart${dh.auto_restarts_window === 1 ? '' : 's'} in 30 min</span>`
        : '';
      deepRow = `
<div class="row deep-row">
<span class="k">Deep</span>
<span class="v deep-v">${verdict} <span class="muted small">${escapeHtml(when)}</span>${lat}${restarts}</span>
<button class="icon-btn dh-run-btn" data-dh-run="${nameAttr}" title="Run deep check now">↻</button>
</div>
${last.ok ? '' : `<div class="deep-error muted small">${escapeHtml((last.error || last.note || '').slice(0, 200))}</div>`}
`;
    } else if (dh) {
      // A deep-health entry exists but has not produced a result yet.
      deepRow = `
<div class="row deep-row">
<span class="k">Deep</span>
<span class="v muted-v">no probe yet</span>
<button class="icon-btn dh-run-btn" data-dh-run="${nameAttr}" title="Run deep check now">↻</button>
</div>
`;
    }
    card.innerHTML = `
<div class="head">
<span class="name">${nameAttr}</span>
<span class="kind">${escapeHtml(s.kind || '')}</span>
<span class="status">${escapeHtml(statusLabel(cls))}</span>
</div>
${hostRow}
${urlRow}
${modelRow}
${restartsRow}
${deepRow}
<div class="service-actions">
${isBot ? `<button class="btn primary" data-mb-update title="Pull latest code, rebuild, and recreate the bot" ${inFlight || state.mb_update_in_flight ? 'disabled' : ''}>Update</button>` : ''}
<button class="btn" data-svc-action="${nameAttr}:start" ${disable('start') ? 'disabled' : ''}>Start</button>
<button class="btn" data-svc-action="${nameAttr}:restart" ${disable('restart') ? 'disabled' : ''}>Restart</button>
<button class="btn danger" data-svc-action="${nameAttr}:stop" ${disable('stop') ? 'disabled' : ''}>Stop</button>
${isBot ? `<button class="btn" data-mb-logs title="Show the last 100 log lines">View logs</button>` : ''}
</div>
`;
    grid.appendChild(card);
  }
  for (const btn of grid.querySelectorAll('.btn[data-svc-action]')) {
    btn.addEventListener('click', () => onServiceAction(btn.dataset.svcAction));
  }
  // Only the first Update / View logs button is wired, i.e. a single bot tile
  // is assumed.
  const mbUpdateBtn = grid.querySelector('[data-mb-update]');
  if (mbUpdateBtn) mbUpdateBtn.addEventListener('click', onMatrixBridgeUpdate);
  const mbLogsBtn = grid.querySelector('[data-mb-logs]');
  if (mbLogsBtn) mbLogsBtn.addEventListener('click', openMatrixBridgeLogs);
  for (const btn of grid.querySelectorAll('[data-dh-run]')) {
    btn.addEventListener('click', () => onDeepHealthRun(btn.dataset.dhRun, btn));
  }
}
// Click handler for a card's "run deep check now" button: triggers the probe
// for `name`, then reloads state.deep_health and redraws the services panel.
// The button shows '…' and is disabled while this is in progress. A failed
// probe request is only logged; the refresh happens either way.
async function onDeepHealthRun(name, btn) {
  btn.disabled = true;
  const previousLabel = btn.textContent;
  btn.textContent = '…';
  const runUrl = `/api/deep-health/${encodeURIComponent(name)}/run`;
  try {
    await fetchJSON(runUrl, { method: 'POST' });
  } catch (err) {
    console.warn('deep-health run failed', err);
  } finally {
    try {
      // Best-effort reload; on failure the previous results stay in place.
      const latest = await fetchJSON('/api/deep-health');
      state.deep_health = latest;
    } catch {}
    btn.textContent = previousLabel;
    btn.disabled = false;
    renderServices();
  }
}
// ===================== speech-model patches (v0.11) =====================
// Fetch /api/speech-models and redraw the speech-models card: patch sync
// status, whether the ASR and diarizer models are loaded, a per-file checksum
// comparison, and the Reapply / Restart buttons. The panel is hidden when the
// request fails or the response carries no `patches` section. Does nothing if
// the panel or card element is absent from the page.
async function renderSpeechModels() {
  const panel = el('#speech-models-panel');
  const card = el('#speech-models-card');
  if (!panel || !card) return;

  let data;
  try {
    data = await fetchJSON('/api/speech-models');
  } catch {
    // If parakeet host isn't even configured, hide the section entirely
    panel.classList.add('hidden');
    return;
  }
  if (!data || !data.patches) {
    panel.classList.add('hidden');
    return;
  }
  panel.classList.remove('hidden');

  const patches = data.patches || {};
  const health = data.container_health || {};

  let statusPill;
  switch (patches.status || 'unknown') {
    case 'in_sync':
      statusPill = `<span class="tag ok">patches in sync</span>`;
      break;
    case 'drift':
      statusPill = `<span class="tag warn">spark-control has newer patches</span>`;
      break;
    case 'missing':
      statusPill = `<span class="tag bad">patches missing in container</span>`;
      break;
    default:
      statusPill = `<span class="tag warn">unknown</span>`;
  }

  const loadedTag = (isLoaded) => (isLoaded
    ? '<span class="tag ok">loaded</span>'
    : '<span class="tag bad">not loaded</span>');
  const asrTag = loadedTag(!!health.asr_loaded);
  const diarTag = loadedTag(!!health.diarizer_loaded);
  const asrModel = escapeHtml(health.model || '—');
  const diarModel = escapeHtml(health.diarizer_model || '—');

  const shaCell = (sha) => (sha ? `<code>${escapeHtml(sha)}</code>` : '<span class="muted">—</span>');
  const fileRows = (patches.files || []).map((f) => {
    let sync;
    if (f.in_sync) {
      sync = '<span class="sm-file-ok">✓ in sync</span>';
    } else if (f.remote_sha == null) {
      // No remote checksum at all → the file is absent on the remote side.
      sync = '<span class="sm-file-bad">✗ missing</span>';
    } else {
      sync = '<span class="sm-file-warn">⚠ drift</span>';
    }
    const local = shaCell(f.local_sha);
    const remote = shaCell(f.remote_sha);
    return `
<div class="sm-file-row">
<span class="sm-file-name"><code>${escapeHtml(f.name)}</code></span>
<span class="sm-file-sync">${sync}</span>
<span class="sm-file-sha muted small">local ${local} → remote ${remote}</span>
</div>
`;
  }).join('');

  const stamp = (iso) => (iso ? new Date(iso).toLocaleString() : 'never (since spark-control boot)');
  const lastReapply = stamp(patches.last_reapply_at);
  const lastRestart = stamp(patches.last_restart_at);

  card.innerHTML = `
<div class="sm-header">
<div class="sm-title">parakeet-asr container</div>
${statusPill}
</div>
<div class="sm-models">
<div class="sm-model-row">
<span class="sm-model-kind">Parakeet ASR</span>
<span class="sm-model-name">${asrModel}</span>
<span class="sm-model-loaded">${asrTag}</span>
</div>
<div class="sm-model-row">
<span class="sm-model-kind">Sortformer diarizer</span>
<span class="sm-model-name">${diarModel}</span>
<span class="sm-model-loaded">${diarTag}</span>
</div>
</div>
<div class="sm-files">${fileRows}</div>
<div class="sm-meta muted small">
Last reapply: ${escapeHtml(lastReapply)} · Last manual restart: ${escapeHtml(lastRestart)}
</div>
<div class="sm-actions">
<button class="btn primary" id="sm-reapply">Reapply patches</button>
<button class="btn" id="sm-restart">Restart container</button>
</div>
`;
  el('#sm-reapply').addEventListener('click', onSpeechModelsReapply);
  el('#sm-restart').addEventListener('click', onSpeechModelsRestart);
}
// Render a list of reapply steps ({ ok, step, name?, error? }) as progress rows.
function speechModelStepsHtml(stepList) {
  return stepList.map((s) => {
    const mark = s.ok ? '<span class="sm-file-ok">✓</span>' : '<span class="sm-file-bad">✗</span>';
    const label = s.name ? ` (${escapeHtml(s.name)})` : '';
    const extra = s.error ? `<div class="muted small">${escapeHtml(s.error)}</div>` : '';
    return `<div class="sm-prog-step">${mark} <strong>${escapeHtml(s.step)}</strong>${label}${extra}</div>`;
  }).join('');
}

// Click handler for "Reapply patches": after confirmation, opens the progress
// dialog, POSTs /api/speech-models/reapply and lists the steps the server
// reports. On failure it tries to recover a step list from the error message
// and otherwise shows the message itself. The Close button is re-enabled and
// the speech-models card refreshed in either case.
async function onSpeechModelsReapply() {
  if (!confirm('Reapply Sortformer patches to the parakeet-asr container? The container will restart and both ASR + diarizer will be unavailable for ~60–120 seconds.')) return;
  const dlg = el('#speech-models-progress-dialog');
  const steps = el('#sm-prog-steps');
  const closeBtn = el('#sm-prog-close');
  steps.innerHTML = '<div class="muted small">Starting…</div>';
  // Keep the dialog open until the request has settled.
  closeBtn.disabled = true;
  closeBtn.onclick = () => dlg.close();
  dlg.showModal();
  try {
    const r = await fetchJSON('/api/speech-models/reapply', { method: 'POST' });
    steps.innerHTML = speechModelStepsHtml(r.steps || [])
      + `<div class="sm-prog-done sm-file-ok">Done — both models reloaded.</div>`;
  } catch (e) {
    // fetchJSON errors read "<status> <statusText>: <body>". Drop the first two
    // colon-separated pieces and try to parse the remainder as JSON.
    // NOTE(review): this only succeeds when the body itself begins with a
    // "<prefix>:" before the JSON — confirm against the server's error format.
    let parsed = null;
    try { parsed = JSON.parse(e.message.split(':').slice(2).join(':').trim()); } catch {}
    const stepHtml = parsed && parsed.result && parsed.result.steps
      ? speechModelStepsHtml(parsed.result.steps)
      : `<div class="sm-file-bad">${escapeHtml(e.message)}</div>`;
    steps.innerHTML = stepHtml + `<div class="sm-prog-done sm-file-bad">Failed.</div>`;
  } finally {
    closeBtn.disabled = false;
    try { await renderSpeechModels(); } catch {}
  }
}
// "Restart container" button handler: after confirmation, POSTs to
// /api/speech-models/restart, alerts on failure, and re-renders the
// speech-models panel either way.
async function onSpeechModelsRestart() {
  const proceed = confirm('Restart parakeet-asr container? STT + diarization will be unavailable for ~30 seconds.');
  if (!proceed) return;
  try {
    await fetchJSON('/api/speech-models/restart', { method: 'POST' });
  } catch (err) {
    alert('Restart failed: ' + err.message);
  } finally {
    // Best-effort refresh of the panel.
    try {
      await renderSpeechModels();
    } catch {}
  }
}
// NOTE: a WhisperX install action lived here briefly in v0.12 but was
// reverted in v0.13.0:0 — the NGC PyTorch container on ARM64 doesn't ship
// torchaudio and we couldn't reliably build it from source. The existing
// Parakeet + Sortformer pipeline stays as the audio path. See release notes.
// Run a service action. `key` has the form "<service>:<action>" (for example
// "parakeet:restart"). Only one action runs at a time; the in-flight key is
// kept in state while the request is pending.
async function onServiceAction(key) {
  if (state.service_action_in_flight) return;
  const parts = key.split(':');
  const name = parts[0];
  const action = parts[1];
  state.service_action_in_flight = key;
  renderServices();
  try {
    await fetchJSON(`/api/services/${name}/${action}`, { method: 'POST' });
  } catch (err) {
    alert(`${action} ${name} failed: ${err.message}`);
  } finally {
    state.service_action_in_flight = null;
    // Pull fresh service state (best-effort), then repaint and re-poll.
    try {
      state.services = await fetchJSON('/api/services');
    } catch {}
    renderServices();
    pollStatus();
  }
}
// ===================== matrix-bridge bot (update + logs) =====================
// Client-side bookkeeping for the matrix-bridge update dialog.
const mbState = {
  job_id: null,      // id of the update job currently attached to
  eventsource: null, // open EventSource for that job's stream, if any
  timer: null,       // setInterval handle driving the elapsed clock
  started_at: null,  // epoch milliseconds the elapsed clock counts from
};
// (Re)start the elapsed-time clock in the matrix-bridge update dialog.
// `at` is the start instant in epoch milliseconds; the display is repainted
// immediately and then every 500 ms.
function mbTimerStart(at) {
  mbState.started_at = at;
  if (mbState.timer) clearInterval(mbState.timer);
  function refreshElapsed() {
    if (!mbState.started_at) return;
    const elapsedSec = Math.max(0, Math.floor((Date.now() - mbState.started_at) / 1000));
    const minutes = Math.floor(elapsedSec / 60);
    const seconds = (elapsedSec % 60).toString().padStart(2, '0');
    el('#mb-update-elapsed').textContent = `${minutes}:${seconds}`;
  }
  refreshElapsed();
  mbState.timer = setInterval(refreshElapsed, 500);
}
// "Update" button handler for the matrix-bridge tile: after confirmation,
// starts an update job and attaches the progress dialog to it. The in-flight
// flag is set while the request is pending and cleared again if it fails.
async function onMatrixBridgeUpdate() {
  if (state.mb_update_in_flight) return;
  const question = 'Update the matrix-bridge bot?\n\nThis pulls the latest code, rebuilds the container image, and recreates the container. The first build after a base-image change can take several minutes. The bot is briefly offline while it restarts.';
  if (!confirm(question)) return;
  state.mb_update_in_flight = true;
  renderServices();
  try {
    const started = await fetchJSON('/api/matrix-bridge/update', { method: 'POST' });
    attachMbUpdateProgress(started.job_id);
  } catch (err) {
    state.mb_update_in_flight = false;
    renderServices();
    alert('Update failed to start: ' + err.message);
  }
}
// Open the matrix-bridge update dialog for `jobId`, backfill it from the job
// snapshot, then follow the job's event stream until it reports `done`.
//
// - Any stream left over from a previous attach is closed first.
// - The backfilled log ends with a newline so streamed lines (each appended
//   as line + '\n') start on their own row.
// - The job only counts as finished when the snapshot carries a real
//   returncode; a missing field means "still running".
async function attachMbUpdateProgress(jobId) {
  if (mbState.eventsource) { mbState.eventsource.close(); mbState.eventsource = null; }
  mbState.job_id = jobId;
  el('#mb-update-log').textContent = '';
  el('#mb-update-title').textContent = 'Updating matrix-bridge…';
  el('#mb-update-phase').textContent = 'Starting…';
  el('#mb-update-dialog').showModal();
  try {
    const snap = await fetchJSON(`/api/matrix-bridge/update/${jobId}`);
    // An unparsable timestamp would leave the clock blank; count from now instead.
    const startedAt = Date.parse(snap.started_at);
    mbTimerStart(Number.isNaN(startedAt) ? Date.now() : startedAt);
    el('#mb-update-phase').textContent = snap.phase || 'Working…';
    el('#mb-update-log').textContent = (snap.lines || []).map((line) => line + '\n').join('');
    if (snap.returncode !== null && snap.returncode !== undefined) { onMbUpdateDone(snap); return; }
  } catch { mbTimerStart(Date.now()); }
  const es = new EventSource(`/api/matrix-bridge/update/${jobId}/stream`);
  mbState.eventsource = es;
  es.onmessage = ev => {
    try {
      const d = JSON.parse(ev.data);
      if (d.line !== undefined) {
        const log = el('#mb-update-log');
        log.textContent += d.line + '\n';
        log.scrollTop = log.scrollHeight;
      }
    } catch {}
  };
  es.addEventListener('phase', ev => {
    try { el('#mb-update-phase').textContent = JSON.parse(ev.data).phase; } catch {}
  });
  es.addEventListener('done', ev => {
    let d = {}; try { d = JSON.parse(ev.data); } catch {}
    onMbUpdateDone(d);
  });
  es.onerror = () => {
    // Don't leave the Update button wedged-disabled on a dropped stream. The
    // job keeps running server-side; re-clicking Update returns a clean 409.
    es.close();
    mbState.eventsource = null;
    state.mb_update_in_flight = false;
    el('#mb-update-phase').textContent = 'Lost connection to the update stream — reopen or check logs.';
    renderServices();
  };
}
// Finalise the matrix-bridge update dialog. `d` is the job's final payload;
// `d.state === 'failed'` selects the failure wording, anything else is shown
// as success. Also closes the stream, stops the clock, clears the in-flight
// flag and refreshes the service tiles.
function onMbUpdateDone(d) {
  const stream = mbState.eventsource;
  if (stream) {
    stream.close();
    mbState.eventsource = null;
  }
  if (mbState.timer) {
    clearInterval(mbState.timer);
    mbState.timer = null;
  }
  state.mb_update_in_flight = false;
  const failed = d.state === 'failed';
  el('#mb-update-title').textContent = failed ? `Update failed (rc=${d.returncode})` : 'Update complete';
  el('#mb-update-phase').textContent = failed ? 'Failed — see the log above.' : 'Done ✓';
  // Refresh the tile's badge; runs in the background, the fetch is best-effort.
  const refreshTiles = async () => {
    try { state.services = await fetchJSON('/api/services'); } catch {}
    renderServices();
  };
  refreshTiles();
}
// Open the matrix-bridge logs dialog with a "Loading…" placeholder, then
// fetch the log tail into it.
async function openMatrixBridgeLogs() {
  const output = el('#mb-logs-pre');
  el('#mb-logs-title').textContent = 'matrix-bridge logs';
  output.textContent = 'Loading…';
  const dialog = el('#mb-logs-dialog');
  dialog.showModal();
  await loadMatrixBridgeLogs();
}
// Fetch the last 100 lines of matrix-bridge logs into the logs <pre>,
// scrolled to the bottom. The refresh button (when present) is disabled
// while the request is in flight.
async function loadMatrixBridgeLogs() {
  const output = el('#mb-logs-pre');
  const refreshBtn = el('#mb-logs-refresh');
  const setBusy = (busy) => {
    if (refreshBtn) refreshBtn.disabled = busy;
  };
  setBusy(true);
  try {
    const reply = await fetchJSON('/api/matrix-bridge/logs?tail=100');
    output.textContent = reply.output || '(no output)';
    output.scrollTop = output.scrollHeight;
  } catch (err) {
    output.textContent = 'Could not read logs: ' + err.message;
  } finally {
    setBusy(false);
  }
}
// Show the endpoint panel (base URL, model id and a ready-to-paste curl
// example) when vLLM reports ok with both a current model and a base URL;
// hide the panel otherwise.
function renderEndpoint(status) {
  const vllm = status.vllm || {};
  const isReady = vllm.ok && vllm.current_model && vllm.base_url;
  el('#endpoint-panel').classList.toggle('hidden', !isReady);
  if (!isReady) return;
  const baseUrl = vllm.base_url;
  const model = vllm.current_model;
  el('#ep-url').textContent = baseUrl;
  el('#ep-model').textContent = model;
  el('#ep-curl-snippet').textContent =
`curl -s ${baseUrl}/chat/completions \\
-H 'content-type: application/json' \\
-d '{
"model": "${model}",
"messages": [{"role": "user", "content": "Hello"}]
}'`;
}
// Copy `text` to the clipboard. On success the optional indicator element
// gets a "copied" class for 1.2 s and the function resolves to true. If the
// clipboard call fails, the indicator's contents are selected so the user
// can copy by hand, and the function resolves to false.
async function copyText(text, indicatorEl) {
  try {
    await navigator.clipboard.writeText(text);
    if (indicatorEl) {
      const marker = indicatorEl.classList;
      marker.add('copied');
      setTimeout(() => indicatorEl.classList.remove('copied'), 1200);
    }
    return true;
  } catch {
    // Plain HTTP fallback: select the text so the user can ⌘C
    if (indicatorEl) {
      const manualRange = document.createRange();
      manualRange.selectNode(indicatorEl);
      window.getSelection().removeAllRanges();
      window.getSelection().addRange(manualRange);
    }
    return false;
  }
}
// Install one delegated click handler on <body> covering the three copy
// affordances, checked in this order:
//   [data-copy-text] - copy the literal text held in the attribute
//   [data-copy]      - copy the textContent of the element the selector names
//   [data-copy-self] - copy the clicked element's own textContent
function setupCopyButtons() {
  const onBodyClick = async (event) => {
    // Inline icon copy with literal text (used for dynamically-rendered service rows)
    const literalBtn = event.target.closest('[data-copy-text]');
    if (literalBtn) {
      await copyText(literalBtn.dataset.copyText, literalBtn);
      return;
    }
    // Copy buttons (with svg icon) referenced by data-copy="selector"
    const refBtn = event.target.closest('[data-copy]');
    if (refBtn) {
      const source = el(refBtn.dataset.copy);
      if (!source) return;
      await copyText(source.textContent, refBtn);
      // Flash the copied element as well as the button.
      source.classList.add('copied');
      setTimeout(() => source.classList.remove('copied'), 1200);
      return;
    }
    // Self-copy: clicking the text itself
    const selfTarget = event.target.closest('[data-copy-self]');
    if (selfTarget) {
      await copyText(selfTarget.textContent, selfTarget);
    }
  };
  document.body.addEventListener('click', onBodyClick);
}
// Paint the header health dots from the /api/status payload and stamp the
// "updated" time. Each dot is green for ok === true, red for ok === false
// and amber for anything else; the raw payload is exposed as the tooltip.
function renderHealth(status) {
  const paint = (selector, ok, payload) => {
    const item = el(selector);
    if (!item) return;
    // A service switched off via DISABLED_SERVICES isn't part of this
    // deployment — hide its indicator entirely rather than show it as down.
    if (payload && payload.disabled) {
      item.classList.add('hidden');
      return;
    }
    item.classList.remove('hidden');
    const tone = ok === true ? 'ok' : ok === false ? 'bad' : 'warn';
    const dot = item.querySelector('.dot');
    dot.classList.remove('ok', 'bad', 'warn');
    dot.classList.add(tone);
    item.title = JSON.stringify(payload || {}, null, 2);
  };
  for (const name of ['vllm', 'parakeet', 'kokoro', 'embeddings', 'qdrant']) {
    const payload = status[name];
    paint(`#h-${name}`, payload && payload.ok, payload);
  }
  el('#updated').textContent = `updated ${new Date().toLocaleTimeString()}`;
}
// Toggle the setup banner and the dashboard tab strip from status.configured:
// the banner shows only while unconfigured, the tabs only once configured.
function renderBanner(status) {
  const isConfigured = !!status.configured;
  el('#setup-banner').classList.toggle('hidden', isConfigured);
  // Dashboard tabs share the same "configured" gate as the rest of the
  // body — hidden until SSH is set up, then visible.
  const tabStrip = el('#dashboard-tabs');
  if (!tabStrip) return;
  tabStrip.classList.toggle('hidden', !status.configured);
}
// ===================== dashboard tabs (LLM / Audio) =====================
// localStorage key remembering which dashboard tab was last selected.
const TABS_STORAGE_KEY = 'sparkcontrol.dashboard.activeTab';
// Wire the dashboard tab buttons: restore the last-selected tab (falling
// back to "llm"), switch tabs on click, and persist each selection.
// Storage failures are ignored.
function setupDashboardTabs() {
  const tabButtons = $$('.dashboard-tab');
  if (!tabButtons.length) return;
  const activate = (name) => {
    tabButtons.forEach((btn) => {
      const isActive = btn.dataset.tab === name;
      btn.classList.toggle('active', isActive);
      btn.setAttribute('aria-selected', isActive ? 'true' : 'false');
    });
    $$('.tab-content').forEach((pane) => {
      pane.classList.toggle('active', pane.id === `tab-${name}`);
    });
    try { localStorage.setItem(TABS_STORAGE_KEY, name); } catch {}
  };
  tabButtons.forEach((btn) => {
    btn.addEventListener('click', () => activate(btn.dataset.tab));
  });
  // Restore the last-selected tab, default to "llm"
  let remembered;
  try { remembered = localStorage.getItem(TABS_STORAGE_KEY); } catch {}
  const knownTab = remembered === 'audio' || remembered === 'llm';
  activate(knownTab ? remembered : 'llm');
}
// ===================== collapsible endpoint card =====================
// localStorage key remembering whether the endpoint card is collapsed.
const ENDPOINT_COLLAPSED_KEY = 'sparkcontrol.endpoint.collapsed';
// Wire the endpoint card's collapse button. The card starts collapsed unless
// storage holds the literal string 'false'; each click flips the state and
// persists it. Storage failures are ignored.
function setupEndpointCollapse() {
  const panel = el('#endpoint-panel');
  const toggleBtn = el('#ep-collapse');
  if (!panel || !toggleBtn) return;
  // Default: collapsed (most of the time you don't need to see endpoint details)
  let stored = null;
  try { stored = localStorage.getItem(ENDPOINT_COLLAPSED_KEY); } catch {}
  panel.classList.toggle('collapsed', stored !== 'false');
  toggleBtn.addEventListener('click', () => {
    const willCollapse = !panel.classList.contains('collapsed');
    panel.classList.toggle('collapsed', willCollapse);
    try { localStorage.setItem(ENDPOINT_COLLAPSED_KEY, willCollapse ? 'true' : 'false'); } catch {}
  });
}
// Push the swap phase, detail text and progress-bar width from `state` into
// the swap panel. The bar never drops below 2% wide.
function renderSwapPanel() {
  const percent = Math.max(2, Math.round(state.swap_progress * 100));
  el('#swap-phase').textContent = state.swap_phase;
  el('#swap-phase-detail').textContent = state.swap_phase_detail;
  el('#swap-phase-fill').style.width = `${percent}%`;
}
// ===================== phase detection =====================
// Swap phases paired with the progress-bar fraction (0..1) shown for each.
const PHASE_ORDER = [
  ['Stopping current model…', 0.08],
  ['Starting new model…', 0.16],
  ['Joining Ray cluster…', 0.22],
  ['Loading weights…', 0.30],
  ['Compiling kernels…', 0.78],
  ['Warming up…', 0.88],
  ['Starting API server…', 0.94],
  ['Ready ✓', 1.00],
  ['Failed', 1.00],
];
// Progress fraction for a phase label; labels not listed in PHASE_ORDER
// get 0.05.
function phaseProgress(name) {
  for (const [label, fraction] of PHASE_ORDER) {
    if (label === name) return fraction;
  }
  return 0.05;
}
// Work out the swap phase shown to the user and store it in state
// (swap_phase, swap_phase_detail, swap_progress).
//
// The server-reported job state supplies the default phase. The last 40 log
// lines are then scanned newest-first; the first line matching a known
// marker decides the phase. A shard-loading progress line additionally
// yields a detail string and a progress value interpolated inside the
// "Loading weights…" band.
function deriveSwapPhase(serverState, lines) {
  const phaseByServerState = {
    starting: 'Starting…',
    stopping: 'Stopping current model…',
    launching: 'Starting new model…',
    tailing: 'Loading weights…',
    ready: 'Ready ✓',
    failed: 'Failed',
  };
  const defaultPhase = phaseByServerState[serverState] || 'Working…';
  const commit = (phase, detail, progress) => {
    state.swap_phase = phase;
    state.swap_phase_detail = detail;
    state.swap_progress = progress;
  };
  // Classify one log line; checks run in priority order. Returns null when
  // the line carries no marker.
  const classify = (line) => {
    if (line.includes('Application startup complete')) return { phase: 'Ready ✓' };
    if (line.includes('Started server process')) return { phase: 'Starting API server…' };
    if (line.includes('Initial profiling/warmup') || line.includes('init engine (profile, create kv cache, warmup model)')) {
      return { phase: 'Warming up…' };
    }
    if (line.match(/Capturing CUDA graphs|Compiling a graph|torch\.compile took|Graph capturing/)) {
      return { phase: 'Compiling kernels…' };
    }
    const shard = line.match(/Loading safetensors checkpoint shards:\s+(\d+)%\s+Completed\s+\|\s+(\d+)\/(\d+)/);
    if (shard) {
      const innerProgress = parseInt(shard[2], 10) / parseInt(shard[3], 10);
      return {
        phase: 'Loading weights…',
        detail: `${shard[2]} of ${shard[3]} shards (${shard[1]}%)`,
        // Map shard progress 0..1 into the 0.30..0.78 band
        progress: 0.30 + (0.78 - 0.30) * innerProgress,
      };
    }
    if (line.includes('Connecting to existing Ray cluster')) return { phase: 'Joining Ray cluster…' };
    if (line.includes('Resolved architecture') || line.match(/launch-cluster\.sh.*exec vllm serve/)) {
      return { phase: 'Starting new model…' };
    }
    if (line.match(/launch-cluster\.sh stop/)) return { phase: 'Stopping current model…' };
    return null;
  };
  // Refine from log content (search recent lines first)
  const recent = lines.slice(-40);
  for (let idx = recent.length - 1; idx >= 0; idx--) {
    const hit = classify(recent[idx]);
    if (!hit) continue;
    const progress = 'progress' in hit ? hit.progress : phaseProgress(hit.phase);
    commit(hit.phase, hit.detail || '', progress);
    return;
  }
  commit(defaultPhase, '', phaseProgress(defaultPhase));
}
// ===================== timer =====================
// (Re)start the swap panel's elapsed-time clock from `startedAtMillis`
// (epoch milliseconds). Repaints immediately, then every 500 ms.
function startTimer(startedAtMillis) {
  state.swap_started_at = startedAtMillis;
  if (state.timer_handle) clearInterval(state.timer_handle);
  function repaint() {
    if (!state.swap_started_at) return;
    const elapsed = Math.max(0, Math.floor((Date.now() - state.swap_started_at) / 1000));
    const minutes = Math.floor(elapsed / 60);
    const seconds = (elapsed % 60).toString().padStart(2, '0');
    el('#swap-elapsed').textContent = `${minutes}:${seconds}`;
  }
  repaint();
  state.timer_handle = setInterval(repaint, 500);
}
// Stop the swap panel's elapsed-time clock, if it is running.
function stopTimer() {
  if (!state.timer_handle) return;
  clearInterval(state.timer_handle);
  state.timer_handle = null;
}
// ===================== polling + SSE =====================
// One status-poll cycle: fetch /api/status, repaint the status-driven
// widgets, refresh service and deep-health data, and keep the swap panel in
// sync with whatever swap job the server reports. Failures are logged and
// never thrown.
async function pollStatus() {
  try {
    const status = await fetchJSON('/api/status');
    state.current_model_key = status.current_model_key;
    state.configured = status.configured;
    renderBanner(status);
    renderCurrent(status);
    renderEndpoint(status);
    renderHealth(status);
    // If models hasn't loaded yet (init may have hit a transient proxy timeout), retry.
    const modelsMissing = !state.models || Object.keys(state.models).length === 0;
    if (modelsMissing) {
      try { await loadModels(); } catch {}
    }
    // Refresh services state lazily — every 5s poll triggers this too.
    try {
      state.services = await fetchJSON('/api/services');
      // Deep health is optional; a failure here still repaints the tiles.
      try { state.deep_health = await fetchJSON('/api/deep-health'); } catch {}
      renderServices();
    } catch {}
    const serverJob = status.current_swap_job;
    if (serverJob && serverJob !== state.swap_job_id) {
      // A swap we are not following yet: join it, backfilling its history.
      attachToSwap(serverJob, /*needsBackfill=*/true);
    } else if (!serverJob && state.swap_job_id && !state.swap_eventsource) {
      // Foreign swap ended
      detachSwap();
    }
    renderCards();
  } catch (err) {
    console.error('status poll failed', err);
  }
}
// Fetch the model catalog and store its `defaults` and `models` in state
// (each falling back to an empty object when absent).
async function loadModels() {
  const { defaults, models } = await fetchJSON('/api/models');
  state.defaults = defaults || {};
  state.models = models || {};
}
// Fetch per-model on-disk status and re-render the cards when the reply
// carries a `models` map. Best-effort: a failure is only logged as a warning.
async function loadDiskStatus() {
  // Probes each catalog model's HF cache over SSH; takes a beat. Best-effort.
  let reply;
  try {
    reply = await fetchJSON('/api/models/disk-status');
    if (!reply || !reply.models) return;
    state.disk_status = reply.models;
    state.disk_status_loaded = true;
    renderCards();
  } catch (err) {
    // Silent — pills just won't render. Don't block dashboard.
    console.warn('disk-status probe failed:', err.message);
  }
}
// Format a byte count as a short decimal-unit string ("512 B", "1.5 KB",
// "2.5 GB"). Non-finite, zero and negative inputs render as "0 B".
//
// When rounding to one decimal would print "1000.0" of a unit (999 999 bytes
// is 999.999 KB), the value is promoted to the next unit so the result reads
// "1.0 MB" rather than "1000.0 KB". GB is the largest unit.
function fmtBytesShort(n) {
  if (!Number.isFinite(n) || n <= 0) return '0 B';
  if (n < 1e3) return `${n} B`;
  const units = [['KB', 1e3], ['MB', 1e6], ['GB', 1e9]];
  let idx = 0;
  if (n >= 1e9) idx = 2;
  else if (n >= 1e6) idx = 1;
  let text = (n / units[idx][1]).toFixed(1);
  if (text === '1000.0' && idx < units.length - 1) {
    idx += 1;
    text = (n / units[idx][1]).toFixed(1);
  }
  return `${text} ${units[idx][0]}`;
}
// Open the "delete from disk" confirmation dialog for catalog model `key`.
// Does nothing unless the model exists and its disk status says it is on
// disk. Confirming issues DELETE /api/models/<key>/disk; on success the
// dialog closes and disk status is re-probed, on failure the error is shown
// inside the dialog.
function openDiskDeleteDialog(key) {
  const model = state.models[key];
  const disk = state.disk_status[key];
  if (!model || !disk || !disk.on_disk) return;
  const dialog = el('#disk-delete-dialog');
  el('#dd-summary').innerHTML = `Free <strong>${fmtBytesShort(disk.total_bytes)}</strong> by removing <strong>${escapeHtml(model.display_name)}</strong> (<code>${escapeHtml(model.repo)}</code>) from disk.`;
  // List only the hosts that actually hold a copy.
  const hostList = el('#dd-hosts');
  hostList.innerHTML = '';
  for (const host of (disk.per_host || [])) {
    if (host.on_disk) {
      const item = document.createElement('li');
      item.innerHTML = `<code>${escapeHtml(host.host)}</code> — ${fmtBytesShort(host.size_bytes)}`;
      hostList.appendChild(item);
    }
  }
  const errorBox = el('#dd-error');
  errorBox.classList.add('hidden');
  errorBox.textContent = '';
  const confirmBtn = el('#dd-confirm');
  const cancelBtn = el('#dd-cancel');
  const setBusy = (busy) => {
    confirmBtn.disabled = busy;
    cancelBtn.disabled = busy;
    confirmBtn.textContent = busy ? 'Deleting…' : 'Delete from disk';
  };
  cancelBtn.onclick = () => dialog.close();
  confirmBtn.onclick = async () => {
    setBusy(true);
    try {
      const reply = await fetchJSON(`/api/models/${encodeURIComponent(key)}/disk`, { method: 'DELETE' });
      dialog.close();
      // Optimistically clear local disk state for this key, then refresh.
      delete state.disk_status[key];
      renderCards();
      // Eagerly re-probe so size is accurate (and shows "not downloaded" pill).
      loadDiskStatus();
      const freed = reply && typeof reply.bytes_freed === 'number' ? fmtBytesShort(reply.bytes_freed) : '';
      console.log(`Deleted ${model.display_name} from disk${freed ? ` — freed ${freed}` : ''}.`);
    } catch (err) {
      errorBox.textContent = err.message || 'Delete failed';
      errorBox.classList.remove('hidden');
    } finally {
      setBusy(false);
    }
  };
  dialog.showModal();
}
// Ask the server to swap to `modelKey` and follow the resulting job.
// Ignored while a swap is already being tracked; a failed request is
// reported with an alert.
async function triggerSwap(modelKey) {
  if (state.swap_job_id) return;
  const request = {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    body: JSON.stringify({ model_key: modelKey }),
  };
  try {
    const started = await fetchJSON('/api/swap', request);
    attachToSwap(started.job_id, /*needsBackfill=*/false);
  } catch (err) {
    alert('Failed to start swap: ' + err.message);
  }
}
// Start downloading the weights of catalog model `modelKey`, after user
// confirmation. Refuses while another download is running. On success the
// download panel is opened and attached to the new job.
async function triggerDownloadForKey(modelKey) {
  const model = state.models[modelKey];
  if (!model) return;
  if (dlState.job_id) {
    alert('A download is already in progress; wait for it to finish.');
    return;
  }
  // Pick the download target from the model's mode:
  //   solo    -> spark1 only
  //   cluster -> both Sparks (fetch on Spark 1, rsync to Spark 2 in parallel)
  const isCluster = model.mode === 'cluster';
  const dlMode = isCluster ? 'cluster' : 'spark1';
  const target = isCluster ? 'both Sparks' : 'Spark 1';
  const sizeNote = model.size_gb ? ` (~${model.size_gb} GB)` : '';
  const question = `Download "${model.display_name}"${sizeNote} to ${target}? Large models can take a while; you can watch progress in the download panel.`;
  if (!confirm(question)) return;
  // Remembered so the post-download "add to catalog" prompt can be pre-filled.
  dlState.last_repo = model.repo;
  dlState.last_mode = dlMode;
  try {
    const started = await fetchJSON('/api/download', {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: JSON.stringify({ repo: model.repo, mode: dlMode }),
    });
    // Open the download panel + attach to progress stream
    openDownloadForm();
    attachToDownload(started.job_id);
  } catch (err) {
    alert('Failed to start download: ' + err.message);
  }
}
// Start following swap job `jobId`: reset the swap panel, backfill it from
// the job snapshot, then stream further log lines and state changes until
// the job reports `done`.
//
// needsBackfill - true when joining a swap that was started elsewhere. It
//                 only changes what happens when the snapshot fetch fails:
//                 a swap started here then falls back to timing from "now".
async function attachToSwap(jobId, needsBackfill) {
  const previous = state.swap_eventsource;
  if (previous) {
    previous.close();
    state.swap_eventsource = null;
  }
  state.swap_job_id = jobId;
  state.swap_lines = [];
  state.swap_phase = 'Starting…';
  state.swap_phase_detail = '';
  state.swap_progress = 0.05;
  el('#swap-log').textContent = '';
  el('#swap-panel').classList.remove('hidden');
  renderSwapPanel();
  // Backfill (if joining mid-swap) — fetch the snapshot so we have started_at + history
  try {
    const snap = await fetchJSON(`/api/swap/${jobId}`);
    const startedAt = Date.parse(snap.started_at);
    if (!isNaN(startedAt)) startTimer(startedAt);
    state.swap_lines = snap.lines || [];
    for (const line of state.swap_lines) appendLog(line);
    deriveSwapPhase(snap.state, state.swap_lines);
    renderSwapPanel();
    const alreadyFinished = snap.returncode !== null && snap.returncode !== undefined;
    if (alreadyFinished) {
      // Already finished — close panel after a beat
      handleSwapDone(snap);
      return;
    }
  } catch (err) {
    if (!needsBackfill) startTimer(Date.now());
    console.warn('backfill failed', err);
  }
  const stream = new EventSource(`/api/swap/${jobId}/stream`);
  state.swap_eventsource = stream;
  stream.onmessage = (ev) => {
    try {
      const msg = JSON.parse(ev.data);
      if (msg.line !== undefined) {
        state.swap_lines.push(msg.line);
        appendLog(msg.line);
      } else if (!msg.state) {
        // Neither a log line nor a state change: nothing to update.
        return;
      }
      deriveSwapPhase(msg.state, state.swap_lines);
      renderSwapPanel();
    } catch {}
  };
  stream.addEventListener('done', async (ev) => {
    let payload = {};
    try { payload = JSON.parse(ev.data); } catch {}
    handleSwapDone(payload);
  });
  stream.onerror = () => {
    // Tab backgrounded or network blip — close; status poll will reattach
    stream.close();
    state.swap_eventsource = null;
  };
  renderCards();
}
// Finalise the swap panel once a swap job has ended. `d` is the job's final
// payload: `d.state === 'failed'` shows "Failed", anything else "Ready ✓";
// a real `d.returncode` is shown as the detail line.
//
// The panel is left visible for 4 s and then detached - but only if the
// tracked job is still the one that just finished, so a swap attached during
// that window does not have its panel and stream torn down.
function handleSwapDone(d) {
  if (state.swap_eventsource) { state.swap_eventsource.close(); state.swap_eventsource = null; }
  const finishedJobId = state.swap_job_id;
  const finalState = d.state || 'ready';
  state.swap_phase = finalState === 'failed' ? 'Failed' : 'Ready ✓';
  // `!= null` skips both undefined and null, avoiding "exit code null".
  state.swap_phase_detail = d.returncode != null ? `exit code ${d.returncode}` : '';
  state.swap_progress = 1.0;
  renderSwapPanel();
  setTimeout(() => {
    if (state.swap_job_id === finishedJobId) detachSwap();
  }, 4000);
  pollStatus();
}
// Stop tracking the current swap: forget the job id, close any open stream,
// stop the clock, hide the swap panel and repaint the model cards.
function detachSwap() {
  state.swap_job_id = null;
  const stream = state.swap_eventsource;
  if (stream) {
    stream.close();
    state.swap_eventsource = null;
  }
  stopTimer();
  el('#swap-panel').classList.add('hidden');
  renderCards();
}
// Append one line to the swap log and keep the view pinned to the bottom.
function appendLog(line) {
  const logView = el('#swap-log');
  logView.textContent = logView.textContent + line + '\n';
  logView.scrollTop = logView.scrollHeight;
}
// ===================== model downloads =====================
// Client-side bookkeeping for the model download panel.
const dlState = {
  job_id: null,       // id of the download job being followed (null when idle)
  eventsource: null,  // open EventSource for that job's stream, if any
  started_at: null,   // epoch milliseconds the elapsed clock counts from
  timer_handle: null, // setInterval handle driving the elapsed clock
  // Repo and mode of the most recently requested download. Written when a
  // download is started and read when it finishes to pre-fill the
  // "add to catalog" dialog; declared here so the object's shape is complete.
  last_repo: null,
  last_mode: null,
};
// Show the download panel in "form" mode (form visible, progress hidden),
// focus the repo input and refresh the HuggingFace link.
function openDownloadForm() {
  for (const selector of ['#download-panel', '#download-form']) {
    el(selector).classList.remove('hidden');
  }
  el('#download-progress').classList.add('hidden');
  el('#dl-repo').focus();
  updateDlHfLink();
}
// Keep the "view on HuggingFace" link in sync with the repo input. The link
// is shown only when the input looks like "org/name" (contains a slash).
//
// Each path segment is percent-encoded on its own: encoding the whole repo
// with encodeURIComponent would also escape the "/" separating org and name
// and produce ".../org%2Fname".
function updateDlHfLink() {
  const repo = el('#dl-repo').value.trim();
  const link = el('#dl-hf-link');
  if (repo.includes('/')) {
    const path = repo.split('/').map((segment) => encodeURIComponent(segment)).join('/');
    link.href = `https://huggingface.co/${path}`;
    link.classList.remove('hidden');
  } else {
    link.classList.add('hidden');
  }
}
// Hide the download panel and reset it to "form" mode with an empty repo
// input, ready for the next download.
function closeDownloadPanel() {
  const hide = (selector) => el(selector).classList.add('hidden');
  hide('#download-panel');
  el('#download-form').classList.remove('hidden');
  hide('#download-progress');
  el('#dl-repo').value = '';
}
// (Re)start the download panel's elapsed-time clock from `startedAt`
// (epoch milliseconds). Repaints immediately, then every 500 ms.
function dlTimerStart(startedAt) {
  dlState.started_at = startedAt;
  if (dlState.timer_handle) clearInterval(dlState.timer_handle);
  function repaint() {
    if (!dlState.started_at) return;
    const elapsed = Math.max(0, Math.floor((Date.now() - dlState.started_at) / 1000));
    const minutes = Math.floor(elapsed / 60);
    const seconds = (elapsed % 60).toString().padStart(2, '0');
    el('#dl-elapsed').textContent = `${minutes}:${seconds}`;
  }
  repaint();
  dlState.timer_handle = setInterval(repaint, 500);
}
// Stop the download panel's elapsed-time clock, if it is running.
function dlTimerStop() {
  if (!dlState.timer_handle) return;
  clearInterval(dlState.timer_handle);
  dlState.timer_handle = null;
}
// Submit handler for the manual download form: validates that the repo looks
// like "org/name", remembers the repo and mode for the follow-up catalog
// prompt, starts the download and attaches the progress view to the job.
async function startDownload() {
  const repo = el('#dl-repo').value.trim();
  const checkedMode = document.querySelector('input[name="dl-mode"]:checked');
  const mode = checkedMode.value;
  const looksLikeRepo = repo && repo.includes('/');
  if (!looksLikeRepo) {
    alert('Enter a HuggingFace repo in the form "org/name", e.g. RedHatAI/Qwen3.6-35B-A3B-NVFP4');
    return;
  }
  dlState.last_repo = repo;
  dlState.last_mode = mode;
  const request = {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    body: JSON.stringify({ repo, mode }),
  };
  try {
    const started = await fetchJSON('/api/download', request);
    attachToDownload(started.job_id);
  } catch (err) {
    alert('Failed to start download: ' + err.message);
  }
}
// Paint a download progress report `p` into the download panel. Fields read:
// phase, downloaded, total, rate, eta, percent.
//
// Tolerates a missing report (a snapshot without `progress`) and a `percent`
// that is not a number: the bar then falls back to its 2% minimum and the
// percentage label is left blank instead of throwing.
function renderDownloadProgress(p) {
  const report = p || {};
  el('#dl-phase').textContent = report.phase || 'Working…';
  const statsParts = [];
  if (report.downloaded && report.total) statsParts.push(`${report.downloaded} / ${report.total}`);
  if (report.rate) statsParts.push(report.rate);
  if (report.eta) statsParts.push(`ETA ${report.eta}`);
  el('#dl-stats').textContent = statsParts.join(' · ');
  // Coerce once so a numeric string behaves like the number it spells.
  const percent = Number(report.percent);
  const hasPercent = Number.isFinite(percent) && percent > 0;
  const pct = Math.max(2, Math.min(100, hasPercent ? percent : 2));
  el('#dl-progress-fill').style.width = `${pct}%`;
  el('#dl-phase-detail').textContent = hasPercent ? `${percent.toFixed(1)}%` : '';
}
// Append one line to the download log and keep the view pinned to the bottom.
function dlAppendLog(line) {
  const logView = el('#dl-log');
  logView.textContent = logView.textContent + line + '\n';
  logView.scrollTop = logView.scrollHeight;
}
// Start following download job `jobId`: switch the panel to progress mode,
// backfill it from the job snapshot, then stream log lines and progress
// reports until the job reports `done`.
async function attachToDownload(jobId) {
  const previous = dlState.eventsource;
  if (previous) {
    previous.close();
    dlState.eventsource = null;
  }
  dlState.job_id = jobId;
  el('#download-form').classList.add('hidden');
  el('#download-progress').classList.remove('hidden');
  el('#dl-log').textContent = '';
  el('#dl-title').textContent = 'Downloading…';
  try {
    const snap = await fetchJSON(`/api/download/${jobId}`);
    dlTimerStart(Date.parse(snap.started_at));
    for (const line of snap.lines || []) dlAppendLog(line);
    renderDownloadProgress(snap.progress);
    const alreadyFinished = snap.returncode !== null && snap.returncode !== undefined;
    if (alreadyFinished) {
      handleDownloadDone(snap);
      return;
    }
  } catch (err) {
    // No usable snapshot: time the download from now instead.
    console.warn('download backfill failed', err);
    dlTimerStart(Date.now());
  }
  const stream = new EventSource(`/api/download/${jobId}/stream`);
  dlState.eventsource = stream;
  stream.onmessage = (ev) => {
    try {
      const msg = JSON.parse(ev.data);
      if (msg.line !== undefined) dlAppendLog(msg.line);
    } catch {}
  };
  stream.addEventListener('progress', (ev) => {
    try { renderDownloadProgress(JSON.parse(ev.data)); } catch {}
  });
  stream.addEventListener('done', (ev) => {
    let payload = {};
    try { payload = JSON.parse(ev.data); } catch {}
    handleDownloadDone(payload);
  });
  stream.onerror = () => {
    stream.close();
    dlState.eventsource = null;
  };
}
// Finalise the download panel once a download job has ended. `d` is the
// job's final payload; `d.state === 'failed'` selects the failure wording.
// After a successful download of a remembered repo, the "add to catalog"
// dialog is offered 600 ms later.
function handleDownloadDone(d) {
  const stream = dlState.eventsource;
  if (stream) {
    stream.close();
    dlState.eventsource = null;
  }
  dlTimerStop();
  const failed = d.state === 'failed';
  el('#dl-title').textContent = failed ? `Failed (rc=${d.returncode})` : 'Done';
  el('#dl-phase').textContent = failed ? 'Failed' : 'Done ✓';
  if (!failed) {
    el('#dl-progress-fill').style.width = '100%';
    // Offer to add to catalog
    const repo = dlState.last_repo;
    const mode = dlState.last_mode;
    if (repo) setTimeout(() => openCatalogDialog(repo, mode), 600);
  }
  dlState.job_id = null;
}
// ===================== Advanced / Add to catalog =====================
// Total GPU memory, in GB, available to a model running in `modelMode`,
// from state.hardware. For a cluster model with both Sparks reporting, the
// smaller of the two is returned; otherwise whichever Spark reports (Spark 1
// preferred). Returns null when no figure is known.
function gpuTotalGB(modelMode) {
  // Solo uses Spark 1's GPU only. Cluster shares across both — but loading is per-Spark.
  const toGB = (host) => (host?.gpu_mem_total_mib ? host.gpu_mem_total_mib / 1024 : null);
  const first = toGB(state.hardware?.spark1);
  const second = toGB(state.hardware?.spark2);
  const bothKnown = first && second;
  if (modelMode === 'cluster' && bothKnown) {
    return Math.min(first, second); // bottleneck
  }
  return first || second || null;
}
// One-line, human-readable hint explaining what a tuning knob's current
// value means.
//
// field - knob name: 'gpu_memory_utilization', 'max_model_len',
//         'fastsafetensors', 'prefix_caching' or 'kv_cache_dtype'
// value - the knob's current value
// mode  - the model's mode, used to pick the GPU memory total
//
// Returns '' for unknown knobs and whenever there is nothing sensible to
// say (no GPU figure, or a non-numeric utilisation).
function knobContextHint(field, value, mode) {
  if (field === 'gpu_memory_utilization') {
    const gb = gpuTotalGB(mode);
    const fraction = Number(value);
    // A non-numeric value would otherwise render as "~NaN GB".
    if (!gb || !Number.isFinite(fraction)) return '';
    const used = (fraction * gb).toFixed(0);
    const free = (gb - fraction * gb).toFixed(0);
    return `~${used} GB allocated to model + KV cache · ~${free} GB left for OS, buffers, other GPU workloads.`;
  }
  if (field === 'max_model_len') {
    if (!value) return '';
    const pages = Math.round(value / 350); // ~350 tokens per page
    return `~${pages.toLocaleString()} pages of text (very rough). Larger context = more GPU memory reserved for KV cache.`;
  }
  if (field === 'fastsafetensors') return value ? 'Faster cold-start weight loading.' : 'Standard safetensors loading.';
  if (field === 'prefix_caching') return value ? 'Reuses GPU state for repeated prompt prefixes (e.g. long system prompts).' : 'Off — every request re-processes the full prompt.';
  if (field === 'kv_cache_dtype') return value === 'fp8' ? 'Halves KV cache memory (fits ~2× more context). Quality cost is usually imperceptible.' : 'Default precision.';
  return '';
}
// Return the hint element for knob `id` inside `rowEl`, creating and
// appending an empty one the first time it is asked for.
function ensureKnobHint(rowEl, id) {
  const existing = rowEl.querySelector(`.knob-hint[data-for="${id}"]`);
  if (existing) return existing;
  const hint = document.createElement('div');
  hint.className = 'knob-hint muted small';
  hint.dataset.for = id;
  rowEl.appendChild(hint);
  return hint;
}
// Open the "Advanced" dialog for catalog model `key`: pre-fill the knob
// inputs from the model's effective knobs, keep the explanatory hints live
// while the user edits, and wire the Delete (custom models only) and Save
// actions. Does nothing for an unknown key.
function openAdvanced(key) {
  const model = state.models[key];
  if (!model) return;
  const dialog = el('#advanced-dialog');
  el('#adv-title').textContent = `Advanced — ${model.display_name}`;
  const knobsNow = model.effective_knobs || {};
  el('#adv-mml').value = knobsNow.max_model_len ?? '';
  el('#adv-gmu').value = knobsNow.gpu_memory_utilization ?? 0.85;
  el('#adv-gmu-out').value = parseFloat(el('#adv-gmu').value).toFixed(2);
  el('#adv-fst').checked = !!knobsNow.fastsafetensors;
  el('#adv-pcache').checked = !!knobsNow.prefix_caching;
  el('#adv-fp8').checked = knobsNow.kv_cache_dtype === 'fp8';
  // Wire up live knob hints
  const showHint = (inputSel, hintId, field, value) => {
    ensureKnobHint(el(inputSel).parentElement, hintId).textContent = knobContextHint(field, value, model.mode);
  };
  const refreshHints = () => {
    const mml = parseInt(el('#adv-mml').value, 10);
    const gmu = parseFloat(el('#adv-gmu').value);
    showHint('#adv-mml', 'mml', 'max_model_len', mml);
    showHint('#adv-gmu', 'gmu', 'gpu_memory_utilization', gmu);
    showHint('#adv-fst', 'fst', 'fastsafetensors', el('#adv-fst').checked);
    showHint('#adv-pcache', 'pcache', 'prefix_caching', el('#adv-pcache').checked);
    showHint('#adv-fp8', 'fp8', 'kv_cache_dtype', el('#adv-fp8').checked ? 'fp8' : 'auto');
  };
  refreshHints();
  el('#adv-mml').oninput = refreshHints;
  el('#adv-gmu').oninput = (ev) => {
    el('#adv-gmu-out').value = parseFloat(ev.target.value).toFixed(2);
    refreshHints();
  };
  el('#adv-fst').onchange = refreshHints;
  el('#adv-pcache').onchange = refreshHints;
  el('#adv-fp8').onchange = refreshHints;
  // Only custom catalog entries can be deleted.
  const deleteBtn = el('#adv-delete');
  deleteBtn.classList.toggle('hidden', !model.custom);
  deleteBtn.onclick = async () => {
    if (!confirm(`Delete "${model.display_name}" from the catalog? The model weights on disk are NOT deleted.`)) return;
    try {
      await fetchJSON(`/api/models/${encodeURIComponent(key)}`, { method: 'DELETE' });
      dialog.close();
      await loadModels();
      pollStatus();
    } catch (err) {
      alert('Delete failed: ' + err.message);
    }
  };
  el('#advanced-form').onsubmit = async (ev) => {
    ev.preventDefault();
    const knobs = {};
    // Numeric knobs are only sent when the input holds a usable number.
    const mml = parseInt(el('#adv-mml').value, 10);
    if (Number.isFinite(mml) && mml > 0) knobs.max_model_len = mml;
    const gmu = parseFloat(el('#adv-gmu').value);
    if (Number.isFinite(gmu)) knobs.gpu_memory_utilization = gmu;
    knobs.fastsafetensors = el('#adv-fst').checked ? true : false;
    knobs.prefix_caching = el('#adv-pcache').checked ? true : false;
    knobs.kv_cache_dtype = el('#adv-fp8').checked ? 'fp8' : 'auto';
    try {
      await fetchJSON(`/api/models/${encodeURIComponent(key)}/knobs`, {
        method: 'PUT',
        headers: { 'content-type': 'application/json' },
        body: JSON.stringify({ knobs }),
      });
      dialog.close();
      await loadModels();
      pollStatus();
    } catch (err) {
      alert('Save failed: ' + err.message);
    }
  };
  dialog.showModal();
}
// Open the "add to catalog" dialog pre-filled for HuggingFace repo `repo`.
// The key is derived from the repo's last path segment, lower-cased, with
// anything outside [a-z0-9_-] replaced by '-'.
//
// `mode` is whatever the download was started with. The download flow uses
// 'spark1' | 'cluster', while catalog entries elsewhere in this file use
// 'solo' | 'cluster', so every value other than 'cluster' maps to 'solo'.
// NOTE(review): assumes #cd-mode only offers solo/cluster - confirm against
// the dialog markup.
function openCatalogDialog(repo, mode) {
  const dlg = el('#catalog-dialog');
  const key = repo.split('/').pop().toLowerCase().replace(/[^a-z0-9_-]/g, '-');
  el('#cd-key').value = key;
  el('#cd-name').value = repo.split('/').pop();
  el('#cd-repo').value = repo;
  el('#cd-size').value = '';
  el('#cd-mode').value = mode === 'cluster' ? 'cluster' : 'solo';
  el('#cd-desc').value = '';
  el('#cd-mml').value = 32768;
  el('#cd-gmu').value = 0.85;
  el('#cd-gmu-out').value = '0.85';
  el('#cd-fst').checked = true;
  el('#cd-pcache').checked = true;
  el('#cd-fp8').checked = true;
  dlg.showModal();
}
// One-time wiring for the "add to catalog" dialog: Cancel closes it, the GPU
// utilisation slider mirrors its value into the readout, and Submit POSTs
// the new entry to /api/models, then closes the dialog and download panel
// and reloads the catalog.
function setupCatalogDialog() {
  // Snapshot the form into the request body.
  const readEntry = () => ({
    key: el('#cd-key').value.trim(),
    display_name: el('#cd-name').value.trim(),
    repo: el('#cd-repo').value.trim(),
    size_gb: parseFloat(el('#cd-size').value) || 0,
    mode: el('#cd-mode').value,
    description: el('#cd-desc').value.trim() || null,
    vllm_args: [],
    knobs: {
      max_model_len: parseInt(el('#cd-mml').value, 10) || 32768,
      gpu_memory_utilization: parseFloat(el('#cd-gmu').value),
      fastsafetensors: el('#cd-fst').checked,
      prefix_caching: el('#cd-pcache').checked,
      kv_cache_dtype: el('#cd-fp8').checked ? 'fp8' : 'auto',
    },
  });
  el('#cd-cancel').addEventListener('click', () => {
    el('#catalog-dialog').close();
  });
  el('#cd-gmu').addEventListener('input', (ev) => {
    el('#cd-gmu-out').value = parseFloat(ev.target.value).toFixed(2);
  });
  el('#catalog-form').addEventListener('submit', async (ev) => {
    ev.preventDefault();
    const body = readEntry();
    try {
      await fetchJSON('/api/models', {
        method: 'POST',
        headers: { 'content-type': 'application/json' },
        body: JSON.stringify(body),
      });
      el('#catalog-dialog').close();
      closeDownloadPanel();
      await loadModels();
      pollStatus();
    } catch (err) {
      alert('Add to catalog failed: ' + err.message);
    }
  });
}
/**
 * Wire up the advanced-knobs dialog: cancel closes it, and dragging the
 * GPU-memory slider updates the two-decimal readout next to it.
 */
function setupAdvancedDialog() {
  const cancelButton = el('#adv-cancel');
  cancelButton.addEventListener('click', () => {
    el('#advanced-dialog').close();
  });

  const slider = el('#adv-gmu');
  slider.addEventListener('input', (ev) => {
    const shown = parseFloat(ev.target.value).toFixed(2);
    el('#adv-gmu-out').value = shown;
  });
}
/**
 * Reset every field of the "add local model" dialog to its default and show
 * the dialog. Text fields are blanked, mode goes back to 'solo', and the knob
 * defaults (32768 context, 0.85 GPU memory, all toggles on) are restored.
 */
function openLocalModelDialog() {
  const blankFields = ['#lm-key', '#lm-name', '#lm-path', '#lm-chat', '#lm-size', '#lm-desc'];
  blankFields.forEach((selector) => {
    el(selector).value = '';
  });

  el('#lm-mode').value = 'solo';
  el('#lm-mml').value = 32768;
  el('#lm-gmu').value = 0.85;
  el('#lm-gmu-out').value = '0.85';

  const toggles = ['#lm-fst', '#lm-pcache', '#lm-fp8'];
  toggles.forEach((selector) => {
    el(selector).checked = true;
  });

  el('#local-model-dialog').showModal();
}
/**
 * Wire up the "add local model" dialog: the cancel button, the GPU-memory
 * slider readout, and the form submit that POSTs the model (identified by a
 * local path rather than a repo) to /api/models.
 */
function setupLocalModelDialog() {
  el('#lm-cancel').addEventListener('click', () => {
    el('#local-model-dialog').close();
  });

  el('#lm-gmu').addEventListener('input', (ev) => {
    el('#lm-gmu-out').value = parseFloat(ev.target.value).toFixed(2);
  });

  async function submitLocalModel(ev) {
    ev.preventDefault();

    const trimmed = (selector) => el(selector).value.trim();
    const isOn = (selector) => el(selector).checked;

    const chatTemplate = trimmed('#lm-chat');
    // A fine-tune's chat template (if any) rides along as a launch flag.
    const launchArgs = [];
    if (chatTemplate) {
      launchArgs.push(`--chat-template=${chatTemplate}`);
    }

    const payload = {
      key: trimmed('#lm-key'),
      display_name: trimmed('#lm-name'),
      local_path: trimmed('#lm-path'),
      size_gb: parseFloat(el('#lm-size').value) || 0,
      mode: el('#lm-mode').value,
      description: trimmed('#lm-desc') || null,
      vllm_args: launchArgs,
      knobs: {
        max_model_len: parseInt(el('#lm-mml').value, 10) || 32768,
        gpu_memory_utilization: parseFloat(el('#lm-gmu').value),
        fastsafetensors: isOn('#lm-fst'),
        prefix_caching: isOn('#lm-pcache'),
        kv_cache_dtype: isOn('#lm-fp8') ? 'fp8' : 'auto',
      },
    };

    try {
      await fetchJSON('/api/models', {
        method: 'POST',
        headers: { 'content-type': 'application/json' },
        body: JSON.stringify(payload),
      });
      el('#local-model-dialog').close();
      await loadModels();
      pollStatus();
    } catch (err) {
      alert('Add local model failed: ' + err.message);
    }
  }

  el('#local-model-form').addEventListener('submit', submitLocalModel);
}
// ===================== NIM installer =====================
// Client-side bookkeeping for the NIM install flow (catalog, running job, timer).
const nimState = {
catalog: null, // last /api/nim/catalog response, stored by loadNimCatalog()
job_id: null, // id of the install job attachNimProgress() is following
eventsource: null, // EventSource streaming that job's output, or null when none is open
timer: null, // setInterval handle that refreshes the elapsed-time readout
started_at: null, // job start time in epoch milliseconds (elapsed = Date.now() - started_at)
};
/**
 * Fetch the NIM catalog and render it into the install dialog.
 *
 * Stores the response on `nimState.catalog`, points the catalog link at
 * `catalog_url`, shows or clears the missing-NGC-key warning, and rebuilds the
 * grid of suggested images. Each card's "Pick" button copies its image,
 * container name, port and kind into the install form.
 *
 * Failures are logged with console.warn and otherwise ignored, so the dialog
 * still opens and allows manual entry.
 */
async function loadNimCatalog() {
  try {
    nimState.catalog = await fetchJSON('/api/nim/catalog');
    el('#nim-catalog-link').href = nimState.catalog.catalog_url;

    const warn = el('#nim-key-warn');
    if (!nimState.catalog.ngc_key_configured) {
      warn.classList.add('nim-key-warn');
      warn.innerHTML = '⚠️ NGC API key not set. Open <strong>Configure Sparks</strong> in StartOS and paste your NGC personal API key, otherwise installs will fail. <a href="https://ngc.nvidia.com/setup/personal-key" target="_blank" rel="noopener">Get a key</a>';
    } else {
      warn.classList.remove('nim-key-warn');
      warn.textContent = '';
    }

    const grid = el('#nim-suggested');
    grid.innerHTML = '';
    for (const s of nimState.catalog.suggested || []) {
      // The port goes into an HTML attribute like every other catalog field,
      // so it is escaped too; a missing port becomes an empty value instead of
      // the literal string "undefined".
      const port = s.default_port == null ? '' : String(s.default_port);
      const card = document.createElement('div');
      card.className = 'nim-card';
      card.innerHTML = `
<div class="info">
<div class="name">${escapeHtml(s.name)} <span class="muted small">· ${escapeHtml(s.kind || 'nim')}</span></div>
<div class="desc">${escapeHtml(s.description || '')}</div>
<div class="img">${escapeHtml(s.image)}</div>
<div class="links">${s.homepage ? `<a href="${escapeHtml(s.homepage)}" target="_blank" rel="noopener">View on NGC ↗</a>` : ''}</div>
</div>
<button type="button" class="btn primary nim-pick" data-image="${escapeHtml(s.image)}" data-container="${escapeHtml(s.default_container)}" data-port="${escapeHtml(port)}" data-kind="${escapeHtml(s.kind)}">Pick</button>
`;
      grid.appendChild(card);
    }

    // "Pick" copies the card's values into the install form fields.
    grid.querySelectorAll('.nim-pick').forEach((btn) => {
      btn.addEventListener('click', () => {
        el('#nim-image').value = btn.dataset.image;
        el('#nim-container').value = btn.dataset.container;
        el('#nim-port').value = btn.dataset.port;
        el('#nim-kind').value = btn.dataset.kind || 'nim';
      });
    });
  } catch (e) {
    console.warn('nim catalog failed', e);
  }
}
/**
 * Open the NIM install dialog. The catalog refresh is started first and left
 * running in the background, so the dialog appears without waiting for it.
 */
function openNimDialog() {
  loadNimCatalog();
  const dialog = el('#nim-dialog');
  dialog.showModal();
}
/**
 * Submit handler for the NIM install form.
 *
 * Reads image, container, port, host and kind from the form. If image,
 * container or port is empty (or the port is not a non-zero number) it alerts
 * and stops. Otherwise it POSTs to /api/nim/install, closes the dialog and
 * attaches the progress view to the returned job id.
 *
 * @param {Event} e - The form's submit event; its default action is prevented.
 */
async function submitNim(e) {
  e.preventDefault();

  const fieldValue = (selector) => el(selector).value;
  const image = fieldValue('#nim-image').trim();
  const container = fieldValue('#nim-container').trim();
  const port = parseInt(fieldValue('#nim-port'), 10);
  const body = {
    image,
    container,
    port,
    host: fieldValue('#nim-host'),
    kind: fieldValue('#nim-kind'),
  };

  // NaN and 0 are both falsy, so an unparseable or zero port is rejected here.
  const missingRequired = [image, container, port].some((v) => !v);
  if (missingRequired) {
    alert('Image, container name, and port are required.');
    return;
  }

  try {
    const job = await fetchJSON('/api/nim/install', {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: JSON.stringify(body),
    });
    el('#nim-dialog').close();
    attachNimProgress(job.job_id);
  } catch (err) {
    alert('Install failed: ' + err.message);
  }
}
/**
 * Start (or restart) the elapsed-time readout in the NIM progress dialog.
 *
 * Any previously running ticker is cleared first. The readout is rendered
 * immediately and then refreshed every 500 ms as "m:ss".
 *
 * @param {number} at - Job start time in epoch milliseconds. Callers pass
 *   `Date.parse(...)`, which yields NaN for a missing or unparseable
 *   timestamp; in that case the clock starts from now rather than leaving the
 *   readout permanently blank.
 */
function nimTimerStart(at) {
  nimState.started_at = Number.isNaN(at) ? Date.now() : at;
  if (nimState.timer) clearInterval(nimState.timer);
  const tick = () => {
    // started_at is falsy until a job is attached; nothing to show then.
    if (!nimState.started_at) return;
    const sec = Math.max(0, Math.floor((Date.now() - nimState.started_at) / 1000));
    const m = Math.floor(sec / 60);
    const s = sec % 60;
    el('#nim-prog-elapsed').textContent = `${m}:${s.toString().padStart(2, '0')}`;
  };
  tick();
  nimState.timer = setInterval(tick, 500);
}
/**
 * Show the NIM progress dialog and follow an install job.
 *
 * First fetches a snapshot of the job to seed the timer, phase and log. If the
 * job has already finished, it hands the snapshot to onNimDone() and stops.
 * Otherwise it opens an EventSource on the job's stream and appends log lines,
 * phase changes and the final 'done' event as they arrive.
 *
 * @param {string} jobId - Install job id returned by /api/nim/install.
 */
async function attachNimProgress(jobId) {
  // Close a stream left over from an earlier attach so it is not leaked and
  // cannot keep appending to the log that is reset below.
  if (nimState.eventsource) {
    nimState.eventsource.close();
    nimState.eventsource = null;
  }
  nimState.job_id = jobId;
  el('#nim-prog-log').textContent = '';
  el('#nim-prog-title').textContent = 'Installing…';
  el('#nim-progress-dialog').showModal();
  try {
    const snap = await fetchJSON(`/api/nim/install/${jobId}`);
    nimTimerStart(Date.parse(snap.started_at));
    el('#nim-prog-phase').textContent = snap.phase || 'Working…';
    // End every snapshot line with '\n', the same way streamed lines are
    // appended, so the first streamed line starts on its own line.
    el('#nim-prog-log').textContent = (snap.lines || []).map((line) => line + '\n').join('');
    // `!= null` so a snapshot that omits returncode counts as still running.
    if (snap.returncode != null) { onNimDone(snap); return; }
  } catch {
    // Snapshot unavailable: start the clock from now and rely on the stream.
    nimTimerStart(Date.now());
  }
  const es = new EventSource(`/api/nim/install/${jobId}/stream`);
  nimState.eventsource = es;
  es.onmessage = (ev) => {
    try {
      const d = JSON.parse(ev.data);
      if (d.line !== undefined) {
        const log = el('#nim-prog-log');
        log.textContent += d.line + '\n';
        log.scrollTop = log.scrollHeight; // keep the newest line in view
      }
    } catch {} // ignore frames that are not valid JSON
  };
  es.addEventListener('phase', (ev) => {
    try { el('#nim-prog-phase').textContent = JSON.parse(ev.data).phase; } catch {}
  });
  es.addEventListener('done', (ev) => {
    let d = {}; try { d = JSON.parse(ev.data); } catch {}
    onNimDone(d);
  });
  // Closing on error stops the browser's automatic reconnect attempts.
  es.onerror = () => { es.close(); nimState.eventsource = null; };
}
/**
 * Finish the NIM progress view: close the stream, stop the elapsed-time
 * ticker, show the final title and phase, and trigger a status refresh.
 *
 * @param {{state?: string, returncode?: number}} d - Final job info. Only
 *   `state === 'failed'` is treated as a failure; anything else is shown as
 *   installed.
 */
function onNimDone(d) {
  const { eventsource: stream, timer: ticker } = nimState;
  if (stream) {
    stream.close();
    nimState.eventsource = null;
  }
  if (ticker) {
    clearInterval(ticker);
    nimState.timer = null;
  }

  const failed = d.state === 'failed';
  const title = failed ? `Failed (rc=${d.returncode})` : 'Installed';
  const phase = failed
    ? 'Failed'
    : 'Done ✓ — service will appear when the container reports healthy.';
  el('#nim-prog-title').textContent = title;
  el('#nim-prog-phase').textContent = phase;

  pollStatus();
}
// ===================== Explain context (LLM commit summary) =====================
// EventSource currently streaming /api/explain-updates, or null when no
// explanation is in flight. explainContext() closes any previous stream before
// opening a new one.
let explainEventSource = null;
/**
 * Stream an explanation of the pending updates into the banner's explain
 * section.
 *
 * Opens an EventSource on /api/explain-updates (closing any earlier one),
 * reveals and expands the section, and renders messages as they arrive:
 * `content` chunks accumulate into the section's text, `reasoning` chunks go
 * into a de-emphasised child element, and an `error` message is shown inline.
 * If the stream fails before any message was received, a failure note
 * replaces the "Asking…" placeholder so the section does not appear to hang.
 */
async function explainContext() {
  if (explainEventSource) { explainEventSource.close(); explainEventSource = null; }
  const section = el('#ub-explain-section');
  const content = el('#ub-explain-content');
  section.classList.remove('hidden');
  section.open = true;
  content.innerHTML = '<span class="muted">Asking the loaded model…</span>';

  let text = '';
  let firstChunk = true;
  let receivedAny = false; // true once any parseable message has arrived
  const es = new EventSource('/api/explain-updates');
  explainEventSource = es;

  // Only clear the shared reference if it still points at this stream, so a
  // late event from an old stream cannot drop a newer one.
  const release = () => {
    es.close();
    if (explainEventSource === es) explainEventSource = null;
  };

  es.onmessage = (ev) => {
    try {
      const d = JSON.parse(ev.data);
      receivedAny = true;
      if (d.error) {
        content.innerHTML = `<span class="muted">Couldn't get explanation: ${escapeHtml(d.error)}</span>`;
        return;
      }
      // The first real chunk replaces the "Asking…" placeholder.
      if (firstChunk) { content.innerHTML = ''; firstChunk = false; }
      if (d.content) {
        text += d.content;
        content.textContent = text;
        content.scrollTop = content.scrollHeight;
      } else if (d.reasoning) {
        // Show reasoning tokens but de-emphasized
        let r = content.querySelector('.reasoning-current');
        if (!r) {
          r = document.createElement('div');
          r.className = 'reasoning reasoning-current';
          r.textContent = '';
          content.appendChild(r);
        }
        r.textContent += d.reasoning;
      }
    } catch {} // ignore frames that are not valid JSON
  };
  es.addEventListener('done', () => {
    release();
    // strip the reasoning-current marker
    const r = content.querySelector('.reasoning-current');
    if (r) r.classList.remove('reasoning-current');
  });
  es.onerror = () => {
    release();
    if (!receivedAny) {
      content.innerHTML = '<span class="muted">Couldn\'t get explanation: the connection failed before any response arrived.</span>';
    }
  };
}
// ===================== updates (spark-vllm-docker) =====================
// Client-side bookkeeping for the spark-vllm-docker update banner and update job.
const updState = {
info: null, // last /api/updates response, stored by pollUpdates()
job_id: null, // id of the update job attachToUpdate() is following
eventsource: null, // EventSource streaming that job's output, or null when none is open
started_at: null, // job start time in epoch milliseconds (elapsed = Date.now() - started_at)
timer_handle: null, // setInterval handle that refreshes the elapsed-time readout
};
/**
 * Refresh the update banner: fetch /api/updates, remember the result on
 * `updState.info`, and re-render. A failed poll (or a render error) is only
 * logged; the previous banner state stays in place.
 */
async function pollUpdates() {
  try {
    updState.info = await fetchJSON('/api/updates');
    renderUpdateBanner();
  } catch (err) {
    console.warn('updates poll failed', err);
  }
}
/**
 * Render the spark-vllm-docker update banner from `updState.info`.
 *
 * The banner is hidden when there is no info or the check did not succeed.
 * Otherwise it shows one of three states:
 *   - dirty:      local changes exist, so updating is blocked
 *   - up to date: nothing to pull
 *   - behind:     N commits behind; the details, apply and explain controls
 *                 are shown and the commit log is filled in
 */
function renderUpdateBanner() {
  const banner = el('#update-banner');
  const info = updState.info;
  const text = el('#ub-text');
  const details = el('#ub-details');
  const apply = el('#ub-apply');
  const list = el('#ub-list');
  const log = el('#ub-log');
  if (!info || !info.ok) {
    banner.classList.add('hidden');
    return;
  }
  banner.classList.remove('hidden');
  const behind = info.behind || 0;
  const dirty = info.dirty || 0;
  banner.classList.toggle('up-to-date', behind === 0 && !dirty);
  banner.classList.toggle('warn', !!dirty);
  const explain = el('#ub-explain');
  if (dirty > 0) {
    text.textContent = `${dirty} local change${dirty === 1 ? '' : 's'} in ~/spark-vllm-docker. Resolve before updating.`;
    details.classList.add('hidden');
    apply.classList.add('hidden');
    // The details toggle is hidden in this state, so also hide the commit
    // list; otherwise a previously expanded list would stay on screen with
    // no way to dismiss it.
    list.classList.add('hidden');
    explain.classList.add('hidden');
  } else if (behind === 0) {
    text.textContent = `spark-vllm-docker is up to date (${info.current || ''})`;
    details.classList.add('hidden');
    apply.classList.add('hidden');
    list.classList.add('hidden');
    explain.classList.add('hidden');
  } else {
    text.textContent = `${behind} commit${behind === 1 ? '' : 's'} behind upstream`;
    details.classList.remove('hidden');
    apply.classList.remove('hidden');
    explain.classList.remove('hidden');
    // The list's own visibility is left alone here: it is user-toggled.
    log.textContent = (info.log || []).join('\n') || '(no log)';
  }
}
/**
 * Start (or restart) the elapsed-time readout for a running update job.
 *
 * Any previously running ticker is cleared first. The readout is rendered
 * immediately and then refreshed every 500 ms as "m:ss".
 *
 * @param {number} startedAt - Job start time in epoch milliseconds. Callers
 *   pass `Date.parse(...)`, which yields NaN for a missing or unparseable
 *   timestamp; in that case the clock starts from now rather than leaving the
 *   readout permanently blank.
 */
function ubTimerStart(startedAt) {
  updState.started_at = Number.isNaN(startedAt) ? Date.now() : startedAt;
  if (updState.timer_handle) clearInterval(updState.timer_handle);
  const tick = () => {
    // started_at is falsy until a job is attached; nothing to show then.
    if (!updState.started_at) return;
    const sec = Math.max(0, Math.floor((Date.now() - updState.started_at) / 1000));
    const m = Math.floor(sec / 60);
    const s = sec % 60;
    el('#ub-elapsed').textContent = `${m}:${s.toString().padStart(2, '0')}`;
  };
  tick();
  updState.timer_handle = setInterval(tick, 500);
}
/**
 * Ask for confirmation, then start a spark-vllm-docker update in cluster mode
 * and attach the progress view to the job that /api/updates/apply returns.
 * A failure to start the job is reported with an alert.
 */
async function applyUpdate() {
  // The duration is a range; it previously read "540 minutes" because the dash
  // between 5 and 40 was missing.
  const warning = 'This pulls the latest spark-vllm-docker and rebuilds the vLLM container. Can take 5-40 minutes; the cluster is unaffected until you swap to a different model. Continue?';
  if (!confirm(warning)) return;
  try {
    const r = await fetchJSON('/api/updates/apply', {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: JSON.stringify({ mode: 'cluster' }),
    });
    attachToUpdate(r.job_id);
  } catch (e) {
    alert('Failed to start update: ' + e.message);
  }
}
/**
 * Show the update progress area and follow an update job.
 *
 * First fetches a snapshot of the job to seed the timer, phase and log. If the
 * job has already finished, it hands the snapshot to handleUpdateDone() and
 * stops. Otherwise it opens an EventSource on the job's stream and appends log
 * lines, phase changes and the final 'done' event as they arrive.
 *
 * @param {string} jobId - Update job id returned by /api/updates/apply.
 */
async function attachToUpdate(jobId) {
  // Close a stream left over from an earlier attach so it is not leaked and
  // cannot keep appending to the log that is reset below.
  if (updState.eventsource) {
    updState.eventsource.close();
    updState.eventsource = null;
  }
  updState.job_id = jobId;
  el('#ub-progress').classList.remove('hidden');
  el('#ub-apply').classList.add('hidden');
  el('#ub-stream').textContent = '';
  el('#ub-phase').textContent = 'Starting…';
  try {
    const snap = await fetchJSON(`/api/updates/${jobId}`);
    ubTimerStart(Date.parse(snap.started_at));
    el('#ub-phase').textContent = snap.phase || 'Working…';
    // End every snapshot line with '\n', the same way streamed lines are
    // appended, so the first streamed line starts on its own line.
    el('#ub-stream').textContent = (snap.lines || []).map((line) => line + '\n').join('');
    // `!= null` so a snapshot that omits returncode counts as still running.
    if (snap.returncode != null) { handleUpdateDone(snap); return; }
  } catch (e) {
    // Snapshot unavailable: start the clock from now and rely on the stream.
    ubTimerStart(Date.now());
  }
  const es = new EventSource(`/api/updates/${jobId}/stream`);
  updState.eventsource = es;
  es.onmessage = (ev) => {
    try {
      const d = JSON.parse(ev.data);
      if (d.line !== undefined) {
        const log = el('#ub-stream');
        log.textContent += d.line + '\n';
        log.scrollTop = log.scrollHeight; // keep the newest line in view
      }
    } catch {} // ignore frames that are not valid JSON
  };
  es.addEventListener('phase', (ev) => {
    try { el('#ub-phase').textContent = JSON.parse(ev.data).phase; } catch {}
  });
  es.addEventListener('done', (ev) => {
    let d = {}; try { d = JSON.parse(ev.data); } catch {}
    handleUpdateDone(d);
  });
  // Closing on error stops the browser's automatic reconnect attempts.
  es.onerror = () => { es.close(); updState.eventsource = null; };
}
/**
 * Finish the update progress view: close the stream, stop the elapsed-time
 * ticker, show the final phase text, and schedule a fresh update check two
 * seconds later.
 *
 * @param {{state?: string, returncode?: number}} d - Final job info. Only
 *   `state === 'failed'` is reported as a failure.
 */
function handleUpdateDone(d) {
  const { eventsource: stream, timer_handle: ticker } = updState;
  if (stream) {
    stream.close();
    updState.eventsource = null;
  }
  if (ticker) {
    clearInterval(ticker);
    updState.timer_handle = null;
  }

  let phaseText;
  if (d.state === 'failed') {
    phaseText = `Failed (rc=${d.returncode})`;
  } else {
    phaseText = 'Done ✓ — re-check from the banner.';
  }
  el('#ub-phase').textContent = phaseText;

  setTimeout(pollUpdates, 2000);
}
/**
 * Bootstrap the dashboard.
 *
 * Order matters here:
 *   1. attach all static event listeners and per-dialog setup
 *   2. load /api/config (only used to reveal the Open WebUI link)
 *   3. do the first awaited render: models, status, services
 *   4. kick off the slower background probes without awaiting them
 *   5. install the periodic refresh timers
 */
async function init() {
  setupCopyButtons();
  el('#open-download').addEventListener('click', openDownloadForm);
  el('#dl-cancel').addEventListener('click', closeDownloadPanel);
  el('#dl-start').addEventListener('click', startDownload);
  el('#dl-repo').addEventListener('keydown', (e) => { if (e.key === 'Enter') startDownload(); });
  el('#ub-details').addEventListener('click', () => {
    const list = el('#ub-list');
    // Derive `open` from the resulting `hidden` state instead of flipping the
    // two independently: the banner renderer hides the list without touching
    // `open`, which used to leave the list visible but collapsed on the next
    // click.
    const nowHidden = list.classList.toggle('hidden');
    list.open = !nowHidden;
  });
  el('#ub-apply').addEventListener('click', applyUpdate);
  el('#ub-explain').addEventListener('click', explainContext);
  el('#dl-repo').addEventListener('input', updateDlHfLink);
  el('#open-nim').addEventListener('click', openNimDialog);
  el('#nim-cancel').addEventListener('click', () => el('#nim-dialog').close());
  el('#nim-form').addEventListener('submit', submitNim);
  el('#nim-prog-close').addEventListener('click', () => el('#nim-progress-dialog').close());
  el('#mb-update-close').addEventListener('click', () => el('#mb-update-dialog').close());
  // Dismissing the modal (Close or Esc) stops streaming; the job keeps running
  // server-side, and re-clicking Update returns a 409 if it is still in progress.
  el('#mb-update-dialog').addEventListener('close', () => {
    if (mbState.eventsource) { mbState.eventsource.close(); mbState.eventsource = null; }
    if (mbState.timer) { clearInterval(mbState.timer); mbState.timer = null; }
    state.mb_update_in_flight = false;
    renderServices();
  });
  el('#mb-logs-close').addEventListener('click', () => el('#mb-logs-dialog').close());
  el('#mb-logs-refresh').addEventListener('click', loadMatrixBridgeLogs);
  el('#open-connectivity').addEventListener('click', openConnectivityDialog);
  el('#connectivity-close').addEventListener('click', () => el('#connectivity-dialog').close());
  // Hardware-card buttons (Wake-on-LAN on unreachable cards; SSH-key copy on
  // reachable ones) are rendered dynamically, so delegate from the grid.
  el('#hardware-grid').addEventListener('click', (e) => {
    const wbtn = e.target.closest('[data-wake]');
    if (wbtn) { wakeSpark(wbtn.dataset.wake); return; }
    const kbtn = e.target.closest('[data-ssh-key]');
    if (kbtn) { copySparkSshKey(kbtn.dataset.sshKey, kbtn); return; }
  });
  el('#sshkey-close').addEventListener('click', () => el('#sshkey-dialog').close());
  el('#open-local').addEventListener('click', openLocalModelDialog);
  setupCatalogDialog();
  setupAdvancedDialog();
  setupLocalModelDialog();
  // Open WebUI link from /api/config
  try {
    state.config = await fetchJSON('/api/config');
    if (state.config.open_webui_url) {
      const a = el('#open-webui-link');
      a.href = state.config.open_webui_url;
      a.classList.remove('hidden');
    }
  } catch (e) {
    // Best-effort: the dashboard works without the link, but leave a trace.
    console.warn('config load failed', e);
  }
  setupDashboardTabs();
  setupEndpointCollapse();
  await loadModels();
  await pollStatus();
  await renderServices();
  pollHardware();
  pollUpdates();
  // Disk-status probe runs after first paint — slow over SSH and not blocking.
  loadDiskStatus();
  // Speech-model patches panel — slow over SSH, runs after first paint.
  renderSpeechModels();
  setInterval(pollStatus, 5000);
  setInterval(pollHardware, 8000); // every 8s
  setInterval(pollUpdates, 300000); // every 5 min
  setInterval(loadDiskStatus, 60000); // every 60s — disk state changes rarely
  setInterval(renderSpeechModels, 120000); // every 2 min — patches change rarely
}
// Start the dashboard as soon as this script is evaluated. init() is async and
// its returned promise is not awaited or caught here.
init();