// spark-control front-end const state = { models: {}, defaults: {}, current_model_key: null, swap_job_id: null, swap_eventsource: null, swap_started_at: null, swap_lines: [], // local accumulator for phase detection swap_phase: 'Starting…', swap_phase_detail: '', swap_progress: 0, // 0–1 services: {}, service_action_in_flight: null, // e.g. "parakeet:restart" mb_update_in_flight: false, // matrix-bridge update job running hardware: {}, config: {}, configured: true, timer_handle: null, deep_health: {}, disk_status: {}, // keyed by model key: { on_disk, total_bytes, per_host } disk_status_loaded: false, }; const el = (sel) => document.querySelector(sel); const $$ = (sel) => document.querySelectorAll(sel); function escapeHtml(s) { if (s == null) return ''; return String(s) .replaceAll('&', '&') .replaceAll('<', '<') .replaceAll('>', '>') .replaceAll('"', '"') .replaceAll("'", '''); } async function fetchJSON(url, opts) { const r = await fetch(url, opts); if (!r.ok) { const text = await r.text().catch(() => ''); throw new Error(`${r.status} ${r.statusText}: ${text}`); } return r.json(); } // ===================== rendering ===================== function renderCards() { const root = el('#cards'); root.innerHTML = ''; const isSwapping = !!state.swap_job_id; for (const key of Object.keys(state.models)) { const m = state.models[key]; const isActive = key === state.current_model_key; const card = document.createElement('div'); card.className = 'card' + (isActive ? ' active' : ''); const desc = m.description ? `
${escapeHtml(m.description)}
` : ''; const customPill = m.custom ? `custom` : ''; // Disk-presence pill + trash button. Until /api/models/disk-status comes back, // we don't know — render a neutral placeholder. const disk = state.disk_status[key]; let diskPill = ''; if (state.disk_status_loaded) { if (disk && disk.on_disk) { const gb = (disk.total_bytes / 1e9); diskPill = `on disk · ${gb.toFixed(1)} GB`; } else { diskPill = `not downloaded`; } } // Trash button — hidden if not on disk; disabled (with tooltip) if currently loaded. let trashBtn = ''; if (state.disk_status_loaded && disk && disk.on_disk) { const disabled = isActive || isSwapping; const tip = isActive ? 'Currently loaded — switch to another model first' : isSwapping ? 'A swap is in progress' : 'Delete weights from disk'; trashBtn = ``; } // Primary card action: "Switch to this" (green) when on disk; "Download" (blue) when not. // Before disk-status loads we render the swap button as a sensible default. const isOnDisk = !state.disk_status_loaded || (disk && disk.on_disk); const dlInFlight = !!(typeof dlState !== 'undefined' && dlState && dlState.job_id); let primaryBtn = ''; if (isActive) { primaryBtn = ``; } else if (isOnDisk) { primaryBtn = ``; } else { const tip = dlInFlight ? 'A download is already in progress' : 'Download weights to the Spark(s)'; primaryBtn = ``; } card.innerHTML = `
${escapeHtml(m.display_name)}
${m.mode} ${m.size_gb} GB ${customPill} ${diskPill} ${(m.capabilities || []).map(c => `${escapeHtml(c)}`).join('')}
${desc}
${escapeHtml(m.repo)}
${primaryBtn} ${trashBtn}
`; root.appendChild(card); } for (const btn of root.querySelectorAll('[data-swap-key]')) { btn.addEventListener('click', () => triggerSwap(btn.dataset.swapKey)); } for (const btn of root.querySelectorAll('[data-download-key]')) { btn.addEventListener('click', () => triggerDownloadForKey(btn.dataset.downloadKey)); } for (const btn of root.querySelectorAll('[data-adv-key]')) { btn.addEventListener('click', () => openAdvanced(btn.dataset.advKey)); } for (const btn of root.querySelectorAll('[data-test-key]')) { btn.addEventListener('click', () => testLaunch(btn.dataset.testKey, btn)); } for (const btn of root.querySelectorAll('[data-disk-del-key]')) { btn.addEventListener('click', () => openDiskDeleteDialog(btn.dataset.diskDelKey)); } } const trashIcon = ''; async function testLaunch(key, btn) { const resultEl = document.querySelector(`[data-test-result-for="${key}"]`); if (!resultEl) return; const originalText = btn.textContent; btn.disabled = true; btn.textContent = 'Testing…'; resultEl.classList.remove('hidden', 'ok', 'fail'); resultEl.innerHTML = 'Checking launch args against vLLM\'s parser…'; try { const r = await fetchJSON(`/api/swap/${encodeURIComponent(key)}/validate`, { method: 'POST' }); if (r.ok) { resultEl.classList.add('ok'); resultEl.innerHTML = ` Launch args parse OK. (Doesn't guarantee runtime success — only catches argparse-level issues.)`; } else { resultEl.classList.add('fail'); const err = escapeHtml(r.error || 'unknown error'); const stage = r.stage ? ` (${escapeHtml(r.stage)})` : ''; resultEl.innerHTML = ` Would fail: ${err}${stage}`; } } catch (e) { resultEl.classList.add('fail'); resultEl.innerHTML = ` Test failed: ${escapeHtml(e.message)}`; } finally { btn.disabled = false; btn.textContent = originalText; } } function renderCurrent(status) { const c = el('#current'); if (!status.configured) { c.innerHTML = `not configured`; return; } if (status.current_swap_job) { c.innerHTML = `swap in progress`; return; } const v = status.vllm || {}; if (!v.ok) { c.innerHTML = `vLLM unreachable`; return; } const m = status.current_model_key ? state.models[status.current_model_key] : null; const label = m ? m.display_name : (v.current_model || '(unknown)'); c.innerHTML = `${label}`; } // ===================== hardware dashboard ===================== function fmtBytes(n) { if (!n && n !== 0) return '—'; const u = ['B', 'KB', 'MB', 'GB', 'TB']; let i = 0; let v = n; while (v >= 1024 && i < u.length - 1) { v /= 1024; i++; } return v < 10 ? `${v.toFixed(1)} ${u[i]}` : `${Math.round(v)} ${u[i]}`; } function fmtMiB(n) { if (!n && n !== 0) return null; // n is in MiB; render in GB const gb = n / 1024; return gb < 10 ? gb.toFixed(1) : Math.round(gb).toString(); } function bar(usedPct, warn) { const pct = Math.max(2, Math.min(100, usedPct)); return `
`; } async function pollHardware() { try { state.hardware = await fetchJSON('/api/hardware'); try { state.connectivity = await fetchJSON('/api/connectivity'); } catch {} renderHardware(); } catch (e) { console.warn('hardware poll failed', e); } } function fmtDuration(sec) { if (sec == null) return ''; if (sec < 60) return `${Math.round(sec)}s`; if (sec < 3600) return `${Math.round(sec / 60)}m`; if (sec < 86400) { const h = Math.floor(sec / 3600); const m = Math.round((sec % 3600) / 60); return m ? `${h}h ${m}m` : `${h}h`; } const d = Math.floor(sec / 86400); const h = Math.round((sec % 86400) / 3600); return h ? `${d}d ${h}h` : `${d}d`; } function openConnectivityDialog() { const dlg = el('#connectivity-dialog'); const content = el('#connectivity-content'); const c = state.connectivity || {}; const events = c.events || []; if (events.length === 0) { content.innerHTML = '
No events recorded yet. Once a Spark or service goes down and back up (or an external app reports a failure), entries appear here.
'; dlg.showModal(); return; } const bySubject = {}; for (const e of events) { const subj = e.subject || e.spark || 'unknown'; // legacy fallback (bySubject[subj] = bySubject[subj] || []).push(e); } // Sort subjects: hosts first, then services, alphabetical const hostOrder = ['spark1', 'spark2']; const subjects = Object.keys(bySubject).sort((a, b) => { const ia = hostOrder.indexOf(a); const ib = hostOrder.indexOf(b); if (ia >= 0 && ib >= 0) return ia - ib; if (ia >= 0) return -1; if (ib >= 0) return 1; return a.localeCompare(b); }); const html = subjects.map((subj) => { const evs = bySubject[subj]; const transitions = evs.filter(e => (e.kind || 'transition') === 'transition'); const reports = evs.filter(e => e.kind === 'report'); const downs = transitions.filter(e => e.transition === 'down').length; const failedReports = reports.filter(e => !e.ok).length; const mac = c.macs?.[subj]; const summaryParts = []; if (transitions.length) summaryParts.push(`${transitions.length} probe transition${transitions.length===1?'':'s'} (${downs} down)`); if (reports.length) summaryParts.push(`${reports.length} app report${reports.length===1?'':'s'} (${failedReports} failed)`); const isHost = hostOrder.includes(subj); return `

${escapeHtml(subj)}${isHost ? ' [host]' : ' [service]'}${mac ? ` ${escapeHtml(mac)}` : ''}

${summaryParts.join(' · ') || 'no events'}
${evs.slice(-30).reverse().map(e => renderConnEvent(e)).join('')}
`; }).join(''); content.innerHTML = html; dlg.showModal(); } function renderConnEvent(e) { const when = escapeHtml((e.at || '').replace('T', ' ').replace('Z', '')); const kind = e.kind || 'transition'; if (kind === 'report') { const ok = !!e.ok; const source = escapeHtml(e.source || 'external'); const detail = e.detail ? ` — ${escapeHtml(e.detail)}` : ''; const latency = e.latency_ms != null ? ` (${e.latency_ms} ms)` : ''; return `
${when} ${ok ? '◷ report: ok' : '◷ report: failed'} from ${source}${detail} ${latency}
`; } const down = e.down_seconds != null ? `was down ${fmtDuration(e.down_seconds)}` : ''; const up = e.up_seconds != null ? `was up ${fmtDuration(e.up_seconds)}` : ''; return `
${when} ${e.transition === 'up' ? '↑ came back online' : '↓ dropped offline'} ${down}${up}
`; } async function wakeSpark(name) { try { const r = await fetchJSON(`/api/spark/${name}/wake`, { method: 'POST' }); alert(`Wake-on-LAN sent to ${name} (MAC ${r.mac}, via ${r.delivered_via}). Give it ~30 seconds to wake; the card will go green when it comes back.`); } catch (e) { alert(`Wake failed: ${e.message}`); } } // Generate-if-missing + copy this Spark's OUTBOUND ssh public key (the key the // Spark uses to log in to other machines, e.g. the Mac). Distinct from the // package's own key in the StartOS "Show Public Key" action. async function copySparkSshKey(name, btn) { if (btn) btn.disabled = true; try { const r = await fetchJSON(`/api/spark/${name}/ssh-key`, { method: 'POST' }); // Best-effort clipboard copy; on plain-HTTP this no-ops, but the dialog // below always shows the key for manual selection. await copyText(r.pubkey, btn); const label = r.host ? `${name} (${r.host})` : name; el('#sshkey-title').textContent = `${name} — SSH public key`; el('#sshkey-intro').textContent = r.created ? `Generated a new SSH key on ${label} and copied it to your clipboard. This is the key ${name} uses to log in to OTHER machines.` : `${label} already had an SSH key; copied its public key to your clipboard. This is the key ${name} uses to log in to OTHER machines.`; el('#sshkey-value').textContent = r.pubkey; el('#sshkey-install').textContent = `mkdir -p ~/.ssh && echo '${r.pubkey}' >> ~/.ssh/authorized_keys && chmod 600 ~/.ssh/authorized_keys`; el('#sshkey-dialog').showModal(); } catch (e) { alert(`Couldn't get the SSH key for ${name}: ${e.message}`); } finally { if (btn) btn.disabled = false; } } function renderHardware() { const panel = el('#hardware-panel'); const grid = el('#hardware-grid'); const hw = state.hardware || {}; const keys = Object.keys(hw).filter(k => hw[k] && (hw[k].configured !== false)); if (keys.length === 0) { panel.classList.add('hidden'); return; } panel.classList.remove('hidden'); grid.innerHTML = ''; for (const key of keys) { const s = hw[key]; const card = document.createElement('div'); if (!s.reachable) { card.className = 'hw-card unreachable'; const mac = state.connectivity?.macs?.[key]; const wolRow = mac ? `
${escapeHtml(mac)}
` : `
MAC not yet known — once it's been up once with this dashboard installed, "Wake" will appear here.
`; card.innerHTML = `
${escapeHtml(key)} unreachable
${escapeHtml(s.host || '')} — ${escapeHtml(s.error || 'no response')}
${wolRow}
If Wake-on-LAN doesn't bring it back, manual steps:
  1. Verify it's powered on (check the front LED).
  2. Ping it from another LAN device.
  3. Power-cycle it physically.
  4. If it boots, this card will go green again automatically.
`; grid.appendChild(card); continue; } const ramPct = s.ram_used_bytes && s.ram_total_bytes ? (s.ram_used_bytes / s.ram_total_bytes) * 100 : 0; const diskPct = s.disk_used_bytes && s.disk_total_bytes ? (s.disk_used_bytes / s.disk_total_bytes) * 100 : 0; const loadPct = (s.load && s.cores) ? Math.min(100, (s.load[0] / s.cores) * 100) : 0; // GPU memory: on unified-memory systems (DGX Spark) total is N/A, so use system RAM as the pool. const gpuMemTotalMiB = s.gpu_mem_total_mib || (s.gpu_unified_memory ? (s.ram_total_bytes / (1024 * 1024)) : null); const gpuMemUsedMiB = s.gpu_mem_used_mib ?? null; const gpuMemPct = (gpuMemTotalMiB && gpuMemUsedMiB != null) ? (gpuMemUsedMiB / gpuMemTotalMiB) * 100 : 0; const gpuMemNote = s.gpu_unified_memory ? ' (unified)' : ''; const gpuExtras = []; if (s.gpu_temp_c != null) gpuExtras.push(`${s.gpu_temp_c}°C`); if (s.gpu_power_w != null) gpuExtras.push(`${s.gpu_power_w.toFixed(0)}W`); const gpuExtrasStr = gpuExtras.length ? ` · ${gpuExtras.join(' · ')}` : ''; // Read-only WireGuard badge: shown only when the Spark has a wg interface up. // "VPN " means it's a peer on that tunnel (reachable off-LAN when the // tunnel is up); it reflects interface presence, not live peer reachability. const wgIp = s.wg_addr ? String(s.wg_addr).split('/')[0] : ''; const wgBadge = s.wg_iface ? ` · VPN${wgIp ? ' ' + escapeHtml(wgIp) : ''}` : ''; card.className = 'hw-card'; card.innerHTML = `
${escapeHtml(s.hostname || key)} ${escapeHtml(key)} · ${escapeHtml(s.gpu_name || '')} · ${escapeHtml(s.uptime || '')}${wgBadge}
CPU ${bar(loadPct, loadPct > 80)} ${s.load ? s.load[0].toFixed(2) : '—'} / ${s.cores || '?'} cores
RAM ${bar(ramPct, ramPct > 85)} ${fmtBytes(s.ram_used_bytes)} / ${fmtBytes(s.ram_total_bytes)}
GPU mem${gpuMemNote} ${bar(gpuMemPct, gpuMemPct > 90)} ${fmtMiB(gpuMemUsedMiB) || '—'} / ${fmtMiB(gpuMemTotalMiB) || '?'} GB
GPU util ${bar(s.gpu_util_pct || 0, (s.gpu_util_pct || 0) > 90)} ${s.gpu_util_pct ?? 0}%${gpuExtrasStr}
Disk ${bar(diskPct, diskPct > 85)} ${fmtBytes(s.disk_used_bytes)} / ${fmtBytes(s.disk_total_bytes)}
`; grid.appendChild(card); } } // ===================== service classification ===================== function classifyService(s) { // returns one of: running | unhealthy | missing | unconfigured | starting if (!s.host) return 'unconfigured'; if (s.docker_state === 'missing') return 'missing'; if (s.docker_state === 'restarting') return 'unhealthy'; if (s.docker_state === 'exited') return 'unhealthy'; if (s.docker_state === 'running') { // http_ready === false means an HTTP probe is expected but failing → still // warming up. null means the service has no HTTP surface (e.g. the bot), so // a running container is simply healthy. if (s.http_ready === false) return 'starting'; return 'running'; } return s.docker_state || 'unknown'; } function statusLabel(cls) { return { running: 'Healthy', unhealthy: 'Unhealthy', starting: 'Starting…', missing: 'Not installed', unconfigured: 'Not configured', unknown: 'Unknown', }[cls] || cls; } async function renderServices() { let services = state.services; // First render: fetch. if (!services || Object.keys(services).length === 0) { try { services = await fetchJSON('/api/services'); state.services = services; } catch (e) { console.error('services fetch failed', e); return; } } const panel = el('#services-panel'); const grid = el('#services-grid'); const entries = Object.entries(services); if (entries.length === 0) { panel.classList.add('hidden'); return; } panel.classList.remove('hidden'); grid.innerHTML = ''; for (const [name, s] of entries) { const cls = classifyService(s); const isBot = s.kind === 'bot'; // The bot tile is opt-in: it only belongs to deployments that actually run // matrix-bridge. When the container is absent (missing) or the host isn't // configured, hide the tile entirely rather than show a stray red card. if (isBot && (cls === 'missing' || cls === 'unconfigured')) continue; const card = document.createElement('div'); card.className = `service-card ${cls}`; const inFlight = state.service_action_in_flight && state.service_action_in_flight.startsWith(name + ':'); const disable = (action) => { // Disable buttons that don't make sense for the current state if (inFlight) return true; if (cls === 'unconfigured' || cls === 'missing') return true; if (action === 'start' && (cls === 'running' || cls === 'starting')) return true; if (action === 'stop' && cls !== 'running' && cls !== 'starting' && cls !== 'unhealthy') return true; return false; }; const copyIcon = ``; const hostStr = s.host ? `${s.host}:${s.port}` : ''; const hostRow = s.host ? `
Host${escapeHtml(hostStr)}
` : `
Hostnot configured
`; const urlRow = s.base_url ? `
URL${escapeHtml(s.base_url)}
` : ''; const modelRow = s.model ? `
Model${escapeHtml(s.model)}
` : ''; const restartsRow = s.restart_count != null && s.restart_count > 1 ? `
Restarts${s.restart_count}
` : ''; const dh = state.deep_health?.[name]; let deepRow = ''; if (dh && dh.last) { const last = dh.last; const when = (last.at || '').slice(11, 19); // HH:MM:SS const verdict = last.ok ? `deep check ok` : `deep check FAILED`; const lat = last.latency_ms != null ? ` ${last.latency_ms} ms` : ''; const restarts = dh.auto_restarts_window > 0 ? ` · ${dh.auto_restarts_window} auto-restart${dh.auto_restarts_window === 1 ? '' : 's'} in 30 min` : ''; deepRow = `
Deep ${verdict} ${escapeHtml(when)}${lat}${restarts}
${last.ok ? '' : `
${escapeHtml((last.error || last.note || '').slice(0, 200))}
`} `; } else if (dh) { deepRow = `
Deep no probe yet
`; } card.innerHTML = `
${escapeHtml(name)} ${escapeHtml(s.kind || '')} ${statusLabel(cls)}
${hostRow} ${urlRow} ${modelRow} ${restartsRow} ${deepRow}
${isBot ? `` : ''} ${isBot ? `` : ''}
`; grid.appendChild(card); } for (const btn of grid.querySelectorAll('.btn[data-svc-action]')) { btn.addEventListener('click', () => onServiceAction(btn.dataset.svcAction)); } const mbUpdateBtn = grid.querySelector('[data-mb-update]'); if (mbUpdateBtn) mbUpdateBtn.addEventListener('click', onMatrixBridgeUpdate); const mbLogsBtn = grid.querySelector('[data-mb-logs]'); if (mbLogsBtn) mbLogsBtn.addEventListener('click', openMatrixBridgeLogs); for (const btn of grid.querySelectorAll('[data-dh-run]')) { btn.addEventListener('click', () => onDeepHealthRun(btn.dataset.dhRun, btn)); } } async function onDeepHealthRun(name, btn) { btn.disabled = true; const orig = btn.textContent; btn.textContent = '…'; try { await fetchJSON(`/api/deep-health/${encodeURIComponent(name)}/run`, { method: 'POST' }); } catch (e) { console.warn('deep-health run failed', e); } finally { try { state.deep_health = await fetchJSON('/api/deep-health'); } catch {} btn.textContent = orig; btn.disabled = false; renderServices(); } } // ===================== speech-model patches (v0.11) ===================== async function renderSpeechModels() { const panel = el('#speech-models-panel'); const card = el('#speech-models-card'); if (!panel || !card) return; let data; try { data = await fetchJSON('/api/speech-models'); } catch (e) { // If parakeet host isn't even configured, hide the section entirely panel.classList.add('hidden'); return; } if (!data || !data.patches) { panel.classList.add('hidden'); return; } panel.classList.remove('hidden'); const patches = data.patches || {}; const health = data.container_health || {}; const status = patches.status || 'unknown'; let statusPill; if (status === 'in_sync') { statusPill = `patches in sync`; } else if (status === 'drift') { statusPill = `spark-control has newer patches`; } else if (status === 'missing') { statusPill = `patches missing in container`; } else { statusPill = `unknown`; } const asrLoaded = !!health.asr_loaded; const diarLoaded = !!health.diarizer_loaded; const asrModel = escapeHtml(health.model || '—'); const diarModel = escapeHtml(health.diarizer_model || '—'); const fileRows = (patches.files || []).map((f) => { const sync = f.in_sync ? '✓ in sync' : f.remote_sha == null ? '✗ missing' : '⚠ drift'; const local = f.local_sha ? `${escapeHtml(f.local_sha)}` : ''; const remote = f.remote_sha ? `${escapeHtml(f.remote_sha)}` : ''; return `
${escapeHtml(f.name)} ${sync} local ${local} → remote ${remote}
`; }).join(''); const lastReapply = patches.last_reapply_at ? new Date(patches.last_reapply_at).toLocaleString() : 'never (since spark-control boot)'; const lastRestart = patches.last_restart_at ? new Date(patches.last_restart_at).toLocaleString() : 'never (since spark-control boot)'; card.innerHTML = `
parakeet-asr container
${statusPill}
Parakeet ASR ${asrModel} ${asrLoaded ? 'loaded' : 'not loaded'}
Sortformer diarizer ${diarModel} ${diarLoaded ? 'loaded' : 'not loaded'}
${fileRows}
Last reapply: ${escapeHtml(lastReapply)} · Last manual restart: ${escapeHtml(lastRestart)}
`; el('#sm-reapply').addEventListener('click', onSpeechModelsReapply); el('#sm-restart').addEventListener('click', onSpeechModelsRestart); } async function onSpeechModelsReapply() { if (!confirm('Reapply Sortformer patches to the parakeet-asr container? The container will restart and both ASR + diarizer will be unavailable for ~60–120 seconds.')) return; const dlg = el('#speech-models-progress-dialog'); const steps = el('#sm-prog-steps'); const closeBtn = el('#sm-prog-close'); steps.innerHTML = '
Starting…
'; closeBtn.disabled = true; closeBtn.onclick = () => dlg.close(); dlg.showModal(); try { const r = await fetchJSON('/api/speech-models/reapply', { method: 'POST' }); steps.innerHTML = (r.steps || []).map((s) => { const mark = s.ok ? '' : ''; const extra = s.error ? `
${escapeHtml(s.error)}
` : ''; return `
${mark} ${escapeHtml(s.step)}${s.name ? ` (${escapeHtml(s.name)})` : ''}${extra}
`; }).join('') + `
Done — both models reloaded.
`; } catch (e) { let parsed = null; try { parsed = JSON.parse(e.message.split(':').slice(2).join(':').trim()); } catch {} const stepHtml = parsed && parsed.result && parsed.result.steps ? parsed.result.steps.map((s) => { const mark = s.ok ? '' : ''; return `
${mark} ${escapeHtml(s.step)}${s.name ? ` (${escapeHtml(s.name)})` : ''}${s.error ? `
${escapeHtml(s.error)}
` : ''}
`; }).join('') : `
${escapeHtml(e.message)}
`; steps.innerHTML = stepHtml + `
Failed.
`; } finally { closeBtn.disabled = false; try { await renderSpeechModels(); } catch {} } } async function onSpeechModelsRestart() { if (!confirm('Restart parakeet-asr container? STT + diarization will be unavailable for ~30 seconds.')) return; try { await fetchJSON('/api/speech-models/restart', { method: 'POST' }); } catch (e) { alert('Restart failed: ' + e.message); } finally { try { await renderSpeechModels(); } catch {} } } // NOTE: a WhisperX install action lived here briefly in v0.12 but was // reverted in v0.13.0:0 — the NGC PyTorch container on ARM64 doesn't ship // torchaudio and we couldn't reliably build it from source. The existing // Parakeet + Sortformer pipeline stays as the audio path. See release notes. async function onServiceAction(key) { if (state.service_action_in_flight) return; const [name, action] = key.split(':'); state.service_action_in_flight = key; renderServices(); try { await fetchJSON(`/api/services/${name}/${action}`, { method: 'POST' }); } catch (e) { alert(`${action} ${name} failed: ${e.message}`); } finally { state.service_action_in_flight = null; // Refresh services state try { state.services = await fetchJSON('/api/services'); } catch {} renderServices(); pollStatus(); } } // ===================== matrix-bridge bot (update + logs) ===================== const mbState = { job_id: null, eventsource: null, timer: null, started_at: null }; function mbTimerStart(at) { mbState.started_at = at; if (mbState.timer) clearInterval(mbState.timer); const tick = () => { if (!mbState.started_at) return; const sec = Math.max(0, Math.floor((Date.now() - mbState.started_at) / 1000)); el('#mb-update-elapsed').textContent = `${Math.floor(sec / 60)}:${(sec % 60).toString().padStart(2, '0')}`; }; tick(); mbState.timer = setInterval(tick, 500); } async function onMatrixBridgeUpdate() { if (state.mb_update_in_flight) return; if (!confirm('Update the matrix-bridge bot?\n\nThis pulls the latest code, rebuilds the container image, and recreates the container. The first build after a base-image change can take several minutes. The bot is briefly offline while it restarts.')) return; state.mb_update_in_flight = true; renderServices(); try { const r = await fetchJSON('/api/matrix-bridge/update', { method: 'POST' }); attachMbUpdateProgress(r.job_id); } catch (e) { state.mb_update_in_flight = false; renderServices(); alert('Update failed to start: ' + e.message); } } async function attachMbUpdateProgress(jobId) { mbState.job_id = jobId; el('#mb-update-log').textContent = ''; el('#mb-update-title').textContent = 'Updating matrix-bridge…'; el('#mb-update-phase').textContent = 'Starting…'; el('#mb-update-dialog').showModal(); try { const snap = await fetchJSON(`/api/matrix-bridge/update/${jobId}`); mbTimerStart(Date.parse(snap.started_at)); el('#mb-update-phase').textContent = snap.phase || 'Working…'; el('#mb-update-log').textContent = (snap.lines || []).join('\n'); if (snap.returncode !== null) { onMbUpdateDone(snap); return; } } catch { mbTimerStart(Date.now()); } const es = new EventSource(`/api/matrix-bridge/update/${jobId}/stream`); mbState.eventsource = es; es.onmessage = ev => { try { const d = JSON.parse(ev.data); if (d.line !== undefined) { const log = el('#mb-update-log'); log.textContent += d.line + '\n'; log.scrollTop = log.scrollHeight; } } catch {} }; es.addEventListener('phase', ev => { try { el('#mb-update-phase').textContent = JSON.parse(ev.data).phase; } catch {} }); es.addEventListener('done', ev => { let d = {}; try { d = JSON.parse(ev.data); } catch {} onMbUpdateDone(d); }); es.onerror = () => { // Don't leave the Update button wedged-disabled on a dropped stream. The // job keeps running server-side; re-clicking Update returns a clean 409. es.close(); mbState.eventsource = null; state.mb_update_in_flight = false; el('#mb-update-phase').textContent = 'Lost connection to the update stream — reopen or check logs.'; renderServices(); }; } function onMbUpdateDone(d) { if (mbState.eventsource) { mbState.eventsource.close(); mbState.eventsource = null; } if (mbState.timer) { clearInterval(mbState.timer); mbState.timer = null; } state.mb_update_in_flight = false; if (d.state === 'failed') { el('#mb-update-title').textContent = `Update failed (rc=${d.returncode})`; el('#mb-update-phase').textContent = 'Failed — see the log above.'; } else { el('#mb-update-title').textContent = 'Update complete'; el('#mb-update-phase').textContent = 'Done ✓'; } // Refresh the tile's badge. (async () => { try { state.services = await fetchJSON('/api/services'); } catch {} renderServices(); })(); } async function openMatrixBridgeLogs() { const pre = el('#mb-logs-pre'); el('#mb-logs-title').textContent = 'matrix-bridge logs'; pre.textContent = 'Loading…'; el('#mb-logs-dialog').showModal(); await loadMatrixBridgeLogs(); } async function loadMatrixBridgeLogs() { const pre = el('#mb-logs-pre'); const btn = el('#mb-logs-refresh'); if (btn) btn.disabled = true; try { const r = await fetchJSON('/api/matrix-bridge/logs?tail=100'); pre.textContent = r.output || '(no output)'; pre.scrollTop = pre.scrollHeight; } catch (e) { pre.textContent = 'Could not read logs: ' + e.message; } finally { if (btn) btn.disabled = false; } } function renderEndpoint(status) { const v = status.vllm || {}; const panel = el('#endpoint-panel'); const ready = v.ok && v.current_model && v.base_url; panel.classList.toggle('hidden', !ready); if (!ready) return; el('#ep-url').textContent = v.base_url; el('#ep-model').textContent = v.current_model; const snippet = `curl -s ${v.base_url}/chat/completions \\ -H 'content-type: application/json' \\ -d '{ "model": "${v.current_model}", "messages": [{"role": "user", "content": "Hello"}] }'`; el('#ep-curl-snippet').textContent = snippet; } async function copyText(text, indicatorEl) { try { await navigator.clipboard.writeText(text); if (indicatorEl) { indicatorEl.classList.add('copied'); setTimeout(() => indicatorEl.classList.remove('copied'), 1200); } return true; } catch { // Plain HTTP fallback: select the text so the user can ⌘C if (indicatorEl) { const range = document.createRange(); range.selectNode(indicatorEl); window.getSelection().removeAllRanges(); window.getSelection().addRange(range); } return false; } } function setupCopyButtons() { document.body.addEventListener('click', async (e) => { // Inline icon copy with literal text (used for dynamically-rendered service rows) const litBtn = e.target.closest('[data-copy-text]'); if (litBtn) { await copyText(litBtn.dataset.copyText, litBtn); return; } // Copy buttons (with svg icon) referenced by data-copy="selector" const btn = e.target.closest('[data-copy]'); if (btn) { const target = el(btn.dataset.copy); if (target) { await copyText(target.textContent, btn); target.classList.add('copied'); setTimeout(() => target.classList.remove('copied'), 1200); } return; } // Self-copy: clicking the text itself const selfCopy = e.target.closest('[data-copy-self]'); if (selfCopy) { await copyText(selfCopy.textContent, selfCopy); } }); } function renderHealth(status) { function setDot(id, ok, payload) { const item = el(id); if (!item) return; const dot = item.querySelector('.dot'); dot.classList.remove('ok', 'bad', 'warn'); if (ok === true) dot.classList.add('ok'); else if (ok === false) dot.classList.add('bad'); else dot.classList.add('warn'); item.title = JSON.stringify(payload || {}, null, 2); } setDot('#h-vllm', status.vllm && status.vllm.ok, status.vllm); setDot('#h-parakeet', status.parakeet && status.parakeet.ok, status.parakeet); setDot('#h-kokoro', status.kokoro && status.kokoro.ok, status.kokoro); setDot('#h-embeddings', status.embeddings && status.embeddings.ok, status.embeddings); setDot('#h-qdrant', status.qdrant && status.qdrant.ok, status.qdrant); el('#updated').textContent = `updated ${new Date().toLocaleTimeString()}`; } function renderBanner(status) { el('#setup-banner').classList.toggle('hidden', !!status.configured); // Dashboard tabs share the same "configured" gate as the rest of the // body — hidden until SSH is set up, then visible. const tabs = el('#dashboard-tabs'); if (tabs) tabs.classList.toggle('hidden', !status.configured); } // ===================== dashboard tabs (LLM / Audio) ===================== const TABS_STORAGE_KEY = 'sparkcontrol.dashboard.activeTab'; function setupDashboardTabs() { const buttons = $$('.dashboard-tab'); if (!buttons.length) return; // Restore the last-selected tab, default to "llm" let saved; try { saved = localStorage.getItem(TABS_STORAGE_KEY); } catch {} const initial = saved === 'audio' || saved === 'llm' ? saved : 'llm'; function selectTab(name) { buttons.forEach((b) => { const active = b.dataset.tab === name; b.classList.toggle('active', active); b.setAttribute('aria-selected', active ? 'true' : 'false'); }); $$('.tab-content').forEach((c) => { c.classList.toggle('active', c.id === `tab-${name}`); }); try { localStorage.setItem(TABS_STORAGE_KEY, name); } catch {} } buttons.forEach((b) => { b.addEventListener('click', () => selectTab(b.dataset.tab)); }); selectTab(initial); } // ===================== collapsible endpoint card ===================== const ENDPOINT_COLLAPSED_KEY = 'sparkcontrol.endpoint.collapsed'; function setupEndpointCollapse() { const panel = el('#endpoint-panel'); const btn = el('#ep-collapse'); if (!panel || !btn) return; // Default: collapsed (most of the time you don't need to see endpoint details) let collapsed = true; try { const v = localStorage.getItem(ENDPOINT_COLLAPSED_KEY); if (v === 'false') collapsed = false; else if (v === 'true') collapsed = true; } catch {} panel.classList.toggle('collapsed', collapsed); btn.addEventListener('click', () => { const nowCollapsed = !panel.classList.contains('collapsed'); panel.classList.toggle('collapsed', nowCollapsed); try { localStorage.setItem(ENDPOINT_COLLAPSED_KEY, nowCollapsed ? 'true' : 'false'); } catch {} }); } function renderSwapPanel() { el('#swap-phase').textContent = state.swap_phase; el('#swap-phase-detail').textContent = state.swap_phase_detail; el('#swap-phase-fill').style.width = `${Math.max(2, Math.round(state.swap_progress * 100))}%`; } // ===================== phase detection ===================== const PHASE_ORDER = [ ['Stopping current model…', 0.08], ['Starting new model…', 0.16], ['Joining Ray cluster…', 0.22], ['Loading weights…', 0.30], ['Compiling kernels…', 0.78], ['Warming up…', 0.88], ['Starting API server…', 0.94], ['Ready ✓', 1.00], ['Failed', 1.00], ]; function phaseProgress(name) { const found = PHASE_ORDER.find(([n]) => n === name); return found ? found[1] : 0.05; } function deriveSwapPhase(serverState, lines) { // Default phase from server state let phase = ({ starting: 'Starting…', stopping: 'Stopping current model…', launching: 'Starting new model…', tailing: 'Loading weights…', ready: 'Ready ✓', failed: 'Failed', })[serverState] || 'Working…'; let detail = ''; // Refine from log content (search recent lines first) const tail = lines.slice(-40); for (let i = tail.length - 1; i >= 0; i--) { const line = tail[i]; if (line.includes('Application startup complete')) { phase = 'Ready ✓'; break; } if (line.includes('Started server process')) { phase = 'Starting API server…'; break; } if (line.includes('Initial profiling/warmup') || line.includes('init engine (profile, create kv cache, warmup model)')) { phase = 'Warming up…'; break; } if (line.match(/Capturing CUDA graphs|Compiling a graph|torch\.compile took|Graph capturing/)) { phase = 'Compiling kernels…'; break; } const shard = line.match(/Loading safetensors checkpoint shards:\s+(\d+)%\s+Completed\s+\|\s+(\d+)\/(\d+)/); if (shard) { phase = 'Loading weights…'; detail = `${shard[2]} of ${shard[3]} shards (${shard[1]}%)`; const innerProgress = parseInt(shard[2], 10) / parseInt(shard[3], 10); // Map shard progress 0..1 into the 0.30..0.78 band state.swap_progress = 0.30 + (0.78 - 0.30) * innerProgress; state.swap_phase = phase; state.swap_phase_detail = detail; return; } if (line.includes('Connecting to existing Ray cluster')) { phase = 'Joining Ray cluster…'; break; } if (line.includes('Resolved architecture') || line.match(/launch-cluster\.sh.*exec vllm serve/)) { phase = 'Starting new model…'; break; } if (line.match(/launch-cluster\.sh stop/)) { phase = 'Stopping current model…'; break; } } state.swap_phase = phase; state.swap_phase_detail = detail; state.swap_progress = phaseProgress(phase); } // ===================== timer ===================== function startTimer(startedAtMillis) { state.swap_started_at = startedAtMillis; if (state.timer_handle) clearInterval(state.timer_handle); const tick = () => { if (!state.swap_started_at) return; const sec = Math.max(0, Math.floor((Date.now() - state.swap_started_at) / 1000)); const m = Math.floor(sec / 60); const s = sec % 60; el('#swap-elapsed').textContent = `${m}:${s.toString().padStart(2, '0')}`; }; tick(); state.timer_handle = setInterval(tick, 500); } function stopTimer() { if (state.timer_handle) { clearInterval(state.timer_handle); state.timer_handle = null; } } // ===================== polling + SSE ===================== async function pollStatus() { try { const status = await fetchJSON('/api/status'); state.current_model_key = status.current_model_key; state.configured = status.configured; renderBanner(status); renderCurrent(status); renderEndpoint(status); renderHealth(status); // If models hasn't loaded yet (init may have hit a transient proxy timeout), retry. if (!state.models || Object.keys(state.models).length === 0) { try { await loadModels(); } catch {} } // Refresh services state lazily — every 5s poll triggers this too. try { state.services = await fetchJSON('/api/services'); try { state.deep_health = await fetchJSON('/api/deep-health'); } catch {} renderServices(); } catch {} if (status.current_swap_job && status.current_swap_job !== state.swap_job_id) { attachToSwap(status.current_swap_job, /*needsBackfill=*/true); } else if (!status.current_swap_job && state.swap_job_id && !state.swap_eventsource) { // Foreign swap ended detachSwap(); } renderCards(); } catch (e) { console.error('status poll failed', e); } } async function loadModels() { const data = await fetchJSON('/api/models'); state.defaults = data.defaults || {}; state.models = data.models || {}; } async function loadDiskStatus() { // Probes each catalog model's HF cache over SSH; takes a beat. Best-effort. try { const r = await fetchJSON('/api/models/disk-status'); if (r && r.models) { state.disk_status = r.models; state.disk_status_loaded = true; renderCards(); } } catch (e) { // Silent — pills just won't render. Don't block dashboard. console.warn('disk-status probe failed:', e.message); } } function fmtBytesShort(n) { if (!Number.isFinite(n) || n <= 0) return '0 B'; if (n >= 1e9) return `${(n / 1e9).toFixed(1)} GB`; if (n >= 1e6) return `${(n / 1e6).toFixed(1)} MB`; if (n >= 1e3) return `${(n / 1e3).toFixed(1)} KB`; return `${n} B`; } function openDiskDeleteDialog(key) { const m = state.models[key]; const disk = state.disk_status[key]; if (!m || !disk || !disk.on_disk) return; const dlg = el('#disk-delete-dialog'); el('#dd-summary').innerHTML = `Free ${fmtBytesShort(disk.total_bytes)} by removing ${escapeHtml(m.display_name)} (${escapeHtml(m.repo)}) from disk.`; const hostsEl = el('#dd-hosts'); hostsEl.innerHTML = ''; for (const h of (disk.per_host || [])) { if (!h.on_disk) continue; const li = document.createElement('li'); li.innerHTML = `${escapeHtml(h.host)} — ${fmtBytesShort(h.size_bytes)}`; hostsEl.appendChild(li); } const errEl = el('#dd-error'); errEl.classList.add('hidden'); errEl.textContent = ''; const confirm = el('#dd-confirm'); const cancel = el('#dd-cancel'); const onCancel = () => dlg.close(); const onConfirm = async () => { confirm.disabled = true; cancel.disabled = true; confirm.textContent = 'Deleting…'; try { const r = await fetchJSON(`/api/models/${encodeURIComponent(key)}/disk`, { method: 'DELETE' }); dlg.close(); // Optimistically clear local disk state for this key, then refresh. delete state.disk_status[key]; renderCards(); // Eagerly re-probe so size is accurate (and shows "not downloaded" pill). loadDiskStatus(); const freed = r && typeof r.bytes_freed === 'number' ? fmtBytesShort(r.bytes_freed) : ''; console.log(`Deleted ${m.display_name} from disk${freed ? ` — freed ${freed}` : ''}.`); } catch (e) { errEl.textContent = e.message || 'Delete failed'; errEl.classList.remove('hidden'); } finally { confirm.disabled = false; cancel.disabled = false; confirm.textContent = 'Delete from disk'; } }; cancel.onclick = onCancel; confirm.onclick = onConfirm; dlg.showModal(); } async function triggerSwap(modelKey) { if (state.swap_job_id) return; try { const r = await fetchJSON('/api/swap', { method: 'POST', headers: { 'content-type': 'application/json' }, body: JSON.stringify({ model_key: modelKey }), }); attachToSwap(r.job_id, /*needsBackfill=*/false); } catch (e) { alert('Failed to start swap: ' + e.message); } } async function triggerDownloadForKey(modelKey) { const m = state.models[modelKey]; if (!m) return; if (dlState.job_id) { alert('A download is already in progress; wait for it to finish.'); return; } // Pick the download target from the model's mode: // solo -> spark1 only // cluster -> both Sparks (fetch on Spark 1, rsync to Spark 2 in parallel) const dlMode = m.mode === 'cluster' ? 'cluster' : 'spark1'; const sizeNote = m.size_gb ? ` (~${m.size_gb} GB)` : ''; const target = m.mode === 'cluster' ? 'both Sparks' : 'Spark 1'; if (!confirm(`Download "${m.display_name}"${sizeNote} to ${target}? Large models can take a while; you can watch progress in the download panel.`)) { return; } dlState.last_repo = m.repo; dlState.last_mode = dlMode; try { const r = await fetchJSON('/api/download', { method: 'POST', headers: { 'content-type': 'application/json' }, body: JSON.stringify({ repo: m.repo, mode: dlMode }), }); // Open the download panel + attach to progress stream openDownloadForm(); attachToDownload(r.job_id); } catch (e) { alert('Failed to start download: ' + e.message); } } async function attachToSwap(jobId, needsBackfill) { if (state.swap_eventsource) { state.swap_eventsource.close(); state.swap_eventsource = null; } state.swap_job_id = jobId; state.swap_lines = []; state.swap_phase = 'Starting…'; state.swap_phase_detail = ''; state.swap_progress = 0.05; el('#swap-log').textContent = ''; el('#swap-panel').classList.remove('hidden'); renderSwapPanel(); // Backfill (if joining mid-swap) — fetch the snapshot so we have started_at + history try { const snap = await fetchJSON(`/api/swap/${jobId}`); const ts = Date.parse(snap.started_at); if (!isNaN(ts)) startTimer(ts); state.swap_lines = snap.lines || []; for (const line of state.swap_lines) appendLog(line); deriveSwapPhase(snap.state, state.swap_lines); renderSwapPanel(); if (snap.returncode !== null && snap.returncode !== undefined) { // Already finished — close panel after a beat handleSwapDone(snap); return; } } catch (e) { if (!needsBackfill) startTimer(Date.now()); console.warn('backfill failed', e); } const es = new EventSource(`/api/swap/${jobId}/stream`); state.swap_eventsource = es; es.onmessage = (ev) => { try { const d = JSON.parse(ev.data); if (d.line !== undefined) { state.swap_lines.push(d.line); appendLog(d.line); deriveSwapPhase(d.state, state.swap_lines); renderSwapPanel(); } else if (d.state) { deriveSwapPhase(d.state, state.swap_lines); renderSwapPanel(); } } catch {} }; es.addEventListener('done', async (ev) => { let d = {}; try { d = JSON.parse(ev.data); } catch {} handleSwapDone(d); }); es.onerror = () => { // Tab backgrounded or network blip — close; status poll will reattach es.close(); state.swap_eventsource = null; }; renderCards(); } function handleSwapDone(d) { if (state.swap_eventsource) { state.swap_eventsource.close(); state.swap_eventsource = null; } const finalState = d.state || 'ready'; state.swap_phase = finalState === 'failed' ? 'Failed' : 'Ready ✓'; state.swap_phase_detail = d.returncode !== undefined ? `exit code ${d.returncode}` : ''; state.swap_progress = 1.0; renderSwapPanel(); setTimeout(() => detachSwap(), 4000); pollStatus(); } function detachSwap() { state.swap_job_id = null; if (state.swap_eventsource) { state.swap_eventsource.close(); state.swap_eventsource = null; } stopTimer(); el('#swap-panel').classList.add('hidden'); renderCards(); } function appendLog(line) { const log = el('#swap-log'); log.textContent += line + '\n'; log.scrollTop = log.scrollHeight; } // ===================== model downloads ===================== const dlState = { job_id: null, eventsource: null, started_at: null, timer_handle: null, }; function openDownloadForm() { el('#download-panel').classList.remove('hidden'); el('#download-form').classList.remove('hidden'); el('#download-progress').classList.add('hidden'); el('#dl-repo').focus(); updateDlHfLink(); } function updateDlHfLink() { const repo = el('#dl-repo').value.trim(); const link = el('#dl-hf-link'); if (repo.includes('/')) { link.href = `https://huggingface.co/${encodeURIComponent(repo)}`; link.classList.remove('hidden'); } else { link.classList.add('hidden'); } } function closeDownloadPanel() { el('#download-panel').classList.add('hidden'); el('#download-form').classList.remove('hidden'); el('#download-progress').classList.add('hidden'); el('#dl-repo').value = ''; } function dlTimerStart(startedAt) { dlState.started_at = startedAt; if (dlState.timer_handle) clearInterval(dlState.timer_handle); const tick = () => { if (!dlState.started_at) return; const sec = Math.max(0, Math.floor((Date.now() - dlState.started_at) / 1000)); const m = Math.floor(sec / 60); const s = sec % 60; el('#dl-elapsed').textContent = `${m}:${s.toString().padStart(2, '0')}`; }; tick(); dlState.timer_handle = setInterval(tick, 500); } function dlTimerStop() { if (dlState.timer_handle) { clearInterval(dlState.timer_handle); dlState.timer_handle = null; } } async function startDownload() { const repo = el('#dl-repo').value.trim(); const mode = document.querySelector('input[name="dl-mode"]:checked').value; if (!repo || !repo.includes('/')) { alert('Enter a HuggingFace repo in the form "org/name", e.g. RedHatAI/Qwen3.6-35B-A3B-NVFP4'); return; } dlState.last_repo = repo; dlState.last_mode = mode; try { const r = await fetchJSON('/api/download', { method: 'POST', headers: { 'content-type': 'application/json' }, body: JSON.stringify({ repo, mode }), }); attachToDownload(r.job_id); } catch (e) { alert('Failed to start download: ' + e.message); } } function renderDownloadProgress(p) { el('#dl-phase').textContent = p.phase || 'Working…'; const statsParts = []; if (p.downloaded && p.total) statsParts.push(`${p.downloaded} / ${p.total}`); if (p.rate) statsParts.push(p.rate); if (p.eta) statsParts.push(`ETA ${p.eta}`); el('#dl-stats').textContent = statsParts.join(' · '); const pct = Math.max(2, Math.min(100, p.percent || 2)); el('#dl-progress-fill').style.width = `${pct}%`; el('#dl-phase-detail').textContent = p.percent > 0 ? `${p.percent.toFixed(1)}%` : ''; } function dlAppendLog(line) { const log = el('#dl-log'); log.textContent += line + '\n'; log.scrollTop = log.scrollHeight; } async function attachToDownload(jobId) { if (dlState.eventsource) { dlState.eventsource.close(); dlState.eventsource = null; } dlState.job_id = jobId; el('#download-form').classList.add('hidden'); el('#download-progress').classList.remove('hidden'); el('#dl-log').textContent = ''; el('#dl-title').textContent = 'Downloading…'; try { const snap = await fetchJSON(`/api/download/${jobId}`); dlTimerStart(Date.parse(snap.started_at)); for (const line of snap.lines || []) dlAppendLog(line); renderDownloadProgress(snap.progress); if (snap.returncode !== null && snap.returncode !== undefined) { handleDownloadDone(snap); return; } } catch (e) { console.warn('download backfill failed', e); dlTimerStart(Date.now()); } const es = new EventSource(`/api/download/${jobId}/stream`); dlState.eventsource = es; es.onmessage = (ev) => { try { const d = JSON.parse(ev.data); if (d.line !== undefined) dlAppendLog(d.line); } catch {} }; es.addEventListener('progress', (ev) => { try { renderDownloadProgress(JSON.parse(ev.data)); } catch {} }); es.addEventListener('done', (ev) => { let d = {}; try { d = JSON.parse(ev.data); } catch {} handleDownloadDone(d); }); es.onerror = () => { es.close(); dlState.eventsource = null; }; } function handleDownloadDone(d) { if (dlState.eventsource) { dlState.eventsource.close(); dlState.eventsource = null; } dlTimerStop(); if (d.state === 'failed') { el('#dl-title').textContent = `Failed (rc=${d.returncode})`; el('#dl-phase').textContent = 'Failed'; } else { el('#dl-title').textContent = 'Done'; el('#dl-phase').textContent = 'Done ✓'; el('#dl-progress-fill').style.width = '100%'; // Offer to add to catalog const repo = dlState.last_repo; const mode = dlState.last_mode; if (repo) { setTimeout(() => openCatalogDialog(repo, mode), 600); } } dlState.job_id = null; } // ===================== Advanced / Add to catalog ===================== function gpuTotalGB(modelMode) { // Solo uses Spark 1's GPU only. Cluster shares across both — but loading is per-Spark. const s1 = state.hardware?.spark1; const s2 = state.hardware?.spark2; const g1 = s1?.gpu_mem_total_mib ? s1.gpu_mem_total_mib / 1024 : null; const g2 = s2?.gpu_mem_total_mib ? s2.gpu_mem_total_mib / 1024 : null; if (modelMode === 'cluster' && g1 && g2) return Math.min(g1, g2); // bottleneck return g1 || g2 || null; } function knobContextHint(field, value, mode) { if (field === 'gpu_memory_utilization') { const gb = gpuTotalGB(mode); if (!gb) return ''; const used = (value * gb).toFixed(0); const free = (gb - value * gb).toFixed(0); return `~${used} GB allocated to model + KV cache · ~${free} GB left for OS, buffers, other GPU workloads.`; } if (field === 'max_model_len') { if (!value) return ''; const pages = Math.round(value / 350); // ~350 tokens per page const kvBytes = (value * 2 * 4 * 32 * 128); // rough fp16 KV cache size for typical 32-layer model return `~${pages.toLocaleString()} pages of text (very rough). Larger context = more GPU memory reserved for KV cache.`; } if (field === 'fastsafetensors') return value ? 'Faster cold-start weight loading.' : 'Standard safetensors loading.'; if (field === 'prefix_caching') return value ? 'Reuses GPU state for repeated prompt prefixes (e.g. long system prompts).' : 'Off — every request re-processes the full prompt.'; if (field === 'kv_cache_dtype') return value === 'fp8' ? 'Halves KV cache memory (fits ~2× more context). Quality cost is usually imperceptible.' : 'Default precision.'; return ''; } function ensureKnobHint(rowEl, id) { let h = rowEl.querySelector(`.knob-hint[data-for="${id}"]`); if (!h) { h = document.createElement('div'); h.className = 'knob-hint muted small'; h.dataset.for = id; rowEl.appendChild(h); } return h; } function openAdvanced(key) { const m = state.models[key]; if (!m) return; const dlg = el('#advanced-dialog'); el('#adv-title').textContent = `Advanced — ${m.display_name}`; const k = m.effective_knobs || {}; el('#adv-mml').value = k.max_model_len ?? ''; el('#adv-gmu').value = k.gpu_memory_utilization ?? 0.85; el('#adv-gmu-out').value = parseFloat(el('#adv-gmu').value).toFixed(2); el('#adv-fst').checked = !!k.fastsafetensors; el('#adv-pcache').checked = !!k.prefix_caching; el('#adv-fp8').checked = k.kv_cache_dtype === 'fp8'; // Wire up live knob hints const updateHints = () => { const mml = parseInt(el('#adv-mml').value, 10); const gmu = parseFloat(el('#adv-gmu').value); ensureKnobHint(el('#adv-mml').parentElement, 'mml').textContent = knobContextHint('max_model_len', mml, m.mode); ensureKnobHint(el('#adv-gmu').parentElement, 'gmu').textContent = knobContextHint('gpu_memory_utilization', gmu, m.mode); ensureKnobHint(el('#adv-fst').parentElement, 'fst').textContent = knobContextHint('fastsafetensors', el('#adv-fst').checked, m.mode); ensureKnobHint(el('#adv-pcache').parentElement, 'pcache').textContent = knobContextHint('prefix_caching', el('#adv-pcache').checked, m.mode); ensureKnobHint(el('#adv-fp8').parentElement, 'fp8').textContent = knobContextHint('kv_cache_dtype', el('#adv-fp8').checked ? 'fp8' : 'auto', m.mode); }; updateHints(); el('#adv-mml').oninput = updateHints; el('#adv-gmu').oninput = (e) => { el('#adv-gmu-out').value = parseFloat(e.target.value).toFixed(2); updateHints(); }; el('#adv-fst').onchange = updateHints; el('#adv-pcache').onchange = updateHints; el('#adv-fp8').onchange = updateHints; const del = el('#adv-delete'); del.classList.toggle('hidden', !m.custom); del.onclick = async () => { if (!confirm(`Delete "${m.display_name}" from the catalog? The model weights on disk are NOT deleted.`)) return; try { await fetchJSON(`/api/models/${encodeURIComponent(key)}`, { method: 'DELETE' }); dlg.close(); await loadModels(); pollStatus(); } catch (e) { alert('Delete failed: ' + e.message); } }; const form = el('#advanced-form'); form.onsubmit = async (e) => { e.preventDefault(); const knobs = {}; const mml = parseInt(el('#adv-mml').value, 10); if (Number.isFinite(mml) && mml > 0) knobs.max_model_len = mml; const gmu = parseFloat(el('#adv-gmu').value); if (Number.isFinite(gmu)) knobs.gpu_memory_utilization = gmu; if (el('#adv-fst').checked) knobs.fastsafetensors = true; else knobs.fastsafetensors = false; if (el('#adv-pcache').checked) knobs.prefix_caching = true; else knobs.prefix_caching = false; knobs.kv_cache_dtype = el('#adv-fp8').checked ? 'fp8' : 'auto'; try { await fetchJSON(`/api/models/${encodeURIComponent(key)}/knobs`, { method: 'PUT', headers: { 'content-type': 'application/json' }, body: JSON.stringify({ knobs }), }); dlg.close(); await loadModels(); pollStatus(); } catch (e) { alert('Save failed: ' + e.message); } }; dlg.showModal(); } function openCatalogDialog(repo, mode) { const dlg = el('#catalog-dialog'); const key = repo.split('/').pop().toLowerCase().replace(/[^a-z0-9_-]/g, '-'); el('#cd-key').value = key; el('#cd-name').value = repo.split('/').pop(); el('#cd-repo').value = repo; el('#cd-size').value = ''; el('#cd-mode').value = mode || 'solo'; el('#cd-desc').value = ''; el('#cd-mml').value = 32768; el('#cd-gmu').value = 0.85; el('#cd-gmu-out').value = '0.85'; el('#cd-fst').checked = true; el('#cd-pcache').checked = true; el('#cd-fp8').checked = true; dlg.showModal(); } function setupCatalogDialog() { el('#cd-cancel').addEventListener('click', () => el('#catalog-dialog').close()); el('#cd-gmu').addEventListener('input', (e) => { el('#cd-gmu-out').value = parseFloat(e.target.value).toFixed(2); }); el('#catalog-form').addEventListener('submit', async (e) => { e.preventDefault(); const body = { key: el('#cd-key').value.trim(), display_name: el('#cd-name').value.trim(), repo: el('#cd-repo').value.trim(), size_gb: parseFloat(el('#cd-size').value) || 0, mode: el('#cd-mode').value, description: el('#cd-desc').value.trim() || null, vllm_args: [], knobs: { max_model_len: parseInt(el('#cd-mml').value, 10) || 32768, gpu_memory_utilization: parseFloat(el('#cd-gmu').value), fastsafetensors: el('#cd-fst').checked, prefix_caching: el('#cd-pcache').checked, kv_cache_dtype: el('#cd-fp8').checked ? 'fp8' : 'auto', }, }; try { await fetchJSON('/api/models', { method: 'POST', headers: { 'content-type': 'application/json' }, body: JSON.stringify(body), }); el('#catalog-dialog').close(); closeDownloadPanel(); await loadModels(); pollStatus(); } catch (e) { alert('Add to catalog failed: ' + e.message); } }); } function setupAdvancedDialog() { el('#adv-cancel').addEventListener('click', () => el('#advanced-dialog').close()); el('#adv-gmu').addEventListener('input', (e) => { el('#adv-gmu-out').value = parseFloat(e.target.value).toFixed(2); }); } // ===================== NIM installer ===================== const nimState = { catalog: null, job_id: null, eventsource: null, timer: null, started_at: null, }; async function loadNimCatalog() { try { nimState.catalog = await fetchJSON('/api/nim/catalog'); el('#nim-catalog-link').href = nimState.catalog.catalog_url; const warn = el('#nim-key-warn'); if (!nimState.catalog.ngc_key_configured) { warn.classList.add('nim-key-warn'); warn.innerHTML = '⚠️ NGC API key not set. Open Configure Sparks in StartOS and paste your NGC personal API key, otherwise installs will fail. Get a key'; } else { warn.classList.remove('nim-key-warn'); warn.textContent = ''; } const grid = el('#nim-suggested'); grid.innerHTML = ''; for (const s of nimState.catalog.suggested || []) { const card = document.createElement('div'); card.className = 'nim-card'; card.innerHTML = `
${escapeHtml(s.name)} · ${escapeHtml(s.kind || 'nim')}
${escapeHtml(s.description || '')}
${escapeHtml(s.image)}
`; grid.appendChild(card); } grid.querySelectorAll('.nim-pick').forEach(btn => { btn.addEventListener('click', () => { el('#nim-image').value = btn.dataset.image; el('#nim-container').value = btn.dataset.container; el('#nim-port').value = btn.dataset.port; el('#nim-kind').value = btn.dataset.kind || 'nim'; }); }); } catch (e) { console.warn('nim catalog failed', e); } } function openNimDialog() { loadNimCatalog(); el('#nim-dialog').showModal(); } async function submitNim(e) { e.preventDefault(); const body = { image: el('#nim-image').value.trim(), container: el('#nim-container').value.trim(), port: parseInt(el('#nim-port').value, 10), host: el('#nim-host').value, kind: el('#nim-kind').value, }; if (!body.image || !body.container || !body.port) { alert('Image, container name, and port are required.'); return; } try { const r = await fetchJSON('/api/nim/install', { method: 'POST', headers: { 'content-type': 'application/json' }, body: JSON.stringify(body), }); el('#nim-dialog').close(); attachNimProgress(r.job_id); } catch (e) { alert('Install failed: ' + e.message); } } function nimTimerStart(at) { nimState.started_at = at; if (nimState.timer) clearInterval(nimState.timer); const tick = () => { if (!nimState.started_at) return; const sec = Math.max(0, Math.floor((Date.now() - nimState.started_at) / 1000)); const m = Math.floor(sec / 60); const s = sec % 60; el('#nim-prog-elapsed').textContent = `${m}:${s.toString().padStart(2, '0')}`; }; tick(); nimState.timer = setInterval(tick, 500); } async function attachNimProgress(jobId) { nimState.job_id = jobId; el('#nim-prog-log').textContent = ''; el('#nim-prog-title').textContent = 'Installing…'; el('#nim-progress-dialog').showModal(); try { const snap = await fetchJSON(`/api/nim/install/${jobId}`); nimTimerStart(Date.parse(snap.started_at)); el('#nim-prog-phase').textContent = snap.phase || 'Working…'; el('#nim-prog-log').textContent = (snap.lines || []).join('\n'); if (snap.returncode !== null) { onNimDone(snap); return; } } catch { nimTimerStart(Date.now()); } const es = new EventSource(`/api/nim/install/${jobId}/stream`); nimState.eventsource = es; es.onmessage = ev => { try { const d = JSON.parse(ev.data); if (d.line !== undefined) { const log = el('#nim-prog-log'); log.textContent += d.line + '\n'; log.scrollTop = log.scrollHeight; } } catch {} }; es.addEventListener('phase', ev => { try { el('#nim-prog-phase').textContent = JSON.parse(ev.data).phase; } catch {} }); es.addEventListener('done', ev => { let d = {}; try { d = JSON.parse(ev.data); } catch {} onNimDone(d); }); es.onerror = () => { es.close(); nimState.eventsource = null; }; } function onNimDone(d) { if (nimState.eventsource) { nimState.eventsource.close(); nimState.eventsource = null; } if (nimState.timer) { clearInterval(nimState.timer); nimState.timer = null; } if (d.state === 'failed') { el('#nim-prog-title').textContent = `Failed (rc=${d.returncode})`; el('#nim-prog-phase').textContent = 'Failed'; } else { el('#nim-prog-title').textContent = 'Installed'; el('#nim-prog-phase').textContent = 'Done ✓ — service will appear when the container reports healthy.'; } pollStatus(); } // ===================== Explain context (LLM commit summary) ===================== let explainEventSource = null; async function explainContext() { if (explainEventSource) { explainEventSource.close(); explainEventSource = null; } const section = el('#ub-explain-section'); const content = el('#ub-explain-content'); section.classList.remove('hidden'); section.open = true; content.innerHTML = 'Asking the loaded model…'; let text = ''; const es = new EventSource('/api/explain-updates'); explainEventSource = es; let firstChunk = true; es.onmessage = (ev) => { try { const d = JSON.parse(ev.data); if (d.error) { content.innerHTML = `Couldn't get explanation: ${escapeHtml(d.error)}`; return; } if (firstChunk) { content.innerHTML = ''; firstChunk = false; } if (d.content) { text += d.content; content.textContent = text; content.scrollTop = content.scrollHeight; } else if (d.reasoning) { // Show reasoning tokens but de-emphasized let r = content.querySelector('.reasoning-current'); if (!r) { r = document.createElement('div'); r.className = 'reasoning reasoning-current'; r.textContent = ''; content.appendChild(r); } r.textContent += d.reasoning; } } catch {} }; es.addEventListener('done', () => { es.close(); explainEventSource = null; // strip the reasoning-current marker const r = content.querySelector('.reasoning-current'); if (r) r.classList.remove('reasoning-current'); }); es.onerror = () => { es.close(); explainEventSource = null; }; } // ===================== updates (spark-vllm-docker) ===================== const updState = { info: null, job_id: null, eventsource: null, started_at: null, timer_handle: null, }; async function pollUpdates() { try { const info = await fetchJSON('/api/updates'); updState.info = info; renderUpdateBanner(); } catch (e) { console.warn('updates poll failed', e); } } function renderUpdateBanner() { const banner = el('#update-banner'); const info = updState.info; const text = el('#ub-text'); const details = el('#ub-details'); const apply = el('#ub-apply'); const list = el('#ub-list'); const log = el('#ub-log'); if (!info || !info.ok) { banner.classList.add('hidden'); return; } banner.classList.remove('hidden'); const behind = info.behind || 0; const dirty = info.dirty || 0; banner.classList.toggle('up-to-date', behind === 0 && !dirty); banner.classList.toggle('warn', !!dirty); const explain = el('#ub-explain'); if (dirty > 0) { text.textContent = `${dirty} local change${dirty === 1 ? '' : 's'} in ~/spark-vllm-docker. Resolve before updating.`; details.classList.add('hidden'); apply.classList.add('hidden'); explain.classList.add('hidden'); } else if (behind === 0) { text.textContent = `spark-vllm-docker is up to date (${info.current || ''})`; details.classList.add('hidden'); apply.classList.add('hidden'); list.classList.add('hidden'); explain.classList.add('hidden'); } else { text.textContent = `${behind} commit${behind === 1 ? '' : 's'} behind upstream`; details.classList.remove('hidden'); apply.classList.remove('hidden'); explain.classList.remove('hidden'); log.textContent = (info.log || []).join('\n') || '(no log)'; } } function ubTimerStart(startedAt) { updState.started_at = startedAt; if (updState.timer_handle) clearInterval(updState.timer_handle); const tick = () => { if (!updState.started_at) return; const sec = Math.max(0, Math.floor((Date.now() - updState.started_at) / 1000)); const m = Math.floor(sec / 60); const s = sec % 60; el('#ub-elapsed').textContent = `${m}:${s.toString().padStart(2, '0')}`; }; tick(); updState.timer_handle = setInterval(tick, 500); } async function applyUpdate() { if (!confirm('This pulls the latest spark-vllm-docker and rebuilds the vLLM container. Can take 5–40 minutes; the cluster is unaffected until you swap to a different model. Continue?')) return; try { const r = await fetchJSON('/api/updates/apply', { method: 'POST', headers: { 'content-type': 'application/json' }, body: JSON.stringify({ mode: 'cluster' }), }); attachToUpdate(r.job_id); } catch (e) { alert('Failed to start update: ' + e.message); } } async function attachToUpdate(jobId) { updState.job_id = jobId; el('#ub-progress').classList.remove('hidden'); el('#ub-apply').classList.add('hidden'); el('#ub-stream').textContent = ''; el('#ub-phase').textContent = 'Starting…'; try { const snap = await fetchJSON(`/api/updates/${jobId}`); ubTimerStart(Date.parse(snap.started_at)); el('#ub-phase').textContent = snap.phase || 'Working…'; el('#ub-stream').textContent = (snap.lines || []).join('\n'); if (snap.returncode !== null) { handleUpdateDone(snap); return; } } catch (e) { ubTimerStart(Date.now()); } const es = new EventSource(`/api/updates/${jobId}/stream`); updState.eventsource = es; es.onmessage = (ev) => { try { const d = JSON.parse(ev.data); if (d.line !== undefined) { const log = el('#ub-stream'); log.textContent += d.line + '\n'; log.scrollTop = log.scrollHeight; } } catch {} }; es.addEventListener('phase', (ev) => { try { el('#ub-phase').textContent = JSON.parse(ev.data).phase; } catch {} }); es.addEventListener('done', (ev) => { let d = {}; try { d = JSON.parse(ev.data); } catch {} handleUpdateDone(d); }); es.onerror = () => { es.close(); updState.eventsource = null; }; } function handleUpdateDone(d) { if (updState.eventsource) { updState.eventsource.close(); updState.eventsource = null; } if (updState.timer_handle) { clearInterval(updState.timer_handle); updState.timer_handle = null; } el('#ub-phase').textContent = d.state === 'failed' ? `Failed (rc=${d.returncode})` : 'Done ✓ — re-check from the banner.'; setTimeout(pollUpdates, 2000); } async function init() { setupCopyButtons(); el('#open-download').addEventListener('click', openDownloadForm); el('#dl-cancel').addEventListener('click', closeDownloadPanel); el('#dl-start').addEventListener('click', startDownload); el('#dl-repo').addEventListener('keydown', (e) => { if (e.key === 'Enter') startDownload(); }); el('#ub-details').addEventListener('click', () => { const list = el('#ub-list'); list.classList.toggle('hidden'); list.open = !list.open; }); el('#ub-apply').addEventListener('click', applyUpdate); el('#ub-explain').addEventListener('click', explainContext); el('#dl-repo').addEventListener('input', updateDlHfLink); el('#open-nim').addEventListener('click', openNimDialog); el('#nim-cancel').addEventListener('click', () => el('#nim-dialog').close()); el('#nim-form').addEventListener('submit', submitNim); el('#nim-prog-close').addEventListener('click', () => el('#nim-progress-dialog').close()); el('#mb-update-close').addEventListener('click', () => el('#mb-update-dialog').close()); // Dismissing the modal (Close or Esc) stops streaming; the job runs on // server-side and re-clicking Update returns a 409 if still in progress. el('#mb-update-dialog').addEventListener('close', () => { if (mbState.eventsource) { mbState.eventsource.close(); mbState.eventsource = null; } if (mbState.timer) { clearInterval(mbState.timer); mbState.timer = null; } state.mb_update_in_flight = false; renderServices(); }); el('#mb-logs-close').addEventListener('click', () => el('#mb-logs-dialog').close()); el('#mb-logs-refresh').addEventListener('click', loadMatrixBridgeLogs); el('#open-connectivity').addEventListener('click', openConnectivityDialog); el('#connectivity-close').addEventListener('click', () => el('#connectivity-dialog').close()); // Hardware-card buttons (Wake-on-LAN on unreachable cards; SSH-key copy on // reachable ones) are rendered dynamically, so delegate from the grid. el('#hardware-grid').addEventListener('click', (e) => { const wbtn = e.target.closest('[data-wake]'); if (wbtn) { wakeSpark(wbtn.dataset.wake); return; } const kbtn = e.target.closest('[data-ssh-key]'); if (kbtn) { copySparkSshKey(kbtn.dataset.sshKey, kbtn); return; } }); el('#sshkey-close').addEventListener('click', () => el('#sshkey-dialog').close()); setupCatalogDialog(); setupAdvancedDialog(); // Open WebUI link from /api/config try { state.config = await fetchJSON('/api/config'); if (state.config.open_webui_url) { const a = el('#open-webui-link'); a.href = state.config.open_webui_url; a.classList.remove('hidden'); } } catch {} setupDashboardTabs(); setupEndpointCollapse(); await loadModels(); await pollStatus(); await renderServices(); pollHardware(); pollUpdates(); // Disk-status probe runs after first paint — slow over SSH and not blocking. loadDiskStatus(); // Speech-model patches panel — slow over SSH, runs after first paint. renderSpeechModels(); setInterval(pollStatus, 5000); setInterval(pollHardware, 8000); // every 8s setInterval(pollUpdates, 300000); // every 5 min setInterval(loadDiskStatus, 60000); // every 60s — disk state changes rarely setInterval(renderSpeechModels, 120000); // every 2 min — patches change rarely } init();