v0.4.0 - NIM installer + dashboard resilience
Hotfix (was v0.3.1):
- services.py: cache 'unreachable' per (host,user) for 25s so a dead Spark doesn't hang every /api/services call behind a 6s SSH timeout
- ssh_run timeout reduced 10 -> 6s for docker_state probes
- hardware probe: shorter SSH timeout (6s), longer cache TTL for failures (25s)
- JS pollStatus retries loadModels() if state.models is empty (recovers from cold-start proxy timeout)
- Unreachable hardware card now includes troubleshooting steps (Spark Control cannot SSH into an unreachable Spark to restart it)
v0.4 NIM installer:
- nim.py module: curated SUGGESTED_NIMS list (Parakeet, Magpie, Riva) + NimManager that runs docker login nvcr.io + docker pull + docker run -d --gpus all -p PORT:PORT -v VOL:/opt/nim/.cache -e NGC_API_KEY -e ... --restart=unless-stopped + chown the volume to uid 1000 + restart. Streams all output via SSE; redacts the API key from log lines.
- custom_services.py: persists installed NIMs to /data/services-overrides.yaml so they appear in the services panel after install
- services.py: merges custom services into the panel
- /api/nim/catalog GET, /api/nim/install POST + GET/SSE
- /api/services/{name} DELETE for custom services
- UI: '+ Install NIM' button next to 'Always-on services'; modal lists curated images each with a 'Pick' button + a custom-image form; installation runs in a second dialog with phase + elapsed timer + collapsible log
- NGC API key field added to Configure Sparks (masked); injected as NGC_API_KEY env var into the container
Package: bump 0.4.0:0; main.ts adds SERVICES_OVERRIDES + NGC_API_KEY env vars
This commit is contained in:
@@ -144,6 +144,15 @@ function renderHardware() {
|
||||
<span class="meta">unreachable</span>
|
||||
</div>
|
||||
<div class="muted small">${escapeHtml(s.host || '')} — ${escapeHtml(s.error || 'no response')}</div>
|
||||
<div class="muted small" style="line-height:1.5">
|
||||
Spark Control can't restart a Spark that won't answer SSH. Steps to try:
|
||||
<ol style="margin: 6px 0 0 18px; padding: 0;">
|
||||
<li>Verify it's powered on (check the front LED).</li>
|
||||
<li>Ping it from another LAN device.</li>
|
||||
<li>Power-cycle it physically.</li>
|
||||
<li>If it boots, this card will go green again automatically.</li>
|
||||
</ol>
|
||||
</div>
|
||||
`;
|
||||
grid.appendChild(card);
|
||||
continue;
|
||||
@@ -510,6 +519,10 @@ async function pollStatus() {
|
||||
renderCurrent(status);
|
||||
renderEndpoint(status);
|
||||
renderHealth(status);
|
||||
// If models hasn't loaded yet (init may have hit a transient proxy timeout), retry.
|
||||
if (!state.models || Object.keys(state.models).length === 0) {
|
||||
try { await loadModels(); } catch {}
|
||||
}
|
||||
// Refresh services state lazily — every 5s poll triggers this too.
|
||||
try {
|
||||
state.services = await fetchJSON('/api/services');
|
||||
@@ -953,6 +966,147 @@ function setupAdvancedDialog() {
|
||||
el('#adv-gmu').addEventListener('input', (e) => { el('#adv-gmu-out').value = parseFloat(e.target.value).toFixed(2); });
|
||||
}
|
||||
|
||||
// ===================== NIM installer =====================
|
||||
|
||||
// Shared client-side state for the NIM installer dialogs (catalog picker + progress view).
const nimState = {
|
||||
// Last response of GET /api/nim/catalog, assigned by loadNimCatalog; null until the first load.
catalog: null,
|
||||
// Id of the install job most recently passed to attachNimProgress.
job_id: null,
|
||||
// EventSource following the current job's progress stream; null when no stream is open.
eventsource: null,
|
||||
// setInterval handle that refreshes the elapsed-time display; null when the timer is stopped.
timer: null,
|
||||
// Origin of the elapsed timer; nimTimerStart stores it and subtracts it from Date.now().
started_at: null,
|
||||
};
|
||||
|
||||
/**
 * Fetch the NIM catalog and render it into the install dialog.
 *
 * Stores the response in `nimState.catalog`, points the catalog link at
 * `catalog_url`, shows or clears the "NGC API key not set" warning, and rebuilds
 * the list of suggested images. Each card's "Pick" button copies that image's
 * defaults into the custom-image form fields.
 *
 * Failures are logged with console.warn and otherwise ignored (best effort), so
 * the dialog still opens with the manual form usable.
 *
 * @returns {Promise<void>}
 */
async function loadNimCatalog() {
  try {
    const catalog = await fetchJSON('/api/nim/catalog');
    nimState.catalog = catalog;

    // Only overwrite the link when the server actually supplied a URL; otherwise
    // the href would become the literal string "undefined".
    if (catalog.catalog_url) {
      el('#nim-catalog-link').href = catalog.catalog_url;
    }

    const warn = el('#nim-key-warn');
    if (!catalog.ngc_key_configured) {
      warn.classList.add('nim-key-warn');
      warn.innerHTML = '⚠️ NGC API key not set. Open <strong>Configure Sparks</strong> in StartOS and paste your NGC personal API key, otherwise installs will fail. <a href="https://ngc.nvidia.com/setup/personal-key" target="_blank" rel="noopener">Get a key</a>';
    } else {
      warn.classList.remove('nim-key-warn');
      warn.textContent = '';
    }

    const grid = el('#nim-suggested');
    grid.innerHTML = '';
    for (const s of catalog.suggested || []) {
      // Normalise optional fields once so every interpolation below is a string
      // and the kind fallback matches in both the label and the data attribute.
      const kind = s.kind || 'nim';
      const container = s.default_container || '';
      const port = s.default_port == null ? '' : String(s.default_port);
      // Only render http(s) homepages as links; escaping alone does not stop
      // a `javascript:` URL from being clickable.
      const homepage = /^https?:\/\//i.test(s.homepage || '') ? s.homepage : '';

      const card = document.createElement('div');
      card.className = 'nim-card';
      card.innerHTML = `
        <div class="info">
          <div class="name">${escapeHtml(s.name)} <span class="muted small">· ${escapeHtml(kind)}</span></div>
          <div class="desc">${escapeHtml(s.description || '')}</div>
          <div class="img">${escapeHtml(s.image)}</div>
          <div class="links">${homepage ? `<a href="${escapeHtml(homepage)}" target="_blank" rel="noopener">View on NGC ↗</a>` : ''}</div>
        </div>
        <button type="button" class="btn primary nim-pick" data-image="${escapeHtml(s.image)}" data-container="${escapeHtml(container)}" data-port="${escapeHtml(port)}" data-kind="${escapeHtml(kind)}">Pick</button>
      `;
      grid.appendChild(card);
    }

    // Wire the buttons after rendering: picking a card pre-fills the custom form.
    grid.querySelectorAll('.nim-pick').forEach((btn) => {
      btn.addEventListener('click', () => {
        el('#nim-image').value = btn.dataset.image;
        el('#nim-container').value = btn.dataset.container;
        el('#nim-port').value = btn.dataset.port;
        el('#nim-kind').value = btn.dataset.kind || 'nim';
      });
    });
  } catch (e) {
    console.warn('nim catalog failed', e);
  }
}
|
||||
|
||||
/**
 * Click handler for the "+ Install NIM" button.
 *
 * Starts a catalog refresh and opens the install dialog as a modal. The refresh
 * is not awaited, so the dialog appears immediately and is filled in once the
 * catalog request completes.
 */
function openNimDialog() {
  // Fire-and-forget: the call is not awaited, so opening the dialog never waits on the network.
  loadNimCatalog();

  const installDialog = el('#nim-dialog');
  installDialog.showModal();
}
|
||||
|
||||
/**
 * Submit handler for the NIM install form.
 *
 * Reads image, container name, port, target host and kind from the form,
 * validates them, POSTs them to /api/nim/install and, on success, closes the
 * install dialog and hands the returned job id to attachNimProgress.
 *
 * The Install button is disabled while the request is in flight so a double
 * click (or a second Enter press) cannot start two installs of the same image.
 *
 * @param {Event} e - the form's submit event; its default action is cancelled.
 * @returns {Promise<void>}
 */
async function submitNim(e) {
  e.preventDefault();

  const body = {
    image: el('#nim-image').value.trim(),
    container: el('#nim-container').value.trim(),
    port: Number.parseInt(el('#nim-port').value, 10),
    host: el('#nim-host').value,
    kind: el('#nim-kind').value,
  };

  // An empty or non-numeric port parses to NaN, which is falsy, so it is caught here too.
  if (!body.image || !body.container || !body.port) {
    alert('Image, container name, and port are required.');
    return;
  }
  // Mirror the min/max on the number input so the check holds even if the
  // browser's own form validation was bypassed.
  if (body.port < 1 || body.port > 65535) {
    alert('Port must be between 1 and 65535.');
    return;
  }

  const startBtn = el('#nim-start');
  if (startBtn.disabled) return; // a request is already in flight
  startBtn.disabled = true;

  try {
    const r = await fetchJSON('/api/nim/install', {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: JSON.stringify(body),
    });
    if (!r || !r.job_id) {
      throw new Error('server did not return a job id');
    }
    el('#nim-dialog').close();
    // Not awaited: the progress dialog manages its own lifecycle from here on.
    attachNimProgress(r.job_id);
  } catch (err) {
    alert('Install failed: ' + err.message);
  } finally {
    startBtn.disabled = false;
  }
}
|
||||
|
||||
/**
 * (Re)start the elapsed-time display in the install progress dialog.
 *
 * Stores the start time in `nimState.started_at`, cancels any previous interval,
 * renders once immediately and then re-renders every 500 ms as `m:ss`.
 *
 * @param {number} at - start time in epoch milliseconds. If it is not a finite
 *   positive number (e.g. the NaN that Date.parse returns for a missing or
 *   malformed timestamp) the current time is used, so the display restarts at
 *   0:00 instead of silently keeping the previous job's value.
 */
function nimTimerStart(at) {
  nimState.started_at = Number.isFinite(at) && at > 0 ? at : Date.now();
  if (nimState.timer) clearInterval(nimState.timer);

  const tick = () => {
    if (!nimState.started_at) return;
    // Clamp at zero so a start time slightly ahead of the browser clock does not show negative time.
    const sec = Math.max(0, Math.floor((Date.now() - nimState.started_at) / 1000));
    const m = Math.floor(sec / 60);
    const s = sec % 60;
    el('#nim-prog-elapsed').textContent = `${m}:${s.toString().padStart(2, '0')}`;
  };

  tick();
  nimState.timer = setInterval(tick, 500);
}
|
||||
|
||||
/**
 * Open the progress dialog for an install job and follow the job until it ends.
 *
 * Steps:
 *  1. Close any stream left over from a previous job, reset the dialog, show it.
 *  2. Fetch a snapshot from /api/nim/install/{jobId} to seed the timer, phase and
 *     log. If the snapshot already carries a return code, finish via onNimDone.
 *  3. Otherwise open an EventSource on /api/nim/install/{jobId}/stream and apply
 *     its default messages (log lines), `phase` events and the final `done` event.
 *
 * If the stream errors out, the snapshot is fetched once more to find out whether
 * the job finished in the meantime; if that cannot be determined the timer is
 * stopped and the phase line tells the user the connection was lost, instead of
 * leaving the dialog on "Installing…" with a running clock.
 *
 * @param {string} jobId - job id returned by POST /api/nim/install.
 * @returns {Promise<void>}
 */
async function attachNimProgress(jobId) {
  // Drop a stream from an earlier job so its events cannot write into this dialog.
  if (nimState.eventsource) {
    nimState.eventsource.close();
    nimState.eventsource = null;
  }
  nimState.job_id = jobId;

  const log = el('#nim-prog-log');
  const phase = el('#nim-prog-phase');
  log.textContent = '';
  el('#nim-prog-title').textContent = 'Installing…';
  const dialog = el('#nim-progress-dialog');
  // showModal() can throw on a dialog that is already open.
  if (!dialog.open) dialog.showModal();

  let snap = null;
  try {
    snap = await fetchJSON(`/api/nim/install/${jobId}`);
  } catch (err) {
    console.warn('nim install snapshot failed', err);
  }
  // Another job may have been attached while the snapshot request was pending.
  if (nimState.job_id !== jobId) return;

  if (snap) {
    const startedAt = Date.parse(snap.started_at);
    nimTimerStart(Number.isFinite(startedAt) ? startedAt : Date.now());
    phase.textContent = snap.phase || 'Working…';
    const lines = snap.lines || [];
    // Keep a trailing newline so the first streamed line starts on its own row.
    log.textContent = lines.length ? lines.join('\n') + '\n' : '';
    // `!= null` so a snapshot that simply omits returncode is not mistaken for a finished job.
    if (snap.returncode != null) { onNimDone(snap); return; }
  } else {
    nimTimerStart(Date.now());
  }

  const es = new EventSource(`/api/nim/install/${jobId}/stream`);
  nimState.eventsource = es;

  es.onmessage = (ev) => {
    let d;
    try { d = JSON.parse(ev.data); } catch { return; }
    if (d == null || d.line === undefined) return;
    log.textContent += d.line + '\n';
    log.scrollTop = log.scrollHeight; // keep the newest line in view
  };

  es.addEventListener('phase', (ev) => {
    try { phase.textContent = JSON.parse(ev.data).phase; } catch {}
  });

  es.addEventListener('done', (ev) => {
    let d = {};
    try { d = JSON.parse(ev.data); } catch {}
    onNimDone(d);
  });

  es.onerror = async () => {
    // Close explicitly so the browser does not auto-reconnect to the stream.
    es.close();
    // Already finished (onNimDone cleared it) or superseded by a newer job.
    if (nimState.eventsource !== es) return;
    nimState.eventsource = null;

    // The job may have completed just as the connection dropped; ask once.
    try {
      const latest = await fetchJSON(`/api/nim/install/${jobId}`);
      if (nimState.job_id !== jobId) return;
      if (latest && latest.returncode != null) { onNimDone(latest); return; }
    } catch (err) {
      console.warn('nim install status check failed', err);
    }
    if (nimState.job_id !== jobId) return;

    // Outcome unknown: stop the clock and say so rather than pretending to still be installing.
    if (nimState.timer) { clearInterval(nimState.timer); nimState.timer = null; }
    phase.textContent = 'Lost connection to the install stream — the install may still be running on the Spark.';
  };
}
|
||||
|
||||
/**
 * Finalise the progress dialog once an install job has ended.
 *
 * Closes the progress stream, stops the elapsed timer, sets the dialog title and
 * phase line to the outcome, and triggers a status refresh via pollStatus().
 *
 * The job counts as failed when `state` is 'failed' OR when a numeric, non-zero
 * `returncode` is present, so a result that carries only a return code is not
 * shown as "Installed". A missing/empty result keeps the previous behaviour and
 * is shown as installed.
 *
 * @param {{state?: string, returncode?: number|null}|null} d - job result, from
 *   the snapshot endpoint or the stream's `done` event.
 */
function onNimDone(d) {
  const result = d || {}; // tolerate a null payload (e.g. a `done` event whose data is "null")

  if (nimState.eventsource) { nimState.eventsource.close(); nimState.eventsource = null; }
  if (nimState.timer) { clearInterval(nimState.timer); nimState.timer = null; }

  const rc = result.returncode;
  const failed = result.state === 'failed' || (typeof rc === 'number' && rc !== 0);

  if (failed) {
    // Only mention the return code when there is one to show.
    el('#nim-prog-title').textContent = rc == null ? 'Failed' : `Failed (rc=${rc})`;
    el('#nim-prog-phase').textContent = 'Failed';
  } else {
    el('#nim-prog-title').textContent = 'Installed';
    el('#nim-prog-phase').textContent = 'Done ✓ — service will appear when the container reports healthy.';
  }

  // Refresh right away; a failed refresh must not surface as an unhandled rejection.
  Promise.resolve(pollStatus()).catch((err) => console.warn('status refresh after NIM install failed', err));
}
|
||||
|
||||
// ===================== Explain context (LLM commit summary) =====================
|
||||
|
||||
let explainEventSource = null;
|
||||
@@ -1149,6 +1303,10 @@ async function init() {
|
||||
el('#ub-apply').addEventListener('click', applyUpdate);
|
||||
el('#ub-explain').addEventListener('click', explainContext);
|
||||
el('#dl-repo').addEventListener('input', updateDlHfLink);
|
||||
el('#open-nim').addEventListener('click', openNimDialog);
|
||||
el('#nim-cancel').addEventListener('click', () => el('#nim-dialog').close());
|
||||
el('#nim-form').addEventListener('submit', submitNim);
|
||||
el('#nim-prog-close').addEventListener('click', () => el('#nim-progress-dialog').close());
|
||||
setupCatalogDialog();
|
||||
setupAdvancedDialog();
|
||||
// Open WebUI link from /api/config
|
||||
|
||||
@@ -76,8 +76,66 @@
|
||||
</section>
|
||||
|
||||
<section id="services-panel" class="services hidden">
|
||||
<h2 class="section-title">Always-on services</h2>
|
||||
<div class="section-header">
|
||||
<h2 class="section-title">Always-on services</h2>
|
||||
<button id="open-nim" class="btn small-btn">+ Install NIM</button>
|
||||
</div>
|
||||
<div id="services-grid" class="services-grid"></div>
|
||||
|
||||
<dialog id="nim-dialog" class="modal">
|
||||
<form method="dialog" class="modal-form" id="nim-form">
|
||||
<h3>Install a NVIDIA NIM container</h3>
|
||||
<p class="muted small" id="nim-key-warn"></p>
|
||||
<p class="muted small">Pick a curated container below or paste any image from <a href="#" id="nim-catalog-link" target="_blank" rel="noopener">the NGC NIM catalog</a>. Spark Control will <code>docker pull</code> and <code>docker run</code> it on the target Spark.</p>
|
||||
|
||||
<div id="nim-suggested" class="nim-grid"></div>
|
||||
|
||||
<fieldset class="modal-fieldset">
|
||||
<legend>Custom image</legend>
|
||||
<label class="modal-row"><span>Image (nvcr.io/...)</span><input type="text" id="nim-image" placeholder="nvcr.io/nim/nvidia/<name>:latest"></label>
|
||||
<label class="modal-row"><span>Container name</span><input type="text" id="nim-container" placeholder="my-service"></label>
|
||||
<label class="modal-row"><span>Port</span><input type="number" id="nim-port" min="1" max="65535"></label>
|
||||
<label class="modal-row"><span>Kind</span>
|
||||
<select id="nim-kind">
|
||||
<option value="nim">NIM (other)</option>
|
||||
<option value="stt">STT (speech-to-text)</option>
|
||||
<option value="tts">TTS (text-to-speech)</option>
|
||||
<option value="vision">Vision</option>
|
||||
<option value="embedding">Embedding</option>
|
||||
</select>
|
||||
</label>
|
||||
<label class="modal-row"><span>Target Spark</span>
|
||||
<select id="nim-host">
|
||||
<option value="spark2">Spark 2 (default for support services)</option>
|
||||
<option value="spark1">Spark 1 (head node)</option>
|
||||
</select>
|
||||
</label>
|
||||
</fieldset>
|
||||
|
||||
<div class="modal-actions">
|
||||
<button type="button" id="nim-cancel" class="btn">Cancel</button>
|
||||
<button type="submit" class="btn primary" id="nim-start">Install</button>
|
||||
</div>
|
||||
</form>
|
||||
</dialog>
|
||||
|
||||
<dialog id="nim-progress-dialog" class="modal">
|
||||
<form method="dialog" class="modal-form">
|
||||
<h3 id="nim-prog-title">Installing…</h3>
|
||||
<div class="phase-row">
|
||||
<div class="phase" id="nim-prog-phase">Starting…</div>
|
||||
<span class="spacer"></span>
|
||||
<span class="timer" id="nim-prog-elapsed">0:00</span>
|
||||
</div>
|
||||
<details open>
|
||||
<summary class="muted small">Log</summary>
|
||||
<pre id="nim-prog-log" class="log"></pre>
|
||||
</details>
|
||||
<div class="modal-actions">
|
||||
<button type="button" id="nim-prog-close" class="btn">Close</button>
|
||||
</div>
|
||||
</form>
|
||||
</dialog>
|
||||
</section>
|
||||
|
||||
<section id="models-section">
|
||||
|
||||
@@ -376,6 +376,7 @@ main {
|
||||
.hw-card .head .meta { color: var(--muted); font-size: 12px; margin-left: auto; }
|
||||
.hw-card.unreachable { border-color: rgba(239, 68, 68, 0.4); }
|
||||
.hw-card.unreachable .name { color: var(--error); }
|
||||
.hw-card.unreachable ol { color: var(--muted); }
|
||||
.hw-metric { display: flex; align-items: center; gap: 10px; font-size: 12px; }
|
||||
.hw-metric .label { color: var(--muted); width: 56px; flex-shrink: 0; text-transform: uppercase; letter-spacing: 0.05em; font-size: 11px; }
|
||||
.hw-metric .bar { flex: 1; height: 8px; background: var(--surface-2); border-radius: 4px; overflow: hidden; position: relative; }
|
||||
@@ -477,6 +478,37 @@ main {
|
||||
#dl-log-details { margin-top: 12px; }
|
||||
#dl-log-details summary { cursor: pointer; padding: 4px 0; }
|
||||
|
||||
/* ===== NIM install dialog ===== */
|
||||
|
||||
.modal#nim-dialog,
|
||||
.modal#nim-progress-dialog { max-width: 640px; }
|
||||
.nim-grid {
|
||||
display: grid;
|
||||
gap: 8px;
|
||||
grid-template-columns: 1fr;
|
||||
max-height: 240px;
|
||||
overflow-y: auto;
|
||||
margin-bottom: 4px;
|
||||
}
|
||||
.nim-card {
|
||||
background: var(--surface-2);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 6px;
|
||||
padding: 10px 12px;
|
||||
display: flex;
|
||||
gap: 10px;
|
||||
align-items: flex-start;
|
||||
}
|
||||
.nim-card .info { flex: 1; }
|
||||
.nim-card .name { font-weight: 600; font-size: 13px; }
|
||||
.nim-card .desc { color: var(--muted); font-size: 12px; margin-top: 4px; }
|
||||
.nim-card .img { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; color: #6b6b75; font-size: 11px; margin-top: 4px; word-break: break-all; }
|
||||
.nim-card .btn { padding: 6px 12px; font-size: 12px; flex-shrink: 0; }
|
||||
.nim-card .links { font-size: 11px; margin-top: 4px; }
|
||||
.nim-card .links a { color: var(--info); text-decoration: none; }
|
||||
.nim-card .links a:hover { text-decoration: underline; }
|
||||
.nim-key-warn { color: var(--warn); }
|
||||
|
||||
/* ===== Section titles ===== */
|
||||
|
||||
.section-title {
|
||||
|
||||
Reference in New Issue
Block a user