v0.3.0 - Hardware dashboard + knob context + Explain context + Open WebUI link
Hardware dashboard:
- New hardware.py module: SSH probes each Spark for hostname, uptime, load+cores, RAM, disk, GPU (name, util, temp, power) + per-process GPU memory sum
- DGX Spark uses unified memory (nvidia-smi memory.total returns N/A); fall back to per-process compute memory and compute fraction against system RAM. Marks with gpu_unified_memory=true.
- 4s TTL cache in HardwareProbe to avoid hammering
- /api/hardware returns per-Spark snapshot
- UI: 'Spark hardware' section at the top with per-Spark cards (CPU load, RAM, GPU mem (unified), GPU util + temp + power, disk) — bars with warn threshold styling
- Polls every 8s
Knob context (tied to live hardware):
- Each Advanced knob now shows plain-English help text
- 'GPU memory %' shows '~N GB allocated · ~M GB left for OS/buffers' computed from actual Spark RAM
- 'Max context' shows '~N pages of text'
- Toggles show tradeoff descriptions
Explain context:
- '✨ Explain context' button on the update banner
- /api/explain-updates GET (consumed by the browser through EventSource): forwards pending commits to the loaded vLLM model and streams its response back as SSE
- Renders into an expandable 'Explained by the loaded LLM' section under Pending commits
- Reasoning tokens shown italicized when the model emits them
Open WebUI integration:
- New 'Open WebUI URL' optional field in Configure Sparks
- /api/config exposes it; UI shows 'Open chat ↗' button in the top bar if set
Downloads:
- Third radio option: Spark 1 only / Spark 2 only / Both Sparks
- Backend picks SSH target based on mode
- HF repo link icon next to the input
- Helper line about NVFP4 for Blackwell
Model cards:
- Repo name is now a clickable link to its Hugging Face page
Package: bump 0.3.0:0
This commit is contained in:
@@ -42,6 +42,7 @@ class Settings:
|
|||||||
parakeet_port: int
|
parakeet_port: int
|
||||||
magpie_port: int
|
magpie_port: int
|
||||||
bind_port: int
|
bind_port: int
|
||||||
|
open_webui_url: str
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_env(cls) -> "Settings":
|
def from_env(cls) -> "Settings":
|
||||||
@@ -66,6 +67,7 @@ class Settings:
|
|||||||
parakeet_port=int(_env("PARAKEET_PORT", "8000")),
|
parakeet_port=int(_env("PARAKEET_PORT", "8000")),
|
||||||
magpie_port=int(_env("MAGPIE_PORT", "9000")),
|
magpie_port=int(_env("MAGPIE_PORT", "9000")),
|
||||||
bind_port=int(_env("BIND_PORT", "9999")),
|
bind_port=int(_env("BIND_PORT", "9999")),
|
||||||
|
open_webui_url=_env("OPEN_WEBUI_URL", ""),
|
||||||
)
|
)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|||||||
+14
-5
@@ -19,7 +19,7 @@ from .config import Settings
|
|||||||
from .ssh import ssh_stream, StreamHandle
|
from .ssh import ssh_stream, StreamHandle
|
||||||
|
|
||||||
|
|
||||||
Mode = Literal["solo", "cluster"]
|
Mode = Literal["spark1", "spark2", "cluster"]
|
||||||
|
|
||||||
|
|
||||||
_TQDM_RE = re.compile(
|
_TQDM_RE = re.compile(
|
||||||
@@ -113,17 +113,26 @@ class DownloadManager:
|
|||||||
|
|
||||||
async def _do(self, job: DownloadJob) -> None:
|
async def _do(self, job: DownloadJob) -> None:
|
||||||
s = self.settings
|
s = self.settings
|
||||||
if not s.spark1_host or not s.spark1_user:
|
# Pick the SSH target and hf-download flags from the mode.
|
||||||
raise RuntimeError("spark1 not configured")
|
if job.mode == "spark2":
|
||||||
|
target_host, target_user = s.spark2_host, s.spark2_user
|
||||||
|
flags = ""
|
||||||
|
elif job.mode == "cluster":
|
||||||
|
target_host, target_user = s.spark1_host, s.spark1_user
|
||||||
|
flags = "-c --copy-parallel"
|
||||||
|
else: # spark1
|
||||||
|
target_host, target_user = s.spark1_host, s.spark1_user
|
||||||
|
flags = ""
|
||||||
|
if not target_host or not target_user:
|
||||||
|
raise RuntimeError(f"{job.mode} host not configured")
|
||||||
|
|
||||||
flags = "-c --copy-parallel" if job.mode == "cluster" else ""
|
|
||||||
cmd = f"cd ~/spark-vllm-docker && ./hf-download.sh {job.repo} {flags}".strip()
|
cmd = f"cd ~/spark-vllm-docker && ./hf-download.sh {job.repo} {flags}".strip()
|
||||||
job.append(f"$ {cmd}")
|
job.append(f"$ {cmd}")
|
||||||
job.state = "downloading"
|
job.state = "downloading"
|
||||||
job.progress.phase = "Connecting to Hugging Face…"
|
job.progress.phase = "Connecting to Hugging Face…"
|
||||||
|
|
||||||
handle = StreamHandle()
|
handle = StreamHandle()
|
||||||
async for line in ssh_stream(s.spark1_host, s.spark1_user, cmd, s, handle=handle):
|
async for line in ssh_stream(target_host, target_user, cmd, s, handle=handle):
|
||||||
job.append(line)
|
job.append(line)
|
||||||
self._update_progress(job, line)
|
self._update_progress(job, line)
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,118 @@
|
|||||||
|
"""Per-Spark hardware snapshots: RAM, disk, GPU memory + utilization, CPU load, uptime.
|
||||||
|
|
||||||
|
Drives via a single SSH command per Spark that runs `free`, `df`, `nvidia-smi`,
|
||||||
|
`/proc/loadavg`, and `uptime -p` and prints labeled lines back. We parse those
|
||||||
|
labels in `_parse`.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
import asyncio
|
||||||
|
import time
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from .config import Settings
|
||||||
|
from .ssh import ssh_run
|
||||||
|
|
||||||
|
|
||||||
|
_PROBE = r"""
|
||||||
|
set -e
|
||||||
|
echo HOSTNAME=$(hostname)
|
||||||
|
echo UPTIME=$(uptime -p 2>/dev/null || uptime)
|
||||||
|
echo LOAD=$(awk '{print $1, $2, $3}' /proc/loadavg)
|
||||||
|
echo CORES=$(nproc 2>/dev/null || echo 0)
|
||||||
|
echo MEMORY=$(free -b 2>/dev/null | awk '/^Mem:/ {print $2, $3}')
|
||||||
|
echo DISK=$(df -B1 / 2>/dev/null | awk 'NR==2 {print $2, $3}')
|
||||||
|
echo GPU=$(nvidia-smi --query-gpu=name,utilization.gpu,temperature.gpu,power.draw,memory.total --format=csv,noheader,nounits 2>/dev/null | head -1)
|
||||||
|
echo GPU_MEM_USED_MIB=$(nvidia-smi --query-compute-apps=used_gpu_memory --format=csv,noheader,nounits 2>/dev/null | awk '{s+=$1} END {print s+0}')
|
||||||
|
""".strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_int(s: str) -> int | None:
|
||||||
|
try: return int(s)
|
||||||
|
except (TypeError, ValueError): return None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse(out: str) -> dict:
    """Turn the labelled ``KEY=value`` lines printed by the probe into a snapshot dict.

    Lines without ``=`` are ignored. Labels are matched case-insensitively.
    ``hostname``, ``uptime`` and ``cores`` are always present in the result
    (possibly None); every other key is only set when its source line was
    present and had the expected number of fields.
    """
    fields: dict[str, Any] = {}
    for line in out.splitlines():
        label, sep, value = line.partition("=")
        if not sep:
            continue
        fields[label.strip().lower()] = value.strip()

    snapshot: dict[str, Any] = {
        "hostname": fields.get("hostname"),
        "uptime": fields.get("uptime"),
        "cores": _parse_int(fields.get("cores", "")),
    }

    # Load average -> [1m, 5m, 15m]; None when any of the values is not a float.
    load_field = fields.get("load")
    if load_field:
        try:
            snapshot["load"] = [float(tok) for tok in load_field.split()[:3]]
        except ValueError:
            snapshot["load"] = None

    # Memory: "<total> <used>" in bytes.
    mem_tokens = (fields.get("memory") or "").split()
    if len(mem_tokens) == 2:
        snapshot["ram_total_bytes"] = _parse_int(mem_tokens[0])
        snapshot["ram_used_bytes"] = _parse_int(mem_tokens[1])

    # Disk: "<total> <used>" in bytes.
    disk_tokens = (fields.get("disk") or "").split()
    if len(disk_tokens) == 2:
        snapshot["disk_total_bytes"] = _parse_int(disk_tokens[0])
        snapshot["disk_used_bytes"] = _parse_int(disk_tokens[1])

    # GPU: "name, util_gpu, temp_C, power_W, memory_total_MiB"
    gpu_field = fields.get("gpu")
    if gpu_field:
        cols = [col.strip() for col in gpu_field.split(",")]
        if len(cols) >= 5:
            name, util, temp, power, mem_total = cols[:5]
            snapshot["gpu_name"] = name
            snapshot["gpu_util_pct"] = _parse_int(util)
            snapshot["gpu_temp_c"] = _parse_int(temp)
            try:
                snapshot["gpu_power_w"] = float(power)
            except ValueError:
                snapshot["gpu_power_w"] = None
            # memory.total may be "[N/A]" on unified-memory systems (DGX Spark);
            # an unparseable total is what flags the GPU as unified-memory.
            total_mib = _parse_int(mem_total)
            snapshot["gpu_mem_total_mib"] = total_mib
            snapshot["gpu_unified_memory"] = total_mib is None

    # Summed per-process compute memory (reported even on unified-memory systems).
    used_field = fields.get("gpu_mem_used_mib")
    if used_field:
        snapshot["gpu_mem_used_mib"] = _parse_int(used_field)

    return snapshot
|
||||||
|
|
||||||
|
|
||||||
|
class HardwareProbe:
    """Fetch per-Spark hardware snapshots over SSH, with a short-lived cache.

    Each Spark has its own cache entry and its own lock, so concurrent callers
    asking for the same Spark share one SSH round-trip while probes of
    different Sparks do not block each other.
    """

    def __init__(self, settings: Settings, ttl_sec: float = 4.0) -> None:
        """Store *settings* and how long (in seconds) a result stays fresh."""
        self.settings = settings
        self.ttl_sec = ttl_sec
        # key -> (monotonic time the result was stored, result dict)
        self._cache: dict[str, tuple[float, dict]] = {}
        self._locks: dict[str, asyncio.Lock] = {}

    def _lock(self, key: str) -> asyncio.Lock:
        """Return the lock guarding *key*, creating it on first use."""
        if key not in self._locks:
            self._locks[key] = asyncio.Lock()
        return self._locks[key]

    async def fetch(self) -> dict:
        """Return ``{"spark1": ..., "spark2": ...}`` snapshots.

        Both Sparks are probed concurrently so that one unreachable host does
        not delay the other by the full SSH timeout.
        """
        s = self.settings
        spark1, spark2 = await asyncio.gather(
            self._one("spark1", s.spark1_host, s.spark1_user),
            self._one("spark2", s.spark2_host, s.spark2_user),
        )
        return {"spark1": spark1, "spark2": spark2}

    async def _one(self, key: str, host: str, user: str) -> dict:
        """Return the (possibly cached) snapshot for one Spark.

        A Spark without host or user yields an "unconfigured" marker and is
        never probed. Failed probes are cached like successful ones.
        """
        if not host or not user:
            return {"reachable": False, "configured": False}
        async with self._lock(key):
            cached = self._cache.get(key)
            if cached and (time.monotonic() - cached[0] < self.ttl_sec):
                return cached[1]
            rc, out, err = await ssh_run(host, user, _PROBE, self.settings, timeout=8)
            if rc != 0:
                result = {
                    "reachable": False,
                    "configured": True,
                    "host": host,
                    "error": err.strip() or out.strip() or f"rc={rc}",
                }
            else:
                result = {"reachable": True, "configured": True, "host": host, **_parse(out)}
            # Stamp the entry when the probe finishes, not when it started:
            # a probe that runs into the 8s timeout would otherwise already be
            # older than the TTL when stored, and every caller queued on the
            # lock would repeat the slow probe.
            self._cache[key] = (time.monotonic(), result)
            return result
|
||||||
+85
-1
@@ -11,6 +11,7 @@ from typing import Literal
|
|||||||
|
|
||||||
from .config import Settings
|
from .config import Settings
|
||||||
from .download import DownloadManager
|
from .download import DownloadManager
|
||||||
|
from .hardware import HardwareProbe
|
||||||
from .health import check_magpie, check_parakeet, check_vllm
|
from .health import check_magpie, check_parakeet, check_vllm
|
||||||
from .models import load_catalog
|
from .models import load_catalog
|
||||||
from .overrides import add_custom, delete_custom, extract_knobs_from_args, load_overrides, set_knobs
|
from .overrides import add_custom, delete_custom, extract_knobs_from_args, load_overrides, set_knobs
|
||||||
@@ -25,6 +26,7 @@ catalog = load_catalog(settings.models_yaml)
|
|||||||
swap_manager = SwapManager(settings, catalog)
|
swap_manager = SwapManager(settings, catalog)
|
||||||
download_manager = DownloadManager(settings)
|
download_manager = DownloadManager(settings)
|
||||||
update_manager = UpdateManager(settings)
|
update_manager = UpdateManager(settings)
|
||||||
|
hardware_probe = HardwareProbe(settings)
|
||||||
|
|
||||||
app = FastAPI(title="spark-control", version="0.1.0")
|
app = FastAPI(title="spark-control", version="0.1.0")
|
||||||
|
|
||||||
@@ -44,6 +46,7 @@ async def get_config() -> dict:
|
|||||||
"spark1_host": settings.spark1_host,
|
"spark1_host": settings.spark1_host,
|
||||||
"spark2_host": settings.spark2_host,
|
"spark2_host": settings.spark2_host,
|
||||||
"vllm_port": settings.vllm_port,
|
"vllm_port": settings.vllm_port,
|
||||||
|
"open_webui_url": settings.open_webui_url or None,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -116,6 +119,12 @@ async def del_model(key: str) -> dict:
|
|||||||
return {"ok": True, "key": key}
|
return {"ok": True, "key": key}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/hardware")
async def get_hardware() -> dict:
    """Return the current hardware snapshot for each Spark.

    Delegates to the module-level ``hardware_probe``; the response is whatever
    its ``fetch()`` produces.
    """
    snapshot = await hardware_probe.fetch()
    return snapshot
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/services")
|
@app.get("/api/services")
|
||||||
async def get_services() -> dict:
|
async def get_services() -> dict:
|
||||||
"""Lifecycle state of always-on support services (Parakeet, Magpie, …).
|
"""Lifecycle state of always-on support services (Parakeet, Magpie, …).
|
||||||
@@ -297,7 +306,7 @@ async def stream_swap(job_id: str):
|
|||||||
|
|
||||||
class DownloadRequest(BaseModel):
|
class DownloadRequest(BaseModel):
|
||||||
repo: str
|
repo: str
|
||||||
mode: Literal["solo", "cluster"] = "solo"
|
mode: Literal["spark1", "spark2", "cluster"] = "spark1"
|
||||||
|
|
||||||
|
|
||||||
@app.post("/api/download")
|
@app.post("/api/download")
|
||||||
@@ -376,6 +385,81 @@ async def get_updates() -> dict:
|
|||||||
return await get_update_status(settings)
|
return await get_update_status(settings)
|
||||||
|
|
||||||
|
|
||||||
|
def _sse_error_response(message):
    """Build a one-shot SSE response that reports *message* and then ends the stream."""

    async def frames():
        payload = json.dumps({"error": message})
        # Send the error as a default (unnamed) frame first: a client that only
        # reads payloads from `message` events would otherwise never see it.
        yield f"data: {payload}\n\n"
        yield f"event: done\ndata: {payload}\n\n"

    return StreamingResponse(frames(), media_type="text/event-stream")


@app.get("/api/explain-updates")
async def explain_updates():
    """Stream a layman's explanation of the pending commits from the currently-loaded vLLM model.

    The response is a server-sent-event stream. Default frames carry JSON with
    one of ``content`` (answer text), ``reasoning`` (model reasoning tokens) or
    ``error``. The stream always ends with a ``done`` event.
    """
    import httpx

    info = await get_update_status(settings)
    if not info.get("ok"):
        return _sse_error_response(info.get("error") or "unknown")

    vllm = await check_vllm(settings)
    if not vllm.get("ok") or not vllm.get("current_model"):
        return _sse_error_response("no vLLM model loaded — swap to a model first")

    commits = "\n".join(info.get("log", []))
    if not commits.strip():
        return _sse_error_response("no pending commits")

    prompt = (
        "You are reviewing pending git commits to `eugr/spark-vllm-docker`, an upstream community project that "
        "orchestrates vLLM on dual NVIDIA DGX Spark hardware (Blackwell GPUs, cluster via Ray, recipes per model). "
        "The reader has a setup running models like Qwen3.6-35B-A3B-NVFP4 (daily driver, solo), Qwen3-VL 235B (cluster), "
        "and Gemma 4 31B. The reader is technically literate but is NOT a vLLM expert.\n\n"
        "For the commit list below: give a short overall verdict (Apply / Optional / Skip and why), then a brief "
        "bullet per commit grouping similar ones. Call out anything that would break a working setup or that "
        "requires re-downloading models. Avoid jargon. ~250 words max.\n\n"
        f"Pending commits:\n{commits}"
    )

    async def gen():
        failed = False
        try:
            async with httpx.AsyncClient(timeout=httpx.Timeout(300.0, connect=5.0)) as c:
                async with c.stream(
                    "POST",
                    f"{vllm['base_url']}/chat/completions",
                    json={
                        "model": vllm["current_model"],
                        "stream": True,
                        "messages": [{"role": "user", "content": prompt}],
                        "max_tokens": 600,
                        "temperature": 0.4,
                    },
                ) as r:
                    r.raise_for_status()
                    async for line in r.aiter_lines():
                        if not line.startswith("data: "):
                            continue
                        data = line[6:].strip()
                        if data == "[DONE]":
                            break
                        try:
                            chunk = json.loads(data)
                        except json.JSONDecodeError:
                            # Skip frames that are not valid JSON.
                            continue
                        choices = chunk.get("choices") or []
                        if not choices:
                            continue
                        delta = choices[0].get("delta") or {}
                        # NOTE(review): some vLLM versions appear to name this
                        # field `reasoning_content` — confirm against the
                        # deployed server. Accepting both is harmless.
                        reasoning = delta.get("reasoning") or delta.get("reasoning_content")
                        text = delta.get("content")
                        # Forward both when a chunk carries both, instead of
                        # dropping the reasoning part.
                        if reasoning:
                            yield f"data: {json.dumps({'reasoning': reasoning})}\n\n"
                        if text:
                            yield f"data: {json.dumps({'content': text})}\n\n"
        except Exception as e:
            # Top-level boundary of the stream: report the failure to the client
            # instead of tearing the connection down without explanation.
            failed = True
            yield f"data: {json.dumps({'error': f'{type(e).__name__}: {e}'})}\n\n"
        yield f"event: done\ndata: {json.dumps({'ok': not failed})}\n\n"

    return StreamingResponse(gen(), media_type="text/event-stream")
|
||||||
|
|
||||||
|
|
||||||
class UpdateRequest(BaseModel):
|
class UpdateRequest(BaseModel):
|
||||||
mode: Literal["solo", "cluster"] = "cluster"
|
mode: Literal["solo", "cluster"] = "cluster"
|
||||||
|
|
||||||
|
|||||||
+243
-1
@@ -13,6 +13,8 @@ const state = {
|
|||||||
swap_progress: 0, // 0–1
|
swap_progress: 0, // 0–1
|
||||||
services: {},
|
services: {},
|
||||||
service_action_in_flight: null, // e.g. "parakeet:restart"
|
service_action_in_flight: null, // e.g. "parakeet:restart"
|
||||||
|
hardware: {},
|
||||||
|
config: {},
|
||||||
configured: true,
|
configured: true,
|
||||||
timer_handle: null,
|
timer_handle: null,
|
||||||
};
|
};
|
||||||
@@ -63,7 +65,9 @@ function renderCards() {
|
|||||||
${(m.capabilities || []).map(c => `<span class="tag cap">${escapeHtml(c)}</span>`).join('')}
|
${(m.capabilities || []).map(c => `<span class="tag cap">${escapeHtml(c)}</span>`).join('')}
|
||||||
</div>
|
</div>
|
||||||
${desc}
|
${desc}
|
||||||
<div class="muted small repo">${escapeHtml(m.repo)}</div>
|
<div class="muted small repo">
|
||||||
|
<a href="https://huggingface.co/${encodeURIComponent(m.repo)}" target="_blank" rel="noopener" title="View on Hugging Face">${escapeHtml(m.repo)} <span class="hf-icon">↗</span></a>
|
||||||
|
</div>
|
||||||
<div class="spacer"></div>
|
<div class="spacer"></div>
|
||||||
<div class="card-actions">
|
<div class="card-actions">
|
||||||
<button class="btn ${isActive ? '' : 'primary'}" data-swap-key="${key}" ${isActive || isSwapping ? 'disabled' : ''}>
|
<button class="btn ${isActive ? '' : 'primary'}" data-swap-key="${key}" ${isActive || isSwapping ? 'disabled' : ''}>
|
||||||
@@ -93,6 +97,107 @@ function renderCurrent(status) {
|
|||||||
c.innerHTML = `<strong>${label}</strong>`;
|
c.innerHTML = `<strong>${label}</strong>`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ===================== hardware dashboard =====================
|
||||||
|
|
||||||
|
/**
 * Format a byte count with binary (1024-based) scaling and a B/KB/MB/GB/TB suffix.
 * Values below 10 in the chosen unit keep one decimal; larger ones are rounded.
 * Returns an em dash for a missing value (anything falsy other than 0).
 */
function fmtBytes(n) {
  if (n !== 0 && !n) return '—';
  const units = ['B', 'KB', 'MB', 'GB', 'TB'];
  let idx = 0;
  let scaled = n;
  for (; scaled >= 1024 && idx < units.length - 1; idx++) {
    scaled /= 1024;
  }
  const shown = scaled < 10 ? scaled.toFixed(1) : String(Math.round(scaled));
  return `${shown} ${units[idx]}`;
}
|
||||||
|
/**
 * Convert a MiB count to a number string in units of 1024 MiB (shown as "GB"
 * by the caller). One decimal below 10, rounded otherwise.
 * Returns null for a missing value (anything falsy other than 0).
 */
function fmtMiB(n) {
  if (n !== 0 && !n) return null;
  const gib = n / 1024;
  if (gib < 10) return gib.toFixed(1);
  return String(Math.round(gib));
}
|
||||||
|
|
||||||
|
/**
 * Return the HTML for a usage bar. The fill width is clamped to 2–100 % so an
 * empty bar still shows a sliver; `warn` adds the "warn" class.
 */
function bar(usedPct, warn) {
  const width = Math.max(2, Math.min(100, usedPct));
  const cls = warn ? 'warn' : '';
  return `<div class="bar ${cls}"><span style="width:${width}%"></span></div>`;
}
|
||||||
|
|
||||||
|
/**
 * Fetch /api/hardware, store the snapshot in `state.hardware` and re-render the
 * dashboard. Any failure (fetch or render) is logged as a warning, not thrown.
 */
async function pollHardware() {
  try {
    const snapshot = await fetchJSON('/api/hardware');
    state.hardware = snapshot;
    renderHardware();
  } catch (err) {
    console.warn('hardware poll failed', err);
  }
}
|
||||||
|
|
||||||
|
/**
 * Rebuild the hardware dashboard from `state.hardware`.
 *
 * Hides the panel when no Spark is configured. Otherwise renders one card per
 * Spark: an "unreachable" card with the host and error text, or a card with
 * CPU, RAM, GPU memory, GPU utilisation and disk rows.
 */
function renderHardware() {
  const panel = el('#hardware-panel');
  const grid = el('#hardware-grid');
  const hw = state.hardware || {};
  // Sparks that report configured === false are left out entirely.
  const keys = Object.keys(hw).filter(k => hw[k] && (hw[k].configured !== false));
  if (keys.length === 0) { panel.classList.add('hidden'); return; }
  panel.classList.remove('hidden');
  grid.innerHTML = '';

  // used/total as a percentage; 0 when either side is missing or zero.
  const pctOf = (used, total) => (used && total ? (used / total) * 100 : 0);

  // One labelled row: label, usage bar (warn-styled above `warnAbove`), value.
  // labelHtml and valHtml are inserted as HTML — callers pass only trusted or
  // pre-escaped text.
  const metricRow = (labelHtml, pct, warnAbove, valHtml) => `
    <div class="hw-metric">
      <span class="label">${labelHtml}</span>
      ${bar(pct, pct > warnAbove)}
      <span class="val">${valHtml}</span>
    </div>`;

  for (const key of keys) {
    const s = hw[key];
    const card = document.createElement('div');

    if (!s.reachable) {
      card.className = 'hw-card unreachable';
      card.innerHTML = `
        <div class="head">
          <span class="name">${escapeHtml(key)}</span>
          <span class="meta">unreachable</span>
        </div>
        <div class="muted small">${escapeHtml(s.host || '')} — ${escapeHtml(s.error || 'no response')}</div>
      `;
      grid.appendChild(card);
      continue;
    }

    const ramPct = pctOf(s.ram_used_bytes, s.ram_total_bytes);
    const diskPct = pctOf(s.disk_used_bytes, s.disk_total_bytes);
    const loadPct = (s.load && s.cores) ? Math.min(100, (s.load[0] / s.cores) * 100) : 0;
    const utilPct = s.gpu_util_pct || 0;

    // GPU memory: on unified-memory systems (DGX Spark) total is N/A, so use system RAM as the pool.
    const gpuMemTotalMiB = s.gpu_mem_total_mib || (s.gpu_unified_memory ? (s.ram_total_bytes / (1024 * 1024)) : null);
    const gpuMemUsedMiB = s.gpu_mem_used_mib ?? null;
    const gpuMemPct = (gpuMemTotalMiB && gpuMemUsedMiB != null) ? (gpuMemUsedMiB / gpuMemTotalMiB) * 100 : 0;
    const gpuMemNote = s.gpu_unified_memory ? ' <span class="muted">(unified)</span>' : '';

    const gpuExtras = [];
    if (s.gpu_temp_c != null) gpuExtras.push(`${s.gpu_temp_c}°C`);
    if (s.gpu_power_w != null) gpuExtras.push(`${s.gpu_power_w.toFixed(0)}W`);
    const gpuExtrasStr = gpuExtras.length ? ` · ${gpuExtras.join(' · ')}` : '';

    // Join only the parts that exist, so a Spark without GPU name or uptime
    // does not render dangling " · " separators.
    const meta = [key, s.gpu_name, s.uptime]
      .filter(Boolean)
      .map(part => escapeHtml(part))
      .join(' · ');

    card.className = 'hw-card';
    card.innerHTML = `
      <div class="head">
        <span class="name">${escapeHtml(s.hostname || key)}</span>
        <span class="meta">${meta}</span>
      </div>
      ${metricRow('CPU', loadPct, 80, `${s.load ? s.load[0].toFixed(2) : '—'} / ${s.cores || '?'} cores`)}
      ${metricRow('RAM', ramPct, 85, `${fmtBytes(s.ram_used_bytes)} / ${fmtBytes(s.ram_total_bytes)}`)}
      ${metricRow(`GPU mem${gpuMemNote}`, gpuMemPct, 90, `${fmtMiB(gpuMemUsedMiB) || '—'} / ${fmtMiB(gpuMemTotalMiB) || '?'} GB`)}
      ${metricRow('GPU util', utilPct, 90, `${s.gpu_util_pct ?? 0}%${gpuExtrasStr}`)}
      ${metricRow('Disk', diskPct, 85, `${fmtBytes(s.disk_used_bytes)} / ${fmtBytes(s.disk_total_bytes)}`)}
    `;
    grid.appendChild(card);
  }
}
|
||||||
|
|
||||||
|
// ===================== service classification =====================
|
||||||
|
|
||||||
function classifyService(s) {
|
function classifyService(s) {
|
||||||
// returns one of: running | unhealthy | missing | unconfigured | starting
|
// returns one of: running | unhealthy | missing | unconfigured | starting
|
||||||
if (!s.host) return 'unconfigured';
|
if (!s.host) return 'unconfigured';
|
||||||
@@ -543,6 +648,18 @@ function openDownloadForm() {
|
|||||||
el('#download-form').classList.remove('hidden');
|
el('#download-form').classList.remove('hidden');
|
||||||
el('#download-progress').classList.add('hidden');
|
el('#download-progress').classList.add('hidden');
|
||||||
el('#dl-repo').focus();
|
el('#dl-repo').focus();
|
||||||
|
updateDlHfLink();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Show or hide the Hugging Face link next to the download input.
 * The link is shown only when the input looks like "org/name" (contains "/").
 */
function updateDlHfLink() {
  const repo = el('#dl-repo').value.trim();
  const link = el('#dl-hf-link');
  if (repo.includes('/')) {
    // Encode each path segment separately: encoding the whole string would
    // turn the "/" between org and repo name into "%2F" and the URL would no
    // longer address a two-segment path.
    const path = repo.split('/').map(seg => encodeURIComponent(seg)).join('/');
    link.href = `https://huggingface.co/${path}`;
    link.classList.remove('hidden');
  } else {
    link.classList.add('hidden');
  }
}
|
||||||
|
|
||||||
function closeDownloadPanel() {
|
function closeDownloadPanel() {
|
||||||
@@ -672,6 +789,47 @@ function handleDownloadDone(d) {
|
|||||||
|
|
||||||
// ===================== Advanced / Add to catalog =====================
|
// ===================== Advanced / Add to catalog =====================
|
||||||
|
|
||||||
|
/**
 * Size (in GB, 1024-based) of the GPU memory pool a model in `modelMode` can use,
 * taken from the last hardware snapshot; null when nothing is known yet.
 *
 * For 'cluster' with both Sparks known, the smaller pool is returned because
 * loading is per-Spark. Otherwise Spark 1's pool is preferred, then Spark 2's.
 */
function gpuTotalGB(modelMode) {
  // Pool of one Spark. On unified-memory systems (DGX Spark) nvidia-smi reports
  // no memory.total, so the GPU shares system RAM and that is the pool — the
  // same fallback the hardware dashboard uses.
  const poolGB = (s) => {
    if (!s) return null;
    if (s.gpu_mem_total_mib) return s.gpu_mem_total_mib / 1024;
    if (s.gpu_unified_memory && s.ram_total_bytes) return s.ram_total_bytes / (1024 ** 3);
    return null;
  };
  const g1 = poolGB(state.hardware?.spark1);
  const g2 = poolGB(state.hardware?.spark2);
  if (modelMode === 'cluster' && g1 && g2) return Math.min(g1, g2); // bottleneck
  return g1 || g2 || null;
}
|
||||||
|
|
||||||
|
/**
 * Plain-English hint for one Advanced knob.
 *
 * @param {string} field  knob name (gpu_memory_utilization, max_model_len,
 *                        fastsafetensors, prefix_caching, kv_cache_dtype)
 * @param {*} value       current value of the knob
 * @param {string} mode   model mode, used to pick the GPU memory pool
 * @returns {string} hint text, or '' when there is nothing useful to say
 */
function knobContextHint(field, value, mode) {
  if (field === 'gpu_memory_utilization') {
    const gb = gpuTotalGB(mode);
    // No hint without a known pool, or while the input does not parse to a
    // number (would otherwise render "~NaN GB").
    if (!gb || !Number.isFinite(value)) return '';
    const used = (value * gb).toFixed(0);
    const free = (gb - value * gb).toFixed(0);
    return `~${used} GB allocated to model + KV cache · ~${free} GB left for OS, buffers, other GPU workloads.`;
  }
  if (field === 'max_model_len') {
    if (!value) return '';
    const pages = Math.round(value / 350); // ~350 tokens per page
    return `~${pages.toLocaleString()} pages of text (very rough). Larger context = more GPU memory reserved for KV cache.`;
  }
  if (field === 'fastsafetensors') return value ? 'Faster cold-start weight loading.' : 'Standard safetensors loading.';
  if (field === 'prefix_caching') return value ? 'Reuses GPU state for repeated prompt prefixes (e.g. long system prompts).' : 'Off — every request re-processes the full prompt.';
  if (field === 'kv_cache_dtype') return value === 'fp8' ? 'Halves KV cache memory (fits ~2× more context). Quality cost is usually imperceptible.' : 'Default precision.';
  return '';
}
|
||||||
|
|
||||||
|
/**
 * Return the hint element for knob `id` inside `rowEl`, creating and appending
 * it on first use. The element is identified by its data-for attribute.
 */
function ensureKnobHint(rowEl, id) {
  const existing = rowEl.querySelector(`.knob-hint[data-for="${id}"]`);
  if (existing) return existing;

  const hint = document.createElement('div');
  hint.className = 'knob-hint muted small';
  hint.dataset.for = id;
  rowEl.appendChild(hint);
  return hint;
}
|
||||||
|
|
||||||
function openAdvanced(key) {
|
function openAdvanced(key) {
|
||||||
const m = state.models[key];
|
const m = state.models[key];
|
||||||
if (!m) return;
|
if (!m) return;
|
||||||
@@ -684,6 +842,23 @@ function openAdvanced(key) {
|
|||||||
el('#adv-fst').checked = !!k.fastsafetensors;
|
el('#adv-fst').checked = !!k.fastsafetensors;
|
||||||
el('#adv-pcache').checked = !!k.prefix_caching;
|
el('#adv-pcache').checked = !!k.prefix_caching;
|
||||||
el('#adv-fp8').checked = k.kv_cache_dtype === 'fp8';
|
el('#adv-fp8').checked = k.kv_cache_dtype === 'fp8';
|
||||||
|
|
||||||
|
// Wire up live knob hints
|
||||||
|
const updateHints = () => {
|
||||||
|
const mml = parseInt(el('#adv-mml').value, 10);
|
||||||
|
const gmu = parseFloat(el('#adv-gmu').value);
|
||||||
|
ensureKnobHint(el('#adv-mml').parentElement, 'mml').textContent = knobContextHint('max_model_len', mml, m.mode);
|
||||||
|
ensureKnobHint(el('#adv-gmu').parentElement, 'gmu').textContent = knobContextHint('gpu_memory_utilization', gmu, m.mode);
|
||||||
|
ensureKnobHint(el('#adv-fst').parentElement, 'fst').textContent = knobContextHint('fastsafetensors', el('#adv-fst').checked, m.mode);
|
||||||
|
ensureKnobHint(el('#adv-pcache').parentElement, 'pcache').textContent = knobContextHint('prefix_caching', el('#adv-pcache').checked, m.mode);
|
||||||
|
ensureKnobHint(el('#adv-fp8').parentElement, 'fp8').textContent = knobContextHint('kv_cache_dtype', el('#adv-fp8').checked ? 'fp8' : 'auto', m.mode);
|
||||||
|
};
|
||||||
|
updateHints();
|
||||||
|
el('#adv-mml').oninput = updateHints;
|
||||||
|
el('#adv-gmu').oninput = (e) => { el('#adv-gmu-out').value = parseFloat(e.target.value).toFixed(2); updateHints(); };
|
||||||
|
el('#adv-fst').onchange = updateHints;
|
||||||
|
el('#adv-pcache').onchange = updateHints;
|
||||||
|
el('#adv-fp8').onchange = updateHints;
|
||||||
const del = el('#adv-delete');
|
const del = el('#adv-delete');
|
||||||
del.classList.toggle('hidden', !m.custom);
|
del.classList.toggle('hidden', !m.custom);
|
||||||
del.onclick = async () => {
|
del.onclick = async () => {
|
||||||
@@ -778,6 +953,56 @@ function setupAdvancedDialog() {
|
|||||||
el('#adv-gmu').addEventListener('input', (e) => { el('#adv-gmu-out').value = parseFloat(e.target.value).toFixed(2); });
|
el('#adv-gmu').addEventListener('input', (e) => { el('#adv-gmu-out').value = parseFloat(e.target.value).toFixed(2); });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ===================== Explain context (LLM commit summary) =====================
|
||||||
|
|
||||||
|
let explainEventSource = null;
|
||||||
|
|
||||||
|
/**
 * Ask the backend to explain the pending commits and stream the answer into
 * the "explain" section. Any stream already in flight is closed first.
 *
 * Default SSE frames carry JSON with `content` (answer text), `reasoning`
 * (shown de-emphasized) or `error`; a `done` event ends the stream.
 */
async function explainContext() {
  if (explainEventSource) { explainEventSource.close(); explainEventSource = null; }
  const section = el('#ub-explain-section');
  const content = el('#ub-explain-content');
  section.classList.remove('hidden');
  section.open = true;
  content.innerHTML = '<span class="muted">Asking the loaded model…</span>';

  let text = '';
  let started = false;    // true once model output has replaced the placeholder
  let failed = false;     // true once an error message is on screen
  let reasoningEl = null; // de-emphasized reasoning block
  let answerEl = null;    // the answer itself

  const showError = (msg) => {
    failed = true;
    reasoningEl = null;
    answerEl = null;
    content.innerHTML = `<span class="muted">Couldn't get explanation: ${escapeHtml(msg)}</span>`;
  };
  const begin = () => {
    if (!started) { content.innerHTML = ''; started = true; }
  };

  const es = new EventSource('/api/explain-updates');
  explainEventSource = es;
  // Only clear the shared handle if it still points at this stream.
  const release = () => {
    es.close();
    if (explainEventSource === es) explainEventSource = null;
  };

  es.onmessage = (ev) => {
    let d;
    try { d = JSON.parse(ev.data); } catch { return; }
    if (!d) return;
    if (d.error) { showError(d.error); return; }
    if (d.content) {
      begin();
      // Keep the answer in its own element: writing to content.textContent
      // would wipe the reasoning block as soon as the first answer token arrives.
      if (!answerEl) {
        answerEl = document.createElement('div');
        answerEl.className = 'explain-answer';
        content.appendChild(answerEl);
      }
      text += d.content;
      answerEl.textContent = text;
      content.scrollTop = content.scrollHeight;
    } else if (d.reasoning) {
      begin();
      if (!reasoningEl) {
        reasoningEl = document.createElement('div');
        reasoningEl.className = 'reasoning reasoning-current';
        // Reasoning always sits above the answer, whichever arrived first.
        content.insertBefore(reasoningEl, answerEl);
      }
      reasoningEl.textContent += d.reasoning;
    }
  };

  es.addEventListener('done', (ev) => {
    release();
    // The done payload may carry an error (e.g. no model loaded); surface it
    // instead of leaving the "Asking…" placeholder on screen.
    let d = null;
    try { d = JSON.parse(ev.data); } catch { /* payload is optional */ }
    if (d && d.error && !failed) {
      showError(d.error);
    } else if (!started && !failed) {
      content.innerHTML = '<span class="muted">The model returned no explanation.</span>';
    }
    // strip the reasoning-current marker
    if (reasoningEl) reasoningEl.classList.remove('reasoning-current');
  });

  es.onerror = () => {
    // Close explicitly: EventSource would otherwise keep reconnecting and
    // restart the whole explanation.
    release();
    if (!started && !failed) showError('connection to the server failed');
  };
}
|
||||||
|
|
||||||
// ===================== updates (spark-vllm-docker) =====================
|
// ===================== updates (spark-vllm-docker) =====================
|
||||||
|
|
||||||
const updState = {
|
const updState = {
|
||||||
@@ -817,19 +1042,23 @@ function renderUpdateBanner() {
|
|||||||
banner.classList.toggle('up-to-date', behind === 0 && !dirty);
|
banner.classList.toggle('up-to-date', behind === 0 && !dirty);
|
||||||
banner.classList.toggle('warn', !!dirty);
|
banner.classList.toggle('warn', !!dirty);
|
||||||
|
|
||||||
|
const explain = el('#ub-explain');
|
||||||
if (dirty > 0) {
|
if (dirty > 0) {
|
||||||
text.textContent = `${dirty} local change${dirty === 1 ? '' : 's'} in ~/spark-vllm-docker. Resolve before updating.`;
|
text.textContent = `${dirty} local change${dirty === 1 ? '' : 's'} in ~/spark-vllm-docker. Resolve before updating.`;
|
||||||
details.classList.add('hidden');
|
details.classList.add('hidden');
|
||||||
apply.classList.add('hidden');
|
apply.classList.add('hidden');
|
||||||
|
explain.classList.add('hidden');
|
||||||
} else if (behind === 0) {
|
} else if (behind === 0) {
|
||||||
text.textContent = `spark-vllm-docker is up to date (${info.current || ''})`;
|
text.textContent = `spark-vllm-docker is up to date (${info.current || ''})`;
|
||||||
details.classList.add('hidden');
|
details.classList.add('hidden');
|
||||||
apply.classList.add('hidden');
|
apply.classList.add('hidden');
|
||||||
list.classList.add('hidden');
|
list.classList.add('hidden');
|
||||||
|
explain.classList.add('hidden');
|
||||||
} else {
|
} else {
|
||||||
text.textContent = `${behind} commit${behind === 1 ? '' : 's'} behind upstream`;
|
text.textContent = `${behind} commit${behind === 1 ? '' : 's'} behind upstream`;
|
||||||
details.classList.remove('hidden');
|
details.classList.remove('hidden');
|
||||||
apply.classList.remove('hidden');
|
apply.classList.remove('hidden');
|
||||||
|
explain.classList.remove('hidden');
|
||||||
log.textContent = (info.log || []).join('\n') || '(no log)';
|
log.textContent = (info.log || []).join('\n') || '(no log)';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -918,13 +1147,26 @@ async function init() {
|
|||||||
list.open = !list.open;
|
list.open = !list.open;
|
||||||
});
|
});
|
||||||
el('#ub-apply').addEventListener('click', applyUpdate);
|
el('#ub-apply').addEventListener('click', applyUpdate);
|
||||||
|
el('#ub-explain').addEventListener('click', explainContext);
|
||||||
|
el('#dl-repo').addEventListener('input', updateDlHfLink);
|
||||||
setupCatalogDialog();
|
setupCatalogDialog();
|
||||||
setupAdvancedDialog();
|
setupAdvancedDialog();
|
||||||
|
// Open WebUI link from /api/config
|
||||||
|
try {
|
||||||
|
state.config = await fetchJSON('/api/config');
|
||||||
|
if (state.config.open_webui_url) {
|
||||||
|
const a = el('#open-webui-link');
|
||||||
|
a.href = state.config.open_webui_url;
|
||||||
|
a.classList.remove('hidden');
|
||||||
|
}
|
||||||
|
} catch {}
|
||||||
await loadModels();
|
await loadModels();
|
||||||
await pollStatus();
|
await pollStatus();
|
||||||
await renderServices();
|
await renderServices();
|
||||||
|
pollHardware();
|
||||||
pollUpdates();
|
pollUpdates();
|
||||||
setInterval(pollStatus, 5000);
|
setInterval(pollStatus, 5000);
|
||||||
|
setInterval(pollHardware, 8000); // every 8s
|
||||||
setInterval(pollUpdates, 300000); // every 5 min
|
setInterval(pollUpdates, 300000); // every 5 min
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -16,6 +16,7 @@
|
|||||||
<div class="current" id="current">
|
<div class="current" id="current">
|
||||||
<span class="muted">connecting…</span>
|
<span class="muted">connecting…</span>
|
||||||
</div>
|
</div>
|
||||||
|
<a id="open-webui-link" class="topbar-btn hidden" href="#" target="_blank" rel="noopener" title="Open Open WebUI">Open chat ↗</a>
|
||||||
</header>
|
</header>
|
||||||
|
|
||||||
<main>
|
<main>
|
||||||
@@ -24,6 +25,11 @@
|
|||||||
<span>Run the <em>Configure Sparks</em> action in StartOS to set hostnames, then run <em>Test Connection</em>.</span>
|
<span>Run the <em>Configure Sparks</em> action in StartOS to set hostnames, then run <em>Test Connection</em>.</span>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
|
<section id="hardware-panel" class="hardware-panel hidden">
|
||||||
|
<h2 class="section-title">Spark hardware</h2>
|
||||||
|
<div id="hardware-grid" class="hardware-grid"></div>
|
||||||
|
</section>
|
||||||
|
|
||||||
<section id="endpoint-panel" class="endpoint-panel hidden">
|
<section id="endpoint-panel" class="endpoint-panel hidden">
|
||||||
<div class="ep-title muted small">OpenAI-compatible endpoint</div>
|
<div class="ep-title muted small">OpenAI-compatible endpoint</div>
|
||||||
<div class="ep-row">
|
<div class="ep-row">
|
||||||
@@ -133,11 +139,20 @@
|
|||||||
<label class="dl-row">
|
<label class="dl-row">
|
||||||
<span class="dl-label">HuggingFace repo</span>
|
<span class="dl-label">HuggingFace repo</span>
|
||||||
<input type="text" id="dl-repo" placeholder="e.g. RedHatAI/Qwen3.6-35B-A3B-NVFP4" autocomplete="off">
|
<input type="text" id="dl-repo" placeholder="e.g. RedHatAI/Qwen3.6-35B-A3B-NVFP4" autocomplete="off">
|
||||||
|
<a id="dl-hf-link" class="dl-hf-link hidden" href="#" target="_blank" rel="noopener" title="Open on Hugging Face">↗</a>
|
||||||
</label>
|
</label>
|
||||||
|
<div class="dl-help muted small">
|
||||||
|
<a href="https://huggingface.co/models?other=vllm" target="_blank" rel="noopener">Browse vLLM-compatible models</a>
|
||||||
|
· NVFP4-quantized models (e.g. <code>RedHatAI/...</code>) are best for Blackwell hardware
|
||||||
|
</div>
|
||||||
<div class="dl-row">
|
<div class="dl-row">
|
||||||
<span class="dl-label">Where</span>
|
<span class="dl-label">Where</span>
|
||||||
<label class="radio"><input type="radio" name="dl-mode" value="solo" checked> Spark 1 only (solo)</label>
|
<label class="radio"><input type="radio" name="dl-mode" value="spark1" checked> Spark 1 only</label>
|
||||||
<label class="radio"><input type="radio" name="dl-mode" value="cluster"> Both Sparks (cluster, copy in parallel)</label>
|
<label class="radio"><input type="radio" name="dl-mode" value="spark2"> Spark 2 only</label>
|
||||||
|
<label class="radio"><input type="radio" name="dl-mode" value="cluster"> Both Sparks (for cluster models)</label>
|
||||||
|
</div>
|
||||||
|
<div class="dl-help muted small">
|
||||||
|
For <strong>solo</strong> models, download to wherever you'll run them. For <strong>cluster</strong> models (-tp 2), both Sparks need the weights — "Both" downloads to one Spark and rsyncs to the other in parallel.
|
||||||
</div>
|
</div>
|
||||||
<div class="dl-actions">
|
<div class="dl-actions">
|
||||||
<button id="dl-cancel" class="btn">Cancel</button>
|
<button id="dl-cancel" class="btn">Cancel</button>
|
||||||
@@ -178,6 +193,7 @@
|
|||||||
<div class="ub-row">
|
<div class="ub-row">
|
||||||
<span id="ub-text">Checking for updates…</span>
|
<span id="ub-text">Checking for updates…</span>
|
||||||
<span class="spacer"></span>
|
<span class="spacer"></span>
|
||||||
|
<button id="ub-explain" class="btn small-btn hidden">✨ Explain context</button>
|
||||||
<button id="ub-details" class="btn small-btn hidden">Show details</button>
|
<button id="ub-details" class="btn small-btn hidden">Show details</button>
|
||||||
<button id="ub-apply" class="btn small-btn primary hidden">Apply update</button>
|
<button id="ub-apply" class="btn small-btn primary hidden">Apply update</button>
|
||||||
</div>
|
</div>
|
||||||
@@ -185,6 +201,10 @@
|
|||||||
<summary class="muted small">Pending commits</summary>
|
<summary class="muted small">Pending commits</summary>
|
||||||
<pre id="ub-log" class="snippet"></pre>
|
<pre id="ub-log" class="snippet"></pre>
|
||||||
</details>
|
</details>
|
||||||
|
<details id="ub-explain-section" class="hidden">
|
||||||
|
<summary class="muted small">Explained by the loaded LLM</summary>
|
||||||
|
<div id="ub-explain-content" class="explain-content"></div>
|
||||||
|
</details>
|
||||||
<div id="ub-progress" class="hidden">
|
<div id="ub-progress" class="hidden">
|
||||||
<div class="phase-row">
|
<div class="phase-row">
|
||||||
<div class="phase" id="ub-phase">Applying update…</div>
|
<div class="phase" id="ub-phase">Applying update…</div>
|
||||||
|
|||||||
@@ -45,6 +45,17 @@ body {
|
|||||||
.logo-dot { width: 10px; height: 10px; border-radius: 50%; background: var(--accent); box-shadow: 0 0 12px var(--accent); }
|
.logo-dot { width: 10px; height: 10px; border-radius: 50%; background: var(--accent); box-shadow: 0 0 12px var(--accent); }
|
||||||
.current { flex: 1; text-align: right; font-size: 14px; }
|
.current { flex: 1; text-align: right; font-size: 14px; }
|
||||||
.current strong { color: var(--accent); }
|
.current strong { color: var(--accent); }
|
||||||
|
.topbar-btn {
|
||||||
|
background: var(--surface-2);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
color: var(--text);
|
||||||
|
padding: 5px 10px;
|
||||||
|
border-radius: 6px;
|
||||||
|
font-size: 12px;
|
||||||
|
text-decoration: none;
|
||||||
|
transition: border-color 0.15s, background 0.15s;
|
||||||
|
}
|
||||||
|
.topbar-btn:hover { background: #24242c; border-color: var(--accent); color: var(--accent); }
|
||||||
|
|
||||||
main {
|
main {
|
||||||
max-width: 880px;
|
max-width: 880px;
|
||||||
@@ -268,6 +279,14 @@ main {
|
|||||||
font: 13px ui-monospace, SFMono-Regular, "SF Mono", Menlo, monospace;
|
font: 13px ui-monospace, SFMono-Regular, "SF Mono", Menlo, monospace;
|
||||||
}
|
}
|
||||||
.modal-row textarea { font-family: inherit; resize: vertical; }
|
.modal-row textarea { font-family: inherit; resize: vertical; }
|
||||||
|
.modal-row .knob-hint {
|
||||||
|
color: var(--muted);
|
||||||
|
font-size: 11px;
|
||||||
|
line-height: 1.5;
|
||||||
|
margin-top: 2px;
|
||||||
|
padding-left: 2px;
|
||||||
|
}
|
||||||
|
.modal-row.inline .knob-hint { width: 100%; margin-left: 22px; margin-top: 0; }
|
||||||
.modal-row input:focus, .modal-row textarea:focus, .modal-row select:focus { outline: 1px solid var(--info); border-color: var(--info); }
|
.modal-row input:focus, .modal-row textarea:focus, .modal-row select:focus { outline: 1px solid var(--info); border-color: var(--info); }
|
||||||
.modal-row input[type='range'] { padding: 0; flex: 1; }
|
.modal-row input[type='range'] { padding: 0; flex: 1; }
|
||||||
.modal-fieldset {
|
.modal-fieldset {
|
||||||
@@ -295,6 +314,31 @@ main {
|
|||||||
.ub-context a { color: var(--info); text-decoration: none; }
|
.ub-context a { color: var(--info); text-decoration: none; }
|
||||||
.ub-context a:hover { text-decoration: underline; }
|
.ub-context a:hover { text-decoration: underline; }
|
||||||
.ub-context em { font-style: normal; color: var(--text); font-weight: 500; }
|
.ub-context em { font-style: normal; color: var(--text); font-weight: 500; }
|
||||||
|
|
||||||
|
#ub-explain-section { margin-top: 8px; }
|
||||||
|
#ub-explain-section summary { cursor: pointer; padding: 4px 0; }
|
||||||
|
.explain-content {
|
||||||
|
background: #08080b;
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: 6px;
|
||||||
|
padding: 12px 14px;
|
||||||
|
margin-top: 8px;
|
||||||
|
font-size: 13px;
|
||||||
|
line-height: 1.6;
|
||||||
|
color: #c7c7d1;
|
||||||
|
white-space: pre-wrap;
|
||||||
|
word-break: break-word;
|
||||||
|
max-height: 320px;
|
||||||
|
overflow: auto;
|
||||||
|
}
|
||||||
|
.explain-content .reasoning {
|
||||||
|
color: var(--muted);
|
||||||
|
font-style: italic;
|
||||||
|
font-size: 11px;
|
||||||
|
border-left: 2px solid var(--border);
|
||||||
|
padding-left: 10px;
|
||||||
|
margin: 4px 0;
|
||||||
|
}
|
||||||
.update-banner.up-to-date {
|
.update-banner.up-to-date {
|
||||||
border-color: var(--border);
|
border-color: var(--border);
|
||||||
color: var(--muted);
|
color: var(--muted);
|
||||||
@@ -306,6 +350,51 @@ main {
|
|||||||
#ub-list summary { cursor: pointer; padding: 4px 0; }
|
#ub-list summary { cursor: pointer; padding: 4px 0; }
|
||||||
#ub-progress { margin-top: 10px; }
|
#ub-progress { margin-top: 10px; }
|
||||||
|
|
||||||
|
/* ===== Hardware dashboard ===== */
|
||||||
|
|
||||||
|
.hardware-grid {
|
||||||
|
display: grid;
|
||||||
|
gap: 14px;
|
||||||
|
grid-template-columns: repeat(auto-fill, minmax(320px, 1fr));
|
||||||
|
}
|
||||||
|
.hw-card {
|
||||||
|
background: var(--surface);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: var(--radius);
|
||||||
|
padding: 14px 16px;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 8px;
|
||||||
|
}
|
||||||
|
.hw-card .head {
|
||||||
|
display: flex;
|
||||||
|
align-items: baseline;
|
||||||
|
gap: 8px;
|
||||||
|
margin-bottom: 4px;
|
||||||
|
}
|
||||||
|
.hw-card .head .name { font-weight: 600; font-size: 15px; }
|
||||||
|
.hw-card .head .meta { color: var(--muted); font-size: 12px; margin-left: auto; }
|
||||||
|
.hw-card.unreachable { border-color: rgba(239, 68, 68, 0.4); }
|
||||||
|
.hw-card.unreachable .name { color: var(--error); }
|
||||||
|
.hw-metric { display: flex; align-items: center; gap: 10px; font-size: 12px; }
|
||||||
|
.hw-metric .label { color: var(--muted); width: 56px; flex-shrink: 0; text-transform: uppercase; letter-spacing: 0.05em; font-size: 11px; }
|
||||||
|
.hw-metric .bar { flex: 1; height: 8px; background: var(--surface-2); border-radius: 4px; overflow: hidden; position: relative; }
|
||||||
|
.hw-metric .bar > span {
|
||||||
|
display: block;
|
||||||
|
height: 100%;
|
||||||
|
background: linear-gradient(90deg, var(--info), var(--accent));
|
||||||
|
border-radius: 4px;
|
||||||
|
transition: width 0.4s ease-out;
|
||||||
|
}
|
||||||
|
.hw-metric .bar.warn > span { background: linear-gradient(90deg, var(--warn), var(--error)); }
|
||||||
|
.hw-metric .val {
|
||||||
|
font-family: ui-monospace, SFMono-Regular, "SF Mono", Menlo, monospace;
|
||||||
|
font-size: 12px;
|
||||||
|
color: var(--text);
|
||||||
|
min-width: 110px;
|
||||||
|
text-align: right;
|
||||||
|
}
|
||||||
|
|
||||||
/* ===== Section header (title + action button) ===== */
|
/* ===== Section header (title + action button) ===== */
|
||||||
|
|
||||||
.section-header {
|
.section-header {
|
||||||
@@ -358,6 +447,24 @@ main {
|
|||||||
min-width: 200px;
|
min-width: 200px;
|
||||||
}
|
}
|
||||||
.dl-row input[type='text']:focus { outline: 1px solid var(--info); border-color: var(--info); }
|
.dl-row input[type='text']:focus { outline: 1px solid var(--info); border-color: var(--info); }
|
||||||
|
.dl-hf-link {
|
||||||
|
display: inline-flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
background: var(--surface-2);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
color: var(--info);
|
||||||
|
padding: 7px 10px;
|
||||||
|
border-radius: 6px;
|
||||||
|
text-decoration: none;
|
||||||
|
font-size: 14px;
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
.dl-hf-link:hover { background: rgba(96, 165, 250, 0.08); border-color: var(--info); }
|
||||||
|
.dl-help { padding-left: 122px; line-height: 1.6; }
|
||||||
|
.dl-help a { color: var(--info); text-decoration: none; }
|
||||||
|
.dl-help a:hover { text-decoration: underline; }
|
||||||
|
.dl-help code { background: var(--surface-2); padding: 1px 5px; border-radius: 3px; font-size: 11px; }
|
||||||
.radio { display: inline-flex; align-items: center; gap: 6px; font-size: 13px; color: var(--text); cursor: pointer; }
|
.radio { display: inline-flex; align-items: center; gap: 6px; font-size: 13px; color: var(--text); cursor: pointer; }
|
||||||
.radio input { accent-color: var(--accent); }
|
.radio input { accent-color: var(--accent); }
|
||||||
.dl-actions { display: flex; gap: 8px; justify-content: flex-end; margin-top: 10px; }
|
.dl-actions { display: flex; gap: 8px; justify-content: flex-end; margin-top: 10px; }
|
||||||
@@ -489,6 +596,9 @@ main {
|
|||||||
font-size: 11px;
|
font-size: 11px;
|
||||||
color: #5c5c66;
|
color: #5c5c66;
|
||||||
}
|
}
|
||||||
|
.card .repo a { color: inherit; text-decoration: none; }
|
||||||
|
.card .repo a:hover { color: var(--info); text-decoration: underline; }
|
||||||
|
.card .repo .hf-icon { font-size: 13px; opacity: 0.7; }
|
||||||
.tag {
|
.tag {
|
||||||
background: var(--surface-2);
|
background: var(--surface-2);
|
||||||
border: 1px solid var(--border);
|
border: 1px solid var(--border);
|
||||||
|
|||||||
@@ -76,6 +76,15 @@ const inputSpec = InputSpec.of({
|
|||||||
placeholder: 'magpie-tts',
|
placeholder: 'magpie-tts',
|
||||||
masked: false,
|
masked: false,
|
||||||
}),
|
}),
|
||||||
|
open_webui_url: Value.text({
|
||||||
|
name: 'Open WebUI URL (optional)',
|
||||||
|
description:
|
||||||
|
'If you also run Open WebUI on your LAN, paste its URL here. Spark Control will then show a one-click "Open chat" button next to the current model so you can jump straight to it.',
|
||||||
|
required: false,
|
||||||
|
default: null,
|
||||||
|
placeholder: 'e.g. https://open-webui.yourserver.local',
|
||||||
|
masked: false,
|
||||||
|
}),
|
||||||
})
|
})
|
||||||
|
|
||||||
export const configureSparks = sdk.Action.withInput(
|
export const configureSparks = sdk.Action.withInput(
|
||||||
|
|||||||
@@ -14,6 +14,8 @@ export const sparkConfigSchema = z.object({
|
|||||||
magpie_host: z.string().catch(''),
|
magpie_host: z.string().catch(''),
|
||||||
magpie_user: z.string().catch(''),
|
magpie_user: z.string().catch(''),
|
||||||
magpie_container: z.string().catch(''),
|
magpie_container: z.string().catch(''),
|
||||||
|
// Optional Open WebUI deep-link
|
||||||
|
open_webui_url: z.string().catch(''),
|
||||||
})
|
})
|
||||||
|
|
||||||
export type SparkConfig = z.infer<typeof sparkConfigSchema>
|
export type SparkConfig = z.infer<typeof sparkConfigSchema>
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ export const main = sdk.setupMain(async ({ effects }) => {
|
|||||||
magpie_host: '',
|
magpie_host: '',
|
||||||
magpie_user: '',
|
magpie_user: '',
|
||||||
magpie_container: '',
|
magpie_container: '',
|
||||||
|
open_webui_url: '',
|
||||||
}
|
}
|
||||||
|
|
||||||
return sdk.Daemons.of(effects).addDaemon('primary', {
|
return sdk.Daemons.of(effects).addDaemon('primary', {
|
||||||
@@ -47,6 +48,7 @@ export const main = sdk.setupMain(async ({ effects }) => {
|
|||||||
MAGPIE_USER: cfg.magpie_user,
|
MAGPIE_USER: cfg.magpie_user,
|
||||||
MAGPIE_CONTAINER: cfg.magpie_container,
|
MAGPIE_CONTAINER: cfg.magpie_container,
|
||||||
MODELS_OVERRIDES: '/data/models-overrides.yaml',
|
MODELS_OVERRIDES: '/data/models-overrides.yaml',
|
||||||
|
OPEN_WEBUI_URL: cfg.open_webui_url,
|
||||||
BIND_PORT: String(uiPort),
|
BIND_PORT: String(uiPort),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
import { VersionInfo, IMPOSSIBLE } from '@start9labs/start-sdk'
|
import { VersionInfo, IMPOSSIBLE } from '@start9labs/start-sdk'
|
||||||
|
|
||||||
export const v0_1_0 = VersionInfo.of({
|
export const v0_1_0 = VersionInfo.of({
|
||||||
version: '0.2.4:0',
|
version: '0.3.0:0',
|
||||||
releaseNotes: {
|
releaseNotes: {
|
||||||
en_US:
|
en_US:
|
||||||
'Hotfix + UX polish: fixes parakeet/magpie status showing as "Unknown" (empty container-name env var no longer overrode the default). Copy buttons are now compact icons, and the values themselves are clickable to copy. Service cards show host + URL + model as separate copyable rows. The update banner now spells out what is being updated (spark-vllm-docker, the upstream LLM-cluster project).',
|
'v0.3: Spark hardware dashboard (RAM, disk, GPU memory + utilization, CPU load, uptime per Spark). Per-model Advanced settings now show plain-English hints tied to your actual GPU memory (e.g. "0.85 GPU util leaves ~18 GB free"). "Explain context" button on the update banner asks the loaded LLM to summarize pending commits in plain English. Optional Open WebUI URL in Configure Sparks shows a one-click "Open chat" button in the top bar. Downloads can now target Spark 1, Spark 2, or both. Each model card links out to its Hugging Face page.',
|
||||||
},
|
},
|
||||||
migrations: {
|
migrations: {
|
||||||
up: async ({ effects }) => {},
|
up: async ({ effects }) => {},
|
||||||
|
|||||||
Reference in New Issue
Block a user