Files
spark-control/image/app/hardware.py
T
Grant 64ce0fca10 v0.3.0 - Hardware dashboard + knob context + Explain context + Open WebUI link
Hardware dashboard:
- New hardware.py module: SSH probes each Spark for hostname, uptime, load+cores, RAM, disk, GPU (name, util, temp, power) + per-process GPU memory sum
- DGX Spark uses unified memory (nvidia-smi memory.total returns N/A); fall back to per-process compute memory and compute fraction against system RAM. Marks with gpu_unified_memory=true.
- 4s TTL cache in HardwareProbe to avoid hammering
- /api/hardware returns per-Spark snapshot
- UI: 'Spark hardware' section at the top with per-Spark cards (CPU load, RAM, GPU mem (unified), GPU util + temp + power, disk) — bars with warn threshold styling
- Polls every 8s

Knob context (tied to live hardware):
- Each Advanced knob now shows plain-English help text
- 'GPU memory %' shows '~N GB allocated · ~M GB left for OS/buffers' computed from actual Spark RAM
- 'Max context' shows '~N pages of text'
- Toggles show tradeoff descriptions

Explain context:
- 'Explain context' button on the update banner
- /api/explain-updates POST: forwards pending commits to the loaded vLLM model and streams its response back as SSE
- Renders into an expandable 'Explained by the loaded LLM' section under Pending commits
- Reasoning tokens shown italicized when the model emits them

Open WebUI integration:
- New 'Open WebUI URL' optional field in Configure Sparks
- /api/config exposes it; UI shows 'Open chat ↗' button in the top bar if set

Downloads:
- Third radio option: Spark 1 only / Spark 2 only / Both Sparks
- Backend picks SSH target based on mode
- HF repo link icon next to the input
- Helper line about NVFP4 for Blackwell

Model cards:
- Repo name is now a clickable link to its Hugging Face page

Package: bump 0.3.0:0
2026-05-12 12:00:15 -05:00

119 lines
4.7 KiB
Python

"""Per-Spark hardware snapshots: RAM, disk, GPU memory + utilization, CPU load, uptime.
Driven by a single SSH command per Spark that runs `hostname`, `uptime -p`, `nproc`,
`free`, `df`, and `nvidia-smi`, reads `/proc/loadavg`, and prints labeled lines back.
We parse those labels in `_parse`.
"""
from __future__ import annotations
import asyncio
import time
from typing import Any
from .config import Settings
from .ssh import ssh_run
# Shell script passed to `ssh_run` for each Spark (see HardwareProbe._one).
# Every command echoes one `LABEL=value` line; `_parse` lowercases the label
# and decodes the value. Most tools have stderr sent to /dev/null, so a tool
# that is missing or fails leaves an empty value after the `=` rather than
# noise in the output.
#   MEMORY / DISK     -> "<total> <used>" in bytes (free -b, df -B1 on /)
#   GPU               -> first GPU only (head -1), CSV without header or units:
#                        name, utilization.gpu, temperature.gpu, power.draw, memory.total
#   GPU_MEM_USED_MIB  -> sum of used_gpu_memory over compute apps; awk prints 0
#                        when there is nothing to sum
_PROBE = r"""
set -e
echo HOSTNAME=$(hostname)
echo UPTIME=$(uptime -p 2>/dev/null || uptime)
echo LOAD=$(awk '{print $1, $2, $3}' /proc/loadavg)
echo CORES=$(nproc 2>/dev/null || echo 0)
echo MEMORY=$(free -b 2>/dev/null | awk '/^Mem:/ {print $2, $3}')
echo DISK=$(df -B1 / 2>/dev/null | awk 'NR==2 {print $2, $3}')
echo GPU=$(nvidia-smi --query-gpu=name,utilization.gpu,temperature.gpu,power.draw,memory.total --format=csv,noheader,nounits 2>/dev/null | head -1)
echo GPU_MEM_USED_MIB=$(nvidia-smi --query-compute-apps=used_gpu_memory --format=csv,noheader,nounits 2>/dev/null | awk '{s+=$1} END {print s+0}')
""".strip()
def _parse_int(s: str) -> int | None:
    """Convert ``s`` with ``int()``, returning ``None`` when that fails.

    Both ``ValueError`` (text that is not an integer, e.g. ``"[N/A]"`` or
    ``""``) and ``TypeError`` (e.g. ``None``) are treated as "no value".
    """
    try:
        value = int(s)
    except (TypeError, ValueError):
        return None
    return value
def _parse(out: str) -> dict:
    """Decode the ``LABEL=value`` lines printed by the probe script.

    Lines without an ``=`` are ignored. Labels are lowercased and both label
    and value are stripped; if a label appears more than once the last
    occurrence wins.

    Args:
        out: Raw stdout of the probe command.

    Returns:
        A dict that always contains ``hostname``, ``uptime`` and ``cores``
        (each ``None`` when missing or unparseable). The remaining keys are
        added only when the corresponding label carried a non-empty value:

        * ``load``: list of floats taken from the first three load fields, or
          ``None`` if any of them is not a number.
        * ``ram_total_bytes`` / ``ram_used_bytes``
        * ``disk_total_bytes`` / ``disk_used_bytes``
        * ``gpu_name``, ``gpu_util_pct``, ``gpu_temp_c``, ``gpu_power_w``,
          ``gpu_mem_total_mib``, ``gpu_unified_memory``
        * ``gpu_mem_used_mib``

        Individual numeric fields are ``None`` when their text cannot be
        converted.
    """
    info: dict[str, str] = {}
    for raw in out.splitlines():
        label, sep, value = raw.partition("=")
        if not sep:
            continue
        info[label.strip().lower()] = value.strip()

    parsed: dict[str, Any] = {
        "hostname": info.get("hostname"),
        "uptime": info.get("uptime"),
        "cores": _parse_int(info.get("cores", "")),
    }

    # Load average -> [1m, 5m, 15m]
    if info.get("load"):
        try:
            parsed["load"] = [float(x) for x in info["load"].split()[:3]]
        except ValueError:
            parsed["load"] = None

    # Memory: "<total> <used>" in bytes
    if info.get("memory"):
        mem = info["memory"].split()
        if len(mem) == 2:
            parsed["ram_total_bytes"] = _parse_int(mem[0])
            parsed["ram_used_bytes"] = _parse_int(mem[1])

    # Disk: "<total> <used>" in bytes
    if info.get("disk"):
        disk = info["disk"].split()
        if len(disk) == 2:
            parsed["disk_total_bytes"] = _parse_int(disk[0])
            parsed["disk_used_bytes"] = _parse_int(disk[1])

    # GPU: "name, util_gpu, temp_C, power_W, memory_total_MiB"
    if info.get("gpu"):
        # Split from the right: the four numeric fields are always the last
        # four, so a comma inside the product name stays part of the name
        # instead of shifting every following field by one.
        parts = [p.strip() for p in info["gpu"].rsplit(",", 4)]
        if len(parts) == 5:
            name, util, temp, power, mem_total = parts
            parsed["gpu_name"] = name
            parsed["gpu_util_pct"] = _parse_int(util)
            parsed["gpu_temp_c"] = _parse_int(temp)
            try:
                parsed["gpu_power_w"] = float(power)
            except ValueError:
                parsed["gpu_power_w"] = None
            # memory.total may be "[N/A]" on unified-memory systems (DGX Spark);
            # any non-integer total is therefore reported as unified memory.
            parsed["gpu_mem_total_mib"] = _parse_int(mem_total)
            parsed["gpu_unified_memory"] = parsed["gpu_mem_total_mib"] is None

    # Sum of per-process compute memory (works even on unified-memory systems)
    if info.get("gpu_mem_used_mib"):
        parsed["gpu_mem_used_mib"] = _parse_int(info["gpu_mem_used_mib"])

    return parsed
class HardwareProbe:
    """Probe each configured Spark over SSH and cache the result briefly.

    Results (successful or failed) are cached per Spark for ``ttl_sec``
    seconds so frequent polling does not hammer the machines. A per-Spark
    lock ensures that concurrent callers share one in-flight probe instead
    of each starting their own.
    """

    def __init__(self, settings: Settings, ttl_sec: float = 4.0) -> None:
        """Store the settings and create empty cache and lock tables.

        Args:
            settings: Provides ``spark1_host``/``spark1_user`` and
                ``spark2_host``/``spark2_user``; also forwarded to ``ssh_run``.
            ttl_sec: How long a probe result is served from cache.
        """
        self.settings = settings
        self.ttl_sec = ttl_sec
        # key -> (time.monotonic() when the probe finished, result dict)
        self._cache: dict[str, tuple[float, dict]] = {}
        self._locks: dict[str, asyncio.Lock] = {}

    def _lock(self, key: str) -> asyncio.Lock:
        """Return the lock for ``key``, creating it on first use."""
        if key not in self._locks:
            self._locks[key] = asyncio.Lock()
        return self._locks[key]

    async def fetch(self) -> dict:
        """Return ``{"spark1": ..., "spark2": ...}`` snapshots.

        The two Sparks are probed concurrently, so the worst-case latency is
        one SSH timeout rather than the sum of both.
        """
        cfg = self.settings
        spark1, spark2 = await asyncio.gather(
            self._one("spark1", cfg.spark1_host, cfg.spark1_user),
            self._one("spark2", cfg.spark2_host, cfg.spark2_user),
        )
        return {"spark1": spark1, "spark2": spark2}

    async def _one(self, key: str, host: str, user: str) -> dict:
        """Return the snapshot for one Spark, using the cache when fresh.

        Args:
            key: Cache/lock key for this Spark (e.g. ``"spark1"``).
            host: SSH host; empty means the Spark is not configured.
            user: SSH user; empty means the Spark is not configured.

        Returns:
            ``{"reachable": False, "configured": False}`` when host or user
            is empty; otherwise a dict with ``reachable``, ``configured`` and
            ``host`` plus either an ``error`` string (non-zero exit status)
            or the fields produced by ``_parse``.
        """
        if not host or not user:
            return {"reachable": False, "configured": False}
        async with self._lock(key):
            cached = self._cache.get(key)
            if cached and (time.monotonic() - cached[0] < self.ttl_sec):
                return cached[1]
            rc, out, err = await ssh_run(host, user, _PROBE, self.settings, timeout=8)
            if rc != 0:
                result = {
                    "reachable": False,
                    "configured": True,
                    "host": host,
                    "error": err.strip() or out.strip() or f"rc={rc}",
                }
            else:
                result = {"reachable": True, "configured": True, "host": host, **_parse(out)}
            # Stamp the entry when the probe completes, not when it started:
            # otherwise a slow SSH round trip eats into (or exceeds) the TTL
            # and the fresh result is treated as stale immediately.
            self._cache[key] = (time.monotonic(), result)
            return result