Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| a02f4db850 | |||
| 1889ab45fb | |||
| e88fdcfde4 | |||
| 64ce0fca10 | |||
| c6da6b0784 |
+6
-2
@@ -42,6 +42,8 @@ class Settings:
|
|||||||
parakeet_port: int
|
parakeet_port: int
|
||||||
magpie_port: int
|
magpie_port: int
|
||||||
bind_port: int
|
bind_port: int
|
||||||
|
open_webui_url: str
|
||||||
|
ngc_api_key: str
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_env(cls) -> "Settings":
|
def from_env(cls) -> "Settings":
|
||||||
@@ -55,10 +57,10 @@ class Settings:
|
|||||||
spark2_user=spark2_user,
|
spark2_user=spark2_user,
|
||||||
parakeet_host=_env("PARAKEET_HOST") or spark2_host,
|
parakeet_host=_env("PARAKEET_HOST") or spark2_host,
|
||||||
parakeet_user=_env("PARAKEET_USER") or spark2_user,
|
parakeet_user=_env("PARAKEET_USER") or spark2_user,
|
||||||
parakeet_container=_env("PARAKEET_CONTAINER", "parakeet-asr"),
|
parakeet_container=_env("PARAKEET_CONTAINER") or "parakeet-asr",
|
||||||
magpie_host=_env("MAGPIE_HOST") or spark2_host,
|
magpie_host=_env("MAGPIE_HOST") or spark2_host,
|
||||||
magpie_user=_env("MAGPIE_USER") or spark2_user,
|
magpie_user=_env("MAGPIE_USER") or spark2_user,
|
||||||
magpie_container=_env("MAGPIE_CONTAINER", "magpie-tts"),
|
magpie_container=_env("MAGPIE_CONTAINER") or "magpie-tts",
|
||||||
ssh_key_path=_env("SSH_KEY_PATH"),
|
ssh_key_path=_env("SSH_KEY_PATH"),
|
||||||
ssh_known_hosts=_env("SSH_KNOWN_HOSTS"),
|
ssh_known_hosts=_env("SSH_KNOWN_HOSTS"),
|
||||||
models_yaml=_resolve_models_yaml(),
|
models_yaml=_resolve_models_yaml(),
|
||||||
@@ -66,6 +68,8 @@ class Settings:
|
|||||||
parakeet_port=int(_env("PARAKEET_PORT", "8000")),
|
parakeet_port=int(_env("PARAKEET_PORT", "8000")),
|
||||||
magpie_port=int(_env("MAGPIE_PORT", "9000")),
|
magpie_port=int(_env("MAGPIE_PORT", "9000")),
|
||||||
bind_port=int(_env("BIND_PORT", "9999")),
|
bind_port=int(_env("BIND_PORT", "9999")),
|
||||||
|
open_webui_url=_env("OPEN_WEBUI_URL", ""),
|
||||||
|
ngc_api_key=_env("NGC_API_KEY", ""),
|
||||||
)
|
)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|||||||
@@ -0,0 +1,126 @@
|
|||||||
|
"""Track Spark up/down transitions and cache discovered MAC addresses.
|
||||||
|
|
||||||
|
Persisted to /data/connectivity.json so history survives package restarts:
|
||||||
|
|
||||||
|
{
|
||||||
|
"macs": { "spark1": "aa:bb:..", "spark2": "11:22:.." },
|
||||||
|
"current": { "spark1": "up", "spark2": "down" },
|
||||||
|
"last_change": { "spark1": "2026-05-12T15:00:00Z", ... },
|
||||||
|
"events": [
|
||||||
|
{ "spark": "spark2", "at": "2026-05-12T17:30:00Z", "transition": "down" },
|
||||||
|
{ "spark": "spark2", "at": "2026-05-12T18:45:00Z", "transition": "up", "down_seconds": 4500 },
|
||||||
|
...
|
||||||
|
]
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import threading
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
# Upper bound on persisted transition events; record_state() keeps only the
# newest MAX_EVENTS entries when the list grows past this.
MAX_EVENTS = 200  # rolling window — plenty for showing recent history
|
||||||
|
|
||||||
|
|
||||||
|
def _path() -> str:
    """Return where the connectivity log lives.

    The ``CONNECTIVITY_LOG`` environment variable takes precedence; without it
    the default ``/data/connectivity.json`` is used.
    """
    default_location = "/data/connectivity.json"
    return os.environ.get("CONNECTIVITY_LOG", default_location)
|
||||||
|
|
||||||
|
|
||||||
|
# Guards every read and read-modify-write of the connectivity file done by
# load(), record_mac() and record_state() in this process.
_lock = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
|
def _read() -> dict:
    """Load the persisted connectivity document from disk.

    Returns:
        The decoded JSON object, or an empty dict when the file is missing,
        cannot be decoded as UTF-8/JSON, or does not hold a JSON object at
        the top level.
    """
    try:
        with open(_path(), encoding="utf-8") as f:
            data = json.load(f)
    except (FileNotFoundError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError:
        # a corrupt file is treated as "no history yet" instead of crashing.
        return {}
    # Callers go straight to dict methods (setdefault/get); a stray list or
    # scalar at the top level would raise AttributeError there.
    return data if isinstance(data, dict) else {}
|
||||||
|
|
||||||
|
|
||||||
|
def _write(data: dict) -> None:
    """Persist *data* as the connectivity document.

    The JSON is written to a sibling ``<path>.tmp`` file first and then moved
    over the real path with ``os.replace``, so the target is swapped in one
    step instead of being rewritten in place. The parent directory is created
    when it does not exist yet.
    """
    destination = _path()
    staging = destination + ".tmp"
    Path(destination).parent.mkdir(parents=True, exist_ok=True)
    with open(staging, "w") as handle:
        json.dump(data, handle, indent=2, sort_keys=False)
    os.replace(staging, destination)
|
||||||
|
|
||||||
|
|
||||||
|
def load() -> dict:
    """Return the stored connectivity document with every section present.

    Missing ``macs``, ``current`` and ``last_change`` sections default to an
    empty dict and ``events`` to an empty list. Nothing is written to disk.
    """
    with _lock:
        doc = _read()
        # Fresh containers are built on every call, so callers never share them.
        defaults = (("macs", {}), ("current", {}), ("last_change", {}), ("events", []))
        for section, empty in defaults:
            doc.setdefault(section, empty)
        return doc
|
||||||
|
|
||||||
|
|
||||||
|
def record_mac(spark: str, mac: Optional[str]) -> None:
    """Remember the MAC address discovered for *spark*.

    A falsy *mac* is ignored. The file is rewritten only when the stored
    address for *spark* actually changes.
    """
    if mac:
        with _lock:
            doc = _read()
            known = doc.setdefault("macs", {})
            if known.get(spark) == mac:
                return
            known[spark] = mac
            _write(doc)
|
||||||
|
|
||||||
|
|
||||||
|
def record_state(spark: str, reachable: bool) -> Optional[dict]:
    """Update current state. If it differs from the last seen state, append an event.

    Args:
        spark: Key of the machine being tracked (e.g. ``"spark1"``).
        reachable: Outcome of the latest probe.

    Returns:
        The event dict if a transition was recorded, else None.
    """
    new_state = "up" if reachable else "down"
    with _lock:
        d = _read()
        d.setdefault("macs", {})
        d.setdefault("current", {})
        d.setdefault("last_change", {})
        d.setdefault("events", [])
        prev = d["current"].get(spark)
        if prev == new_state:
            return None

        # One clock reading feeds both the event timestamp and the duration,
        # so "at" minus the previous "last_change" always equals the duration.
        now_dt = datetime.now(timezone.utc)
        now = now_dt.isoformat().replace("+00:00", "Z")
        event: dict = {"spark": spark, "at": now, "transition": new_state}

        # When we have a previous state and timestamp, compute duration
        last_change = d["last_change"].get(spark)
        if prev and last_change:
            try:
                prev_dt = datetime.fromisoformat(last_change.replace("Z", "+00:00"))
                duration = (now_dt - prev_dt).total_seconds()
            except (AttributeError, TypeError, ValueError):
                # Stored timestamp is not a string, is timezone-naive, or is
                # malformed: still record the transition, just without a duration.
                duration = None
            if duration is not None:
                if prev == "down" and new_state == "up":
                    event["down_seconds"] = round(duration)
                elif prev == "up" and new_state == "down":
                    event["up_seconds"] = round(duration)

        d["current"][spark] = new_state
        d["last_change"][spark] = now
        d["events"].append(event)
        # Keep rolling window
        if len(d["events"]) > MAX_EVENTS:
            d["events"] = d["events"][-MAX_EVENTS:]
        _write(d)
        return event
|
||||||
|
|
||||||
|
|
||||||
|
def get_mac(spark: str) -> Optional[str]:
    """Look up the cached MAC address for *spark*; None when none is stored."""
    known_macs = load().get("macs", {})
    return known_macs.get(spark)
|
||||||
|
|
||||||
|
|
||||||
|
def summary() -> dict:
    """Compact summary for the UI: known MACs, current state, recent events.

    Only the 50 most recent events are included.
    """
    doc = load()
    recent_events = doc.get("events", [])[-50:]
    result = {name: doc.get(name, {}) for name in ("macs", "current", "last_change")}
    result["events"] = recent_events
    return result
|
||||||
@@ -0,0 +1,59 @@
|
|||||||
|
"""User-installed services persist in /data/services-overrides.yaml.
|
||||||
|
|
||||||
|
Format:
|
||||||
|
custom:
|
||||||
|
- key: my-riva
|
||||||
|
kind: stt
|
||||||
|
host: <spark-2-ip>
|
||||||
|
user: <spark-user>
|
||||||
|
container: riva-asr
|
||||||
|
port: 8001
|
||||||
|
health_path: /health
|
||||||
|
image: nvcr.io/nim/nvidia/riva-multilingual:latest
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
|
||||||
|
def _path() -> str:
    """Return where the custom-services file lives.

    The ``SERVICES_OVERRIDES`` environment variable takes precedence; without
    it the default ``/data/services-overrides.yaml`` is used.
    """
    default_location = "/data/services-overrides.yaml"
    return os.environ.get("SERVICES_OVERRIDES", default_location)
|
||||||
|
|
||||||
|
|
||||||
|
def load_custom_services() -> list[dict]:
    """Return the user-installed service entries stored under ``custom``.

    Returns:
        The list found under the top-level ``custom`` key, or an empty list
        when the file is missing, empty, or not shaped as a mapping that
        holds a list there.
    """
    try:
        with open(_path(), encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except FileNotFoundError:
        return []
    # A hand-edited file may hold a bare list or scalar at the top level;
    # calling .get() on that would raise AttributeError.
    if not isinstance(data, dict):
        return []
    custom = data.get("custom")
    return custom if isinstance(custom, list) else []
|
||||||
|
|
||||||
|
|
||||||
|
def add_custom_service(entry: dict) -> None:
    """Insert *entry* into the custom-services file, replacing any same-key entry.

    Existing entries whose ``key`` equals ``entry["key"]`` are dropped and
    *entry* is appended at the end. Other top-level keys in the file are kept.

    Raises:
        KeyError: If *entry* has no ``"key"``.
    """
    # Validate before touching the filesystem; previously a key-less entry was
    # only rejected when the stored list happened to be non-empty.
    key = entry["key"]
    p = _path()
    Path(p).parent.mkdir(parents=True, exist_ok=True)
    data: dict = {}
    try:
        with open(p) as f:
            data = yaml.safe_load(f) or {}
    except FileNotFoundError:
        pass
    custom = [c for c in (data.get("custom") or []) if c.get("key") != key]
    custom.append(entry)
    data["custom"] = custom
    # Write to a sibling temp file and swap it in, so a crash mid-write cannot
    # leave a truncated file that loses every registered service.
    tmp = p + ".tmp"
    with open(tmp, "w") as f:
        yaml.safe_dump(data, f, sort_keys=False)
    os.replace(tmp, p)
|
||||||
|
|
||||||
|
|
||||||
|
def delete_custom_service(key: str) -> None:
    """Remove every custom-service entry whose ``key`` equals *key*.

    Does nothing when the file does not exist. Other top-level keys in the
    file are kept.
    """
    p = _path()
    try:
        with open(p) as f:
            data = yaml.safe_load(f) or {}
    except FileNotFoundError:
        return
    data["custom"] = [c for c in (data.get("custom") or []) if c.get("key") != key]
    # Write to a sibling temp file and swap it in, so a crash mid-write cannot
    # leave a truncated file that loses the remaining services.
    tmp = p + ".tmp"
    with open(tmp, "w") as f:
        yaml.safe_dump(data, f, sort_keys=False)
    os.replace(tmp, p)
|
||||||
+14
-5
@@ -19,7 +19,7 @@ from .config import Settings
|
|||||||
from .ssh import ssh_stream, StreamHandle
|
from .ssh import ssh_stream, StreamHandle
|
||||||
|
|
||||||
|
|
||||||
Mode = Literal["solo", "cluster"]
|
Mode = Literal["spark1", "spark2", "cluster"]
|
||||||
|
|
||||||
|
|
||||||
_TQDM_RE = re.compile(
|
_TQDM_RE = re.compile(
|
||||||
@@ -113,17 +113,26 @@ class DownloadManager:
|
|||||||
|
|
||||||
async def _do(self, job: DownloadJob) -> None:
|
async def _do(self, job: DownloadJob) -> None:
|
||||||
s = self.settings
|
s = self.settings
|
||||||
if not s.spark1_host or not s.spark1_user:
|
# Pick the SSH target and hf-download flags from the mode.
|
||||||
raise RuntimeError("spark1 not configured")
|
if job.mode == "spark2":
|
||||||
|
target_host, target_user = s.spark2_host, s.spark2_user
|
||||||
|
flags = ""
|
||||||
|
elif job.mode == "cluster":
|
||||||
|
target_host, target_user = s.spark1_host, s.spark1_user
|
||||||
|
flags = "-c --copy-parallel"
|
||||||
|
else: # spark1
|
||||||
|
target_host, target_user = s.spark1_host, s.spark1_user
|
||||||
|
flags = ""
|
||||||
|
if not target_host or not target_user:
|
||||||
|
raise RuntimeError(f"{job.mode} host not configured")
|
||||||
|
|
||||||
flags = "-c --copy-parallel" if job.mode == "cluster" else ""
|
|
||||||
cmd = f"cd ~/spark-vllm-docker && ./hf-download.sh {job.repo} {flags}".strip()
|
cmd = f"cd ~/spark-vllm-docker && ./hf-download.sh {job.repo} {flags}".strip()
|
||||||
job.append(f"$ {cmd}")
|
job.append(f"$ {cmd}")
|
||||||
job.state = "downloading"
|
job.state = "downloading"
|
||||||
job.progress.phase = "Connecting to Hugging Face…"
|
job.progress.phase = "Connecting to Hugging Face…"
|
||||||
|
|
||||||
handle = StreamHandle()
|
handle = StreamHandle()
|
||||||
async for line in ssh_stream(s.spark1_host, s.spark1_user, cmd, s, handle=handle):
|
async for line in ssh_stream(target_host, target_user, cmd, s, handle=handle):
|
||||||
job.append(line)
|
job.append(line)
|
||||||
self._update_progress(job, line)
|
self._update_progress(job, line)
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,137 @@
|
|||||||
|
"""Per-Spark hardware snapshots: RAM, disk, GPU memory + utilization, CPU load, uptime.
|
||||||
|
|
||||||
|
Drives via a single SSH command per Spark that runs `free`, `df`, `nvidia-smi`,
|
||||||
|
`/proc/loadavg`, and `uptime -p` and prints labeled lines back. We parse those
|
||||||
|
labels in `_parse`.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
import asyncio
|
||||||
|
import time
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from .config import Settings
|
||||||
|
from .connectivity import record_mac, record_state
|
||||||
|
from .ssh import ssh_run
|
||||||
|
|
||||||
|
|
||||||
|
_PROBE = r"""
|
||||||
|
set -e
|
||||||
|
echo HOSTNAME=$(hostname)
|
||||||
|
echo UPTIME=$(uptime -p 2>/dev/null || uptime)
|
||||||
|
echo LOAD=$(awk '{print $1, $2, $3}' /proc/loadavg)
|
||||||
|
echo CORES=$(nproc 2>/dev/null || echo 0)
|
||||||
|
echo MEMORY=$(free -b 2>/dev/null | awk '/^Mem:/ {print $2, $3}')
|
||||||
|
echo DISK=$(df -B1 / 2>/dev/null | awk 'NR==2 {print $2, $3}')
|
||||||
|
echo GPU=$(nvidia-smi --query-gpu=name,utilization.gpu,temperature.gpu,power.draw,memory.total --format=csv,noheader,nounits 2>/dev/null | head -1)
|
||||||
|
echo GPU_MEM_USED_MIB=$(nvidia-smi --query-compute-apps=used_gpu_memory --format=csv,noheader,nounits 2>/dev/null | awk '{s+=$1} END {print s+0}')
|
||||||
|
DEFIF=$(ip route show default 2>/dev/null | awk '{print $5; exit}')
|
||||||
|
echo MAC=$(cat /sys/class/net/$DEFIF/address 2>/dev/null)
|
||||||
|
""".strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_int(s: str) -> int | None:
    """Convert *s* to an int, returning None when it is not a valid integer.

    ``None`` and other non-convertible inputs also yield None instead of raising.
    """
    try:
        value = int(s)
    except (TypeError, ValueError):
        return None
    return value
|
||||||
|
|
||||||
|
|
||||||
|
def _parse(out: str) -> dict:
    """Turn the probe script's ``KEY=value`` output into a snapshot dict.

    ``hostname``, ``uptime`` and ``cores`` are always present (possibly None).
    The load, RAM, disk, GPU and MAC keys appear only when the matching line
    was present, non-empty and had the expected number of fields.
    """
    fields: dict[str, Any] = {}
    for line in out.splitlines():
        key, sep, value = line.partition("=")
        if sep:
            fields[key.strip().lower()] = value.strip()

    snapshot: dict[str, Any] = {
        "hostname": fields.get("hostname"),
        "uptime": fields.get("uptime"),
        "cores": _parse_int(fields.get("cores", "")),
    }

    # Load average -> [1m, 5m, 15m]; None when any figure is not a float.
    load_text = fields.get("load")
    if load_text:
        try:
            snapshot["load"] = [float(tok) for tok in load_text.split()[:3]]
        except ValueError:
            snapshot["load"] = None

    # Memory: "<total> <used>" in bytes.
    memory_tokens = (fields.get("memory") or "").split()
    if len(memory_tokens) == 2:
        total, used = memory_tokens
        snapshot["ram_total_bytes"] = _parse_int(total)
        snapshot["ram_used_bytes"] = _parse_int(used)

    # Disk: "<total> <used>" in bytes.
    disk_tokens = (fields.get("disk") or "").split()
    if len(disk_tokens) == 2:
        total, used = disk_tokens
        snapshot["disk_total_bytes"] = _parse_int(total)
        snapshot["disk_used_bytes"] = _parse_int(used)

    # GPU: "name, util_gpu, temp_C, power_W, memory_total_MiB"
    gpu_columns = [col.strip() for col in (fields.get("gpu") or "").split(",")]
    if len(gpu_columns) >= 5:
        name, util, temp, power, mem_total = gpu_columns[:5]
        try:
            power_w = float(power)
        except ValueError:
            power_w = None
        # memory.total may be "[N/A]" on unified-memory systems (DGX Spark)
        mem_total_mib = _parse_int(mem_total)
        snapshot.update(
            gpu_name=name,
            gpu_util_pct=_parse_int(util),
            gpu_temp_c=_parse_int(temp),
            gpu_power_w=power_w,
            gpu_mem_total_mib=mem_total_mib,
            gpu_unified_memory=mem_total_mib is None,
        )

    # Sum of per-process compute memory (works even on unified-memory systems)
    used_mib = fields.get("gpu_mem_used_mib")
    if used_mib:
        snapshot["gpu_mem_used_mib"] = _parse_int(used_mib)

    # MAC address on the default-route interface (for Wake-on-LAN)
    mac = fields.get("mac")
    if mac:
        snapshot["mac"] = mac.lower()
    return snapshot
|
||||||
|
|
||||||
|
|
||||||
|
class HardwareProbe:
    """Fetch hardware snapshots from both Sparks, caching results briefly.

    Reachable results are reused for ``ttl_sec`` seconds and unreachable ones
    for ``fail_ttl_sec`` seconds, to avoid hammering the Sparks.
    """

    def __init__(self, settings: Settings, ttl_sec: float = 4.0, fail_ttl_sec: float = 25.0) -> None:
        self.settings = settings
        self.ttl_sec = ttl_sec
        self.fail_ttl_sec = fail_ttl_sec
        # key -> (monotonic time the result was obtained, result dict)
        self._cache: dict[str, tuple[float, dict]] = {}
        # One lock per Spark key so concurrent requests share a single probe.
        self._locks: dict[str, asyncio.Lock] = {}

    def _ttl_for(self, value: dict) -> float:
        """Return the cache lifetime that applies to the cached result *value*."""
        return self.ttl_sec if value.get("reachable") else self.fail_ttl_sec

    def _lock(self, key: str) -> asyncio.Lock:
        """Return the lock for *key*, creating it on first use."""
        if key not in self._locks:
            self._locks[key] = asyncio.Lock()
        return self._locks[key]

    async def fetch(self) -> dict:
        """Probe both Sparks concurrently and return ``{"spark1": ..., "spark2": ...}``."""
        s1, s2 = await asyncio.gather(
            self._one("spark1", self.settings.spark1_host, self.settings.spark1_user),
            self._one("spark2", self.settings.spark2_host, self.settings.spark2_user),
        )
        return {"spark1": s1, "spark2": s2}

    async def _one(self, key: str, host: str, user: str) -> dict:
        """Return the snapshot for one Spark, from cache when still fresh.

        Also records the reachability (and MAC, when found) in the
        connectivity log after every real probe.
        """
        if not host or not user:
            return {"reachable": False, "configured": False}
        async with self._lock(key):
            cached = self._cache.get(key)
            # A fresh entry (including a cached failure) is returned without
            # contacting the host again.
            if cached and (time.monotonic() - cached[0] < self._ttl_for(cached[1])):
                return cached[1]
            rc, out, err = await ssh_run(host, user, _PROBE, self.settings, timeout=6)
            # Stamp the entry when the probe *finishes*: the SSH call may take
            # longer than ttl_sec, and a pre-probe timestamp would store the
            # result already expired.
            fetched_at = time.monotonic()
            if rc != 0:
                result = {"reachable": False, "configured": True, "host": host, "error": err.strip() or out.strip() or f"rc={rc}"}
                self._cache[key] = (fetched_at, result)
                record_state(key, False)
                return result
            parsed = _parse(out)
            result = {"reachable": True, "configured": True, "host": host, **parsed}
            self._cache[key] = (fetched_at, result)
            record_state(key, True)
            if parsed.get("mac"):
                record_mac(key, parsed["mac"])
            return result
|
||||||
@@ -0,0 +1,202 @@
|
|||||||
|
"""NVIDIA NIM container install / lifecycle.
|
||||||
|
|
||||||
|
Two pieces:
|
||||||
|
* A small curated catalog of NIM images (so users don't have to copy/paste
|
||||||
|
huge nvcr.io URLs).
|
||||||
|
* An installer that SSHes into the target Spark, runs `docker pull` then
|
||||||
|
`docker run -d --gpus all -p PORT:PORT -v VOLUME:/opt/nim/.cache
|
||||||
|
-e NGC_API_KEY=... IMAGE` and streams output.
|
||||||
|
|
||||||
|
Custom services also persist via `overrides.add_custom_service()` so the
|
||||||
|
Services panel can show them.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
import asyncio
|
||||||
|
import uuid
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from .config import Settings
|
||||||
|
from .ssh import ssh_stream, StreamHandle
|
||||||
|
|
||||||
|
|
||||||
|
# Curated list. These are the most useful NIM containers for a dual-Spark
|
||||||
|
# audio-and-LLM setup. Browse the full catalog at
|
||||||
|
# https://catalog.ngc.nvidia.com/orgs/nim/teams/nvidia
|
||||||
|
# Link to the container listing of the NVIDIA NIM catalog on NGC.
CATALOG_URL = "https://catalog.ngc.nvidia.com/orgs/nim/teams/nvidia/containers"
|
||||||
|
|
||||||
|
|
||||||
|
# Curated install suggestions. Each entry carries the image to pull plus the
# default container name and port, a service kind ("stt" / "tts"), and
# display text (name, description, homepage link).
SUGGESTED_NIMS: list[dict] = [
    {
        "key": "parakeet-tdt-0.6b-v3",
        "name": "Parakeet TDT 0.6B v3",
        "image": "nvcr.io/nim/nvidia/parakeet-tdt-0-6b-v3:latest",
        "default_container": "parakeet-asr",
        "default_port": 8000,
        "kind": "stt",
        "description": "Streaming speech-to-text (English). Used by Open WebUI for voice input. ~1 GB.",
        "homepage": "https://catalog.ngc.nvidia.com/orgs/nim/teams/nvidia/containers/parakeet-tdt-0-6b-v3",
    },
    {
        "key": "magpie-tts-multilingual",
        "name": "Magpie TTS Multilingual",
        "image": "nvcr.io/nim/nvidia/magpie-tts-multilingual:latest",
        "default_container": "magpie-tts",
        "default_port": 9000,
        "kind": "tts",
        "description": "Multilingual text-to-speech. Counterpart to Parakeet for 'read aloud'. ~3 GB.",
        "homepage": "https://catalog.ngc.nvidia.com/orgs/nim/teams/nvidia/containers/magpie-tts-multilingual",
    },
    {
        "key": "riva-multilingual",
        "name": "Riva Multilingual ASR",
        "image": "nvcr.io/nim/nvidia/riva-multilingual:latest",
        "default_container": "riva-asr",
        "default_port": 8001,
        "kind": "stt",
        "description": "NVIDIA Riva speech-recognition multi-language model. Larger and more accurate than Parakeet.",
        "homepage": "https://catalog.ngc.nvidia.com/orgs/nim/teams/nvidia",
    },
]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class NimInstallJob:
    """State of one NIM install: its target, progress and captured output."""

    id: str
    image: str
    container: str
    port: int
    host: str
    user: str
    volume: Optional[str]
    started_at: str
    state: str = "starting"  # starting | pulling | running | done | failed
    phase: str = "Starting…"
    lines: list[str] = field(default_factory=list)
    returncode: Optional[int] = None
    finished_at: Optional[str] = None

    def append(self, line: str) -> None:
        """Add *line* to the output log, keeping only the newest 1000 lines."""
        log = self.lines
        log.append(line)
        overflow = len(log) - 1000
        if overflow > 0:
            # Trim in place so anyone holding a reference to the list sees it.
            del log[:overflow]
|
||||||
|
|
||||||
|
|
||||||
|
class NimManager:
|
||||||
|
def __init__(self, settings: Settings) -> None:
|
||||||
|
self.settings = settings
|
||||||
|
self.lock = asyncio.Lock()
|
||||||
|
self.jobs: dict[str, NimInstallJob] = {}
|
||||||
|
self.current_job_id: Optional[str] = None
|
||||||
|
|
||||||
|
def get(self, job_id: str) -> NimInstallJob | None:
|
||||||
|
return self.jobs.get(job_id)
|
||||||
|
|
||||||
|
async def trigger(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
image: str,
|
||||||
|
container: str,
|
||||||
|
port: int,
|
||||||
|
host: str,
|
||||||
|
user: str,
|
||||||
|
volume: str | None = None,
|
||||||
|
extra_env: dict[str, str] | None = None,
|
||||||
|
) -> NimInstallJob:
|
||||||
|
if self.lock.locked():
|
||||||
|
raise RuntimeError("Another NIM install is already in progress")
|
||||||
|
if not host or not user:
|
||||||
|
raise RuntimeError("target host not configured")
|
||||||
|
if not self.settings.ngc_api_key:
|
||||||
|
raise RuntimeError(
|
||||||
|
"NGC_API_KEY is not set. Open Configure Sparks in StartOS and paste your NGC personal API key (free at https://ngc.nvidia.com/setup/personal-key)."
|
||||||
|
)
|
||||||
|
|
||||||
|
job = NimInstallJob(
|
||||||
|
id=uuid.uuid4().hex[:8],
|
||||||
|
image=image,
|
||||||
|
container=container,
|
||||||
|
port=port,
|
||||||
|
host=host,
|
||||||
|
user=user,
|
||||||
|
volume=volume or f"{container}-cache",
|
||||||
|
started_at=datetime.now(timezone.utc).isoformat(),
|
||||||
|
)
|
||||||
|
self.jobs[job.id] = job
|
||||||
|
self.current_job_id = job.id
|
||||||
|
asyncio.create_task(self._run(job, extra_env or {}))
|
||||||
|
return job
|
||||||
|
|
||||||
|
async def _run(self, job: NimInstallJob, extra_env: dict[str, str]) -> None:
|
||||||
|
async with self.lock:
|
||||||
|
try:
|
||||||
|
await self._do(job, extra_env)
|
||||||
|
if job.state != "failed":
|
||||||
|
job.state = "done"
|
||||||
|
job.returncode = 0
|
||||||
|
job.phase = "Done"
|
||||||
|
except Exception as e:
|
||||||
|
job.append(f"[error] {type(e).__name__}: {e}")
|
||||||
|
job.state = "failed"
|
||||||
|
if job.returncode is None:
|
||||||
|
job.returncode = 1
|
||||||
|
finally:
|
||||||
|
job.finished_at = datetime.now(timezone.utc).isoformat()
|
||||||
|
if self.current_job_id == job.id:
|
||||||
|
self.current_job_id = None
|
||||||
|
|
||||||
|
async def _do(self, job: NimInstallJob, extra_env: dict[str, str]) -> None:
|
||||||
|
# Build the bash one-liner. We use docker login non-interactively with the NGC API key.
|
||||||
|
env_parts = [f'-e NGC_API_KEY=$NGC_API_KEY']
|
||||||
|
for k, v in extra_env.items():
|
||||||
|
env_parts.append(f"-e {k}={v}")
|
||||||
|
env_str = " ".join(env_parts)
|
||||||
|
cmd = (
|
||||||
|
f"set -e; "
|
||||||
|
f"export NGC_API_KEY='{self.settings.ngc_api_key}'; "
|
||||||
|
f"echo '=== docker login nvcr.io ==='; "
|
||||||
|
f"echo \"$NGC_API_KEY\" | docker login nvcr.io -u '$oauthtoken' --password-stdin; "
|
||||||
|
f"echo '=== docker pull {job.image} (this can be 1-10 GB) ==='; "
|
||||||
|
f"docker pull {job.image}; "
|
||||||
|
f"echo '=== remove any prior container with the same name ==='; "
|
||||||
|
f"docker rm -f {job.container} 2>/dev/null || true; "
|
||||||
|
f"echo '=== docker run -d --gpus all -p {job.port}:{job.port} -v {job.volume}:/opt/nim/.cache {env_str} --name {job.container} --restart unless-stopped {job.image} ==='; "
|
||||||
|
f"docker run -d --gpus all "
|
||||||
|
f"-p {job.port}:{job.port} "
|
||||||
|
f"-v {job.volume}:/opt/nim/.cache "
|
||||||
|
f"{env_str} "
|
||||||
|
f"--name {job.container} "
|
||||||
|
f"--restart unless-stopped "
|
||||||
|
f"{job.image}; "
|
||||||
|
f"echo '=== ensuring cache volume is writable by uid 1000 (riva-server) ==='; "
|
||||||
|
f"docker run --rm -v {job.volume}:/cache alpine chown -R 1000:1000 /cache && "
|
||||||
|
f"docker restart {job.container}; "
|
||||||
|
f"echo '=== install complete; container is starting up and will download its model on first boot ==='"
|
||||||
|
)
|
||||||
|
job.append(f"$ <install command for {job.image} on {job.host}>")
|
||||||
|
job.state = "pulling"
|
||||||
|
job.phase = "Pulling image from nvcr.io (this can take a few minutes)…"
|
||||||
|
|
||||||
|
handle = StreamHandle()
|
||||||
|
async for line in ssh_stream(job.host, job.user, cmd, self.settings, handle=handle):
|
||||||
|
# Don't log lines containing the api key
|
||||||
|
if self.settings.ngc_api_key and self.settings.ngc_api_key in line:
|
||||||
|
continue
|
||||||
|
job.append(line)
|
||||||
|
if "docker pull" in line:
|
||||||
|
job.phase = "Pulling image from nvcr.io…"
|
||||||
|
elif "Login Succeeded" in line:
|
||||||
|
job.phase = "Logged in to NGC; pulling image…"
|
||||||
|
elif "Pull complete" in line:
|
||||||
|
job.phase = "Pulling layers…"
|
||||||
|
elif "Status: Downloaded newer image" in line or "Image is up to date" in line:
|
||||||
|
job.phase = "Image ready; starting container…"
|
||||||
|
elif "docker run -d" in line:
|
||||||
|
job.state = "running"
|
||||||
|
job.phase = "Container starting; downloading model on first boot…"
|
||||||
|
|
||||||
|
rc = handle.returncode or 0
|
||||||
|
if rc != 0:
|
||||||
|
job.state = "failed"
|
||||||
|
job.returncode = rc
|
||||||
+236
-1
@@ -10,14 +10,19 @@ from pydantic import BaseModel
|
|||||||
from typing import Literal
|
from typing import Literal
|
||||||
|
|
||||||
from .config import Settings
|
from .config import Settings
|
||||||
|
from .connectivity import get_mac, summary as connectivity_summary
|
||||||
|
from .custom_services import add_custom_service, delete_custom_service
|
||||||
from .download import DownloadManager
|
from .download import DownloadManager
|
||||||
|
from .hardware import HardwareProbe
|
||||||
from .health import check_magpie, check_parakeet, check_vllm
|
from .health import check_magpie, check_parakeet, check_vllm
|
||||||
from .models import load_catalog
|
from .models import load_catalog
|
||||||
|
from .nim import SUGGESTED_NIMS, CATALOG_URL, NimManager
|
||||||
from .overrides import add_custom, delete_custom, extract_knobs_from_args, load_overrides, set_knobs
|
from .overrides import add_custom, delete_custom, extract_knobs_from_args, load_overrides, set_knobs
|
||||||
from .services import docker_state, run_action, services_from_settings
|
from .services import docker_state, run_action, services_from_settings
|
||||||
from .ssh import ssh_run
|
from .ssh import ssh_run
|
||||||
from .swap import SwapManager
|
from .swap import SwapManager
|
||||||
from .updates import UpdateManager, get_update_status
|
from .updates import UpdateManager, get_update_status
|
||||||
|
from .wol import send_local_broadcast, send_via_peer
|
||||||
|
|
||||||
|
|
||||||
settings = Settings.from_env()
|
settings = Settings.from_env()
|
||||||
@@ -25,6 +30,8 @@ catalog = load_catalog(settings.models_yaml)
|
|||||||
swap_manager = SwapManager(settings, catalog)
|
swap_manager = SwapManager(settings, catalog)
|
||||||
download_manager = DownloadManager(settings)
|
download_manager = DownloadManager(settings)
|
||||||
update_manager = UpdateManager(settings)
|
update_manager = UpdateManager(settings)
|
||||||
|
hardware_probe = HardwareProbe(settings)
|
||||||
|
nim_manager = NimManager(settings)
|
||||||
|
|
||||||
app = FastAPI(title="spark-control", version="0.1.0")
|
app = FastAPI(title="spark-control", version="0.1.0")
|
||||||
|
|
||||||
@@ -44,6 +51,7 @@ async def get_config() -> dict:
|
|||||||
"spark1_host": settings.spark1_host,
|
"spark1_host": settings.spark1_host,
|
||||||
"spark2_host": settings.spark2_host,
|
"spark2_host": settings.spark2_host,
|
||||||
"vllm_port": settings.vllm_port,
|
"vllm_port": settings.vllm_port,
|
||||||
|
"open_webui_url": settings.open_webui_url or None,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -116,6 +124,56 @@ async def del_model(key: str) -> dict:
|
|||||||
return {"ok": True, "key": key}
|
return {"ok": True, "key": key}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/hardware")
async def get_hardware() -> dict:
    """Per-Spark hardware snapshot — RAM, disk, GPU mem + util, CPU load, uptime."""
    # The probe caches per Spark, so frequent polling does not open an SSH
    # session on every request.
    snapshot = await hardware_probe.fetch()
    return snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/connectivity")
async def get_connectivity() -> dict:
    """Up/down transition log per Spark + cached MACs."""
    overview = connectivity_summary()
    return overview
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/spark/{name}/wake")
async def wake_spark(name: str) -> dict:
    """Send a Wake-on-LAN magic packet for the named Spark.

    Tries the OTHER Spark (if reachable) first because the packet has to
    originate on the target's LAN segment to be reliable. Falls back to a
    direct UDP broadcast from this container.
    """
    if name not in ("spark1", "spark2"):
        raise HTTPException(404, f"unknown spark: {name}")
    mac = get_mac(name)
    if not mac:
        raise HTTPException(400, f"MAC for {name} not yet known; bring it up once so we can probe it, then this will work next time it sleeps")

    # The peer is simply "the other one" of the two configured Sparks.
    if name == "spark1":
        other, other_host, other_user = "spark2", settings.spark2_host, settings.spark2_user
    else:
        other, other_host, other_user = "spark1", settings.spark1_host, settings.spark1_user

    # Preferred path: have the peer send the packet.
    via_peer_err = ""
    if other_host and other_user:
        via_peer_ok, via_peer_err = await send_via_peer(other_host, other_user, mac, settings)
        if via_peer_ok:
            return {"ok": True, "spark": name, "mac": mac, "delivered_via": other}

    # Fall back to direct from this container
    try:
        send_local_broadcast(mac)
    except Exception as e:
        raise HTTPException(500, f"WoL failed: peer={via_peer_err!r} container={e!r}")
    return {"ok": True, "spark": name, "mac": mac, "delivered_via": "container"}
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/services")
|
@app.get("/api/services")
|
||||||
async def get_services() -> dict:
|
async def get_services() -> dict:
|
||||||
"""Lifecycle state of always-on support services (Parakeet, Magpie, …).
|
"""Lifecycle state of always-on support services (Parakeet, Magpie, …).
|
||||||
@@ -161,6 +219,108 @@ async def get_services() -> dict:
|
|||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/nim/catalog")
async def get_nim_catalog() -> dict:
    # Static catalog data plus a flag telling the UI whether installs can work
    # at all (NimManager.trigger refuses to run without an NGC API key).
    key_present = bool(settings.ngc_api_key)
    payload = {
        "catalog_url": CATALOG_URL,
        "ngc_key_configured": key_present,
        "suggested": SUGGESTED_NIMS,
    }
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
# Request body for POST /api/nim/install.
class NimInstallBody(BaseModel):
    image: str  # image reference handed to the installer
    container: str  # container name; also used as the custom-service key
    port: int  # port passed to the installer and stored with the service
    host: Literal["spark1", "spark2"] = "spark2"  # which Spark to install on
    kind: str = ""  # service kind; the handler falls back to "nim" when empty
    register: bool = True  # write to custom services overrides after install
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/nim/install")
async def post_nim_install(body: NimInstallBody) -> dict:
    """Start a NIM install job on the selected Spark and optionally register it.

    The request is validated before any work is started:

    * ``container`` must look like a Docker container name. Container names
      of registered services are interpolated into remote ``docker ...``
      shell commands, so shell metacharacters must never be accepted here.
    * ``port`` must be a valid TCP port.
    * the selected Spark must have both host and user configured.

    Returns a summary of the started job (id, image, container, state).

    Raises:
        HTTPException: 400 for invalid input or an unconfigured Spark,
            409 when the manager reports an install already "in progress",
            400 for any other ``RuntimeError`` from the manager.
    """
    import re  # local import: only this handler needs it

    if not re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9_.-]*", body.container):
        raise HTTPException(
            400,
            "invalid container name (allowed: letters, digits, '_', '.', '-'; "
            "must start with a letter or digit)",
        )
    if not 1 <= body.port <= 65535:
        raise HTTPException(400, "port must be between 1 and 65535")

    on_spark1 = body.host == "spark1"
    target_host = settings.spark1_host if on_spark1 else settings.spark2_host
    target_user = settings.spark1_user if on_spark1 else settings.spark2_user
    if not target_host or not target_user:
        raise HTTPException(400, f"{body.host} is not configured")

    try:
        job = await nim_manager.trigger(
            image=body.image,
            container=body.container,
            port=body.port,
            host=target_host,
            user=target_user,
        )
    except RuntimeError as e:
        # Keep the original cause attached for server-side tracebacks.
        raise HTTPException(409 if "in progress" in str(e) else 400, str(e)) from e

    if body.register:
        # Persist in custom services so the panel shows it after install.
        add_custom_service({
            "key": body.container,
            "kind": body.kind or "nim",
            "host": target_host,
            "user": target_user,
            "container": body.container,
            "port": body.port,
            "image": body.image,
        })
    return {"job_id": job.id, "image": job.image, "container": job.container, "state": job.state}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/nim/install/{job_id}")
async def get_nim_install(job_id: str) -> dict:
    """Return a snapshot of one NIM install job, including its log lines.

    Raises:
        HTTPException: 404 when the manager has no job with this id.
    """
    job = nim_manager.get(job_id)
    if job is None:
        raise HTTPException(404, "no such job")
    # Attribute names double as the response keys, in response order.
    exposed = (
        "id",
        "image",
        "container",
        "port",
        "host",
        "state",
        "phase",
        "started_at",
        "finished_at",
        "returncode",
        "lines",
    )
    return {attr: getattr(job, attr) for attr in exposed}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/nim/install/{job_id}/stream")
async def stream_nim_install(job_id: str):
    """Stream a NIM install job as Server-Sent Events.

    Emits unnamed ``data`` events carrying new log lines, a ``phase`` event
    whenever the job's phase changes, and a final ``done`` event once the job
    has a return code and every line has been sent.

    Raises:
        HTTPException: 404 when the manager has no job with this id.
    """
    job = nim_manager.get(job_id)
    if job is None:
        raise HTTPException(404, "no such job")

    def frame(payload: dict, event: str = "") -> str:
        # One SSE frame; a named event gets an "event:" line before the data.
        head = f"event: {event}\n" if event else ""
        return f"{head}data: {json.dumps(payload)}\n\n"

    async def gen():
        cursor = 0          # number of log lines already sent
        seen_phase = None   # last phase announced to the client
        while True:
            # Snapshot the length so lines appended mid-iteration wait for the next pass.
            total = len(job.lines)
            if total > cursor:
                for entry in job.lines[cursor:total]:
                    yield frame({"line": entry})
                cursor = total
            if job.phase != seen_phase:
                yield frame({"state": job.state, "phase": job.phase}, "phase")
                seen_phase = job.phase
            finished = job.returncode is not None
            if finished and cursor >= len(job.lines):
                yield frame({"state": job.state, "returncode": job.returncode}, "done")
                return
            await asyncio.sleep(0.5)

    return StreamingResponse(gen(), media_type="text/event-stream")
|
||||||
|
|
||||||
|
|
||||||
|
@app.delete("/api/services/{name}")
async def del_service(name: str) -> dict:
    """Delete a custom service entry by key.

    The bundled ``parakeet`` and ``magpie`` keys are rejected; only custom
    services may be removed.

    Raises:
        HTTPException: 400 when ``name`` is one of the built-in services.
    """
    builtin_keys = {"parakeet", "magpie"}
    if name in builtin_keys:
        raise HTTPException(400, "built-in service; cannot delete (use Configure Sparks to point at a different host)")
    delete_custom_service(name)
    return dict(ok=True, name=name)
|
||||||
|
|
||||||
|
|
||||||
@app.post("/api/services/{name}/{action}")
|
@app.post("/api/services/{name}/{action}")
|
||||||
async def service_action(name: str, action: str) -> dict:
|
async def service_action(name: str, action: str) -> dict:
|
||||||
services = services_from_settings(settings)
|
services = services_from_settings(settings)
|
||||||
@@ -297,7 +457,7 @@ async def stream_swap(job_id: str):
|
|||||||
|
|
||||||
class DownloadRequest(BaseModel):
|
class DownloadRequest(BaseModel):
|
||||||
repo: str
|
repo: str
|
||||||
mode: Literal["solo", "cluster"] = "solo"
|
mode: Literal["spark1", "spark2", "cluster"] = "spark1"
|
||||||
|
|
||||||
|
|
||||||
@app.post("/api/download")
|
@app.post("/api/download")
|
||||||
@@ -376,6 +536,81 @@ async def get_updates() -> dict:
|
|||||||
return await get_update_status(settings)
|
return await get_update_status(settings)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/explain-updates")
async def explain_updates():
    """Stream a layman's explanation of the pending commits from the currently-loaded vLLM model.

    The response is a Server-Sent Events stream:

    * ``data: {"content": ...}`` / ``data: {"reasoning": ...}`` for model output,
    * ``data: {"error": ...}`` when the upstream request fails mid-stream,
    * a final ``event: done`` whose payload is ``{"ok": bool}``; when a
      precondition fails (update check failed, no model loaded, nothing
      pending) it is the only event and carries ``{"error": ...}`` instead.
    """
    import httpx

    def sse_error(message: str) -> StreamingResponse:
        # Single-event stream used for every precondition failure.
        async def one_shot():
            yield f"event: done\ndata: {json.dumps({'error': message})}\n\n"
        return StreamingResponse(one_shot(), media_type="text/event-stream")

    info = await get_update_status(settings)
    if not info.get("ok"):
        return sse_error(info.get("error", "unknown"))

    vllm = await check_vllm(settings)
    if not vllm.get("ok") or not vllm.get("current_model"):
        return sse_error("no vLLM model loaded — swap to a model first")

    # `or []` also covers an explicit null log, which would break str.join.
    commits = "\n".join(info.get("log") or [])
    if not commits.strip():
        return sse_error("no pending commits")

    prompt = (
        "You are reviewing pending git commits to `eugr/spark-vllm-docker`, an upstream community project that "
        "orchestrates vLLM on dual NVIDIA DGX Spark hardware (Blackwell GPUs, cluster via Ray, recipes per model). "
        "The reader has a setup running models like Qwen3.6-35B-A3B-NVFP4 (daily driver, solo), Qwen3-VL 235B (cluster), "
        "and Gemma 4 31B. The reader is technically literate but is NOT a vLLM expert.\n\n"
        "For the commit list below: give a short overall verdict (Apply / Optional / Skip and why), then a brief "
        "bullet per commit grouping similar ones. Call out anything that would break a working setup or that "
        "requires re-downloading models. Avoid jargon. ~250 words max.\n\n"
        f"Pending commits:\n{commits}"
    )

    async def gen():
        failed = False
        try:
            async with httpx.AsyncClient(timeout=httpx.Timeout(300.0, connect=5.0)) as c:
                async with c.stream(
                    "POST",
                    f"{vllm['base_url']}/chat/completions",
                    json={
                        "model": vllm["current_model"],
                        "stream": True,
                        "messages": [{"role": "user", "content": prompt}],
                        "max_tokens": 600,
                        "temperature": 0.4,
                    },
                ) as r:
                    r.raise_for_status()
                    async for line in r.aiter_lines():
                        if not line.startswith("data: "):
                            continue
                        data = line[6:].strip()
                        if data == "[DONE]":
                            break
                        try:
                            chunk = json.loads(data)
                        except json.JSONDecodeError:
                            # Skip malformed or partial frames rather than abort the stream.
                            continue
                        choices = chunk.get("choices") or []
                        if not choices:
                            continue
                        delta = choices[0].get("delta") or {}
                        text = delta.get("content")
                        # Accept both spellings of the reasoning delta field.
                        reasoning = delta.get("reasoning") or delta.get("reasoning_content")
                        if text:
                            yield f"data: {json.dumps({'content': text})}\n\n"
                        elif reasoning:
                            yield f"data: {json.dumps({'reasoning': reasoning})}\n\n"
        except Exception as e:
            # Boundary of the stream: report the failure to the client instead of
            # tearing down the connection without explanation.
            failed = True
            yield f"data: {json.dumps({'error': f'{type(e).__name__}: {e}'})}\n\n"
        # The terminal event reflects whether the upstream call actually succeeded.
        yield f"event: done\ndata: {json.dumps({'ok': not failed})}\n\n"

    return StreamingResponse(gen(), media_type="text/event-stream")
|
||||||
|
|
||||||
|
|
||||||
class UpdateRequest(BaseModel):
|
class UpdateRequest(BaseModel):
|
||||||
mode: Literal["solo", "cluster"] = "cluster"
|
mode: Literal["solo", "cluster"] = "cluster"
|
||||||
|
|
||||||
|
|||||||
+42
-2
@@ -5,6 +5,7 @@ machinery. We just run `docker start|stop|restart <container>` via SSH on the
|
|||||||
appropriate host.
|
appropriate host.
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
import time
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Literal, Optional
|
from typing import Literal, Optional
|
||||||
|
|
||||||
@@ -12,6 +13,25 @@ from .config import Settings
|
|||||||
from .ssh import ssh_run
|
from .ssh import ssh_run
|
||||||
|
|
||||||
|
|
||||||
|
# Cache the "unreachable" verdict per (host, user) for a short period so that a
# repeated docker_state call doesn't re-pay the 6 s SSH connect timeout each time.
_UNREACHABLE_TTL = 25.0
_unreachable_cache: dict[tuple[str, str], float] = {}


def _is_recently_unreachable(host: str, user: str) -> bool:
    """Return True if (host, user) was marked unreachable within the TTL.

    An expired entry is dropped on lookup so the cache does not keep stale
    verdicts around.
    """
    marked_at = _unreachable_cache.get((host, user))
    # Compare against None explicitly: a timestamp is a float, and a plain
    # truthiness test would treat 0.0 as "never marked".
    if marked_at is None:
        return False
    if time.monotonic() - marked_at < _UNREACHABLE_TTL:
        return True
    _unreachable_cache.pop((host, user), None)
    return False


def _mark_unreachable(host: str, user: str) -> None:
    """Record that (host, user) just failed to respond."""
    _unreachable_cache[(host, user)] = time.monotonic()


def _clear_unreachable(host: str, user: str) -> None:
    """Forget any unreachable verdict for (host, user); a no-op if none exists."""
    _unreachable_cache.pop((host, user), None)
|
||||||
|
|
||||||
|
|
||||||
ServiceName = Literal["parakeet", "magpie"]
|
ServiceName = Literal["parakeet", "magpie"]
|
||||||
ServiceAction = Literal["start", "stop", "restart"]
|
ServiceAction = Literal["start", "stop", "restart"]
|
||||||
|
|
||||||
@@ -27,7 +47,8 @@ class ServiceDef:
|
|||||||
|
|
||||||
|
|
||||||
def services_from_settings(s: Settings) -> dict[str, ServiceDef]:
|
def services_from_settings(s: Settings) -> dict[str, ServiceDef]:
|
||||||
return {
|
from .custom_services import load_custom_services
|
||||||
|
out: dict[str, ServiceDef] = {
|
||||||
"parakeet": ServiceDef(
|
"parakeet": ServiceDef(
|
||||||
name="parakeet",
|
name="parakeet",
|
||||||
kind="stt",
|
kind="stt",
|
||||||
@@ -45,19 +66,38 @@ def services_from_settings(s: Settings) -> dict[str, ServiceDef]:
|
|||||||
port=s.magpie_port,
|
port=s.magpie_port,
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
for entry in load_custom_services():
|
||||||
|
key = entry.get("key")
|
||||||
|
if not key or key in out:
|
||||||
|
continue
|
||||||
|
out[key] = ServiceDef(
|
||||||
|
name=key,
|
||||||
|
kind=entry.get("kind", ""),
|
||||||
|
host=entry.get("host", ""),
|
||||||
|
user=entry.get("user", ""),
|
||||||
|
container=entry.get("container", key),
|
||||||
|
port=int(entry.get("port", 0)),
|
||||||
|
)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
async def docker_state(settings: Settings, svc: ServiceDef) -> dict:
|
async def docker_state(settings: Settings, svc: ServiceDef) -> dict:
|
||||||
"""Get docker state (running, exited, restarting, etc.) + restart count."""
|
"""Get docker state (running, exited, restarting, etc.) + restart count."""
|
||||||
if not svc.host or not svc.user:
|
if not svc.host or not svc.user:
|
||||||
return {"state": "unconfigured", "restart_count": None, "uptime": None}
|
return {"state": "unconfigured", "restart_count": None, "uptime": None}
|
||||||
|
if _is_recently_unreachable(svc.host, svc.user):
|
||||||
|
return {"state": "unreachable", "host_unreachable": True, "restart_count": None, "uptime": None}
|
||||||
cmd = (
|
cmd = (
|
||||||
f"docker inspect {svc.container} "
|
f"docker inspect {svc.container} "
|
||||||
f"--format '{{{{.State.Status}}}}|{{{{.State.StartedAt}}}}|{{{{.RestartCount}}}}|{{{{.State.ExitCode}}}}|{{{{.State.Error}}}}' "
|
f"--format '{{{{.State.Status}}}}|{{{{.State.StartedAt}}}}|{{{{.RestartCount}}}}|{{{{.State.ExitCode}}}}|{{{{.State.Error}}}}' "
|
||||||
f"2>&1 || echo 'NOT_FOUND'"
|
f"2>&1 || echo 'NOT_FOUND'"
|
||||||
)
|
)
|
||||||
rc, out, _ = await ssh_run(svc.host, svc.user, cmd, settings, timeout=10)
|
rc, out, _ = await ssh_run(svc.host, svc.user, cmd, settings, timeout=6)
|
||||||
out = out.strip()
|
out = out.strip()
|
||||||
|
if rc == 124 or "timeout after" in out.lower():
|
||||||
|
_mark_unreachable(svc.host, svc.user)
|
||||||
|
return {"state": "unreachable", "host_unreachable": True, "restart_count": None, "uptime": None}
|
||||||
|
_clear_unreachable(svc.host, svc.user)
|
||||||
if rc != 0 or out.startswith("NOT_FOUND") or "Error" in out and "no such object" in out.lower():
|
if rc != 0 or out.startswith("NOT_FOUND") or "Error" in out and "no such object" in out.lower():
|
||||||
return {"state": "missing", "restart_count": None, "uptime": None, "raw": out}
|
return {"state": "missing", "restart_count": None, "uptime": None, "raw": out}
|
||||||
parts = out.split("|")
|
parts = out.split("|")
|
||||||
|
|||||||
+524
-24
@@ -13,6 +13,8 @@ const state = {
|
|||||||
swap_progress: 0, // 0–1
|
swap_progress: 0, // 0–1
|
||||||
services: {},
|
services: {},
|
||||||
service_action_in_flight: null, // e.g. "parakeet:restart"
|
service_action_in_flight: null, // e.g. "parakeet:restart"
|
||||||
|
hardware: {},
|
||||||
|
config: {},
|
||||||
configured: true,
|
configured: true,
|
||||||
timer_handle: null,
|
timer_handle: null,
|
||||||
};
|
};
|
||||||
@@ -63,7 +65,9 @@ function renderCards() {
|
|||||||
${(m.capabilities || []).map(c => `<span class="tag cap">${escapeHtml(c)}</span>`).join('')}
|
${(m.capabilities || []).map(c => `<span class="tag cap">${escapeHtml(c)}</span>`).join('')}
|
||||||
</div>
|
</div>
|
||||||
${desc}
|
${desc}
|
||||||
<div class="muted small repo">${escapeHtml(m.repo)}</div>
|
<div class="muted small repo">
|
||||||
|
<a href="https://huggingface.co/${encodeURIComponent(m.repo)}" target="_blank" rel="noopener" title="View on Hugging Face">${escapeHtml(m.repo)} <span class="hf-icon">↗</span></a>
|
||||||
|
</div>
|
||||||
<div class="spacer"></div>
|
<div class="spacer"></div>
|
||||||
<div class="card-actions">
|
<div class="card-actions">
|
||||||
<button class="btn ${isActive ? '' : 'primary'}" data-swap-key="${key}" ${isActive || isSwapping ? 'disabled' : ''}>
|
<button class="btn ${isActive ? '' : 'primary'}" data-swap-key="${key}" ${isActive || isSwapping ? 'disabled' : ''}>
|
||||||
@@ -93,6 +97,184 @@ function renderCurrent(status) {
|
|||||||
c.innerHTML = `<strong>${label}</strong>`;
|
c.innerHTML = `<strong>${label}</strong>`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ===================== hardware dashboard =====================
|
||||||
|
|
||||||
|
// Format a byte count with binary (1024-based) scaling and B/KB/MB/GB/TB labels.
// Returns '—' for null/undefined/NaN. Whole bytes are shown without a decimal;
// scaled values get one decimal below 10 and none from 10 upwards.
function fmtBytes(n) {
  if (!n && n !== 0) return '—';
  const units = ['B', 'KB', 'MB', 'GB', 'TB'];
  let idx = 0;
  let v = n;
  while (v >= 1024 && idx < units.length - 1) {
    v /= 1024;
    idx++;
  }
  // 9.95 and above would print as "10.0" with one decimal, so round those instead.
  const oneDecimal = idx > 0 && v < 9.95;
  return `${oneDecimal ? v.toFixed(1) : Math.round(v)} ${units[idx]}`;
}
|
||||||
|
// Convert a MiB count to a bare number string in GB (no unit suffix).
// Returns null for null/undefined/NaN so callers can substitute a placeholder.
function fmtMiB(n) {
  const missing = !n && n !== 0;
  if (missing) return null;
  const scaled = n / 1024;
  if (scaled >= 10) return String(Math.round(scaled));
  return scaled.toFixed(1);
}
|
||||||
|
|
||||||
|
// Render a horizontal usage bar as an HTML string.
// usedPct is clamped to 2–100 so an empty bar stays visible; anything that is
// not a finite number is treated as 0 instead of producing "width:NaN%".
// warn adds the "warn" class.
function bar(usedPct, warn) {
  const raw = Number(usedPct);
  const pct = Math.max(2, Math.min(100, Number.isFinite(raw) ? raw : 0));
  return `<div class="bar ${warn ? 'warn' : ''}"><span style="width:${pct}%"></span></div>`;
}
|
||||||
|
|
||||||
|
// Refresh hardware stats (and, best-effort, connectivity history), then re-render
// the hardware panel. Failures are logged and otherwise ignored.
async function pollHardware() {
  const refreshConnectivity = async () => {
    try {
      state.connectivity = await fetchJSON('/api/connectivity');
    } catch {
      // Connectivity is optional; keep whatever was loaded before.
    }
  };
  try {
    state.hardware = await fetchJSON('/api/hardware');
    await refreshConnectivity();
    renderHardware();
  } catch (err) {
    console.warn('hardware poll failed', err);
  }
}
|
||||||
|
|
||||||
|
// Format a duration in seconds as a short label: "45s", "12m", "3h 5m", "2d 4h".
// Returns '' for null/undefined.
// Each value is rounded to its display unit first and only then split, so a
// rounded-up remainder carries into the next unit ("1h", never "60m" or "1h 60m").
function fmtDuration(sec) {
  if (sec == null) return '';
  const wholeSec = Math.round(sec);
  if (wholeSec < 60) return `${wholeSec}s`;
  const totalMin = Math.round(sec / 60);
  if (totalMin < 60) return `${totalMin}m`;
  if (totalMin < 24 * 60) {
    const h = Math.floor(totalMin / 60);
    const m = totalMin % 60;
    return m ? `${h}h ${m}m` : `${h}h`;
  }
  const totalHours = Math.round(sec / 3600);
  const d = Math.floor(totalHours / 24);
  const h = totalHours % 24;
  return h ? `${d}d ${h}h` : `${d}d`;
}
|
||||||
|
|
||||||
|
// Show the connectivity history dialog: up/down transitions grouped per Spark,
// newest first, at most 25 entries each. Reads state.connectivity (events, macs).
function openConnectivityDialog() {
  const dlg = el('#connectivity-dialog');
  const content = el('#connectivity-content');
  const c = state.connectivity || {};
  const events = c.events || [];
  if (events.length === 0) {
    content.innerHTML = '<div class="muted small">No transitions recorded yet. Once a Spark goes down and comes back, you\'ll see entries here.</div>';
    dlg.showModal();
    return;
  }
  const bySpark = {};
  for (const e of events) {
    (bySpark[e.spark] = bySpark[e.spark] || []).push(e);
  }
  const eventRow = (e) => {
    // Tolerate a missing timestamp, and escape the transition like every other
    // server-provided value since it lands inside an HTML attribute.
    const when = String(e.at || '').replace('T', ' ').replace('Z', '');
    const cls = escapeHtml(String(e.transition || ''));
    return `
          <div class="conn-event ${cls}">
            <span class="when">${escapeHtml(when)}</span>
            <span class="what">${e.transition === 'up' ? '↑ came back online' : '↓ dropped offline'}</span>
            <span class="dur">${e.down_seconds != null ? `was down ${fmtDuration(e.down_seconds)}` : ''}${e.up_seconds != null ? `was up ${fmtDuration(e.up_seconds)}` : ''}</span>
          </div>
        `;
  };
  const html = Object.entries(bySpark).map(([spark, evs]) => {
    const downs = evs.filter(e => e.transition === 'down').length;
    const mac = c.macs?.[spark];
    return `
      <div class="conn-spark">
        <h4>${escapeHtml(spark)}${mac ? ` <span class="muted small">${escapeHtml(mac)}</span>` : ''}</h4>
        <div class="conn-summary">${evs.length} transition${evs.length===1?'':'s'} · ${downs} down event${downs===1?'':'s'} in window</div>
        ${evs.slice(-25).reverse().map(eventRow).join('')}
      </div>
    `;
  }).join('');
  content.innerHTML = html;
  dlg.showModal();
}
|
||||||
|
|
||||||
|
// Ask the backend to send a Wake-on-LAN packet to the named Spark and report
// the outcome to the user via alert().
async function wakeSpark(name) {
  try {
    const { mac, delivered_via: via } = await fetchJSON(`/api/spark/${name}/wake`, { method: 'POST' });
    alert(`Wake-on-LAN sent to ${name} (MAC ${mac}, via ${via}). Give it ~30 seconds to wake; the card will go green when it comes back.`);
  } catch (err) {
    alert(`Wake failed: ${err.message}`);
  }
}
|
||||||
|
|
||||||
|
// Render one card per configured Spark into #hardware-grid.
// Unreachable hosts get a troubleshooting card (with a Wake button when a MAC is
// known); reachable hosts get CPU / RAM / GPU / disk usage bars.
// The whole panel is hidden when no Spark is configured.
function renderHardware() {
  const panel = el('#hardware-panel');
  const grid = el('#hardware-grid');
  const hw = state.hardware || {};
  const shown = Object.entries(hw).filter(([, info]) => info && info.configured !== false);
  if (shown.length === 0) {
    panel.classList.add('hidden');
    return;
  }
  panel.classList.remove('hidden');
  grid.innerHTML = '';

  // Percentage helper: 0 when either side is missing or zero.
  const pctOf = (used, total) => (used && total ? (used / total) * 100 : 0);

  // One labelled usage row; the bar turns "warn" above the given threshold.
  const metric = (labelHtml, pct, warnAbove, valueHtml) => `
      <div class="hw-metric">
        <span class="label">${labelHtml}</span>
        ${bar(pct, pct > warnAbove)}
        <span class="val">${valueHtml}</span>
      </div>`;

  const unreachableHtml = (key, s) => {
    const mac = state.connectivity?.macs?.[key];
    let wolRow;
    if (mac) {
      wolRow = `<div class="wol-row">
          <span class="mac-display">${escapeHtml(mac)}</span>
          <span class="spacer"></span>
          <button class="btn" data-wake="${escapeHtml(key)}">Wake (WoL)</button>
        </div>`;
    } else {
      wolRow = `<div class="muted small">MAC not yet known — once it's been up once with this dashboard installed, "Wake" will appear here.</div>`;
    }
    return `
      <div class="head">
        <span class="name">${escapeHtml(key)}</span>
        <span class="meta">unreachable</span>
      </div>
      <div class="muted small">${escapeHtml(s.host || '')} — ${escapeHtml(s.error || 'no response')}</div>
      ${wolRow}
      <div class="muted small" style="line-height:1.5">
        If Wake-on-LAN doesn't bring it back, manual steps:
        <ol style="margin: 6px 0 0 18px; padding: 0;">
          <li>Verify it's powered on (check the front LED).</li>
          <li>Ping it from another LAN device.</li>
          <li>Power-cycle it physically.</li>
          <li>If it boots, this card will go green again automatically.</li>
        </ol>
      </div>
    `;
  };

  const liveHtml = (key, s) => {
    const ramPct = pctOf(s.ram_used_bytes, s.ram_total_bytes);
    const diskPct = pctOf(s.disk_used_bytes, s.disk_total_bytes);
    const loadPct = (s.load && s.cores) ? Math.min(100, (s.load[0] / s.cores) * 100) : 0;
    // GPU memory: on unified-memory systems (DGX Spark) total is N/A, so use system RAM as the pool.
    const gpuMemTotalMiB = s.gpu_mem_total_mib || (s.gpu_unified_memory ? (s.ram_total_bytes / (1024 * 1024)) : null);
    const gpuMemUsedMiB = s.gpu_mem_used_mib ?? null;
    const gpuMemPct = (gpuMemTotalMiB && gpuMemUsedMiB != null) ? (gpuMemUsedMiB / gpuMemTotalMiB) * 100 : 0;
    const gpuMemNote = s.gpu_unified_memory ? ' <span class="muted">(unified)</span>' : '';
    const gpuUtil = s.gpu_util_pct || 0;

    const extras = [];
    if (s.gpu_temp_c != null) extras.push(`${s.gpu_temp_c}°C`);
    if (s.gpu_power_w != null) extras.push(`${s.gpu_power_w.toFixed(0)}W`);
    const gpuExtrasStr = extras.length ? ` · ${extras.join(' · ')}` : '';

    const rows = [
      metric('CPU', loadPct, 80, `${s.load ? s.load[0].toFixed(2) : '—'} / ${s.cores || '?'} cores`),
      metric('RAM', ramPct, 85, `${fmtBytes(s.ram_used_bytes)} / ${fmtBytes(s.ram_total_bytes)}`),
      metric(`GPU mem${gpuMemNote}`, gpuMemPct, 90, `${fmtMiB(gpuMemUsedMiB) || '—'} / ${fmtMiB(gpuMemTotalMiB) || '?'} GB`),
      metric('GPU util', gpuUtil, 90, `${s.gpu_util_pct ?? 0}%${gpuExtrasStr}`),
      metric('Disk', diskPct, 85, `${fmtBytes(s.disk_used_bytes)} / ${fmtBytes(s.disk_total_bytes)}`),
    ];
    return `
      <div class="head">
        <span class="name">${escapeHtml(s.hostname || key)}</span>
        <span class="meta">${escapeHtml(key)} · ${escapeHtml(s.gpu_name || '')} · ${escapeHtml(s.uptime || '')}</span>
      </div>${rows.join('')}
    `;
  };

  for (const [key, s] of shown) {
    const card = document.createElement('div');
    if (s.reachable) {
      card.className = 'hw-card';
      card.innerHTML = liveHtml(key, s);
    } else {
      card.className = 'hw-card unreachable';
      card.innerHTML = unreachableHtml(key, s);
    }
    grid.appendChild(card);
  }
}
|
||||||
|
|
||||||
|
// ===================== service classification =====================
|
||||||
|
|
||||||
function classifyService(s) {
|
function classifyService(s) {
|
||||||
// returns one of: running | unhealthy | missing | unconfigured | starting
|
// returns one of: running | unhealthy | missing | unconfigured | starting
|
||||||
if (!s.host) return 'unconfigured';
|
if (!s.host) return 'unconfigured';
|
||||||
@@ -143,11 +325,16 @@ async function renderServices() {
|
|||||||
if (action === 'stop' && cls !== 'running' && cls !== 'starting' && cls !== 'unhealthy') return true;
|
if (action === 'stop' && cls !== 'running' && cls !== 'starting' && cls !== 'unhealthy') return true;
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
|
const copyIcon = `<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2"/><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/></svg>`;
|
||||||
|
const hostStr = s.host ? `${s.host}:${s.port}` : '';
|
||||||
const hostRow = s.host
|
const hostRow = s.host
|
||||||
? `<div class="row"><span class="k">Host</span><span class="v">${escapeHtml(s.host)}:${s.port}</span></div>`
|
? `<div class="row"><span class="k">Host</span><span class="v copyable" data-copy-self title="Click to copy">${escapeHtml(hostStr)}</span><button class="icon-btn" data-copy-text="${escapeHtml(hostStr)}" title="Copy host" aria-label="Copy">${copyIcon}</button></div>`
|
||||||
: `<div class="row"><span class="k">Host</span><span class="v muted-v">not configured</span></div>`;
|
: `<div class="row"><span class="k">Host</span><span class="v muted-v">not configured</span></div>`;
|
||||||
|
const urlRow = s.base_url
|
||||||
|
? `<div class="row"><span class="k">URL</span><span class="v copyable" data-copy-self title="Click to copy">${escapeHtml(s.base_url)}</span><button class="icon-btn" data-copy-text="${escapeHtml(s.base_url)}" title="Copy URL" aria-label="Copy">${copyIcon}</button></div>`
|
||||||
|
: '';
|
||||||
const modelRow = s.model
|
const modelRow = s.model
|
||||||
? `<div class="row"><span class="k">Model</span><span class="v">${escapeHtml(s.model)}</span></div>`
|
? `<div class="row"><span class="k">Model</span><span class="v copyable" data-copy-self title="Click to copy">${escapeHtml(s.model)}</span><button class="icon-btn" data-copy-text="${escapeHtml(s.model)}" title="Copy model" aria-label="Copy">${copyIcon}</button></div>`
|
||||||
: '';
|
: '';
|
||||||
const restartsRow = s.restart_count != null && s.restart_count > 1
|
const restartsRow = s.restart_count != null && s.restart_count > 1
|
||||||
? `<div class="row"><span class="k">Restarts</span><span class="v">${s.restart_count}</span></div>`
|
? `<div class="row"><span class="k">Restarts</span><span class="v">${s.restart_count}</span></div>`
|
||||||
@@ -159,6 +346,7 @@ async function renderServices() {
|
|||||||
<span class="status">${statusLabel(cls)}</span>
|
<span class="status">${statusLabel(cls)}</span>
|
||||||
</div>
|
</div>
|
||||||
${hostRow}
|
${hostRow}
|
||||||
|
${urlRow}
|
||||||
${modelRow}
|
${modelRow}
|
||||||
${restartsRow}
|
${restartsRow}
|
||||||
<div class="service-actions">
|
<div class="service-actions">
|
||||||
@@ -212,31 +400,50 @@ function renderEndpoint(status) {
|
|||||||
el('#ep-curl-snippet').textContent = snippet;
|
el('#ep-curl-snippet').textContent = snippet;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copy `text` to the clipboard.
//   indicatorEl — element that briefly gets the "copied" class on success.
//   selectEl    — element whose contents are selected when the Clipboard API is
//                 unavailable, so the user can copy manually. Defaults to
//                 indicatorEl for callers that pass only two arguments.
// Resolves to true when the clipboard write succeeded, false otherwise.
async function copyText(text, indicatorEl, selectEl = indicatorEl) {
  try {
    await navigator.clipboard.writeText(text);
    if (indicatorEl) {
      indicatorEl.classList.add('copied');
      setTimeout(() => indicatorEl.classList.remove('copied'), 1200);
    }
    return true;
  } catch {
    // Plain HTTP fallback: select the text so the user can ⌘C
    if (selectEl) {
      const range = document.createRange();
      range.selectNode(selectEl);
      window.getSelection().removeAllRanges();
      window.getSelection().addRange(range);
    }
    return false;
  }
}

// Install one delegated click handler on <body> that serves all copy affordances:
//   [data-copy-text]  — button carrying the literal text to copy,
//   [data-copy]       — button whose attribute is a selector for the text element,
//   [data-copy-self]  — the text element itself.
function setupCopyButtons() {
  document.body.addEventListener('click', async (e) => {
    // Inline icon copy with literal text (used for dynamically-rendered service rows)
    const litBtn = e.target.closest('[data-copy-text]');
    if (litBtn) {
      // On fallback, select the visible text in the same row rather than the icon button.
      const label = litBtn.parentElement?.querySelector('[data-copy-self]') || litBtn;
      await copyText(litBtn.dataset.copyText, litBtn, label);
      return;
    }
    // Copy buttons (with svg icon) referenced by data-copy="selector"
    const btn = e.target.closest('[data-copy]');
    if (btn) {
      const target = el(btn.dataset.copy);
      if (target) {
        const copied = await copyText(target.textContent, btn, target);
        // Only flag the target as copied when the clipboard write really happened.
        if (copied) {
          target.classList.add('copied');
          setTimeout(() => target.classList.remove('copied'), 1200);
        }
      }
      return;
    }
    // Self-copy: clicking the text itself
    const selfCopy = e.target.closest('[data-copy-self]');
    if (selfCopy) {
      await copyText(selfCopy.textContent, selfCopy);
    }
  });
}
|
||||||
|
|
||||||
@@ -380,6 +587,10 @@ async function pollStatus() {
|
|||||||
renderCurrent(status);
|
renderCurrent(status);
|
||||||
renderEndpoint(status);
|
renderEndpoint(status);
|
||||||
renderHealth(status);
|
renderHealth(status);
|
||||||
|
// If models hasn't loaded yet (init may have hit a transient proxy timeout), retry.
|
||||||
|
if (!state.models || Object.keys(state.models).length === 0) {
|
||||||
|
try { await loadModels(); } catch {}
|
||||||
|
}
|
||||||
// Refresh services state lazily — every 5s poll triggers this too.
|
// Refresh services state lazily — every 5s poll triggers this too.
|
||||||
try {
|
try {
|
||||||
state.services = await fetchJSON('/api/services');
|
state.services = await fetchJSON('/api/services');
|
||||||
@@ -518,6 +729,18 @@ function openDownloadForm() {
|
|||||||
el('#download-form').classList.remove('hidden');
|
el('#download-form').classList.remove('hidden');
|
||||||
el('#download-progress').classList.add('hidden');
|
el('#download-progress').classList.add('hidden');
|
||||||
el('#dl-repo').focus();
|
el('#dl-repo').focus();
|
||||||
|
updateDlHfLink();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Show or hide the "view on Hugging Face" link next to the download repo input.
// The link appears only once the input looks like "org/name".
function updateDlHfLink() {
  const repo = el('#dl-repo').value.trim();
  const link = el('#dl-hf-link');
  if (repo.includes('/')) {
    // Encode each path segment separately: encoding the whole id would turn the
    // "/" between org and name into "%2F".
    const path = repo.split('/').map(encodeURIComponent).join('/');
    link.href = `https://huggingface.co/${path}`;
    link.classList.remove('hidden');
  } else {
    link.classList.add('hidden');
  }
}
|
||||||
|
|
||||||
function closeDownloadPanel() {
|
function closeDownloadPanel() {
|
||||||
@@ -647,6 +870,47 @@ function handleDownloadDone(d) {
|
|||||||
|
|
||||||
// ===================== Advanced / Add to catalog =====================
|
// ===================== Advanced / Add to catalog =====================
|
||||||
|
|
||||||
|
// GPU memory pool, in GB, that a model in the given mode can count on.
// Cluster mode returns the smaller of the two Sparks (the bottleneck) when both
// are known; otherwise the first known value (Spark 1, then Spark 2), or null.
// Reads state.hardware.spark1 / spark2.
function gpuTotalGB(modelMode) {
  const poolGB = (spark) => {
    if (!spark) return null;
    // On unified-memory systems the GPU total is not reported, so fall back to
    // system RAM as the pool — the same rule the hardware cards use.
    const mib = spark.gpu_mem_total_mib
      || (spark.gpu_unified_memory && spark.ram_total_bytes
        ? spark.ram_total_bytes / (1024 * 1024)
        : null);
    return mib ? mib / 1024 : null;
  };
  const g1 = poolGB(state.hardware?.spark1);
  const g2 = poolGB(state.hardware?.spark2);
  if (modelMode === 'cluster' && g1 && g2) return Math.min(g1, g2); // bottleneck
  return g1 || g2 || null;
}
|
||||||
|
|
||||||
|
// One-line, plain-language hint for an advanced knob's current value.
//   field — knob name (gpu_memory_utilization, max_model_len, fastsafetensors,
//           prefix_caching, kv_cache_dtype)
//   value — current value of that knob
//   mode  — model mode, forwarded to gpuTotalGB for the memory estimate
// Returns '' when there is nothing useful to say (unknown field, missing or
// non-numeric value, unknown GPU size).
function knobContextHint(field, value, mode) {
  if (field === 'gpu_memory_utilization') {
    // An empty or half-typed input parses to NaN; show no hint rather than "~NaN GB".
    if (!Number.isFinite(Number(value))) return '';
    const gb = gpuTotalGB(mode);
    if (!gb) return '';
    const used = (value * gb).toFixed(0);
    const free = (gb - value * gb).toFixed(0);
    return `~${used} GB allocated to model + KV cache · ~${free} GB left for OS, buffers, other GPU workloads.`;
  }
  if (field === 'max_model_len') {
    if (!value) return '';
    const pages = Math.round(value / 350); // ~350 tokens per page
    return `~${pages.toLocaleString()} pages of text (very rough). Larger context = more GPU memory reserved for KV cache.`;
  }
  if (field === 'fastsafetensors') return value ? 'Faster cold-start weight loading.' : 'Standard safetensors loading.';
  if (field === 'prefix_caching') return value ? 'Reuses GPU state for repeated prompt prefixes (e.g. long system prompts).' : 'Off — every request re-processes the full prompt.';
  if (field === 'kv_cache_dtype') return value === 'fp8' ? 'Halves KV cache memory (fits ~2× more context). Quality cost is usually imperceptible.' : 'Default precision.';
  return '';
}
|
||||||
|
|
||||||
|
/**
 * Return the hint element for a knob row, creating it on first use.
 *
 * @param {Element} rowEl - Row element that holds the knob's input.
 * @param {string} id - Short knob id stored in the hint's `data-for`.
 * @returns {Element} The existing or newly appended `.knob-hint` div.
 */
function ensureKnobHint(rowEl, id) {
  const existing = rowEl.querySelector(`.knob-hint[data-for="${id}"]`);
  if (existing) return existing;

  // Nothing there yet: build the hint and attach it at the end of the row.
  const hint = document.createElement('div');
  hint.className = 'knob-hint muted small';
  hint.dataset.for = id;
  rowEl.appendChild(hint);
  return hint;
}
|
||||||
|
|
||||||
function openAdvanced(key) {
|
function openAdvanced(key) {
|
||||||
const m = state.models[key];
|
const m = state.models[key];
|
||||||
if (!m) return;
|
if (!m) return;
|
||||||
@@ -659,6 +923,23 @@ function openAdvanced(key) {
|
|||||||
el('#adv-fst').checked = !!k.fastsafetensors;
|
el('#adv-fst').checked = !!k.fastsafetensors;
|
||||||
el('#adv-pcache').checked = !!k.prefix_caching;
|
el('#adv-pcache').checked = !!k.prefix_caching;
|
||||||
el('#adv-fp8').checked = k.kv_cache_dtype === 'fp8';
|
el('#adv-fp8').checked = k.kv_cache_dtype === 'fp8';
|
||||||
|
|
||||||
|
// Wire up live knob hints
|
||||||
|
const updateHints = () => {
|
||||||
|
const mml = parseInt(el('#adv-mml').value, 10);
|
||||||
|
const gmu = parseFloat(el('#adv-gmu').value);
|
||||||
|
ensureKnobHint(el('#adv-mml').parentElement, 'mml').textContent = knobContextHint('max_model_len', mml, m.mode);
|
||||||
|
ensureKnobHint(el('#adv-gmu').parentElement, 'gmu').textContent = knobContextHint('gpu_memory_utilization', gmu, m.mode);
|
||||||
|
ensureKnobHint(el('#adv-fst').parentElement, 'fst').textContent = knobContextHint('fastsafetensors', el('#adv-fst').checked, m.mode);
|
||||||
|
ensureKnobHint(el('#adv-pcache').parentElement, 'pcache').textContent = knobContextHint('prefix_caching', el('#adv-pcache').checked, m.mode);
|
||||||
|
ensureKnobHint(el('#adv-fp8').parentElement, 'fp8').textContent = knobContextHint('kv_cache_dtype', el('#adv-fp8').checked ? 'fp8' : 'auto', m.mode);
|
||||||
|
};
|
||||||
|
updateHints();
|
||||||
|
el('#adv-mml').oninput = updateHints;
|
||||||
|
el('#adv-gmu').oninput = (e) => { el('#adv-gmu-out').value = parseFloat(e.target.value).toFixed(2); updateHints(); };
|
||||||
|
el('#adv-fst').onchange = updateHints;
|
||||||
|
el('#adv-pcache').onchange = updateHints;
|
||||||
|
el('#adv-fp8').onchange = updateHints;
|
||||||
const del = el('#adv-delete');
|
const del = el('#adv-delete');
|
||||||
del.classList.toggle('hidden', !m.custom);
|
del.classList.toggle('hidden', !m.custom);
|
||||||
del.onclick = async () => {
|
del.onclick = async () => {
|
||||||
@@ -753,6 +1034,197 @@ function setupAdvancedDialog() {
|
|||||||
el('#adv-gmu').addEventListener('input', (e) => { el('#adv-gmu-out').value = parseFloat(e.target.value).toFixed(2); });
|
el('#adv-gmu').addEventListener('input', (e) => { el('#adv-gmu-out').value = parseFloat(e.target.value).toFixed(2); });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ===================== NIM installer =====================
|
||||||
|
|
||||||
|
// Shared state for the NIM install flow (catalog dialog + progress dialog).
const nimState = {
|
||||||
|
// Response of /api/nim/catalog, stored by loadNimCatalog(); null until loaded.
catalog: null,
|
||||||
|
// Id of the install job attachNimProgress() is currently showing.
job_id: null,
|
||||||
|
// Open EventSource for the job's /stream endpoint; null when no stream is open.
eventsource: null,
|
||||||
|
// setInterval handle for the elapsed-time display, set by nimTimerStart().
timer: null,
|
||||||
|
// Start time passed to nimTimerStart(); compared against Date.now().
started_at: null,
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Fetch the NIM catalog and render it into the install dialog.
 *
 * Stores the response in `nimState.catalog`, points #nim-catalog-link at the
 * catalog URL, shows or clears the NGC-key warning, and rebuilds the
 * #nim-suggested grid with one card per suggested container. Each card's
 * "Pick" button copies its values into the custom-image form fields.
 *
 * Failures are logged with console.warn and otherwise ignored, so the dialog
 * still opens with whatever content it already had.
 *
 * @returns {Promise<void>}
 */
async function loadNimCatalog() {
  try {
    const catalog = await fetchJSON('/api/nim/catalog');
    nimState.catalog = catalog;

    // Keep the markup's default href when the server sends no URL, instead
    // of assigning the string "undefined".
    if (catalog.catalog_url) el('#nim-catalog-link').href = catalog.catalog_url;

    const warn = el('#nim-key-warn');
    if (!catalog.ngc_key_configured) {
      warn.classList.add('nim-key-warn');
      warn.innerHTML = '⚠️ NGC API key not set. Open <strong>Configure Sparks</strong> in StartOS and paste your NGC personal API key, otherwise installs will fail. <a href="https://ngc.nvidia.com/setup/personal-key" target="_blank" rel="noopener">Get a key</a>';
    } else {
      warn.classList.remove('nim-key-warn');
      warn.textContent = '';
    }

    const grid = el('#nim-suggested');
    grid.innerHTML = '';
    for (const s of catalog.suggested || []) {
      // Same fallbacks for the visible label and the data-* attributes, so a
      // missing field never reaches the form as the text "undefined".
      const kind = s.kind || 'nim';
      const container = s.default_container ?? '';
      const port = String(s.default_port ?? '');
      const homepageLink = s.homepage
        ? `<a href="${escapeHtml(s.homepage)}" target="_blank" rel="noopener">View on NGC ↗</a>`
        : '';

      const card = document.createElement('div');
      card.className = 'nim-card';
      card.innerHTML = `
        <div class="info">
          <div class="name">${escapeHtml(s.name)} <span class="muted small">· ${escapeHtml(kind)}</span></div>
          <div class="desc">${escapeHtml(s.description || '')}</div>
          <div class="img">${escapeHtml(s.image)}</div>
          <div class="links">${homepageLink}</div>
        </div>
        <button type="button" class="btn primary nim-pick" data-image="${escapeHtml(s.image)}" data-container="${escapeHtml(container)}" data-port="${escapeHtml(port)}" data-kind="${escapeHtml(kind)}">Pick</button>
      `;
      grid.appendChild(card);
    }

    // Clicking "Pick" pre-fills the custom-image form from the card.
    grid.querySelectorAll('.nim-pick').forEach((btn) => {
      btn.addEventListener('click', () => {
        el('#nim-image').value = btn.dataset.image;
        el('#nim-container').value = btn.dataset.container;
        el('#nim-port').value = btn.dataset.port;
        el('#nim-kind').value = btn.dataset.kind || 'nim';
      });
    });
  } catch (e) {
    // Best effort: the dialog is still usable for a hand-typed image.
    console.warn('nim catalog failed', e);
  }
}
|
||||||
|
|
||||||
|
/**
 * Open the "Install NIM" dialog.
 *
 * Starts a catalog refresh first and does not wait for it, so the dialog
 * appears immediately and fills in once loadNimCatalog() finishes.
 */
function openNimDialog() {
  void loadNimCatalog();
  const dialog = el('#nim-dialog');
  dialog.showModal();
}
|
||||||
|
|
||||||
|
/**
 * Submit handler for the NIM install form.
 *
 * Collects image, container name, port, target host and kind from the form,
 * validates them, POSTs them to /api/nim/install, then closes the dialog and
 * attaches the progress view to the returned job id. Validation and request
 * failures are reported with alert().
 *
 * @param {Event} e - The form's submit event; its default action is cancelled.
 * @returns {Promise<void>}
 */
async function submitNim(e) {
  e.preventDefault();
  const body = {
    image: el('#nim-image').value.trim(),
    container: el('#nim-container').value.trim(),
    port: parseInt(el('#nim-port').value, 10),
    host: el('#nim-host').value,
    kind: el('#nim-kind').value,
  };
  // `!body.port` also covers NaN from an empty or non-numeric port field.
  if (!body.image || !body.container || !body.port) {
    alert('Image, container name, and port are required.');
    return;
  }
  if (body.port < 1 || body.port > 65535) {
    alert('Port must be between 1 and 65535.');
    return;
  }

  // Disable the submit button while the request is in flight so a double
  // click cannot start two installs.
  const startBtn = el('#nim-start');
  if (startBtn) startBtn.disabled = true;
  try {
    const r = await fetchJSON('/api/nim/install', {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: JSON.stringify(body),
    });
    el('#nim-dialog').close();
    attachNimProgress(r.job_id);
  } catch (err) {
    alert('Install failed: ' + err.message);
  } finally {
    if (startBtn) startBtn.disabled = false;
  }
}
|
||||||
|
|
||||||
|
/**
 * (Re)start the elapsed-time display in the NIM progress dialog.
 *
 * Records the start time in `nimState.started_at`, cancels any interval
 * already stored in `nimState.timer`, renders once immediately and then
 * re-renders every 500 ms.
 *
 * @param {number} at - Start time; subtracted from Date.now() to get the
 *   elapsed time.
 */
function nimTimerStart(at) {
  nimState.started_at = at;
  if (nimState.timer) clearInterval(nimState.timer);

  const render = () => {
    const startedAt = nimState.started_at;
    if (!startedAt) return;
    const elapsedSec = Math.max(0, Math.floor((Date.now() - startedAt) / 1000));
    const minutes = Math.floor(elapsedSec / 60);
    const seconds = String(elapsedSec % 60).padStart(2, '0');
    el('#nim-prog-elapsed').textContent = `${minutes}:${seconds}`;
  };

  render();
  nimState.timer = setInterval(render, 500);
}
|
||||||
|
|
||||||
|
/**
 * Show the progress dialog for a NIM install job and follow it live.
 *
 * First loads a snapshot of the job to seed the timer, phase and log. If the
 * snapshot already carries a return code the job is finished and onNimDone()
 * is called. Otherwise an EventSource is opened on the job's stream and log
 * lines, 'phase' events and the final 'done' event are applied as they arrive.
 *
 * @param {string} jobId - Install job id returned by /api/nim/install.
 * @returns {Promise<void>}
 */
async function attachNimProgress(jobId) {
  // Close a stream left over from an earlier job so it cannot keep writing
  // into the dialog we are about to reuse.
  if (nimState.eventsource) {
    nimState.eventsource.close();
    nimState.eventsource = null;
  }
  nimState.job_id = jobId;

  const log = el('#nim-prog-log');
  log.textContent = '';
  el('#nim-prog-title').textContent = 'Installing…';
  const dialog = el('#nim-progress-dialog');
  // showModal() throws if the dialog is already open.
  if (!dialog.open) dialog.showModal();

  const jobPath = `/api/nim/install/${encodeURIComponent(jobId)}`;
  try {
    const snap = await fetchJSON(jobPath);
    // An unparseable timestamp gives NaN, which would freeze the timer.
    const startedMs = Date.parse(snap.started_at);
    nimTimerStart(Number.isFinite(startedMs) ? startedMs : Date.now());
    el('#nim-prog-phase').textContent = snap.phase || 'Working…';
    // End every snapshot line with "\n" so the first streamed line starts
    // on its own row instead of being glued to the last snapshot line.
    log.textContent = (snap.lines || []).map((line) => `${line}\n`).join('');
    // `!= null` treats both null and a missing field as "still running".
    if (snap.returncode != null) { onNimDone(snap); return; }
  } catch {
    // Snapshot unavailable: start timing from now and rely on the stream.
    nimTimerStart(Date.now());
  }

  const es = new EventSource(`${jobPath}/stream`);
  nimState.eventsource = es;

  es.onmessage = (ev) => {
    try {
      const d = JSON.parse(ev.data);
      if (d.line !== undefined) {
        // append() adds a text node; `textContent +=` would rebuild the
        // whole log on every line.
        log.append(`${d.line}\n`);
        log.scrollTop = log.scrollHeight;
      }
    } catch {
      // Ignore malformed events; the next one may be fine.
    }
  };
  es.addEventListener('phase', (ev) => {
    try { el('#nim-prog-phase').textContent = JSON.parse(ev.data).phase; } catch {}
  });
  es.addEventListener('done', (ev) => {
    let d = {};
    try { d = JSON.parse(ev.data); } catch {}
    onNimDone(d);
  });
  es.onerror = () => {
    es.close();
    // Only report if this is still the active stream (not finished/replaced).
    if (nimState.eventsource !== es) return;
    nimState.eventsource = null;
    el('#nim-prog-phase').textContent = 'Connection to the install log was lost.';
  };
}
|
||||||
|
|
||||||
|
/**
 * Finish the NIM install progress view.
 *
 * Closes the live stream and stops the elapsed-time timer if either is
 * active, writes the final title and phase text, then triggers pollStatus().
 *
 * @param {object} d - Final job payload; `state === 'failed'` selects the
 *   failure text and `returncode` is shown in the failure title.
 */
function onNimDone(d) {
  const { eventsource, timer } = nimState;
  if (eventsource) {
    eventsource.close();
    nimState.eventsource = null;
  }
  if (timer) {
    clearInterval(timer);
    nimState.timer = null;
  }

  const failed = d.state === 'failed';
  el('#nim-prog-title').textContent = failed ? `Failed (rc=${d.returncode})` : 'Installed';
  el('#nim-prog-phase').textContent = failed
    ? 'Failed'
    : 'Done ✓ — service will appear when the container reports healthy.';

  pollStatus();
}
|
||||||
|
|
||||||
|
// ===================== Explain context (LLM commit summary) =====================
|
||||||
|
|
||||||
|
let explainEventSource = null;
|
||||||
|
|
||||||
|
/**
 * Stream an LLM-written explanation of the pending updates into the banner.
 *
 * Opens an EventSource on /api/explain-updates (closing any earlier one),
 * reveals #ub-explain-section and renders incoming events into
 * #ub-explain-content: `content` chunks build up the answer text,
 * `reasoning` chunks go into a de-emphasized block, and an `error` field
 * replaces the output with a failure message.
 *
 * @returns {Promise<void>}
 */
async function explainContext() {
  if (explainEventSource) { explainEventSource.close(); explainEventSource = null; }
  const section = el('#ub-explain-section');
  const content = el('#ub-explain-content');
  section.classList.remove('hidden');
  section.open = true;
  content.innerHTML = '<span class="muted">Asking the loaded model…</span>';

  let text = '';
  let firstChunk = true;
  const es = new EventSource('/api/explain-updates');
  explainEventSource = es;

  // Close this stream and clear the module-level handle if it is still ours.
  const finish = () => {
    es.close();
    if (explainEventSource === es) explainEventSource = null;
  };
  const showFailure = (reason) => {
    content.innerHTML = `<span class="muted">Couldn't get explanation: ${escapeHtml(String(reason))}</span>`;
  };

  es.onmessage = (ev) => {
    try {
      const d = JSON.parse(ev.data);
      if (!d) return; // e.g. a literal JSON null
      if (d.error) {
        showFailure(d.error);
        // Stop listening so later events cannot overwrite the message.
        finish();
        return;
      }
      if (firstChunk) { content.innerHTML = ''; firstChunk = false; }
      if (d.content) {
        text += d.content;
        // NOTE(review): assigning textContent replaces every child of
        // `content`, including reasoning blocks added below — confirm that
        // dropping them once the answer starts is intended.
        content.textContent = text;
        content.scrollTop = content.scrollHeight;
      } else if (d.reasoning) {
        // Show reasoning tokens but de-emphasized
        let r = content.querySelector('.reasoning-current');
        if (!r) {
          r = document.createElement('div');
          r.className = 'reasoning reasoning-current';
          r.textContent = '';
          content.appendChild(r);
        }
        r.textContent += d.reasoning;
      }
    } catch {
      // Ignore malformed events; the next one may be fine.
    }
  };

  es.addEventListener('done', () => {
    finish();
    if (firstChunk) {
      // Nothing arrived: replace the "Asking…" placeholder.
      showFailure('the model returned no text');
      return;
    }
    // strip the reasoning-current marker
    const r = content.querySelector('.reasoning-current');
    if (r) r.classList.remove('reasoning-current');
  });

  es.onerror = () => {
    finish();
    // Without this the placeholder would stay on screen indefinitely.
    if (firstChunk) showFailure('connection to the explain stream failed');
  };
}
|
||||||
|
|
||||||
// ===================== updates (spark-vllm-docker) =====================
|
// ===================== updates (spark-vllm-docker) =====================
|
||||||
|
|
||||||
const updState = {
|
const updState = {
|
||||||
@@ -792,19 +1264,23 @@ function renderUpdateBanner() {
|
|||||||
banner.classList.toggle('up-to-date', behind === 0 && !dirty);
|
banner.classList.toggle('up-to-date', behind === 0 && !dirty);
|
||||||
banner.classList.toggle('warn', !!dirty);
|
banner.classList.toggle('warn', !!dirty);
|
||||||
|
|
||||||
|
const explain = el('#ub-explain');
|
||||||
if (dirty > 0) {
|
if (dirty > 0) {
|
||||||
text.textContent = `${dirty} local change${dirty === 1 ? '' : 's'} in ~/spark-vllm-docker. Resolve before updating.`;
|
text.textContent = `${dirty} local change${dirty === 1 ? '' : 's'} in ~/spark-vllm-docker. Resolve before updating.`;
|
||||||
details.classList.add('hidden');
|
details.classList.add('hidden');
|
||||||
apply.classList.add('hidden');
|
apply.classList.add('hidden');
|
||||||
|
explain.classList.add('hidden');
|
||||||
} else if (behind === 0) {
|
} else if (behind === 0) {
|
||||||
text.textContent = `spark-vllm-docker is up to date (${info.current || ''})`;
|
text.textContent = `spark-vllm-docker is up to date (${info.current || ''})`;
|
||||||
details.classList.add('hidden');
|
details.classList.add('hidden');
|
||||||
apply.classList.add('hidden');
|
apply.classList.add('hidden');
|
||||||
list.classList.add('hidden');
|
list.classList.add('hidden');
|
||||||
|
explain.classList.add('hidden');
|
||||||
} else {
|
} else {
|
||||||
text.textContent = `${behind} commit${behind === 1 ? '' : 's'} behind upstream`;
|
text.textContent = `${behind} commit${behind === 1 ? '' : 's'} behind upstream`;
|
||||||
details.classList.remove('hidden');
|
details.classList.remove('hidden');
|
||||||
apply.classList.remove('hidden');
|
apply.classList.remove('hidden');
|
||||||
|
explain.classList.remove('hidden');
|
||||||
log.textContent = (info.log || []).join('\n') || '(no log)';
|
log.textContent = (info.log || []).join('\n') || '(no log)';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -893,13 +1369,37 @@ async function init() {
|
|||||||
list.open = !list.open;
|
list.open = !list.open;
|
||||||
});
|
});
|
||||||
el('#ub-apply').addEventListener('click', applyUpdate);
|
el('#ub-apply').addEventListener('click', applyUpdate);
|
||||||
|
el('#ub-explain').addEventListener('click', explainContext);
|
||||||
|
el('#dl-repo').addEventListener('input', updateDlHfLink);
|
||||||
|
el('#open-nim').addEventListener('click', openNimDialog);
|
||||||
|
el('#nim-cancel').addEventListener('click', () => el('#nim-dialog').close());
|
||||||
|
el('#nim-form').addEventListener('submit', submitNim);
|
||||||
|
el('#nim-prog-close').addEventListener('click', () => el('#nim-progress-dialog').close());
|
||||||
|
el('#open-connectivity').addEventListener('click', openConnectivityDialog);
|
||||||
|
el('#connectivity-close').addEventListener('click', () => el('#connectivity-dialog').close());
|
||||||
|
// Wake-on-LAN buttons live on unreachable hardware cards; delegate.
|
||||||
|
el('#hardware-grid').addEventListener('click', (e) => {
|
||||||
|
const btn = e.target.closest('[data-wake]');
|
||||||
|
if (btn) wakeSpark(btn.dataset.wake);
|
||||||
|
});
|
||||||
setupCatalogDialog();
|
setupCatalogDialog();
|
||||||
setupAdvancedDialog();
|
setupAdvancedDialog();
|
||||||
|
// Open WebUI link from /api/config
|
||||||
|
try {
|
||||||
|
state.config = await fetchJSON('/api/config');
|
||||||
|
if (state.config.open_webui_url) {
|
||||||
|
const a = el('#open-webui-link');
|
||||||
|
a.href = state.config.open_webui_url;
|
||||||
|
a.classList.remove('hidden');
|
||||||
|
}
|
||||||
|
} catch {}
|
||||||
await loadModels();
|
await loadModels();
|
||||||
await pollStatus();
|
await pollStatus();
|
||||||
await renderServices();
|
await renderServices();
|
||||||
|
pollHardware();
|
||||||
pollUpdates();
|
pollUpdates();
|
||||||
setInterval(pollStatus, 5000);
|
setInterval(pollStatus, 5000);
|
||||||
|
setInterval(pollHardware, 8000); // every 8s
|
||||||
setInterval(pollUpdates, 300000); // every 5 min
|
setInterval(pollUpdates, 300000); // every 5 min
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
+111
-9
@@ -16,6 +16,7 @@
|
|||||||
<div class="current" id="current">
|
<div class="current" id="current">
|
||||||
<span class="muted">connecting…</span>
|
<span class="muted">connecting…</span>
|
||||||
</div>
|
</div>
|
||||||
|
<a id="open-webui-link" class="topbar-btn hidden" href="#" target="_blank" rel="noopener" title="Open Open WebUI">Open chat ↗</a>
|
||||||
</header>
|
</header>
|
||||||
|
|
||||||
<main>
|
<main>
|
||||||
@@ -24,22 +25,47 @@
|
|||||||
<span>Run the <em>Configure Sparks</em> action in StartOS to set hostnames, then run <em>Test Connection</em>.</span>
|
<span>Run the <em>Configure Sparks</em> action in StartOS to set hostnames, then run <em>Test Connection</em>.</span>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
|
<section id="hardware-panel" class="hardware-panel hidden">
|
||||||
|
<div class="section-header">
|
||||||
|
<h2 class="section-title">Spark hardware</h2>
|
||||||
|
<button id="open-connectivity" class="btn small-btn">Connectivity log</button>
|
||||||
|
</div>
|
||||||
|
<div id="hardware-grid" class="hardware-grid"></div>
|
||||||
|
|
||||||
|
<dialog id="connectivity-dialog" class="modal">
|
||||||
|
<form method="dialog" class="modal-form">
|
||||||
|
<h3>Spark connectivity history</h3>
|
||||||
|
<p class="muted small">Most recent up/down transitions per Spark. Tracked since this dashboard was installed.</p>
|
||||||
|
<div id="connectivity-content" class="connectivity-content"></div>
|
||||||
|
<div class="modal-actions">
|
||||||
|
<button type="button" id="connectivity-close" class="btn">Close</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
</dialog>
|
||||||
|
</section>
|
||||||
|
|
||||||
<section id="endpoint-panel" class="endpoint-panel hidden">
|
<section id="endpoint-panel" class="endpoint-panel hidden">
|
||||||
<div class="ep-title muted small">OpenAI-compatible endpoint</div>
|
<div class="ep-title muted small">OpenAI-compatible endpoint</div>
|
||||||
<div class="ep-row">
|
<div class="ep-row">
|
||||||
<span class="ep-label">Base URL</span>
|
<span class="ep-label">Base URL</span>
|
||||||
<code class="ep-value" id="ep-url">—</code>
|
<code class="ep-value copyable" id="ep-url" data-copy-self title="Click to copy">—</code>
|
||||||
<button class="copy-btn" data-copy="#ep-url" title="Copy base URL">Copy</button>
|
<button class="icon-btn" data-copy="#ep-url" title="Copy base URL" aria-label="Copy">
|
||||||
|
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2"/><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/></svg>
|
||||||
|
</button>
|
||||||
</div>
|
</div>
|
||||||
<div class="ep-row">
|
<div class="ep-row">
|
||||||
<span class="ep-label">Model ID</span>
|
<span class="ep-label">Model ID</span>
|
||||||
<code class="ep-value" id="ep-model">—</code>
|
<code class="ep-value copyable" id="ep-model" data-copy-self title="Click to copy">—</code>
|
||||||
<button class="copy-btn" data-copy="#ep-model" title="Copy model ID">Copy</button>
|
<button class="icon-btn" data-copy="#ep-model" title="Copy model ID" aria-label="Copy">
|
||||||
|
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2"/><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/></svg>
|
||||||
|
</button>
|
||||||
</div>
|
</div>
|
||||||
<details class="ep-curl">
|
<details class="ep-curl">
|
||||||
<summary class="muted small">curl example</summary>
|
<summary class="muted small">curl example</summary>
|
||||||
<pre id="ep-curl-snippet" class="snippet"></pre>
|
<pre id="ep-curl-snippet" class="snippet copyable" data-copy-self title="Click to copy"></pre>
|
||||||
<button class="copy-btn small" data-copy="#ep-curl-snippet">Copy snippet</button>
|
<button class="icon-btn" data-copy="#ep-curl-snippet" title="Copy snippet" aria-label="Copy">
|
||||||
|
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2"/><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/></svg>
|
||||||
|
</button>
|
||||||
</details>
|
</details>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
@@ -64,8 +90,66 @@
|
|||||||
</section>
|
</section>
|
||||||
|
|
||||||
<section id="services-panel" class="services hidden">
|
<section id="services-panel" class="services hidden">
|
||||||
<h2 class="section-title">Always-on services</h2>
|
<div class="section-header">
|
||||||
|
<h2 class="section-title">Always-on services</h2>
|
||||||
|
<button id="open-nim" class="btn small-btn">+ Install NIM</button>
|
||||||
|
</div>
|
||||||
<div id="services-grid" class="services-grid"></div>
|
<div id="services-grid" class="services-grid"></div>
|
||||||
|
|
||||||
|
<dialog id="nim-dialog" class="modal">
|
||||||
|
<form method="dialog" class="modal-form" id="nim-form">
|
||||||
|
<h3>Install a NVIDIA NIM container</h3>
|
||||||
|
<p class="muted small" id="nim-key-warn"></p>
|
||||||
|
<p class="muted small">Pick a curated container below or paste any image from <a href="#" id="nim-catalog-link" target="_blank" rel="noopener">the NGC NIM catalog</a>. Spark Control will <code>docker pull</code> and <code>docker run</code> it on the target Spark.</p>
|
||||||
|
|
||||||
|
<div id="nim-suggested" class="nim-grid"></div>
|
||||||
|
|
||||||
|
<fieldset class="modal-fieldset">
|
||||||
|
<legend>Custom image</legend>
|
||||||
|
<label class="modal-row"><span>Image (nvcr.io/...)</span><input type="text" id="nim-image" placeholder="nvcr.io/nim/nvidia/<name>:latest"></label>
|
||||||
|
<label class="modal-row"><span>Container name</span><input type="text" id="nim-container" placeholder="my-service"></label>
|
||||||
|
<label class="modal-row"><span>Port</span><input type="number" id="nim-port" min="1" max="65535"></label>
|
||||||
|
<label class="modal-row"><span>Kind</span>
|
||||||
|
<select id="nim-kind">
|
||||||
|
<option value="nim">NIM (other)</option>
|
||||||
|
<option value="stt">STT (speech-to-text)</option>
|
||||||
|
<option value="tts">TTS (text-to-speech)</option>
|
||||||
|
<option value="vision">Vision</option>
|
||||||
|
<option value="embedding">Embedding</option>
|
||||||
|
</select>
|
||||||
|
</label>
|
||||||
|
<label class="modal-row"><span>Target Spark</span>
|
||||||
|
<select id="nim-host">
|
||||||
|
<option value="spark2">Spark 2 (default for support services)</option>
|
||||||
|
<option value="spark1">Spark 1 (head node)</option>
|
||||||
|
</select>
|
||||||
|
</label>
|
||||||
|
</fieldset>
|
||||||
|
|
||||||
|
<div class="modal-actions">
|
||||||
|
<button type="button" id="nim-cancel" class="btn">Cancel</button>
|
||||||
|
<button type="submit" class="btn primary" id="nim-start">Install</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
</dialog>
|
||||||
|
|
||||||
|
<dialog id="nim-progress-dialog" class="modal">
|
||||||
|
<form method="dialog" class="modal-form">
|
||||||
|
<h3 id="nim-prog-title">Installing…</h3>
|
||||||
|
<div class="phase-row">
|
||||||
|
<div class="phase" id="nim-prog-phase">Starting…</div>
|
||||||
|
<span class="spacer"></span>
|
||||||
|
<span class="timer" id="nim-prog-elapsed">0:00</span>
|
||||||
|
</div>
|
||||||
|
<details open>
|
||||||
|
<summary class="muted small">Log</summary>
|
||||||
|
<pre id="nim-prog-log" class="log"></pre>
|
||||||
|
</details>
|
||||||
|
<div class="modal-actions">
|
||||||
|
<button type="button" id="nim-prog-close" class="btn">Close</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
</dialog>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
<section id="models-section">
|
<section id="models-section">
|
||||||
@@ -127,11 +211,20 @@
|
|||||||
<label class="dl-row">
|
<label class="dl-row">
|
||||||
<span class="dl-label">HuggingFace repo</span>
|
<span class="dl-label">HuggingFace repo</span>
|
||||||
<input type="text" id="dl-repo" placeholder="e.g. RedHatAI/Qwen3.6-35B-A3B-NVFP4" autocomplete="off">
|
<input type="text" id="dl-repo" placeholder="e.g. RedHatAI/Qwen3.6-35B-A3B-NVFP4" autocomplete="off">
|
||||||
|
<a id="dl-hf-link" class="dl-hf-link hidden" href="#" target="_blank" rel="noopener" title="Open on Hugging Face">↗</a>
|
||||||
</label>
|
</label>
|
||||||
|
<div class="dl-help muted small">
|
||||||
|
<a href="https://huggingface.co/models?other=vllm" target="_blank" rel="noopener">Browse vLLM-compatible models</a>
|
||||||
|
· NVFP4-quantized models (e.g. <code>RedHatAI/...</code>) are best for Blackwell hardware
|
||||||
|
</div>
|
||||||
<div class="dl-row">
|
<div class="dl-row">
|
||||||
<span class="dl-label">Where</span>
|
<span class="dl-label">Where</span>
|
||||||
<label class="radio"><input type="radio" name="dl-mode" value="solo" checked> Spark 1 only (solo)</label>
|
<label class="radio"><input type="radio" name="dl-mode" value="spark1" checked> Spark 1 only</label>
|
||||||
<label class="radio"><input type="radio" name="dl-mode" value="cluster"> Both Sparks (cluster, copy in parallel)</label>
|
<label class="radio"><input type="radio" name="dl-mode" value="spark2"> Spark 2 only</label>
|
||||||
|
<label class="radio"><input type="radio" name="dl-mode" value="cluster"> Both Sparks (for cluster models)</label>
|
||||||
|
</div>
|
||||||
|
<div class="dl-help muted small">
|
||||||
|
For <strong>solo</strong> models, download to wherever you'll run them. For <strong>cluster</strong> models (-tp 2), both Sparks need the weights — "Both" downloads to one Spark and rsyncs to the other in parallel.
|
||||||
</div>
|
</div>
|
||||||
<div class="dl-actions">
|
<div class="dl-actions">
|
||||||
<button id="dl-cancel" class="btn">Cancel</button>
|
<button id="dl-cancel" class="btn">Cancel</button>
|
||||||
@@ -165,9 +258,14 @@
|
|||||||
</section>
|
</section>
|
||||||
|
|
||||||
<section id="update-banner" class="update-banner hidden">
|
<section id="update-banner" class="update-banner hidden">
|
||||||
|
<div class="ub-context muted small">
|
||||||
|
Updates to <strong><a href="https://github.com/eugr/spark-vllm-docker" target="_blank" rel="noopener">eugr/spark-vllm-docker</a></strong>
|
||||||
|
— the upstream project that orchestrates vLLM on your Sparks (launch-cluster.sh, recipes, mods). These are <em>not</em> firmware, OS, or model updates.
|
||||||
|
</div>
|
||||||
<div class="ub-row">
|
<div class="ub-row">
|
||||||
<span id="ub-text">Checking for updates…</span>
|
<span id="ub-text">Checking for updates…</span>
|
||||||
<span class="spacer"></span>
|
<span class="spacer"></span>
|
||||||
|
<button id="ub-explain" class="btn small-btn hidden">✨ Explain context</button>
|
||||||
<button id="ub-details" class="btn small-btn hidden">Show details</button>
|
<button id="ub-details" class="btn small-btn hidden">Show details</button>
|
||||||
<button id="ub-apply" class="btn small-btn primary hidden">Apply update</button>
|
<button id="ub-apply" class="btn small-btn primary hidden">Apply update</button>
|
||||||
</div>
|
</div>
|
||||||
@@ -175,6 +273,10 @@
|
|||||||
<summary class="muted small">Pending commits</summary>
|
<summary class="muted small">Pending commits</summary>
|
||||||
<pre id="ub-log" class="snippet"></pre>
|
<pre id="ub-log" class="snippet"></pre>
|
||||||
</details>
|
</details>
|
||||||
|
<details id="ub-explain-section" class="hidden">
|
||||||
|
<summary class="muted small">Explained by the loaded LLM</summary>
|
||||||
|
<div id="ub-explain-content" class="explain-content"></div>
|
||||||
|
</details>
|
||||||
<div id="ub-progress" class="hidden">
|
<div id="ub-progress" class="hidden">
|
||||||
<div class="phase-row">
|
<div class="phase-row">
|
||||||
<div class="phase" id="ub-phase">Applying update…</div>
|
<div class="phase" id="ub-phase">Applying update…</div>
|
||||||
|
|||||||
+212
-5
@@ -45,6 +45,17 @@ body {
|
|||||||
.logo-dot { width: 10px; height: 10px; border-radius: 50%; background: var(--accent); box-shadow: 0 0 12px var(--accent); }
|
.logo-dot { width: 10px; height: 10px; border-radius: 50%; background: var(--accent); box-shadow: 0 0 12px var(--accent); }
|
||||||
.current { flex: 1; text-align: right; font-size: 14px; }
|
.current { flex: 1; text-align: right; font-size: 14px; }
|
||||||
.current strong { color: var(--accent); }
|
.current strong { color: var(--accent); }
|
||||||
|
.topbar-btn {
|
||||||
|
background: var(--surface-2);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
color: var(--text);
|
||||||
|
padding: 5px 10px;
|
||||||
|
border-radius: 6px;
|
||||||
|
font-size: 12px;
|
||||||
|
text-decoration: none;
|
||||||
|
transition: border-color 0.15s, background 0.15s;
|
||||||
|
}
|
||||||
|
.topbar-btn:hover { background: #24242c; border-color: var(--accent); color: var(--accent); }
|
||||||
|
|
||||||
main {
|
main {
|
||||||
max-width: 880px;
|
max-width: 880px;
|
||||||
@@ -97,7 +108,8 @@ main {
|
|||||||
overflow-x: auto;
|
overflow-x: auto;
|
||||||
white-space: nowrap;
|
white-space: nowrap;
|
||||||
}
|
}
|
||||||
.copy-btn {
|
.copy-btn,
|
||||||
|
.icon-btn {
|
||||||
appearance: none;
|
appearance: none;
|
||||||
background: var(--surface-2);
|
background: var(--surface-2);
|
||||||
border: 1px solid var(--border);
|
border: 1px solid var(--border);
|
||||||
@@ -108,15 +120,27 @@ main {
|
|||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
transition: color 0.15s, border-color 0.15s, background 0.15s;
|
transition: color 0.15s, border-color 0.15s, background 0.15s;
|
||||||
flex-shrink: 0;
|
flex-shrink: 0;
|
||||||
|
display: inline-flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
}
|
}
|
||||||
.copy-btn:hover { color: var(--text); border-color: #34343c; }
|
.icon-btn { padding: 5px 7px; }
|
||||||
.copy-btn.copied {
|
.icon-btn svg { width: 14px; height: 14px; display: block; }
|
||||||
|
.copy-btn:hover,
|
||||||
|
.icon-btn:hover { color: var(--text); border-color: #34343c; }
|
||||||
|
.copy-btn.copied,
|
||||||
|
.icon-btn.copied {
|
||||||
color: var(--accent);
|
color: var(--accent);
|
||||||
border-color: rgba(74, 222, 128, 0.4);
|
border-color: rgba(74, 222, 128, 0.4);
|
||||||
background: rgba(74, 222, 128, 0.08);
|
background: rgba(74, 222, 128, 0.08);
|
||||||
}
|
}
|
||||||
|
.icon-btn.copied svg { color: var(--accent); }
|
||||||
.copy-btn.small { padding: 3px 8px; font-size: 11px; }
|
.copy-btn.small { padding: 3px 8px; font-size: 11px; }
|
||||||
|
|
||||||
|
.copyable { cursor: pointer; }
|
||||||
|
.copyable:hover { outline: 1px solid rgba(96, 165, 250, 0.5); }
|
||||||
|
.copyable.copied { outline: 1px solid var(--accent); background: rgba(74, 222, 128, 0.05); }
|
||||||
|
|
||||||
.ep-curl { margin-top: 8px; }
|
.ep-curl { margin-top: 8px; }
|
||||||
.ep-curl summary { cursor: pointer; padding: 4px 0; }
|
.ep-curl summary { cursor: pointer; padding: 4px 0; }
|
||||||
.ep-curl[open] summary { margin-bottom: 6px; }
|
.ep-curl[open] summary { margin-bottom: 6px; }
|
||||||
@@ -255,6 +279,14 @@ main {
|
|||||||
font: 13px ui-monospace, SFMono-Regular, "SF Mono", Menlo, monospace;
|
font: 13px ui-monospace, SFMono-Regular, "SF Mono", Menlo, monospace;
|
||||||
}
|
}
|
||||||
.modal-row textarea { font-family: inherit; resize: vertical; }
|
.modal-row textarea { font-family: inherit; resize: vertical; }
|
||||||
|
.modal-row .knob-hint {
|
||||||
|
color: var(--muted);
|
||||||
|
font-size: 11px;
|
||||||
|
line-height: 1.5;
|
||||||
|
margin-top: 2px;
|
||||||
|
padding-left: 2px;
|
||||||
|
}
|
||||||
|
.modal-row.inline .knob-hint { width: 100%; margin-left: 22px; margin-top: 0; }
|
||||||
.modal-row input:focus, .modal-row textarea:focus, .modal-row select:focus { outline: 1px solid var(--info); border-color: var(--info); }
|
.modal-row input:focus, .modal-row textarea:focus, .modal-row select:focus { outline: 1px solid var(--info); border-color: var(--info); }
|
||||||
.modal-row input[type='range'] { padding: 0; flex: 1; }
|
.modal-row input[type='range'] { padding: 0; flex: 1; }
|
||||||
.modal-fieldset {
|
.modal-fieldset {
|
||||||
@@ -274,10 +306,39 @@ main {
|
|||||||
background: var(--surface);
|
background: var(--surface);
|
||||||
border: 1px solid rgba(96, 165, 250, 0.4);
|
border: 1px solid rgba(96, 165, 250, 0.4);
|
||||||
border-radius: var(--radius);
|
border-radius: var(--radius);
|
||||||
padding: 10px 14px;
|
padding: 12px 14px;
|
||||||
margin-top: 18px;
|
margin-top: 18px;
|
||||||
font-size: 13px;
|
font-size: 13px;
|
||||||
}
|
}
|
||||||
|
.ub-context { margin-bottom: 8px; line-height: 1.5; }
|
||||||
|
.ub-context a { color: var(--info); text-decoration: none; }
|
||||||
|
.ub-context a:hover { text-decoration: underline; }
|
||||||
|
.ub-context em { font-style: normal; color: var(--text); font-weight: 500; }
|
||||||
|
|
||||||
|
#ub-explain-section { margin-top: 8px; }
|
||||||
|
#ub-explain-section summary { cursor: pointer; padding: 4px 0; }
|
||||||
|
.explain-content {
|
||||||
|
background: #08080b;
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: 6px;
|
||||||
|
padding: 12px 14px;
|
||||||
|
margin-top: 8px;
|
||||||
|
font-size: 13px;
|
||||||
|
line-height: 1.6;
|
||||||
|
color: #c7c7d1;
|
||||||
|
white-space: pre-wrap;
|
||||||
|
word-break: break-word;
|
||||||
|
max-height: 320px;
|
||||||
|
overflow: auto;
|
||||||
|
}
|
||||||
|
.explain-content .reasoning {
|
||||||
|
color: var(--muted);
|
||||||
|
font-style: italic;
|
||||||
|
font-size: 11px;
|
||||||
|
border-left: 2px solid var(--border);
|
||||||
|
padding-left: 10px;
|
||||||
|
margin: 4px 0;
|
||||||
|
}
|
||||||
.update-banner.up-to-date {
|
.update-banner.up-to-date {
|
||||||
border-color: var(--border);
|
border-color: var(--border);
|
||||||
color: var(--muted);
|
color: var(--muted);
|
||||||
@@ -289,6 +350,88 @@ main {
|
|||||||
#ub-list summary { cursor: pointer; padding: 4px 0; }
|
#ub-list summary { cursor: pointer; padding: 4px 0; }
|
||||||
#ub-progress { margin-top: 10px; }
|
#ub-progress { margin-top: 10px; }
|
||||||
|
|
||||||
|
/* ===== Hardware dashboard ===== */
|
||||||
|
|
||||||
|
.hardware-grid {
|
||||||
|
display: grid;
|
||||||
|
gap: 14px;
|
||||||
|
grid-template-columns: repeat(auto-fill, minmax(320px, 1fr));
|
||||||
|
}
|
||||||
|
.hw-card {
|
||||||
|
background: var(--surface);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: var(--radius);
|
||||||
|
padding: 14px 16px;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 8px;
|
||||||
|
}
|
||||||
|
.hw-card .head {
|
||||||
|
display: flex;
|
||||||
|
align-items: baseline;
|
||||||
|
gap: 8px;
|
||||||
|
margin-bottom: 4px;
|
||||||
|
}
|
||||||
|
.hw-card .head .name { font-weight: 600; font-size: 15px; }
|
||||||
|
.hw-card .head .meta { color: var(--muted); font-size: 12px; margin-left: auto; }
|
||||||
|
.hw-card.unreachable { border-color: rgba(239, 68, 68, 0.4); }
|
||||||
|
.hw-card.unreachable .name { color: var(--error); }
|
||||||
|
.hw-card.unreachable ol { color: var(--muted); }
|
||||||
|
.hw-card .wol-row {
|
||||||
|
margin-top: 8px;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 8px;
|
||||||
|
font-size: 12px;
|
||||||
|
color: var(--muted);
|
||||||
|
}
|
||||||
|
.hw-card .wol-row .btn { padding: 5px 10px; font-size: 12px; }
|
||||||
|
.hw-card .mac-display { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; }
|
||||||
|
|
||||||
|
.connectivity-content {
|
||||||
|
max-height: 360px;
|
||||||
|
overflow-y: auto;
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: 6px;
|
||||||
|
padding: 10px;
|
||||||
|
background: var(--surface-2);
|
||||||
|
}
|
||||||
|
.conn-spark { margin-bottom: 16px; }
|
||||||
|
.conn-spark h4 { font-size: 13px; margin: 0 0 8px; color: var(--text); }
|
||||||
|
.conn-event {
|
||||||
|
font-size: 12px;
|
||||||
|
display: flex;
|
||||||
|
gap: 10px;
|
||||||
|
padding: 4px 0;
|
||||||
|
border-bottom: 1px solid rgba(255,255,255,0.04);
|
||||||
|
font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
|
||||||
|
}
|
||||||
|
.conn-event:last-child { border-bottom: 0; }
|
||||||
|
.conn-event .when { color: var(--muted); flex-shrink: 0; }
|
||||||
|
.conn-event .what { flex: 1; }
|
||||||
|
.conn-event.up .what { color: var(--accent); }
|
||||||
|
.conn-event.down .what { color: var(--error); }
|
||||||
|
.conn-event .dur { color: var(--muted); }
|
||||||
|
.conn-summary { color: var(--muted); font-size: 11px; padding: 4px 0 10px; }
|
||||||
|
.hw-metric { display: flex; align-items: center; gap: 10px; font-size: 12px; }
|
||||||
|
.hw-metric .label { color: var(--muted); width: 56px; flex-shrink: 0; text-transform: uppercase; letter-spacing: 0.05em; font-size: 11px; }
|
||||||
|
.hw-metric .bar { flex: 1; height: 8px; background: var(--surface-2); border-radius: 4px; overflow: hidden; position: relative; }
|
||||||
|
.hw-metric .bar > span {
|
||||||
|
display: block;
|
||||||
|
height: 100%;
|
||||||
|
background: linear-gradient(90deg, var(--info), var(--accent));
|
||||||
|
border-radius: 4px;
|
||||||
|
transition: width 0.4s ease-out;
|
||||||
|
}
|
||||||
|
.hw-metric .bar.warn > span { background: linear-gradient(90deg, var(--warn), var(--error)); }
|
||||||
|
.hw-metric .val {
|
||||||
|
font-family: ui-monospace, SFMono-Regular, "SF Mono", Menlo, monospace;
|
||||||
|
font-size: 12px;
|
||||||
|
color: var(--text);
|
||||||
|
min-width: 110px;
|
||||||
|
text-align: right;
|
||||||
|
}
|
||||||
|
|
||||||
/* ===== Section header (title + action button) ===== */
|
/* ===== Section header (title + action button) ===== */
|
||||||
|
|
||||||
.section-header {
|
.section-header {
|
||||||
@@ -341,6 +484,24 @@ main {
|
|||||||
min-width: 200px;
|
min-width: 200px;
|
||||||
}
|
}
|
||||||
.dl-row input[type='text']:focus { outline: 1px solid var(--info); border-color: var(--info); }
|
.dl-row input[type='text']:focus { outline: 1px solid var(--info); border-color: var(--info); }
|
||||||
|
.dl-hf-link {
|
||||||
|
display: inline-flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
background: var(--surface-2);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
color: var(--info);
|
||||||
|
padding: 7px 10px;
|
||||||
|
border-radius: 6px;
|
||||||
|
text-decoration: none;
|
||||||
|
font-size: 14px;
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
.dl-hf-link:hover { background: rgba(96, 165, 250, 0.08); border-color: var(--info); }
|
||||||
|
.dl-help { padding-left: 122px; line-height: 1.6; }
|
||||||
|
.dl-help a { color: var(--info); text-decoration: none; }
|
||||||
|
.dl-help a:hover { text-decoration: underline; }
|
||||||
|
.dl-help code { background: var(--surface-2); padding: 1px 5px; border-radius: 3px; font-size: 11px; }
|
||||||
.radio { display: inline-flex; align-items: center; gap: 6px; font-size: 13px; color: var(--text); cursor: pointer; }
|
.radio { display: inline-flex; align-items: center; gap: 6px; font-size: 13px; color: var(--text); cursor: pointer; }
|
||||||
.radio input { accent-color: var(--accent); }
|
.radio input { accent-color: var(--accent); }
|
||||||
.dl-actions { display: flex; gap: 8px; justify-content: flex-end; margin-top: 10px; }
|
.dl-actions { display: flex; gap: 8px; justify-content: flex-end; margin-top: 10px; }
|
||||||
@@ -353,6 +514,37 @@ main {
|
|||||||
#dl-log-details { margin-top: 12px; }
|
#dl-log-details { margin-top: 12px; }
|
||||||
#dl-log-details summary { cursor: pointer; padding: 4px 0; }
|
#dl-log-details summary { cursor: pointer; padding: 4px 0; }
|
||||||
|
|
||||||
|
/* ===== NIM install dialog ===== */
|
||||||
|
|
||||||
|
.modal#nim-dialog,
|
||||||
|
.modal#nim-progress-dialog { max-width: 640px; }
|
||||||
|
.nim-grid {
|
||||||
|
display: grid;
|
||||||
|
gap: 8px;
|
||||||
|
grid-template-columns: 1fr;
|
||||||
|
max-height: 240px;
|
||||||
|
overflow-y: auto;
|
||||||
|
margin-bottom: 4px;
|
||||||
|
}
|
||||||
|
.nim-card {
|
||||||
|
background: var(--surface-2);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: 6px;
|
||||||
|
padding: 10px 12px;
|
||||||
|
display: flex;
|
||||||
|
gap: 10px;
|
||||||
|
align-items: flex-start;
|
||||||
|
}
|
||||||
|
.nim-card .info { flex: 1; }
|
||||||
|
.nim-card .name { font-weight: 600; font-size: 13px; }
|
||||||
|
.nim-card .desc { color: var(--muted); font-size: 12px; margin-top: 4px; }
|
||||||
|
.nim-card .img { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; color: #6b6b75; font-size: 11px; margin-top: 4px; word-break: break-all; }
|
||||||
|
.nim-card .btn { padding: 6px 12px; font-size: 12px; flex-shrink: 0; }
|
||||||
|
.nim-card .links { font-size: 11px; margin-top: 4px; }
|
||||||
|
.nim-card .links a { color: var(--info); text-decoration: none; }
|
||||||
|
.nim-card .links a:hover { text-decoration: underline; }
|
||||||
|
.nim-key-warn { color: var(--warn); }
|
||||||
|
|
||||||
/* ===== Section titles ===== */
|
/* ===== Section titles ===== */
|
||||||
|
|
||||||
.section-title {
|
.section-title {
|
||||||
@@ -409,13 +601,25 @@ main {
|
|||||||
|
|
||||||
.service-card .row {
|
.service-card .row {
|
||||||
display: flex;
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
font-size: 12px;
|
font-size: 12px;
|
||||||
color: var(--muted);
|
color: var(--muted);
|
||||||
gap: 6px;
|
gap: 6px;
|
||||||
}
|
}
|
||||||
.service-card .row .k { width: 60px; flex-shrink: 0; }
|
.service-card .row .k { width: 60px; flex-shrink: 0; }
|
||||||
.service-card .row .v { color: var(--text); font-family: ui-monospace, SFMono-Regular, "SF Mono", Menlo, monospace; word-break: break-all; }
|
.service-card .row .v {
|
||||||
|
color: var(--text);
|
||||||
|
font-family: ui-monospace, SFMono-Regular, "SF Mono", Menlo, monospace;
|
||||||
|
word-break: break-all;
|
||||||
|
flex: 1;
|
||||||
|
padding: 2px 4px;
|
||||||
|
border-radius: 4px;
|
||||||
|
}
|
||||||
.service-card .row .v.muted-v { color: var(--muted); font-family: inherit; }
|
.service-card .row .v.muted-v { color: var(--muted); font-family: inherit; }
|
||||||
|
.service-card .row .v.copyable:hover { outline: 1px solid rgba(96, 165, 250, 0.5); }
|
||||||
|
.service-card .row .v.copyable.copied { outline: 1px solid var(--accent); background: rgba(74, 222, 128, 0.05); }
|
||||||
|
.service-card .row .icon-btn { padding: 3px 6px; }
|
||||||
|
.service-card .row .icon-btn svg { width: 12px; height: 12px; }
|
||||||
|
|
||||||
.service-actions {
|
.service-actions {
|
||||||
display: flex;
|
display: flex;
|
||||||
@@ -460,6 +664,9 @@ main {
|
|||||||
font-size: 11px;
|
font-size: 11px;
|
||||||
color: #5c5c66;
|
color: #5c5c66;
|
||||||
}
|
}
|
||||||
|
.card .repo a { color: inherit; text-decoration: none; }
|
||||||
|
.card .repo a:hover { color: var(--info); text-decoration: underline; }
|
||||||
|
.card .repo .hf-icon { font-size: 13px; opacity: 0.7; }
|
||||||
.tag {
|
.tag {
|
||||||
background: var(--surface-2);
|
background: var(--surface-2);
|
||||||
border: 1px solid var(--border);
|
border: 1px solid var(--border);
|
||||||
|
|||||||
@@ -0,0 +1,69 @@
|
|||||||
|
"""Wake-on-LAN.
|
||||||
|
|
||||||
|
Two delivery paths, tried in order:
|
||||||
|
|
||||||
|
1. SSH into the other Spark and have IT broadcast — most reliable because the
|
||||||
|
packet originates from the same LAN subnet as the sleeping Spark.
|
||||||
|
2. Direct UDP broadcast from this container. May or may not work depending
|
||||||
|
on the StartOS container's network namespace.
|
||||||
|
|
||||||
|
The DGX Spark's NIC must have WoL enabled in firmware/OS for either path to
|
||||||
|
actually wake the box; this module just delivers the magic packet correctly.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
import asyncio
|
||||||
|
import re
|
||||||
|
import socket
|
||||||
|
|
||||||
|
from .config import Settings
|
||||||
|
from .ssh import ssh_run
|
||||||
|
|
||||||
|
|
||||||
|
# Six hex octets, each optionally separated from the next by ":" or "-".
# Separator-less ("aabbccddeeff") and mixed-separator input is accepted.
_MAC_RE = re.compile(r"^[0-9a-fA-F]{2}([:-]?[0-9a-fA-F]{2}){5}$")


def normalize_mac(mac: str) -> str:
    """Return *mac* in canonical lowercase, colon-separated form.

    Surrounding whitespace is ignored and the input may use ":" or "-"
    separators, or none at all. The result is always of the form
    ``aa:bb:cc:dd:ee:ff`` regardless of how the input was written.

    Raises:
        ValueError: if *mac* is not six hexadecimal octets.
    """
    candidate = mac.strip().lower()
    if not _MAC_RE.fullmatch(candidate):
        raise ValueError(f"invalid MAC address: {candidate!r}")
    # Drop whatever separators were supplied, then re-insert ":" between
    # every octet so separator-less or mixed input is canonicalised too
    # (a plain "-" -> ":" replacement would leave "aabbccddeeff" untouched).
    digits = candidate.replace(":", "").replace("-", "")
    return ":".join(digits[i:i + 2] for i in range(0, len(digits), 2))
|
||||||
|
|
||||||
|
|
||||||
|
def build_magic_packet(mac: str) -> bytes:
    """Build the Wake-on-LAN magic packet for *mac*.

    The payload is six 0xFF bytes followed by the 6-byte hardware address
    repeated sixteen times. *mac* is validated via ``normalize_mac``, so an
    invalid address raises ``ValueError``.
    """
    hex_digits = normalize_mac(mac).replace(":", "")
    hw_addr = bytes.fromhex(hex_digits)
    sync_stream = b"\xff" * 6
    return sync_stream + hw_addr * 16
|
||||||
|
|
||||||
|
|
||||||
|
def send_local_broadcast(mac: str, broadcast: str = "255.255.255.255", port: int = 9) -> None:
    """Broadcast the magic packet for *mac* over UDP from this process.

    The packet is sent to ``(broadcast, port)`` and then again to port 7 on
    the same broadcast address. As the module docstring notes, a packet sent
    from this container may not reach the LAN in some topologies.
    """
    payload = build_magic_packet(mac)
    # The context manager closes the socket even if a send raises.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
        sock.sendto(payload, (broadcast, port))
        # Port 7 is the alternate WoL convention; send there as well for safety.
        sock.sendto(payload, (broadcast, 7))
|
||||||
|
|
||||||
|
|
||||||
|
async def send_via_peer(host: str, user: str, mac: str, settings: Settings) -> tuple[bool, str]:
    """Ask a reachable peer (over SSH) to broadcast the WoL packet for *mac*.

    The peer runs a ``python3 -c`` one-liner, so neither ``wakeonlan`` nor
    ``etherwake`` needs to be installed there. The one-liner broadcasts the
    magic packet to UDP ports 9 and 7 and prints ``sent``.

    Returns:
        ``(ok, detail)``. ``ok`` is true when the remote command exited 0 and
        its output contains ``sent``. ``detail`` is the stripped stderr,
        falling back to the stripped stdout, then to ``rc=<code>``.

    Raises:
        ValueError: if *mac* is not a valid MAC address (from ``normalize_mac``).
    """
    # normalize_mac() has validated this as hex digits (plus separators, which
    # are stripped here), so interpolating it into the command cannot inject
    # shell or Python syntax.
    mac_hex = normalize_mac(mac).replace(":", "")
    statements = [
        "import socket",
        f"m=bytes.fromhex('{mac_hex}')",
        "s=socket.socket(socket.AF_INET, socket.SOCK_DGRAM)",
        "s.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)",
        "s.sendto(b'\\xff'*6 + m*16, ('255.255.255.255', 9))",
        "s.sendto(b'\\xff'*6 + m*16, ('255.255.255.255', 7))",
        "print('sent')",
    ]
    # The remote script uses only single quotes internally, so it can be
    # wrapped in double quotes for the remote shell.
    command = 'python3 -c "' + "; ".join(statements) + '"'
    rc, out, err = await ssh_run(host, user, command, settings, timeout=8)
    delivered = rc == 0 and "sent" in out
    detail = err.strip() or out.strip() or f"rc={rc}"
    return delivered, detail
|
||||||
@@ -76,6 +76,24 @@ const inputSpec = InputSpec.of({
|
|||||||
placeholder: 'magpie-tts',
|
placeholder: 'magpie-tts',
|
||||||
masked: false,
|
masked: false,
|
||||||
}),
|
}),
|
||||||
|
open_webui_url: Value.text({
|
||||||
|
name: 'Open WebUI URL (optional)',
|
||||||
|
description:
|
||||||
|
'If you also run Open WebUI on your LAN, paste its URL here. Spark Control will then show a one-click "Open chat" button next to the current model so you can jump straight to it.',
|
||||||
|
required: false,
|
||||||
|
default: null,
|
||||||
|
placeholder: 'e.g. https://open-webui.yourserver.local',
|
||||||
|
masked: false,
|
||||||
|
}),
|
||||||
|
ngc_api_key: Value.text({
|
||||||
|
name: 'NGC API key (optional)',
|
||||||
|
description:
|
||||||
|
'NVIDIA NGC personal API key — needed to install NIM containers (Parakeet, Magpie, etc.) from nvcr.io. Get one free at https://ngc.nvidia.com/setup/personal-key. Stored only on this Start9 server; passed to docker as the NGC_API_KEY env var when installing NIM services.',
|
||||||
|
required: false,
|
||||||
|
default: null,
|
||||||
|
placeholder: 'starts with "nvapi-..."',
|
||||||
|
masked: true,
|
||||||
|
}),
|
||||||
})
|
})
|
||||||
|
|
||||||
export const configureSparks = sdk.Action.withInput(
|
export const configureSparks = sdk.Action.withInput(
|
||||||
|
|||||||
@@ -14,6 +14,10 @@ export const sparkConfigSchema = z.object({
|
|||||||
magpie_host: z.string().catch(''),
|
magpie_host: z.string().catch(''),
|
||||||
magpie_user: z.string().catch(''),
|
magpie_user: z.string().catch(''),
|
||||||
magpie_container: z.string().catch(''),
|
magpie_container: z.string().catch(''),
|
||||||
|
// Optional Open WebUI deep-link
|
||||||
|
open_webui_url: z.string().catch(''),
|
||||||
|
// Optional NGC API key for pulling NIM containers from nvcr.io/nim/...
|
||||||
|
ngc_api_key: z.string().catch(''),
|
||||||
})
|
})
|
||||||
|
|
||||||
export type SparkConfig = z.infer<typeof sparkConfigSchema>
|
export type SparkConfig = z.infer<typeof sparkConfigSchema>
|
||||||
|
|||||||
@@ -19,6 +19,8 @@ export const main = sdk.setupMain(async ({ effects }) => {
|
|||||||
magpie_host: '',
|
magpie_host: '',
|
||||||
magpie_user: '',
|
magpie_user: '',
|
||||||
magpie_container: '',
|
magpie_container: '',
|
||||||
|
open_webui_url: '',
|
||||||
|
ngc_api_key: '',
|
||||||
}
|
}
|
||||||
|
|
||||||
return sdk.Daemons.of(effects).addDaemon('primary', {
|
return sdk.Daemons.of(effects).addDaemon('primary', {
|
||||||
@@ -47,6 +49,10 @@ export const main = sdk.setupMain(async ({ effects }) => {
|
|||||||
MAGPIE_USER: cfg.magpie_user,
|
MAGPIE_USER: cfg.magpie_user,
|
||||||
MAGPIE_CONTAINER: cfg.magpie_container,
|
MAGPIE_CONTAINER: cfg.magpie_container,
|
||||||
MODELS_OVERRIDES: '/data/models-overrides.yaml',
|
MODELS_OVERRIDES: '/data/models-overrides.yaml',
|
||||||
|
SERVICES_OVERRIDES: '/data/services-overrides.yaml',
|
||||||
|
CONNECTIVITY_LOG: '/data/connectivity.json',
|
||||||
|
OPEN_WEBUI_URL: cfg.open_webui_url,
|
||||||
|
NGC_API_KEY: cfg.ngc_api_key,
|
||||||
BIND_PORT: String(uiPort),
|
BIND_PORT: String(uiPort),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
import { VersionInfo, IMPOSSIBLE } from '@start9labs/start-sdk'
|
import { VersionInfo, IMPOSSIBLE } from '@start9labs/start-sdk'
|
||||||
|
|
||||||
export const v0_1_0 = VersionInfo.of({
|
export const v0_1_0 = VersionInfo.of({
|
||||||
version: '0.2.3:0',
|
version: '0.5.0:0',
|
||||||
releaseNotes: {
|
releaseNotes: {
|
||||||
en_US:
|
en_US:
|
||||||
'Per-model Advanced settings + downloaded-model catalog flow. Each card now has an Advanced button: max context tokens, GPU memory %, and optimization toggles (fastsafetensors, prefix caching, FP8 KV cache). After a download finishes, a dialog appears to add the model to the catalog with those same knobs as launch defaults. Custom models can be deleted. Overrides persist in /data/models-overrides.yaml and survive package updates.',
|
'v0.5: Wake-on-LAN + connectivity history. Each Spark\'s MAC is now auto-discovered during the normal hardware sweep and cached in /data/connectivity.json. Up/down transitions are logged with duration. Unreachable hardware cards get a "Wake (WoL)" button that sends a magic packet (preferring the other Spark as the sender so it originates on the right LAN segment). New "Connectivity log" button in the hardware section shows the recent transitions for each Spark — useful for spotting patterns (e.g. always-at-noon dropouts).',
|
||||||
},
|
},
|
||||||
migrations: {
|
migrations: {
|
||||||
up: async ({ effects }) => {},
|
up: async ({ effects }) => {},
|
||||||
|
|||||||
Reference in New Issue
Block a user