v0.26.0:0 - disk-driven model menu (scan sparks; recipes; needs-setup)
The dashboard menu is now the set of models actually downloaded on the Sparks, not a hard-coded catalog. models.yaml + overrides are reframed as launch recipes matched to an on-disk model by repo; an on-disk model with no recipe is flagged needs_setup and its launch settings are inferred from its config.json for a one-time operator confirmation (discovery.py).
- delete now removes weights AND the menu card (delete_from_disk sweeps all hosts; the delete endpoint resolves keys via the live menu)
- new GET /api/models/suggest; /api/models returns the menu + a recipes list (download autocomplete); GET /api/models/disk-status removed
- dropped the two legacy Qwen recipes (235B FP8, 2.5 72B)
- tests: +test_discovery.py (cache parsing, infer_recipe, build_menu merge)
This commit is contained in:
@@ -0,0 +1,209 @@
|
||||
"""Disk-driven model menu + launch-recipe inference.
|
||||
|
||||
The dashboard's model list is whatever is actually downloaded on the Sparks
|
||||
(see `disk.list_cached_models`), NOT a hard-coded catalog. The bundled/overridden
|
||||
catalog entries are *launch recipes*: matched to an on-disk model by repo, they
|
||||
say HOW to launch it. A completed model on disk with no matching recipe shows up
|
||||
as `needs_setup` — the first switch reads its `config.json`, proposes a recipe
|
||||
(`infer_recipe`) the operator confirms once, and that confirmed recipe is saved
|
||||
to /data so it's a normal card from then on.
|
||||
|
||||
Why a recipe layer at all, if the menu is the disk? Because a folder on disk
|
||||
doesn't say how to launch it: the per-family parsers (`--reasoning-parser`,
|
||||
`--tool-call-parser`), the MoE backend (some Gemma MoE checkpoints need
|
||||
`marlin` on GB10), and solo-vs-cluster topology can't be read off a directory.
|
||||
We infer a best guess from the model's own config + size, but the operator
|
||||
confirms it — a wrong guess is cheap, a wrong launch is not.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import asyncio
|
||||
import re
|
||||
|
||||
from .config import Settings
|
||||
from .disk import list_cached_models, probe_disk
|
||||
from .overrides import extract_knobs_from_args
|
||||
|
||||
|
||||
# A model whose weights exceed this can't fit one Spark's 128 GB beside a KV
|
||||
# cache, so it must shard across both via Ray. A heuristic prefill only — the
|
||||
# operator confirms mode in the setup form, so the exact cutoff isn't critical.
|
||||
SINGLE_SPARK_BYTES = 115 * 1000 ** 3
|
||||
|
||||
# Generic knob defaults applied to every inferred recipe (the operator can tweak
|
||||
# these in the setup form). Family-specific flags (parsers, MoE backend) are
|
||||
# layered on separately by `_detect_family`.
|
||||
_COMMON_KNOBS = {
|
||||
"max_model_len": 32768,
|
||||
"gpu_memory_utilization": 0.85,
|
||||
"fastsafetensors": True,
|
||||
"prefix_caching": True,
|
||||
"kv_cache_dtype": "fp8",
|
||||
}
|
||||
|
||||
|
||||
def repo_to_key(repo: str) -> str:
    """Derive a stable, URL-safe menu key from a repo id.

    The repo is lower-cased, every run of characters outside ``a-z0-9_-`` is
    collapsed to a single dash, and leading/trailing dashes are trimmed, e.g.
    'RedHatAI/Qwen3.6-35B-A3B-NVFP4' -> 'redhatai-qwen3-6-35b-a3b-nvfp4'.

    Used for discovered models that have no recipe key yet. The menu, the
    inferred recipe, and the active-model lookup all derive the key this way,
    so a loaded-but-unconfigured model still highlights as active.
    """
    lowered = repo.lower()
    slug = re.sub(r"[^a-z0-9_-]+", "-", lowered)
    return slug.strip("-")
|
||||
|
||||
|
||||
def _detect_family(config: dict) -> tuple[str, list[str], list[str]]:
|
||||
"""Return (family_label, vllm_flags, capabilities) inferred from config.json.
|
||||
|
||||
Only family-specific, non-knob flags (parsers, MoE backend) go in vllm_flags;
|
||||
generic knob defaults are handled by the caller. Best-effort and operator-
|
||||
confirmed, so a wrong guess is cheap."""
|
||||
arch = " ".join(config.get("architectures") or [])
|
||||
mtype = str(config.get("model_type") or "")
|
||||
s = (arch + " " + mtype).lower()
|
||||
is_moe = (
|
||||
"moe" in s
|
||||
or any(config.get(k) for k in ("num_experts", "n_routed_experts", "num_local_experts"))
|
||||
)
|
||||
is_vision = (
|
||||
"conditionalgeneration" in s
|
||||
or "vision" in s
|
||||
or "vlforcausallm" in s
|
||||
or "vision_config" in config
|
||||
or "image_token_index" in config
|
||||
)
|
||||
flags: list[str] = []
|
||||
caps: list[str] = []
|
||||
label = "Generic"
|
||||
if mtype.startswith("qwen3") or "qwen3" in s:
|
||||
label = "Qwen3 (MoE)" if is_moe else "Qwen3"
|
||||
flags.append("--reasoning-parser=qwen3")
|
||||
caps.append("reasoning")
|
||||
if is_moe:
|
||||
flags.append("--moe_backend=flashinfer_cutlass")
|
||||
elif "gemma" in s:
|
||||
label = "Gemma (MoE)" if is_moe else "Gemma"
|
||||
flags += ["--reasoning-parser=gemma4", "--tool-call-parser=gemma4", "--enable-auto-tool-choice"]
|
||||
caps += ["reasoning", "tools"]
|
||||
if is_moe:
|
||||
# The fast flashinfer/CUTLASS FP4 path errors on GB10 for Gemma MoE;
|
||||
# marlin is the working fallback (see the Gemma 26B trial notes).
|
||||
flags.append("--moe_backend=marlin")
|
||||
if is_vision and "vision" not in caps:
|
||||
caps.append("vision")
|
||||
return label, flags, caps
|
||||
|
||||
|
||||
def _infer_mode(total_bytes: int, on_host_count: int) -> str:
    """Pick "solo" or "cluster" for a model from its size and host presence.

    A model stays "solo" only when it is on fewer than two hosts AND its
    weights do not exceed ``SINGLE_SPARK_BYTES``; otherwise it is "cluster".
    """
    if on_host_count < 2 and total_bytes <= SINGLE_SPARK_BYTES:
        return "solo"
    return "cluster"
|
||||
|
||||
|
||||
def infer_recipe(repo: str, config: dict, total_bytes: int, on_host_count: int) -> dict:
    """Build a proposed launch recipe for a discovered model.

    The result prefills the setup form. Family flags and capabilities come from
    `_detect_family`, the mode from `_infer_mode`, and the knobs start from a
    copy of `_COMMON_KNOBS`.

    Args:
        repo: Repo id of the model; also the source of the key and display name.
        config: Parsed ``config.json``; a falsy value is treated as empty.
        total_bytes: Weight size in bytes, used for the solo/cluster guess.
        on_host_count: Number of hosts that have the weights.

    Returns:
        A dict with ``key``, ``repo``, ``display_name``, ``mode``,
        ``capabilities``, ``vllm_args``, ``knobs`` and ``family``.
    """
    family, family_flags, capabilities = _detect_family(config or {})
    mode = _infer_mode(total_bytes, on_host_count)

    vllm_args = [*family_flags, "--max-num-batched-tokens=16384"]
    knobs = {**_COMMON_KNOBS}
    if mode == "cluster":
        # Large models shard across both Sparks via Ray; leave more headroom.
        vllm_args.extend(["-tp=2", "--distributed-executor-backend=ray"])
        knobs["gpu_memory_utilization"] = 0.7

    recipe = {
        "key": repo_to_key(repo),
        "repo": repo,
        "display_name": repo.rsplit("/", 1)[-1],
        "mode": mode,
        "capabilities": capabilities,
        "vllm_args": vllm_args,
        "knobs": knobs,
        "family": family,
    }
    return recipe
|
||||
|
||||
|
||||
def _menu_entry_from_recipe(m, *, on_disk: bool, total_bytes: int, per_host: list[dict]) -> dict:
    """Turn a catalog recipe into a menu entry annotated with disk facts.

    Starts from ``m.model_dump()`` and adds ``effective_knobs`` (knobs parsed
    from the recipe's vLLM args, overridden by its saved knobs), a False
    ``needs_setup`` flag, and the supplied disk-presence fields.
    """
    entry = m.model_dump()
    knobs_from_args = extract_knobs_from_args(m.vllm_args)
    saved_knobs = m.knobs or {}
    entry.update(
        effective_knobs={**knobs_from_args, **saved_knobs},
        needs_setup=False,
        on_disk=on_disk,
        total_bytes=total_bytes,
        per_host=per_host,
    )
    return entry
|
||||
|
||||
|
||||
async def build_menu(settings: Settings, catalog) -> dict[str, dict]:
    """Build the disk-driven model menu.

    Every completed model found on the Sparks is annotated with its launch
    recipe(s), matched by repo, or flagged ``needs_setup`` if none matches.
    Local/fine-tuned recipes are then added by probing their directory.

    One cache scan per Spark, run in parallel. A host scan or local probe that
    fails is skipped, not fatal.

    Args:
        settings: Provides the Spark host/user pairs.
        catalog: Recipe catalog; ``catalog.models`` maps key -> recipe.

    Returns:
        Menu entries keyed by recipe key, or by `repo_to_key(repo)` for
        discovered models with no recipe.
    """
    hosts = [(settings.spark1_host, settings.spark1_user)]
    if settings.spark2_host:
        hosts.append((settings.spark2_host, settings.spark2_user))
    scans = await asyncio.gather(
        *(list_cached_models(h, u, settings) for h, u in hosts),
        return_exceptions=True,
    )

    by_repo: dict[str, dict] = {}
    for (host, _user), res in zip(hosts, scans):
        # BaseException, not Exception: with return_exceptions=True a cancelled
        # scan is returned as a CancelledError instance, which must not be
        # iterated as if it were a listing.
        if isinstance(res, BaseException):
            continue
        for repo, size, complete in res:
            e = by_repo.setdefault(repo, {"total_bytes": 0, "per_host": [], "complete": False})
            e["total_bytes"] += size
            e["per_host"].append({"host": host, "size_bytes": size})
            e["complete"] = e["complete"] or complete

    # Keep EVERY recipe for a repo, in catalog order. Keeping only one would
    # hide the others, and could hide the key the active-model lookup returns
    # (it picks the first catalog recipe whose repo matches).
    recipes_by_repo: dict[str, list] = {}
    for k, m in catalog.models.items():
        if m.repo:
            recipes_by_repo.setdefault(m.repo, []).append((k, m))

    menu: dict[str, dict] = {}
    for repo, info in by_repo.items():
        # Skip half-fetched / corrupt caches (no finished snapshot) — they'd show
        # as broken cards. In-flight downloads surface in the download panel.
        if not info["complete"]:
            continue
        matched = recipes_by_repo.get(repo)
        if matched:
            for key, m in matched:
                menu[key] = _menu_entry_from_recipe(
                    m, on_disk=True, total_bytes=info["total_bytes"], per_host=info["per_host"]
                )
            continue
        menu[repo_to_key(repo)] = {
            "display_name": repo.split("/")[-1],
            "repo": repo,
            "local_path": None,
            "size_gb": round(info["total_bytes"] / 1e9, 1),
            "mode": _infer_mode(info["total_bytes"], len(info["per_host"])),
            "capabilities": [],
            "expected_ready_seconds": 300,
            "vllm_args": [],
            "description": None,
            "knobs": None,
            "custom": False,
            "needs_setup": True,
            "effective_knobs": {},
            "on_disk": True,
            "total_bytes": info["total_bytes"],
            "per_host": info["per_host"],
        }

    # Local/fine-tuned recipes live as a directory, not an HF cache entry — probe
    # each by path and include it if present. Their keys are unique catalog keys
    # (and local models carry repo="" per ModelDef), so they never collide with a
    # discovered repo's slug or an HF recipe key above.
    local_recipes = [(k, m) for k, m in catalog.models.items() if m.local_path]
    # Probe concurrently and tolerate individual failures, so one unreachable
    # directory neither serialises the scan nor takes the whole menu down.
    probes = await asyncio.gather(
        *(probe_disk(m.repo, m.mode, settings, local_path=m.local_path) for _k, m in local_recipes),
        return_exceptions=True,
    )
    for (key, m), st in zip(local_recipes, probes):
        if isinstance(st, BaseException) or not st.on_disk:
            continue
        menu[key] = _menu_entry_from_recipe(
            m,
            on_disk=True,
            total_bytes=st.total_bytes,
            per_host=[{"host": r.host, "size_bytes": r.size_bytes} for r in st.per_host if r.on_disk],
        )

    return menu
|
||||
+89
-3
@@ -10,6 +10,7 @@ model or one tied to an in-flight swap/download.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
@@ -36,6 +37,87 @@ def repo_to_cache_dirname(repo: str) -> str:
|
||||
return dn
|
||||
|
||||
|
||||
def cache_dirname_to_repo(dirname: str) -> Optional[str]:
    """Map a cache directory name back to its repo id.

    'models--org--name' -> 'org/name'. The org is the text before the first
    '--' after the 'models--' prefix; the name is everything after it, so a
    name may itself contain dashes (including further '--' runs).

    Returns None when the name lacks the 'models--' prefix, has no '--'
    separator, or has an empty org or empty first name segment.
    """
    prefix = "models--"
    if not dirname.startswith(prefix):
        return None
    org, sep, name = dirname[len(prefix):].partition("--")
    # A name starting with '--' means its first '--'-segment is empty.
    if not sep or not org or not name or name.startswith("--"):
        return None
    return f"{org}/{name}"
|
||||
|
||||
|
||||
def parse_cache_listing(out: str) -> list[tuple[str, int, bool]]:
    """Parse 'size|complete|dirname' lines into (repo, size_bytes, complete).

    Lines with fewer than two '|' separators, or whose dirname is not a model
    cache directory, are skipped. A size that is not an integer becomes 0;
    ``complete`` is True only when its field is exactly "1" after stripping.

    Pure function, so the parsing can be unit-tested without SSH.
    """
    parsed: list[tuple[str, int, bool]] = []
    for raw in out.splitlines():
        fields = raw.strip().split("|", 2)
        if len(fields) != 3:
            continue
        size_field, complete_field, dirname = fields
        repo = cache_dirname_to_repo(dirname.strip())
        if not repo:
            continue
        try:
            size = int(size_field)
        except ValueError:
            size = 0
        parsed.append((repo, size, complete_field.strip() == "1"))
    return parsed
|
||||
|
||||
|
||||
async def list_cached_models(host: str, user: str, settings: Settings) -> list[tuple[str, int, bool]]:
    """List the Hugging Face models cached on one host.

    Runs a single remote shell loop over ``$HOME/.cache/huggingface/hub/models--*``
    that prints ``size|complete|dirname`` per directory, then parses it with
    `parse_cache_listing`. 'complete' means at least one snapshot carries a
    config.json (a finished download, not a half-fetched/corrupt dir). The
    glob's no-match case is handled by the ``[ -d ]`` guard.

    Returns:
        ``[(repo, size_bytes, complete), ...]``; an empty list when host or user
        is missing or the remote command exits non-zero.
    """
    if not (host and user):
        return []
    script = "".join(
        (
            'HUB="$HOME/.cache/huggingface/hub"; ',
            'for d in "$HUB"/models--*; do ',
            '[ -d "$d" ] || continue; ',
            'n=$(basename "$d"); ',
            'sz=$(du -sb "$d" 2>/dev/null | cut -f1); sz=${sz:-0}; ',
            'if ls "$d"/snapshots/*/config.json >/dev/null 2>&1; then c=1; else c=0; fi; ',
            'echo "${sz}|${c}|${n}"; ',
            'done',
        )
    )
    rc, out, _err = await ssh_run(host, user, script, settings, timeout=30.0)
    return parse_cache_listing(out) if rc == 0 else []
|
||||
|
||||
|
||||
async def read_model_config(host: str, user: str, repo: str, settings: Settings) -> Optional[dict]:
    """Read a cached model's config.json for launch inference.

    Picks the first snapshot that actually contains a config.json, rather than
    the first snapshot directory. This matches how `list_cached_models` decides
    a cache is complete (ANY snapshot with a config.json), so a repo reported
    as complete does not come back config-less because an older snapshot is
    partial.

    Args:
        host: Spark hostname; an empty value short-circuits to None.
        user: SSH user; an empty value short-circuits to None.
        repo: Repo id, converted to its cache dirname by
            `repo_to_cache_dirname` before being embedded in the command.
        settings: Passed through to `ssh_run`.

    Returns:
        The parsed config as a dict, or None if it is absent, unreadable, not
        valid JSON, or not a JSON object.
    """
    if not host or not user:
        return None
    dn = repo_to_cache_dirname(repo)
    cmd = (
        f'F=$(ls "$HOME/.cache/huggingface/hub/{dn}/snapshots/"*/config.json 2>/dev/null | head -1); '
        '[ -n "$F" ] && cat "$F" 2>/dev/null'
    )
    rc, out, _err = await ssh_run(host, user, cmd, settings, timeout=20.0)
    if rc != 0 or not out.strip():
        return None
    try:
        parsed = json.loads(out)
    except (ValueError, TypeError):
        return None
    # Valid JSON is not necessarily an object; callers use dict methods on the
    # result, so anything else counts as "no usable config".
    return parsed if isinstance(parsed, dict) else None
|
||||
|
||||
|
||||
@dataclass
|
||||
class HostDiskResult:
|
||||
host: str
|
||||
@@ -159,10 +241,14 @@ async def delete_host(host: str, user: str, repo: str, settings: Settings) -> Ho
|
||||
return HostDiskResult(host=host, on_disk=False, size_bytes=freed)
|
||||
|
||||
|
||||
async def delete_from_disk(repo: str, mode: str, settings: Settings) -> DiskStatus:
|
||||
"""rm -rf the model's cache dir on the relevant Sparks. Idempotent."""
|
||||
async def delete_from_disk(repo: str, settings: Settings) -> DiskStatus:
|
||||
"""rm -rf the model's cache dir on ALL configured Sparks. Idempotent.
|
||||
|
||||
We sweep both Sparks regardless of the model's declared mode: a 'remove from
|
||||
disk & menu' must leave nothing behind, and rm of an absent dir reports 0
|
||||
bytes freed (FREED 0), so an extra host is harmless."""
|
||||
hosts: list[tuple[str, str]] = [(settings.spark1_host, settings.spark1_user)]
|
||||
if mode == "cluster" and settings.spark2_host:
|
||||
if settings.spark2_host:
|
||||
hosts.append((settings.spark2_host, settings.spark2_user))
|
||||
|
||||
results = await asyncio.gather(*(delete_host(h, u, repo, settings) for h, u in hosts))
|
||||
|
||||
+77
-56
@@ -15,7 +15,8 @@ from .coordination import LockHeld, ScheduleRegistry, SwapLockManager, WebhookNo
|
||||
from .custom_services import add_custom_service, delete_custom_service
|
||||
from .audio_proxy import build_router as build_audio_router
|
||||
from .deep_health import DeepHealth
|
||||
from .disk import delete_from_disk, probe_disk
|
||||
from .discovery import build_menu, infer_recipe, repo_to_key
|
||||
from .disk import delete_from_disk, probe_host, read_model_config
|
||||
from .download import DownloadManager
|
||||
from .llm_proxy import build_router as build_llm_router
|
||||
from .embeddings_proxy import build_router as build_embeddings_router
|
||||
@@ -25,7 +26,7 @@ from .health import check_kokoro, check_parakeet, check_vllm, check_embeddings,
|
||||
from .matrix_bridge import MatrixBridgeManager
|
||||
from .models import ModelDef, load_catalog
|
||||
from .nim import SUGGESTED_NIMS, CATALOG_URL, NimManager
|
||||
from .overrides import add_custom, delete_custom, extract_knobs_from_args, load_overrides, set_knobs
|
||||
from .overrides import add_custom, delete_custom, load_overrides, set_knobs
|
||||
from .services import docker_state, run_action, services_from_settings
|
||||
from .shellsafe import validate_container, validate_image, validate_repo
|
||||
from .speech_models import SpeechModelsManager
|
||||
@@ -161,20 +162,65 @@ def _reload_catalog() -> None:
|
||||
swap_manager.reload_catalog(catalog)
|
||||
|
||||
|
||||
def _recipe_summaries() -> list[dict]:
    """Summarise the known launch recipes for the download panel's autocomplete.

    This is NOT the menu (the menu is what's on disk). It is the set of repos
    the catalog already has a recipe for, so the download box can suggest them
    by name without putting phantom cards on the dashboard. Recipes without a
    repo are left out.
    """
    return [
        {"repo": recipe.repo, "display_name": recipe.display_name, "mode": recipe.mode}
        for recipe in catalog.models.values()
        if recipe.repo
    ]
|
||||
|
||||
|
||||
@app.get("/api/models")
async def get_models() -> dict:
    """Return the model menu plus the known-recipe list.

    The menu is what's actually downloaded on the Sparks (one scan per Spark),
    each entry annotated with its launch recipe or flagged `needs_setup`.

    Does SSH, so it's the slower of the model endpoints; the front-end calls it
    on load, after a swap/download/delete, and on a slow timer — not every poll.

    Returns:
        ``{"configured", "defaults", "models", "recipes"}``. When the Sparks are
        not configured, ``models`` and ``recipes`` are empty and no scan runs.
    """
    if not settings.configured:
        return {"configured": False, "defaults": catalog.defaults.model_dump(), "models": {}, "recipes": []}
    menu = await build_menu(settings, catalog)
    return {
        "configured": True,
        "defaults": catalog.defaults.model_dump(),
        "models": menu,
        "recipes": _recipe_summaries(),
    }
|
||||
|
||||
|
||||
@app.get("/api/models/suggest")
async def suggest_model(repo: str = Query(...)) -> dict:
    """Read a downloaded model's config.json + size and propose a launch recipe.

    Prefills the 'set up this model' form for an on-disk model that has no
    recipe yet. The operator confirms/edits, then POSTs it to /api/models to
    save.

    Args:
        repo: Repo id to inspect; checked with `validate_repo`.

    Returns:
        The recipe dict produced by `infer_recipe`. If no host yields a
        config, the inference runs on an empty config.

    Raises:
        HTTPException: 503 when the Sparks are not configured, 400 when the
            repo fails validation.
    """
    if not settings.configured:
        raise HTTPException(503, "spark1 not configured")
    try:
        validate_repo(repo)
    except ValueError as e:
        # Chain the cause so the validation failure stays visible in tracebacks.
        raise HTTPException(400, str(e)) from e

    hosts = [(settings.spark1_host, settings.spark1_user)]
    if settings.spark2_host:
        hosts.append((settings.spark2_host, settings.spark2_user))

    # Config from whichever Spark has it; size summed across the Sparks that do.
    sizes = await asyncio.gather(*(probe_host(h, u, repo, settings) for h, u in hosts))
    present = [r for r in sizes if r.on_disk]
    total = sum(r.size_bytes for r in present)
    on_hosts = len(present)

    config = None
    for (h, u), r in zip(hosts, sizes):
        if not r.on_disk:
            continue
        config = await read_model_config(h, u, repo, settings)
        if config is not None:
            break
    return infer_recipe(repo, config or {}, total, on_hosts)
|
||||
|
||||
|
||||
class KnobsBody(BaseModel):
|
||||
knobs: dict
|
||||
|
||||
@@ -238,71 +284,43 @@ async def del_model(key: str) -> dict:
|
||||
return {"ok": True, "key": key}
|
||||
|
||||
|
||||
@app.get("/api/models/disk-status")
|
||||
async def get_models_disk_status() -> dict:
|
||||
"""Probe each catalog model's HF cache on the appropriate Spark(s) in parallel.
|
||||
|
||||
Result is keyed by model key: {on_disk, total_bytes, per_host:[{host,on_disk,size_bytes,error?}]}.
|
||||
Designed to be called once on dashboard load; takes ~1–3s depending on Spark count.
|
||||
"""
|
||||
if not settings.configured:
|
||||
return {"configured": False, "models": {}}
|
||||
keys = list(catalog.models.keys())
|
||||
statuses = await asyncio.gather(*(
|
||||
probe_disk(
|
||||
catalog.models[k].repo,
|
||||
catalog.models[k].mode,
|
||||
settings,
|
||||
local_path=catalog.models[k].local_path,
|
||||
)
|
||||
for k in keys
|
||||
), return_exceptions=True)
|
||||
out: dict[str, dict] = {}
|
||||
for k, s in zip(keys, statuses):
|
||||
if isinstance(s, Exception):
|
||||
out[k] = {"on_disk": False, "total_bytes": 0, "per_host": [], "error": str(s)}
|
||||
continue
|
||||
out[k] = {
|
||||
"on_disk": s.on_disk,
|
||||
"total_bytes": s.total_bytes,
|
||||
"per_host": [
|
||||
{"host": r.host, "on_disk": r.on_disk, "size_bytes": r.size_bytes, **({"error": r.error} if r.error else {})}
|
||||
for r in s.per_host
|
||||
],
|
||||
}
|
||||
return {"configured": True, "models": out}
|
||||
|
||||
|
||||
@app.delete("/api/models/{key}/disk")
|
||||
async def del_model_disk(key: str) -> dict:
|
||||
"""Delete a model's weights from the Spark filesystem(s). The catalog entry stays.
|
||||
"""Remove a model's weights from the Sparks — and thus from the menu, since the
|
||||
menu IS the disk. Resolves the key against the live menu, so a discovered
|
||||
model (no saved recipe) is deletable too.
|
||||
|
||||
Safety rails:
|
||||
- Refuses a local/fine-tuned directory (hand-placed, not re-downloadable).
|
||||
- Refuses if the model is currently loaded on vLLM.
|
||||
- Refuses if a swap or download is in flight.
|
||||
- Idempotent: if the cache dir is already gone on a host, that host reports 0 bytes freed.
|
||||
- Refuses if a swap or this model's own download is in flight.
|
||||
- Idempotent across both Sparks: an already-absent cache dir frees 0 bytes.
|
||||
"""
|
||||
if key not in catalog.models:
|
||||
if not settings.configured:
|
||||
raise HTTPException(503, "spark1 not configured")
|
||||
menu = await build_menu(settings, catalog)
|
||||
entry = menu.get(key)
|
||||
if entry is None:
|
||||
raise HTTPException(404, f"unknown model: {key}")
|
||||
m = catalog.models[key]
|
||||
|
||||
# Never rm a local fine-tune directory from the dashboard — it's irreplaceable
|
||||
# training output the user placed by hand, not a re-downloadable HF cache.
|
||||
if m.local_path:
|
||||
if entry.get("local_path"):
|
||||
raise HTTPException(
|
||||
400,
|
||||
"this is a local model; its directory must be managed on the Spark, not deleted from here",
|
||||
)
|
||||
repo = entry["repo"]
|
||||
|
||||
# Refuse if currently loaded
|
||||
try:
|
||||
vllm = await check_vllm(settings)
|
||||
except Exception:
|
||||
vllm = {}
|
||||
if vllm.get("ok") and vllm.get("current_model") == m.repo:
|
||||
if vllm.get("ok") and vllm.get("current_model") == repo:
|
||||
raise HTTPException(
|
||||
409,
|
||||
f"'{m.display_name}' is the currently loaded model. Switch to a different model first, then try again."
|
||||
f"'{entry['display_name']}' is the currently loaded model. Switch to a different model first, then try again."
|
||||
)
|
||||
|
||||
# Refuse if a swap is in flight
|
||||
@@ -312,10 +330,10 @@ async def del_model_disk(key: str) -> dict:
|
||||
# Refuse if a download is in flight for this same repo (a different model's download is fine)
|
||||
if download_manager.current_job_id:
|
||||
job = download_manager.get(download_manager.current_job_id)
|
||||
if job and job.repo == m.repo:
|
||||
if job and job.repo == repo:
|
||||
raise HTTPException(409, "this model is currently downloading; cancel or wait for it to finish")
|
||||
|
||||
status = await delete_from_disk(m.repo, m.mode, settings)
|
||||
status = await delete_from_disk(repo, settings)
|
||||
# Audit log
|
||||
record_report(
|
||||
f"disk:{key}",
|
||||
@@ -326,7 +344,7 @@ async def del_model_disk(key: str) -> dict:
|
||||
return {
|
||||
"ok": True,
|
||||
"key": key,
|
||||
"repo": m.repo,
|
||||
"repo": repo,
|
||||
"bytes_freed": status.total_bytes,
|
||||
"per_host": [
|
||||
{"host": r.host, "size_bytes": r.size_bytes, **({"error": r.error} if r.error else {})}
|
||||
@@ -881,10 +899,13 @@ async def get_status() -> dict:
|
||||
def _identify_current_model(repo: str | None) -> str | None:
    """Map the currently loaded repo to the menu key of its card.

    Args:
        repo: Repo id reported as loaded, or None/empty when nothing is loaded.

    Returns:
        None when ``repo`` is falsy; otherwise the key of the first catalog
        recipe whose repo matches, falling back to `repo_to_key(repo)`.
    """
    if not repo:
        return None
    # A recipe-backed model keys by its recipe key; a discovered model (loaded but
    # not yet set up) keys by the same slug build_menu uses, so it still
    # highlights as the active card.
    for key, m in catalog.models.items():
        if m.repo == repo:
            return key
    return repo_to_key(repo)
|
||||
|
||||
|
||||
class SwapRequest(BaseModel):
|
||||
|
||||
+158
-121
@@ -19,8 +19,8 @@ const state = {
|
||||
configured: true,
|
||||
timer_handle: null,
|
||||
deep_health: {},
|
||||
disk_status: {}, // keyed by model key: { on_disk, total_bytes, per_host }
|
||||
disk_status_loaded: false,
|
||||
models_loaded: false, // true once the first disk scan (/api/models) returns
|
||||
recipes: [], // known launch recipes (for the download autocomplete)
|
||||
lock: { held: false }, // GPU swap reservation (coordination layer)
|
||||
schedules: [], // schedules external automation has registered
|
||||
};
|
||||
@@ -65,67 +65,69 @@ function renderCards() {
|
||||
const lockTip = locked
|
||||
? `Reserved by ${state.lock.holder || 'automation'}${state.lock.expires_at ? ' until ' + fmtClock(state.lock.expires_at) : ''}`
|
||||
: '';
|
||||
for (const key of Object.keys(state.models)) {
|
||||
const keys = Object.keys(state.models);
|
||||
if (keys.length === 0) {
|
||||
// The menu is the disk: nothing downloaded (or the scan hasn't returned yet).
|
||||
root.innerHTML = state.models_loaded
|
||||
? `<div class="empty-menu muted">No models downloaded on the Sparks yet. Use <strong>+ Download a new model</strong> above to fetch one — it'll appear here when it's done.</div>`
|
||||
: `<div class="empty-menu muted">Scanning the Sparks for downloaded models…</div>`;
|
||||
return;
|
||||
}
|
||||
for (const key of keys) {
|
||||
const m = state.models[key];
|
||||
const isActive = key === state.current_model_key;
|
||||
const card = document.createElement('div');
|
||||
card.className = 'card' + (isActive ? ' active' : '');
|
||||
card.className = 'card' + (isActive ? ' active' : '') + (m.needs_setup ? ' needs-setup' : '');
|
||||
const desc = m.description
|
||||
? `<div class="desc">${escapeHtml(m.description)}</div>`
|
||||
: '';
|
||||
const customPill = m.custom ? `<span class="tag custom-pill">custom</span>` : '';
|
||||
const localPill = m.local_path ? `<span class="tag local-pill" title="Served from a directory on the Spark, not Hugging Face">local</span>` : '';
|
||||
// Disk-presence pill + trash button. Until /api/models/disk-status comes back,
|
||||
// we don't know — render a neutral placeholder.
|
||||
const disk = state.disk_status[key];
|
||||
let diskPill = '';
|
||||
if (state.disk_status_loaded) {
|
||||
if (disk && disk.on_disk) {
|
||||
const gb = (disk.total_bytes / 1e9);
|
||||
diskPill = `<span class="tag on-disk" title="Weights present on disk">on disk · ${gb.toFixed(1)} GB</span>`;
|
||||
} else {
|
||||
diskPill = `<span class="tag not-on-disk" title="Weights not downloaded">not downloaded</span>`;
|
||||
}
|
||||
}
|
||||
// Trash button — hidden if not on disk; disabled (with tooltip) if currently loaded.
|
||||
// Every card on the menu is on disk by definition — show its real size.
|
||||
const gb = (m.total_bytes || 0) / 1e9;
|
||||
const diskPill = gb > 0
|
||||
? `<span class="tag on-disk" title="Weights present on the Spark(s)">on disk · ${gb.toFixed(1)} GB</span>`
|
||||
: '';
|
||||
const setupPill = m.needs_setup
|
||||
? `<span class="tag setup-pill" title="On disk, but Spark Control hasn't been told how to launch it">needs setup</span>`
|
||||
: '';
|
||||
// Trash = remove weights from disk AND from the menu. Disabled if active / mid-swap.
|
||||
// Never offered for local models: their directory is hand-placed training output,
|
||||
// not a re-downloadable HF cache (the server refuses the delete too).
|
||||
let trashBtn = '';
|
||||
if (state.disk_status_loaded && disk && disk.on_disk && !m.local_path) {
|
||||
if (!m.local_path) {
|
||||
const disabled = isActive || isSwapping;
|
||||
const tip = isActive
|
||||
? 'Currently loaded — switch to another model first'
|
||||
: isSwapping
|
||||
? 'A swap is in progress'
|
||||
: 'Delete weights from disk';
|
||||
trashBtn = `<button class="icon-btn danger" data-disk-del-key="${key}" title="${escapeHtml(tip)}" aria-label="Delete from disk" ${disabled ? 'disabled' : ''}>${trashIcon}</button>`;
|
||||
: 'Remove weights from disk & menu';
|
||||
trashBtn = `<button class="icon-btn danger" data-disk-del-key="${key}" title="${escapeHtml(tip)}" aria-label="Remove from disk and menu" ${disabled ? 'disabled' : ''}>${trashIcon}</button>`;
|
||||
}
|
||||
// Primary card action: "Switch to this" (green) when on disk; "Download" (blue) when not.
|
||||
// Before disk-status loads we render the swap button as a sensible default.
|
||||
const isOnDisk = !state.disk_status_loaded || (disk && disk.on_disk);
|
||||
const dlInFlight = !!(typeof dlState !== 'undefined' && dlState && dlState.job_id);
|
||||
// Primary action: "Current" / "Switch to this", or "Set up & switch" for a
|
||||
// model on disk that has no launch recipe yet.
|
||||
const swapBlocked = isSwapping || locked;
|
||||
const lockTipAttr = locked ? ` title="${escapeHtml(lockTip)}"` : '';
|
||||
let primaryBtn = '';
|
||||
if (isActive) {
|
||||
primaryBtn = `<button class="btn" disabled>Current</button>`;
|
||||
} else if (isOnDisk) {
|
||||
const swapBlocked = isSwapping || locked;
|
||||
const tip = locked ? ` title="${escapeHtml(lockTip)}"` : '';
|
||||
primaryBtn = `<button class="btn primary" data-swap-key="${key}"${tip} ${swapBlocked ? 'disabled' : ''}>Switch to this</button>`;
|
||||
} else if (m.local_path) {
|
||||
// A local model can't be "downloaded" — its directory has to exist on the Spark.
|
||||
primaryBtn = `<button class="btn" disabled title="Directory not found on the Spark — create it there, then refresh">Not found on Spark</button>`;
|
||||
} else if (m.needs_setup) {
|
||||
primaryBtn = `<button class="btn primary" data-setup-key="${key}"${lockTipAttr} ${swapBlocked ? 'disabled' : ''}>Set up & switch</button>`;
|
||||
} else {
|
||||
const tip = dlInFlight ? 'A download is already in progress' : 'Download weights to the Spark(s)';
|
||||
primaryBtn = `<button class="btn info" data-download-key="${key}" title="${escapeHtml(tip)}" ${dlInFlight ? 'disabled' : ''}>Download</button>`;
|
||||
primaryBtn = `<button class="btn primary" data-swap-key="${key}"${lockTipAttr} ${swapBlocked ? 'disabled' : ''}>Switch to this</button>`;
|
||||
}
|
||||
// The Test/Advanced controls need a saved recipe; hide them until setup is done.
|
||||
const recipeActions = m.needs_setup ? '' : `
|
||||
<button class="btn test-btn" data-test-key="${key}" title="Pre-flight check the launch command without starting the engine">Test</button>
|
||||
<button class="btn adv-btn" data-adv-key="${key}" title="Advanced settings">Advanced</button>`;
|
||||
card.innerHTML = `
|
||||
<div class="name">${escapeHtml(m.display_name)}</div>
|
||||
<div class="meta">
|
||||
<span class="tag mode-${m.mode}">${m.mode}</span>
|
||||
<span class="tag">${m.size_gb} GB</span>
|
||||
${diskPill}
|
||||
${setupPill}
|
||||
${customPill}
|
||||
${localPill}
|
||||
${diskPill}
|
||||
${(m.capabilities || []).map(c => `<span class="tag cap">${escapeHtml(c)}</span>`).join('')}
|
||||
</div>
|
||||
${desc}
|
||||
@@ -136,9 +138,7 @@ function renderCards() {
|
||||
</div>
|
||||
<div class="spacer"></div>
|
||||
<div class="card-actions">
|
||||
${primaryBtn}
|
||||
<button class="btn test-btn" data-test-key="${key}" title="Pre-flight check the launch command without starting the engine">Test</button>
|
||||
<button class="btn adv-btn" data-adv-key="${key}" title="Advanced settings">Advanced</button>
|
||||
${primaryBtn}${recipeActions}
|
||||
${trashBtn}
|
||||
</div>
|
||||
<div class="test-result hidden" data-test-result-for="${key}"></div>
|
||||
@@ -148,8 +148,8 @@ function renderCards() {
|
||||
for (const btn of root.querySelectorAll('[data-swap-key]')) {
|
||||
btn.addEventListener('click', () => triggerSwap(btn.dataset.swapKey));
|
||||
}
|
||||
for (const btn of root.querySelectorAll('[data-download-key]')) {
|
||||
btn.addEventListener('click', () => triggerDownloadForKey(btn.dataset.downloadKey));
|
||||
for (const btn of root.querySelectorAll('[data-setup-key]')) {
|
||||
btn.addEventListener('click', () => openSetupForKey(btn.dataset.setupKey));
|
||||
}
|
||||
for (const btn of root.querySelectorAll('[data-adv-key]')) {
|
||||
btn.addEventListener('click', () => openAdvanced(btn.dataset.advKey));
|
||||
@@ -1170,24 +1170,44 @@ async function pollStatus() {
|
||||
}
|
||||
}
|
||||
|
||||
let menuLoadInFlight = false;
|
||||
|
||||
/**
 * Refresh the model menu from GET /api/models and re-render the cards.
 *
 * On success this replaces state.defaults, state.models and state.recipes,
 * marks the menu as loaded, rebuilds the download autocomplete and repaints.
 * Never rejects: a failed request is logged and the previous menu is kept.
 *
 * @returns {Promise<void>}
 */
async function loadModels() {
  // The menu is whatever's downloaded on the Sparks — /api/models does the scan
  // (SSH), so this is the slower model call. Best-effort: a transient failure
  // leaves the previous menu in place rather than blanking the dashboard.
  // Guard against overlap: init() fires this un-awaited and pollStatus()'s
  // empty-menu fallback may call it again before the scan returns.
  if (menuLoadInFlight) return;
  menuLoadInFlight = true;
  try {
    // Single fetch, inside the guard and the try: an earlier un-guarded copy of
    // this request would both double the scan and let a failure escape as a
    // rejected promise.
    const data = await fetchJSON('/api/models');
    state.defaults = data.defaults || {};
    state.models = data.models || {};
    state.recipes = data.recipes || [];
    state.models_loaded = true;
    populateDownloadSuggestions();
    renderCards();
  } catch (e) {
    console.warn('model menu load failed:', e.message);
  } finally {
    // Always release the guard so the next poll/interval can retry.
    menuLoadInFlight = false;
  }
}
|
||||
|
||||
async function loadDiskStatus() {
|
||||
// Probes each catalog model's HF cache over SSH; takes a beat. Best-effort.
|
||||
try {
|
||||
const r = await fetchJSON('/api/models/disk-status');
|
||||
if (r && r.models) {
|
||||
state.disk_status = r.models;
|
||||
state.disk_status_loaded = true;
|
||||
renderCards();
|
||||
}
|
||||
} catch (e) {
|
||||
// Silent — pills just won't render. Don't block dashboard.
|
||||
console.warn('disk-status probe failed:', e.message);
|
||||
// Populate the download box's autocomplete with known recipes not currently on
|
||||
// disk — so common/bundled models stay discoverable without phantom menu cards.
|
||||
/**
 * Rebuild the #dl-suggestions datalist from state.recipes, leaving out every
 * recipe whose repo already belongs to a model in state.models.
 * Does nothing when the datalist element is not in the page.
 */
function populateDownloadSuggestions() {
  const datalist = el('#dl-suggestions');
  if (!datalist) return;

  // Repos that already have a menu card; entries without a repo are ignored.
  const reposOnMenu = new Set();
  for (const model of Object.values(state.models)) {
    if (model.repo) reposOnMenu.add(model.repo);
  }

  // Start from an empty list so repeated calls don't accumulate options.
  datalist.innerHTML = '';

  (state.recipes || [])
    .filter((recipe) => !reposOnMenu.has(recipe.repo))
    .forEach((recipe) => {
      const option = document.createElement('option');
      option.value = recipe.repo;
      option.label = `${recipe.display_name} (${recipe.mode})`;
      datalist.appendChild(option);
    });
}
|
||||
|
||||
@@ -1201,14 +1221,12 @@ function fmtBytesShort(n) {
|
||||
|
||||
function openDiskDeleteDialog(key) {
|
||||
const m = state.models[key];
|
||||
const disk = state.disk_status[key];
|
||||
if (!m || !disk || !disk.on_disk) return;
|
||||
if (!m || !m.on_disk) return;
|
||||
const dlg = el('#disk-delete-dialog');
|
||||
el('#dd-summary').innerHTML = `Free <strong>${fmtBytesShort(disk.total_bytes)}</strong> by removing <strong>${escapeHtml(m.display_name)}</strong> (<code>${escapeHtml(m.repo)}</code>) from disk.`;
|
||||
el('#dd-summary').innerHTML = `Free <strong>${fmtBytesShort(m.total_bytes)}</strong> by removing <strong>${escapeHtml(m.display_name)}</strong> (<code>${escapeHtml(m.repo)}</code>) from the Sparks. This also takes it off the menu.`;
|
||||
const hostsEl = el('#dd-hosts');
|
||||
hostsEl.innerHTML = '';
|
||||
for (const h of (disk.per_host || [])) {
|
||||
if (!h.on_disk) continue;
|
||||
for (const h of (m.per_host || [])) {
|
||||
const li = document.createElement('li');
|
||||
li.innerHTML = `<code>${escapeHtml(h.host)}</code> — ${fmtBytesShort(h.size_bytes)}`;
|
||||
hostsEl.appendChild(li);
|
||||
@@ -1227,20 +1245,19 @@ function openDiskDeleteDialog(key) {
|
||||
try {
|
||||
const r = await fetchJSON(`/api/models/${encodeURIComponent(key)}/disk`, { method: 'DELETE' });
|
||||
dlg.close();
|
||||
// Optimistically clear local disk state for this key, then refresh.
|
||||
delete state.disk_status[key];
|
||||
// Optimistically drop the card, then re-scan the menu (it's gone from disk).
|
||||
delete state.models[key];
|
||||
renderCards();
|
||||
// Eagerly re-probe so size is accurate (and shows "not downloaded" pill).
|
||||
loadDiskStatus();
|
||||
await loadModels();
|
||||
const freed = r && typeof r.bytes_freed === 'number' ? fmtBytesShort(r.bytes_freed) : '';
|
||||
console.log(`Deleted ${m.display_name} from disk${freed ? ` — freed ${freed}` : ''}.`);
|
||||
console.log(`Removed ${m.display_name} from disk${freed ? ` — freed ${freed}` : ''}.`);
|
||||
} catch (e) {
|
||||
errEl.textContent = e.message || 'Delete failed';
|
||||
errEl.classList.remove('hidden');
|
||||
} finally {
|
||||
confirm.disabled = false;
|
||||
cancel.disabled = false;
|
||||
confirm.textContent = 'Delete from disk';
|
||||
confirm.textContent = 'Remove from disk & menu';
|
||||
}
|
||||
};
|
||||
cancel.onclick = onCancel;
|
||||
@@ -1341,38 +1358,6 @@ async function releaseLock() {
|
||||
pollCoordination();
|
||||
}
|
||||
|
||||
/**
 * Start a weight download for the menu entry `modelKey` after the operator
 * confirms, then open the download panel and attach to the job's progress.
 *
 * No-op for an unknown key; refuses (with an alert) while dlState.job_id is set.
 *
 * @param {string} modelKey key into state.models
 * @returns {Promise<void>}
 */
async function triggerDownloadForKey(modelKey) {
  const model = state.models[modelKey];
  if (!model) return;

  if (dlState.job_id) {
    alert('A download is already in progress; wait for it to finish.');
    return;
  }

  // The download target follows the model's mode:
  //   solo    -> spark1 only
  //   cluster -> both Sparks (fetch on Spark 1, rsync to Spark 2 in parallel)
  const isCluster = model.mode === 'cluster';
  const dlMode = isCluster ? 'cluster' : 'spark1';
  const target = isCluster ? 'both Sparks' : 'Spark 1';
  const sizeNote = model.size_gb ? ` (~${model.size_gb} GB)` : '';

  const confirmed = confirm(`Download "${model.display_name}"${sizeNote} to ${target}? Large models can take a while; you can watch progress in the download panel.`);
  if (!confirmed) return;

  // Remembered before the request is sent, so they are set even if it fails.
  dlState.last_repo = model.repo;
  dlState.last_mode = dlMode;

  try {
    const payload = JSON.stringify({ repo: model.repo, mode: dlMode });
    const job = await fetchJSON('/api/download', {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: payload,
    });
    // Open the download panel + attach to progress stream
    openDownloadForm();
    attachToDownload(job.job_id);
  } catch (err) {
    alert('Failed to start download: ' + err.message);
  }
}
|
||||
|
||||
async function attachToSwap(jobId, needsBackfill) {
|
||||
if (state.swap_eventsource) {
|
||||
state.swap_eventsource.close();
|
||||
@@ -1603,12 +1588,14 @@ function handleDownloadDone(d) {
|
||||
el('#dl-title').textContent = 'Done';
|
||||
el('#dl-phase').textContent = 'Done ✓';
|
||||
el('#dl-progress-fill').style.width = '100%';
|
||||
// Offer to add to catalog
|
||||
// The new model now appears on the menu (the menu is the disk). If it matched
|
||||
// a known recipe it's ready to switch to; if not, offer to set it up.
|
||||
const repo = dlState.last_repo;
|
||||
const mode = dlState.last_mode;
|
||||
if (repo) {
|
||||
setTimeout(() => openCatalogDialog(repo, mode), 600);
|
||||
}
|
||||
loadModels().then(() => {
|
||||
if (!repo) return;
|
||||
const entry = Object.values(state.models).find(m => m.repo === repo);
|
||||
if (entry && entry.needs_setup) setTimeout(() => openSetupDialog(repo, { thenSwap: false }), 600);
|
||||
});
|
||||
}
|
||||
dlState.job_id = null;
|
||||
}
|
||||
@@ -1721,21 +1708,67 @@ function openAdvanced(key) {
|
||||
dlg.showModal();
|
||||
}
|
||||
|
||||
function openCatalogDialog(repo, mode) {
|
||||
// Context carried from openSetupDialog -> the submit handler: the inferred
|
||||
// launch flags (parsers/MoE backend) and whether to swap right after saving.
|
||||
let setupCtx = { key: '', repo: '', vllm_args: [], thenSwap: false };
|
||||
|
||||
// "Set up & switch" on a needs-setup card.
|
||||
/**
 * Click handler for "Set up & switch" on a needs-setup card: opens the setup
 * dialog for the card's repo with a swap queued after save.
 *
 * No-op for an unknown key. While state.lock is held it only shows an alert
 * naming the holder and does not open the dialog.
 *
 * @param {string} key key into state.models
 * @returns {Promise<void>}
 */
async function openSetupForKey(key) {
  const model = state.models[key];
  if (!model) return;

  const lock = state.lock;
  if (lock && lock.held) {
    let until = '';
    if (lock.expires_at) {
      until = ' until ' + fmtClock(lock.expires_at);
    }
    alert(`The GPU swap path is reserved by ${lock.holder || 'automation'}${until}. Use "Release" on the reservation banner to override.`);
    return;
  }

  await openSetupDialog(model.repo, { thenSwap: true });
}
|
||||
|
||||
// Open the "set up this model" dialog, prefilled from inference (config.json +
|
||||
// size). The operator confirms once; on save the recipe persists and (if
|
||||
// thenSwap) we switch to it.
|
||||
/**
 * Open the "set up this model" dialog (#catalog-dialog) for `repo`, prefilled
 * from GET /api/models/suggest. If the suggestion request fails the dialog
 * still opens with fallback defaults and a "couldn't auto-detect" note.
 *
 * Also resets the module-level `setupCtx` (key, repo, inferred vllm_args,
 * thenSwap) that the form's submit handler reads.
 *
 * @param {string} repo repo id of the model; the last path segment is the default display name
 * @param {{thenSwap?: boolean}} [opts] thenSwap: switch to the model right after saving
 * @returns {Promise<void>}
 */
async function openSetupDialog(repo, opts = {}) {
  const dlg = el('#catalog-dialog');

  // Best-effort inference: a failed suggestion must not block manual setup.
  let sug = null;
  try {
    sug = await fetchJSON(`/api/models/suggest?repo=${encodeURIComponent(repo)}`);
  } catch (e) {
    console.warn('recipe suggestion failed:', e.message);
  }

  // Used when the server proposes no key: the whole repo id, lower-cased, with
  // runs of other characters collapsed to '-' and leading/trailing '-' trimmed,
  // which fits the #cd-key input's [a-zA-Z0-9_-]+ pattern.
  const fallbackKey = repo.toLowerCase().replace(/[^a-z0-9_-]+/g, '-').replace(/^-+|-+$/g, '');
  setupCtx = {
    key: (sug && sug.key) || fallbackKey,
    repo,
    vllm_args: (sug && sug.vllm_args) || [],
    thenSwap: !!opts.thenSwap,
  };

  // Identity fields. Each is written once, from the suggestion or its fallback;
  // mode comes from the suggestion (this function has no `mode` parameter).
  el('#cd-key').value = setupCtx.key;
  el('#cd-name').value = (sug && sug.display_name) || repo.split('/').pop();
  el('#cd-repo').value = repo;
  el('#cd-size').value = '';
  el('#cd-mode').value = (sug && sug.mode) || 'solo';
  el('#cd-desc').value = '';

  // Launch knobs, falling back to the dialog defaults when not suggested.
  const knobs = (sug && sug.knobs) || {};
  el('#cd-mml').value = knobs.max_model_len || 32768;
  el('#cd-gmu').value = knobs.gpu_memory_utilization || 0.85;
  el('#cd-gmu-out').value = parseFloat(el('#cd-gmu').value).toFixed(2);
  // Checkboxes default to on: only an explicit `false` (or a non-fp8 dtype)
  // in the suggestion turns one off.
  el('#cd-fst').checked = knobs.fastsafetensors !== false;
  el('#cd-pcache').checked = knobs.prefix_caching !== false;
  el('#cd-fp8').checked = (knobs.kv_cache_dtype || 'fp8') === 'fp8';

  // Summary of what was detected; skipped if the page has no #cd-detected.
  const det = el('#cd-detected');
  if (det) {
    if (sug) {
      const caps = (sug.capabilities || []).join(', ');
      const flags = setupCtx.vllm_args.length ? `: <code>${escapeHtml(setupCtx.vllm_args.join(' '))}</code>` : '';
      det.innerHTML = `Detected <strong>${escapeHtml(sug.family || 'Generic')}</strong>${caps ? ` · ${escapeHtml(caps)}` : ''}. Launch flags set automatically${flags}.`;
    } else {
      det.textContent = "Couldn't auto-detect this model's settings — pick mode and knobs manually.";
    }
    det.classList.remove('hidden');
  }

  const submit = el('#cd-submit');
  if (submit) submit.textContent = setupCtx.thenSwap ? 'Save & switch' : 'Save settings';
  dlg.showModal();
}
|
||||
|
||||
@@ -1745,13 +1778,15 @@ function setupCatalogDialog() {
|
||||
el('#catalog-form').addEventListener('submit', async (e) => {
|
||||
e.preventDefault();
|
||||
const body = {
|
||||
key: el('#cd-key').value.trim(),
|
||||
key: el('#cd-key').value.trim() || setupCtx.key,
|
||||
display_name: el('#cd-name').value.trim(),
|
||||
repo: el('#cd-repo').value.trim(),
|
||||
size_gb: parseFloat(el('#cd-size').value) || 0,
|
||||
mode: el('#cd-mode').value,
|
||||
description: el('#cd-desc').value.trim() || null,
|
||||
vllm_args: [],
|
||||
// The inferred family flags (parsers / MoE backend); knob-controlled flags
|
||||
// are layered on by the server from `knobs`, so no duplication.
|
||||
vllm_args: setupCtx.vllm_args || [],
|
||||
knobs: {
|
||||
max_model_len: parseInt(el('#cd-mml').value, 10) || 32768,
|
||||
gpu_memory_utilization: parseFloat(el('#cd-gmu').value),
|
||||
@@ -1769,8 +1804,9 @@ function setupCatalogDialog() {
|
||||
el('#catalog-dialog').close();
|
||||
closeDownloadPanel();
|
||||
await loadModels();
|
||||
if (setupCtx.thenSwap) triggerSwap(body.key);
|
||||
pollStatus();
|
||||
} catch (e) { alert('Add to catalog failed: ' + e.message); }
|
||||
} catch (e) { alert('Saving the model setup failed: ' + e.message); }
|
||||
});
|
||||
}
|
||||
|
||||
@@ -2212,21 +2248,22 @@ async function init() {
|
||||
} catch {}
|
||||
setupDashboardTabs();
|
||||
setupEndpointCollapse();
|
||||
await loadModels();
|
||||
// Fire the (SSH-backed) menu scan without awaiting — it self-renders a
|
||||
// "Scanning…" state and fills in when it returns, so a slow/unreachable
|
||||
// cluster never blocks first paint. pollStatus() below paints the rest.
|
||||
loadModels();
|
||||
await pollStatus();
|
||||
await renderServices();
|
||||
pollCoordination();
|
||||
pollHardware();
|
||||
pollUpdates();
|
||||
// Disk-status probe runs after first paint — slow over SSH and not blocking.
|
||||
loadDiskStatus();
|
||||
// Speech-model patches panel — slow over SSH, runs after first paint.
|
||||
renderSpeechModels();
|
||||
setInterval(pollStatus, 5000);
|
||||
setInterval(pollCoordination, 5000); // swap lock + schedule registry
|
||||
setInterval(pollHardware, 8000); // every 8s
|
||||
setInterval(pollUpdates, 300000); // every 5 min
|
||||
setInterval(loadDiskStatus, 60000); // every 60s — disk state changes rarely
|
||||
setInterval(loadModels, 60000); // every 60s — re-scan the Sparks for added/removed models
|
||||
setInterval(renderSpeechModels, 120000); // every 2 min — patches change rarely
|
||||
}
|
||||
|
||||
|
||||
@@ -241,9 +241,10 @@
|
||||
|
||||
<dialog id="catalog-dialog" class="modal">
|
||||
<form method="dialog" class="modal-form" id="catalog-form">
|
||||
<h3>Add downloaded model to catalog</h3>
|
||||
<p class="muted small">It will appear as a new card you can swap to. Knob values become its default launch flags — you can tweak later via the model's "Advanced" panel.</p>
|
||||
<label class="modal-row"><span>Key (URL-safe id)</span><input type="text" id="cd-key" required pattern="[a-zA-Z0-9_-]+"></label>
|
||||
<h3>Set up this model</h3>
|
||||
<p class="muted small">This model is downloaded, but Spark Control needs to know how to launch it. We've guessed from the model's own files — confirm or adjust, and it's saved so you're never asked again.</p>
|
||||
<p id="cd-detected" class="muted small cd-detected hidden"></p>
|
||||
<label class="modal-row"><span>Key (URL-safe id)</span><input type="text" id="cd-key" required pattern="[a-zA-Z0-9_-]+" readonly></label>
|
||||
<label class="modal-row"><span>Display name</span><input type="text" id="cd-name" required></label>
|
||||
<label class="modal-row"><span>Repo (read-only)</span><input type="text" id="cd-repo" readonly></label>
|
||||
<label class="modal-row"><span>Size (GB)</span><input type="number" id="cd-size" step="0.1" min="0"></label>
|
||||
@@ -264,7 +265,7 @@
|
||||
</fieldset>
|
||||
<div class="modal-actions">
|
||||
<button type="button" id="cd-cancel" class="btn">Cancel</button>
|
||||
<button type="submit" class="btn primary">Add to catalog</button>
|
||||
<button type="submit" id="cd-submit" class="btn primary">Save settings</button>
|
||||
</div>
|
||||
</form>
|
||||
</dialog>
|
||||
@@ -302,14 +303,14 @@
|
||||
|
||||
<dialog id="disk-delete-dialog" class="modal">
|
||||
<form method="dialog" class="modal-form">
|
||||
<h3>Delete model weights from disk?</h3>
|
||||
<h3>Remove this model from the Sparks?</h3>
|
||||
<p id="dd-summary" class="muted small"></p>
|
||||
<ul class="muted small dd-hosts" id="dd-hosts"></ul>
|
||||
<p class="muted small">This is reversible — you can re-download from the catalog at any time. The catalog entry stays intact.</p>
|
||||
<p class="muted small">This deletes the weights and removes the card from the menu. You can always download it again later (re-downloading restores its saved settings).</p>
|
||||
<p id="dd-error" class="muted small dd-error hidden"></p>
|
||||
<div class="modal-actions">
|
||||
<button type="button" id="dd-cancel" class="btn">Cancel</button>
|
||||
<button type="button" id="dd-confirm" class="btn danger">Delete from disk</button>
|
||||
<button type="button" id="dd-confirm" class="btn danger">Remove from disk & menu</button>
|
||||
</div>
|
||||
</form>
|
||||
</dialog>
|
||||
@@ -354,11 +355,12 @@
|
||||
<div class="download-form" id="download-form">
|
||||
<label class="dl-row">
|
||||
<span class="dl-label">HuggingFace repo</span>
|
||||
<input type="text" id="dl-repo" placeholder="e.g. RedHatAI/Qwen3.6-35B-A3B-NVFP4" autocomplete="off">
|
||||
<input type="text" id="dl-repo" placeholder="e.g. RedHatAI/Qwen3.6-35B-A3B-NVFP4" autocomplete="off" list="dl-suggestions">
|
||||
<datalist id="dl-suggestions"></datalist>
|
||||
<a id="dl-hf-link" class="dl-hf-link hidden" href="#" target="_blank" rel="noopener" title="Open on Hugging Face">↗</a>
|
||||
</label>
|
||||
<div class="dl-help muted small">
|
||||
<a href="https://huggingface.co/models?other=vllm" target="_blank" rel="noopener">Browse vLLM-compatible models</a>
|
||||
Type any repo, or pick a known one from the list. <a href="https://huggingface.co/models?other=vllm" target="_blank" rel="noopener">Browse vLLM-compatible models</a>
|
||||
· NVFP4-quantized models (e.g. <code>RedHatAI/...</code>) are best for Blackwell hardware
|
||||
</div>
|
||||
<div class="dl-row">
|
||||
|
||||
@@ -778,6 +778,12 @@ main {
|
||||
.card .local-pill { color: var(--warn); border-color: rgba(245, 158, 11, 0.4); }
|
||||
.tag.on-disk { color: var(--accent); border-color: rgba(74, 222, 128, 0.4); }
|
||||
.tag.not-on-disk { color: var(--muted); border-color: var(--border); opacity: 0.7; }
|
||||
.tag.setup-pill { color: var(--warn); border-color: rgba(245, 158, 11, 0.4); }
|
||||
.card.needs-setup { border-style: dashed; }
|
||||
.card-actions .btn[data-setup-key] { flex: 1; }
|
||||
.empty-menu { grid-column: 1 / -1; padding: 28px 16px; text-align: center; border: 1px dashed var(--border); border-radius: 10px; }
|
||||
.cd-detected { padding: 8px 10px; border: 1px solid var(--border); border-radius: 8px; background: rgba(255,255,255,0.02); }
|
||||
.cd-detected code { word-break: break-all; }
|
||||
.card-actions .icon-btn.danger { color: var(--error); border-color: rgba(239, 68, 68, 0.3); margin-left: auto; }
|
||||
.card-actions .icon-btn.danger:hover:not(:disabled) { background: rgba(239, 68, 68, 0.08); border-color: var(--error); color: var(--error); }
|
||||
.card-actions .icon-btn.danger:disabled { opacity: 0.35; cursor: not-allowed; }
|
||||
|
||||
Reference in New Issue
Block a user