v0.26.0:0 - disk-driven model menu (scan sparks; recipes; needs-setup)
The dashboard menu is now the set of models actually downloaded on the Sparks, not a hard-coded catalog. models.yaml + overrides are reframed as launch recipes matched to an on-disk model by repo; an on-disk model with no recipe is flagged needs_setup, and its launch settings are inferred from its config.json for a one-time operator confirmation (discovery.py).
- delete now removes weights AND the menu card (delete_from_disk sweeps all hosts; the delete endpoint resolves keys via the live menu)
- new GET /api/models/suggest; /api/models returns the menu + a recipes list (download autocomplete); GET /api/models/disk-status removed
- dropped the two legacy Qwen recipes (235B FP8, 2.5 72B)
- tests: +test_discovery.py (cache parsing, infer_recipe, build_menu merge)
This commit is contained in:
+89
-3
@@ -10,6 +10,7 @@ model or one tied to an in-flight swap/download.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
@@ -36,6 +37,87 @@ def repo_to_cache_dirname(repo: str) -> str:
|
||||
return dn
|
||||
|
||||
|
||||
def cache_dirname_to_repo(dirname: str) -> Optional[str]:
    """Map a hub cache directory name back to its repo id.

    'models--org--name' becomes 'org/name'. The text after the 'models--'
    prefix is split on '--': the first segment is the org, and all remaining
    segments are re-joined with '--' to form the name, so single dashes inside
    the name are left untouched.

    Returns None when the prefix is missing, when there is no name segment, or
    when the org or the first name segment is empty.
    """
    prefix = "models--"
    if not dirname.startswith(prefix):
        return None

    org, *name_segments = dirname[len(prefix):].split("--")
    if not org or not name_segments or not name_segments[0]:
        return None

    return org + "/" + "--".join(name_segments)
|
||||
|
||||
|
||||
def parse_cache_listing(out: str) -> list[tuple[str, int, bool]]:
    """Turn 'size|complete|dirname' scan output into (repo, size_bytes, complete) tuples.

    Each line is stripped and split on the first two '|' characters. Lines with
    fewer than three fields, or whose dirname does not map to a repo via
    `cache_dirname_to_repo`, are skipped. A size that is not an integer is
    recorded as 0, and 'complete' is True only when the flag field is '1'.

    Kept as a pure function so it can be tested without any SSH access.
    """
    parsed: list[tuple[str, int, bool]] = []
    for raw_line in out.splitlines():
        fields = raw_line.strip().split("|", 2)
        if len(fields) != 3:
            # Fewer than two separators: not a scan record.
            continue
        size_field, complete_field, dirname = fields

        repo = cache_dirname_to_repo(dirname.strip())
        if not repo:
            continue

        try:
            size_bytes = int(size_field)
        except ValueError:
            size_bytes = 0

        parsed.append((repo, size_bytes, complete_field.strip() == "1"))
    return parsed
|
||||
|
||||
|
||||
async def list_cached_models(host: str, user: str, settings: Settings) -> list[tuple[str, int, bool]]:
    """List the Hugging Face models cached on `host` as (repo, size_bytes, complete).

    A single remote shell loop walks "$HOME/.cache/huggingface/hub"/models--*
    and prints one 'size|complete|dirname' line per directory:

    * size is the first column of `du -sb` (0 when du prints nothing);
    * complete is 1 when at least one snapshots/*/config.json exists, else 0;
    * the `[ -d ]` test skips the literal pattern left behind by a glob that
      matched nothing.

    Returns [] when host or user is empty or when the remote command exits
    non-zero; otherwise returns the parsed listing.
    """
    if not (host and user):
        return []

    # Joined with single spaces; the resulting command is one shell line.
    scan_cmd = " ".join(
        (
            'HUB="$HOME/.cache/huggingface/hub";',
            'for d in "$HUB"/models--*; do',
            '[ -d "$d" ] || continue;',
            'n=$(basename "$d");',
            'sz=$(du -sb "$d" 2>/dev/null | cut -f1); sz=${sz:-0};',
            'if ls "$d"/snapshots/*/config.json >/dev/null 2>&1; then c=1; else c=0; fi;',
            'echo "${sz}|${c}|${n}";',
            'done',
        )
    )
    rc, out, _ = await ssh_run(host, user, scan_cmd, settings, timeout=30.0)
    return parse_cache_listing(out) if rc == 0 else []
|
||||
|
||||
|
||||
async def read_model_config(host: str, user: str, repo: str, settings: Settings) -> Optional[dict]:
    """Read a cached model's config.json for launch inference.

    The file is taken from the first snapshot (in `ls` order) that actually
    contains a config.json. This matches the completeness rule used by
    `list_cached_models` ("at least one snapshot carries a config.json"), so a
    model reported as complete is readable here even when an earlier snapshot
    directory is partial.

    Returns the parsed JSON object, or None when host/user is empty, the remote
    command fails or prints nothing, the output is not valid JSON, or the JSON
    top level is not an object.

    The cache dirname comes from `repo_to_cache_dirname` and is interpolated
    into the shell command inside double quotes.
    NOTE(review): this relies on that helper restricting the name to
    shell-safe characters; confirm against its implementation.
    """
    if not host or not user:
        return None

    cache_dir = repo_to_cache_dirname(repo)
    cmd = (
        # Glob on config.json itself so a snapshot without one is never picked.
        f'F=$(ls "$HOME/.cache/huggingface/hub/{cache_dir}/snapshots/"*/config.json '
        '2>/dev/null | head -1); '
        # An empty $F makes the test fail, giving the non-zero rc handled below.
        '[ -n "$F" ] && cat "$F" 2>/dev/null'
    )
    rc, out, _ = await ssh_run(host, user, cmd, settings, timeout=20.0)
    if rc != 0 or not out.strip():
        return None

    try:
        config = json.loads(out)
    except (ValueError, TypeError):
        return None

    # Valid JSON is not necessarily an object; honour the Optional[dict] contract.
    return config if isinstance(config, dict) else None
|
||||
|
||||
|
||||
@dataclass
|
||||
class HostDiskResult:
|
||||
host: str
|
||||
@@ -159,10 +241,14 @@ async def delete_host(host: str, user: str, repo: str, settings: Settings) -> Ho
|
||||
return HostDiskResult(host=host, on_disk=False, size_bytes=freed)
|
||||
|
||||
|
||||
async def delete_from_disk(repo: str, mode: str, settings: Settings) -> DiskStatus:
|
||||
"""rm -rf the model's cache dir on the relevant Sparks. Idempotent."""
|
||||
async def delete_from_disk(repo: str, settings: Settings) -> DiskStatus:
|
||||
"""rm -rf the model's cache dir on ALL configured Sparks. Idempotent.
|
||||
|
||||
We sweep both Sparks regardless of the model's declared mode: a 'remove from
|
||||
disk & menu' must leave nothing behind, and rm of an absent dir reports 0
|
||||
bytes freed (FREED 0), so an extra host is harmless."""
|
||||
hosts: list[tuple[str, str]] = [(settings.spark1_host, settings.spark1_user)]
|
||||
if mode == "cluster" and settings.spark2_host:
|
||||
if settings.spark2_host:
|
||||
hosts.append((settings.spark2_host, settings.spark2_user))
|
||||
|
||||
results = await asyncio.gather(*(delete_host(h, u, repo, settings) for h, u in hosts))
|
||||
|
||||
Reference in New Issue
Block a user