v0.26.0:0 - disk-driven model menu (scan sparks; recipes; needs-setup)
The dashboard menu is now the set of models actually downloaded on the Sparks, not a hard-coded catalog. models.yaml and its overrides are reframed as launch recipes, each matched to an on-disk model by repo. An on-disk model with no recipe is flagged needs_setup, and its launch settings are inferred from its config.json for a one-time operator confirmation (discovery.py).
- delete now removes the weights AND the menu card (delete_from_disk sweeps all hosts; the delete endpoint resolves keys via the live menu)
- new GET /api/models/suggest; /api/models returns the menu plus a recipes list (for download autocomplete); GET /api/models/disk-status removed
- dropped the two legacy Qwen recipes (235B FP8, 2.5 72B)
- tests: +test_discovery.py (cache parsing, infer_recipe, build_menu merge)
This commit is contained in:
+77
-56
@@ -15,7 +15,8 @@ from .coordination import LockHeld, ScheduleRegistry, SwapLockManager, WebhookNo
|
||||
from .custom_services import add_custom_service, delete_custom_service
|
||||
from .audio_proxy import build_router as build_audio_router
|
||||
from .deep_health import DeepHealth
|
||||
from .disk import delete_from_disk, probe_disk
|
||||
from .discovery import build_menu, infer_recipe, repo_to_key
|
||||
from .disk import delete_from_disk, probe_host, read_model_config
|
||||
from .download import DownloadManager
|
||||
from .llm_proxy import build_router as build_llm_router
|
||||
from .embeddings_proxy import build_router as build_embeddings_router
|
||||
@@ -25,7 +26,7 @@ from .health import check_kokoro, check_parakeet, check_vllm, check_embeddings,
|
||||
from .matrix_bridge import MatrixBridgeManager
|
||||
from .models import ModelDef, load_catalog
|
||||
from .nim import SUGGESTED_NIMS, CATALOG_URL, NimManager
|
||||
from .overrides import add_custom, delete_custom, extract_knobs_from_args, load_overrides, set_knobs
|
||||
from .overrides import add_custom, delete_custom, load_overrides, set_knobs
|
||||
from .services import docker_state, run_action, services_from_settings
|
||||
from .shellsafe import validate_container, validate_image, validate_repo
|
||||
from .speech_models import SpeechModelsManager
|
||||
@@ -161,20 +162,65 @@ def _reload_catalog() -> None:
|
||||
swap_manager.reload_catalog(catalog)
|
||||
|
||||
|
||||
def _recipe_summaries() -> list[dict]:
    """Summarize the known launch recipes (bundled + saved) for download autocomplete.

    This is deliberately NOT the menu: the menu is what is on disk. The list
    only names the repos Spark Control already knows how to launch, so the
    download box can suggest them without putting phantom cards on the
    dashboard.

    Returns:
        One ``{"repo", "display_name", "mode"}`` dict per catalog model that
        has a truthy ``repo``; models without a repo are left out.
    """
    return [
        {"repo": recipe.repo, "display_name": recipe.display_name, "mode": recipe.mode}
        for recipe in catalog.models.values()
        if recipe.repo
    ]
|
||||
|
||||
|
||||
@app.get("/api/models")
async def get_models() -> dict:
    """Return the model menu: what is actually downloaded on the Sparks.

    The menu comes from ``build_menu`` (one scan per Spark); each entry is
    annotated with its launch recipe or flagged ``needs_setup``.

    Does SSH, so it's the slower of the model endpoints; the front-end calls
    it on load, after a swap/download/delete, and on a slow timer — not on
    every poll.

    Returns:
        ``{"configured", "defaults", "models", "recipes"}``. When the settings
        are not configured, ``models`` is an empty dict and ``recipes`` an
        empty list, and no scan is attempted.
    """
    if not settings.configured:
        return {
            "configured": False,
            "defaults": catalog.defaults.model_dump(),
            "models": {},
            "recipes": [],
        }

    menu = await build_menu(settings, catalog)
    return {
        "configured": True,
        "defaults": catalog.defaults.model_dump(),
        # The on-disk menu is the single source for "models"; the catalog is
        # exposed separately as "recipes" for the download autocomplete.
        "models": menu,
        "recipes": _recipe_summaries(),
    }
|
||||
|
||||
|
||||
@app.get("/api/models/suggest")
async def suggest_model(repo: str = Query(...)) -> dict:
    """Propose a launch recipe for a downloaded model from its config.json and size.

    Prefills the 'set up this model' form for an on-disk model that has no
    recipe yet. The operator confirms/edits, then POSTs it to /api/models to
    save.

    Args:
        repo: Repository id of the model, checked with ``validate_repo``.

    Returns:
        Whatever ``infer_recipe`` produces for the repo, its config (an empty
        dict when none could be read), the summed size, and the number of
        Sparks holding the model.

    Raises:
        HTTPException: 503 when the settings are not configured; 400 when
            ``validate_repo`` rejects ``repo``.
    """
    if not settings.configured:
        raise HTTPException(503, "spark1 not configured")
    try:
        validate_repo(repo)
    except ValueError as e:
        raise HTTPException(400, str(e))

    # spark1 is always probed; spark2 only when a host is set for it.
    targets = [(settings.spark1_host, settings.spark1_user)]
    if settings.spark2_host:
        targets.append((settings.spark2_host, settings.spark2_user))

    probes = await asyncio.gather(
        *(probe_host(host, user, repo, settings) for host, user in targets)
    )

    # Keep only the Sparks that actually hold the model, in probe order.
    holders = [
        (target, probe) for target, probe in zip(targets, probes) if probe.on_disk
    ]
    total_bytes = sum(probe.size_bytes for _, probe in holders)

    # Config from whichever Spark has it: stop at the first readable one.
    config = None
    for (host, user), _ in holders:
        config = await read_model_config(host, user, repo, settings)
        if config is not None:
            break

    return infer_recipe(repo, config or {}, total_bytes, len(holders))
|
||||
|
||||
|
||||
class KnobsBody(BaseModel):
    """Request body wrapping a free-form ``knobs`` mapping.

    NOTE(review): presumably the payload of the knob-setting endpoint —
    confirm against the route that accepts it.
    """

    knobs: dict
|
||||
|
||||
@@ -238,71 +284,43 @@ async def del_model(key: str) -> dict:
|
||||
return {"ok": True, "key": key}
|
||||
|
||||
|
||||
# NOTE(review): the commit message at the top of this page says
# GET /api/models/disk-status was removed — confirm this endpoint should still exist.
@app.get("/api/models/disk-status")
async def get_models_disk_status() -> dict:
    """Probe every catalog model's HF cache on the appropriate Spark(s) in parallel.

    Designed to be called once on dashboard load; takes ~1–3s depending on
    Spark count.

    Returns:
        ``{"configured", "models"}`` where ``models`` maps each catalog key to
        ``{on_disk, total_bytes, per_host: [{host, on_disk, size_bytes, error?}]}``.
        A probe that raised is reported as not on disk, with the exception
        text under ``error``. When the settings are not configured, ``models``
        is empty and nothing is probed.
    """
    if not settings.configured:
        return {"configured": False, "models": {}}

    def _host_row(host_result) -> dict:
        # "error" is only included when the host probe reported one.
        row = {
            "host": host_result.host,
            "on_disk": host_result.on_disk,
            "size_bytes": host_result.size_bytes,
        }
        if host_result.error:
            row["error"] = host_result.error
        return row

    entries = list(catalog.models.items())
    # return_exceptions=True: one failing probe must not sink the whole report.
    probes = await asyncio.gather(
        *(
            probe_disk(model.repo, model.mode, settings, local_path=model.local_path)
            for _, model in entries
        ),
        return_exceptions=True,
    )

    report: dict[str, dict] = {}
    for (key, _), probe in zip(entries, probes):
        if isinstance(probe, Exception):
            report[key] = {
                "on_disk": False,
                "total_bytes": 0,
                "per_host": [],
                "error": str(probe),
            }
        else:
            report[key] = {
                "on_disk": probe.on_disk,
                "total_bytes": probe.total_bytes,
                "per_host": [_host_row(r) for r in probe.per_host],
            }
    return {"configured": True, "models": report}
|
||||
|
||||
|
||||
@app.delete("/api/models/{key}/disk")
|
||||
async def del_model_disk(key: str) -> dict:
|
||||
"""Delete a model's weights from the Spark filesystem(s). The catalog entry stays.
|
||||
"""Remove a model's weights from the Sparks — and thus from the menu, since the
|
||||
menu IS the disk. Resolves the key against the live menu, so a discovered
|
||||
model (no saved recipe) is deletable too.
|
||||
|
||||
Safety rails:
|
||||
- Refuses a local/fine-tuned directory (hand-placed, not re-downloadable).
|
||||
- Refuses if the model is currently loaded on vLLM.
|
||||
- Refuses if a swap or download is in flight.
|
||||
- Idempotent: if the cache dir is already gone on a host, that host reports 0 bytes freed.
|
||||
- Refuses if a swap or this model's own download is in flight.
|
||||
- Idempotent across both Sparks: an already-absent cache dir frees 0 bytes.
|
||||
"""
|
||||
if key not in catalog.models:
|
||||
if not settings.configured:
|
||||
raise HTTPException(503, "spark1 not configured")
|
||||
menu = await build_menu(settings, catalog)
|
||||
entry = menu.get(key)
|
||||
if entry is None:
|
||||
raise HTTPException(404, f"unknown model: {key}")
|
||||
m = catalog.models[key]
|
||||
|
||||
# Never rm a local fine-tune directory from the dashboard — it's irreplaceable
|
||||
# training output the user placed by hand, not a re-downloadable HF cache.
|
||||
if m.local_path:
|
||||
if entry.get("local_path"):
|
||||
raise HTTPException(
|
||||
400,
|
||||
"this is a local model; its directory must be managed on the Spark, not deleted from here",
|
||||
)
|
||||
repo = entry["repo"]
|
||||
|
||||
# Refuse if currently loaded
|
||||
try:
|
||||
vllm = await check_vllm(settings)
|
||||
except Exception:
|
||||
vllm = {}
|
||||
if vllm.get("ok") and vllm.get("current_model") == m.repo:
|
||||
if vllm.get("ok") and vllm.get("current_model") == repo:
|
||||
raise HTTPException(
|
||||
409,
|
||||
f"'{m.display_name}' is the currently loaded model. Switch to a different model first, then try again."
|
||||
f"'{entry['display_name']}' is the currently loaded model. Switch to a different model first, then try again."
|
||||
)
|
||||
|
||||
# Refuse if a swap is in flight
|
||||
@@ -312,10 +330,10 @@ async def del_model_disk(key: str) -> dict:
|
||||
# Refuse if a download is in flight for this same repo (a different model's download is fine)
|
||||
if download_manager.current_job_id:
|
||||
job = download_manager.get(download_manager.current_job_id)
|
||||
if job and job.repo == m.repo:
|
||||
if job and job.repo == repo:
|
||||
raise HTTPException(409, "this model is currently downloading; cancel or wait for it to finish")
|
||||
|
||||
status = await delete_from_disk(m.repo, m.mode, settings)
|
||||
status = await delete_from_disk(repo, settings)
|
||||
# Audit log
|
||||
record_report(
|
||||
f"disk:{key}",
|
||||
@@ -326,7 +344,7 @@ async def del_model_disk(key: str) -> dict:
|
||||
return {
|
||||
"ok": True,
|
||||
"key": key,
|
||||
"repo": m.repo,
|
||||
"repo": repo,
|
||||
"bytes_freed": status.total_bytes,
|
||||
"per_host": [
|
||||
{"host": r.host, "size_bytes": r.size_bytes, **({"error": r.error} if r.error else {})}
|
||||
@@ -881,10 +899,13 @@ async def get_status() -> dict:
|
||||
def _identify_current_model(repo: str | None) -> str | None:
|
||||
if not repo:
|
||||
return None
|
||||
# A recipe-backed model keys by its recipe key; a discovered model (loaded but
|
||||
# not yet set up) keys by the same slug build_menu uses, so it still
|
||||
# highlights as the active card.
|
||||
for key, m in catalog.models.items():
|
||||
if m.repo == repo:
|
||||
return key
|
||||
return None
|
||||
return repo_to_key(repo)
|
||||
|
||||
|
||||
class SwapRequest(BaseModel):
|
||||
|
||||
Reference in New Issue
Block a user