v0.26.0:0 - disk-driven model menu (scan sparks; recipes; needs-setup)

The dashboard menu is now the set of models actually downloaded on the Sparks, not a hard-coded catalog. models.yaml + overrides are reframed as launch recipes matched to an on-disk model by repo; an on-disk model with no recipe is flagged needs_setup, and its launch settings are inferred from its config.json for a one-time operator confirmation (discovery.py).

- delete now removes the weights AND the menu card (delete_from_disk sweeps all hosts; the delete endpoint resolves keys via the live menu)
- new GET /api/models/suggest; /api/models now returns the menu plus a recipes list (for download autocomplete); GET /api/models/disk-status removed
- dropped the two legacy Qwen recipes (235B FP8, 2.5 72B)
- tests: +test_discovery.py (cache parsing, infer_recipe, build_menu merge)
2026-06-18 11:09:56 -05:00
parent c0b35184ba
commit df9f244eae
14 changed files with 795 additions and 238 deletions
@@ -15,7 +15,8 @@ from .coordination import LockHeld, ScheduleRegistry, SwapLockManager, WebhookNo
 from .custom_services import add_custom_service, delete_custom_service
 from .audio_proxy import build_router as build_audio_router
 from .deep_health import DeepHealth
-from .disk import delete_from_disk, probe_disk
+from .discovery import build_menu, infer_recipe, repo_to_key
+from .disk import delete_from_disk, probe_host, read_model_config
 from .download import DownloadManager
 from .llm_proxy import build_router as build_llm_router
 from .embeddings_proxy import build_router as build_embeddings_router
@@ -25,7 +26,7 @@ from .health import check_kokoro, check_parakeet, check_vllm, check_embeddings,
 from .matrix_bridge import MatrixBridgeManager
 from .models import ModelDef, load_catalog
 from .nim import SUGGESTED_NIMS, CATALOG_URL, NimManager
-from .overrides import add_custom, delete_custom, extract_knobs_from_args, load_overrides, set_knobs
+from .overrides import add_custom, delete_custom, load_overrides, set_knobs
 from .services import docker_state, run_action, services_from_settings
 from .shellsafe import validate_container, validate_image, validate_repo
 from .speech_models import SpeechModelsManager
@@ -161,20 +162,65 @@ def _reload_catalog() -> None:
    swap_manager.reload_catalog(catalog)


+def _recipe_summaries() -> list[dict]:
+    """Known launch recipes (bundled + saved), for the download panel's autocomplete.
+
+    These are NOT the menu — the menu is what's on disk. This is just the set of
+    repos Spark Control already knows how to launch, so the download box can
+    suggest them by name without putting phantom cards on the dashboard."""
+    out = []
+    for m in catalog.models.values():
+        if m.repo:
+            out.append({"repo": m.repo, "display_name": m.display_name, "mode": m.mode})
+    return out
+
+
@app.get("/api/models")
 async def get_models() -> dict:
-    out_models: dict[str, dict] = {}
-    for key, m in catalog.models.items():
-        d = m.model_dump()
-        # Always include effective knobs for the UI (defaults from base args + any overrides)
-        d["effective_knobs"] = {**extract_knobs_from_args(m.vllm_args), **(m.knobs or {})}
-        out_models[key] = d
+    """The model menu = what's actually downloaded on the Sparks (one scan per
+    Spark), each annotated with its launch recipe or flagged `needs_setup`.
+
+    Does SSH, so it's the slower of the model endpoints; the front-end calls it on
+    load, after a swap/download/delete, and on a slow timer — not every poll."""
+    if not settings.configured:
+        return {"configured": False, "defaults": catalog.defaults.model_dump(), "models": {}, "recipes": []}
+    menu = await build_menu(settings, catalog)
    return {
+        "configured": True,
        "defaults": catalog.defaults.model_dump(),
-        "models": out_models,
+        "models": menu,
+        "recipes": _recipe_summaries(),
    }


+@app.get("/api/models/suggest")
+async def suggest_model(repo: str = Query(...)) -> dict:
+    """Read a downloaded model's config.json + size and propose a launch recipe.
+
+    Prefills the 'set up this model' form for an on-disk model that has no recipe
+    yet. The operator confirms/edits, then POSTs it to /api/models to save."""
+    if not settings.configured:
+        raise HTTPException(503, "spark1 not configured")
+    try:
+        validate_repo(repo)
+    except ValueError as e:
+        raise HTTPException(400, str(e))
+    hosts = [(settings.spark1_host, settings.spark1_user)]
+    if settings.spark2_host:
+        hosts.append((settings.spark2_host, settings.spark2_user))
+    # Config from whichever Spark has it; size summed across the Sparks that do.
+    sizes = await asyncio.gather(*(probe_host(h, u, repo, settings) for h, u in hosts))
+    total = sum(r.size_bytes for r in sizes if r.on_disk)
+    on_hosts = sum(1 for r in sizes if r.on_disk)
+    config = None
+    for (h, u), r in zip(hosts, sizes):
+        if r.on_disk:
+            config = await read_model_config(h, u, repo, settings)
+            if config is not None:
+                break
+    return infer_recipe(repo, config or {}, total, on_hosts)
+
+
 class KnobsBody(BaseModel):
    knobs: dict

@@ -238,71 +284,43 @@ async def del_model(key: str) -> dict:
    return {"ok": True, "key": key}


-@app.get("/api/models/disk-status")
-async def get_models_disk_status() -> dict:
-    """Probe each catalog model's HF cache on the appropriate Spark(s) in parallel.
-
-    Result is keyed by model key: {on_disk, total_bytes, per_host:[{host,on_disk,size_bytes,error?}]}.
-    Designed to be called once on dashboard load; takes ~1–3s depending on Spark count.
-    """
-    if not settings.configured:
-        return {"configured": False, "models": {}}
-    keys = list(catalog.models.keys())
-    statuses = await asyncio.gather(*(
-        probe_disk(
-            catalog.models[k].repo,
-            catalog.models[k].mode,
-            settings,
-            local_path=catalog.models[k].local_path,
-        )
-        for k in keys
-    ), return_exceptions=True)
-    out: dict[str, dict] = {}
-    for k, s in zip(keys, statuses):
-        if isinstance(s, Exception):
-            out[k] = {"on_disk": False, "total_bytes": 0, "per_host": [], "error": str(s)}
-            continue
-        out[k] = {
-            "on_disk": s.on_disk,
-            "total_bytes": s.total_bytes,
-            "per_host": [
-                {"host": r.host, "on_disk": r.on_disk, "size_bytes": r.size_bytes, **({"error": r.error} if r.error else {})}
-                for r in s.per_host
-            ],
-        }
-    return {"configured": True, "models": out}
-
-
@app.delete("/api/models/{key}/disk")
 async def del_model_disk(key: str) -> dict:
-    """Delete a model's weights from the Spark filesystem(s). The catalog entry stays.
+    """Remove a model's weights from the Sparks — and thus from the menu, since the
+    menu IS the disk. Resolves the key against the live menu, so a discovered
+    model (no saved recipe) is deletable too.

    Safety rails:
+      - Refuses a local/fine-tuned directory (hand-placed, not re-downloadable).
      - Refuses if the model is currently loaded on vLLM.
-      - Refuses if a swap or download is in flight.
-      - Idempotent: if the cache dir is already gone on a host, that host reports 0 bytes freed.
+      - Refuses if a swap or this model's own download is in flight.
+      - Idempotent across both Sparks: an already-absent cache dir frees 0 bytes.
    """
-    if key not in catalog.models:
+    if not settings.configured:
+        raise HTTPException(503, "spark1 not configured")
+    menu = await build_menu(settings, catalog)
+    entry = menu.get(key)
+    if entry is None:
        raise HTTPException(404, f"unknown model: {key}")
-    m = catalog.models[key]

    # Never rm a local fine-tune directory from the dashboard — it's irreplaceable
    # training output the user placed by hand, not a re-downloadable HF cache.
-    if m.local_path:
+    if entry.get("local_path"):
        raise HTTPException(
            400,
            "this is a local model; its directory must be managed on the Spark, not deleted from here",
        )
+    repo = entry["repo"]

    # Refuse if currently loaded
    try:
        vllm = await check_vllm(settings)
    except Exception:
        vllm = {}
-    if vllm.get("ok") and vllm.get("current_model") == m.repo:
+    if vllm.get("ok") and vllm.get("current_model") == repo:
        raise HTTPException(
            409,
-            f"'{m.display_name}' is the currently loaded model. Switch to a different model first, then try again."
+            f"'{entry['display_name']}' is the currently loaded model. Switch to a different model first, then try again."
        )

    # Refuse if a swap is in flight
@@ -312,10 +330,10 @@ async def del_model_disk(key: str) -> dict:
    # Refuse if a download is in flight for this same repo (a different model's download is fine)
    if download_manager.current_job_id:
        job = download_manager.get(download_manager.current_job_id)
-        if job and job.repo == m.repo:
+        if job and job.repo == repo:
            raise HTTPException(409, "this model is currently downloading; cancel or wait for it to finish")

-    status = await delete_from_disk(m.repo, m.mode, settings)
+    status = await delete_from_disk(repo, settings)
    # Audit log
    record_report(
        f"disk:{key}",
@@ -326,7 +344,7 @@ async def del_model_disk(key: str) -> dict:
    return {
        "ok": True,
        "key": key,
-        "repo": m.repo,
+        "repo": repo,
        "bytes_freed": status.total_bytes,
        "per_host": [
            {"host": r.host, "size_bytes": r.size_bytes, **({"error": r.error} if r.error else {})}
@@ -881,10 +899,13 @@ async def get_status() -> dict:
 def _identify_current_model(repo: str | None) -> str | None:
    if not repo:
        return None
+    # A recipe-backed model keys by its recipe key; a discovered model (loaded but
+    # not yet set up) keys by the same slug build_menu uses, so it still
+    # highlights as the active card.
    for key, m in catalog.models.items():
        if m.repo == repo:
            return key
-    return None
+    return repo_to_key(repo)


 class SwapRequest(BaseModel):