v0.8.1:0 - delete model weights from disk via card trash icon

Each model card now shows whether its weights are present on disk (with GB size) or not yet downloaded. When present and the model isn't currently loaded, a trash icon appears; clicking it pops a confirmation showing exactly how many GB will be freed and on which Spark(s), then runs rm -rf on the HF cache directory via SSH. Cluster-mode models are removed from both Sparks; solo-mode from Spark 1 only.

Safety rails: refuses to delete the currently-loaded model, refuses during an in-flight swap or while that same model is downloading, and the catalog entry stays intact so it can be re-downloaded anytime.

Backend:
- new image/app/disk.py: probe_disk + delete_from_disk over SSH
- GET /api/models/disk-status — parallel probe across all catalog models
- DELETE /api/models/{key}/disk — guarded rm -rf, logs to connectivity events

Frontend:
- on-disk / not-downloaded pills on every card
- trash icon-btn in card-actions row (hidden when not on disk)
- confirmation dialog showing per-host bytes-to-free
- disk-status re-checked every 60s

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 17:07:20 -05:00
parent 1602b3b3b4
commit 9ff7ee9c1e
6 changed files with 345 additions and 2 deletions
@@ -13,6 +13,7 @@ from .config import Settings
 from .connectivity import get_mac, record_report, record_state, summary as connectivity_summary
 from .custom_services import add_custom_service, delete_custom_service
 from .deep_health import DeepHealth
+from .disk import delete_from_disk, probe_disk
 from .download import DownloadManager
 from .hardware import HardwareProbe
 from .health import check_magpie, check_parakeet, check_vllm
@@ -139,6 +140,89 @@ async def del_model(key: str) -> dict:
    return {"ok": True, "key": key}


+@app.get("/api/models/disk-status")
+async def get_models_disk_status() -> dict:
+    """Probe each catalog model's HF cache on the appropriate Spark(s) in parallel.
+
+    Result is keyed by model key: {on_disk, total_bytes, per_host:[{host,on_disk,size_bytes,error?}]}.
+    Designed to be called once on dashboard load; takes ~1–3s depending on Spark count.
+    """
+    if not settings.configured:
+        return {"configured": False, "models": {}}
+    keys = list(catalog.models.keys())
+    statuses = await asyncio.gather(*(
+        probe_disk(catalog.models[k].repo, catalog.models[k].mode, settings) for k in keys
+    ), return_exceptions=True)
+    out: dict[str, dict] = {}
+    for k, s in zip(keys, statuses):
+        if isinstance(s, Exception):
+            out[k] = {"on_disk": False, "total_bytes": 0, "per_host": [], "error": str(s)}
+            continue
+        out[k] = {
+            "on_disk": s.on_disk,
+            "total_bytes": s.total_bytes,
+            "per_host": [
+                {"host": r.host, "on_disk": r.on_disk, "size_bytes": r.size_bytes, **({"error": r.error} if r.error else {})}
+                for r in s.per_host
+            ],
+        }
+    return {"configured": True, "models": out}
+
+
+@app.delete("/api/models/{key}/disk")
+async def del_model_disk(key: str) -> dict:
+    """Delete a model's weights from the Spark filesystem(s). The catalog entry stays.
+
+    Safety rails:
+      - Refuses if the model is currently loaded on vLLM.
+      - Refuses if a swap or download is in flight.
+      - Idempotent: if the cache dir is already gone on a host, that host reports 0 bytes freed.
+    """
+    if key not in catalog.models:
+        raise HTTPException(404, f"unknown model: {key}")
+    m = catalog.models[key]
+
+    # Refuse if currently loaded
+    try:
+        vllm = await check_vllm(settings)
+    except Exception:
+        vllm = {}
+    if vllm.get("ok") and vllm.get("current_model") == m.repo:
+        raise HTTPException(
+            409,
+            f"'{m.display_name}' is the currently loaded model. Switch to a different model first, then try again."
+        )
+
+    # Refuse if a swap is in flight
+    if swap_manager.current_job_id:
+        raise HTTPException(409, "a model swap is in progress; wait for it to finish")
+
+    # Refuse if a download is in flight for this same repo (a different model's download is fine)
+    if download_manager.current_job_id:
+        job = download_manager.get(download_manager.current_job_id)
+        if job and job.repo == m.repo:
+            raise HTTPException(409, "this model is currently downloading; cancel or wait for it to finish")
+
+    status = await delete_from_disk(m.repo, m.mode, settings)
+    # Audit log
+    record_report(
+        f"disk:{key}",
+        ok=True,
+        source="disk-delete",
+        detail=f"freed {status.total_bytes} bytes across {len(status.per_host)} host(s)",
+    )
+    return {
+        "ok": True,
+        "key": key,
+        "repo": m.repo,
+        "bytes_freed": status.total_bytes,
+        "per_host": [
+            {"host": r.host, "size_bytes": r.size_bytes, **({"error": r.error} if r.error else {})}
+            for r in status.per_host
+        ],
+    }
+
+
@app.get("/api/hardware")
 async def get_hardware() -> dict:
    """Per-Spark hardware snapshot — RAM, disk, GPU mem + util, CPU load, uptime."""