v0.8.1:0 - delete model weights from disk via card trash icon
Each model card now shows whether its weights are present on disk
(with GB size) or not yet downloaded. When present and the model
isn't currently loaded, a trash icon appears; clicking it pops a
confirmation showing exactly how many GB will be freed and on
which Spark(s), then runs rm -rf on the HF cache directory via SSH.
Cluster-mode models are removed from both Sparks; solo-mode from
Spark 1 only. Safety rails: refuses to delete the currently-loaded
model, refuses during an in-flight swap or download, and the
catalog entry stays intact so it can be re-downloaded anytime.
Backend:
- new image/app/disk.py: probe_disk + delete_from_disk over SSH
- GET /api/models/disk-status — parallel probe across all catalog models
- DELETE /api/models/{key}/disk — guarded rm -rf, logs to connectivity events
Frontend:
- on-disk / not-downloaded pills on every card
- trash icon-btn in card-actions row (hidden when not on disk)
- confirmation dialog showing per-host bytes-to-free
- disk-status re-checked every 60s
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -13,6 +13,7 @@ from .config import Settings
|
||||
from .connectivity import get_mac, record_report, record_state, summary as connectivity_summary
|
||||
from .custom_services import add_custom_service, delete_custom_service
|
||||
from .deep_health import DeepHealth
|
||||
from .disk import delete_from_disk, probe_disk
|
||||
from .download import DownloadManager
|
||||
from .hardware import HardwareProbe
|
||||
from .health import check_magpie, check_parakeet, check_vllm
|
||||
@@ -139,6 +140,89 @@ async def del_model(key: str) -> dict:
|
||||
return {"ok": True, "key": key}
|
||||
|
||||
|
||||
@app.get("/api/models/disk-status")
async def get_models_disk_status() -> dict:
    """Probe each catalog model's HF cache on the appropriate Spark(s) in parallel.

    Result is keyed by model key: {on_disk, total_bytes, per_host:[{host,on_disk,size_bytes,error?}]}.
    Designed to be called once on dashboard load; takes ~1–3s depending on Spark count.

    A probe that raises (or is cancelled) does not fail the whole request: that
    model is reported as not on disk, with an ``error`` string describing the failure.

    Returns:
        ``{"configured": False, "models": {}}`` when settings are not configured,
        otherwise ``{"configured": True, "models": {key: status_dict}}``.
    """
    if not settings.configured:
        return {"configured": False, "models": {}}

    keys = list(catalog.models.keys())
    # return_exceptions=True: one failing probe must not abort the others.
    statuses = await asyncio.gather(
        *(probe_disk(catalog.models[k].repo, catalog.models[k].mode, settings) for k in keys),
        return_exceptions=True,
    )

    out: dict[str, dict] = {}
    for k, s in zip(keys, statuses):
        # Check BaseException, not Exception: gather() can return
        # asyncio.CancelledError here, which does not derive from Exception.
        if isinstance(s, BaseException):
            out[k] = {
                "on_disk": False,
                "total_bytes": 0,
                "per_host": [],
                # Some exceptions stringify to ""; fall back to the class name
                # so the error field is never blank.
                "error": str(s) or type(s).__name__,
            }
            continue
        out[k] = {
            "on_disk": s.on_disk,
            "total_bytes": s.total_bytes,
            "per_host": [
                {
                    "host": r.host,
                    "on_disk": r.on_disk,
                    "size_bytes": r.size_bytes,
                    # "error" key is only present when the host reported one.
                    **({"error": r.error} if r.error else {}),
                }
                for r in s.per_host
            ],
        }
    return {"configured": True, "models": out}
|
||||
|
||||
|
||||
@app.delete("/api/models/{key}/disk")
async def del_model_disk(key: str) -> dict:
    """Delete a model's weights from the Spark filesystem(s). The catalog entry stays.

    Safety rails:
    - Refuses if the model is currently loaded on vLLM.
    - Refuses if a swap or download is in flight.
    - Idempotent: if the cache dir is already gone on a host, that host reports 0 bytes freed.

    Args:
        key: Catalog key of the model whose weights should be removed.

    Returns:
        A dict with ``ok``, ``key``, ``repo``, ``bytes_freed`` and a ``per_host``
        list of ``{host, size_bytes, error?}`` entries.

    Raises:
        HTTPException: 404 if ``key`` is not in the catalog; 409 if the model is
            currently loaded, a swap is in progress, or this model's repo is
            being downloaded.
    """
    if key not in catalog.models:
        raise HTTPException(404, f"unknown model: {key}")
    m = catalog.models[key]

    # Refuse if currently loaded. A failed vLLM check is treated as
    # "nothing loaded" (best-effort guard), matching the original behavior.
    try:
        vllm = await check_vllm(settings)
    except Exception:
        vllm = {}
    if vllm.get("ok") and vllm.get("current_model") == m.repo:
        raise HTTPException(
            409,
            f"'{m.display_name}' is the currently loaded model. Switch to a different model first, then try again."
        )

    # Refuse if a swap is in flight
    if swap_manager.current_job_id:
        raise HTTPException(409, "a model swap is in progress; wait for it to finish")

    # Refuse if a download is in flight for this same repo (a different model's download is fine)
    if download_manager.current_job_id:
        job = download_manager.get(download_manager.current_job_id)
        if job and job.repo == m.repo:
            raise HTTPException(409, "this model is currently downloading; cancel or wait for it to finish")

    status = await delete_from_disk(m.repo, m.mode, settings)

    # Audit log. Hosts can report an error (surfaced in the response below), so
    # the audit record must not claim success unconditionally.
    failed_hosts = [r for r in status.per_host if r.error]
    detail = f"freed {status.total_bytes} bytes across {len(status.per_host)} host(s)"
    if failed_hosts:
        detail += f"; {len(failed_hosts)} host(s) reported errors"
    record_report(
        f"disk:{key}",
        ok=not failed_hosts,
        source="disk-delete",
        detail=detail,
    )

    return {
        # Kept as True for client compatibility; per-host failures are
        # reported through the "error" field of each per_host entry.
        "ok": True,
        "key": key,
        "repo": m.repo,
        "bytes_freed": status.total_bytes,
        "per_host": [
            {"host": r.host, "size_bytes": r.size_bytes, **({"error": r.error} if r.error else {})}
            for r in status.per_host
        ],
    }
|
||||
|
||||
|
||||
@app.get("/api/hardware")
|
||||
async def get_hardware() -> dict:
|
||||
"""Per-Spark hardware snapshot — RAM, disk, GPU mem + util, CPU load, uptime."""
|
||||
|
||||
Reference in New Issue
Block a user