From 513c78bfa5d6a8264293dcdd0864a2c569e97484 Mon Sep 17 00:00:00 2001 From: Keysat Date: Wed, 13 May 2026 17:58:43 -0500 Subject: [PATCH] v0.8.1:1 - fix disk probe: $HOME wasn't expanding inside shlex.quote MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 0.8.1:0 probe wrapped the entire path (including $HOME) in shlex.quote, which produces single quotes โ€” preventing shell variable expansion. The resulting `[ -d '$HOME/.cache/...' ]` test looked for a literal path starting with the string $HOME and always failed, so every model reported as "not downloaded" and no trash icons rendered. Fix: embed $HOME in a double-quoted shell context (which allows expansion) and validate the cache dirname against a whitelist [A-Za-z0-9._-]+ rather than relying on shlex quoting. The dirname is fully constrained by HF's naming rules + our org--name munging, so the whitelist is tight enough. Verified against Spark 1: probe now correctly reports the 25,075,981,924 bytes (23.4 GB) of Qwen3.6's cache dir. Co-Authored-By: Claude Opus 4.7 (1M context) --- image/app/disk.py | 54 ++++++++++++++++-------------- package/startos/versions/v0_1_0.ts | 4 +-- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/image/app/disk.py b/image/app/disk.py index 3ef2df6..aa86d98 100644 --- a/image/app/disk.py +++ b/image/app/disk.py @@ -10,7 +10,7 @@ model or one tied to an in-flight swap/download. """ from __future__ import annotations import asyncio -import shlex +import re from dataclasses import dataclass from typing import Optional @@ -18,17 +18,21 @@ from .config import Settings from .ssh import ssh_run +# HF cache dirnames are `models----` where and only contain +# Hugging Face's allowed identifier chars: letters, digits, dot, dash, underscore. +# Validate against this whitelist so we can safely embed the dirname into a shell +# command without quoting (we need $HOME outside the quotes to expand). +_SAFE_DIRNAME = re.compile(r"^[A-Za-z0-9._\-]+$") + + def repo_to_cache_dirname(repo: str) -> str: """Convert 'org/name' to 'models--org--name' (the HF hub cache directory).""" if "/" not in repo: raise ValueError(f"repo must be in 'org/name' form: {repo!r}") - return "models--" + repo.replace("/", "--") - - -def _cache_path(repo: str) -> str: - """Full remote path to the model's cache directory.""" - # Use $HOME so it resolves correctly regardless of the SSH user's home. - return f"$HOME/.cache/huggingface/hub/{repo_to_cache_dirname(repo)}" + dn = "models--" + repo.replace("/", "--") + if not _SAFE_DIRNAME.fullmatch(dn): + raise ValueError(f"unsafe cache dirname (rejected by whitelist): {dn!r}") + return dn @dataclass @@ -51,13 +55,13 @@ async def probe_host(host: str, user: str, repo: str, settings: Settings) -> Hos """Return whether the model's cache dir exists on this host and its size.""" if not host or not user: return HostDiskResult(host=host or "?", on_disk=False, error="host not configured") - path = _cache_path(repo) - # `du -sb` prints bytes; if the dir doesn't exist, `du` returns non-zero. - # We test existence explicitly first so we can report on_disk=False cleanly. + dn = repo_to_cache_dirname(repo) # whitelisted; safe to embed + # $HOME must expand server-side, so we build the path with double quotes + # (which DO allow variable expansion) rather than shlex.quote single quotes. cmd = ( - f"if [ -d {shlex.quote(path)} ]; then " - f"du -sb {shlex.quote(path)} 2>/dev/null | awk '{{print $1}}'; " - f"else echo MISSING; fi" + f'P="$HOME/.cache/huggingface/hub/{dn}"; ' + f'if [ -d "$P" ]; then du -sb "$P" 2>/dev/null | cut -f1; ' + f'else echo MISSING; fi' ) rc, out, err = await ssh_run(host, user, cmd, settings, timeout=20.0) if rc != 0: @@ -88,19 +92,19 @@ async def delete_host(host: str, user: str, repo: str, settings: Settings) -> Ho """Probe + rm -rf on one host. Returns bytes freed (0 if the dir wasn't there).""" if not host or not user: return HostDiskResult(host=host or "?", on_disk=False, error="host not configured") - path = _cache_path(repo) - # Safety: hard-code the prefix in the command so a bad `repo` can never escape. + dn = repo_to_cache_dirname(repo) # whitelisted; safe to embed # Compute size first, then remove. If absent, still return success (idempotent). + # $HOME is in double-quoted context so it expands; the dirname is whitelisted. cmd = ( - f"set -e; " - f"P={shlex.quote(path)}; " - f"if [ -d \"$P\" ]; then " - f" SIZE=$(du -sb \"$P\" 2>/dev/null | awk '{{print $1}}'); " - f" rm -rf -- \"$P\"; " - f" echo FREED $SIZE; " - f"else " - f" echo FREED 0; " - f"fi" + f'set -e; ' + f'P="$HOME/.cache/huggingface/hub/{dn}"; ' + f'if [ -d "$P" ]; then ' + f' SIZE=$(du -sb "$P" 2>/dev/null | cut -f1); ' + f' rm -rf -- "$P"; ' + f' echo "FREED $SIZE"; ' + f'else ' + f' echo "FREED 0"; ' + f'fi' ) rc, out, err = await ssh_run(host, user, cmd, settings, timeout=120.0) if rc != 0: diff --git a/package/startos/versions/v0_1_0.ts b/package/startos/versions/v0_1_0.ts index 1f24176..0306cf0 100644 --- a/package/startos/versions/v0_1_0.ts +++ b/package/startos/versions/v0_1_0.ts @@ -1,10 +1,10 @@ import { VersionInfo, IMPOSSIBLE } from '@start9labs/start-sdk' export const v0_1_0 = VersionInfo.of({ - version: '0.8.1:0', + version: '0.8.1:1', releaseNotes: { en_US: - 'v0.8.1: model weights can now be deleted from disk directly from the dashboard. Each model card shows whether the weights are present (with on-disk GB size) or not yet downloaded. When present and the model is NOT currently loaded, a small trash icon appears on the card; clicking it pops a confirmation showing how many GB will be freed and on which Spark(s), then runs `rm -rf` on the Hugging Face cache directory via SSH. Cluster-mode models are deleted from both Sparks; solo-mode from Spark 1 only. Safety rails: refuses to delete the currently-loaded model, refuses during an in-flight swap or download, and the catalog entry stays โ€” you can always re-download. Disk status is probed once on dashboard load and re-checked every 60s.', + 'v0.8.1:1 โ€” fix: the disk-status probe shipped in 0.8.1:0 was wrapping $HOME in single quotes via shlex.quote, which prevented shell variable expansion. Result: every model reported as "not downloaded" even when weights were on disk, so no trash icons appeared. Rewritten to embed $HOME in double-quoted shell context and validate the cache dirname against a whitelist. The trash icons now show up correctly. v0.8.1:0 features: per-card disk-presence pills (on disk ยท GB / not downloaded), trash icon to rm -rf the HF cache directory via SSH with a confirmation dialog. Safety rails unchanged: refuses to delete the currently-loaded model or during an in-flight swap/download; catalog entry persists for re-download.', }, migrations: { up: async ({ effects }) => {},