v0.26.0:0 - disk-driven model menu (scan sparks; recipes; needs-setup)

The dashboard menu is now the set of models actually downloaded on the Sparks, not a hard-coded catalog. models.yaml + overrides are reframed as launch recipes matched to an on-disk model by repo; an on-disk model with no recipe is flagged needs_setup and its launch settings are inferred from its config.json for a one-time operator confirmation (discovery.py).
- delete now removes weights AND the menu card (delete_from_disk sweeps all hosts; the delete endpoint resolves keys via the live menu)
- new GET /api/models/suggest; /api/models returns the menu + a recipes list (download autocomplete); GET /api/models/disk-status removed
- dropped the two legacy Qwen recipes (235B FP8, 2.5 72B)
- tests: +test_discovery.py (cache parsing, infer_recipe, build_menu merge)
2026-06-18 11:09:56 -05:00
parent c0b35184ba
commit df9f244eae
14 changed files with 795 additions and 238 deletions
@@ -0,0 +1,209 @@
+"""Disk-driven model menu + launch-recipe inference.
+
+The dashboard's model list is whatever is actually downloaded on the Sparks
+(see `disk.list_cached_models`), NOT a hard-coded catalog. The bundled/overridden
+catalog entries are *launch recipes*: matched to an on-disk model by repo, they
+say HOW to launch it. A completed model on disk with no matching recipe shows up
+as `needs_setup` — the first switch reads its `config.json`, proposes a recipe
+(`infer_recipe`) the operator confirms once, and that confirmed recipe is saved
+to /data so it's a normal card from then on.
+
+Why a recipe layer at all, if the menu is the disk? Because a folder on disk
+doesn't say how to launch it: the per-family parsers (`--reasoning-parser`,
+`--tool-call-parser`), the MoE backend (some Gemma MoE checkpoints need
+`marlin` on GB10), and solo-vs-cluster topology can't be read off a directory.
+We infer a best guess from the model's own config + size, but the operator
+confirms it — a wrong guess is cheap, a wrong launch is not.
+"""
+from __future__ import annotations
+import asyncio
+import re
+
+from .config import Settings
+from .disk import list_cached_models, probe_disk
+from .overrides import extract_knobs_from_args
+
+
+# A model whose weights exceed this can't fit one Spark's 128 GB beside a KV
+# cache, so it must shard across both via Ray. A heuristic prefill only — the
+# operator confirms mode in the setup form, so the exact cutoff isn't critical.
+# Expressed in decimal gigabytes (10**9 bytes), i.e. 115 GB.
+SINGLE_SPARK_BYTES = 115 * 1000 ** 3
+
+# Generic knob defaults applied to every inferred recipe (the operator can tweak
+# these in the setup form). Family-specific flags (parsers, MoE backend) are
+# layered on separately by `_detect_family`.
+# `infer_recipe` copies this dict before adjusting any value, so the shared
+# defaults themselves are never mutated.
+_COMMON_KNOBS = {
+    "max_model_len": 32768,
+    "gpu_memory_utilization": 0.85,
+    "fastsafetensors": True,
+    "prefix_caching": True,
+    "kv_cache_dtype": "fp8",
+}
+
+
+def repo_to_key(repo: str) -> str:
+    """Stable, URL-safe menu key for a discovered model with no recipe key yet.
+
+    'RedHatAI/Qwen3.6-35B-A3B-NVFP4' -> 'redhatai-qwen3-6-35b-a3b-nvfp4'. The same
+    slug is used by the menu, the setup form, and `_identify_current_model`, so a
+    loaded-but-unconfigured model still highlights as active."""
+    return re.sub(r"[^a-z0-9_-]+", "-", repo.lower()).strip("-")
+
+
+def _detect_family(config: dict) -> tuple[str, list[str], list[str]]:
+    """Return (family_label, vllm_flags, capabilities) inferred from config.json.
+
+    Only family-specific, non-knob flags (parsers, MoE backend) go in vllm_flags;
+    generic knob defaults are handled by the caller. Best-effort and operator-
+    confirmed, so a wrong guess is cheap."""
+    arch = " ".join(config.get("architectures") or [])
+    mtype = str(config.get("model_type") or "")
+    s = (arch + " " + mtype).lower()
+    is_moe = (
+        "moe" in s
+        or any(config.get(k) for k in ("num_experts", "n_routed_experts", "num_local_experts"))
+    )
+    is_vision = (
+        "conditionalgeneration" in s
+        or "vision" in s
+        or "vlforcausallm" in s
+        or "vision_config" in config
+        or "image_token_index" in config
+    )
+    flags: list[str] = []
+    caps: list[str] = []
+    label = "Generic"
+    if mtype.startswith("qwen3") or "qwen3" in s:
+        label = "Qwen3 (MoE)" if is_moe else "Qwen3"
+        flags.append("--reasoning-parser=qwen3")
+        caps.append("reasoning")
+        if is_moe:
+            flags.append("--moe_backend=flashinfer_cutlass")
+    elif "gemma" in s:
+        label = "Gemma (MoE)" if is_moe else "Gemma"
+        flags += ["--reasoning-parser=gemma4", "--tool-call-parser=gemma4", "--enable-auto-tool-choice"]
+        caps += ["reasoning", "tools"]
+        if is_moe:
+            # The fast flashinfer/CUTLASS FP4 path errors on GB10 for Gemma MoE;
+            # marlin is the working fallback (see the Gemma 26B trial notes).
+            flags.append("--moe_backend=marlin")
+    if is_vision and "vision" not in caps:
+        caps.append("vision")
+    return label, flags, caps
+
+
+def _infer_mode(total_bytes: int, on_host_count: int) -> str:
+    """Solo unless the weights are present on both Sparks or too big for one."""
+    if on_host_count >= 2 or total_bytes > SINGLE_SPARK_BYTES:
+        return "cluster"
+    return "solo"
+
+
+def infer_recipe(repo: str, config: dict, total_bytes: int, on_host_count: int) -> dict:
+    """Propose a launch recipe for a discovered model — prefills the setup form."""
+    label, flags, caps = _detect_family(config or {})
+    mode = _infer_mode(total_bytes, on_host_count)
+    vllm_args = list(flags)
+    vllm_args.append("--max-num-batched-tokens=16384")
+    knobs = dict(_COMMON_KNOBS)
+    if mode == "cluster":
+        # Large models shard across both Sparks via Ray; leave more headroom.
+        vllm_args += ["-tp=2", "--distributed-executor-backend=ray"]
+        knobs["gpu_memory_utilization"] = 0.7
+    return {
+        "key": repo_to_key(repo),
+        "repo": repo,
+        "display_name": repo.split("/")[-1],
+        "mode": mode,
+        "capabilities": caps,
+        "vllm_args": vllm_args,
+        "knobs": knobs,
+        "family": label,
+    }
+
+
+def _menu_entry_from_recipe(m, *, on_disk: bool, total_bytes: int, per_host: list[dict]) -> dict:
+    d = m.model_dump()
+    d["effective_knobs"] = {**extract_knobs_from_args(m.vllm_args), **(m.knobs or {})}
+    d["needs_setup"] = False
+    d["on_disk"] = on_disk
+    d["total_bytes"] = total_bytes
+    d["per_host"] = per_host
+    return d
+
+
+async def build_menu(settings: Settings, catalog) -> dict[str, dict]:
+    """The disk-driven model menu: every completed model on the Sparks, annotated
+    with its launch recipe (matched by repo) or flagged `needs_setup` if none.
+
+    Two SSH scans total (one per Spark), run in parallel — much cheaper than the
+    old per-recipe disk probe. A host that errors is skipped, not fatal."""
+    hosts = [(settings.spark1_host, settings.spark1_user)]
+    if settings.spark2_host:
+        hosts.append((settings.spark2_host, settings.spark2_user))
+    scans = await asyncio.gather(
+        *(list_cached_models(h, u, settings) for h, u in hosts),
+        return_exceptions=True,
+    )
+    by_repo: dict[str, dict] = {}
+    for (h, _u), res in zip(hosts, scans):
+        if isinstance(res, Exception):
+            continue
+        for repo, size, complete in res:
+            e = by_repo.setdefault(repo, {"total_bytes": 0, "per_host": [], "complete": False})
+            e["total_bytes"] += size
+            e["per_host"].append({"host": h, "size_bytes": size})
+            e["complete"] = e["complete"] or complete
+
+    recipe_by_repo = {m.repo: (k, m) for k, m in catalog.models.items() if m.repo}
+
+    menu: dict[str, dict] = {}
+    for repo, info in by_repo.items():
+        # Skip half-fetched / corrupt caches (no finished snapshot) — they'd show
+        # as broken cards. In-flight downloads surface in the download panel.
+        if not info["complete"]:
+            continue
+        if repo in recipe_by_repo:
+            key, m = recipe_by_repo[repo]
+            menu[key] = _menu_entry_from_recipe(
+                m, on_disk=True, total_bytes=info["total_bytes"], per_host=info["per_host"]
+            )
+        else:
+            key = repo_to_key(repo)
+            menu[key] = {
+                "display_name": repo.split("/")[-1],
+                "repo": repo,
+                "local_path": None,
+                "size_gb": round(info["total_bytes"] / 1e9, 1),
+                "mode": _infer_mode(info["total_bytes"], len(info["per_host"])),
+                "capabilities": [],
+                "expected_ready_seconds": 300,
+                "vllm_args": [],
+                "description": None,
+                "knobs": None,
+                "custom": False,
+                "needs_setup": True,
+                "effective_knobs": {},
+                "on_disk": True,
+                "total_bytes": info["total_bytes"],
+                "per_host": info["per_host"],
+            }
+
+    # Local/fine-tuned recipes live as a directory, not an HF cache entry — probe
+    # each by path and include it if present. Their keys are unique catalog keys
+    # (and local models carry repo="" per ModelDef), so they never collide with a
+    # discovered repo's slug or an HF recipe key above.
+    for key, m in catalog.models.items():
+        if not m.local_path:
+            continue
+        st = await probe_disk(m.repo, m.mode, settings, local_path=m.local_path)
+        if not st.on_disk:
+            continue
+        menu[key] = _menu_entry_from_recipe(
+            m,
+            on_disk=True,
+            total_bytes=st.total_bytes,
+            per_host=[{"host": r.host, "size_bytes": r.size_bytes} for r in st.per_host if r.on_disk],
+        )
+
+    return menu
@@ -10,6 +10,7 @@ model or one tied to an in-flight swap/download.
 """
 from __future__ import annotations
 import asyncio
+import json
 import re
 from dataclasses import dataclass
 from typing import Optional
@@ -36,6 +37,87 @@ def repo_to_cache_dirname(repo: str) -> str:
    return dn


+def cache_dirname_to_repo(dirname: str) -> Optional[str]:
+    """Inverse of `repo_to_cache_dirname`: 'models--org--name' -> 'org/name'.
+
+    A repo has exactly one '/', so the org is the first '--'-segment and the name
+    is everything after (names may themselves contain single dashes). Returns
+    None for anything that isn't a model cache dir."""
+    if not dirname.startswith("models--"):
+        return None
+    parts = dirname[len("models--"):].split("--")
+    if len(parts) < 2 or not parts[0] or not parts[1]:
+        return None
+    return f"{parts[0]}/{'--'.join(parts[1:])}"
+
+
+def parse_cache_listing(out: str) -> list[tuple[str, int, bool]]:
+    """Parse the 'size|complete|dirname' lines from `list_cached_models`'s scan.
+
+    Returns [(repo, size_bytes, complete), ...], skipping non-model lines. Pure
+    function so the parsing is unit-testable without SSH."""
+    items: list[tuple[str, int, bool]] = []
+    for line in out.splitlines():
+        line = line.strip()
+        if line.count("|") < 2:
+            continue
+        size_s, complete_s, dirname = line.split("|", 2)
+        repo = cache_dirname_to_repo(dirname.strip())
+        if not repo:
+            continue
+        try:
+            size = int(size_s)
+        except ValueError:
+            size = 0
+        items.append((repo, size, complete_s.strip() == "1"))
+    return items
+
+
+async def list_cached_models(host: str, user: str, settings: Settings) -> list[tuple[str, int, bool]]:
+    """Enumerate every Hugging Face model cached on a host: (repo, size_bytes, complete).
+
+    'complete' = the cache has at least one snapshot carrying a config.json (a
+    finished download, not a half-fetched/corrupt dir). One SSH round-trip; the
+    glob's no-match case is handled by the `[ -d ]` guard."""
+    if not host or not user:
+        return []
+    cmd = (
+        'HUB="$HOME/.cache/huggingface/hub"; '
+        'for d in "$HUB"/models--*; do '
+        '[ -d "$d" ] || continue; '
+        'n=$(basename "$d"); '
+        'sz=$(du -sb "$d" 2>/dev/null | cut -f1); sz=${sz:-0}; '
+        'if ls "$d"/snapshots/*/config.json >/dev/null 2>&1; then c=1; else c=0; fi; '
+        'echo "${sz}|${c}|${n}"; '
+        'done'
+    )
+    rc, out, err = await ssh_run(host, user, cmd, settings, timeout=30.0)
+    if rc != 0:
+        return []
+    return parse_cache_listing(out)
+
+
+async def read_model_config(host: str, user: str, repo: str, settings: Settings) -> Optional[dict]:
+    """Read a cached model's config.json (first snapshot) for launch inference.
+
+    Returns the parsed dict, or None if absent/unreadable. The dirname is
+    whitelisted (repo_to_cache_dirname) so it's safe to embed unquoted."""
+    if not host or not user:
+        return None
+    dn = repo_to_cache_dirname(repo)
+    cmd = (
+        f'D=$(ls -d "$HOME/.cache/huggingface/hub/{dn}/snapshots/"*/ 2>/dev/null | head -1); '
+        f'[ -n "$D" ] && cat "${{D}}config.json" 2>/dev/null'
+    )
+    rc, out, err = await ssh_run(host, user, cmd, settings, timeout=20.0)
+    if rc != 0 or not out.strip():
+        return None
+    try:
+        return json.loads(out)
+    except (ValueError, TypeError):
+        return None
+
+
@dataclass
 class HostDiskResult:
    host: str
@@ -159,10 +241,14 @@ async def delete_host(host: str, user: str, repo: str, settings: Settings) -> Ho
    return HostDiskResult(host=host, on_disk=False, size_bytes=freed)


-async def delete_from_disk(repo: str, mode: str, settings: Settings) -> DiskStatus:
-    """rm -rf the model's cache dir on the relevant Sparks. Idempotent."""
+async def delete_from_disk(repo: str, settings: Settings) -> DiskStatus:
+    """rm -rf the model's cache dir on ALL configured Sparks. Idempotent.
+
+    We sweep both Sparks regardless of the model's declared mode: a 'remove from
+    disk & menu' must leave nothing behind, and rm of an absent dir reports 0
+    bytes freed (FREED 0), so an extra host is harmless."""
    hosts: list[tuple[str, str]] = [(settings.spark1_host, settings.spark1_user)]
-    if mode == "cluster" and settings.spark2_host:
+    if settings.spark2_host:
        hosts.append((settings.spark2_host, settings.spark2_user))

    results = await asyncio.gather(*(delete_host(h, u, repo, settings) for h, u in hosts))
@@ -15,7 +15,8 @@ from .coordination import LockHeld, ScheduleRegistry, SwapLockManager, WebhookNo
 from .custom_services import add_custom_service, delete_custom_service
 from .audio_proxy import build_router as build_audio_router
 from .deep_health import DeepHealth
-from .disk import delete_from_disk, probe_disk
+from .discovery import build_menu, infer_recipe, repo_to_key
+from .disk import delete_from_disk, probe_host, read_model_config
 from .download import DownloadManager
 from .llm_proxy import build_router as build_llm_router
 from .embeddings_proxy import build_router as build_embeddings_router
@@ -25,7 +26,7 @@ from .health import check_kokoro, check_parakeet, check_vllm, check_embeddings,
 from .matrix_bridge import MatrixBridgeManager
 from .models import ModelDef, load_catalog
 from .nim import SUGGESTED_NIMS, CATALOG_URL, NimManager
-from .overrides import add_custom, delete_custom, extract_knobs_from_args, load_overrides, set_knobs
+from .overrides import add_custom, delete_custom, load_overrides, set_knobs
 from .services import docker_state, run_action, services_from_settings
 from .shellsafe import validate_container, validate_image, validate_repo
 from .speech_models import SpeechModelsManager
@@ -161,20 +162,65 @@ def _reload_catalog() -> None:
    swap_manager.reload_catalog(catalog)


+def _recipe_summaries() -> list[dict]:
+    """Known launch recipes (bundled + saved), for the download panel's autocomplete.
+
+    These are NOT the menu — the menu is what's on disk. This is just the set of
+    repos Spark Control already knows how to launch, so the download box can
+    suggest them by name without putting phantom cards on the dashboard."""
+    out = []
+    for m in catalog.models.values():
+        if m.repo:
+            out.append({"repo": m.repo, "display_name": m.display_name, "mode": m.mode})
+    return out
+
+
@app.get("/api/models")
 async def get_models() -> dict:
-    out_models: dict[str, dict] = {}
-    for key, m in catalog.models.items():
-        d = m.model_dump()
-        # Always include effective knobs for the UI (defaults from base args + any overrides)
-        d["effective_knobs"] = {**extract_knobs_from_args(m.vllm_args), **(m.knobs or {})}
-        out_models[key] = d
+    """The model menu = what's actually downloaded on the Sparks (one scan per
+    Spark), each annotated with its launch recipe or flagged `needs_setup`.
+
+    Does SSH, so it's the slower of the model endpoints; the front-end calls it on
+    load, after a swap/download/delete, and on a slow timer — not every poll."""
+    if not settings.configured:
+        return {"configured": False, "defaults": catalog.defaults.model_dump(), "models": {}, "recipes": []}
+    menu = await build_menu(settings, catalog)
    return {
+        "configured": True,
        "defaults": catalog.defaults.model_dump(),
-        "models": out_models,
+        "models": menu,
+        "recipes": _recipe_summaries(),
    }


+@app.get("/api/models/suggest")
+async def suggest_model(repo: str = Query(...)) -> dict:
+    """Read a downloaded model's config.json + size and propose a launch recipe.
+
+    Prefills the 'set up this model' form for an on-disk model that has no recipe
+    yet. The operator confirms/edits, then POSTs it to /api/models to save."""
+    if not settings.configured:
+        raise HTTPException(503, "spark1 not configured")
+    try:
+        validate_repo(repo)
+    except ValueError as e:
+        raise HTTPException(400, str(e))
+    hosts = [(settings.spark1_host, settings.spark1_user)]
+    if settings.spark2_host:
+        hosts.append((settings.spark2_host, settings.spark2_user))
+    # Config from whichever Spark has it; size summed across the Sparks that do.
+    sizes = await asyncio.gather(*(probe_host(h, u, repo, settings) for h, u in hosts))
+    total = sum(r.size_bytes for r in sizes if r.on_disk)
+    on_hosts = sum(1 for r in sizes if r.on_disk)
+    config = None
+    for (h, u), r in zip(hosts, sizes):
+        if r.on_disk:
+            config = await read_model_config(h, u, repo, settings)
+            if config is not None:
+                break
+    return infer_recipe(repo, config or {}, total, on_hosts)
+
+
 class KnobsBody(BaseModel):
    knobs: dict

@@ -238,71 +284,43 @@ async def del_model(key: str) -> dict:
    return {"ok": True, "key": key}


-@app.get("/api/models/disk-status")
-async def get_models_disk_status() -> dict:
-    """Probe each catalog model's HF cache on the appropriate Spark(s) in parallel.
-
-    Result is keyed by model key: {on_disk, total_bytes, per_host:[{host,on_disk,size_bytes,error?}]}.
-    Designed to be called once on dashboard load; takes ~1–3s depending on Spark count.
-    """
-    if not settings.configured:
-        return {"configured": False, "models": {}}
-    keys = list(catalog.models.keys())
-    statuses = await asyncio.gather(*(
-        probe_disk(
-            catalog.models[k].repo,
-            catalog.models[k].mode,
-            settings,
-            local_path=catalog.models[k].local_path,
-        )
-        for k in keys
-    ), return_exceptions=True)
-    out: dict[str, dict] = {}
-    for k, s in zip(keys, statuses):
-        if isinstance(s, Exception):
-            out[k] = {"on_disk": False, "total_bytes": 0, "per_host": [], "error": str(s)}
-            continue
-        out[k] = {
-            "on_disk": s.on_disk,
-            "total_bytes": s.total_bytes,
-            "per_host": [
-                {"host": r.host, "on_disk": r.on_disk, "size_bytes": r.size_bytes, **({"error": r.error} if r.error else {})}
-                for r in s.per_host
-            ],
-        }
-    return {"configured": True, "models": out}
-
-
@app.delete("/api/models/{key}/disk")
 async def del_model_disk(key: str) -> dict:
-    """Delete a model's weights from the Spark filesystem(s). The catalog entry stays.
+    """Remove a model's weights from the Sparks — and thus from the menu, since the
+    menu IS the disk. Resolves the key against the live menu, so a discovered
+    model (no saved recipe) is deletable too.

    Safety rails:
+      - Refuses a local/fine-tuned directory (hand-placed, not re-downloadable).
      - Refuses if the model is currently loaded on vLLM.
-      - Refuses if a swap or download is in flight.
-      - Idempotent: if the cache dir is already gone on a host, that host reports 0 bytes freed.
+      - Refuses if a swap or this model's own download is in flight.
+      - Idempotent across both Sparks: an already-absent cache dir frees 0 bytes.
    """
-    if key not in catalog.models:
+    if not settings.configured:
+        raise HTTPException(503, "spark1 not configured")
+    menu = await build_menu(settings, catalog)
+    entry = menu.get(key)
+    if entry is None:
        raise HTTPException(404, f"unknown model: {key}")
-    m = catalog.models[key]

    # Never rm a local fine-tune directory from the dashboard — it's irreplaceable
    # training output the user placed by hand, not a re-downloadable HF cache.
-    if m.local_path:
+    if entry.get("local_path"):
        raise HTTPException(
            400,
            "this is a local model; its directory must be managed on the Spark, not deleted from here",
        )
+    repo = entry["repo"]

    # Refuse if currently loaded
    try:
        vllm = await check_vllm(settings)
    except Exception:
        vllm = {}
-    if vllm.get("ok") and vllm.get("current_model") == m.repo:
+    if vllm.get("ok") and vllm.get("current_model") == repo:
        raise HTTPException(
            409,
-            f"'{m.display_name}' is the currently loaded model. Switch to a different model first, then try again."
+            f"'{entry['display_name']}' is the currently loaded model. Switch to a different model first, then try again."
        )

    # Refuse if a swap is in flight
@@ -312,10 +330,10 @@ async def del_model_disk(key: str) -> dict:
    # Refuse if a download is in flight for this same repo (a different model's download is fine)
    if download_manager.current_job_id:
        job = download_manager.get(download_manager.current_job_id)
-        if job and job.repo == m.repo:
+        if job and job.repo == repo:
            raise HTTPException(409, "this model is currently downloading; cancel or wait for it to finish")

-    status = await delete_from_disk(m.repo, m.mode, settings)
+    status = await delete_from_disk(repo, settings)
    # Audit log
    record_report(
        f"disk:{key}",
@@ -326,7 +344,7 @@ async def del_model_disk(key: str) -> dict:
    return {
        "ok": True,
        "key": key,
-        "repo": m.repo,
+        "repo": repo,
        "bytes_freed": status.total_bytes,
        "per_host": [
            {"host": r.host, "size_bytes": r.size_bytes, **({"error": r.error} if r.error else {})}
@@ -881,10 +899,13 @@ async def get_status() -> dict:
 def _identify_current_model(repo: str | None) -> str | None:
    if not repo:
        return None
+    # A recipe-backed model keys by its recipe key; a discovered model (loaded but
+    # not yet set up) keys by the same slug build_menu uses, so it still
+    # highlights as the active card.
    for key, m in catalog.models.items():
        if m.repo == repo:
            return key
-    return None
+    return repo_to_key(repo)


 class SwapRequest(BaseModel):
@@ -19,8 +19,8 @@ const state = {
  configured: true,
  timer_handle: null,
  deep_health: {},
-  disk_status: {},         // keyed by model key: { on_disk, total_bytes, per_host }
-  disk_status_loaded: false,
+  models_loaded: false,    // true once the first disk scan (/api/models) returns
+  recipes: [],             // known launch recipes (for the download autocomplete)
  lock: { held: false },   // GPU swap reservation (coordination layer)
  schedules: [],           // schedules external automation has registered
 };
@@ -65,67 +65,69 @@ function renderCards() {
  const lockTip = locked
    ? `Reserved by ${state.lock.holder || 'automation'}${state.lock.expires_at ? ' until ' + fmtClock(state.lock.expires_at) : ''}`
    : '';
-  for (const key of Object.keys(state.models)) {
+  const keys = Object.keys(state.models);
+  if (keys.length === 0) {
+    // The menu is the disk: nothing downloaded (or the scan hasn't returned yet).
+    root.innerHTML = state.models_loaded
+      ? `<div class="empty-menu muted">No models downloaded on the Sparks yet. Use <strong>+ Download a new model</strong> above to fetch one — it'll appear here when it's done.</div>`
+      : `<div class="empty-menu muted">Scanning the Sparks for downloaded models…</div>`;
+    return;
+  }
+  for (const key of keys) {
    const m = state.models[key];
    const isActive = key === state.current_model_key;
    const card = document.createElement('div');
-    card.className = 'card' + (isActive ? ' active' : '');
+    card.className = 'card' + (isActive ? ' active' : '') + (m.needs_setup ? ' needs-setup' : '');
    const desc = m.description
      ? `<div class="desc">${escapeHtml(m.description)}</div>`
      : '';
    const customPill = m.custom ? `<span class="tag custom-pill">custom</span>` : '';
    const localPill = m.local_path ? `<span class="tag local-pill" title="Served from a directory on the Spark, not Hugging Face">local</span>` : '';
-    // Disk-presence pill + trash button. Until /api/models/disk-status comes back,
-    // we don't know — render a neutral placeholder.
-    const disk = state.disk_status[key];
-    let diskPill = '';
-    if (state.disk_status_loaded) {
-      if (disk && disk.on_disk) {
-        const gb = (disk.total_bytes / 1e9);
-        diskPill = `<span class="tag on-disk" title="Weights present on disk">on disk · ${gb.toFixed(1)} GB</span>`;
-      } else {
-        diskPill = `<span class="tag not-on-disk" title="Weights not downloaded">not downloaded</span>`;
-      }
-    }
-    // Trash button — hidden if not on disk; disabled (with tooltip) if currently loaded.
+    // Every card on the menu is on disk by definition — show its real size.
+    const gb = (m.total_bytes || 0) / 1e9;
+    const diskPill = gb > 0
+      ? `<span class="tag on-disk" title="Weights present on the Spark(s)">on disk · ${gb.toFixed(1)} GB</span>`
+      : '';
+    const setupPill = m.needs_setup
+      ? `<span class="tag setup-pill" title="On disk, but Spark Control hasn't been told how to launch it">needs setup</span>`
+      : '';
+    // Trash = remove weights from disk AND from the menu. Disabled if active / mid-swap.
    // Never offered for local models: their directory is hand-placed training output,
    // not a re-downloadable HF cache (the server refuses the delete too).
    let trashBtn = '';
-    if (state.disk_status_loaded && disk && disk.on_disk && !m.local_path) {
+    if (!m.local_path) {
      const disabled = isActive || isSwapping;
      const tip = isActive
        ? 'Currently loaded — switch to another model first'
        : isSwapping
        ? 'A swap is in progress'
-        : 'Delete weights from disk';
-      trashBtn = `<button class="icon-btn danger" data-disk-del-key="${key}" title="${escapeHtml(tip)}" aria-label="Delete from disk" ${disabled ? 'disabled' : ''}>${trashIcon}</button>`;
+        : 'Remove weights from disk & menu';
+      trashBtn = `<button class="icon-btn danger" data-disk-del-key="${key}" title="${escapeHtml(tip)}" aria-label="Remove from disk and menu" ${disabled ? 'disabled' : ''}>${trashIcon}</button>`;
    }
-    // Primary card action: "Switch to this" (green) when on disk; "Download" (blue) when not.
-    // Before disk-status loads we render the swap button as a sensible default.
-    const isOnDisk = !state.disk_status_loaded || (disk && disk.on_disk);
-    const dlInFlight = !!(typeof dlState !== 'undefined' && dlState && dlState.job_id);
+    // Primary action: "Current" / "Switch to this", or "Set up & switch" for a
+    // model on disk that has no launch recipe yet.
+    const swapBlocked = isSwapping || locked;
+    const lockTipAttr = locked ? ` title="${escapeHtml(lockTip)}"` : '';
    let primaryBtn = '';
    if (isActive) {
      primaryBtn = `<button class="btn" disabled>Current</button>`;
-    } else if (isOnDisk) {
-      const swapBlocked = isSwapping || locked;
-      const tip = locked ? ` title="${escapeHtml(lockTip)}"` : '';
-      primaryBtn = `<button class="btn primary" data-swap-key="${key}"${tip} ${swapBlocked ? 'disabled' : ''}>Switch to this</button>`;
-    } else if (m.local_path) {
-      // A local model can't be "downloaded" — its directory has to exist on the Spark.
-      primaryBtn = `<button class="btn" disabled title="Directory not found on the Spark — create it there, then refresh">Not found on Spark</button>`;
+    } else if (m.needs_setup) {
+      primaryBtn = `<button class="btn primary" data-setup-key="${key}"${lockTipAttr} ${swapBlocked ? 'disabled' : ''}>Set up &amp; switch</button>`;
    } else {
-      const tip = dlInFlight ? 'A download is already in progress' : 'Download weights to the Spark(s)';
-      primaryBtn = `<button class="btn info" data-download-key="${key}" title="${escapeHtml(tip)}" ${dlInFlight ? 'disabled' : ''}>Download</button>`;
+      primaryBtn = `<button class="btn primary" data-swap-key="${key}"${lockTipAttr} ${swapBlocked ? 'disabled' : ''}>Switch to this</button>`;
    }
+    // The Test/Advanced controls need a saved recipe; hide them until setup is done.
+    const recipeActions = m.needs_setup ? '' : `
+        <button class="btn test-btn" data-test-key="${key}" title="Pre-flight check the launch command without starting the engine">Test</button>
+        <button class="btn adv-btn" data-adv-key="${key}" title="Advanced settings">Advanced</button>`;
    card.innerHTML = `
      <div class="name">${escapeHtml(m.display_name)}</div>
      <div class="meta">
        <span class="tag mode-${m.mode}">${m.mode}</span>
-        <span class="tag">${m.size_gb} GB</span>
+        ${diskPill}
+        ${setupPill}
        ${customPill}
        ${localPill}
-        ${diskPill}
        ${(m.capabilities || []).map(c => `<span class="tag cap">${escapeHtml(c)}</span>`).join('')}
      </div>
      ${desc}
@@ -136,9 +138,7 @@ function renderCards() {
      </div>
      <div class="spacer"></div>
      <div class="card-actions">
-        ${primaryBtn}
-        <button class="btn test-btn" data-test-key="${key}" title="Pre-flight check the launch command without starting the engine">Test</button>
-        <button class="btn adv-btn" data-adv-key="${key}" title="Advanced settings">Advanced</button>
+        ${primaryBtn}${recipeActions}
        ${trashBtn}
      </div>
      <div class="test-result hidden" data-test-result-for="${key}"></div>
@@ -148,8 +148,8 @@ function renderCards() {
  for (const btn of root.querySelectorAll('[data-swap-key]')) {
    btn.addEventListener('click', () => triggerSwap(btn.dataset.swapKey));
  }
-  for (const btn of root.querySelectorAll('[data-download-key]')) {
-    btn.addEventListener('click', () => triggerDownloadForKey(btn.dataset.downloadKey));
+  for (const btn of root.querySelectorAll('[data-setup-key]')) {
+    btn.addEventListener('click', () => openSetupForKey(btn.dataset.setupKey));
  }
  for (const btn of root.querySelectorAll('[data-adv-key]')) {
    btn.addEventListener('click', () => openAdvanced(btn.dataset.advKey));
@@ -1170,24 +1170,44 @@ async function pollStatus() {
  }
 }

+let menuLoadInFlight = false;
+
 async function loadModels() {
-  const data = await fetchJSON('/api/models');
-  state.defaults = data.defaults || {};
-  state.models = data.models || {};
+  // The menu is whatever's downloaded on the Sparks — /api/models does the scan
+  // over SSH, so this call can be slow. Best-effort: a failed fetch is logged
+  // and leaves the previous menu in place rather than blanking the dashboard.
+  // Overlap guard: while one scan is in flight, later calls return immediately
+  // without fresh data (init() fires this un-awaited; pollStatus() may re-call).
+  if (menuLoadInFlight) return;
+  menuLoadInFlight = true;
+  try {
+    const data = await fetchJSON('/api/models');
+    state.defaults = data.defaults || {};
+    state.models = data.models || {};
+    state.recipes = data.recipes || [];
+    state.models_loaded = true;
+    populateDownloadSuggestions();
+    renderCards();
+  } catch (e) {
+    console.warn('model menu load failed:', e.message);
+  } finally {
+    menuLoadInFlight = false;
+  }
 }

-async function loadDiskStatus() {
-  // Probes each catalog model's HF cache over SSH; takes a beat. Best-effort.
-  try {
-    const r = await fetchJSON('/api/models/disk-status');
-    if (r && r.models) {
-      state.disk_status = r.models;
-      state.disk_status_loaded = true;
-      renderCards();
-    }
-  } catch (e) {
-    // Silent — pills just won't render. Don't block dashboard.
-    console.warn('disk-status probe failed:', e.message);
+// Populate the download box's autocomplete with known recipes not currently on
+// disk — so common/bundled models stay discoverable without phantom menu cards.
+function populateDownloadSuggestions() {
+  const dl = el('#dl-suggestions');
+  if (!dl) return;
+  const onDiskRepos = new Set(Object.values(state.models).map(m => m.repo).filter(Boolean));
+  dl.innerHTML = '';
+  for (const r of state.recipes || []) {
+    if (onDiskRepos.has(r.repo)) continue;
+    const opt = document.createElement('option');
+    opt.value = r.repo;
+    opt.label = `${r.display_name} (${r.mode})`;
+    dl.appendChild(opt);
  }
 }

@@ -1201,14 +1221,12 @@ function fmtBytesShort(n) {

 function openDiskDeleteDialog(key) {
  const m = state.models[key];
-  const disk = state.disk_status[key];
-  if (!m || !disk || !disk.on_disk) return;
+  if (!m || !m.on_disk) return;
  const dlg = el('#disk-delete-dialog');
-  el('#dd-summary').innerHTML = `Free <strong>${fmtBytesShort(disk.total_bytes)}</strong> by removing <strong>${escapeHtml(m.display_name)}</strong> (<code>${escapeHtml(m.repo)}</code>) from disk.`;
+  el('#dd-summary').innerHTML = `Free <strong>${fmtBytesShort(m.total_bytes)}</strong> by removing <strong>${escapeHtml(m.display_name)}</strong> (<code>${escapeHtml(m.repo)}</code>) from the Sparks. This also takes it off the menu.`;
  const hostsEl = el('#dd-hosts');
  hostsEl.innerHTML = '';
-  for (const h of (disk.per_host || [])) {
-    if (!h.on_disk) continue;
+  for (const h of (m.per_host || [])) {
    const li = document.createElement('li');
    li.innerHTML = `<code>${escapeHtml(h.host)}</code> — ${fmtBytesShort(h.size_bytes)}`;
    hostsEl.appendChild(li);
@@ -1227,20 +1245,19 @@ function openDiskDeleteDialog(key) {
    try {
      const r = await fetchJSON(`/api/models/${encodeURIComponent(key)}/disk`, { method: 'DELETE' });
      dlg.close();
-      // Optimistically clear local disk state for this key, then refresh.
-      delete state.disk_status[key];
+      // Optimistically drop the card, then re-scan the menu (it's gone from disk).
+      delete state.models[key];
      renderCards();
-      // Eagerly re-probe so size is accurate (and shows "not downloaded" pill).
-      loadDiskStatus();
+      await loadModels();
      const freed = r && typeof r.bytes_freed === 'number' ? fmtBytesShort(r.bytes_freed) : '';
-      console.log(`Deleted ${m.display_name} from disk${freed ? ` — freed ${freed}` : ''}.`);
+      console.log(`Removed ${m.display_name} from disk${freed ? ` — freed ${freed}` : ''}.`);
    } catch (e) {
      errEl.textContent = e.message || 'Delete failed';
      errEl.classList.remove('hidden');
    } finally {
      confirm.disabled = false;
      cancel.disabled = false;
-      confirm.textContent = 'Delete from disk';
+      confirm.textContent = 'Remove from disk & menu';
    }
  };
  cancel.onclick = onCancel;
@@ -1341,38 +1358,6 @@ async function releaseLock() {
  pollCoordination();
 }

-async function triggerDownloadForKey(modelKey) {
-  const m = state.models[modelKey];
-  if (!m) return;
-  if (dlState.job_id) {
-    alert('A download is already in progress; wait for it to finish.');
-    return;
-  }
-  // Pick the download target from the model's mode:
-  //   solo    -> spark1 only
-  //   cluster -> both Sparks (fetch on Spark 1, rsync to Spark 2 in parallel)
-  const dlMode = m.mode === 'cluster' ? 'cluster' : 'spark1';
-  const sizeNote = m.size_gb ? ` (~${m.size_gb} GB)` : '';
-  const target = m.mode === 'cluster' ? 'both Sparks' : 'Spark 1';
-  if (!confirm(`Download "${m.display_name}"${sizeNote} to ${target}? Large models can take a while; you can watch progress in the download panel.`)) {
-    return;
-  }
-  dlState.last_repo = m.repo;
-  dlState.last_mode = dlMode;
-  try {
-    const r = await fetchJSON('/api/download', {
-      method: 'POST',
-      headers: { 'content-type': 'application/json' },
-      body: JSON.stringify({ repo: m.repo, mode: dlMode }),
-    });
-    // Open the download panel + attach to progress stream
-    openDownloadForm();
-    attachToDownload(r.job_id);
-  } catch (e) {
-    alert('Failed to start download: ' + e.message);
-  }
-}
-
 async function attachToSwap(jobId, needsBackfill) {
  if (state.swap_eventsource) {
    state.swap_eventsource.close();
@@ -1603,12 +1588,14 @@ function handleDownloadDone(d) {
    el('#dl-title').textContent = 'Done';
    el('#dl-phase').textContent = 'Done ✓';
    el('#dl-progress-fill').style.width = '100%';
-    // Offer to add to catalog
+    // The new model now appears on the menu (the menu is the disk). If it matched
+    // a known recipe it's ready to switch to; if not, offer to set it up.
    const repo = dlState.last_repo;
-    const mode = dlState.last_mode;
-    if (repo) {
-      setTimeout(() => openCatalogDialog(repo, mode), 600);
-    }
+    loadModels().then(() => {
+      if (!repo) return;
+      const entry = Object.values(state.models).find(m => m.repo === repo);
+      if (entry && entry.needs_setup) setTimeout(() => openSetupDialog(repo, { thenSwap: false }), 600);
+    });
  }
  dlState.job_id = null;
 }
@@ -1721,21 +1708,67 @@ function openAdvanced(key) {
  dlg.showModal();
 }

-function openCatalogDialog(repo, mode) {
+// Context carried from openSetupDialog -> the submit handler: the model's key
+// and repo, the inferred launch flags (parsers/MoE backend), and whether to swap after saving.
+let setupCtx = { key: '', repo: '', vllm_args: [], thenSwap: false };
+
+// "Set up & switch" on a needs-setup card.
+async function openSetupForKey(key) {
+  const m = state.models[key];
+  if (!m) return;
+  if (state.lock && state.lock.held) {
+    const until = state.lock.expires_at ? ' until ' + fmtClock(state.lock.expires_at) : '';
+    alert(`The GPU swap path is reserved by ${state.lock.holder || 'automation'}${until}. Use "Release" on the reservation banner to override.`);
+    return;
+  }
+  await openSetupDialog(m.repo, { thenSwap: true });
+}
+
+// Open the "set up this model" dialog, prefilled from inference (config.json +
+// size). The operator confirms once; on save the recipe persists and (if
+// thenSwap) we switch to it.
+async function openSetupDialog(repo, opts = {}) {
  const dlg = el('#catalog-dialog');
-  const key = repo.split('/').pop().toLowerCase().replace(/[^a-z0-9_-]/g, '-');
-  el('#cd-key').value = key;
-  el('#cd-name').value = repo.split('/').pop();
+  let sug = null;
+  try {
+    sug = await fetchJSON(`/api/models/suggest?repo=${encodeURIComponent(repo)}`);
+  } catch (e) {
+    console.warn('recipe suggestion failed:', e.message);
+  }
+  const fallbackKey = repo.toLowerCase().replace(/[^a-z0-9_-]+/g, '-').replace(/^-+|-+$/g, '');
+  setupCtx = {
+    key: (sug && sug.key) || fallbackKey,
+    repo,
+    vllm_args: (sug && sug.vllm_args) || [],
+    thenSwap: !!opts.thenSwap,
+  };
+  el('#cd-key').value = setupCtx.key;
+  el('#cd-name').value = (sug && sug.display_name) || repo.split('/').pop();
  el('#cd-repo').value = repo;
  el('#cd-size').value = '';
-  el('#cd-mode').value = mode || 'solo';
+  el('#cd-mode').value = (sug && sug.mode) || 'solo';
  el('#cd-desc').value = '';
-  el('#cd-mml').value = 32768;
-  el('#cd-gmu').value = 0.85;
-  el('#cd-gmu-out').value = '0.85';
-  el('#cd-fst').checked = true;
-  el('#cd-pcache').checked = true;
-  el('#cd-fp8').checked = true;
+  const knobs = (sug && sug.knobs) || {};
+  el('#cd-mml').value = knobs.max_model_len || 32768;
+  el('#cd-gmu').value = knobs.gpu_memory_utilization || 0.85;
+  el('#cd-gmu-out').value = parseFloat(el('#cd-gmu').value).toFixed(2);
+  el('#cd-fst').checked = knobs.fastsafetensors !== false;
+  el('#cd-pcache').checked = knobs.prefix_caching !== false;
+  el('#cd-fp8').checked = (knobs.kv_cache_dtype || 'fp8') === 'fp8';
+
+  const det = el('#cd-detected');
+  if (det) {
+    if (sug) {
+      const caps = (sug.capabilities || []).join(', ');
+      const flags = setupCtx.vllm_args.length ? `: <code>${escapeHtml(setupCtx.vllm_args.join(' '))}</code>` : '';
+      det.innerHTML = `Detected <strong>${escapeHtml(sug.family || 'Generic')}</strong>${caps ? ` · ${escapeHtml(caps)}` : ''}. Launch flags set automatically${flags}.`;
+    } else {
+      det.textContent = "Couldn't auto-detect this model's settings — pick mode and knobs manually.";
+    }
+    det.classList.remove('hidden');
+  }
+  const submit = el('#cd-submit');
+  if (submit) submit.textContent = setupCtx.thenSwap ? 'Save & switch' : 'Save settings';
  dlg.showModal();
 }

@@ -1745,13 +1778,15 @@ function setupCatalogDialog() {
  el('#catalog-form').addEventListener('submit', async (e) => {
    e.preventDefault();
    const body = {
-      key: el('#cd-key').value.trim(),
+      key: el('#cd-key').value.trim() || setupCtx.key,
      display_name: el('#cd-name').value.trim(),
      repo: el('#cd-repo').value.trim(),
      size_gb: parseFloat(el('#cd-size').value) || 0,
      mode: el('#cd-mode').value,
      description: el('#cd-desc').value.trim() || null,
-      vllm_args: [],
+      // The inferred family flags (parsers / MoE backend); knob-controlled flags
+      // are layered on by the server from `knobs`, so no duplication.
+      vllm_args: setupCtx.vllm_args || [],
      knobs: {
        max_model_len: parseInt(el('#cd-mml').value, 10) || 32768,
        gpu_memory_utilization: parseFloat(el('#cd-gmu').value),
@@ -1769,8 +1804,9 @@ function setupCatalogDialog() {
      el('#catalog-dialog').close();
      closeDownloadPanel();
      await loadModels();
+      if (setupCtx.thenSwap) triggerSwap(body.key);
      pollStatus();
-    } catch (e) { alert('Add to catalog failed: ' + e.message); }
+    } catch (e) { alert('Saving the model setup failed: ' + e.message); }
  });
 }

@@ -2212,21 +2248,22 @@ async function init() {
  } catch {}
  setupDashboardTabs();
  setupEndpointCollapse();
-  await loadModels();
+  // Fire the (SSH-backed) menu scan without awaiting — it self-renders a
+  // "Scanning…" state and fills in when it returns, so a slow/unreachable
+  // cluster never blocks first paint. pollStatus() below paints the rest.
+  loadModels();
  await pollStatus();
  await renderServices();
  pollCoordination();
  pollHardware();
  pollUpdates();
-  // Disk-status probe runs after first paint — slow over SSH and not blocking.
-  loadDiskStatus();
  // Speech-model patches panel — slow over SSH, runs after first paint.
  renderSpeechModels();
  setInterval(pollStatus, 5000);
  setInterval(pollCoordination, 5000); // swap lock + schedule registry
  setInterval(pollHardware, 8000);    // every 8s
  setInterval(pollUpdates, 300000);  // every 5 min
-  setInterval(loadDiskStatus, 60000); // every 60s — disk state changes rarely
+  setInterval(loadModels, 60000); // every 60s — re-scan the Sparks for added/removed models
  setInterval(renderSpeechModels, 120000); // every 2 min — patches change rarely
 }

@@ -241,9 +241,10 @@

      <dialog id="catalog-dialog" class="modal">
        <form method="dialog" class="modal-form" id="catalog-form">
-          <h3>Add downloaded model to catalog</h3>
-          <p class="muted small">It will appear as a new card you can swap to. Knob values become its default launch flags — you can tweak later via the model's "Advanced" panel.</p>
-          <label class="modal-row"><span>Key (URL-safe id)</span><input type="text" id="cd-key" required pattern="[a-zA-Z0-9_-]+"></label>
+          <h3>Set up this model</h3>
+          <p class="muted small">This model is downloaded, but Spark Control needs to know how to launch it. We've guessed from the model's own files — confirm or adjust, and it's saved so you're never asked again.</p>
+          <p id="cd-detected" class="muted small cd-detected hidden"></p>
+          <label class="modal-row"><span>Key (URL-safe id)</span><input type="text" id="cd-key" required pattern="[a-zA-Z0-9_-]+" readonly></label>
          <label class="modal-row"><span>Display name</span><input type="text" id="cd-name" required></label>
          <label class="modal-row"><span>Repo (read-only)</span><input type="text" id="cd-repo" readonly></label>
          <label class="modal-row"><span>Size (GB)</span><input type="number" id="cd-size" step="0.1" min="0"></label>
@@ -264,7 +265,7 @@
          </fieldset>
          <div class="modal-actions">
            <button type="button" id="cd-cancel" class="btn">Cancel</button>
-            <button type="submit" class="btn primary">Add to catalog</button>
+            <button type="submit" id="cd-submit" class="btn primary">Save settings</button>
          </div>
        </form>
      </dialog>
@@ -302,14 +303,14 @@

      <dialog id="disk-delete-dialog" class="modal">
        <form method="dialog" class="modal-form">
-          <h3>Delete model weights from disk?</h3>
+          <h3>Remove this model from the Sparks?</h3>
          <p id="dd-summary" class="muted small"></p>
          <ul class="muted small dd-hosts" id="dd-hosts"></ul>
-          <p class="muted small">This is reversible — you can re-download from the catalog at any time. The catalog entry stays intact.</p>
+          <p class="muted small">This deletes the weights and removes the card from the menu. You can always download it again later (re-downloading restores its saved settings).</p>
          <p id="dd-error" class="muted small dd-error hidden"></p>
          <div class="modal-actions">
            <button type="button" id="dd-cancel" class="btn">Cancel</button>
-            <button type="button" id="dd-confirm" class="btn danger">Delete from disk</button>
+            <button type="button" id="dd-confirm" class="btn danger">Remove from disk &amp; menu</button>
          </div>
        </form>
      </dialog>
@@ -354,11 +355,12 @@
        <div class="download-form" id="download-form">
          <label class="dl-row">
            <span class="dl-label">HuggingFace repo</span>
-            <input type="text" id="dl-repo" placeholder="e.g. RedHatAI/Qwen3.6-35B-A3B-NVFP4" autocomplete="off">
+            <input type="text" id="dl-repo" placeholder="e.g. RedHatAI/Qwen3.6-35B-A3B-NVFP4" autocomplete="off" list="dl-suggestions">
+            <datalist id="dl-suggestions"></datalist>
            <a id="dl-hf-link" class="dl-hf-link hidden" href="#" target="_blank" rel="noopener" title="Open on Hugging Face">↗</a>
          </label>
          <div class="dl-help muted small">
-            <a href="https://huggingface.co/models?other=vllm" target="_blank" rel="noopener">Browse vLLM-compatible models</a>
+            Type any repo, or pick a known one from the list. <a href="https://huggingface.co/models?other=vllm" target="_blank" rel="noopener">Browse vLLM-compatible models</a>
            · NVFP4-quantized models (e.g. <code>RedHatAI/...</code>) are best for Blackwell hardware
          </div>
          <div class="dl-row">
@@ -778,6 +778,12 @@ main {
 .card .local-pill { color: var(--warn); border-color: rgba(245, 158, 11, 0.4); }
 .tag.on-disk { color: var(--accent); border-color: rgba(74, 222, 128, 0.4); }
 .tag.not-on-disk { color: var(--muted); border-color: var(--border); opacity: 0.7; }
+.tag.setup-pill { color: var(--warn); border-color: rgba(245, 158, 11, 0.4); }
+.card.needs-setup { border-style: dashed; }
+.card-actions .btn[data-setup-key] { flex: 1; }
+.empty-menu { grid-column: 1 / -1; padding: 28px 16px; text-align: center; border: 1px dashed var(--border); border-radius: 10px; }
+.cd-detected { padding: 8px 10px; border: 1px solid var(--border); border-radius: 8px; background: rgba(255,255,255,0.02); }
+.cd-detected code { word-break: break-all; }
 .card-actions .icon-btn.danger { color: var(--error); border-color: rgba(239, 68, 68, 0.3); margin-left: auto; }
 .card-actions .icon-btn.danger:hover:not(:disabled) { background: rgba(239, 68, 68, 0.08); border-color: var(--error); color: var(--error); }
 .card-actions .icon-btn.danger:disabled { opacity: 0.35; cursor: not-allowed; }