@app.post("/api/swap/{key}/validate")
async def validate_swap(key: str) -> dict:
    """Pre-flight a swap to ``key`` without actually performing it.

    Hands the catalog key, the catalog and the current settings to
    ``validate_launch``, which runs vLLM's argparse layer against the proposed
    launch command WITHOUT starting an engine. Cheap (~5 s) and leaves the
    currently-loaded model undisturbed.

    Returns whatever verdict dict ``validate_launch`` produces.
    """
    verdict = await validate_launch(key, catalog, settings)
    return verdict
/**
 * Run the server-side pre-flight check for one catalog entry and render the
 * verdict into that card's result box.
 *
 * While the request is in flight the triggering button is disabled and
 * relabelled; its original label and enabled state are restored afterwards,
 * whether the request succeeded or not.
 *
 * @param {string} key - Catalog key; POSTed to /api/swap/{key}/validate.
 * @param {HTMLButtonElement} btn - The button that started the test.
 * @returns {Promise<void>}
 */
async function testLaunch(key, btn) {
  // CSS.escape keeps a key containing quotes or backslashes from producing an
  // invalid selector (querySelector would throw before we reach the try block).
  const resultEl = document.querySelector(
    `[data-test-result-for="${CSS.escape(String(key))}"]`
  );
  if (!resultEl) return;

  const originalText = btn.textContent;
  btn.disabled = true;
  btn.textContent = 'Testing…';
  resultEl.classList.remove('hidden', 'ok', 'fail');
  resultEl.innerHTML = 'Checking launch args against vLLM\'s parser…';

  try {
    const r = await fetchJSON(`/api/swap/${encodeURIComponent(key)}/validate`, { method: 'POST' });
    if (r.ok) {
      resultEl.classList.add('ok');
      // The mark is wrapped so the stylesheet's `.test-result .ok-mark` rule applies.
      resultEl.innerHTML = `<span class="ok-mark">✓</span> Launch args parse OK. (Doesn't guarantee runtime success — only catches argparse-level issues.)`;
    } else {
      resultEl.classList.add('fail');
      // Server-provided text is escaped before it goes through innerHTML.
      const err = escapeHtml(r.error || 'unknown error');
      const stage = r.stage ? ` (${escapeHtml(r.stage)})` : '';
      resultEl.innerHTML = `<span class="fail-mark">✗</span> Would fail: ${err}${stage}`;
    }
  } catch (e) {
    // Anything can be thrown; only Error-like values carry a usable .message.
    const message = e && e.message ? e.message : String(e);
    resultEl.classList.add('fail');
    resultEl.innerHTML = `<span class="fail-mark">✗</span> Test failed: ${escapeHtml(message)}`;
  } finally {
    btn.disabled = false;
    btn.textContent = originalText;
  }
}
# Validates the proposed args against the same combined parser vLLM uses for
# `vllm serve` (engine args + server args + frontend args).
#
# Input:  a JSON array of argument strings on stdin.
# Output: one JSON line on stdout, {"ok": true, ...} or
#         {"ok": false, "stage": ..., "error": ...}, where "stage" is
#           "input"  - stdin was not a JSON array of strings,
#           "import" - no vLLM argument parser could be constructed,
#           "parse"  - the parser rejected the arguments.
# Every handled path exits with status 0; the verdict is carried by the JSON.
_VALIDATOR_SCRIPT = r"""
import argparse
import json
import sys


def _report(**fields):
    print(json.dumps(fields))


# Check the payload BEFORE importing vLLM: a malformed request should fail
# fast, and must not be reported as an argparse ("parse") rejection.
try:
    arglist = json.loads(sys.stdin.read())
except ValueError as e:
    _report(ok=False, stage="input", error=f"stdin is not valid JSON: {e}")
    sys.exit(0)
if not isinstance(arglist, list) or not all(isinstance(a, str) for a in arglist):
    # A bare JSON string would otherwise be iterated character by character.
    _report(ok=False, stage="input", error="expected a JSON array of strings")
    sys.exit(0)

# Mirror what `vllm serve` does internally: FlexibleArgumentParser (which is
# more lenient about dashes vs underscores) wrapped with make_arg_parser
# (which adds engine + server + frontend args).
try:
    # Newer vLLM path
    from vllm.utils.argparse_utils import FlexibleArgumentParser
except Exception:
    try:
        # Older fallback
        from vllm.engine.arg_utils import FlexibleArgumentParser
    except Exception:
        FlexibleArgumentParser = argparse.ArgumentParser  # type: ignore

parser = None
combined_error = None
try:
    from vllm.entrypoints.openai.cli_args import make_arg_parser
    parser = make_arg_parser(FlexibleArgumentParser(add_help=False))
except Exception as e:
    # Remember why the preferred path failed; only surfaced if the
    # engine-args-only fallback below fails as well.
    combined_error = f"{type(e).__name__}: {e}"
if parser is None:
    try:
        from vllm.engine.arg_utils import EngineArgs
        parser = FlexibleArgumentParser(add_help=False)
        EngineArgs.add_cli_args(parser)
    except Exception as e:
        _report(
            ok=False,
            stage="import",
            error=f"{type(e).__name__}: {e} (combined parser also failed: {combined_error})",
        )
        sys.exit(0)


class _ArgError(Exception):
    pass


def _err(message):
    raise _ArgError(message)


parser.error = _err  # capture argparse errors instead of sys.exit(2)

try:
    ns = parser.parse_args(arglist)
    _report(ok=True, model=getattr(ns, "model", None))
except _ArgError as e:
    _report(ok=False, stage="parse", error=str(e))
except SystemExit as e:
    _report(ok=False, stage="parse", error=f"argparse exit {e.code}")
except Exception as e:
    _report(ok=False, stage="parse", error=f"{type(e).__name__}: {e}")
"""