v0.23.0:0 - local / fine-tuned model support
Add models that live as a directory on a Spark (e.g. LoRA-merged fine-tunes), not just Hugging Face repos. - ModelDef gains local_path; a model must set exactly one of repo / local_path. The validator also enforces the local-path whitelist and that any --chat-template lives inside local_path (only that dir is mounted). - build_launch_command bind-mounts the dir into the vLLM container at the SAME host==container path via the launch script's VLLM_SPARK_EXTRA_DOCKER_ARGS hook, then `vllm serve <dir>`. No launch-cluster.sh change (verified the upstream expands that var unquoted; contract noted in runbook.md). - shellsafe.validate_local_path: absolute path, charset whitelist, no '.'/'..'. - POST /api/models validates the full entry via ModelDef before persisting, so a bad entry can't be written and then break catalog load; _merge_overrides skips an invalid override entry instead of failing the whole catalog. - disk.py size-probes a local path with du; disk-delete refused for local models. - UI: "+ Add local model" dialog, `local` badge, path shown instead of an HF link, delete button hidden for local models. - Tests: local launch + injection round-trip, chat-template location, traversal, exactly-one-source, _merge_overrides skip-invalid (94 pass). Reviewer-agent pass; findings addressed.
This commit is contained in:
+41
-4
@@ -15,6 +15,7 @@ from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
from .config import Settings
|
||||
from .shellsafe import quote_arg
|
||||
from .ssh import ssh_run
|
||||
|
||||
|
||||
@@ -76,16 +77,52 @@ async def probe_host(host: str, user: str, repo: str, settings: Settings) -> Hos
|
||||
return HostDiskResult(host=host, on_disk=True, size_bytes=size)
|
||||
|
||||
|
||||
async def probe_disk(repo: str, mode: str, settings: Settings) -> DiskStatus:
|
||||
"""Probe one model across the relevant Sparks based on its mode (solo|cluster)."""
|
||||
async def probe_local_host(host: str, user: str, path: str, settings: Settings) -> HostDiskResult:
    """Check one host for a local model directory and report its size.

    Used for locally fine-tuned models, which are a plain directory on the
    Spark rather than an entry in the HF cache. The path is expected to have
    been whitelisted at the API boundary (shellsafe.validate_local_path); it is
    shell-quoted again here as defense in depth.

    Returns a HostDiskResult with:
      * ``error="host not configured"`` when host or user is empty,
      * ``on_disk=False`` when the directory is missing or du printed nothing,
      * ``on_disk=False`` plus an error when the SSH command exits non-zero or
        the size cannot be parsed,
      * ``on_disk=True`` and ``size_bytes`` otherwise.
    """
    if not (host and user):
        return HostDiskResult(host=host or "?", on_disk=False, error="host not configured")

    quoted = quote_arg(path)
    # One round-trip: print the size in bytes if the directory exists,
    # otherwise print the MISSING sentinel.
    remote_cmd = (
        f"if [ -d {quoted} ]; then du -sb {quoted} 2>/dev/null | cut -f1; "
        "else echo MISSING; fi"
    )
    rc, out, err = await ssh_run(host, user, remote_cmd, settings, timeout=20.0)
    if rc != 0:
        detail = (err or out).strip() or f"rc={rc}"
        return HostDiskResult(host=host, on_disk=False, error=detail)

    text = out.strip()
    if text in ("", "MISSING"):
        return HostDiskResult(host=host, on_disk=False)

    # Only the last output line is parsed as the size.
    last_line = text.splitlines()[-1]
    try:
        size = int(last_line)
    except ValueError:
        return HostDiskResult(host=host, on_disk=False, error=f"unparsable du output: {text!r}")
    return HostDiskResult(host=host, on_disk=True, size_bytes=size)
|
||||
|
||||
|
||||
async def probe_disk(
    repo: str, mode: str, settings: Settings, *, local_path: str | None = None
) -> DiskStatus:
    """Probe one model across the relevant Sparks based on its mode (solo|cluster).

    A local model (``local_path`` set) is probed by directory via
    ``probe_local_host``; otherwise the HF cache is probed for ``repo`` via
    ``probe_host``. Exactly one of the two probes runs per host.

    Spark 1 is always probed; Spark 2 is added only when ``mode`` is
    ``"cluster"`` and a second host is configured.

    Returns a DiskStatus whose ``repo`` field carries the probed source
    (``local_path`` for a local model, else ``repo``), with ``on_disk`` true if
    any host has the model and ``total_bytes`` the sum over all hosts.
    """
    hosts: list[tuple[str, str]] = [(settings.spark1_host, settings.spark1_user)]
    if mode == "cluster" and settings.spark2_host:
        hosts.append((settings.spark2_host, settings.spark2_user))

    # Pick the probe once. A local model has no meaningful repo, so an HF-cache
    # probe for it would be a wasted SSH round-trip per host.
    if local_path:
        results = await asyncio.gather(
            *(probe_local_host(h, u, local_path, settings) for h, u in hosts)
        )
        key = local_path
    else:
        results = await asyncio.gather(*(probe_host(h, u, repo, settings) for h, u in hosts))
        key = repo

    on_disk = any(r.on_disk for r in results)
    total = sum(r.size_bytes for r in results)
    return DiskStatus(repo=key, on_disk=on_disk, total_bytes=total, per_host=list(results))
|
||||
|
||||
|
||||
async def delete_host(host: str, user: str, repo: str, settings: Settings) -> HostDiskResult:
|
||||
|
||||
+78
-8
@@ -1,15 +1,33 @@
|
||||
from __future__ import annotations
|
||||
import logging
|
||||
from typing import Literal, Optional
|
||||
import yaml
|
||||
from pydantic import BaseModel, Field
|
||||
from pydantic import BaseModel, Field, model_validator
|
||||
|
||||
from .overrides import apply_knobs_to_args, load_overrides
|
||||
from .shellsafe import quote_arg, quote_args
|
||||
from .shellsafe import quote_arg, quote_args, validate_local_path
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _chat_template_path(vllm_args: list[str]) -> str | None:
|
||||
"""Extract the path from a `--chat-template=<path>` arg, if present."""
|
||||
for a in vllm_args:
|
||||
if a.startswith("--chat-template="):
|
||||
return a.split("=", 1)[1]
|
||||
return None
|
||||
|
||||
|
||||
def _is_within(path: str, base: str) -> bool:
|
||||
"""True if `path` is `base` itself or lives inside it (lexical check)."""
|
||||
base = base.rstrip("/")
|
||||
return path == base or path.startswith(base + "/")
|
||||
|
||||
|
||||
class ModelDef(BaseModel):
|
||||
display_name: str
|
||||
repo: str
|
||||
repo: str = "" # HF 'org/name'; empty for a local model
|
||||
local_path: str | None = None # absolute dir on the Spark; set => local model
|
||||
size_gb: float
|
||||
mode: Literal["solo", "cluster"]
|
||||
capabilities: list[str] = Field(default_factory=list)
|
||||
@@ -19,6 +37,38 @@ class ModelDef(BaseModel):
|
||||
knobs: dict | None = None # user-customized; merged at launch time
|
||||
custom: bool = False # True if this came from /data overrides
|
||||
|
||||
@model_validator(mode="after")
|
||||
def _validate_source(self) -> "ModelDef":
|
||||
if bool(self.repo) == bool(self.local_path):
|
||||
raise ValueError(
|
||||
f"model {self.display_name!r} must set exactly one of 'repo' (HF) "
|
||||
f"or 'local_path' (Spark directory)"
|
||||
)
|
||||
if self.local_path:
|
||||
# Single place that enforces the path whitelist, so YAML/override
|
||||
# entries get the same boundary check as the API. The quote_arg sink
|
||||
# is still defense-in-depth.
|
||||
validate_local_path(self.local_path)
|
||||
# Only local_path is bind-mounted into the vLLM container, so any
|
||||
# --chat-template path must live inside it or vLLM can't find it.
|
||||
tmpl = _chat_template_path(self.vllm_args)
|
||||
if tmpl is not None and not _is_within(tmpl, self.local_path):
|
||||
raise ValueError(
|
||||
f"--chat-template path {tmpl!r} must be inside the model "
|
||||
f"directory {self.local_path!r} (only that directory is mounted "
|
||||
f"into the container)"
|
||||
)
|
||||
return self
|
||||
|
||||
@property
|
||||
def is_local(self) -> bool:
|
||||
return bool(self.local_path)
|
||||
|
||||
@property
|
||||
def source(self) -> str:
|
||||
"""What `vllm serve` is pointed at: the local dir if set, else the HF repo."""
|
||||
return self.local_path if self.local_path else self.repo
|
||||
|
||||
|
||||
class Defaults(BaseModel):
|
||||
port: int = 8888
|
||||
@@ -47,7 +97,8 @@ def _merge_overrides(catalog: Catalog) -> Catalog:
|
||||
continue
|
||||
defaults_dump = {
|
||||
"display_name": entry.get("display_name", key),
|
||||
"repo": entry["repo"],
|
||||
"repo": entry.get("repo", ""),
|
||||
"local_path": entry.get("local_path"),
|
||||
"size_gb": float(entry.get("size_gb", 0)),
|
||||
"mode": entry.get("mode", "solo"),
|
||||
"capabilities": entry.get("capabilities") or [],
|
||||
@@ -57,7 +108,12 @@ def _merge_overrides(catalog: Catalog) -> Catalog:
|
||||
"knobs": entry.get("knobs"),
|
||||
"custom": True,
|
||||
}
|
||||
new_models[key] = ModelDef.model_validate(defaults_dump)
|
||||
# A single malformed override entry (bad path, missing source, etc.) must
|
||||
# not take down the whole catalog — skip it and keep the rest loadable.
|
||||
try:
|
||||
new_models[key] = ModelDef.model_validate(defaults_dump)
|
||||
except Exception as e:
|
||||
log.warning("skipping invalid custom model %r: %s", key, e)
|
||||
|
||||
return Catalog(defaults=catalog.defaults, models=new_models)
|
||||
|
||||
@@ -78,7 +134,21 @@ def build_launch_command(key: str, model: ModelDef, defaults: Defaults) -> str:
|
||||
solo = "--solo " if model.mode == "solo" else ""
|
||||
base_args = apply_knobs_to_args(list(model.vllm_args), model.knobs)
|
||||
args = [f"--port={defaults.port}", f"--host={defaults.host}", *base_args]
|
||||
# repo + args are user-controlled (custom models, knobs); shlex.quote each so
|
||||
# they cannot break out of the SSH shell command. shlex.split (used by the
|
||||
# source + args are user-controlled (custom models, knobs); shlex.quote each
|
||||
# so they cannot break out of the SSH shell command. shlex.split (used by the
|
||||
# vLLM pre-flight validator) cleanly reverses this quoting.
|
||||
return f"./launch-cluster.sh {solo}-d exec vllm serve {quote_arg(model.repo)} {quote_args(args)}"
|
||||
prefix = ""
|
||||
if model.local_path:
|
||||
# A local model's directory isn't in the HF cache the launch script
|
||||
# already mounts, so bind-mount it at the SAME path inside the vllm
|
||||
# container via the script's VLLM_SPARK_EXTRA_DOCKER_ARGS hook. Same
|
||||
# path inside and out means `vllm serve <dir>` and any
|
||||
# `--chat-template=<dir>/...` arg both resolve. No launch-cluster.sh
|
||||
# change needed. (The env assignment sits before the script, so the
|
||||
# validator's `serve`-keyed shlex round-trip is unaffected.)
|
||||
mount = quote_arg(f"-v {model.local_path}:{model.local_path}")
|
||||
prefix = f"VLLM_SPARK_EXTRA_DOCKER_ARGS={mount} "
|
||||
return (
|
||||
f"{prefix}./launch-cluster.sh {solo}-d exec vllm serve "
|
||||
f"{quote_arg(model.source)} {quote_args(args)}"
|
||||
)
|
||||
|
||||
@@ -14,7 +14,7 @@ Shape:
|
||||
custom:
|
||||
- key: my-new-model
|
||||
display_name: My New Model (from download)
|
||||
repo: my-org/my-model
|
||||
repo: my-org/my-model # an HF repo; OR set local_path instead (exactly one)
|
||||
size_gb: 20
|
||||
mode: solo
|
||||
description: null
|
||||
@@ -25,6 +25,12 @@ Shape:
|
||||
fastsafetensors: true
|
||||
prefix_caching: true
|
||||
kv_cache_dtype: fp8
|
||||
- key: my-finetune # a local/fine-tuned model (a directory on the Spark)
|
||||
display_name: My Fine-tune
|
||||
local_path: /home/you/models/my-finetune
|
||||
size_gb: 59
|
||||
mode: solo
|
||||
vllm_args: [--chat-template=/home/you/models/my-finetune/chat_template.jinja]
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import os
|
||||
|
||||
+29
-5
@@ -6,7 +6,7 @@ from pathlib import Path
|
||||
from fastapi import FastAPI, HTTPException, Query, Request
|
||||
from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, ValidationError
|
||||
from typing import Literal
|
||||
|
||||
from .config import Settings
|
||||
@@ -22,7 +22,7 @@ from .redaction_gateway import build_router as build_redaction_router, MapStore
|
||||
from .hardware import HardwareProbe
|
||||
from .health import check_kokoro, check_parakeet, check_vllm, check_embeddings, check_qdrant
|
||||
from .matrix_bridge import MatrixBridgeManager
|
||||
from .models import load_catalog
|
||||
from .models import ModelDef, load_catalog
|
||||
from .nim import SUGGESTED_NIMS, CATALOG_URL, NimManager
|
||||
from .overrides import add_custom, delete_custom, extract_knobs_from_args, load_overrides, set_knobs
|
||||
from .services import docker_state, run_action, services_from_settings
|
||||
@@ -183,7 +183,8 @@ async def put_model_knobs(key: str, body: KnobsBody) -> dict:
|
||||
class CustomModelBody(BaseModel):
|
||||
key: str
|
||||
display_name: str
|
||||
repo: str
|
||||
repo: str = ""
|
||||
local_path: str | None = None
|
||||
size_gb: float = 0
|
||||
mode: Literal["solo", "cluster"] = "solo"
|
||||
description: str | None = None
|
||||
@@ -196,8 +197,17 @@ class CustomModelBody(BaseModel):
|
||||
async def post_model(body: CustomModelBody) -> dict:
|
||||
if not body.key or not body.key.replace("-", "").replace("_", "").isalnum():
|
||||
raise HTTPException(400, "key must be alphanumeric/-/_ only")
|
||||
# Validate the full entry BEFORE persisting (exactly-one source, local-path
|
||||
# whitelist, chat-template location). Doing it via ModelDef means the API and
|
||||
# the YAML-override path share one set of rules, and a bad entry can't be
|
||||
# written to /data and then break catalog load.
|
||||
try:
|
||||
validate_repo(body.repo)
|
||||
ModelDef.model_validate(body.model_dump())
|
||||
if body.repo:
|
||||
validate_repo(body.repo) # HF charset (the model only validates local paths)
|
||||
except ValidationError as e:
|
||||
msg = e.errors()[0]["msg"] if e.errors() else str(e)
|
||||
raise HTTPException(400, msg.removeprefix("Value error, "))
|
||||
except ValueError as e:
|
||||
raise HTTPException(400, str(e))
|
||||
if body.key in catalog.models and not catalog.models[body.key].custom:
|
||||
@@ -229,7 +239,13 @@ async def get_models_disk_status() -> dict:
|
||||
return {"configured": False, "models": {}}
|
||||
keys = list(catalog.models.keys())
|
||||
statuses = await asyncio.gather(*(
|
||||
probe_disk(catalog.models[k].repo, catalog.models[k].mode, settings) for k in keys
|
||||
probe_disk(
|
||||
catalog.models[k].repo,
|
||||
catalog.models[k].mode,
|
||||
settings,
|
||||
local_path=catalog.models[k].local_path,
|
||||
)
|
||||
for k in keys
|
||||
), return_exceptions=True)
|
||||
out: dict[str, dict] = {}
|
||||
for k, s in zip(keys, statuses):
|
||||
@@ -260,6 +276,14 @@ async def del_model_disk(key: str) -> dict:
|
||||
raise HTTPException(404, f"unknown model: {key}")
|
||||
m = catalog.models[key]
|
||||
|
||||
# Never rm a local fine-tune directory from the dashboard — it's irreplaceable
|
||||
# training output the user placed by hand, not a re-downloadable HF cache.
|
||||
if m.local_path:
|
||||
raise HTTPException(
|
||||
400,
|
||||
"this is a local model; its directory must be managed on the Spark, not deleted from here",
|
||||
)
|
||||
|
||||
# Refuse if currently loaded
|
||||
try:
|
||||
vllm = await check_vllm(settings)
|
||||
|
||||
@@ -28,6 +28,12 @@ _IMAGE_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._:/@-]*$")
|
||||
# Docker container / volume name (Docker's own rule).
|
||||
_CONTAINER_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9_.-]*$")
|
||||
|
||||
# Absolute filesystem path to a local model directory on a Spark. Conservative
|
||||
# charset (letters, digits, and safe path punctuation) with a required leading
|
||||
# '/', so it carries no shell metacharacters and no whitespace. Traversal ('.'
|
||||
# and '..' segments) is rejected separately in validate_local_path.
|
||||
_LOCAL_PATH_RE = re.compile(r"^/[A-Za-z0-9._+/-]+$")
|
||||
|
||||
|
||||
def validate_repo(repo: str) -> str:
|
||||
"""Return `repo` if it is a well-formed 'org/name'; else raise ValueError."""
|
||||
@@ -50,6 +56,25 @@ def validate_container(name: str) -> str:
|
||||
return name
|
||||
|
||||
|
||||
def validate_local_path(path: str) -> str:
    """Boundary check for a local model directory path.

    Accepts `path` only when it is at most 512 characters, matches the
    module's local-path pattern (absolute, conservative charset), and has no
    '.' or '..' segment, so a caller cannot traverse out of an intended models
    directory. This is the boundary check; the `quote_arg` sink still quotes
    the value in depth.

    Returns the path unchanged when it passes (a falsy input is treated as the
    empty string, which is rejected). Raises ValueError otherwise.
    """
    candidate = path if path else ""

    too_long = len(candidate) > 512
    if too_long or _LOCAL_PATH_RE.fullmatch(candidate) is None:
        raise ValueError(
            f"invalid local model path (expected an absolute path, no spaces or "
            f"shell metacharacters): {path!r}"
        )

    for segment in candidate.split("/"):
        if segment == "." or segment == "..":
            raise ValueError(
                f"local model path must not contain '.' or '..' segments: {path!r}"
            )

    return candidate
|
||||
|
||||
|
||||
def quote_arg(value: object) -> str:
    """Render `value` as one shell-safe token.

    The value is converted with ``str()`` first, so non-string inputs such as
    numbers are accepted, and then passed through ``shlex.quote``.
    """
    token = str(value)
    return shlex.quote(token)
|
||||
|
||||
+67
-2
@@ -60,6 +60,7 @@ function renderCards() {
|
||||
? `<div class="desc">${escapeHtml(m.description)}</div>`
|
||||
: '';
|
||||
const customPill = m.custom ? `<span class="tag custom-pill">custom</span>` : '';
|
||||
const localPill = m.local_path ? `<span class="tag local-pill" title="Served from a directory on the Spark, not Hugging Face">local</span>` : '';
|
||||
// Disk-presence pill + trash button. Until /api/models/disk-status comes back,
|
||||
// we don't know — render a neutral placeholder.
|
||||
const disk = state.disk_status[key];
|
||||
@@ -73,8 +74,10 @@ function renderCards() {
|
||||
}
|
||||
}
|
||||
// Trash button — hidden if not on disk; disabled (with tooltip) if currently loaded.
|
||||
// Never offered for local models: their directory is hand-placed training output,
|
||||
// not a re-downloadable HF cache (the server refuses the delete too).
|
||||
let trashBtn = '';
|
||||
if (state.disk_status_loaded && disk && disk.on_disk) {
|
||||
if (state.disk_status_loaded && disk && disk.on_disk && !m.local_path) {
|
||||
const disabled = isActive || isSwapping;
|
||||
const tip = isActive
|
||||
? 'Currently loaded — switch to another model first'
|
||||
@@ -92,6 +95,9 @@ function renderCards() {
|
||||
primaryBtn = `<button class="btn" disabled>Current</button>`;
|
||||
} else if (isOnDisk) {
|
||||
primaryBtn = `<button class="btn primary" data-swap-key="${key}" ${isSwapping ? 'disabled' : ''}>Switch to this</button>`;
|
||||
} else if (m.local_path) {
|
||||
// A local model can't be "downloaded" — its directory has to exist on the Spark.
|
||||
primaryBtn = `<button class="btn" disabled title="Directory not found on the Spark — create it there, then refresh">Not found on Spark</button>`;
|
||||
} else {
|
||||
const tip = dlInFlight ? 'A download is already in progress' : 'Download weights to the Spark(s)';
|
||||
primaryBtn = `<button class="btn info" data-download-key="${key}" title="${escapeHtml(tip)}" ${dlInFlight ? 'disabled' : ''}>Download</button>`;
|
||||
@@ -102,12 +108,15 @@ function renderCards() {
|
||||
<span class="tag mode-${m.mode}">${m.mode}</span>
|
||||
<span class="tag">${m.size_gb} GB</span>
|
||||
${customPill}
|
||||
${localPill}
|
||||
${diskPill}
|
||||
${(m.capabilities || []).map(c => `<span class="tag cap">${escapeHtml(c)}</span>`).join('')}
|
||||
</div>
|
||||
${desc}
|
||||
<div class="muted small repo">
|
||||
<a href="https://huggingface.co/${encodeURIComponent(m.repo)}" target="_blank" rel="noopener" title="View on Hugging Face">${escapeHtml(m.repo)} <span class="hf-icon">↗</span></a>
|
||||
${m.local_path
|
||||
? `<span class="local-path" title="Local model directory on the Spark">${escapeHtml(m.local_path)}</span>`
|
||||
: `<a href="https://huggingface.co/${encodeURIComponent(m.repo)}" target="_blank" rel="noopener" title="View on Hugging Face">${escapeHtml(m.repo)} <span class="hf-icon">↗</span></a>`}
|
||||
</div>
|
||||
<div class="spacer"></div>
|
||||
<div class="card-actions">
|
||||
@@ -1671,6 +1680,60 @@ function setupAdvancedDialog() {
|
||||
el('#adv-gmu').addEventListener('input', (e) => { el('#adv-gmu-out').value = parseFloat(e.target.value).toFixed(2); });
|
||||
}
|
||||
|
||||
// Reset every field of the "Add local model" dialog to its default, then show
// the dialog modally, so values from a previous entry never carry over.
function openLocalModelDialog() {
  const dialog = el('#local-model-dialog');

  // selector -> default `.value`, applied in this order.
  const valueDefaults = {
    '#lm-key': '',
    '#lm-name': '',
    '#lm-path': '',
    '#lm-chat': '',
    '#lm-size': '',
    '#lm-mode': 'solo',
    '#lm-desc': '',
    '#lm-mml': 32768,
    '#lm-gmu': 0.85,
    '#lm-gmu-out': '0.85',
  };
  for (const [selector, value] of Object.entries(valueDefaults)) {
    el(selector).value = value;
  }

  // All launch-knob checkboxes start ticked.
  for (const selector of ['#lm-fst', '#lm-pcache', '#lm-fp8']) {
    el(selector).checked = true;
  }

  dialog.showModal();
}
|
||||
|
||||
// Wire up the "Add local model" dialog: the cancel button, the live readout of
// the GPU-memory slider, and the form submit that POSTs the new entry to
// /api/models.
function setupLocalModelDialog() {
  const closeDialog = () => el('#local-model-dialog').close();
  const trimmedValue = (selector) => el(selector).value.trim();

  // Build the request payload from the dialog's current field values.
  const readForm = () => {
    const chatTemplate = trimmedValue('#lm-chat');
    return {
      key: trimmedValue('#lm-key'),
      display_name: trimmedValue('#lm-name'),
      local_path: trimmedValue('#lm-path'),
      size_gb: parseFloat(el('#lm-size').value) || 0,
      mode: el('#lm-mode').value,
      description: trimmedValue('#lm-desc') || null,
      // A fine-tune's chat template (if any) rides along as a launch flag.
      vllm_args: chatTemplate ? [`--chat-template=${chatTemplate}`] : [],
      knobs: {
        max_model_len: parseInt(el('#lm-mml').value, 10) || 32768,
        gpu_memory_utilization: parseFloat(el('#lm-gmu').value),
        fastsafetensors: el('#lm-fst').checked,
        prefix_caching: el('#lm-pcache').checked,
        kv_cache_dtype: el('#lm-fp8').checked ? 'fp8' : 'auto',
      },
    };
  };

  // Submit handler: save the entry, then close the dialog and refresh the
  // model list; any failure is shown to the user and the dialog stays open.
  const onSubmit = async (event) => {
    event.preventDefault();
    const payload = readForm();
    try {
      await fetchJSON('/api/models', {
        method: 'POST',
        headers: { 'content-type': 'application/json' },
        body: JSON.stringify(payload),
      });
      closeDialog();
      await loadModels();
      pollStatus();
    } catch (err) {
      alert('Add local model failed: ' + err.message);
    }
  };

  el('#lm-cancel').addEventListener('click', () => closeDialog());
  el('#lm-gmu').addEventListener('input', (event) => {
    // Mirror the slider position as a two-decimal number next to it.
    el('#lm-gmu-out').value = parseFloat(event.target.value).toFixed(2);
  });
  el('#local-model-form').addEventListener('submit', onSubmit);
}
|
||||
|
||||
// ===================== NIM installer =====================
|
||||
|
||||
const nimState = {
|
||||
@@ -2034,8 +2097,10 @@ async function init() {
|
||||
if (kbtn) { copySparkSshKey(kbtn.dataset.sshKey, kbtn); return; }
|
||||
});
|
||||
el('#sshkey-close').addEventListener('click', () => el('#sshkey-dialog').close());
|
||||
el('#open-local').addEventListener('click', openLocalModelDialog);
|
||||
setupCatalogDialog();
|
||||
setupAdvancedDialog();
|
||||
setupLocalModelDialog();
|
||||
// Open WebUI link from /api/config
|
||||
try {
|
||||
state.config = await fetchJSON('/api/config');
|
||||
|
||||
@@ -229,6 +229,7 @@
|
||||
<div class="section-header">
|
||||
<h2 class="section-title">LLM swap</h2>
|
||||
<button id="open-download" class="btn small-btn">+ Download a new model</button>
|
||||
<button id="open-local" class="btn small-btn">+ Add local model</button>
|
||||
</div>
|
||||
|
||||
<dialog id="catalog-dialog" class="modal">
|
||||
@@ -261,6 +262,37 @@
|
||||
</form>
|
||||
</dialog>
|
||||
|
||||
<dialog id="local-model-dialog" class="modal">
|
||||
<form method="dialog" class="modal-form" id="local-model-form">
|
||||
<h3>Add a local / fine-tuned model</h3>
|
||||
<p class="muted small">For a model that lives as a directory on a Spark (e.g. a fine-tune), not a Hugging Face repo. The directory is bind-mounted into the vLLM container at the same path when you swap to it. It must already exist on the Spark.</p>
|
||||
<label class="modal-row"><span>Key (URL-safe id)</span><input type="text" id="lm-key" required pattern="[a-zA-Z0-9_-]+"></label>
|
||||
<label class="modal-row"><span>Display name</span><input type="text" id="lm-name" required></label>
|
||||
<label class="modal-row"><span>Model directory (absolute path on the Spark)</span><input type="text" id="lm-path" required placeholder="e.g. /home/you/models/my-finetune"></label>
|
||||
<label class="modal-row"><span>Chat template path (optional)</span><input type="text" id="lm-chat" placeholder="e.g. /home/you/models/my-finetune/chat_template.jinja"></label>
|
||||
<label class="modal-row"><span>Size (GB)</span><input type="number" id="lm-size" step="0.1" min="0"></label>
|
||||
<label class="modal-row"><span>Mode</span>
|
||||
<select id="lm-mode">
|
||||
<option value="solo">solo (Spark 1 only)</option>
|
||||
<option value="cluster">cluster (both Sparks via Ray)</option>
|
||||
</select>
|
||||
</label>
|
||||
<label class="modal-row"><span>Description (optional)</span><textarea id="lm-desc" rows="3"></textarea></label>
|
||||
<fieldset class="modal-fieldset">
|
||||
<legend>Default launch knobs</legend>
|
||||
<label class="modal-row"><span>Max context (tokens)</span><input type="number" id="lm-mml" step="1024" min="1024" value="32768"></label>
|
||||
<label class="modal-row"><span>GPU memory %</span><input type="range" id="lm-gmu" min="0.5" max="0.95" step="0.01" value="0.85"> <output id="lm-gmu-out">0.85</output></label>
|
||||
<label class="modal-row inline"><input type="checkbox" id="lm-fst" checked> Fast safetensors loading</label>
|
||||
<label class="modal-row inline"><input type="checkbox" id="lm-pcache" checked> Prefix caching</label>
|
||||
<label class="modal-row inline"><input type="checkbox" id="lm-fp8" checked> FP8 KV cache</label>
|
||||
</fieldset>
|
||||
<div class="modal-actions">
|
||||
<button type="button" id="lm-cancel" class="btn">Cancel</button>
|
||||
<button type="submit" class="btn primary">Add local model</button>
|
||||
</div>
|
||||
</form>
|
||||
</dialog>
|
||||
|
||||
<dialog id="disk-delete-dialog" class="modal">
|
||||
<form method="dialog" class="modal-form">
|
||||
<h3>Delete model weights from disk?</h3>
|
||||
|
||||
@@ -694,6 +694,7 @@ main {
|
||||
.card .repo a { color: inherit; text-decoration: none; }
|
||||
.card .repo a:hover { color: var(--info); text-decoration: underline; }
|
||||
.card .repo .hf-icon { font-size: 13px; opacity: 0.7; }
|
||||
.card .repo .local-path { font-family: var(--mono, ui-monospace, monospace); opacity: 0.85; }
|
||||
.tag {
|
||||
background: var(--surface-2);
|
||||
border: 1px solid var(--border);
|
||||
@@ -738,6 +739,7 @@ main {
|
||||
.card .adv-btn,
|
||||
.card .test-btn { padding: 8px 12px; font-size: 12px; }
|
||||
.card .custom-pill { color: var(--info); border-color: rgba(96, 165, 250, 0.4); }
|
||||
.card .local-pill { color: var(--warn); border-color: rgba(245, 158, 11, 0.4); }
|
||||
.tag.on-disk { color: var(--accent); border-color: rgba(74, 222, 128, 0.4); }
|
||||
.tag.not-on-disk { color: var(--muted); border-color: var(--border); opacity: 0.7; }
|
||||
.card-actions .icon-btn.danger { color: var(--error); border-color: rgba(239, 68, 68, 0.3); margin-left: auto; }
|
||||
|
||||
Reference in New Issue
Block a user