v0.23.0:0 - local / fine-tuned model support
Add models that live as a directory on a Spark (e.g. LoRA-merged fine-tunes), not just Hugging Face repos.
- ModelDef gains local_path; a model must set exactly one of repo / local_path. The validator also enforces the local-path whitelist and that any --chat-template lives inside local_path (only that dir is mounted).
- build_launch_command bind-mounts the dir into the vLLM container at the SAME host==container path via the launch script's VLLM_SPARK_EXTRA_DOCKER_ARGS hook, then `vllm serve <dir>`. No launch-cluster.sh change (verified the upstream expands that var unquoted; contract noted in runbook.md).
- shellsafe.validate_local_path: absolute path, charset whitelist, no '.'/'..'.
- POST /api/models validates the full entry via ModelDef before persisting, so a bad entry can't be written and then break catalog load; _merge_overrides skips an invalid override entry instead of failing the whole catalog.
- disk.py size-probes a local path with du; disk-delete refused for local models.
- UI: "+ Add local model" dialog, `local` badge, path shown instead of an HF link, delete button hidden for local models.
- Tests: local launch + injection round-trip, chat-template location, traversal, exactly-one-source, _merge_overrides skip-invalid (94 pass).
Reviewer-agent pass; findings addressed.
This commit is contained in:
+67
-2
@@ -60,6 +60,7 @@ function renderCards() {
|
||||
? `<div class="desc">${escapeHtml(m.description)}</div>`
|
||||
: '';
|
||||
const customPill = m.custom ? `<span class="tag custom-pill">custom</span>` : '';
|
||||
const localPill = m.local_path ? `<span class="tag local-pill" title="Served from a directory on the Spark, not Hugging Face">local</span>` : '';
|
||||
// Disk-presence pill + trash button. Until /api/models/disk-status comes back,
|
||||
// we don't know — render a neutral placeholder.
|
||||
const disk = state.disk_status[key];
|
||||
@@ -73,8 +74,10 @@ function renderCards() {
|
||||
}
|
||||
}
|
||||
// Trash button — hidden if not on disk; disabled (with tooltip) if currently loaded.
|
||||
// Never offered for local models: their directory is hand-placed training output,
|
||||
// not a re-downloadable HF cache (the server refuses the delete too).
|
||||
let trashBtn = '';
|
||||
if (state.disk_status_loaded && disk && disk.on_disk) {
|
||||
if (state.disk_status_loaded && disk && disk.on_disk && !m.local_path) {
|
||||
const disabled = isActive || isSwapping;
|
||||
const tip = isActive
|
||||
? 'Currently loaded — switch to another model first'
|
||||
@@ -92,6 +95,9 @@ function renderCards() {
|
||||
primaryBtn = `<button class="btn" disabled>Current</button>`;
|
||||
} else if (isOnDisk) {
|
||||
primaryBtn = `<button class="btn primary" data-swap-key="${key}" ${isSwapping ? 'disabled' : ''}>Switch to this</button>`;
|
||||
} else if (m.local_path) {
|
||||
// A local model can't be "downloaded" — its directory has to exist on the Spark.
|
||||
primaryBtn = `<button class="btn" disabled title="Directory not found on the Spark — create it there, then refresh">Not found on Spark</button>`;
|
||||
} else {
|
||||
const tip = dlInFlight ? 'A download is already in progress' : 'Download weights to the Spark(s)';
|
||||
primaryBtn = `<button class="btn info" data-download-key="${key}" title="${escapeHtml(tip)}" ${dlInFlight ? 'disabled' : ''}>Download</button>`;
|
||||
@@ -102,12 +108,15 @@ function renderCards() {
|
||||
<span class="tag mode-${m.mode}">${m.mode}</span>
|
||||
<span class="tag">${m.size_gb} GB</span>
|
||||
${customPill}
|
||||
${localPill}
|
||||
${diskPill}
|
||||
${(m.capabilities || []).map(c => `<span class="tag cap">${escapeHtml(c)}</span>`).join('')}
|
||||
</div>
|
||||
${desc}
|
||||
<div class="muted small repo">
|
||||
<a href="https://huggingface.co/${encodeURIComponent(m.repo)}" target="_blank" rel="noopener" title="View on Hugging Face">${escapeHtml(m.repo)} <span class="hf-icon">↗</span></a>
|
||||
${m.local_path
|
||||
? `<span class="local-path" title="Local model directory on the Spark">${escapeHtml(m.local_path)}</span>`
|
||||
: `<a href="https://huggingface.co/${encodeURIComponent(m.repo)}" target="_blank" rel="noopener" title="View on Hugging Face">${escapeHtml(m.repo)} <span class="hf-icon">↗</span></a>`}
|
||||
</div>
|
||||
<div class="spacer"></div>
|
||||
<div class="card-actions">
|
||||
@@ -1671,6 +1680,60 @@ function setupAdvancedDialog() {
|
||||
el('#adv-gmu').addEventListener('input', (e) => { el('#adv-gmu-out').value = parseFloat(e.target.value).toFixed(2); });
|
||||
}
|
||||
|
||||
/**
 * Reset the "Add local model" form to its defaults and show the dialog
 * modally.
 *
 * The reset values mirror the `value` / `checked` defaults declared on the
 * form controls, so reopening the dialog never shows leftovers from a
 * previous attempt.
 * NOTE(review): `el` is defined elsewhere in this file; presumably it
 * resolves a CSS selector to a single element — confirm.
 */
function openLocalModelDialog() {
  const dialog = el('#local-model-dialog');

  // [selector, property, default] — applied top to bottom.
  const fieldDefaults = [
    ['#lm-key', 'value', ''],
    ['#lm-name', 'value', ''],
    ['#lm-path', 'value', ''],
    ['#lm-chat', 'value', ''],
    ['#lm-size', 'value', ''],
    ['#lm-mode', 'value', 'solo'],
    ['#lm-desc', 'value', ''],
    ['#lm-mml', 'value', 32768],
    ['#lm-gmu', 'value', 0.85],
    // The <output> next to the slider shows the same value as text.
    ['#lm-gmu-out', 'value', '0.85'],
    ['#lm-fst', 'checked', true],
    ['#lm-pcache', 'checked', true],
    ['#lm-fp8', 'checked', true],
  ];

  for (const [selector, property, initial] of fieldDefaults) {
    el(selector)[property] = initial;
  }

  dialog.showModal();
}
|
||||
|
||||
/**
 * Wire up the "Add local model" dialog: the cancel button, the live
 * read-out of the GPU-memory slider, and form submission.
 *
 * On submit the form values are POSTed as JSON to `/api/models`. On success
 * the dialog closes, the model list is reloaded and a status poll is
 * started; on failure the error message is shown in an alert and the dialog
 * stays open.
 */
function setupLocalModelDialog() {
  const closeDialog = () => el('#local-model-dialog').close();

  // Gather the request payload from the form controls.
  const readLocalModelForm = () => {
    const chatTemplate = el('#lm-chat').value.trim();
    return {
      key: el('#lm-key').value.trim(),
      display_name: el('#lm-name').value.trim(),
      local_path: el('#lm-path').value.trim(),
      // An empty or unparsable size is sent as 0.
      size_gb: Number.parseFloat(el('#lm-size').value) || 0,
      mode: el('#lm-mode').value,
      // A blank description is sent as null, not as an empty string.
      description: el('#lm-desc').value.trim() || null,
      // A fine-tune's chat template (if any) rides along as a launch flag.
      vllm_args: chatTemplate ? [`--chat-template=${chatTemplate}`] : [],
      knobs: {
        max_model_len: Number.parseInt(el('#lm-mml').value, 10) || 32768,
        gpu_memory_utilization: Number.parseFloat(el('#lm-gmu').value),
        fastsafetensors: el('#lm-fst').checked,
        prefix_caching: el('#lm-pcache').checked,
        kv_cache_dtype: el('#lm-fp8').checked ? 'fp8' : 'auto',
      },
    };
  };

  el('#lm-cancel').addEventListener('click', closeDialog);

  // Keep the <output> beside the slider in sync, formatted to two decimals.
  el('#lm-gmu').addEventListener('input', ({ target }) => {
    el('#lm-gmu-out').value = Number.parseFloat(target.value).toFixed(2);
  });

  el('#local-model-form').addEventListener('submit', async (event) => {
    // Stop the default submission so the dialog only closes after the
    // request has succeeded.
    event.preventDefault();
    const payload = readLocalModelForm();
    try {
      await fetchJSON('/api/models', {
        method: 'POST',
        headers: { 'content-type': 'application/json' },
        body: JSON.stringify(payload),
      });
      closeDialog();
      await loadModels();
      pollStatus();
    } catch (err) {
      alert('Add local model failed: ' + err.message);
    }
  });
}
|
||||
|
||||
// ===================== NIM installer =====================
|
||||
|
||||
const nimState = {
|
||||
@@ -2034,8 +2097,10 @@ async function init() {
|
||||
if (kbtn) { copySparkSshKey(kbtn.dataset.sshKey, kbtn); return; }
|
||||
});
|
||||
el('#sshkey-close').addEventListener('click', () => el('#sshkey-dialog').close());
|
||||
el('#open-local').addEventListener('click', openLocalModelDialog);
|
||||
setupCatalogDialog();
|
||||
setupAdvancedDialog();
|
||||
setupLocalModelDialog();
|
||||
// Open WebUI link from /api/config
|
||||
try {
|
||||
state.config = await fetchJSON('/api/config');
|
||||
|
||||
@@ -229,6 +229,7 @@
|
||||
<div class="section-header">
|
||||
<h2 class="section-title">LLM swap</h2>
|
||||
<button id="open-download" class="btn small-btn">+ Download a new model</button>
|
||||
<button id="open-local" class="btn small-btn">+ Add local model</button>
|
||||
</div>
|
||||
|
||||
<dialog id="catalog-dialog" class="modal">
|
||||
@@ -261,6 +262,37 @@
|
||||
</form>
|
||||
</dialog>
|
||||
|
||||
<!--
  "Add local model" dialog. Field ids (lm-*) are read by
  setupLocalModelDialog() and reset by openLocalModelDialog(); the value /
  checked defaults declared here match the ones that reset applies.
-->
<dialog id="local-model-dialog" class="modal">
  <form method="dialog" class="modal-form" id="local-model-form">
    <h3>Add a local / fine-tuned model</h3>
    <p class="muted small">For a model that lives as a directory on a Spark (e.g. a fine-tune), not a Hugging Face repo. The directory is bind-mounted into the vLLM container at the same path when you swap to it. It must already exist on the Spark.</p>
    <!--
      The hyphen in the character class is escaped on purpose: browsers that
      compile `pattern` with the RegExp `v` flag reject an unescaped literal
      "-" in a class and then ignore the whole pattern, which would silently
      disable this client-side check. "\-" is valid under both `u` and `v`.
    -->
    <label class="modal-row"><span>Key (URL-safe id)</span><input type="text" id="lm-key" required pattern="[a-zA-Z0-9_\-]+"></label>
    <label class="modal-row"><span>Display name</span><input type="text" id="lm-name" required></label>
    <label class="modal-row"><span>Model directory (absolute path on the Spark)</span><input type="text" id="lm-path" required placeholder="e.g. /home/you/models/my-finetune"></label>
    <label class="modal-row"><span>Chat template path (optional)</span><input type="text" id="lm-chat" placeholder="e.g. /home/you/models/my-finetune/chat_template.jinja"></label>
    <label class="modal-row"><span>Size (GB)</span><input type="number" id="lm-size" step="0.1" min="0"></label>
    <label class="modal-row"><span>Mode</span>
      <select id="lm-mode">
        <option value="solo">solo (Spark 1 only)</option>
        <option value="cluster">cluster (both Sparks via Ray)</option>
      </select>
    </label>
    <label class="modal-row"><span>Description (optional)</span><textarea id="lm-desc" rows="3"></textarea></label>
    <fieldset class="modal-fieldset">
      <legend>Default launch knobs</legend>
      <label class="modal-row"><span>Max context (tokens)</span><input type="number" id="lm-mml" step="1024" min="1024" value="32768"></label>
      <label class="modal-row"><span>GPU memory %</span><input type="range" id="lm-gmu" min="0.5" max="0.95" step="0.01" value="0.85"> <output id="lm-gmu-out">0.85</output></label>
      <label class="modal-row inline"><input type="checkbox" id="lm-fst" checked> Fast safetensors loading</label>
      <label class="modal-row inline"><input type="checkbox" id="lm-pcache" checked> Prefix caching</label>
      <label class="modal-row inline"><input type="checkbox" id="lm-fp8" checked> FP8 KV cache</label>
    </fieldset>
    <div class="modal-actions">
      <button type="button" id="lm-cancel" class="btn">Cancel</button>
      <button type="submit" class="btn primary">Add local model</button>
    </div>
  </form>
</dialog>
|
||||
|
||||
<dialog id="disk-delete-dialog" class="modal">
|
||||
<form method="dialog" class="modal-form">
|
||||
<h3>Delete model weights from disk?</h3>
|
||||
|
||||
@@ -694,6 +694,7 @@ main {
|
||||
.card .repo a { color: inherit; text-decoration: none; }
|
||||
.card .repo a:hover { color: var(--info); text-decoration: underline; }
|
||||
.card .repo .hf-icon { font-size: 13px; opacity: 0.7; }
|
||||
.card .repo .local-path { font-family: var(--mono, ui-monospace, monospace); opacity: 0.85; }
|
||||
.tag {
|
||||
background: var(--surface-2);
|
||||
border: 1px solid var(--border);
|
||||
@@ -738,6 +739,7 @@ main {
|
||||
.card .adv-btn,
|
||||
.card .test-btn { padding: 8px 12px; font-size: 12px; }
|
||||
.card .custom-pill { color: var(--info); border-color: rgba(96, 165, 250, 0.4); }
|
||||
.card .local-pill { color: var(--warn); border-color: rgba(245, 158, 11, 0.4); }
|
||||
.tag.on-disk { color: var(--accent); border-color: rgba(74, 222, 128, 0.4); }
|
||||
.tag.not-on-disk { color: var(--muted); border-color: var(--border); opacity: 0.7; }
|
||||
.card-actions .icon-btn.danger { color: var(--error); border-color: rgba(239, 68, 68, 0.3); margin-left: auto; }
|
||||
|
||||
Reference in New Issue
Block a user