v0.23.0:0 - local / fine-tuned model support

Add models that live as a directory on a Spark (e.g. LoRA-merged fine-tunes),
not just Hugging Face repos.

- ModelDef gains local_path; a model must set exactly one of repo / local_path.
  The validator also enforces the local-path whitelist and that any
  --chat-template lives inside local_path (only that dir is mounted).
- build_launch_command bind-mounts the directory into the vLLM container at the
  same path it has on the host (host path == container path) via the launch
  script's VLLM_SPARK_EXTRA_DOCKER_ARGS hook, then runs `vllm serve <dir>`.
  launch-cluster.sh itself is unchanged (verified that the upstream script
  expands that variable unquoted; the contract is noted in runbook.md).
- shellsafe.validate_local_path: absolute path, charset whitelist, no '.'/'..'.
- POST /api/models validates the full entry via ModelDef before persisting, so a
  bad entry can't be written and then break catalog load; _merge_overrides skips
  an invalid override entry instead of failing the whole catalog.
- disk.py size-probes a local path with du; disk-delete refused for local models.
- UI: "+ Add local model" dialog, `local` badge, path shown instead of an HF
  link, delete button hidden for local models.
- Tests: local launch + injection round-trip, chat-template location, traversal,
  exactly-one-source, _merge_overrides skip-invalid (94 pass). Reviewer-agent
  pass; findings addressed.
This commit is contained in:
Keysat
2026-06-17 22:27:41 -05:00
parent 57a893000e
commit e783653ef0
14 changed files with 402 additions and 26 deletions
+67 -2
View File
@@ -60,6 +60,7 @@ function renderCards() {
? `<div class="desc">${escapeHtml(m.description)}</div>`
: '';
const customPill = m.custom ? `<span class="tag custom-pill">custom</span>` : '';
const localPill = m.local_path ? `<span class="tag local-pill" title="Served from a directory on the Spark, not Hugging Face">local</span>` : '';
// Disk-presence pill + trash button. Until /api/models/disk-status comes back,
// we don't know — render a neutral placeholder.
const disk = state.disk_status[key];
@@ -73,8 +74,10 @@ function renderCards() {
}
}
// Trash button — hidden if not on disk; disabled (with tooltip) if currently loaded.
// Never offered for local models: their directory is hand-placed training output,
// not a re-downloadable HF cache (the server refuses the delete too).
let trashBtn = '';
if (state.disk_status_loaded && disk && disk.on_disk) {
if (state.disk_status_loaded && disk && disk.on_disk && !m.local_path) {
const disabled = isActive || isSwapping;
const tip = isActive
? 'Currently loaded — switch to another model first'
@@ -92,6 +95,9 @@ function renderCards() {
primaryBtn = `<button class="btn" disabled>Current</button>`;
} else if (isOnDisk) {
primaryBtn = `<button class="btn primary" data-swap-key="${key}" ${isSwapping ? 'disabled' : ''}>Switch to this</button>`;
} else if (m.local_path) {
// A local model can't be "downloaded" — its directory has to exist on the Spark.
primaryBtn = `<button class="btn" disabled title="Directory not found on the Spark — create it there, then refresh">Not found on Spark</button>`;
} else {
const tip = dlInFlight ? 'A download is already in progress' : 'Download weights to the Spark(s)';
primaryBtn = `<button class="btn info" data-download-key="${key}" title="${escapeHtml(tip)}" ${dlInFlight ? 'disabled' : ''}>Download</button>`;
@@ -102,12 +108,15 @@ function renderCards() {
<span class="tag mode-${m.mode}">${m.mode}</span>
<span class="tag">${m.size_gb} GB</span>
${customPill}
${localPill}
${diskPill}
${(m.capabilities || []).map(c => `<span class="tag cap">${escapeHtml(c)}</span>`).join('')}
</div>
${desc}
<div class="muted small repo">
<a href="https://huggingface.co/${encodeURIComponent(m.repo)}" target="_blank" rel="noopener" title="View on Hugging Face">${escapeHtml(m.repo)} <span class="hf-icon">↗</span></a>
${m.local_path
? `<span class="local-path" title="Local model directory on the Spark">${escapeHtml(m.local_path)}</span>`
: `<a href="https://huggingface.co/${encodeURIComponent(m.repo)}" target="_blank" rel="noopener" title="View on Hugging Face">${escapeHtml(m.repo)} <span class="hf-icon">↗</span></a>`}
</div>
<div class="spacer"></div>
<div class="card-actions">
@@ -1671,6 +1680,60 @@ function setupAdvancedDialog() {
el('#adv-gmu').addEventListener('input', (e) => { el('#adv-gmu-out').value = parseFloat(e.target.value).toFixed(2); });
}
/**
 * Reset the "Add local model" form to its pristine defaults and open the
 * dialog modally.
 *
 * Every `lm-*` field is overwritten on each open, so values typed during a
 * previous (cancelled or submitted) session never leak into the next one.
 */
function openLocalModelDialog() {
  const dialog = el('#local-model-dialog');

  // Selector -> value to restore. The GPU-memory slider and its <output>
  // mirror are reset together so the displayed number matches the slider.
  const valueDefaults = [
    ['#lm-key', ''],
    ['#lm-name', ''],
    ['#lm-path', ''],
    ['#lm-chat', ''],
    ['#lm-size', ''],
    ['#lm-mode', 'solo'],
    ['#lm-desc', ''],
    ['#lm-mml', 32768],
    ['#lm-gmu', 0.85],
    ['#lm-gmu-out', '0.85'],
  ];
  for (const [selector, initial] of valueDefaults) {
    el(selector).value = initial;
  }

  // All launch-knob checkboxes start ticked.
  for (const selector of ['#lm-fst', '#lm-pcache', '#lm-fp8']) {
    el(selector).checked = true;
  }

  dialog.showModal();
}
/**
 * Read the "Add local model" form fields and build the JSON payload that is
 * POSTed to /api/models.
 *
 * @returns {object} Model entry: key, display_name, local_path, size_gb, mode,
 *   description (null when blank), vllm_args and the default launch knobs.
 */
function readLocalModelForm() {
  const chatTemplate = el('#lm-chat').value.trim();
  return {
    key: el('#lm-key').value.trim(),
    display_name: el('#lm-name').value.trim(),
    local_path: el('#lm-path').value.trim(),
    // Blank or unparsable size falls back to 0.
    size_gb: parseFloat(el('#lm-size').value) || 0,
    mode: el('#lm-mode').value,
    description: el('#lm-desc').value.trim() || null,
    // A fine-tune's chat template (if any) rides along as a launch flag.
    vllm_args: chatTemplate ? [`--chat-template=${chatTemplate}`] : [],
    knobs: {
      max_model_len: parseInt(el('#lm-mml').value, 10) || 32768,
      gpu_memory_utilization: parseFloat(el('#lm-gmu').value),
      fastsafetensors: el('#lm-fst').checked,
      prefix_caching: el('#lm-pcache').checked,
      kv_cache_dtype: el('#lm-fp8').checked ? 'fp8' : 'auto',
    },
  };
}

/**
 * Wire up the "Add local model" dialog:
 *  - Cancel closes the dialog;
 *  - the GPU-memory slider mirrors its value (2 decimals) into its <output>;
 *  - submitting POSTs the form as JSON to /api/models, then closes the dialog,
 *    reloads the model list and kicks a status poll. Any failure is reported
 *    with an alert and the dialog stays open.
 *
 * Only one submission may be in flight at a time: extra submit events fired
 * while a request is pending (double click, repeated Enter) are ignored so the
 * same entry is not POSTed twice.
 */
function setupLocalModelDialog() {
  el('#lm-cancel').addEventListener('click', () => el('#local-model-dialog').close());
  el('#lm-gmu').addEventListener('input', (e) => {
    el('#lm-gmu-out').value = parseFloat(e.target.value).toFixed(2);
  });

  let submitInFlight = false;
  el('#local-model-form').addEventListener('submit', async (e) => {
    // Always suppress the native method="dialog" close; we close manually on success.
    e.preventDefault();
    if (submitInFlight) return;
    submitInFlight = true;
    try {
      await fetchJSON('/api/models', {
        method: 'POST',
        headers: { 'content-type': 'application/json' },
        body: JSON.stringify(readLocalModelForm()),
      });
      el('#local-model-dialog').close();
      await loadModels();
      pollStatus();
    } catch (err) {
      alert(`Add local model failed: ${err.message}`);
    } finally {
      // Release the guard on both success and failure so the user can retry.
      submitInFlight = false;
    }
  });
}
// ===================== NIM installer =====================
const nimState = {
@@ -2034,8 +2097,10 @@ async function init() {
if (kbtn) { copySparkSshKey(kbtn.dataset.sshKey, kbtn); return; }
});
el('#sshkey-close').addEventListener('click', () => el('#sshkey-dialog').close());
el('#open-local').addEventListener('click', openLocalModelDialog);
setupCatalogDialog();
setupAdvancedDialog();
setupLocalModelDialog();
// Open WebUI link from /api/config
try {
state.config = await fetchJSON('/api/config');
+32
View File
@@ -229,6 +229,7 @@
<div class="section-header">
<h2 class="section-title">LLM swap</h2>
<button id="open-download" class="btn small-btn">+ Download a new model</button>
<button id="open-local" class="btn small-btn">+ Add local model</button>
</div>
<dialog id="catalog-dialog" class="modal">
@@ -261,6 +262,37 @@
</form>
</dialog>
<!--
  "Add a local / fine-tuned model" dialog.
  Registers a model that is served from a directory on the Spark rather than a
  Hugging Face repo. The form declares method="dialog"; the page script attaches
  its own submit handler to #local-model-form and reads the lm-* fields by id,
  so those ids must stay in sync with the script.
  Required fields: key (letters, digits, "_" and "-" only), display name and
  model directory. Chat template path, size and description are optional.
-->
<dialog id="local-model-dialog" class="modal">
<form method="dialog" class="modal-form" id="local-model-form">
<h3>Add a local / fine-tuned model</h3>
<p class="muted small">For a model that lives as a directory on a Spark (e.g. a fine-tune), not a Hugging Face repo. The directory is bind-mounted into the vLLM container at the same path when you swap to it. It must already exist on the Spark.</p>
<label class="modal-row"><span>Key (URL-safe id)</span><input type="text" id="lm-key" required pattern="[a-zA-Z0-9_-]+"></label>
<label class="modal-row"><span>Display name</span><input type="text" id="lm-name" required></label>
<label class="modal-row"><span>Model directory (absolute path on the Spark)</span><input type="text" id="lm-path" required placeholder="e.g. /home/you/models/my-finetune"></label>
<label class="modal-row"><span>Chat template path (optional)</span><input type="text" id="lm-chat" placeholder="e.g. /home/you/models/my-finetune/chat_template.jinja"></label>
<label class="modal-row"><span>Size (GB)</span><input type="number" id="lm-size" step="0.1" min="0"></label>
<label class="modal-row"><span>Mode</span>
<select id="lm-mode">
<option value="solo">solo (Spark 1 only)</option>
<option value="cluster">cluster (both Sparks via Ray)</option>
</select>
</label>
<label class="modal-row"><span>Description (optional)</span><textarea id="lm-desc" rows="3"></textarea></label>
<!-- Defaults below mirror the values the script restores each time the dialog is opened. -->
<fieldset class="modal-fieldset">
<legend>Default launch knobs</legend>
<label class="modal-row"><span>Max context (tokens)</span><input type="number" id="lm-mml" step="1024" min="1024" value="32768"></label>
<label class="modal-row"><span>GPU memory %</span><input type="range" id="lm-gmu" min="0.5" max="0.95" step="0.01" value="0.85"> <output id="lm-gmu-out">0.85</output></label>
<label class="modal-row inline"><input type="checkbox" id="lm-fst" checked> Fast safetensors loading</label>
<label class="modal-row inline"><input type="checkbox" id="lm-pcache" checked> Prefix caching</label>
<label class="modal-row inline"><input type="checkbox" id="lm-fp8" checked> FP8 KV cache</label>
</fieldset>
<div class="modal-actions">
<button type="button" id="lm-cancel" class="btn">Cancel</button>
<button type="submit" class="btn primary">Add local model</button>
</div>
</form>
</dialog>
<dialog id="disk-delete-dialog" class="modal">
<form method="dialog" class="modal-form">
<h3>Delete model weights from disk?</h3>
+2
View File
@@ -694,6 +694,7 @@ main {
.card .repo a { color: inherit; text-decoration: none; }
.card .repo a:hover { color: var(--info); text-decoration: underline; }
.card .repo .hf-icon { font-size: 13px; opacity: 0.7; }
.card .repo .local-path { font-family: var(--mono, ui-monospace, monospace); opacity: 0.85; }
.tag {
background: var(--surface-2);
border: 1px solid var(--border);
@@ -738,6 +739,7 @@ main {
.card .adv-btn,
.card .test-btn { padding: 8px 12px; font-size: 12px; }
.card .custom-pill { color: var(--info); border-color: rgba(96, 165, 250, 0.4); }
.card .local-pill { color: var(--warn); border-color: rgba(245, 158, 11, 0.4); }
.tag.on-disk { color: var(--accent); border-color: rgba(74, 222, 128, 0.4); }
.tag.not-on-disk { color: var(--muted); border-color: var(--border); opacity: 0.7; }
.card-actions .icon-btn.danger { color: var(--error); border-color: rgba(239, 68, 68, 0.3); margin-left: auto; }