v0.27.2:0 - vision check tool + mark Qwen3.6 vision-capable

Qwen3.6-35B-A3B is multimodal (vision tower on disk) but was labelled
text-only. Mark it [vision, reasoning] and add a 'Vision check' button on
the running vision-capable card: upload an image + prompt -> existing /v1
passthrough proxy -> show the model's text. Confirmed 7/7 fields on a
business card. Records the Gemma-4-26B deferral + research findings.
This commit is contained in:
Keysat
2026-06-18 18:14:30 -05:00
parent c846386c1a
commit 9a3bf9ed86
6 changed files with 120 additions and 5 deletions
+84 -1
View File
@@ -4,6 +4,7 @@ const state = {
models: {},
defaults: {},
current_model_key: null,
vllm_model: null, // model id vLLM currently reports serving (for the vision check)
swap_job_id: null,
swap_eventsource: null,
swap_started_at: null,
@@ -120,6 +121,11 @@ function renderCards() {
const recipeActions = m.needs_setup ? '' : `
<button class="btn test-btn" data-test-key="${key}" title="Pre-flight check the launch command without starting the engine">Test</button>
<button class="btn adv-btn" data-adv-key="${key}" title="Advanced settings">Advanced</button>`;
// "Vision check" only makes sense for the model that's actually loaded, and
// only if it can take images — send an image to it and see what it reads.
const visionBtn = (isActive && (m.capabilities || []).includes('vision'))
? `<button class="btn vision-btn" data-vision-key="${key}" title="Send an image to the running model (e.g. a business card) and see what it reads">Vision check</button>`
: '';
card.innerHTML = `
<div class="name">${escapeHtml(m.display_name)}</div>
<div class="meta">
@@ -138,7 +144,7 @@ function renderCards() {
</div>
<div class="spacer"></div>
<div class="card-actions">
${primaryBtn}${recipeActions}
${primaryBtn}${recipeActions}${visionBtn}
${trashBtn}
</div>
<div class="test-result hidden" data-test-result-for="${key}"></div>
@@ -160,6 +166,80 @@ function renderCards() {
for (const btn of root.querySelectorAll('[data-disk-del-key]')) {
btn.addEventListener('click', () => openDiskDeleteDialog(btn.dataset.diskDelKey));
}
for (const btn of root.querySelectorAll('[data-vision-key]')) {
btn.addEventListener('click', () => openVisionCheck(btn.dataset.visionKey));
}
}
// ===================== vision check =====================
/**
 * Open the "Vision check" dialog for a model card, clearing whatever a
 * previous run left behind (chosen file, preview image, result box).
 *
 * @param {string} key - Key into `state.models`; only used to show the
 *   model's display name in the dialog heading. An unknown key leaves the
 *   heading label empty.
 */
function openVisionCheck(key) {
  const m = state.models[key];
  el('#vc-model').textContent = m ? `${m.display_name}` : '';
  el('#vc-file').value = '';
  const img = el('#vc-preview');
  // The preview image is fed from URL.createObjectURL(); such blob: URLs pin
  // the underlying file in memory until they are revoked, so release the old
  // one before dropping the attribute.
  const staleUrl = img.src;
  if (typeof staleUrl === 'string' && staleUrl.startsWith('blob:')) {
    URL.revokeObjectURL(staleUrl);
  }
  img.classList.add('hidden');
  img.removeAttribute('src');
  const res = el('#vc-result');
  res.classList.add('hidden');
  res.textContent = '';
  el('#vision-dialog').showModal();
}
/**
 * Show a thumbnail of the image currently chosen in `#vc-file`, or hide the
 * thumbnail when the selection is empty.
 *
 * Each preview is served from a blob: URL created with
 * URL.createObjectURL(). Those URLs keep the file alive until they are
 * revoked, so the URL of the preview being replaced (or cleared) is released
 * here instead of piling up for the lifetime of the page.
 */
function previewVisionImage() {
  const file = el('#vc-file').files[0];
  const img = el('#vc-preview');
  const previousUrl = img.src;
  if (!file) {
    img.classList.add('hidden');
    // Drop the attribute too, so the hidden <img> does not keep pointing at
    // a URL that is about to be revoked.
    img.removeAttribute('src');
  } else {
    img.src = URL.createObjectURL(file);
    img.classList.remove('hidden');
  }
  // Revoke only after the element has stopped using the old URL.
  if (typeof previousUrl === 'string' && previousUrl.startsWith('blob:')) {
    URL.revokeObjectURL(previousUrl);
  }
}
/**
 * Promise wrapper around FileReader#readAsDataURL.
 *
 * @param {Blob} file - The file (or blob) to read.
 * @returns {Promise<string>} Resolves with the reader's `result` once the
 *   load event fires; rejects with a generic Error if the reader reports an
 *   error.
 */
function readFileAsDataURL(file) {
  return new Promise((fulfil, fail) => {
    // Everything stays inside the executor so a synchronous throw from the
    // reader surfaces as a rejection rather than an exception.
    const reader = new FileReader();
    const handleLoaded = () => {
      fulfil(reader.result);
    };
    const handleFailed = () => {
      fail(new Error('could not read the image file'));
    };
    reader.onload = handleLoaded;
    reader.onerror = handleFailed;
    reader.readAsDataURL(file);
  });
}
/**
 * Handler for the dialog's "Run" button: send the chosen image plus the
 * prompt to the model vLLM reports as running and show the reply text.
 *
 * Bails out with an alert when no image is chosen or when
 * `state.vllm_model` is empty. Otherwise the image is read as a data URL and
 * POSTed to `/v1/chat/completions` as a single user message with a text part
 * and an image_url part. The Run button is disabled while the request is in
 * flight and always restored afterwards. Any thrown error is rendered in the
 * result box with the `fail` class.
 *
 * @returns {Promise<void>}
 */
async function runVisionCheck() {
  const picked = el('#vc-file').files[0];
  const output = el('#vc-result');
  if (!picked) {
    alert('Pick an image first.');
    return;
  }

  const servedModel = state.vllm_model;
  if (!servedModel) {
    alert('No running model detected — switch to a model first.');
    return;
  }

  // A blank prompt falls back to a generic description request.
  const typedPrompt = el('#vc-prompt').value.trim();
  const question = typedPrompt || 'Describe this image.';

  const runButton = el('#vc-run');
  runButton.disabled = true;
  runButton.textContent = 'Running…';
  output.classList.remove('hidden', 'fail');
  output.textContent = 'Sending the image to the model…';

  try {
    const imageUrl = await readFileAsDataURL(picked);
    const payload = {
      model: servedModel,
      max_tokens: 800,
      temperature: 0,
      messages: [{
        role: 'user',
        content: [
          { type: 'text', text: question },
          { type: 'image_url', image_url: { url: imageUrl } },
        ],
      }],
    };
    const reply = await fetchJSON('/v1/chat/completions', {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: JSON.stringify(payload),
    });

    // Unpack choices[0].message.content step by step; any missing piece or
    // whitespace-only content ends up as the placeholder text.
    const firstChoice = reply.choices && reply.choices[0];
    const message = firstChoice && firstChoice.message;
    let shown = '(model returned no text)';
    if (message && message.content) {
      const trimmed = message.content.trim();
      if (trimmed) {
        shown = trimmed;
      }
    }
    output.textContent = shown;
  } catch (err) {
    output.classList.add('fail');
    output.textContent = 'Failed: ' + err.message;
  } finally {
    runButton.disabled = false;
    runButton.textContent = 'Run';
  }
}
const trashIcon = '<svg viewBox="0 0 24 24" width="14" height="14" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><polyline points="3 6 5 6 21 6"></polyline><path d="M19 6l-1 14a2 2 0 0 1-2 2H8a2 2 0 0 1-2-2L5 6"></path><path d="M10 11v6"></path><path d="M14 11v6"></path><path d="M9 6V4a2 2 0 0 1 2-2h2a2 2 0 0 1 2 2v2"></path></svg>';
@@ -1143,6 +1223,7 @@ async function pollStatus() {
try {
const status = await fetchJSON('/api/status');
state.current_model_key = status.current_model_key;
state.vllm_model = (status.vllm || {}).current_model || null;
state.configured = status.configured;
renderBanner(status);
renderCurrent(status);
@@ -2329,6 +2410,8 @@ async function init() {
});
el('#sshkey-close').addEventListener('click', () => el('#sshkey-dialog').close());
el('#open-local').addEventListener('click', openLocalModelDialog);
el('#vc-run').addEventListener('click', runVisionCheck);
el('#vc-file').addEventListener('change', previewVisionImage);
el('#lock-release').addEventListener('click', releaseLock);
setupCatalogDialog();
setupAdvancedDialog();
+15
View File
@@ -365,6 +365,21 @@
</form>
</dialog>
<!-- Vision check dialog: pick an image, edit the prompt, press Run, and read
     the running model's reply. The page script opens it with showModal() and
     looks the controls up by their vc-* ids. -->
<dialog id="vision-dialog" class="modal">
<form method="dialog" class="modal-form" id="vision-form">
<!-- #vc-model is filled with the model's display name when the dialog opens. -->
<h3>Vision check<span id="vc-model" class="muted small"></span></h3>
<p class="muted small">Send an image to the running model and see what it reads back — handy for confirming OCR on a real photo (e.g. a business card). Sent over the same <code>/v1</code> endpoint your apps use; nothing leaves the LAN.</p>
<label class="modal-row"><span>Image</span><input type="file" id="vc-file" accept="image/*"></label>
<!-- Thumbnail of the chosen file; stays hidden until a file is selected. -->
<img id="vc-preview" class="vc-preview hidden" alt="selected image preview">
<!-- Pre-filled with a business-card extraction prompt; the script falls back
     to a generic "describe" prompt if this is left blank. -->
<label class="modal-row"><span>Prompt</span><textarea id="vc-prompt" rows="3">This is a business card. Extract every field as JSON with keys: name, title, company, phone, email, website, address. Output only the JSON.</textarea></label>
<!-- Progress text, the model's reply, or an error message (with the "fail" class). -->
<div class="vc-result hidden" id="vc-result"></div>
<div class="modal-actions">
<!-- type="button" keeps Run from submitting the method="dialog" form, which
     would close the dialog; Close is a plain submit button and does close it. -->
<button type="button" id="vc-run" class="btn primary">Run</button>
<button class="btn" value="cancel">Close</button>
</div>
</form>
</dialog>
<section id="download-panel" class="download-panel hidden">
<div class="download-form" id="download-form">
<label class="dl-row">
+12
View File
@@ -805,6 +805,18 @@ main {
.test-result .ok-mark { color: var(--accent); font-weight: 600; }
.test-result .fail-mark { color: var(--error); font-weight: 600; }
/* Vision check modal */
/* Thumbnail of the chosen image: capped at 180px tall and the container's
   width so a large photo cannot stretch the dialog. */
.vc-preview { display: block; max-width: 100%; max-height: 180px; border-radius: 8px; margin: 4px 0 10px; border: 1px solid var(--border); }
/* Reply box: monospace, keeps the reply's own line breaks (pre-wrap), breaks
   long unbroken tokens, and scrolls once it grows past 280px. */
.vc-result {
margin-top: 4px; padding: 10px 12px;
border: 1px solid var(--border); border-radius: 8px;
background: var(--surface-2);
white-space: pre-wrap; word-break: break-word;
font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
font-size: 12px; line-height: 1.5; max-height: 280px; overflow: auto;
}
/* Error variant, applied by the script when a run fails: red-tinted border and error-colored text. */
.vc-result.fail { border-color: rgba(239, 68, 68, 0.45); color: var(--error); }
.footer {
margin-top: 28px;
padding-top: 16px;