Add per-model descriptions + repo-cleanup polish
- models.yaml: add 'description' field for all 5 models (generic, anyone-can-use)
- ModelDef gains optional description: str | None field
- UI: render description below meta tags; mute the repo line further
- escapeHtml() for safety in case descriptions/names contain HTML chars
- Update runbook: how to add a new model with description
This commit is contained in:
@@ -12,6 +12,7 @@ class ModelDef(BaseModel):
|
|||||||
capabilities: list[str] = Field(default_factory=list)
|
capabilities: list[str] = Field(default_factory=list)
|
||||||
expected_ready_seconds: int = 300
|
expected_ready_seconds: int = 300
|
||||||
vllm_args: list[str] = Field(default_factory=list)
|
vllm_args: list[str] = Field(default_factory=list)
|
||||||
|
description: str | None = None
|
||||||
|
|
||||||
|
|
||||||
class Defaults(BaseModel):
|
class Defaults(BaseModel):
|
||||||
|
|||||||
+17
-3
@@ -18,6 +18,16 @@ const state = {
|
|||||||
const el = (sel) => document.querySelector(sel);
|
const el = (sel) => document.querySelector(sel);
|
||||||
const $$ = (sel) => document.querySelectorAll(sel);
|
const $$ = (sel) => document.querySelectorAll(sel);
|
||||||
|
|
||||||
|
/**
 * Escape a value so it can be interpolated into HTML markup as text or as
 * a quoted attribute value.
 *
 * The five HTML-significant characters (& < > " ') are replaced with their
 * entity equivalents.
 *
 * @param {*} s - Value to escape; coerced with String(). null and undefined
 *   produce an empty string.
 * @returns {string} The escaped string.
 */
function escapeHtml(s) {
  if (s == null) return '';
  return String(s)
    // '&' must go first, otherwise the entities emitted below would be double-escaped.
    .replaceAll('&', '&amp;')
    .replaceAll('<', '&lt;')
    .replaceAll('>', '&gt;')
    .replaceAll('"', '&quot;')
    .replaceAll("'", '&#39;');
}
|
||||||
|
|
||||||
async function fetchJSON(url, opts) {
|
async function fetchJSON(url, opts) {
|
||||||
const r = await fetch(url, opts);
|
const r = await fetch(url, opts);
|
||||||
if (!r.ok) {
|
if (!r.ok) {
|
||||||
@@ -38,14 +48,18 @@ function renderCards() {
|
|||||||
const isActive = key === state.current_model_key;
|
const isActive = key === state.current_model_key;
|
||||||
const card = document.createElement('div');
|
const card = document.createElement('div');
|
||||||
card.className = 'card' + (isActive ? ' active' : '');
|
card.className = 'card' + (isActive ? ' active' : '');
|
||||||
|
const desc = m.description
|
||||||
|
? `<div class="desc">${escapeHtml(m.description)}</div>`
|
||||||
|
: '';
|
||||||
card.innerHTML = `
|
card.innerHTML = `
|
||||||
<div class="name">${m.display_name}</div>
|
<div class="name">${escapeHtml(m.display_name)}</div>
|
||||||
<div class="meta">
|
<div class="meta">
|
||||||
<span class="tag mode-${m.mode}">${m.mode}</span>
|
<span class="tag mode-${m.mode}">${m.mode}</span>
|
||||||
<span class="tag">${m.size_gb} GB</span>
|
<span class="tag">${m.size_gb} GB</span>
|
||||||
${(m.capabilities || []).map(c => `<span class="tag cap">${c}</span>`).join('')}
|
${(m.capabilities || []).map(c => `<span class="tag cap">${escapeHtml(c)}</span>`).join('')}
|
||||||
</div>
|
</div>
|
||||||
<div class="muted small repo">${m.repo}</div>
|
${desc}
|
||||||
|
<div class="muted small repo">${escapeHtml(m.repo)}</div>
|
||||||
<div class="spacer"></div>
|
<div class="spacer"></div>
|
||||||
<button class="btn ${isActive ? '' : 'primary'}" data-key="${key}" ${isActive || isSwapping ? 'disabled' : ''}>
|
<button class="btn ${isActive ? '' : 'primary'}" data-key="${key}" ${isActive || isSwapping ? 'disabled' : ''}>
|
||||||
${isActive ? 'Current' : 'Switch to this'}
|
${isActive ? 'Current' : 'Switch to this'}
|
||||||
|
|||||||
@@ -170,7 +170,16 @@ main {
|
|||||||
}
|
}
|
||||||
.card .name { font-weight: 600; font-size: 15px; }
|
.card .name { font-weight: 600; font-size: 15px; }
|
||||||
.card .meta { display: flex; flex-wrap: wrap; gap: 6px; font-size: 12px; color: var(--muted); }
|
.card .meta { display: flex; flex-wrap: wrap; gap: 6px; font-size: 12px; color: var(--muted); }
|
||||||
.card .repo { word-break: break-all; }
|
.card .desc {
|
||||||
|
font-size: 13.5px;
|
||||||
|
line-height: 1.5;
|
||||||
|
color: #b9b9c4;
|
||||||
|
}
|
||||||
|
.card .repo {
|
||||||
|
word-break: break-all;
|
||||||
|
font-size: 11px;
|
||||||
|
color: #5c5c66;
|
||||||
|
}
|
||||||
.tag {
|
.tag {
|
||||||
background: var(--surface-2);
|
background: var(--surface-2);
|
||||||
border: 1px solid var(--border);
|
border: 1px solid var(--border);
|
||||||
|
|||||||
@@ -15,6 +15,11 @@ defaults:
|
|||||||
models:
|
models:
|
||||||
qwen3-vl:
|
qwen3-vl:
|
||||||
display_name: "Qwen3-VL 235B (vision)"
|
display_name: "Qwen3-VL 235B (vision)"
|
||||||
|
description: >-
|
||||||
|
Qwen's flagship multimodal model. 235B total parameters with ~22B
|
||||||
|
active per token (Mixture-of-Experts). Handles text, images, and
|
||||||
|
many languages. The most capable model in this catalog — also the
|
||||||
|
slowest to load because it splits across both Sparks.
|
||||||
repo: RedHatAI/Qwen3-VL-235B-A22B-Instruct-NVFP4
|
repo: RedHatAI/Qwen3-VL-235B-A22B-Instruct-NVFP4
|
||||||
size_gb: 135
|
size_gb: 135
|
||||||
mode: cluster
|
mode: cluster
|
||||||
@@ -28,6 +33,10 @@ models:
|
|||||||
|
|
||||||
gemma4:
|
gemma4:
|
||||||
display_name: "Gemma 4 31B"
|
display_name: "Gemma 4 31B"
|
||||||
|
description: >-
|
||||||
|
Google's mid-size reasoning model. 31B dense parameters with built-in
|
||||||
|
thinking mode and function-calling. Strong on math, logic, and
|
||||||
|
structured outputs; also supports vision input. Runs solo on one Spark.
|
||||||
repo: RedHatAI/gemma-4-31B-it-NVFP4
|
repo: RedHatAI/gemma-4-31B-it-NVFP4
|
||||||
size_gb: 23
|
size_gb: 23
|
||||||
mode: solo
|
mode: solo
|
||||||
@@ -45,6 +54,10 @@ models:
|
|||||||
|
|
||||||
qwen36:
|
qwen36:
|
||||||
display_name: "Qwen3.6 35B-A3B (daily driver)"
|
display_name: "Qwen3.6 35B-A3B (daily driver)"
|
||||||
|
description: >-
|
||||||
|
Qwen's latest fast Mixture-of-Experts model: 35B total parameters but
|
||||||
|
only ~3B active per token, making inference quick. Long 64K-token
|
||||||
|
context window. A good default for everyday chat and longer documents.
|
||||||
repo: RedHatAI/Qwen3.6-35B-A3B-NVFP4
|
repo: RedHatAI/Qwen3.6-35B-A3B-NVFP4
|
||||||
size_gb: 20
|
size_gb: 20
|
||||||
mode: solo
|
mode: solo
|
||||||
@@ -61,6 +74,10 @@ models:
|
|||||||
|
|
||||||
qwen3-235b-fp8:
|
qwen3-235b-fp8:
|
||||||
display_name: "Qwen3 235B-A22B FP8 (legacy)"
|
display_name: "Qwen3 235B-A22B FP8 (legacy)"
|
||||||
|
description: >-
|
||||||
|
Earlier generation of the Qwen 235B family in native FP8 precision.
|
||||||
|
Runs across both Sparks. Mostly superseded by Qwen3-VL above; keep
|
||||||
|
around for text-only baseline comparisons.
|
||||||
repo: Qwen/Qwen3-235B-A22B-FP8
|
repo: Qwen/Qwen3-235B-A22B-FP8
|
||||||
size_gb: 220
|
size_gb: 220
|
||||||
mode: cluster
|
mode: cluster
|
||||||
@@ -74,6 +91,9 @@ models:
|
|||||||
|
|
||||||
qwen25-72b:
|
qwen25-72b:
|
||||||
display_name: "Qwen2.5 72B (legacy)"
|
display_name: "Qwen2.5 72B (legacy)"
|
||||||
|
description: >-
|
||||||
|
Last-generation 72B dense model. Cluster mode required due to size.
|
||||||
|
Kept for compatibility and baseline comparison against newer Qwens.
|
||||||
repo: Qwen/Qwen2.5-72B-Instruct
|
repo: Qwen/Qwen2.5-72B-Instruct
|
||||||
size_gb: 145
|
size_gb: 145
|
||||||
mode: cluster
|
mode: cluster
|
||||||
|
|||||||
+4
-2
@@ -36,9 +36,11 @@ These take effect on the **next swap to that model**. If a swap fails after this
|
|||||||
|
|
||||||
## Adding a new model
|
## Adding a new model
|
||||||
|
|
||||||
1. Add an entry to `models.yaml` (in the image source) or, post-install, via the "Edit Model Catalog" action in StartOS.
|
1. Add an entry to `image/models.yaml`. Required fields: `display_name`, `repo`, `size_gb`, `mode` (`solo` or `cluster`), `vllm_args`. Optional but recommended: `description` (one paragraph — what the model is, what it's good for, how it differs from others; renders below the meta tags in each card), `capabilities` (tags like `[vision, reasoning, tools]`), `expected_ready_seconds`.
|
||||||
2. Confirm the weights are on the Spark: `ssh <spark-user>@<spark-1-host>.local 'ls ~/.cache/huggingface/hub/'`. If not, download with `./hf-download.sh <repo>` on Spark 1.
|
2. Confirm the weights are on the Spark: `ssh <spark-user>@<spark-1-host>.local 'ls ~/.cache/huggingface/hub/'`. If not, download with `./hf-download.sh <repo>` on Spark 1.
|
||||||
3. The new model appears in the UI on next refresh.
|
3. Rebuild + redeploy the package: `cd package && make x86 && make install`.
|
||||||
|
|
||||||
|
If `description` is omitted, the card simply hides that section — no need to populate it for every model. Keep descriptions generic (not user-specific) so the catalog stays portable.
|
||||||
|
|
||||||
## Manual swap fallback
|
## Manual swap fallback
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user