v0.23.0:0 - local / fine-tuned model support

Add models that live as a directory on a Spark (e.g. LoRA-merged fine-tunes), not just Hugging Face repos.

- ModelDef gains local_path; a model must set exactly one of repo / local_path. The validator also enforces the local-path whitelist and requires that any --chat-template lives inside local_path (only that directory is mounted).
- build_launch_command bind-mounts the directory into the vLLM container at the SAME path on host and container via the launch script's VLLM_SPARK_EXTRA_DOCKER_ARGS hook, then runs `vllm serve <dir>`. No launch-cluster.sh change is needed (verified that the upstream script expands that variable unquoted; the contract is noted in runbook.md).
- shellsafe.validate_local_path: requires an absolute path, applies a charset whitelist, and rejects '.' and '..' segments.
- POST /api/models validates the full entry via ModelDef before persisting, so a bad entry can't be written and then break catalog load; _merge_overrides skips an invalid override entry instead of failing the whole catalog.
- disk.py size-probes a local path with du; disk-delete is refused for local models.
- UI: "+ Add local model" dialog, `local` badge, path shown instead of an HF link, delete button hidden for local models.
- Tests: local launch + injection round-trip, chat-template location, traversal, exactly-one-source, _merge_overrides skip-invalid (94 pass).

Reviewer-agent pass; findings addressed.
2026-06-17 22:27:41 -05:00
parent 57a893000e
commit e783653ef0
14 changed files with 402 additions and 26 deletions
@@ -6,7 +6,7 @@ from pathlib import Path
 from fastapi import FastAPI, HTTPException, Query, Request
 from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
 from fastapi.staticfiles import StaticFiles
-from pydantic import BaseModel
+from pydantic import BaseModel, ValidationError
 from typing import Literal

 from .config import Settings
@@ -22,7 +22,7 @@ from .redaction_gateway import build_router as build_redaction_router, MapStore
 from .hardware import HardwareProbe
 from .health import check_kokoro, check_parakeet, check_vllm, check_embeddings, check_qdrant
 from .matrix_bridge import MatrixBridgeManager
-from .models import load_catalog
+from .models import ModelDef, load_catalog
 from .nim import SUGGESTED_NIMS, CATALOG_URL, NimManager
 from .overrides import add_custom, delete_custom, extract_knobs_from_args, load_overrides, set_knobs
 from .services import docker_state, run_action, services_from_settings
@@ -183,7 +183,8 @@ async def put_model_knobs(key: str, body: KnobsBody) -> dict:
 class CustomModelBody(BaseModel):
    key: str
    display_name: str
-    repo: str
+    repo: str = ""
+    local_path: str | None = None
    size_gb: float = 0
    mode: Literal["solo", "cluster"] = "solo"
    description: str | None = None
@@ -196,8 +197,17 @@ class CustomModelBody(BaseModel):
 async def post_model(body: CustomModelBody) -> dict:
    if not body.key or not body.key.replace("-", "").replace("_", "").isalnum():
        raise HTTPException(400, "key must be alphanumeric/-/_ only")
+    # Validate the full entry BEFORE persisting (exactly-one source, local-path
+    # whitelist, chat-template location). Doing it via ModelDef means the API and
+    # the YAML-override path share one set of rules, and a bad entry can't be
+    # written to /data and then break catalog load.
    try:
-        validate_repo(body.repo)
+        ModelDef.model_validate(body.model_dump())
+        if body.repo:
+            validate_repo(body.repo)  # HF charset (the model only validates local paths)
+    except ValidationError as e:
+        msg = e.errors()[0]["msg"] if e.errors() else str(e)
+        raise HTTPException(400, msg.removeprefix("Value error, "))
    except ValueError as e:
        raise HTTPException(400, str(e))
    if body.key in catalog.models and not catalog.models[body.key].custom:
@@ -229,7 +239,13 @@ async def get_models_disk_status() -> dict:
        return {"configured": False, "models": {}}
    keys = list(catalog.models.keys())
    statuses = await asyncio.gather(*(
-        probe_disk(catalog.models[k].repo, catalog.models[k].mode, settings) for k in keys
+        probe_disk(
+            catalog.models[k].repo,
+            catalog.models[k].mode,
+            settings,
+            local_path=catalog.models[k].local_path,
+        )
+        for k in keys
    ), return_exceptions=True)
    out: dict[str, dict] = {}
    for k, s in zip(keys, statuses):
@@ -260,6 +276,14 @@ async def del_model_disk(key: str) -> dict:
        raise HTTPException(404, f"unknown model: {key}")
    m = catalog.models[key]

+    # Never rm a local fine-tune directory from the dashboard — it's irreplaceable
+    # training output the user placed by hand, not a re-downloadable HF cache.
+    if m.local_path:
+        raise HTTPException(
+            400,
+            "this is a local model; its directory must be managed on the Spark, not deleted from here",
+        )
+
    # Refuse if currently loaded
    try:
        vllm = await check_vllm(settings)