v0.23.0:0 - local / fine-tuned model support
Add models that live as a directory on a Spark (e.g. LoRA-merged fine-tunes), not just Hugging Face repos. - ModelDef gains local_path; a model must set exactly one of repo / local_path. The validator also enforces the local-path whitelist and that any --chat-template lives inside local_path (only that dir is mounted). - build_launch_command bind-mounts the dir into the vLLM container at the SAME host==container path via the launch script's VLLM_SPARK_EXTRA_DOCKER_ARGS hook, then `vllm serve <dir>`. No launch-cluster.sh change (verified the upstream expands that var unquoted; contract noted in runbook.md). - shellsafe.validate_local_path: absolute path, charset whitelist, no '.'/'..'. - POST /api/models validates the full entry via ModelDef before persisting, so a bad entry can't be written and then break catalog load; _merge_overrides skips an invalid override entry instead of failing the whole catalog. - disk.py size-probes a local path with du; disk-delete refused for local models. - UI: "+ Add local model" dialog, `local` badge, path shown instead of an HF link, delete button hidden for local models. - Tests: local launch + injection round-trip, chat-template location, traversal, exactly-one-source, _merge_overrides skip-invalid (94 pass). Reviewer-agent pass; findings addressed.
This commit is contained in:
+29
-5
@@ -6,7 +6,7 @@ from pathlib import Path
|
||||
from fastapi import FastAPI, HTTPException, Query, Request
|
||||
from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, ValidationError
|
||||
from typing import Literal
|
||||
|
||||
from .config import Settings
|
||||
@@ -22,7 +22,7 @@ from .redaction_gateway import build_router as build_redaction_router, MapStore
|
||||
from .hardware import HardwareProbe
|
||||
from .health import check_kokoro, check_parakeet, check_vllm, check_embeddings, check_qdrant
|
||||
from .matrix_bridge import MatrixBridgeManager
|
||||
from .models import load_catalog
|
||||
from .models import ModelDef, load_catalog
|
||||
from .nim import SUGGESTED_NIMS, CATALOG_URL, NimManager
|
||||
from .overrides import add_custom, delete_custom, extract_knobs_from_args, load_overrides, set_knobs
|
||||
from .services import docker_state, run_action, services_from_settings
|
||||
@@ -183,7 +183,8 @@ async def put_model_knobs(key: str, body: KnobsBody) -> dict:
|
||||
class CustomModelBody(BaseModel):
|
||||
key: str
|
||||
display_name: str
|
||||
repo: str
|
||||
repo: str = ""
|
||||
local_path: str | None = None
|
||||
size_gb: float = 0
|
||||
mode: Literal["solo", "cluster"] = "solo"
|
||||
description: str | None = None
|
||||
@@ -196,8 +197,17 @@ class CustomModelBody(BaseModel):
|
||||
async def post_model(body: CustomModelBody) -> dict:
|
||||
if not body.key or not body.key.replace("-", "").replace("_", "").isalnum():
|
||||
raise HTTPException(400, "key must be alphanumeric/-/_ only")
|
||||
# Validate the full entry BEFORE persisting (exactly-one source, local-path
|
||||
# whitelist, chat-template location). Doing it via ModelDef means the API and
|
||||
# the YAML-override path share one set of rules, and a bad entry can't be
|
||||
# written to /data and then break catalog load.
|
||||
try:
|
||||
validate_repo(body.repo)
|
||||
ModelDef.model_validate(body.model_dump())
|
||||
if body.repo:
|
||||
validate_repo(body.repo) # HF charset (the model only validates local paths)
|
||||
except ValidationError as e:
|
||||
msg = e.errors()[0]["msg"] if e.errors() else str(e)
|
||||
raise HTTPException(400, msg.removeprefix("Value error, "))
|
||||
except ValueError as e:
|
||||
raise HTTPException(400, str(e))
|
||||
if body.key in catalog.models and not catalog.models[body.key].custom:
|
||||
@@ -229,7 +239,13 @@ async def get_models_disk_status() -> dict:
|
||||
return {"configured": False, "models": {}}
|
||||
keys = list(catalog.models.keys())
|
||||
statuses = await asyncio.gather(*(
|
||||
probe_disk(catalog.models[k].repo, catalog.models[k].mode, settings) for k in keys
|
||||
probe_disk(
|
||||
catalog.models[k].repo,
|
||||
catalog.models[k].mode,
|
||||
settings,
|
||||
local_path=catalog.models[k].local_path,
|
||||
)
|
||||
for k in keys
|
||||
), return_exceptions=True)
|
||||
out: dict[str, dict] = {}
|
||||
for k, s in zip(keys, statuses):
|
||||
@@ -260,6 +276,14 @@ async def del_model_disk(key: str) -> dict:
|
||||
raise HTTPException(404, f"unknown model: {key}")
|
||||
m = catalog.models[key]
|
||||
|
||||
# Never rm a local fine-tune directory from the dashboard — it's irreplaceable
|
||||
# training output the user placed by hand, not a re-downloadable HF cache.
|
||||
if m.local_path:
|
||||
raise HTTPException(
|
||||
400,
|
||||
"this is a local model; its directory must be managed on the Spark, not deleted from here",
|
||||
)
|
||||
|
||||
# Refuse if currently loaded
|
||||
try:
|
||||
vllm = await check_vllm(settings)
|
||||
|
||||
Reference in New Issue
Block a user