Files
Keysat e783653ef0 v0.23.0:0 - local / fine-tuned model support
Add models that live as a directory on a Spark (e.g. LoRA-merged fine-tunes),
not just Hugging Face repos.

- ModelDef gains local_path; a model must set exactly one of repo / local_path.
  The validator also enforces the local-path whitelist and that any
  --chat-template lives inside local_path (only that dir is mounted).
- build_launch_command bind-mounts the dir into the vLLM container at the SAME
  host==container path via the launch script's VLLM_SPARK_EXTRA_DOCKER_ARGS hook,
  then `vllm serve <dir>`. No launch-cluster.sh change (verified the upstream
  expands that var unquoted; contract noted in runbook.md).
- shellsafe.validate_local_path: absolute path, charset whitelist, no '.'/'..'.
- POST /api/models validates the full entry via ModelDef before persisting, so a
  bad entry can't be written and then break catalog load; _merge_overrides skips
  an invalid override entry instead of failing the whole catalog.
- disk.py size-probes a local path with du; disk-delete refused for local models.
- UI: "+ Add local model" dialog, `local` badge, path shown instead of an HF
  link, delete button hidden for local models.
- Tests: local launch + injection round-trip, chat-template location, traversal,
  exactly-one-source, _merge_overrides skip-invalid (94 pass). Reviewer-agent
  pass; findings addressed.
2026-06-17 22:27:41 -05:00

86 lines
3.7 KiB
Python

"""Validation + safe-quoting for user-supplied values that cross into SSH shell
commands on the Sparks.
Two layers of defense (same spirit as disk.py's `_SAFE_DIRNAME`):
1. Validate at the API boundary against a strict whitelist — rejects junk
early with a clear error, and guarantees the value carries no shell
metacharacters (so it is also safe to drop into echo/log lines).
2. `quote_arg` / `quote_args` at the actual interpolation site — the real
guarantee: even a value that somehow skips validation cannot break out of
the command.
Rule: anything user-controlled that ends up in an `ssh_run` / `ssh_stream`
command string must go through one of these, never be raw f-string'd.
"""
from __future__ import annotations

import re
import shlex
from collections.abc import Iterable
# Hugging Face repo 'org/name'. HF identifiers allow letters, digits, dot, dash,
# underscore; exactly one slash separates org from name. The pattern is a pure
# charset check: by itself it does not exclude a component that starts with '-'
# or that is exactly '.' / '..'.
_HF_REPO_RE = re.compile(r"^[A-Za-z0-9._-]+/[A-Za-z0-9._-]+$")

# Docker/OCI image reference: registry/path/name[:tag][@sha256:digest].
# Conservative charset covering e.g. nvcr.io/nim/nvidia/parakeet-...:latest and
# @digest pins; excludes every shell metacharacter. The first character must be
# alphanumeric, so a matching value can never begin with '-'.
_IMAGE_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._:/@-]*$")

# Docker container / volume name (Docker's own rule): an alphanumeric first
# character followed by letters, digits, underscore, dot or dash.
_CONTAINER_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9_.-]*$")

# Absolute filesystem path to a local model directory on a Spark. Conservative
# charset (letters, digits, and safe path punctuation) with a required leading
# '/', so it carries no shell metacharacters and no whitespace. At least one
# character must follow the leading '/', so the bare root "/" does not match.
# Traversal ('.' and '..' segments) is rejected separately in
# validate_local_path.
_LOCAL_PATH_RE = re.compile(r"^/[A-Za-z0-9._+/-]+$")
def validate_repo(repo: str) -> str:
    """Return `repo` if it is a well-formed Hugging Face 'org/name'; else raise ValueError.

    Beyond the charset whitelist in `_HF_REPO_RE`, this rejects three shapes
    that the pattern alone lets through:

    * over-long values (a length cap, as the image/container/path validators
      already have);
    * an org or name that starts with '-', which a downstream command could
      parse as an option -- shell quoting does not protect against that;
    * an org or name that is exactly '.' or '..', which would behave as a
      traversal segment if the repo is ever joined into a filesystem path.

    Args:
        repo: the user-supplied repository identifier.

    Returns:
        `repo`, unchanged, when it passes every check.

    Raises:
        ValueError: if `repo` is empty/None or fails any check above.
    """
    candidate = repo or ""
    # Generous cap, well above Hugging Face's documented repo-id length limit.
    if len(candidate) > 256 or not _HF_REPO_RE.fullmatch(candidate):
        raise ValueError(f"invalid model repo (expected 'org/name'): {repo!r}")
    # The pattern guarantees exactly one '/', so this yields (org, name).
    for component in candidate.split("/"):
        if component.startswith("-") or component in (".", ".."):
            raise ValueError(f"invalid model repo (expected 'org/name'): {repo!r}")
    return repo
def validate_image(image: str) -> str:
    """Check a container image reference against the `_IMAGE_RE` whitelist.

    Returns:
        `image`, unchanged, when it is non-empty, at most 512 characters long
        and matches the pattern in full.

    Raises:
        ValueError: for an empty, over-long or non-matching reference.
    """
    acceptable = (
        bool(image)
        and len(image) <= 512
        and _IMAGE_RE.fullmatch(image) is not None
    )
    if not acceptable:
        raise ValueError(f"invalid container image reference: {image!r}")
    return image
def validate_container(name: str) -> str:
    """Check a Docker container/volume name against the `_CONTAINER_RE` whitelist.

    Returns:
        `name`, unchanged, when it is non-empty, at most 128 characters long
        and matches the pattern in full.

    Raises:
        ValueError: for an empty, over-long or non-matching name.
    """
    # Happy path first; everything that falls through is rejected.
    if name and len(name) <= 128 and _CONTAINER_RE.fullmatch(name):
        return name
    raise ValueError(f"invalid container name: {name!r}")
def validate_local_path(path: str) -> str:
    """Boundary check for the directory of a locally stored (non-HF) model.

    A path is accepted only when it is at most 512 characters long, matches
    `_LOCAL_PATH_RE` in full (absolute, whitespace-free, metacharacter-free
    charset) and contains no '.' or '..' segment, so it cannot be used to
    traverse out of an intended models directory. This is the first layer
    only: the value is still expected to be quoted with `quote_arg` where it
    is interpolated into a command.

    Returns:
        The validated path.

    Raises:
        ValueError: if the path is empty/None, too long, outside the charset,
            or contains a '.' / '..' segment.
    """
    candidate = path if path else ""
    too_long = len(candidate) > 512
    if too_long or _LOCAL_PATH_RE.fullmatch(candidate) is None:
        raise ValueError(
            f"invalid local model path (expected an absolute path, no spaces or "
            f"shell metacharacters): {path!r}"
        )
    # Split on '/' and refuse any current-dir / parent-dir component.
    segments = candidate.split("/")
    if "." in segments or ".." in segments:
        raise ValueError(f"local model path must not contain '.' or '..' segments: {path!r}")
    return candidate
def quote_arg(value: object) -> str:
    """Render `value` as one shell-safe token.

    The value is converted with ``str()`` and then passed through
    ``shlex.quote``, so the result can be embedded in a shell command string
    as a single argument.
    """
    token = str(value)
    return shlex.quote(token)
def quote_args(values: Iterable[object]) -> str:
    """shlex.quote each token and join the results with single spaces.

    Args:
        values: the tokens to quote; each one is converted with ``str()``
            first. A bare ``str`` is treated as ONE token rather than being
            iterated character by character, so ``quote_args("a b")`` yields
            ``'a b'`` instead of splitting the value into per-character
            arguments.

    Returns:
        The quoted tokens joined by spaces; an empty string when there are no
        tokens.
    """
    if isinstance(values, str):
        # A lone string is a single argument, not a sequence of characters.
        values = (values,)
    return " ".join(shlex.quote(str(token)) for token in values)