Files
spark-control/image/tests/test_shellsafe.py
T
Keysat e783653ef0 v0.23.0:0 - local / fine-tuned model support
Add models that live as a directory on a Spark (e.g. LoRA-merged fine-tunes),
not just Hugging Face repos.

- ModelDef gains local_path; a model must set exactly one of repo / local_path.
  The validator also enforces the local-path whitelist and that any
  --chat-template lives inside local_path (only that dir is mounted).
- build_launch_command bind-mounts the dir into the vLLM container at the SAME
  host==container path via the launch script's VLLM_SPARK_EXTRA_DOCKER_ARGS hook,
  then `vllm serve <dir>`. No launch-cluster.sh change (verified the upstream
  expands that var unquoted; contract noted in runbook.md).
- shellsafe.validate_local_path: absolute path, charset whitelist, no '.'/'..'.
- POST /api/models validates the full entry via ModelDef before persisting, so a
  bad entry can't be written and then break catalog load; _merge_overrides skips
  an invalid override entry instead of failing the whole catalog.
- disk.py size-probes a local path with du; disk-delete refused for local models.
- UI: "+ Add local model" dialog, `local` badge, path shown instead of an HF
  link, delete button hidden for local models.
- Tests: local launch + injection round-trip, chat-template location, traversal,
  exactly-one-source, _merge_overrides skip-invalid (94 pass). Reviewer-agent
  pass; findings addressed.
2026-06-17 22:27:41 -05:00

128 lines
3.6 KiB
Python

"""shellsafe validators: the API-boundary whitelist behind the v0.19.0 SSH
command-injection hardening. The quoting *sink* is covered in
test_launch_command.py; this locks in the *boundary* — that hostile input is
rejected early, and that a valid value passes through unchanged so callers can
use `validate_x(v)` inline.
"""
import pytest
from app.shellsafe import (
validate_container,
validate_image,
validate_local_path,
validate_repo,
)
# Injection payloads: every validator under test must reject a value carrying
# any of these shell metacharacters.
#
# Path traversal ("../") is deliberately NOT part of this list. validate_image
# has to accept "/" and "." because real image refs (nvcr.io/nim/...) contain
# them; images are defended by "no shell metacharacters" plus the quote_arg
# sink, not by path shape. Slash rejection is exercised directly in the repo
# and container tests, where "/" is disallowed.
HOSTILE = [
    "; rm -rf /",      # command separator
    " a b",            # whitespace / word splitting
    "$(touch pwned)",  # $(...) command substitution
    "`id`",            # backtick command substitution
    "x|cat",           # pipe
    "x&y",             # ampersand
    "x>out",           # output redirection
    "x\nrm",           # embedded newline
]
# ---- validate_repo: HF 'org/name', exactly one slash ----
@pytest.mark.parametrize(
    "repo",
    [
        "RedHatAI/Qwen3.6-35B-A3B-NVFP4",  # the live production model
        "org/name",
        "a.b_c-d/x.y_z-1",
    ],
)
def test_repo_valid_passes_through_unchanged(repo):
    """A well-formed ``org/name`` repo id must come back exactly as given."""
    returned = validate_repo(repo)
    assert returned == repo
@pytest.mark.parametrize(
    "repo",
    [
        "",
        "noslash",
        "a/b/c",  # two slashes
        "/name",  # empty org
        "org/",  # empty name
        # An otherwise valid repo with each injection payload appended.
        *(f"org/name{payload}" for payload in HOSTILE),
    ],
)
def test_repo_rejects_malformed_and_hostile(repo):
    """Expect ValueError for wrong-shaped repo ids and for hostile suffixes."""
    with pytest.raises(ValueError):
        validate_repo(repo)
# ---- validate_image: registry/path:tag@digest ----
@pytest.mark.parametrize(
    "image",
    [
        "nvcr.io/nim/nvidia/parakeet-1_1b-ctc-en-us:latest",
        "ubuntu",
        "img@sha256:deadbeefcafe",
        "a.b/c:1.2_3-4",
    ],
)
def test_image_valid_passes_through_unchanged(image):
    """A legitimate image reference must come back exactly as given."""
    returned = validate_image(image)
    assert returned == image
@pytest.mark.parametrize(
    "image",
    [
        "",
        "-leading",  # must start alphanumeric
        ".leading",
        "/leading",
        ":leading",
        "a" * 513,  # over the 512 cap
        # An otherwise valid image name with each injection payload appended.
        *(f"img{payload}" for payload in HOSTILE),
    ],
)
def test_image_rejects_malformed_and_hostile(image):
    """Expect ValueError for bad leading chars, over-long refs and hostile suffixes."""
    with pytest.raises(ValueError):
        validate_image(image)
# ---- validate_container: Docker name rule, no slash ----
@pytest.mark.parametrize(
    "name",
    [
        "parakeet-asr",
        "a",
        "vol_1.2-3",
    ],
)
def test_container_valid_passes_through_unchanged(name):
    """A valid container name must come back exactly as given."""
    returned = validate_container(name)
    assert returned == name
@pytest.mark.parametrize(
    "name",
    [
        "",
        "_leading",  # underscore is not a valid first char
        "-leading",
        ".leading",
        "has/slash",  # slash not allowed in a container name
        "a" * 129,  # over the 128 cap
        # An otherwise valid name with each injection payload appended.
        *(f"name{payload}" for payload in HOSTILE),
    ],
)
def test_container_rejects_malformed_and_hostile(name):
    """Expect ValueError for bad first chars, slashes, over-long names and hostile suffixes."""
    with pytest.raises(ValueError):
        validate_container(name)
# ---- validate_local_path: absolute model dir, no traversal/metacharacters ----
@pytest.mark.parametrize(
    "path",
    [
        "/home/modelo/models/gemma-4-31B-ten31-v2",
        "/data/models/ft.v2_1",
        "/srv/m/a-b/c",
    ],
)
def test_local_path_valid_passes_through_unchanged(path):
    """A clean absolute model directory must come back exactly as given."""
    returned = validate_local_path(path)
    assert returned == path
@pytest.mark.parametrize(
    "path",
    [
        "",
        "relative/path",  # must be absolute
        "~/models/x",  # no ~ expansion
        "/models/../etc/shadow",  # '..' traversal
        "/models/./x",  # '.' segment
        "/a" * 300,  # over the 512 cap (600 chars)
        # An otherwise valid path with each injection payload appended.
        *(f"/models/x{payload}" for payload in HOSTILE),
    ],
)
def test_local_path_rejects_relative_traversal_and_hostile(path):
    """Expect ValueError for non-absolute, dot-segment, over-long and hostile paths."""
    with pytest.raises(ValueError):
        validate_local_path(path)