Files
spark-control/image/tests/test_launch_command.py
T

68 lines
2.7 KiB
Python

"""build_launch_command: argument assembly + the shell-injection invariant.
The security-critical property is that every user-controllable value (repo,
vllm_args, knobs) is shlex-quoted at the sink, so `shlex.split` cleanly reverses
the command back into the exact token list. The vLLM pre-flight validator
(validate.py) depends on this round-trip — these tests lock it in.
"""
import shlex
from app.models import Defaults, ModelDef, build_launch_command
# Shared launch defaults handed to every build_launch_command call in this
# module; the solo-model test expects them to appear in the command as
# "--port=8888 --host=0.0.0.0".
DEFAULTS = Defaults(port=8888, host="0.0.0.0")
def _model(**kw) -> ModelDef:
    """Build a ModelDef from baseline test fields.

    Keyword arguments override the baseline values or add further fields
    (the tests in this module pass e.g. ``vllm_args``, ``knobs``, ``repo``
    and ``mode``).
    """
    fields = {
        "display_name": "X",
        "repo": "org/name",
        "size_gb": 1.0,
        "mode": "solo",
        **kw,  # caller-supplied values win over the baseline
    }
    return ModelDef(**fields)
def test_solo_model_emits_solo_flag_and_ordered_args():
    """A solo-mode model produces the exact expected command string."""
    solo_model = _model(vllm_args=["--max-model-len=1000"])
    expected = (
        "./launch-cluster.sh --solo -d exec vllm serve org/name "
        "--port=8888 --host=0.0.0.0 --max-model-len=1000"
    )
    # Whole-string equality pins both the presence of --solo and the order
    # of the port/host defaults relative to the bundled vllm arg.
    assert build_launch_command("k", solo_model, DEFAULTS) == expected
def test_cluster_model_omits_solo_flag():
    """A cluster-mode model's command has no --solo flag and the usual prefix."""
    expected_prefix = "./launch-cluster.sh -d exec vllm serve org/name"
    cluster_model = _model(mode="cluster", vllm_args=["-tp=2"])
    cmd = build_launch_command("k", cluster_model, DEFAULTS)
    # The prefix check proves nothing was inserted between the script name
    # and "-d"; the substring check covers the rest of the command.
    assert cmd.startswith(expected_prefix)
    assert cmd.find(" --solo ") == -1
def test_knob_overrides_matching_bundled_flag():
    """A knob replaces the bundled flag of the same name instead of adding to it.

    The bundled arg sets ``--max-model-len=1000`` while the knob asks for
    65536; the knob must win and the flag must appear exactly once.
    """
    m = _model(vllm_args=["--max-model-len=1000"], knobs={"max_model_len": 65536})
    cmd = build_launch_command("k", m, DEFAULTS)
    assert "--max-model-len=65536" in cmd
    assert "--max-model-len=1000" not in cmd
    # The substring checks above cannot tell one occurrence from two, so
    # tokenise the command and require that the only --max-model-len flag
    # left is the one carrying the knob value.
    occurrences = [
        tok for tok in shlex.split(cmd)
        if tok.split("=", 1)[0] == "--max-model-len"
    ]
    assert occurrences == ["--max-model-len=65536"]
def test_repo_with_shell_metacharacters_is_quoted_not_executed():
    """A hostile repo string is quoted in the command and splits back as one token.

    validate_repo guards the API boundary; this test checks that
    build_launch_command is safe on its own even when handed such a value.
    """
    hostile_repo = "org/name; rm -rf ~ #"
    cmd = build_launch_command("k", _model(repo=hostile_repo), DEFAULTS)

    # Strip the properly quoted form; whatever is left must not contain the
    # raw metacharacter sequence.
    quoted_form = shlex.quote(hostile_repo)
    remainder = cmd.replace(quoted_form, "")
    assert "; rm -rf" not in remainder

    # Splitting the command must hand the repo back as one literal token.
    recovered = shlex.split(cmd)
    assert hostile_repo in recovered
def test_command_string_round_trips_through_shlex_split():
    """Every vllm arg survives the quote -> split round trip intact.

    This is the invariant validate.py relies on; the third arg contains
    spaces, so it only survives as one token if it was quoted.
    """
    vllm_args = [
        "--max-model-len=32768",
        "--load-format=fastsafetensors",
        "--note=a b c",
    ]
    cmd = build_launch_command("k", _model(vllm_args=vllm_args), DEFAULTS)
    recovered = shlex.split(cmd)
    lost = [arg for arg in vllm_args if arg not in recovered]
    assert not lost
def test_injection_via_vllm_arg_stays_literal():
    """A command-substitution payload in vllm_args splits back out as one token."""
    payload = "--foo=$(touch /tmp/pwned)"
    model = _model(vllm_args=[payload])
    tokens = shlex.split(build_launch_command("k", model, DEFAULTS))
    # Finding the payload as a whole token means "$(...)" was preserved
    # verbatim rather than broken into separate words.
    assert payload in tokens