From 6238ac88f7f6e7e84015ff4812eb290537e06756 Mon Sep 17 00:00:00 2001 From: Keysat Date: Mon, 15 Jun 2026 17:24:49 -0500 Subject: [PATCH] test: add offline pytest harness (build_launch_command injection, label-merge) --- AGENTS.md | 1 + docs/guides/fastapi-image.md | 5 ++- image/pyproject.toml | 6 +++ image/tests/conftest.py | 17 ++++++++ image/tests/test_label_merge.py | 69 ++++++++++++++++++++++++++++++ image/tests/test_launch_command.py | 67 +++++++++++++++++++++++++++++ 6 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 image/tests/conftest.py create mode 100644 image/tests/test_label_merge.py create mode 100644 image/tests/test_launch_command.py diff --git a/AGENTS.md b/AGENTS.md index 175fc9d..a7a88a3 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -23,6 +23,7 @@ Subsystem guidance lives in `docs/guides/` and loads when matching files are tou ```bash (cd package && make x86) # build the s9pk; make install sideloads (restarts live service — ask first) (cd image && uvicorn app.server:app --port 9999) # local dev — needs env vars, see fastapi-image rule +(cd image && .venv/bin/python -m pytest) # offline unit suite (launch-cmd injection, label-merge) (cd image && .venv/bin/python -m app.redaction.test_gateway) # offline redaction suite 1 (cd image && .venv/bin/python app/redaction/test_scrub_leak.py) # offline redaction suite 2 ./scripts/test-audio-with-speakers.sh # e2e audio — hits the LIVE cluster diff --git a/docs/guides/fastapi-image.md b/docs/guides/fastapi-image.md index 1df6ad1..ed123ed 100644 --- a/docs/guides/fastapi-image.md +++ b/docs/guides/fastapi-image.md @@ -24,7 +24,10 @@ Other env vars: `BIND_PORT`, `MODELS_YAML`, `SSH_DIR`, `SSH_KNOWN_HOSTS`, `MODEL ## Tests -No pytest harness — each suite is a standalone script run with the `image/.venv` interpreter (system python3 has no deps). See the redaction and audio rules for the suites themselves. 
+Two kinds, both run with the `image/.venv` interpreter (system python3 has no deps): + +- **pytest unit suite** — offline, pure functions, no cluster. `.venv/bin/python -m pytest` from `image/`. Lives in `image/tests/`; currently covers `build_launch_command` (incl. the shell-injection / `shlex` round-trip invariant) and the transcript↔diarizer label-merge (`_merge_words_with_speakers`). Install the test dep once with `pip install -e '.[dev]'`. Add new pure-function coverage here. +- **Standalone scripts** — the redaction suites and the live-cluster audio e2e are run directly (not via pytest). See the redaction and audio rules. ## Conventions diff --git a/image/pyproject.toml b/image/pyproject.toml index 0917ea7..bf9dcd0 100644 --- a/image/pyproject.toml +++ b/image/pyproject.toml @@ -12,6 +12,12 @@ dependencies = [ "python-multipart>=0.0.9", ] +[project.optional-dependencies] +dev = ["pytest>=8"] + +[tool.pytest.ini_options] +testpaths = ["tests"] + [build-system] requires = ["setuptools>=68"] build-backend = "setuptools.build_meta" diff --git a/image/tests/conftest.py b/image/tests/conftest.py new file mode 100644 index 0000000..17809fc --- /dev/null +++ b/image/tests/conftest.py @@ -0,0 +1,17 @@ +"""Shared pytest setup. + +These suites are pure/offline — they exercise pure functions and never touch the +Sparks, /data, or the network. We still default the env vars the app modules +expect (documented in docs/guides/fastapi-image.md) to tmp paths so that, unless +already set, importing them does not write to the container-only /data path. +""" +import os +import sys +from pathlib import Path + +# Let `import app...` resolve whether or not the package is pip-installed. 
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) + +os.environ.setdefault("REDACTION_MAP_DB", "/tmp/spark_control_test_maps.db") +os.environ.setdefault("CONNECTIVITY_LOG", "/tmp/spark_control_test_connectivity.json") +os.environ.setdefault("MODELS_OVERRIDES", "/tmp/spark_control_test_overrides.yaml") diff --git a/image/tests/test_label_merge.py b/image/tests/test_label_merge.py new file mode 100644 index 0000000..6e42577 --- /dev/null +++ b/image/tests/test_label_merge.py @@ -0,0 +1,69 @@ +"""_merge_words_with_speakers + _assign_speaker_to_word: the transcript/diarizer +merge that turns Parakeet words + Sortformer turns into speaker-labelled blocks. +Pure functions, no cluster — this is the core of transcribe-with-speakers. +""" +from app.audio_proxy import _assign_speaker_to_word, _merge_words_with_speakers + + +def _w(start, end, text): + return {"start": start, "end": end, "text": text} + + +def _t(start, end, speaker): + return {"start_s": start, "end_s": end, "speaker": speaker} + + +# ---- _assign_speaker_to_word ---- + +def test_assign_by_midpoint_containment(): + turns = [_t(0.0, 2.0, "Speaker_0"), _t(2.0, 4.0, "Speaker_1")] + assert _assign_speaker_to_word(2.4, 2.8, turns) == "Speaker_1" + + +def test_assign_falls_back_to_max_overlap_when_midpoint_outside(): + # midpoint 5.0 is in no turn; word span overlaps Speaker_0 more than Speaker_1. 
+ turns = [_t(0.0, 4.9, "Speaker_0"), _t(6.0, 8.0, "Speaker_1")] + assert _assign_speaker_to_word(4.0, 6.0, turns) == "Speaker_0" + + +def test_assign_unknown_when_no_overlap(): + turns = [_t(0.0, 1.0, "Speaker_0")] + assert _assign_speaker_to_word(10.0, 11.0, turns) == "Speaker_unknown" + + +# ---- _merge_words_with_speakers ---- + +def test_empty_words_returns_empty(): + assert _merge_words_with_speakers([], [_t(0, 1, "Speaker_0")]) == [] + + +def test_consecutive_same_speaker_words_join_into_one_block(): + words = [_w(0.0, 0.5, "good"), _w(0.5, 1.0, "morning")] + turns = [_t(0.0, 2.0, "Speaker_0")] + blocks = _merge_words_with_speakers(words, turns) + assert blocks == [ + {"start_ms": 0, "end_ms": 1000, "speaker": "Speaker_0", "text": "good morning"} + ] + + +def test_speaker_change_splits_blocks(): + words = [_w(0.0, 1.0, "hi"), _w(2.1, 3.0, "hello")] + turns = [_t(0.0, 2.0, "Speaker_0"), _t(2.0, 4.0, "Speaker_1")] + blocks = _merge_words_with_speakers(words, turns) + assert [b["speaker"] for b in blocks] == ["Speaker_0", "Speaker_1"] + assert [b["text"] for b in blocks] == ["hi", "hello"] + + +def test_long_silence_breaks_block_for_same_speaker(): + # >1.5s gap between two words of the same speaker forces a new block. + words = [_w(0.0, 0.5, "one"), _w(3.0, 3.5, "two")] + turns = [_t(0.0, 4.0, "Speaker_0")] + blocks = _merge_words_with_speakers(words, turns) + assert len(blocks) == 2 + assert [b["text"] for b in blocks] == ["one", "two"] + + +def test_punctuation_token_joins_without_leading_space(): + words = [_w(0.0, 0.5, "hello"), _w(0.5, 0.7, ".")] + turns = [_t(0.0, 2.0, "Speaker_0")] + assert _merge_words_with_speakers(words, turns)[0]["text"] == "hello." diff --git a/image/tests/test_launch_command.py b/image/tests/test_launch_command.py new file mode 100644 index 0000000..8d879bf --- /dev/null +++ b/image/tests/test_launch_command.py @@ -0,0 +1,67 @@ +"""build_launch_command: argument assembly + the shell-injection invariant. 
+ +The security-critical property is that every user-controllable value (repo, +vllm_args, knobs) is shlex-quoted at the sink, so `shlex.split` cleanly reverses +the command back into the exact token list. The vLLM pre-flight validator +(validate.py) depends on this round-trip — these tests lock it in. +""" +import shlex + +from app.models import Defaults, ModelDef, build_launch_command + +DEFAULTS = Defaults(port=8888, host="0.0.0.0") + + +def _model(**kw) -> ModelDef: + base = dict(display_name="X", repo="org/name", size_gb=1.0, mode="solo") + base.update(kw) + return ModelDef(**base) + + +def test_solo_model_emits_solo_flag_and_ordered_args(): + cmd = build_launch_command("k", _model(vllm_args=["--max-model-len=1000"]), DEFAULTS) + assert cmd == ( + "./launch-cluster.sh --solo -d exec vllm serve org/name " + "--port=8888 --host=0.0.0.0 --max-model-len=1000" + ) + + +def test_cluster_model_omits_solo_flag(): + cmd = build_launch_command("k", _model(mode="cluster", vllm_args=["-tp=2"]), DEFAULTS) + assert " --solo " not in cmd + assert cmd.startswith("./launch-cluster.sh -d exec vllm serve org/name") + + +def test_knob_overrides_matching_bundled_flag(): + # bundled arg sets max-model-len; the knob must win (single occurrence). + m = _model(vllm_args=["--max-model-len=1000"], knobs={"max_model_len": 65536}) + cmd = build_launch_command("k", m, DEFAULTS) + assert "--max-model-len=65536" in cmd + assert "--max-model-len=1000" not in cmd + + +def test_repo_with_shell_metacharacters_is_quoted_not_executed(): + # build_launch_command quotes even a hostile repo (validate_repo guards the + # API boundary; this proves the sink itself is safe in depth). + evil = "org/name; rm -rf ~ #" + cmd = build_launch_command("k", _model(repo=evil), DEFAULTS) + # The raw metacharacters must not appear unquoted... + assert "; rm -rf" not in cmd.replace(shlex.quote(evil), "") + # ...and shlex.split must recover the repo as one literal token. 
+ tokens = shlex.split(cmd) + assert evil in tokens + + +def test_command_string_round_trips_through_shlex_split(): + # The invariant validate.py relies on: every arg survives quote -> split intact. + args = ["--max-model-len=32768", "--load-format=fastsafetensors", "--note=a b c"] + cmd = build_launch_command("k", _model(vllm_args=args), DEFAULTS) + tokens = shlex.split(cmd) + for a in args: + assert a in tokens + + +def test_injection_via_vllm_arg_stays_literal(): + payload = "--foo=$(touch /tmp/pwned)" + cmd = build_launch_command("k", _model(vllm_args=[payload]), DEFAULTS) + assert payload in shlex.split(cmd) # preserved as one inert token