From 98aeef8779dd21ea8223ce44a0a4276376f6cfad Mon Sep 17 00:00:00 2001 From: Keysat Date: Mon, 18 May 2026 21:26:08 -0500 Subject: [PATCH] v0.12.0:2 - hotfix: pin NGC's torch versions so pip can't break the ABI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WhisperX docker build was crashing at the model-prewarm step: OSError: undefined symbol: torch_library_impl Root cause: the NGC PyTorch base ships custom builds of torch + torchaudio + torchvision matched together for Blackwell (sm_120). When pip installed whisperx, it pulled the latest stock torchaudio wheel as a transitive dep, which was compiled against a different libtorch and won't load against NGC's. Fix: at build time, capture NGC's actual torch/torchaudio/torchvision versions into /tmp/torch-constraints.txt, then `pip install -c` that constraint for all subsequent installs. pip can't swap torch out, so the ABI stays consistent. whisperx and pyannote are happy with torch>=2.0 — NGC's 2.10.0a0 satisfies that easily. The pinned versions print to the build log so you can see them. Co-Authored-By: Claude Opus 4.7 (1M context) --- image/whisperx_container/Dockerfile | 19 ++++++++++++++++--- package/startos/versions/v0_1_0.ts | 4 ++-- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/image/whisperx_container/Dockerfile b/image/whisperx_container/Dockerfile index 47aac01..d79b1d7 100644 --- a/image/whisperx_container/Dockerfile +++ b/image/whisperx_container/Dockerfile @@ -26,10 +26,23 @@ RUN apt-get update \ && apt-get install -y --no-install-recommends ffmpeg \ && rm -rf /var/lib/apt/lists/* -# Install whisperx + the FastAPI wrapper deps. --break-system-packages because -# the NGC PyTorch image has its own managed Python that's flagged "system". +# CRITICAL: the NGC base image ships custom builds of torch + torchaudio + +# torchvision compiled together for Blackwell (sm_120). If pip pulls a stock +# torchaudio wheel as a transitive dep of whisperx/pyannote, the resulting +# ABI mismatch crashes at import time: +# "undefined symbol: torch_library_impl" +# Generate a constraints.txt from whatever versions NGC actually shipped, +# then pass it to every pip install so pip cannot swap torch out. +RUN python3 -c "import torch, torchaudio, torchvision; \ +import sys; \ +sys.stdout.write(f'torch=={torch.__version__}\ntorchaudio=={torchaudio.__version__}\ntorchvision=={torchvision.__version__}\n')" \ + > /tmp/torch-constraints.txt \ + && echo '── pinned torch versions ──' && cat /tmp/torch-constraints.txt + +# Install whisperx + the FastAPI wrapper deps under the torch constraint. COPY requirements.txt /tmp/requirements.txt -RUN pip install --break-system-packages --no-cache-dir -r /tmp/requirements.txt +RUN pip install --break-system-packages --no-cache-dir \ + -c /tmp/torch-constraints.txt -r /tmp/requirements.txt # Pre-warm the default Whisper + alignment models at build time so first-call # latency on a fresh container is small. (~3 GB cached into the image; if you diff --git a/package/startos/versions/v0_1_0.ts b/package/startos/versions/v0_1_0.ts index 0965b1e..b02e1c8 100644 --- a/package/startos/versions/v0_1_0.ts +++ b/package/startos/versions/v0_1_0.ts @@ -1,10 +1,10 @@ import { VersionInfo, IMPOSSIBLE } from '@start9labs/start-sdk' export const v0_1_0 = VersionInfo.of({ - version: '0.12.0:1', + version: '0.12.0:2', releaseNotes: { en_US: - 'v0.12.0:1 — hotfix: 0.12.0:0\'s install action used shlex.quote() on the remote build path, which wraps `~/whisperx-build/...` in single quotes — the remote shell then doesn\'t expand the tilde and treats it as a literal directory named `~`. Result: "bash: line 1: ~/whisperx-build/Dockerfile: No such file or directory" on the very first file copy. Same bug pattern we hit before with $HOME in the disk probe. Rewrote to embed $HOME in double-quoted remote shell strings; hardcoded file names (Dockerfile, requirements.txt, README.md, app/main.py) embed unquoted inside that scope. All other 0.12.0 behavior is unchanged.', + 'v0.12.0:2 — hotfix: WhisperX docker build was failing at the model-prewarm step with "undefined symbol: torch_library_impl". Root cause: the NGC PyTorch base image ships custom builds of torch + torchaudio + torchvision compiled together for Blackwell (sm_120); pip pulled a stock torchaudio wheel as a transitive dep of whisperx/pyannote, the ABIs didn\'t match, and the resulting .so file refused to load. Fix: generate a constraints.txt at build time from the NGC base\'s installed torch versions, and pass it to every pip install so pip can\'t swap torch/torchaudio/torchvision out from under us. Build should now finish through the model-prewarm step. No other changes vs 0.12.0:1.', }, migrations: { up: async ({ effects }) => {},