diff --git a/image/Dockerfile b/image/Dockerfile index 021fbe6..63a91e0 100644 --- a/image/Dockerfile +++ b/image/Dockerfile @@ -18,12 +18,6 @@ COPY models.yaml /app/models.yaml # time — survives docker rm + redeploy of the parakeet container. COPY parakeet_patches /app/parakeet_patches -# WhisperX container build context (Dockerfile + requirements.txt + app/). -# The "Install WhisperX" action in spark-control ships these files to Spark 2 -# over SSH, then runs `docker build` + `docker run` there. The container -# becomes a managed always-on service alongside parakeet-asr and magpie-tts. -COPY whisperx_container /app/whisperx_container - RUN pip install --no-cache-dir -e . ENV BIND_PORT=9999 diff --git a/image/app/audio_proxy.py b/image/app/audio_proxy.py index 92adbe0..0bfc95a 100644 --- a/image/app/audio_proxy.py +++ b/image/app/audio_proxy.py @@ -209,17 +209,6 @@ def build_router(settings: Settings, deep_health: Any = None) -> APIRouter: raise HTTPException(r.status_code, r.text[:500]) return Response(content=r.content, media_type=r.headers.get("content-type", "application/json")) - def _whisperx_base() -> str: - return f"http://{settings.whisperx_host}:{settings.whisperx_port}" - - async def _whisperx_healthy() -> bool: - try: - async with httpx.AsyncClient(timeout=2.0) as client: - r = await client.get(f"{_whisperx_base()}/health") - return r.status_code == 200 and bool(r.json().get("diarizer_loaded")) - except Exception: - return False - # ---- /api/audio/transcribe-with-speakers (STT + diarization, merged) ---- @router.post("/api/audio/transcribe-with-speakers") async def transcribe_with_speakers( @@ -256,23 +245,8 @@ def build_router(settings: Settings, deep_health: Any = None) -> APIRouter: filename = file.filename or "audio.wav" content_type = file.content_type or "application/octet-stream" - # Prefer WhisperX (single-pipeline, handles long audio properly) when it's - # installed and healthy. Fall back to Parakeet + Sortformer otherwise. - if await _whisperx_healthy(): - files = {"file": (filename, body, content_type)} - try: - async with httpx.AsyncClient(timeout=1800.0) as client: - r = await client.post( - f"{_whisperx_base()}/v1/audio/transcribe-with-speakers", - files=files, - ) - except httpx.HTTPError as e: - raise HTTPException(502, f"whisperx unreachable: {e}") - if r.status_code != 200: - raise HTTPException(r.status_code, r.text[:500]) - return r.json() - - # ── Legacy fallback: Parakeet ASR + Sortformer diarizer in parallel ── + # Parakeet ASR + Sortformer diarizer in parallel. (A WhisperX detour + # lived here briefly — reverted in v0.13.0:0; see release notes.) async def _call_transcribe(client: httpx.AsyncClient) -> dict: files = {"file": (filename, body, content_type)} data = {"response_format": "verbose_json"} diff --git a/image/app/config.py b/image/app/config.py index 9392b21..678519e 100644 --- a/image/app/config.py +++ b/image/app/config.py @@ -35,11 +35,6 @@ class Settings: magpie_host: str magpie_user: str magpie_container: str - whisperx_host: str - whisperx_user: str - whisperx_container: str - whisperx_port: int - whisperx_model: str ssh_key_path: str ssh_known_hosts: str models_yaml: str @@ -54,7 +49,7 @@ class Settings: def from_env(cls) -> "Settings": spark2_host = _env("SPARK2_HOST") spark2_user = _env("SPARK2_USER") - # Parakeet, Magpie, and WhisperX all default to Spark 2 unless overridden. + # Parakeet and Magpie default to Spark 2 unless explicitly overridden. return cls( spark1_host=_env("SPARK1_HOST"), spark1_user=_env("SPARK1_USER"), @@ -66,11 +61,6 @@ class Settings: magpie_host=_env("MAGPIE_HOST") or spark2_host, magpie_user=_env("MAGPIE_USER") or spark2_user, magpie_container=_env("MAGPIE_CONTAINER") or "magpie-tts", - whisperx_host=_env("WHISPERX_HOST") or spark2_host, - whisperx_user=_env("WHISPERX_USER") or spark2_user, - whisperx_container=_env("WHISPERX_CONTAINER") or "whisperx-asr", - whisperx_port=int(_env("WHISPERX_PORT", "8002")), - whisperx_model=_env("WHISPERX_MODEL", "medium"), ssh_key_path=_env("SSH_KEY_PATH"), ssh_known_hosts=_env("SSH_KNOWN_HOSTS"), models_yaml=_resolve_models_yaml(), diff --git a/image/app/server.py b/image/app/server.py index fa42740..ed05be6 100644 --- a/image/app/server.py +++ b/image/app/server.py @@ -24,7 +24,6 @@ from .overrides import add_custom, delete_custom, extract_knobs_from_args, load_ from .services import docker_state, run_action, services_from_settings from .speech_models import SpeechModelsManager from .ssh import ssh_run -from .whisperx_install import WhisperXInstaller from .swap import SwapManager from .updates import UpdateManager, get_update_status from .validate import validate_launch @@ -40,7 +39,6 @@ hardware_probe = HardwareProbe(settings) nim_manager = NimManager(settings) deep_health = DeepHealth(settings) speech_models = SpeechModelsManager(settings) -whisperx_installer = WhisperXInstaller(settings) app = FastAPI(title="spark-control", version="0.1.0") @@ -537,68 +535,10 @@ async def post_speech_models_restart() -> dict: return result -# ---- WhisperX install (Phase 2 of the WhisperX migration) ---- - -@app.get("/api/whisperx/status") -async def get_whisperx_status() -> dict: - """Is WhisperX installed + healthy on Spark 2 right now?""" - return await whisperx_installer.status() - - -@app.post("/api/whisperx/install") -async def post_whisperx_install() -> dict: - """One-click install: ships the WhisperX build context from inside - spark-control to Spark 2, runs `docker build` + `docker run`, polls - /health until both models are loaded. Streams progress via the matching - GET /api/whisperx/install/{job_id}/stream SSE endpoint.""" - try: - job = await whisperx_installer.trigger() - except RuntimeError as e: - raise HTTPException(409, str(e)) - return {"job_id": job.id, "started_at": job.started_at} - - -@app.get("/api/whisperx/install/{job_id}") -async def get_whisperx_install(job_id: str) -> dict: - job = whisperx_installer.get(job_id) - if not job: - raise HTTPException(404, "unknown job") - return { - "id": job.id, - "state": job.state, - "phase": job.phase, - "lines": job.lines, - "started_at": job.started_at, - "finished_at": job.finished_at, - "returncode": job.returncode, - } - - -@app.get("/api/whisperx/install/{job_id}/stream") -async def stream_whisperx_install(job_id: str) -> StreamingResponse: - job = whisperx_installer.get(job_id) - if not job: - raise HTTPException(404, "unknown job") - - async def event_stream(): - last_idx = 0 - last_phase = "" - last_state = "" - while True: - new_lines = job.lines[last_idx:] - last_idx = len(job.lines) - for line in new_lines: - yield f"data: {json.dumps({'line': line})}\n\n" - if job.phase != last_phase or job.state != last_state: - yield f"event: phase\ndata: {json.dumps({'phase': job.phase, 'state': job.state})}\n\n" - last_phase = job.phase - last_state = job.state - if job.finished_at: - yield f"event: done\ndata: {json.dumps({'state': job.state, 'returncode': job.returncode})}\n\n" - return - await asyncio.sleep(0.6) - - return StreamingResponse(event_stream(), media_type="text/event-stream") +# NOTE: a WhisperX-on-Spark-2 install action lived here briefly in v0.12.0:0–4 +# but was reverted in v0.13.0:0. NGC's custom-versioned torch on ARM64 made +# building torchaudio (which WhisperX needs via pyannote) unworkable. The +# existing Parakeet + Sortformer pipeline stays as the audio path. @app.get("/api/endpoints") diff --git a/image/app/services.py b/image/app/services.py index ff7322a..7f4dce5 100644 --- a/image/app/services.py +++ b/image/app/services.py @@ -65,14 +65,6 @@ def services_from_settings(s: Settings) -> dict[str, ServiceDef]: container=s.magpie_container, port=s.magpie_port, ), - "whisperx": ServiceDef( - name="whisperx", - kind="stt+diarize", - host=s.whisperx_host, - user=s.whisperx_user, - container=s.whisperx_container, - port=s.whisperx_port, - ), } for entry in load_custom_services(): key = entry.get("key") diff --git a/image/app/static/app.js b/image/app/static/app.js index 6c465ff..ef58f02 100644 --- a/image/app/static/app.js +++ b/image/app/static/app.js @@ -664,116 +664,10 @@ async function onSpeechModelsRestart() { } } -// ===================== WhisperX install (v0.12) ===================== - -const wxState = { - job_id: null, - eventsource: null, - timer_handle: null, - started_at: null, -}; - -async function renderWhisperXBanner() { - const card = el('#whisperx-install-card'); - if (!card) return; - let status; - try { - status = await fetchJSON('/api/whisperx/status'); - } catch { - card.classList.add('hidden'); - return; - } - if (status.installed && status.healthy) { - card.classList.add('hidden'); - } else if (status.configured) { - card.classList.remove('hidden'); - } else { - card.classList.add('hidden'); - } -} - -async function onWhisperXInstall() { - if (wxState.job_id) { - // Just re-attach to the running job - showWhisperXDialog(); - return; - } - if (!confirm('Install WhisperX on Spark 2? This builds a new Docker image (~10–15 min first time, mostly downloading pyannote + whisper weights). Parakeet/Magpie stay untouched.')) return; - try { - const r = await fetchJSON('/api/whisperx/install', { method: 'POST' }); - attachToWhisperXInstall(r.job_id); - } catch (e) { - alert('Failed to start WhisperX install: ' + e.message); - } -} - -function showWhisperXDialog() { - el('#whisperx-progress-dialog').showModal(); -} - -function attachToWhisperXInstall(jobId) { - wxState.job_id = jobId; - el('#wx-prog-title').textContent = 'Installing WhisperX…'; - el('#wx-prog-phase').textContent = 'Starting…'; - el('#wx-prog-log').textContent = ''; - showWhisperXDialog(); - - // Tick a timer - wxState.started_at = Date.now(); - if (wxState.timer_handle) clearInterval(wxState.timer_handle); - wxState.timer_handle = setInterval(() => { - const sec = Math.max(0, Math.floor((Date.now() - wxState.started_at) / 1000)); - const m = Math.floor(sec / 60); - el('#wx-prog-elapsed').textContent = `${m}:${(sec % 60).toString().padStart(2, '0')}`; - }, 500); - - // Backfill snapshot then connect SSE - fetchJSON(`/api/whisperx/install/${jobId}`).then((snap) => { - el('#wx-prog-phase').textContent = snap.phase || 'Working…'; - el('#wx-prog-log').textContent = (snap.lines || []).join('\n'); - el('#wx-prog-log').scrollTop = el('#wx-prog-log').scrollHeight; - if (snap.finished_at) { - handleWhisperXDone(snap); - return; - } - const es = new EventSource(`/api/whisperx/install/${jobId}/stream`); - wxState.eventsource = es; - es.onmessage = (ev) => { - try { - const log = el('#wx-prog-log'); - log.textContent += JSON.parse(ev.data).line + '\n'; - log.scrollTop = log.scrollHeight; - } catch {} - }; - es.addEventListener('phase', (ev) => { - try { el('#wx-prog-phase').textContent = JSON.parse(ev.data).phase; } catch {} - }); - es.addEventListener('done', (ev) => { - try { handleWhisperXDone(JSON.parse(ev.data)); } catch {} - es.close(); - wxState.eventsource = null; - }); - es.onerror = () => { es.close(); wxState.eventsource = null; }; - }).catch(() => {}); -} - -function handleWhisperXDone(d) { - if (wxState.timer_handle) { clearInterval(wxState.timer_handle); wxState.timer_handle = null; } - wxState.job_id = null; - const rc = d.returncode; - if (d.state === 'failed' || (rc !== 0 && rc != null)) { - el('#wx-prog-title').textContent = `WhisperX install failed (rc=${rc})`; - el('#wx-prog-phase').textContent = 'Failed — check the build log below'; - } else { - el('#wx-prog-title').textContent = 'WhisperX installed'; - el('#wx-prog-phase').textContent = 'Ready ✓ — appears in Always-on services below'; - // Refresh services + banner state - setTimeout(() => { - renderServices(); - renderWhisperXBanner(); - }, 1000); - } -} +// NOTE: a WhisperX install action lived here briefly in v0.12 but was +// reverted in v0.13.0:0 — the NGC PyTorch container on ARM64 doesn't ship +// torchaudio and we couldn't reliably build it from source. The existing +// Parakeet + Sortformer pipeline stays as the audio path. See release notes. async function onServiceAction(key) { if (state.service_action_in_flight) return; @@ -1971,11 +1865,6 @@ async function init() { } catch {} setupDashboardTabs(); setupEndpointCollapse(); - // WhisperX install button - const wxBtn = el('#wx-install'); - if (wxBtn) wxBtn.addEventListener('click', onWhisperXInstall); - const wxCloseBtn = el('#wx-prog-close'); - if (wxCloseBtn) wxCloseBtn.addEventListener('click', () => el('#whisperx-progress-dialog').close()); await loadModels(); await pollStatus(); await renderServices(); @@ -1985,14 +1874,11 @@ async function init() { loadDiskStatus(); // Speech-model patches panel — slow over SSH, runs after first paint. renderSpeechModels(); - // WhisperX install banner — show only when not yet installed/healthy. - renderWhisperXBanner(); setInterval(pollStatus, 5000); setInterval(pollHardware, 8000); // every 8s setInterval(pollUpdates, 300000); // every 5 min setInterval(loadDiskStatus, 60000); // every 60s — disk state changes rarely setInterval(renderSpeechModels, 120000); // every 2 min — patches change rarely - setInterval(renderWhisperXBanner, 60000); // every 60s — auto-hides banner after install } init(); diff --git a/image/app/static/index.html b/image/app/static/index.html index da9ce2b..8accfe4 100644 --- a/image/app/static/index.html +++ b/image/app/static/index.html @@ -103,46 +103,6 @@