diff --git a/image/Dockerfile b/image/Dockerfile index 63a91e0..021fbe6 100644 --- a/image/Dockerfile +++ b/image/Dockerfile @@ -18,6 +18,12 @@ COPY models.yaml /app/models.yaml # time — survives docker rm + redeploy of the parakeet container. COPY parakeet_patches /app/parakeet_patches +# WhisperX container build context (Dockerfile + requirements.txt + app/). +# The "Install WhisperX" action in spark-control ships these files to Spark 2 +# over SSH, then runs `docker build` + `docker run` there. The container +# becomes a managed always-on service alongside parakeet-asr and magpie-tts. +COPY whisperx_container /app/whisperx_container + RUN pip install --no-cache-dir -e . ENV BIND_PORT=9999 diff --git a/image/app/audio_proxy.py b/image/app/audio_proxy.py index cfffdb4..92adbe0 100644 --- a/image/app/audio_proxy.py +++ b/image/app/audio_proxy.py @@ -209,6 +209,17 @@ def build_router(settings: Settings, deep_health: Any = None) -> APIRouter: raise HTTPException(r.status_code, r.text[:500]) return Response(content=r.content, media_type=r.headers.get("content-type", "application/json")) + def _whisperx_base() -> str: + return f"http://{settings.whisperx_host}:{settings.whisperx_port}" + + async def _whisperx_healthy() -> bool: + try: + async with httpx.AsyncClient(timeout=2.0) as client: + r = await client.get(f"{_whisperx_base()}/health") + return r.status_code == 200 and bool(r.json().get("diarizer_loaded")) + except Exception: + return False + # ---- /api/audio/transcribe-with-speakers (STT + diarization, merged) ---- @router.post("/api/audio/transcribe-with-speakers") async def transcribe_with_speakers( @@ -245,6 +256,23 @@ def build_router(settings: Settings, deep_health: Any = None) -> APIRouter: filename = file.filename or "audio.wav" content_type = file.content_type or "application/octet-stream" + # Prefer WhisperX (single-pipeline, handles long audio properly) when it's + # installed and healthy. Fall back to Parakeet + Sortformer otherwise. + if await _whisperx_healthy(): + files = {"file": (filename, body, content_type)} + try: + async with httpx.AsyncClient(timeout=1800.0) as client: + r = await client.post( + f"{_whisperx_base()}/v1/audio/transcribe-with-speakers", + files=files, + ) + except httpx.HTTPError as e: + raise HTTPException(502, f"whisperx unreachable: {e}") + if r.status_code != 200: + raise HTTPException(r.status_code, r.text[:500]) + return r.json() + + # ── Legacy fallback: Parakeet ASR + Sortformer diarizer in parallel ── async def _call_transcribe(client: httpx.AsyncClient) -> dict: files = {"file": (filename, body, content_type)} data = {"response_format": "verbose_json"} diff --git a/image/app/config.py b/image/app/config.py index 678519e..9392b21 100644 --- a/image/app/config.py +++ b/image/app/config.py @@ -35,6 +35,11 @@ class Settings: magpie_host: str magpie_user: str magpie_container: str + whisperx_host: str + whisperx_user: str + whisperx_container: str + whisperx_port: int + whisperx_model: str ssh_key_path: str ssh_known_hosts: str models_yaml: str @@ -49,7 +54,7 @@ class Settings: def from_env(cls) -> "Settings": spark2_host = _env("SPARK2_HOST") spark2_user = _env("SPARK2_USER") - # Parakeet and Magpie default to Spark 2 unless explicitly overridden. + # Parakeet, Magpie, and WhisperX all default to Spark 2 unless overridden. return cls( spark1_host=_env("SPARK1_HOST"), spark1_user=_env("SPARK1_USER"), @@ -61,6 +66,11 @@ class Settings: magpie_host=_env("MAGPIE_HOST") or spark2_host, magpie_user=_env("MAGPIE_USER") or spark2_user, magpie_container=_env("MAGPIE_CONTAINER") or "magpie-tts", + whisperx_host=_env("WHISPERX_HOST") or spark2_host, + whisperx_user=_env("WHISPERX_USER") or spark2_user, + whisperx_container=_env("WHISPERX_CONTAINER") or "whisperx-asr", + whisperx_port=int(_env("WHISPERX_PORT", "8002")), + whisperx_model=_env("WHISPERX_MODEL", "medium"), ssh_key_path=_env("SSH_KEY_PATH"), ssh_known_hosts=_env("SSH_KNOWN_HOSTS"), models_yaml=_resolve_models_yaml(), diff --git a/image/app/server.py b/image/app/server.py index a0a9eac..fa42740 100644 --- a/image/app/server.py +++ b/image/app/server.py @@ -24,6 +24,7 @@ from .overrides import add_custom, delete_custom, extract_knobs_from_args, load_ from .services import docker_state, run_action, services_from_settings from .speech_models import SpeechModelsManager from .ssh import ssh_run +from .whisperx_install import WhisperXInstaller from .swap import SwapManager from .updates import UpdateManager, get_update_status from .validate import validate_launch @@ -39,6 +40,7 @@ hardware_probe = HardwareProbe(settings) nim_manager = NimManager(settings) deep_health = DeepHealth(settings) speech_models = SpeechModelsManager(settings) +whisperx_installer = WhisperXInstaller(settings) app = FastAPI(title="spark-control", version="0.1.0") @@ -535,6 +537,70 @@ async def post_speech_models_restart() -> dict: return result +# ---- WhisperX install (Phase 2 of the WhisperX migration) ---- + +@app.get("/api/whisperx/status") +async def get_whisperx_status() -> dict: + """Is WhisperX installed + healthy on Spark 2 right now?""" + return await whisperx_installer.status() + + +@app.post("/api/whisperx/install") +async def post_whisperx_install() -> dict: + """One-click install: ships the WhisperX build context from inside + spark-control to Spark 2, runs `docker build` + `docker run`, polls + /health until both models are loaded. Streams progress via the matching + GET /api/whisperx/install/{job_id}/stream SSE endpoint.""" + try: + job = await whisperx_installer.trigger() + except RuntimeError as e: + raise HTTPException(409, str(e)) + return {"job_id": job.id, "started_at": job.started_at} + + +@app.get("/api/whisperx/install/{job_id}") +async def get_whisperx_install(job_id: str) -> dict: + job = whisperx_installer.get(job_id) + if not job: + raise HTTPException(404, "unknown job") + return { + "id": job.id, + "state": job.state, + "phase": job.phase, + "lines": job.lines, + "started_at": job.started_at, + "finished_at": job.finished_at, + "returncode": job.returncode, + } + + +@app.get("/api/whisperx/install/{job_id}/stream") +async def stream_whisperx_install(job_id: str) -> StreamingResponse: + job = whisperx_installer.get(job_id) + if not job: + raise HTTPException(404, "unknown job") + + async def event_stream(): + last_idx = 0 + last_phase = "" + last_state = "" + while True: + new_lines = job.lines[last_idx:] + last_idx = len(job.lines) + for line in new_lines: + yield f"data: {json.dumps({'line': line})}\n\n" + if job.phase != last_phase or job.state != last_state: + yield f"event: phase\ndata: {json.dumps({'phase': job.phase, 'state': job.state})}\n\n" + last_phase = job.phase + last_state = job.state + if job.finished_at: + yield f"event: done\ndata: {json.dumps({'state': job.state, 'returncode': job.returncode})}\n\n" + return + await asyncio.sleep(0.6) + + return StreamingResponse(event_stream(), media_type="text/event-stream") + + @app.get("/api/endpoints") async def get_endpoints() -> dict: """Service-discovery summary. Stable shape; other apps on the LAN can poll this diff --git a/image/app/services.py b/image/app/services.py index 7f4dce5..ff7322a 100644 --- a/image/app/services.py +++ b/image/app/services.py @@ -65,6 +65,14 @@ def services_from_settings(s: Settings) -> dict[str, ServiceDef]: container=s.magpie_container, port=s.magpie_port, ), + "whisperx": ServiceDef( + name="whisperx", + kind="stt+diarize", + host=s.whisperx_host, + user=s.whisperx_user, + container=s.whisperx_container, + port=s.whisperx_port, + ), } for entry in load_custom_services(): key = entry.get("key") diff --git a/image/app/static/app.js b/image/app/static/app.js index 44f421f..6c465ff 100644 --- a/image/app/static/app.js +++ b/image/app/static/app.js @@ -664,6 +664,117 @@ async function onSpeechModelsRestart() { } } +// ===================== WhisperX install (v0.12) ===================== + +const wxState = { + job_id: null, + eventsource: null, + timer_handle: null, + started_at: null, +}; + +async function renderWhisperXBanner() { + const card = el('#whisperx-install-card'); + if (!card) return; + let status; + try { + status = await fetchJSON('/api/whisperx/status'); + } catch { + card.classList.add('hidden'); + return; + } + if (status.installed && status.healthy) { + card.classList.add('hidden'); + } else if (status.configured) { + card.classList.remove('hidden'); + } else { + card.classList.add('hidden'); + } +} + +async function onWhisperXInstall() { + if (wxState.job_id) { + // Just re-attach to the running job + showWhisperXDialog(); + return; + } + if (!confirm('Install WhisperX on Spark 2? This builds a new Docker image (~10–15 min first time, mostly downloading pyannote + whisper weights). Parakeet/Magpie stay untouched.')) return; + try { + const r = await fetchJSON('/api/whisperx/install', { method: 'POST' }); + attachToWhisperXInstall(r.job_id); + } catch (e) { + alert('Failed to start WhisperX install: ' + e.message); + } +} + +function showWhisperXDialog() { + el('#whisperx-progress-dialog').showModal(); +} + +function attachToWhisperXInstall(jobId) { + wxState.job_id = jobId; + el('#wx-prog-title').textContent = 'Installing WhisperX…'; + el('#wx-prog-phase').textContent = 'Starting…'; + el('#wx-prog-log').textContent = ''; + showWhisperXDialog(); + + // Tick a timer + wxState.started_at = Date.now(); + if (wxState.timer_handle) clearInterval(wxState.timer_handle); + wxState.timer_handle = setInterval(() => { + const sec = Math.max(0, Math.floor((Date.now() - wxState.started_at) / 1000)); + const m = Math.floor(sec / 60); + el('#wx-prog-elapsed').textContent = `${m}:${(sec % 60).toString().padStart(2, '0')}`; + }, 500); + + // Backfill snapshot then connect SSE + fetchJSON(`/api/whisperx/install/${jobId}`).then((snap) => { + el('#wx-prog-phase').textContent = snap.phase || 'Working…'; + el('#wx-prog-log').textContent = (snap.lines || []).join('\n'); + el('#wx-prog-log').scrollTop = el('#wx-prog-log').scrollHeight; + if (snap.finished_at) { + handleWhisperXDone(snap); + return; + } + const es = new EventSource(`/api/whisperx/install/${jobId}/stream`); + wxState.eventsource = es; + es.onmessage = (ev) => { + try { + const log = el('#wx-prog-log'); + log.textContent += JSON.parse(ev.data).line + '\n'; + log.scrollTop = log.scrollHeight; + } catch {} + }; + es.addEventListener('phase', (ev) => { + try { el('#wx-prog-phase').textContent = JSON.parse(ev.data).phase; } catch {} + }); + es.addEventListener('done', (ev) => { + try { handleWhisperXDone(JSON.parse(ev.data)); } catch {} + es.close(); + wxState.eventsource = null; + }); + es.onerror = () => { es.close(); wxState.eventsource = null; }; + }).catch(() => {}); +} + +function handleWhisperXDone(d) { + if (wxState.timer_handle) { clearInterval(wxState.timer_handle); wxState.timer_handle = null; } + wxState.job_id = null; + const rc = d.returncode; + if (d.state === 'failed' || (rc !== 0 && rc != null)) { + el('#wx-prog-title').textContent = `WhisperX install failed (rc=${rc})`; + el('#wx-prog-phase').textContent = 'Failed — check the build log below'; + } else { + el('#wx-prog-title').textContent = 'WhisperX installed'; + el('#wx-prog-phase').textContent = 'Ready ✓ — appears in Always-on services below'; + // Refresh services + banner state + setTimeout(() => { + renderServices(); + renderWhisperXBanner(); + }, 1000); + } +} + async function onServiceAction(key) { if (state.service_action_in_flight) return; const [name, action] = key.split(':'); @@ -1860,6 +1971,11 @@ async function init() { } catch {} setupDashboardTabs(); setupEndpointCollapse(); + // WhisperX install button + const wxBtn = el('#wx-install'); + if (wxBtn) wxBtn.addEventListener('click', onWhisperXInstall); + const wxCloseBtn = el('#wx-prog-close'); + if (wxCloseBtn) wxCloseBtn.addEventListener('click', () => el('#whisperx-progress-dialog').close()); await loadModels(); await pollStatus(); await renderServices(); @@ -1869,11 +1985,14 @@ async function init() { loadDiskStatus(); // Speech-model patches panel — slow over SSH, runs after first paint. renderSpeechModels(); + // WhisperX install banner — show only when not yet installed/healthy. + renderWhisperXBanner(); setInterval(pollStatus, 5000); setInterval(pollHardware, 8000); // every 8s setInterval(pollUpdates, 300000); // every 5 min setInterval(loadDiskStatus, 60000); // every 60s — disk state changes rarely setInterval(renderSpeechModels, 120000); // every 2 min — patches change rarely + setInterval(renderWhisperXBanner, 60000); // every 60s — auto-hides banner after install } init(); diff --git a/image/app/static/index.html b/image/app/static/index.html index 8accfe4..da9ce2b 100644 --- a/image/app/static/index.html +++ b/image/app/static/index.html @@ -103,6 +103,46 @@
+ + + + + +