diff --git a/server/backends/hardware.js b/server/backends/hardware.js index 76618f7..f15b773 100644 --- a/server/backends/hardware.js +++ b/server/backends/hardware.js @@ -26,6 +26,17 @@ const DEFAULT_TIMEOUT_MS = 900_000; const DEFAULT_TRANSCRIBE_MODEL = "parakeet-tdt-0.6b-v3"; const DEFAULT_ANALYZE_MODEL = "gemma3:27b"; +// Normalize an OpenAI-API-compatible base URL: strip ALL trailing slashes +// AND strip a trailing `/v1` segment if the operator pasted one, +// because we always append `/v1/...` below. Without this, a base URL +// of `http://192.168.1.87:8000/v1` would produce +// `http://192.168.1.87:8000/v1/v1/audio/transcriptions` → 404. +function normalizeApiBase(url) { + let s = (url || "").trim().replace(/\/+$/, ""); + s = s.replace(/\/+v1$/, ""); + return s; +} + export function createHardwareBackend({ parakeetBaseURL = "", gemmaBaseURL = "", @@ -33,8 +44,8 @@ export function createHardwareBackend({ gemmaModel = DEFAULT_ANALYZE_MODEL, timeoutMs = DEFAULT_TIMEOUT_MS, } = {}) { - const parakeet = parakeetBaseURL ? parakeetBaseURL.replace(/\/$/, "") : ""; - const gemma = gemmaBaseURL ? gemmaBaseURL.replace(/\/$/, "") : ""; + const parakeet = normalizeApiBase(parakeetBaseURL); + const gemma = normalizeApiBase(gemmaBaseURL); const transcribeModel = parakeetModel || DEFAULT_TRANSCRIBE_MODEL; const analyzeModel = gemmaModel || DEFAULT_ANALYZE_MODEL; @@ -76,37 +87,62 @@ export function createHardwareBackend({ return form; }; - const url = `${parakeet}/v1/audio/transcriptions`; - let res; - try { - res = await fetch(url, { - method: "POST", - body: buildForm(true), - signal: AbortSignal.timeout(timeoutMs), - }); - } catch (err) { - const e = new Error( - `Parakeet transcribe network error: ${err?.message || err}` - ); - e.status = 502; - throw e; - } - - // If the wrapper rejects the rich params, retry with bare-bones.
- if (!res.ok && res.status >= 400 && res.status < 600) { - const richBody = await safeBody(res); - console.warn( - `[hardware] rich Parakeet request returned ${res.status}: ${richBody.slice(0, 200)} — retrying bare` - ); + // Path candidates, in order. The OpenAI Whisper standard is + // `/v1/audio/transcriptions`; some self-hosted wrappers (or + // operators who pasted their base URL with a path already + // stripped) expose the endpoint at `/audio/transcriptions` + // instead. We try the standard path first, then fall back on + // 404 only — other status codes (rate-limit, 500) shouldn't + // trigger a different path retry. + const pathCandidates = [ + "/v1/audio/transcriptions", + "/audio/transcriptions", + ]; + let res = null; + let lastUrl = null; + let pathErrSummary = null; + for (const p of pathCandidates) { + const url = `${parakeet}${p}`; + lastUrl = url; try { res = await fetch(url, { + method: "POST", + body: buildForm(true), + signal: AbortSignal.timeout(timeoutMs), + }); + } catch (err) { + const e = new Error( + `Parakeet transcribe network error at ${url}: ${err?.message || err}` + ); + e.status = 502; + throw e; + } + if (res.status !== 404) break; + // 404 → try the next path candidate. Capture the body for the + // final error message if all candidates 404. + pathErrSummary = await safeBody(res); + console.warn( + `[hardware] 404 at ${url} — trying next path candidate` + ); + } + + // If the wrapper rejects the rich params (4xx other than 404 we + // already exhausted, or 5xx), retry with bare-bones at the + // working URL. 
+ if (!res.ok && res.status >= 400 && res.status < 600 && res.status !== 404) { + const richBody = await safeBody(res); + console.warn( + `[hardware] rich Parakeet request to ${lastUrl} returned ${res.status}: ${richBody.slice(0, 200)} — retrying bare` + ); + try { + res = await fetch(lastUrl, { method: "POST", body: buildForm(false), signal: AbortSignal.timeout(timeoutMs), }); } catch (err) { const e = new Error( - `Parakeet transcribe network error (fallback): ${err?.message || err}` + `Parakeet transcribe network error (fallback) at ${lastUrl}: ${err?.message || err}` ); e.status = 502; throw e; @@ -115,8 +151,13 @@ export function createHardwareBackend({ if (!res.ok) { const body = await safeBody(res); + const hint = + res.status === 404 + ? ` (tried ${pathCandidates.join(" and ")} on base ${parakeet} — wrapper may expose the endpoint at a different path; check the Parakeet URL or container logs)` + : ""; + // A 404 body was already read inside the path-candidate loop, so a re-read may be empty; fall back to the copy captured there. const e = new Error( - `Parakeet transcribe ${res.status}: ${body.slice(0, 300)}` + `Parakeet transcribe ${res.status} at ${lastUrl}: ${(body || (res.status === 404 && pathErrSummary) || "").slice(0, 300)}${hint}` ); e.status = res.status; throw e; @@ -165,31 +206,49 @@ export function createHardwareBackend({ throw err; } - const url = `${gemma}/v1/chat/completions`; - let res; - try { - res = await fetch(url, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ - model: analyzeModel, - max_tokens: ANALYZE_MAX_TOKENS, - messages: [{ role: "user", content: prompt }], - stream: false, - }), - signal: AbortSignal.timeout(timeoutMs), - }); - } catch (err) { - const e = new Error( - `Gemma analyze network error: ${err?.message || err}` + // Same path-fallback shape as Parakeet transcribe. Standard + // OpenAI-compatible path is /v1/chat/completions; some Ollama + // versions also expose it at /chat/completions without the /v1.
+ const pathCandidates = ["/v1/chat/completions", "/chat/completions"]; + let res = null; + let lastUrl = null; + for (const p of pathCandidates) { + const url = `${gemma}${p}`; + lastUrl = url; + try { + res = await fetch(url, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: analyzeModel, + max_tokens: ANALYZE_MAX_TOKENS, + messages: [{ role: "user", content: prompt }], + stream: false, + }), + signal: AbortSignal.timeout(timeoutMs), + }); + } catch (err) { + const e = new Error( + `Gemma analyze network error at ${url}: ${err?.message || err}` + ); + e.status = 502; + throw e; + } + if (res.status !== 404) break; + console.warn( + `[hardware] 404 at ${url} — trying next path candidate` ); - e.status = 502; - throw e; } if (!res.ok) { const body = await safeBody(res); - const e = new Error(`Gemma analyze ${res.status}: ${body.slice(0, 300)}`); + const hint = + res.status === 404 + ? ` (tried ${pathCandidates.join(" and ")} on base ${gemma} — check the Gemma/Ollama URL)` + : ""; + const e = new Error( + `Gemma analyze ${res.status} at ${lastUrl}: ${body.slice(0, 300)}${hint}` + ); e.status = res.status; throw e; } diff --git a/server/package.json b/server/package.json index 10cebb0..e08f9e1 100644 --- a/server/package.json +++ b/server/package.json @@ -1,6 +1,6 @@ { "name": "recap-relay-server", - "version": "0.2.9", + "version": "0.2.10", "type": "module", "private": true, "dependencies": { diff --git a/startos/versions/index.ts b/startos/versions/index.ts index 8605fc6..6b0f07c 100644 --- a/startos/versions/index.ts +++ b/startos/versions/index.ts @@ -10,8 +10,9 @@ import { v_0_2_6 } from './v0.2.6' import { v_0_2_7 } from './v0.2.7' import { v_0_2_8 } from './v0.2.8' import { v_0_2_9 } from './v0.2.9' +import { v_0_2_10 } from './v0.2.10' export const versionGraph = VersionGraph.of({ - current: v_0_2_9, - other: [v_0_2_8, v_0_2_7, v_0_2_6, v_0_2_5, v_0_2_4, v_0_2_3, v_0_2_2, v_0_2_1, v_0_2_0, v_0_1_0], + 
current: v_0_2_10, + other: [v_0_2_9, v_0_2_8, v_0_2_7, v_0_2_6, v_0_2_5, v_0_2_4, v_0_2_3, v_0_2_2, v_0_2_1, v_0_2_0, v_0_1_0], }) diff --git a/startos/versions/v0.2.10.ts b/startos/versions/v0.2.10.ts new file mode 100644 index 0000000..c6a9bcd --- /dev/null +++ b/startos/versions/v0.2.10.ts @@ -0,0 +1,13 @@ +import { VersionInfo } from '@start9labs/start-sdk' + +export const v_0_2_10 = VersionInfo.of({ + version: '0.2.10:0', + releaseNotes: { + en_US: + 'Hardware backend (Parakeet + Gemma) is now robust to two common URL-config mistakes: (1) base URLs that already end in /v1 no longer produce /v1/v1/... requests — the trailing /v1 is stripped before path-appending, (2) wrappers that expose the endpoint at /audio/transcriptions or /chat/completions (without the /v1 prefix) are auto-tried as a fallback when the standard path 404s. Failure messages now include the exact attempted URL so misconfigured endpoints are obvious in the relay logs and the audit log.', + }, + migrations: { + up: async ({ effects }) => {}, + down: async ({ effects }) => {}, + }, +})