// Ollama provider — analysis only, raw HTTP to a local Ollama server.
//
// Ollama runs LLMs locally; there is no per-request cost. Default
// baseURL is the conventional `http://localhost:11434`. Users on a
// LAN-hosted Ollama point at it explicitly via the StartOS action.
//
// We don't ship a hardcoded model list — Ollama's catalog is whatever
// the user has `pull`ed locally. listInstalledModels() can optionally
// query /api/tags at config time, but for v1 we expose a free-text
// model field in the picker UI.

import { retryAPI } from "../util.js";
import { zeroCost } from "./cost.js";

const DEFAULT_BASE_URL = "http://localhost:11434";

/**
 * Builds the Ollama provider object (text analysis only).
 *
 * @param {object} [options]
 * @param {string} [options.baseURL] - Root URL of the Ollama server; falls
 *   back to DEFAULT_BASE_URL when empty or missing. Trailing slashes are
 *   removed before endpoint paths are appended.
 * @param {number} [options.timeoutMs=900000] - Per-attempt timeout for
 *   /api/generate requests, in milliseconds (default is 15 minutes).
 * @returns {object} Provider exposing `name`, `capabilities`,
 *   `listAnalysisModels`, `listTranscriptionModels`, `transcribeAudio`,
 *   `listInstalledModels` and `analyzeText`.
 */
export function createOllamaProvider({
  baseURL,
  timeoutMs = 900_000,
} = {}) {
  // Strip *every* trailing slash so "http://host:11434//" does not end up
  // producing "//api/generate" when the endpoint path is appended.
  const base = (baseURL || DEFAULT_BASE_URL).replace(/\/+$/, "");

  return {
    name: "ollama",

    capabilities: {
      transcribe: false,
      analyze: true,
      listModels: true,
    },

    /** No static catalog is shipped for Ollama; always returns []. */
    listAnalysisModels() {
      return [];
    },

    /** Transcription is not supported by this provider; always returns []. */
    listTranscriptionModels() {
      return [];
    },

    /**
     * Always rejects: this provider only implements analysis.
     *
     * @throws {Error} Unconditionally.
     */
    async transcribeAudio() {
      throw new Error(
        "Ollama is wired for analysis only. Use Gemini or OpenAI Whisper for transcription."
      );
    },

    /**
     * Lists models the local Ollama server has pulled. Best-effort —
     * returns [] on any error (network failure, non-2xx status, bad JSON,
     * 5 second timeout) so the picker can fall back to the free-text input.
     *
     * @returns {Promise<string[]>} Non-empty `name` values from /api/tags.
     */
    async listInstalledModels() {
      try {
        const res = await fetch(`${base}/api/tags`, {
          signal: AbortSignal.timeout(5000),
        });
        if (!res.ok) return [];
        const data = await res.json();
        return (data.models || []).map((m) => m.name).filter(Boolean);
      } catch {
        // Deliberately swallowed: an empty list is the documented fallback.
        return [];
      }
    },

    /**
     * Sends a single non-streaming prompt to /api/generate.
     *
     * @param {object} params
     * @param {string} params.prompt - Prompt text forwarded to Ollama.
     * @param {string} params.model - Model name forwarded to Ollama.
     * @param {(msg: string) => void} [params.onProgress] - Receives the
     *   messages retryAPI passes to its `log` hook.
     * @param {number} [params.retries=2] - Forwarded to retryAPI.
     * @param {AbortSignal} [params.signal] - Optional caller cancel signal,
     *   combined with the per-attempt timeout.
     * @returns {Promise<{text: string, usage: object, cost: *,
     *   finishReason: (string|null), raw: object}>}
     * @throws {Error} On a non-2xx response the attempt throws an Error
     *   carrying the HTTP code in `.status`; whether and when it reaches
     *   the caller is decided by retryAPI (defined elsewhere).
     */
    async analyzeText({
      prompt,
      model,
      onProgress = () => {},
      retries = 2,
      signal,
    }) {
      const result = await retryAPI(
        async () => {
          // Combine the per-request timeout with the caller-supplied
          // cancel signal so a user-pressed Cancel button aborts the
          // fetch immediately instead of waiting for the (long) timeout.
          // A fresh timeout signal is created for every attempt.
          const timeoutSignal = AbortSignal.timeout(timeoutMs);
          const combinedSignal = signal
            ? AbortSignal.any([signal, timeoutSignal])
            : timeoutSignal;

          const res = await fetch(`${base}/api/generate`, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({
              model,
              prompt,
              stream: false,
            }),
            signal: combinedSignal,
          });

          if (!res.ok) {
            // Body is best-effort context only; cap it so a large error
            // page does not flood the message.
            const errText = await res.text().catch(() => "");
            const err = new Error(
              `Ollama ${res.status} ${res.statusText}: ${errText.slice(0, 200)}`
            );
            err.status = res.status;
            throw err;
          }

          return res.json();
        },
        {
          retries,
          delayMs: 5000,
          label: "Ollama analysis",
          log: (msg) => onProgress(msg),
        }
      );

      const text = result.response || "";

      // Ollama's /api/generate returns prompt_eval_count + eval_count.
      const usage = {
        inputTokens: result.prompt_eval_count || 0,
        outputTokens: result.eval_count || 0,
        thinkingTokens: 0,
      };
      const cost = zeroCost(usage);

      // Prefer the server-reported reason (e.g. "stop" or "length") so a
      // truncated generation is not mislabelled as a clean stop. Servers
      // that omit `done_reason` keep the previous behaviour.
      const finishReason = result.done_reason || (result.done ? "stop" : null);

      return {
        text,
        usage,
        cost,
        finishReason,
        raw: result,
      };
    },
  };
}