// recap/server/providers/ollama.js

// Ollama provider — analysis only, raw HTTP to a local Ollama server.
//
// Ollama runs LLMs locally; there is no per-request cost. Default
// baseURL is the conventional `http://localhost:11434`. Users on a
// LAN-hosted Ollama point at it explicitly via the StartOS action.
//
// We don't ship a hardcoded model list — Ollama's catalog is whatever
// the user has `pull`ed locally, so listAnalysisModels() returns [].
// listInstalledModels() can optionally query /api/tags at config time,
// but for v1 we expose a free-text model field in the picker UI.

import { retryAPI } from "../util.js";
import { zeroCost } from "./cost.js";

// Base URL used when the caller does not supply `baseURL`.
const DEFAULT_BASE_URL = "http://localhost:11434";

// Builds the Ollama provider object: analysis only, raw HTTP.
//
// Options:
//   baseURL   — server root (string or URL). Falsy values fall back to
//               DEFAULT_BASE_URL. Surrounding whitespace and every
//               trailing slash are removed so that endpoint paths can be
//               appended without producing `//api/...`.
//   timeoutMs — per-attempt timeout for analyzeText() requests.
//
// Returns an object with `name`, `capabilities`, and the methods below.
export function createOllamaProvider({
  baseURL,
  timeoutMs = 900_000,
} = {}) {
  // String() lets a URL instance be passed as well as a plain string.
  const configuredURL = baseURL ? String(baseURL).trim() : "";
  const base = (configuredURL || DEFAULT_BASE_URL).replace(/\/+$/, "");

  return {
    name: "ollama",

    capabilities: {
      transcribe: false,
      analyze: true,
      listModels: true,
    },

    // No static catalog is shipped for analysis models.
    listAnalysisModels() {
      return [];
    },

    // Transcription is not supported, so there is nothing to list.
    listTranscriptionModels() {
      return [];
    },

    // Always rejects: this provider does not transcribe.
    async transcribeAudio() {
      throw new Error(
        "Ollama is wired for analysis only. Use Gemini or OpenAI Whisper for transcription."
      );
    },

    // Lists models the Ollama server reports via GET /api/tags.
    // Best-effort — resolves to [] on a non-OK status, a 5 s timeout, or
    // any thrown error (network failure, bad JSON, unexpected shape) so
    // the picker can fall back to the free-text input.
    async listInstalledModels() {
      try {
        const res = await fetch(`${base}/api/tags`, {
          signal: AbortSignal.timeout(5000),
        });
        if (!res.ok) return [];
        const data = await res.json();
        // Entries without a usable name are dropped.
        return (data.models || []).map((m) => m.name).filter(Boolean);
      } catch {
        return [];
      }
    },

    // Runs one non-streaming completion through POST /api/generate.
    //
    //   prompt     — text sent as the request's `prompt`.
    //   model      — model name sent as the request's `model`.
    //   onProgress — receives whatever retryAPI passes to its `log` hook.
    //   retries    — forwarded to retryAPI.
    //   signal     — optional AbortSignal for caller-driven cancellation.
    //
    // Resolves to { text, usage, cost, finishReason, raw }. A non-OK HTTP
    // response throws an Error whose `status` is the HTTP status code.
    async analyzeText({
      prompt,
      model,
      onProgress = () => {},
      retries = 2,
      signal,
    }) {
      // NOTE(review): how retryAPI reacts to an abort error is not visible
      // here — confirm it does not keep retrying after the caller cancels.
      const result = await retryAPI(
        async () => {
          // Combine the per-request timeout with the caller-supplied
          // cancel signal so a user-pressed Cancel button aborts the
          // fetch immediately instead of waiting for the (long) timeout.
          // A fresh timeout signal is created for every attempt.
          const timeoutSignal = AbortSignal.timeout(timeoutMs);
          const combinedSignal = signal
            ? AbortSignal.any([signal, timeoutSignal])
            : timeoutSignal;
          const res = await fetch(`${base}/api/generate`, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({
              model,
              prompt,
              stream: false,
            }),
            signal: combinedSignal,
          });
          if (!res.ok) {
            // Body is best-effort context only; cap it to keep the
            // message short.
            const errText = await res.text().catch(() => "");
            const err = new Error(
              `Ollama ${res.status} ${res.statusText}: ${errText.slice(0, 200)}`
            );
            err.status = res.status;
            throw err;
          }
          return res.json();
        },
        {
          retries,
          delayMs: 5000,
          label: "Ollama analysis",
          log: (msg) => onProgress(msg),
        }
      );

      const text = result.response || "";

      // Ollama's /api/generate returns prompt_eval_count + eval_count.
      const usage = {
        inputTokens: result.prompt_eval_count || 0,
        outputTokens: result.eval_count || 0,
        thinkingTokens: 0,
      };
      const cost = zeroCost(usage);

      // Prefer the server-reported done_reason so that a generation that
      // ended for another reason (such as a length cut-off) is not
      // reported as a clean "stop". Falls back to "stop" when the field
      // is absent, and stays null when the response is not marked done.
      const finishReason = result.done ? result.done_reason || "stop" : null;

      return {
        text,
        usage,
        cost,
        finishReason,
        raw: result,
      };
    },
  };
}