Files
recap/server/providers/ollama.js
T
Keysat 373d10595b Pluggable AI providers, relay credit system, picker UX overhaul
Captures roughly forty version bumps (v0.2.6 → v0.2.47) of work that
accumulated without commits.

- Pluggable provider system under server/providers/: gemini, anthropic,
  openai, openai-compatible, ollama, whisper-compatible, relay. Mix and
  match transcription + analysis per request via the picker UI.
- Relay backend integration. Hardcoded relay URL in server/relay-default.js
  (operator-controlled at build time, not user-configurable). New
  /api/relay/{status,policy} endpoints proxy to the relay; balance pings
  populate a cached credit display.
- Per-install identity in server/install-id.js for relay credit accounting.
  Sent to the relay as X-Recap-Install-Id; persists across upgrades, lost
  on a full uninstall + reinstall. Not surfaced in the UI.
- Admin login gate (server/admin-auth.js + setAdminPassword action). Scrypt
  password hash + HMAC-signed session cookie.
- Entitlement scheme rename: pro / max (each paired with subscriptions and
  relay_pro / relay_max), replacing the misleading "core" entitlement
  that conflicted with the user-facing "Core" tier name.
- Activation screen: dynamic credit count pulled from /api/relay/policy,
  "Skip — use free mode" button, accurate paid-feature list.
- Top toolbar: inline credit-balance pill (or "BYO configured" fallback),
  Upgrade + "I have a key" buttons.
- Picker UI: per-provider sections with Save/Test/Delete buttons, sections
  collapsible by chevron, default-collapsed unless currently selected,
  "Use comped credits (reset to relay)" link when the user has strayed,
  green hint under inputs whose values are server-configured.
- Activity log: chevron-collapsible groups per video, refresh-survival via
  localStorage + a 500-entry server-side buffer, explicit Clear button.
- YouTube captions fast-path with user toggle (skips audio download + AI
  transcription when captions are available — uncheck for speaker labels).
- Cancel button: AbortController plumbed through every provider SDK call;
  retryAPI short-circuits on AbortError; cancellation events surface in
  the activity log instead of silent retries.
- Long-video analysis: auto-coalesce transcript entries before building the
  analysis prompt so local-model context windows (32k-ish) don't overflow.
  Original entries preserved for transcript display via an index map; the
  analyzer sees a coarser view but click-to-seek timestamps stay precise.
- StartOS action grouping (Setup / AI Providers) so the actions list is
  navigable.
- Manifest description rewritten to reflect multi-provider support and
  free-tier relay credits.
- Smaller fixes: summarize-button enablement no longer requires a Gemini
  key when other providers are configured; analysis fallback chain handles
  context-length and 503 capacity errors; single-segment expansion for
  providers that don't return per-segment timestamps (Parakeet et al.);
  many other UX polish items.
2026-05-11 23:46:20 -05:00

126 lines
3.5 KiB
JavaScript

// Ollama provider — analysis only, raw HTTP to a local Ollama server.
//
// Ollama runs LLMs locally; there is no per-request cost. Default
// baseURL is the conventional `http://localhost:11434`. Users on a
// LAN-hosted Ollama point at it explicitly via the StartOS action.
//
// We don't ship a hardcoded model list — Ollama's catalog is whatever
// the user has `pull`ed locally, so listAnalysisModels() returns [].
// listInstalledModels() can query /api/tags for the pulled models on a
// best-effort basis, but for v1 we expose a free-text model field in
// the picker UI.
import { retryAPI } from "../util.js";
import { zeroCost } from "./cost.js";
// Conventional address of a local Ollama server; used by
// createOllamaProvider() when no baseURL is supplied.
const DEFAULT_BASE_URL = "http://localhost:11434";
/**
 * Creates the Ollama provider (analysis only, raw HTTP).
 *
 * @param {object} [options]
 * @param {string} [options.baseURL] - Root URL of the Ollama server;
 *   DEFAULT_BASE_URL is used when empty. Surrounding whitespace and all
 *   trailing slashes are stripped so endpoint paths join cleanly.
 * @param {number} [options.timeoutMs=900000] - Timeout applied to each
 *   /api/generate attempt, in milliseconds.
 * @returns {object} Provider object exposing `name`, `capabilities`, and
 *   the list / transcribe / analyze methods defined below.
 */
export function createOllamaProvider({
  baseURL,
  timeoutMs = 900_000,
} = {}) {
  // User-entered URLs may carry stray whitespace or several trailing
  // slashes; normalise once so `${base}/api/...` never contains `//`.
  const configured = typeof baseURL === "string" ? baseURL.trim() : baseURL;
  const base = (configured || DEFAULT_BASE_URL).replace(/\/+$/, "");

  return {
    name: "ollama",
    capabilities: {
      transcribe: false,
      analyze: true,
      listModels: true,
    },

    /** No static catalog: always an empty list. */
    listAnalysisModels() {
      return [];
    },

    /** Ollama is not used for transcription: always an empty list. */
    listTranscriptionModels() {
      return [];
    },

    /**
     * Transcription is not supported by this provider.
     * @throws {Error} always.
     */
    async transcribeAudio() {
      throw new Error(
        "Ollama is wired for analysis only. Use Gemini or OpenAI Whisper for transcription."
      );
    },

    /**
     * Lists models the Ollama server has pulled, via GET /api/tags.
     * Best-effort — returns [] on any error (network failure, non-2xx,
     * unparsable body, 5 s timeout) so the picker can fall back to the
     * free-text input.
     *
     * @returns {Promise<string[]>} Non-empty model names.
     */
    async listInstalledModels() {
      try {
        const res = await fetch(`${base}/api/tags`, {
          signal: AbortSignal.timeout(5000),
        });
        if (!res.ok) return [];
        const data = await res.json();
        return (data.models || []).map((m) => m.name).filter(Boolean);
      } catch {
        // Deliberate swallow: this lookup is optional.
        return [];
      }
    },

    /**
     * Sends `prompt` to POST /api/generate (non-streaming) and returns
     * the model's reply with token usage.
     *
     * @param {object} args
     * @param {string} args.prompt - Full prompt text.
     * @param {string} args.model - Ollama model name.
     * @param {(msg: string) => void} [args.onProgress] - Receives the
     *   messages retryAPI emits through its `log` option.
     * @param {number} [args.retries=2] - Forwarded to retryAPI.
     * @param {AbortSignal} [args.signal] - Caller-supplied cancel signal.
     * @returns {Promise<{text: string, usage: object, cost: *,
     *   finishReason: (string|null), raw: object}>}
     * @throws {Error} With a numeric `status` property when the server
     *   answers with a non-2xx response; other failures propagate from
     *   fetch / retryAPI unchanged.
     */
    async analyzeText({
      prompt,
      model,
      onProgress = () => {},
      retries = 2,
      signal,
    }) {
      const result = await retryAPI(
        async () => {
          // Combine the per-request timeout with the caller-supplied
          // cancel signal so a user-pressed Cancel button aborts the
          // fetch immediately instead of waiting for the (long) timeout.
          // A fresh timeout signal is created per attempt.
          const timeoutSignal = AbortSignal.timeout(timeoutMs);
          const combinedSignal = signal
            ? AbortSignal.any([signal, timeoutSignal])
            : timeoutSignal;

          const res = await fetch(`${base}/api/generate`, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({
              model,
              prompt,
              stream: false,
            }),
            signal: combinedSignal,
          });

          if (!res.ok) {
            // Body is best-effort context for the error message only.
            const errText = await res.text().catch(() => "");
            const err = new Error(
              `Ollama ${res.status} ${res.statusText}: ${errText.slice(0, 200)}`
            );
            err.status = res.status;
            throw err;
          }
          return res.json();
        },
        {
          retries,
          delayMs: 5000,
          label: "Ollama analysis",
          log: (msg) => onProgress(msg),
        }
      );

      const text = result.response || "";

      // Ollama's /api/generate returns prompt_eval_count + eval_count.
      const usage = {
        inputTokens: result.prompt_eval_count || 0,
        outputTokens: result.eval_count || 0,
        thinkingTokens: 0,
      };
      const cost = zeroCost(usage);

      // Prefer Ollama's own done_reason (e.g. "length" when the output
      // was cut off at the token limit) so truncation is not reported as
      // a clean "stop"; fall back to the `done` flag for servers that
      // omit the field.
      const finishReason = result.done_reason || (result.done ? "stop" : null);

      return {
        text,
        usage,
        cost,
        finishReason,
        raw: result,
      };
    },
  };
}