Pluggable AI providers, relay credit system, picker UX overhaul

Captures roughly forty version bumps (v0.2.6 → v0.2.47) of work that accumulated without commits.

- Pluggable provider system under server/providers/: gemini, anthropic, openai, openai-compatible, ollama, whisper-compatible, relay. Mix and match transcription + analysis per request via the picker UI.
- Relay backend integration. Hardcoded relay URL in server/relay-default.js (operator-controlled at build time, not user-configurable). New /api/relay/{status,policy} endpoints proxy to the relay; balance pings populate a cached credit display.
- Per-install identity in server/install-id.js for relay credit accounting. Sent to the relay as X-Recap-Install-Id; persists across upgrades, lost on a full uninstall + reinstall. Not surfaced in the UI.
- Admin login gate (server/admin-auth.js + setAdminPassword action). Scrypt password hash + HMAC-signed session cookie.
- Entitlement scheme rename: pro / max (each paired with subscriptions and relay_pro / relay_max), replacing the misleading "core" entitlement that conflicted with the user-facing "Core" tier name.
- Activation screen: dynamic credit count pulled from /api/relay/policy, "Skip — use free mode" button, accurate paid-feature list.
- Top toolbar: inline credit-balance pill (or "BYO configured" fallback), Upgrade + "I have a key" buttons.
- Picker UI: per-provider sections with Save/Test/Delete buttons, sections collapsible by chevron, default-collapsed unless currently selected, "Use comped credits (reset to relay)" link when the user has strayed, green hint under inputs whose values are server-configured.
- Activity log: chevron-collapsible groups per video, refresh-survival via localStorage + a 500-entry server-side buffer, explicit Clear button.
- YouTube captions fast-path with user toggle (skips audio download + AI transcription when captions are available — uncheck for speaker labels).
- Cancel button: AbortController plumbed through every provider SDK call; retryAPI short-circuits on AbortError; cancellation events surface in the activity log instead of silent retries.
- Long-video analysis: auto-coalesce transcript entries before building the analysis prompt so local-model context windows (32k-ish) don't overflow. Original entries preserved for transcript display via an index map; the analyzer sees a coarser view but click-to-seek timestamps stay precise.
- StartOS action grouping (Setup / AI Providers) so the actions list is navigable.
- Manifest description rewritten to reflect multi-provider support and free-tier relay credits.
- Smaller fixes: summarize-button enablement no longer requires a Gemini key when other providers are configured; analysis fallback chain handles context-length and 503 capacity errors; single-segment expansion for providers that don't return per-segment timestamps (Parakeet et al.); many other UX polish items.
2026-05-11 23:46:20 -05:00
parent 2544cf7dde
commit 373d10595b
79 changed files with 6322 additions and 397 deletions
@@ -0,0 +1,154 @@
+// Provider registry. Each provider wraps a single LLM/SDK behind a
+// uniform interface (see ./gemini.js for the reference shape). The rest
+// of the server talks to providers through getProvider() and never
+// imports SDKs directly.
+//
+// Adding a new provider:
+//   1. Create ./<name>.js exporting createXxxProvider({ apiKey, ... }).
+//   2. Add it to PROVIDER_NAMES + the switch in getProvider().
+//   3. Add the matching opts shape to PROVIDER_KEY_FIELDS so
+//      resolveProviderOpts() can pull the right key/baseURL out of the
+//      StartOS config.
+//   4. Wire its config field into startos/file-models/config.json.ts
+//      and add a "Set <Provider> Key" StartOS action.
+//
+// Capabilities (see provider.capabilities) signal what each one can do.
+// Some providers analyze but can't transcribe (Claude, OpenAI-compat,
+// Ollama); the orchestration layer in server/index.js can mix providers
+// across the transcription + analysis pipelines.
+
+import { createGeminiProvider } from "./gemini.js";
+import { createAnthropicProvider } from "./anthropic.js";
+import { createOpenAIProvider } from "./openai.js";
+import { createOpenAICompatibleProvider } from "./openai-compatible.js";
+import { createOllamaProvider } from "./ollama.js";
+import { createWhisperProvider } from "./whisper.js";
+import { createRelayProvider } from "./relay.js";
+import { getInstallId } from "../install-id.js";
+import { getRawLicenseKey } from "../license.js";
+import { getRelayBaseURL } from "../relay-default.js";
+
+// Identifiers of every registered provider. Each entry has a matching
+// `case` in getProvider() and a matching key in PROVIDER_KEY_FIELDS;
+// getProvider() also joins this list into its "Unknown provider" error
+// message, so keep all three in sync when adding a provider.
+export const PROVIDER_NAMES = [
+  "gemini",
+  "anthropic",
+  "openai",
+  "openai-compatible",
+  "ollama",
+  "whisper",
+  "relay",
+];
+
+// Map provider name → which fields to read from the StartOS config blob
+// when resolving its construction opts. Used by resolveProviderOpts().
+//
+// Each value maps an opts key (`apiKey`, `baseURL`) to the name of the
+// config field that holds it. A provider only gets the opts keys it
+// lists here: e.g. ollama has a baseURL but no apiKey entry.
+export const PROVIDER_KEY_FIELDS = {
+  gemini: { apiKey: "gemini_api_key" },
+  anthropic: { apiKey: "anthropic_api_key" },
+  openai: { apiKey: "openai_api_key" },
+  "openai-compatible": {
+    apiKey: "openai_compatible_api_key",
+    baseURL: "openai_compatible_base_url",
+  },
+  ollama: { baseURL: "ollama_base_url" },
+  whisper: {
+    apiKey: "whisper_api_key",
+    baseURL: "whisper_base_url",
+  },
+  // Relay is operator-only — base URL is HARDCODED in
+  // server/relay-default.js, NOT read from StartOS config. The empty
+  // object is intentional: resolveProviderOpts looks `name` up in
+  // PROVIDER_KEY_FIELDS to recognise the provider, then the
+  // relay-specific block at the bottom of resolveProviderOpts
+  // injects baseURL + installId + licenseKey server-side. Without
+  // this entry the lookup throws "Unknown provider: relay" before
+  // reaching the injection block.
+  relay: {},
+};
+
+export function getProvider(name, opts = {}) {
+  switch (name) {
+    case "gemini":
+      return createGeminiProvider(opts);
+    case "anthropic":
+      return createAnthropicProvider(opts);
+    case "openai":
+      return createOpenAIProvider(opts);
+    case "openai-compatible":
+      return createOpenAICompatibleProvider(opts);
+    case "ollama":
+      return createOllamaProvider(opts);
+    case "whisper":
+      return createWhisperProvider(opts);
+    case "relay":
+      return createRelayProvider(opts);
+    default:
+      throw new Error(
+        `Unknown provider: ${name}. Available: ${PROVIDER_NAMES.join(", ")}`
+      );
+  }
+}
+
+// Pull the construction opts for a provider out of the StartOS config
+// blob, optionally overridden per-provider by client-side opts the web
+// UI passed in the request body.
+//
+// `config` is the parsed startos-config.json snapshot.
+// `clientOpts` is { apiKey?, baseURL? } for THIS provider only —
+//   typically a value out of req.body.providerOpts[name].
+//
+// Resolution priority for each field: client opt → config opt.
+// Returns { apiKey?, baseURL? } as appropriate for the provider.
+export function resolveProviderOpts(name, { config = {}, clientOpts = {} } = {}) {
+  const fields = PROVIDER_KEY_FIELDS[name];
+  if (!fields) {
+    throw new Error(`Unknown provider: ${name}`);
+  }
+  const opts = {};
+  if (fields.apiKey) {
+    const fromConfig = config[fields.apiKey] || "";
+    const fromClient = (clientOpts.apiKey || "").trim();
+    opts.apiKey = fromClient || fromConfig;
+  }
+  if (fields.baseURL) {
+    const fromConfig = config[fields.baseURL] || "";
+    const fromClient = (clientOpts.baseURL || "").trim();
+    opts.baseURL = fromClient || fromConfig;
+    // Last-resort fallback for Ollama: the canonical StartOS internal
+    // hostname. Reachable when the optional Ollama dependency is
+    // installed alongside Recap on the same StartOS server, even if
+    // the user hasn't run the "Set Ollama Server URL" action.
+    if (!opts.baseURL && name === "ollama") {
+      opts.baseURL = "http://ollama.startos:11434";
+    }
+  }
+  // User-defined model list: providers with dynamic catalogs (ollama,
+  // openai-compatible, whisper) accept a comma- or newline-separated
+  // list of model names in clientOpts.models. Parse and pass through
+  // as `defaultModels` so listTranscriptionModels / listAnalysisModels
+  // return the right thing AND so the orchestration layer's fallback
+  // chain knows what to walk through if the user's chosen model fails.
+  if (typeof clientOpts.models === "string" && clientOpts.models.trim()) {
+    const seen = new Set();
+    const models = clientOpts.models
+      .split(/[,\n]/)
+      .map((s) => s.trim())
+      .filter((s) => {
+        if (!s || seen.has(s)) return false;
+        seen.add(s);
+        return true;
+      });
+    if (models.length > 0) {
+      opts.defaultModels = models;
+    }
+  }
+  // Relay-specific injections: baseURL (hardcoded constant or env
+  // override) + install-id (always) + license key (when present).
+  // None of these come from clientOpts — relay identity + endpoint
+  // must not be spoofable from a request body.
+  if (name === "relay") {
+    opts.baseURL = getRelayBaseURL();
+    opts.installId = getInstallId();
+    const rawKey = getRawLicenseKey();
+    if (rawKey) opts.licenseKey = rawKey;
+  }
+  return opts;
+}