Pluggable AI providers, relay credit system, picker UX overhaul

Captures roughly forty version bumps (v0.2.6 → v0.2.47) of work that accumulated without commits.

- Pluggable provider system under server/providers/: gemini, anthropic, openai, openai-compatible, ollama, whisper-compatible, relay. Mix and match transcription + analysis per request via the picker UI.
- Relay backend integration. Hardcoded relay URL in server/relay-default.js (operator-controlled at build time, not user-configurable). New /api/relay/{status,policy} endpoints proxy to the relay; balance pings populate a cached credit display.
- Per-install identity in server/install-id.js for relay credit accounting. Sent to the relay as X-Recap-Install-Id; persists across upgrades, lost on a full uninstall + reinstall. Not surfaced in the UI.
- Admin login gate (server/admin-auth.js + setAdminPassword action). Scrypt password hash + HMAC-signed session cookie.
- Entitlement scheme rename: pro / max (each paired with subscriptions and relay_pro / relay_max), replacing the misleading "core" entitlement that conflicted with the user-facing "Core" tier name.
- Activation screen: dynamic credit count pulled from /api/relay/policy, "Skip — use free mode" button, accurate paid-feature list.
- Top toolbar: inline credit-balance pill (or "BYO configured" fallback), Upgrade + "I have a key" buttons.
- Picker UI: per-provider sections with Save/Test/Delete buttons, sections collapsible by chevron, default-collapsed unless currently selected, "Use comped credits (reset to relay)" link when the user has strayed, green hint under inputs whose values are server-configured.
- Activity log: chevron-collapsible groups per video, refresh-survival via localStorage + a 500-entry server-side buffer, explicit Clear button.
- YouTube captions fast-path with user toggle (skips audio download + AI transcription when captions are available — uncheck for speaker labels).
- Cancel button: AbortController plumbed through every provider SDK call; retryAPI short-circuits on AbortError; cancellation events surface in the activity log instead of silent retries.
- Long-video analysis: auto-coalesce transcript entries before building the analysis prompt so local-model context windows (32k-ish) don't overflow. Original entries preserved for transcript display via an index map; the analyzer sees a coarser view but click-to-seek timestamps stay precise.
- StartOS action grouping (Setup / AI Providers) so the actions list is navigable.
- Manifest description rewritten to reflect multi-provider support and free-tier relay credits.
- Smaller fixes: summarize-button enablement no longer requires a Gemini key when other providers are configured; analysis fallback chain handles context-length and 503 capacity errors; single-segment expansion for providers that don't return per-segment timestamps (Parakeet et al.); many other UX polish items.
2026-05-11 23:46:20 -05:00
parent 2544cf7dde
commit 373d10595b
79 changed files with 6322 additions and 397 deletions
@@ -0,0 +1,116 @@
+// Anthropic (Claude) provider — analysis only.
+//
+// Claude does not natively transcribe audio, so transcribeAudio() throws.
+// Mix-and-match users can pair this provider for analysis with Gemini
+// or OpenAI (Whisper) for transcription.
+//
+// Pricing reflects standard-context rates as of 2026-04-29 (cached in
+// the claude-api skill). Update when Anthropic changes published rates.
+
+import Anthropic from "@anthropic-ai/sdk";
+import { retryAPI } from "../util.js";
+import { formatCost, ratesFor } from "./cost.js";
+
+// Per-1M-token rates in USD. Anthropic does not expose a separate
+// "thinking" rate — thinking tokens are billed as output, so we let
+// formatCost default thinking → output by omitting the thinking field.
+export const ANTHROPIC_PRICING = {
+  "claude-opus-4-7":   { input: 5.00, output: 25.00 },
+  "claude-opus-4-6":   { input: 5.00, output: 25.00 },
+  "claude-sonnet-4-6": { input: 3.00, output: 15.00 },
+  "claude-haiku-4-5":  { input: 1.00, output: 5.00 },
+  // Fallback for unknown / future models.
+  "default":           { input: 3.00, output: 15.00 },
+};
+
+// Analysis model list. Order = default fallback chain (most capable first).
+export const ANTHROPIC_ANALYSIS_MODELS = [
+  "claude-opus-4-7",
+  "claude-opus-4-6",
+  "claude-sonnet-4-6",
+  "claude-haiku-4-5",
+];
+
+// Analysis output cap. Generous — the topic-analysis prompt produces a
+// JSON document scaled to transcript length, and truncation here loses
+// trailing sections.
+const ANALYSIS_MAX_TOKENS = 16000;
+
+export function createAnthropicProvider({ apiKey, timeoutMs = 900_000 } = {}) {
+  if (!apiKey) {
+    throw new Error("createAnthropicProvider: apiKey is required");
+  }
+  const client = new Anthropic({ apiKey, timeout: timeoutMs });
+
+  return {
+    name: "anthropic",
+
+    capabilities: {
+      transcribe: false,
+      analyze: true,
+      listModels: true,
+    },
+
+    listAnalysisModels() {
+      return [...ANTHROPIC_ANALYSIS_MODELS];
+    },
+
+    listTranscriptionModels() {
+      return [];
+    },
+
+    async transcribeAudio() {
+      throw new Error(
+        "Anthropic models do not natively transcribe audio. Use Gemini or OpenAI (Whisper) for the transcription step."
+      );
+    },
+
+    async analyzeText({
+      prompt,
+      model,
+      onProgress = () => {},
+      retries = 2,
+      signal,
+    }) {
+      const result = await retryAPI(
+        () =>
+          client.messages.create(
+            {
+              model,
+              max_tokens: ANALYSIS_MAX_TOKENS,
+              messages: [{ role: "user", content: prompt }],
+            },
+            // The Anthropic SDK accepts a per-call signal as the second
+            // arg; abort() rejects the in-flight HTTP request immediately.
+            signal ? { signal } : undefined
+          ),
+        {
+          retries,
+          delayMs: 5000,
+          label: "Anthropic analysis",
+          log: (msg) => onProgress(msg),
+        }
+      );
+
+      const text = (result.content || [])
+        .filter((b) => b.type === "text")
+        .map((b) => b.text)
+        .join("");
+
+      const usage = {
+        inputTokens: result.usage?.input_tokens || 0,
+        outputTokens: result.usage?.output_tokens || 0,
+        thinkingTokens: 0,
+      };
+      const cost = formatCost(ratesFor(ANTHROPIC_PRICING, model), usage);
+
+      return {
+        text,
+        usage,
+        cost,
+        finishReason: result.stop_reason || null,
+        raw: result,
+      };
+    },
+  };
+}