Pluggable AI providers, relay credit system, picker UX overhaul

Captures roughly forty version bumps (v0.2.6 → v0.2.47) of work that accumulated without commits.

- Pluggable provider system under server/providers/: gemini, anthropic, openai, openai-compatible, ollama, whisper-compatible, relay. Mix and match transcription + analysis per request via the picker UI.
- Relay backend integration. Hardcoded relay URL in server/relay-default.js (operator-controlled at build time, not user-configurable). New /api/relay/{status,policy} endpoints proxy to the relay; balance pings populate a cached credit display.
- Per-install identity in server/install-id.js for relay credit accounting. Sent to the relay as X-Recap-Install-Id; persists across upgrades, lost on a full uninstall + reinstall. Not surfaced in the UI.
- Admin login gate (server/admin-auth.js + setAdminPassword action). Scrypt password hash + HMAC-signed session cookie.
- Entitlement scheme rename: pro / max (each paired with subscriptions and relay_pro / relay_max), replacing the misleading "core" entitlement that conflicted with the user-facing "Core" tier name.
- Activation screen: dynamic credit count pulled from /api/relay/policy, "Skip — use free mode" button, accurate paid-feature list.
- Top toolbar: inline credit-balance pill (or "BYO configured" fallback), Upgrade + "I have a key" buttons.
- Picker UI: per-provider sections with Save/Test/Delete buttons, sections collapsible by chevron, default-collapsed unless currently selected, "Use comped credits (reset to relay)" link when the user has strayed, green hint under inputs whose values are server-configured.
- Activity log: chevron-collapsible groups per video, refresh-survival via localStorage + a 500-entry server-side buffer, explicit Clear button.
- YouTube captions fast-path with user toggle (skips audio download + AI transcription when captions are available — uncheck for speaker labels).
- Cancel button: AbortController plumbed through every provider SDK call; retryAPI short-circuits on AbortError; cancellation events surface in the activity log instead of silent retries.
- Long-video analysis: auto-coalesce transcript entries before building the analysis prompt so local-model context windows (32k-ish) don't overflow. Original entries preserved for transcript display via an index map; the analyzer sees a coarser view but click-to-seek timestamps stay precise.
- StartOS action grouping (Setup / AI Providers) so the actions list is navigable.
- Manifest description rewritten to reflect multi-provider support and free-tier relay credits.
- Smaller fixes: summarize-button enablement no longer requires a Gemini key when other providers are configured; analysis fallback chain handles context-length and 503 capacity errors; single-segment expansion for providers that don't return per-segment timestamps (Parakeet et al.); many other UX polish items.
2026-05-11 23:46:20 -05:00
parent 2544cf7dde
commit 373d10595b
79 changed files with 6322 additions and 397 deletions
@@ -1,7 +1,22 @@
 import { sdk } from '../sdk'
 import { setApiKey } from './setApiKey'
 import { setLicense } from './setLicense'
+import { setAdminPassword } from './setAdminPassword'
+import { setAnthropicApiKey } from './setAnthropicApiKey'
+import { setOpenAIApiKey } from './setOpenAIApiKey'
+import { setOpenAICompatible } from './setOpenAICompatible'
+import { setOllamaUrl } from './setOllamaUrl'
+import { setWhisperEndpoint } from './setWhisperEndpoint'

+// NOTE: setRelayUrl was removed in 0.2.34. The relay base URL is now
+// hardcoded in server/relay-default.js and updated via Recap version
+// releases — end users should never see or configure it.
 export const actions = sdk.Actions.of()
  .addAction(setApiKey) // Google Gemini API key
+  .addAction(setAnthropicApiKey) // Anthropic (Claude) API key
+  .addAction(setOpenAIApiKey) // OpenAI API key
+  .addAction(setOpenAICompatible) // OpenAI-compatible base URL + optional key
+  .addAction(setOllamaUrl) // Ollama server URL
+  .addAction(setWhisperEndpoint) // Whisper-compatible endpoint + optional key
  .addAction(setLicense) // Recap license activation
+  .addAction(setAdminPassword) // web UI login gate credentials
@@ -0,0 +1,108 @@
+import { sdk } from '../sdk'
+import { configFile } from '../file-models/config.json'
+import { randomBytes, scryptSync } from 'crypto'
+
+const { InputSpec, Value } = sdk
+
+// Length, in bytes, of the scrypt-derived key that is stored (hex-encoded)
+// as the admin password hash.
+const SCRYPT_KEYLEN = 64
+
+// Builds one of the two password inputs. Both are optional, masked, start
+// empty and allow up to 256 characters; only the label and help text differ.
+const optionalMaskedField = (name: string, description: string) =>
+  Value.text({
+    name,
+    description,
+    required: false,
+    default: null,
+    masked: true,
+    minLength: 0,
+    maxLength: 256,
+  })
+
+// Required login name, 1-64 characters, pre-set to "admin".
+const usernameField = Value.text({
+  name: 'Admin Username',
+  description:
+    'Username required at the login screen. Defaults to "admin".',
+  required: true,
+  default: 'admin',
+  minLength: 1,
+  maxLength: 64,
+})
+
+// Form for the "Set Admin Password" action: username, password, confirmation.
+const inputSpec = InputSpec.of({
+  recap_admin_username: usernameField,
+  recap_admin_password: optionalMaskedField(
+    'Admin Password',
+    'Password required at the login screen. Must be at least 8 characters. Leave blank to disable the login gate.',
+  ),
+  recap_admin_password_confirm: optionalMaskedField(
+    'Confirm Password',
+    'Re-enter the password to confirm.',
+  ),
+})
+
+/**
+ * Generates a random 16-byte salt and derives a SCRYPT_KEYLEN-byte scrypt key
+ * from `password`. Both values are returned hex-encoded. Note that scrypt is
+ * fed the hex salt *string*, not the decoded salt bytes.
+ */
+function derivePasswordHash(password: string) {
+  const salt = randomBytes(16).toString('hex')
+  const hash = scryptSync(password, salt, SCRYPT_KEYLEN).toString('hex')
+  return { salt, hash }
+}
+
+/**
+ * "Set Admin Password" action.
+ *
+ * Stores the username, password hash and salt used by the web UI login gate.
+ * Submitting with both password fields blank clears the hash and salt
+ * (disabling the gate) while keeping the username.
+ */
+export const setAdminPassword = sdk.Action.withInput(
+  'set-admin-password',
+
+  // Action metadata shown in the actions list.
+  async ({ effects }) => ({
+    name: 'Set Admin Password',
+    description:
+      'Set a username and password that gate the Recap web UI. Anyone visiting the site (LAN or clearnet) must log in before reaching the activation screen. Leave the password blank to disable the gate.',
+    warning: null,
+    allowedStatuses: 'any',
+    group: 'Setup',
+    visibility: 'enabled',
+  }),
+
+  inputSpec,
+
+  // Prefill: reuse the saved username (or "admin"); never echo a password.
+  async ({ effects }) => {
+    const saved = await configFile.read().once()
+    const savedUsername = saved?.recap_admin_username
+    return {
+      recap_admin_username: savedUsername ? savedUsername : 'admin',
+      recap_admin_password: undefined,
+      recap_admin_password_confirm: undefined,
+    }
+  },
+
+  // Submit: validate, then either disable the gate or store new credentials.
+  async ({ effects, input }) => {
+    const username = (input.recap_admin_username || '').trim()
+    const password = input.recap_admin_password || ''
+    const confirm = input.recap_admin_password_confirm || ''
+
+    if (username.length === 0) {
+      throw new Error('Username is required.')
+    }
+
+    const bothBlank = password.length === 0 && confirm.length === 0
+    if (bothBlank) {
+      // Gate off: wipe hash + salt but remember the username for later.
+      await configFile.merge(effects, {
+        recap_admin_username: username,
+        recap_admin_password_hash: '',
+        recap_admin_password_salt: '',
+      })
+      return null
+    }
+
+    if (confirm !== password) {
+      throw new Error('Password and confirmation do not match.')
+    }
+    if (password.length < 8) {
+      throw new Error('Password must be at least 8 characters.')
+    }
+
+    const { salt, hash } = derivePasswordHash(password)
+
+    // Keep an already-stored, non-empty session secret; mint one otherwise.
+    // NOTE(review): because the secret is reused, changing the password
+    // presumably leaves existing session cookies valid — confirm against
+    // server/admin-auth.js whether that is intended.
+    const current = await configFile.read().once()
+    const storedSecret = current?.recap_admin_session_secret
+    const sessionSecret =
+      storedSecret && storedSecret.length > 0
+        ? storedSecret
+        : randomBytes(32).toString('hex')
+
+    await configFile.merge(effects, {
+      recap_admin_username: username,
+      recap_admin_password_hash: hash,
+      recap_admin_password_salt: salt,
+      recap_admin_session_secret: sessionSecret,
+    })
+
+    return null
+  },
+)
@@ -0,0 +1,45 @@
+import { sdk } from '../sdk'
+import { configFile } from '../file-models/config.json'
+
+const { InputSpec, Value } = sdk
+
+// Required, masked text input (1-256 characters) for the Anthropic key.
+const anthropicKeyField = Value.text({
+  name: 'Anthropic API Key',
+  description:
+    'Your Anthropic (Claude) API key. Get one at console.anthropic.com. Required to use Claude models for topic analysis.',
+  required: true,
+  default: null,
+  masked: true,
+  minLength: 1,
+  maxLength: 256,
+})
+
+// Form for the "Set Anthropic API Key" action: a single key field.
+const inputSpec = InputSpec.of({ anthropic_api_key: anthropicKeyField })
+
+/**
+ * "Set Anthropic API Key" action: prefills the form with the saved key (if
+ * any) and writes the trimmed submitted key to the config file.
+ */
+export const setAnthropicApiKey = sdk.Action.withInput(
+  'set-anthropic-api-key',
+
+  // Action metadata shown in the actions list.
+  async ({ effects }) => ({
+    name: 'Set Anthropic API Key',
+    description:
+      'Configure your Anthropic (Claude) API key for topic analysis. Claude does not transcribe audio — pair it with Gemini or OpenAI Whisper for transcription.',
+    warning: null,
+    allowedStatuses: 'any',
+    group: 'AI Providers',
+    visibility: 'enabled',
+  }),
+
+  inputSpec,
+
+  // Prefill: a missing or empty saved key is reported as undefined.
+  async ({ effects }) => {
+    const stored = await configFile.read().once()
+    const savedKey = stored?.anthropic_api_key
+    return { anthropic_api_key: savedKey || undefined }
+  },
+
+  // Submit: persist the key with surrounding whitespace removed.
+  async ({ effects, input }) => {
+    const apiKey = (input.anthropic_api_key || '').trim()
+    await configFile.merge(effects, { anthropic_api_key: apiKey })
+    return null
+  },
+)
@@ -25,7 +25,7 @@ export const setApiKey = sdk.Action.withInput(
      'Configure your Google Gemini API key for transcription and analysis',
    warning: null,
    allowedStatuses: 'any',
-    group: null,
+    group: 'AI Providers',
    visibility: 'enabled',
  }),

@@ -28,10 +28,10 @@ export const setLicense = sdk.Action.withInput(
  async ({ effects }) => ({
    name: 'Set Recap License',
    description:
-      'Activate a Recap license to unlock paid features (saved library, channel & podcast subscriptions, auto-queue).',
+      'Activate a Recap license to unlock paid features (channel & podcast subscriptions, auto-queue, and a monthly allotment of relay credits).',
    warning: null,
    allowedStatuses: 'any',
-    group: null,
+    group: 'Setup',
    visibility: 'enabled',
  }),

@@ -0,0 +1,81 @@
+import { sdk } from '../sdk'
+import { configFile } from '../file-models/config.json'
+
+const { InputSpec, Value } = sdk
+
+// Standard Ollama port. Hardcoded because Ollama upstream uses 11434
+// universally — its StartOS package preserves this. If a future
+// release changes the port we can swap to a runtime
+// sdk.serviceInterface.get(...) lookup against ollama's exposed
+// interface, but for now hardcode + override-on-mismatch is simpler
+// and avoids a guess at the interface ID.
+const OLLAMA_DEFAULT_PORT = 11434
+
+// Fallback URL used both as the schema default and as the prefill value when
+// nothing is saved and no StartOS Ollama package is detected. Derived from
+// OLLAMA_DEFAULT_PORT so the port lives in exactly one place.
+const OLLAMA_LOCALHOST_URL = `http://localhost:${OLLAMA_DEFAULT_PORT}`
+
+// Form for the "Set Ollama Server URL" action: one optional URL field that
+// must be empty or start with http:// or https://.
+const inputSpec = InputSpec.of({
+  ollama_base_url: Value.text({
+    name: 'Ollama Base URL',
+    description: `URL of your Ollama server. If you have the Ollama StartOS package installed on this server, this field is pre-populated automatically. Override only if you want to point at a different Ollama instance (e.g. on another machine: http://192.168.1.10:${OLLAMA_DEFAULT_PORT}).`,
+    required: false,
+    default: OLLAMA_LOCALHOST_URL,
+    minLength: 0,
+    maxLength: 256,
+    patterns: [
+      {
+        regex: '^(https?://.+)?$',
+        description: 'Must be empty or start with http:// or https://',
+      },
+    ],
+  }),
+})
+
+// Best-effort detection of an Ollama instance running on this same
+// StartOS server. StartOS exposes every package on its own internal
+// `<package-id>.startos` hostname, reachable from any other package's
+// container without explicit networking config (per the Service
+// Packaging docs). Returns the URL when ollama is installed, null
+// otherwise. Any error from the dependency check is deliberately treated
+// as "not installed" so that opening the action never fails on detection.
+async function detectStartOsOllamaUrl(effects: any): Promise<string | null> {
+  try {
+    const check = await sdk.checkDependencies(effects, ['ollama'])
+    if (!check.installedSatisfied('ollama')) return null
+    return `http://ollama.startos:${OLLAMA_DEFAULT_PORT}`
+  } catch {
+    return null
+  }
+}
+
+/**
+ * "Set Ollama Server URL" action.
+ *
+ * Prefill precedence: saved value, then the detected StartOS Ollama package
+ * URL, then the localhost fallback. Submit stores the trimmed URL.
+ */
+export const setOllamaUrl = sdk.Action.withInput(
+  'set-ollama-url',
+
+  // Action metadata shown in the actions list.
+  async ({ effects }) => ({
+    name: 'Set Ollama Server URL',
+    description:
+      'Configure where to reach a local Ollama server for topic analysis. No API key required (Ollama runs locally). Does not transcribe audio. Auto-pre-populates if the Ollama StartOS package is installed on this server.',
+    warning: null,
+    allowedStatuses: 'any',
+    group: 'AI Providers',
+    visibility: 'enabled',
+  }),
+
+  inputSpec,
+
+  async ({ effects }) => {
+    const config = await configFile.read().once()
+    // If the user has already set a value, respect it — don't
+    // overwrite a manual override on every action open.
+    if (config?.ollama_base_url) {
+      return { ollama_base_url: config.ollama_base_url }
+    }
+    const auto = await detectStartOsOllamaUrl(effects)
+    if (auto) return { ollama_base_url: auto }
+    return { ollama_base_url: OLLAMA_LOCALHOST_URL }
+  },
+
+  // Submit: persist the URL with surrounding whitespace removed. An empty
+  // value is stored as '', which makes the next prefill fall back to
+  // detection again.
+  async ({ effects, input }) => {
+    await configFile.merge(effects, {
+      ollama_base_url: (input.ollama_base_url || '').trim(),
+    })
+    return null
+  },
+)
@@ -0,0 +1,45 @@
+import { sdk } from '../sdk'
+import { configFile } from '../file-models/config.json'
+
+const { InputSpec, Value } = sdk
+
+// Required, masked text input (1-256 characters) for the OpenAI key.
+const openAiKeyField = Value.text({
+  name: 'OpenAI API Key',
+  description:
+    'Your OpenAI API key. Get one at platform.openai.com. Used for both topic analysis (GPT models) and audio transcription (Whisper).',
+  required: true,
+  default: null,
+  masked: true,
+  minLength: 1,
+  maxLength: 256,
+})
+
+// Form for the "Set OpenAI API Key" action: a single key field.
+const inputSpec = InputSpec.of({ openai_api_key: openAiKeyField })
+
+/**
+ * "Set OpenAI API Key" action: prefills the form with the saved key (if any)
+ * and writes the trimmed submitted key to the config file.
+ */
+export const setOpenAIApiKey = sdk.Action.withInput(
+  'set-openai-api-key',
+
+  // Action metadata shown in the actions list.
+  async ({ effects }) => ({
+    name: 'Set OpenAI API Key',
+    description:
+      'Configure your OpenAI API key. Enables GPT models for topic analysis and Whisper for audio transcription.',
+    warning: null,
+    allowedStatuses: 'any',
+    group: 'AI Providers',
+    visibility: 'enabled',
+  }),
+
+  inputSpec,
+
+  // Prefill: a missing or empty saved key is reported as undefined.
+  async ({ effects }) => {
+    const stored = await configFile.read().once()
+    const savedKey = stored?.openai_api_key
+    return { openai_api_key: savedKey || undefined }
+  },
+
+  // Submit: persist the key with surrounding whitespace removed.
+  async ({ effects, input }) => {
+    const apiKey = (input.openai_api_key || '').trim()
+    await configFile.merge(effects, { openai_api_key: apiKey })
+    return null
+  },
+)
@@ -0,0 +1,64 @@
+import { sdk } from '../sdk'
+import { configFile } from '../file-models/config.json'
+
+const { InputSpec, Value } = sdk
+
+// Required endpoint URL (1-512 characters) that must start with http(s)://.
+const baseUrlField = Value.text({
+  name: 'Base URL',
+  description:
+    'OpenAI-compatible API endpoint. Examples: https://api.deepseek.com/v1, https://api.together.xyz/v1, https://api.groq.com/openai/v1. Must include the /v1 (or equivalent) path segment.',
+  required: true,
+  default: null,
+  minLength: 1,
+  maxLength: 512,
+  patterns: [
+    {
+      regex: '^https?://.+',
+      description: 'Must start with http:// or https://',
+    },
+  ],
+})
+
+// Optional, masked API key (up to 256 characters).
+const apiKeyField = Value.text({
+  name: 'API Key',
+  description:
+    'API key for the OpenAI-compatible backend. Some self-hosted backends accept any non-empty value — leave blank for those.',
+  required: false,
+  default: null,
+  masked: true,
+  minLength: 0,
+  maxLength: 256,
+})
+
+// Form for the "Set OpenAI-Compatible Backend" action.
+const inputSpec = InputSpec.of({
+  openai_compatible_base_url: baseUrlField,
+  openai_compatible_api_key: apiKeyField,
+})
+
+/**
+ * "Set OpenAI-Compatible Backend" action: prefills the form from the saved
+ * base URL and key, and writes both (trimmed) back to the config file.
+ */
+export const setOpenAICompatible = sdk.Action.withInput(
+  'set-openai-compatible',
+
+  // Action metadata shown in the actions list.
+  async ({ effects }) => ({
+    name: 'Set OpenAI-Compatible Backend',
+    description:
+      'Point Recap at any OpenAI-compatible chat-completions API: DeepSeek, Together, Groq, Fireworks, self-hosted vLLM, etc. Used for topic analysis only — does not transcribe audio.',
+    warning: null,
+    allowedStatuses: 'any',
+    group: 'AI Providers',
+    visibility: 'enabled',
+  }),
+
+  inputSpec,
+
+  // Prefill: missing or empty saved values are reported as undefined.
+  async ({ effects }) => {
+    const stored = await configFile.read().once()
+    const savedUrl = stored?.openai_compatible_base_url
+    const savedKey = stored?.openai_compatible_api_key
+    return {
+      openai_compatible_base_url: savedUrl || undefined,
+      openai_compatible_api_key: savedKey || undefined,
+    }
+  },
+
+  // Submit: persist both values with surrounding whitespace removed; a blank
+  // key is stored as ''.
+  async ({ effects, input }) => {
+    const baseUrl = (input.openai_compatible_base_url || '').trim()
+    const apiKey = (input.openai_compatible_api_key || '').trim()
+    await configFile.merge(effects, {
+      openai_compatible_base_url: baseUrl,
+      openai_compatible_api_key: apiKey,
+    })
+    return null
+  },
+)
@@ -0,0 +1,64 @@
+import { sdk } from '../sdk'
+import { configFile } from '../file-models/config.json'
+
+const { InputSpec, Value } = sdk
+
+// Required server URL (1-512 characters) that must start with http(s)://.
+const whisperUrlField = Value.text({
+  name: 'Whisper Base URL',
+  description:
+    "URL of your Whisper-compatible transcription server. Example: http://whisper.startos:8000 for a local StartOS package, or http://192.168.1.10:9000 for whisper.cpp running on another machine on your LAN. The endpoint must implement OpenAI's /v1/audio/transcriptions wire format.",
+  required: true,
+  default: null,
+  minLength: 1,
+  maxLength: 512,
+  patterns: [
+    {
+      regex: '^https?://.+',
+      description: 'Must start with http:// or https://',
+    },
+  ],
+})
+
+// Optional, masked API key (up to 256 characters).
+const whisperKeyField = Value.text({
+  name: 'API Key (optional)',
+  description:
+    'API key for the Whisper backend. Most self-hosted Whisper servers (whisper.cpp HTTP server, faster-whisper-server) accept any value or none at all — leave blank for those. Cloud Whisper providers (Groq, etc.) require a real key.',
+  required: false,
+  default: null,
+  masked: true,
+  minLength: 0,
+  maxLength: 256,
+})
+
+// Form for the "Set Whisper Endpoint" action.
+const inputSpec = InputSpec.of({
+  whisper_base_url: whisperUrlField,
+  whisper_api_key: whisperKeyField,
+})
+
+/**
+ * "Set Whisper Endpoint" action: prefills the form from the saved base URL
+ * and key, and writes both (trimmed) back to the config file.
+ */
+export const setWhisperEndpoint = sdk.Action.withInput(
+  'set-whisper-endpoint',
+
+  // Action metadata shown in the actions list.
+  async ({ effects }) => ({
+    name: 'Set Whisper Endpoint',
+    description:
+      'Point Recap at a self-hosted or third-party Whisper transcription server (whisper.cpp, faster-whisper-server, Groq, etc.). Free alternative to OpenAI Whisper API or Gemini multimodal transcription.',
+    warning: null,
+    allowedStatuses: 'any',
+    group: 'AI Providers',
+    visibility: 'enabled',
+  }),
+
+  inputSpec,
+
+  // Prefill: missing or empty saved values are reported as undefined.
+  async ({ effects }) => {
+    const stored = await configFile.read().once()
+    const savedUrl = stored?.whisper_base_url
+    const savedKey = stored?.whisper_api_key
+    return {
+      whisper_base_url: savedUrl || undefined,
+      whisper_api_key: savedKey || undefined,
+    }
+  },
+
+  // Submit: persist both values with surrounding whitespace removed; a blank
+  // key is stored as ''.
+  async ({ effects, input }) => {
+    const baseUrl = (input.whisper_base_url || '').trim()
+    const apiKey = (input.whisper_api_key || '').trim()
+    await configFile.merge(effects, {
+      whisper_base_url: baseUrl,
+      whisper_api_key: apiKey,
+    })
+    return null
+  },
+)