Pluggable AI providers, relay credit system, picker UX overhaul

Captures roughly forty version bumps (v0.2.6 → v0.2.47) of work that
accumulated without commits.

- Pluggable provider system under server/providers/: gemini, anthropic,
  openai, openai-compatible, ollama, whisper-compatible, relay. Mix and
  match transcription + analysis per request via the picker UI.
- Relay backend integration. Hardcoded relay URL in server/relay-default.js
  (operator-controlled at build time, not user-configurable). New
  /api/relay/{status,policy} endpoints proxy to the relay; balance pings
  populate a cached credit display.
- Per-install identity in server/install-id.js for relay credit accounting.
  Sent to the relay as X-Recap-Install-Id; persists across upgrades, lost
  on a full uninstall + reinstall. Not surfaced in the UI.
- Admin login gate (server/admin-auth.js + setAdminPassword action). Scrypt
  password hash + HMAC-signed session cookie.
- Entitlement scheme rename: pro / max (each paired with subscriptions and
  relay_pro / relay_max), replacing the misleading "core" entitlement
  that conflicted with the user-facing "Core" tier name.
- Activation screen: dynamic credit count pulled from /api/relay/policy,
  "Skip — use free mode" button, accurate paid-feature list.
- Top toolbar: inline credit-balance pill (or "BYO configured" fallback),
  Upgrade + "I have a key" buttons.
- Picker UI: per-provider sections with Save/Test/Delete buttons, sections
  collapsible by chevron, default-collapsed unless currently selected,
  "Use comped credits (reset to relay)" link when the user has strayed,
  green hint under inputs whose values are server-configured.
- Activity log: chevron-collapsible groups per video, refresh-survival via
  localStorage + a 500-entry server-side buffer, explicit Clear button.
- YouTube captions fast-path with user toggle (skips audio download + AI
  transcription when captions are available — uncheck for speaker labels).
- Cancel button: AbortController plumbed through every provider SDK call;
  retryAPI short-circuits on AbortError; cancellation events surface in
  the activity log instead of silent retries.
- Long-video analysis: auto-coalesce transcript entries before building the
  analysis prompt so local-model context windows (32k-ish) don't overflow.
  Original entries preserved for transcript display via an index map; the
  analyzer sees a coarser view but click-to-seek timestamps stay precise.
- StartOS action grouping (Setup / AI Providers) so the actions list is
  navigable.
- Manifest description rewritten to reflect multi-provider support and
  free-tier relay credits.
- Smaller fixes: summarize-button enablement no longer requires a Gemini
  key when other providers are configured; analysis fallback chain handles
  context-length and 503 capacity errors; single-segment expansion for
  providers that don't return per-segment timestamps (Parakeet et al.);
  many other UX polish items.
This commit is contained in:
Keysat
2026-05-11 23:46:20 -05:00
parent 2544cf7dde
commit 373d10595b
79 changed files with 6322 additions and 397 deletions
+154
View File
@@ -0,0 +1,154 @@
// Provider registry. Each provider wraps a single LLM/SDK behind a
// uniform interface (see ./gemini.js for the reference shape). The rest
// of the server talks to providers through getProvider() and never
// imports SDKs directly.
//
// Adding a new provider:
// 1. Create ./<name>.js exporting createXxxProvider({ apiKey, ... }).
// 2. Add it to PROVIDER_NAMES + the switch in getProvider().
// 3. Add the matching opts shape to PROVIDER_KEY_FIELDS so
// resolveProviderOpts() can pull the right key/baseURL out of the
// StartOS config.
// 4. Wire its config field into startos/file-models/config.json.ts
// and add a "Set <Provider> Key" StartOS action.
//
// Capabilities (see provider.capabilities) signal what each one can do.
// Some providers analyze but can't transcribe (Claude, OpenAI-compat,
// Ollama); the orchestration layer in server/index.js can mix providers
// across the transcription + analysis pipelines.
import { createGeminiProvider } from "./gemini.js";
import { createAnthropicProvider } from "./anthropic.js";
import { createOpenAIProvider } from "./openai.js";
import { createOpenAICompatibleProvider } from "./openai-compatible.js";
import { createOllamaProvider } from "./ollama.js";
import { createWhisperProvider } from "./whisper.js";
import { createRelayProvider } from "./relay.js";
import { getInstallId } from "../install-id.js";
import { getRawLicenseKey } from "../license.js";
import { getRelayBaseURL } from "../relay-default.js";
// Every provider identifier that getProvider() has a case for. Also used
// to build the "Available: …" hint in getProvider()'s unknown-provider
// error, so keep it in step with the switch there.
export const PROVIDER_NAMES = [
  "gemini",
  "anthropic",
  "openai",
  "openai-compatible",
  "ollama",
  "whisper",
  "relay",
];
// Provider name → names of the StartOS config fields that hold its
// construction opts. resolveProviderOpts() reads this map: an `apiKey`
// entry names the config field carrying the provider's API key, a
// `baseURL` entry names the config field carrying its endpoint.
export const PROVIDER_KEY_FIELDS = {
  gemini: { apiKey: "gemini_api_key" },
  anthropic: { apiKey: "anthropic_api_key" },
  openai: { apiKey: "openai_api_key" },
  "openai-compatible": {
    apiKey: "openai_compatible_api_key",
    baseURL: "openai_compatible_base_url",
  },
  ollama: { baseURL: "ollama_base_url" },
  whisper: {
    apiKey: "whisper_api_key",
    baseURL: "whisper_base_url",
  },
  // Relay is operator-only: its base URL is HARDCODED in
  // server/relay-default.js and is NOT read from the StartOS config.
  // The empty object is deliberate. resolveProviderOpts() looks the
  // provider name up in this map to recognise it, and its
  // relay-specific block then injects baseURL + installId + licenseKey
  // server-side. Remove this entry and the lookup throws
  // "Unknown provider: relay" before the injection block is reached.
  relay: {},
};
// Instantiate the provider registered under `name`.
//
// `opts` is handed unchanged to the provider's factory function and
// defaults to an empty object. Throws an Error listing PROVIDER_NAMES
// when `name` matches none of the cases below.
export function getProvider(name, opts = {}) {
  // Pick the factory first; construction happens once, after the switch.
  let createProvider;
  switch (name) {
    case "gemini":
      createProvider = createGeminiProvider;
      break;
    case "anthropic":
      createProvider = createAnthropicProvider;
      break;
    case "openai":
      createProvider = createOpenAIProvider;
      break;
    case "openai-compatible":
      createProvider = createOpenAICompatibleProvider;
      break;
    case "ollama":
      createProvider = createOllamaProvider;
      break;
    case "whisper":
      createProvider = createWhisperProvider;
      break;
    case "relay":
      createProvider = createRelayProvider;
      break;
    default: {
      const available = PROVIDER_NAMES.join(", ");
      throw new Error(`Unknown provider: ${name}. Available: ${available}`);
    }
  }
  return createProvider(opts);
}
// Pull the construction opts for a provider out of the StartOS config
// blob, optionally overridden per-provider by client-side opts the web
// UI passed in the request body.
//
// `name` must be an own key of PROVIDER_KEY_FIELDS. Anything else,
// including inherited keys such as "constructor", throws
// `Unknown provider: <name>`.
// `config` is the parsed startos-config.json snapshot.
// `clientOpts` is { apiKey?, baseURL?, models? } for THIS provider only —
// typically a value out of req.body.providerOpts[name]. It is treated
// as untrusted: a null blob or a non-string field counts as "not
// supplied" instead of raising a TypeError mid-request.
//
// Resolution priority for each field: client opt → config opt.
// Returns { apiKey?, baseURL?, defaultModels? } as appropriate for the
// provider; for "relay", baseURL + installId (+ licenseKey when one is
// present) are injected server-side.
export function resolveProviderOpts(name, { config = {}, clientOpts = {} } = {}) {
  // Own-property check: a bare PROVIDER_KEY_FIELDS[name] lookup is truthy
  // for keys inherited from Object.prototype ("constructor", "toString",
  // "__proto__"), which would let a bogus name through with empty opts.
  if (!Object.prototype.hasOwnProperty.call(PROVIDER_KEY_FIELDS, name)) {
    throw new Error(`Unknown provider: ${name}`);
  }
  const fields = PROVIDER_KEY_FIELDS[name];

  // Destructuring defaults only replace `undefined`; an explicit null
  // (e.g. from a JSON body) would otherwise crash on property access.
  const cfg = config || {};
  const client = clientOpts || {};
  // Trimmed string value of a client-supplied field, or "" when the field
  // is absent or not a string.
  const clientString = (value) => (typeof value === "string" ? value.trim() : "");

  const opts = {};

  if (fields.apiKey) {
    opts.apiKey = clientString(client.apiKey) || cfg[fields.apiKey] || "";
  }

  if (fields.baseURL) {
    opts.baseURL = clientString(client.baseURL) || cfg[fields.baseURL] || "";
    // Last-resort fallback for Ollama: the canonical StartOS internal
    // hostname. Reachable when the optional Ollama dependency is
    // installed alongside Recap on the same StartOS server, even if
    // the user hasn't run the "Set Ollama Server URL" action.
    if (!opts.baseURL && name === "ollama") {
      opts.baseURL = "http://ollama.startos:11434";
    }
  }

  // User-defined model list: providers with dynamic catalogs (ollama,
  // openai-compatible, whisper) accept a comma- or newline-separated
  // list of model names in clientOpts.models. Parse and pass through
  // as `defaultModels` so listTranscriptionModels / listAnalysisModels
  // return the right thing AND so the orchestration layer's fallback
  // chain knows what to walk through if the user's chosen model fails.
  const rawModels = clientString(client.models);
  if (rawModels) {
    // A Set keeps first-seen order, so spreading it de-duplicates
    // without reordering the user's list.
    const models = [
      ...new Set(
        rawModels
          .split(/[,\n]/)
          .map((entry) => entry.trim())
          .filter(Boolean)
      ),
    ];
    if (models.length > 0) {
      opts.defaultModels = models;
    }
  }

  // Relay-specific injections: baseURL (hardcoded constant or env
  // override) + install-id (always) + license key (when present).
  // None of these come from clientOpts — relay identity + endpoint
  // must not be spoofable from a request body.
  if (name === "relay") {
    opts.baseURL = getRelayBaseURL();
    opts.installId = getInstallId();
    const rawKey = getRawLicenseKey();
    if (rawKey) opts.licenseKey = rawKey;
  }

  return opts;
}