Pluggable AI providers, relay credit system, picker UX overhaul
Captures roughly forty version bumps (v0.2.6 → v0.2.47) of work that
accumulated without commits.
- Pluggable provider system under server/providers/: gemini, anthropic,
openai, openai-compatible, ollama, whisper-compatible, relay. Mix and
match transcription + analysis per request via the picker UI.
- Relay backend integration. Hardcoded relay URL in server/relay-default.js
(operator-controlled at build time, not user-configurable). New
/api/relay/{status,policy} endpoints proxy to the relay; balance pings
populate a cached credit display.
- Per-install identity in server/install-id.js for relay credit accounting.
Sent to the relay as X-Recap-Install-Id; persists across upgrades, lost
on a full uninstall + reinstall. Not surfaced in the UI.
- Admin login gate (server/admin-auth.js + setAdminPassword action). Scrypt
password hash + HMAC-signed session cookie.
- Entitlement scheme rename: pro / max (each paired with subscriptions and
relay_pro / relay_max), replacing the misleading "core" entitlement
that conflicted with the user-facing "Core" tier name.
- Activation screen: dynamic credit count pulled from /api/relay/policy,
"Skip — use free mode" button, accurate paid-feature list.
- Top toolbar: inline credit-balance pill (or "BYO configured" fallback),
Upgrade + "I have a key" buttons.
- Picker UI: per-provider sections with Save/Test/Delete buttons, sections
collapsible by chevron, default-collapsed unless currently selected,
"Use comped credits (reset to relay)" link when the user has strayed,
green hint under inputs whose values are server-configured.
- Activity log: chevron-collapsible groups per video, refresh-survival via
localStorage + a 500-entry server-side buffer, explicit Clear button.
- YouTube captions fast-path with user toggle (skips audio download + AI
transcription when captions are available — uncheck for speaker labels).
- Cancel button: AbortController plumbed through every provider SDK call;
retryAPI short-circuits on AbortError; cancellation events surface in
the activity log instead of silent retries.
- Long-video analysis: auto-coalesce transcript entries before building the
analysis prompt so local-model context windows (32k-ish) don't overflow.
Original entries preserved for transcript display via an index map; the
analyzer sees a coarser view but click-to-seek timestamps stay precise.
- StartOS action grouping (Setup / AI Providers) so the actions list is
navigable.
- Manifest description rewritten to reflect multi-provider support and
free-tier relay credits.
- Smaller fixes: summarize-button enablement no longer requires a Gemini
key when other providers are configured; analysis fallback chain handles
context-length and 503 capacity errors; single-segment expansion for
providers that don't return per-segment timestamps (Parakeet et al.);
many other UX polish items.
This commit is contained in:
@@ -0,0 +1,154 @@
|
||||
// Provider registry. Each provider wraps a single LLM/SDK behind a
// uniform interface (see ./gemini.js for the reference shape). The rest
// of the server talks to providers through getProvider() and never
// imports SDKs directly.
//
// Adding a new provider:
//   1. Create ./<name>.js exporting createXxxProvider({ apiKey, ... }).
//   2. Add it to PROVIDER_NAMES + the switch in getProvider().
//   3. Add the matching opts shape to PROVIDER_KEY_FIELDS so
//      resolveProviderOpts() can pull the right key/baseURL out of the
//      StartOS config.
//   4. Wire its config field into startos/file-models/config.json.ts
//      and add a "Set <Provider> Key" StartOS action.
//
// Capabilities (see provider.capabilities) signal what each one can do.
// Some providers analyze but can't transcribe (Claude, OpenAI-compat,
// Ollama); the orchestration layer in server/index.js can mix providers
// across the transcription + analysis pipelines.
|
||||
|
||||
import { createGeminiProvider } from "./gemini.js";
|
||||
import { createAnthropicProvider } from "./anthropic.js";
|
||||
import { createOpenAIProvider } from "./openai.js";
|
||||
import { createOpenAICompatibleProvider } from "./openai-compatible.js";
|
||||
import { createOllamaProvider } from "./ollama.js";
|
||||
import { createWhisperProvider } from "./whisper.js";
|
||||
import { createRelayProvider } from "./relay.js";
|
||||
import { getInstallId } from "../install-id.js";
|
||||
import { getRawLicenseKey } from "../license.js";
|
||||
import { getRelayBaseURL } from "../relay-default.js";
|
||||
|
||||
/**
 * Names of every provider the registry can construct. getProvider()
 * joins this list into its "Unknown provider" error message, so the
 * order here is the order shown to the caller.
 */
export const PROVIDER_NAMES = [
  "gemini",
  "anthropic",
  "openai",
  "openai-compatible",
  "ollama",
  "whisper",
  "relay",
];
|
||||
|
||||
/**
 * Map provider name → which fields to read from the StartOS config blob
 * when resolving its construction opts. Used by resolveProviderOpts().
 *
 * Each value maps an opts key (`apiKey`, `baseURL`) to the name of the
 * config field that holds it. A provider that omits a key simply does
 * not get that opt resolved from config.
 */
export const PROVIDER_KEY_FIELDS = {
  gemini: { apiKey: "gemini_api_key" },
  anthropic: { apiKey: "anthropic_api_key" },
  openai: { apiKey: "openai_api_key" },
  "openai-compatible": {
    apiKey: "openai_compatible_api_key",
    baseURL: "openai_compatible_base_url",
  },
  ollama: { baseURL: "ollama_base_url" },
  whisper: {
    apiKey: "whisper_api_key",
    baseURL: "whisper_base_url",
  },
  // Relay is operator-only — base URL is HARDCODED in
  // server/relay-default.js, NOT read from StartOS config. The empty
  // object is intentional: resolveProviderOpts looks the provider name
  // up in this map to recognise it, then the relay-specific block at
  // the bottom of resolveProviderOpts injects baseURL + installId +
  // licenseKey server-side. Without this entry the lookup throws
  // "Unknown provider: relay" before reaching the injection block.
  relay: {},
};
|
||||
|
||||
/**
 * Construct the provider registered under `name`.
 *
 * @param {string} name - One of PROVIDER_NAMES.
 * @param {object} [opts={}] - Construction opts, handed unchanged to the
 *   matching create*Provider() factory (see resolveProviderOpts()).
 * @returns {object} Whatever the selected factory returns.
 * @throws {Error} "Unknown provider: …" listing PROVIDER_NAMES when
 *   `name` matches none of the cases below.
 */
export function getProvider(name, opts = {}) {
  switch (name) {
    case "gemini":
      return createGeminiProvider(opts);
    case "anthropic":
      return createAnthropicProvider(opts);
    case "openai":
      return createOpenAIProvider(opts);
    case "openai-compatible":
      return createOpenAICompatibleProvider(opts);
    case "ollama":
      return createOllamaProvider(opts);
    case "whisper":
      return createWhisperProvider(opts);
    case "relay":
      return createRelayProvider(opts);
    default:
      throw new Error(
        `Unknown provider: ${name}. Available: ${PROVIDER_NAMES.join(", ")}`
      );
  }
}
|
||||
|
||||
// Normalise a client-supplied opt: request bodies are untrusted JSON, so
// anything that is not a string counts as "not provided".
function trimmedClientString(value) {
  return typeof value === "string" ? value.trim() : "";
}

// Split a comma- or newline-separated model list into trimmed, non-empty,
// de-duplicated names, keeping first-seen order.
function parseModelList(raw) {
  if (typeof raw !== "string") return [];
  const names = raw
    .split(/[,\n]/)
    .map((entry) => entry.trim())
    .filter(Boolean);
  return [...new Set(names)];
}

/**
 * Pull the construction opts for a provider out of the StartOS config
 * blob, optionally overridden per-provider by client-side opts the web
 * UI passed in the request body.
 *
 * Resolution priority for apiKey / baseURL: client opt → config opt.
 *
 * @param {string} name - Provider name; must be an own key of
 *   PROVIDER_KEY_FIELDS.
 * @param {object} [sources]
 * @param {object} [sources.config] - The parsed startos-config.json
 *   snapshot. `null` is treated like an empty object.
 * @param {object} [sources.clientOpts] - { apiKey?, baseURL?, models? }
 *   for THIS provider only — typically a value out of
 *   req.body.providerOpts[name]. `null` and non-string field values are
 *   treated as absent.
 * @returns {object} { apiKey?, baseURL?, defaultModels? } as appropriate
 *   for the provider; for "relay" also baseURL, installId and (when a
 *   license key is present) licenseKey.
 * @throws {Error} "Unknown provider: <name>" when `name` has no entry in
 *   PROVIDER_KEY_FIELDS.
 */
export function resolveProviderOpts(name, { config = {}, clientOpts = {} } = {}) {
  // Own-property check: a plain `PROVIDER_KEY_FIELDS[name]` lookup would
  // accept inherited keys such as "constructor" or "__proto__".
  if (!Object.prototype.hasOwnProperty.call(PROVIDER_KEY_FIELDS, name)) {
    throw new Error(`Unknown provider: ${name}`);
  }
  const fields = PROVIDER_KEY_FIELDS[name];

  // The `= {}` defaults only cover `undefined`; a JSON `null` still gets
  // through, so guard it here.
  const cfg = config || {};
  const client = clientOpts || {};

  const opts = {};

  if (fields.apiKey) {
    opts.apiKey = trimmedClientString(client.apiKey) || cfg[fields.apiKey] || "";
  }

  if (fields.baseURL) {
    opts.baseURL = trimmedClientString(client.baseURL) || cfg[fields.baseURL] || "";
    // Last-resort fallback for Ollama: the canonical StartOS internal
    // hostname. Reachable when the optional Ollama dependency is
    // installed alongside Recap on the same StartOS server, even if
    // the user hasn't run the "Set Ollama Server URL" action.
    if (!opts.baseURL && name === "ollama") {
      opts.baseURL = "http://ollama.startos:11434";
    }
  }

  // User-defined model list: providers with dynamic catalogs (ollama,
  // openai-compatible, whisper) accept a comma- or newline-separated
  // list of model names in clientOpts.models. Parse and pass through
  // as `defaultModels` so listTranscriptionModels / listAnalysisModels
  // return the right thing AND so the orchestration layer's fallback
  // chain knows what to walk through if the user's chosen model fails.
  const models = parseModelList(client.models);
  if (models.length > 0) {
    opts.defaultModels = models;
  }

  // Relay-specific injections: baseURL (hardcoded constant or env
  // override) + install-id (always) + license key (when present).
  // None of these come from clientOpts — relay identity + endpoint
  // must not be spoofable from a request body.
  if (name === "relay") {
    opts.baseURL = getRelayBaseURL();
    opts.installId = getInstallId();
    const rawKey = getRawLicenseKey();
    if (rawKey) opts.licenseKey = rawKey;
  }

  return opts;
}
|
||||
Reference in New Issue
Block a user