373d10595b
Captures roughly forty version bumps (v0.2.6 → v0.2.47) of work that
accumulated without commits.
- Pluggable provider system under server/providers/: gemini, anthropic,
openai, openai-compatible, ollama, whisper-compatible, relay. Mix and
match transcription + analysis per request via the picker UI.
- Relay backend integration. Hardcoded relay URL in server/relay-default.js
(operator-controlled at build time, not user-configurable). New
/api/relay/{status,policy} endpoints proxy to the relay; balance pings
populate a cached credit display.
- Per-install identity in server/install-id.js for relay credit accounting.
Sent to the relay as X-Recap-Install-Id; persists across upgrades, lost
on a full uninstall + reinstall. Not surfaced in the UI.
- Admin login gate (server/admin-auth.js + setAdminPassword action). Scrypt
password hash + HMAC-signed session cookie.
- Entitlement scheme rename: pro / max (each paired with subscriptions and
relay_pro / relay_max), replacing the misleading "core" entitlement
that conflicted with the user-facing "Core" tier name.
- Activation screen: dynamic credit count pulled from /api/relay/policy,
"Skip — use free mode" button, accurate paid-feature list.
- Top toolbar: inline credit-balance pill (or "BYO configured" fallback),
Upgrade + "I have a key" buttons.
- Picker UI: per-provider sections with Save/Test/Delete buttons, sections
collapsible by chevron, default-collapsed unless currently selected,
"Use comped credits (reset to relay)" link when the user has strayed,
green hint under inputs whose values are server-configured.
- Activity log: chevron-collapsible groups per video, refresh-survival via
localStorage + a 500-entry server-side buffer, explicit Clear button.
- YouTube captions fast-path with user toggle (skips audio download + AI
transcription when captions are available — uncheck for speaker labels).
- Cancel button: AbortController plumbed through every provider SDK call;
retryAPI short-circuits on AbortError; cancellation events surface in
the activity log instead of silent retries.
- Long-video analysis: auto-coalesce transcript entries before building the
analysis prompt so local-model context windows (32k-ish) don't overflow.
Original entries preserved for transcript display via an index map; the
analyzer sees a coarser view but click-to-seek timestamps stay precise.
- StartOS action grouping (Setup / AI Providers) so the actions list is
navigable.
- Manifest description rewritten to reflect multi-provider support and
free-tier relay credits.
- Smaller fixes: summarize-button enablement no longer requires a Gemini
key when other providers are configured; analysis fallback chain handles
context-length and 503 capacity errors; single-segment expansion for
providers that don't return per-segment timestamps (Parakeet et al.);
many other UX polish items.
111 lines
3.1 KiB
JavaScript
111 lines
3.1 KiB
JavaScript
// OpenAI-compatible provider — analysis only.
|
|
//
|
|
// Same wire format as OpenAI's chat.completions endpoint, but pointed
|
|
// at a user-supplied baseURL: DeepSeek, Together, Groq, Fireworks, your
|
|
// own self-hosted vLLM, etc. The user provides baseURL + apiKey + model
|
|
// name; we don't ship a hardcoded model list (each backend's catalog
|
|
// differs), and we don't have pricing (varies wildly per backend).
|
|
//
|
|
// Structurally this is a thin wrapper around the OpenAI SDK with the
// pricing table forced to zero — costs are reported as $0.0000 since we
// can't know the backend's rates without per-deploy configuration.
|
|
|
|
import OpenAI from "openai";
|
|
import { retryAPI } from "../util.js";
|
|
import { zeroCost } from "./cost.js";
|
|
|
|
// Default model lists are empty — the picker UI surfaces a free-text
|
|
// model field for OpenAI-compatible. listAnalysisModels() returns the
|
|
// caller-provided defaults if any were passed at construction time.
|
|
// Value sent as `max_tokens` on every analysis request made by this provider.
const ANALYSIS_MAX_TOKENS = 16000;
|
|
|
|
/**
 * Build an analysis-only provider for any backend that speaks the OpenAI
 * chat.completions wire format at a caller-supplied base URL.
 *
 * @param {object} [options]
 * @param {string} [options.apiKey] - Key for the backend. Surrounding
 *   whitespace is trimmed; a missing or blank key is replaced by the
 *   "no-auth" sentinel so the SDK still has a non-empty key to send.
 * @param {string} options.baseURL - Endpoint root (required). Surrounding
 *   whitespace is trimmed before it is handed to the SDK.
 * @param {Iterable<string>|null} [options.defaultModels=[]] - Model names to
 *   report from listAnalysisModels(). Copied once at construction so later
 *   changes to the caller's array cannot desynchronise `capabilities`.
 * @param {number} [options.timeoutMs=900_000] - Passed to the SDK client as
 *   its `timeout` option.
 * @returns {object} Provider exposing `name`, `capabilities`,
 *   `listAnalysisModels`, `listTranscriptionModels`, `transcribeAudio` and
 *   `analyzeText`.
 * @throws {Error} When `baseURL` is missing or blank.
 */
export function createOpenAICompatibleProvider({
  apiKey,
  baseURL,
  defaultModels = [],
  timeoutMs = 900_000,
} = {}) {
  // Values pasted into a settings field often carry stray whitespace or a
  // trailing newline; non-string values are passed through untouched.
  const trimIfString = (value) =>
    typeof value === "string" ? value.trim() : value;

  const endpoint = trimIfString(baseURL);
  if (!endpoint) {
    throw new Error(
      "createOpenAICompatibleProvider: baseURL is required (e.g. https://api.deepseek.com/v1)"
    );
  }

  // Snapshot the list: `capabilities.listModels` is computed exactly once, so
  // the list it describes must not change underneath it. `null` (which the
  // default parameter does not cover) is treated as "no models".
  const models = defaultModels == null ? [] : Array.from(defaultModels);

  // Some self-hosted backends accept any non-empty key. Default to a
  // sentinel so the SDK's auth header stays well-formed.
  const client = new OpenAI({
    apiKey: trimIfString(apiKey) || "no-auth",
    baseURL: endpoint,
    timeout: timeoutMs,
  });

  return {
    name: "openai-compatible",

    capabilities: {
      transcribe: false,
      analyze: true,
      listModels: models.length > 0,
    },

    /** @returns {string[]} A fresh copy of the construction-time model list. */
    listAnalysisModels() {
      return [...models];
    },

    /** @returns {string[]} Always empty — this provider does not transcribe. */
    listTranscriptionModels() {
      return [];
    },

    /** Always rejects: this provider is wired for analysis only. */
    async transcribeAudio() {
      throw new Error(
        "openai-compatible providers are wired for analysis only. Use Gemini or OpenAI Whisper for transcription."
      );
    },

    /**
     * Send `prompt` as a single user message and return the first choice.
     *
     * @param {object} [request]
     * @param {string} request.prompt - Content of the single user message.
     * @param {string} request.model - Model name forwarded to the backend.
     * @param {(msg: string) => void} [request.onProgress] - Receives the
     *   messages retryAPI emits through its `log` hook.
     * @param {number} [request.retries=2] - Forwarded to retryAPI as
     *   `retries` (presumably the retry budget — see ../util.js).
     * @param {AbortSignal} [request.signal] - Forwarded to the SDK request
     *   options when provided.
     * @returns {Promise<{text: string, usage: object, cost: *,
     *   finishReason: (string|null), raw: object}>}
     */
    async analyzeText({
      prompt,
      model,
      onProgress = () => {},
      retries = 2,
      signal,
    } = {}) {
      const result = await retryAPI(
        () =>
          client.chat.completions.create(
            {
              model,
              max_tokens: ANALYSIS_MAX_TOKENS,
              messages: [{ role: "user", content: prompt }],
            },
            signal ? { signal } : undefined
          ),
        {
          retries,
          delayMs: 5000,
          label: "openai-compatible analysis",
          log: (msg) => onProgress(msg),
        }
      );

      const choice = result.choices?.[0];
      const text = choice?.message?.content || "";

      const usage = {
        inputTokens: result.usage?.prompt_tokens || 0,
        outputTokens: result.usage?.completion_tokens || 0,
        thinkingTokens: 0,
      };
      // Per-backend pricing varies — report zero. UI can warn that cost
      // tracking is not available for this provider.
      const cost = zeroCost(usage);

      return {
        text,
        usage,
        cost,
        finishReason: choice?.finish_reason || null,
        raw: result,
      };
    },
  };
}
|