Pluggable AI providers, relay credit system, picker UX overhaul
Captures roughly forty version bumps (v0.2.6 → v0.2.47) of work that
accumulated without commits.
- Pluggable provider system under server/providers/: gemini, anthropic,
openai, openai-compatible, ollama, whisper-compatible, relay. Mix and
match transcription + analysis per request via the picker UI.
- Relay backend integration. Hardcoded relay URL in server/relay-default.js
(operator-controlled at build time, not user-configurable). New
/api/relay/{status,policy} endpoints proxy to the relay; balance pings
populate a cached credit display.
- Per-install identity in server/install-id.js for relay credit accounting.
Sent to the relay as X-Recap-Install-Id; persists across upgrades, lost
on a full uninstall + reinstall. Not surfaced in the UI.
- Admin login gate (server/admin-auth.js + setAdminPassword action). Scrypt
password hash + HMAC-signed session cookie.
- Entitlement scheme rename: pro / max (each paired with subscriptions and
relay_pro / relay_max), replacing the misleading "core" entitlement
that conflicted with the user-facing "Core" tier name.
- Activation screen: dynamic credit count pulled from /api/relay/policy,
"Skip — use free mode" button, accurate paid-feature list.
- Top toolbar: inline credit-balance pill (or "BYO configured" fallback),
Upgrade + "I have a key" buttons.
- Picker UI: per-provider sections with Save/Test/Delete buttons, sections
collapsible by chevron, default-collapsed unless currently selected,
"Use comped credits (reset to relay)" link when the user has strayed,
green hint under inputs whose values are server-configured.
- Activity log: chevron-collapsible groups per video, refresh-survival via
localStorage + a 500-entry server-side buffer, explicit Clear button.
- YouTube captions fast-path with user toggle (skips audio download + AI
transcription when captions are available — uncheck for speaker labels).
- Cancel button: AbortController plumbed through every provider SDK call;
retryAPI short-circuits on AbortError; cancellation events surface in
the activity log instead of silent retries.
- Long-video analysis: auto-coalesce transcript entries before building the
analysis prompt so local-model context windows (32k-ish) don't overflow.
Original entries preserved for transcript display via an index map; the
analyzer sees a coarser view but click-to-seek timestamps stay precise.
- StartOS action grouping (Setup / AI Providers) so the actions list is
navigable.
- Manifest description rewritten to reflect multi-provider support and
free-tier relay credits.
- Smaller fixes: summarize-button enablement no longer requires a Gemini
key when other providers are configured; analysis fallback chain handles
context-length and 503 capacity errors; single-segment expansion for
providers that don't return per-segment timestamps (Parakeet et al.);
many other UX polish items.
This commit is contained in:
+112
-6
@@ -113,18 +113,50 @@ export function fetchUrl(url) {
|
||||
});
|
||||
}
|
||||
|
||||
// ── Retry helper for transient Gemini API errors ────────────────────────────
|
||||
// Retries on 503/429 and on common transient network errors. Linear backoff
|
||||
// (delayMs * attempt). The optional `log` callback receives a one-line
|
||||
// status message per retry — useful for streaming progress to a UI.
|
||||
export async function retryGemini(fn, { retries = 3, delayMs = 3000, label = "Gemini call", log: logFn } = {}) {
|
||||
// ── Retry helper for transient API errors ──────────────────────────────────
|
||||
// Retries on 503/429/529 and on common transient network errors. Linear
|
||||
// backoff (delayMs * attempt). The optional `log` callback receives a
|
||||
// one-line status message per retry — useful for streaming progress to a
|
||||
// UI. Provider-neutral: error shapes from @google/genai, @anthropic-ai/sdk,
|
||||
// openai, and raw fetch all expose `.status` (or message text) we can match.
|
||||
export async function retryAPI(fn, { retries = 3, delayMs = 3000, label = "API call", log: logFn } = {}) {
|
||||
let lastErr;
|
||||
for (let attempt = 1; attempt <= retries; attempt++) {
|
||||
// Surface every attempt — including the first — so the user
|
||||
// sees what's happening when a retry is in flight rather than
|
||||
// a frozen-looking activity log between "failed, retrying in
|
||||
// 5s" and the final outcome.
|
||||
if (attempt > 1 && logFn) {
|
||||
logFn(`Retrying ${label}... (attempt ${attempt}/${retries})`);
|
||||
}
|
||||
try {
|
||||
return await fn();
|
||||
} catch (err) {
|
||||
// User-cancelled requests must not be retried — re-throw so the
|
||||
// outer handler can treat it as a clean cancellation rather than
|
||||
// letting the retry loop log noise and burn time.
|
||||
if (err?.name === "AbortError" || /aborted|operation was aborted/i.test(err?.message || "")) {
|
||||
throw err;
|
||||
}
|
||||
lastErr = err;
|
||||
const msg = err?.message || String(err);
|
||||
const status = err?.status || err?.httpStatusCode || 0;
|
||||
const isRetryable = status === 503 || status === 429 || /overloaded|unavailable|capacity|high demand|rate limit|fetch failed|ECONNRESET|ETIMEDOUT|socket hang up|network/i.test(msg);
|
||||
const isRetryable = status === 503 || status === 429 || status === 529 || /overloaded|unavailable|capacity|high demand|rate limit|fetch failed|ECONNRESET|ETIMEDOUT|socket hang up|network/i.test(msg);
|
||||
// Dump every detail we can pry out of the error so generic
|
||||
// messages like "500 status code (no body)" become debuggable
|
||||
// server-side. Anthropic/OpenAI SDK errors expose .response,
|
||||
// .body, .headers, .cause; Node stream errors expose .code.
|
||||
const richDetail = {
|
||||
status,
|
||||
code: err?.code,
|
||||
type: err?.type,
|
||||
body: err?.body || err?.response?.body || err?.error,
|
||||
cause: err?.cause?.message || err?.cause?.code || err?.cause,
|
||||
};
|
||||
console.error(
|
||||
`[retryAPI] ${label} failed (attempt ${attempt}/${retries}, status=${status || "n/a"}): ${msg}`,
|
||||
JSON.stringify(richDetail, (_k, v) => (typeof v === "bigint" ? v.toString() : v))
|
||||
);
|
||||
if (isRetryable && attempt < retries) {
|
||||
const waitSec = (delayMs * attempt / 1000).toFixed(0);
|
||||
if (logFn) logFn(`⚠ ${label} failed (${status || "error"}), retrying in ${waitSec}s... (attempt ${attempt}/${retries})`);
|
||||
@@ -134,4 +166,78 @@ export async function retryGemini(fn, { retries = 3, delayMs = 3000, label = "Ge
|
||||
}
|
||||
}
|
||||
}
|
||||
throw lastErr;
|
||||
}
|
||||
|
||||
// Back-compat alias: pre-existing call sites used `retryGemini`. Keep
|
||||
// the name working so this rename is non-breaking.
|
||||
export const retryGemini = retryAPI;
|
||||
|
||||
// Split a plain-text transcript into synthetic sentence-based entries
|
||||
// with interpolated timestamps. Used when a transcription provider
|
||||
// returns just text (no per-segment timing) — e.g. NVIDIA Parakeet
|
||||
// behind an OpenAI-compatible wrapper. Without this, the entire
|
||||
// transcript lands in one entry at [0:00] and the analyzer can only
|
||||
// produce a single section spanning the whole audio.
|
||||
//
|
||||
// Strategy:
|
||||
// 1. Split on sentence terminators (. ! ?). Keep the punctuation.
|
||||
// 2. If no terminators (very rare in real speech), fall back to
|
||||
// 30-word chunks.
|
||||
// 3. Distribute timestamps proportionally by character count —
|
||||
// sentence N starts at (cum_chars_so_far / total_chars) *
|
||||
// audio_duration. Not perfectly accurate, but good enough to
|
||||
// let the analyzer carve out coherent topic sections.
|
||||
/**
 * Split a plain-text transcript into synthetic entries with interpolated
 * timestamps.
 *
 * Chunks are sentences (terminated by `.`, `!` or `?`) or, for lines that
 * carry no terminator, the line itself. Transcripts without any sentence
 * terminator are cut into fixed 30-word chunks instead. Timestamps are
 * distributed proportionally to each chunk's character count.
 *
 * @param {string} text - Raw transcript text; falsy values are treated as "".
 * @param {number} totalDurationSeconds - Audio length in seconds. Values that
 *   are missing, non-finite, zero or negative yield a single entry with a
 *   duration of 0.
 * @returns {Array<{offset: number, text: string, duration: number}>} Entries
 *   in transcript order; `offset` and `duration` are in seconds. Always
 *   contains at least one entry.
 */
export function synthesizeEntriesFromText(text, totalDurationSeconds) {
  const transcript = (text || "").trim();
  const total = Number(totalDurationSeconds);
  const hasDuration = Number.isFinite(total) && total > 0;

  // Nothing to interpolate over: hand back the text as one entry. The
  // duration is clamped so callers never see a negative or NaN length.
  if (!transcript || !hasDuration) {
    return [{ offset: 0, text: transcript, duration: hasDuration ? total : 0 }];
  }

  // Sentence split — keep the terminator on each sentence. The second
  // alternative deliberately has no `$` anchor: without the `m` flag `$`
  // only matches at the very end of the input, so an unterminated line
  // followed by a newline (a heading, a caption line) matched neither
  // alternative and its text was silently dropped.
  const sentenceMatches = transcript.match(/[^.!?\n]+[.!?]+|[^.!?\n]+/g) || [];
  let chunks = sentenceMatches.map((s) => s.trim()).filter(Boolean);

  // If we couldn't find sentence boundaries (unpunctuated transcript, or a
  // single run-on sentence), fall back to fixed-size word chunks.
  const hasTerminator = /[.!?]/.test(transcript);
  if (chunks.length <= 1 || !hasTerminator) {
    const words = transcript.split(/\s+/).filter(Boolean);
    if (words.length <= 1) {
      return [{ offset: 0, text: transcript, duration: total }];
    }
    const wordsPerChunk = 30;
    chunks = [];
    for (let i = 0; i < words.length; i += wordsPerChunk) {
      chunks.push(words.slice(i, i + wordsPerChunk).join(" "));
    }
  }

  // Coalesce extremely short chunks (single words like "Yeah." or "Right.")
  // with whatever follows them: while the most recent chunk is still under
  // the minimum length, the next chunk is appended to it. This avoids
  // hundreds of useless 5-char entries. A short final chunk stays as-is.
  const COALESCE_MIN_CHARS = 40;
  const coalesced = [];
  for (const chunk of chunks) {
    const lastIndex = coalesced.length - 1;
    if (lastIndex >= 0 && coalesced[lastIndex].length < COALESCE_MIN_CHARS) {
      coalesced[lastIndex] = `${coalesced[lastIndex]} ${chunk}`.trim();
    } else {
      coalesced.push(chunk);
    }
  }

  // Distribute timestamps proportionally by character length. `|| 1` guards
  // the division; chunks are non-empty here, so it is purely defensive.
  const totalChars = coalesced.reduce((sum, chunk) => sum + chunk.length, 0) || 1;
  const entries = [];
  let cumChars = 0;
  for (const chunk of coalesced) {
    const startRatio = cumChars / totalChars;
    cumChars += chunk.length;
    const endRatio = cumChars / totalChars;
    entries.push({
      offset: startRatio * total,
      text: chunk,
      // Floor at 0.1s so no entry ends up with a zero-length span.
      duration: Math.max(0.1, (endRatio - startRatio) * total),
    });
  }
  return entries;
}
|
||||
|
||||
Reference in New Issue
Block a user