Pluggable AI providers, relay credit system, picker UX overhaul
Captures roughly forty version bumps (v0.2.6 → v0.2.47) of work that
accumulated without commits.
- Pluggable provider system under server/providers/: gemini, anthropic,
openai, openai-compatible, ollama, whisper-compatible, relay. Mix and
match transcription + analysis per request via the picker UI.
- Relay backend integration. Hardcoded relay URL in server/relay-default.js
(operator-controlled at build time, not user-configurable). New
/api/relay/{status,policy} endpoints proxy to the relay; balance pings
populate a cached credit display.
- Per-install identity in server/install-id.js for relay credit accounting.
Sent to the relay as X-Recap-Install-Id; persists across upgrades, lost
on a full uninstall + reinstall. Not surfaced in the UI.
- Admin login gate (server/admin-auth.js + setAdminPassword action). Scrypt
password hash + HMAC-signed session cookie.
- Entitlement scheme rename: pro / max (each paired with subscriptions and
relay_pro / relay_max), replacing the misleading "core" entitlement
that conflicted with the user-facing "Core" tier name.
- Activation screen: dynamic credit count pulled from /api/relay/policy,
"Skip — use free mode" button, accurate paid-feature list.
- Top toolbar: inline credit-balance pill (or "BYO configured" fallback),
Upgrade + "I have a key" buttons.
- Picker UI: per-provider sections with Save/Test/Delete buttons, sections
collapsible by chevron, default-collapsed unless currently selected,
"Use comped credits (reset to relay)" link when the user has strayed,
green hint under inputs whose values are server-configured.
- Activity log: chevron-collapsible groups per video, refresh-survival via
localStorage + a 500-entry server-side buffer, explicit Clear button.
- YouTube captions fast-path with user toggle (skips audio download + AI
transcription when captions are available — uncheck for speaker labels).
- Cancel button: AbortController plumbed through every provider SDK call;
retryAPI short-circuits on AbortError; cancellation events surface in
the activity log instead of silent retries.
- Long-video analysis: auto-coalesce transcript entries before building the
analysis prompt so local-model context windows (32k-ish) don't overflow.
Original entries preserved for transcript display via an index map; the
analyzer sees a coarser view but click-to-seek timestamps stay precise.
- StartOS action grouping (Setup / AI Providers) so the actions list is
navigable.
- Manifest description rewritten to reflect multi-provider support and
free-tier relay credits.
- Smaller fixes: summarize-button enablement no longer requires a Gemini
key when other providers are configured; analysis fallback chain handles
context-length and 503 capacity errors; single-segment expansion for
providers that don't return per-segment timestamps (Parakeet et al.);
many other UX polish items.
This commit is contained in:
@@ -0,0 +1,341 @@
|
||||
// Relay provider — proxies transcription + analysis calls through the
|
||||
// operator's relay backend (operator-side service, not in this repo).
|
||||
// The relay handles billing/credit accounting, picks the actual backing
|
||||
// model (Gemini, the operator's Parakeet+Gemma, etc.), and returns a
|
||||
// uniform response with the user's remaining credit balance.
|
||||
//
|
||||
// Auth shape:
|
||||
// - `X-Recap-Install-Id` header on every call (required; identifies
|
||||
// the credit owner). Comes from ./install-id.js.
|
||||
// - `Authorization: Bearer <license_key>` header when a license is
|
||||
// present. Absent = treat as Core (free) tier.
|
||||
// - `X-Recap-Job-Id` header on every call, common across the
|
||||
// transcribe + analyze pair that make up one full summary. The
|
||||
// relay decrements credits on the FIRST call with a given job_id;
|
||||
// subsequent calls with the same id are free (until job_id
|
||||
// expires, ~1h). Means "1 full summary = 1 credit" regardless of
|
||||
// whether one or both pipeline steps go through the relay.
|
||||
//
|
||||
// Response envelope (success and error both):
|
||||
// {
|
||||
// "result": { ...endpoint-specific payload... },
|
||||
// "credits_remaining": <number>,
|
||||
// "tier": "core" | "pro" | "max",
|
||||
// "credit_charged": <number, 0 if reused job_id>
|
||||
// }
|
||||
//
|
||||
// What this provider does NOT do:
|
||||
// - Validate the user's license. Relay does that server-side.
|
||||
// - Track historical credit usage. Relay's DB owns the ledger.
|
||||
// - Choose which backing model the relay uses. Operator's call.
|
||||
|
||||
import { createReadStream } from "fs";
|
||||
import { retryAPI, formatTime } from "../util.js";
|
||||
import { zeroCost } from "./cost.js";
|
||||
import { updateRelayState, recordRelayError } from "../relay-state.js";
|
||||
|
||||
// Provider name shown in logs + chunk pagination labels. "relay" rather
|
||||
// than e.g. "keysat-relay" because operators may run their own relay
|
||||
// using a different backend brand — the name should describe the
|
||||
// architecture, not the operator.
|
||||
const NAME = "relay";
|
||||
|
||||
// Models exposed to the picker. The relay decides what actually runs —
|
||||
// these labels are placeholders so the picker can show something.
|
||||
const RELAY_TRANSCRIPTION_MODELS = ["relay-default"];
|
||||
const RELAY_ANALYSIS_MODELS = ["relay-default"];
|
||||
|
||||
export function createRelayProvider({
|
||||
baseURL,
|
||||
installId,
|
||||
licenseKey,
|
||||
timeoutMs = 900_000,
|
||||
} = {}) {
|
||||
if (!baseURL) {
|
||||
throw new Error(
|
||||
"createRelayProvider: baseURL is required (e.g. https://relay.keysat.xyz)"
|
||||
);
|
||||
}
|
||||
if (!installId) {
|
||||
throw new Error(
|
||||
"createRelayProvider: installId is required (boot must initInstallId first)"
|
||||
);
|
||||
}
|
||||
const base = baseURL.replace(/\/$/, "");
|
||||
|
||||
// Build the auth/identity headers attached to every relay call.
|
||||
// job_id is optional but the orchestration layer should always pass
|
||||
// one — without it the relay can't bundle the transcribe + analyze
|
||||
// pair into a single credit charge.
|
||||
function buildHeaders({ extra = {}, jobId } = {}) {
|
||||
const h = {
|
||||
"X-Recap-Install-Id": installId,
|
||||
...extra,
|
||||
};
|
||||
if (licenseKey) h["Authorization"] = `Bearer ${licenseKey}`;
|
||||
if (jobId) h["X-Recap-Job-Id"] = jobId;
|
||||
return h;
|
||||
}
|
||||
|
||||
// Common error-handling wrapper. The relay's contract is that ANY
|
||||
// response (success or failure) carries the standard envelope so
|
||||
// Recap can keep its balance display accurate even on errors. We
|
||||
// try to parse error bodies to harvest that.
|
||||
async function postRelay({ path, body, headers, signal }) {
|
||||
let res;
|
||||
try {
|
||||
res = await fetch(`${base}${path}`, {
|
||||
method: "POST",
|
||||
headers,
|
||||
body,
|
||||
signal,
|
||||
});
|
||||
} catch (err) {
|
||||
recordRelayError(err?.message || String(err));
|
||||
throw err;
|
||||
}
|
||||
const text = await res.text();
|
||||
let parsed = null;
|
||||
try {
|
||||
parsed = text ? JSON.parse(text) : null;
|
||||
} catch {}
|
||||
if (parsed && (typeof parsed.credits_remaining === "number" || parsed.tier)) {
|
||||
updateRelayState(parsed);
|
||||
}
|
||||
if (!res.ok) {
|
||||
const msg =
|
||||
parsed?.error ||
|
||||
parsed?.message ||
|
||||
text?.slice(0, 300) ||
|
||||
`HTTP ${res.status}`;
|
||||
const err = new Error(`Relay ${path} ${res.status}: ${msg}`);
|
||||
err.status = res.status;
|
||||
err.envelope = parsed;
|
||||
if (!parsed) recordRelayError(msg);
|
||||
throw err;
|
||||
}
|
||||
return parsed;
|
||||
}
|
||||
|
||||
return {
|
||||
name: NAME,
|
||||
|
||||
capabilities: {
|
||||
transcribe: true,
|
||||
analyze: true,
|
||||
// The relay's model catalog is internal — Recap doesn't pick.
|
||||
// listModels: false signals the picker to skip the dropdown.
|
||||
listModels: false,
|
||||
},
|
||||
|
||||
listAnalysisModels() {
|
||||
return [...RELAY_ANALYSIS_MODELS];
|
||||
},
|
||||
|
||||
listTranscriptionModels() {
|
||||
return [...RELAY_TRANSCRIPTION_MODELS];
|
||||
},
|
||||
|
||||
async transcribeAudio({
|
||||
filePath,
|
||||
mimeType,
|
||||
titleHint,
|
||||
channelHint = "",
|
||||
descriptionHint = "",
|
||||
chaptersHint = [],
|
||||
offsetSeconds = 0,
|
||||
onProgress = () => {},
|
||||
signal,
|
||||
jobId,
|
||||
}) {
|
||||
onProgress(
|
||||
`Uploading audio${offsetSeconds > 0 ? ` (offset ${formatTime(offsetSeconds)})` : ""} to relay (${base})...`
|
||||
);
|
||||
const start = Date.now();
|
||||
|
||||
// Use multipart form encoding so the audio binary doesn't have
|
||||
// to be base64-blown-up. Node 20+ provides global FormData; pair
|
||||
// it with a stream so we don't load the whole audio file into
|
||||
// memory.
|
||||
const form = new FormData();
|
||||
const blob = await fileToBlob(filePath, mimeType);
|
||||
form.append("audio", blob, "audio.bin");
|
||||
form.append("mime_type", mimeType || "application/octet-stream");
|
||||
if (titleHint) form.append("title", titleHint);
|
||||
if (channelHint) form.append("channel", channelHint);
|
||||
if (descriptionHint) form.append("description", descriptionHint);
|
||||
if (Array.isArray(chaptersHint) && chaptersHint.length > 0) {
|
||||
form.append("chapters", JSON.stringify(chaptersHint));
|
||||
}
|
||||
form.append("offset_seconds", String(offsetSeconds));
|
||||
|
||||
const envelope = await retryAPI(
|
||||
() =>
|
||||
postRelay({
|
||||
path: "/relay/transcribe",
|
||||
body: form,
|
||||
headers: buildHeaders({ jobId }),
|
||||
signal,
|
||||
}),
|
||||
{
|
||||
retries: 2,
|
||||
delayMs: 5000,
|
||||
label: `Relay transcribe${offsetSeconds > 0 ? ` (chunk@${formatTime(offsetSeconds)})` : ""}`,
|
||||
log: (msg) => onProgress(msg),
|
||||
}
|
||||
);
|
||||
|
||||
const elapsed = ((Date.now() - start) / 1000).toFixed(1);
|
||||
const remaining =
|
||||
typeof envelope.credits_remaining === "number"
|
||||
? `, ${envelope.credits_remaining} credits left`
|
||||
: "";
|
||||
onProgress(`Relay transcribe complete in ${elapsed}s${remaining}`);
|
||||
|
||||
// Relay's transcribe result shape: { text, segments?: [{start,
|
||||
// end, text}], duration? }. We don't fabricate segment timestamps
|
||||
// here — the orchestration layer's synthesizeEntriesFromText
|
||||
// handles single-segment-only responses.
|
||||
const result = envelope.result || {};
|
||||
const segments = Array.isArray(result.segments) ? result.segments : [];
|
||||
const lines = segments.length
|
||||
? segments.map(
|
||||
(s) => `[${formatTime(s.start || 0)}] ${(s.text || "").trim()}`
|
||||
)
|
||||
: [`[0:00] ${(result.text || "").trim()}`];
|
||||
const text = lines.join("\n");
|
||||
|
||||
// Cost from Recap's POV is always zero — credits are the unit,
|
||||
// and they're tracked separately. The orchestration layer's
|
||||
// cost-summing code keeps working unchanged.
|
||||
const cost = zeroCost({
|
||||
inputTokens: 0,
|
||||
outputTokens: 0,
|
||||
thinkingTokens: 0,
|
||||
});
|
||||
|
||||
return {
|
||||
text,
|
||||
usage: { inputTokens: 0, outputTokens: 0, thinkingTokens: 0, totalTokens: 0 },
|
||||
cost,
|
||||
finishReason: null,
|
||||
blockReason: "none",
|
||||
raw: envelope,
|
||||
};
|
||||
},
|
||||
|
||||
// Peek at the install's current credit balance without charging.
|
||||
// Used by /api/relay/status to populate the picker banner on boot
|
||||
// (or on demand) so the user sees real numbers before running any
|
||||
// summarize. Short timeout — if the relay is unreachable we want
|
||||
// the UI to fall back to "balance unknown" quickly, not hang.
|
||||
async pingBalance({ timeoutMs = 5000, signal } = {}) {
|
||||
const headers = buildHeaders({});
|
||||
const ac = new AbortController();
|
||||
const timer = setTimeout(() => ac.abort(), timeoutMs);
|
||||
const combined = signal
|
||||
? AbortSignal.any([signal, ac.signal])
|
||||
: ac.signal;
|
||||
try {
|
||||
const res = await fetch(`${base}/relay/balance`, {
|
||||
method: "GET",
|
||||
headers,
|
||||
signal: combined,
|
||||
});
|
||||
const text = await res.text();
|
||||
let parsed = null;
|
||||
try {
|
||||
parsed = text ? JSON.parse(text) : null;
|
||||
} catch {}
|
||||
if (parsed && (typeof parsed.credits_remaining === "number" || parsed.tier)) {
|
||||
updateRelayState(parsed);
|
||||
}
|
||||
if (!res.ok) {
|
||||
const msg =
|
||||
parsed?.error ||
|
||||
parsed?.message ||
|
||||
text?.slice(0, 300) ||
|
||||
`HTTP ${res.status}`;
|
||||
recordRelayError(msg);
|
||||
const err = new Error(`Relay /balance ${res.status}: ${msg}`);
|
||||
err.status = res.status;
|
||||
throw err;
|
||||
}
|
||||
return parsed;
|
||||
} catch (err) {
|
||||
if (err?.name === "AbortError") {
|
||||
recordRelayError(`balance ping timed out after ${timeoutMs}ms`);
|
||||
} else if (!err.status) {
|
||||
recordRelayError(err?.message || String(err));
|
||||
}
|
||||
throw err;
|
||||
} finally {
|
||||
clearTimeout(timer);
|
||||
}
|
||||
},
|
||||
|
||||
async analyzeText({
|
||||
prompt,
|
||||
onProgress = () => {},
|
||||
retries = 2,
|
||||
signal,
|
||||
jobId,
|
||||
}) {
|
||||
const start = Date.now();
|
||||
const headers = buildHeaders({
|
||||
extra: { "Content-Type": "application/json" },
|
||||
jobId,
|
||||
});
|
||||
|
||||
const envelope = await retryAPI(
|
||||
() =>
|
||||
postRelay({
|
||||
path: "/relay/analyze",
|
||||
body: JSON.stringify({ prompt }),
|
||||
headers,
|
||||
signal,
|
||||
}),
|
||||
{
|
||||
retries,
|
||||
delayMs: 5000,
|
||||
label: "Relay analyze",
|
||||
log: (msg) => onProgress(msg),
|
||||
}
|
||||
);
|
||||
|
||||
const elapsed = ((Date.now() - start) / 1000).toFixed(1);
|
||||
const remaining =
|
||||
typeof envelope.credits_remaining === "number"
|
||||
? `, ${envelope.credits_remaining} credits left`
|
||||
: "";
|
||||
onProgress(`Relay analyze complete in ${elapsed}s${remaining}`);
|
||||
|
||||
const result = envelope.result || {};
|
||||
const text = typeof result.text === "string" ? result.text : "";
|
||||
const cost = zeroCost({
|
||||
inputTokens: 0,
|
||||
outputTokens: 0,
|
||||
thinkingTokens: 0,
|
||||
});
|
||||
|
||||
return {
|
||||
text,
|
||||
usage: { inputTokens: 0, outputTokens: 0, thinkingTokens: 0, totalTokens: 0 },
|
||||
cost,
|
||||
finishReason: null,
|
||||
raw: envelope,
|
||||
};
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
// Streams a file off disk into a Blob with the given MIME type for
|
||||
// FormData upload. Node's global Blob/File don't accept a stream
|
||||
// directly the way browser File objects do, so we read into a Buffer
|
||||
// here. Acceptable for typical podcast chunk sizes (~50 MB at most
|
||||
// after the orchestration layer's 45-min split).
|
||||
async function fileToBlob(filePath, mimeType) {
|
||||
const { promises: fsp } = await import("fs");
|
||||
const buf = await fsp.readFile(filePath);
|
||||
return new Blob([buf], { type: mimeType || "application/octet-stream" });
|
||||
}
|
||||
Reference in New Issue
Block a user