Files
recap/server/index.js
T
Keysat b4fa5d7be8 Add opt-in Daily Digest (daily email of last 24h of library recaps)
Multi-mode, off by default. Each new recap is synthesized into a 1-2
paragraph overview via the relay (operator-absorbed) and cached onto the
session JSON; a daily 08:00 scan emails opted-in users their fresh
recaps, deduped by a per-user watermark that never skips a failed or
over-cap recap. One-click tokenized unsubscribe; settings-modal toggle;
admin test trigger. Bumps to 0.2.158.
2026-06-15 19:50:48 -05:00

4377 lines
183 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import express from "express";
import cors from "cors";
import { execFile } from "child_process";
import { promisify } from "util";
import fs from "fs/promises";
import { createWriteStream } from "fs";
import path from "path";
import os from "os";
import https from "https";
import http from "http";
import { randomUUID } from "crypto";
import * as license from "./license.js";
import {
sendEvent,
extractVideoId,
formatTime,
parseTimestampedTranscript,
synthesizeEntriesFromText,
safeText,
retryGemini,
fetchUrl,
} from "./util.js";
import { buildAnalysisPrompt } from "./gemini-helpers.js";
import { runChunkedAnalysis, CHUNKING_CUTOFF_SECONDS } from "./chunked-analyze.js";
import { getProvider, resolveProviderOpts, PROVIDER_NAMES } from "./providers/index.js";
import {
getAudioDuration,
splitAudioFile,
downloadPodcastAudio,
} from "./audio.js";
import { checkYtdlp, autoUpdateYtdlp } from "./ytdlp.js";
import {
initCookies,
ytCookieArgs,
ytExtraArgs,
ytCookieMethod,
setupCookieRoutes,
getCookieFilePath,
} from "./cookies.js";
import * as config from "./config.js";
import { initInstallId, getInstallId } from "./install-id.js";
import * as relayState from "./relay-state.js";
import * as relayDefault from "./relay-default.js";
import {
startRelayCapabilitiesRefresh,
getRelayCapabilities,
refreshRelayCapabilities,
} from "./relay-capabilities.js";
import {
isResolvableShareUrl,
isApplePodcastUrl,
isSpotifyUrl,
resolveShareUrl,
URLResolveError,
} from "./url-resolver.js";
import { resolveApiKey, getConfigSnapshot } from "./config.js";
import * as licenseMW from "./license-middleware.js";
import { setupLicensePurchaseRoutes } from "./license-purchase.js";
import {
setupCreditsPurchaseRoutes,
sweepUnappliedPurchases,
} from "./credits-purchase.js";
import {
setupLicenseMiddleware,
setupLicenseRoutes,
startLicenseRefresh,
refreshLicenseOnline,
isFreeUser,
tryAcquireFreeSlot,
releaseFreeSlot,
getCurrentFreeJob,
abortCurrentFreeJob,
isFreeJobAborted,
appendCurrentJobLog,
} from "./license-middleware.js";
import {
initHistory,
saveToHistory,
loadMeta,
saveMeta,
setupHistoryRoutes,
getHistoryDir,
scopeForRequest,
migrateLegacyLibrary,
} from "./history.js";
import {
getProcessedVideoIds,
isKnownVideo,
loadSubscriptions,
saveSubscriptions,
loadSkipList,
addToSkipList,
loadSeenList,
addToSeenList,
loadAutoQueue,
saveAutoQueue,
mutateAutoQueue,
listSubscriptionScopes,
listAutoQueueScopes,
migrateGlobalSubscriptionsToOwner,
} from "./subscriptions.js";
import {
mintInternalSession,
deleteInternalSession,
adminUserId,
SESSION_COOKIE,
} from "./auth-routes.js";
import { setupLibraryRoutes } from "./library.js";
import { setupTtsRoutes } from "./tts-routes.js";
import {
initAdminAuth,
setupAdminAuthMiddleware,
setupAdminAuthRoutes,
} from "./admin-auth.js";
import { buildTenantAuthMiddleware } from "./tenant-auth.js";
const execFileAsync = promisify(execFile);
const app = express();
// Trust the operator's reverse proxy (StartOS / StartTunnel, or a cloud proxy)
// so req.ip is the real client address rather than a client-spoofable
// X-Forwarded-For entry. The value is how many trusted proxies sit in front of
// this process — default 1 (the StartOS/StartTunnel hop). Erring low is safe
// (it can only over-count clients onto one IP, hitting the trial cap sooner);
// erring high would re-open the trial-cap bypass. Override via
// RECAP_TRUSTED_PROXY_HOPS (0 = no proxy in front; use the socket address only).
// Number of trusted reverse-proxy hops, read from RECAP_TRUSTED_PROXY_HOPS.
// Anything that does not parse to a non-negative integer (unset, garbage,
// negative) falls back to a single hop.
const hopsParsed = Number.parseInt(process.env.RECAP_TRUSTED_PROXY_HOPS, 10);
const trustedProxyHops =
  !Number.isInteger(hopsParsed) || hopsParsed < 0 ? 1 : hopsParsed;
app.set("trust proxy", trustedProxyHops);
const PORT = process.env.PORT || 3001;
// ── Multi-tenant mode toggle ────────────────────────────────────────────
// RECAP_MODE is set by startos/main.ts based on the recap_mode field of
// the config. "single" (default) preserves the original self-hosted
// behavior; "multi" enables email + magic-link auth, per-user libraries,
// per-user keysat licenses, and BTCPay subscriptions.
//
// Anything outside "multi" is treated as single — defensive parsing so
// a stray env value can't accidentally enable multi-tenant routing.
const RECAP_MODE = process.env.RECAP_MODE === "multi" ? "multi" : "single";
console.log(`[boot] RECAP_MODE=${RECAP_MODE}`);
// ── Data directory (configurable for StartOS or local dev) ────────────────
// On StartOS: DATA_DIR=/data (persistent volume)
// On local dev: defaults to project root (parent of server/)
// Directory containing this module. Converted with fileURLToPath rather than
// reading URL.pathname: pathname stays percent-encoded (a space becomes
// "%20") and is malformed on Windows ("/C:/..."), which would point the
// default data dir and the static roots at a path that does not exist.
// Loaded inline so no new top-level binding is introduced.
const __dirname = path.dirname(
  (await import("url")).fileURLToPath(import.meta.url),
);
// DATA_DIR env override wins; otherwise the project root (parent of server/).
const DATA_DIR = process.env.DATA_DIR || path.join(__dirname, "..");
const historyDir = path.join(DATA_DIR, "history");
const configDir = path.join(DATA_DIR, "config");
await fs.mkdir(historyDir, { recursive: true }).catch(() => {});
await fs.mkdir(configDir, { recursive: true }).catch(() => {});
await initHistory({ dataDir: DATA_DIR, mode: RECAP_MODE });
// Per-install identity for the upcoming relay backend. Generated once
// on first boot, persisted in DATA_DIR. Surfaced via /api/health and
// /api/install-id (read-only). See ./install-id.js for details.
await initInstallId({ dataDir: DATA_DIR });
// API key + live reload moved to ./config.js
await config.initConfig({ dataDir: DATA_DIR });
const envPath = config.getEnvPath();
// Cookies state + helpers + routes moved to ./cookies.js
await initCookies({ dataDir: DATA_DIR, envPath });
// Admin login gate state. Reads username + scrypt hash + session secret
// from /data/config/startos-config.json (set via the "Set Admin
// Password" StartOS action) and refreshes on the same poll cadence as
// config.js. When no hash is set, the gate is a no-op.
//
// Single-mode only. In multi-tenant mode the operator authenticates as
// a regular user (is_admin = 1) via magic-link, so the legacy admin
// gate is bypassed entirely.
if (RECAP_MODE === "single") {
await initAdminAuth({ dataDir: DATA_DIR });
}
// ── Multi-tenant store + outbound mail ─────────────────────────────────
// Only initialized when RECAP_MODE === 'multi'. Lazy-imported so the
// single-mode boot never touches the native better-sqlite3 binding or
// loads nodemailer.
if (RECAP_MODE === "multi") {
const { initDb } = await import("./db.js");
const { initSmtp } = await import("./smtp.js");
await initDb({ dataDir: DATA_DIR });
initSmtp();
// One-time migration: pre-0.2.77 single-mode installs wrote
// /data/history/*.json flat. Move them into /data/history/owner/ so
// they're addressable under a scope. Idempotent (sentinel-guarded).
try {
await migrateLegacyLibrary();
} catch (err) {
console.warn("[boot] migrateLegacyLibrary failed:", err);
}
// Fixup for installs upgraded from <0.2.91: the first admin signup
// used to rename /data/history/owner/ → /data/history/<admin_user_id>/.
// The new convention is admin → always "owner" so multi↔single mode
// switching is reversible. If the legacy rename happened, reverse it
// here. Idempotent — runs only when the data layout requires it.
try {
const { reclaimAdminLibraryToOwner } = await import("./history.js");
const { getDb } = await import("./db.js");
await reclaimAdminLibraryToOwner({ db: getDb() });
} catch (err) {
console.warn("[boot] reclaimAdminLibraryToOwner failed:", err);
}
}
// resolveApiKey moved to ./config.js
app.use(cors());
app.use(express.json({ limit: "100mb" }));
// ── Tenant auth (multi-mode) / synthetic owner (single-mode) ───────────
// MUST run before every other /api/* middleware so downstream handlers
// can rely on req.userId being populated. In single mode the middleware
// is a no-op shim that stamps req.userId="owner". In multi mode it
// validates the recap_session cookie and 401s non-public paths.
app.use(buildTenantAuthMiddleware({ mode: RECAP_MODE }));
// Single-mode whoami stub. Lets the frontend probe what mode it's
// running in WITHOUT branching on a separate config endpoint. In
// single mode every "user" is the operator, so always state:signed_in
// with is_admin:true — the existing settings UI is the right thing to
// render for them.
if (RECAP_MODE === "single") {
app.get("/api/account/whoami", (req, res) => {
res.json({
recap_mode: "single",
state: "signed_in",
user: { id: "owner", is_admin: true, has_license: true },
});
});
}
// ── Magic-link auth + whoami (multi mode only) ──────────────────────────
// /auth/request-link, /auth/verify, /auth/signout, /api/account/whoami.
// These have to be registered AFTER the tenant-auth middleware (so the
// public-path bypass routes them past the 401 gate) but BEFORE any
// /api/* route that assumes req.userId is set.
if (RECAP_MODE === "multi") {
const { setupAuthRoutes } = await import("./auth-routes.js");
const { setupAdminRoutes } = await import("./admin-routes.js");
const { setupAccountRoutes } = await import("./account-routes.js");
const { setupBillingRoutes } = await import("./billing-routes.js");
setupAuthRoutes(app);
// Account routes (per-user session management). Mount before admin
// routes so request-handler order is predictable; both use different
// paths so no conflict.
setupAccountRoutes(app);
// Admin routes (tenant list, grant credits, recent-signups). Every
// route inside is wrapped in requireOperator — non-admins hit 403.
setupAdminRoutes(app);
// Self-serve purchase: POST /api/billing/buy + GET /api/billing/status.
// A signed-in Core user buys their own Pro/Max period via the relay's
// BTCPay invoice. Multi-mode only (single-mode operators carry a
// license, not a relay-owned cloud tier).
setupBillingRoutes(app);
// Expiry-reminder emails: a daily scan asks the relay who's expiring and
// emails them via the recaps.cc SMTP transport. Self-gates on SMTP +
// public URL + relay being configured, so it's a safe no-op until then.
const { startReminderScheduler } = await import("./subscription-reminders.js");
startReminderScheduler();
// Daily Digest: opt-in (off by default) once-a-day email of a user's
// last ~24h of library recaps. Same self-gating shape as reminders —
// no-op until SMTP + public URL are set. The one-click unsubscribe GET
// is public (whitelisted in tenant-auth) since the email has no session.
const { startDigestScheduler, setupDigestRoutes } = await import(
"./daily-digest.js"
);
setupDigestRoutes(app);
startDigestScheduler();
// /api/account/whoami — frontend hits this on every page load to
// determine which UI state to render:
// - signed-in user → full app
// - active trial w/budget → app + "N trials left" badge
// - trial exhausted → sign-up screen
// - no cookies → landing page (paste-and-try)
// Handler contract: every response carries `recap_mode: "multi"` plus a
// `state` discriminator chosen in this order:
//   1. req.user set  → state "signed_in" with a `user` summary
//   2. req.trial set → state "trial" with a `trial` credit summary
//   3. neither       → state "anonymous" with trial/signup credit hints
// req.user and req.trial are only read here; they are populated upstream
// (presumably by the tenant-auth middleware — confirm there).
// Purchase-sweep failures are logged and never fail the request.
app.get("/api/account/whoami", async (req, res) => {
// Fields shared by all three response shapes.
const base = { recap_mode: "multi" };
if (req.user) {
// Self-healing sweep: if the user paid for credits but the
// BTCPay redirect killed the frontend poller before /api/credits/
// invoice/:id could fire, the pending_purchases row stays
// unapplied. Catch it up here so the very next whoami call
// (every page load) reflects the correct balance.
try {
await sweepUnappliedPurchases({
buyerType: "user",
buyerId: req.user.id,
req,
});
} catch (err) {
console.warn(
`[whoami] purchase sweep failed for user ${req.user.id}: ${err?.message || err}`,
);
}
// Only booleans derived from keysat_license / password_hash are exposed,
// never the underlying values.
return res.json({
...base,
state: "signed_in",
user: {
id: req.user.id,
email: req.user.email,
display_name: req.user.display_name,
is_admin: !!req.user.is_admin,
has_license: !!req.user.keysat_license,
has_password: !!req.user.password_hash,
created_at: req.user.created_at,
},
});
}
if (req.trial) {
// Same sweep for anon visitors who bought a credit pack — apply
// any settled invoices to anon_trials.credits_total before we
// compute credits_remaining for the response.
try {
await sweepUnappliedPurchases({
buyerType: "anon",
buyerId: req.trial.cookie_id,
req,
});
// Re-read the trial row so the response reflects any credits
// just applied by the sweep.
const { lookupTrial } = await import("./anon-trial.js");
const refreshed = lookupTrial(req.trial.cookie_id);
if (refreshed) req.trial = refreshed;
} catch (err) {
console.warn(
`[whoami] purchase sweep failed for trial ${req.trial.cookie_id}: ${err?.message || err}`,
);
}
// NOTE: credits_remaining is a plain subtraction here (not clamped at 0).
return res.json({
...base,
state: "trial",
trial: {
credits_total: req.trial.credits_total,
credits_used: req.trial.credits_used,
credits_remaining:
req.trial.credits_total - req.trial.credits_used,
},
});
}
// Anonymous — include the operator-configured trial allowance AND
// the post-signup grant so the UI can compose dynamic copy on the
// tier signup modal ("3 trial credits, +5 more on signup" vs
// "your trial credits transfer over"). Without exposing both, the
// Free card has to hardcode a number that goes stale the moment
// an operator tunes their config.
//
// ALSO check whether this visitor's IP is at the lifetime
// trials_per_ip_lifetime cap. If yes, force available_trial_credits
// to 0 and stamp a trial_blocked_reason so the UI can swap the
// misleading "N free credits ready" pill for "you've used your
// free trial — sign up or buy credits" copy. Without this gate
// the pill advertises credits the visitor literally cannot mint
// (anon-trial.js's issueIfEligible() will refuse), the user
// submits a video, the relay rejects, and they see a confusing
// "Processing failed" with the player half-loaded.
let availableTrialCredits = 0;
let signupGrantCredits = 0;
let trialBlockedReason = null;
try {
const snap = await getConfigSnapshot();
// Missing trial_credits_per_visitor defaults to 1; unparsable values
// become 0; negatives are clamped to 0.
availableTrialCredits = Math.max(
0,
parseInt(snap?.trial_credits_per_visitor ?? 1, 10) || 0,
);
// Missing or unparsable tenant_default_credits becomes 0.
signupGrantCredits = Math.max(
0,
parseInt(snap?.tenant_default_credits ?? 0, 10) || 0,
);
// IP cap check — only meaningful in multi mode where the
// anon-trial system exists. Lazy-import so single-mode doesn't
// pull the trial DB in unnecessarily.
if (availableTrialCredits > 0 && RECAP_MODE === "multi") {
try {
const { getClientIp, ipTrialsLifetime } =
await import("./anon-trial.js");
const ip = getClientIp(req);
// Cap comes from trials_per_ip_lifetime, then trials_per_ip_per_day,
// then 5; a parsed value of 0 or NaN also becomes 5, and the result
// is never below 1.
const perIpLifetime = Math.max(
1,
parseInt(
snap?.trials_per_ip_lifetime ?? snap?.trials_per_ip_per_day ?? 5,
10,
) || 5,
);
if (ip && ipTrialsLifetime(ip) >= perIpLifetime) {
availableTrialCredits = 0;
trialBlockedReason = "ip_cap_reached";
}
} catch (err) {
// best-effort — if the cap check fails we don't want to
// suddenly downgrade a legitimate visitor's pill to zero.
// Leave the configured value and log for diagnosis.
console.warn(
`[whoami] IP cap check failed: ${err?.message || err}`,
);
}
}
} catch {
// best-effort; fall through with 0 so UI degrades to "Sign in"
// CTA rather than a misleading "N free credits" badge.
}
res.json({
...base,
state: "anonymous",
available_trial_credits: availableTrialCredits,
signup_grant_credits: signupGrantCredits,
// Set only when the visitor's IP can't mint a new trial cookie.
// The frontend uses this to swap the "N free credits ready"
// pill for an honest "trial used up — sign up / buy" CTA AND
// to refuse the optimistic submit-flow (no video player
// render, no fake processing status).
trial_blocked_reason: trialBlockedReason,
});
});
}
// ── Admin login gate (single mode only) ─────────────────────────────────
// MUST run before the license middleware: if an admin password is set,
// nobody (licensed or not) reaches the activation flow without first
// passing the login. Endpoints needed by the login UI itself
// (/api/admin/status, /api/admin/login, /api/admin/logout) and
// /api/health stay open.
if (RECAP_MODE === "single") {
setupAdminAuthMiddleware(app);
setupAdminAuthRoutes(app);
}
// ── Keysat licensing ────────────────────────────────────────────────────────
// All license-aware request handling (gate, Pro feature gates, /api/license
// routes, free-tier slot management, periodic online refresh) lives in
// ./license-middleware.js. Importers read the current state via
// licenseMW.LIC (a live binding).
setupLicenseMiddleware(app);
setupLicenseRoutes(app);
// In-app purchase flow: proxies Keysat's public policies + purchase
// + poll endpoints so the buy page renders in Recap's own visual
// style instead of being redirected to Keysat's hosted /buy/<slug>
// page. On a settled invoice the issued license is written to disk
// and licenseMW.refreshLicenseOnline picks it up immediately.
setupLicensePurchaseRoutes(app, {
onLicenseActivated: () => licenseMW.refreshLicenseOnline("post-purchase"),
});
// Credit-purchase proxy: lets the buyer top-up relay credits via the
// operator's BTCPay store. All BTCPay credentials live on the relay
// — Recap just forwards the buyer's pick and proxies the polling.
setupCreditsPurchaseRoutes(app);
startLicenseRefresh();
// Boot-time fetch of the relay's /relay/capabilities so the chunking
// decision in /api/process can honor the operator's current backend
// preference. Refreshes hourly. Safe defaults are used until the
// first successful fetch lands.
startRelayCapabilitiesRefresh();
// History storage + routes moved to ./history.js
// (saveToHistory, loadMeta, saveMeta are imported above)
// addToSkipList is scope-keyed (./subscriptions.js): deleting a session
// suppresses re-queueing that video in the SAME scope's subscriptions.
setupHistoryRoutes(app, { addToSkipList: (scope, id) => addToSkipList(scope, id) });
// Audio-first ("walking mode") TTS routes — synthesize + cache + serve
// per-topic summary clips. Self-gates access (Max in multi mode); the
// /api/tts prefix is license-exempt so the route's own gate decides.
setupTtsRoutes(app);
// Serve the frontend from ../public
app.use(express.static(path.join(__dirname, "..", "public")));
app.use("/assets", express.static(path.join(__dirname, "..", "assets")));
// checkYtdlp + autoUpdateYtdlp moved to ./ytdlp.js
// PRICING + calcCost + buildAnalysisPrompt moved to ./gemini-helpers.js
// safeText + retryGemini moved to ./util.js
// ── Health check ───────────────────────────────────────────────────────────
// GET /api/health — reports yt-dlp availability, whether a server API key is
// configured, the cookie method in use, and the install id. When the cookie
// method is "cookies.txt" the cookie file's age in whole days is included.
app.get("/api/health", async (req, res) => {
  const info = await checkYtdlp();

  const method = ytCookieMethod();
  const cookieInfo = { method };
  if (method === "cookies.txt") {
    try {
      const { mtimeMs } = await fs.stat(getCookieFilePath());
      const MS_PER_DAY = 1000 * 60 * 60 * 24;
      const ageDays = Math.floor((Date.now() - mtimeMs) / MS_PER_DAY);
      cookieInfo.fileAgeDays = ageDays;
      // cookies typically expire after ~14 days, so warn from day 13 on
      cookieInfo.fileExpiring = ageDays > 12;
    } catch {
      // Cookie file could not be stat'ed — report the method alone.
    }
  }

  // `info` is spread last, so its keys win over the explicit ones on clash.
  res.json({
    ok: true,
    ytdlp: info.installed,
    hasServerKey: !!config.serverApiKey,
    cookies: cookieInfo,
    installId: getInstallId(),
    ...info,
  });
});
// Read-only install identity. Used by the UI's settings panel so the
// operator can verify the install has been provisioned, and by the
// future relay client to attach the X-Recap-Install-Id header. Open
// path — license gate doesn't apply (the relay needs this ID to be
// reachable before any credits have been granted).
app.get("/api/install-id", (_req, res) => {
res.json({ installId: getInstallId() });
});
// Proxy through to the relay's /relay/policy endpoint. Used by the UI
// to render dynamic copy (e.g. "N relay credits" in the activation
// screen reflects whatever the operator currently has the relay
// configured to give Core users — no Recap update needed when the
// operator tunes tier quotas). Cached in-memory for a short window
// so a busy UI doesn't hammer the relay.
// Most recent successful /relay/policy body and the time it was fetched.
let __cachedRelayPolicy = { at: 0, body: null };

// GET /api/relay/policy — returns the relay's policy document, served from
// the in-memory cache for up to five minutes. Upstream failures are reported
// in-band as `{ configured: true, tiers: null, error }` and are not cached.
app.get("/api/relay/policy", async (_req, res) => {
  const relayBase = relayDefault.getRelayBaseURL();
  if (!relayBase) {
    return res.json({ configured: false, tiers: null });
  }

  const CACHE_TTL_MS = 5 * 60 * 1000;
  const { at: cachedAt, body: cachedBody } = __cachedRelayPolicy;
  if (cachedBody && Date.now() - cachedAt < CACHE_TTL_MS) {
    return res.json({ configured: true, ...cachedBody });
  }

  // Shared shape for "relay configured but policy unavailable".
  const replyUnavailable = (message) =>
    res.json({
      configured: true,
      tiers: null,
      error: message,
    });

  try {
    const policyUrl = `${relayBase.replace(/\/$/, "")}/relay/policy`;
    const upstream = await fetch(policyUrl, {
      signal: AbortSignal.timeout(5000),
    });
    if (!upstream.ok) {
      return replyUnavailable(`HTTP ${upstream.status}`);
    }
    const body = await upstream.json();
    __cachedRelayPolicy = { at: Date.now(), body };
    res.json({ configured: true, ...body });
  } catch (err) {
    replyUnavailable(err?.message || String(err));
  }
});
// Last-known relay state (credits + tier) cached in-process. The UI
// polls this for the "N credits remaining · Tier: X" banner; the
// underlying numbers are refreshed every time a relay provider call
// lands.
//
// First-paint UX: when the cache is empty (no relay calls yet AND no
// prior ping has populated it), opportunistically hit the relay's
// /balance endpoint with a short timeout so the banner shows real
// numbers on first page load instead of "balance unknown". Best
// effort — if the relay is unreachable the cache stays null and the
// UI falls back to its "unknown" copy. `?refresh=1` forces a ping
// even when the cache already has a value.
//
// `configured` is true when relay-default.js has a non-empty URL —
// operator-controlled at build time, never user-configurable.
// Handler flow, in order:
//   1. Multi mode, requester is not an admin:
//        - no user and no trial → null balance, scope "anonymous"
//        - trial                → local trial balance, scope "trial"
//        - signed-in, tier is not pro/max → tenant-credits balance,
//          scope "free_tenant"
//        - signed-in, tier pro/max → falls through to step 2
//   2. Everyone else (single mode, multi-mode admins, paid users): read the
//      relay-state snapshot for this request's credit key, pinging the relay
//      first when the snapshot is empty, older than 10 s, or `?refresh` is
//      "1"/"true" — and only when a relay base URL is configured.
// The handler always answers with JSON; ping failures are recorded into the
// snapshot rather than surfaced as an HTTP error.
app.get("/api/relay/status", async (req, res) => {
const configured = !!relayDefault.getRelayBaseURL();
// ── Multi-mode: return per-user credit view ────────────────────────────
// Each user role sees a different "credits remaining" number:
// - trial: trial.credits_total - trial.credits_used (local)
// - free tenant: tenant_credits.balance (local, set by operator)
// - paid user: their relay-pool balance (relay-side, license-keyed)
// - admin: the operator's relay pool (current single-mode behavior)
// - anonymous: no credits to show; configured + null balance
//
// The frontend reads `creditsRemaining` and `tier` and renders a
// single badge. We construct the right shape for the requester here.
// Admins skip this whole branch and use the relay-pool lookup further down.
if (RECAP_MODE === "multi" && !(req.user && req.user.is_admin)) {
// Anonymous + no trial cookie at all.
if (!req.user && !req.trial) {
return res.json({
configured,
creditsRemaining: null,
tier: null,
scope: "anonymous",
lastUpdated: Date.now(),
lastError: null,
});
}
// Self-healing sweep — catch any settled-but-unapplied credit
// purchases for this buyer before we report their balance. Without
// it, an anon or signed-in user who pays via BTCPay but loses the
// poll loop on redirect sees a stale balance until they manually
// re-trigger the apply (which there's no UI for). Cheap: bounded
// query (5 invoices, 30-min lookback) + short-timeout relay calls.
try {
if (req.user) {
await sweepUnappliedPurchases({
buyerType: "user",
buyerId: req.user.id,
req,
});
} else if (req.trial) {
await sweepUnappliedPurchases({
buyerType: "anon",
buyerId: req.trial.cookie_id,
req,
});
// Re-read trial row so the response uses post-sweep credits.
const { lookupTrial } = await import("./anon-trial.js");
const refreshed = lookupTrial(req.trial.cookie_id);
if (refreshed) req.trial = refreshed;
}
} catch (err) {
console.warn(
`[relay/status] purchase sweep failed: ${err?.message || err}`,
);
}
// Trial visitor.
// NOTE: this check runs before the req.user checks, so a request carrying
// both a user and a trial is reported as a trial.
if (req.trial) {
// Clamped at 0 so an over-spent trial never shows a negative balance.
const remaining = Math.max(
0,
(req.trial.credits_total || 0) - (req.trial.credits_used || 0),
);
return res.json({
configured,
creditsRemaining: remaining,
tier: "trial",
scope: "trial",
lastUpdated: Date.now(),
lastError: null,
});
}
// Core-decoupling: a user's paid status is their relay-owned tier
// (req.user.tier), NOT any leftover Keysat license. A PAID cloud user
// (tier pro/max) falls through to the real relay ping below, which
// pickRelayIdentity routes to their cloud user:<id> pool. Everyone
// else signed in is a free tenant on the operator pool — regardless of
// whether they still carry a legacy license (which is now ignored).
const isPaidCloudUser =
req.user && (req.user.tier === "pro" || req.user.tier === "max");
// Free tenant. Lazy replenish-check happens inside getOrInit, so the
// displayed balance reflects any due refill.
if (req.user && !isPaidCloudUser) {
try {
const { getOrInit } = await import("./tenant-credits.js");
const credits = await getOrInit(req.user.id);
return res.json({
configured,
creditsRemaining: credits?.total ?? 0,
// Surface the bucket split so the UI can show "5 free + 10
// purchased = 15 total" if it wants. Frontend currently
// just reads creditsRemaining; the extra fields are
// additive.
purchasedBalance: credits?.purchased ?? 0,
replenishBalance: credits?.replenish ?? 0,
replenishPeriod: credits?.period ?? "off",
tier: "free",
scope: "free_tenant",
lastUpdated: Date.now(),
lastError: null,
});
} catch (err) {
// Lookup failed: keep the free-tenant shape but report an unknown balance.
console.warn("[relay/status] tenant_credits lookup failed:", err);
return res.json({
configured,
creditsRemaining: null,
tier: "free",
scope: "free_tenant",
lastUpdated: Date.now(),
lastError: "balance_unknown",
});
}
}
// Paid cloud user (tier pro/max) — fall through to the real relay
// ping. pickRelayIdentity routes it to their cloud user:<id> identity
// (or the operator pool if the operator key isn't configured), so the
// response reflects the pool their calls actually bill.
}
// Determine which credit-key this request would use against the relay.
// The relay-state cache is keyed by the same value so each (operator
// / paid user) gets its own snapshot — no more clobbering when an
// admin and a tenant hit the endpoint in the same Node process.
// Constructs a temporary provider just to inherit its creditKey
// logic; cheap (no network).
// NOTE(review): these two calls sit outside any try/catch, so a throw here
// rejects the handler instead of producing a JSON error — confirm intended.
const identityCfg = await getConfigSnapshot();
const probeOpts = resolveProviderOpts("relay", {
config: identityCfg,
clientOpts: {},
req,
});
const requestCreditKey = relayState.computeCreditKey({
// Cloud (core-decoupling) identity keys by userId; license/install
// identities key by their respective fields. probeOpts carries
// exactly one of these shapes depending on pickRelayIdentity, so
// pass all three — computeCreditKey picks userId → license → install
// in priority order, matching what the provider records under.
installId: probeOpts.installId,
licenseKey: probeOpts.licenseKey,
userId: probeOpts.userId,
});
let snapshot = relayState.getRelayState(requestCreditKey);
const wantsRefresh = req.query.refresh === "1" || req.query.refresh === "true";
// "Empty" means neither a balance nor a tier has ever been recorded.
const cacheEmpty = snapshot.creditsRemaining === null && snapshot.tier === null;
// Auto-refresh stale cache. Without this, /api/relay/status returns
// whatever was last cached forever, missing balance changes that
// happen outside Recap's request flow — e.g. an operator-side
// BTCPay webhook crediting purchased invoices, or the rescan tool
// in the relay dashboard. 10-second TTL is short enough that
// out-of-band changes show up promptly while still avoiding a
// ping on every dashboard render burst.
const STALE_THRESHOLD_MS = 10_000;
const cacheStale =
!cacheEmpty &&
typeof snapshot.lastUpdated === "number" &&
Date.now() - snapshot.lastUpdated > STALE_THRESHOLD_MS;
if (configured && (cacheEmpty || wantsRefresh || cacheStale)) {
try {
const provider = getProvider("relay", probeOpts);
await provider.pingBalance({ timeoutMs: 5000 });
// Re-read: the snapshot is fetched again after the ping rather than
// taken from pingBalance's return value.
snapshot = relayState.getRelayState(requestCreditKey);
} catch (err) {
// Log + record so the UI shows a real error instead of a silent
// "balance unknown". The provider call itself records when the
// ping reaches the network; this catches the earlier failures
// (e.g. createRelayProvider throwing because install-id wasn't
// initialized yet) where lastError wouldn't otherwise be set.
console.error(
`[relay/status] ping failed: ${err?.message || err} (stack: ${err?.stack || "n/a"})`
);
relayState.recordRelayError(err?.message || String(err), requestCreditKey);
snapshot = relayState.getRelayState(requestCreditKey);
}
}
// `configured` is written last so it overrides any same-named snapshot key.
res.json({ ...snapshot, configured });
});
// ── Status endpoints ───────────────────────────────────────────────────────
app.post("/api/heartbeat", (req, res) => {
res.json({ ok: true, sleeping: false });
});
app.get("/api/status", (req, res) => {
res.json({ ok: true, sleeping: false, uptime: process.uptime() });
});
// Shutdown: used by the macOS .app launcher to stop the server cleanly.
// On StartOS this endpoint is unused (StartOS manages the container lifecycle).
// POST /api/shutdown — acknowledges the request, then exits the process
// after 300 ms so the response has time to flush.
//
// In multi-tenant mode this is restricted to the operator (is_admin): without
// the guard, any tenant who can reach the route could take the whole service
// down for everyone. Single mode is unchanged.
app.post("/api/shutdown", (req, res) => {
  if (RECAP_MODE === "multi" && !req.user?.is_admin) {
    return res.status(403).json({ ok: false, error: "forbidden" });
  }
  res.json({ ok: true, message: "Server shutting down..." });
  console.log("\n Server shutdown requested from browser. Goodbye!\n");
  setTimeout(() => process.exit(0), 300);
});
// ── Manual update endpoint ─────────────────────────────────────────────────
// POST /api/update-ytdlp — runs the yt-dlp updater, then reports the update
// result merged with a fresh yt-dlp check (the check's keys win on clash).
//
// Failures are answered with a JSON 500. Without the try/catch a rejected
// promise from this async handler is not forwarded to Express 4's error
// handling, leaving the request hanging until the client times out.
app.post("/api/update-ytdlp", async (req, res) => {
  try {
    const result = await autoUpdateYtdlp(DATA_DIR);
    const info = await checkYtdlp();
    res.json({ ...result, ...info });
  } catch (err) {
    console.error(`[update-ytdlp] failed: ${err?.message || err}`);
    res.status(500).json({ ok: false, error: err?.message || String(err) });
  }
});
// ── Cookie management endpoints ───────────────────────────────────────────
// /api/cookies/* routes registered via setupCookieRoutes (./cookies.js)
setupCookieRoutes(app);
// ── Library export/import ──── moved to ./library.js ─────────
setupLibraryRoutes(app);
// ── Subscriptions ─────────────────────────────────────────────────────────
// Storage (subscriptions / skip / seen / auto-queue) + dedup live in
// ./subscriptions.js, keyed by scope. Endpoints pass scopeForRequest(req);
// the check loop + processor operate on SUBSCRIPTIONS_SCOPE ("owner") while
// the feature is operator-only in multi mode.
/**
 * List the most recent videos of a channel or playlist via yt-dlp without
 * downloading anything. Uses --flat-playlist for speed, so only id and title
 * are available (no upload_date in flat mode).
 *
 * @param {string} url - Channel or playlist URL handed to yt-dlp.
 * @param {number} [limit=15] - Maximum number of entries (--playlist-end).
 * @returns {Promise<Array<{id: string, title: string}>>} Entries in yt-dlp's
 *   output order.
 * @throws Rejects when yt-dlp fails or exceeds the 60 s timeout.
 */
async function listChannelVideosFast(url, limit = 15) {
  const { stdout } = await execFileAsync("yt-dlp", [
    "--print", "%(id)s|%(title)s",
    "--no-download",
    "--playlist-end", String(limit),
    "--flat-playlist",
    ...ytCookieArgs(),
    ...ytExtraArgs(),
    url,
  ], { timeout: 60000 });

  const videos = [];
  // Accept CRLF as well as LF so titles never carry a stray "\r".
  for (const line of stdout.trim().split(/\r?\n/)) {
    // Split on the FIRST "|" only — titles may contain further pipes.
    const separatorAt = line.indexOf("|");
    // Skip blank lines and lines with no id before the delimiter. A missing
    // delimiter used to yield an id with its last character chopped off and
    // the whole line as the title.
    if (separatorAt <= 0) continue;
    videos.push({
      id: line.slice(0, separatorAt),
      title: line.slice(separatorAt + 1),
    });
  }
  return videos;
}
/**
 * Look up upload dates for a list of YouTube video ids with yt-dlp.
 *
 * Ids are processed in batches of 50, one yt-dlp invocation per batch with a
 * 45 s timeout. After 2 consecutive failed batches the remaining ids are
 * abandoned, to avoid grinding through blocked requests.
 *
 * @param {string[]} videoIds - Video ids to resolve.
 * @returns {Promise<Object<string, string>>} Map of video id → upload date as
 *   printed by yt-dlp; ids with no date (or "NA") are omitted.
 */
async function fetchUploadDates(videoIds) {
  const dateMap = {};
  if (videoIds.length === 0) return dateMap;

  const batchSize = 50;
  const batchCount = Math.ceil(videoIds.length / batchSize);
  let consecutiveFails = 0;

  for (let batchIndex = 0; batchIndex < batchCount; batchIndex += 1) {
    const batchNumber = batchIndex + 1;
    const batch = videoIds.slice(
      batchIndex * batchSize,
      batchNumber * batchSize,
    );
    const urls = batch.map((id) => `https://www.youtube.com/watch?v=${id}`);

    try {
      const { stdout } = await execFileAsync(
        "yt-dlp",
        [
          "--print", "%(id)s|%(upload_date)s",
          "--no-download",
          ...ytCookieArgs(),
          ...urls,
        ],
        { timeout: 45000 },
      );
      stdout
        .trim()
        .split("\n")
        .filter(Boolean)
        .forEach((line) => {
          const [id, date] = line.split("|");
          if (id && date && date !== "NA") dateMap[id] = date;
        });
      // A success resets the streak — only back-to-back failures abort.
      consecutiveFails = 0;
      subLog(` Batch ${batchNumber}: got dates for ${batch.length} video(s)`);
    } catch (err) {
      consecutiveFails += 1;
      subLog(` ⚠ Batch ${batchNumber} failed: ${err.message.slice(0, 80)}`);
      if (consecutiveFails >= 2) {
        subLog(` ⚠ 2 consecutive failures — aborting yt-dlp date fetch (bot detection likely)`);
        break;
      }
    }
  }

  return dateMap;
}
// ── RSS-based date fetching (bypasses bot detection) ─────────────────────
// Fetch a URL and return the response body as a string
// fetchUrl moved to ./util.js
/**
 * Resolve the channel_id behind a YouTube channel/playlist URL using yt-dlp.
 *
 * Two lookups are tried in order, and the first usable id wins:
 *   1. --flat-playlist on the first item (fast, may print "NA"), 15 s timeout
 *   2. full metadata for the first item (slower), 30 s timeout
 *
 * @param {string} url - Channel or playlist URL.
 * @returns {Promise<string|null>} The channel id, or null when both lookups
 *   fail or yield nothing usable.
 */
async function getChannelId(url) {
  const strategies = [
    { extraFlags: ["--flat-playlist"], timeout: 15000 },
    { extraFlags: [], timeout: 30000 },
  ];

  for (const { extraFlags, timeout } of strategies) {
    try {
      const { stdout } = await execFileAsync(
        "yt-dlp",
        [
          "--print", "%(channel_id)s",
          "--no-download",
          "--playlist-items", "1",
          ...extraFlags,
          ...ytCookieArgs(),
          url,
        ],
        { timeout },
      );
      const [candidate] = stdout.trim().split("\n");
      // Reject empty output, yt-dlp's "NA" placeholder, and anything too
      // short to be an id.
      if (candidate && candidate !== "NA" && candidate.length > 5) {
        return candidate;
      }
    } catch {
      // Best-effort: a failed lookup just moves on to the next strategy.
    }
  }

  return null;
}
// Read upload dates from a channel's public YouTube RSS feed.
// Returns { videoId: "YYYYMMDD", ... } for every <entry> in the feed, or an
// empty object when channelId is falsy or the fetch fails (failure is logged).
async function fetchDatesFromRSS(channelId) {
  const dates = {};
  if (!channelId) return dates;
  const entryPattern = /<entry>[\s\S]*?<yt:videoId>([^<]+)<\/yt:videoId>[\s\S]*?<published>([^<]+)<\/published>[\s\S]*?<\/entry>/g;
  try {
    const xml = await fetchUrl(`https://www.youtube.com/feeds/videos.xml?channel_id=${channelId}`);
    for (const [, videoId, published] of String(xml).matchAll(entryPattern)) {
      // published looks like "2025-12-01T18:00:00+00:00" → keep "20251201".
      dates[videoId] = published.slice(0, 10).replace(/-/g, "");
    }
  } catch (err) {
    subLog(` ⚠ RSS feed fetch failed: ${err.message}`);
  }
  return dates;
}
// ── Podcast RSS feed parsing ────────────────────────────────────────────────
// Heuristic: does this URL look like a podcast RSS feed?
// Case-insensitive substring match against generic feed markers and a list of
// known podcast-hosting feed domains. Falsy input → false.
function isPodcastFeedUrl(url) {
  if (!url) return false;
  const lowered = url.trim().toLowerCase();
  const feedMarkers = [
    // Generic feed patterns
    "/feed", "/rss", "feeds.", ".xml",
    // Known podcast hosts
    "anchor.fm", "feeds.buzzsprout", "feeds.simplecast",
    "feeds.megaphone", "feeds.transistor", "feeds.libsyn",
    "feeds.podcastmirror", "feeds.acast", "feeds.fireside",
    "rss.art19", "podbean.com/feed",
  ];
  return feedMarkers.some((marker) => lowered.includes(marker));
}
// Apple Podcasts SHOW URL — the whole-podcast page, i.e. one WITHOUT the
// `?i=<id>` episode parameter. Shape:
//   https://podcasts.apple.com/<country>/podcast/<slug>/id<num>
// The regex only checks the prefix (and captures the numeric id); the
// "is this an episode?" decision is made by parsing the URL and looking for
// an `i` query parameter. An earlier all-regex version used a misplaced
// negative lookahead that also matched episode URLs and routed them into the
// subscribe flow.
const APPLE_PODCAST_PREFIX_RE =
  /^https?:\/\/(?:www\.)?podcasts\.apple\.com\/[^/]+\/podcast\/[^/]+\/id(\d+)/i;

// True when `url` (surrounding whitespace ignored) is an Apple Podcasts show
// page rather than an episode link. Falsy or unparseable input → false.
function isAppleShowUrl(url) {
  if (!url) return false;
  const candidate = url.trim();
  if (APPLE_PODCAST_PREFIX_RE.exec(candidate) === null) return false;
  let parsedUrl;
  try {
    parsedUrl = new URL(candidate);
  } catch {
    return false;
  }
  // Episode URLs carry `?i=<id>` (or `&i=<id>`); anything else is the show page.
  return parsedUrl.searchParams.get("i") === null;
}
// Legacy alias for APPLE_PODCAST_PREFIX_RE. resolveAppleShowToFeed (below)
// matches against this name and reads the regex's first capture group (the
// numeric podcast id), so the old name is kept rather than splitting that
// helper into separate regex + URL-parse steps.
const APPLE_SHOW_URL_RE = APPLE_PODCAST_PREFIX_RE;
// Resolve an Apple Podcasts show URL to its RSS feed URL via the
// public iTunes Lookup API. No auth required.
//
// @param {string} url  Apple Podcasts show URL; surrounding whitespace is
//                      ignored, matching what isAppleShowUrl() accepts.
// @returns {Promise<string|null>} the feedUrl, or null when the URL doesn't
//   match, the lookup fails, or Apple has no usable RSS URL on file (rare —
//   shows distributed only via Apple-exclusive feeds, mostly Apple Podcast
//   Subscriptions paid content). Never rejects.
async function resolveAppleShowToFeed(url) {
  // isAppleShowUrl() trims before testing, and the regex is ^-anchored, so
  // the raw string must be trimmed here too. Otherwise a URL pasted with
  // leading whitespace passes the show check and then fails to resolve.
  const m = typeof url === "string" ? url.trim().match(APPLE_SHOW_URL_RE) : null;
  if (!m) return null;
  const podcastId = m[1];
  try {
    const raw = await fetchUrl(
      `https://itunes.apple.com/lookup?id=${encodeURIComponent(podcastId)}`,
    );
    const parsed = JSON.parse(raw);
    const show = (parsed?.results || []).find(
      (r) => r && r.wrapperType === "track" && r.kind === "podcast",
    );
    return show?.feedUrl || null;
  } catch {
    // Network or JSON failure: callers treat null as "no feed available".
    return null;
  }
}
// Fetch and parse a podcast RSS feed.
//
// @param {string} feedUrl  URL of the RSS feed.
// @param {number} [limit=200]  Maximum number of episodes to return.
// @returns {Promise<{ title: string, episodes: Array<{ id, title, date, audioUrl, duration }> }>}
//   `date` is "YYYYMMDD" (or "" when pubDate is missing/unparseable). Items
//   without an <enclosure> url are skipped. Rejects if fetchUrl rejects.
async function parsePodcastRSS(feedUrl, limit = 200) {
  // The enclosure url is an XML attribute value, so "&" arrives as "&amp;"
  // (and occasionally as a numeric reference). Decode in a single pass so
  // "&amp;lt;" becomes "&lt;", not "<".
  const decodeXmlEntities = (text) =>
    text.replace(/&(amp|lt|gt|quot|apos|#\d+|#x[0-9a-fA-F]+);/g, (whole, name) => {
      switch (name) {
        case "amp": return "&";
        case "lt": return "<";
        case "gt": return ">";
        case "quot": return '"';
        case "apos": return "'";
        default: {
          const code = name[1] === "x"
            ? Number.parseInt(name.slice(2), 16)
            : Number.parseInt(name.slice(1), 10);
          try {
            return String.fromCodePoint(code);
          } catch {
            return whole; // out-of-range code point: leave untouched
          }
        }
      }
    });

  const xml = await fetchUrl(feedUrl);
  // Extract podcast title
  const titleMatch = xml.match(/<channel>[\s\S]*?<title>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?<\/title>/);
  const podcastTitle = titleMatch ? titleMatch[1].trim() : "Unknown Podcast";
  // Extract episodes from <item> elements
  const episodes = [];
  const itemRegex = /<item>([\s\S]*?)<\/item>/g;
  let match;
  while ((match = itemRegex.exec(xml)) !== null && episodes.length < limit) {
    const item = match[1];
    // Audio enclosure URL — items without one are not playable episodes.
    const enclosureMatch = item.match(/<enclosure[^>]+url=["']([^"']+)["']/);
    const rawAudioUrl = enclosureMatch ? enclosureMatch[1].trim() : null;
    if (!rawAudioUrl) continue;
    // GUID (unique episode identifier)
    const guidMatch = item.match(/<guid[^>]*>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?<\/guid>/);
    const guid = guidMatch ? guidMatch[1].trim() : null;
    // Title
    const epTitleMatch = item.match(/<title>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?<\/title>/);
    const epTitle = epTitleMatch ? epTitleMatch[1].trim() : "Untitled Episode";
    // Publish date → YYYYMMDD (UTC)
    const pubDateMatch = item.match(/<pubDate>([^<]+)<\/pubDate>/);
    const pubDate = pubDateMatch ? pubDateMatch[1].trim() : null;
    let dateStr = "";
    if (pubDate) {
      const d = new Date(pubDate);
      if (!Number.isNaN(d.getTime())) {
        dateStr = d.toISOString().slice(0, 10).replace(/-/g, "");
      }
    }
    // Duration (itunes:duration), passed through as written in the feed
    const durMatch = item.match(/<itunes:duration>([^<]+)<\/itunes:duration>/);
    const duration = durMatch ? durMatch[1].trim() : "";
    // The id keeps using the RAW enclosure text as its fallback so ids already
    // stored for dedup stay stable; only the download URL is entity-decoded.
    const id = guid || rawAudioUrl;
    const audioUrl = decodeXmlEntities(rawAudioUrl);
    episodes.push({ id, title: epTitle, date: dateStr, audioUrl, duration });
  }
  return { title: podcastTitle, episodes };
}
// Download a podcast episode audio file via HTTP(S) to a local path
// downloadPodcastAudio moved to ./audio.js
// Resolve a human-readable channel name for a YouTube URL.
// Order of attempts: yt-dlp with --flat-playlist (15s timeout), yt-dlp without
// it (30s timeout; reads the name from video metadata), then the "@handle"
// from the URL path. Falls back to "Unknown Channel".
async function fetchChannelName(url) {
  const attempts = [
    { extraArgs: ["--flat-playlist"], timeout: 15000 },
    { extraArgs: [], timeout: 30000 },
  ];
  for (const { extraArgs, timeout } of attempts) {
    try {
      const { stdout } = await execFileAsync("yt-dlp", [
        "--print", "%(channel)s",
        "--no-download",
        "--playlist-end", "1",
        ...extraArgs,
        ...ytCookieArgs(),
        url,
      ], { timeout });
      const [firstLine] = stdout.trim().split("\n");
      if (firstLine && firstLine !== "NA") return firstLine;
    } catch {
      // Best effort: fall through to the next strategy.
    }
  }
  // Last resort: pull the handle out of the URL itself.
  try {
    const handle = new URL(url).pathname.match(/\/@([^/]+)/);
    if (handle) return `@${handle[1]}`;
  } catch {
    // Not a parseable URL — use the generic fallback below.
  }
  return "Unknown Channel";
}
// Subscription storage (subscriptions / skip / seen / auto-queue), the
// dedup, scope enumeration, and the file-locking that serializes
// read-modify-writes all live in ./subscriptions.js now — scope-keyed and
// unit-tested. getProcessedVideoIds(scope) + isKnownVideo() are the dedup
// that keeps the auto-queue from re-offering already-handled videos.
//
// While subscriptions are operator-only in multi mode, the check loop +
// background processor operate on this scope. Endpoints use
// scopeForRequest(req) (= "owner" for the operator). Per-tenant flips these
// to each user's scope — see docs/per-tenant-subscriptions-plan.md.
// subScope() also returns this value when scopeForRequest(req) throws.
const SUBSCRIPTIONS_SCOPE = "owner";
// Drop auto-queue items whose video is already in the library (e.g. an item
// cleared from the queue and later re-discovered by a check). Runs per scope
// at boot, after the migration. Depends on the scope-aware
// getProcessedVideoIds — it used to scan the wrong directory and prune nothing.
async function reconcileAutoQueueWithLibrary(scope) {
  const libraryIds = await getProcessedVideoIds(scope);
  let prunedCount = 0;
  await mutateAutoQueue(scope, (queue) => {
    const survivors = queue.filter((entry) => !libraryIds.has(entry.videoId));
    prunedCount = queue.length - survivors.length;
    return survivors;
  });
  if (prunedCount <= 0) return;
  console.log(
    ` Auto-queue (${scope}): removed ${prunedCount} already-processed item(s)`,
  );
}
// Map a queue scope to the user identity the processor assumes for the
// authenticated loopback /api/process call.
//   single mode          → null (no auth; the loopback resolves to "owner")
//   multi mode, "owner"  → the operator (adminUserId())
//   multi mode, other    → the scope itself, which IS that tenant's user id
function userIdForScope(scope) {
  const isMultiMode = RECAP_MODE === "multi";
  if (!isMultiMode) return null;
  return scope === "owner" ? adminUserId() : scope;
}
// ── Background processing queue ──────────────────────────────────────────
// Processes "approved" auto-queue items sequentially with configurable delay
// between items to avoid hammering YouTube with rapid-fire downloads.
// JSON file that persists processingConfig: read by loadProcessingConfig(),
// written by saveProcessingConfig().
const processingConfigPath = path.join(configDir, "processing-config.json");
// Processor settings. The values below are the defaults; loadProcessingConfig()
// overrides a field only when the saved file holds a value of the right type.
let processingConfig = {
delaySeconds: 300, // Default delay between processing queue items (5 minutes)
enabled: true, // Whether background processing is active
};
// Runtime status of the background processor, updated by backgroundProcessor()
// and procLog().
let processingState = {
running: false, // Is the processor loop currently active?
currentItem: null, // The item currently being processed (or null)
lastCompleted: null, // Timestamp of last completed item
rush: false, // If true, skip delay before next item
log: [], // Recent processing log entries
};
// Log a processor message to the console and append it (timestamped) to
// processingState.log, which is capped at the 100 most recent entries.
function procLog(msg) {
  console.log(` [processor] ${msg}`);
  const entry = { t: new Date().toISOString(), msg };
  const { log } = processingState;
  log.push(entry);
  if (log.length > 100) {
    log.shift(); // drop the oldest entry
  }
}
// Load persisted processor settings from processingConfigPath into
// processingConfig. Best effort and never rejects:
//   - a missing file is normal (nothing saved yet) and stays silent;
//   - an unreadable or malformed file is reported with console.warn and the
//     current values are kept, so a corrupted config no longer goes unnoticed;
//   - each field is applied only if it has the expected type. delaySeconds
//     must also be finite and non-negative (JSON such as 1e999 parses to
//     Infinity, which is not a usable delay).
async function loadProcessingConfig() {
  let raw;
  try {
    raw = await fs.readFile(processingConfigPath, "utf-8");
  } catch (err) {
    if (err?.code !== "ENOENT") {
      console.warn(` [processor] Could not read ${processingConfigPath}: ${err?.message || err}`);
    }
    return;
  }
  let data;
  try {
    data = JSON.parse(raw);
  } catch (err) {
    console.warn(` [processor] Ignoring malformed ${processingConfigPath}: ${err.message}`);
    return;
  }
  if (data === null || typeof data !== "object") return;
  const { delaySeconds, enabled } = data;
  if (typeof delaySeconds === "number" && Number.isFinite(delaySeconds) && delaySeconds >= 0) {
    processingConfig.delaySeconds = delaySeconds;
  }
  if (typeof enabled === "boolean") processingConfig.enabled = enabled;
}
// Persist the current processingConfig to processingConfigPath as
// 2-space-indented JSON. Rejects if the write fails.
async function saveProcessingConfig() {
  const serialized = JSON.stringify(processingConfig, null, 2);
  await fs.writeFile(processingConfigPath, serialized);
}
// Apply any persisted processor settings once at module load (top-level await).
await loadProcessingConfig();
// Scan every scope's auto-queue, in the order listAutoQueueScopes() returns
// them, for the first item whose status is "approved".
// Returns { scope, item }, or null when no queue holds an approved item.
async function findNextApprovedItem() {
  for (const scope of await listAutoQueueScopes()) {
    const entries = await loadAutoQueue(scope);
    const item = entries.find((entry) => entry.status === "approved");
    if (item) {
      return { scope, item };
    }
  }
  return null;
}
// The background processor: picks "approved" items one at a time, processes
// each via the internal loopback call, waits the configured delay, then picks
// the next. Returns (goes to sleep) when no approved item is left or when
// processing is paused; kickProcessor() starts it again.
//
// Only one loop runs at a time, guarded by processingState.running. The whole
// loop sits in try/finally so that ANY exit — including an exception from a
// queue read/write — clears `running` and `currentItem`. Without that, one
// failed queue operation left `running` stuck at true and kickProcessor()
// could never restart the processor. Such an exception still rejects the
// returned promise (kickProcessor logs it).
async function backgroundProcessor() {
  if (processingState.running) return; // Already running
  processingState.running = true;
  procLog("Background processor started");
  try {
    while (true) {
      // Find the next approved item across every owner's queue (a scope can
      // hold queued items even with no current subscriptions). Each item is
      // processed AS its owner — see processItemInternally(item, scope).
      const next = await findNextApprovedItem();
      if (!next) {
        procLog("No approved items in any queue — processor sleeping");
        return;
      }
      const { scope, item } = next;
      if (!processingConfig.enabled) {
        procLog("Processing is paused — processor sleeping");
        return;
      }
      // Wait the configured delay (unless rush mode or first item)
      if (processingState.lastCompleted && !processingState.rush) {
        const delaySec = processingConfig.delaySeconds;
        procLog(`Waiting ${delaySec}s before next item...`);
        await new Promise((r) => setTimeout(r, delaySec * 1000));
        // The delay can be minutes long; honour a pause requested meanwhile
        // instead of starting one more item.
        if (!processingConfig.enabled) {
          procLog("Processing is paused — processor sleeping");
          return;
        }
      }
      processingState.rush = false;
      // Atomically claim the item: re-check it's still approved (it may have
      // been removed/changed while we waited) and flip it to "processing".
      let claimed = null;
      await mutateAutoQueue(scope, (items) => {
        const it = items.find((q) => q.id === item.id);
        if (it && it.status === "approved") {
          it.status = "processing";
          claimed = { ...it };
        }
      });
      if (!claimed) {
        procLog(`Item ${item.id} was removed or status changed — skipping`);
        continue;
      }
      processingState.currentItem = claimed;
      procLog(`Processing: ${claimed.title} (${claimed.url})`);
      let result = null;
      let errMsg = null;
      try {
        // Call /api/process via loopback, AS the scope's owner.
        result = await processItemInternally(claimed, scope);
        processingState.lastCompleted = new Date().toISOString();
        procLog(`✓ Completed: ${claimed.title}`);
      } catch (err) {
        errMsg = err.message || String(err);
        procLog(`✗ Failed: ${claimed.title}${errMsg.slice(0, 200)}`);
      }
      // Write the terminal status back atomically.
      await mutateAutoQueue(scope, (items) => {
        const it = items.find((q) => q.id === item.id);
        if (!it) return;
        if (errMsg) {
          it.status = "failed";
          it.error = errMsg;
          it.failedAt = new Date().toISOString();
        } else {
          it.status = "completed";
          it.completedAt = new Date().toISOString();
          it.historyId = (result && result.historyId) || null;
        }
      });
      processingState.currentItem = null;
    }
  } finally {
    processingState.running = false;
    processingState.currentItem = null;
  }
}
// Internal HTTP request to /api/process — consumes the SSE stream and
// waits for the "result" or "error" event. This reuses the entire existing
// pipeline without any code duplication.
//
// @param {object} item   Auto-queue item (url, title, type, uploadDate, videoId).
// @param {string} scope  Queue scope the item belongs to; decides which user
//                        identity the loopback call assumes.
// @returns {Promise<object>} resolves with the payload of the last "result"
//   event; rejects with the "error" event's message, on transport failure or
//   timeout, or when the stream ends without a result.
function processItemInternally(item, scope) {
  return new Promise((resolve, reject) => {
    // Pick a provider for the auto-queue run. Historically this function
    // hard-required a server-side Gemini API key, so relay-only users failed
    // here silently (the failure only landed in processingState.log).
    //
    // Now: prefer the relay when it is configured, fall back to Gemini when
    // only a local key exists, and fail with a clear, user-visible error when
    // neither is set. The user's actual preference lives in the frontend's
    // localStorage, which the server can't read, so the choice is based on
    // what is CONFIGURED.
    const relayConfigured = !!relayDefault.getRelayBaseURL();
    const geminiKey = resolveApiKey(null);
    let transcriptionProvider, analysisProvider;
    if (relayConfigured) {
      transcriptionProvider = "relay";
      analysisProvider = "relay";
    } else if (geminiKey) {
      transcriptionProvider = "gemini";
      analysisProvider = "gemini";
    } else {
      return reject(
        new Error(
          "No transcribe/analyze provider is configured. Set the Recap Relay URL in Settings, or paste a Gemini API key.",
        ),
      );
    }
    const body = JSON.stringify({
      url: item.url,
      // Only used by the Gemini provider; harmless for the relay
      // path (relay provider ignores this field).
      apiKey: "USE_SERVER_KEY",
      type: item.type || undefined,
      title: item.title || undefined,
      uploadDate: item.uploadDate || undefined,
      episodeId: item.videoId || undefined,
      transcriptionProvider,
      transcriptionModel:
        transcriptionProvider === "relay" ? "relay-default" : undefined,
      analysisProvider,
      analysisModel:
        analysisProvider === "relay" ? "relay-default" : undefined,
    });
    // Per-tenant: assume the owning user's identity for this loopback call
    // so /api/process gates, bills, and saves to the right account. Single
    // mode needs no cookie (the loopback resolves to "owner"). A short-lived
    // real session is minted here and deleted on every exit path below.
    const headers = {
      "Content-Type": "application/json",
      "Content-Length": Buffer.byteLength(body),
    };
    let internalSession = null;
    const ownerUserId = userIdForScope(scope);
    if (ownerUserId) {
      try {
        internalSession = mintInternalSession(ownerUserId);
        headers["Cookie"] = `${SESSION_COOKIE}=${internalSession}`;
      } catch (err) {
        return reject(
          new Error(`Could not establish processing identity: ${err?.message || err}`),
        );
      }
    }
    // Settle exactly once. Several events can fire for one failure (e.g.
    // "timeout" followed by the "error" that req.destroy() triggers); only
    // the first one deletes the session and resolves/rejects.
    let settled = false;
    const settle = (finish, value) => {
      if (settled) return;
      settled = true;
      try {
        deleteInternalSession(internalSession);
      } finally {
        finish(value);
      }
    };
    const req = http.request({
      hostname: "127.0.0.1",
      port: PORT,
      path: "/api/process",
      method: "POST",
      headers,
      timeout: 1800000, // 30 minutes max for very long videos
    }, (res) => {
      // Decode as UTF-8 at the stream level so a multi-byte character split
      // across two chunks is not corrupted.
      res.setEncoding("utf8");
      let buffer = "";
      // The current SSE event name must survive across chunks: the "event:"
      // line and its "data:" line often arrive in different chunks (always
      // for large payloads). Resetting it per chunk dropped the result.
      let currentEvent = null;
      let lastResult = null;
      let lastError = null;
      res.on("data", (chunk) => {
        buffer += chunk;
        // Parse SSE events from the buffer
        const lines = buffer.split("\n");
        buffer = lines.pop() || ""; // Keep incomplete line in buffer
        for (const rawLine of lines) {
          // Tolerate CRLF line endings.
          const line = rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine;
          if (line.startsWith("event: ")) {
            currentEvent = line.slice(7).trim();
          } else if (line.startsWith("data: ") && currentEvent) {
            try {
              const data = JSON.parse(line.slice(6));
              if (currentEvent === "result") lastResult = data;
              if (currentEvent === "error") lastError = data;
              if (currentEvent === "log") {
                procLog(` [${data.elapsed || "?"}s] ${data.message}`);
              }
            } catch {
              // Malformed event payload: skip it, keep reading the stream.
            }
            currentEvent = null;
          } else if (line === "") {
            currentEvent = null;
          }
        }
      });
      res.on("end", () => {
        if (lastError) {
          settle(reject, new Error(lastError.message || "Processing failed"));
        } else if (lastResult) {
          settle(resolve, lastResult);
        } else {
          settle(reject, new Error("Processing ended without result"));
        }
      });
      res.on("error", (err) => settle(reject, err));
    });
    req.on("error", (err) => settle(reject, err));
    req.on("timeout", () => {
      // Settle first so the error emitted by destroy() is ignored.
      settle(reject, new Error("Processing timed out after 30 minutes"));
      req.destroy();
    });
    req.write(body);
    req.end();
  });
}
// Wake the background processor if it is idle and processing is enabled.
// No pre-check of the queue is needed: the loop loads the queue itself and
// goes straight back to sleep when nothing is approved. A rejection from the
// loop is logged via procLog rather than left unhandled.
function kickProcessor() {
  if (processingState.running) return;
  if (!processingConfig.enabled) return;
  backgroundProcessor().catch((err) => procLog(`Processor error: ${err.message}`));
}
let subCheckRunning = false;
let subCheckPromise = null;
let subCheckLog = []; // Stores recent check logs for debug endpoint

// Print a subscription-check message and record it (timestamped) in
// subCheckLog, which keeps at most the 200 newest entries.
function subLog(msg) {
  console.log(msg);
  const entry = { t: new Date().toISOString(), msg };
  subCheckLog.push(entry);
  if (subCheckLog.length > 200) {
    subCheckLog.shift();
  }
}
// Run a subscription check, serialized: if a check is already in flight, wait
// for it to finish and then run a fresh one, so a caller always gets a check
// that started after its request. Resolves/rejects with the inner check.
async function checkSubscriptions() {
  if (!subCheckRunning) {
    subCheckRunning = true;
    const release = () => {
      subCheckRunning = false;
      subCheckPromise = null;
    };
    subCheckPromise = _checkSubscriptionsInner().finally(release);
    return subCheckPromise;
  }
  // Wait for current check to finish, then run again
  if (subCheckPromise) {
    await subCheckPromise;
  }
  return checkSubscriptions();
}
// One full subscription check across every subscribing scope.
// Pro-tier feature: without the "subscriptions" entitlement the run is skipped
// (the HTTP gate returns 402 to callers; this guards the background timer and
// manual paths). Each run starts with a fresh subCheckLog. A failure in one
// scope is logged and does not stop the remaining scopes.
async function _checkSubscriptionsInner() {
  const entitled = licenseMW.LIC.entitlements.has("subscriptions");
  subCheckLog = []; // Clear logs for fresh check
  if (!entitled) {
    subLog("Skipped: subscriptions require a Pro license.");
    return;
  }
  // Operator-only in multi mode, so this resolves to ["owner"]. When
  // per-tenant subscriptions ship it fans out to every subscribing tenant.
  for (const scope of await listSubscriptionScopes()) {
    try {
      await checkScopeSubscriptions(scope);
    } catch (err) {
      subLog(`⚠ Subscription check failed for scope ${scope}: ${err.message}`);
    }
  }
}
// Discover + queue new videos for ONE scope's subscriptions. New items are
// collected locally and appended to the scope's auto-queue atomically at the
// end (mutateAutoQueue) so a concurrent endpoint mutation can't be lost.
//
// For each non-paused subscription:
//   - podcast: parse the RSS feed, drop already-known episodes, queue dated
//     episodes on/after the cutoff, mark older ones as seen.
//   - YouTube: list recent videos with yt-dlp, drop already-known ones, get
//     upload dates (channel RSS first, yt-dlp for the rest), queue videos
//     on/after the cutoff, mark provably-older ones as seen.
// The cutoff is the subscription's createdAt reduced to "YYYYMMDD"; dates are
// compared as 8-character strings. Queued items start as "approved" when the
// subscription has autoDownload set, otherwise "pending".
// A failure in one subscription is logged and does not stop the others.
//
// @param {string} scope  Scope whose subscriptions and queue are checked.
// NOTE(review): `subs` is loaded once at the top and written back whole at the
// end; whether saveSubscriptions merges with edits made by endpoints while
// this (network-bound) check was running is not visible here — confirm.
async function checkScopeSubscriptions(scope) {
const subs = await loadSubscriptions(scope);
if (subs.length === 0) { return; }
const processedIds = await getProcessedVideoIds(scope);
const skippedIds = await loadSkipList(scope);
const seenIds = await loadSeenList(scope);
const queue = await loadAutoQueue(scope);
// Dedup also against items already in the queue. queuedIds grows as we add
// within this run so the same video isn't queued twice across subs.
const queuedIds = new Set(queue.map(q => {
// For YouTube: extract video ID from URL; for podcasts: use stored videoId (GUID)
if (q.videoId) return q.videoId;
const m = q.url.match(/[?&]v=([a-zA-Z0-9_-]{11})/);
return m ? m[1] : null;
}).filter(Boolean));
// Split queue counts into active (pending/approved/processing — what the
// UI shows) vs done (completed/failed — terminal, hidden by default). All
// four buckets feed dedup against newly discovered videos below.
const qActive = queue.filter(q => ["pending", "approved", "processing"].includes(q.status)).length;
const qCompleted = queue.filter(q => q.status === "completed").length;
const qFailed = queue.filter(q => q.status === "failed").length;
const qDone = qCompleted + qFailed;
subLog(`[${scope}] ${processedIds.size} in library, ${skippedIds.size} skipped, ${seenIds.size} seen — queue: ${qActive} active, ${qDone} done (${qCompleted} completed, ${qFailed} failed)`);
if (qFailed > 0) {
subLog(` (queue has ${qFailed} failed item${qFailed === 1 ? "" : "s"} — open the Queue panel and toggle "Show all" to view + retry)`);
}
// `changed` tracks whether any subscription record was touched (lastChecked /
// channelId) and therefore needs saving at the end.
let changed = false;
const newItems = []; // appended to the scope's queue atomically at the end
for (const sub of subs) {
if (sub.paused) { subLog(`${sub.name} — paused, skipping`); continue; }
try {
const icon = sub.type === "podcast" ? "🎙" : "📡";
subLog(`${icon} Checking: ${sub.name} (${sub.url})`);
// Strip ISO punctuation from createdAt and keep the first 8 chars → YYYYMMDD.
const cutoffDate = sub.createdAt.replace(/[-T:\.Z]/g, "").slice(0, 8);
if (sub.type === "podcast") {
// ── Podcast subscription: discover episodes from RSS feed ──
const { episodes } = await parsePodcastRSS(sub.url, 200);
subLog(` Found ${episodes.length} episode(s) in RSS feed`);
if (episodes.length === 0) {
sub.lastChecked = new Date().toISOString();
changed = true;
continue;
}
// Filter out already-known episodes (in library / queue / skip / seen)
const dedupSets = { processedIds, queuedIds, skippedIds, seenIds };
const unknowns = episodes.filter((ep) => !isKnownVideo(ep.id, dedupSets));
const filtered = episodes.length - unknowns.length;
subLog(` ${unknowns.length} to check, ${filtered} already known`);
if (unknowns.length === 0) {
sub.lastChecked = new Date().toISOString();
changed = true;
continue;
}
subLog(` Cutoff date: ${cutoffDate}`);
let newCount = 0;
// Episodes dated before the cutoff; recorded as seen after the loop.
const seenNow = [];
for (const ep of unknowns) {
if (!ep.date || ep.date.length !== 8) continue; // skip undated
if (ep.date < cutoffDate) {
seenNow.push(ep.id);
continue; // before cutoff
}
subLog(`${ep.date}${ep.title.slice(0, 60)}`);
const itemStatus = sub.autoDownload ? "approved" : "pending";
newItems.push({
id: `auto-${Date.now()}-${Buffer.from(ep.id).toString("base64url").slice(0, 16)}`,
videoId: ep.id, // episode GUID
url: ep.audioUrl,
title: ep.title,
uploadDate: ep.date,
subscriptionId: sub.id,
subscriptionName: sub.name,
status: itemStatus,
type: "podcast",
duration: ep.duration || "",
});
queuedIds.add(ep.id);
newCount++;
}
if (seenNow.length > 0) await addToSeenList(scope, seenNow);
sub.lastChecked = new Date().toISOString();
subLog(`${newCount} episode(s) queued for approval from ${sub.name}`);
changed = true;
} else {
// ── YouTube subscription: discover videos via yt-dlp + RSS dates ──
// Scale fetch limit based on how far back the subscription date goes
// (4 videos per week of subscription age, clamped to 15..200).
const daysSinceSub = Math.max(1, Math.ceil((Date.now() - new Date(sub.createdAt).getTime()) / 86400000));
const estimatedVideos = Math.ceil(daysSinceSub / 7) * 4;
const fetchLimit = Math.min(Math.max(estimatedVideos, 15), 200);
subLog(` Subscription age: ${daysSinceSub}d → fetching up to ${fetchLimit} videos`);
const candidates = await listChannelVideosFast(sub.url, fetchLimit);
subLog(` Found ${candidates.length} recent video(s)`);
if (candidates.length === 0) {
subLog(` ⚠ No videos returned from yt-dlp for this channel`);
sub.lastChecked = new Date().toISOString();
changed = true;
continue;
}
const unknowns = candidates.filter(
(v) => !isKnownVideo(v.id, { processedIds, queuedIds, skippedIds, seenIds }),
);
const filtered = candidates.length - unknowns.length;
subLog(` ${unknowns.length} to check, ${filtered} already known`);
if (unknowns.length === 0) {
sub.lastChecked = new Date().toISOString();
changed = true;
continue;
}
subLog(` Cutoff date: ${cutoffDate} — fetching upload dates...`);
// Step 1: Get channel ID (cached on subscription, or fetch once)
if (!sub.channelId) {
subLog(` Resolving channel ID...`);
sub.channelId = await getChannelId(sub.url);
if (sub.channelId) {
subLog(` Resolved channel ID: ${sub.channelId}`);
changed = true;
} else {
subLog(` ⚠ Could not resolve channel ID — RSS will be skipped`);
}
}
// Step 2: RSS feed — fast, no bot detection, covers ~15 most recent
const dateMap = sub.channelId ? await fetchDatesFromRSS(sub.channelId) : {};
const rssCount = Object.keys(dateMap).length;
if (sub.channelId) subLog(` RSS feed: got dates for ${rssCount} videos`);
// Step 3: Smart early termination
// If the oldest RSS entry already predates the cutoff, videos missing from
// the RSS feed are treated as older still, so the yt-dlp lookup is skipped.
const needDates = unknowns.filter(v => !dateMap[v.id]);
let earlyTermination = false;
if (needDates.length > 0 && rssCount > 0) {
const rssDates = Object.values(dateMap).sort();
const oldestRssDate = rssDates[0];
if (oldestRssDate < cutoffDate) {
earlyTermination = true;
subLog(` Oldest RSS video (${oldestRssDate}) is before cutoff — ${needDates.length} older video(s) are definitely too old, skipping yt-dlp`);
} else {
subLog(` ${needDates.length} video(s) not in RSS — trying yt-dlp for dates...`);
const ytDates = await fetchUploadDates(needDates.map(v => v.id));
Object.assign(dateMap, ytDates);
const ytCount = Object.keys(ytDates).length;
if (ytCount > 0) subLog(` yt-dlp added dates for ${ytCount} more video(s)`);
}
} else if (needDates.length > 0 && rssCount === 0) {
subLog(` ${needDates.length} video(s) need dates — trying yt-dlp...`);
const ytDates = await fetchUploadDates(needDates.map(v => v.id));
Object.assign(dateMap, ytDates);
const ytCount = Object.keys(ytDates).length;
if (ytCount > 0) subLog(` yt-dlp added dates for ${ytCount} more video(s)`);
}
const gotDates = Object.keys(dateMap).length;
if (gotDates > 0 || needDates.length === 0) {
subLog(` Total dates: ${gotDates} of ${unknowns.length} videos`);
} else {
subLog(` ⚠ No dates available — skipping. Try setting YT_COOKIES_FROM in .env`);
sub.lastChecked = new Date().toISOString();
changed = true;
continue;
}
let newCount = 0;
for (const video of unknowns) {
const uploadDate = dateMap[video.id];
// Undated videos are neither queued nor marked seen here.
if (!uploadDate || uploadDate.length !== 8) {
continue;
}
if (uploadDate < cutoffDate) {
subLog(`${video.id} (${uploadDate}) — before cutoff`);
continue;
}
subLog(`${video.id}${uploadDate ? ` (${uploadDate})` : ""}${video.title.slice(0,50)}`);
const ytItemStatus = sub.autoDownload ? "approved" : "pending";
newItems.push({
id: `auto-${Date.now()}-${video.id}`,
videoId: video.id,
url: `https://www.youtube.com/watch?v=${video.id}`,
title: video.title,
uploadDate: uploadDate || null,
subscriptionId: sub.id,
subscriptionName: sub.name,
status: ytItemStatus,
});
queuedIds.add(video.id);
newCount++;
}
// Only add to seen list when we can PROVE a video is too old
// (dated before the cutoff, or undated while early termination applied).
const seenNow = unknowns.filter(v => {
if (queuedIds.has(v.id)) return false;
const d = dateMap[v.id];
if (d && d.length === 8 && d < cutoffDate) return true;
if (d && d.length === 8 && d >= cutoffDate) return false;
return earlyTermination;
}).map(v => v.id);
if (seenNow.length > 0) await addToSeenList(scope, seenNow);
sub.lastChecked = new Date().toISOString();
subLog(`${newCount} video(s) queued for approval from ${sub.name}`);
changed = true;
}
} catch (err) {
subLog(` ⚠ FAILED for ${sub.name}: ${err.message}`);
}
}
if (changed) await saveSubscriptions(scope, subs);
// Append everything discovered this run in one atomic mutation.
if (newItems.length > 0) {
await mutateAutoQueue(scope, (items) => {
items.push(...newItems);
});
}
// Re-read the queue so the summary counts include the items just appended.
const finalQueue = await loadAutoQueue(scope);
const pendingCount = finalQueue.filter(q => q.status === "pending").length;
const approvedCount = finalQueue.filter(q => q.status === "approved").length;
subLog(`[${scope}] Done. ${pendingCount} pending, ${approvedCount} approved in auto-queue.`);
// Wake up the background processor if there are approved items. (Behind
// the operator-only gate the processor works the "owner" queue, which is
// the only scope that reaches here.)
if (approvedCount > 0) {
subLog(`Kicking background processor for ${approvedCount} approved item(s)...`);
kickProcessor();
}
}
// Resolve the scope for a subscription / auto-queue request. Behind the
// operator-only gate (license-middleware) this is always "owner" (admin in
// multi mode, or single mode). Forward-compatible with per-tenant — it's just
// the signed-in user's scope. If scope resolution throws, the default
// SUBSCRIPTIONS_SCOPE is used instead.
function subScope(req) {
  let resolved = SUBSCRIPTIONS_SCOPE;
  try {
    resolved = scopeForRequest(req);
  } catch {
    // Keep the default scope.
  }
  return resolved;
}
// CRUD endpoints
// GET /api/subscriptions → { subscriptions: [...] } for the caller's scope.
app.get("/api/subscriptions", async (req, res) => {
  const scope = subScope(req);
  const subscriptions = await loadSubscriptions(scope);
  res.json({ subscriptions });
});
// Derive a normalized key from a YouTube URL so two URLs for the same channel
// compare equal (used for duplicate detection). The path is lower-cased and
// trailing slashes are removed before matching:
//   /@handle/videos        → "@handle"
//   /channel/UCxxx         → "channel/ucxxx"
//   /c/name or /user/name  → "/c/name" or "/user/name"
//   /playlist?list=PLxxx   → "playlist/PLxxx" (list id keeps its case)
//   anything else          → the normalized path
// Input that isn't a parseable URL is lower-cased and stripped of trailing
// slashes instead.
function channelKeyFromUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return url.toLowerCase().replace(/\/+$/, "");
  }
  const pathname = parsed.pathname.toLowerCase().replace(/\/+$/, "");
  // Tried in order; each entry says which part of the match is the key.
  const extractors = [
    [/\/(@[^/]+)/, (hit) => hit[1]],
    [/\/(channel\/[^/]+)/, (hit) => hit[1]],
    [/\/(c|user)\/([^/]+)/, (hit) => hit[0]],
  ];
  for (const [pattern, pick] of extractors) {
    const hit = pattern.exec(pathname);
    if (hit) return pick(hit);
  }
  const listId = parsed.searchParams.get("list");
  return listId ? `playlist/${listId}` : pathname;
}
// POST /api/subscriptions — create a subscription for the caller's scope.
// Body: { url, since?, type?, autoDownload? }.
//   400  missing/non-string url, unparseable `since`, or an Apple show with no RSS feed
//   409  already subscribed to this podcast / channel
//   502  Apple lookup threw
// Responds with the new subscription immediately (name "Loading..."), then
// resolves the display name (and channel id for YouTube) and runs a
// subscription check in the background.
app.post("/api/subscriptions", async (req, res) => {
  let { url, since, type, autoDownload } = req.body ?? {};
  // Store and compare the trimmed URL; a pasted URL with stray whitespace
  // would otherwise be persisted as-is and fail later fetches.
  if (typeof url === "string") url = url.trim();
  if (!url || typeof url !== "string") {
    return res.status(400).json({ error: "Missing url" });
  }
  // Validate `since` up front: new Date(bad).toISOString() throws a
  // RangeError, which inside this async handler would surface as an
  // unhandled rejection and leave the request hanging.
  let sinceDate = null;
  if (since) {
    sinceDate = new Date(since);
    if (Number.isNaN(sinceDate.getTime())) {
      return res.status(400).json({ error: "Invalid 'since' date" });
    }
  }
  // Apple Podcasts SHOW URLs (`/podcast/<slug>/id<num>` with NO
  // `?i=<id>` episode param) get resolved to the show's RSS feed
  // before we persist the subscription. The feed URL is what the
  // poll loop fetches to discover new episodes, so storing the
  // landing-page URL would be a dead end.
  if (isAppleShowUrl(url)) {
    try {
      const feed = await resolveAppleShowToFeed(url);
      if (!feed) {
        return res.status(400).json({
          error: "apple_show_no_feed",
          message:
            "Apple doesn't list an RSS feed for that show — it may be an Apple Podcasts Subscriptions exclusive. Paste the show's direct RSS URL instead.",
        });
      }
      console.log(`[subscribe] Apple show → resolved feed: ${feed}`);
      url = feed;
      type = "podcast";
    } catch (err) {
      return res.status(502).json({
        error: "apple_show_lookup_failed",
        message: `Couldn't reach Apple to resolve that show URL: ${err?.message || err}`,
      });
    }
  }
  const isPodcast = type === "podcast" || isPodcastFeedUrl(url);
  const scope = subScope(req);
  const subs = await loadSubscriptions(scope);
  // Prevent duplicates
  if (isPodcast) {
    const normalize = (u) => u.trim().toLowerCase().replace(/\/+$/, "");
    const normalizedUrl = normalize(url);
    if (subs.some((s) => normalize(s.url) === normalizedUrl)) {
      return res.status(409).json({ error: "Already subscribed to this podcast" });
    }
  } else {
    const newKey = channelKeyFromUrl(url);
    if (subs.some((s) => channelKeyFromUrl(s.url) === newKey)) {
      return res.status(409).json({ error: "Already subscribed to this channel" });
    }
  }
  // Use provided cutoff date, or default to right now
  const cutoff = sinceDate ? sinceDate.toISOString() : new Date().toISOString();
  const sub = {
    id: `sub-${Date.now()}`,
    url,
    name: "Loading...",
    type: isPodcast ? "podcast" : "youtube",
    channelId: null,
    createdAt: cutoff,
    lastChecked: null,
    paused: false,
    autoDownload: autoDownload === true,
  };
  subs.push(sub);
  await saveSubscriptions(scope, subs);
  // Respond immediately so the UI isn't blocked
  res.json(sub);
  // Background: resolve name and check for new content. Deliberately not
  // awaited; failures are logged, never thrown.
  (async () => {
    try {
      if (isPodcast) {
        // Fetch podcast title from RSS feed
        const { title } = await parsePodcastRSS(url, 1);
        // Reload before writing so changes made since the response are kept.
        const freshSubs = await loadSubscriptions(scope);
        const s = freshSubs.find((x) => x.id === sub.id);
        if (s) {
          s.name = title || url;
          await saveSubscriptions(scope, freshSubs);
        }
        console.log(` 🎙 New podcast subscription: ${title} — checking for episodes...`);
      } else {
        const [name, channelId] = await Promise.all([
          fetchChannelName(url),
          getChannelId(url),
        ]);
        const freshSubs = await loadSubscriptions(scope);
        const s = freshSubs.find((x) => x.id === sub.id);
        if (s) {
          s.name = name;
          if (channelId) s.channelId = channelId;
          await saveSubscriptions(scope, freshSubs);
        }
        console.log(` 📡 New subscription: ${name} — checking for recent videos...`);
      }
      await checkSubscriptions();
    } catch (err) {
      console.error(" ⚠ Background subscription setup error:", err.message);
    }
  })();
});
// Remove one subscription from the caller's scope, along with every
// auto-queue item that was produced by it.
app.delete("/api/subscriptions/:id", async (req, res) => {
  const { id: targetId } = req.params;
  const scope = subScope(req);
  const existing = await loadSubscriptions(scope);
  const remaining = existing.filter((entry) => entry.id !== targetId);
  await saveSubscriptions(scope, remaining);
  // Queue entries remember which subscription produced them; drop those too.
  await mutateAutoQueue(scope, (queued) =>
    queued.filter((entry) => entry.subscriptionId !== targetId),
  );
  res.json({ ok: true });
});
// Flip the `paused` flag on one subscription and return the updated record.
app.put("/api/subscriptions/:id/pause", async (req, res) => {
  const scope = subScope(req);
  const all = await loadSubscriptions(scope);
  const target = all.find((entry) => entry.id === req.params.id);
  if (!target) {
    return res.status(404).json({ error: "Subscription not found" });
  }
  // This endpoint is a toggle: the client does not send the desired state.
  target.paused = !target.paused;
  await saveSubscriptions(scope, all);
  res.json(target);
});
// Overwrite a subscription's `createdAt` with the supplied `since` date
// (normalized to an ISO-8601 string) and return the updated record.
//
// Responses:
//   400 — `since` missing, or not parseable as a date
//   404 — no subscription with that id in the caller's scope
//   200 — the updated subscription
app.put("/api/subscriptions/:id/since", async (req, res) => {
  const { since } = req.body;
  if (!since) return res.status(400).json({ error: "Missing 'since' date" });
  // `new Date(<garbage>)` yields an Invalid Date whose toISOString()
  // throws RangeError. Inside an async handler that becomes a rejected
  // promise instead of a response, so reject bad input explicitly.
  const cutoff = new Date(since);
  if (Number.isNaN(cutoff.getTime())) {
    return res.status(400).json({ error: "Invalid 'since' date" });
  }
  const scope = subScope(req);
  const subs = await loadSubscriptions(scope);
  const sub = subs.find((s) => s.id === req.params.id);
  if (!sub) return res.status(404).json({ error: "Subscription not found" });
  sub.createdAt = cutoff.toISOString();
  await saveSubscriptions(scope, subs);
  res.json(sub);
});
// Debug: subscription check logs (viewable in-app)
// Return the subscription-check log plus a compact view of the caller's
// auto-queue (one summary object per item).
app.get("/api/sub-check-log", async (req, res) => {
  const queued = await loadAutoQueue(subScope(req));
  const summarize = (entry) => ({
    id: entry.id,
    videoId: entry.videoId,
    title: entry.title,
    status: entry.status,
    sub: entry.subscriptionName,
  });
  res.json({
    log: subCheckLog,
    autoQueueCount: queued.length,
    autoQueue: queued.map((entry) => summarize(entry)),
  });
});
// Auto-queue endpoints (frontend polls these)
// List the caller's auto-queue. By default only items that are pending,
// approved, or processing are returned; `?all=true` returns every item.
// Per-status counts are always computed over the full queue.
app.get("/api/auto-queue", async (req, res) => {
  const queue = await loadAutoQueue(subScope(req));
  const ACTIVE_STATUSES = ["pending", "approved", "processing"];
  const countWithStatus = (status) =>
    queue.reduce((n, entry) => (entry.status === status ? n + 1 : n), 0);

  let items;
  if (req.query.all === "true") {
    items = queue;
  } else {
    items = queue.filter((entry) => ACTIVE_STATUSES.includes(entry.status));
  }

  res.json({
    items,
    checkRunning: subCheckRunning,
    counts: {
      pending: countWithStatus("pending"),
      approved: countWithStatus("approved"),
      processing: countWithStatus("processing"),
      completed: countWithStatus("completed"),
      failed: countWithStatus("failed"),
    },
  });
});
// Drop a single item from the caller's auto-queue. Always answers
// `{ ok: true }`, whether or not an item with that id existed.
app.delete("/api/auto-queue/:id", async (req, res) => {
  const { id: targetId } = req.params;
  const withoutTarget = (queued) =>
    queued.filter((entry) => entry.id !== targetId);
  await mutateAutoQueue(subScope(req), withoutTarget);
  res.json({ ok: true });
});
// Remove an item from the auto-queue and, when it carries a videoId,
// record that id on the scope's skip list.
app.post("/api/auto-queue/:id/skip", async (req, res) => {
  const scope = subScope(req);
  const { id: targetId } = req.params;
  let skippedVideoId = null;
  await mutateAutoQueue(scope, (queued) => {
    // Capture the videoId before the item is filtered out.
    const hit = queued.find((entry) => entry.id === targetId);
    if (hit?.videoId) {
      skippedVideoId = hit.videoId;
    }
    return queued.filter((entry) => entry.id !== targetId);
  });
  if (skippedVideoId) {
    await addToSkipList(scope, skippedVideoId);
  }
  res.json({ ok: true });
});
// Approve a single auto-queue item for background processing
// Move one pending item to "approved" and wake the background processor.
//   400 — item exists but is not pending
//   404 — no such item
app.post("/api/auto-queue/:id/approve", async (req, res) => {
  let approved = null;
  let rejectedStatus = null;
  await mutateAutoQueue(subScope(req), (queued) => {
    const hit = queued.find((entry) => entry.id === req.params.id);
    if (!hit) return;
    if (hit.status === "pending") {
      hit.status = "approved";
      // Snapshot for the response; the queue entry is mutated in place.
      approved = { ...hit };
    } else {
      rejectedStatus = hit.status;
    }
  });
  if (rejectedStatus) {
    return res
      .status(400)
      .json({ error: `Cannot approve item with status '${rejectedStatus}'` });
  }
  if (!approved) {
    return res.status(404).json({ error: "Item not found" });
  }
  kickProcessor();
  res.json({ ok: true, item: approved });
});
// Approve all pending items at once
// Approve every pending item in the caller's queue in one shot. The
// processor is only woken when at least one item changed state.
app.post("/api/auto-queue/approve-all", async (req, res) => {
  let approvedCount = 0;
  await mutateAutoQueue(subScope(req), (queued) => {
    queued
      .filter((entry) => entry.status === "pending")
      .forEach((entry) => {
        entry.status = "approved";
        approvedCount += 1;
      });
  });
  if (approvedCount > 0) {
    kickProcessor();
  }
  res.json({ ok: true, approved: approvedCount });
});
// Retry a failed item
// Put a failed item back into the "approved" state, clearing its error
// bookkeeping, and wake the background processor.
//   400 — item exists but is not failed
//   404 — no such item
app.post("/api/auto-queue/:id/retry", async (req, res) => {
  let requeued = null;
  let rejectedStatus = null;
  await mutateAutoQueue(subScope(req), (queued) => {
    const hit = queued.find((entry) => entry.id === req.params.id);
    if (!hit) return;
    if (hit.status === "failed") {
      hit.status = "approved";
      hit.error = undefined;
      hit.failedAt = undefined;
      requeued = { ...hit };
    } else {
      rejectedStatus = hit.status;
    }
  });
  if (rejectedStatus) {
    return res
      .status(400)
      .json({ error: `Cannot retry item with status '${rejectedStatus}'` });
  }
  if (!requeued) {
    return res.status(404).json({ error: "Item not found" });
  }
  kickProcessor();
  res.json({ ok: true, item: requeued });
});
// Clear completed and failed items from the queue
// Purge items whose status is "completed" or "failed" and report how
// many were removed.
app.post("/api/auto-queue/clear-finished", async (req, res) => {
  const FINISHED_STATUSES = ["completed", "failed"];
  let removed = 0;
  await mutateAutoQueue(subScope(req), (queued) => {
    const survivors = queued.filter(
      (entry) => !FINISHED_STATUSES.includes(entry.status),
    );
    removed = queued.length - survivors.length;
    return survivors;
  });
  res.json({ ok: true, removed });
});
// Kick off a subscription check without waiting for it to finish. If a
// check is already in flight this is a no-op that reports as much.
app.post("/api/subscriptions/check-now", async (req, res) => {
  if (subCheckRunning) {
    return res.json({ ok: true, message: "Already checking" });
  }
  // Fire-and-forget: the response goes out immediately, failures are logged.
  const reportFailure = (err) =>
    console.error(" ⚠ Manual subscription check error:", err.message);
  checkSubscriptions().catch(reportFailure);
  res.json({ ok: true, message: "Check started" });
});
// ── Auto-download toggle per subscription ──────────────────────────────────
// Set a subscription's `autoDownload` flag. Only a literal `true` in
// `req.body.enabled` turns it on; anything else turns it off.
app.put("/api/subscriptions/:id/auto-download", async (req, res) => {
  const scope = subScope(req);
  const all = await loadSubscriptions(scope);
  const target = all.find((entry) => entry.id === req.params.id);
  if (!target) {
    return res.status(404).json({ error: "Subscription not found" });
  }
  const wantsAutoDownload = req.body.enabled === true;
  target.autoDownload = wantsAutoDownload;
  await saveSubscriptions(scope, all);
  res.json({ ok: true, subscription: target });
});
// ── Background processing status & configuration ───────────────────────────
// Get current processing status (what's running, queue depth, config)
// Snapshot of the background processor: whether it is running, a trimmed
// view of the current item, the last completed item, the active config,
// queue counts for the caller's scope, and the 20 most recent log lines.
app.get("/api/processing/status", async (req, res) => {
  const queue = await loadAutoQueue(subScope(req));
  const tally = (status) =>
    queue.filter((entry) => entry.status === status).length;

  // Expose only id/title/url of the in-flight item, or null when idle.
  let currentItem = null;
  if (processingState.currentItem) {
    const { id, title, url } = processingState.currentItem;
    currentItem = { id, title, url };
  }

  res.json({
    running: processingState.running,
    currentItem,
    lastCompleted: processingState.lastCompleted,
    config: processingConfig,
    counts: {
      approved: tally("approved"),
      processing: tally("processing"),
      pending: tally("pending"),
    },
    log: processingState.log.slice(-20),
  });
});
// Update processing configuration (delay between items, enable/disable)
// Patch the processor configuration. Both fields are optional and are
// ignored when they have the wrong type:
//   delaySeconds — non-negative number, clamped to [0, 3600]
//   enabled      — boolean; switching on also wakes the processor
// The (possibly unchanged) config is persisted and echoed back.
app.put("/api/processing/config", async (req, res) => {
  const requestedDelay = req.body.delaySeconds;
  if (typeof requestedDelay === "number" && requestedDelay >= 0) {
    const capped = Math.min(3600, requestedDelay);
    processingConfig.delaySeconds = Math.max(0, capped);
  }

  const requestedEnabled = req.body.enabled;
  if (typeof requestedEnabled === "boolean") {
    processingConfig.enabled = requestedEnabled;
    // Approved items may already be waiting, so wake the processor.
    if (requestedEnabled) {
      kickProcessor();
    }
  }

  await saveProcessingConfig();
  res.json({ ok: true, config: processingConfig });
});
// Rush: skip the delay before the next item in the queue
// Set the processor's `rush` flag and wake it, so processing starts even
// if nothing is currently running.
app.post("/api/processing/rush", (req, res) => {
  processingState.rush = true;
  kickProcessor();
  const message = "Rush mode enabled — next item will process immediately";
  res.json({ ok: true, message });
});
// Processing log (for debug/monitoring)
// Return the processor's full in-memory log (the status endpoint only
// returns the last 20 lines).
app.get("/api/processing/log", (req, res) => {
  const { log } = processingState;
  res.json({ log });
});
// ── Full pipeline: URL → audio → transcript → topic analysis ──────────────
// Single yt-dlp call that returns the full info-dict as JSON. One shot
// gives us title, channel, description, chapters, duration, and the
// per-language caption availability — used both for richer transcription
// prompts and for the captions-first path (skip audio + transcription
// entirely when YouTube has usable captions for this video).
/**
 * Run `yt-dlp -j` for a video and reduce its info-dict to the fields the
 * pipeline uses.
 *
 * @param {string} videoId - YouTube video id, interpolated into a watch URL.
 * @returns {Promise<object|null>} `{ title, uploadDate, channel,
 *   description, chapters, duration, hasManualCaptions, hasAutoCaptions }`,
 *   or `null` when yt-dlp fails, times out (30s), or prints unparseable JSON.
 */
async function fetchYouTubeMetadata(videoId) {
  // True when `value` is an object with at least one own enumerable key.
  const hasAnyKey = (value) => !!(value && Object.keys(value).length > 0);
  try {
    const watchUrl = `https://www.youtube.com/watch?v=${videoId}`;
    const ytArgs = ["-j", "--no-warnings", "--skip-download", watchUrl];
    const execOpts = { timeout: 30000, maxBuffer: 10 * 1024 * 1024 };
    const result = await execFileAsync("yt-dlp", ytArgs, execOpts);
    const info = JSON.parse(result.stdout);

    const chapters = Array.isArray(info.chapters) ? info.chapters : [];
    const duration = typeof info.duration === "number" ? info.duration : 0;
    return {
      title: info.title || "",
      uploadDate: info.upload_date || "",
      channel: info.channel || info.uploader || "",
      // Descriptions can run to thousands of characters; cap at 2000
      // so they don't bloat downstream prompts.
      description: (info.description || "").slice(0, 2000),
      chapters,
      duration,
      hasManualCaptions: hasAnyKey(info.subtitles),
      hasAutoCaptions: hasAnyKey(info.automatic_captions),
    };
  } catch {
    // Best-effort lookup: any failure is reported as "no metadata".
    return null;
  }
}
// Pulls YouTube captions for the given video and parses them into the
// app's standard { offset, text, duration } entries shape. Prefers
// manual captions over auto-generated. Returns null if no captions are
// available or parsing produces too few segments to be useful.
//
// Output is the same shape that `parseTimestampedTranscript()` produces
// from a Gemini transcription, so downstream code (the analysis step,
// the chunk renderer) doesn't care how the transcript got here.
/**
 * Download English captions for a video with yt-dlp (json3 format) and
 * convert them to `{ offset, text, duration }` entries, in seconds.
 *
 * Manual captions are tried first. If that download succeeds but leaves no
 * `.json3` file (e.g. the video's manual subtitles are not in English) and
 * auto-captions are reported available, a second pass requests them.
 *
 * @param {string} videoId - YouTube video id.
 * @param {string} tmpDir - Existing directory yt-dlp writes `captions.*` into.
 * @param {{hasManual: boolean, hasAuto: boolean}} opts - Caption availability.
 * @param {(step: number, message: string) => void} log - Progress logger.
 * @returns {Promise<{entries: Array<{offset: number, text: string,
 *   duration: number}>, source: "manual"|"auto"}|null>} `null` when nothing
 *   usable could be fetched; the reason is reported through `log`.
 */
async function tryFetchYouTubeCaptions(videoId, tmpDir, opts, log) {
  const { hasManual, hasAuto } = opts;
  if (!hasManual && !hasAuto) return null;
  const url = `https://www.youtube.com/watch?v=${videoId}`;
  const captionsBase = path.join(tmpDir, "captions");
  const langs = "en.*,en";

  // Ordered attempts: manual first (cleaner — punctuated, no fragmented
  // words), then auto-generated.
  const attempts = [];
  if (hasManual) attempts.push({ flag: "--write-subs", source: "manual" });
  if (hasAuto) attempts.push({ flag: "--write-auto-subs", source: "auto" });

  // yt-dlp names the file like `captions.en.json3` or `captions.en-US.json3`.
  let captionPath = null;
  let captionSource = null;
  let availableFiles = [];
  for (let i = 0; i < attempts.length; i++) {
    const attempt = attempts[i];
    try {
      await execFileAsync(
        "yt-dlp",
        ["--skip-download", attempt.flag, "--sub-langs", langs,
          "--sub-format", "json3", "--no-warnings", "-o", captionsBase, url],
        { timeout: 30000 }
      );
    } catch (err) {
      // A hard yt-dlp failure (network, timeout, …) would most likely hit
      // the next attempt too, so don't spend another 30s on it.
      log(1, `⚠ Caption download failed: ${(err.message || "").slice(0, 200)}`);
      return null;
    }
    try {
      availableFiles = await fs.readdir(tmpDir);
    } catch (err) {
      log(1, `⚠ Captions tmpDir read failed: ${(err.message || "").slice(0, 200)}`);
      availableFiles = [];
      break;
    }
    const match = availableFiles.find(
      (f) => f.startsWith("captions.") && f.endsWith(".json3")
    );
    if (match) {
      captionPath = path.join(tmpDir, match);
      captionSource = attempt.source;
      break;
    }
    // Metadata flags "has manual subtitles" for ANY language; when none
    // match our English pattern yt-dlp exits cleanly with no file. Fall
    // back to auto-captions instead of giving up on captions entirely.
    if (i < attempts.length - 1) {
      log(1, "⚠ Captions: no usable manual English captions — trying auto-generated captions.");
    }
  }

  if (!captionPath) {
    // Some videos report subtitles as "available" but yt-dlp can't pull a
    // usable JSON3 file. Say which failure mode was hit.
    const otherCaps = availableFiles.filter((f) => f.startsWith("captions."));
    if (otherCaps.length > 0) {
      log(
        1,
        `⚠ Captions: yt-dlp wrote ${otherCaps.join(", ")} but not the expected .json3 — likely subtitle-language mismatch (we asked for en.*,en). Video may only have captions in another language.`
      );
    } else {
      log(
        1,
        `⚠ Captions: yt-dlp produced no caption file despite metadata reporting them available. Possibly age-gated, members-only, or yt-dlp couldn't decrypt them.`
      );
    }
    return null;
  }

  let parsed;
  try {
    const content = await fs.readFile(captionPath, "utf-8");
    parsed = JSON.parse(content);
  } catch (err) {
    log(1, `⚠ Caption parse failed: ${(err.message || "").slice(0, 200)}`);
    return null;
  }

  // `parsed?.` guards a file containing valid JSON that is not an object.
  const rawEvents = Array.isArray(parsed?.events) ? parsed.events : [];
  const entries = [];
  for (const evt of rawEvents) {
    // Each event's text is split across `segs`; join them and flatten
    // embedded newlines so one event becomes one line of text.
    const text = (Array.isArray(evt.segs) ? evt.segs : [])
      .map((s) => s.utf8 || "")
      .join("")
      .replace(/\n+/g, " ")
      .trim();
    if (!text) continue;
    entries.push({
      offset: (evt.tStartMs || 0) / 1000,
      text,
      duration: (evt.dDurationMs || 0) / 1000,
    });
  }
  if (entries.length < 5) {
    log(
      1,
      `⚠ Captions parsed but only ${entries.length} non-empty entries (raw events: ${rawEvents.length}). Captions look empty/malformed — falling back.`
    );
    return null;
  }
  return { entries, source: captionSource };
}
// Coalesce a list of timestamped entries into coarser chunks. Used
// after pulling auto-captions (which are typically 1–3-word fragments
// every 1–3 seconds) so the downstream analysis prompt has ~100
// medium-sized segments instead of ~900 tiny ones. Manual captions and
// Gemini-transcribed entries already average 10–30s and pass through
// unchanged when their median duration reaches the 8s threshold.
/**
 * Merge fine-grained transcript entries into buckets of roughly
 * `targetSeconds` each.
 *
 * @param {Array<{offset: number, text: string, duration: number}>} entries
 * @param {number} [targetSeconds=15] - An entry starting at least this many
 *   seconds after the current bucket's start opens a new bucket.
 * @returns {Array} The input itself when it is not a non-empty array or its
 *   median entry duration is already >= 8s; otherwise a new array of merged
 *   entries. Entries whose text is empty or whitespace-only are dropped.
 */
function coalesceTranscriptEntries(entries, targetSeconds = 15) {
  if (!Array.isArray(entries) || entries.length === 0) return entries;

  // Only coalesce when the typical (median) entry is short.
  const sortedDurations = entries
    .map((entry) => entry.duration || 0)
    .sort((a, b) => a - b);
  const medianDuration = sortedDurations[Math.floor(sortedDurations.length / 2)];
  if (medianDuration >= 8) return entries;

  // Buckets are pushed as soon as they open and then extended in place;
  // the open bucket is always the last element.
  const buckets = [];
  for (let i = 0; i < entries.length; i += 1) {
    const entry = entries[i];
    const text = (entry.text || "").trim();
    if (text === "") continue;

    const open = buckets.length > 0 ? buckets[buckets.length - 1] : null;
    const startsNewBucket =
      open === null || (entry.offset || 0) - open.offset >= targetSeconds;
    if (startsNewBucket) {
      buckets.push({ offset: entry.offset, text, duration: entry.duration || 0 });
      continue;
    }

    open.text = `${open.text} ${text}`.replace(/\s+/g, " ").trim();
    // Bucket duration spans from its start to the end of the latest entry.
    const entryEnd = (entry.offset || 0) + (entry.duration || 0);
    open.duration = entryEnd - open.offset;
  }
  return buckets;
}
// Reduce an over-long transcript down to roughly `maxEntries` segments
// so the analysis prompt fits in smaller model contexts (typical local
// models are 16k–32k tokens — a 2-hour podcast at Parakeet's ~5s
// granularity easily blows past that). Bucket size is computed from
// total audio duration so we end up with at most `maxEntries` segments
// regardless of source granularity. Unlike coalesceTranscriptEntries
// above, this one is purely count-driven — no median-duration guard,
// because the failure mode is "context exceeded" not "bad UX from
// over-fragmented entries".
//
// Returns { coalesced, indexMap } where indexMap[i] = { startOrig,
// endOrig } maps coalesced-entry i back to a range of original-entry
// indices. The caller uses this to translate section start/end indices
// returned by the analyzer back into the original entries array, so
// the final transcript displayed to the user keeps its full granularity
// — only the analyzer sees the coarser view.
//
// FLAGGED TO WATCH: ship 0.2.28. Auto-coalesce may degrade analysis
// quality on borderline content (the LLM sees fewer, longer segments
// — section boundaries get blurrier). If users report missed topics or
// imprecise section starts on long content, the alternative is real
// chunked analysis (split into overlapping windows, analyze each,
// stitch sections at boundaries) — significantly more involved.
/**
 * Count-driven coalescing: shrink a transcript to roughly `maxEntries`
 * buckets and record which original indices each bucket covers.
 *
 * @param {Array<{offset: number, text: string, duration: number}>} entries
 * @param {number} [maxEntries=400] - Inputs at or below this length are
 *   returned untouched.
 * @returns {{coalesced: Array, indexMap: Array<{startOrig: number,
 *   endOrig: number}>|null}} `indexMap` is `null` when no coalescing was
 *   done (short input, non-array input, or non-positive total duration).
 */
function coalesceForAnalysis(entries, maxEntries = 400) {
  const untouched = { coalesced: entries, indexMap: null };
  if (!Array.isArray(entries) || entries.length <= maxEntries) {
    return untouched;
  }

  const tail = entries[entries.length - 1];
  const totalDuration = (tail.offset || 0) + (tail.duration || 0);
  if (totalDuration <= 0) {
    return untouched;
  }

  // Bucket width: spread the total duration over maxEntries buckets, but
  // never go below 15 seconds.
  const bucketSeconds = Math.max(15, Math.ceil(totalDuration / maxEntries));

  // Buckets and their index ranges are appended when a bucket opens. Each
  // range initially runs to the end of the input and is trimmed back when
  // the next bucket opens.
  const coalesced = [];
  const indexMap = [];
  entries.forEach((entry, i) => {
    const text = (entry.text || "").trim();
    if (text === "") return;

    const open = coalesced.length > 0 ? coalesced[coalesced.length - 1] : null;
    const opensNewBucket =
      open === null || (entry.offset || 0) - open.offset >= bucketSeconds;

    if (opensNewBucket) {
      if (open !== null) {
        indexMap[indexMap.length - 1].endOrig = i - 1;
      }
      coalesced.push({ offset: entry.offset, text, duration: entry.duration || 0 });
      indexMap.push({ startOrig: i, endOrig: entries.length - 1 });
      return;
    }

    open.text = `${open.text} ${text}`.replace(/\s+/g, " ").trim();
    open.duration = (entry.offset || 0) + (entry.duration || 0) - open.offset;
  });

  return { coalesced, indexMap };
}
// ── In-flight free-tier job status + cancel ─────────────────────────────────
// Lets the web UI render a "Currently processing X — Cancel" banner after
// a browser refresh, when the SSE stream from the original /api/process
// call is no longer attached. Only the free-tier slot is tracked today;
// paid-tier batch queueing happens client-side.
// Report the in-flight free-tier job, if any. `?logs=1` (or `?logs=true`)
// asks for the job's accumulated log buffer as well, so a page reloaded
// mid-pipeline can repopulate its activity log; without it the response
// is the lightweight header-only shape used by the banner poll.
app.get("/api/process/current", (req, res) => {
  const logsFlag = req.query.logs;
  const includeLogs = ["1", "true"].includes(logsFlag);
  const job = getCurrentFreeJob({ includeLogs });
  res.json({ job });
});
// Request cancellation of the in-flight free-tier job. `cancelled` tells
// the caller whether there was a job to abort.
app.post("/api/process/cancel", (_req, res) => {
  // The in-flight yt-dlp / model API call is not killed here. The
  // pipeline polls isFreeJobAborted() at each major step and throws
  // early, landing in the request handler's finally block where the slot
  // is released. Cancellation latency is therefore bounded by the time
  // to the next checkpoint (a few seconds in practice, up to the length
  // of one outstanding model call).
  const hadJob = abortCurrentFreeJob();
  res.json({ ok: true, cancelled: Boolean(hadJob) });
});
// ── Auto-discovery of provider connection info ──────────────────────────────
// The picker UI hits this on boot to pre-fill placeholders for providers
// that have a server-detectable default — most notably Ollama on
// StartOS, reachable at the documented `http://<package-id>.startos:<port>`
// internal hostname when installed alongside Recap.
// Best-effort discovery of provider defaults. Any failure simply leaves
// the corresponding key out of the response; this endpoint never errors.
app.get("/api/providers/discover", async (_req, res) => {
  const discovered = {};
  // Ollama: use the URL stored in config when there is one; otherwise
  // probe the StartOS service-discovery hostname and use it only if it
  // answers. (No other hosts are probed.)
  try {
    const snapshot = await config.getConfigSnapshot();
    const configuredUrl = (snapshot.ollama_base_url || "").trim();

    let found = null;
    if (configuredUrl) {
      found = { baseURL: configuredUrl, source: "config" };
    } else {
      const candidate = "http://ollama.startos:11434";
      let reachable = false;
      try {
        const probe = await fetch(`${candidate}/api/tags`, {
          signal: AbortSignal.timeout(1500),
        });
        reachable = probe.ok;
      } catch {
        reachable = false;
      }
      if (reachable) {
        found = { baseURL: candidate, source: "startos-dep" };
      }
    }

    if (found) {
      // List installed models from /api/tags so the picker UI can offer
      // them as dropdown options. A failed listing just yields [].
      let models = [];
      try {
        const tagsUrl = `${found.baseURL.replace(/\/$/, "")}/api/tags`;
        const tagsRes = await fetch(tagsUrl, {
          signal: AbortSignal.timeout(2000),
        });
        if (tagsRes.ok) {
          const payload = await tagsRes.json();
          models = (payload.models || [])
            .map((entry) => entry.name)
            .filter(Boolean);
        }
      } catch {}
      discovered.ollama = {
        baseURL: found.baseURL,
        source: found.source,
        models,
      };
    }
  } catch {}
  res.json(discovered);
});
// Quick connection-test endpoint. The picker UI Test button hits this
// to verify a provider+model+opts combo actually works before the user
// commits to using it. Sends a tiny prompt and returns the model's
// 3-word answer + latency, or a clear error string.
// Connection test for a provider + model + opts combination.
//   400 — bad input, unknown provider, provider construction failure, or a
//         provider that cannot analyze text
//   200 — `{ ok: true, text, latencyMs, provider, model }` on success, or
//         `{ ok: false, error, latencyMs }` when the round-trip fails
app.post("/api/providers/test", async (req, res) => {
  const { providerId, model, opts: clientOpts } = req.body || {};
  const badRequest = (error) => res.status(400).json({ ok: false, error });

  if (!providerId || typeof providerId !== "string") {
    return badRequest("missing providerId");
  }
  if (!model || typeof model !== "string") {
    return badRequest("missing model");
  }
  if (!PROVIDER_NAMES.includes(providerId)) {
    return badRequest(`unknown provider: ${providerId}`);
  }

  let provider;
  try {
    const cfg = await config.getConfigSnapshot();
    const safeClientOpts =
      clientOpts && typeof clientOpts === "object" ? clientOpts : {};
    const resolvedOpts = resolveProviderOpts(providerId, {
      config: cfg,
      clientOpts: safeClientOpts,
      req,
    });
    provider = getProvider(providerId, resolvedOpts);
  } catch (err) {
    return badRequest(err.message);
  }
  if (!provider.capabilities.analyze) {
    return badRequest(`${providerId} cannot analyze text`);
  }

  const startedAt = Date.now();
  const elapsedMs = () => Date.now() - startedAt;

  try {
    if (providerId === "relay") {
      // Relay special case: a real analyzeText round-trip would burn one
      // of the user's credits per click. pingBalance verifies
      // connectivity + auth and returns the balance without charging.
      const env = await provider.pingBalance({ timeoutMs: 5000 });
      const latencyMs = elapsedMs();
      const credits = env?.credits_remaining;
      const tier = env?.tier || "core";
      let summary = `Connected · Tier: ${tier}`;
      if (credits != null) {
        summary = `Connected · Tier: ${tier} · ${credits} credit${credits === 1 ? "" : "s"} remaining`;
      }
      return res.json({
        ok: true,
        text: summary,
        latencyMs,
        provider: providerId,
        model,
      });
    }

    const result = await provider.analyzeText({
      prompt: "Respond with exactly three words confirming you received this prompt.",
      model,
      retries: 0,
    });
    const latencyMs = elapsedMs();
    return res.json({
      ok: true,
      text: (result.text || "").trim().slice(0, 200),
      latencyMs,
      provider: providerId,
      model,
    });
  } catch (err) {
    // Failures are reported in-band (HTTP 200, ok: false).
    return res.json({
      ok: false,
      error: (err?.message || String(err)).slice(0, 300),
      latencyMs: elapsedMs(),
    });
  }
});
// Per-provider, per-field boolean indicating whether the StartOS
// config has a non-empty value for each PROVIDER_KEY_FIELDS slot.
// The picker UI uses this to (a) show a "✓ Server-configured" hint
// under empty inputs, and (b) decide whether the Delete button is
// visible when localStorage is empty but the server has a value.
//
// Never returns the actual values — only booleans. Anything stored in
// startos-config.json could be secret and shouldn't surface in any
// response that could end up in a screenshot.
// For every provider / option pair in PROVIDER_KEY_FIELDS, report whether
// the server config holds a non-blank string for it. Only booleans are
// returned, never the stored values.
app.get("/api/providers/credentials-status", async (_req, res) => {
  const { PROVIDER_KEY_FIELDS } = await import("./providers/index.js");
  const cfg = await getConfigSnapshot();
  const isSet = (value) => typeof value === "string" && value.trim().length > 0;

  const status = {};
  Object.keys(PROVIDER_KEY_FIELDS).forEach((providerId) => {
    const fields = PROVIDER_KEY_FIELDS[providerId];
    const perField = {};
    status[providerId] = perField;
    Object.keys(fields).forEach((optName) => {
      perField[optName] = isSet(cfg[fields[optName]]);
    });
  });

  res.json({ status });
});
// Clear all server-side config fields for a provider (the StartOS
// action-set values). The picker UI's Delete button calls this in
// addition to wiping localStorage so credentials are gone from BOTH
// storage paths. Returns the list of fields that were cleared.
// Blank out every server-side config field mapped to one provider and
// report which fields were touched.
//   400 — unknown provider id
//   500 — config write failed
//   200 — `{ ok: true, cleared: [...] }`
app.post("/api/providers/:id/clear", async (req, res) => {
  const providerId = req.params.id;
  if (!PROVIDER_NAMES.includes(providerId)) {
    return res
      .status(400)
      .json({ ok: false, error: `unknown provider: ${providerId}` });
  }

  // The patch is { config_field: "" } for each field in this provider's
  // PROVIDER_KEY_FIELDS entry. A provider with no entry produces an empty
  // patch, which still answers 200 with an empty `cleared` list.
  const { PROVIDER_KEY_FIELDS } = await import("./providers/index.js");
  const fieldMap = PROVIDER_KEY_FIELDS[providerId] || {};
  const patch = {};
  Object.values(fieldMap)
    .filter((cfgKey) => typeof cfgKey === "string")
    .forEach((cfgKey) => {
      patch[cfgKey] = "";
    });

  try {
    await config.mergeConfig(patch);
    return res.json({ ok: true, cleared: Object.keys(patch) });
  } catch (err) {
    const error = (err?.message || String(err)).slice(0, 300);
    return res.status(500).json({ ok: false, error });
  }
});
app.post("/api/process", async (req, res) => {
// ── Multi-mode credit gate ───────────────────────────────────────────────
// Four reachable states arriving here in multi-mode:
// (a) req.user is admin → no local gate (operator pool)
// (b) req.user with license → license tier handles the gate (license-mw)
// (b2) paid cloud user → relay-side user:<id> quota gates it; NO
// local tenant_credits gate (core-decoupling)
// (c) req.user free tenant → tenant_credits.balance gate + debit
// (d) req.trial holder → trial cookie gate + debit
// (e) anonymous, no trial → try issueIfEligible, then re-check
//
// The signed-in-free-tenant case (c) was unhandled pre-0.2.92: their
// tenant_credits.balance was DISPLAYED but never gated or debited,
// so they could summarize indefinitely. Now we check + debit
// alongside the trial flow.
//
// Core-decoupling: paid status is the relay-owned tier (pro/max), NOT a
// Keysat license. Paid cloud users are metered by the relay's user:<id>
// monthly quota (its own 402 envelope), so they skip this local gate.
// Everyone else signed in (tier core) is a free tenant gated on
// tenant_credits — including accounts that still carry a now-ignored
// legacy license.
const isPaidCloudUser =
req.user && (req.user.tier === "pro" || req.user.tier === "max");
if (
RECAP_MODE === "multi" &&
req.user &&
!req.user.is_admin &&
!isPaidCloudUser
) {
// Free signed-in tenant — gate on tenant_credits.balance. We
// CHECK here at request start; the actual debit happens after
// saveToHistory succeeds (so a failed pipeline doesn't burn a
// credit). Same pattern as the trial flow below.
try {
const { getOrInit } = await import("./tenant-credits.js");
const credits = await getOrInit(req.user.id);
if (!credits || credits.total <= 0) {
return res.status(402).json({
error: "credits_exhausted",
message:
"You're out of credits. Buy more or wait for your free credits to refresh.",
});
}
// Stash for the post-save debit step
req.creditsToDebit = "tenant";
} catch (err) {
console.error("[/api/process] tenant_credits gate failed:", err);
return res.status(500).json({ error: "internal_error" });
}
}
if (RECAP_MODE === "multi" && !req.user) {
if (!req.trial && typeof req.userId === "undefined") {
// Pre-trial visitor on their first POST. Try to mint a cookie.
try {
const { issueIfEligible } = await import("./anon-trial.js");
const trial = await issueIfEligible({ req, res });
if (trial) {
req.userId = `anon:${trial.cookie_id}`;
req.trial = trial;
}
} catch (err) {
console.warn("[/api/process] anon-trial mint failed:", err);
}
}
if (!req.trial) {
// No session, no trial — either trials disabled, IP-capped, or
// their previous trial is exhausted (middleware fell through).
return res.status(401).json({
error: "trial_unavailable",
message:
"Sign in to keep using Recaps. New here? Create an account in a minute — no payment required.",
});
}
// Verify trial still has budget at request time. Cheap to check,
// catches the case where a parallel request just spent the last
// credit between middleware lookup and this handler firing.
const { lookupTrial, hasTrialBudget } = await import("./anon-trial.js");
const fresh = lookupTrial(req.trial.cookie_id);
if (!fresh || !hasTrialBudget(fresh)) {
return res.status(402).json({
error: "trial_exhausted",
message:
"You've used all your free trials. Sign up for an account to keep going.",
});
}
req.trial = fresh;
}
let {
url, apiKey: clientKey, model, type: itemType, title: itemTitle, uploadDate: itemUploadDate, episodeId,
transcriptionProvider: reqTransProvider,
transcriptionModel: reqTransModel,
analysisProvider: reqAnaProvider,
analysisModel: reqAnaModel,
providerOpts: reqProviderOpts,
useYouTubeCaptions: reqUseYTCaptions,
} = req.body;
// Defensive: a stale browser cache or older client might submit
// the literal string "Untitled" as the title (a sentinel that
// earlier Recap versions used as a falsy placeholder but which is
// actually truthy and pollutes every downstream gate — see relay
// 0.2.57's same normalization). Strip it here at the request
// boundary so all subsequent code can rely on "itemTitle is either
// a real title or empty". Also trim whitespace + drop "untitled"
// case-insensitively in case some other client variant emits it.
if (typeof itemTitle === "string") {
const trimmed = itemTitle.trim();
if (trimmed === "" || trimmed.toLowerCase() === "untitled") {
itemTitle = "";
} else {
itemTitle = trimmed;
}
}
// Default: use captions when available (huge speed/cost win). The
// picker-UI toggle lets users force a full transcription pass when
// they want speaker labels (captions don't have them) or when the
// auto-captions quality is too low.
const useYouTubeCaptions = reqUseYTCaptions !== false;
// Sentinel error message thrown by checkCancelled() when the user
// hits the in-flight banner's Cancel button. Declared at handler
// scope (not inside the try) so the catch block can compare against
// it — see the catch a few hundred lines below.
const CANCELLED_MARK = "__recap_cancelled__";
// Per-provider client-side opts: { gemini: {apiKey}, anthropic: {apiKey},
// openai: {apiKey}, "openai-compatible": {apiKey, baseURL}, ollama: {baseURL} }.
// Each provider's opts override that provider's config-stored values
// (set via the StartOS actions). Used by the picker UI to BYO keys
// per provider without round-tripping the StartOS dashboard.
const providerOpts = (reqProviderOpts && typeof reqProviderOpts === "object") ? reqProviderOpts : {};
// Provider selection: each pipeline step (transcribe + analyze) can
// independently target any registered provider. Both default to gemini
// so existing clients (which don't send provider fields) keep working.
const transcriptionProvider = reqTransProvider || "gemini";
const analysisProvider = reqAnaProvider || "gemini";
// Free tier: unlicensed users can summarize one video at a time. They
// still bring their own key — same as paid users today; the key can
// come from either the StartOS config action (server-side) or the
// web UI Settings panel (client-side). The future "bundled key" relay
// (paid users' requests proxied through the operator's service) isn't
// built yet, so there's nothing here that gates key sourcing by tier.
// AbortController for this request. Fired by abortCurrentFreeJob()
// when the user hits Cancel — passed through to every provider SDK
// call (transcription + analysis) so in-flight network requests
// reject immediately instead of running to completion.
const abortController = new AbortController();
// Stable identifier for this summarize job. Sent to the relay
// (when used) as `X-Recap-Job-Id`. The relay charges 1 credit on
// the first call with a given jobId and treats subsequent calls
// with the same id as free — so a full summary (transcribe +
// analyze) costs one credit regardless of which steps route
// through the relay. Non-relay providers ignore this opt.
const jobId = randomUUID();
// The free-tier single-flight lock is a single-mode concept (one operator,
// BYO key, one job at a time). In multi mode, per-tenant credit metering is
// the resource control, so a process-global lock would wrongly serialize
// every tenant onto one job at a time — never apply it there.
const isFree = req.recapMode !== "multi" && isFreeUser();
if (isFree) {
if (!tryAcquireFreeSlot({ url, title: itemTitle, abortController })) {
const current = getCurrentFreeJob();
const elapsedSec = current ? Math.round(current.elapsedMs / 1000) : 0;
const what = current?.title || current?.url || "another video";
return res.status(409).json({
error: "processing_in_progress",
message:
`A summary is already being processed (${what}, started ${elapsedSec}s ago). Free mode handles one video at a time — wait for it to finish, or cancel it from the status bar at the top of the app.`,
currentJob: current,
});
}
}
if (!url) {
if (isFree) releaseFreeSlot();
return res.status(400).json({ error: "Missing url" });
}
if (!PROVIDER_NAMES.includes(transcriptionProvider)) {
if (isFree) releaseFreeSlot();
return res.status(400).json({ error: "unknown_provider", message: `Unknown transcription provider: ${transcriptionProvider}` });
}
if (!PROVIDER_NAMES.includes(analysisProvider)) {
if (isFree) releaseFreeSlot();
return res.status(400).json({ error: "unknown_provider", message: `Unknown analysis provider: ${analysisProvider}` });
}
// Resolve per-provider construction opts from the StartOS config blob,
// overlaying any client-supplied opts (req.body.providerOpts[name]).
// For Gemini, the legacy single "apiKey" field on the request body
// also flows through as a fallback when providerOpts.gemini.apiKey
// isn't set — keeps pre-picker-UI clients working.
const cfg = await getConfigSnapshot();
/**
 * Returns the client-supplied construction opts for one provider.
 *
 * Reads `providerOpts[name]` from the request body, ignoring anything
 * that is not an object. For "gemini" only, when the body carries no
 * `apiKey`, the legacy single-key request field (`clientKey`, passed
 * through `resolveApiKey`) is layered on top as a fallback.
 *
 * @param {string} name - Registered provider name.
 * @returns {object} The body's opts object, a copy with the legacy
 *   Gemini key added, or a fresh empty object.
 */
function clientOptsFor(name) {
  const candidate = providerOpts[name];
  const supplied = candidate && typeof candidate === "object" ? candidate : {};
  // Only Gemini has a legacy key path, and an explicit key always wins.
  if (name !== "gemini" || supplied.apiKey) {
    return supplied;
  }
  const legacyKey = resolveApiKey(clientKey);
  return legacyKey ? { ...supplied, apiKey: legacyKey } : supplied;
}
const transcriptionOpts = resolveProviderOpts(transcriptionProvider, {
config: cfg,
clientOpts: clientOptsFor(transcriptionProvider),
req,
});
const analysisOpts = resolveProviderOpts(analysisProvider, {
config: cfg,
clientOpts: clientOptsFor(analysisProvider),
req,
});
let transcriber;
try {
transcriber = getProvider(transcriptionProvider, transcriptionOpts);
} catch (err) {
if (isFree) releaseFreeSlot();
return res.status(400).json({
error: "transcription_provider_not_configured",
message: `Transcription provider ${transcriptionProvider} is not configured: ${err.message}`,
});
}
if (!transcriber.capabilities.transcribe) {
if (isFree) releaseFreeSlot();
return res.status(400).json({
error: "transcription_unsupported",
message: `Provider ${transcriptionProvider} does not support audio transcription. Pick a different transcription provider (gemini or openai).`,
});
}
let analyzer;
try {
analyzer = transcriptionProvider === analysisProvider
? transcriber
: getProvider(analysisProvider, analysisOpts);
} catch (err) {
if (isFree) releaseFreeSlot();
return res.status(400).json({
error: "analysis_provider_not_configured",
message: `Analysis provider ${analysisProvider} is not configured: ${err.message}`,
});
}
if (!analyzer.capabilities.analyze) {
if (isFree) releaseFreeSlot();
return res.status(400).json({
error: "analysis_unsupported",
message: `Provider ${analysisProvider} does not support text analysis.`,
});
}
// ── Resolve Apple Podcasts / Spotify share URLs ──
// Most users paste apple/spotify share links rather than RSS feeds.
// Resolve those to a direct audio enclosure URL up-front so the rest
// of the handler runs the existing podcast pipeline unchanged. On
// resolver failure we surface a clear error rather than letting the
// downstream code stumble on a URL it can't process.
let resolvedFrom = null; // "apple" | "spotify" | null — for the result event
if (isResolvableShareUrl(url)) {
try {
const resolved = await resolveShareUrl(url, {
podcastIndexKey: cfg.podcastindex_api_key,
podcastIndexSecret: cfg.podcastindex_api_secret,
});
if (resolved) {
console.log(
` [url-resolver] ${resolved.source}${resolved.audioUrl.slice(0, 80)} ("${resolved.title}")`
);
url = resolved.audioUrl;
if (!itemTitle) itemTitle = resolved.title;
if (!itemUploadDate) itemUploadDate = resolved.uploadDate;
if (!episodeId) episodeId = resolved.episodeId;
itemType = "podcast";
resolvedFrom = resolved.source;
}
} catch (err) {
if (isFree) releaseFreeSlot();
const code = err instanceof URLResolveError ? err.code : "resolve_failed";
return res.status(400).json({
error: code,
message: err?.message || "Failed to resolve share URL",
});
}
}
// Determine if this is a podcast episode or YouTube video
const isPodcast = itemType === "podcast" || /\.(mp3|m4a|ogg|opus|wav|aac)(\?|$)/i.test(url);
const videoId = isPodcast ? (episodeId || url) : extractVideoId(url);
if (!isPodcast && !videoId) {
if (isFree) releaseFreeSlot();
return res.status(400).json({ error: "Invalid YouTube URL" });
}
const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "yts-"));
// Container extension for the temp audio file. For podcasts, prefer the
// extension that terminates the URL path (followed by "?" or end of
// string — the same anchoring the podcast detection uses) so a ".mp3"
// that merely appears in a host or directory name does not win over the
// real file extension. Fall back to the first extension-like match, then
// to "mp3". The regex is case-insensitive, so the capture is lower-cased
// to keep the temp filename and the MIME lookup below consistent
// (otherwise "EP.M4A" would miss the table and be labelled audio/mp3).
const audioExt = isPodcast
  ? (
      url.match(/\.(mp3|m4a|ogg|opus|wav|aac)(?=\?|$)/i)?.[1] ||
      url.match(/\.(mp3|m4a|ogg|opus|wav|aac)/i)?.[1] ||
      "mp3"
    ).toLowerCase()
  : "mp3";
const audioPath = path.join(tmpDir, `audio.${audioExt}`);
// MIME type later passed as `mimeType` to the relay transcribe-url call;
// unknown extensions default to audio/mp3.
const mimeType = { mp3: "audio/mp3", m4a: "audio/mp4", ogg: "audio/ogg", opus: "audio/opus", wav: "audio/wav", aac: "audio/aac" }[audioExt] || "audio/mp3";
try {
const pipelineStart = Date.now();
// Set up SSE
res.writeHead(200, {
"Content-Type": "text/event-stream",
"Cache-Control": "no-cache",
Connection: "keep-alive",
});
// Helper to send log entries with elapsed time. Also pushes onto the
// free-tier in-flight job's server-side buffer (no-op when there is
// no free job, e.g. licensed user) so a browser refresh during a
// long pipeline can re-hydrate the activity log from the server
// instead of starting blank.
const logHistory = [];
/**
 * Records one activity-log line for this request.
 *
 * Prints the line to the server console, appends it to the per-request
 * `logHistory` array and to the in-flight job buffer via
 * `appendCurrentJobLog`, then emits a "status" event followed by a
 * "log" event on the response stream.
 *
 * @param {number} step - Pipeline step number carried by the "status" event.
 * @param {string} message - Human-readable log text.
 * @param {*} [detail] - Optional extra detail; stored as null when falsy.
 */
function log(step, message, detail) {
  const elapsedMs = Date.now() - pipelineStart;
  // Seconds since the pipeline started, one decimal place, kept as a string.
  const elapsed = (elapsedMs / 1000).toFixed(1);
  console.log(` [${elapsed}s] ${message}`);

  const entry = { elapsed, message, detail: detail || null };
  logHistory.push(entry);
  appendCurrentJobLog(entry);

  const statusPayload = { step, message };
  sendEvent(res, "status", statusPayload);
  sendEvent(res, "log", entry);
}
// Cancellation checkpoint. Polled at each major pipeline boundary;
// throws CANCELLED_MARK (declared at handler scope above) when the
// user has hit Cancel on the status banner so the request bails
// out cleanly via the catch path. Also catches abort signals fired
// by abortCurrentFreeJob() in case an SDK call swallowed the abort
// and returned cleanly.
/**
 * Cancellation checkpoint for the pipeline.
 *
 * Throws an Error whose message is CANCELLED_MARK when this request's
 * AbortController has fired, or when the request holds the free-tier
 * slot and `isFreeJobAborted()` reports true. The free-job check is
 * only consulted when the abort signal has not already fired.
 *
 * @throws {Error} With message CANCELLED_MARK when the job was cancelled.
 */
function checkCancelled() {
  const cancelled =
    abortController.signal.aborted || (isFree && isFreeJobAborted());
  if (cancelled) {
    throw new Error(CANCELLED_MARK);
  }
}
// ── Recap Relay mode (unified pipeline) ──────────────────────
// When the user picks "Recap Relay" in the picker, both providers
// are set to "relay". The relay's /relay/summarize-url endpoint
// does the WHOLE pipeline (download + transcribe + chunked-analyze)
// server-side and streams per-window section results back to us
// over SSE. We forward those events to the browser as the existing
// event types (transcript_ready / sections_partial / result) so
// the front-end's incremental rendering keeps working unchanged.
//
// Why this branch exists instead of just letting the per-step
// pickers route to relay independently:
// 1. Latency — saves 12+ round-trips (one per analyze window)
// and the bandwidth of sending the full transcript back
// to Recap just for Recap to ship it back as 12 prompts.
// 2. Settings respect — the operator's Settings-tab knobs
// (window body, overlap, concurrency, etc.) only take
// effect when the relay drives the analyze loop. Per-step
// relay use would leave Recap's hardcoded constants in
// charge of windowing.
// 3. Credits clarity — one summarize = one credit, charged
// atomically on the relay side. No "what if transcribe
// succeeded but analyze fell back to Anthropic" accounting.
const relayMode =
transcriptionProvider === "relay" && analysisProvider === "relay";
if (relayMode) {
// Title surrogate — used for the activity log display + as a
// fallback if the relay doesn't echo a real title back (older
// relay, or a podcast/non-YouTube URL where yt-dlp metadata
// isn't available). Defaults to "Untitled" so log messages
// remain readable.
const titleSurrogate = itemTitle || "Untitled";
// titleHint sent to the relay — must NOT be the surrogate.
// Passing "Untitled" as a hint causes a subtle bug: the relay's
// download step uses `if (!title && audio.title) title = audio.title;`
// which treats any truthy operator-supplied title (including
// "Untitled") as a real hint and skips the yt-dlp-extracted
// title fallback. Net effect: the library entry stays
// "Untitled" forever even though yt-dlp got the real title.
// Pass only the actual operator/subscription-supplied title (or
// empty string, which the provider serializes as omitted JSON).
const titleHintRaw = itemTitle ? String(itemTitle).trim() : "";
// For YouTube URLs, fetch the metadata block (channel, description,
// chapters) BEFORE handing off to the relay. The relay can extract
// these fields itself via yt-dlp (relay 0.2.63+ does so) but
// fetching here gives the relay a head-start AND lets older relays
// that don't yet do their own extraction still get the speaker-ID
// metadata. The fetch costs ~1-2 seconds — small relative to the
// 1-3 minute pipeline. Defense-in-depth: belt-and-suspenders with
// the relay-side fallback.
//
// For podcasts, skip — fetchYouTubeMetadata is YouTube-specific
// and would return null anyway. The podcast resolver above
// already populated itemTitle when relevant; channel/description/
// chapters aren't available for podcast feeds.
let relayChannelHint = "";
let relayDescriptionHint = "";
let relayChaptersHint = [];
if (!isPodcast && videoId) {
try {
const meta = await fetchYouTubeMetadata(videoId);
if (meta) {
relayChannelHint = meta.channel || "";
relayDescriptionHint = meta.description || "";
relayChaptersHint = Array.isArray(meta.chapters) ? meta.chapters : [];
log(
1,
`YouTube metadata fetched: channel="${relayChannelHint.slice(0, 60)}", ${relayChaptersHint.length} chapter(s), description ${relayDescriptionHint.length} chars`
);
}
} catch (err) {
// Non-fatal — the relay's own yt-dlp extraction will fill these
// fields when our request body sends them as empty strings.
log(1, `YouTube metadata fetch failed: ${err?.message || err} — relay will extract via its own yt-dlp`);
}
}
log(1, `Recap Relay mode — full pipeline on operator's relay`);
// Cache of parsed transcript entries — populated by the
// onTranscribeComplete provider callback below as soon as the
// relay finishes the transcribe phase. Used by the per-window
// streaming callback to build full { title, summary, entries,
// startTime } chunk objects, which is the shape the browser's
// sections_partial handler reads via `data.chunks`. Without
// this cache, per-window streaming sent just titles + summaries
// and the browser had no entries to render under each section.
let streamedRelayEntries = [];
let relayResult;
// One-shot latch for the early "transcript_ready" event. Both relay
// callbacks (transcribe-complete and window-complete) call the emitter;
// whichever fires first flips the browser from the loading view to the
// results view, and every later call is a no-op.
let earlyTranscriptReadyEmitted = false;
/**
 * Emits "transcript_ready" on the response stream at most once.
 *
 * @param {object} [extra] - Extra fields merged over the default payload.
 */
const emitEarlyTranscriptReady = (extra = {}) => {
  if (!earlyTranscriptReadyEmitted) {
    earlyTranscriptReadyEmitted = true;
    const payload = {
      videoId,
      // Best-known title so far (the user-supplied hint or "Untitled");
      // the final "result" event carries the resolved title.
      videoTitle: titleSurrogate,
      videoChannel: relayChannelHint || "",
      videoChapters: relayChaptersHint || [],
      videoUploadDate: itemUploadDate || "",
      videoDescription: relayDescriptionHint || "",
      entries: streamedRelayEntries || [],
      type: isPodcast ? "podcast" : "youtube",
      // Pessimistic default; corrected at result time.
      willChunk: true,
      ...extra,
    };
    sendEvent(res, "transcript_ready", payload);
  }
};
try {
relayResult = await transcriber.summarizeUrl({
mediaUrl: url,
mediaType: isPodcast ? "podcast" : "youtube",
mimeType: undefined,
titleHint: titleHintRaw,
channelHint: relayChannelHint,
descriptionHint: relayDescriptionHint,
chaptersHint: relayChaptersHint,
onProgress: (msg) => log(2, msg),
// Capture transcript entries the moment the relay finishes
// transcribing. Analyze runs serially AFTER transcribe in
// the relay's pipeline — so by the time the FIRST
// onWindowComplete fires, this callback has already populated
// the entries cache below. That lets the onWindowComplete
// callback build proper { title, summary, entries, startTime }
// chunk objects (the shape the browser's sections_partial
// handler expects under .chunks) rather than the raw
// ownedSections-without-entries we sent pre-v0.2.75.
onTranscribeComplete: ({ transcript }) => {
streamedRelayEntries = parseTimestampedTranscript(transcript || "");
log(2, `Relay transcript parsed — ${streamedRelayEntries.length} segments cached for streaming`);
// Sequential-mode UI flip: emit transcript_ready as soon
// as transcribe is done so the browser switches from
// loading view to results view BEFORE the analyze
// sections start streaming in. Pipelined mode beats us to
// this from onWindowComplete (the first window may
// complete before transcribe is fully done) — either way
// the emitter is idempotent.
emitEarlyTranscriptReady();
},
// Per-window streaming: forward each window's owned sections
// to the browser as sections_partial events with the chunk
// SHAPE the browser handler expects (.chunks not
// .partialSections). Without this, the browser's
// `data.chunks || []` read at the sections_partial handler
// resolved to an empty array and the operator saw zero
// sections appear during the analyze phase — they only
// landed when the terminal "result" event arrived at the
// very end of the pipeline.
onWindowComplete: ({ windowIdx, totalWindows, ownedSections, windowEntries }) => {
// Pipelined-mode UI flip: if the first window completes
// before transcribe-done (very possible — first window
// only needs ~20min of audio fully transcribed), emit
// transcript_ready NOW so the browser flips to results
// view in time for these partial sections to render.
emitEarlyTranscriptReady();
// Pipelined mode (relay v0.2.89+): each window arrives
// with its OWN entries embedded, before transcribe has
// fully completed. Section indices are LOCAL to those
// window entries, so we slice them directly.
//
// Sequential mode (older relays, or Gemini-transcribe
// path): windowEntries is null. The relay's transcribe
// phase has already finished by the time these arrive,
// so streamedRelayEntries (populated by
// onTranscribeComplete) is non-empty and section
// indices are global into it.
const entries = Array.isArray(windowEntries) && windowEntries.length > 0
? windowEntries
: (streamedRelayEntries || []);
const partialChunks = (ownedSections || []).map((s) => {
const start = Math.max(0, s.startIndex ?? 0);
const end = entries.length > 0
? Math.min(entries.length - 1, s.endIndex ?? start)
: (s.endIndex ?? start);
const slice = entries.length > 0 ? entries.slice(start, end + 1) : [];
return {
title: s.title,
summary: s.summary,
entries: slice,
startTime: slice[0]?.offset || 0,
};
}).filter((c) => c.entries.length > 0 || !entries.length);
sendEvent(res, "sections_partial", {
windowIdx,
totalWindows,
chunks: partialChunks,
});
},
signal: abortController.signal,
jobId,
});
} catch (err) {
if (abortController.signal.aborted) throw new Error(CANCELLED_MARK);
sendEvent(res, "error", { message: err?.message || String(err) });
sendEvent(res, "result", { videoId, entries: [], chunks: [], rawTranscript: "" });
if (isFree) releaseFreeSlot();
res.end();
return;
}
checkCancelled();
// Backfill the real title now that the relay has finished. The
// relay extracts it via yt-dlp during the download step (or
// echoes back the client-supplied hint) and returns it in the
// result envelope's `title` field — see relay 0.2.53+. For older
// relays the field is null, so we keep the original surrogate.
// Trim + drop empty strings to avoid storing whitespace-only
// titles that would render as a blank row in the library list.
const relayTitle = typeof relayResult.title === "string"
? relayResult.title.trim()
: "";
const resolvedTitle = relayTitle || titleSurrogate;
// Parse the bracketed transcript into the entries array shape
// Recap's UI / history layer expects.
const relayEntries = parseTimestampedTranscript(relayResult.transcript || "");
// Phase 1E — attach speaker labels to each parsed entry by
// time-matching against the raw per-segment diarization data
// the relay returned. The relay's `transcript_segments` are the
// fine-grained Parakeet segments (often 1-3s each) with a
// `speaker` field per segment. Recap's `relayEntries` are the
// MERGED readable lines (sortAndDedupeTranscript + mergeShort-
// Entries collapsed multiple Parakeet segments into one
// sentence-sized thought). To attach speakers to merged
// entries: for each entry, find the raw segment whose [start,
// end] contains the entry's offset (start time). The merged
// entry's start time always corresponds to the start of one of
// the underlying segments — that segment's speaker is the
// entry's speaker. Speaker stays null when:
// - relay version < 0.2.88 (no transcript_segments field)
// - diarization was off on the operator's relay
// - relay couldn't match the entry's time to any segment
const relaySegments = Array.isArray(relayResult.transcript_segments)
? relayResult.transcript_segments
: null;
if (relaySegments && relaySegments.length > 0) {
  // Work on a copy sorted by start time so each entry can be matched
  // with a binary search; the relay's own array is left untouched.
  const sortedSegs = relaySegments
    .slice()
    .sort((a, b) => (a.start || 0) - (b.start || 0));
  // Largest gap (seconds) allowed between an entry's offset and the
  // nearest edge of a segment for the fallback assignment. Beyond this
  // the attribution is probably bogus and the speaker is left unset.
  const MAX_SPEAKER_GAP_SECONDS = 5;
  for (const entry of relayEntries) {
    const t = entry.offset || 0;
    // Binary search for a segment whose [start, end] contains t.
    let lo = 0;
    let hi = sortedSegs.length - 1;
    let found = null;
    while (lo <= hi) {
      const mid = (lo + hi) >> 1;
      const seg = sortedSegs[mid];
      if (t < (seg.start || 0)) {
        hi = mid - 1;
      } else if (t > (seg.end || 0)) {
        lo = mid + 1;
      } else {
        found = seg;
        break;
      }
    }
    // No segment contains t (gaps in diarization output): fall back to
    // whichever neighbour of the insertion point is nearest to t,
    // measured from t to the segment's closest EDGE. Measuring to the
    // segment midpoint (the previous behaviour) rejected long segments
    // that sit right next to t — e.g. an entry 0.2s after a 12s segment
    // scored 6.2s and was dropped by the 5s cap.
    if (!found) {
      const idx = Math.min(Math.max(lo, 0), sortedSegs.length - 1);
      // Candidate order is kept (insertion point first) so ties resolve
      // the same way as before.
      const candidates = [
        sortedSegs[idx],
        sortedSegs[idx - 1],
        sortedSegs[idx + 1],
      ].filter(Boolean);
      let bestGap = Infinity;
      for (const seg of candidates) {
        const segStart = seg.start || 0;
        const segEnd = seg.end || 0;
        let gap = 0;
        if (t < segStart) {
          gap = segStart - t;
        } else if (t > segEnd) {
          gap = t - segEnd;
        }
        if (gap < bestGap) {
          bestGap = gap;
          found = seg;
        }
      }
      if (bestGap > MAX_SPEAKER_GAP_SECONDS) found = null;
    }
    // Only annotate the entry when the matched segment carries a speaker
    // label; otherwise the entry keeps no speaker fields at all.
    if (found && found.speaker) {
      entry.speaker = found.speaker;
      entry.speaker_confidence =
        typeof found.speaker_confidence === "number"
          ? found.speaker_confidence
          : null;
      // Uncertainty flag from the relay; coerced to a boolean so it is
      // false when the relay does not send the field.
      entry.speaker_uncertain = !!found.speaker_uncertain;
    }
  }
}
const contentType = isPodcast ? "podcast" : "youtube";
// Note: transcript_ready was emitted EARLIER by
// emitEarlyTranscriptReady() — either from the first
// onWindowComplete (pipelined mode) or from
// onTranscribeComplete (sequential mode). Re-emitting here
// would either (a) be a no-op since the browser handler is
// already in results view, or (b) double-trigger the chunk
// wipe / streamWindowsTotal reset. The real title arrives
// via the result event below — browser's result handler
// updates state.videoTitle on receipt — so dropping the
// duplicate emit is safe.
// Build chunks from the relay's stitched analysis sections.
const sections = Array.isArray(relayResult.analysis?.sections)
? relayResult.analysis.sections
: [];
const chunks = sections
.map((s) => {
const start = Math.max(0, s.startIndex ?? 0);
const end = Math.min(relayEntries.length - 1, s.endIndex ?? start);
const slice = relayEntries.slice(start, end + 1);
return {
title: s.title,
summary: s.summary,
entries: slice,
startTime: slice[0]?.offset || 0,
};
})
.filter((c) => c.entries.length > 0);
// Persist the finished recap. Saving is best-effort: a failure must not
// prevent the result from reaching the browser, so the error is logged
// (instead of being swallowed silently) and historyId falls back to null.
const historyId = await saveToHistory(
  scopeForRequest(req),
  videoId,
  url,
  resolvedTitle,
  chunks,
  relayEntries,
  logHistory,
  itemUploadDate || "",
  contentType,
  relayResult.speakers || null,
  relayResult.speaker_names || null,
).catch((err) => {
  console.warn("[/api/process] saveToHistory failed:", err);
  return null;
});
// Debit local credit on success — trial cookie OR free signed-in
// tenant, depending on who's making the request. AFTER
// saveToHistory rather than at request-start so a failed pipeline
// doesn't burn a credit.
if (RECAP_MODE === "multi" && req.trial) {
try {
const { debitOne } = await import("./anon-trial.js");
debitOne(req.trial.cookie_id);
} catch (err) {
console.warn("[/api/process] trial debit failed:", err);
}
} else if (RECAP_MODE === "multi" && req.creditsToDebit === "tenant") {
try {
const { gateAndDebit } = await import("./tenant-credits.js");
await gateAndDebit(req.user.id);
} catch (err) {
console.warn("[/api/process] tenant_credits debit failed:", err);
}
}
sendEvent(res, "result", {
videoId,
videoTitle: resolvedTitle,
entries: relayEntries,
chunks,
historyId,
type: contentType,
// Phase 1E — speaker legend + per-segment confidence summary.
// Null when diarization wasn't available (older relay, or
// operator disabled it). Frontend renders a colored chip per
// speaker above the topic list; each transcript line uses
// the entry.speaker field (already attached above) to pick
// the matching chip color.
speakers: relayResult.speakers || null,
// Phase 2 — inferred speaker names from the relay's post-
// cluster polish pass. Map { Speaker_A: "Matt Hill", ... }
// with null values for speakers the LLM couldn't confidently
// name. Recap legend prefers the inferred name when present.
speaker_names: relayResult.speaker_names || null,
});
if (isFree) releaseFreeSlot();
res.end();
return;
}
let videoTitle = itemTitle || "Untitled";
let videoUploadDate = itemUploadDate || "";
// Rich metadata used to ground the transcription prompt (speaker
// names from the channel/description/chapters) and surfaced to
// downstream UI. Populated for YouTube videos only; podcasts have
// their own (lighter) metadata path.
let videoChannel = "";
let videoDescription = "";
let videoChapters = [];
// If captions came from YouTube we skip Step 1 (download audio) +
// Step 2 (transcribe) and jump straight to analysis. `entries` is
// also the post-Step-2 output of the transcription path, so this
// value just flows through.
let entries = null;
let captionSource = null; // "manual" | "auto" | null
let transcriptText = "";
let txCost = { inputTokens: 0, outputTokens: 0, thinkingTokens: 0, totalTokens: 0, totalCost: "0", totalCostDisplay: "$0.00" };
// Populated from YouTube metadata or (later) Apple/Spotify resolution.
// Used by the relay-URL fast-path to decide if the audio fits the
// relay's reported capability. null = unknown → fast-path attempts
// anyway and lets the relay reject if the file is over its cap.
let knownDuration = null;
// Hoisted out of the audio-path block so Step 3 (analysis) can
// reference analysisModel even when the captions fast-path skips
// the entire audio + transcription block. transcriptionModel is
// only used inside the audio block but lives here for symmetry.
// Per-provider model defaults: caller's request → provider's first
// listed model → Gemini fallback (preserves the prior default for
// legacy clients that don't send model fields).
const transcriptionModel =
reqTransModel ||
transcriber.listTranscriptionModels()[0] ||
"gemini-3-flash-preview";
const analysisModel =
reqAnaModel ||
model ||
analyzer.listAnalysisModels()[0] ||
"gemini-3.1-pro-preview";
if (resolvedFrom) {
log(
0,
`Resolved ${
resolvedFrom === "apple" ? "Apple Podcasts" : "Spotify"
} share URL → ${url.slice(0, 80)}${url.length > 80 ? "…" : ""}`
);
}
// ── Step 0 (YouTube only): metadata + captions fast path ──
if (!isPodcast && videoId) {
log(1, "Fetching video metadata...");
const meta = await fetchYouTubeMetadata(videoId);
if (meta) {
// Title and upload date only replace the request-supplied values when
// the metadata actually carries them.
if (meta.title) videoTitle = meta.title;
if (meta.uploadDate) videoUploadDate = meta.uploadDate;
// Normalize the optional fields so a missing value cannot clobber the
// ""/[] defaults with undefined — the same normalization the relay-mode
// branch of this handler applies to the same metadata object. These
// values are later passed as hints to the transcription provider.
videoChannel = meta.channel || "";
videoDescription = meta.description || "";
videoChapters = Array.isArray(meta.chapters) ? meta.chapters : [];
// Duration is optional; only a positive number is trusted for the
// relay capability check further down.
if (typeof meta.duration === "number" && meta.duration > 0) {
  knownDuration = meta.duration;
}
log(1, `Video title: ${videoTitle}${videoChannel ? ` (${videoChannel})` : ""}`);
if ((meta.hasManualCaptions || meta.hasAutoCaptions) && !useYouTubeCaptions) {
log(1, `YouTube captions available but the user has the captions fast-path disabled — will transcribe audio directly.`);
} else if (meta.hasManualCaptions || meta.hasAutoCaptions) {
log(1, `YouTube captions available (${meta.hasManualCaptions ? "manual" : "auto"}) — attempting fast-path…`);
const cap = await tryFetchYouTubeCaptions(
videoId,
tmpDir,
{ hasManual: meta.hasManualCaptions, hasAuto: meta.hasAutoCaptions },
log
);
if (cap && cap.entries && cap.entries.length > 0) {
// Auto-captions fragment audio every 1-3 seconds (often only
// 1-3 words per entry). Sending hundreds of micro-segments
// to the analyzer balloons the prompt and overloads the
// model's index-tracking ("Section N must start at index
// K+1 of section M…"). Coalesce into ~15s blocks for a
// saner analysis prompt while keeping timestamps accurate.
const rawCount = cap.entries.length;
entries = coalesceTranscriptEntries(cap.entries, 15);
captionSource = cap.source;
log(
1,
`Using YouTube ${cap.source === "manual" ? "manual" : "auto-generated"} captions — ${rawCount} segments coalesced to ${entries.length}, skipping audio download + transcription`
);
} else {
log(1, "Captions unusable or failed to parse — falling back to audio transcription");
}
} else {
log(1, "No captions available — will download audio and transcribe");
}
}
}
checkCancelled();
// ── Step 0.5: Relay-URL fast-path ──
// When transcription routes through the operator's relay AND we
// have a public source URL (YouTube watch URL, or direct podcast
// .mp3/.m4a enclosure), hand the URL off to /relay/transcribe-url
// so the relay does the download itself. This saves the buyer's
// home upload bandwidth — often the slowest leg of the pipeline,
// since residential connections rarely match the relay's symmetric
// datacenter link. Only fires if:
// 1. captions fast-path didn't already populate entries
// 2. caller asked to use the relay for transcription
// 3. duration (when known) fits the relay's reported capability
// On any failure we fall through to the legacy local-download
// path, which has its own retry/fallback machinery.
if (!entries && transcriptionProvider === "relay") {
// Fresh fetch so the fast-path decision matches the routing
// decision the relay will make for THIS install — same
// rationale as the Step 2 refresh below.
await refreshRelayCapabilities().catch(() => {});
const caps = getRelayCapabilities();
const withinCapability =
typeof caps.max_audio_minutes !== "number" ||
knownDuration === null ||
knownDuration <= caps.max_audio_minutes * 60;
if (!withinCapability) {
log(
1,
`Relay-URL fast-path skipped — known duration ${(knownDuration / 60).toFixed(1)}min exceeds relay capability ${caps.max_audio_minutes}min`
);
} else {
const mediaType = isPodcast ? "podcast" : "youtube";
const handoffUrl = isPodcast
? url
: `https://www.youtube.com/watch?v=${videoId}`;
log(
1,
`Relay-URL fast-path: handing ${mediaType} URL to relay (saves your upload bandwidth)…`
);
try {
const relayResult = await transcriber.transcribeUrl({
mediaUrl: handoffUrl,
mediaType,
mimeType,
titleHint: videoTitle,
channelHint: videoChannel,
descriptionHint: videoDescription,
chaptersHint: videoChapters,
onProgress: (msg) => log(2, msg),
signal: abortController.signal,
jobId,
});
if (relayResult && relayResult.text) {
transcriptText = relayResult.text;
const parsed = parseTimestampedTranscript(transcriptText);
if (parsed.length === 0) {
log(
1,
`⚠ Relay-URL transcript parsed to zero segments — falling back to local download`
);
transcriptText = "";
entries = null;
} else {
entries = parsed;
}
if (!entries) {
// fall through to local-download flow
} else {
// Single-segment expansion — same as the local path below.
// Parakeet-style backends return one big text blob; spread
// it into per-sentence entries with interpolated timestamps
// so the analyzer sees more than one section's worth of
// structure.
if (
entries.length === 1 &&
knownDuration &&
knownDuration > 30 &&
(entries[0].text || "").length > 100
) {
const synthesized = synthesizeEntriesFromText(
entries[0].text,
knownDuration
);
if (synthesized.length > 1) {
log(
2,
`Single-segment relay transcript expanded into ${synthesized.length} synthetic entries`
);
entries = synthesized;
}
}
txCost = relayResult.cost || txCost;
log(
2,
`Relay-URL transcription complete — ${entries.length} segments, skipping local download`
);
}
} else {
log(
1,
`⚠ Relay-URL returned empty — falling back to local download`
);
}
} catch (err) {
if (err?.message === CANCELLED_MARK || abortController.signal.aborted) {
throw err;
}
const msg = err?.message || String(err);
// Surface err.cause too — Node's fetch() wraps low-level
// failures (DNS, TLS, dead-socket) in a bare "fetch failed"
// message and stashes the real reason in .cause. Without it
// the activity log is uninformative on transport errors.
const cause = err?.cause?.message || err?.cause?.code || err?.cause;
const detail = cause ? ` (cause: ${String(cause).slice(0, 200)})` : "";
// Did the relay accept the request and then fail at its
// backend (Parakeet/Gemma/Gemini), or did we fail before
// the relay even processed it (network blip, TLS issue,
// relay unreachable)?
//
// Backend failures → DON'T fall back to local download.
// The relay's /relay/transcribe endpoint would route to
// the same broken backend, costing 60+ seconds of audio
// upload bandwidth for the same error.
//
// Transport failures → DO fall back. The relay may be
// intermittently unreachable; upload-path might catch a
// moment when it works.
const isBackendFailure =
/CUDA error|Resource exhausted|Parakeet transcribe|Gemma analyze|model does not exist|model.*not found|backend_error|all attempts failed|HTTP 5\d\d/i.test(
msg
);
if (isBackendFailure) {
log(
1,
`⚠ Relay-URL fast-path failed: ${msg.slice(0, 300)}${detail}`
);
log(
1,
`Skipping local-download fallback — same relay backend would just fail again. ` +
`Operator hardware (or Gemini quota) needs attention.`
);
sendEvent(res, "error", {
message: msg.slice(0, 500),
});
res.end();
if (isFree) releaseFreeSlot();
return;
}
log(
1,
`⚠ Relay-URL fast-path failed: ${msg.slice(0, 200)}${detail} — falling back to local download (transport-level error, relay may be intermittent)`
);
}
}
}
checkCancelled();
// ── Step 1: Download audio (skipped when captions populated entries) ──
if (!entries) {
const dlStart = Date.now();
if (isPodcast) {
log(1, "Downloading podcast episode...");
await downloadPodcastAudio(url, audioPath);
const stats = await fs.stat(audioPath);
const sizeMB = (stats.size / (1024 * 1024)).toFixed(1);
const dlTime = ((Date.now() - dlStart) / 1000).toFixed(1);
log(1, `Episode downloaded (${sizeMB} MB) in ${dlTime}s`, `File size: ${sizeMB} MB`);
log(1, `Episode: ${videoTitle}`);
} else {
log(1, "Downloading audio from YouTube...");
const dlBaseArgs = [
"-x",
"--audio-format", "mp3",
"--audio-quality", "5",
"-o", audioPath,
"--no-playlist",
"--print", "%(title)s|||%(upload_date)s",
"--no-simulate",
];
const dlUrl = `https://www.youtube.com/watch?v=${videoId}`;
const cookieArgs = ytCookieArgs();
const hasCookies = cookieArgs.length > 0;
let usedCookies = false;
let dlStdout = "";
// Helper: run yt-dlp exactly once with the supplied argument list.
// Resolves to whatever the process printed on stdout ("" when stdout is
// missing or empty) and rejects with whatever execFileAsync rejects with.
// A 600000 ms (10-minute) timeout is passed through to execFileAsync.
// `label` is accepted for call-site readability only; the body never reads it.
async function attemptDownload(args, label) {
  const { stdout } = await execFileAsync("yt-dlp", args, { timeout: 600000 });
  return stdout || "";
}
// Helper: decide whether yt-dlp's error text looks like YouTube refusing
// the request (sign-in / bot challenge, HTTP 403, or rate limiting) as
// opposed to some other kind of failure. Matching is case-insensitive.
function isBotBlock(errText) {
  const blockSignature =
    /Sign in|confirm you're not a bot|bot detection|JsChallengeProvider|js.*challenge|HTTP Error 403|Too Many Requests|429/i;
  return blockSignature.test(errText);
}
// ── Smart download with retry ──
// Strategy: cookies → no-cookies → wait & retry (up to 3 attempts with increasing delays)
const MAX_RETRIES = 3;
const RETRY_DELAYS = [30, 60, 120]; // seconds — escalating backoff
let downloaded = false;
let lastError = "";
for (let attempt = 0; attempt <= MAX_RETRIES && !downloaded; attempt++) {
// On retry attempts, wait before trying again
if (attempt > 0) {
const waitSec = RETRY_DELAYS[Math.min(attempt - 1, RETRY_DELAYS.length - 1)];
log(1, `⏳ YouTube is rate-limiting. Waiting ${waitSec}s before retry ${attempt}/${MAX_RETRIES}...`);
sendEvent(res, "status", { step: 1, message: `Rate limited — retrying in ${waitSec}s (attempt ${attempt}/${MAX_RETRIES})` });
await new Promise(r => setTimeout(r, waitSec * 1000));
log(1, `Retrying download (attempt ${attempt}/${MAX_RETRIES})...`);
// Clean up any partial file from previous attempt
await fs.unlink(audioPath).catch(() => {});
}
// Try with cookies first
if (hasCookies && !usedCookies) {
try {
log(1, attempt === 0 ? "Trying download with browser cookies (ad-free)..." : "Retrying with cookies...");
dlStdout = await attemptDownload([...dlBaseArgs, ...cookieArgs, dlUrl], "cookies");
usedCookies = true;
downloaded = true;
break;
} catch (cookieErr) {
const cookieMsg = (cookieErr.stderr || "") + " " + (cookieErr.message || "");
if (attempt === 0) log(1, `⚠ Cookie download failed: ${cookieMsg.trim().slice(0, 200)}`);
log(1, "Retrying without cookies...");
await fs.unlink(audioPath).catch(() => {});
}
}
// Try without cookies
if (!downloaded) {
try {
dlStdout = await attemptDownload([...dlBaseArgs, dlUrl], "no-cookies");
downloaded = true;
break;
} catch (dlErr) {
lastError = (dlErr.stderr || "") + " " + (dlErr.stdout || "") + " " + (dlErr.message || "");
const blocked = isBotBlock(lastError);
if (blocked && attempt < MAX_RETRIES) {
log(1, `⚠ YouTube bot detection triggered`);
// Will loop back and wait
continue;
}
if (blocked && attempt === MAX_RETRIES) {
// Last resort: try yt-dlp auto-update in case there's a newer version that handles this
log(1, "All retries exhausted — attempting yt-dlp auto-update as last resort...");
const updateResult = await autoUpdateYtdlp(DATA_DIR);
if (updateResult.success) {
log(1, "yt-dlp updated! Final retry...");
try {
const retryResult = await attemptDownload([...dlBaseArgs, dlUrl], "post-update");
dlStdout = retryResult;
downloaded = true;
break;
} catch { /* fall through to error */ }
}
}
// Non-bot error or exhausted retries
if (!downloaded) {
log(1, `⚠ yt-dlp error: ${lastError.trim().slice(0, 300)}`);
}
}
}
}
if (!downloaded) {
const blocked = isBotBlock(lastError);
let hint = "";
if (blocked) {
hint = "\n\nYouTube is temporarily blocking downloads from your IP address. " +
"This is usually caused by:\n" +
"• Recent VPN use (YouTube flags VPN IPs)\n" +
"• Too many downloads in a short period\n" +
"• YouTube's general anti-bot measures\n\n" +
"What to try:\n" +
"• Wait 10-30 minutes and try again\n" +
"• Disconnect any VPN/proxy\n" +
"• Upload fresh cookies.txt via Settings\n" +
"• Try a different network (mobile hotspot, etc.)";
}
throw new Error(`Download failed after ${MAX_RETRIES} retries.${hint}\n\nLast error: ${lastError.trim().slice(0, 300)}`);
}
if (!usedCookies && hasCookies) {
log(1, "⚠ Downloaded without cookies — audio may contain ads");
}
const stats = await fs.stat(audioPath);
const sizeMB = (stats.size / (1024 * 1024)).toFixed(1);
const dlTime = ((Date.now() - dlStart) / 1000).toFixed(1);
log(1, `Audio downloaded (${sizeMB} MB) in ${dlTime}s`, `File size: ${sizeMB} MB`);
// Extract title from the --print output of the download command
const fallbackTitle = videoTitle !== "Untitled" ? videoTitle : null;
let gotTitle = false;
// First try: parse title from the download stdout (most reliable — same call that succeeded)
if (dlStdout) {
const printLines = dlStdout.split("\n").map(l => l.trim()).filter(Boolean);
for (const line of printLines) {
if (line.includes("|||")) {
const sep = line.indexOf("|||");
const t = line.slice(0, sep).trim();
const d = line.slice(sep + 3).trim();
if (t && t !== "NA") {
videoTitle = t;
if (d && d !== "NA") videoUploadDate = d;
gotTitle = true;
log(1, `Video title: ${videoTitle}`);
break;
}
}
}
}
// Second try: separate yt-dlp call (no cookies needed for public metadata)
if (!gotTitle) {
try {
const { stdout } = await execFileAsync("yt-dlp", [
"--print", "%(title)s|||%(upload_date)s",
"--no-download",
`https://www.youtube.com/watch?v=${videoId}`,
], { timeout: 15000 });
const raw = stdout.trim();
const sep = raw.indexOf("|||");
if (sep > 0) {
videoTitle = raw.slice(0, sep).trim() || fallbackTitle || "Untitled";
const d = raw.slice(sep + 3).trim();
if (d && d !== "NA") videoUploadDate = d || videoUploadDate;
} else {
videoTitle = raw || fallbackTitle || "Untitled";
}
gotTitle = videoTitle !== "Untitled";
if (gotTitle) log(1, `Video title: ${videoTitle}`);
} catch {
// Title fetch failed
}
}
// Third try: use the queue-provided title
if (!gotTitle && fallbackTitle) {
videoTitle = fallbackTitle;
log(1, `Using queue title: ${fallbackTitle}`);
gotTitle = true;
}
if (!gotTitle) {
log(1, "⚠ Could not fetch video title");
}
}
checkCancelled();
// ── Step 2: Transcribe audio ──
// Detect audio duration to choose strategy
const audioDuration = await getAudioDuration(audioPath);
const audioDurMin = audioDuration ? (audioDuration / 60).toFixed(1) : "unknown";
log(2, `Audio duration: ${audioDuration ? formatTime(Math.floor(audioDuration)) : "unknown"} (${audioDurMin} min)`);
// Strategy:
// < threshold → full file (fast, cheap, reliable on capable backends)
// ≥ threshold → chunked transcription
// If full-file transcription is truncated or empty → fall back to chunks
//
// Thresholds are provider-aware. For the relay provider, we ask
// the relay (via /relay/capabilities) what limits it can accept —
// if the operator has routed transcription to their own Parakeet
// hardware (which handles 2+ hour podcasts in one shot), the
// relay reports very large limits and we skip chunking entirely.
// For other providers, we use the legacy Gemini-safe defaults.
let chunkTimeThresholdSec = 60 * 60; // 60 min
let chunkSizeThresholdBytes = 30 * 1024 * 1024; // 30 MB
let preferredChunkSeconds = 2700; // 45 min
if (transcriptionProvider === "relay") {
// Force a fresh fetch so the chunking decision matches the
// routing decision the relay will make for THIS install RIGHT
// NOW. The hourly background refresh is a fallback for when
// this inline fetch fails (relay unreachable, etc.).
await refreshRelayCapabilities().catch(() => {});
const caps = getRelayCapabilities();
if (typeof caps.max_audio_minutes === "number") {
chunkTimeThresholdSec = caps.max_audio_minutes * 60;
}
if (typeof caps.max_audio_mb === "number") {
chunkSizeThresholdBytes = caps.max_audio_mb * 1024 * 1024;
}
if (caps.preferred_chunk_seconds === null) {
// Hardware-capable: never chunk. Set both thresholds to
// Infinity so the needsChunking check stays simple.
chunkTimeThresholdSec = Infinity;
chunkSizeThresholdBytes = Infinity;
preferredChunkSeconds = null;
} else if (typeof caps.preferred_chunk_seconds === "number") {
preferredChunkSeconds = caps.preferred_chunk_seconds;
}
// Plain-language line for the activity log. The full caps
// payload (tier, backend pref, chunking thresholds) is verbose
// and internal — surface only what a non-developer reader
// benefits from: the max audio length the relay will accept,
// since that's what dictates the chunking behavior they see.
if (caps.max_audio_minutes) {
log(2, `Relay will transcribe up to ${caps.max_audio_minutes} minutes per upload.`);
} else {
log(2, `Relay ready to transcribe.`);
}
}
let audioFileSize = 0;
try { audioFileSize = (await fs.stat(audioPath)).size; } catch {}
const audioSizeMB = (audioFileSize / (1024 * 1024)).toFixed(1);
const needsChunking =
(audioDuration && audioDuration >= chunkTimeThresholdSec) ||
audioFileSize >= chunkSizeThresholdBytes;
if (needsChunking) {
const reason = audioDuration >= chunkTimeThresholdSec ? `${audioDurMin} min` : `${audioSizeMB} MB`;
log(2, `Large audio (${reason}) — will use chunked transcription with ${transcriber.name}/${transcriptionModel}`);
} else if (transcriptionProvider === "relay" && preferredChunkSeconds === null) {
log(2, `Sending full file to relay (relay will chunk server-side if needed for its backend)`);
}
// Transcription model fallback chain: user's chosen model first,
// then the rest of the provider's list. If Gemini 3 Flash hits a
// 503 capacity error, the wrapper transparently retries with
// 2.5 Flash. Matches the analysis fallback pattern below.
const transcriptionFallbacks = [
transcriptionModel,
...transcriber.listTranscriptionModels().filter((m) => m !== transcriptionModel),
];
// Transcribe one audio file, walking a model-fallback chain.
//
// The chain is `modelName` first, followed by every other entry of
// `transcriptionFallbacks` in its existing order. Each model is handed to
// `transcriber.transcribeAudio`; the first call that resolves wins and its
// result is returned untouched (callers read `.text` / `.cost` off it).
//
// Parameters:
//   filePath      – audio file to transcribe
//   mType         – MIME type forwarded as `mimeType`
//   titleHint     – title forwarded to the provider as a prompt hint
//   modelName     – model to try first (lets callers override per chunk)
//   offsetSeconds – forwarded to the provider unchanged (default 0)
//
// Throws Error(CANCELLED_MARK) as soon as a failure coincides with an
// aborted signal or is an AbortError — no further models are tried.
// Otherwise, once every model has failed, rethrows the last failure.
async function transcribeSingleFile(filePath, mType, titleHint, modelName, offsetSeconds = 0) {
  const remainingModels = transcriptionFallbacks.filter((candidate) => candidate !== modelName);
  const modelsToTry = [modelName, ...remainingModels];
  const finalModel = modelsToTry[modelsToTry.length - 1];
  let mostRecentFailure;

  for (const candidateModel of modelsToTry) {
    try {
      const request = {
        filePath,
        mimeType: mType,
        titleHint,
        // Rich-context hints for the provider's prompt builder (speaker-name
        // anchoring). Providers that have no use for them ignore them.
        channelHint: videoChannel,
        descriptionHint: videoDescription,
        chaptersHint: videoChapters,
        model: candidateModel,
        offsetSeconds,
        onProgress: (msg) => log(2, msg),
        signal: abortController.signal,
        // Same jobId as the later analyze call so the relay can bundle both
        // into one credit charge. Non-relay providers ignore it.
        jobId,
      };
      return await transcriber.transcribeAudio(request);
    } catch (failure) {
      // A cancellation is not a model failure: stop walking the chain.
      const wasCancelled = abortController.signal.aborted || failure?.name === "AbortError";
      if (wasCancelled) {
        throw new Error(CANCELLED_MARK);
      }
      mostRecentFailure = failure;
      const reason = failure?.message || String(failure);
      log(2, `⚠ Transcription with ${candidateModel} failed: ${reason.slice(0, 150)}`);
      if (candidateModel !== finalModel) {
        log(2, `Falling back to next transcription model...`);
      }
    }
  }

  throw mostRecentFailure || new Error("All transcription models failed");
}
// ── Helper: chunked transcription for long audio ──
//
// Splits `srcPath` into time-based chunks (via splitAudioFile), transcribes
// up to CHUNK_CONCURRENCY chunks at a time through transcribeSingleFile,
// normalizes each chunk's timestamps to absolute offsets, and stitches the
// per-chunk entries back together in chunk order.
//
// Parameters:
//   srcPath   – audio file to split
//   srcMime   – accepted for call-site symmetry; not read here (every chunk
//               is sent to transcribeSingleFile as "audio/mpeg")
//   title     – title hint forwarded to each per-chunk transcription
//   modelName – first model to try for every chunk
//   logFn     – (message) => void progress sink
//
// Returns null when splitting produced at most one chunk (nothing to do),
// otherwise { entries, cost } where `cost` carries summed input/output
// token counts and placeholder cost strings.
//
// A chunk that fails for any reason other than cancellation is logged and
// contributes no entries (best effort). Cancellation — an aborted signal,
// an AbortError, or CANCELLED_MARK from transcribeSingleFile — is rethrown
// as Error(CANCELLED_MARK) so the remaining chunks are not started and
// mislabelled as failures. The temp chunk directory is always removed.
async function transcribeChunked(srcPath, srcMime, title, modelName, logFn) {
  // mkdtemp appends a random suffix, so two requests starting in the same
  // millisecond can never share (and then delete) each other's chunk dir.
  const chunkDir = await fs.mkdtemp(path.join(os.tmpdir(), "yt-chunks-"));
  try {
    // Honor the provider's preferred chunk size (relay reports
    // this via /relay/capabilities; legacy defaults to 45 min).
    const chunkSec = preferredChunkSeconds || 2700;
    const audioChunks = await splitAudioFile(srcPath, chunkDir, chunkSec);
    if (!audioChunks || audioChunks.length <= 1) return null; // splitting not needed

    // Fire chunks in parallel with a bounded in-flight count.
    // 6 is well under Gemini's paid-tier RPM (1k for flash, 150
    // for pro) and stays comfortable on the relay (which serves
    // each upload independently). Out-of-order completion is
    // fine — results are stored by chunk index and merged in order.
    const CHUNK_CONCURRENCY = 6;
    logFn(`Split audio into ${audioChunks.length} chunks for transcription (parallel, up to ${Math.min(audioChunks.length, CHUNK_CONCURRENCY)} in flight)`);

    let totalIn = 0;
    let totalOut = 0;
    const perChunkEntries = new Array(audioChunks.length).fill(null);
    let nextChunkIdx = 0;

    // Worker: keeps claiming the next unclaimed chunk index until none remain.
    const transcribeOne = async () => {
      while (true) {
        const my = nextChunkIdx++;
        if (my >= audioChunks.length) return;
        const chunk = audioChunks[my];
        logFn(`Transcribing chunk ${chunk.index + 1}/${audioChunks.length} (starts at ${formatTime(chunk.startOffset)})...`);
        try {
          const chunkResult = await transcribeSingleFile(
            chunk.path, "audio/mpeg", title,
            modelName,
            chunk.startOffset
          );
          // Guarded reads: a result without `cost` (or without token counts)
          // must not throw and discard a successful transcript, nor turn the
          // running totals into NaN.
          totalIn += chunkResult.cost?.inputTokens || 0;
          totalOut += chunkResult.cost?.outputTokens || 0;
          const chunkText = chunkResult.text;
          if (!chunkText) {
            logFn(`⚠ Chunk ${chunk.index + 1} returned empty response — skipping`);
            perChunkEntries[my] = [];
            continue;
          }
          const chunkEntries = parseTimestampedTranscript(chunkText);
          if (chunk.startOffset > 0) {
            // Heuristic: if the first timestamp is already near the chunk's
            // absolute start, the model emitted absolute times; otherwise
            // they are chunk-relative and need shifting.
            const firstOffset = chunkEntries.length > 0 ? chunkEntries[0].offset : 0;
            const alreadyAdjusted = firstOffset >= chunk.startOffset * 0.8;
            if (!alreadyAdjusted) {
              for (const e of chunkEntries) {
                e.offset += chunk.startOffset;
              }
              logFn(`Adjusted chunk ${chunk.index + 1} timestamps by +${formatTime(chunk.startOffset)}`);
            }
          }
          // Sanity cap: drop any entries whose absolute offset
          // exceeds the chunk's true time window. Observed bug
          // (May 2026): gemini-3.1-flash-lite emitted timestamps
          // like [10:12:44] on a 45-min chunk, which then survived
          // the merge step and caused the stitched transcript to
          // claim 10:12:44 total length AND wiped out subsequent
          // chunks' entries (the merge dedupe rule dropped anything
          // <= the running max offset). Small tolerance (10s) for
          // models that mark the END of a phrase rather than the
          // start, where the actual emission can slightly exceed
          // the chunk boundary.
          const chunkEndAbsolute = chunk.startOffset + chunk.durationSec;
          const TOLERANCE_SEC = 10;
          const validUpper = chunkEndAbsolute + TOLERANCE_SEC;
          const filtered = chunkEntries.filter((e) => e.offset <= validUpper);
          const dropped = chunkEntries.length - filtered.length;
          if (dropped > 0) {
            const worstOffset = Math.max(...chunkEntries.map((e) => e.offset || 0));
            logFn(
              `⚠ Chunk ${chunk.index + 1}: dropped ${dropped} segment(s) with bogus timestamps past chunk end ${formatTime(chunkEndAbsolute)} (worst: ${formatTime(worstOffset)}). Model: ${modelName}`
            );
          }
          // Sort by offset before handing off to the merge step.
          // Some models emit segments out of chronological order
          // (observed on noisy transcribe outputs); the merge step
          // relies on monotonically-increasing offsets to dedupe
          // boundary overlaps. Sorting here guarantees that
          // invariant per chunk.
          filtered.sort((a, b) => (a.offset || 0) - (b.offset || 0));
          // Detect truncated transcripts. If the last entry's
          // absolute offset is significantly less than the chunk's
          // expected end, the model probably hit its output-token
          // cap mid-transcript and lost the tail. Surface as a
          // loud warning so the operator knows which chunk + model
          // misbehaved. The partial entries are kept because
          // they're better than nothing.
          const lastAbsolute = filtered.length > 0
            ? filtered[filtered.length - 1].offset
            : chunk.startOffset;
          const expectedEnd = chunk.startOffset + chunk.durationSec;
          const coverageRatio = chunk.durationSec > 0
            ? (lastAbsolute - chunk.startOffset) / chunk.durationSec
            : 1;
          if (coverageRatio < 0.8 && chunk.durationSec > 60) {
            const missingSec = expectedEnd - lastAbsolute;
            logFn(
              `⚠ Chunk ${chunk.index + 1}: transcript appears TRUNCATED — last entry at ${formatTime(lastAbsolute)} but chunk ends at ${formatTime(expectedEnd)} (missing ${formatTime(missingSec)} of speech). Model: ${modelName}. Likely cause: model hit maxOutputTokens. Consider a model with larger output capacity, or shrink chunk size.`
            );
          }
          logFn(`Chunk ${chunk.index + 1}: ${filtered.length} segments, last timestamp ${filtered.length > 0 ? formatTime(filtered[filtered.length - 1].offset) : "N/A"} (coverage ${(coverageRatio * 100).toFixed(0)}%)`);
          perChunkEntries[my] = filtered;
        } catch (err) {
          // Cancellation is not a chunk failure: propagate it so the worker
          // stops claiming chunks and the caller sees a cancelled job.
          const cancelled =
            abortController.signal.aborted ||
            err?.name === "AbortError" ||
            err?.message === CANCELLED_MARK;
          if (cancelled) {
            throw new Error(CANCELLED_MARK);
          }
          logFn(`⚠ Chunk ${chunk.index + 1} failed: ${err?.message?.slice(0, 120) || err}`);
          perChunkEntries[my] = [];
        }
      }
    };

    const workers = Array.from(
      { length: Math.min(audioChunks.length, CHUNK_CONCURRENCY) },
      transcribeOne
    );
    await Promise.all(workers);

    // Merge chunks in chronological order. Per-chunk timestamps
    // are absolute (adjusted by startOffset above), so merging
    // in index order is the same as in time order. The overlap-
    // skip rule: any entry whose offset is not past the last entry
    // contributed by earlier chunks is dropped.
    const allEntries = [];
    for (const chunkEntries of perChunkEntries) {
      if (!chunkEntries) continue;
      const lastExistingTime = allEntries.length > 0 ? allEntries[allEntries.length - 1].offset : -1;
      for (const e of chunkEntries) {
        if (e.offset > lastExistingTime) allEntries.push(e);
      }
    }
    // Recalculate durations: each entry lasts until the next one starts;
    // the final entry gets a fixed 15-second duration.
    for (let i = 0; i < allEntries.length - 1; i++) {
      allEntries[i].duration = allEntries[i + 1].offset - allEntries[i].offset;
    }
    if (allEntries.length > 0) allEntries[allEntries.length - 1].duration = 15;
    logFn(`Chunked transcription complete: ${allEntries.length} total segments`);
    return {
      entries: allEntries,
      cost: {
        inputTokens: totalIn, outputTokens: totalOut, thinkingTokens: 0,
        totalTokens: totalIn + totalOut,
        totalCost: "0", totalCostDisplay: "",
      },
    };
  } finally {
    // Best-effort cleanup; a failure to remove temp files must not mask
    // the transcription result or the original error.
    try { await fs.rm(chunkDir, { recursive: true, force: true }); } catch {}
  }
}
// entries / transcriptText / txCost are declared earlier (top of
// the request handler) since the captions-fast-path needs to
// populate them before this audio-transcription block runs.
const txStart = Date.now();
if (needsChunking) {
// ── Very long audio: go straight to chunked transcription ──
log(2, `Skipping full-file attempt — using chunked transcription for ${audioDurMin} min audio`);
const chunkedResult = await transcribeChunked(audioPath, mimeType, videoTitle, transcriptionModel, (msg) => log(2, msg));
if (chunkedResult && chunkedResult.entries.length > 0) {
entries = chunkedResult.entries;
txCost = chunkedResult.cost;
} else {
log(2, `⚠ Chunked transcription returned no entries, trying full file as last resort...`);
entries = null; // fall through to full-file attempt below
}
}
if (!entries) {
// ── Normal: transcribe full file ──
const transcriptResult = await transcribeSingleFile(audioPath, mimeType, videoTitle, transcriptionModel);
transcriptText = transcriptResult.text;
if (!transcriptText) {
log(2, `⚠ Full-file transcription returned empty — falling back to chunked transcription...`);
const chunkedResult = await transcribeChunked(audioPath, mimeType, videoTitle, transcriptionModel, (msg) => log(2, msg));
if (chunkedResult && chunkedResult.entries.length > 0) {
entries = chunkedResult.entries;
txCost = chunkedResult.cost;
} else {
throw new Error(`${transcriber.name} returned empty transcription for both full file and chunked attempts. Try again or use a shorter video.`);
}
} else {
txCost = transcriptResult.cost;
const txTime = ((Date.now() - txStart) / 1000).toFixed(1);
log(2, `Transcription complete in ${txTime}s`, `${transcriptText.length} chars received`);
entries = parseTimestampedTranscript(transcriptText);
log(2, `Parsed ${entries.length} transcript segments`);
}
// ── Single-segment expansion ──
// Whisper-API endpoints that don't return per-segment timestamps
// (e.g. NVIDIA Parakeet, some bare-Whisper wrappers) hand back a
// single text blob. parseTimestampedTranscript dumps that into
// one entry at [0:00], which (a) trips the truncation detector
// below and (b) leaves the analyzer with one giant segment so
// it can only produce one section. Synthesize sentence-based
// entries with interpolated timestamps so both code paths
// downstream work like they do for Gemini/Whisper-1.
if (entries.length === 1 && audioDuration && audioDuration > 30 && (entries[0].text || "").length > 100) {
const synthesized = synthesizeEntriesFromText(entries[0].text, audioDuration);
if (synthesized.length > 1) {
log(2, `Single-segment transcript expanded into ${synthesized.length} synthetic sentence-based entries with interpolated timestamps`);
entries = synthesized;
}
}
// ── Truncation detection → fall back to chunks ──
if (audioDuration && entries.length > 0) {
const lastEntryTime = entries[entries.length - 1].offset;
const coverageRatio = lastEntryTime / audioDuration;
const missingSeconds = audioDuration - lastEntryTime;
if (coverageRatio < 0.90 && missingSeconds > 120) {
log(2, `⚠ Transcript truncated — covers ${formatTime(lastEntryTime)} of ${formatTime(Math.floor(audioDuration))} (${(coverageRatio * 100).toFixed(0)}%)`);
log(2, `Falling back to chunked transcription...`);
const chunkedResult = await transcribeChunked(audioPath, mimeType, videoTitle, transcriptionModel, (msg) => log(2, msg));
if (chunkedResult && chunkedResult.entries.length > 0) {
entries = chunkedResult.entries;
txCost = chunkedResult.cost;
const finalCoverage = entries[entries.length - 1].offset;
log(2, `Coverage after chunking: ${formatTime(finalCoverage)} of ${formatTime(Math.floor(audioDuration))}`);
}
} else {
log(2, `Transcript coverage: ${formatTime(lastEntryTime)} of ${formatTime(Math.floor(audioDuration))} (${(coverageRatio * 100).toFixed(0)}%) — OK`);
}
}
}
const txTotalTime = ((Date.now() - txStart) / 1000).toFixed(1);
log(2, `Total transcription time: ${txTotalTime}s — ${entries.length} segments`);
if (!entries || entries.length === 0) {
const preview = (transcriptText || "").slice(0, 500).replace(/\n/g, " ↵ ");
log(2, `⚠ Transcript parse failed. Preview: ${preview}`);
sendEvent(res, "error", { message: "Failed to parse transcript." });
sendEvent(res, "result", { videoId, entries: [], chunks: [], rawTranscript: transcriptText });
res.end();
return;
}
} // end if (!entries) — close the audio + transcription block
if (!entries || entries.length === 0) {
throw new Error("No transcript available — neither captions nor audio transcription produced segments.");
}
checkCancelled();
// ── Step 3: Topic analysis with model fallback ──
// Fallback order: caller's chosen model first, then the rest of the
// analyzer provider's analysis-model list.
const providerModels = analyzer.listAnalysisModels();
const analysisFallbacks = [
analysisModel,
...providerModels.filter((m) => m !== analysisModel),
];
// ── Auto-coalesce input for the analysis LLM (single-shot path only) ──
// Long content uses the chunked path (each ~18-min window only sees
// a small slice of entries, so context isn't a concern) — coalescing
// there would just degrade section-boundary precision unnecessarily.
// Short content still goes through a single analyze call; for tiny
// local models with 8-16K context windows, a dense Parakeet
// transcript at full granularity can still overflow, so we keep
// the safety-net coalesce for that path.
const lastEntry = entries[entries.length - 1];
const totalSec = (lastEntry?.offset || 0) + (lastEntry?.duration || 0);
const willChunkAnalysis = totalSec > CHUNKING_CUTOFF_SECONDS;
const { coalesced: analysisEntries, indexMap: analysisIndexMap } =
willChunkAnalysis
? { coalesced: entries, indexMap: null }
: coalesceForAnalysis(entries, 400);
if (analysisEntries !== entries) {
log(
3,
`⚠ Single-shot path: coalesced ${entries.length}${analysisEntries.length} segments for analyzer (transcript display keeps full granularity).`
);
} else if (willChunkAnalysis) {
log(
3,
`Chunked path: keeping full segment granularity (${entries.length} entries) — chunker handles per-window context.`
);
}
// Helper: turn analyzer sections into Recap "chunks" — the UI shape of
// { title, summary, entries, startTime }.
//
// Section start/end indices are expressed in analysisEntries index space.
// When analysisIndexMap is set (the coalesced single-shot path) each index
// is clamped into the map and translated back to the original `entries`
// range; an unmapped start falls back to 0 and an unmapped end to the last
// entry. Bounds are then clamped to `entries`, and any section that ends up
// owning no entries is dropped.
//
// Shared by the per-window streaming callback and the final assembly so
// that what streams to the browser matches what gets saved.
const sectionsToChunks = (sections) => {
  const toChunk = (section) => {
    let firstIdx = Math.max(0, section.startIndex);
    let lastIdx = section.endIndex;

    if (analysisIndexMap) {
      const topSlot = analysisIndexMap.length - 1;
      const startSlot = analysisIndexMap[Math.max(0, Math.min(firstIdx, topSlot))];
      const endSlot = analysisIndexMap[Math.max(0, Math.min(lastIdx, topSlot))];
      firstIdx = startSlot ? startSlot.startOrig : 0;
      lastIdx = endSlot ? endSlot.endOrig : entries.length - 1;
    }

    firstIdx = Math.max(0, firstIdx);
    lastIdx = Math.min(entries.length - 1, lastIdx);

    const owned = entries.slice(firstIdx, lastIdx + 1);
    return {
      title: section.title,
      summary: section.summary,
      entries: owned,
      startTime: owned[0]?.offset || 0,
    };
  };

  return sections
    .map((section) => toChunk(section))
    .filter((chunk) => chunk.entries.length > 0);
};
// Emit transcript_ready so the browser can switch from the
// loading screen to the results view immediately — transcript
// pane populated, sections pane in "analyzing…" state. Sections
// then stream in via sections_partial events as analyze windows
// complete (only the chunked path streams; short content gets
// its sections in the final result event).
const contentType = isPodcast ? "podcast" : "youtube";
sendEvent(res, "transcript_ready", {
videoId,
videoTitle,
videoChannel,
videoChapters,
videoUploadDate,
videoDescription,
entries,
type: contentType,
willChunk: willChunkAnalysis,
});
// Single entry point: runChunkedAnalysis decides per-content
// whether to single-shot (≤25 min) or window-and-stitch (>25 min).
// Per-window concurrency is bounded inside; model fallback walks
// the analysisFallbacks list within each window independently.
let analysisResult = null;
let usedAnalysisModel = analysisModel;
const anaStart = Date.now();
log(3, `Analyzing topics across ${analysisEntries.length} segments with ${analyzer.name}...`);
try {
analysisResult = await runChunkedAnalysis({
entries: analysisEntries,
analyzer,
fallbackModels: analysisFallbacks,
onProgress: (msg) => log(3, msg),
onWindowComplete: ({ windowIdx, totalWindows, ownedSections }) => {
// Convert this window's owned sections into the final
// chunk shape and stream them to the browser. The stitcher's
// "trust the next window for the overlap region" rule was
// applied per-window inside runChunkedAnalysis, so what we
// emit here is final — the UI won't have to revise it.
const partialChunks = sectionsToChunks(ownedSections);
if (partialChunks.length > 0) {
sendEvent(res, "sections_partial", {
windowIdx,
totalWindows,
chunks: partialChunks,
});
}
},
signal: abortController.signal,
jobId,
});
usedAnalysisModel = analysisResult.model || analysisModel;
} catch (err) {
if (abortController.signal.aborted || err?.name === "AbortError") {
throw new Error(CANCELLED_MARK);
}
throw err;
}
if (!analysisResult) {
throw new Error("All analysis models failed. Please try again later.");
}
const analysisText = analysisResult.text;
if (!analysisText) {
throw new Error(`${analyzer.name} returned an empty analysis. The transcript may be too long for the model. Try again.`);
}
const anaTime = ((Date.now() - anaStart) / 1000).toFixed(1);
const anaCost = analysisResult.cost;
// Parse the analysis JSON
let analysisJson;
try {
let jsonStr = analysisText.trim();
const codeBlockMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
if (codeBlockMatch) jsonStr = codeBlockMatch[1].trim();
analysisJson = JSON.parse(jsonStr);
} catch (e) {
console.error("Failed to parse analysis JSON:", e.message);
sendEvent(res, "error", { message: "Topic analysis returned invalid JSON. Returning raw transcript." });
sendEvent(res, "result", { videoId, entries, chunks: [], rawTranscript: transcriptText });
res.end();
return;
}
// Build final chunks via the same helper used for streaming —
// guarantees what the browser accumulated from sections_partial
// events matches what gets saved to history and sent in the
// canonical result event.
const chunks = sectionsToChunks(analysisJson.sections);
const totalTime = ((Date.now() - pipelineStart) / 1000).toFixed(1);
log(3, `Topic analysis complete in ${anaTime}s — found ${chunks.length} topics`);
log(3, `Pipeline finished in ${totalTime}s`);
// Save to history. As of the library-for-everyone change, free
// users save too — the app feels broken otherwise (summarize a
// video, never find it again).
const historyId = await saveToHistory(scopeForRequest(req), videoId, url, videoTitle, chunks, entries, logHistory, videoUploadDate, contentType).catch(() => null);
// Debit local credit on success — trial cookie OR free signed-in tenant.
if (RECAP_MODE === "multi" && req.trial) {
try {
const { debitOne } = await import("./anon-trial.js");
debitOne(req.trial.cookie_id);
} catch (err) {
console.warn("[/api/process] trial debit failed:", err);
}
} else if (RECAP_MODE === "multi" && req.creditsToDebit === "tenant") {
try {
const { gateAndDebit } = await import("./tenant-credits.js");
await gateAndDebit(req.user.id);
} catch (err) {
console.warn("[/api/process] tenant_credits debit failed:", err);
}
}
sendEvent(res, "result", { videoId, videoTitle, entries, chunks, historyId, type: contentType });
res.end();
} catch (err) {
// Treat any AbortError or aborted-signal state as a user cancellation
// even if it didn't surface as CANCELLED_MARK upstream (e.g. an SDK
// throws AbortError before checkCancelled() runs).
const cancelled =
err?.message === CANCELLED_MARK ||
err?.name === "AbortError" ||
abortController.signal.aborted;
if (!cancelled) {
// Dump as much context as the runtime gives us. Generic
// messages like "Error in input stream" are usually wrappers
// around an underlying SDK / Node stream error; the cause +
// stack are what actually tells us what happened.
console.error("Pipeline error:", {
name: err?.name,
message: err?.message,
code: err?.code,
status: err?.status || err?.statusCode,
cause: err?.cause,
stack: err?.stack,
transcriptionProvider,
analysisProvider,
});
}
if (!res.headersSent) {
res.status(cancelled ? 499 : 500).json({
error: cancelled ? "cancelled" : err.message,
});
} else if (cancelled) {
sendEvent(res, "cancelled", { message: "Cancelled by user" });
res.end();
} else {
sendEvent(res, "error", { message: err.message });
res.end();
}
} finally {
if (isFree) releaseFreeSlot();
// Clean up temp files
try {
await fs.rm(tmpDir, { recursive: true, force: true });
} catch {}
}
});
// ── Helpers ────────────────────────────────────────────────────────────────
// getAudioDuration + splitAudioFile moved to ./audio.js
// sendEvent / extractVideoId / formatTime / parseTimestampedTranscript moved to ./util.js
// buildAnalysisPrompt moved to ./gemini-helpers.js
// ── Network mode ──────────────────────────────────────────────────────────
// Decides which interface the HTTP server binds to.
//   - DATA_DIR set to something other than this file's parent directory is
//     treated as a StartOS deployment (per the original note: DATA_DIR=/data,
//     container networking) and always binds to 0.0.0.0.
//   - Otherwise (local dev) the default is 127.0.0.1, which is safe on public
//     Wi-Fi; LAN_MODE=true opts in to 0.0.0.0. The .app launcher sets
//     LAN_MODE=true (Home) or false (Traveling); a bare "npm start" leaves it
//     unset and therefore stays on localhost.
const isStartOS =
  process.env.DATA_DIR && path.join(__dirname, "..") !== process.env.DATA_DIR;
const lanMode = Boolean(isStartOS) || process.env.LAN_MODE === "true";
const BIND_HOST = lanMode ? "0.0.0.0" : "127.0.0.1";

// Lets the client discover whether the server is LAN-reachable.
app.get("/api/network-mode", (req, res) => {
  res.json({ lan: lanMode });
});
// ── Start server ───────────────────────────────────────────────────────────
// Binds the HTTP server, then runs the one-time startup sequence:
//   1. yt-dlp presence/version check (auto-updating when a newer release exists)
//   2. legacy subscription-file migration into the operator scope
//   3. pruning of already-summarized items from every scope's auto-queue
//   4. an initial subscription check
//   5. recovery of queue items left in "processing" (re-queued as "approved")
//   6. registration of the hourly subscription check
//
// The callback is async and nothing awaits its returned promise, so any
// rejection escaping it would be an unhandled rejection (fatal by default on
// current Node) and would also skip the steps after it — including the hourly
// interval. Each step is therefore isolated: a failure is logged and startup
// continues with the next step.
app.listen(PORT, BIND_HOST, async () => {
  console.log(`\n Recap API running on http://${BIND_HOST}:${PORT}`);
  console.log(` Data directory: ${DATA_DIR}`);
  console.log(` Recap mode: ${RECAP_MODE}`);

  // 1. yt-dlp check / auto-update.
  console.log(` Checking yt-dlp...`);
  try {
    const info = await checkYtdlp();
    if (!info.installed) {
      console.log(` ⚠ yt-dlp not found. Install it: pip install yt-dlp\n`);
    } else if (info.updateAvailable) {
      console.log(` ✓ yt-dlp ${info.version} found`);
      console.log(` ↑ Update available: ${info.latestVersion}`);
      console.log(` Auto-updating...`);
      const result = await autoUpdateYtdlp(DATA_DIR);
      if (result.success) {
        // Re-probe so the log shows the version that is actually installed now.
        const refreshed = await checkYtdlp();
        console.log(` ✓ yt-dlp updated to ${refreshed.version}\n`);
      } else {
        console.log(` ⚠ Auto-update failed. Run manually: yt-dlp -U\n`);
      }
    } else {
      console.log(` ✓ yt-dlp ${info.version} (up to date)\n`);
    }
  } catch (err) {
    console.warn(" ⚠ yt-dlp check failed:", err?.message);
  }

  // 2. One-time, idempotent: migrate any legacy install-wide subscription
  // files into the operator's own scope so storage is uniformly per-scope.
  try {
    const moved = await migrateGlobalSubscriptionsToOwner();
    if (moved > 0) {
      console.log(` 📦 Migrated ${moved} subscription file(s) into the operator scope`);
    }
  } catch (err) {
    console.warn(" ⚠ Subscription migration skipped:", err?.message);
  }

  // 3. Prune queued items that have since been summarized, across every scope
  // that has a queue (owner + each subscribing tenant). Per-scope failures
  // are ignored, as before; a failure to enumerate scopes is now logged
  // instead of aborting startup.
  try {
    for (const scope of await listAutoQueueScopes()) {
      await reconcileAutoQueueWithLibrary(scope).catch(() => {});
    }
  } catch (err) {
    console.warn(" ⚠ Queue reconcile skipped:", err?.message);
  }

  // 4. Check subscriptions on startup (fans out over all subscribing scopes).
  console.log(` 📡 Checking subscriptions for new videos...`);
  await checkSubscriptions().catch((err) =>
    console.error(" ⚠ Subscription check error:", err.message)
  );

  // 5. Recover items stuck in "processing" from a crash → re-queue as approved
  // (across all scopes), then start the processor if anything is ready.
  // Scopes are isolated from each other so one unreadable queue does not
  // block recovery of the rest.
  let anyApproved = false;
  try {
    for (const scope of await listAutoQueueScopes()) {
      try {
        await mutateAutoQueue(scope, (items) => {
          for (const item of items) {
            if (item.status === "processing") {
              console.log(` ⚠ Recovering stuck item (${scope}): ${item.title}`);
              item.status = "approved";
            }
          }
          if (items.some((q) => q.status === "approved")) anyApproved = true;
        });
      } catch (err) {
        console.warn(` ⚠ Queue recovery skipped (${scope}):`, err?.message);
      }
    }
  } catch (err) {
    console.warn(" ⚠ Queue recovery skipped:", err?.message);
  }
  if (anyApproved) {
    console.log(` 🔄 Starting background processor...`);
    // Delay slightly so the server is fully ready before internal HTTP calls
    setTimeout(() => kickProcessor(), 2000);
  }
  console.log(` ⚙ Processing config: ${processingConfig.delaySeconds}s delay, ${processingConfig.enabled ? "enabled" : "paused"}`);

  // 6. Check subscriptions every hour (runs continuously on StartOS).
  // Registered last, but always reached now that the steps above cannot
  // reject out of the callback.
  setInterval(() => {
    checkSubscriptions().catch((err) =>
      console.error(" ⚠ Subscription check error:", err.message)
    );
  }, 60 * 60 * 1000);
});