import express from "express"; import cors from "cors"; import { execFile } from "child_process"; import { promisify } from "util"; import fs from "fs/promises"; import { createWriteStream } from "fs"; import path from "path"; import os from "os"; import https from "https"; import http from "http"; import { randomUUID } from "crypto"; import * as license from "./license.js"; import { sendEvent, extractVideoId, formatTime, parseTimestampedTranscript, synthesizeEntriesFromText, safeText, retryGemini, fetchUrl, } from "./util.js"; import { buildAnalysisPrompt } from "./gemini-helpers.js"; import { runChunkedAnalysis, CHUNKING_CUTOFF_SECONDS } from "./chunked-analyze.js"; import { getProvider, resolveProviderOpts, PROVIDER_NAMES } from "./providers/index.js"; import { getAudioDuration, splitAudioFile, downloadPodcastAudio, } from "./audio.js"; import { checkYtdlp, autoUpdateYtdlp } from "./ytdlp.js"; import { initCookies, ytCookieArgs, ytExtraArgs, ytCookieMethod, setupCookieRoutes, getCookieFilePath, } from "./cookies.js"; import * as config from "./config.js"; import { initInstallId, getInstallId } from "./install-id.js"; import * as relayState from "./relay-state.js"; import * as relayDefault from "./relay-default.js"; import { startRelayCapabilitiesRefresh, getRelayCapabilities, refreshRelayCapabilities, } from "./relay-capabilities.js"; import { isResolvableShareUrl, isApplePodcastUrl, isSpotifyUrl, resolveShareUrl, URLResolveError, } from "./url-resolver.js"; import { resolveApiKey, getConfigSnapshot } from "./config.js"; import * as licenseMW from "./license-middleware.js"; import { setupLicensePurchaseRoutes } from "./license-purchase.js"; import { setupCreditsPurchaseRoutes, sweepUnappliedPurchases, } from "./credits-purchase.js"; import { setupLicenseMiddleware, setupLicenseRoutes, startLicenseRefresh, refreshLicenseOnline, isFreeUser, tryAcquireFreeSlot, releaseFreeSlot, getCurrentFreeJob, abortCurrentFreeJob, isFreeJobAborted, appendCurrentJobLog, } from 
"./license-middleware.js"; import { initHistory, saveToHistory, loadMeta, saveMeta, setupHistoryRoutes, getHistoryDir, scopeForRequest, migrateLegacyLibrary, } from "./history.js"; import { getProcessedVideoIds, isKnownVideo, loadSubscriptions, saveSubscriptions, loadSkipList, addToSkipList, loadSeenList, addToSeenList, loadAutoQueue, saveAutoQueue, mutateAutoQueue, listSubscriptionScopes, listAutoQueueScopes, migrateGlobalSubscriptionsToOwner, } from "./subscriptions.js"; import { mintInternalSession, deleteInternalSession, adminUserId, SESSION_COOKIE, } from "./auth-routes.js"; import { setupLibraryRoutes } from "./library.js"; import { setupTtsRoutes } from "./tts-routes.js"; import { initAdminAuth, setupAdminAuthMiddleware, setupAdminAuthRoutes, } from "./admin-auth.js"; import { buildTenantAuthMiddleware } from "./tenant-auth.js"; const execFileAsync = promisify(execFile); const app = express(); // Trust the operator's reverse proxy (StartOS / StartTunnel, or a cloud proxy) // so req.ip is the real client address rather than a client-spoofable // X-Forwarded-For entry. The value is how many trusted proxies sit in front of // this process — default 1 (the StartOS/StartTunnel hop). Erring low is safe // (it can only over-count clients onto one IP, hitting the trial cap sooner); // erring high would re-open the trial-cap bypass. Override via // RECAP_TRUSTED_PROXY_HOPS (0 = no proxy in front; use the socket address only). const hopsParsed = parseInt(process.env.RECAP_TRUSTED_PROXY_HOPS, 10); const trustedProxyHops = Number.isInteger(hopsParsed) && hopsParsed >= 0 ? hopsParsed : 1; app.set("trust proxy", trustedProxyHops); const PORT = process.env.PORT || 3001; // ── Multi-tenant mode toggle ──────────────────────────────────────────── // RECAP_MODE is set by startos/main.ts based on the recap_mode field of // the config. 
"single" (default) preserves the original self-hosted // behavior; "multi" enables email + magic-link auth, per-user libraries, // per-user keysat licenses, and BTCPay subscriptions. // // Anything outside "multi" is treated as single — defensive parsing so // a stray env value can't accidentally enable multi-tenant routing. const RECAP_MODE = process.env.RECAP_MODE === "multi" ? "multi" : "single"; console.log(`[boot] RECAP_MODE=${RECAP_MODE}`); // ── Data directory (configurable for StartOS or local dev) ──────────────── // On StartOS: DATA_DIR=/data (persistent volume) // On local dev: defaults to project root (parent of server/) const __dirname = path.dirname(new URL(import.meta.url).pathname); const DATA_DIR = process.env.DATA_DIR || path.join(__dirname, ".."); const historyDir = path.join(DATA_DIR, "history"); const configDir = path.join(DATA_DIR, "config"); await fs.mkdir(historyDir, { recursive: true }).catch(() => {}); await fs.mkdir(configDir, { recursive: true }).catch(() => {}); await initHistory({ dataDir: DATA_DIR, mode: RECAP_MODE }); // Per-install identity for the upcoming relay backend. Generated once // on first boot, persisted in DATA_DIR. Surfaced via /api/health and // /api/install-id (read-only). See ./install-id.js for details. await initInstallId({ dataDir: DATA_DIR }); // API key + live reload moved to ./config.js await config.initConfig({ dataDir: DATA_DIR }); const envPath = config.getEnvPath(); // Cookies state + helpers + routes moved to ./cookies.js await initCookies({ dataDir: DATA_DIR, envPath }); // Admin login gate state. Reads username + scrypt hash + session secret // from /data/config/startos-config.json (set via the "Set Admin // Password" StartOS action) and refreshes on the same poll cadence as // config.js. When no hash is set, the gate is a no-op. // // Single-mode only. In multi-tenant mode the operator authenticates as // a regular user (is_admin = 1) via magic-link, so the legacy admin // gate is bypassed entirely. 
if (RECAP_MODE === "single") {
  await initAdminAuth({ dataDir: DATA_DIR });
}

// ── Multi-tenant store + outbound mail ─────────────────────────────────
// Only initialized when RECAP_MODE === 'multi'. Lazy-imported so the
// single-mode boot never touches the native better-sqlite3 binding or
// loads nodemailer.
if (RECAP_MODE === "multi") {
  const { initDb } = await import("./db.js");
  const { initSmtp } = await import("./smtp.js");
  await initDb({ dataDir: DATA_DIR });
  initSmtp();

  // One-time migration: pre-0.2.77 single-mode installs wrote
  // /data/history/*.json flat. Move them into /data/history/owner/ so
  // they're addressable under a scope. Idempotent (sentinel-guarded).
  try {
    await migrateLegacyLibrary();
  } catch (err) {
    console.warn("[boot] migrateLegacyLibrary failed:", err);
  }

  // Fixup for installs upgraded from <0.2.91: the first admin signup
  // used to rename /data/history/owner/ to a per-user directory under
  // /data/history/. The new convention is admin → always "owner" so
  // multi↔single mode switching is reversible. If the legacy rename
  // happened, reverse it here. Idempotent — runs only when the data
  // layout requires it.
  try {
    const { reclaimAdminLibraryToOwner } = await import("./history.js");
    const { getDb } = await import("./db.js");
    await reclaimAdminLibraryToOwner({ db: getDb() });
  } catch (err) {
    console.warn("[boot] reclaimAdminLibraryToOwner failed:", err);
  }
}

// resolveApiKey moved to ./config.js
app.use(cors());
app.use(express.json({ limit: "100mb" }));

// ── Tenant auth (multi-mode) / synthetic owner (single-mode) ───────────
// MUST run before every other /api/* middleware so downstream handlers
// can rely on req.userId being populated. In single mode the middleware
// is a no-op shim that stamps req.userId="owner". In multi mode it
// validates the recap_session cookie and 401s non-public paths.
app.use(buildTenantAuthMiddleware({ mode: RECAP_MODE }));

// Single-mode whoami stub. Lets the frontend probe what mode it's
// running in WITHOUT branching on a separate config endpoint. In
// single mode every "user" is the operator, so always state:signed_in
// with is_admin:true — the existing settings UI is the right thing to
// render for them.
if (RECAP_MODE === "single") {
  app.get("/api/account/whoami", (req, res) => {
    res.json({
      recap_mode: "single",
      state: "signed_in",
      user: { id: "owner", is_admin: true, has_license: true },
    });
  });
}

// ── Magic-link auth + whoami (multi mode only) ──────────────────────────
// /auth/request-link, /auth/verify, /auth/signout, /api/account/whoami.
// These have to be registered AFTER the tenant-auth middleware (so the
// public-path bypass routes them past the 401 gate) but BEFORE any
// /api/* route that assumes req.userId is set.
if (RECAP_MODE === "multi") {
  const { setupAuthRoutes } = await import("./auth-routes.js");
  const { setupAdminRoutes } = await import("./admin-routes.js");
  const { setupAccountRoutes } = await import("./account-routes.js");
  const { setupBillingRoutes } = await import("./billing-routes.js");

  setupAuthRoutes(app);

  // Account routes (per-user session management). Mount before admin
  // routes so request-handler order is predictable; both use different
  // paths so no conflict.
  setupAccountRoutes(app);

  // Admin routes (tenant list, grant credits, recent-signups). Every
  // route inside is wrapped in requireOperator — non-admins hit 403.
  setupAdminRoutes(app);

  // Self-serve purchase: POST /api/billing/buy + GET /api/billing/status.
  // A signed-in Core user buys their own Pro/Max period via the relay's
  // BTCPay invoice. Multi-mode only (single-mode operators carry a
  // license, not a relay-owned cloud tier).
  setupBillingRoutes(app);

  // Expiry-reminder emails: a daily scan asks the relay who's expiring and
  // emails them via the recaps.cc SMTP transport. Self-gates on SMTP +
  // public URL + relay being configured, so it's a safe no-op until then.
  const { startReminderScheduler } = await import("./subscription-reminders.js");
  startReminderScheduler();

  // Daily Digest: opt-in (off by default) once-a-day email of a user's
  // last ~24h of library recaps. Same self-gating shape as reminders —
  // no-op until SMTP + public URL are set. The one-click unsubscribe GET
  // is public (whitelisted in tenant-auth) since the email has no session.
  const { startDigestScheduler, setupDigestRoutes } = await import(
    "./daily-digest.js"
  );
  setupDigestRoutes(app);
  startDigestScheduler();

  // /api/account/whoami — frontend hits this on every page load to
  // determine which UI state to render:
  //   - signed-in user         → full app
  //   - active trial w/budget  → app + "N trials left" badge
  //   - trial exhausted        → sign-up screen
  //   - no cookies             → landing page (paste-and-try)
  //
  // Responds with one of three shapes, all carrying recap_mode:"multi":
  //   state:"signed_in" + user, state:"trial" + trial, or
  //   state:"anonymous" + available_trial_credits / signup_grant_credits /
  //   trial_blocked_reason.
  app.get("/api/account/whoami", async (req, res) => {
    const base = { recap_mode: "multi" };

    if (req.user) {
      // Self-healing sweep: if the user paid for credits but the
      // BTCPay redirect killed the frontend poller before
      // /api/credits/invoice/:id could fire, the pending_purchases row
      // stays unapplied. Catch it up here so the very next whoami call
      // (every page load) reflects the correct balance.
      try {
        await sweepUnappliedPurchases({
          buyerType: "user",
          buyerId: req.user.id,
          req,
        });
      } catch (err) {
        console.warn(
          `[whoami] purchase sweep failed for user ${req.user.id}: ${err?.message || err}`,
        );
      }
      return res.json({
        ...base,
        state: "signed_in",
        user: {
          id: req.user.id,
          email: req.user.email,
          display_name: req.user.display_name,
          is_admin: !!req.user.is_admin,
          has_license: !!req.user.keysat_license,
          has_password: !!req.user.password_hash,
          created_at: req.user.created_at,
        },
      });
    }

    if (req.trial) {
      // Same sweep for anon visitors who bought a credit pack — apply
      // any settled invoices to anon_trials.credits_total before we
      // compute credits_remaining for the response.
      try {
        await sweepUnappliedPurchases({
          buyerType: "anon",
          buyerId: req.trial.cookie_id,
          req,
        });
        // Re-read the trial row so the response reflects any credits
        // just applied by the sweep.
        const { lookupTrial } = await import("./anon-trial.js");
        const refreshed = lookupTrial(req.trial.cookie_id);
        if (refreshed) req.trial = refreshed;
      } catch (err) {
        console.warn(
          `[whoami] purchase sweep failed for trial ${req.trial.cookie_id}: ${err?.message || err}`,
        );
      }
      return res.json({
        ...base,
        state: "trial",
        trial: {
          credits_total: req.trial.credits_total,
          credits_used: req.trial.credits_used,
          // Clamped at zero and null-safe, using the same formula as the
          // trial branch of /api/relay/status, so the two endpoints can
          // never disagree (negative / NaN here vs 0 there).
          credits_remaining: Math.max(
            0,
            (req.trial.credits_total || 0) - (req.trial.credits_used || 0),
          ),
        },
      });
    }

    // Anonymous — include the operator-configured trial allowance AND
    // the post-signup grant so the UI can compose dynamic copy on the
    // tier signup modal ("3 trial credits, +5 more on signup" vs
    // "your trial credits transfer over"). Without exposing both, the
    // Free card has to hardcode a number that goes stale the moment
    // an operator tunes their config.
    //
    // ALSO check whether this visitor's IP is at the lifetime
    // trials_per_ip_lifetime cap. If yes, force available_trial_credits
    // to 0 and stamp a trial_blocked_reason so the UI can swap the
    // misleading "N free credits ready" pill for "you've used your
    // free trial — sign up or buy credits" copy. Without this gate
    // the pill advertises credits the visitor literally cannot mint
    // (anon-trial.js's issueIfEligible() will refuse), the user
    // submits a video, the relay rejects, and they see a confusing
    // "Processing failed" with the player half-loaded.
    let availableTrialCredits = 0;
    let signupGrantCredits = 0;
    let trialBlockedReason = null;
    try {
      const snap = await getConfigSnapshot();
      availableTrialCredits = Math.max(
        0,
        parseInt(snap?.trial_credits_per_visitor ?? 1, 10) || 0,
      );
      signupGrantCredits = Math.max(
        0,
        parseInt(snap?.tenant_default_credits ?? 0, 10) || 0,
      );

      // IP cap check. This handler is only registered in multi mode, so
      // the anon-trial system is always present here; the import stays
      // lazy so it is only loaded when a cap check is actually needed.
      if (availableTrialCredits > 0) {
        try {
          const { getClientIp, ipTrialsLifetime } = await import("./anon-trial.js");
          const ip = getClientIp(req);
          const perIpLifetime = Math.max(
            1,
            parseInt(
              snap?.trials_per_ip_lifetime ?? snap?.trials_per_ip_per_day ?? 5,
              10,
            ) || 5,
          );
          if (ip && ipTrialsLifetime(ip) >= perIpLifetime) {
            availableTrialCredits = 0;
            trialBlockedReason = "ip_cap_reached";
          }
        } catch (err) {
          // best-effort — if the cap check fails we don't want to
          // suddenly downgrade a legitimate visitor's pill to zero.
          // Leave the configured value and log for diagnosis.
          console.warn(
            `[whoami] IP cap check failed: ${err?.message || err}`,
          );
        }
      }
    } catch (err) {
      // best-effort; fall through with 0 so UI degrades to "Sign in"
      // CTA rather than a misleading "N free credits" badge. Logged so a
      // broken config read is diagnosable instead of silently zeroing
      // the advertised credits.
      console.warn(
        `[whoami] config snapshot failed: ${err?.message || err}`,
      );
    }

    res.json({
      ...base,
      state: "anonymous",
      available_trial_credits: availableTrialCredits,
      signup_grant_credits: signupGrantCredits,
      // Set only when the visitor's IP can't mint a new trial cookie.
      // The frontend uses this to swap the "N free credits ready"
      // pill for an honest "trial used up — sign up / buy" CTA AND
      // to refuse the optimistic submit-flow (no video player
      // render, no fake processing status).
      trial_blocked_reason: trialBlockedReason,
    });
  });
}

// ── Admin login gate (single mode only) ─────────────────────────────────
// MUST run before the license middleware: if an admin password is set,
// nobody (licensed or not) reaches the activation flow without first
// passing the login. Endpoints needed by the login UI itself
// (/api/admin/status, /api/admin/login, /api/admin/logout) and
// /api/health stay open.
if (RECAP_MODE === "single") {
  // Registered ahead of the license middleware below so the login gate
  // is evaluated first.
  setupAdminAuthMiddleware(app);
  setupAdminAuthRoutes(app);
}

// ── Keysat licensing ────────────────────────────────────────────────────────
// Everything license-aware — the gate itself, Pro feature gates, the
// /api/license routes, free-tier slot management and the periodic online
// refresh — is implemented in ./license-middleware.js. Other modules read
// the current state through licenseMW.LIC (a live binding).
setupLicenseMiddleware(app);
setupLicenseRoutes(app);

// In-app purchase flow. Keysat's public policies, purchase and poll
// endpoints are proxied so the buy page renders in Recap's own visual
// style rather than redirecting to Keysat's hosted /buy/… page. Once an
// invoice settles, the issued license is written to disk and the callback
// below makes licenseMW.refreshLicenseOnline pick it up immediately.
setupLicensePurchaseRoutes(app, {
  onLicenseActivated() {
    return licenseMW.refreshLicenseOnline("post-purchase");
  },
});

// Credit-purchase proxy: the buyer tops up relay credits through the
// operator's BTCPay store. Every BTCPay credential lives on the relay;
// Recap only forwards the buyer's pick and proxies the polling.
setupCreditsPurchaseRoutes(app);

startLicenseRefresh();

// Fetch the relay's /relay/capabilities at boot so the chunking decision
// in /api/process can honor the operator's current backend preference.
// Refreshes hourly; safe defaults apply until the first fetch succeeds.
startRelayCapabilitiesRefresh();

// History storage + routes moved to ./history.js
// (saveToHistory, loadMeta, saveMeta are imported above)
// addToSkipList is scope-keyed (./subscriptions.js): deleting a session
// suppresses re-queueing that video in the SAME scope's subscriptions.
setupHistoryRoutes(app, {
  addToSkipList: (scope, id) => {
    return addToSkipList(scope, id);
  },
});

// Audio-first ("walking mode") TTS routes — synthesize + cache + serve
// per-topic summary clips. Self-gates access (Max in multi mode); the
// /api/tts prefix is license-exempt so the route's own gate decides.
setupTtsRoutes(app);

// Static frontend: ../public at the root, ../assets under /assets.
app.use(express.static(path.join(__dirname, "..", "public")));
app.use("/assets", express.static(path.join(__dirname, "..", "assets")));

// checkYtdlp + autoUpdateYtdlp moved to ./ytdlp.js
// PRICING + calcCost + buildAnalysisPrompt moved to ./gemini-helpers.js
// safeText + retryGemini moved to ./util.js

// ── Health check ───────────────────────────────────────────────────────────
// Reports yt-dlp availability, whether a server API key is configured,
// the cookie method in use (plus cookies.txt age when applicable) and the
// install id. Every field returned by checkYtdlp() is spread in last, so
// it wins over the fixed keys on a name clash.
app.get("/api/health", async (req, res) => {
  const info = await checkYtdlp();

  // cookies.txt freshness
  const method = ytCookieMethod();
  const cookies = { method };
  if (method === "cookies.txt") {
    try {
      const { mtimeMs } = await fs.stat(getCookieFilePath());
      const MS_PER_DAY = 1000 * 60 * 60 * 24;
      const fileAgeDays = Math.floor((Date.now() - mtimeMs) / MS_PER_DAY);
      cookies.fileAgeDays = fileAgeDays;
      // cookies typically expire after ~14 days
      cookies.fileExpiring = fileAgeDays > 12;
    } catch {
      // stat failed — report the method only, without age fields.
    }
  }

  const payload = {
    ok: true,
    ytdlp: info.installed,
    hasServerKey: !!config.serverApiKey,
    cookies,
    installId: getInstallId(),
    ...info,
  };
  res.json(payload);
});

// Read-only install identity. The settings panel uses it so the operator
// can verify the install has been provisioned, and the future relay
// client uses it to attach the X-Recap-Install-Id header. Open path — the
// license gate doesn't apply (the relay needs this ID to be reachable
// before any credits have been granted).
app.get("/api/install-id", (_req, res) => {
  const installId = getInstallId();
  res.json({ installId });
});

// Proxy through to the relay's /relay/policy endpoint. Used by the UI
// to render dynamic copy (e.g. "N relay credits" in the activation
// screen reflects whatever the operator currently has the relay
// configured to give Core users — no Recap update needed when the
// operator tunes tier quotas). Cached in-memory for a short window
// so a busy UI doesn't hammer the relay.
// Last successful /relay/policy body and when it was fetched (ms epoch).
let __cachedRelayPolicy = { at: 0, body: null };

// GET /api/relay/policy
//   - no relay URL configured → { configured: false, tiers: null }
//   - cached body younger than 5 minutes → served from memory
//   - otherwise fetched from the relay with a 5 s timeout; a non-2xx
//     status or any thrown error yields { configured: true, tiers: null,
//     error } and leaves the cache untouched.
app.get("/api/relay/policy", async (_req, res) => {
  const relayBase = relayDefault.getRelayBaseURL();
  if (!relayBase) {
    return res.json({ configured: false, tiers: null });
  }

  const CACHE_TTL_MS = 5 * 60 * 1000;
  const { at: cachedAt, body: cachedBody } = __cachedRelayPolicy;
  if (cachedBody && Date.now() - cachedAt < CACHE_TTL_MS) {
    return res.json({ configured: true, ...cachedBody });
  }

  try {
    // Drop a single trailing slash so the joined URL has exactly one.
    const policyUrl = `${relayBase.replace(/\/$/, "")}/relay/policy`;
    const upstream = await fetch(policyUrl, {
      signal: AbortSignal.timeout(5000),
    });
    if (!upstream.ok) {
      return res.json({
        configured: true,
        tiers: null,
        error: `HTTP ${upstream.status}`,
      });
    }
    const body = await upstream.json();
    __cachedRelayPolicy = { at: Date.now(), body };
    res.json({ configured: true, ...body });
  } catch (err) {
    res.json({
      configured: true,
      tiers: null,
      error: err?.message || String(err),
    });
  }
});

// Last-known relay state (credits + tier) cached in-process. The UI
// polls this for the "N credits remaining · Tier: X" banner; the
// underlying numbers are refreshed every time a relay provider call
// lands.
//
// First-paint UX: when the cache is empty (no relay calls yet AND no
// prior ping has populated it), opportunistically hit the relay's
// /balance endpoint with a short timeout so the banner shows real
// numbers on first page load instead of "balance unknown". Best
// effort — if the relay is unreachable the cache stays null and the
// UI falls back to its "unknown" copy. `?refresh=1` forces a ping
// even when the cache already has a value.
//
// `configured` is true when relay-default.js has a non-empty URL —
// operator-controlled at build time, never user-configurable.
app.get("/api/relay/status", async (req, res) => {
  const configured = !!relayDefault.getRelayBaseURL();

  // ── Multi-mode: return per-user credit view ────────────────────────────
  // Each user role sees a different "credits remaining" number:
  //   - trial:       trial.credits_total - trial.credits_used (local)
  //   - free tenant: tenant_credits.balance (local, set by operator)
  //   - paid user:   their relay-pool balance (relay-side, license-keyed)
  //   - admin:       the operator's relay pool (current single-mode behavior)
  //   - anonymous:   no credits to show; configured + null balance
  //
  // The frontend reads `creditsRemaining` and `tier` and renders a
  // single badge. We construct the right shape for the requester here.
  if (RECAP_MODE === "multi" && !(req.user && req.user.is_admin)) {
    // Anonymous + no trial cookie at all.
    if (!req.user && !req.trial) {
      return res.json({
        configured,
        creditsRemaining: null,
        tier: null,
        scope: "anonymous",
        lastUpdated: Date.now(),
        lastError: null,
      });
    }

    // Self-healing sweep — catch any settled-but-unapplied credit
    // purchases for this buyer before we report their balance. Without
    // it, an anon or signed-in user who pays via BTCPay but loses the
    // poll loop on redirect sees a stale balance until they manually
    // re-trigger the apply (which there's no UI for). Cheap: bounded
    // query (5 invoices, 30-min lookback) + short-timeout relay calls.
    try {
      if (req.user) {
        await sweepUnappliedPurchases({
          buyerType: "user",
          buyerId: req.user.id,
          req,
        });
      } else if (req.trial) {
        await sweepUnappliedPurchases({
          buyerType: "anon",
          buyerId: req.trial.cookie_id,
          req,
        });
        // Re-read trial row so the response uses post-sweep credits.
        const { lookupTrial } = await import("./anon-trial.js");
        const refreshed = lookupTrial(req.trial.cookie_id);
        if (refreshed) req.trial = refreshed;
      }
    } catch (err) {
      console.warn(
        `[relay/status] purchase sweep failed: ${err?.message || err}`,
      );
    }

    // Trial visitor.
    if (req.trial) {
      const remaining = Math.max(
        0,
        (req.trial.credits_total || 0) - (req.trial.credits_used || 0),
      );
      return res.json({
        configured,
        creditsRemaining: remaining,
        tier: "trial",
        scope: "trial",
        lastUpdated: Date.now(),
        lastError: null,
      });
    }

    // Core-decoupling: a user's paid status is their relay-owned tier
    // (req.user.tier), NOT any leftover Keysat license. A PAID cloud user
    // (tier pro/max) falls through to the real relay ping below, which
    // pickRelayIdentity routes to their cloud user:<id> pool. Everyone
    // else signed in is a free tenant on the operator pool — regardless of
    // whether they still carry a legacy license (which is now ignored).
    const isPaidCloudUser =
      req.user && (req.user.tier === "pro" || req.user.tier === "max");

    // Free tenant. Lazy replenish-check happens inside getOrInit, so the
    // displayed balance reflects any due refill.
    if (req.user && !isPaidCloudUser) {
      try {
        const { getOrInit } = await import("./tenant-credits.js");
        const credits = await getOrInit(req.user.id);
        return res.json({
          configured,
          creditsRemaining: credits?.total ?? 0,
          // Surface the bucket split so the UI can show "5 free + 10
          // purchased = 15 total" if it wants. Frontend currently
          // just reads creditsRemaining; the extra fields are
          // additive.
          purchasedBalance: credits?.purchased ?? 0,
          replenishBalance: credits?.replenish ?? 0,
          replenishPeriod: credits?.period ?? "off",
          tier: "free",
          scope: "free_tenant",
          lastUpdated: Date.now(),
          lastError: null,
        });
      } catch (err) {
        console.warn("[relay/status] tenant_credits lookup failed:", err);
        return res.json({
          configured,
          creditsRemaining: null,
          tier: "free",
          scope: "free_tenant",
          lastUpdated: Date.now(),
          lastError: "balance_unknown",
        });
      }
    }
    // Paid cloud user (tier pro/max) — fall through to the real relay
    // ping. pickRelayIdentity routes it to their cloud user:<id> identity
    // (or the operator pool if the operator key isn't configured), so the
    // response reflects the pool their calls actually bill.
  }

  // Determine which credit-key this request would use against the relay.
  // The relay-state cache is keyed by the same value so each (operator
  // / paid user) gets its own snapshot — no more clobbering when an
  // admin and a tenant hit the endpoint in the same Node process.
  // Constructs a temporary provider just to inherit its creditKey
  // logic; cheap (no network).
  const identityCfg = await getConfigSnapshot();
  const probeOpts = resolveProviderOpts("relay", {
    config: identityCfg,
    clientOpts: {},
    req,
  });
  const requestCreditKey = relayState.computeCreditKey({
    // Cloud (core-decoupling) identity keys by userId; license/install
    // identities key by their respective fields. probeOpts carries
    // exactly one of these shapes depending on pickRelayIdentity, so
    // pass all three — computeCreditKey picks userId → license → install
    // in priority order, matching what the provider records under.
    installId: probeOpts.installId,
    licenseKey: probeOpts.licenseKey,
    userId: probeOpts.userId,
  });

  let snapshot = relayState.getRelayState(requestCreditKey);
  const wantsRefresh =
    req.query.refresh === "1" || req.query.refresh === "true";
  const cacheEmpty =
    snapshot.creditsRemaining === null && snapshot.tier === null;

  // Auto-refresh stale cache. Without this, /api/relay/status returns
  // whatever was last cached forever, missing balance changes that
  // happen outside Recap's request flow — e.g. an operator-side
  // BTCPay webhook crediting purchased invoices, or the rescan tool
  // in the relay dashboard. 10-second TTL is short enough that
  // out-of-band changes show up promptly while still avoiding a
  // ping on every dashboard render burst.
  const STALE_THRESHOLD_MS = 10_000;
  const cacheStale =
    !cacheEmpty &&
    typeof snapshot.lastUpdated === "number" &&
    Date.now() - snapshot.lastUpdated > STALE_THRESHOLD_MS;

  if (configured && (cacheEmpty || wantsRefresh || cacheStale)) {
    try {
      const provider = getProvider("relay", probeOpts);
      await provider.pingBalance({ timeoutMs: 5000 });
      snapshot = relayState.getRelayState(requestCreditKey);
    } catch (err) {
      // Log + record so the UI shows a real error instead of a silent
      // "balance unknown". The provider call itself records when the
      // ping reaches the network; this catches the earlier failures
      // (e.g. createRelayProvider throwing because install-id wasn't
      // initialized yet) where lastError wouldn't otherwise be set.
      console.error(
        `[relay/status] ping failed: ${err?.message || err} (stack: ${err?.stack || "n/a"})`
      );
      relayState.recordRelayError(err?.message || String(err), requestCreditKey);
      snapshot = relayState.getRelayState(requestCreditKey);
    }
  }

  res.json({ ...snapshot, configured });
});

// ── Status endpoints ───────────────────────────────────────────────────────
app.post("/api/heartbeat", (req, res) => {
  res.json({ ok: true, sleeping: false });
});

app.get("/api/status", (req, res) => {
  res.json({ ok: true, sleeping: false, uptime: process.uptime() });
});

// Shutdown: used by the macOS .app launcher to stop the server cleanly.
// On StartOS this endpoint is unused (StartOS manages the container lifecycle).
// NOTE(review): nothing in this handler checks who is calling. Whether
// the upstream auth/license middleware restricts it (especially in multi
// mode) is not visible here — confirm a non-admin cannot reach it.
app.post("/api/shutdown", (req, res) => {
  res.json({ ok: true, message: "Server shutting down..." });
  console.log("\n Server shutdown requested from browser. Goodbye!\n");
  // Short delay so the JSON response is flushed before the process exits.
  setTimeout(() => process.exit(0), 300);
});

// ── Manual update endpoint ─────────────────────────────────────────────────
app.post("/api/update-ytdlp", async (req, res) => {
  const result = await autoUpdateYtdlp(DATA_DIR);
  const info = await checkYtdlp();
  res.json({ ...result, ...info });
});

// ── Cookie management endpoints ───────────────────────────────────────────
// /api/cookies/* routes registered via setupCookieRoutes (./cookies.js)
setupCookieRoutes(app);

// ── Library export/import ──── moved to ./library.js ─────────
setupLibraryRoutes(app);

// ── Subscriptions ─────────────────────────────────────────────────────────
// Storage (subscriptions / skip / seen / auto-queue) + dedup live in
// ./subscriptions.js, keyed by scope. Endpoints pass scopeForRequest(req);
// the check loop + processor operate on SUBSCRIPTIONS_SCOPE ("owner") while
// the feature is operator-only in multi mode.

/**
 * List recent videos from a channel/playlist via yt-dlp (no download).
 * Uses --flat-playlist for speed; flat mode has no upload_date, so only
 * id + title are returned.
 *
 * @param {string} url - Channel or playlist URL handed to yt-dlp.
 * @param {number} [limit=15] - Value passed to --playlist-end.
 * @returns {Promise<Array<{id: string, title: string}>>}
 * @throws Whatever execFileAsync rejects with (non-zero exit, 60 s timeout).
 */
async function listChannelVideosFast(url, limit = 15) {
  const { stdout } = await execFileAsync("yt-dlp", [
    "--print", "%(id)s|%(title)s",
    "--no-download",
    "--playlist-end", String(limit),
    "--flat-playlist",
    ...ytCookieArgs(),
    ...ytExtraArgs(),
    url,
  ], { timeout: 60000 });

  return stdout.trim().split("\n").flatMap((line) => {
    if (!line) return [];
    // Split on the FIRST "|" only — titles may themselves contain "|".
    const sep = line.indexOf("|");
    // A line without the separator is not an "id|title" record. Skip it
    // instead of fabricating an id from slice(0, -1).
    if (sep === -1) return [];
    return [{ id: line.slice(0, sep), title: line.slice(sep + 1) }];
  });
}

/**
 * Fetch upload_date for a list of video IDs via yt-dlp, in batches of 50
 * URLs per invocation. Bails after 2 consecutive failed batches to avoid
 * grinding through blocked requests.
 *
 * @param {string[]} videoIds
 * @returns {Promise<Object<string, string>>} Map of video id → upload_date
 *   as printed by yt-dlp; ids whose date is missing or "NA" are omitted.
 */
async function fetchUploadDates(videoIds) {
  if (videoIds.length === 0) return {};

  const dateMap = {};
  const batchSize = 50;
  let consecutiveFails = 0;

  for (let i = 0; i < videoIds.length; i += batchSize) {
    const batchNumber = Math.floor(i / batchSize) + 1;
    const batch = videoIds.slice(i, i + batchSize);
    const urls = batch.map((id) => `https://www.youtube.com/watch?v=${id}`);
    try {
      const { stdout } = await execFileAsync("yt-dlp", [
        "--print", "%(id)s|%(upload_date)s",
        "--no-download",
        ...ytCookieArgs(),
        ...urls,
      ], { timeout: 45000 });
      for (const line of stdout.trim().split("\n").filter(Boolean)) {
        const [id, date] = line.split("|");
        if (id && date && date !== "NA") dateMap[id] = date;
      }
      consecutiveFails = 0;
      subLog(` Batch ${batchNumber}: got dates for ${batch.length} video(s)`);
    } catch (err) {
      consecutiveFails++;
      // String(...) so a rejection without a string .message cannot
      // throw from inside this catch block.
      const reason = String(err?.message ?? err).slice(0, 80);
      subLog(` ⚠ Batch ${batchNumber} failed: ${reason}`);
      if (consecutiveFails >= 2) {
        subLog(` ⚠ 2 consecutive failures — aborting yt-dlp date fetch (bot detection likely)`);
        break;
      }
    }
  }
  return dateMap;
}

// ── RSS-based date fetching (bypasses bot detection) ─────────────────────
// Fetch a URL and return the response body as a string
// fetchUrl moved to ./util.js

/**
 * Get the channel_id for a YouTube channel/playlist URL using yt-dlp.
 * Tries a fast --flat-playlist probe first (may print "NA"), then a
 * slower non-flat probe of the first playlist item for full metadata.
 *
 * @param {string} url
 * @returns {Promise<string|null>} The channel id, or null when neither
 *   probe yields a usable value.
 */
async function getChannelId(url) {
  const attempts = [
    // Method 1: flat-playlist channel_id (fast, may return NA)
    { extraArgs: ["--flat-playlist"], timeout: 15000 },
    // Method 2: non-flat single video (slower but gets full metadata)
    { extraArgs: [], timeout: 30000 },
  ];

  for (const { extraArgs, timeout } of attempts) {
    try {
      const { stdout } = await execFileAsync("yt-dlp", [
        "--print", "%(channel_id)s",
        "--no-download",
        "--playlist-items", "1",
        ...extraArgs,
        ...ytCookieArgs(),
        url,
      ], { timeout });
      const id = stdout.trim().split("\n")[0];
      if (id && id !== "NA" && id.length > 5) return id;
    } catch {
      // Best-effort probe: a failure just means "try the next method".
    }
  }
  return null;
}

// Fetch video dates from YouTube RSS feed (no auth, no bot detection)
// Returns { videoId: "YYYYMMDD", ...
// } for up to 15 most recent videos
/**
 * Read a channel's public YouTube RSS feed and map each entry's video id
 * to its publish date.
 *
 * @param {string} channelId - YouTube channel id; a falsy value
 *   short-circuits to an empty map without any network call.
 * @returns {Promise<Object<string, string>>} Map of video id → "YYYYMMDD".
 *   Never rejects: a fetch failure is reported through subLog and whatever
 *   was collected so far (normally nothing) is returned.
 */
async function fetchDatesFromRSS(channelId) {
  const dateMap = {};
  if (!channelId) return dateMap;
  try {
    const rssUrl = `https://www.youtube.com/feeds/videos.xml?channel_id=${encodeURIComponent(channelId)}`;
    const xml = await fetchUrl(rssUrl);
    // One match per <entry>…</entry>. Anchoring on the opening tags keeps
    // feed-level elements ahead of the first entry (the feed has its own
    // <published>) from ever being captured.
    const entryRegex =
      /<entry>[\s\S]*?<yt:videoId>([^<]+)<\/yt:videoId>[\s\S]*?<published>([^<]+)<\/published>[\s\S]*?<\/entry>/g;
    for (const [, videoId, published] of String(xml).matchAll(entryRegex)) {
      // published looks like "2025-12-01T18:00:00+00:00" → "20251201"
      dateMap[videoId] = published.slice(0, 10).replace(/-/g, "");
    }
  } catch (err) {
    subLog(` ⚠ RSS feed fetch failed: ${err?.message || err}`);
  }
  return dateMap;
}

// ── Podcast RSS feed parsing ────────────────────────────────────────────────

/**
 * Heuristic: does this URL look like a podcast RSS feed?
 * Case-insensitive substring match against common feed path fragments
 * and podcast-host domains.
 *
 * @param {string} url
 * @returns {boolean} false for empty / missing input.
 */
function isPodcastFeedUrl(url) {
  if (!url) return false;
  const u = url.trim().toLowerCase();
  const feedMarkers = [
    // Generic feed patterns
    "/feed", "/rss", "feeds.", ".xml",
    // Known podcast hosts
    "anchor.fm", "feeds.buzzsprout", "feeds.simplecast",
    "feeds.megaphone", "feeds.transistor", "feeds.libsyn",
    "feeds.podcastmirror", "feeds.acast", "feeds.fireside",
    "rss.art19", "podbean.com/feed",
  ];
  return feedMarkers.some((marker) => u.includes(marker));
}

// Apple Podcasts SHOW URL — the whole-podcast page, no `?i=<episode-id>`
// episode parameter. Format:
//   https://podcasts.apple.com/<country>/podcast/<slug>/id<digits>
// We do prefix-match via regex, then USE URL PARSING to confirm no
// `i` query param (the episode discriminator). The earlier all-regex
// version had a misplaced negative lookahead that matched episode
// URLs too, sending episode links into the subscribe flow by mistake.
const APPLE_PODCAST_PREFIX_RE =
  /^https?:\/\/(?:www\.)?podcasts\.apple\.com\/[^/]+\/podcast\/[^/]+\/id(\d+)/i;

/**
 * True when `url` is an Apple Podcasts show landing page (not an episode).
 *
 * @param {string} url - Candidate URL; surrounding whitespace is ignored.
 * @returns {boolean}
 */
function isAppleShowUrl(url) {
  if (!url) return false;
  const trimmed = url.trim();
  if (!APPLE_PODCAST_PREFIX_RE.test(trimmed)) return false;
  try {
    // Episode URLs have `?i=<id>` (or `&i=<id>`). Anything else is
    // the show landing page.
    return !new URL(trimmed).searchParams.has("i");
  } catch {
    return false;
  }
}

// Legacy alias — the regex's first capture group is still used to
// pull the podcast id in resolveAppleShowToFeed below. Keep it named
// to avoid splitting that helper into two regex+parse blocks.
const APPLE_SHOW_URL_RE = APPLE_PODCAST_PREFIX_RE;

/**
 * Resolve an Apple Podcasts show URL to its RSS feed URL via the
 * public iTunes Lookup API. No auth required.
 *
 * @param {string} url - Apple Podcasts show URL. Trimmed before matching so
 *   it accepts exactly the inputs isAppleShowUrl accepts.
 * @returns {Promise<string|null>} The feedUrl, or null when the URL does not
 *   match, the lookup fails, or Apple doesn't have a usable RSS URL on file
 *   (rare — shows distributed only via Apple-exclusive feeds, mostly
 *   Apple Podcast Subscriptions paid content).
 */
async function resolveAppleShowToFeed(url) {
  const m = typeof url === "string" ? url.trim().match(APPLE_SHOW_URL_RE) : null;
  if (!m) return null;
  const podcastId = m[1];
  try {
    const raw = await fetchUrl(
      `https://itunes.apple.com/lookup?id=${encodeURIComponent(podcastId)}`,
    );
    const parsed = JSON.parse(raw);
    const show = (parsed?.results || []).find(
      (r) => r && r.wrapperType === "track" && r.kind === "podcast",
    );
    return show?.feedUrl || null;
  } catch {
    // Best-effort lookup: network and JSON failures both mean "no feed".
    return null;
  }
}

/**
 * Fetch and parse a podcast RSS feed.
 *
 * @param {string} feedUrl - URL of the RSS feed.
 * @param {number} [limit=200] - Maximum number of episodes to return.
 * @returns {Promise<{title: string, episodes: Array<{id: string, title: string,
 *   date: string, audioUrl: string, duration: string}>}>} `date` is "YYYYMMDD"
 *   or "" when the pubDate is missing/unparseable; items without an audio
 *   enclosure are skipped.
 * @throws Whatever fetchUrl rejects with (the fetch is not caught here).
 */
async function parsePodcastRSS(feedUrl, limit = 200) {
  const xml = await fetchUrl(feedUrl);

  // First capture group of `re` in `text`, trimmed, or `fallback`.
  const firstGroup = (text, re, fallback) => {
    const found = text.match(re);
    return found ? found[1].trim() : fallback;
  };

  // Podcast title: the first <title> after <channel>. The opening tags are
  // required — without them the capture would include the "<title>" tag.
  const podcastTitle = firstGroup(
    xml,
    /<channel>[\s\S]*?<title>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?<\/title>/,
    "Unknown Podcast",
  );

  // Extract episodes from <item> elements
  const episodes = [];
  const itemRegex = /<item>([\s\S]*?)<\/item>/g;
  let match;
  while (episodes.length < limit && (match = itemRegex.exec(xml)) !== null) {
    const item = match[1];

    // Audio enclosure URL — skip episodes without audio
    const audioUrl = firstGroup(item, /<enclosure[^>]+url=["']([^"']+)["']/, null);
    if (!audioUrl) continue;

    // GUID (unique episode identifier)
    const guid = firstGroup(item, /<guid[^>]*>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?<\/guid>/, null);
    const title = firstGroup(
      item,
      /<title>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?<\/title>/,
      "Untitled Episode",
    );

    // Publish date → YYYYMMDD ("" when missing or unparseable)
    const pubDate = firstGroup(item, /<pubDate>([^<]+)<\/pubDate>/, null);
    let date = "";
    if (pubDate) {
      const parsedDate = new Date(pubDate);
      if (!Number.isNaN(parsedDate.getTime())) {
        date = parsedDate.toISOString().slice(0, 10).replace(/-/g, "");
      }
    }

    // Duration (itunes:duration), passed through as the feed wrote it
    const duration = firstGroup(item, /<itunes:duration>([^<]+)<\/itunes:duration>/, "");

    // Use guid, or fall back to audioUrl as unique ID
    episodes.push({ id: guid || audioUrl, title, date, audioUrl, duration });
  }

  return { title: podcastTitle, episodes };
}

// Download a podcast episode audio file via HTTP(S) to a local path
// downloadPodcastAudio moved to ./audio.js

/**
 * Get a channel's display name from its URL.
 *
 * Tries yt-dlp with --flat-playlist first (fast), then without it (slower
 * but reads the channel from video metadata), and finally falls back to the
 * `@handle` in the URL path.
 *
 * @param {string} url - Channel / playlist URL.
 * @returns {Promise<string>} The channel name, "@handle", or "Unknown Channel".
 */
async function fetchChannelName(url) {
  const attempts = [
    { extraArgs: ["--flat-playlist"], timeout: 15000 },
    { extraArgs: [], timeout: 30000 },
  ];
  for (const { extraArgs, timeout } of attempts) {
    try {
      const { stdout } = await execFileAsync("yt-dlp", [
        "--print", "%(channel)s",
        "--no-download",
        "--playlist-end", "1",
        ...extraArgs,
        ...ytCookieArgs(),
        url,
      ], { timeout });
      const name = stdout.trim().split("\n")[0];
      if (name && name !== "NA") return name;
    } catch {
      // Deliberate best-effort: fall through to the next strategy.
    }
  }

  // Last resort: extract handle from URL
  try {
    const handleMatch = new URL(url).pathname.match(/\/@([^/]+)/);
    if (handleMatch) return "@" + handleMatch[1];
  } catch {
    // Not a parseable URL — use the generic name below.
  }
  return "Unknown Channel";
}

// Subscription storage (subscriptions / skip / seen / auto-queue), the
// dedup, scope enumeration, and the file-locking that serializes
// read-modify-writes all live in ./subscriptions.js now — scope-keyed and
// unit-tested. getProcessedVideoIds(scope) + isKnownVideo() are the dedup
// that keeps the auto-queue from re-offering already-handled videos.
//
// While subscriptions are operator-only in multi mode, the check loop +
// background processor operate on this scope. Endpoints use
// scopeForRequest(req) (= "owner" for the operator). Per-tenant flips these
// to each user's scope — see docs/per-tenant-subscriptions-plan.md.
const SUBSCRIPTIONS_SCOPE = "owner";

/**
 * Prune auto-queue items that have since been summarized (e.g. cleared from
 * the queue then re-discovered by a check). Runs per scope at boot, after
 * the migration. The scope-aware getProcessedVideoIds is what makes this
 * work — it used to scan the wrong directory and prune nothing.
 *
 * @param {string} scope - Queue scope to reconcile.
 */
async function reconcileAutoQueueWithLibrary(scope) {
  const processed = await getProcessedVideoIds(scope);
  let removed = 0;
  await mutateAutoQueue(scope, (items) => {
    const kept = items.filter((q) => !processed.has(q.videoId));
    removed = items.length - kept.length;
    return kept;
  });
  if (removed > 0) {
    console.log(
      ` Auto-queue (${scope}): removed ${removed} already-processed item(s)`,
    );
  }
}

/**
 * Which user's identity the processor assumes to run a scope's items over
 * the authenticated loopback /api/process call. Single mode → null (no auth;
 * the loopback resolves to "owner"). Multi mode: "owner" → the operator;
 * a tenant scope IS that user's id.
 *
 * @param {string} scope
 * @returns {string|null}
 */
function userIdForScope(scope) {
  if (RECAP_MODE !== "multi") return null;
  return scope === "owner" ? adminUserId() : scope;
}

// ── Background processing queue ──────────────────────────────────────────
// Processes "approved" auto-queue items sequentially with configurable delay
// between items to avoid hammering YouTube with rapid-fire downloads.

const processingConfigPath = path.join(configDir, "processing-config.json");

let processingConfig = {
  delaySeconds: 300, // Default delay between processing queue items (5 minutes)
  enabled: true, // Whether background processing is active
};

let processingState = {
  running: false, // Is the processor loop currently active?
  currentItem: null, // The item currently being processed (or null)
  lastCompleted: null, // Timestamp of last completed item
  rush: false, // If true, skip delay before next item
  log: [], // Recent processing log entries
};

/** Log to the console and to the in-memory processor log (last 100 entries). */
function procLog(msg) {
  console.log(` [processor] ${msg}`);
  processingState.log.push({ t: new Date().toISOString(), msg });
  if (processingState.log.length > 100) processingState.log.shift();
}

/**
 * Load persisted processor settings over the defaults. A missing file is the
 * normal first-run case and stays silent; any other failure (unreadable or
 * corrupt JSON) keeps the defaults but is logged so it is not invisible.
 */
async function loadProcessingConfig() {
  try {
    const data = JSON.parse(await fs.readFile(processingConfigPath, "utf-8"));
    if (typeof data.delaySeconds === "number") processingConfig.delaySeconds = data.delaySeconds;
    if (typeof data.enabled === "boolean") processingConfig.enabled = data.enabled;
  } catch (err) {
    if (err?.code !== "ENOENT") {
      console.warn(` [processor] Ignoring unreadable processing config: ${err?.message || err}`);
    }
  }
}

/** Persist the current processor settings as pretty-printed JSON. */
async function saveProcessingConfig() {
  await fs.writeFile(processingConfigPath, JSON.stringify(processingConfig, null, 2));
}

await loadProcessingConfig();

/**
 * Find the next approved item across every scope's auto-queue.
 *
 * @returns {Promise<{scope: string, item: object}|null>} The first approved
 *   item in listAutoQueueScopes() order, or null when none.
 */
async function findNextApprovedItem() {
  const scopes = await listAutoQueueScopes();
  for (const scope of scopes) {
    const queue = await loadAutoQueue(scope);
    const item = queue.find((q) => q.status === "approved");
    if (item) return { scope, item };
  }
  return null;
}

/**
 * The background processor: picks "approved" items, processes via internal
 * HTTP, waits the configured delay, then picks the next one. Runs until no
 * approved item is left or processing is paused.
 *
 * `processingState.running` is the re-entrancy guard. It is cleared in a
 * `finally` so an unexpected rejection (queue file I/O, scope listing) cannot
 * leave the flag stuck at true — which would stop kickProcessor() from ever
 * starting the loop again.
 */
async function backgroundProcessor() {
  if (processingState.running) return; // Already running
  processingState.running = true;
  procLog("Background processor started");

  try {
    while (true) {
      // Find the next approved item across every owner's queue (a scope can
      // hold queued items even with no current subscriptions). Each item is
      // processed AS its owner — see processItemInternally(item, scope).
      const next = await findNextApprovedItem();
      if (!next) {
        procLog("No approved items in any queue — processor sleeping");
        return;
      }
      const { scope, item } = next;

      if (!processingConfig.enabled) {
        procLog("Processing is paused — processor sleeping");
        return;
      }

      // Wait the configured delay (unless rush mode or first item)
      if (processingState.lastCompleted && !processingState.rush) {
        const delaySec = processingConfig.delaySeconds;
        procLog(`Waiting ${delaySec}s before next item...`);
        await new Promise((r) => setTimeout(r, delaySec * 1000));
      }
      processingState.rush = false;

      // Atomically claim the item: re-check it's still approved (it may have
      // been removed/changed while we waited) and flip it to "processing".
      let claimed = null;
      await mutateAutoQueue(scope, (items) => {
        const it = items.find((q) => q.id === item.id);
        if (it && it.status === "approved") {
          it.status = "processing";
          claimed = { ...it };
        }
      });
      if (!claimed) {
        procLog(`Item ${item.id} was removed or status changed — skipping`);
        continue;
      }

      processingState.currentItem = claimed;
      procLog(`Processing: ${claimed.title} (${claimed.url})`);

      let result = null;
      let errMsg = null;
      try {
        // Call /api/process via loopback, AS the scope's owner.
        result = await processItemInternally(claimed, scope);
        processingState.lastCompleted = new Date().toISOString();
        procLog(`✓ Completed: ${claimed.title}`);
      } catch (err) {
        errMsg = err.message || String(err);
        procLog(`✗ Failed: ${claimed.title} — ${errMsg.slice(0, 200)}`);
      }

      // Write the terminal status back atomically.
      await mutateAutoQueue(scope, (items) => {
        const it = items.find((q) => q.id === item.id);
        if (!it) return;
        if (errMsg) {
          it.status = "failed";
          it.error = errMsg;
          it.failedAt = new Date().toISOString();
        } else {
          it.status = "completed";
          it.completedAt = new Date().toISOString();
          it.historyId = (result && result.historyId) || null;
        }
      });

      processingState.currentItem = null;
    }
  } finally {
    processingState.running = false;
    processingState.currentItem = null;
  }
}

/**
 * Internal HTTP request to /api/process — consumes the SSE stream and
 * waits for the "result" or "error" event. This reuses the entire existing
 * pipeline without any code duplication.
 *
 * @param {object} item - The claimed auto-queue item (url, type, title, …).
 * @param {string} scope - Queue scope the item belongs to; decides which
 *   user identity the loopback call assumes (see userIdForScope).
 * @returns {Promise<object>} Payload of the last "result" SSE event.
 *   Rejects with an Error on an "error" event, a stream that ends without a
 *   result, a transport error, the 30-minute timeout, or when no provider
 *   is configured.
 */
function processItemInternally(item, scope) {
  return new Promise((resolve, reject) => {
    // Pick a provider for the auto-queue run. Historically this
    // function hard-required a server-side Gemini API key (legacy
    // pre-relay-as-provider design), so a user with the relay set
    // up but no Gemini key would silently fail here — the request
    // never even reached /api/process and the failure only landed
    // in the in-memory processingState.log, invisible to the
    // dashboard. That broke subscription auto-processing for every
    // relay-only user.
    //
    // New behavior: prefer the relay (the modern default for fresh
    // installs and the most common config), fall back to Gemini
    // when only a local key is configured, and fail with a clear,
    // user-visible error when neither is set. The frontend stores
    // the user's actual preference in localStorage which the server
    // can't read — picking based on what's CONFIGURED here mirrors
    // the practical reality (the relay is the only modern option
    // that doesn't require a key) and matches the interactive
    // experience for relay users.
    const relayConfigured = !!relayDefault.getRelayBaseURL();
    const geminiKey = resolveApiKey(null);
    let transcriptionProvider, analysisProvider;
    if (relayConfigured) {
      transcriptionProvider = "relay";
      analysisProvider = "relay";
    } else if (geminiKey) {
      transcriptionProvider = "gemini";
      analysisProvider = "gemini";
    } else {
      return reject(
        new Error(
          "No transcribe/analyze provider is configured. Set the Recap Relay URL in Settings, or paste a Gemini API key.",
        ),
      );
    }

    const body = JSON.stringify({
      url: item.url,
      // Only used by the Gemini provider; harmless for the relay
      // path (relay provider ignores this field).
      apiKey: "USE_SERVER_KEY",
      type: item.type || undefined,
      title: item.title || undefined,
      uploadDate: item.uploadDate || undefined,
      episodeId: item.videoId || undefined,
      transcriptionProvider,
      transcriptionModel: transcriptionProvider === "relay" ? "relay-default" : undefined,
      analysisProvider,
      analysisModel: analysisProvider === "relay" ? "relay-default" : undefined,
    });

    // Per-tenant: assume the owning user's identity for this loopback call
    // so /api/process gates, bills, and saves to the right account. Single
    // mode needs no cookie (the loopback resolves to "owner"). A short-lived
    // real session is minted here and deleted on every exit path below.
    const headers = {
      "Content-Type": "application/json",
      "Content-Length": Buffer.byteLength(body),
    };
    let internalSession = null;
    const ownerUserId = userIdForScope(scope);
    if (ownerUserId) {
      try {
        internalSession = mintInternalSession(ownerUserId);
        headers["Cookie"] = `${SESSION_COOKIE}=${internalSession}`;
      } catch (err) {
        return reject(
          new Error(`Could not establish processing identity: ${err?.message || err}`),
        );
      }
    }

    // Settle exactly once: several terminal events can fire for one request
    // (e.g. "timeout" followed by the "error" that req.destroy() triggers),
    // and the session must be deleted once, on whichever comes first.
    let settled = false;
    const settle = (fn, value) => {
      if (settled) return;
      settled = true;
      deleteInternalSession(internalSession);
      fn(value);
    };

    const req = http.request({
      hostname: "127.0.0.1",
      port: PORT,
      path: "/api/process",
      method: "POST",
      headers,
      timeout: 1800000, // 30 minutes max for very long videos
    }, (res) => {
      // Decode as UTF-8 at the stream level so a multi-byte character split
      // across two chunks is not corrupted.
      res.setEncoding("utf8");

      let buffer = "";
      let lastResult = null;
      let lastError = null;
      // Must outlive a single "data" callback: the "event:" line and its
      // "data:" line can arrive in different chunks (large result payloads
      // routinely do), and the event name has to survive until then.
      let currentEvent = null;

      res.on("data", (chunk) => {
        buffer += chunk;
        // Parse SSE events from the buffer
        const lines = buffer.split("\n");
        buffer = lines.pop() || ""; // Keep incomplete line in buffer

        for (const line of lines) {
          if (line.startsWith("event: ")) {
            currentEvent = line.slice(7).trim();
          } else if (line.startsWith("data: ") && currentEvent) {
            try {
              const data = JSON.parse(line.slice(6));
              if (currentEvent === "result") lastResult = data;
              if (currentEvent === "error") lastError = data;
              if (currentEvent === "log") {
                procLog(` [${data.elapsed || "?"}s] ${data.message}`);
              }
            } catch {
              // Malformed event payload — ignore it and keep reading.
            }
            currentEvent = null;
          } else if (line === "") {
            currentEvent = null;
          }
        }
      });

      res.on("end", () => {
        if (lastError) {
          settle(reject, new Error(lastError.message || "Processing failed"));
        } else if (lastResult) {
          settle(resolve, lastResult);
        } else if (res.statusCode && res.statusCode !== 200) {
          // A non-SSE response (auth/licence gate, bad request) carries no
          // events; surface the status so the failure is diagnosable.
          settle(reject, new Error(`Processing ended without result (HTTP ${res.statusCode})`));
        } else {
          settle(reject, new Error("Processing ended without result"));
        }
      });

      res.on("error", (err) => settle(reject, err));
    });

    req.on("error", (err) => settle(reject, err));

    req.on("timeout", () => {
      settle(reject, new Error("Processing timed out after 30 minutes"));
      req.destroy();
    });

    req.write(body);
    req.end();
  });
}

/** Wake up the processor whenever there are approved items. */
function kickProcessor() {
  // Just start the loop if idle — it loads the queue itself and sleeps
  // immediately if there's nothing approved, so no pre-check needed.
  if (!processingState.running && processingConfig.enabled) {
    backgroundProcessor().catch((err) => procLog(`Processor error: ${err.message}`));
  }
}

let subCheckRunning = false;
let subCheckPromise = null;
let subCheckLog = []; // Stores recent check logs for debug endpoint

/** Log to the console and to the in-memory check log (last 200 entries). */
function subLog(msg) {
  console.log(msg);
  subCheckLog.push({ t: new Date().toISOString(), msg });
  if (subCheckLog.length > 200) subCheckLog.shift();
}

/**
 * Run a subscription check. If one is already in flight, wait for it and
 * then run a fresh one, so the caller's check sees state written before it
 * asked.
 */
async function checkSubscriptions() {
  if (subCheckRunning) {
    // Wait for current check to finish, then run again
    if (subCheckPromise) await subCheckPromise;
    return checkSubscriptions();
  }
  subCheckRunning = true;
  subCheckPromise = _checkSubscriptionsInner().finally(() => {
    subCheckRunning = false;
    subCheckPromise = null;
  });
  return subCheckPromise;
}

/** One full check pass over every subscription scope; per-scope failures are logged, not thrown. */
async function _checkSubscriptionsInner() {
  // Pro-tier feature: skip silently when not entitled. The HTTP gate above
  // returns 402 to callers; this guards the background timer + manual paths.
  if (!licenseMW.LIC.entitlements.has("subscriptions")) {
    subCheckLog = [];
    subLog("Skipped: subscriptions require a Pro license.");
    return;
  }
  subCheckLog = []; // Clear logs for fresh check
  // Operator-only in multi mode, so this resolves to ["owner"]. When
  // per-tenant subscriptions ship it fans out to every subscribing tenant.
  const scopes = await listSubscriptionScopes();
  for (const scope of scopes) {
    try {
      await checkScopeSubscriptions(scope);
    } catch (err) {
      subLog(`⚠ Subscription check failed for scope ${scope}: ${err.message}`);
    }
  }
}

// Discover + queue new videos for ONE scope's subscriptions. New items are
// collected locally and appended to the scope's auto-queue atomically at the
// end (mutateAutoQueue) so a concurrent endpoint mutation can't be lost.
/**
 * Discover and queue new content for every subscription in one scope.
 *
 * Each subscription is checked independently: a failure is logged and the
 * loop moves on. Items discovered during the run are appended to the
 * scope's auto-queue in a single mutateAutoQueue call at the end, and the
 * background processor is kicked when approved items are waiting.
 *
 * @param {string} scope - Subscription / auto-queue scope to check.
 */
async function checkScopeSubscriptions(scope) {
  const subs = await loadSubscriptions(scope);
  if (subs.length === 0) {
    return;
  }

  const processedIds = await getProcessedVideoIds(scope);
  const skippedIds = await loadSkipList(scope);
  const seenIds = await loadSeenList(scope);
  const queue = await loadAutoQueue(scope);

  // Dedup also against items already in the queue. queuedIds grows as we add
  // within this run so the same video isn't queued twice across subs.
  const queuedIds = new Set(queue.map((q) => {
    // For YouTube: extract video ID from URL; for podcasts: use stored videoId (GUID)
    if (q.videoId) return q.videoId;
    const m = q.url.match(/[?&]v=([a-zA-Z0-9_-]{11})/);
    return m ? m[1] : null;
  }).filter(Boolean));

  // Split queue counts into active (pending/approved/processing — what the
  // UI shows) vs done (completed/failed — terminal, hidden by default). All
  // four buckets feed dedup against newly discovered videos below.
  const qActive = queue.filter((q) => ["pending", "approved", "processing"].includes(q.status)).length;
  const qCompleted = queue.filter((q) => q.status === "completed").length;
  const qFailed = queue.filter((q) => q.status === "failed").length;
  const qDone = qCompleted + qFailed;
  subLog(`[${scope}] ${processedIds.size} in library, ${skippedIds.size} skipped, ${seenIds.size} seen — queue: ${qActive} active, ${qDone} done (${qCompleted} completed, ${qFailed} failed)`);
  if (qFailed > 0) {
    subLog(` (queue has ${qFailed} failed item${qFailed === 1 ? "" : "s"} — open the Queue panel and toggle "Show all" to view + retry)`);
  }

  const dedupSets = { processedIds, queuedIds, skippedIds, seenIds };
  let changed = false;
  const newItems = []; // appended to the scope's queue atomically at the end

  for (const sub of subs) {
    if (sub.paused) {
      subLog(`⏸ ${sub.name} — paused, skipping`);
      continue;
    }
    try {
      const icon = sub.type === "podcast" ? "🎙" : "📡";
      subLog(`${icon} Checking: ${sub.name} (${sub.url})`);

      // createdAt is an ISO timestamp; strip punctuation to get YYYYMMDD.
      const cutoffDate = sub.createdAt.replace(/[-T:\.Z]/g, "").slice(0, 8);

      if (sub.type === "podcast") {
        // ── Podcast subscription: discover episodes from RSS feed ──
        const { episodes } = await parsePodcastRSS(sub.url, 200);
        subLog(` Found ${episodes.length} episode(s) in RSS feed`);

        if (episodes.length === 0) {
          sub.lastChecked = new Date().toISOString();
          changed = true;
          continue;
        }

        // Filter out already-known episodes (in library / queue / skip / seen)
        const unknowns = episodes.filter((ep) => !isKnownVideo(ep.id, dedupSets));
        const filtered = episodes.length - unknowns.length;
        subLog(` ${unknowns.length} to check, ${filtered} already known`);

        if (unknowns.length === 0) {
          sub.lastChecked = new Date().toISOString();
          changed = true;
          continue;
        }

        subLog(` Cutoff date: ${cutoffDate}`);

        let newCount = 0;
        const seenNow = [];
        for (const ep of unknowns) {
          if (!ep.date || ep.date.length !== 8) continue; // skip undated
          if (ep.date < cutoffDate) {
            seenNow.push(ep.id);
            continue; // before cutoff
          }

          subLog(` ✅ ${ep.date} — ${ep.title.slice(0, 60)}`);
          const itemStatus = sub.autoDownload ? "approved" : "pending";
          newItems.push({
            id: `auto-${Date.now()}-${Buffer.from(ep.id).toString("base64url").slice(0, 16)}`,
            videoId: ep.id, // episode GUID
            url: ep.audioUrl,
            title: ep.title,
            uploadDate: ep.date,
            subscriptionId: sub.id,
            subscriptionName: sub.name,
            status: itemStatus,
            type: "podcast",
            duration: ep.duration || "",
          });
          queuedIds.add(ep.id);
          newCount++;
        }

        if (seenNow.length > 0) await addToSeenList(scope, seenNow);

        sub.lastChecked = new Date().toISOString();
        subLog(` → ${newCount} episode(s) queued for approval from ${sub.name}`);
        changed = true;
      } else {
        // ── YouTube subscription: discover videos via yt-dlp + RSS dates ──
        // Scale fetch limit based on how far back the subscription date goes
        const daysSinceSub = Math.max(1, Math.ceil((Date.now() - new Date(sub.createdAt).getTime()) / 86400000));
        const estimatedVideos = Math.ceil(daysSinceSub / 7) * 4;
        const fetchLimit = Math.min(Math.max(estimatedVideos, 15), 200);
        subLog(` Subscription age: ${daysSinceSub}d → fetching up to ${fetchLimit} videos`);

        const candidates = await listChannelVideosFast(sub.url, fetchLimit);
        subLog(` Found ${candidates.length} recent video(s)`);

        if (candidates.length === 0) {
          subLog(` ⚠ No videos returned from yt-dlp for this channel`);
          sub.lastChecked = new Date().toISOString();
          changed = true;
          continue;
        }

        const unknowns = candidates.filter((v) => !isKnownVideo(v.id, dedupSets));
        const filtered = candidates.length - unknowns.length;
        subLog(` ${unknowns.length} to check, ${filtered} already known`);

        if (unknowns.length === 0) {
          sub.lastChecked = new Date().toISOString();
          changed = true;
          continue;
        }

        subLog(` Cutoff date: ${cutoffDate} — fetching upload dates...`);

        // Step 1: Get channel ID (cached on subscription, or fetch once)
        if (!sub.channelId) {
          subLog(` Resolving channel ID...`);
          sub.channelId = await getChannelId(sub.url);
          if (sub.channelId) {
            subLog(` Resolved channel ID: ${sub.channelId}`);
            changed = true;
          } else {
            subLog(` ⚠ Could not resolve channel ID — RSS will be skipped`);
          }
        }

        // Step 2: RSS feed — fast, no bot detection, covers ~15 most recent
        const dateMap = sub.channelId ? await fetchDatesFromRSS(sub.channelId) : {};
        const rssCount = Object.keys(dateMap).length;
        if (sub.channelId) subLog(` RSS feed: got dates for ${rssCount} videos`);

        // Step 3: Smart early termination
        const needDates = unknowns.filter((v) => !dateMap[v.id]);
        let earlyTermination = false;

        // Ask yt-dlp for the dates RSS did not cover and merge them in.
        const addYtDlpDates = async () => {
          const ytDates = await fetchUploadDates(needDates.map((v) => v.id));
          Object.assign(dateMap, ytDates);
          const ytCount = Object.keys(ytDates).length;
          if (ytCount > 0) subLog(` yt-dlp added dates for ${ytCount} more video(s)`);
        };

        if (needDates.length > 0 && rssCount > 0) {
          // YYYYMMDD strings sort chronologically, so [0] is the oldest.
          const oldestRssDate = Object.values(dateMap).sort()[0];
          if (oldestRssDate < cutoffDate) {
            earlyTermination = true;
            subLog(` Oldest RSS video (${oldestRssDate}) is before cutoff — ${needDates.length} older video(s) are definitely too old, skipping yt-dlp`);
          } else {
            subLog(` ${needDates.length} video(s) not in RSS — trying yt-dlp for dates...`);
            await addYtDlpDates();
          }
        } else if (needDates.length > 0) {
          subLog(` ${needDates.length} video(s) need dates — trying yt-dlp...`);
          await addYtDlpDates();
        }

        const gotDates = Object.keys(dateMap).length;
        if (gotDates > 0 || needDates.length === 0) {
          subLog(` Total dates: ${gotDates} of ${unknowns.length} videos`);
        } else {
          subLog(` ⚠ No dates available — skipping. Try setting YT_COOKIES_FROM in .env`);
          sub.lastChecked = new Date().toISOString();
          changed = true;
          continue;
        }

        let newCount = 0;
        for (const video of unknowns) {
          const uploadDate = dateMap[video.id];

          if (!uploadDate || uploadDate.length !== 8) {
            continue;
          }
          if (uploadDate < cutoffDate) {
            subLog(` ⏭ ${video.id} (${uploadDate}) — before cutoff`);
            continue;
          }

          subLog(` ✅ ${video.id} (${uploadDate}) — ${video.title.slice(0, 50)}`);
          const ytItemStatus = sub.autoDownload ? "approved" : "pending";
          newItems.push({
            id: `auto-${Date.now()}-${video.id}`,
            videoId: video.id,
            url: `https://www.youtube.com/watch?v=${video.id}`,
            title: video.title,
            uploadDate,
            subscriptionId: sub.id,
            subscriptionName: sub.name,
            status: ytItemStatus,
          });
          queuedIds.add(video.id);
          newCount++;
        }

        // Only add to seen list when we can PROVE a video is too old
        const seenNow = unknowns.filter((v) => {
          if (queuedIds.has(v.id)) return false;
          const d = dateMap[v.id];
          if (d && d.length === 8) return d < cutoffDate;
          return earlyTermination;
        }).map((v) => v.id);
        if (seenNow.length > 0) await addToSeenList(scope, seenNow);

        sub.lastChecked = new Date().toISOString();
        subLog(` → ${newCount} video(s) queued for approval from ${sub.name}`);
        changed = true;
      }
    } catch (err) {
      subLog(` ⚠ FAILED for ${sub.name}: ${err.message}`);
    }
  }

  if (changed) await saveSubscriptions(scope, subs);
  // Append everything discovered this run in one atomic mutation.
  if (newItems.length > 0) {
    await mutateAutoQueue(scope, (items) => {
      items.push(...newItems);
    });
  }

  const finalQueue = await loadAutoQueue(scope);
  const pendingCount = finalQueue.filter((q) => q.status === "pending").length;
  const approvedCount = finalQueue.filter((q) => q.status === "approved").length;
  subLog(`[${scope}] Done. ${pendingCount} pending, ${approvedCount} approved in auto-queue.`);

  // Wake up the background processor if there are approved items. (Behind
  // the operator-only gate the processor works the "owner" queue, which is
  // the only scope that reaches here.)
  if (approvedCount > 0) {
    subLog(`Kicking background processor for ${approvedCount} approved item(s)...`);
    kickProcessor();
  }
}

// Scope for a subscription / auto-queue request. Behind the operator-only
// gate (license-middleware) this is always "owner" (admin in multi mode, or
// single mode). Forward-compatible with per-tenant — it's just the signed-in
// user's scope. Falls back to "owner" if scope resolution somehow fails.
function subScope(req) {
  try {
    return scopeForRequest(req);
  } catch {
    return SUBSCRIPTIONS_SCOPE;
  }
}

// CRUD endpoints
app.get("/api/subscriptions", async (req, res) => {
  const subs = await loadSubscriptions(subScope(req));
  res.json({ subscriptions: subs });
});

// Extract a normalized channel key from a YouTube URL for dedup
function channelKeyFromUrl(url) {
  try {
    const u = new URL(url);
    // Named `pathname` so it does not shadow the imported `path` module.
    const pathname = u.pathname.toLowerCase().replace(/\/+$/, "");
    // /@handle/videos → @handle, /@handle/streams → @handle
    const handleMatch = pathname.match(/\/(@[^/]+)/);
    if (handleMatch) return handleMatch[1];
    // /channel/UCxxx → channel/UCxxx
    const channelMatch = pathname.match(/\/(channel\/[^/]+)/);
    if (channelMatch) return channelMatch[1];
    // /c/name or /user/name
    const cMatch = pathname.match(/\/(c|user)\/([^/]+)/);
    if (cMatch) return cMatch[0];
    // /playlist?list=PLxxx
    const list = u.searchParams.get("list");
    if (list) return `playlist/${list}`;
    return pathname; // fallback
  } catch {
    return url.toLowerCase().replace(/\/+$/, "");
  }
}

app.post("/api/subscriptions", async (req, res) => {
  let { url, since, type, autoDownload } = req.body;
  if (!url || typeof url !== "string") return res.status(400).json({ error: "Missing url" });

  // Use provided cutoff date, or default to right now. Validate up front:
  // toISOString() throws a RangeError on an invalid Date, which inside an
  // async handler would surface as an unhandled rejection, not a response.
  const cutoffDate = since ? new Date(since) : new Date();
  if (Number.isNaN(cutoffDate.getTime())) {
    return res.status(400).json({ error: "Invalid 'since' date" });
  }
  const cutoff = cutoffDate.toISOString();

  // Apple Podcasts SHOW URLs (`/podcast/<slug>/id<num>` with NO
  // `?i=<id>` episode param) get resolved to the show's RSS feed
  // before we persist the subscription. The feed URL is what the
  // poll loop fetches to discover new episodes, so storing the
  // landing-page URL would be a dead end.
  if (isAppleShowUrl(url)) {
    try {
      const feed = await resolveAppleShowToFeed(url);
      if (!feed) {
        return res.status(400).json({
          error: "apple_show_no_feed",
          message: "Apple doesn't list an RSS feed for that show — it may be an Apple Podcasts Subscriptions exclusive. Paste the show's direct RSS URL instead.",
        });
      }
      console.log(`[subscribe] Apple show → resolved feed: ${feed}`);
      url = feed;
      type = "podcast";
    } catch (err) {
      return res.status(502).json({
        error: "apple_show_lookup_failed",
        message: `Couldn't reach Apple to resolve that show URL: ${err?.message || err}`,
      });
    }
  }

  const isPodcast = type === "podcast" || isPodcastFeedUrl(url);
  const scope = subScope(req);

  const subs = await loadSubscriptions(scope);
  // Prevent duplicates
  if (isPodcast) {
    const normalizeFeedUrl = (value) => value.trim().toLowerCase().replace(/\/+$/, "");
    const normalizedUrl = normalizeFeedUrl(url);
    if (subs.find((s) => normalizeFeedUrl(s.url) === normalizedUrl)) {
      return res.status(409).json({ error: "Already subscribed to this podcast" });
    }
  } else {
    const newKey = channelKeyFromUrl(url);
    if (subs.find((s) => channelKeyFromUrl(s.url) === newKey)) {
      return res.status(409).json({ error: "Already subscribed to this channel" });
    }
  }

  const sub = {
    id: `sub-${Date.now()}`,
    url,
    name: "Loading...",
    type: isPodcast ? "podcast" : "youtube",
    channelId: null,
    createdAt: cutoff,
    lastChecked: null,
    paused: false,
    autoDownload: autoDownload === true,
  };
  subs.push(sub);
  await saveSubscriptions(scope, subs);

  // Respond immediately so the UI isn't blocked
  res.json(sub);

  // Background: resolve name and check for new content
  (async () => {
    try {
      if (isPodcast) {
        // Fetch podcast title from RSS feed
        const { title } = await parsePodcastRSS(url, 1);
        const freshSubs = await loadSubscriptions(scope);
        const s = freshSubs.find((x) => x.id === sub.id);
        if (s) {
          s.name = title || url;
          await saveSubscriptions(scope, freshSubs);
        }
        console.log(` 🎙 New podcast subscription: ${title} — checking for episodes...`);
      } else {
        const [name, channelId] = await Promise.all([
          fetchChannelName(url),
          getChannelId(url),
        ]);
        const freshSubs = await loadSubscriptions(scope);
        const s = freshSubs.find((x) => x.id === sub.id);
        if (s) {
          s.name = name;
          if (channelId) s.channelId = channelId;
          await saveSubscriptions(scope, freshSubs);
        }
        console.log(` 📡 New subscription: ${name} — checking for recent videos...`);
      }
      await checkSubscriptions();
    } catch (err) {
      console.error(" ⚠ Background subscription setup error:", err.message);
    }
  })();
});

app.delete("/api/subscriptions/:id", async (req, res) => {
  const scope = subScope(req);
  let subs = await loadSubscriptions(scope);
  subs = subs.filter((s) => s.id !== req.params.id);
  await saveSubscriptions(scope, subs);
  // Also remove any auto-queue items from this subscription.
  await mutateAutoQueue(scope, (items) =>
    items.filter((q) => q.subscriptionId !== req.params.id),
  );
  res.json({ ok: true });
});

app.put("/api/subscriptions/:id/pause", async (req, res) => {
  const scope = subScope(req);
  const subs = await loadSubscriptions(scope);
  const sub = subs.find((s) => s.id === req.params.id);
  if (!sub) return res.status(404).json({ error: "Subscription not found" });
  sub.paused = !sub.paused;
  await saveSubscriptions(scope, subs);
  res.json(sub);
});

app.put("/api/subscriptions/:id/since", async (req, res) => {
  const { since } = req.body;
  if (!since) return res.status(400).json({ error: "Missing 'since' date" });
  // Reject unparseable dates here; toISOString() would throw on them.
  const sinceDate = new Date(since);
  if (Number.isNaN(sinceDate.getTime())) {
    return res.status(400).json({ error: "Invalid 'since' date" });
  }
  const scope = subScope(req);
  const subs = await loadSubscriptions(scope);
  const sub = subs.find((s) => s.id === req.params.id);
  if (!sub) return res.status(404).json({ error: "Subscription not found" });
  sub.createdAt = sinceDate.toISOString();
  await saveSubscriptions(scope, subs);
  res.json(sub);
});

// Debug: subscription check logs (viewable in-app)
app.get("/api/sub-check-log", async (req, res) => {
  const q = await loadAutoQueue(subScope(req));
  res.json({
    log: subCheckLog,
    autoQueueCount: q.length,
    autoQueue: q.map((x) => ({ id: x.id, videoId: x.videoId, title: x.title, status: x.status, sub: x.subscriptionName })),
  });
});

// Auto-queue endpoints (frontend polls these)
app.get("/api/auto-queue", async (req, res) => {
  const queue = await loadAutoQueue(subScope(req));
  // Return all items grouped by status for the frontend
  const showAll = req.query.all === "true";
  const items = showAll
    ? queue
    : queue.filter((q) => ["pending", "approved", "processing"].includes(q.status));
  const countWithStatus = (status) => queue.filter((q) => q.status === status).length;
  res.json({
    items,
    checkRunning: subCheckRunning,
    counts: {
      pending: countWithStatus("pending"),
      approved: countWithStatus("approved"),
      processing: countWithStatus("processing"),
      completed: countWithStatus("completed"),
      failed: countWithStatus("failed"),
    },
  });
});

app.delete("/api/auto-queue/:id", async (req, res) => {
  await mutateAutoQueue(subScope(req), (items) =>
    items.filter((q) => q.id !== req.params.id),
  );
  res.json({ ok: true });
});

app.post("/api/auto-queue/:id/skip", async (req, res) => {
  const scope = subScope(req);
  let videoId = null;
  await mutateAutoQueue(scope, (items) => {
    const it = items.find((q) => q.id === req.params.id);
    if (it && it.videoId) videoId = it.videoId;
    return items.filter((q) => q.id !== req.params.id);
  });
  if (videoId) await addToSkipList(scope, videoId);
  res.json({ ok: true });
});

// Approve a single auto-queue item for background processing
app.post("/api/auto-queue/:id/approve", async (req, res) => {
  let item = null;
  let badStatus = null;
  await mutateAutoQueue(subScope(req), (items) => {
    const it = items.find((q) => q.id === req.params.id);
    if (!it) return;
    if (it.status !== "pending") {
      badStatus = it.status;
      return;
    }
    it.status = "approved";
    item = { ...it };
  });
  if (badStatus) return res.status(400).json({ error: `Cannot approve item with status '${badStatus}'` });
  if (!item) return res.status(404).json({ error: "Item not found" });
  kickProcessor();
  res.json({ ok: true, item });
});

// Approve all pending items at once
app.post("/api/auto-queue/approve-all", async (req, res) => {
  let count = 0;
  await mutateAutoQueue(subScope(req), (items) => {
    for (const it of items) {
      if (it.status === "pending") {
        it.status = "approved";
        count++;
      }
    }
  });
  if (count > 0) kickProcessor();
  res.json({ ok: true, approved: count });
});

// Retry a failed
// item
app.post("/api/auto-queue/:id/retry", async (req, res) => {
  let retried = null;
  let rejectedStatus = null;

  await mutateAutoQueue(subScope(req), (items) => {
    const entry = items.find((q) => q.id === req.params.id);
    if (!entry) return;
    if (entry.status !== "failed") {
      // Only failed items may be retried; report the status we found.
      rejectedStatus = entry.status;
      return;
    }
    // Back to "approved" and wipe the failure bookkeeping.
    Object.assign(entry, { status: "approved", error: undefined, failedAt: undefined });
    retried = { ...entry };
  });

  if (rejectedStatus) {
    return res.status(400).json({ error: `Cannot retry item with status '${rejectedStatus}'` });
  }
  if (!retried) {
    return res.status(404).json({ error: "Item not found" });
  }
  kickProcessor();
  res.json({ ok: true, item: retried });
});

// Purge every completed or failed item from the queue and report how many
// were dropped.
app.post("/api/auto-queue/clear-finished", async (req, res) => {
  const finishedStatuses = new Set(["completed", "failed"]);
  let removed = 0;
  await mutateAutoQueue(subScope(req), (items) => {
    const remaining = items.filter((q) => !finishedStatuses.has(q.status));
    removed = items.length - remaining.length;
    return remaining;
  });
  res.json({ ok: true, removed });
});

// Start a subscription check in the background unless one is already
// running. The response is sent right away; a failure of the check is only
// logged.
app.post("/api/subscriptions/check-now", async (req, res) => {
  if (subCheckRunning) {
    return res.json({ ok: true, message: "Already checking" });
  }
  checkSubscriptions().catch((err) => {
    console.error(" ⚠ Manual subscription check error:", err.message);
  });
  res.json({ ok: true, message: "Check started" });
});

// ── Auto-download toggle per subscription ──────────────────────────────────
// `enabled` must be the literal boolean true to switch the flag on; anything
// else switches it off.
app.put("/api/subscriptions/:id/auto-download", async (req, res) => {
  const scope = subScope(req);
  const subscriptions = await loadSubscriptions(scope);
  const target = subscriptions.find((s) => s.id === req.params.id);
  if (!target) {
    return res.status(404).json({ error: "Subscription not found" });
  }
  target.autoDownload = req.body.enabled === true;
  await saveSubscriptions(scope, subscriptions);
  res.json({ ok: true, subscription: target });
});

// ── Background processing status & configuration ───────────────────────────

// Snapshot of the background processor: what is running, the current config,
// queue depth per status, and the last 20 log lines.
app.get("/api/processing/status", async (req, res) => {
  const queue = await loadAutoQueue(subScope(req));
  const countWithStatus = (status) => queue.filter((q) => q.status === status).length;

  const active = processingState.currentItem;
  const currentItem = active
    ? { id: active.id, title: active.title, url: active.url }
    : null;

  res.json({
    running: processingState.running,
    currentItem,
    lastCompleted: processingState.lastCompleted,
    config: processingConfig,
    counts: {
      approved: countWithStatus("approved"),
      processing: countWithStatus("processing"),
      pending: countWithStatus("pending"),
    },
    log: processingState.log.slice(-20),
  });
});

// Change the processor settings: the pause between items (clamped to
// 0–3600 seconds) and the on/off switch. Fields of the wrong type are ignored.
app.put("/api/processing/config", async (req, res) => {
  const { delaySeconds, enabled } = req.body;

  if (typeof delaySeconds === "number" && delaySeconds >= 0) {
    processingConfig.delaySeconds = Math.max(0, Math.min(3600, delaySeconds));
  }
  if (typeof enabled === "boolean") {
    processingConfig.enabled = enabled;
    // Switching the processor on: wake it so approved items start moving.
    if (enabled) kickProcessor();
  }

  await saveProcessingConfig();
  res.json({ ok: true, config: processingConfig });
});

// Rush: flag the processor to skip the delay before the next queued item,
// then wake it in case it is idle.
app.post("/api/processing/rush", (req, res) => {
  processingState.rush = true;
  kickProcessor();
  res.json({ ok: true, message: "Rush mode enabled — next item will process immediately" });
});

// Full processing log (for debug/monitoring)
app.get("/api/processing/log", (req, res) => {
  const { log } = processingState;
  res.json({ log });
});

// ── Full pipeline: URL → audio → transcript → topic analysis ──────────────

// Single yt-dlp call that returns the full info-dict as JSON.
// One shot gives us title, channel, description, chapters, duration, and the
// per-language caption availability — used both for richer transcription
// prompts and for the captions-first path (skip audio + transcription
// entirely when YouTube has usable captions for this video).
/**
 * Fetch a video's yt-dlp info-dict and reduce it to the fields the pipeline
 * uses.
 *
 * Best-effort: any failure (yt-dlp missing, timeout, non-JSON output, …) is
 * logged and reported as `null` rather than thrown.
 *
 * @param {string} videoId - YouTube video id, interpolated into the watch URL.
 * @returns {Promise<null | {
 *   title: string, uploadDate: string, channel: string, description: string,
 *   chapters: Array, duration: number,
 *   hasManualCaptions: boolean, hasAutoCaptions: boolean,
 * }>} the reduced metadata, or `null` when it could not be obtained.
 */
async function fetchYouTubeMetadata(videoId) {
  try {
    const { stdout } = await execFileAsync(
      "yt-dlp",
      ["-j", "--no-warnings", "--skip-download", `https://www.youtube.com/watch?v=${videoId}`],
      { timeout: 30000, maxBuffer: 10 * 1024 * 1024 }
    );
    const info = JSON.parse(stdout);
    // `yt-dlp -j` should print an object; treat anything else (e.g. a bare
    // `null`) as a failed fetch instead of tripping over property access.
    if (!info || typeof info !== "object") {
      throw new Error("yt-dlp returned no info-dict");
    }
    return {
      title: info.title || "",
      uploadDate: info.upload_date || "",
      channel: info.channel || info.uploader || "",
      // Truncate to keep prompt size sane — descriptions can be huge
      // (release-notes-stuffed Lex Fridman podcasts run thousands of chars).
      // A non-string description is dropped rather than failing the fetch.
      description: typeof info.description === "string" ? info.description.slice(0, 2000) : "",
      chapters: Array.isArray(info.chapters) ? info.chapters : [],
      duration: typeof info.duration === "number" ? info.duration : 0,
      // True when captions exist in ANY language, not necessarily English.
      hasManualCaptions: !!(info.subtitles && Object.keys(info.subtitles).length > 0),
      hasAutoCaptions: !!(info.automatic_captions && Object.keys(info.automatic_captions).length > 0),
    };
  } catch (err) {
    // Still best-effort (callers get null), but leave a trace so a broken
    // yt-dlp install or a blocked video can be diagnosed from the logs.
    console.warn(
      ` ⚠ YouTube metadata fetch failed for ${videoId}: ${(err?.message || String(err)).slice(0, 200)}`
    );
    return null;
  }
}

// Pulls YouTube captions for the given video and parses them into the
// app's standard { offset, text, duration } entries shape. Prefers
// manual captions over auto-generated. Returns null if no captions are
// available or parsing produces too few segments to be useful.
//
// Output is the same shape that `parseTimestampedTranscript()` produces
// from a Gemini transcription, so downstream code (the analysis step,
// the chunk renderer) doesn't care how the transcript got here.
/**
 * Download and parse a video's English captions with yt-dlp.
 *
 * Manual captions are tried first (cleaner — punctuated, no fragmented
 * words). When that attempt yields nothing usable and auto-generated
 * captions are reported as available, those are tried next. The metadata
 * flags only say captions exist in SOME language, so "manual captions exist"
 * does not guarantee an English manual track.
 *
 * @param {string} videoId - YouTube video id.
 * @param {string} tmpDir - existing scratch directory; caption files are
 *   written there as `captions.<lang>.json3`.
 * @param {{hasManual: boolean, hasAuto: boolean}} opts - caption
 *   availability flags.
 * @param {(step: number, message: string) => void} log - pipeline logger.
 * @returns {Promise<null | {entries: Array<{offset: number, text: string,
 *   duration: number}>, source: "manual" | "auto"}>} parsed entries (seconds)
 *   and which caption kind produced them, or `null` when no attempt yielded
 *   at least 5 non-empty entries.
 */
async function tryFetchYouTubeCaptions(videoId, tmpDir, opts, log) {
  const { hasManual, hasAuto } = opts;
  if (!hasManual && !hasAuto) return null;

  const url = `https://www.youtube.com/watch?v=${videoId}`;
  const captionsBase = path.join(tmpDir, "captions");
  const langs = "en.*,en";

  // Best-effort removal of caption files left by a previous attempt, so the
  // next attempt cannot pick up a stale/bad file by mistake.
  const clearCaptionFiles = async () => {
    try {
      const names = await fs.readdir(tmpDir);
      await Promise.all(
        names
          .filter((f) => f.startsWith("captions."))
          .map((f) => fs.rm(path.join(tmpDir, f), { force: true }))
      );
    } catch {
      // Nothing to clean, or the directory is unreadable — the attempt that
      // follows will report its own failure.
    }
  };

  // One download + parse attempt. Returns the parsed entries, or null after
  // logging why this attempt produced nothing usable.
  const attemptDownload = async (subsFlag) => {
    try {
      await execFileAsync(
        "yt-dlp",
        ["--skip-download", subsFlag, "--sub-langs", langs, "--sub-format", "json3", "--no-warnings", "-o", captionsBase, url],
        { timeout: 30000 }
      );
    } catch (err) {
      log(1, `⚠ Caption download failed: ${(err.message || "").slice(0, 200)}`);
      return null;
    }

    // yt-dlp names the file like `captions.en.json3` or `captions.en-US.json3`.
    // Some videos report subtitles as "available" via the metadata API but
    // yt-dlp can't actually pull a usable JSON3 file (age-gate, members-only,
    // wrong language match against our `en.*,en` pattern, etc.). Surface
    // exactly which failure mode hit so the operator can decide whether
    // to disable captions toggle for this video or pursue a workaround.
    let captionPath = null;
    let availableFiles = [];
    try {
      availableFiles = await fs.readdir(tmpDir);
      const match = availableFiles.find(
        (f) => f.startsWith("captions.") && f.endsWith(".json3")
      );
      if (match) captionPath = path.join(tmpDir, match);
    } catch (err) {
      log(1, `⚠ Captions tmpDir read failed: ${(err.message || "").slice(0, 200)}`);
    }

    if (!captionPath) {
      // Did yt-dlp produce ANY caption file in another format?
      const otherCaps = availableFiles.filter((f) => f.startsWith("captions."));
      if (otherCaps.length > 0) {
        log(
          1,
          `⚠ Captions: yt-dlp wrote ${otherCaps.join(", ")} but not the expected .json3 — likely subtitle-language mismatch (we asked for en.*,en). Video may only have captions in another language.`
        );
      } else {
        log(
          1,
          `⚠ Captions: yt-dlp produced no caption file despite metadata reporting them available. Possibly age-gated, members-only, or yt-dlp couldn't decrypt them.`
        );
      }
      return null;
    }

    let parsed;
    try {
      const content = await fs.readFile(captionPath, "utf-8");
      parsed = JSON.parse(content);
    } catch (err) {
      log(1, `⚠ Caption parse failed: ${(err.message || "").slice(0, 200)}`);
      return null;
    }

    // `parsed` may legitimately be null (a file containing `null`), hence `?.`.
    const rawEvents = Array.isArray(parsed?.events) ? parsed.events : [];
    const entries = [];
    for (const evt of rawEvents) {
      const text = (Array.isArray(evt.segs) ? evt.segs : [])
        .map((s) => s.utf8 || "")
        .join("")
        .replace(/\n+/g, " ")
        .trim();
      if (!text) continue;
      entries.push({
        offset: (evt.tStartMs || 0) / 1000,
        text,
        duration: (evt.dDurationMs || 0) / 1000,
      });
    }

    if (entries.length < 5) {
      log(
        1,
        `⚠ Captions parsed but only ${entries.length} non-empty entries (raw events: ${rawEvents.length}). Captions look empty/malformed — falling back.`
      );
      return null;
    }
    return entries;
  };

  if (hasManual) {
    const manualEntries = await attemptDownload("--write-subs");
    if (manualEntries) return { entries: manualEntries, source: "manual" };
    if (!hasAuto) return null;
    log(1, "↪ Manual captions unusable — trying auto-generated captions");
    await clearCaptionFiles();
  }

  const autoEntries = await attemptDownload("--write-auto-subs");
  return autoEntries ? { entries: autoEntries, source: "auto" } : null;
}

// Coalesce a list of timestamped entries into coarser chunks. Used
// after pulling auto-captions (which are typically 1–3-word fragments
// every 1–3 seconds) so the downstream analysis prompt has ~100
// medium-sized segments instead of ~900 tiny ones. Manual captions and
// Gemini-transcribed entries already average 10–30s and pass through
// unchanged when their median duration exceeds the threshold.
/**
 * @param {Array<{offset?: number, text?: string, duration?: number}>} entries
 *   timestamped transcript entries, in seconds.
 * @param {number} [targetSeconds=15] - an entry starts a new chunk once it
 *   begins at least this many seconds after the current chunk's start.
 * @returns {Array} `entries` itself when it is empty, not an array, or
 *   already coarse (median duration >= 8s); otherwise a new array of merged
 *   `{ offset, text, duration }` chunks (blank-text entries are dropped).
 */
function coalesceTranscriptEntries(entries, targetSeconds = 15) {
  if (!Array.isArray(entries) || entries.length === 0) return entries;

  // Sample median duration; only coalesce if entries are too fine.
  const durations = entries.map((e) => e.duration || 0).sort((a, b) => a - b);
  const median = durations[Math.floor(durations.length / 2)];
  if (median >= 8) return entries; // already coarse enough

  const merged = [];
  let current = null;
  for (const e of entries) {
    const text = (e.text || "").trim();
    if (!text) continue;
    // Normalize once so a missing offset/duration can't turn the arithmetic
    // below into NaN (which would glue every remaining entry into one chunk).
    const offset = e.offset || 0;
    const duration = e.duration || 0;

    if (current && offset - current.offset < targetSeconds) {
      current.text = `${current.text} ${text}`.replace(/\s+/g, " ").trim();
      // Caption events can overlap: a chunk must never end earlier than an
      // entry it already absorbed, so only ever extend the duration.
      current.duration = Math.max(current.duration, offset + duration - current.offset);
      continue;
    }

    if (current) merged.push(current);
    current = { offset, text, duration };
  }
  if (current) merged.push(current);
  return merged;
}

// Reduce an over-long transcript down to roughly `maxEntries` segments
// so the analysis prompt fits in smaller model contexts (typical local
// models are 16k–32k tokens — a 2-hour podcast at Parakeet's ~5s
// granularity easily blows past that). Bucket size is computed from
// total audio duration so we end up with at most `maxEntries` segments
// regardless of source granularity. Unlike coalesceTranscriptEntries
// above, this one is purely count-driven — no median-duration guard,
// because the failure mode is "context exceeded" not "bad UX from
// over-fragmented entries".
//
// Returns { coalesced, indexMap } where indexMap[i] = { startOrig,
// endOrig } maps coalesced-entry i back to a range of original-entry
// indices. The caller uses this to translate section start/end indices
// returned by the analyzer back into the original entries array, so
// the final transcript displayed to the user keeps its full granularity
// — only the analyzer sees the coarser view.
//
// FLAGGED TO WATCH: ship 0.2.28. Auto-coalesce may degrade analysis
// quality on borderline content (the LLM sees fewer, longer segments
// — section boundaries get blurrier).
// If users report missed topics or
// imprecise section starts on long content, the alternative is real
// chunked analysis (split into overlapping windows, analyze each,
// stitch sections at boundaries) — significantly more involved.
/**
 * Merge transcript entries into time buckets when there are more than
 * `maxEntries` of them.
 *
 * @param {Array<{offset?: number, text?: string, duration?: number}>} entries
 *   timestamped transcript entries, in seconds.
 * @param {number} [maxEntries=400] - entry count above which merging starts.
 * @returns {{coalesced: Array, indexMap: null | Array<{startOrig: number,
 *   endOrig: number}>}} `{ coalesced: entries, indexMap: null }` (same array
 *   reference) when nothing had to be merged; otherwise the merged entries
 *   plus, for each of them, the inclusive range of original indices it covers.
 */
function coalesceForAnalysis(entries, maxEntries = 400) {
  if (!Array.isArray(entries) || entries.length <= maxEntries) {
    return { coalesced: entries, indexMap: null };
  }
  const lastEntry = entries[entries.length - 1];
  const totalDuration = (lastEntry.offset || 0) + (lastEntry.duration || 0);
  // Without usable timestamps there is nothing to bucket by.
  if (totalDuration <= 0) {
    return { coalesced: entries, indexMap: null };
  }
  // Bucket size: roughly total / maxEntries, but never tighter than 15s
  // (a typical "natural pause" gap — going much smaller would defeat
  // the point of coalescing).
  const targetSeconds = Math.max(15, Math.ceil(totalDuration / maxEntries));

  const merged = [];
  const indexMap = [];
  let current = null;
  let currentStartOrig = 0;

  for (let i = 0; i < entries.length; i++) {
    const e = entries[i];
    const text = (e.text || "").trim();
    // Blank entries add nothing to the prompt; they stay inside whichever
    // index range surrounds them.
    if (!text) continue;
    // Normalize once so a missing offset/duration can't turn the arithmetic
    // below into NaN (which would glue every remaining entry into one bucket).
    const offset = e.offset || 0;
    const duration = e.duration || 0;

    if (current && offset - current.offset < targetSeconds) {
      current.text = `${current.text} ${text}`.replace(/\s+/g, " ").trim();
      // Entries can overlap: a bucket must never end earlier than an entry
      // it already absorbed, so only ever extend the duration.
      current.duration = Math.max(current.duration, offset + duration - current.offset);
      continue;
    }

    if (current) {
      merged.push(current);
      indexMap.push({ startOrig: currentStartOrig, endOrig: i - 1 });
    }
    current = { offset, text, duration };
    currentStartOrig = i;
  }

  if (current) {
    merged.push(current);
    indexMap.push({ startOrig: currentStartOrig, endOrig: entries.length - 1 });
  }
  return { coalesced: merged, indexMap };
}

// ── In-flight free-tier job status + cancel ─────────────────────────────────
// Lets the web UI render a "Currently processing X — Cancel" banner after
// a browser refresh, when the SSE stream from the original /api/process
// call is no
// longer attached. Only the free-tier slot is tracked today;
// paid-tier batch queueing happens client-side.
app.get("/api/process/current", (req, res) => {
  // `?logs=1` (or `?logs=true`) also returns the job's accumulated log
  // buffer, so a page reloaded mid-pipeline can refill its activity log.
  // Without it only the lightweight header shape is returned, which is what
  // the 5s banner poll uses.
  const includeLogs = ["1", "true"].includes(req.query.logs);
  const job = getCurrentFreeJob({ includeLogs });
  res.json({ job });
});

app.post("/api/process/cancel", (_req, res) => {
  // This only raises the abort flag; the in-flight yt-dlp / model API call
  // is not killed here. The pipeline polls isFreeJobAborted() at each major
  // step and throws early, which lands in the request handler's finally
  // block where the slot is released. Cancellation latency is therefore
  // bounded by the time until the next checkpoint (a few seconds in
  // practice, up to the length of one outstanding model call).
  const cancelled = Boolean(abortCurrentFreeJob());
  res.json({ ok: true, cancelled });
});

// ── Auto-discovery of provider connection info ──────────────────────────────
// The picker UI hits this on boot to pre-fill placeholders for providers
// that have a server-detectable default — most notably Ollama on
// StartOS, reachable at the documented `http://<package-id>.startos:<port>`
// internal hostname when installed alongside Recap.
app.get("/api/providers/discover", async (_req, res) => {
  const discovered = {};

  // Ollama: use the URL the operator set via the StartOS action when there
  // is one; otherwise probe the canonical service-discovery hostname and use
  // it only if it answers. Any failure leaves Ollama out of the response.
  try {
    const cfg = await config.getConfigSnapshot();
    const configuredUrl = (cfg.ollama_base_url || "").trim();

    let baseURL = null;
    let source = null;
    if (configuredUrl) {
      baseURL = configuredUrl;
      source = "config";
    } else {
      const candidate = "http://ollama.startos:11434";
      const reachable = await fetch(`${candidate}/api/tags`, {
        signal: AbortSignal.timeout(1500),
      }).then(
        (probe) => probe.ok,
        () => false,
      );
      if (reachable) {
        baseURL = candidate;
        source = "startos-dep";
      }
    }

    if (baseURL) {
      // List the installed models via /api/tags — the picker UI offers them
      // as dropdown options so users don't have to type them. A failed
      // listing still reports the URL, just with no models.
      let models = [];
      try {
        const tagsUrl = `${baseURL.replace(/\/$/, "")}/api/tags`;
        const tagsRes = await fetch(tagsUrl, { signal: AbortSignal.timeout(2000) });
        if (tagsRes.ok) {
          const data = await tagsRes.json();
          models = (data.models || []).map((m) => m.name).filter(Boolean);
        }
      } catch {}
      discovered.ollama = { baseURL, source, models };
    }
  } catch {}

  res.json(discovered);
});

// Quick connection-test endpoint behind the picker UI's Test button:
// checks that a provider + model + opts combination works before the user
// commits to it. Sends a tiny prompt and answers with the model's reply and
// the latency, or with an error string.
app.post("/api/providers/test", async (req, res) => {
  const body = req.body || {};
  const providerId = body.providerId;
  const model = body.model;
  const clientOpts = body.opts;

  const reject = (error) => res.status(400).json({ ok: false, error });
  const describeError = (err) => (err?.message || String(err)).slice(0, 300);

  if (!providerId || typeof providerId !== "string") {
    return reject("missing providerId");
  }
  if (!model || typeof model !== "string") {
    return reject("missing model");
  }
  if (!PROVIDER_NAMES.includes(providerId)) {
    return reject(`unknown provider: ${providerId}`);
  }

  let provider;
  try {
    const cfg = await config.getConfigSnapshot();
    const usableClientOpts = clientOpts && typeof clientOpts === "object" ? clientOpts : {};
    const resolvedOpts = resolveProviderOpts(providerId, {
      config: cfg,
      clientOpts: usableClientOpts,
      req,
    });
    provider = getProvider(providerId, resolvedOpts);
  } catch (err) {
    return reject(err.message);
  }
  if (!provider.capabilities.analyze) {
    return reject(`${providerId} cannot analyze text`);
  }

  const startedAt = Date.now();
  const elapsedMs = () => Date.now() - startedAt;

  try {
    // Special-case the relay: a real analyzeText round-trip would burn one
    // of the user's lifetime credits per click. pingBalance verifies
    // connectivity + auth and returns the current balance at no charge.
    if (providerId === "relay") {
      const env = await provider.pingBalance({ timeoutMs: 5000 });
      const latencyMs = elapsedMs();
      const credits = env?.credits_remaining;
      const tier = env?.tier || "core";
      const parts = ["Connected", `Tier: ${tier}`];
      if (credits != null) {
        parts.push(`${credits} credit${credits === 1 ? "" : "s"} remaining`);
      }
      return res.json({
        ok: true,
        text: parts.join(" · "),
        latencyMs,
        provider: providerId,
        model,
      });
    }

    const result = await provider.analyzeText({
      prompt: "Respond with exactly three words confirming you received this prompt.",
      model,
      retries: 0,
    });
    const latencyMs = elapsedMs();
    return res.json({
      ok: true,
      text: (result.text || "").trim().slice(0, 200),
      latencyMs,
      provider: providerId,
      model,
    });
  } catch (err) {
    // Provider failures are reported in the body with a 200, not as an
    // HTTP error.
    return res.json({
      ok: false,
      error: describeError(err),
      latencyMs: elapsedMs(),
    });
  }
});

// Per-provider, per-field boolean indicating whether the StartOS
// config has a non-empty value for each PROVIDER_KEY_FIELDS slot.
// The picker UI uses this to (a) show a "✓ Server-configured" hint
// under empty inputs, and (b) decide whether the Delete button is
// visible when localStorage is empty but the server has a value.
//
// Never returns the actual values — only booleans.
Anything stored in // startos-config.json could be secret and shouldn't surface in any // response that could end up in a screenshot. app.get("/api/providers/credentials-status", async (_req, res) => { const { PROVIDER_KEY_FIELDS } = await import("./providers/index.js"); const cfg = await getConfigSnapshot(); const status = {}; for (const [providerId, fields] of Object.entries(PROVIDER_KEY_FIELDS)) { status[providerId] = {}; for (const [optName, cfgKey] of Object.entries(fields)) { const v = cfg[cfgKey]; status[providerId][optName] = typeof v === "string" && v.trim().length > 0; } } res.json({ status }); }); // Clear all server-side config fields for a provider (the StartOS // action-set values). The picker UI's Delete button calls this in // addition to wiping localStorage so credentials are gone from BOTH // storage paths. Returns the list of fields that were cleared. app.post("/api/providers/:id/clear", async (req, res) => { const providerId = req.params.id; if (!PROVIDER_NAMES.includes(providerId)) { return res.status(400).json({ ok: false, error: `unknown provider: ${providerId}` }); } // Build a patch of { config_field: "" } for every PROVIDER_KEY_FIELDS // entry mapped to this provider. Unknown providers or providers with // no server-side fields (e.g. relay, whose URL is hardcoded) end // up clearing nothing — still a 200, just with empty `cleared`. 
const { PROVIDER_KEY_FIELDS } = await import("./providers/index.js"); const fields = PROVIDER_KEY_FIELDS[providerId] || {}; const patch = {}; for (const cfgKey of Object.values(fields)) { if (typeof cfgKey === "string") patch[cfgKey] = ""; } try { await config.mergeConfig(patch); return res.json({ ok: true, cleared: Object.keys(patch) }); } catch (err) { return res.status(500).json({ ok: false, error: (err?.message || String(err)).slice(0, 300), }); } }); app.post("/api/process", async (req, res) => { // ── Multi-mode credit gate ─────────────────────────────────────────────── // Four reachable states arriving here in multi-mode: // (a) req.user is admin → no local gate (operator pool) // (b) req.user with license → license tier handles the gate (license-mw) // (b2) paid cloud user → relay-side user:<id> quota gates it; NO // local tenant_credits gate (core-decoupling) // (c) req.user free tenant → tenant_credits.balance gate + debit // (d) req.trial holder → trial cookie gate + debit // (e) anonymous, no trial → try issueIfEligible, then re-check // // The signed-in-free-tenant case (c) was unhandled pre-0.2.92: their // tenant_credits.balance was DISPLAYED but never gated or debited, // so they could summarize indefinitely. Now we check + debit // alongside the trial flow. // // Core-decoupling: paid status is the relay-owned tier (pro/max), NOT a // Keysat license. Paid cloud users are metered by the relay's user:<id> // monthly quota (its own 402 envelope), so they skip this local gate. // Everyone else signed in (tier core) is a free tenant gated on // tenant_credits — including accounts that still carry a now-ignored // legacy license. const isPaidCloudUser = req.user && (req.user.tier === "pro" || req.user.tier === "max"); if ( RECAP_MODE === "multi" && req.user && !req.user.is_admin && !isPaidCloudUser ) { // Free signed-in tenant — gate on tenant_credits.balance. 
We // CHECK here at request start; the actual debit happens after // saveToHistory succeeds (so a failed pipeline doesn't burn a // credit). Same pattern as the trial flow below. try { const { getOrInit } = await import("./tenant-credits.js"); const credits = await getOrInit(req.user.id); if (!credits || credits.total <= 0) { return res.status(402).json({ error: "credits_exhausted", message: "You're out of credits. Buy more or wait for your free credits to refresh.", }); } // Stash for the post-save debit step req.creditsToDebit = "tenant"; } catch (err) { console.error("[/api/process] tenant_credits gate failed:", err); return res.status(500).json({ error: "internal_error" }); } } if (RECAP_MODE === "multi" && !req.user) { if (!req.trial && typeof req.userId === "undefined") { // Pre-trial visitor on their first POST. Try to mint a cookie. try { const { issueIfEligible } = await import("./anon-trial.js"); const trial = await issueIfEligible({ req, res }); if (trial) { req.userId = `anon:${trial.cookie_id}`; req.trial = trial; } } catch (err) { console.warn("[/api/process] anon-trial mint failed:", err); } } if (!req.trial) { // No session, no trial — either trials disabled, IP-capped, or // their previous trial is exhausted (middleware fell through). return res.status(401).json({ error: "trial_unavailable", message: "Sign in to keep using Recaps. New here? Create an account in a minute — no payment required.", }); } // Verify trial still has budget at request time. Cheap to check, // catches the case where a parallel request just spent the last // credit between middleware lookup and this handler firing. const { lookupTrial, hasTrialBudget } = await import("./anon-trial.js"); const fresh = lookupTrial(req.trial.cookie_id); if (!fresh || !hasTrialBudget(fresh)) { return res.status(402).json({ error: "trial_exhausted", message: "You've used all your free trials. 
Sign up for an account to keep going.", }); } req.trial = fresh; } let { url, apiKey: clientKey, model, type: itemType, title: itemTitle, uploadDate: itemUploadDate, episodeId, transcriptionProvider: reqTransProvider, transcriptionModel: reqTransModel, analysisProvider: reqAnaProvider, analysisModel: reqAnaModel, providerOpts: reqProviderOpts, useYouTubeCaptions: reqUseYTCaptions, } = req.body; // Defensive: a stale browser cache or older client might submit // the literal string "Untitled" as the title (a sentinel that // earlier Recap versions used as a falsy placeholder but which is // actually truthy and pollutes every downstream gate — see relay // 0.2.57's same normalization). Strip it here at the request // boundary so all subsequent code can rely on "itemTitle is either // a real title or empty". Also trim whitespace + drop "untitled" // case-insensitively in case some other client variant emits it. if (typeof itemTitle === "string") { const trimmed = itemTitle.trim(); if (trimmed === "" || trimmed.toLowerCase() === "untitled") { itemTitle = ""; } else { itemTitle = trimmed; } } // Default: use captions when available (huge speed/cost win). The // picker-UI toggle lets users force a full transcription pass when // they want speaker labels (captions don't have them) or when the // auto-captions quality is too low. const useYouTubeCaptions = reqUseYTCaptions !== false; // Sentinel error message thrown by checkCancelled() when the user // hits the in-flight banner's Cancel button. Declared at handler // scope (not inside the try) so the catch block can compare against // it — see the catch a few hundred lines below. const CANCELLED_MARK = "__recap_cancelled__"; // Per-provider client-side opts: { gemini: {apiKey}, anthropic: {apiKey}, // openai: {apiKey}, "openai-compatible": {apiKey, baseURL}, ollama: {baseURL} }. // Each provider's opts override that provider's config-stored values // (set via the StartOS actions). 
Used by the picker UI to BYO keys // per provider without round-tripping the StartOS dashboard. const providerOpts = (reqProviderOpts && typeof reqProviderOpts === "object") ? reqProviderOpts : {}; // Provider selection: each pipeline step (transcribe + analyze) can // independently target any registered provider. Both default to gemini // so existing clients (which don't send provider fields) keep working. const transcriptionProvider = reqTransProvider || "gemini"; const analysisProvider = reqAnaProvider || "gemini"; // Free tier: unlicensed users can summarize one video at a time. They // still bring their own key — same as paid users today; the key can // come from either the StartOS config action (server-side) or the // web UI Settings panel (client-side). The future "bundled key" relay // (paid users' requests proxied through the operator's service) isn't // built yet, so there's nothing here that gates key sourcing by tier. // AbortController for this request. Fired by abortCurrentFreeJob() // when the user hits Cancel — passed through to every provider SDK // call (transcription + analysis) so in-flight network requests // reject immediately instead of running to completion. const abortController = new AbortController(); // Stable identifier for this summarize job. Sent to the relay // (when used) as `X-Recap-Job-Id`. The relay charges 1 credit on // the first call with a given jobId and treats subsequent calls // with the same id as free — so a full summary (transcribe + // analyze) costs one credit regardless of which steps route // through the relay. Non-relay providers ignore this opt. const jobId = randomUUID(); // The free-tier single-flight lock is a single-mode concept (one operator, // BYO key, one job at a time). In multi mode, per-tenant credit metering is // the resource control, so a process-global lock would wrongly serialize // every tenant onto one job at a time — never apply it there. 
const isFree = req.recapMode !== "multi" && isFreeUser(); if (isFree) { if (!tryAcquireFreeSlot({ url, title: itemTitle, abortController })) { const current = getCurrentFreeJob(); const elapsedSec = current ? Math.round(current.elapsedMs / 1000) : 0; const what = current?.title || current?.url || "another video"; return res.status(409).json({ error: "processing_in_progress", message: `A summary is already being processed (${what}, started ${elapsedSec}s ago). Free mode handles one video at a time — wait for it to finish, or cancel it from the status bar at the top of the app.`, currentJob: current, }); } } if (!url) { if (isFree) releaseFreeSlot(); return res.status(400).json({ error: "Missing url" }); } if (!PROVIDER_NAMES.includes(transcriptionProvider)) { if (isFree) releaseFreeSlot(); return res.status(400).json({ error: "unknown_provider", message: `Unknown transcription provider: ${transcriptionProvider}` }); } if (!PROVIDER_NAMES.includes(analysisProvider)) { if (isFree) releaseFreeSlot(); return res.status(400).json({ error: "unknown_provider", message: `Unknown analysis provider: ${analysisProvider}` }); } // Resolve per-provider construction opts from the StartOS config blob, // overlaying any client-supplied opts (req.body.providerOpts[name]). // For Gemini, the legacy single "apiKey" field on the request body // also flows through as a fallback when providerOpts.gemini.apiKey // isn't set — keeps pre-picker-UI clients working. const cfg = await getConfigSnapshot(); function clientOptsFor(name) { const fromBody = (providerOpts[name] && typeof providerOpts[name] === "object") ? 
providerOpts[name] : {}; if (name === "gemini" && !fromBody.apiKey) { const legacy = resolveApiKey(clientKey); if (legacy) return { ...fromBody, apiKey: legacy }; } return fromBody; } const transcriptionOpts = resolveProviderOpts(transcriptionProvider, { config: cfg, clientOpts: clientOptsFor(transcriptionProvider), req, }); const analysisOpts = resolveProviderOpts(analysisProvider, { config: cfg, clientOpts: clientOptsFor(analysisProvider), req, }); let transcriber; try { transcriber = getProvider(transcriptionProvider, transcriptionOpts); } catch (err) { if (isFree) releaseFreeSlot(); return res.status(400).json({ error: "transcription_provider_not_configured", message: `Transcription provider ${transcriptionProvider} is not configured: ${err.message}`, }); } if (!transcriber.capabilities.transcribe) { if (isFree) releaseFreeSlot(); return res.status(400).json({ error: "transcription_unsupported", message: `Provider ${transcriptionProvider} does not support audio transcription. Pick a different transcription provider (gemini or openai).`, }); } let analyzer; try { analyzer = transcriptionProvider === analysisProvider ? transcriber : getProvider(analysisProvider, analysisOpts); } catch (err) { if (isFree) releaseFreeSlot(); return res.status(400).json({ error: "analysis_provider_not_configured", message: `Analysis provider ${analysisProvider} is not configured: ${err.message}`, }); } if (!analyzer.capabilities.analyze) { if (isFree) releaseFreeSlot(); return res.status(400).json({ error: "analysis_unsupported", message: `Provider ${analysisProvider} does not support text analysis.`, }); } // ── Resolve Apple Podcasts / Spotify share URLs ── // Most users paste apple/spotify share links rather than RSS feeds. // Resolve those to a direct audio enclosure URL up-front so the rest // of the handler runs the existing podcast pipeline unchanged. On // resolver failure we surface a clear error rather than letting the // downstream code stumble on a URL it can't process. 
let resolvedFrom = null; // "apple" | "spotify" | null — for the result event if (isResolvableShareUrl(url)) { try { const resolved = await resolveShareUrl(url, { podcastIndexKey: cfg.podcastindex_api_key, podcastIndexSecret: cfg.podcastindex_api_secret, }); if (resolved) { console.log( ` [url-resolver] ${resolved.source} → ${resolved.audioUrl.slice(0, 80)} ("${resolved.title}")` ); url = resolved.audioUrl; if (!itemTitle) itemTitle = resolved.title; if (!itemUploadDate) itemUploadDate = resolved.uploadDate; if (!episodeId) episodeId = resolved.episodeId; itemType = "podcast"; resolvedFrom = resolved.source; } } catch (err) { if (isFree) releaseFreeSlot(); const code = err instanceof URLResolveError ? err.code : "resolve_failed"; return res.status(400).json({ error: code, message: err?.message || "Failed to resolve share URL", }); } } // Determine if this is a podcast episode or YouTube video const isPodcast = itemType === "podcast" || /\.(mp3|m4a|ogg|opus|wav|aac)(\?|$)/i.test(url); const videoId = isPodcast ? (episodeId || url) : extractVideoId(url); if (!isPodcast && !videoId) { if (isFree) releaseFreeSlot(); return res.status(400).json({ error: "Invalid YouTube URL" }); } const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "yts-")); const audioExt = isPodcast ? (url.match(/\.(mp3|m4a|ogg|opus|wav|aac)/i)?.[1] || "mp3") : "mp3"; const audioPath = path.join(tmpDir, `audio.${audioExt}`); const mimeType = { mp3: "audio/mp3", m4a: "audio/mp4", ogg: "audio/ogg", opus: "audio/opus", wav: "audio/wav", aac: "audio/aac" }[audioExt] || "audio/mp3"; try { const pipelineStart = Date.now(); // Set up SSE res.writeHead(200, { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", Connection: "keep-alive", }); // Helper to send log entries with elapsed time. Also pushes onto the // free-tier in-flight job's server-side buffer (no-op when there is // no free job, e.g. 
licensed user) so a browser refresh during a // long pipeline can re-hydrate the activity log from the server // instead of starting blank. const logHistory = []; function log(step, message, detail) { const elapsed = ((Date.now() - pipelineStart) / 1000).toFixed(1); const logMsg = `[${elapsed}s] ${message}`; console.log(` ${logMsg}`); const entry = { elapsed, message, detail: detail || null }; logHistory.push(entry); appendCurrentJobLog(entry); sendEvent(res, "status", { step, message }); sendEvent(res, "log", entry); } // Cancellation checkpoint. Polled at each major pipeline boundary; // throws CANCELLED_MARK (declared at handler scope above) when the // user has hit Cancel on the status banner so the request bails // out cleanly via the catch path. Also catches abort signals fired // by abortCurrentFreeJob() in case an SDK call swallowed the abort // and returned cleanly. function checkCancelled() { if (abortController.signal.aborted) { throw new Error(CANCELLED_MARK); } if (isFree && isFreeJobAborted()) { throw new Error(CANCELLED_MARK); } } // ── Recap Relay mode (unified pipeline) ────────────────────── // When the user picks "Recap Relay" in the picker, both providers // are set to "relay". The relay's /relay/summarize-url endpoint // does the WHOLE pipeline (download + transcribe + chunked-analyze) // server-side and streams per-window section results back to us // over SSE. We forward those events to the browser as the existing // event types (transcript_ready / sections_partial / result) so // the front-end's incremental rendering keeps working unchanged. // // Why this branch exists instead of just letting the per-step // pickers route to relay independently: // 1. Latency — saves 12+ round-trips (one per analyze window) // and the bandwidth of sending the full transcript back // to Recap just for Recap to ship it back as 12 prompts. // 2. Settings respect — the operator's Settings-tab knobs // (window body, overlap, concurrency, etc.) 
only take // effect when the relay drives the analyze loop. Per-step // relay use would leave Recap's hardcoded constants in // charge of windowing. // 3. Credits clarity — one summarize = one credit, charged // atomically on the relay side. No "what if transcribe // succeeded but analyze fell back to Anthropic" accounting. const relayMode = transcriptionProvider === "relay" && analysisProvider === "relay"; if (relayMode) { // Title surrogate — used for the activity log display + as a // fallback if the relay doesn't echo a real title back (older // relay, or a podcast/non-YouTube URL where yt-dlp metadata // isn't available). Defaults to "Untitled" so log messages // remain readable. const titleSurrogate = itemTitle || "Untitled"; // titleHint sent to the relay — must NOT be the surrogate. // Passing "Untitled" as a hint causes a subtle bug: the relay's // download step uses `if (!title && audio.title) title = audio.title;` // which treats any truthy operator-supplied title (including // "Untitled") as a real hint and skips the yt-dlp-extracted // title fallback. Net effect: the library entry stays // "Untitled" forever even though yt-dlp got the real title. // Pass only the actual operator/subscription-supplied title (or // empty string, which the provider serializes as omitted JSON). const titleHintRaw = itemTitle ? String(itemTitle).trim() : ""; // For YouTube URLs, fetch the metadata block (channel, description, // chapters) BEFORE handing off to the relay. The relay can extract // these fields itself via yt-dlp (relay 0.2.63+ does so) but // fetching here gives the relay a head-start AND lets older relays // that don't yet do their own extraction still get the speaker-ID // metadata. The fetch costs ~1-2 seconds — small relative to the // 1-3 minute pipeline. Defense-in-depth: belt-and-suspenders with // the relay-side fallback. // // For podcasts, skip — fetchYouTubeMetadata is YouTube-specific // and would return null anyway. 
The podcast resolver above // already populated itemTitle when relevant; channel/description/ // chapters aren't available for podcast feeds. let relayChannelHint = ""; let relayDescriptionHint = ""; let relayChaptersHint = []; if (!isPodcast && videoId) { try { const meta = await fetchYouTubeMetadata(videoId); if (meta) { relayChannelHint = meta.channel || ""; relayDescriptionHint = meta.description || ""; relayChaptersHint = Array.isArray(meta.chapters) ? meta.chapters : []; log( 1, `YouTube metadata fetched: channel="${relayChannelHint.slice(0, 60)}", ${relayChaptersHint.length} chapter(s), description ${relayDescriptionHint.length} chars` ); } } catch (err) { // Non-fatal — the relay's own yt-dlp extraction will fill these // fields when our request body sends them as empty strings. log(1, `YouTube metadata fetch failed: ${err?.message || err} — relay will extract via its own yt-dlp`); } } log(1, `Recap Relay mode — full pipeline on operator's relay`); // Cache of parsed transcript entries — populated by the // onTranscribeComplete provider callback below as soon as the // relay finishes the transcribe phase. Used by the per-window // streaming callback to build full { title, summary, entries, // startTime } chunk objects, which is the shape the browser's // sections_partial handler reads via `data.chunks`. Without // this cache, per-window streaming sent just titles + summaries // and the browser had no entries to render under each section. let streamedRelayEntries = []; let relayResult; // Track whether we've emitted the early transcript_ready event // yet. In sequential mode this fires from onTranscribeComplete // (when relay's transcribe finishes); in pipelined mode it can // fire even earlier from the FIRST onWindowComplete (when an // early window's sections arrive before transcribe is done). // Either way: we want the browser to flip from loading view to // results view ASAP so it can render the streaming sections. 
let earlyTranscriptReadyEmitted = false; const emitEarlyTranscriptReady = (extra = {}) => { if (earlyTranscriptReadyEmitted) return; earlyTranscriptReadyEmitted = true; sendEvent(res, "transcript_ready", { videoId, // Best-known title at this point — the surrogate from the // user-supplied title hint or "Untitled". The final result // event also carries videoTitle and the browser updates // it then, so any final title mismatch self-heals. videoTitle: titleSurrogate, videoChannel: relayChannelHint || "", videoChapters: relayChaptersHint || [], videoUploadDate: itemUploadDate || "", videoDescription: relayDescriptionHint || "", entries: streamedRelayEntries || [], type: isPodcast ? "podcast" : "youtube", willChunk: true, // pessimistic; corrected at result time ...extra, }); }; try { relayResult = await transcriber.summarizeUrl({ mediaUrl: url, mediaType: isPodcast ? "podcast" : "youtube", mimeType: undefined, titleHint: titleHintRaw, channelHint: relayChannelHint, descriptionHint: relayDescriptionHint, chaptersHint: relayChaptersHint, onProgress: (msg) => log(2, msg), // Capture transcript entries the moment the relay finishes // transcribing. Analyze runs serially AFTER transcribe in // the relay's pipeline — so by the time the FIRST // onWindowComplete fires, this callback has already populated // the entries cache below. That lets the onWindowComplete // callback build proper { title, summary, entries, startTime } // chunk objects (the shape the browser's sections_partial // handler expects under .chunks) rather than the raw // ownedSections-without-entries we sent pre-v0.2.75. 
onTranscribeComplete: ({ transcript }) => { streamedRelayEntries = parseTimestampedTranscript(transcript || ""); log(2, `Relay transcript parsed — ${streamedRelayEntries.length} segments cached for streaming`); // Sequential-mode UI flip: emit transcript_ready as soon // as transcribe is done so the browser switches from // loading view to results view BEFORE the analyze // sections start streaming in. Pipelined mode beats us to // this from onWindowComplete (the first window may // complete before transcribe is fully done) — either way // the emitter is idempotent. emitEarlyTranscriptReady(); }, // Per-window streaming: forward each window's owned sections // to the browser as sections_partial events with the chunk // SHAPE the browser handler expects (.chunks not // .partialSections). Without this, the browser's // `data.chunks || []` read at the sections_partial handler // resolved to an empty array and the operator saw zero // sections appear during the analyze phase — they only // landed when the terminal "result" event arrived at the // very end of the pipeline. onWindowComplete: ({ windowIdx, totalWindows, ownedSections, windowEntries }) => { // Pipelined-mode UI flip: if the first window completes // before transcribe-done (very possible — first window // only needs ~20min of audio fully transcribed), emit // transcript_ready NOW so the browser flips to results // view in time for these partial sections to render. emitEarlyTranscriptReady(); // Pipelined mode (relay v0.2.89+): each window arrives // with its OWN entries embedded, before transcribe has // fully completed. Section indices are LOCAL to those // window entries, so we slice them directly. // // Sequential mode (older relays, or Gemini-transcribe // path): windowEntries is null. The relay's transcribe // phase has already finished by the time these arrive, // so streamedRelayEntries (populated by // onTranscribeComplete) is non-empty and section // indices are global into it. 
const entries = Array.isArray(windowEntries) && windowEntries.length > 0 ? windowEntries : (streamedRelayEntries || []); const partialChunks = (ownedSections || []).map((s) => { const start = Math.max(0, s.startIndex ?? 0); const end = entries.length > 0 ? Math.min(entries.length - 1, s.endIndex ?? start) : (s.endIndex ?? start); const slice = entries.length > 0 ? entries.slice(start, end + 1) : []; return { title: s.title, summary: s.summary, entries: slice, startTime: slice[0]?.offset || 0, }; }).filter((c) => c.entries.length > 0 || !entries.length); sendEvent(res, "sections_partial", { windowIdx, totalWindows, chunks: partialChunks, }); }, signal: abortController.signal, jobId, }); } catch (err) { if (abortController.signal.aborted) throw new Error(CANCELLED_MARK); sendEvent(res, "error", { message: err?.message || String(err) }); sendEvent(res, "result", { videoId, entries: [], chunks: [], rawTranscript: "" }); if (isFree) releaseFreeSlot(); res.end(); return; } checkCancelled(); // Backfill the real title now that the relay has finished. The // relay extracts it via yt-dlp during the download step (or // echoes back the client-supplied hint) and returns it in the // result envelope's `title` field — see relay 0.2.53+. For older // relays the field is null, so we keep the original surrogate. // Trim + drop empty strings to avoid storing whitespace-only // titles that would render as a blank row in the library list. const relayTitle = typeof relayResult.title === "string" ? relayResult.title.trim() : ""; const resolvedTitle = relayTitle || titleSurrogate; // Parse the bracketed transcript into the entries array shape // Recap's UI / history layer expects. const relayEntries = parseTimestampedTranscript(relayResult.transcript || ""); // Phase 1E — attach speaker labels to each parsed entry by // time-matching against the raw per-segment diarization data // the relay returned. 
The relay's `transcript_segments` are the // fine-grained Parakeet segments (often 1-3s each) with a // `speaker` field per segment. Recap's `relayEntries` are the // MERGED readable lines (sortAndDedupeTranscript + mergeShort- // Entries collapsed multiple Parakeet segments into one // sentence-sized thought). To attach speakers to merged // entries: for each entry, find the raw segment whose [start, // end] contains the entry's offset (start time). The merged // entry's start time always corresponds to the start of one of // the underlying segments — that segment's speaker is the // entry's speaker. Speaker stays null when: // - relay version < 0.2.88 (no transcript_segments field) // - diarization was off on the operator's relay // - relay couldn't match the entry's time to any segment const relaySegments = Array.isArray(relayResult.transcript_segments) ? relayResult.transcript_segments : null; if (relaySegments && relaySegments.length > 0) { // Build a sorted-by-start list for an O(N log N + M log N) match. // Linear scan would be O(N*M) which is fine for typical sizes // but the sorted form also makes the data debuggable. const sortedSegs = relaySegments .slice() .sort((a, b) => (a.start || 0) - (b.start || 0)); for (const entry of relayEntries) { const t = entry.offset || 0; // Find the segment whose [start, end] contains t. Use // binary search for efficiency on long transcripts. let lo = 0; let hi = sortedSegs.length - 1; let found = null; while (lo <= hi) { const mid = (lo + hi) >> 1; const seg = sortedSegs[mid]; if (t < (seg.start || 0)) { hi = mid - 1; } else if (t > (seg.end || 0)) { lo = mid + 1; } else { found = seg; break; } } // If no segment contains t (gaps in diarization output), // fall back to the segment that starts CLOSEST to t. 
if (!found && sortedSegs.length > 0) { const idx = Math.min(Math.max(lo, 0), sortedSegs.length - 1); const candidates = [ sortedSegs[idx], sortedSegs[idx - 1], sortedSegs[idx + 1], ].filter(Boolean); let bestDist = Infinity; for (const seg of candidates) { const segMid = ((seg.start || 0) + (seg.end || 0)) / 2; const d = Math.abs(segMid - t); if (d < bestDist) { bestDist = d; found = seg; } } // Cap the fallback at 5s — beyond that the assignment is // probably bogus, leave it null so the UI can render // "(speaker unknown)" or just drop the chip. if (bestDist > 5) found = null; } if (found && found.speaker) { entry.speaker = found.speaker; entry.speaker_confidence = typeof found.speaker_confidence === "number" ? found.speaker_confidence : null; // Phase 2 — surface the suppression-pass uncertainty // flag so the chip renders with a "?" suffix for // best-guess attribution. Absent on older relays. entry.speaker_uncertain = !!found.speaker_uncertain; } } } const contentType = isPodcast ? "podcast" : "youtube"; // Note: transcript_ready was emitted EARLIER by // emitEarlyTranscriptReady() — either from the first // onWindowComplete (pipelined mode) or from // onTranscribeComplete (sequential mode). Re-emitting here // would either (a) be a no-op since the browser handler is // already in results view, or (b) double-trigger the chunk // wipe / streamWindowsTotal reset. The real title arrives // via the result event below — browser's result handler // updates state.videoTitle on receipt — so dropping the // duplicate emit is safe. // Build chunks from the relay's stitched analysis sections. const sections = Array.isArray(relayResult.analysis?.sections) ? relayResult.analysis.sections : []; const chunks = sections .map((s) => { const start = Math.max(0, s.startIndex ?? 0); const end = Math.min(relayEntries.length - 1, s.endIndex ?? 
start); const slice = relayEntries.slice(start, end + 1); return { title: s.title, summary: s.summary, entries: slice, startTime: slice[0]?.offset || 0, }; }) .filter((c) => c.entries.length > 0); const historyId = await saveToHistory( scopeForRequest(req), videoId, url, resolvedTitle, chunks, relayEntries, logHistory, itemUploadDate || "", contentType, relayResult.speakers || null, relayResult.speaker_names || null, ).catch(() => null); // Debit local credit on success — trial cookie OR free signed-in // tenant, depending on who's making the request. AFTER // saveToHistory rather than at request-start so a failed pipeline // doesn't burn a credit. if (RECAP_MODE === "multi" && req.trial) { try { const { debitOne } = await import("./anon-trial.js"); debitOne(req.trial.cookie_id); } catch (err) { console.warn("[/api/process] trial debit failed:", err); } } else if (RECAP_MODE === "multi" && req.creditsToDebit === "tenant") { try { const { gateAndDebit } = await import("./tenant-credits.js"); await gateAndDebit(req.user.id); } catch (err) { console.warn("[/api/process] tenant_credits debit failed:", err); } } sendEvent(res, "result", { videoId, videoTitle: resolvedTitle, entries: relayEntries, chunks, historyId, type: contentType, // Phase 1E — speaker legend + per-segment confidence summary. // Null when diarization wasn't available (older relay, or // operator disabled it). Frontend renders a colored chip per // speaker above the topic list; each transcript line uses // the entry.speaker field (already attached above) to pick // the matching chip color. speakers: relayResult.speakers || null, // Phase 2 — inferred speaker names from the relay's post- // cluster polish pass. Map { Speaker_A: "Matt Hill", ... } // with null values for speakers the LLM couldn't confidently // name. Recap legend prefers the inferred name when present. 
speaker_names: relayResult.speaker_names || null, }); if (isFree) releaseFreeSlot(); res.end(); return; } let videoTitle = itemTitle || "Untitled"; let videoUploadDate = itemUploadDate || ""; // Rich metadata used to ground the transcription prompt (speaker // names from the channel/description/chapters) and surfaced to // downstream UI. Populated for YouTube videos only; podcasts have // their own (lighter) metadata path. let videoChannel = ""; let videoDescription = ""; let videoChapters = []; // If captions came from YouTube we skip Step 1 (download audio) + // Step 2 (transcribe) and jump straight to analysis. `entries` is // also the post-Step-2 output of the transcription path, so this // value just flows through. let entries = null; let captionSource = null; // "manual" | "auto" | null let transcriptText = ""; let txCost = { inputTokens: 0, outputTokens: 0, thinkingTokens: 0, totalTokens: 0, totalCost: "0", totalCostDisplay: "$0.00" }; // Populated from YouTube metadata or (later) Apple/Spotify resolution. // Used by the relay-URL fast-path to decide if the audio fits the // relay's reported capability. null = unknown → fast-path attempts // anyway and lets the relay reject if the file is over its cap. let knownDuration = null; // Hoisted out of the audio-path block so Step 3 (analysis) can // reference analysisModel even when the captions fast-path skips // the entire audio + transcription block. transcriptionModel is // only used inside the audio block but lives here for symmetry. // Per-provider model defaults: caller's request → provider's first // listed model → Gemini fallback (preserves the prior default for // legacy clients that don't send model fields). const transcriptionModel = reqTransModel || transcriber.listTranscriptionModels()[0] || "gemini-3-flash-preview"; const analysisModel = reqAnaModel || model || analyzer.listAnalysisModels()[0] || "gemini-3.1-pro-preview"; if (resolvedFrom) { log( 0, `Resolved ${ resolvedFrom === "apple" ? 
"Apple Podcasts" : "Spotify" } share URL → ${url.slice(0, 80)}${url.length > 80 ? "…" : ""}` ); } // ── Step 0 (YouTube only): metadata + captions fast path ── if (!isPodcast && videoId) { log(1, "Fetching video metadata..."); const meta = await fetchYouTubeMetadata(videoId); if (meta) { if (meta.title) videoTitle = meta.title; if (meta.uploadDate) videoUploadDate = meta.uploadDate; videoChannel = meta.channel; videoDescription = meta.description; videoChapters = meta.chapters; if (typeof meta.duration === "number" && meta.duration > 0) { knownDuration = meta.duration; } log(1, `Video title: ${videoTitle}${videoChannel ? ` (${videoChannel})` : ""}`); if ((meta.hasManualCaptions || meta.hasAutoCaptions) && !useYouTubeCaptions) { log(1, `YouTube captions available but the user has the captions fast-path disabled — will transcribe audio directly.`); } else if (meta.hasManualCaptions || meta.hasAutoCaptions) { log(1, `YouTube captions available (${meta.hasManualCaptions ? "manual" : "auto"}) — attempting fast-path…`); const cap = await tryFetchYouTubeCaptions( videoId, tmpDir, { hasManual: meta.hasManualCaptions, hasAuto: meta.hasAutoCaptions }, log ); if (cap && cap.entries && cap.entries.length > 0) { // Auto-captions fragment audio every 1–3 seconds (often only // 1–3 words per entry). Sending hundreds of micro-segments // to the analyzer balloons the prompt and overloads the // model's index-tracking ("Section N must start at index // K+1 of section M…"). Coalesce into ~15s blocks for a // saner analysis prompt while keeping timestamps accurate. const rawCount = cap.entries.length; entries = coalesceTranscriptEntries(cap.entries, 15); captionSource = cap.source; log( 1, `Using YouTube ${cap.source === "manual" ? 
"manual" : "auto-generated"} captions — ${rawCount} segments coalesced to ${entries.length}, skipping audio download + transcription` ); } else { log(1, "Captions unusable or failed to parse — falling back to audio transcription"); } } else { log(1, "No captions available — will download audio and transcribe"); } } } checkCancelled(); // ── Step 0.5: Relay-URL fast-path ── // When transcription routes through the operator's relay AND we // have a public source URL (YouTube watch URL, or direct podcast // .mp3/.m4a enclosure), hand the URL off to /relay/transcribe-url // so the relay does the download itself. This saves the buyer's // home upload bandwidth — often the slowest leg of the pipeline, // since residential connections rarely match the relay's symmetric // datacenter link. Only fires if: // 1. captions fast-path didn't already populate entries // 2. caller asked to use the relay for transcription // 3. duration (when known) fits the relay's reported capability // On any failure we fall through to the legacy local-download // path, which has its own retry/fallback machinery. if (!entries && transcriptionProvider === "relay") { // Fresh fetch so the fast-path decision matches the routing // decision the relay will make for THIS install — same // rationale as the Step 2 refresh below. await refreshRelayCapabilities().catch(() => {}); const caps = getRelayCapabilities(); const withinCapability = typeof caps.max_audio_minutes !== "number" || knownDuration === null || knownDuration <= caps.max_audio_minutes * 60; if (!withinCapability) { log( 1, `Relay-URL fast-path skipped — known duration ${(knownDuration / 60).toFixed(1)}min exceeds relay capability ${caps.max_audio_minutes}min` ); } else { const mediaType = isPodcast ? "podcast" : "youtube"; const handoffUrl = isPodcast ? 
url : `https://www.youtube.com/watch?v=${videoId}`; log( 1, `Relay-URL fast-path: handing ${mediaType} URL to relay (saves your upload bandwidth)…` ); try { const relayResult = await transcriber.transcribeUrl({ mediaUrl: handoffUrl, mediaType, mimeType, titleHint: videoTitle, channelHint: videoChannel, descriptionHint: videoDescription, chaptersHint: videoChapters, onProgress: (msg) => log(2, msg), signal: abortController.signal, jobId, }); if (relayResult && relayResult.text) { transcriptText = relayResult.text; const parsed = parseTimestampedTranscript(transcriptText); if (parsed.length === 0) { log( 1, `⚠ Relay-URL transcript parsed to zero segments — falling back to local download` ); transcriptText = ""; entries = null; } else { entries = parsed; } if (!entries) { // fall through to local-download flow } else { // Single-segment expansion — same as the local path below. // Parakeet-style backends return one big text blob; spread // it into per-sentence entries with interpolated timestamps // so the analyzer sees more than one section's worth of // structure. if ( entries.length === 1 && knownDuration && knownDuration > 30 && (entries[0].text || "").length > 100 ) { const synthesized = synthesizeEntriesFromText( entries[0].text, knownDuration ); if (synthesized.length > 1) { log( 2, `Single-segment relay transcript expanded into ${synthesized.length} synthetic entries` ); entries = synthesized; } } txCost = relayResult.cost || txCost; log( 2, `Relay-URL transcription complete — ${entries.length} segments, skipping local download` ); } } else { log( 1, `⚠ Relay-URL returned empty — falling back to local download` ); } } catch (err) { if (err?.message === CANCELLED_MARK || abortController.signal.aborted) { throw err; } const msg = err?.message || String(err); // Surface err.cause too — Node's fetch() wraps low-level // failures (DNS, TLS, dead-socket) in a bare "fetch failed" // message and stashes the real reason in .cause. 
Without it // the activity log is uninformative on transport errors. const cause = err?.cause?.message || err?.cause?.code || err?.cause; const detail = cause ? ` (cause: ${String(cause).slice(0, 200)})` : ""; // Did the relay accept the request and then fail at its // backend (Parakeet/Gemma/Gemini), or did we fail before // the relay even processed it (network blip, TLS issue, // relay unreachable)? // // Backend failures → DON'T fall back to local download. // The relay's /relay/transcribe endpoint would route to // the same broken backend, costing 60+ seconds of audio // upload bandwidth for the same error. // // Transport failures → DO fall back. The relay may be // intermittently unreachable; upload-path might catch a // moment when it works. const isBackendFailure = /CUDA error|Resource exhausted|Parakeet transcribe|Gemma analyze|model does not exist|model.*not found|backend_error|all attempts failed|HTTP 5\d\d/i.test( msg ); if (isBackendFailure) { log( 1, `⚠ Relay-URL fast-path failed: ${msg.slice(0, 300)}${detail}` ); log( 1, `Skipping local-download fallback — same relay backend would just fail again. 
` + `Operator hardware (or Gemini quota) needs attention.` ); sendEvent(res, "error", { message: msg.slice(0, 500), }); res.end(); if (isFree) releaseFreeSlot(); return; } log( 1, `⚠ Relay-URL fast-path failed: ${msg.slice(0, 200)}${detail} — falling back to local download (transport-level error, relay may be intermittent)` ); } } } checkCancelled(); // ── Step 1: Download audio (skipped when captions populated entries) ── if (!entries) { const dlStart = Date.now(); if (isPodcast) { log(1, "Downloading podcast episode..."); await downloadPodcastAudio(url, audioPath); const stats = await fs.stat(audioPath); const sizeMB = (stats.size / (1024 * 1024)).toFixed(1); const dlTime = ((Date.now() - dlStart) / 1000).toFixed(1); log(1, `Episode downloaded (${sizeMB} MB) in ${dlTime}s`, `File size: ${sizeMB} MB`); log(1, `Episode: ${videoTitle}`); } else { log(1, "Downloading audio from YouTube..."); const dlBaseArgs = [ "-x", "--audio-format", "mp3", "--audio-quality", "5", "-o", audioPath, "--no-playlist", "--print", "%(title)s|||%(upload_date)s", "--no-simulate", ]; const dlUrl = `https://www.youtube.com/watch?v=${videoId}`; const cookieArgs = ytCookieArgs(); const hasCookies = cookieArgs.length > 0; let usedCookies = false; let dlStdout = ""; // Helper: attempt a single yt-dlp download async function attemptDownload(args, label) { const result = await execFileAsync("yt-dlp", args, { timeout: 600000 }); return result.stdout || ""; } // Helper: check if error is a bot detection / rate limit block function isBotBlock(errText) { return /Sign in|confirm you're not a bot|bot detection|JsChallengeProvider|js.*challenge|HTTP Error 403|Too Many Requests|429/i.test(errText); } // ── Smart download with retry ── // Strategy: cookies → no-cookies → wait & retry (up to 3 attempts with increasing delays) const MAX_RETRIES = 3; const RETRY_DELAYS = [30, 60, 120]; // seconds — escalating backoff let downloaded = false; let lastError = ""; for (let attempt = 0; attempt <= MAX_RETRIES && 
!downloaded; attempt++) { // On retry attempts, wait before trying again if (attempt > 0) { const waitSec = RETRY_DELAYS[Math.min(attempt - 1, RETRY_DELAYS.length - 1)]; log(1, `⏳ YouTube is rate-limiting. Waiting ${waitSec}s before retry ${attempt}/${MAX_RETRIES}...`); sendEvent(res, "status", { step: 1, message: `Rate limited — retrying in ${waitSec}s (attempt ${attempt}/${MAX_RETRIES})` }); await new Promise(r => setTimeout(r, waitSec * 1000)); log(1, `Retrying download (attempt ${attempt}/${MAX_RETRIES})...`); // Clean up any partial file from previous attempt await fs.unlink(audioPath).catch(() => {}); } // Try with cookies first if (hasCookies && !usedCookies) { try { log(1, attempt === 0 ? "Trying download with browser cookies (ad-free)..." : "Retrying with cookies..."); dlStdout = await attemptDownload([...dlBaseArgs, ...cookieArgs, dlUrl], "cookies"); usedCookies = true; downloaded = true; break; } catch (cookieErr) { const cookieMsg = (cookieErr.stderr || "") + " " + (cookieErr.message || ""); if (attempt === 0) log(1, `⚠ Cookie download failed: ${cookieMsg.trim().slice(0, 200)}`); log(1, "Retrying without cookies..."); await fs.unlink(audioPath).catch(() => {}); } } // Try without cookies if (!downloaded) { try { dlStdout = await attemptDownload([...dlBaseArgs, dlUrl], "no-cookies"); downloaded = true; break; } catch (dlErr) { lastError = (dlErr.stderr || "") + " " + (dlErr.stdout || "") + " " + (dlErr.message || ""); const blocked = isBotBlock(lastError); if (blocked && attempt < MAX_RETRIES) { log(1, `⚠ YouTube bot detection triggered`); // Will loop back and wait continue; } if (blocked && attempt === MAX_RETRIES) { // Last resort: try yt-dlp auto-update in case there's a newer version that handles this log(1, "All retries exhausted — attempting yt-dlp auto-update as last resort..."); const updateResult = await autoUpdateYtdlp(DATA_DIR); if (updateResult.success) { log(1, "yt-dlp updated! 
Final retry..."); try { const retryResult = await attemptDownload([...dlBaseArgs, dlUrl], "post-update"); dlStdout = retryResult; downloaded = true; break; } catch { /* fall through to error */ } } } // Non-bot error or exhausted retries if (!downloaded) { log(1, `⚠ yt-dlp error: ${lastError.trim().slice(0, 300)}`); } } } } if (!downloaded) { const blocked = isBotBlock(lastError); let hint = ""; if (blocked) { hint = "\n\nYouTube is temporarily blocking downloads from your IP address. " + "This is usually caused by:\n" + "• Recent VPN use (YouTube flags VPN IPs)\n" + "• Too many downloads in a short period\n" + "• YouTube's general anti-bot measures\n\n" + "What to try:\n" + "• Wait 10-30 minutes and try again\n" + "• Disconnect any VPN/proxy\n" + "• Upload fresh cookies.txt via Settings\n" + "• Try a different network (mobile hotspot, etc.)"; } throw new Error(`Download failed after ${MAX_RETRIES} retries.${hint}\n\nLast error: ${lastError.trim().slice(0, 300)}`); } if (!usedCookies && hasCookies) { log(1, "⚠ Downloaded without cookies — audio may contain ads"); } const stats = await fs.stat(audioPath); const sizeMB = (stats.size / (1024 * 1024)).toFixed(1); const dlTime = ((Date.now() - dlStart) / 1000).toFixed(1); log(1, `Audio downloaded (${sizeMB} MB) in ${dlTime}s`, `File size: ${sizeMB} MB`); // Extract title from the --print output of the download command const fallbackTitle = videoTitle !== "Untitled" ? 
videoTitle : null; let gotTitle = false; // First try: parse title from the download stdout (most reliable — same call that succeeded) if (dlStdout) { const printLines = dlStdout.split("\n").map(l => l.trim()).filter(Boolean); for (const line of printLines) { if (line.includes("|||")) { const sep = line.indexOf("|||"); const t = line.slice(0, sep).trim(); const d = line.slice(sep + 3).trim(); if (t && t !== "NA") { videoTitle = t; if (d && d !== "NA") videoUploadDate = d; gotTitle = true; log(1, `Video title: ${videoTitle}`); break; } } } } // Second try: separate yt-dlp call (no cookies needed for public metadata) if (!gotTitle) { try { const { stdout } = await execFileAsync("yt-dlp", [ "--print", "%(title)s|||%(upload_date)s", "--no-download", `https://www.youtube.com/watch?v=${videoId}`, ], { timeout: 15000 }); const raw = stdout.trim(); const sep = raw.indexOf("|||"); if (sep > 0) { videoTitle = raw.slice(0, sep).trim() || fallbackTitle || "Untitled"; const d = raw.slice(sep + 3).trim(); if (d && d !== "NA") videoUploadDate = d || videoUploadDate; } else { videoTitle = raw || fallbackTitle || "Untitled"; } gotTitle = videoTitle !== "Untitled"; if (gotTitle) log(1, `Video title: ${videoTitle}`); } catch { // Title fetch failed } } // Third try: use the queue-provided title if (!gotTitle && fallbackTitle) { videoTitle = fallbackTitle; log(1, `Using queue title: ${fallbackTitle}`); gotTitle = true; } if (!gotTitle) { log(1, "⚠ Could not fetch video title"); } } checkCancelled(); // ── Step 2: Transcribe audio ── // Detect audio duration to choose strategy const audioDuration = await getAudioDuration(audioPath); const audioDurMin = audioDuration ? (audioDuration / 60).toFixed(1) : "unknown"; log(2, `Audio duration: ${audioDuration ? 
formatTime(Math.floor(audioDuration)) : "unknown"} (${audioDurMin} min)`); // Strategy: // < threshold → full file (fast, cheap, reliable on capable backends) // ≥ threshold → chunked transcription // If full-file transcription is truncated or empty → fall back to chunks // // Thresholds are provider-aware. For the relay provider, we ask // the relay (via /relay/capabilities) what limits it can accept — // if the operator has routed transcription to their own Parakeet // hardware (which handles 2+ hour podcasts in one shot), the // relay reports very large limits and we skip chunking entirely. // For other providers, we use the legacy Gemini-safe defaults. let chunkTimeThresholdSec = 60 * 60; // 60 min let chunkSizeThresholdBytes = 30 * 1024 * 1024; // 30 MB let preferredChunkSeconds = 2700; // 45 min if (transcriptionProvider === "relay") { // Force a fresh fetch so the chunking decision matches the // routing decision the relay will make for THIS install RIGHT // NOW. The hourly background refresh is a fallback for when // this inline fetch fails (relay unreachable, etc.). await refreshRelayCapabilities().catch(() => {}); const caps = getRelayCapabilities(); if (typeof caps.max_audio_minutes === "number") { chunkTimeThresholdSec = caps.max_audio_minutes * 60; } if (typeof caps.max_audio_mb === "number") { chunkSizeThresholdBytes = caps.max_audio_mb * 1024 * 1024; } if (caps.preferred_chunk_seconds === null) { // Hardware-capable: never chunk. Set both thresholds to // Infinity so the needsChunking check stays simple. chunkTimeThresholdSec = Infinity; chunkSizeThresholdBytes = Infinity; preferredChunkSeconds = null; } else if (typeof caps.preferred_chunk_seconds === "number") { preferredChunkSeconds = caps.preferred_chunk_seconds; } // Plain-language line for the activity log. 
The full caps // payload (tier, backend pref, chunking thresholds) is verbose // and internal — surface only what a non-developer reader // benefits from: the max audio length the relay will accept, // since that's what dictates the chunking behavior they see. if (caps.max_audio_minutes) { log(2, `Relay will transcribe up to ${caps.max_audio_minutes} minutes per upload.`); } else { log(2, `Relay ready to transcribe.`); } } let audioFileSize = 0; try { audioFileSize = (await fs.stat(audioPath)).size; } catch {} const audioSizeMB = (audioFileSize / (1024 * 1024)).toFixed(1); const needsChunking = (audioDuration && audioDuration >= chunkTimeThresholdSec) || audioFileSize >= chunkSizeThresholdBytes; if (needsChunking) { const reason = audioDuration >= chunkTimeThresholdSec ? `${audioDurMin} min` : `${audioSizeMB} MB`; log(2, `Large audio (${reason}) — will use chunked transcription with ${transcriber.name}/${transcriptionModel}`); } else if (transcriptionProvider === "relay" && preferredChunkSeconds === null) { log(2, `Sending full file to relay (relay will chunk server-side if needed for its backend)`); } // Transcription model fallback chain: user's chosen model first, // then the rest of the provider's list. If Gemini 3 Flash hits a // 503 capacity error, the wrapper transparently retries with // 2.5 Flash. Matches the analysis fallback pattern below. const transcriptionFallbacks = [ transcriptionModel, ...transcriber.listTranscriptionModels().filter((m) => m !== transcriptionModel), ]; // Thin wrapper: keeps the call-site shape the chunking + main // pipeline already use. `transcriber.transcribeAudio` returns // { text, usage, cost, finishReason, blockReason, raw } — callers // read off that normalized shape regardless of which provider is // doing the transcription. On hard failure (after the provider's // own retry loop), walks the fallback chain to the next model. 
/**
 * Transcribe one audio file, walking the transcription-model fallback chain.
 *
 * Tries `modelName` first, then every other model in `transcriptionFallbacks`
 * (in order), and returns the first successful `transcriber.transcribeAudio`
 * result unchanged.
 *
 * @param {string} filePath - Audio file to transcribe.
 * @param {string} mType - MIME type, forwarded to the provider as `mimeType`.
 * @param {string} titleHint - Title hint forwarded to the provider.
 * @param {string} modelName - Model to try first.
 * @param {number} [offsetSeconds=0] - Forwarded to the provider as `offsetSeconds`.
 * @returns {Promise<object>} Whatever `transcriber.transcribeAudio` resolved with.
 * @throws {Error} An error whose message is `CANCELLED_MARK` when the job was
 *   aborted; otherwise the last provider error once every model has failed.
 */
async function transcribeSingleFile(filePath, mType, titleHint, modelName, offsetSeconds = 0) {
  // Per-call fallback list: the caller's model first, then the others.
  const chain = [modelName, ...transcriptionFallbacks.filter((m) => m !== modelName)];
  let lastErr;
  for (const tryModel of chain) {
    try {
      return await transcriber.transcribeAudio({
        filePath,
        mimeType: mType,
        titleHint,
        // Rich-context hints for the provider's prompt builder; providers
        // that don't use them are expected to ignore them.
        channelHint: videoChannel,
        descriptionHint: videoDescription,
        chaptersHint: videoChapters,
        model: tryModel,
        offsetSeconds,
        onProgress: (msg) => log(2, msg),
        signal: abortController.signal,
        // Same jobId is passed to the analysis call later in the pipeline.
        jobId,
      });
    } catch (err) {
      // Cancellation: bail immediately, don't try the next model.
      if (abortController.signal.aborted || err?.name === "AbortError") {
        throw new Error(CANCELLED_MARK);
      }
      lastErr = err;
      const msg = err?.message || String(err);
      log(2, `⚠ Transcription with ${tryModel} failed: ${msg.slice(0, 150)}`);
      if (tryModel !== chain[chain.length - 1]) {
        log(2, `Falling back to next transcription model...`);
      }
    }
  }
  throw lastErr || new Error("All transcription models failed");
}

// ── Helper: chunked transcription for long audio ──
/**
 * Split an audio file into chunks, transcribe them with bounded parallelism,
 * and stitch the per-chunk entries into one chronological transcript.
 *
 * @param {string} srcPath - Source audio file.
 * @param {string} srcMime - MIME type of the source (unused here: chunks are
 *   always submitted as "audio/mpeg").
 * @param {string} title - Title hint forwarded to each chunk transcription.
 * @param {string} modelName - Preferred transcription model.
 * @param {(msg: string) => void} logFn - Progress logger.
 * @returns {Promise<null | {entries: object[], cost: object}>} `null` when
 *   splitting produced at most one chunk; otherwise merged entries plus summed
 *   token counts.
 * @throws {Error} An error whose message is `CANCELLED_MARK` when the job is
 *   aborted while chunks are in flight.
 */
async function transcribeChunked(srcPath, srcMime, title, modelName, logFn) {
  // Upper bound on chunk transcriptions in flight at once.
  const CHUNK_CONCURRENCY = 6;
  // Slack past the chunk end for models that timestamp the END of a phrase.
  const TOLERANCE_SEC = 10;

  // mkdtemp guarantees a unique directory, so two jobs starting in the same
  // millisecond can never share (and later delete) each other's chunks.
  const chunkDir = await fs.mkdtemp(path.join(os.tmpdir(), "yt-chunks-"));
  try {
    // Honor the provider's preferred chunk size; fall back to 45 min.
    const chunkSec = preferredChunkSeconds || 2700;
    const audioChunks = await splitAudioFile(srcPath, chunkDir, chunkSec);
    if (!audioChunks || audioChunks.length <= 1) return null; // splitting not needed

    const workerCount = Math.min(audioChunks.length, CHUNK_CONCURRENCY);
    logFn(`Split audio into ${audioChunks.length} chunks for transcription (parallel, up to ${workerCount} in flight)`);

    let totalIn = 0;
    let totalOut = 0;
    const perChunkEntries = new Array(audioChunks.length).fill(null);

    // Each worker repeatedly claims the next unprocessed chunk index.
    // Out-of-order completion is fine: results are stored by index and
    // merged in index order afterwards.
    let nextChunkIdx = 0;
    const transcribeOne = async () => {
      while (true) {
        const my = nextChunkIdx++;
        if (my >= audioChunks.length) return;
        // Don't start new chunks once the job has been cancelled.
        if (abortController.signal.aborted) throw new Error(CANCELLED_MARK);

        const chunk = audioChunks[my];
        logFn(`Transcribing chunk ${chunk.index + 1}/${audioChunks.length} (starts at ${formatTime(chunk.startOffset)})...`);
        try {
          const chunkResult = await transcribeSingleFile(
            chunk.path, "audio/mpeg", title, modelName, chunk.startOffset
          );
          // A missing/partial cost must not discard a good transcript.
          totalIn += Number(chunkResult.cost?.inputTokens) || 0;
          totalOut += Number(chunkResult.cost?.outputTokens) || 0;

          const chunkText = chunkResult.text;
          if (!chunkText) {
            logFn(`⚠ Chunk ${chunk.index + 1} returned empty response — skipping`);
            perChunkEntries[my] = [];
            continue;
          }

          const chunkEntries = parseTimestampedTranscript(chunkText);
          if (chunk.startOffset > 0) {
            // Heuristic: if the first timestamp is already near the chunk's
            // absolute start, treat the offsets as absolute and leave them.
            const firstOffset = chunkEntries.length > 0 ? chunkEntries[0].offset : 0;
            const alreadyAdjusted = firstOffset >= chunk.startOffset * 0.8;
            if (!alreadyAdjusted) {
              for (const e of chunkEntries) {
                e.offset += chunk.startOffset;
              }
              logFn(`Adjusted chunk ${chunk.index + 1} timestamps by +${formatTime(chunk.startOffset)}`);
            }
          }

          // Sanity cap: drop entries whose absolute offset lies past the
          // chunk's time window (bogus model timestamps would otherwise
          // poison the merge step's "running max offset" dedupe). Skipped
          // when the window is unknown, since comparing against NaN would
          // drop every entry.
          const chunkEndAbsolute = chunk.startOffset + chunk.durationSec;
          const validUpper = chunkEndAbsolute + TOLERANCE_SEC;
          const filtered = Number.isFinite(validUpper)
            ? chunkEntries.filter((e) => e.offset <= validUpper)
            : [...chunkEntries];
          const dropped = chunkEntries.length - filtered.length;
          if (dropped > 0) {
            const worstOffset = chunkEntries.reduce(
              (max, e) => Math.max(max, e.offset || 0),
              -Infinity
            );
            logFn(
              `⚠ Chunk ${chunk.index + 1}: dropped ${dropped} segment(s) with bogus timestamps past chunk end ${formatTime(chunkEndAbsolute)} (worst: ${formatTime(worstOffset)}). Model: ${modelName}`
            );
          }

          // The merge step relies on non-decreasing offsets per chunk.
          filtered.sort((a, b) => (a.offset || 0) - (b.offset || 0));

          // Truncation warning: the last entry ends well before the chunk
          // does. Partial entries are kept; the operator is told about it.
          const lastAbsolute = filtered.length > 0
            ? filtered[filtered.length - 1].offset
            : chunk.startOffset;
          const expectedEnd = chunk.startOffset + chunk.durationSec;
          const coverageRatio = chunk.durationSec > 0
            ? (lastAbsolute - chunk.startOffset) / chunk.durationSec
            : 1;
          if (coverageRatio < 0.8 && chunk.durationSec > 60) {
            const missingSec = expectedEnd - lastAbsolute;
            logFn(
              `⚠ Chunk ${chunk.index + 1}: transcript appears TRUNCATED — last entry at ${formatTime(lastAbsolute)} but chunk ends at ${formatTime(expectedEnd)} (missing ${formatTime(missingSec)} of speech). Model: ${modelName}. Likely cause: model hit maxOutputTokens. Consider a model with larger output capacity, or shrink chunk size.`
            );
          }
          logFn(`Chunk ${chunk.index + 1}: ${filtered.length} segments, last timestamp ${filtered.length > 0 ? formatTime(filtered[filtered.length - 1].offset) : "N/A"} (coverage ${(coverageRatio * 100).toFixed(0)}%)`);
          perChunkEntries[my] = filtered;
        } catch (err) {
          // Cancellation is not a chunk failure: propagate it so the whole
          // job stops instead of grinding through the remaining chunks.
          if (abortController.signal.aborted || err?.message === CANCELLED_MARK) {
            throw new Error(CANCELLED_MARK);
          }
          logFn(`⚠ Chunk ${chunk.index + 1} failed: ${err?.message?.slice(0, 120) || err}`);
          perChunkEntries[my] = [];
        }
      }
    };

    const workers = Array.from({ length: workerCount }, transcribeOne);
    await Promise.all(workers);

    // Merge in chunk-index order. Any entry at or before the last offset
    // already merged when its chunk starts merging is treated as overlap
    // and dropped.
    const allEntries = [];
    for (const chunkEntries of perChunkEntries) {
      if (!chunkEntries) continue;
      const lastExistingTime = allEntries.length > 0
        ? allEntries[allEntries.length - 1].offset
        : -1;
      for (const e of chunkEntries) {
        if (e.offset > lastExistingTime) allEntries.push(e);
      }
    }

    // Recalculate durations from consecutive offsets; the final entry gets
    // a fixed 15s.
    for (let i = 0; i < allEntries.length - 1; i++) {
      allEntries[i].duration = allEntries[i + 1].offset - allEntries[i].offset;
    }
    if (allEntries.length > 0) allEntries[allEntries.length - 1].duration = 15;

    logFn(`Chunked transcription complete: ${allEntries.length} total segments`);
    return {
      entries: allEntries,
      cost: {
        inputTokens: totalIn,
        outputTokens: totalOut,
        thinkingTokens: 0,
        totalTokens: totalIn + totalOut,
        totalCost: "0",
        totalCostDisplay: "",
      },
    };
  } finally {
    // Best-effort cleanup of the chunk files.
    try {
      await fs.rm(chunkDir, { recursive: true, force: true });
    } catch {}
  }
}
chunkedResult.entries.length > 0) { entries = chunkedResult.entries; txCost = chunkedResult.cost; } else { throw new Error(`${transcriber.name} returned empty transcription for both full file and chunked attempts. Try again or use a shorter video.`); } } else { txCost = transcriptResult.cost; const txTime = ((Date.now() - txStart) / 1000).toFixed(1); log(2, `Transcription complete in ${txTime}s`, `${transcriptText.length} chars received`); entries = parseTimestampedTranscript(transcriptText); log(2, `Parsed ${entries.length} transcript segments`); } // ── Single-segment expansion ── // Whisper-API endpoints that don't return per-segment timestamps // (e.g. NVIDIA Parakeet, some bare-Whisper wrappers) hand back a // single text blob. parseTimestampedTranscript dumps that into // one entry at [0:00], which (a) trips the truncation detector // below and (b) leaves the analyzer with one giant segment so // it can only produce one section. Synthesize sentence-based // entries with interpolated timestamps so both code paths // downstream work like they do for Gemini/Whisper-1. 
if (entries.length === 1 && audioDuration && audioDuration > 30 && (entries[0].text || "").length > 100) { const synthesized = synthesizeEntriesFromText(entries[0].text, audioDuration); if (synthesized.length > 1) { log(2, `Single-segment transcript expanded into ${synthesized.length} synthetic sentence-based entries with interpolated timestamps`); entries = synthesized; } } // ── Truncation detection → fall back to chunks ── if (audioDuration && entries.length > 0) { const lastEntryTime = entries[entries.length - 1].offset; const coverageRatio = lastEntryTime / audioDuration; const missingSeconds = audioDuration - lastEntryTime; if (coverageRatio < 0.90 && missingSeconds > 120) { log(2, `⚠ Transcript truncated — covers ${formatTime(lastEntryTime)} of ${formatTime(Math.floor(audioDuration))} (${(coverageRatio * 100).toFixed(0)}%)`); log(2, `Falling back to chunked transcription...`); const chunkedResult = await transcribeChunked(audioPath, mimeType, videoTitle, transcriptionModel, (msg) => log(2, msg)); if (chunkedResult && chunkedResult.entries.length > 0) { entries = chunkedResult.entries; txCost = chunkedResult.cost; const finalCoverage = entries[entries.length - 1].offset; log(2, `Coverage after chunking: ${formatTime(finalCoverage)} of ${formatTime(Math.floor(audioDuration))}`); } } else { log(2, `Transcript coverage: ${formatTime(lastEntryTime)} of ${formatTime(Math.floor(audioDuration))} (${(coverageRatio * 100).toFixed(0)}%) — OK`); } } } const txTotalTime = ((Date.now() - txStart) / 1000).toFixed(1); log(2, `Total transcription time: ${txTotalTime}s — ${entries.length} segments`); if (!entries || entries.length === 0) { const preview = (transcriptText || "").slice(0, 500).replace(/\n/g, " ↵ "); log(2, `⚠ Transcript parse failed. Preview: ${preview}`); sendEvent(res, "error", { message: "Failed to parse transcript." 
}); sendEvent(res, "result", { videoId, entries: [], chunks: [], rawTranscript: transcriptText }); res.end(); return; } } // end if (!entries) — close the audio + transcription block if (!entries || entries.length === 0) { throw new Error("No transcript available — neither captions nor audio transcription produced segments."); } checkCancelled(); // ── Step 3: Topic analysis with model fallback ── // Fallback order: caller's chosen model first, then the rest of the // analyzer provider's analysis-model list. const providerModels = analyzer.listAnalysisModels(); const analysisFallbacks = [ analysisModel, ...providerModels.filter((m) => m !== analysisModel), ]; // ── Auto-coalesce input for the analysis LLM (single-shot path only) ── // Long content uses the chunked path (each ~18-min window only sees // a small slice of entries, so context isn't a concern) — coalescing // there would just degrade section-boundary precision unnecessarily. // Short content still goes through a single analyze call; for tiny // local models with 8-16K context windows, a dense Parakeet // transcript at full granularity can still overflow, so we keep // the safety-net coalesce for that path. const lastEntry = entries[entries.length - 1]; const totalSec = (lastEntry?.offset || 0) + (lastEntry?.duration || 0); const willChunkAnalysis = totalSec > CHUNKING_CUTOFF_SECONDS; const { coalesced: analysisEntries, indexMap: analysisIndexMap } = willChunkAnalysis ? 
/**
 * Convert analyzer sections (indices in `analysisEntries` space) into the
 * chunk shape sent to the UI: `{ title, summary, entries, startTime }`.
 *
 * Used for both the streamed `sections_partial` events and the final result,
 * so both paths produce the same shape.
 *
 * When `analysisIndexMap` is set, section indices are first translated back
 * to positions in the full `entries` array via `startOrig` / `endOrig`.
 * Indices are clamped to the valid range, and sections that end up with no
 * entries are dropped.
 *
 * @param {Array<{title: string, summary: string, startIndex: number, endIndex: number}>} sections
 *   Sections as produced by the analyzer. This is model-generated JSON, so a
 *   missing/non-array value yields `[]` and non-object elements are skipped
 *   rather than raising a TypeError.
 * @returns {Array<{title: string, summary: string, entries: object[], startTime: number}>}
 */
const sectionsToChunks = (sections) => {
  if (!Array.isArray(sections)) return [];

  const lastEntryIdx = entries.length - 1;
  // Clamp an analysis-space index into the bounds of analysisIndexMap.
  const clampToMap = (idx) =>
    Math.max(0, Math.min(idx, analysisIndexMap.length - 1));

  return sections
    .filter((section) => section !== null && typeof section === "object")
    .map((section) => {
      let start = Math.max(0, section.startIndex);
      let end = section.endIndex;
      if (analysisIndexMap) {
        const mappedStart = analysisIndexMap[clampToMap(start)];
        const mappedEnd = analysisIndexMap[clampToMap(end)];
        // Unmappable indices fall back to the widest possible range.
        start = mappedStart ? mappedStart.startOrig : 0;
        end = mappedEnd ? mappedEnd.endOrig : lastEntryIdx;
      }
      start = Math.max(0, start);
      end = Math.min(lastEntryIdx, end);
      const sectionEntries = entries.slice(start, end + 1);
      return {
        title: section.title,
        summary: section.summary,
        entries: sectionEntries,
        startTime: sectionEntries[0]?.offset || 0,
      };
    })
    .filter((c) => c.entries.length > 0);
};
"podcast" : "youtube"; sendEvent(res, "transcript_ready", { videoId, videoTitle, videoChannel, videoChapters, videoUploadDate, videoDescription, entries, type: contentType, willChunk: willChunkAnalysis, }); // Single entry point: runChunkedAnalysis decides per-content // whether to single-shot (≤25 min) or window-and-stitch (>25 min). // Per-window concurrency is bounded inside; model fallback walks // the analysisFallbacks list within each window independently. let analysisResult = null; let usedAnalysisModel = analysisModel; const anaStart = Date.now(); log(3, `Analyzing topics across ${analysisEntries.length} segments with ${analyzer.name}...`); try { analysisResult = await runChunkedAnalysis({ entries: analysisEntries, analyzer, fallbackModels: analysisFallbacks, onProgress: (msg) => log(3, msg), onWindowComplete: ({ windowIdx, totalWindows, ownedSections }) => { // Convert this window's owned sections into the final // chunk shape and stream them to the browser. The stitcher's // "trust the next window for the overlap region" rule was // applied per-window inside runChunkedAnalysis, so what we // emit here is final — the UI won't have to revise it. const partialChunks = sectionsToChunks(ownedSections); if (partialChunks.length > 0) { sendEvent(res, "sections_partial", { windowIdx, totalWindows, chunks: partialChunks, }); } }, signal: abortController.signal, jobId, }); usedAnalysisModel = analysisResult.model || analysisModel; } catch (err) { if (abortController.signal.aborted || err?.name === "AbortError") { throw new Error(CANCELLED_MARK); } throw err; } if (!analysisResult) { throw new Error("All analysis models failed. Please try again later."); } const analysisText = analysisResult.text; if (!analysisText) { throw new Error(`${analyzer.name} returned an empty analysis. The transcript may be too long for the model. 
Try again.`); } const anaTime = ((Date.now() - anaStart) / 1000).toFixed(1); const anaCost = analysisResult.cost; // Parse the analysis JSON let analysisJson; try { let jsonStr = analysisText.trim(); const codeBlockMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/); if (codeBlockMatch) jsonStr = codeBlockMatch[1].trim(); analysisJson = JSON.parse(jsonStr); } catch (e) { console.error("Failed to parse analysis JSON:", e.message); sendEvent(res, "error", { message: "Topic analysis returned invalid JSON. Returning raw transcript." }); sendEvent(res, "result", { videoId, entries, chunks: [], rawTranscript: transcriptText }); res.end(); return; } // Build final chunks via the same helper used for streaming — // guarantees what the browser accumulated from sections_partial // events matches what gets saved to history and sent in the // canonical result event. const chunks = sectionsToChunks(analysisJson.sections); const totalTime = ((Date.now() - pipelineStart) / 1000).toFixed(1); log(3, `Topic analysis complete in ${anaTime}s — found ${chunks.length} topics`); log(3, `Pipeline finished in ${totalTime}s`); // Save to history. As of the library-for-everyone change, free // users save too — the app feels broken otherwise (summarize a // video, never find it again). const historyId = await saveToHistory(scopeForRequest(req), videoId, url, videoTitle, chunks, entries, logHistory, videoUploadDate, contentType).catch(() => null); // Debit local credit on success — trial cookie OR free signed-in tenant. 
if (RECAP_MODE === "multi" && req.trial) { try { const { debitOne } = await import("./anon-trial.js"); debitOne(req.trial.cookie_id); } catch (err) { console.warn("[/api/process] trial debit failed:", err); } } else if (RECAP_MODE === "multi" && req.creditsToDebit === "tenant") { try { const { gateAndDebit } = await import("./tenant-credits.js"); await gateAndDebit(req.user.id); } catch (err) { console.warn("[/api/process] tenant_credits debit failed:", err); } } sendEvent(res, "result", { videoId, videoTitle, entries, chunks, historyId, type: contentType }); res.end(); } catch (err) { // Treat any AbortError or aborted-signal state as a user cancellation // even if it didn't surface as CANCELLED_MARK upstream (e.g. an SDK // throws AbortError before checkCancelled() runs). const cancelled = err?.message === CANCELLED_MARK || err?.name === "AbortError" || abortController.signal.aborted; if (!cancelled) { // Dump as much context as the runtime gives us. Generic // messages like "Error in input stream" are usually wrappers // around an underlying SDK / Node stream error; the cause + // stack are what actually tells us what happened. console.error("Pipeline error:", { name: err?.name, message: err?.message, code: err?.code, status: err?.status || err?.statusCode, cause: err?.cause, stack: err?.stack, transcriptionProvider, analysisProvider, }); } if (!res.headersSent) { res.status(cancelled ? 499 : 500).json({ error: cancelled ? 
// ── Helpers ────────────────────────────────────────────────────────────────
// getAudioDuration + splitAudioFile moved to ./audio.js
// sendEvent / extractVideoId / formatTime / parseTimestampedTranscript moved to ./util.js
// buildAnalysisPrompt moved to ./gemini-helpers.js

// ── Network mode ──────────────────────────────────────────────────────────
// On StartOS (DATA_DIR=/data): always bind to 0.0.0.0 (container networking)
// On local Mac dev: default to localhost (safe on public Wi-Fi)
//   - Your .app launcher sets LAN_MODE=true (Home) or false (Traveling)
//   - Running "npm start" directly defaults to localhost
const isStartOS = process.env.DATA_DIR && process.env.DATA_DIR !== path.join(__dirname, "..");
const lanMode = isStartOS ? true : process.env.LAN_MODE === "true";
const BIND_HOST = lanMode ? "0.0.0.0" : "127.0.0.1";

// Lets the client find out whether the server is bound for LAN access.
app.get("/api/network-mode", (req, res) => {
  res.json({ lan: lanMode });
});

// ── Start server ───────────────────────────────────────────────────────────
// The listen callback is async and its promise is not awaited by Express, so
// every startup step is individually guarded: a failure in one step is logged
// and the remaining steps (including the hourly subscription timer) still run,
// instead of surfacing as an unhandled rejection.
app.listen(PORT, BIND_HOST, async () => {
  console.log(`\n Recap API running on http://${BIND_HOST}:${PORT}`);
  console.log(` Data directory: ${DATA_DIR}`);
  console.log(` Recap mode: ${RECAP_MODE}`);
  console.log(` Checking yt-dlp...`);

  try {
    const info = await checkYtdlp();
    if (!info.installed) {
      console.log(` ⚠ yt-dlp not found. Install it: pip install yt-dlp\n`);
    } else if (info.updateAvailable) {
      console.log(` ✓ yt-dlp ${info.version} found`);
      console.log(` ↑ Update available: ${info.latestVersion}`);
      console.log(` Auto-updating...`);
      const result = await autoUpdateYtdlp(DATA_DIR);
      if (result.success) {
        const refreshed = await checkYtdlp();
        console.log(` ✓ yt-dlp updated to ${refreshed.version}\n`);
      } else {
        console.log(` ⚠ Auto-update failed. Run manually: yt-dlp -U\n`);
      }
    } else {
      console.log(` ✓ yt-dlp ${info.version} (up to date)\n`);
    }
  } catch (err) {
    console.warn(" ⚠ yt-dlp check failed:", err?.message);
  }

  // One-time, idempotent: migrate any legacy install-wide subscription
  // files into the operator's own scope so storage is uniformly per-scope.
  try {
    const moved = await migrateGlobalSubscriptionsToOwner();
    if (moved > 0) {
      console.log(` 📦 Migrated ${moved} subscription file(s) into the operator scope`);
    }
  } catch (err) {
    console.warn(" ⚠ Subscription migration skipped:", err.message);
  }

  // Prune queued items that have since been summarized, across every scope
  // that has a queue (owner + each subscribing tenant).
  try {
    for (const s of await listAutoQueueScopes()) {
      await reconcileAutoQueueWithLibrary(s).catch(() => {});
    }
  } catch (err) {
    console.warn(" ⚠ Auto-queue reconcile skipped:", err?.message);
  }

  // Check subscriptions on startup (fans out over all subscribing scopes).
  console.log(` 📡 Checking subscriptions for new videos...`);
  await checkSubscriptions().catch((err) => console.error(" ⚠ Subscription check error:", err.message));

  // Recover items stuck in "processing" from a crash → re-queue as approved
  // (across all scopes), then start the processor if anything is ready.
  let anyApproved = false;
  try {
    for (const s of await listAutoQueueScopes()) {
      await mutateAutoQueue(s, (items) => {
        for (const item of items) {
          if (item.status === "processing") {
            console.log(` ⚠ Recovering stuck item (${s}): ${item.title}`);
            item.status = "approved";
          }
        }
        if (items.some((q) => q.status === "approved")) anyApproved = true;
      });
    }
  } catch (err) {
    console.warn(" ⚠ Stuck-item recovery skipped:", err?.message);
  }

  if (anyApproved) {
    console.log(` 🔄 Starting background processor...`);
    // Delay slightly so the server is fully ready before internal HTTP calls
    setTimeout(() => kickProcessor(), 2000);
  }

  console.log(` ⚙ Processing config: ${processingConfig.delaySeconds}s delay, ${processingConfig.enabled ? "enabled" : "paused"}`);

  // Check subscriptions every hour (runs continuously on StartOS)
  setInterval(() => {
    checkSubscriptions().catch((err) => console.error(" ⚠ Subscription check error:", err.message));
  }, 60 * 60 * 1000);
});