import express from "express";
import cors from "cors";
import { execFile } from "child_process";
import { promisify } from "util";
import fs from "fs/promises";
import { createWriteStream } from "fs";
import path from "path";
import os from "os";
import https from "https";
import http from "http";
import { randomUUID } from "crypto";
import { fileURLToPath } from "url";
import * as license from "./license.js";
import {
  sendEvent,
  extractVideoId,
  formatTime,
  parseTimestampedTranscript,
  synthesizeEntriesFromText,
  safeText,
  retryGemini,
  fetchUrl,
} from "./util.js";
import { buildAnalysisPrompt } from "./gemini-helpers.js";
import { getProvider, resolveProviderOpts, PROVIDER_NAMES } from "./providers/index.js";
import {
  getAudioDuration,
  splitAudioFile,
  downloadPodcastAudio,
} from "./audio.js";
import { checkYtdlp, autoUpdateYtdlp } from "./ytdlp.js";
import {
  initCookies,
  ytCookieArgs,
  ytExtraArgs,
  ytCookieMethod,
  setupCookieRoutes,
  getCookieFilePath,
} from "./cookies.js";
import * as config from "./config.js";
import { initInstallId, getInstallId } from "./install-id.js";
import * as relayState from "./relay-state.js";
import * as relayDefault from "./relay-default.js";
import { resolveApiKey, getConfigSnapshot } from "./config.js";
import * as licenseMW from "./license-middleware.js";
import {
  setupLicenseMiddleware,
  setupLicenseRoutes,
  startLicenseRefresh,
  refreshLicenseOnline,
  isFreeUser,
  tryAcquireFreeSlot,
  releaseFreeSlot,
  getCurrentFreeJob,
  abortCurrentFreeJob,
  isFreeJobAborted,
  appendCurrentJobLog,
} from "./license-middleware.js";
import {
  initHistory,
  saveToHistory,
  loadMeta,
  saveMeta,
  setupHistoryRoutes,
  getHistoryDir,
} from "./history.js";
import { setupLibraryRoutes } from "./library.js";
import {
  initAdminAuth,
  setupAdminAuthMiddleware,
  setupAdminAuthRoutes,
} from "./admin-auth.js";

const execFileAsync = promisify(execFile);
const app = express();
const PORT = process.env.PORT || 3001;

// ── Data directory (configurable for StartOS or local dev) ────────────────
// On StartOS: DATA_DIR=/data (persistent volume)
// On local dev: defaults to project root (parent of server/)
//
// fileURLToPath (rather than `new URL(...).pathname`) decodes percent-escapes
// and produces a native path, so installs under directories containing
// spaces or other escaped characters resolve correctly.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const DATA_DIR = process.env.DATA_DIR || path.join(__dirname, "..");
const historyDir = path.join(DATA_DIR, "history");
const configDir = path.join(DATA_DIR, "config");

// Best effort: startup continues even if a directory cannot be created, but
// the reason is logged instead of being silently discarded.
for (const dir of [historyDir, configDir]) {
  await fs.mkdir(dir, { recursive: true }).catch((err) => {
    console.warn(` ⚠ Could not create ${dir}: ${err?.message || err}`);
  });
}
await initHistory({ dataDir: DATA_DIR });

// Per-install identity for the upcoming relay backend. Generated once
// on first boot, persisted in DATA_DIR. Surfaced via /api/health and
// /api/install-id (read-only). See ./install-id.js for details.
await initInstallId({ dataDir: DATA_DIR });

// API key + live reload moved to ./config.js
await config.initConfig({ dataDir: DATA_DIR });
const envPath = config.getEnvPath();

// Cookies state + helpers + routes moved to ./cookies.js
await initCookies({ dataDir: DATA_DIR, envPath });

// Admin login gate state. Reads username + scrypt hash + session secret
// from /data/config/startos-config.json (set via the "Set Admin
// Password" StartOS action) and refreshes on the same poll cadence as
// config.js. When no hash is set, the gate is a no-op.
await initAdminAuth({ dataDir: DATA_DIR });

// resolveApiKey moved to ./config.js
app.use(cors());
app.use(express.json({ limit: "100mb" }));

// ── Admin login gate ───────────────────────────────────────────────────────
// MUST run before the license middleware: if an admin password is set,
// nobody (licensed or not) reaches the activation flow without first
// passing the login. Endpoints needed by the login UI itself
// (/api/admin/status, /api/admin/login, /api/admin/logout) and
// /api/health stay open.
setupAdminAuthMiddleware(app);
setupAdminAuthRoutes(app);

// ── Keysat licensing ────────────────────────────────────────────────────────
// All license-aware request handling (gate, Pro feature gates, /api/license
// routes, free-tier slot management, periodic online refresh) lives in
// ./license-middleware.js. Importers read the current state via
// licenseMW.LIC (a live binding).
setupLicenseMiddleware(app);
setupLicenseRoutes(app);
startLicenseRefresh();

// History storage + routes moved to ./history.js
// (saveToHistory, loadMeta, saveMeta are imported above)
// Late-bound addToSkipList — defined later in this file for now.
setupHistoryRoutes(app, { addToSkipList: (id) => addToSkipList(id) });

// Serve the frontend from ../public
app.use(express.static(path.join(__dirname, "..", "public")));
app.use("/assets", express.static(path.join(__dirname, "..", "assets")));

// checkYtdlp + autoUpdateYtdlp moved to ./ytdlp.js
// PRICING + calcCost + buildAnalysisPrompt moved to ./gemini-helpers.js
// safeText + retryGemini moved to ./util.js

// ── Health check ───────────────────────────────────────────────────────────
// Reports yt-dlp availability, whether a server API key is set, the cookie
// source in use (plus the age of cookies.txt when that is the source) and
// the install id.
app.get("/api/health", async (req, res) => {
  const ytdlpInfo = await checkYtdlp();

  const method = ytCookieMethod();
  const cookies = { method };
  if (method === "cookies.txt") {
    try {
      const { mtimeMs } = await fs.stat(getCookieFilePath());
      const msPerDay = 1000 * 60 * 60 * 24;
      const fileAgeDays = Math.floor((Date.now() - mtimeMs) / msPerDay);
      cookies.fileAgeDays = fileAgeDays;
      // cookies typically expire after ~14 days
      cookies.fileExpiring = fileAgeDays > 12;
    } catch {
      // The file could not be stat'ed; report the method without age details.
    }
  }

  // ytdlpInfo is spread last, so its fields win on any key collision.
  res.json({
    ok: true,
    ytdlp: ytdlpInfo.installed,
    hasServerKey: !!config.serverApiKey,
    cookies,
    installId: getInstallId(),
    ...ytdlpInfo,
  });
});

// Read-only install identity. Used by the UI's settings panel so the
// operator can verify the install has been provisioned, and by the
// future relay client to attach the X-Recap-Install-Id header.
// Open path — license gate doesn't apply (the relay needs this ID to be
// reachable before any credits have been granted).
app.get("/api/install-id", (_req, res) => {
  const installId = getInstallId();
  res.json({ installId });
});

// Proxy through to the relay's /relay/policy endpoint. Used by the UI
// to render dynamic copy (e.g. "N relay credits" in the activation
// screen reflects whatever the operator currently has the relay
// configured to give Core users — no Recap update needed when the
// operator tunes tier quotas). Cached in-memory for a short window
// so a busy UI doesn't hammer the relay.
let __cachedRelayPolicy = { at: 0, body: null };
app.get("/api/relay/policy", async (_req, res) => {
  const relayBase = relayDefault.getRelayBaseURL();
  if (!relayBase) {
    return res.json({ configured: false, tiers: null });
  }

  // Serve the last successful response while it is younger than five minutes.
  const cacheTtlMs = 5 * 60 * 1000;
  const { at: cachedAt, body: cachedBody } = __cachedRelayPolicy;
  if (cachedBody && Date.now() - cachedAt < cacheTtlMs) {
    return res.json({ configured: true, ...cachedBody });
  }

  try {
    const policyUrl = `${relayBase.replace(/\/$/, "")}/relay/policy`;
    const upstream = await fetch(policyUrl, { signal: AbortSignal.timeout(5000) });
    if (!upstream.ok) {
      // Failed lookups are reported but never cached.
      return res.json({ configured: true, tiers: null, error: `HTTP ${upstream.status}` });
    }
    const policy = await upstream.json();
    __cachedRelayPolicy = { at: Date.now(), body: policy };
    res.json({ configured: true, ...policy });
  } catch (err) {
    res.json({ configured: true, tiers: null, error: err?.message || String(err) });
  }
});

// Last-known relay state (credits + tier) cached in-process. The UI
// polls this for the "N credits remaining · Tier: X" banner; the
// underlying numbers are refreshed every time a relay provider call
// lands.
//
// First-paint UX: when the cache is empty (no relay calls yet AND no
// prior ping has populated it), opportunistically hit the relay's
// /balance endpoint with a short timeout so the banner shows real
// numbers on first page load instead of "balance unknown".
// Best effort — if the relay is unreachable the cache stays null and the
// UI falls back to its "unknown" copy. `?refresh=1` forces a ping
// even when the cache already has a value.
//
// `configured` is true when relay-default.js has a non-empty URL —
// operator-controlled at build time, never user-configurable.
app.get("/api/relay/status", async (req, res) => {
  const configured = Boolean(relayDefault.getRelayBaseURL());
  let state = relayState.getRelayState();

  const { refresh } = req.query;
  const forceRefresh = refresh === "1" || refresh === "true";
  const nothingCached = state.creditsRemaining === null && state.tier === null;

  if (configured && (nothingCached || forceRefresh)) {
    try {
      const snapshotOfConfig = await getConfigSnapshot();
      const providerOpts = resolveProviderOpts("relay", { config: snapshotOfConfig, clientOpts: {} });
      const relay = getProvider("relay", providerOpts);
      await relay.pingBalance({ timeoutMs: 5000 });
      state = relayState.getRelayState();
    } catch (err) {
      // Log + record so the UI shows a real error instead of a silent
      // "balance unknown". The provider call itself records when the
      // ping reaches the network; this catches the earlier failures
      // (e.g. createRelayProvider throwing because install-id wasn't
      // initialized yet) where lastError wouldn't otherwise be set.
      console.error(
        `[relay/status] ping failed: ${err?.message || err} (stack: ${err?.stack || "n/a"})`
      );
      relayState.recordRelayError(err?.message || String(err));
      state = relayState.getRelayState();
    }
  }

  res.json({ ...state, configured });
});

// ── Status endpoints ───────────────────────────────────────────────────────
// Both endpoints always answer with `sleeping: false`.
app.post("/api/heartbeat", (req, res) => {
  res.json({ ok: true, sleeping: false });
});

app.get("/api/status", (req, res) => {
  const uptime = process.uptime();
  res.json({ ok: true, sleeping: false, uptime });
});

// Shutdown: used by the macOS .app launcher to stop the server cleanly.
// On StartOS this endpoint is unused (StartOS manages the container lifecycle).
app.post("/api/shutdown", (req, res) => {
  res.json({ ok: true, message: "Server shutting down..." });
  console.log("\n Server shutdown requested from browser. Goodbye!\n");
  // Exit shortly after responding so the reply has a chance to be flushed.
  setTimeout(() => process.exit(0), 300);
});

// ── Manual update endpoint ─────────────────────────────────────────────────
app.post("/api/update-ytdlp", async (req, res) => {
  const result = await autoUpdateYtdlp(DATA_DIR);
  const info = await checkYtdlp();
  res.json({ ...result, ...info });
});

// ── Cookie management endpoints ───────────────────────────────────────────
// /api/cookies/* routes registered via setupCookieRoutes (./cookies.js)
setupCookieRoutes(app);

// ── Library export/import ──── moved to ./library.js ─────────
setupLibraryRoutes(app);

// ── Subscriptions ─────────────────────────────────────────────────────────
const subsPath = path.join(historyDir, "subscriptions.json");

/**
 * Read the persisted subscription list.
 * @returns {Promise<Array>} the stored subscriptions, or [] when the file is
 *   missing or unreadable (unreadable files are logged, not thrown).
 */
async function loadSubscriptions() {
  try {
    return JSON.parse(await fs.readFile(subsPath, "utf-8")).subscriptions || [];
  } catch (err) {
    // A missing file just means "no subscriptions yet"; anything else
    // (corrupt JSON, permissions) is worth surfacing in the log.
    if (err?.code !== "ENOENT") {
      console.warn(` ⚠ Could not read ${subsPath}: ${err?.message || err}`);
    }
    return [];
  }
}

/**
 * Persist the subscription list as pretty-printed JSON.
 * @param {Array} subs subscriptions to store
 */
async function saveSubscriptions(subs) {
  await fs.writeFile(subsPath, JSON.stringify({ subscriptions: subs }, null, 2));
}

/**
 * List recent videos from a channel/playlist via yt-dlp (no download).
 * Uses --flat-playlist for speed; returns id + title (no upload_date in flat mode).
 * @param {string} url channel or playlist URL
 * @param {number} [limit=15] value passed to --playlist-end
 * @returns {Promise<Array<{id: string, title: string}>>}
 */
async function listChannelVideosFast(url, limit = 15) {
  const { stdout } = await execFileAsync("yt-dlp", [
    "--print", "%(id)s|%(title)s",
    "--no-download",
    "--playlist-end", String(limit),
    "--flat-playlist",
    ...ytCookieArgs(),
    ...ytExtraArgs(),
    url,
  ], { timeout: 60000 });

  const videos = [];
  for (const line of stdout.trim().split("\n")) {
    // Split on the FIRST "|" only, since titles may contain the separator.
    const sep = line.indexOf("|");
    // Blank lines and lines without an id/separator are not video rows.
    if (sep <= 0) continue;
    videos.push({ id: line.slice(0, sep), title: line.slice(sep + 1) });
  }
  return videos;
}

/**
 * Fetch upload_date for a list of video IDs via yt-dlp, 50 IDs per invocation.
 * Bails after 2 consecutive failed batches to avoid grinding through blocked
 * requests. A batch that exits with an error but still printed some dates is
 * treated as a partial success and those dates are kept.
 * @param {string[]} videoIds
 * @returns {Promise<Object<string, string>>} map of video id → upload date as printed by yt-dlp
 */
async function fetchUploadDates(videoIds) {
  if (videoIds.length === 0) return {};
  const dateMap = {};
  const batchSize = 50;
  let consecutiveFails = 0;

  // Parse "id|date" lines into dateMap; returns how many dates were recorded.
  const collectDates = (stdout) => {
    let found = 0;
    for (const line of String(stdout).trim().split("\n")) {
      const [id, date] = line.split("|");
      if (id && date && date !== "NA") {
        dateMap[id] = date;
        found += 1;
      }
    }
    return found;
  };

  for (let i = 0; i < videoIds.length; i += batchSize) {
    const batch = videoIds.slice(i, i + batchSize);
    const batchNo = Math.floor(i / batchSize) + 1;
    const urls = batch.map(id => `https://www.youtube.com/watch?v=${id}`);
    try {
      const { stdout } = await execFileAsync("yt-dlp", [
        "--print", "%(id)s|%(upload_date)s",
        "--no-download",
        ...ytCookieArgs(),
        ...urls,
      ], { timeout: 45000 });
      const found = collectDates(stdout);
      consecutiveFails = 0;
      subLog(` Batch ${batchNo}: got dates for ${found} of ${batch.length} video(s)`);
    } catch (err) {
      const reason = String(err?.message || err).slice(0, 80);
      // The promisified execFile attaches whatever the process printed before
      // failing to the error object, so dates resolved before the failure
      // (non-zero exit or timeout) are still usable.
      const salvaged = typeof err?.stdout === "string" ? collectDates(err.stdout) : 0;
      if (salvaged > 0) {
        consecutiveFails = 0;
        subLog(` ⚠ Batch ${batchNo} partially failed (kept dates for ${salvaged} of ${batch.length} video(s)): ${reason}`);
        continue;
      }
      consecutiveFails++;
      subLog(` ⚠ Batch ${batchNo} failed: ${reason}`);
      if (consecutiveFails >= 2) {
        subLog(` ⚠ 2 consecutive failures — aborting yt-dlp date fetch (bot detection likely)`);
        break;
      }
    }
  }
  return dateMap;
}

// ── RSS-based date fetching (bypasses bot detection) ─────────────────────
// Fetch a URL and return the response body as a string
// fetchUrl moved to ./util.js

/**
 * Get channel_id from a YouTube channel/playlist URL using yt-dlp.
 * Tries a fast --flat-playlist lookup first (may print "NA"), then a slower
 * non-flat lookup of the first item that reads full metadata.
 * @param {string} url
 * @returns {Promise<string|null>} the channel id, or null when neither attempt yields one
 */
async function getChannelId(url) {
  const attempts = [
    { extraArgs: ["--flat-playlist"], timeout: 15000 },
    { extraArgs: [], timeout: 30000 },
  ];
  for (const { extraArgs, timeout } of attempts) {
    try {
      const { stdout } = await execFileAsync("yt-dlp", [
        "--print", "%(channel_id)s",
        "--no-download",
        "--playlist-items", "1",
        ...extraArgs,
        ...ytCookieArgs(),
        url,
      ], { timeout });
      const id = stdout.trim().split("\n")[0];
      if (id && id !== "NA" && id.length > 5) return id;
    } catch {
      // This attempt failed; fall through to the next one.
    }
  }
  return null;
}

// Fetch video dates from YouTube RSS feed (no auth, no bot detection)
// Returns { videoId:
"YYYYMMDD", ... } for up to 15 most recent videos async function fetchDatesFromRSS(channelId) { const dateMap = {}; if (!channelId) return dateMap; try { const rssUrl = `https://www.youtube.com/feeds/videos.xml?channel_id=${channelId}`; const xml = await fetchUrl(rssUrl); const entryRegex = /[\s\S]*?([^<]+)<\/yt:videoId>[\s\S]*?([^<]+)<\/published>[\s\S]*?<\/entry>/g; let match; while ((match = entryRegex.exec(xml)) !== null) { const videoId = match[1]; const published = match[2]; // e.g. "2025-12-01T18:00:00+00:00" const date = published.slice(0, 10).replace(/-/g, ""); // "20251201" dateMap[videoId] = date; } } catch (err) { subLog(` ⚠ RSS feed fetch failed: ${err.message}`); } return dateMap; } // ── Podcast RSS feed parsing ──────────────────────────────────────────────── // Detect if a URL looks like a podcast RSS feed function isPodcastFeedUrl(url) { if (!url) return false; const u = url.trim().toLowerCase(); // Common podcast RSS feed patterns if (u.includes("/feed") || u.includes("/rss") || u.includes("feeds.") || u.includes(".xml")) return true; if (u.includes("anchor.fm") || u.includes("feeds.buzzsprout") || u.includes("feeds.simplecast")) return true; if (u.includes("feeds.megaphone") || u.includes("feeds.transistor") || u.includes("feeds.libsyn")) return true; if (u.includes("feeds.podcastmirror") || u.includes("feeds.acast") || u.includes("feeds.fireside")) return true; if (u.includes("rss.art19") || u.includes("podbean.com/feed")) return true; return false; } // Fetch and parse a podcast RSS feed → returns { title, episodes: [{ id, title, date, audioUrl, duration }] } async function parsePodcastRSS(feedUrl, limit = 200) { const xml = await fetchUrl(feedUrl); // Extract podcast title const titleMatch = xml.match(/[\s\S]*?(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?<\/title>/); const podcastTitle = titleMatch ? 
titleMatch[1].trim() : "Unknown Podcast"; // Extract episodes from <item> elements const episodes = []; const itemRegex = /<item>([\s\S]*?)<\/item>/g; let match; while ((match = itemRegex.exec(xml)) !== null && episodes.length < limit) { const item = match[1]; // GUID (unique episode identifier) const guidMatch = item.match(/<guid[^>]*>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?<\/guid>/); const guid = guidMatch ? guidMatch[1].trim() : null; // Title const epTitleMatch = item.match(/<title>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?<\/title>/); const epTitle = epTitleMatch ? epTitleMatch[1].trim() : "Untitled Episode"; // Publish date const pubDateMatch = item.match(/<pubDate>([^<]+)<\/pubDate>/); const pubDate = pubDateMatch ? pubDateMatch[1].trim() : null; let dateStr = ""; // YYYYMMDD if (pubDate) { try { const d = new Date(pubDate); if (!isNaN(d.getTime())) { dateStr = d.toISOString().slice(0, 10).replace(/-/g, ""); } } catch {} } // Audio enclosure URL const enclosureMatch = item.match(/<enclosure[^>]+url=["']([^"']+)["']/); const audioUrl = enclosureMatch ? enclosureMatch[1].trim() : null; // Duration (itunes:duration) const durMatch = item.match(/<itunes:duration>([^<]+)<\/itunes:duration>/); const duration = durMatch ? 
durMatch[1].trim() : ""; if (!audioUrl) continue; // Skip episodes without audio // Use guid, or fall back to audioUrl as unique ID const id = guid || audioUrl; episodes.push({ id, title: epTitle, date: dateStr, audioUrl, duration }); } return { title: podcastTitle, episodes }; } // Download a podcast episode audio file via HTTP(S) to a local path // downloadPodcastAudio moved to ./audio.js // Get channel name from URL async function fetchChannelName(url) { // Try fast flat-playlist approach first try { const { stdout } = await execFileAsync("yt-dlp", [ "--print", "%(channel)s", "--no-download", "--playlist-end", "1", "--flat-playlist", ...ytCookieArgs(), url, ], { timeout: 15000 }); const name = stdout.trim().split("\n")[0]; if (name && name !== "NA") return name; } catch {} // Fallback: fetch without flat-playlist (slower but gets channel from video metadata) try { const { stdout } = await execFileAsync("yt-dlp", [ "--print", "%(channel)s", "--no-download", "--playlist-end", "1", ...ytCookieArgs(), url, ], { timeout: 30000 }); const name = stdout.trim().split("\n")[0]; if (name && name !== "NA") return name; } catch {} // Last resort: extract handle from URL try { const u = new URL(url); const handleMatch = u.pathname.match(/\/@([^/]+)/); if (handleMatch) return "@" + handleMatch[1]; } catch {} return "Unknown Channel"; } // ── In-process write serialization ──────────────────────────────────────── // Per-file promise chain to prevent lost-update races on read-modify-write // state files (skip-list, seen-list). Without this, two concurrent DELETE // handlers can each load the same snapshot, add their own id, and the // second write overwrites the first — silently dropping entries. 
const _fileLocks = new Map();

// Queue `fn` behind whatever is already pending for `key`, and return the
// promise for fn's own outcome.
function withFileLock(key, fn) {
  const tail = _fileLocks.get(key) ?? Promise.resolve();
  // Run fn whether the previous holder resolved or rejected.
  const current = tail.then(fn, fn);
  // Store a never-rejecting tail so one failure doesn't leak into the next caller.
  const settled = current.catch(() => {});
  _fileLocks.set(key, settled);
  return current;
}

// Skip list — videos deleted from history that subscriptions should not re-add
const skipPath = path.join(historyDir, "skip-list.json");

// Returns the stored ids as a Set; any read/parse failure yields an empty Set.
async function loadSkipList() {
  try {
    const raw = await fs.readFile(skipPath, "utf-8");
    const { videoIds } = JSON.parse(raw);
    return new Set(videoIds || []);
  } catch {
    return new Set();
  }
}

// Adds one id to the skip list; the read-modify-write runs under the file lock.
async function addToSkipList(videoId) {
  return withFileLock(skipPath, async () => {
    const ids = await loadSkipList();
    ids.add(videoId);
    const payload = JSON.stringify({ videoIds: [...ids] });
    await fs.writeFile(skipPath, payload);
  });
}

// Seen list — videos already offered for approval (persists across restarts)
const seenPath = path.join(historyDir, "seen-list.json");

// Returns the stored ids as a Set; any read/parse failure yields an empty Set.
async function loadSeenList() {
  try {
    const raw = await fs.readFile(seenPath, "utf-8");
    const { videoIds } = JSON.parse(raw);
    return new Set(videoIds || []);
  } catch {
    return new Set();
  }
}

// Adds several ids to the seen list; the read-modify-write runs under the file lock.
async function addToSeenList(videoIds) {
  return withFileLock(seenPath, async () => {
    const ids = await loadSeenList();
    for (const videoId of videoIds) {
      ids.add(videoId);
    }
    const payload = JSON.stringify({ videoIds: [...ids] });
    await fs.writeFile(seenPath, payload);
  });
}

// Get all videoIds already processed in history: every *.json in historyDir
// except the bookkeeping files, reading the `videoId` field of each.
async function getProcessedVideoIds() {
  const ids = new Set();
  const bookkeeping = new Set([
    "_meta.json",
    "subscriptions.json",
    "skip-list.json",
    "seen-list.json",
    "auto-queue.json",
  ]);

  let names;
  try {
    names = await fs.readdir(historyDir);
  } catch {
    return ids;
  }

  for (const name of names) {
    if (!name.endsWith(".json") || bookkeeping.has(name)) continue;
    try {
      const entry = JSON.parse(await fs.readFile(path.join(historyDir, name), "utf-8"));
      if (entry.videoId) ids.add(entry.videoId);
    } catch {
      // Unreadable or malformed entries are ignored.
    }
  }
  return ids;
}

// Server-side auto-queue for subscription-discovered videos (persisted to disk)
const autoQueuePath = path.join(historyDir, "auto-queue.json");
let autoQueue = [];

// Reads the persisted queue; any read/parse failure yields an empty queue.
async function loadAutoQueue() {
  try {
    const raw = await fs.readFile(autoQueuePath, "utf-8");
    const { items } = JSON.parse(raw);
    return items || [];
  } catch {
    return [];
  }
}

// Writes the in-memory queue to disk as pretty-printed JSON.
async function saveAutoQueue() {
  const payload = JSON.stringify({ items: autoQueue }, null, 2);
  await fs.writeFile(autoQueuePath, payload);
}

// Load persisted queue on startup, filtering out already-processed items
autoQueue = await loadAutoQueue();
{
  const alreadyDone = await getProcessedVideoIds();
  const sizeBefore = autoQueue.length;
  autoQueue = autoQueue.filter((entry) => !alreadyDone.has(entry.videoId));
  const removed = sizeBefore - autoQueue.length;
  if (removed > 0) {
    console.log(` Auto-queue: removed ${removed} already-processed item(s)`);
    await saveAutoQueue();
  }
}

// ── Background processing queue ──────────────────────────────────────────
// Processes "approved" auto-queue items sequentially with configurable delay
// between items to avoid hammering YouTube with rapid-fire downloads.
const processingConfigPath = path.join(configDir, "processing-config.json");

let processingConfig = {
  // Default delay between processing queue items (5 minutes)
  delaySeconds: 300,
  // Whether background processing is active
  enabled: true,
};

let processingState = {
  // Is the processor loop currently active?
  running: false,
  // The item currently being processed (or null)
  currentItem: null,
  // Timestamp of last completed item
  lastCompleted: null,
  // If true, skip delay before next item
  rush: false,
  // Recent processing log entries
  log: [],
};

// Log to the console and to the in-memory processor log (trimmed past 100 entries).
function procLog(msg) {
  console.log(` [processor] ${msg}`);
  const { log } = processingState;
  log.push({ t: new Date().toISOString(), msg });
  if (log.length > 100) {
    log.shift();
  }
}

// Overlay persisted settings onto the defaults; only correctly-typed fields
// are accepted, and a missing/invalid file leaves the defaults in place.
async function loadProcessingConfig() {
  try {
    const raw = await fs.readFile(processingConfigPath, "utf-8");
    const { delaySeconds, enabled } = JSON.parse(raw);
    if (typeof delaySeconds === "number") {
      processingConfig.delaySeconds = delaySeconds;
    }
    if (typeof enabled === "boolean") {
      processingConfig.enabled = enabled;
    }
  } catch {
    // Keep defaults.
  }
}

// Writes the current processing settings to disk as pretty-printed JSON.
async function saveProcessingConfig() {
  const payload = JSON.stringify(processingConfig, null, 2);
  await fs.writeFile(processingConfigPath, payload);
}

await loadProcessingConfig();

// The background processor: picks "approved" items, processes via internal HTTP,
// waits the configured delay, then picks the next one. Runs continuously.
/**
 * Drain the auto-queue: repeatedly pick the first "approved" item, process it
 * via processItemInternally, and record the outcome ("completed" or "failed")
 * on the item. Returns when no approved item is left or processing is disabled.
 * A call made while a loop is already active returns immediately.
 *
 * The `running` flag is cleared in a `finally`, so an unexpected error (for
 * example saveAutoQueue rejecting) cannot leave the processor permanently
 * marked as running and therefore un-restartable.
 *
 * @returns {Promise<void>} rejects only on unexpected errors outside the
 *   per-item processing try/catch.
 */
async function backgroundProcessor() {
  if (processingState.running) return; // Already running
  processingState.running = true;
  procLog("Background processor started");

  try {
    while (true) {
      // Find next approved item
      const item = autoQueue.find(q => q.status === "approved");
      if (!item) {
        procLog("No approved items in queue — processor sleeping");
        return;
      }
      if (!processingConfig.enabled) {
        procLog("Processing is paused — processor sleeping");
        return;
      }

      // Wait the configured delay (unless rush mode or first item)
      if (processingState.lastCompleted && !processingState.rush) {
        const delaySec = processingConfig.delaySeconds;
        procLog(`Waiting ${delaySec}s before next item...`);
        await new Promise(r => setTimeout(r, delaySec * 1000));
      }
      processingState.rush = false;

      // Re-check the item hasn't been removed/changed while we waited
      const freshItem = autoQueue.find(q => q.id === item.id);
      if (!freshItem || freshItem.status !== "approved") {
        procLog(`Item ${item.id} was removed or status changed — skipping`);
        continue;
      }

      // Mark as processing
      freshItem.status = "processing";
      processingState.currentItem = freshItem;
      await saveAutoQueue();
      procLog(`Processing: ${freshItem.title} (${freshItem.url})`);

      try {
        // Call the existing /api/process endpoint via internal HTTP request
        const result = await processItemInternally(freshItem);
        freshItem.status = "completed";
        freshItem.completedAt = new Date().toISOString();
        freshItem.historyId = result.historyId || null;
        processingState.lastCompleted = new Date().toISOString();
        procLog(`✓ Completed: ${freshItem.title}`);
      } catch (err) {
        freshItem.status = "failed";
        freshItem.error = err.message || String(err);
        freshItem.failedAt = new Date().toISOString();
        procLog(`✗ Failed: ${freshItem.title} — ${freshItem.error.slice(0, 200)}`);
      }

      processingState.currentItem = null;
      await saveAutoQueue();
    }
  } finally {
    // Runs on every exit path (normal return or thrown error).
    processingState.running = false;
    processingState.currentItem = null;
  }
}

// Internal HTTP
request to /api/process — consumes the SSE stream and // waits for the "result" or "error" event. This reuses the entire existing // pipeline without any code duplication. function processItemInternally(item) { return new Promise((resolve, reject) => { const apiKey = resolveApiKey(null); // Use server's stored key if (!apiKey) { return reject(new Error("No API key configured. Set your Gemini API key in Settings.")); } const body = JSON.stringify({ url: item.url, apiKey: "USE_SERVER_KEY", type: item.type || undefined, title: item.title || undefined, uploadDate: item.uploadDate || undefined, episodeId: item.videoId || undefined, }); const req = http.request({ hostname: "127.0.0.1", port: PORT, path: "/api/process", method: "POST", headers: { "Content-Type": "application/json", "Content-Length": Buffer.byteLength(body) }, timeout: 1800000, // 30 minutes max for very long videos }, (res) => { let buffer = ""; let lastResult = null; let lastError = null; res.on("data", (chunk) => { buffer += chunk.toString(); // Parse SSE events from the buffer const lines = buffer.split("\n"); buffer = lines.pop() || ""; // Keep incomplete line in buffer let currentEvent = null; for (const line of lines) { if (line.startsWith("event: ")) { currentEvent = line.slice(7).trim(); } else if (line.startsWith("data: ") && currentEvent) { try { const data = JSON.parse(line.slice(6)); if (currentEvent === "result") lastResult = data; if (currentEvent === "error") lastError = data; if (currentEvent === "log") { procLog(` [${data.elapsed || "?"}s] ${data.message}`); } } catch {} currentEvent = null; } else if (line === "") { currentEvent = null; } } }); res.on("end", () => { if (lastError) { reject(new Error(lastError.message || "Processing failed")); } else if (lastResult) { resolve(lastResult); } else { reject(new Error("Processing ended without result")); } }); res.on("error", reject); }); req.on("error", reject); req.on("timeout", () => { req.destroy(); reject(new Error("Processing timed out 
after 30 minutes")); }); req.write(body); req.end(); }); } // Wake up the processor whenever there are approved items function kickProcessor() { if (!processingState.running && processingConfig.enabled) { const hasApproved = autoQueue.some(q => q.status === "approved"); if (hasApproved) { backgroundProcessor().catch(err => procLog(`Processor error: ${err.message}`)); } } } let subCheckRunning = false; let subCheckPromise = null; let subCheckLog = []; // Stores recent check logs for debug endpoint function subLog(msg) { console.log(msg); subCheckLog.push({ t: new Date().toISOString(), msg }); if (subCheckLog.length > 200) subCheckLog.shift(); } async function checkSubscriptions() { if (subCheckRunning) { // Wait for current check to finish, then run again if (subCheckPromise) await subCheckPromise; return checkSubscriptions(); } subCheckRunning = true; subCheckPromise = _checkSubscriptionsInner().finally(() => { subCheckRunning = false; subCheckPromise = null; }); return subCheckPromise; } async function _checkSubscriptionsInner() { // Pro-tier feature: skip silently when not entitled. The HTTP gate above // returns 402 to callers; this guards the background timer + manual paths. if (!licenseMW.LIC.entitlements.has("subscriptions")) { subCheckLog = []; subLog("Skipped: subscriptions require a Pro license."); return; } subCheckLog = []; // Clear logs for fresh check const subs = await loadSubscriptions(); if (subs.length === 0) { subLog("No subscriptions found"); return; } const processedIds = await getProcessedVideoIds(); const skippedIds = await loadSkipList(); const seenIds = await loadSeenList(); const queuedIds = new Set(autoQueue.map(q => { // For YouTube: extract video ID from URL; for podcasts: use stored videoId (GUID) if (q.videoId) return q.videoId; const m = q.url.match(/[?&]v=([a-zA-Z0-9_-]{11})/); return m ? 
m[1] : null; }).filter(Boolean)); subLog(`${processedIds.size} in library, ${skippedIds.size} skipped, ${seenIds.size} seen, ${queuedIds.size} in queue`); let changed = false; for (const sub of subs) { if (sub.paused) { subLog(`⏸ ${sub.name} — paused, skipping`); continue; } try { const icon = sub.type === "podcast" ? "🎙" : "📡"; subLog(`${icon} Checking: ${sub.name} (${sub.url})`); const cutoffDate = sub.createdAt.replace(/[-T:\.Z]/g, "").slice(0, 8); if (sub.type === "podcast") { // ── Podcast subscription: discover episodes from RSS feed ── const { episodes } = await parsePodcastRSS(sub.url, 200); subLog(` Found ${episodes.length} episode(s) in RSS feed`); if (episodes.length === 0) { sub.lastChecked = new Date().toISOString(); changed = true; continue; } // Filter out already-known episodes const unknowns = episodes.filter(ep => { if (processedIds.has(ep.id)) return false; if (queuedIds.has(ep.id)) return false; if (skippedIds.has(ep.id)) return false; if (seenIds.has(ep.id)) return false; return true; }); const filtered = episodes.length - unknowns.length; subLog(` ${unknowns.length} to check, ${filtered} already known`); if (unknowns.length === 0) { sub.lastChecked = new Date().toISOString(); changed = true; continue; } subLog(` Cutoff date: ${cutoffDate}`); let newCount = 0; const seenNow = []; for (const ep of unknowns) { if (!ep.date || ep.date.length !== 8) continue; // skip undated if (ep.date < cutoffDate) { seenNow.push(ep.id); continue; // before cutoff } subLog(` ✅ ${ep.date} — ${ep.title.slice(0, 60)}`); const itemStatus = sub.autoDownload ? 
"approved" : "pending"; autoQueue.push({ id: `auto-${Date.now()}-${Buffer.from(ep.id).toString("base64url").slice(0, 16)}`, videoId: ep.id, // episode GUID url: ep.audioUrl, title: ep.title, uploadDate: ep.date, subscriptionId: sub.id, subscriptionName: sub.name, status: itemStatus, type: "podcast", duration: ep.duration || "", }); queuedIds.add(ep.id); newCount++; } if (seenNow.length > 0) await addToSeenList(seenNow); sub.lastChecked = new Date().toISOString(); subLog(` → ${newCount} episode(s) queued for approval from ${sub.name}`); changed = true; } else { // ── YouTube subscription: discover videos via yt-dlp + RSS dates ── // Scale fetch limit based on how far back the subscription date goes const daysSinceSub = Math.max(1, Math.ceil((Date.now() - new Date(sub.createdAt).getTime()) / 86400000)); const estimatedVideos = Math.ceil(daysSinceSub / 7) * 4; const fetchLimit = Math.min(Math.max(estimatedVideos, 15), 200); subLog(` Subscription age: ${daysSinceSub}d → fetching up to ${fetchLimit} videos`); const candidates = await listChannelVideosFast(sub.url, fetchLimit); subLog(` Found ${candidates.length} recent video(s)`); if (candidates.length === 0) { subLog(` ⚠ No videos returned from yt-dlp for this channel`); sub.lastChecked = new Date().toISOString(); changed = true; continue; } const unknowns = candidates.filter(v => { if (processedIds.has(v.id)) return false; if (queuedIds.has(v.id)) return false; if (skippedIds.has(v.id)) return false; if (seenIds.has(v.id)) return false; return true; }); const filtered = candidates.length - unknowns.length; subLog(` ${unknowns.length} to check, ${filtered} already known`); if (unknowns.length === 0) { sub.lastChecked = new Date().toISOString(); changed = true; continue; } subLog(` Cutoff date: ${cutoffDate} — fetching upload dates...`); // Step 1: Get channel ID (cached on subscription, or fetch once) if (!sub.channelId) { subLog(` Resolving channel ID...`); sub.channelId = await getChannelId(sub.url); if 
(sub.channelId) { subLog(` Resolved channel ID: ${sub.channelId}`); changed = true; } else { subLog(` ⚠ Could not resolve channel ID — RSS will be skipped`); } } // Step 2: RSS feed — fast, no bot detection, covers ~15 most recent const dateMap = sub.channelId ? await fetchDatesFromRSS(sub.channelId) : {}; const rssCount = Object.keys(dateMap).length; if (sub.channelId) subLog(` RSS feed: got dates for ${rssCount} videos`); // Step 3: Smart early termination const needDates = unknowns.filter(v => !dateMap[v.id]); let earlyTermination = false; if (needDates.length > 0 && rssCount > 0) { const rssDates = Object.values(dateMap).sort(); const oldestRssDate = rssDates[0]; if (oldestRssDate < cutoffDate) { earlyTermination = true; subLog(` Oldest RSS video (${oldestRssDate}) is before cutoff — ${needDates.length} older video(s) are definitely too old, skipping yt-dlp`); } else { subLog(` ${needDates.length} video(s) not in RSS — trying yt-dlp for dates...`); const ytDates = await fetchUploadDates(needDates.map(v => v.id)); Object.assign(dateMap, ytDates); const ytCount = Object.keys(ytDates).length; if (ytCount > 0) subLog(` yt-dlp added dates for ${ytCount} more video(s)`); } } else if (needDates.length > 0 && rssCount === 0) { subLog(` ${needDates.length} video(s) need dates — trying yt-dlp...`); const ytDates = await fetchUploadDates(needDates.map(v => v.id)); Object.assign(dateMap, ytDates); const ytCount = Object.keys(ytDates).length; if (ytCount > 0) subLog(` yt-dlp added dates for ${ytCount} more video(s)`); } const gotDates = Object.keys(dateMap).length; if (gotDates > 0 || needDates.length === 0) { subLog(` Total dates: ${gotDates} of ${unknowns.length} videos`); } else { subLog(` ⚠ No dates available — skipping. 
Try setting YT_COOKIES_FROM in .env`); sub.lastChecked = new Date().toISOString(); changed = true; continue; } let newCount = 0; for (const video of unknowns) { const uploadDate = dateMap[video.id]; if (!uploadDate || uploadDate.length !== 8) { continue; } if (uploadDate < cutoffDate) { subLog(` ⏭ ${video.id} (${uploadDate}) — before cutoff`); continue; } subLog(` ✅ ${video.id}${uploadDate ? ` (${uploadDate})` : ""} — ${video.title.slice(0,50)}`); const ytItemStatus = sub.autoDownload ? "approved" : "pending"; autoQueue.push({ id: `auto-${Date.now()}-${video.id}`, videoId: video.id, url: `https://www.youtube.com/watch?v=${video.id}`, title: video.title, uploadDate: uploadDate || null, subscriptionId: sub.id, subscriptionName: sub.name, status: ytItemStatus, }); queuedIds.add(video.id); newCount++; } // Only add to seen list when we can PROVE a video is too old const seenNow = unknowns.filter(v => { if (queuedIds.has(v.id)) return false; const d = dateMap[v.id]; if (d && d.length === 8 && d < cutoffDate) return true; if (d && d.length === 8 && d >= cutoffDate) return false; return earlyTermination; }).map(v => v.id); if (seenNow.length > 0) await addToSeenList(seenNow); sub.lastChecked = new Date().toISOString(); subLog(` → ${newCount} video(s) queued for approval from ${sub.name}`); changed = true; } } catch (err) { subLog(` ⚠ FAILED for ${sub.name}: ${err.message}`); } } const pendingCount = autoQueue.filter(q => q.status === "pending").length; const approvedCount = autoQueue.filter(q => q.status === "approved").length; subLog(`Done. 
${pendingCount} pending, ${approvedCount} approved in auto-queue.`); if (changed) await saveSubscriptions(subs); await saveAutoQueue(); // Wake up the background processor if there are approved items if (approvedCount > 0) { subLog(`Kicking background processor for ${approvedCount} approved item(s)...`); kickProcessor(); } } // CRUD endpoints app.get("/api/subscriptions", async (req, res) => { const subs = await loadSubscriptions(); res.json({ subscriptions: subs }); }); // Extract a normalized channel key from a YouTube URL for dedup function channelKeyFromUrl(url) { try { const u = new URL(url); const path = u.pathname.toLowerCase().replace(/\/+$/, ""); // /@handle/videos → @handle, /@handle/streams → @handle const handleMatch = path.match(/\/(@[^/]+)/); if (handleMatch) return handleMatch[1]; // /channel/UCxxx → channel/UCxxx const channelMatch = path.match(/\/(channel\/[^/]+)/); if (channelMatch) return channelMatch[1]; // /c/name or /user/name const cMatch = path.match(/\/(c|user)\/([^/]+)/); if (cMatch) return cMatch[0]; // /playlist?list=PLxxx const list = u.searchParams.get("list"); if (list) return `playlist/${list}`; return path; // fallback } catch { return url.toLowerCase().replace(/\/+$/, ""); } } app.post("/api/subscriptions", async (req, res) => { const { url, since, type, autoDownload } = req.body; if (!url) return res.status(400).json({ error: "Missing url" }); const isPodcast = type === "podcast" || isPodcastFeedUrl(url); const subs = await loadSubscriptions(); // Prevent duplicates if (isPodcast) { const normalizedUrl = url.trim().toLowerCase().replace(/\/+$/, ""); if (subs.find(s => s.url.trim().toLowerCase().replace(/\/+$/, "") === normalizedUrl)) { return res.status(409).json({ error: "Already subscribed to this podcast" }); } } else { const newKey = channelKeyFromUrl(url); if (subs.find(s => channelKeyFromUrl(s.url) === newKey)) { return res.status(409).json({ error: "Already subscribed to this channel" }); } } // Use provided cutoff date, or 
default to right now const cutoff = since ? new Date(since).toISOString() : new Date().toISOString(); const sub = { id: `sub-${Date.now()}`, url, name: "Loading...", type: isPodcast ? "podcast" : "youtube", channelId: null, createdAt: cutoff, lastChecked: null, paused: false, autoDownload: autoDownload === true, }; subs.push(sub); await saveSubscriptions(subs); // Respond immediately so the UI isn't blocked res.json(sub); // Background: resolve name and check for new content (async () => { try { if (isPodcast) { // Fetch podcast title from RSS feed const { title } = await parsePodcastRSS(url, 1); const freshSubs = await loadSubscriptions(); const s = freshSubs.find(x => x.id === sub.id); if (s) { s.name = title || url; await saveSubscriptions(freshSubs); } console.log(` 🎙 New podcast subscription: ${title} — checking for episodes...`); } else { const [name, channelId] = await Promise.all([ fetchChannelName(url), getChannelId(url), ]); const freshSubs = await loadSubscriptions(); const s = freshSubs.find(x => x.id === sub.id); if (s) { s.name = name; if (channelId) s.channelId = channelId; await saveSubscriptions(freshSubs); } console.log(` 📡 New subscription: ${name} — checking for recent videos...`); } await checkSubscriptions(); } catch (err) { console.error(" ⚠ Background subscription setup error:", err.message); } })(); }); app.delete("/api/subscriptions/:id", async (req, res) => { let subs = await loadSubscriptions(); subs = subs.filter(s => s.id !== req.params.id); await saveSubscriptions(subs); // Also remove any pending auto-queue items from this subscription autoQueue = autoQueue.filter(q => q.subscriptionId !== req.params.id); await saveAutoQueue(); res.json({ ok: true }); }); app.put("/api/subscriptions/:id/pause", async (req, res) => { const subs = await loadSubscriptions(); const sub = subs.find(s => s.id === req.params.id); if (!sub) return res.status(404).json({ error: "Subscription not found" }); sub.paused = !sub.paused; await 
saveSubscriptions(subs); res.json(sub); }); app.put("/api/subscriptions/:id/since", async (req, res) => { const { since } = req.body; if (!since) return res.status(400).json({ error: "Missing 'since' date" }); const subs = await loadSubscriptions(); const sub = subs.find(s => s.id === req.params.id); if (!sub) return res.status(404).json({ error: "Subscription not found" }); sub.createdAt = new Date(since).toISOString(); await saveSubscriptions(subs); res.json(sub); }); // Debug: subscription check logs (viewable in-app) app.get("/api/sub-check-log", (req, res) => { res.json({ log: subCheckLog, autoQueueCount: autoQueue.length, autoQueue: autoQueue.map(q => ({ id: q.id, videoId: q.videoId, title: q.title, status: q.status, sub: q.subscriptionName })) }); }); // Auto-queue endpoints (frontend polls these) app.get("/api/auto-queue", (req, res) => { // Return all items grouped by status for the frontend const showAll = req.query.all === "true"; const items = showAll ? autoQueue : autoQueue.filter(q => ["pending", "approved", "processing"].includes(q.status)); res.json({ items, checkRunning: subCheckRunning, counts: { pending: autoQueue.filter(q => q.status === "pending").length, approved: autoQueue.filter(q => q.status === "approved").length, processing: autoQueue.filter(q => q.status === "processing").length, completed: autoQueue.filter(q => q.status === "completed").length, failed: autoQueue.filter(q => q.status === "failed").length, }, }); }); app.delete("/api/auto-queue/:id", async (req, res) => { autoQueue = autoQueue.filter(q => q.id !== req.params.id); await saveAutoQueue(); res.json({ ok: true }); }); app.post("/api/auto-queue/:id/skip", async (req, res) => { const item = autoQueue.find(q => q.id === req.params.id); if (item && item.videoId) { await addToSkipList(item.videoId); } autoQueue = autoQueue.filter(q => q.id !== req.params.id); await saveAutoQueue(); res.json({ ok: true }); }); // Approve a single auto-queue item for background processing 
app.post("/api/auto-queue/:id/approve", async (req, res) => {
  const target = autoQueue.find((entry) => entry.id === req.params.id);
  if (!target) {
    return res.status(404).json({ error: "Item not found" });
  }
  if (target.status !== "pending") {
    return res.status(400).json({ error: `Cannot approve item with status '${target.status}'` });
  }
  target.status = "approved";
  await saveAutoQueue();
  kickProcessor();
  res.json({ ok: true, item: target });
});

// Approve all pending items at once
app.post("/api/auto-queue/approve-all", async (req, res) => {
  const pendingItems = autoQueue.filter((entry) => entry.status === "pending");
  for (const entry of pendingItems) {
    entry.status = "approved";
  }
  // Only persist and wake the processor when something actually changed.
  if (pendingItems.length > 0) {
    await saveAutoQueue();
    kickProcessor();
  }
  res.json({ ok: true, approved: pendingItems.length });
});

// Retry a failed item
app.post("/api/auto-queue/:id/retry", async (req, res) => {
  const target = autoQueue.find((entry) => entry.id === req.params.id);
  if (!target) {
    return res.status(404).json({ error: "Item not found" });
  }
  if (target.status !== "failed") {
    return res.status(400).json({ error: `Cannot retry item with status '${target.status}'` });
  }
  // Back to "approved" with the failure details wiped.
  target.status = "approved";
  target.error = undefined;
  target.failedAt = undefined;
  await saveAutoQueue();
  kickProcessor();
  res.json({ ok: true, item: target });
});

// Clear completed and failed items from the queue
app.post("/api/auto-queue/clear-finished", async (req, res) => {
  const sizeBefore = autoQueue.length;
  autoQueue = autoQueue.filter(
    (entry) => entry.status !== "completed" && entry.status !== "failed"
  );
  const removed = sizeBefore - autoQueue.length;
  if (removed > 0) {
    await saveAutoQueue();
  }
  res.json({ ok: true, removed });
});

// Start a subscription check unless one is already running. The check
// is not awaited; the response only confirms that it was started.
app.post("/api/subscriptions/check-now", async (req, res) => {
  if (subCheckRunning) {
    return res.json({ ok: true, message: "Already checking" });
  }
  checkSubscriptions().catch((err) => {
    console.error(" ⚠ Manual subscription check error:", err.message);
  });
  res.json({ ok: true, message: "Check started" });
});

// ── Auto-download toggle per subscription ──────────────────────────────────
app.put("/api/subscriptions/:id/auto-download", async (req, res) => {
  const allSubs = await loadSubscriptions();
  const subscription = allSubs.find((candidate) => candidate.id === req.params.id);
  if (!subscription) {
    return res.status(404).json({ error: "Subscription not found" });
  }
  // Anything other than a literal `true` turns auto-download off.
  subscription.autoDownload = req.body.enabled === true;
  await saveSubscriptions(allSubs);
  res.json({ ok: true, subscription });
});

// ── Background processing status & configuration ───────────────────────────

// Get current processing status (what's running, queue depth, config)
app.get("/api/processing/status", (req, res) => {
  const active = processingState.currentItem;
  const tally = (status) => autoQueue.filter((entry) => entry.status === status).length;
  res.json({
    running: processingState.running,
    currentItem: active
      ? { id: active.id, title: active.title, url: active.url }
      : null,
    lastCompleted: processingState.lastCompleted,
    config: processingConfig,
    counts: {
      approved: tally("approved"),
      processing: tally("processing"),
      pending: tally("pending"),
    },
    // Only the 20 most recent log lines.
    log: processingState.log.slice(-20),
  });
});

// Update processing configuration (delay between items, enable/disable)
app.put("/api/processing/config", async (req, res) => {
  const requestedDelay = req.body.delaySeconds;
  const requestedEnabled = req.body.enabled;

  if (typeof requestedDelay === "number" && requestedDelay >= 0) {
    // Clamp to the 0–3600 second range.
    processingConfig.delaySeconds = Math.max(0, Math.min(3600, requestedDelay));
  }
  if (typeof requestedEnabled === "boolean") {
    processingConfig.enabled = requestedEnabled;
    // If enabling and there are approved items, kick the processor
    if (requestedEnabled) {
      kickProcessor();
    }
  }
  await saveProcessingConfig();
  res.json({ ok: true, config: processingConfig });
});

// Rush: skip the delay before the next item in the queue
app.post("/api/processing/rush", (req, res) => {
  processingState.rush = true;
  // If not currently running but there are approved items, start processing
  kickProcessor();
  res.json({ ok: true, message: "Rush mode enabled — next item will process immediately" });
});

// Processing log (for debug/monitoring)
app.get("/api/processing/log", (req, res) => {
  res.json({ log: processingState.log });
});

// ── Full pipeline: URL → audio → transcript → topic analysis ──────────────

// Single yt-dlp call that returns the full info-dict as JSON. One shot
// gives us title, channel, description, chapters, duration, and the
// per-language caption availability — used both for richer transcription
// prompts and for the captions-first path (skip audio + transcription
// entirely when YouTube has usable captions for this video).
//
// Resolves to null on any failure (yt-dlp error, timeout, unparsable
// JSON) — metadata is best-effort and callers must handle its absence.
async function fetchYouTubeMetadata(videoId) {
  try {
    const { stdout } = await execFileAsync(
      "yt-dlp",
      ["-j", "--no-warnings", "--skip-download", `https://www.youtube.com/watch?v=${videoId}`],
      { timeout: 30000, maxBuffer: 10 * 1024 * 1024 }
    );
    const info = JSON.parse(stdout);

    // yt-dlp lists a stream's chat replay under subtitles["live_chat"].
    // It is not a caption track, so it must not make a video look like
    // it has manual captions.
    const manualTracks = info.subtitles && typeof info.subtitles === "object"
      ? Object.keys(info.subtitles).filter((lang) => lang !== "live_chat")
      : [];

    return {
      title: info.title || "",
      uploadDate: info.upload_date || "",
      channel: info.channel || info.uploader || "",
      // Truncate to keep prompt size sane — descriptions can be huge
      // (release-notes-stuffed Lex Fridman podcasts run thousands of chars).
      description: (info.description || "").slice(0, 2000),
      chapters: Array.isArray(info.chapters) ? info.chapters : [],
      duration: typeof info.duration === "number" ? info.duration : 0,
      hasManualCaptions: manualTracks.length > 0,
      hasAutoCaptions: !!(info.automatic_captions && Object.keys(info.automatic_captions).length > 0),
    };
  } catch {
    return null;
  }
}

// Pulls YouTube captions for the given video and parses them into the
// app's standard { offset, text, duration } entries shape. Prefers
// manual captions over auto-generated. Returns null if no captions are
// available or parsing produces too few segments to be useful.
//
// Output is the same shape that `parseTimestampedTranscript()` produces
// from a Gemini transcription, so downstream code (the analysis step,
// the chunk renderer) doesn't care how the transcript got here.
// Parameters:
//   videoId — YouTube video id.
//   tmpDir  — existing directory the caption file is written into.
//   opts    — { hasManual, hasAuto } availability flags.
//   log     — log(level, message) callback; called on download/parse
//             failures and when falling back to auto captions.
// Resolves to { entries, source } where source is "manual" or "auto"
// (whichever attempt actually produced the file), or null.
async function tryFetchYouTubeCaptions(videoId, tmpDir, opts, log) {
  const { hasManual, hasAuto } = opts;
  if (!hasManual && !hasAuto) return null;

  const url = `https://www.youtube.com/watch?v=${videoId}`;
  const captionsBase = path.join(tmpDir, "captions");
  const langs = "en.*,en";

  // Try manual first (cleaner — punctuated, no fragmented words);
  // fall back to auto-generated if no manual subs. "Has manual subs"
  // only means *some* language has them, so the manual attempt can
  // succeed without writing an English file — in that case the auto
  // attempt still has to run.
  const attempts = [];
  if (hasManual) attempts.push({ source: "manual", flag: "--write-subs" });
  if (hasAuto) attempts.push({ source: "auto", flag: "--write-auto-subs" });

  let captionPath = null;
  let source = null;
  for (const attempt of attempts) {
    try {
      await execFileAsync(
        "yt-dlp",
        ["--skip-download", attempt.flag, "--sub-langs", langs, "--sub-format", "json3", "--no-warnings", "-o", captionsBase, url],
        { timeout: 30000 }
      );
    } catch (err) {
      // A failed yt-dlp run (network, bot check, timeout) is not retried
      // with the other flag — it would most likely fail the same way.
      log(1, `⚠ Caption download failed: ${(err.message || "").slice(0, 200)}`);
      return null;
    }

    // yt-dlp names the file like `captions.en.json3` or `captions.en-US.json3`.
    try {
      const files = await fs.readdir(tmpDir);
      const match = files.find((f) => f.startsWith("captions.") && f.endsWith(".json3"));
      if (match) captionPath = path.join(tmpDir, match);
    } catch {
      // Best-effort: an unreadable tmpDir is treated as "no caption file".
    }

    if (captionPath) {
      source = attempt.source;
      break;
    }
    if (attempt.source === "manual" && hasAuto) {
      log(1, "No English manual captions found — trying auto-generated captions");
    }
  }
  if (!captionPath) return null;

  let parsed;
  try {
    const content = await fs.readFile(captionPath, "utf-8");
    parsed = JSON.parse(content);
  } catch (err) {
    log(1, `⚠ Caption parse failed: ${(err.message || "").slice(0, 200)}`);
    return null;
  }

  // json3 layout: { events: [{ tStartMs, dDurationMs, segs: [{ utf8 }] }] }.
  const rawEvents = Array.isArray(parsed.events) ? parsed.events : [];
  const entries = [];
  for (const evt of rawEvents) {
    const text = (Array.isArray(evt.segs) ? evt.segs : [])
      .map((s) => s.utf8 || "")
      .join("")
      .replace(/\n+/g, " ")
      .trim();
    if (!text) continue;
    entries.push({
      offset: (evt.tStartMs || 0) / 1000,
      text,
      duration: (evt.dDurationMs || 0) / 1000,
    });
  }

  // Fewer than 5 segments is treated as "no usable captions".
  if (entries.length < 5) return null;
  return { entries, source };
}

// Coalesce a list of timestamped entries into coarser chunks.
// Used after pulling auto-captions (which are typically 1–3-word fragments
// every 1–3 seconds) so the downstream analysis prompt has ~100
// medium-sized segments instead of ~900 tiny ones. Manual captions and
// Gemini-transcribed entries already average 10–30s and pass through
// unchanged when their median duration exceeds the threshold.
//
// entries       — array of { offset, text, duration } (seconds).
// targetSeconds — a new chunk starts once an entry begins at least this
//                 many seconds after the current chunk's start.
// Returns the input array itself when it is empty, not an array, or
// already coarse (median duration >= 8s); otherwise a new array of
// merged { offset, text, duration } chunks. Entries with blank text are
// dropped.
function coalesceTranscriptEntries(entries, targetSeconds = 15) {
  if (!Array.isArray(entries) || entries.length === 0) return entries;

  // Sample median duration; only coalesce if entries are too fine.
  const durations = entries.map((e) => e.duration || 0).sort((a, b) => a - b);
  const median = durations[Math.floor(durations.length / 2)];
  if (median >= 8) return entries; // already coarse enough

  const merged = [];
  let current = null;
  for (const e of entries) {
    const text = (e.text || "").trim();
    if (!text) continue;

    // A missing offset counts as 0 so the elapsed-time arithmetic below
    // never produces NaN (which would merge everything into one chunk).
    const start = e.offset || 0;
    const end = start + (e.duration || 0);

    if (!current || start - current.offset >= targetSeconds) {
      if (current) merged.push(current);
      current = { offset: start, text, duration: e.duration || 0 };
      continue;
    }

    current.text = `${current.text} ${text}`.replace(/\s+/g, " ").trim();
    // Caption events can overlap or have zero length, so a later fragment
    // may end before an earlier one — never let the chunk shrink.
    current.duration = Math.max(current.duration, end - current.offset);
  }
  if (current) merged.push(current);
  return merged;
}

// Reduce an over-long transcript down to roughly `maxEntries` segments
// so the analysis prompt fits in smaller model contexts (typical local
// models are 16k–32k tokens — a 2-hour podcast at Parakeet's ~5s
// granularity easily blows past that). Bucket size is computed from
// total audio duration so we end up with at most `maxEntries` segments
// regardless of source granularity. Unlike coalesceTranscriptEntries
// above, this one is purely count-driven — no median-duration guard,
// because the failure mode is "context exceeded" not "bad UX from
// over-fragmented entries".
//
// Returns { coalesced, indexMap } where indexMap[i] = { startOrig,
// endOrig } maps coalesced-entry i back to a range of original-entry
// indices. The caller uses this to translate section start/end indices
// returned by the analyzer back into the original entries array, so
// the final transcript displayed to the user keeps its full granularity
// — only the analyzer sees the coarser view.
//
// When no coalescing happens (input is not an array, has at most
// `maxEntries` entries, or has a non-positive total duration) the input
// is returned as-is with indexMap: null.
//
// FLAGGED TO WATCH: ship 0.2.28. Auto-coalesce may degrade analysis
// quality on borderline content (the LLM sees fewer, longer segments
// — section boundaries get blurrier). If users report missed topics or
// imprecise section starts on long content, the alternative is real
// chunked analysis (split into overlapping windows, analyze each,
// stitch sections at boundaries) — significantly more involved.
function coalesceForAnalysis(entries, maxEntries = 400) {
  if (!Array.isArray(entries) || entries.length <= maxEntries) {
    return { coalesced: entries, indexMap: null };
  }
  const lastEntry = entries[entries.length - 1];
  const totalDuration = (lastEntry.offset || 0) + (lastEntry.duration || 0);
  if (totalDuration <= 0) {
    return { coalesced: entries, indexMap: null };
  }

  // Bucket size: roughly total / maxEntries, but never tighter than 15s
  // (a typical "natural pause" gap — going much smaller would defeat
  // the point of coalescing).
  const targetSeconds = Math.max(15, Math.ceil(totalDuration / maxEntries));

  const merged = [];
  const indexMap = [];
  let current = null;
  let currentStartOrig = 0;
  for (let i = 0; i < entries.length; i++) {
    const e = entries[i];
    const text = (e.text || "").trim();
    if (!text) continue;

    // A missing offset counts as 0 so the arithmetic never yields NaN.
    const start = e.offset || 0;
    const end = start + (e.duration || 0);

    if (!current) {
      current = { offset: start, text, duration: e.duration || 0 };
      currentStartOrig = i;
      continue;
    }

    if (start - current.offset >= targetSeconds) {
      merged.push(current);
      // The closed bucket covers everything up to the entry before this
      // one, including any blank entries that were skipped.
      indexMap.push({ startOrig: currentStartOrig, endOrig: i - 1 });
      current = { offset: start, text, duration: e.duration || 0 };
      currentStartOrig = i;
    } else {
      current.text = `${current.text} ${text}`.replace(/\s+/g, " ").trim();
      // Entries can overlap or have zero length, so a later one may end
      // before an earlier one — never let the bucket's duration shrink.
      current.duration = Math.max(current.duration, end - current.offset);
    }
  }
  if (current) {
    merged.push(current);
    indexMap.push({ startOrig: currentStartOrig, endOrig: entries.length - 1 });
  }
  return { coalesced: merged, indexMap };
}

// ── In-flight free-tier job status + cancel ─────────────────────────────────
// Lets the web UI render a "Currently processing X — Cancel" banner after
// a browser refresh, when the SSE stream from the original /api/process
// call is no longer attached. Only the free-tier slot is tracked today;
// paid-tier batch queueing happens client-side.
app.get("/api/process/current", (req, res) => {
  // ?logs=1 returns the in-flight job's accumulated log buffer so a
  // browser refresh mid-pipeline can repopulate the activity log
  // instead of starting blank. Default is the lightweight header-only
  // shape used by the 5s banner poll.
  const includeLogs = req.query.logs === "1" || req.query.logs === "true";
  res.json({ job: getCurrentFreeJob({ includeLogs }) });
});

app.post("/api/process/cancel", (_req, res) => {
  const had = abortCurrentFreeJob();
  if (!had) return res.json({ ok: true, cancelled: false });
  // We don't kill the in-flight yt-dlp / model API call here — the
  // pipeline polls isFreeJobAborted() at each major step and throws
  // early, which lands in the request handler's finally block where the
  // slot is released. So cancellation latency is bounded by the time
  // until the next checkpoint (a few seconds in practice, up to the
  // length of one outstanding model call).
  res.json({ ok: true, cancelled: true });
});

// ── Auto-discovery of provider connection info ──────────────────────────────
// The picker UI hits this on boot to pre-fill placeholders for providers
// that have a server-detectable default — most notably Ollama on
// StartOS, reachable at the documented `http://<package-id>.startos:<port>`
// internal hostname when installed alongside Recap.
app.get("/api/providers/discover", async (_req, res) => {
  const out = {};
  // Ollama: prefer the URL the operator set via the StartOS action,
  // then try the canonical service-discovery hostname.
  // NOTE(review): an earlier version of this comment also promised a
  // final localhost fallback for dev outside StartOS; no such probe
  // exists in this handler — confirm whether it should be added.
  try {
    const cfg = await config.getConfigSnapshot();
    const fromConfig = (cfg.ollama_base_url || "").trim();
    let ollamaUrl = null;
    let source = null;
    if (fromConfig) {
      ollamaUrl = fromConfig;
      source = "config";
    } else {
      const candidate = "http://ollama.startos:11434";
      // Any network error or timeout simply means "not reachable".
      const ok = await fetch(`${candidate}/api/tags`, {
        signal: AbortSignal.timeout(1500),
      }).then((r) => r.ok).catch(() => false);
      if (ok) {
        ollamaUrl = candidate;
        source = "startos-dep";
      }
    }
    if (ollamaUrl) {
      // Probe /api/tags to list installed models — picker UI surfaces
      // them as dropdown options so users don't have to type them.
      let models = [];
      try {
        const tagsRes = await fetch(`${ollamaUrl.replace(/\/$/, "")}/api/tags`, {
          signal: AbortSignal.timeout(2000),
        });
        if (tagsRes.ok) {
          const data = await tagsRes.json();
          models = (data.models || []).map((m) => m.name).filter(Boolean);
        }
      } catch {
        // Best-effort: the URL is still reported, just without models.
      }
      out.ollama = { baseURL: ollamaUrl, source, models };
    }
  } catch {
    // Best-effort: discovery failures yield an empty result, not an error.
  }
  res.json(out);
});

// Quick connection-test endpoint. The picker UI Test button hits this
// to verify a provider+model+opts combo actually works before the user
// commits to using it. Sends a tiny prompt and returns the model's
// 3-word answer + latency, or a clear error string.
//
// Invalid requests get HTTP 400; a failed provider call is reported as
// HTTP 200 with { ok: false, error, latencyMs }.
app.post("/api/providers/test", async (req, res) => {
  const { providerId, model, opts: clientOpts } = req.body || {};
  if (!providerId || typeof providerId !== "string") {
    return res.status(400).json({ ok: false, error: "missing providerId" });
  }
  if (!model || typeof model !== "string") {
    return res.status(400).json({ ok: false, error: "missing model" });
  }
  if (!PROVIDER_NAMES.includes(providerId)) {
    return res.status(400).json({ ok: false, error: `unknown provider: ${providerId}` });
  }

  let provider;
  try {
    const cfg = await config.getConfigSnapshot();
    const resolvedOpts = resolveProviderOpts(providerId, {
      config: cfg,
      clientOpts: (clientOpts && typeof clientOpts === "object") ? clientOpts : {},
    });
    provider = getProvider(providerId, resolvedOpts);
  } catch (err) {
    return res.status(400).json({ ok: false, error: err.message });
  }
  if (!provider.capabilities.analyze) {
    return res.status(400).json({ ok: false, error: `${providerId} cannot analyze text` });
  }

  const t0 = Date.now();

  // Special-case the relay: a real analyzeText round-trip would burn
  // one of the user's lifetime credits per click. Use pingBalance
  // instead — verifies connectivity + auth, returns the current
  // balance, no charge.
  if (providerId === "relay") {
    try {
      const env = await provider.pingBalance({ timeoutMs: 5000 });
      const latencyMs = Date.now() - t0;
      const credits = env?.credits_remaining;
      const tier = env?.tier || "core";
      const summary = credits == null
        ? `Connected · Tier: ${tier}`
        : `Connected · Tier: ${tier} · ${credits} credit${credits === 1 ? "" : "s"} remaining`;
      return res.json({
        ok: true,
        text: summary,
        latencyMs,
        provider: providerId,
        model,
      });
    } catch (err) {
      return res.json({
        ok: false,
        error: (err?.message || String(err)).slice(0, 300),
        latencyMs: Date.now() - t0,
      });
    }
  }

  try {
    const result = await provider.analyzeText({
      prompt: "Respond with exactly three words confirming you received this prompt.",
      model,
      retries: 0,
    });
    const latencyMs = Date.now() - t0;
    return res.json({
      ok: true,
      text: (result.text || "").trim().slice(0, 200),
      latencyMs,
      provider: providerId,
      model,
    });
  } catch (err) {
    return res.json({
      ok: false,
      error: (err?.message || String(err)).slice(0, 300),
      latencyMs: Date.now() - t0,
    });
  }
});

// Per-provider, per-field boolean indicating whether the StartOS
// config has a non-empty value for each PROVIDER_KEY_FIELDS slot.
// The picker UI uses this to (a) show a "✓ Server-configured" hint
// under empty inputs, and (b) decide whether the Delete button is
// visible when localStorage is empty but the server has a value.
//
// Never returns the actual values — only booleans. Anything stored in
// startos-config.json could be secret and shouldn't surface in any
// response that could end up in a screenshot.
app.get("/api/providers/credentials-status", async (_req, res) => { const { PROVIDER_KEY_FIELDS } = await import("./providers/index.js"); const cfg = await getConfigSnapshot(); const status = {}; for (const [providerId, fields] of Object.entries(PROVIDER_KEY_FIELDS)) { status[providerId] = {}; for (const [optName, cfgKey] of Object.entries(fields)) { const v = cfg[cfgKey]; status[providerId][optName] = typeof v === "string" && v.trim().length > 0; } } res.json({ status }); }); // Clear all server-side config fields for a provider (the StartOS // action-set values). The picker UI's Delete button calls this in // addition to wiping localStorage so credentials are gone from BOTH // storage paths. Returns the list of fields that were cleared. app.post("/api/providers/:id/clear", async (req, res) => { const providerId = req.params.id; if (!PROVIDER_NAMES.includes(providerId)) { return res.status(400).json({ ok: false, error: `unknown provider: ${providerId}` }); } // Build a patch of { config_field: "" } for every PROVIDER_KEY_FIELDS // entry mapped to this provider. Unknown providers or providers with // no server-side fields (e.g. relay, whose URL is hardcoded) end // up clearing nothing — still a 200, just with empty `cleared`. 
const { PROVIDER_KEY_FIELDS } = await import("./providers/index.js"); const fields = PROVIDER_KEY_FIELDS[providerId] || {}; const patch = {}; for (const cfgKey of Object.values(fields)) { if (typeof cfgKey === "string") patch[cfgKey] = ""; } try { await config.mergeConfig(patch); return res.json({ ok: true, cleared: Object.keys(patch) }); } catch (err) { return res.status(500).json({ ok: false, error: (err?.message || String(err)).slice(0, 300), }); } }); app.post("/api/process", async (req, res) => { const { url, apiKey: clientKey, model, type: itemType, title: itemTitle, uploadDate: itemUploadDate, episodeId, transcriptionProvider: reqTransProvider, transcriptionModel: reqTransModel, analysisProvider: reqAnaProvider, analysisModel: reqAnaModel, providerOpts: reqProviderOpts, useYouTubeCaptions: reqUseYTCaptions, } = req.body; // Default: use captions when available (huge speed/cost win). The // picker-UI toggle lets users force a full transcription pass when // they want speaker labels (captions don't have them) or when the // auto-captions quality is too low. const useYouTubeCaptions = reqUseYTCaptions !== false; // Sentinel error message thrown by checkCancelled() when the user // hits the in-flight banner's Cancel button. Declared at handler // scope (not inside the try) so the catch block can compare against // it — see the catch a few hundred lines below. const CANCELLED_MARK = "__recap_cancelled__"; // Per-provider client-side opts: { gemini: {apiKey}, anthropic: {apiKey}, // openai: {apiKey}, "openai-compatible": {apiKey, baseURL}, ollama: {baseURL} }. // Each provider's opts override that provider's config-stored values // (set via the StartOS actions). Used by the picker UI to BYO keys // per provider without round-tripping the StartOS dashboard. const providerOpts = (reqProviderOpts && typeof reqProviderOpts === "object") ? 
reqProviderOpts : {}; // Provider selection: each pipeline step (transcribe + analyze) can // independently target any registered provider. Both default to gemini // so existing clients (which don't send provider fields) keep working. const transcriptionProvider = reqTransProvider || "gemini"; const analysisProvider = reqAnaProvider || "gemini"; // Free tier: unlicensed users can summarize one video at a time. They // still bring their own key — same as paid users today; the key can // come from either the StartOS config action (server-side) or the // web UI Settings panel (client-side). The future "bundled key" relay // (paid users' requests proxied through the operator's service) isn't // built yet, so there's nothing here that gates key sourcing by tier. // AbortController for this request. Fired by abortCurrentFreeJob() // when the user hits Cancel — passed through to every provider SDK // call (transcription + analysis) so in-flight network requests // reject immediately instead of running to completion. const abortController = new AbortController(); // Stable identifier for this summarize job. Sent to the relay // (when used) as `X-Recap-Job-Id`. The relay charges 1 credit on // the first call with a given jobId and treats subsequent calls // with the same id as free — so a full summary (transcribe + // analyze) costs one credit regardless of which steps route // through the relay. Non-relay providers ignore this opt. const jobId = randomUUID(); const isFree = isFreeUser(); if (isFree) { if (!tryAcquireFreeSlot({ url, title: itemTitle, abortController })) { const current = getCurrentFreeJob(); const elapsedSec = current ? Math.round(current.elapsedMs / 1000) : 0; const what = current?.title || current?.url || "another video"; return res.status(409).json({ error: "processing_in_progress", message: `A summary is already being processed (${what}, started ${elapsedSec}s ago). 
Free mode handles one video at a time — wait for it to finish, or cancel it from the status bar at the top of the app.`, currentJob: current, }); } } if (!url) { if (isFree) releaseFreeSlot(); return res.status(400).json({ error: "Missing url" }); } if (!PROVIDER_NAMES.includes(transcriptionProvider)) { if (isFree) releaseFreeSlot(); return res.status(400).json({ error: "unknown_provider", message: `Unknown transcription provider: ${transcriptionProvider}` }); } if (!PROVIDER_NAMES.includes(analysisProvider)) { if (isFree) releaseFreeSlot(); return res.status(400).json({ error: "unknown_provider", message: `Unknown analysis provider: ${analysisProvider}` }); } // Resolve per-provider construction opts from the StartOS config blob, // overlaying any client-supplied opts (req.body.providerOpts[name]). // For Gemini, the legacy single "apiKey" field on the request body // also flows through as a fallback when providerOpts.gemini.apiKey // isn't set — keeps pre-picker-UI clients working. const cfg = await getConfigSnapshot(); function clientOptsFor(name) { const fromBody = (providerOpts[name] && typeof providerOpts[name] === "object") ? 
providerOpts[name] : {}; if (name === "gemini" && !fromBody.apiKey) { const legacy = resolveApiKey(clientKey); if (legacy) return { ...fromBody, apiKey: legacy }; } return fromBody; } const transcriptionOpts = resolveProviderOpts(transcriptionProvider, { config: cfg, clientOpts: clientOptsFor(transcriptionProvider), }); const analysisOpts = resolveProviderOpts(analysisProvider, { config: cfg, clientOpts: clientOptsFor(analysisProvider), }); let transcriber; try { transcriber = getProvider(transcriptionProvider, transcriptionOpts); } catch (err) { if (isFree) releaseFreeSlot(); return res.status(400).json({ error: "transcription_provider_not_configured", message: `Transcription provider ${transcriptionProvider} is not configured: ${err.message}`, }); } if (!transcriber.capabilities.transcribe) { if (isFree) releaseFreeSlot(); return res.status(400).json({ error: "transcription_unsupported", message: `Provider ${transcriptionProvider} does not support audio transcription. Pick a different transcription provider (gemini or openai).`, }); } let analyzer; try { analyzer = transcriptionProvider === analysisProvider ? transcriber : getProvider(analysisProvider, analysisOpts); } catch (err) { if (isFree) releaseFreeSlot(); return res.status(400).json({ error: "analysis_provider_not_configured", message: `Analysis provider ${analysisProvider} is not configured: ${err.message}`, }); } if (!analyzer.capabilities.analyze) { if (isFree) releaseFreeSlot(); return res.status(400).json({ error: "analysis_unsupported", message: `Provider ${analysisProvider} does not support text analysis.`, }); } // Determine if this is a podcast episode or YouTube video const isPodcast = itemType === "podcast" || /\.(mp3|m4a|ogg|opus|wav|aac)(\?|$)/i.test(url); const videoId = isPodcast ? 
(episodeId || url) : extractVideoId(url); if (!isPodcast && !videoId) { if (isFree) releaseFreeSlot(); return res.status(400).json({ error: "Invalid YouTube URL" }); } const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "yts-")); const audioExt = isPodcast ? (url.match(/\.(mp3|m4a|ogg|opus|wav|aac)/i)?.[1] || "mp3") : "mp3"; const audioPath = path.join(tmpDir, `audio.${audioExt}`); const mimeType = { mp3: "audio/mp3", m4a: "audio/mp4", ogg: "audio/ogg", opus: "audio/opus", wav: "audio/wav", aac: "audio/aac" }[audioExt] || "audio/mp3"; try { const pipelineStart = Date.now(); // Set up SSE res.writeHead(200, { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", Connection: "keep-alive", }); // Helper to send log entries with elapsed time. Also pushes onto the // free-tier in-flight job's server-side buffer (no-op when there is // no free job, e.g. licensed user) so a browser refresh during a // long pipeline can re-hydrate the activity log from the server // instead of starting blank. const logHistory = []; function log(step, message, detail) { const elapsed = ((Date.now() - pipelineStart) / 1000).toFixed(1); const logMsg = `[${elapsed}s] ${message}`; console.log(` ${logMsg}`); const entry = { elapsed, message, detail: detail || null }; logHistory.push(entry); appendCurrentJobLog(entry); sendEvent(res, "status", { step, message }); sendEvent(res, "log", entry); } // Cancellation checkpoint. Polled at each major pipeline boundary; // throws CANCELLED_MARK (declared at handler scope above) when the // user has hit Cancel on the status banner so the request bails // out cleanly via the catch path. Also catches abort signals fired // by abortCurrentFreeJob() in case an SDK call swallowed the abort // and returned cleanly. 
function checkCancelled() {
  // Two sources are consulted, in this order: this request's own
  // AbortController, then — for free-tier requests only — isFreeJobAborted().
  const cancelled =
    abortController.signal.aborted || (isFree && isFreeJobAborted());
  if (cancelled) {
    throw new Error(CANCELLED_MARK);
  }
}

let videoTitle = itemTitle || "Untitled";
let videoUploadDate = itemUploadDate || "";
// Rich metadata used to ground the transcription prompt (speaker
// names from the channel/description/chapters) and surfaced to
// downstream UI. Populated for YouTube videos only; podcasts have
// their own (lighter) metadata path.
let videoChannel = "";
let videoDescription = "";
let videoChapters = [];
// If captions came from YouTube we skip Step 1 (download audio) +
// Step 2 (transcribe) and jump straight to analysis. `entries` is
// also the post-Step-2 output of the transcription path, so this
// value just flows through.
let entries = null;
let captionSource = null; // "manual" | "auto" | null
let transcriptText = "";
let txCost = {
  inputTokens: 0,
  outputTokens: 0,
  thinkingTokens: 0,
  totalTokens: 0,
  totalCost: "0",
  totalCostDisplay: "$0.00",
};
// Hoisted out of the audio-path block so Step 3 (analysis) can
// reference analysisModel even when the captions fast-path skips
// the entire audio + transcription block. transcriptionModel is
// only used inside the audio block but lives here for symmetry.
// Per-provider model defaults: caller's request → provider's first
// listed model → Gemini fallback (preserves the prior default for
// legacy clients that don't send model fields).
const transcriptionModel = reqTransModel || transcriber.listTranscriptionModels()[0] || "gemini-3-flash-preview"; const analysisModel = reqAnaModel || model || analyzer.listAnalysisModels()[0] || "gemini-3.1-pro-preview"; // ── Step 0 (YouTube only): metadata + captions fast path ── if (!isPodcast && videoId) { log(1, "Fetching video metadata..."); const meta = await fetchYouTubeMetadata(videoId); if (meta) { if (meta.title) videoTitle = meta.title; if (meta.uploadDate) videoUploadDate = meta.uploadDate; videoChannel = meta.channel; videoDescription = meta.description; videoChapters = meta.chapters; log(1, `Video title: ${videoTitle}${videoChannel ? ` (${videoChannel})` : ""}`); if ((meta.hasManualCaptions || meta.hasAutoCaptions) && !useYouTubeCaptions) { log(1, `YouTube captions available but the user has the captions fast-path disabled — will transcribe audio directly.`); } else if (meta.hasManualCaptions || meta.hasAutoCaptions) { log(1, `YouTube captions available (${meta.hasManualCaptions ? "manual" : "auto"}) — attempting fast-path…`); const cap = await tryFetchYouTubeCaptions( videoId, tmpDir, { hasManual: meta.hasManualCaptions, hasAuto: meta.hasAutoCaptions }, log ); if (cap && cap.entries && cap.entries.length > 0) { // Auto-captions fragment audio every 1–3 seconds (often only // 1–3 words per entry). Sending hundreds of micro-segments // to the analyzer balloons the prompt and overloads the // model's index-tracking ("Section N must start at index // K+1 of section M…"). Coalesce into ~15s blocks for a // saner analysis prompt while keeping timestamps accurate. const rawCount = cap.entries.length; entries = coalesceTranscriptEntries(cap.entries, 15); captionSource = cap.source; log( 1, `Using YouTube ${cap.source === "manual" ? 
"manual" : "auto-generated"} captions — ${rawCount} segments coalesced to ${entries.length}, skipping audio download + transcription` ); } else { log(1, "Captions unusable or failed to parse — falling back to audio transcription"); } } else { log(1, "No captions available — will download audio and transcribe"); } } } checkCancelled(); // ── Step 1: Download audio (skipped when captions populated entries) ── if (!entries) { const dlStart = Date.now(); if (isPodcast) { log(1, "Downloading podcast episode..."); await downloadPodcastAudio(url, audioPath); const stats = await fs.stat(audioPath); const sizeMB = (stats.size / (1024 * 1024)).toFixed(1); const dlTime = ((Date.now() - dlStart) / 1000).toFixed(1); log(1, `Episode downloaded (${sizeMB} MB) in ${dlTime}s`, `File size: ${sizeMB} MB`); log(1, `Episode: ${videoTitle}`); } else { log(1, "Downloading audio from YouTube..."); const dlBaseArgs = [ "-x", "--audio-format", "mp3", "--audio-quality", "5", "-o", audioPath, "--no-playlist", "--print", "%(title)s|||%(upload_date)s", "--no-simulate", ]; const dlUrl = `https://www.youtube.com/watch?v=${videoId}`; const cookieArgs = ytCookieArgs(); const hasCookies = cookieArgs.length > 0; let usedCookies = false; let dlStdout = ""; // Helper: attempt a single yt-dlp download async function attemptDownload(args, label) { const result = await execFileAsync("yt-dlp", args, { timeout: 600000 }); return result.stdout || ""; } // Helper: check if error is a bot detection / rate limit block function isBotBlock(errText) { return /Sign in|confirm you're not a bot|bot detection|JsChallengeProvider|js.*challenge|HTTP Error 403|Too Many Requests|429/i.test(errText); } // ── Smart download with retry ── // Strategy: cookies → no-cookies → wait & retry (up to 3 attempts with increasing delays) const MAX_RETRIES = 3; const RETRY_DELAYS = [30, 60, 120]; // seconds — escalating backoff let downloaded = false; let lastError = ""; for (let attempt = 0; attempt <= MAX_RETRIES && !downloaded; 
attempt++) { // On retry attempts, wait before trying again if (attempt > 0) { const waitSec = RETRY_DELAYS[Math.min(attempt - 1, RETRY_DELAYS.length - 1)]; log(1, `⏳ YouTube is rate-limiting. Waiting ${waitSec}s before retry ${attempt}/${MAX_RETRIES}...`); sendEvent(res, "status", { step: 1, message: `Rate limited — retrying in ${waitSec}s (attempt ${attempt}/${MAX_RETRIES})` }); await new Promise(r => setTimeout(r, waitSec * 1000)); log(1, `Retrying download (attempt ${attempt}/${MAX_RETRIES})...`); // Clean up any partial file from previous attempt await fs.unlink(audioPath).catch(() => {}); } // Try with cookies first if (hasCookies && !usedCookies) { try { log(1, attempt === 0 ? "Trying download with browser cookies (ad-free)..." : "Retrying with cookies..."); dlStdout = await attemptDownload([...dlBaseArgs, ...cookieArgs, dlUrl], "cookies"); usedCookies = true; downloaded = true; break; } catch (cookieErr) { const cookieMsg = (cookieErr.stderr || "") + " " + (cookieErr.message || ""); if (attempt === 0) log(1, `⚠ Cookie download failed: ${cookieMsg.trim().slice(0, 200)}`); log(1, "Retrying without cookies..."); await fs.unlink(audioPath).catch(() => {}); } } // Try without cookies if (!downloaded) { try { dlStdout = await attemptDownload([...dlBaseArgs, dlUrl], "no-cookies"); downloaded = true; break; } catch (dlErr) { lastError = (dlErr.stderr || "") + " " + (dlErr.stdout || "") + " " + (dlErr.message || ""); const blocked = isBotBlock(lastError); if (blocked && attempt < MAX_RETRIES) { log(1, `⚠ YouTube bot detection triggered`); // Will loop back and wait continue; } if (blocked && attempt === MAX_RETRIES) { // Last resort: try yt-dlp auto-update in case there's a newer version that handles this log(1, "All retries exhausted — attempting yt-dlp auto-update as last resort..."); const updateResult = await autoUpdateYtdlp(DATA_DIR); if (updateResult.success) { log(1, "yt-dlp updated! 
Final retry..."); try { const retryResult = await attemptDownload([...dlBaseArgs, dlUrl], "post-update"); dlStdout = retryResult; downloaded = true; break; } catch { /* fall through to error */ } } } // Non-bot error or exhausted retries if (!downloaded) { log(1, `⚠ yt-dlp error: ${lastError.trim().slice(0, 300)}`); } } } } if (!downloaded) { const blocked = isBotBlock(lastError); let hint = ""; if (blocked) { hint = "\n\nYouTube is temporarily blocking downloads from your IP address. " + "This is usually caused by:\n" + "• Recent VPN use (YouTube flags VPN IPs)\n" + "• Too many downloads in a short period\n" + "• YouTube's general anti-bot measures\n\n" + "What to try:\n" + "• Wait 10-30 minutes and try again\n" + "• Disconnect any VPN/proxy\n" + "• Upload fresh cookies.txt via Settings\n" + "• Try a different network (mobile hotspot, etc.)"; } throw new Error(`Download failed after ${MAX_RETRIES} retries.${hint}\n\nLast error: ${lastError.trim().slice(0, 300)}`); } if (!usedCookies && hasCookies) { log(1, "⚠ Downloaded without cookies — audio may contain ads"); } const stats = await fs.stat(audioPath); const sizeMB = (stats.size / (1024 * 1024)).toFixed(1); const dlTime = ((Date.now() - dlStart) / 1000).toFixed(1); log(1, `Audio downloaded (${sizeMB} MB) in ${dlTime}s`, `File size: ${sizeMB} MB`); // Extract title from the --print output of the download command const fallbackTitle = videoTitle !== "Untitled" ? 
videoTitle : null; let gotTitle = false; // First try: parse title from the download stdout (most reliable — same call that succeeded) if (dlStdout) { const printLines = dlStdout.split("\n").map(l => l.trim()).filter(Boolean); for (const line of printLines) { if (line.includes("|||")) { const sep = line.indexOf("|||"); const t = line.slice(0, sep).trim(); const d = line.slice(sep + 3).trim(); if (t && t !== "NA") { videoTitle = t; if (d && d !== "NA") videoUploadDate = d; gotTitle = true; log(1, `Video title: ${videoTitle}`); break; } } } } // Second try: separate yt-dlp call (no cookies needed for public metadata) if (!gotTitle) { try { const { stdout } = await execFileAsync("yt-dlp", [ "--print", "%(title)s|||%(upload_date)s", "--no-download", `https://www.youtube.com/watch?v=${videoId}`, ], { timeout: 15000 }); const raw = stdout.trim(); const sep = raw.indexOf("|||"); if (sep > 0) { videoTitle = raw.slice(0, sep).trim() || fallbackTitle || "Untitled"; const d = raw.slice(sep + 3).trim(); if (d && d !== "NA") videoUploadDate = d || videoUploadDate; } else { videoTitle = raw || fallbackTitle || "Untitled"; } gotTitle = videoTitle !== "Untitled"; if (gotTitle) log(1, `Video title: ${videoTitle}`); } catch { // Title fetch failed } } // Third try: use the queue-provided title if (!gotTitle && fallbackTitle) { videoTitle = fallbackTitle; log(1, `Using queue title: ${fallbackTitle}`); gotTitle = true; } if (!gotTitle) { log(1, "⚠ Could not fetch video title"); } } checkCancelled(); // ── Step 2: Transcribe audio ── // Detect audio duration to choose strategy const audioDuration = await getAudioDuration(audioPath); const audioDurMin = audioDuration ? (audioDuration / 60).toFixed(1) : "unknown"; log(2, `Audio duration: ${audioDuration ? 
formatTime(Math.floor(audioDuration)) : "unknown"} (${audioDurMin} min)`); // Strategy: // < 60 min AND < 30 MB → Flash on full file (fast, cheap, reliable) // ≥ 60 min OR ≥ 30 MB → go straight to chunked transcription with Flash (45-min chunks) // If full-file transcription is truncated or empty → fall back to chunks const CHUNK_TIME_THRESHOLD = 60 * 60; // 60 minutes const CHUNK_SIZE_THRESHOLD = 30 * 1024 * 1024; // 30 MB let audioFileSize = 0; try { audioFileSize = (await fs.stat(audioPath)).size; } catch {} const audioSizeMB = (audioFileSize / (1024 * 1024)).toFixed(1); const needsChunking = (audioDuration && audioDuration >= CHUNK_TIME_THRESHOLD) || (audioFileSize >= CHUNK_SIZE_THRESHOLD); if (needsChunking) { const reason = audioDuration >= CHUNK_TIME_THRESHOLD ? `${audioDurMin} min` : `${audioSizeMB} MB`; log(2, `Large audio (${reason}) — will use chunked transcription with ${transcriber.name}/${transcriptionModel}`); } // Transcription model fallback chain: user's chosen model first, // then the rest of the provider's list. If Gemini 3 Flash hits a // 503 capacity error, the wrapper transparently retries with // 2.5 Flash. Matches the analysis fallback pattern below. const transcriptionFallbacks = [ transcriptionModel, ...transcriber.listTranscriptionModels().filter((m) => m !== transcriptionModel), ]; // Thin wrapper: keeps the call-site shape the chunking + main // pipeline already use. `transcriber.transcribeAudio` returns // { text, usage, cost, finishReason, blockReason, raw } — callers // read off that normalized shape regardless of which provider is // doing the transcription. On hard failure (after the provider's // own retry loop), walks the fallback chain to the next model. async function transcribeSingleFile(filePath, mType, titleHint, modelName, offsetSeconds = 0) { // Build the per-call fallback list: caller's chosen model // first, then the others. (Same as transcriptionFallbacks but // honors per-chunk overrides like chunkResult retries.) 
const chain = [modelName, ...transcriptionFallbacks.filter((m) => m !== modelName)]; let lastErr; for (const tryModel of chain) { try { return await transcriber.transcribeAudio({ filePath, mimeType: mType, titleHint, // Rich-context hints used by the provider's prompt builder // to anchor speaker-name extraction. Provider implementations // that don't care (e.g. Whisper) ignore these. channelHint: videoChannel, descriptionHint: videoDescription, chaptersHint: videoChapters, model: tryModel, offsetSeconds, onProgress: (msg) => log(2, msg), signal: abortController.signal, // Shared with the analyze call below so the relay bundles // both into one credit charge. Non-relay providers ignore it. jobId, }); } catch (err) { // Cancellation: bail immediately, don't try the next model. if (abortController.signal.aborted || err?.name === "AbortError") { throw new Error(CANCELLED_MARK); } lastErr = err; const msg = err?.message || String(err); log(2, `⚠ Transcription with ${tryModel} failed: ${msg.slice(0, 150)}`); if (tryModel !== chain[chain.length - 1]) { log(2, `Falling back to next transcription model...`); } } } throw lastErr || new Error("All transcription models failed"); } // ── Helper: chunked transcription for long audio ── async function transcribeChunked(srcPath, srcMime, title, modelName, logFn) { const chunkDir = path.join(os.tmpdir(), `yt-chunks-${Date.now()}`); await fs.mkdir(chunkDir, { recursive: true }); try { const audioChunks = await splitAudioFile(srcPath, chunkDir, 2700); // 45 min chunks if (!audioChunks || audioChunks.length <= 1) return null; // splitting not needed logFn(`Split audio into ${audioChunks.length} chunks for transcription`); const allEntries = []; let totalIn = 0, totalOut = 0; for (const chunk of audioChunks) { logFn(`Transcribing chunk ${chunk.index + 1}/${audioChunks.length} (starts at ${formatTime(chunk.startOffset)})...`); const chunkResult = await transcribeSingleFile( chunk.path, "audio/mpeg", title, modelName, chunk.startOffset 
); // Provider's normalized result: text + usage + cost. totalIn += chunkResult.cost.inputTokens; totalOut += chunkResult.cost.outputTokens; const chunkText = chunkResult.text; if (!chunkText) { logFn(`⚠ Chunk ${chunk.index + 1} returned empty response — skipping`); continue; } const chunkEntries = parseTimestampedTranscript(chunkText); // Programmatically adjust timestamps: the model transcribes from 0:00 relative // to the chunk, so add the chunk's startOffset to get real video timestamps if (chunk.startOffset > 0) { // Check if the model already adjusted (first entry near the startOffset) const firstOffset = chunkEntries.length > 0 ? chunkEntries[0].offset : 0; const alreadyAdjusted = firstOffset >= chunk.startOffset * 0.8; if (!alreadyAdjusted) { for (const e of chunkEntries) { e.offset += chunk.startOffset; } logFn(`Adjusted chunk ${chunk.index + 1} timestamps by +${formatTime(chunk.startOffset)}`); } } logFn(`Chunk ${chunk.index + 1}: ${chunkEntries.length} segments, last timestamp ${chunkEntries.length > 0 ? formatTime(chunkEntries[chunkEntries.length - 1].offset) : "N/A"}`); // Merge: skip entries that overlap with already-transcribed content const lastExistingTime = allEntries.length > 0 ? 
allEntries[allEntries.length - 1].offset : -1; for (const e of chunkEntries) { if (e.offset > lastExistingTime) allEntries.push(e); } } // Recalculate durations for (let i = 0; i < allEntries.length - 1; i++) { allEntries[i].duration = allEntries[i + 1].offset - allEntries[i].offset; } if (allEntries.length > 0) allEntries[allEntries.length - 1].duration = 15; logFn(`Chunked transcription complete: ${allEntries.length} total segments`); return { entries: allEntries, cost: { inputTokens: totalIn, outputTokens: totalOut, thinkingTokens: 0, totalTokens: totalIn + totalOut, totalCost: "0", totalCostDisplay: "", }, }; } finally { try { await fs.rm(chunkDir, { recursive: true, force: true }); } catch {} } } // entries / transcriptText / txCost are declared earlier (top of // the request handler) since the captions-fast-path needs to // populate them before this audio-transcription block runs. const txStart = Date.now(); if (needsChunking) { // ── Very long audio: go straight to chunked transcription ── log(2, `Skipping full-file attempt — using chunked transcription for ${audioDurMin} min audio`); const chunkedResult = await transcribeChunked(audioPath, mimeType, videoTitle, transcriptionModel, (msg) => log(2, msg)); if (chunkedResult && chunkedResult.entries.length > 0) { entries = chunkedResult.entries; txCost = chunkedResult.cost; } else { log(2, `⚠ Chunked transcription returned no entries, trying full file as last resort...`); entries = null; // fall through to full-file attempt below } } if (!entries) { // ── Normal: transcribe full file ── const transcriptResult = await transcribeSingleFile(audioPath, mimeType, videoTitle, transcriptionModel); transcriptText = transcriptResult.text; if (!transcriptText) { log(2, `⚠ Full-file transcription returned empty — falling back to chunked transcription...`); const chunkedResult = await transcribeChunked(audioPath, mimeType, videoTitle, transcriptionModel, (msg) => log(2, msg)); if (chunkedResult && 
chunkedResult.entries.length > 0) { entries = chunkedResult.entries; txCost = chunkedResult.cost; } else { throw new Error(`${transcriber.name} returned empty transcription for both full file and chunked attempts. Try again or use a shorter video.`); } } else { txCost = transcriptResult.cost; const txTime = ((Date.now() - txStart) / 1000).toFixed(1); log(2, `Transcription complete in ${txTime}s`, `${transcriptText.length} chars received`); entries = parseTimestampedTranscript(transcriptText); log(2, `Parsed ${entries.length} transcript segments`); } // ── Single-segment expansion ── // Whisper-API endpoints that don't return per-segment timestamps // (e.g. NVIDIA Parakeet, some bare-Whisper wrappers) hand back a // single text blob. parseTimestampedTranscript dumps that into // one entry at [0:00], which (a) trips the truncation detector // below and (b) leaves the analyzer with one giant segment so // it can only produce one section. Synthesize sentence-based // entries with interpolated timestamps so both code paths // downstream work like they do for Gemini/Whisper-1. 
if (entries.length === 1 && audioDuration && audioDuration > 30 && (entries[0].text || "").length > 100) { const synthesized = synthesizeEntriesFromText(entries[0].text, audioDuration); if (synthesized.length > 1) { log(2, `Single-segment transcript expanded into ${synthesized.length} synthetic sentence-based entries with interpolated timestamps`); entries = synthesized; } } // ── Truncation detection → fall back to chunks ── if (audioDuration && entries.length > 0) { const lastEntryTime = entries[entries.length - 1].offset; const coverageRatio = lastEntryTime / audioDuration; const missingSeconds = audioDuration - lastEntryTime; if (coverageRatio < 0.90 && missingSeconds > 120) { log(2, `⚠ Transcript truncated — covers ${formatTime(lastEntryTime)} of ${formatTime(Math.floor(audioDuration))} (${(coverageRatio * 100).toFixed(0)}%)`); log(2, `Falling back to chunked transcription...`); const chunkedResult = await transcribeChunked(audioPath, mimeType, videoTitle, transcriptionModel, (msg) => log(2, msg)); if (chunkedResult && chunkedResult.entries.length > 0) { entries = chunkedResult.entries; txCost = chunkedResult.cost; const finalCoverage = entries[entries.length - 1].offset; log(2, `Coverage after chunking: ${formatTime(finalCoverage)} of ${formatTime(Math.floor(audioDuration))}`); } } else { log(2, `Transcript coverage: ${formatTime(lastEntryTime)} of ${formatTime(Math.floor(audioDuration))} (${(coverageRatio * 100).toFixed(0)}%) — OK`); } } } const txTotalTime = ((Date.now() - txStart) / 1000).toFixed(1); log(2, `Total transcription time: ${txTotalTime}s — ${entries.length} segments`); if (!entries || entries.length === 0) { const preview = (transcriptText || "").slice(0, 500).replace(/\n/g, " ↵ "); log(2, `⚠ Transcript parse failed. Preview: ${preview}`); sendEvent(res, "error", { message: "Failed to parse transcript." 
}); sendEvent(res, "result", { videoId, entries: [], chunks: [], rawTranscript: transcriptText }); res.end(); return; } } // end if (!entries) — close the audio + transcription block if (!entries || entries.length === 0) { throw new Error("No transcript available — neither captions nor audio transcription produced segments."); } checkCancelled(); // ── Step 3: Topic analysis with model fallback ── // Fallback order: caller's chosen model first, then the rest of the // analyzer provider's analysis-model list. const providerModels = analyzer.listAnalysisModels(); const analysisFallbacks = [ analysisModel, ...providerModels.filter((m) => m !== analysisModel), ]; // ── Auto-coalesce input for the analysis LLM ── // Long videos with fine-grained transcripts (Parakeet ~5s / segment // on a 2-hour podcast = ~1400 entries) easily exceed local-model // context windows (typical Gemma/Llama deployments are 16k–32k // tokens). Cap the analyzer's view at ~400 segments; the displayed // transcript keeps its full granularity because we translate the // returned section indices back through indexMap before building // the final chunks. FLAGGED TO WATCH — may impact section-boundary // precision on borderline content. const { coalesced: analysisEntries, indexMap: analysisIndexMap } = coalesceForAnalysis(entries, 400); if (analysisEntries !== entries) { log( 3, `⚠ AUTO-COALESCED ANALYSIS INPUT: ${entries.length} → ${analysisEntries.length} segments (large prompts can exceed local-model context limits). 
Transcript display keeps full granularity.` ); } const analysisPrompt = buildAnalysisPrompt(analysisEntries); let analysisResult = null; let usedAnalysisModel = analysisModel; const anaStart = Date.now(); for (const tryModel of analysisFallbacks) { try { log(3, `Analyzing topics across ${analysisEntries.length} segments with ${analyzer.name}/${tryModel}...`); analysisResult = await analyzer.analyzeText({ prompt: analysisPrompt, model: tryModel, onProgress: (msg) => log(3, msg), signal: abortController.signal, jobId, }); usedAnalysisModel = tryModel; break; } catch (fallbackErr) { // If the user hit Cancel mid-analysis, do not walk to the // next model — the signal is shared across attempts and would // just re-fail. Bubble the cancellation up to the request // handler's catch. if (abortController.signal.aborted || fallbackErr?.name === "AbortError") { throw new Error(CANCELLED_MARK); } const msg = fallbackErr?.message || String(fallbackErr); log(3, `⚠ ${tryModel} failed: ${msg.slice(0, 150)}`); if (tryModel !== analysisFallbacks[analysisFallbacks.length - 1]) { log(3, `Falling back to next model...`); } } } if (!analysisResult) { throw new Error("All analysis models failed. Please try again later."); } const analysisText = analysisResult.text; if (!analysisText) { throw new Error(`${analyzer.name} returned an empty analysis. The transcript may be too long for the model. Try again.`); } const anaTime = ((Date.now() - anaStart) / 1000).toFixed(1); const anaCost = analysisResult.cost; // Parse the analysis JSON let analysisJson; try { let jsonStr = analysisText.trim(); const codeBlockMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/); if (codeBlockMatch) jsonStr = codeBlockMatch[1].trim(); analysisJson = JSON.parse(jsonStr); } catch (e) { console.error("Failed to parse analysis JSON:", e.message); sendEvent(res, "error", { message: "Topic analysis returned invalid JSON. Returning raw transcript." 
}); sendEvent(res, "result", { videoId, entries, chunks: [], rawTranscript: transcriptText }); res.end(); return; } // Build final chunks. When the analyzer saw a coalesced view of the // transcript (analysisIndexMap is set), translate its section // indices back to the original-entry index space so the rendered // transcript keeps full granularity. Without translation, the // section slice would walk over a much shorter array and miss // 70%+ of the spoken content for very long videos. const chunks = analysisJson.sections.map((section) => { let start = Math.max(0, section.startIndex); let end = section.endIndex; if (analysisIndexMap) { const mappedStart = analysisIndexMap[Math.max(0, Math.min(start, analysisIndexMap.length - 1))]; const mappedEnd = analysisIndexMap[Math.max(0, Math.min(end, analysisIndexMap.length - 1))]; start = mappedStart ? mappedStart.startOrig : 0; end = mappedEnd ? mappedEnd.endOrig : entries.length - 1; } start = Math.max(0, start); end = Math.min(entries.length - 1, end); const sectionEntries = entries.slice(start, end + 1); return { title: section.title, summary: section.summary, entries: sectionEntries, startTime: sectionEntries[0]?.offset || 0, }; }).filter((c) => c.entries.length > 0); const totalTime = ((Date.now() - pipelineStart) / 1000).toFixed(1); log(3, `Topic analysis complete in ${anaTime}s — found ${chunks.length} topics`); log(3, `Pipeline finished in ${totalTime}s`); // Save to history. As of the library-for-everyone change, free // users save too — the app feels broken otherwise (summarize a // video, never find it again). const contentType = isPodcast ? 
"podcast" : "youtube"; const historyId = await saveToHistory(videoId, url, videoTitle, chunks, entries, logHistory, videoUploadDate, contentType).catch(() => null); sendEvent(res, "result", { videoId, videoTitle, entries, chunks, historyId, type: contentType }); res.end(); } catch (err) { // Treat any AbortError or aborted-signal state as a user cancellation // even if it didn't surface as CANCELLED_MARK upstream (e.g. an SDK // throws AbortError before checkCancelled() runs). const cancelled = err?.message === CANCELLED_MARK || err?.name === "AbortError" || abortController.signal.aborted; if (!cancelled) { // Dump as much context as the runtime gives us. Generic // messages like "Error in input stream" are usually wrappers // around an underlying SDK / Node stream error; the cause + // stack are what actually tells us what happened. console.error("Pipeline error:", { name: err?.name, message: err?.message, code: err?.code, status: err?.status || err?.statusCode, cause: err?.cause, stack: err?.stack, transcriptionProvider, analysisProvider, }); } if (!res.headersSent) { res.status(cancelled ? 499 : 500).json({ error: cancelled ? 
"cancelled" : err.message, }); } else if (cancelled) { sendEvent(res, "cancelled", { message: "Cancelled by user" }); res.end(); } else { sendEvent(res, "error", { message: err.message }); res.end(); } } finally { if (isFree) releaseFreeSlot(); // Clean up temp files try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {} } }); // ── Helpers ──────────────────────────────────────────────────────────────── // getAudioDuration + splitAudioFile moved to ./audio.js // sendEvent / extractVideoId / formatTime / parseTimestampedTranscript moved to ./util.js // buildAnalysisPrompt moved to ./gemini-helpers.js // ── Network mode ────────────────────────────────────────────────────────── // On StartOS (DATA_DIR=/data): always bind to 0.0.0.0 (container networking) // On local Mac dev: default to localhost (safe on public Wi-Fi) // - Your .app launcher sets LAN_MODE=true (Home) or false (Traveling) // - Running "npm start" directly defaults to localhost const isStartOS = process.env.DATA_DIR && process.env.DATA_DIR !== path.join(__dirname, ".."); const lanMode = isStartOS ? true : process.env.LAN_MODE === "true"; const BIND_HOST = lanMode ? "0.0.0.0" : "127.0.0.1"; app.get("/api/network-mode", (req, res) => { res.json({ lan: lanMode }); }); // ── Start server ─────────────────────────────────────────────────────────── app.listen(PORT, BIND_HOST, async () => { console.log(`\n Recap API running on http://${BIND_HOST}:${PORT}`); console.log(` Data directory: ${DATA_DIR}`); console.log(` Checking yt-dlp...`); const info = await checkYtdlp(); if (!info.installed) { console.log(` ⚠ yt-dlp not found. 
Install it: pip install yt-dlp\n`); } else if (info.updateAvailable) { console.log(` ✓ yt-dlp ${info.version} found`); console.log(` ↑ Update available: ${info.latestVersion}`); console.log(` Auto-updating...`); const result = await autoUpdateYtdlp(DATA_DIR); if (result.success) { const refreshed = await checkYtdlp(); console.log(` ✓ yt-dlp updated to ${refreshed.version}\n`); } else { console.log(` ⚠ Auto-update failed. Run manually: yt-dlp -U\n`); } } else { console.log(` ✓ yt-dlp ${info.version} (up to date)\n`); } // Check subscriptions on startup const subs = await loadSubscriptions(); if (subs.length > 0) { console.log(` 📡 Checking ${subs.length} subscription(s) for new videos...`); await checkSubscriptions().catch(err => console.error(" ⚠ Subscription check error:", err.message)); const pending = autoQueue.filter(q => q.status === "pending").length; const approved = autoQueue.filter(q => q.status === "approved").length; if (pending > 0) console.log(` ✚ ${pending} new video(s) queued from subscriptions`); if (approved > 0) console.log(` ⏳ ${approved} approved video(s) ready for processing`); if (pending === 0 && approved === 0) console.log(` ✓ No new videos from subscriptions`); } // Resume processing any approved items from a previous session const resumeApproved = autoQueue.filter(q => q.status === "approved").length; // Also recover any items stuck in "processing" state from a crash for (const item of autoQueue) { if (item.status === "processing") { console.log(` ⚠ Recovering stuck item: ${item.title}`); item.status = "approved"; // Re-queue for processing } } if (autoQueue.some(q => q.status === "approved")) { await saveAutoQueue(); console.log(` 🔄 Starting background processor...`); // Delay slightly so the server is fully ready before internal HTTP calls setTimeout(() => kickProcessor(), 2000); } console.log(` ⚙ Processing config: ${processingConfig.delaySeconds}s delay, ${processingConfig.enabled ? 
"enabled" : "paused"}`); // Check subscriptions every hour (runs continuously on StartOS) setInterval(() => { checkSubscriptions().catch(err => console.error(" ⚠ Subscription check error:", err.message)); }, 60 * 60 * 1000); });