import express from "express";
import cors from "cors";
import { execFile } from "child_process";
import { promisify } from "util";
import { fileURLToPath } from "url";
import fs from "fs/promises";
import { createWriteStream } from "fs";
import path from "path";
import os from "os";
import https from "https";
import http from "http";
import { GoogleGenAI } from "@google/genai";
import * as license from "./license.js";
import {
  sendEvent,
  extractVideoId,
  formatTime,
  parseTimestampedTranscript,
  safeText,
  retryGemini,
  fetchUrl,
} from "./util.js";
import { calcCost, buildAnalysisPrompt } from "./gemini-helpers.js";
import {
  getAudioDuration,
  splitAudioFile,
  downloadPodcastAudio,
} from "./audio.js";
import { checkYtdlp, autoUpdateYtdlp } from "./ytdlp.js";
import {
  initCookies,
  ytCookieArgs,
  ytExtraArgs,
  ytCookieMethod,
  setupCookieRoutes,
} from "./cookies.js";

// Promise-returning execFile, used for every yt-dlp invocation in this file.
const execFileAsync = promisify(execFile);

const app = express();
const PORT = process.env.PORT || 3001;

// ── Data directory (configurable for StartOS or local dev) ────────────────
// On StartOS: DATA_DIR=/data (persistent volume)
// On local dev: defaults to project root (parent of server/)
//
// fileURLToPath (rather than URL#pathname) is required here: pathname keeps
// percent-encoding, so an install path containing a space would resolve to a
// non-existent "My%20Apps"-style directory, and Windows drive letters would
// come out as "/C:/...".
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const DATA_DIR = process.env.DATA_DIR || path.join(__dirname, "..");
const historyDir = path.join(DATA_DIR, "history");
const configDir = path.join(DATA_DIR, "config");

// Directory creation stays best-effort (startup continues), but a failure is
// logged because every later read/write under these directories will fail too.
for (const dir of [historyDir, configDir]) {
  try {
    await fs.mkdir(dir, { recursive: true });
  } catch (err) {
    console.warn(`[startup] could not create ${dir}: ${err?.message || err}`);
  }
}

// ── Server-side API key (shared across all clients) ───────────────────────
// Priority: GEMINI_API_KEY env var → StartOS config → .env file
//
// The StartOS config path is polled for changes — when a user updates the
// key via the "Set Gemini API Key" action, the new value is picked up
// without a service restart. Env var takes priority and pins the value.
const envPath = path.join(DATA_DIR, ".env");
const startosConfigPath = path.join(configDir, "startos-config.json");
// Trimmed so a whitespace-only env var does not pin an unusable key.
const envApiKey = (process.env.GEMINI_API_KEY || "").trim();
let serverApiKey = envApiKey;

/**
 * Read `gemini_api_key` from the StartOS config JSON file.
 *
 * @returns {Promise<string>} The trimmed key, or "" when the file is missing,
 *   unreadable, not valid JSON, or the field is not a string.
 */
async function readApiKeyFromConfig() {
  try {
    const content = await fs.readFile(startosConfigPath, "utf-8");
    const config = JSON.parse(content);
    const key = config?.gemini_api_key;
    // Only accept strings: any other JSON type would otherwise be stored as
    // the "key" and (for objects) re-logged as a change on every poll.
    return typeof key === "string" ? key.trim() : "";
  } catch {
    // Missing or malformed config simply means "no key from this source".
    return "";
  }
}

/**
 * Read `GEMINI_API_KEY=...` from the .env file in DATA_DIR.
 *
 * @returns {Promise<string>} The value with surrounding whitespace and one
 *   leading/trailing quote character removed, or "" when absent/unreadable.
 */
async function readApiKeyFromEnvFile() {
  try {
    const envContent = await fs.readFile(envPath, "utf-8");
    const match = envContent.match(/^GEMINI_API_KEY=(.+)$/m);
    if (match) return match[1].trim().replace(/^["']|["']$/g, "");
  } catch {
    // No .env file (or unreadable) — fall through to "".
  }
  return "";
}

/**
 * Re-resolve the in-memory server API key from disk.
 * A key supplied via the GEMINI_API_KEY env var pins the value, so nothing is
 * re-read in that case. Otherwise the StartOS config wins over the .env file.
 *
 * @param {string} reason - Short label included in the log line on change.
 * @returns {Promise<void>}
 */
async function refreshServerApiKey(reason) {
  if (envApiKey) return; // env var pins the value
  const fromConfig = await readApiKeyFromConfig();
  const next = fromConfig || (await readApiKeyFromEnvFile()) || "";
  if (next !== serverApiKey) {
    serverApiKey = next;
    console.log(`[config] server API key ${next ? "loaded" : "cleared"} (${reason})`);
  }
}

await refreshServerApiKey("startup");

// Poll the StartOS config file every few seconds and refresh the in-memory
// API key when it changes — so a key updated via the "Set Gemini API Key"
// action is picked up without a service restart. Polling is more reliable
// than fs.watch (FSEvents on macOS, inotify edge cases on Linux, atomic-
// write rename behavior in the SDK file model). The cost is one or two small
// file reads every 3 s, which is negligible.
/**
 * Parse a millisecond interval from an env-var string.
 * Falls back when the value is missing, non-numeric, or not positive, and
 * caps at the largest delay Node timers accept. Without this, a typo in the
 * env var yields NaN, which setInterval coerces to 1 ms — i.e. a tight loop.
 *
 * @param {string|undefined} raw - Raw env-var value.
 * @param {number} fallbackMs - Interval used when `raw` is unusable.
 * @returns {number} A positive integer number of milliseconds.
 */
function parseIntervalMs(raw, fallbackMs) {
  const MAX_TIMER_MS = 2 ** 31 - 1;
  const parsed = Number.parseInt(raw || "", 10);
  if (!Number.isFinite(parsed) || parsed <= 0) return fallbackMs;
  return Math.min(parsed, MAX_TIMER_MS);
}

const CONFIG_POLL_MS = parseIntervalMs(process.env.RECAP_CONFIG_POLL_MS, 3000);
setInterval(() => {
  refreshServerApiKey("config poll").catch(() => {});
}, CONFIG_POLL_MS);

// Cookies state + helpers + routes moved to ./cookies.js
await initCookies({ dataDir: DATA_DIR, envPath });

/**
 * Pick the Gemini API key for a request.
 *
 * @param {string|null|undefined} clientKey - Key sent by the client.
 * @returns {string} The server's stored key when the client sent nothing or
 *   the "USE_SERVER_KEY" sentinel; otherwise the client's own key.
 */
function resolveApiKey(clientKey) {
  // Client can send "USE_SERVER_KEY" or empty to use the server's stored key
  if (!clientKey || clientKey === "USE_SERVER_KEY") return serverApiKey;
  return clientKey;
}

app.use(cors());
app.use(express.json({ limit: "100mb" }));

// ── Keysat licensing (hard-gate / activate-screen flavor) ─────────────────
// All /api/* routes return 402 until a valid license is activated, except
// the allowlisted endpoints that exist precisely so the frontend can render
// an activation UI. See server/license.js for the verifier.
let LIC = license.checkLicense();
console.log(
  `[license] state=${LIC.state} entitlements=[${[...LIC.entitlements].join(",")}]` +
    (LIC.reason ? ` reason=${LIC.reason}` : "")
);

// Periodic online validation against licensing.keysat.xyz. Catches
// revocations, suspensions, and expirations that happen after activation —
// without it, a revoked key keeps working until the customer reinstalls.
//
// On network errors we keep the prior state up to MAX_OFFLINE_DAYS (see
// server/license.js); past the ceiling we lock out.
const VALIDATE_INTERVAL_MS = parseIntervalMs(
  process.env.RECAP_VALIDATE_INTERVAL_MS,
  6 * 60 * 60 * 1000
);
const ACTIVATE_VALIDATE_TIMEOUT_MS = 8000;

/**
 * Re-validate the license with the licensing server and store the result in
 * the module-level `LIC`. Logs only when state or reason changed.
 *
 * @param {string} reason - Label for log lines ("startup", "scheduled", ...).
 * @returns {Promise<void>} Never rejects; a thrown validateOnline is logged
 *   and the previous `LIC` is kept.
 */
async function refreshLicenseOnline(reason) {
  const prev = LIC;
  try {
    LIC = await license.validateOnline();
  } catch (e) {
    // validateOnline shouldn't throw, but be defensive.
    console.error(`[license] refresh threw (${reason}):`, e?.message || e);
    return;
  }
  if (LIC.state !== prev.state || LIC.reason !== prev.reason) {
    console.log(
      `[license] refresh (${reason}): state=${LIC.state}` +
        (LIC.reason ? ` reason=${LIC.reason}` : "") +
        ` entitlements=[${[...LIC.entitlements].join(",")}]`
    );
  }
}

// Async startup check — don't block the server from coming up.
refreshLicenseOnline("startup").catch(() => {});
setInterval(() => {
  refreshLicenseOnline("scheduled").catch(() => {});
}, VALIDATE_INTERVAL_MS);

// Endpoints reachable without a license — kept intentionally minimal.
// /api/process is open so unlicensed (free-tier) users can summarize one
// video at a time with their own Gemini key. The route handler enforces
// BYO-key + a one-at-a-time concurrency lock for free users.
const LICENSE_OPEN_PATHS = new Set([
  "/api/health",
  "/api/heartbeat",
  "/api/status",
  "/api/license-status",
  "/api/license/activate",
  "/api/license/deactivate",
  "/api/process",
]);

// Activation-screen gate: any /api/* request without a valid license is
// rejected with 402, except the allowlist above. Non-/api requests
// (the static frontend, /assets, etc.) pass through so the UI can load.
app.use((req, res, next) => {
  // Express matches routes case-insensitively by default, so "/API/history"
  // still reaches the "/api/history" handler. Compare on the lower-cased
  // path, otherwise a differently-cased URL skips this gate entirely.
  const apiPath = req.path.toLowerCase();
  if (!apiPath.startsWith("/api/")) return next();
  if (LICENSE_OPEN_PATHS.has(apiPath)) return next();
  if (LIC.state === "licensed" && LIC.entitlements.has("core")) return next();
  return res.status(402).json({
    error: "license_required",
    message:
      LIC.state === "licensed"
        ? "Your license is missing the 'core' entitlement. Contact the seller."
        : "This feature requires a Keysat license. Upgrade to unlock.",
    state: LIC.state,
    reason: LIC.reason,
    activate_url: "/#activate",
    keysat_base_url: license.KEYSAT_BASE_URL,
    product_slug: license.PRODUCT_SLUG,
  });
});

// Free-tier concurrency lock. Unlicensed users may process one video at
// a time — second submission while another is in flight returns 409.
// Released in a finally block at the bottom of /api/process.
let freeJobInFlight = false;

// Pro-tier feature gates. Each entry maps URL prefixes → required
// entitlement; first match wins. A licensed user without the right
// entitlement gets a clean 402 feature_not_in_tier (vs.
// the generic
// activation gate above).
const PRO_FEATURE_GATES = [
  {
    prefixes: ["/api/subscriptions", "/api/auto-queue", "/api/sub-check-log"],
    entitlement: "subscriptions",
    feature: "subscriptions",
    message: "Channel subscriptions and auto-queue require a Pro license. Upgrade to unlock.",
  },
  {
    prefixes: ["/api/history"],
    entitlement: "history",
    feature: "history",
    message: "Summary history requires a Pro license. Upgrade to unlock.",
  },
  {
    prefixes: ["/api/library"],
    entitlement: "library",
    feature: "library",
    message: "Library import/export requires a Pro license. Upgrade to unlock.",
  },
];

app.use((req, res, next) => {
  // Express routes are case-insensitive by default, so match prefixes on the
  // lower-cased path; otherwise "/API/History" would slip past this gate and
  // still reach the "/api/history" handler.
  const requestPath = req.path.toLowerCase();
  for (const gate of PRO_FEATURE_GATES) {
    if (gate.prefixes.some((p) => requestPath.startsWith(p))) {
      if (LIC.entitlements.has(gate.entitlement)) return next();
      return res.status(402).json({
        error: "feature_not_in_tier",
        feature: gate.feature,
        message: gate.message,
        keysat_base_url: license.KEYSAT_BASE_URL,
        product_slug: license.PRODUCT_SLUG,
      });
    }
  }
  next();
});

// License management endpoints — kept open by LICENSE_OPEN_PATHS above.
app.get("/api/license-status", (_req, res) => {
  res.json(license.publicView(LIC));
});

// Activate a license key: offline verification first, then a time-boxed
// online confirmation.
app.post("/api/license/activate", async (req, res) => {
  try {
    LIC = license.activate(req.body && req.body.license_key);
  } catch (e) {
    if (e && e.code === "bad_format") {
      return res.status(400).json({
        error: "bad_format",
        message: "Expected a license key starting with 'LIC1-'.",
      });
    }
    return res.status(500).json({ error: "activation_failed", message: e?.message });
  }
  if (LIC.state !== "licensed") {
    // Offline signature check failed — no point hitting the server.
    return res.status(400).json({
      ok: false,
      error: LIC.reason || "invalid",
      ...license.publicView(LIC),
    });
  }
  // Offline check passed. Confirm with the licensing server so a key that
  // was revoked before activation gets rejected immediately. Cap the wait
  // so a slow server doesn't hang the activation UI — if we time out,
  // accept the offline-verified state and let the periodic poll catch up.
  let timeoutHandle;
  const timeout = new Promise((resolve) => {
    timeoutHandle = setTimeout(resolve, ACTIVATE_VALIDATE_TIMEOUT_MS);
  });
  try {
    await Promise.race([refreshLicenseOnline("activation"), timeout]);
  } finally {
    // Don't leave the timer pending once the online check has answered.
    clearTimeout(timeoutHandle);
  }
  if (LIC.state === "licensed") {
    return res.json({ ok: true, ...license.publicView(LIC) });
  }
  return res.status(400).json({
    ok: false,
    error: LIC.reason || "invalid",
    ...license.publicView(LIC),
  });
});

app.post("/api/license/deactivate", (_req, res) => {
  LIC = license.deactivate();
  res.json({ ok: true, ...license.publicView(LIC) });
});

// ── History storage ───────────────────────────────────────────────────────
/**
 * Persist one processed session as `<id>.json` in the history directory.
 *
 * @param {string} videoId - Video id, or podcast episode GUID/URL.
 * @param {string} url - Source URL of the video/episode.
 * @param {string} title - Display title ("Untitled" when falsy).
 * @param {Array} chunks - Topic chunks; its length is stored as topicCount.
 * @param {Array} entries - Transcript segments; its length is segmentCount.
 * @param {Array} logs - Processing log, stored as-is.
 * @param {string} uploadDate - Upload date ("" when falsy).
 * @param {string} type - "youtube" or "podcast" ("youtube" when falsy).
 * @returns {Promise<string>} The generated session id.
 */
async function saveToHistory(videoId, url, title, chunks, entries, logs, uploadDate, type) {
  // For podcast episodes, videoId might be a long GUID/URL — use a short hash for the filename
  const idSuffix = type === "podcast"
    ? Buffer.from(videoId).toString("base64url").slice(0, 16)
    : videoId;
  const id = `${Date.now()}-${idSuffix}`;
  const record = {
    id,
    videoId,
    url,
    title: title || "Untitled",
    type: type || "youtube", // "youtube" or "podcast"
    topicCount: chunks.length,
    segmentCount: entries.length,
    createdAt: new Date().toISOString(),
    uploadDate: uploadDate || "",
    chunks,
    entries,
    logs,
  };
  await fs.writeFile(path.join(historyDir, `${id}.json`), JSON.stringify(record));
  return id;
}

// Serve the frontend from ../public
app.use(express.static(path.join(__dirname, "..", "public")));
app.use("/assets", express.static(path.join(__dirname, "..", "assets")));

// checkYtdlp + autoUpdateYtdlp moved to ./ytdlp.js
// PRICING + calcCost + buildAnalysisPrompt moved to ./gemini-helpers.js
// safeText + retryGemini moved to ./util.js

// ── Health check ───────────────────────────────────────────────────────────
app.get("/api/health", async (req, res) => {
  const info = await checkYtdlp();
  // Check cookies.txt freshness
  const cookieInfo = { method: ytCookieMethod() };
  try {
    // NOTE(review): ytCookiesFileExists / ytCookiesFilePath are not among the
    // names imported from ./cookies.js in the visible part of this file.
    // Reading them inside the try keeps the health check answering (without
    // the file-age fields) instead of rejecting if they are undeclared —
    // confirm where they are meant to come from.
    if (ytCookiesFileExists) {
      const stat = await fs.stat(ytCookiesFilePath);
      const ageMs = Date.now() - stat.mtimeMs;
      const ageDays = Math.floor(ageMs / (1000 * 60 * 60 * 24));
      cookieInfo.fileAgeDays = ageDays;
      cookieInfo.fileExpiring = ageDays > 12; // cookies typically expire after ~14 days
    }
  } catch {
    // Freshness info is best-effort; omit it when it cannot be determined.
  }
  res.json({ ok: true, ytdlp: info.installed, hasServerKey: !!serverApiKey, cookies: cookieInfo, ...info });
});

// ── Status endpoints ───────────────────────────────────────────────────────
app.post("/api/heartbeat", (req, res) => {
  res.json({ ok: true, sleeping: false });
});

app.get("/api/status", (req, res) => {
  res.json({ ok: true, sleeping: false, uptime: process.uptime() });
});

// Shutdown: used by the macOS .app launcher to stop the server cleanly.
// On StartOS this endpoint is unused (StartOS manages the container lifecycle).
app.post("/api/shutdown", (req, res) => {
  res.json({ ok: true, message: "Server shutting down..." });
  console.log("\n  Server shutdown requested from browser. Goodbye!\n");
  // Short delay so the response above is flushed before the process exits.
  setTimeout(() => process.exit(0), 300);
});

// ── Manual update endpoint ─────────────────────────────────────────────────
app.post("/api/update-ytdlp", async (req, res) => {
  try {
    const result = await autoUpdateYtdlp(DATA_DIR);
    const info = await checkYtdlp();
    res.json({ ...result, ...info });
  } catch (err) {
    // Same error shape as the other handlers in this file.
    res.status(500).json({ error: err?.message || String(err) });
  }
});

// ── Cookie management endpoints ───────────────────────────────────────────
// /api/cookies/* routes registered via setupCookieRoutes (./cookies.js)
setupCookieRoutes(app);

// ── History endpoints ─────────────────────────────────────────────────────
const metaPath = path.join(historyDir, "_meta.json");

// meta.json structure: { folders: [ { id, name, order, collapsed, items: [sessionId, ...] } ], uncategorized: [sessionId, ...]
// }

// File names inside historyDir that hold app state rather than a session.
const RESERVED_HISTORY_FILES = new Set([
  "subscriptions.json",
  "skip-list.json",
  "seen-list.json",
  "auto-queue.json",
]);

/**
 * Whether a directory entry in historyDir is a session record.
 * Excludes non-JSON files, "_"-prefixed files (e.g. _meta.json) and the
 * reserved state files, so every listing applies the same rule.
 *
 * @param {string} file - Bare file name.
 * @returns {boolean}
 */
function isSessionFile(file) {
  return file.endsWith(".json") && !file.startsWith("_") && !RESERVED_HISTORY_FILES.has(file);
}

/**
 * Map a session id to its file path, refusing ids that would escape
 * historyDir or address a state file. Route params are URL-decoded, so an id
 * such as "..%2Fconfig%2Fstartos-config" would otherwise reach files outside
 * the history directory.
 *
 * @param {unknown} id - Session id from a route param or an import payload.
 * @returns {string|null} Absolute path, or null when the id is not acceptable.
 */
function resolveSessionPath(id) {
  if (typeof id !== "string" || id === "") return null;
  const fileName = `${id}.json`;
  if (!isSessionFile(fileName)) return null;
  const root = path.resolve(historyDir);
  const resolved = path.resolve(root, fileName);
  // The file must sit directly inside historyDir — no separators, no "..".
  return path.dirname(resolved) === root ? resolved : null;
}

/**
 * Coerce arbitrary parsed JSON into the meta shape the handlers rely on:
 * `folders` is an array of objects each with an `items` array, and
 * `uncategorized` is an array. Other properties are passed through.
 *
 * @param {unknown} raw - Parsed meta JSON or a request body.
 * @returns {{folders: object[], uncategorized: string[]}}
 */
function normalizeMeta(raw) {
  const source = raw && typeof raw === "object" && !Array.isArray(raw) ? raw : {};
  const folders = Array.isArray(source.folders)
    ? source.folders
        .filter((f) => f && typeof f === "object")
        .map((f) => ({ ...f, items: Array.isArray(f.items) ? f.items : [] }))
    : [];
  const uncategorized = Array.isArray(source.uncategorized) ? source.uncategorized : [];
  return { ...source, folders, uncategorized };
}

/**
 * Load the folder/ordering metadata.
 *
 * @returns {Promise<{folders: object[], uncategorized: string[]}>} Normalized
 *   meta; an empty structure when the file is missing or unparsable.
 */
async function loadMeta() {
  try {
    return normalizeMeta(JSON.parse(await fs.readFile(metaPath, "utf-8")));
  } catch {
    return { folders: [], uncategorized: [] };
  }
}

/**
 * Write the folder/ordering metadata (pretty-printed).
 *
 * @param {object} meta - Meta structure to persist.
 * @returns {Promise<void>}
 */
async function saveMeta(meta) {
  await fs.writeFile(metaPath, JSON.stringify(meta, null, 2));
}

// Get all history: sessions + folder structure
app.get("/api/history", async (req, res) => {
  try {
    const files = await fs.readdir(historyDir);
    const sessionsMap = {};
    for (const file of files.filter(isSessionFile)) {
      try {
        const raw = await fs.readFile(path.join(historyDir, file), "utf-8");
        const data = JSON.parse(raw);
        sessionsMap[data.id] = {
          id: data.id,
          videoId: data.videoId,
          url: data.url,
          title: data.title,
          topicCount: data.topicCount,
          type: data.type || "youtube",
          segmentCount: data.segmentCount,
          createdAt: data.createdAt,
          uploadDate: data.uploadDate || "",
        };
      } catch {
        // Unreadable or corrupt session file — leave it out of the listing.
      }
    }

    const meta = await loadMeta();

    // Clean up: remove references to deleted sessions
    for (const folder of meta.folders) {
      folder.items = folder.items.filter((id) => sessionsMap[id]);
    }
    meta.uncategorized = meta.uncategorized.filter((id) => sessionsMap[id]);

    // Add any sessions not in meta (newly created), newest first
    const allReferenced = new Set([
      ...meta.uncategorized,
      ...meta.folders.flatMap((f) => f.items),
    ]);
    const orphans = Object.keys(sessionsMap)
      .filter((id) => !allReferenced.has(id))
      .sort((a, b) => new Date(sessionsMap[b].createdAt) - new Date(sessionsMap[a].createdAt));
    meta.uncategorized = [...orphans, ...meta.uncategorized];
    await saveMeta(meta);

    res.json({ sessions: sessionsMap, meta });
  } catch (err) {
    // Keep the empty-library response clients already handle, but leave a
    // trace so the failure is diagnosable.
    console.error(`[history] listing failed: ${err?.message || err}`);
    res.json({ sessions: {}, meta: { folders: [], uncategorized: [] } });
  }
});

// Get a single session (full data)
app.get("/api/history/:id", async (req, res) => {
  try {
    const filePath = resolveSessionPath(req.params.id);
    if (!filePath) return res.status(404).json({ error: "Session not found" });
    const raw = await fs.readFile(filePath, "utf-8");
    res.json(JSON.parse(raw));
  } catch {
    res.status(404).json({ error: "Session not found" });
  }
});

// Rename a session title
app.put("/api/history/:id/title", async (req, res) => {
  try {
    const filePath = resolveSessionPath(req.params.id);
    if (!filePath) return res.status(404).json({ error: "Session not found" });
    const raw = await fs.readFile(filePath, "utf-8");
    const data = JSON.parse(raw);
    data.title = req.body.title || data.title; // empty title keeps the old one
    await fs.writeFile(filePath, JSON.stringify(data));
    res.json({ ok: true, title: data.title });
  } catch {
    res.status(404).json({ error: "Session not found" });
  }
});

// Delete a session
app.delete("/api/history/:id", async (req, res) => {
  try {
    const filePath = resolveSessionPath(req.params.id);
    if (!filePath) return res.status(404).json({ error: "Session not found" });

    // Read the file first to get the videoId for the skip list
    let videoId = null;
    try {
      const raw = await fs.readFile(filePath, "utf-8");
      videoId = JSON.parse(raw).videoId;
    } catch {
      // Unreadable record: still delete it, just without a skip-list entry.
    }
    await fs.unlink(filePath);

    // Add to skip list so subscriptions don't re-queue it
    if (videoId) {
      await addToSkipList(videoId);
    }

    // Remove from meta
    const meta = await loadMeta();
    meta.uncategorized = meta.uncategorized.filter((id) => id !== req.params.id);
    for (const folder of meta.folders) {
      folder.items = folder.items.filter((id) => id !== req.params.id);
    }
    await saveMeta(meta);
    res.json({ ok: true });
  } catch {
    res.status(404).json({ error: "Session not found" });
  }
});

// Update meta (folders, ordering) — the frontend sends the full structure
app.put("/api/history/meta", async (req, res) => {
  try {
    if (!req.body || typeof req.body !== "object" || Array.isArray(req.body)) {
      return res.status(400).json({ error: "Invalid meta structure" });
    }
    // Normalize before saving so a partial body cannot persist a shape that
    // later breaks the history listing.
    await saveMeta(normalizeMeta(req.body));
    res.json({ ok: true });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

// Create a folder
app.post("/api/history/folders", async (req, res) => {
  try {
    const meta = await loadMeta();
    const folder = {
      id: `folder-${Date.now()}`,
      name: req.body.name || "New Folder",
      collapsed: false,
      items: [],
    };
    meta.folders.push(folder);
    await saveMeta(meta);
    res.json(folder);
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

// Rename a folder
app.put("/api/history/folders/:id", async (req, res) => {
  try {
    const meta = await loadMeta();
    const folder = meta.folders.find((f) => f.id === req.params.id);
    if (!folder) return res.status(404).json({ error: "Folder not found" });
    folder.name = req.body.name || folder.name;
    await saveMeta(meta);
    res.json(folder);
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

// Update a folder's collapsed state
app.put("/api/history/folders/:id/collapsed", async (req, res) => {
  try {
    const meta = await loadMeta();
    const folder = meta.folders.find((f) => f.id === req.params.id);
    if (!folder) return res.status(404).json({ error: "Folder not found" });
    folder.collapsed = !!req.body.collapsed;
    await saveMeta(meta);
    res.json({ ok: true, collapsed: folder.collapsed });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

// Delete a folder (items move to uncategorized)
app.delete("/api/history/folders/:id", async (req, res) => {
  try {
    const meta = await loadMeta();
    const idx = meta.folders.findIndex((f) => f.id === req.params.id);
    if (idx === -1) return res.status(404).json({ error: "Folder not found" });
    const [folder] = meta.folders.splice(idx, 1);
    meta.uncategorized = [...folder.items, ...meta.uncategorized];
    await saveMeta(meta);
    res.json({ ok: true });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

// Move a session to a folder (or uncategorized if folderId is null)
app.put("/api/history/move", async (req, res) => {
  try {
    const { sessionId, folderId, index } = req.body;
    const meta = await loadMeta();

    // Remove from current location
    meta.uncategorized = meta.uncategorized.filter((id) => id !== sessionId);
    for (const folder of meta.folders) {
      folder.items = folder.items.filter((id) => id !== sessionId);
    }

    // Add to new location
    if (folderId) {
      const folder = meta.folders.find((f) => f.id === folderId);
      if (folder) {
        const i = typeof index === "number" ? index : folder.items.length;
        folder.items.splice(i, 0, sessionId);
      }
    } else {
      const i = typeof index === "number" ? index : 0;
      meta.uncategorized.splice(i, 0, sessionId);
    }

    await saveMeta(meta);
    res.json({ ok: true });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

// ── Library export/import ────────────────────────────────────────────────
app.get("/api/library/export", async (req, res) => {
  try {
    const meta = await loadMeta();
    const files = await fs.readdir(historyDir);
    const sessions = {};
    for (const file of files) {
      // Same filter as the history listing, so skip-list.json and
      // seen-list.json are no longer exported as if they were sessions.
      if (!isSessionFile(file)) continue;
      try {
        const raw = await fs.readFile(path.join(historyDir, file), "utf-8");
        const id = file.slice(0, -".json".length);
        sessions[id] = JSON.parse(raw);
      } catch {
        // Skip unreadable/corrupt session files.
      }
    }

    const subscriptions = await loadSubscriptions();

    const exportData = {
      version: 1,
      exportedAt: new Date().toISOString(),
      meta,
      sessions,
      subscriptions,
    };

    res.setHeader("Content-Type", "application/json");
    res.setHeader("Content-Disposition", 'attachment; filename="recap-library.json"');
    res.json(exportData);
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

// NOTE(review): the app-wide express.json({ limit: "100mb" }) registered
// earlier parses the body first, so the 200mb limit below presumably never
// takes effect — confirm the intended limit.
app.post("/api/library/import", express.json({ limit: "200mb" }), async (req, res) => {
  try {
    const data = req.body;
    if (!data || !data.sessions) {
      return res.status(400).json({ error: "Invalid library file — missing sessions data" });
    }

    let imported = 0;
    let skipped = 0;

    // Import sessions
    for (const [id, session] of Object.entries(data.sessions)) {
      // Ids come from an uploaded file: refuse anything that would write
      // outside historyDir or over a state file.
      const filePath = resolveSessionPath(id);
      if (!filePath) {
        skipped++;
        continue;
      }
      // Skip if session already exists (don't overwrite)
      try {
        await fs.access(filePath);
        skipped++;
        continue;
      } catch {
        // Not present yet — proceed to write it.
      }
      await fs.writeFile(filePath, JSON.stringify(session));
      imported++;
    }

    // Merge meta (add imported sessions to uncategorized if not already placed)
    if (data.meta) {
      const existingMeta = await loadMeta();
      const allExistingIds = new Set([
        ...existingMeta.uncategorized,
        ...existingMeta.folders.flatMap((f) => f.items),
      ]);

      // Import folders that don't exist
      const importedFolders = Array.isArray(data.meta.folders) ? data.meta.folders : [];
      for (const folder of importedFolders) {
        if (!folder || typeof folder !== "object") continue;
        if (existingMeta.folders.some((f) => f.id === folder.id)) continue;
        const items = Array.isArray(folder.items) ? folder.items : [];
        existingMeta.folders.push({ ...folder, items });
        for (const id of items) allExistingIds.add(id);
      }

      // Add any uncategorized items that aren't already placed
      const importedLoose = Array.isArray(data.meta.uncategorized) ? data.meta.uncategorized : [];
      for (const id of importedLoose) {
        if (!allExistingIds.has(id)) {
          existingMeta.uncategorized.unshift(id);
          allExistingIds.add(id); // guard against duplicates within the import
        }
      }

      await saveMeta(existingMeta);
    }

    // Import subscriptions (merge, don't duplicate)
    if (Array.isArray(data.subscriptions) && data.subscriptions.length > 0) {
      const existingSubs = await loadSubscriptions();
      const existingUrls = new Set(existingSubs.map((s) => s.url));
      for (const sub of data.subscriptions) {
        if (!existingUrls.has(sub.url)) {
          existingSubs.push(sub);
        }
      }
      await saveSubscriptions(existingSubs);
    }

    res.json({ ok: true, imported, skipped, total: Object.keys(data.sessions).length });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

// ── Subscriptions ─────────────────────────────────────────────────────────
const subsPath = path.join(historyDir, "subscriptions.json");

/**
 * Load the saved subscriptions.
 * @returns {Promise<object[]>} The list, or [] when the file is missing/invalid.
 */
async function loadSubscriptions() {
  try {
    return JSON.parse(await fs.readFile(subsPath, "utf-8")).subscriptions || [];
  } catch {
    return [];
  }
}

/**
 * Persist the subscriptions list.
 * @param {object[]} subs - Subscriptions to write.
 * @returns {Promise<void>}
 */
async function saveSubscriptions(subs) {
  await fs.writeFile(subsPath, JSON.stringify({ subscriptions: subs }, null, 2));
}

// List recent videos from a channel/playlist via yt-dlp (no download)
// Uses --flat-playlist for speed; returns id + title (no upload_date in flat mode)
async function listChannelVideosFast(url, limit = 15) {
  const { stdout } = await execFileAsync("yt-dlp", [
    "--print", "%(id)s|%(title)s",
    "--no-download",
    "--playlist-end", String(limit),
    "--flat-playlist",
    ...ytCookieArgs(),
    ...ytExtraArgs(),
    url,
  ], { timeout: 60000 });
  const videos = [];
  for (const line of stdout.trim().split("\n").filter(Boolean)) {
    // Split on the first "|" only: titles may contain the separator.
    const idx = line.indexOf("|");
    if (idx === -1) continue; // not an "id|title" line — nothing usable
    videos.push({ id: line.slice(0, idx), title: line.slice(idx + 1) });
  }
  return videos;
}

// Fetch upload_date for a batch of video IDs (processes in batches of 50)
// Bails after 2 consecutive failures to avoid grinding through blocked requests
// Returns { videoId: "YYYYMMDD", ... } for the ids yt-dlp reported a date for.
async function fetchUploadDates(videoIds) {
  if (videoIds.length === 0) return {};
  const dateMap = {};
  const batchSize = 50;
  let consecutiveFails = 0;
  for (let i = 0; i < videoIds.length; i += batchSize) {
    const batch = videoIds.slice(i, i + batchSize);
    const urls = batch.map((id) => `https://www.youtube.com/watch?v=${id}`);
    try {
      const { stdout } = await execFileAsync("yt-dlp", [
        "--print", "%(id)s|%(upload_date)s",
        "--no-download",
        ...ytCookieArgs(),
        ...urls,
      ], { timeout: 45000 });
      for (const line of stdout.trim().split("\n").filter(Boolean)) {
        const [id, date] = line.split("|");
        if (id && date && date !== "NA") dateMap[id] = date;
      }
      consecutiveFails = 0;
      subLog(`    Batch ${Math.floor(i / batchSize) + 1}: got dates for ${batch.length} video(s)`);
    } catch (err) {
      consecutiveFails++;
      const reason = String(err?.message || err).slice(0, 80);
      subLog(`    ⚠ Batch ${Math.floor(i / batchSize) + 1} failed: ${reason}`);
      if (consecutiveFails >= 2) {
        subLog(`    ⚠ 2 consecutive failures — aborting yt-dlp date fetch (bot detection likely)`);
        break;
      }
    }
  }
  return dateMap;
}

// ── RSS-based date fetching (bypasses bot detection) ─────────────────────
// Fetch a URL and return the response body as a string
// fetchUrl moved to ./util.js

// Get channel_id from a YouTube channel/playlist URL using yt-dlp
// Returns the id string, or null when neither method yields one.
async function getChannelId(url) {
  // Method 1: flat-playlist channel_id (fast, may return NA)
  try {
    const { stdout } = await execFileAsync("yt-dlp", [
      "--print", "%(channel_id)s",
      "--no-download",
      "--playlist-items", "1",
      "--flat-playlist",
      ...ytCookieArgs(),
      url,
    ], { timeout: 15000 });
    const id = stdout.trim().split("\n")[0];
    if (id && id !== "NA" && id.length > 5) return id;
  } catch {
    // Fall through to the slower method.
  }
  // Method 2: non-flat single video (slower but gets full metadata)
  try {
    const { stdout } = await execFileAsync("yt-dlp", [
      "--print", "%(channel_id)s",
      "--no-download",
      "--playlist-items", "1",
      ...ytCookieArgs(),
      url,
    ], { timeout: 30000 });
    const id = stdout.trim().split("\n")[0];
    if (id && id !== "NA" && id.length > 5) return id;
  } catch {
    // Both methods failed — report "unknown".
  }
  return null;
}

// Fetch video dates from YouTube RSS feed (no auth, no bot detection)
// Returns { videoId: "YYYYMMDD", ... } for up to 15 most recent videos
async function fetchDatesFromRSS(channelId) {
  const dateMap = {};
  if (!channelId) return dateMap;
  try {
    const rssUrl = `https://www.youtube.com/feeds/videos.xml?channel_id=${channelId}`;
    const xml = await fetchUrl(rssUrl);
    // Anchored on the opening tags so a non-feed response (e.g. an HTML error
    // page) fails fast instead of being rescanned from every offset.
    const entryRegex = /<entry>[\s\S]*?<yt:videoId>([^<]+)<\/yt:videoId>[\s\S]*?<published>([^<]+)<\/published>[\s\S]*?<\/entry>/g;
    let match;
    while ((match = entryRegex.exec(xml)) !== null) {
      const videoId = match[1];
      const published = match[2]; // e.g. "2025-12-01T18:00:00+00:00"
      const date = published.slice(0, 10).replace(/-/g, ""); // "20251201"
      dateMap[videoId] = date;
    }
  } catch (err) {
    subLog(`    ⚠ RSS feed fetch failed: ${err.message}`);
  }
  return dateMap;
}

// ── Podcast RSS feed parsing ────────────────────────────────────────────────

// Detect if a URL looks like a podcast RSS feed
function isPodcastFeedUrl(url) {
  if (!url) return false;
  const u = url.trim().toLowerCase();
  // Common podcast RSS feed patterns
  if (u.includes("/feed") || u.includes("/rss") || u.includes("feeds.") || u.includes(".xml")) return true;
  if (u.includes("anchor.fm") || u.includes("feeds.buzzsprout") || u.includes("feeds.simplecast")) return true;
  if (u.includes("feeds.megaphone") || u.includes("feeds.transistor") || u.includes("feeds.libsyn")) return true;
  if (u.includes("feeds.podcastmirror") || u.includes("feeds.acast") || u.includes("feeds.fireside")) return true;
  if (u.includes("rss.art19") || u.includes("podbean.com/feed")) return true;
  return false;
}

// Fetch and parse a podcast RSS feed → returns { title, episodes: [{ id, title, date, audioUrl, duration }] }
async function parsePodcastRSS(feedUrl, limit = 200) {
  const xml = await fetchUrl(feedUrl);

  // Extract podcast title: the first <title> element in the document.
  // The opening tag is matched explicitly so the capture holds only the text
  // (CDATA wrapper stripped), never the "<title>" tag itself.
  const titleMatch = xml.match(/<title(?:\s[^>]*)?>\s*(?:<!\[CDATA\[)?([\s\S]*?)(?:\]\]>)?\s*<\/title>/);
  const podcastTitle = titleMatch ? titleMatch[1].trim() : "Unknown Podcast";

  // Extract episodes from <item> elements
  const episodes = [];
  const itemRegex = /<item>([\s\S]*?)<\/item>/g;
  let match;
  while ((match = itemRegex.exec(xml)) !== null && episodes.length < limit) {
    const item = match[1];

    // GUID (unique episode identifier)
    const guidMatch = item.match(/<guid[^>]*>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?<\/guid>/);
    const guid = guidMatch ? guidMatch[1].trim() : null;

    // Title
    const epTitleMatch = item.match(/<title>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?<\/title>/);
    const epTitle = epTitleMatch ? epTitleMatch[1].trim() : "Untitled Episode";

    // Publish date
    const pubDateMatch = item.match(/<pubDate>([^<]+)<\/pubDate>/);
    const pubDate = pubDateMatch ? pubDateMatch[1].trim() : null;
    let dateStr = ""; // YYYYMMDD
    if (pubDate) {
      try {
        const d = new Date(pubDate);
        if (!isNaN(d.getTime())) {
          dateStr = d.toISOString().slice(0, 10).replace(/-/g, "");
        }
      } catch {
        // Unparseable date — leave dateStr empty.
      }
    }

    // Audio enclosure URL
    const enclosureMatch = item.match(/<enclosure[^>]+url=["']([^"']+)["']/);
    const audioUrl = enclosureMatch ? enclosureMatch[1].trim() : null;

    // Duration (itunes:duration)
    const durMatch = item.match(/<itunes:duration>([^<]+)<\/itunes:duration>/);
    const duration = durMatch ? durMatch[1].trim() : "";

    if (!audioUrl) continue; // Skip episodes without audio

    // Use guid, or fall back to audioUrl as unique ID
    const id = guid || audioUrl;
    episodes.push({ id, title: epTitle, date: dateStr, audioUrl, duration });
  }

  return { title: podcastTitle, episodes };
}

// Download a podcast episode audio file via HTTP(S) to a local path
// downloadPodcastAudio moved to ./audio.js

// Get channel name from URL
// Returns the yt-dlp channel name, else "@handle" from the URL, else "Unknown Channel".
async function fetchChannelName(url) {
  // Try fast flat-playlist approach first
  try {
    const { stdout } = await execFileAsync("yt-dlp", [
      "--print", "%(channel)s",
      "--no-download",
      "--playlist-end", "1",
      "--flat-playlist",
      ...ytCookieArgs(),
      url,
    ], { timeout: 15000 });
    const name = stdout.trim().split("\n")[0];
    if (name && name !== "NA") return name;
  } catch {
    // Fall through to the slower lookup.
  }
  // Fallback: fetch without flat-playlist (slower but gets channel from video metadata)
  try {
    const { stdout } = await execFileAsync("yt-dlp", [
      "--print", "%(channel)s",
      "--no-download",
      "--playlist-end", "1",
      ...ytCookieArgs(),
      url,
    ], { timeout: 30000 });
    const name = stdout.trim().split("\n")[0];
    if (name && name !== "NA") return name;
  } catch {
    // Fall through to the URL-based guess.
  }
  // Last resort: extract handle from URL
  try {
    const u = new URL(url);
    const handleMatch = u.pathname.match(/\/@([^/]+)/);
    if (handleMatch) return "@" + handleMatch[1];
  } catch {
    // Not a parseable URL.
  }
  return "Unknown Channel";
}

// ── In-process write serialization ────────────────────────────────────────
// Per-file promise chain to prevent lost-update races on read-modify-write
// state files (skip-list, seen-list). Without this, two concurrent DELETE
// handlers can each load the same snapshot, add their own id, and the
// second write overwrites the first — silently dropping entries.
const _fileLocks = new Map();

/**
 * Run `fn` after every previously queued task for `key` has settled.
 *
 * @param {string} key - Lock key (the file path).
 * @param {() => Promise<any>} fn - Task to run under the lock.
 * @returns {Promise<any>} Settles with fn's own result or rejection.
 */
function withFileLock(key, fn) {
  const prev = _fileLocks.get(key) || Promise.resolve();
  const next = prev.then(fn, fn); // run fn whether prev resolved or rejected
  // Keep the chain alive but don't leak errors to the next caller
  _fileLocks.set(key, next.catch(() => {}));
  return next;
}

// Skip list — videos deleted from history that subscriptions should not re-add
const skipPath = path.join(historyDir, "skip-list.json");

async function loadSkipList() {
  try {
    return new Set(JSON.parse(await fs.readFile(skipPath, "utf-8")).videoIds || []);
  } catch {
    return new Set();
  }
}

async function addToSkipList(videoId) {
  return withFileLock(skipPath, async () => {
    const skipIds = await loadSkipList();
    skipIds.add(videoId);
    await fs.writeFile(skipPath, JSON.stringify({ videoIds: [...skipIds] }));
  });
}

// Seen list — videos already offered for approval (persists across restarts)
const seenPath = path.join(historyDir, "seen-list.json");

async function loadSeenList() {
  try {
    return new Set(JSON.parse(await fs.readFile(seenPath, "utf-8")).videoIds || []);
  } catch {
    return new Set();
  }
}

async function addToSeenList(videoIds) {
  return withFileLock(seenPath, async () => {
    const seen = await loadSeenList();
    for (const id of videoIds) seen.add(id);
    await fs.writeFile(seenPath, JSON.stringify({ videoIds: [...seen] }));
  });
}

// Get all videoIds already processed in history
async function getProcessedVideoIds() {
  const ids = new Set();
  try {
    const files = await fs.readdir(historyDir);
    for (const file of files.filter(isSessionFile)) {
      try {
        const raw = await fs.readFile(path.join(historyDir, file), "utf-8");
        const data = JSON.parse(raw);
        if (data.videoId) ids.add(data.videoId);
      } catch {
        // Ignore unreadable/corrupt session files.
      }
    }
  } catch {
    // History directory unreadable — treat as "nothing processed".
  }
  return ids;
}

// Server-side auto-queue for subscription-discovered videos (persisted to disk)
const autoQueuePath = path.join(historyDir, "auto-queue.json");
let autoQueue = [];

async function loadAutoQueue() {
  try {
    const data = JSON.parse(await fs.readFile(autoQueuePath, "utf-8"));
    return data.items || [];
  } catch {
    return [];
  }
}

async function saveAutoQueue() {
  await fs.writeFile(autoQueuePath, JSON.stringify({ items: autoQueue }, null, 2));
}

// Load persisted queue on startup, filtering out already-processed items
autoQueue = await loadAutoQueue();
{
  const processed = await getProcessedVideoIds();
  const before = autoQueue.length;
  autoQueue = autoQueue.filter((q) => !processed.has(q.videoId));
  if (autoQueue.length < before) {
    console.log(`  Auto-queue: removed ${before - autoQueue.length} already-processed item(s)`);
    await saveAutoQueue();
  }
}

// ── Background processing queue ──────────────────────────────────────────
// Processes "approved" auto-queue items sequentially with configurable delay
// between items to avoid hammering YouTube with rapid-fire downloads.

const processingConfigPath = path.join(configDir, "processing-config.json");
let processingConfig = {
  delaySeconds: 300, // Default delay between processing queue items (5 minutes)
  enabled: true, // Whether background processing is active
};
let processingState = {
  running: false, // Is the processor loop currently active?
  currentItem: null, // The item currently being processed (or null)
  lastCompleted: null, // Timestamp of last completed item
  rush: false, // If true, skip delay before next item
  log: [], // Recent processing log entries
};

// Log to the console and to the in-memory processor log (last 100 entries).
function procLog(msg) {
  console.log(`  [processor] ${msg}`);
  processingState.log.push({ t: new Date().toISOString(), msg });
  if (processingState.log.length > 100) processingState.log.shift();
}

// Overlay persisted settings onto the defaults; invalid fields are ignored.
async function loadProcessingConfig() {
  try {
    const data = JSON.parse(await fs.readFile(processingConfigPath, "utf-8"));
    // A negative delay would turn into a ~1 ms timer, so only accept >= 0.
    if (typeof data.delaySeconds === "number" && Number.isFinite(data.delaySeconds) && data.delaySeconds >= 0) {
      processingConfig.delaySeconds = data.delaySeconds;
    }
    if (typeof data.enabled === "boolean") processingConfig.enabled = data.enabled;
  } catch {
    // No saved config (or unparsable) — keep defaults.
  }
}

async function saveProcessingConfig() {
  await fs.writeFile(processingConfigPath, JSON.stringify(processingConfig, null, 2));
}

await loadProcessingConfig();

// The background processor: picks "approved" items, processes via internal HTTP,
// waits the configured delay, then picks the next one. Runs continuously.
/**
 * Sequentially processes every auto-queue item whose status is "approved".
 *
 * Only one loop runs at a time (guarded by `processingState.running`). The
 * loop exits — and must be restarted through `kickProcessor()` — when no
 * approved item is left or when processing is disabled in `processingConfig`.
 *
 * Between items it waits `processingConfig.delaySeconds`, unless no item has
 * completed yet or rush mode is set. The wait is sliced so that a rush or a
 * pause requested while waiting takes effect promptly.
 *
 * @returns {Promise<void>} Resolves when the loop goes back to sleep. Rejects
 *   only if persisting the queue fails; per-item processing errors are
 *   recorded on the item ("failed") and do not stop the loop.
 */
async function backgroundProcessor() {
  if (processingState.running) return; // Already running
  processingState.running = true;
  procLog("Background processor started");

  // How often the inter-item wait re-checks the rush / pause flags.
  const WAIT_SLICE_MS = 250;

  try {
    while (true) {
      // Find next approved item
      const item = autoQueue.find(q => q.status === "approved");
      if (!item) {
        procLog("No approved items in queue — processor sleeping");
        return;
      }
      if (!processingConfig.enabled) {
        procLog("Processing is paused — processor sleeping");
        return;
      }

      // Wait the configured delay (unless rush mode or first item)
      if (processingState.lastCompleted && !processingState.rush) {
        const delaySec = processingConfig.delaySeconds;
        procLog(`Waiting ${delaySec}s before next item...`);
        const deadline = Date.now() + delaySec * 1000;
        // Sleep in short slices so "rush" and "pause" are honoured during the
        // wait instead of only after the full delay has elapsed.
        while (Date.now() < deadline && !processingState.rush && processingConfig.enabled) {
          const remaining = deadline - Date.now();
          await new Promise(r => setTimeout(r, Math.min(WAIT_SLICE_MS, remaining)));
        }
        // Paused while we were waiting: leave the item approved for later.
        if (!processingConfig.enabled) {
          procLog("Processing is paused — processor sleeping");
          return;
        }
      }
      processingState.rush = false;

      // Re-check the item hasn't been removed/changed while we waited
      const freshItem = autoQueue.find(q => q.id === item.id);
      if (!freshItem || freshItem.status !== "approved") {
        procLog(`Item ${item.id} was removed or status changed — skipping`);
        continue;
      }

      // Mark as processing
      freshItem.status = "processing";
      processingState.currentItem = freshItem;
      await saveAutoQueue();
      procLog(`Processing: ${freshItem.title} (${freshItem.url})`);

      try {
        // Call the existing /api/process endpoint via internal HTTP request
        const result = await processItemInternally(freshItem);
        freshItem.status = "completed";
        freshItem.completedAt = new Date().toISOString();
        freshItem.historyId = result.historyId || null;
        processingState.lastCompleted = new Date().toISOString();
        procLog(`✓ Completed: ${freshItem.title}`);
      } catch (err) {
        freshItem.status = "failed";
        freshItem.error = err.message || String(err);
        freshItem.failedAt = new Date().toISOString();
        procLog(`✗ Failed: ${freshItem.title} — ${freshItem.error.slice(0, 200)}`);
      }

      processingState.currentItem = null;
      await saveAutoQueue();
    }
  } finally {
    // Always release the "running" latch — otherwise an exception from
    // saveAutoQueue() would leave it set and kickProcessor() could never
    // start the loop again.
    processingState.running = false;
    processingState.currentItem = null;
  }
}

// Internal HTTP
// request to /api/process — consumes the SSE stream and
// waits for the "result" or "error" event. This reuses the entire existing
// pipeline without any code duplication.
/**
 * Runs one auto-queue item through the /api/process pipeline over loopback.
 *
 * @param {object} item Auto-queue entry; `url`, `type`, `title`, `uploadDate`
 *   and `videoId` are forwarded in the request body.
 * @returns {Promise<object>} Payload of the last SSE "result" event.
 *   Rejects when no server key is configured, on a socket error or timeout,
 *   when the stream carries an "error" event, or when it ends with no result.
 */
function processItemInternally(item) {
  return new Promise((resolve, reject) => {
    const apiKey = resolveApiKey(null); // Use server's stored key
    if (!apiKey) {
      return reject(new Error("No API key configured. Set your Gemini API key in Settings."));
    }

    const body = JSON.stringify({
      url: item.url,
      apiKey: "USE_SERVER_KEY",
      type: item.type || undefined,
      title: item.title || undefined,
      uploadDate: item.uploadDate || undefined,
      episodeId: item.videoId || undefined,
    });

    const req = http.request({
      hostname: "127.0.0.1",
      port: PORT,
      path: "/api/process",
      method: "POST",
      headers: { "Content-Type": "application/json", "Content-Length": Buffer.byteLength(body) },
      timeout: 1800000, // 30 minutes max for very long videos
    }, (res) => {
      // Decode as UTF-8 at the stream level so a multi-byte character that is
      // split across two chunks is not corrupted.
      res.setEncoding("utf8");

      // Validation failures (400/409) are answered with plain JSON instead of
      // an SSE stream; keep that body so its message can be surfaced.
      const isErrorStatus = res.statusCode >= 400;
      let rawBody = "";

      let buffer = "";
      let lastResult = null;
      let lastError = null;
      // Must outlive a single "data" callback: the "event:" line and its
      // "data:" line can arrive in different chunks (always the case for a
      // payload larger than one chunk).
      let currentEvent = null;

      res.on("data", (chunk) => {
        if (isErrorStatus) rawBody += chunk;
        buffer += chunk;
        // Parse SSE events from the buffer
        const lines = buffer.split("\n");
        buffer = lines.pop() || ""; // Keep incomplete line in buffer
        for (const line of lines) {
          if (line.startsWith("event: ")) {
            currentEvent = line.slice(7).trim();
          } else if (line.startsWith("data: ") && currentEvent) {
            try {
              const data = JSON.parse(line.slice(6));
              if (currentEvent === "result") lastResult = data;
              if (currentEvent === "error") lastError = data;
              if (currentEvent === "log") {
                procLog(` [${data.elapsed || "?"}s] ${data.message}`);
              }
            } catch {
              // Malformed event payload — ignore it and keep reading.
            }
            currentEvent = null;
          } else if (line === "") {
            currentEvent = null;
          }
        }
      });

      res.on("end", () => {
        if (lastError) {
          reject(new Error(lastError.message || "Processing failed"));
        } else if (lastResult) {
          resolve(lastResult);
        } else {
          let reason = "Processing ended without result";
          if (isErrorStatus) {
            try {
              const parsed = JSON.parse(rawBody);
              reason = parsed.message || parsed.error || reason;
            } catch {
              // Body was not JSON — fall back to the generic reason.
            }
          }
          reject(new Error(reason));
        }
      });
      res.on("error", reject);
    });

    req.on("error", reject);
    req.on("timeout", () => {
      req.destroy();
      reject(new Error("Processing timed out after 30 minutes"));
    });
    req.write(body);
    req.end();
  });
}

// Wake up the processor whenever there are approved items
function kickProcessor() {
  if (!processingState.running && processingConfig.enabled) {
    const hasApproved = autoQueue.some(q => q.status === "approved");
    if (hasApproved) {
      backgroundProcessor().catch(err => procLog(`Processor error: ${err.message}`));
    }
  }
}

let subCheckRunning = false;
let subCheckPromise = null;
let subCheckLog = []; // Stores recent check logs for debug endpoint

// Logs to the console and to the in-memory ring (capped at 200 entries)
// served by /api/sub-check-log.
function subLog(msg) {
  console.log(msg);
  subCheckLog.push({ t: new Date().toISOString(), msg });
  if (subCheckLog.length > 200) subCheckLog.shift();
}

// Runs a subscription check. If one is already in flight, waits for it to
// finish and then runs a fresh one, so the caller always gets a check that
// started after its request.
async function checkSubscriptions() {
  if (subCheckRunning) {
    // Wait for current check to finish, then run again
    if (subCheckPromise) await subCheckPromise;
    return checkSubscriptions();
  }
  subCheckRunning = true;
  subCheckPromise = _checkSubscriptionsInner().finally(() => {
    subCheckRunning = false;
    subCheckPromise = null;
  });
  return subCheckPromise;
}

// One full pass over all subscriptions: discovers items newer than each
// subscription's cutoff date and appends them to the auto-queue as "pending"
// (or "approved" when the subscription has autoDownload set). A failure in
// one subscription is logged and does not abort the others.
async function _checkSubscriptionsInner() {
  // Pro-tier feature: skip silently when not entitled. The HTTP gate above
  // returns 402 to callers; this guards the background timer + manual paths.
  if (!LIC.entitlements.has("subscriptions")) {
    subCheckLog = [];
    subLog("Skipped: subscriptions require a Pro license.");
    return;
  }

  subCheckLog = []; // Clear logs for fresh check
  const subs = await loadSubscriptions();
  if (subs.length === 0) {
    subLog("No subscriptions found");
    return;
  }

  const processedIds = await getProcessedVideoIds();
  const skippedIds = await loadSkipList();
  const seenIds = await loadSeenList();
  const queuedIds = new Set(autoQueue.map(q => {
    // For YouTube: extract video ID from URL; for podcasts: use stored videoId (GUID)
    if (q.videoId) return q.videoId;
    const m = q.url.match(/[?&]v=([a-zA-Z0-9_-]{11})/);
    return m ? m[1] : null;
  }).filter(Boolean));
  subLog(`${processedIds.size} in library, ${skippedIds.size} skipped, ${seenIds.size} seen, ${queuedIds.size} in queue`);

  let changed = false;
  for (const sub of subs) {
    if (sub.paused) {
      subLog(`⏸ ${sub.name} — paused, skipping`);
      continue;
    }
    try {
      const icon = sub.type === "podcast" ? "🎙" : "📡";
      subLog(`${icon} Checking: ${sub.name} (${sub.url})`);
      // ISO timestamp → "YYYYMMDD", comparable as a string with upload dates.
      const cutoffDate = sub.createdAt.replace(/[-T:\.Z]/g, "").slice(0, 8);

      if (sub.type === "podcast") {
        // ── Podcast subscription: discover episodes from RSS feed ──
        const { episodes } = await parsePodcastRSS(sub.url, 200);
        subLog(` Found ${episodes.length} episode(s) in RSS feed`);
        if (episodes.length === 0) {
          sub.lastChecked = new Date().toISOString();
          changed = true;
          continue;
        }

        // Filter out already-known episodes
        const unknowns = episodes.filter(ep => {
          if (processedIds.has(ep.id)) return false;
          if (queuedIds.has(ep.id)) return false;
          if (skippedIds.has(ep.id)) return false;
          if (seenIds.has(ep.id)) return false;
          return true;
        });
        const filtered = episodes.length - unknowns.length;
        subLog(` ${unknowns.length} to check, ${filtered} already known`);
        if (unknowns.length === 0) {
          sub.lastChecked = new Date().toISOString();
          changed = true;
          continue;
        }

        subLog(` Cutoff date: ${cutoffDate}`);
        let newCount = 0;
        const seenNow = [];
        for (const ep of unknowns) {
          if (!ep.date || ep.date.length !== 8) continue; // skip undated
          if (ep.date < cutoffDate) {
            seenNow.push(ep.id);
            continue; // before cutoff
          }
          subLog(` ✅ ${ep.date} — ${ep.title.slice(0, 60)}`);
          const itemStatus = sub.autoDownload ? "approved" : "pending";
          autoQueue.push({
            id: `auto-${Date.now()}-${Buffer.from(ep.id).toString("base64url").slice(0, 16)}`,
            videoId: ep.id, // episode GUID
            url: ep.audioUrl,
            title: ep.title,
            uploadDate: ep.date,
            subscriptionId: sub.id,
            subscriptionName: sub.name,
            status: itemStatus,
            type: "podcast",
            duration: ep.duration || "",
          });
          queuedIds.add(ep.id);
          newCount++;
        }
        if (seenNow.length > 0) await addToSeenList(seenNow);
        sub.lastChecked = new Date().toISOString();
        subLog(` → ${newCount} episode(s) queued for approval from ${sub.name}`);
        changed = true;
      } else {
        // ── YouTube subscription: discover videos via yt-dlp + RSS dates ──
        // Scale fetch limit based on how far back the subscription date goes
        const daysSinceSub = Math.max(1, Math.ceil((Date.now() - new Date(sub.createdAt).getTime()) / 86400000));
        const estimatedVideos = Math.ceil(daysSinceSub / 7) * 4;
        const fetchLimit = Math.min(Math.max(estimatedVideos, 15), 200);
        subLog(` Subscription age: ${daysSinceSub}d → fetching up to ${fetchLimit} videos`);

        const candidates = await listChannelVideosFast(sub.url, fetchLimit);
        subLog(` Found ${candidates.length} recent video(s)`);
        if (candidates.length === 0) {
          subLog(` ⚠ No videos returned from yt-dlp for this channel`);
          sub.lastChecked = new Date().toISOString();
          changed = true;
          continue;
        }

        const unknowns = candidates.filter(v => {
          if (processedIds.has(v.id)) return false;
          if (queuedIds.has(v.id)) return false;
          if (skippedIds.has(v.id)) return false;
          if (seenIds.has(v.id)) return false;
          return true;
        });
        const filtered = candidates.length - unknowns.length;
        subLog(` ${unknowns.length} to check, ${filtered} already known`);
        if (unknowns.length === 0) {
          sub.lastChecked = new Date().toISOString();
          changed = true;
          continue;
        }

        subLog(` Cutoff date: ${cutoffDate} — fetching upload dates...`);

        // Step 1: Get channel ID (cached on subscription, or fetch once)
        if (!sub.channelId) {
          subLog(` Resolving channel ID...`);
          sub.channelId = await getChannelId(sub.url);
          if (sub.channelId) {
            subLog(` Resolved channel ID: ${sub.channelId}`);
            changed = true;
          } else {
            subLog(` ⚠ Could not resolve channel ID — RSS will be skipped`);
          }
        }

        // Step 2: RSS feed — fast, no bot detection, covers ~15 most recent
        const dateMap = sub.channelId ? await fetchDatesFromRSS(sub.channelId) : {};
        const rssCount = Object.keys(dateMap).length;
        if (sub.channelId) subLog(` RSS feed: got dates for ${rssCount} videos`);

        // Step 3: Smart early termination
        const needDates = unknowns.filter(v => !dateMap[v.id]);
        let earlyTermination = false;
        if (needDates.length > 0 && rssCount > 0) {
          const rssDates = Object.values(dateMap).sort();
          const oldestRssDate = rssDates[0];
          if (oldestRssDate < cutoffDate) {
            earlyTermination = true;
            subLog(` Oldest RSS video (${oldestRssDate}) is before cutoff — ${needDates.length} older video(s) are definitely too old, skipping yt-dlp`);
          } else {
            subLog(` ${needDates.length} video(s) not in RSS — trying yt-dlp for dates...`);
            const ytDates = await fetchUploadDates(needDates.map(v => v.id));
            Object.assign(dateMap, ytDates);
            const ytCount = Object.keys(ytDates).length;
            if (ytCount > 0) subLog(` yt-dlp added dates for ${ytCount} more video(s)`);
          }
        } else if (needDates.length > 0 && rssCount === 0) {
          subLog(` ${needDates.length} video(s) need dates — trying yt-dlp...`);
          const ytDates = await fetchUploadDates(needDates.map(v => v.id));
          Object.assign(dateMap, ytDates);
          const ytCount = Object.keys(ytDates).length;
          if (ytCount > 0) subLog(` yt-dlp added dates for ${ytCount} more video(s)`);
        }

        const gotDates = Object.keys(dateMap).length;
        if (gotDates > 0 || needDates.length === 0) {
          subLog(` Total dates: ${gotDates} of ${unknowns.length} videos`);
        } else {
          subLog(` ⚠ No dates available — skipping. Try setting YT_COOKIES_FROM in .env`);
          sub.lastChecked = new Date().toISOString();
          changed = true;
          continue;
        }

        let newCount = 0;
        for (const video of unknowns) {
          const uploadDate = dateMap[video.id];
          if (!uploadDate || uploadDate.length !== 8) {
            continue;
          }
          if (uploadDate < cutoffDate) {
            subLog(` ⏭ ${video.id} (${uploadDate}) — before cutoff`);
            continue;
          }
          subLog(` ✅ ${video.id}${uploadDate ? ` (${uploadDate})` : ""} — ${video.title.slice(0,50)}`);
          const ytItemStatus = sub.autoDownload ? "approved" : "pending";
          autoQueue.push({
            id: `auto-${Date.now()}-${video.id}`,
            videoId: video.id,
            url: `https://www.youtube.com/watch?v=${video.id}`,
            title: video.title,
            uploadDate: uploadDate || null,
            subscriptionId: sub.id,
            subscriptionName: sub.name,
            status: ytItemStatus,
          });
          queuedIds.add(video.id);
          newCount++;
        }

        // Only add to seen list when we can PROVE a video is too old
        const seenNow = unknowns.filter(v => {
          if (queuedIds.has(v.id)) return false;
          const d = dateMap[v.id];
          if (d && d.length === 8 && d < cutoffDate) return true;
          if (d && d.length === 8 && d >= cutoffDate) return false;
          return earlyTermination;
        }).map(v => v.id);
        if (seenNow.length > 0) await addToSeenList(seenNow);

        sub.lastChecked = new Date().toISOString();
        subLog(` → ${newCount} video(s) queued for approval from ${sub.name}`);
        changed = true;
      }
    } catch (err) {
      subLog(` ⚠ FAILED for ${sub.name}: ${err.message}`);
    }
  }

  const pendingCount = autoQueue.filter(q => q.status === "pending").length;
  const approvedCount = autoQueue.filter(q => q.status === "approved").length;
  subLog(`Done. ${pendingCount} pending, ${approvedCount} approved in auto-queue.`);

  if (changed) {
    // Re-read before saving instead of writing back the snapshot loaded at
    // the top: the API handlers may have renamed, paused, re-dated or deleted
    // subscriptions while this check was awaiting network calls, and saving
    // the stale snapshot would undo those edits. Only the two fields this
    // function writes (lastChecked, channelId) are merged in.
    const checkedById = new Map(subs.map(s => [s.id, s]));
    const latest = await loadSubscriptions();
    for (const current of latest) {
      const checked = checkedById.get(current.id);
      if (!checked) continue;
      if (checked.lastChecked) current.lastChecked = checked.lastChecked;
      if (checked.channelId && !current.channelId) current.channelId = checked.channelId;
    }
    await saveSubscriptions(latest);
  }
  await saveAutoQueue();

  // Wake up the background processor if there are approved items
  if (approvedCount > 0) {
    subLog(`Kicking background processor for ${approvedCount} approved item(s)...`);
    kickProcessor();
  }
}

// CRUD endpoints
app.get("/api/subscriptions", async (req, res) => {
  const subs = await loadSubscriptions();
  res.json({ subscriptions: subs });
});

// Extract a normalized channel key from a YouTube URL for dedup
function channelKeyFromUrl(url) {
  try {
    const u = new URL(url);
    // Named `pathname` so the imported `path` module is not shadowed.
    const pathname = u.pathname.toLowerCase().replace(/\/+$/, "");
    // /@handle/videos → @handle, /@handle/streams → @handle
    const handleMatch = pathname.match(/\/(@[^/]+)/);
    if (handleMatch) return handleMatch[1];
    // /channel/UCxxx → channel/UCxxx
    const channelMatch = pathname.match(/\/(channel\/[^/]+)/);
    if (channelMatch) return channelMatch[1];
    // /c/name or /user/name
    const cMatch = pathname.match(/\/(c|user)\/([^/]+)/);
    if (cMatch) return cMatch[0];
    // /playlist?list=PLxxx
    const list = u.searchParams.get("list");
    if (list) return `playlist/${list}`;
    return pathname; // fallback
  } catch {
    // Not a parseable URL — compare on the raw, lower-cased string.
    return url.toLowerCase().replace(/\/+$/, "");
  }
}

app.post("/api/subscriptions", async (req, res) => {
  const { url, since, type, autoDownload } = req.body;
  // A non-string url would crash on .trim()/.toLowerCase() below.
  if (typeof url !== "string" || !url) return res.status(400).json({ error: "Missing url" });

  const isPodcast = type === "podcast" || isPodcastFeedUrl(url);
  const subs = await loadSubscriptions();

  // Prevent duplicates
  if (isPodcast) {
    const normalizedUrl = url.trim().toLowerCase().replace(/\/+$/, "");
    if (subs.find(s => s.url.trim().toLowerCase().replace(/\/+$/, "") === normalizedUrl)) {
      return res.status(409).json({ error: "Already subscribed to this podcast" });
    }
  } else {
    const newKey = channelKeyFromUrl(url);
    if (subs.find(s => channelKeyFromUrl(s.url) === newKey)) {
      return res.status(409).json({ error: "Already subscribed to this channel" });
    }
  }

  // Use provided cutoff date, or default to right now
  const cutoffTime = since ? new Date(since) : new Date();
  // toISOString() throws RangeError on an invalid date; answer 400 instead.
  if (Number.isNaN(cutoffTime.getTime())) {
    return res.status(400).json({ error: "Invalid 'since' date" });
  }
  const cutoff = cutoffTime.toISOString();

  const sub = {
    id: `sub-${Date.now()}`,
    url,
    name: "Loading...",
    type: isPodcast ? "podcast" : "youtube",
    channelId: null,
    createdAt: cutoff,
    lastChecked: null,
    paused: false,
    autoDownload: autoDownload === true,
  };
  subs.push(sub);
  await saveSubscriptions(subs);

  // Respond immediately so the UI isn't blocked
  res.json(sub);

  // Background: resolve name and check for new content
  (async () => {
    try {
      if (isPodcast) {
        // Fetch podcast title from RSS feed
        const { title } = await parsePodcastRSS(url, 1);
        const freshSubs = await loadSubscriptions();
        const s = freshSubs.find(x => x.id === sub.id);
        if (s) {
          s.name = title || url;
          await saveSubscriptions(freshSubs);
        }
        console.log(` 🎙 New podcast subscription: ${title} — checking for episodes...`);
      } else {
        const [name, channelId] = await Promise.all([
          fetchChannelName(url),
          getChannelId(url),
        ]);
        const freshSubs = await loadSubscriptions();
        const s = freshSubs.find(x => x.id === sub.id);
        if (s) {
          s.name = name;
          if (channelId) s.channelId = channelId;
          await saveSubscriptions(freshSubs);
        }
        console.log(` 📡 New subscription: ${name} — checking for recent videos...`);
      }
      await checkSubscriptions();
    } catch (err) {
      console.error(" ⚠ Background subscription setup error:", err.message);
    }
  })();
});

app.delete("/api/subscriptions/:id", async (req, res) => {
  let subs = await loadSubscriptions();
  subs = subs.filter(s => s.id !== req.params.id);
  await saveSubscriptions(subs);
  // Also remove every auto-queue item that came from this subscription
  // (regardless of status).
  autoQueue = autoQueue.filter(q => q.subscriptionId !== req.params.id);
  await saveAutoQueue();
  res.json({ ok: true });
});

// Toggle the paused flag of a subscription
app.put("/api/subscriptions/:id/pause", async (req, res) => {
  const subs = await loadSubscriptions();
  const sub = subs.find(s => s.id === req.params.id);
  if (!sub) return res.status(404).json({ error: "Subscription not found" });
  sub.paused = !sub.paused;
  await saveSubscriptions(subs);
  res.json(sub);
});

// Change the cutoff date (stored as createdAt) of a subscription
app.put("/api/subscriptions/:id/since", async (req, res) => {
  const { since } = req.body;
  if (!since) return res.status(400).json({ error: "Missing 'since' date" });
  const subs = await loadSubscriptions();
  const sub = subs.find(s => s.id === req.params.id);
  if (!sub) return res.status(404).json({ error: "Subscription not found" });
  const sinceTime = new Date(since);
  // toISOString() throws RangeError on an invalid date; answer 400 instead.
  if (Number.isNaN(sinceTime.getTime())) {
    return res.status(400).json({ error: "Invalid 'since' date" });
  }
  sub.createdAt = sinceTime.toISOString();
  await saveSubscriptions(subs);
  res.json(sub);
});

// Debug: subscription check logs (viewable in-app)
app.get("/api/sub-check-log", (req, res) => {
  res.json({
    log: subCheckLog,
    autoQueueCount: autoQueue.length,
    autoQueue: autoQueue.map(q => ({ id: q.id, videoId: q.videoId, title: q.title, status: q.status, sub: q.subscriptionName })),
  });
});

// Auto-queue endpoints (frontend polls these)
app.get("/api/auto-queue", (req, res) => {
  // Return all items grouped by status for the frontend
  const showAll = req.query.all === "true";
  const items = showAll
    ? autoQueue
    : autoQueue.filter(q => ["pending", "approved", "processing"].includes(q.status));
  res.json({
    items,
    checkRunning: subCheckRunning,
    counts: {
      pending: autoQueue.filter(q => q.status === "pending").length,
      approved: autoQueue.filter(q => q.status === "approved").length,
      processing: autoQueue.filter(q => q.status === "processing").length,
      completed: autoQueue.filter(q => q.status === "completed").length,
      failed: autoQueue.filter(q => q.status === "failed").length,
    },
  });
});

// Remove an item from the queue without remembering it
app.delete("/api/auto-queue/:id", async (req, res) => {
  autoQueue = autoQueue.filter(q => q.id !== req.params.id);
  await saveAutoQueue();
  res.json({ ok: true });
});

// Remove an item and add its videoId to the skip list
app.post("/api/auto-queue/:id/skip", async (req, res) => {
  const item = autoQueue.find(q => q.id === req.params.id);
  if (item && item.videoId) {
    await addToSkipList(item.videoId);
  }
  autoQueue = autoQueue.filter(q => q.id !== req.params.id);
  await saveAutoQueue();
  res.json({ ok: true });
});

// Approve a single auto-queue item for background processing
app.post("/api/auto-queue/:id/approve", async (req, res) => {
  const target = autoQueue.find((entry) => entry.id === req.params.id);
  if (!target) {
    return res.status(404).json({ error: "Item not found" });
  }
  if (target.status !== "pending") {
    return res.status(400).json({ error: `Cannot approve item with status '${target.status}'` });
  }

  target.status = "approved";
  await saveAutoQueue();
  kickProcessor();
  res.json({ ok: true, item: target });
});

// Bulk variant: flips every "pending" item to "approved" in one request.
app.post("/api/auto-queue/approve-all", async (req, res) => {
  const pendingItems = autoQueue.filter((entry) => entry.status === "pending");
  pendingItems.forEach((entry) => {
    entry.status = "approved";
  });

  const approved = pendingItems.length;
  // Persist and wake the processor only when something actually changed.
  if (approved > 0) {
    await saveAutoQueue();
    kickProcessor();
  }
  res.json({ ok: true, approved });
});

// Puts a "failed" item back into the approved state and clears its error info.
app.post("/api/auto-queue/:id/retry", async (req, res) => {
  const target = autoQueue.find((entry) => entry.id === req.params.id);
  if (!target) {
    return res.status(404).json({ error: "Item not found" });
  }
  if (target.status !== "failed") {
    return res.status(400).json({ error: `Cannot retry item with status '${target.status}'` });
  }

  target.status = "approved";
  target.error = undefined;
  target.failedAt = undefined;
  await saveAutoQueue();
  kickProcessor();
  res.json({ ok: true, item: target });
});

// Drops every "completed" and "failed" item; reports how many were removed.
app.post("/api/auto-queue/clear-finished", async (req, res) => {
  const sizeBefore = autoQueue.length;
  autoQueue = autoQueue.filter(
    (entry) => entry.status !== "completed" && entry.status !== "failed",
  );
  const removed = sizeBefore - autoQueue.length;
  if (removed > 0) {
    await saveAutoQueue();
  }
  res.json({ ok: true, removed });
});

// Starts a subscription check in the background unless one is already running.
app.post("/api/subscriptions/check-now", async (req, res) => {
  if (subCheckRunning) {
    return res.json({ ok: true, message: "Already checking" });
  }
  // Not awaited: the response goes out immediately and failures are only logged.
  checkSubscriptions().catch((err) => {
    console.error(" ⚠ Manual subscription check error:", err.message);
  });
  res.json({ ok: true, message: "Check started" });
});

// ── Per-subscription auto-download flag ────────────────────────────────────
app.put("/api/subscriptions/:id/auto-download", async (req, res) => {
  const allSubs = await loadSubscriptions();
  const subscription = allSubs.find((s) => s.id === req.params.id);
  if (!subscription) {
    return res.status(404).json({ error: "Subscription not found" });
  }

  // Anything other than a literal `true` turns the flag off.
  subscription.autoDownload = req.body.enabled === true;
  await saveSubscriptions(allSubs);
  res.json({ ok: true, subscription });
});

// ── Background processor: status and configuration ─────────────────────────
// Snapshot of the processor: running flag, current item, config, queue
// counts and the 20 most recent log entries.
app.get("/api/processing/status", (req, res) => {
  const countWithStatus = (status) => autoQueue.filter((entry) => entry.status === status).length;

  const active = processingState.currentItem;
  let currentItem = null;
  if (active) {
    currentItem = { id: active.id, title: active.title, url: active.url };
  }

  res.json({
    running: processingState.running,
    currentItem,
    lastCompleted: processingState.lastCompleted,
    config: processingConfig,
    counts: {
      approved: countWithStatus("approved"),
      processing: countWithStatus("processing"),
      pending: countWithStatus("pending"),
    },
    log: processingState.log.slice(-20),
  });
});

// Accepts `delaySeconds` (non-negative number, capped at 3600) and/or
// `enabled` (boolean); fields of any other type are ignored.
app.put("/api/processing/config", async (req, res) => {
  const requestedDelay = req.body.delaySeconds;
  if (typeof requestedDelay === "number" && requestedDelay >= 0) {
    processingConfig.delaySeconds = Math.max(0, Math.min(3600, requestedDelay));
  }

  const requestedEnabled = req.body.enabled;
  if (typeof requestedEnabled === "boolean") {
    processingConfig.enabled = requestedEnabled;
    // Re-enabling may leave approved items waiting — wake the processor.
    if (requestedEnabled) {
      kickProcessor();
    }
  }

  await saveProcessingConfig();
  res.json({ ok: true, config: processingConfig });
});

// Rush: the next queue item skips the configured delay.
app.post("/api/processing/rush", (req, res) => {
  processingState.rush = true;
  // Starts the processor if it is idle and approved items are waiting.
  kickProcessor();
  res.json({ ok: true, message: "Rush mode enabled — next item will process immediately" });
});

// Processing log (for debug/monitoring)
app.get("/api/processing/log", (req, res) => { res.json({ log: processingState.log }); }); // ── Full pipeline: URL → audio → transcript → topic analysis ────────────── app.post("/api/process", async (req, res) => { const { url, apiKey: clientKey, model, type: itemType, title: itemTitle, uploadDate: itemUploadDate, episodeId } = req.body; // Free tier: unlicensed users can summarize one video at a time. They // still bring their own Gemini key — same as paid users today; the key // can come from either the StartOS config action (server-side) or the // web UI Settings panel (client-side). The future "bundled key" relay // (paid users' requests proxied through the operator's service) isn't // built yet, so there's nothing here that gates key sourcing by tier. const isFreeUser = !(LIC.state === "licensed" && LIC.entitlements.has("core")); if (isFreeUser) { if (freeJobInFlight) { return res.status(409).json({ error: "processing_in_progress", message: "A summary is already being processed. Free mode handles one video at a time — wait for the current one to finish.", }); } freeJobInFlight = true; } const apiKey = resolveApiKey(clientKey); if (!url) { if (isFreeUser) freeJobInFlight = false; return res.status(400).json({ error: "Missing url" }); } if (!apiKey) { if (isFreeUser) freeJobInFlight = false; return res.status(400).json({ error: "No API key provided. Set GEMINI_API_KEY in .env or enter one in Settings." }); } // Determine if this is a podcast episode or YouTube video const isPodcast = itemType === "podcast" || /\.(mp3|m4a|ogg|opus|wav|aac)(\?|$)/i.test(url); const videoId = isPodcast ? (episodeId || url) : extractVideoId(url); if (!isPodcast && !videoId) { if (isFreeUser) freeJobInFlight = false; return res.status(400).json({ error: "Invalid YouTube URL" }); } const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "yts-")); const audioExt = isPodcast ? 
(url.match(/\.(mp3|m4a|ogg|opus|wav|aac)/i)?.[1] || "mp3") : "mp3"; const audioPath = path.join(tmpDir, `audio.${audioExt}`); const mimeType = { mp3: "audio/mp3", m4a: "audio/mp4", ogg: "audio/ogg", opus: "audio/opus", wav: "audio/wav", aac: "audio/aac" }[audioExt] || "audio/mp3"; try { const pipelineStart = Date.now(); // Set up SSE res.writeHead(200, { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", Connection: "keep-alive", }); // Helper to send log entries with elapsed time const logHistory = []; function log(step, message, detail) { const elapsed = ((Date.now() - pipelineStart) / 1000).toFixed(1); const logMsg = `[${elapsed}s] ${message}`; console.log(` ${logMsg}`); logHistory.push({ elapsed, message, detail: detail || null }); sendEvent(res, "status", { step, message }); sendEvent(res, "log", { elapsed, message, detail: detail || null }); } // ── Step 1: Download audio ── const dlStart = Date.now(); let videoTitle = itemTitle || "Untitled"; let videoUploadDate = itemUploadDate || ""; if (isPodcast) { log(1, "Downloading podcast episode..."); await downloadPodcastAudio(url, audioPath); const stats = await fs.stat(audioPath); const sizeMB = (stats.size / (1024 * 1024)).toFixed(1); const dlTime = ((Date.now() - dlStart) / 1000).toFixed(1); log(1, `Episode downloaded (${sizeMB} MB) in ${dlTime}s`, `File size: ${sizeMB} MB`); log(1, `Episode: ${videoTitle}`); } else { log(1, "Downloading audio from YouTube..."); const dlBaseArgs = [ "-x", "--audio-format", "mp3", "--audio-quality", "5", "-o", audioPath, "--no-playlist", "--print", "%(title)s|||%(upload_date)s", "--no-simulate", ]; const dlUrl = `https://www.youtube.com/watch?v=${videoId}`; const cookieArgs = ytCookieArgs(); const hasCookies = cookieArgs.length > 0; let usedCookies = false; let dlStdout = ""; // Helper: attempt a single yt-dlp download async function attemptDownload(args, label) { const result = await execFileAsync("yt-dlp", args, { timeout: 600000 }); return result.stdout || ""; 
} // Helper: check if error is a bot detection / rate limit block function isBotBlock(errText) { return /Sign in|confirm you're not a bot|bot detection|JsChallengeProvider|js.*challenge|HTTP Error 403|Too Many Requests|429/i.test(errText); } // ── Smart download with retry ── // Strategy: cookies → no-cookies → wait & retry (up to 3 attempts with increasing delays) const MAX_RETRIES = 3; const RETRY_DELAYS = [30, 60, 120]; // seconds — escalating backoff let downloaded = false; let lastError = ""; for (let attempt = 0; attempt <= MAX_RETRIES && !downloaded; attempt++) { // On retry attempts, wait before trying again if (attempt > 0) { const waitSec = RETRY_DELAYS[Math.min(attempt - 1, RETRY_DELAYS.length - 1)]; log(1, `⏳ YouTube is rate-limiting. Waiting ${waitSec}s before retry ${attempt}/${MAX_RETRIES}...`); sendEvent(res, "status", { step: 1, message: `Rate limited — retrying in ${waitSec}s (attempt ${attempt}/${MAX_RETRIES})` }); await new Promise(r => setTimeout(r, waitSec * 1000)); log(1, `Retrying download (attempt ${attempt}/${MAX_RETRIES})...`); // Clean up any partial file from previous attempt await fs.unlink(audioPath).catch(() => {}); } // Try with cookies first if (hasCookies && !usedCookies) { try { log(1, attempt === 0 ? "Trying download with browser cookies (ad-free)..." 
: "Retrying with cookies..."); dlStdout = await attemptDownload([...dlBaseArgs, ...cookieArgs, dlUrl], "cookies"); usedCookies = true; downloaded = true; break; } catch (cookieErr) { const cookieMsg = (cookieErr.stderr || "") + " " + (cookieErr.message || ""); if (attempt === 0) log(1, `⚠ Cookie download failed: ${cookieMsg.trim().slice(0, 200)}`); log(1, "Retrying without cookies..."); await fs.unlink(audioPath).catch(() => {}); } } // Try without cookies if (!downloaded) { try { dlStdout = await attemptDownload([...dlBaseArgs, dlUrl], "no-cookies"); downloaded = true; break; } catch (dlErr) { lastError = (dlErr.stderr || "") + " " + (dlErr.stdout || "") + " " + (dlErr.message || ""); const blocked = isBotBlock(lastError); if (blocked && attempt < MAX_RETRIES) { log(1, `⚠ YouTube bot detection triggered`); // Will loop back and wait continue; } if (blocked && attempt === MAX_RETRIES) { // Last resort: try yt-dlp auto-update in case there's a newer version that handles this log(1, "All retries exhausted — attempting yt-dlp auto-update as last resort..."); const updateResult = await autoUpdateYtdlp(DATA_DIR); if (updateResult.success) { log(1, "yt-dlp updated! Final retry..."); try { const retryResult = await attemptDownload([...dlBaseArgs, dlUrl], "post-update"); dlStdout = retryResult; downloaded = true; break; } catch { /* fall through to error */ } } } // Non-bot error or exhausted retries if (!downloaded) { log(1, `⚠ yt-dlp error: ${lastError.trim().slice(0, 300)}`); } } } } if (!downloaded) { const blocked = isBotBlock(lastError); let hint = ""; if (blocked) { hint = "\n\nYouTube is temporarily blocking downloads from your IP address. 
" + "This is usually caused by:\n" + "• Recent VPN use (YouTube flags VPN IPs)\n" + "• Too many downloads in a short period\n" + "• YouTube's general anti-bot measures\n\n" + "What to try:\n" + "• Wait 10-30 minutes and try again\n" + "• Disconnect any VPN/proxy\n" + "• Upload fresh cookies.txt via Settings\n" + "• Try a different network (mobile hotspot, etc.)"; } throw new Error(`Download failed after ${MAX_RETRIES} retries.${hint}\n\nLast error: ${lastError.trim().slice(0, 300)}`); } if (!usedCookies && hasCookies) { log(1, "⚠ Downloaded without cookies — audio may contain ads"); } const stats = await fs.stat(audioPath); const sizeMB = (stats.size / (1024 * 1024)).toFixed(1); const dlTime = ((Date.now() - dlStart) / 1000).toFixed(1); log(1, `Audio downloaded (${sizeMB} MB) in ${dlTime}s`, `File size: ${sizeMB} MB`); // Extract title from the --print output of the download command const fallbackTitle = videoTitle !== "Untitled" ? videoTitle : null; let gotTitle = false; // First try: parse title from the download stdout (most reliable — same call that succeeded) if (dlStdout) { const printLines = dlStdout.split("\n").map(l => l.trim()).filter(Boolean); for (const line of printLines) { if (line.includes("|||")) { const sep = line.indexOf("|||"); const t = line.slice(0, sep).trim(); const d = line.slice(sep + 3).trim(); if (t && t !== "NA") { videoTitle = t; if (d && d !== "NA") videoUploadDate = d; gotTitle = true; log(1, `Video title: ${videoTitle}`); break; } } } } // Second try: separate yt-dlp call (no cookies needed for public metadata) if (!gotTitle) { try { const { stdout } = await execFileAsync("yt-dlp", [ "--print", "%(title)s|||%(upload_date)s", "--no-download", `https://www.youtube.com/watch?v=${videoId}`, ], { timeout: 15000 }); const raw = stdout.trim(); const sep = raw.indexOf("|||"); if (sep > 0) { videoTitle = raw.slice(0, sep).trim() || fallbackTitle || "Untitled"; const d = raw.slice(sep + 3).trim(); if (d && d !== "NA") videoUploadDate = d || 
videoUploadDate; } else { videoTitle = raw || fallbackTitle || "Untitled"; } gotTitle = videoTitle !== "Untitled"; if (gotTitle) log(1, `Video title: ${videoTitle}`); } catch { // Title fetch failed } } // Third try: use the queue-provided title if (!gotTitle && fallbackTitle) { videoTitle = fallbackTitle; log(1, `Using queue title: ${fallbackTitle}`); gotTitle = true; } if (!gotTitle) { log(1, "⚠ Could not fetch video title"); } } // ── Step 2: Transcribe audio ── // Detect audio duration to choose strategy const audioDuration = await getAudioDuration(audioPath); const audioDurMin = audioDuration ? (audioDuration / 60).toFixed(1) : "unknown"; log(2, `Audio duration: ${audioDuration ? formatTime(Math.floor(audioDuration)) : "unknown"} (${audioDurMin} min)`); // Strategy: // < 60 min AND < 30 MB → Flash on full file (fast, cheap, reliable) // ≥ 60 min OR ≥ 30 MB → go straight to chunked transcription with Flash (45-min chunks) // If full-file transcription is truncated or empty → fall back to chunks const CHUNK_TIME_THRESHOLD = 60 * 60; // 60 minutes const CHUNK_SIZE_THRESHOLD = 30 * 1024 * 1024; // 30 MB let audioFileSize = 0; try { audioFileSize = (await fs.stat(audioPath)).size; } catch {} const audioSizeMB = (audioFileSize / (1024 * 1024)).toFixed(1); const needsChunking = (audioDuration && audioDuration >= CHUNK_TIME_THRESHOLD) || (audioFileSize >= CHUNK_SIZE_THRESHOLD); const transcriptionModel = "gemini-3-flash-preview"; // Flash handles transcription best const analysisModel = model || "gemini-3.1-pro-preview"; if (needsChunking) { const reason = audioDuration >= CHUNK_TIME_THRESHOLD ? `${audioDurMin} min` : `${audioSizeMB} MB`; log(2, `Large audio (${reason}) — will use chunked transcription with ${transcriptionModel}`); } const ai = new GoogleGenAI({ apiKey, httpOptions: { timeout: 900_000, headersTimeout: 900_000 } }); const transcriptionPrompt = (titleHint) => `${titleHint ? 
`Video title: "${titleHint}"\n\n` : ""}Transcribe this audio completely and verbatim. Include timestamps at regular intervals (every 15-30 seconds or at natural pauses). Format each line as: [MM:SS] The spoken text here... Rules: - Transcribe EVERY word spoken, do not skip or summarize anything. - Use [MM:SS] or [H:MM:SS] timestamp format at the start of each line. - Start a new timestamped line every 15-30 seconds or at natural speech pauses. - Include filler words (um, uh, you know) for accuracy. - If multiple speakers are distinguishable, identify them by name when possible (from introductions, context clues, or how they address each other). Format as: [MM:SS] Name: text. If you cannot determine a name, use descriptive labels like "Host" and "Guest" rather than "Speaker 1" and "Speaker 2". Return ONLY the timestamped transcript, nothing else.`; // Helper: upload a single audio file and transcribe it async function transcribeSingleFile(filePath, mType, titleHint, modelName, offsetSeconds = 0) { const upStart = Date.now(); log(2, `Uploading audio${offsetSeconds > 0 ? ` (offset ${formatTime(offsetSeconds)})` : ""} to Gemini File API...`); const uploaded = await ai.files.upload({ file: filePath, config: { mimeType: mType } }); const upTime = ((Date.now() - upStart) / 1000).toFixed(1); log(2, `Audio uploaded in ${upTime}s`, `File ref: ${uploaded.name}`); // Wait for processing let f = uploaded; const pStart = Date.now(); while (f.state === "PROCESSING") { const ws = ((Date.now() - pStart) / 1000).toFixed(0); log(2, `Waiting for Gemini to process audio... (${ws}s)`); await new Promise(r => setTimeout(r, 3000)); f = await ai.files.get({ name: f.name }); } if (f.state === "FAILED") throw new Error("Gemini failed to process audio file."); const pTime = ((Date.now() - pStart) / 1000).toFixed(1); log(2, `Audio processed in ${pTime}s. 
Transcribing with ${modelName}...`); const prompt = transcriptionPrompt(titleHint); // Only use thinkingConfig for Flash models — Pro doesn't support "minimal" const txConfig = modelName.includes("flash") ? { thinkingConfig: { thinkingLevel: "minimal" } } : {}; // Retry logic that handles both API errors (via retryGemini) AND empty responses const EMPTY_RETRIES = 3; let result; for (let emptyAttempt = 0; emptyAttempt < EMPTY_RETRIES; emptyAttempt++) { result = await retryGemini( () => ai.models.generateContent({ model: modelName, config: { ...txConfig, // Disable safety filters to prevent content blocking safetySettings: [ { category: "HARM_CATEGORY_HARASSMENT", threshold: "BLOCK_NONE" }, { category: "HARM_CATEGORY_HATE_SPEECH", threshold: "BLOCK_NONE" }, { category: "HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold: "BLOCK_NONE" }, { category: "HARM_CATEGORY_DANGEROUS_CONTENT", threshold: "BLOCK_NONE" }, ], }, contents: [{ role: "user", parts: [ { fileData: { fileUri: f.uri, mimeType: mType } }, { text: prompt }, ], }], }), { retries: 3, delayMs: 5000, label: `Transcription${offsetSeconds > 0 ? 
` (chunk@${formatTime(offsetSeconds)})` : ""}`, log: (msg) => log(2, msg) } ); const responseText = safeText(result); if (responseText) break; // Got actual content, done // Log why it's empty const candidate = result?.candidates?.[0]; const finishReason = candidate?.finishReason || "UNKNOWN"; const blockReason = result?.promptFeedback?.blockReason || "none"; log(2, `⚠ Empty response (attempt ${emptyAttempt + 1}/${EMPTY_RETRIES}) — finishReason: ${finishReason}, blockReason: ${blockReason}`); if (emptyAttempt < EMPTY_RETRIES - 1) { const waitSec = 10 * (emptyAttempt + 1); log(2, `Waiting ${waitSec}s before retry...`); await new Promise(r => setTimeout(r, waitSec * 1000)); } } // Clean up uploaded file try { await ai.files.delete({ name: f.name }); } catch {} return result; } // ── Helper: chunked transcription for long audio ── async function transcribeChunked(srcPath, srcMime, title, modelName, logFn) { const chunkDir = path.join(os.tmpdir(), `yt-chunks-${Date.now()}`); await fs.mkdir(chunkDir, { recursive: true }); try { const audioChunks = await splitAudioFile(srcPath, chunkDir, 2700); // 45 min chunks if (!audioChunks || audioChunks.length <= 1) return null; // splitting not needed logFn(`Split audio into ${audioChunks.length} chunks for transcription`); const allEntries = []; let totalIn = 0, totalOut = 0; for (const chunk of audioChunks) { logFn(`Transcribing chunk ${chunk.index + 1}/${audioChunks.length} (starts at ${formatTime(chunk.startOffset)})...`); const chunkResult = await transcribeSingleFile( chunk.path, "audio/mpeg", title, modelName, chunk.startOffset ); const chunkUsage = chunkResult.usageMetadata || {}; const chunkCost = calcCost(modelName, chunkUsage); totalIn += chunkCost.inputTokens; totalOut += chunkCost.outputTokens; const chunkText = safeText(chunkResult); if (!chunkText) { logFn(`⚠ Chunk ${chunk.index + 1} returned empty response — skipping`); continue; } const chunkEntries = parseTimestampedTranscript(chunkText); // Programmatically 
adjust timestamps: the model transcribes from 0:00 relative // to the chunk, so add the chunk's startOffset to get real video timestamps if (chunk.startOffset > 0) { // Check if the model already adjusted (first entry near the startOffset) const firstOffset = chunkEntries.length > 0 ? chunkEntries[0].offset : 0; const alreadyAdjusted = firstOffset >= chunk.startOffset * 0.8; if (!alreadyAdjusted) { for (const e of chunkEntries) { e.offset += chunk.startOffset; } logFn(`Adjusted chunk ${chunk.index + 1} timestamps by +${formatTime(chunk.startOffset)}`); } } logFn(`Chunk ${chunk.index + 1}: ${chunkEntries.length} segments, last timestamp ${chunkEntries.length > 0 ? formatTime(chunkEntries[chunkEntries.length - 1].offset) : "N/A"}`); // Merge: skip entries that overlap with already-transcribed content const lastExistingTime = allEntries.length > 0 ? allEntries[allEntries.length - 1].offset : -1; for (const e of chunkEntries) { if (e.offset > lastExistingTime) allEntries.push(e); } } // Recalculate durations for (let i = 0; i < allEntries.length - 1; i++) { allEntries[i].duration = allEntries[i + 1].offset - allEntries[i].offset; } if (allEntries.length > 0) allEntries[allEntries.length - 1].duration = 15; logFn(`Chunked transcription complete: ${allEntries.length} total segments`); return { entries: allEntries, cost: { inputTokens: totalIn, outputTokens: totalOut, thinkingTokens: 0, totalTokens: totalIn + totalOut, totalCost: "0", totalCostDisplay: "", }, }; } finally { try { await fs.rm(chunkDir, { recursive: true, force: true }); } catch {} } } let entries; let transcriptText = ""; let txCost = { inputTokens: 0, outputTokens: 0, thinkingTokens: 0, totalTokens: 0, totalCost: "0", totalCostDisplay: "$0.00" }; const txStart = Date.now(); if (needsChunking) { // ── Very long audio: go straight to chunked transcription ── log(2, `Skipping full-file attempt — using chunked transcription for ${audioDurMin} min audio`); const chunkedResult = await 
transcribeChunked(audioPath, mimeType, videoTitle, transcriptionModel, (msg) => log(2, msg)); if (chunkedResult && chunkedResult.entries.length > 0) { entries = chunkedResult.entries; txCost = chunkedResult.cost; } else { log(2, `⚠ Chunked transcription returned no entries, trying full file as last resort...`); entries = null; // fall through to full-file attempt below } } if (!entries) { // ── Normal: transcribe full file ── const transcriptResult = await transcribeSingleFile(audioPath, mimeType, videoTitle, transcriptionModel); transcriptText = safeText(transcriptResult); if (!transcriptText) { log(2, `⚠ Full-file transcription returned empty — falling back to chunked transcription...`); const chunkedResult = await transcribeChunked(audioPath, mimeType, videoTitle, transcriptionModel, (msg) => log(2, msg)); if (chunkedResult && chunkedResult.entries.length > 0) { entries = chunkedResult.entries; txCost = chunkedResult.cost; } else { throw new Error("Gemini returned empty transcription for both full file and chunked attempts. 
Try again or use a shorter video."); } } else { const txUsage = transcriptResult.usageMetadata || {}; txCost = calcCost(transcriptionModel, txUsage); const txTime = ((Date.now() - txStart) / 1000).toFixed(1); log(2, `Transcription complete in ${txTime}s`, `${transcriptText.length} chars received`); entries = parseTimestampedTranscript(transcriptText); log(2, `Parsed ${entries.length} transcript segments`); } // ── Truncation detection → fall back to chunks ── if (audioDuration && entries.length > 0) { const lastEntryTime = entries[entries.length - 1].offset; const coverageRatio = lastEntryTime / audioDuration; const missingSeconds = audioDuration - lastEntryTime; if (coverageRatio < 0.90 && missingSeconds > 120) { log(2, `⚠ Transcript truncated — covers ${formatTime(lastEntryTime)} of ${formatTime(Math.floor(audioDuration))} (${(coverageRatio * 100).toFixed(0)}%)`); log(2, `Falling back to chunked transcription...`); const chunkedResult = await transcribeChunked(audioPath, mimeType, videoTitle, transcriptionModel, (msg) => log(2, msg)); if (chunkedResult && chunkedResult.entries.length > 0) { entries = chunkedResult.entries; txCost = chunkedResult.cost; const finalCoverage = entries[entries.length - 1].offset; log(2, `Coverage after chunking: ${formatTime(finalCoverage)} of ${formatTime(Math.floor(audioDuration))}`); } } else { log(2, `Transcript coverage: ${formatTime(lastEntryTime)} of ${formatTime(Math.floor(audioDuration))} (${(coverageRatio * 100).toFixed(0)}%) — OK`); } } } const txTotalTime = ((Date.now() - txStart) / 1000).toFixed(1); log(2, `Total transcription time: ${txTotalTime}s — ${entries.length} segments`); if (!entries || entries.length === 0) { const preview = (transcriptText || "").slice(0, 500).replace(/\n/g, " ↵ "); log(2, `⚠ Transcript parse failed. Preview: ${preview}`); sendEvent(res, "error", { message: "Failed to parse transcript." 
}); sendEvent(res, "result", { videoId, entries: [], chunks: [], rawTranscript: transcriptText }); res.end(); return; } // ── Step 3: Topic analysis with model fallback ── const analysisFallbacks = [ analysisModel, ...[ "gemini-3.1-pro-preview", "gemini-3-pro-preview", "gemini-3-flash-preview", "gemini-2.5-flash", ].filter(m => m !== analysisModel), ]; const analysisPrompt = buildAnalysisPrompt(entries); let analysisResult = null; let usedAnalysisModel = analysisModel; const anaStart = Date.now(); for (const tryModel of analysisFallbacks) { try { log(3, `Analyzing topics across ${entries.length} segments with ${tryModel}...`); analysisResult = await retryGemini( () => ai.models.generateContent({ model: tryModel, contents: [ { role: "user", parts: [{ text: analysisPrompt }], }, ], }), { retries: 2, delayMs: 5000, label: "Analysis", log: (msg) => log(3, msg) } ); usedAnalysisModel = tryModel; break; } catch (fallbackErr) { const msg = fallbackErr?.message || String(fallbackErr); log(3, `⚠ ${tryModel} failed: ${msg.slice(0, 150)}`); if (tryModel !== analysisFallbacks[analysisFallbacks.length - 1]) { log(3, `Falling back to next model...`); } } } if (!analysisResult) { throw new Error("All analysis models failed. Please try again later."); } const analysisText = safeText(analysisResult); if (!analysisText) { throw new Error("Gemini returned an empty analysis. The transcript may be too long for the model. 
Try again."); } const anaTime = ((Date.now() - anaStart) / 1000).toFixed(1); const anaUsage = analysisResult.usageMetadata || {}; const anaCost = calcCost(usedAnalysisModel, anaUsage); // Parse the analysis JSON let analysisJson; try { let jsonStr = analysisText.trim(); const codeBlockMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/); if (codeBlockMatch) jsonStr = codeBlockMatch[1].trim(); analysisJson = JSON.parse(jsonStr); } catch (e) { console.error("Failed to parse analysis JSON:", e.message); sendEvent(res, "error", { message: "Topic analysis returned invalid JSON. Returning raw transcript." }); sendEvent(res, "result", { videoId, entries, chunks: [], rawTranscript: transcriptText }); res.end(); return; } // Build final chunks const chunks = analysisJson.sections.map((section) => { const start = Math.max(0, section.startIndex); const end = Math.min(entries.length - 1, section.endIndex); const sectionEntries = entries.slice(start, end + 1); return { title: section.title, summary: section.summary, entries: sectionEntries, startTime: sectionEntries[0]?.offset || 0, }; }).filter((c) => c.entries.length > 0); const totalTime = ((Date.now() - pipelineStart) / 1000).toFixed(1); log(3, `Topic analysis complete in ${anaTime}s — found ${chunks.length} topics`); log(3, `Pipeline finished in ${totalTime}s`); // Save to history — skipped for free-tier users so we don't pollute // the host's library with anonymous summaries. The result still streams // back so the UI can render it; it just isn't persisted. const contentType = isPodcast ? "podcast" : "youtube"; const historyId = isFreeUser ? 
null : await saveToHistory(videoId, url, videoTitle, chunks, entries, logHistory, videoUploadDate, contentType).catch(() => null); sendEvent(res, "result", { videoId, videoTitle, entries, chunks, historyId, type: contentType }); res.end(); } catch (err) { console.error("Pipeline error:", err); if (!res.headersSent) { res.status(500).json({ error: err.message }); } else { sendEvent(res, "error", { message: err.message }); res.end(); } } finally { if (isFreeUser) freeJobInFlight = false; // Clean up temp files try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {} } }); // ── Helpers ──────────────────────────────────────────────────────────────── // getAudioDuration + splitAudioFile moved to ./audio.js // sendEvent / extractVideoId / formatTime / parseTimestampedTranscript moved to ./util.js // buildAnalysisPrompt moved to ./gemini-helpers.js // ── Network mode ────────────────────────────────────────────────────────── // On StartOS (DATA_DIR=/data): always bind to 0.0.0.0 (container networking) // On local Mac dev: default to localhost (safe on public Wi-Fi) // - Your .app launcher sets LAN_MODE=true (Home) or false (Traveling) // - Running "npm start" directly defaults to localhost const isStartOS = process.env.DATA_DIR && process.env.DATA_DIR !== path.join(__dirname, ".."); const lanMode = isStartOS ? true : process.env.LAN_MODE === "true"; const BIND_HOST = lanMode ? "0.0.0.0" : "127.0.0.1"; app.get("/api/network-mode", (req, res) => { res.json({ lan: lanMode }); }); // ── Start server ─────────────────────────────────────────────────────────── app.listen(PORT, BIND_HOST, async () => { console.log(`\n Recap API running on http://${BIND_HOST}:${PORT}`); console.log(` Data directory: ${DATA_DIR}`); console.log(` Checking yt-dlp...`); const info = await checkYtdlp(); if (!info.installed) { console.log(` ⚠ yt-dlp not found. 
Install it: pip install yt-dlp\n`); } else if (info.updateAvailable) { console.log(` ✓ yt-dlp ${info.version} found`); console.log(` ↑ Update available: ${info.latestVersion}`); console.log(` Auto-updating...`); const result = await autoUpdateYtdlp(DATA_DIR); if (result.success) { const refreshed = await checkYtdlp(); console.log(` ✓ yt-dlp updated to ${refreshed.version}\n`); } else { console.log(` ⚠ Auto-update failed. Run manually: yt-dlp -U\n`); } } else { console.log(` ✓ yt-dlp ${info.version} (up to date)\n`); } // Check subscriptions on startup const subs = await loadSubscriptions(); if (subs.length > 0) { console.log(` 📡 Checking ${subs.length} subscription(s) for new videos...`); await checkSubscriptions().catch(err => console.error(" ⚠ Subscription check error:", err.message)); const pending = autoQueue.filter(q => q.status === "pending").length; const approved = autoQueue.filter(q => q.status === "approved").length; if (pending > 0) console.log(` ✚ ${pending} new video(s) queued from subscriptions`); if (approved > 0) console.log(` ⏳ ${approved} approved video(s) ready for processing`); if (pending === 0 && approved === 0) console.log(` ✓ No new videos from subscriptions`); } // Resume processing any approved items from a previous session const resumeApproved = autoQueue.filter(q => q.status === "approved").length; // Also recover any items stuck in "processing" state from a crash for (const item of autoQueue) { if (item.status === "processing") { console.log(` ⚠ Recovering stuck item: ${item.title}`); item.status = "approved"; // Re-queue for processing } } if (autoQueue.some(q => q.status === "approved")) { await saveAutoQueue(); console.log(` 🔄 Starting background processor...`); // Delay slightly so the server is fully ready before internal HTTP calls setTimeout(() => kickProcessor(), 2000); } console.log(` ⚙ Processing config: ${processingConfig.delaySeconds}s delay, ${processingConfig.enabled ? 
"enabled" : "paused"}`); // Check subscriptions every hour (runs continuously on StartOS) setInterval(() => { checkSubscriptions().catch(err => console.error(" ⚠ Subscription check error:", err.message)); }, 60 * 60 * 1000); });