Files
recap/server/index.js
T
Keysat 373d10595b Pluggable AI providers, relay credit system, picker UX overhaul
Captures roughly forty version bumps (v0.2.6 → v0.2.47) of work that
accumulated without commits.

- Pluggable provider system under server/providers/: gemini, anthropic,
  openai, openai-compatible, ollama, whisper-compatible, relay. Mix and
  match transcription + analysis per request via the picker UI.
- Relay backend integration. Hardcoded relay URL in server/relay-default.js
  (operator-controlled at build time, not user-configurable). New
  /api/relay/{status,policy} endpoints proxy to the relay; balance pings
  populate a cached credit display.
- Per-install identity in server/install-id.js for relay credit accounting.
  Sent to the relay as X-Recap-Install-Id; persists across upgrades, lost
  on a full uninstall + reinstall. Not surfaced in the UI.
- Admin login gate (server/admin-auth.js + setAdminPassword action). Scrypt
  password hash + HMAC-signed session cookie.
- Entitlement scheme rename: pro / max (each paired with subscriptions and
  relay_pro / relay_max), replacing the misleading "core" entitlement
  that conflicted with the user-facing "Core" tier name.
- Activation screen: dynamic credit count pulled from /api/relay/policy,
  "Skip — use free mode" button, accurate paid-feature list.
- Top toolbar: inline credit-balance pill (or "BYO configured" fallback),
  Upgrade + "I have a key" buttons.
- Picker UI: per-provider sections with Save/Test/Delete buttons, sections
  collapsible by chevron, default-collapsed unless currently selected,
  "Use comped credits (reset to relay)" link when the user has strayed,
  green hint under inputs whose values are server-configured.
- Activity log: chevron-collapsible groups per video, refresh-survival via
  localStorage + a 500-entry server-side buffer, explicit Clear button.
- YouTube captions fast-path with user toggle (skips audio download + AI
  transcription when captions are available — uncheck for speaker labels).
- Cancel button: AbortController plumbed through every provider SDK call;
  retryAPI short-circuits on AbortError; cancellation events surface in
  the activity log instead of silent retries.
- Long-video analysis: auto-coalesce transcript entries before building the
  analysis prompt so local-model context windows (32k-ish) don't overflow.
  Original entries preserved for transcript display via an index map; the
  analyzer sees a coarser view but click-to-seek timestamps stay precise.
- StartOS action grouping (Setup / AI Providers) so the actions list is
  navigable.
- Manifest description rewritten to reflect multi-provider support and
  free-tier relay credits.
- Smaller fixes: summarize-button enablement no longer requires a Gemini
  key when other providers are configured; analysis fallback chain handles
  context-length and 503 capacity errors; single-segment expansion for
  providers that don't return per-segment timestamps (Parakeet et al.);
  many other UX polish items.
2026-05-11 23:46:20 -05:00

2772 lines
111 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import express from "express";
import cors from "cors";
import { execFile } from "child_process";
import { promisify } from "util";
import fs from "fs/promises";
import { createWriteStream } from "fs";
import path from "path";
import { fileURLToPath } from "url";
import os from "os";
import https from "https";
import http from "http";
import { randomUUID } from "crypto";
import * as license from "./license.js";
import {
  sendEvent,
  extractVideoId,
  formatTime,
  parseTimestampedTranscript,
  synthesizeEntriesFromText,
  safeText,
  retryGemini,
  fetchUrl,
} from "./util.js";
import { buildAnalysisPrompt } from "./gemini-helpers.js";
import { getProvider, resolveProviderOpts, PROVIDER_NAMES } from "./providers/index.js";
import {
  getAudioDuration,
  splitAudioFile,
  downloadPodcastAudio,
} from "./audio.js";
import { checkYtdlp, autoUpdateYtdlp } from "./ytdlp.js";
import {
  initCookies,
  ytCookieArgs,
  ytExtraArgs,
  ytCookieMethod,
  setupCookieRoutes,
  getCookieFilePath,
} from "./cookies.js";
import * as config from "./config.js";
import { initInstallId, getInstallId } from "./install-id.js";
import * as relayState from "./relay-state.js";
import * as relayDefault from "./relay-default.js";
import { resolveApiKey, getConfigSnapshot } from "./config.js";
import * as licenseMW from "./license-middleware.js";
import {
  setupLicenseMiddleware,
  setupLicenseRoutes,
  startLicenseRefresh,
  refreshLicenseOnline,
  isFreeUser,
  tryAcquireFreeSlot,
  releaseFreeSlot,
  getCurrentFreeJob,
  abortCurrentFreeJob,
  isFreeJobAborted,
  appendCurrentJobLog,
} from "./license-middleware.js";
import {
  initHistory,
  saveToHistory,
  loadMeta,
  saveMeta,
  setupHistoryRoutes,
  getHistoryDir,
} from "./history.js";
import { setupLibraryRoutes } from "./library.js";
import {
  initAdminAuth,
  setupAdminAuthMiddleware,
  setupAdminAuthRoutes,
} from "./admin-auth.js";
// Promise-returning wrapper around child_process.execFile; used below to
// run yt-dlp.
const execFileAsync = promisify(execFile);
const app = express();
const PORT = process.env.PORT || 3001;
// ── Data directory (configurable for StartOS or local dev) ────────────────
// On StartOS: DATA_DIR=/data (persistent volume)
// On local dev: defaults to project root (parent of server/)
//
// fileURLToPath (rather than URL#pathname) is used so the result is a real
// filesystem path: percent-escapes such as %20 are decoded and Windows
// drive-letter paths are produced correctly.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const DATA_DIR = process.env.DATA_DIR || path.join(__dirname, "..");
const historyDir = path.join(DATA_DIR, "history");
const configDir = path.join(DATA_DIR, "config");
// Directory creation stays best-effort (startup continues either way), but a
// failure is logged so a read-only or missing volume is visible in the logs.
for (const dir of [historyDir, configDir]) {
  await fs.mkdir(dir, { recursive: true }).catch((err) => {
    console.warn(` ⚠ Could not create directory ${dir}: ${err?.message || err}`);
  });
}
await initHistory({ dataDir: DATA_DIR });
// Per-install identity for the relay backend. Initialised here with
// DATA_DIR; exposed via /api/health and /api/install-id (read-only).
// See ./install-id.js for details.
await initInstallId({ dataDir: DATA_DIR });
// API key + live reload moved to ./config.js
await config.initConfig({ dataDir: DATA_DIR });
const envPath = config.getEnvPath();
// Cookies state + helpers + routes moved to ./cookies.js
await initCookies({ dataDir: DATA_DIR, envPath });
// Admin login gate state. Reads username + scrypt hash + session secret
// from /data/config/startos-config.json (set via the "Set Admin
// Password" StartOS action) and refreshes on the same poll cadence as
// config.js. When no hash is set, the gate is a no-op.
await initAdminAuth({ dataDir: DATA_DIR });
// resolveApiKey moved to ./config.js
// Global middleware. cors() is called with no options (package defaults);
// JSON request bodies are parsed with a 100mb size limit.
app.use(cors());
app.use(express.json({ limit: "100mb" }));
// ── Admin login gate ───────────────────────────────────────────────────────
// MUST run before the license middleware: if an admin password is set,
// nobody (licensed or not) reaches the activation flow without first
// passing the login. Endpoints needed by the login UI itself
// (/api/admin/status, /api/admin/login, /api/admin/logout) and
// /api/health stay open.
setupAdminAuthMiddleware(app);
setupAdminAuthRoutes(app);
// ── Keysat licensing ────────────────────────────────────────────────────────
// All license-aware request handling (gate, Pro feature gates, /api/license
// routes, free-tier slot management, periodic online refresh) lives in
// ./license-middleware.js. Importers read the current state via
// licenseMW.LIC (a live binding).
setupLicenseMiddleware(app);
setupLicenseRoutes(app);
startLicenseRefresh();
// History storage + routes moved to ./history.js
// (saveToHistory, loadMeta, saveMeta are imported above)
// Late-bound addToSkipList — defined later in this file for now. The arrow
// wrapper means addToSkipList is only looked up when a history route
// actually invokes the callback.
setupHistoryRoutes(app, { addToSkipList: (id) => addToSkipList(id) });
// Serve the frontend from ../public
// Static handlers are registered after the admin and license middleware above.
app.use(express.static(path.join(__dirname, "..", "public")));
app.use("/assets", express.static(path.join(__dirname, "..", "assets")));
// checkYtdlp + autoUpdateYtdlp moved to ./ytdlp.js
// PRICING + calcCost + buildAnalysisPrompt moved to ./gemini-helpers.js
// safeText + retryGemini moved to ./util.js
// ── Health check ───────────────────────────────────────────────────────────
/**
 * GET /api/health
 *
 * Responds with the yt-dlp check result, whether a server-side API key is
 * present, the active cookie method and the install id. When the cookie
 * method is "cookies.txt" the age of the cookie file (in whole days) is
 * included as well.
 */
app.get("/api/health", async (_req, res) => {
  const ytdlpInfo = await checkYtdlp();

  const method = ytCookieMethod();
  const cookies = { method };
  if (method === "cookies.txt") {
    const MS_PER_DAY = 1000 * 60 * 60 * 24;
    try {
      const { mtimeMs } = await fs.stat(getCookieFilePath());
      const fileAgeDays = Math.floor((Date.now() - mtimeMs) / MS_PER_DAY);
      cookies.fileAgeDays = fileAgeDays;
      // cookies typically expire after ~14 days, so warn a little early
      cookies.fileExpiring = fileAgeDays > 12;
    } catch {
      // Best effort: an unreadable cookie file just omits the age fields.
    }
  }

  res.json({
    ok: true,
    ytdlp: ytdlpInfo.installed,
    hasServerKey: Boolean(config.serverApiKey),
    cookies,
    installId: getInstallId(),
    ...ytdlpInfo,
  });
});
// Read-only install identity (see ./install-id.js). Intended for the UI to
// confirm the install has been provisioned and for the relay client, which
// sends this value as the X-Recap-Install-Id header. Per the original design
// note this is an open path — the license gate is not meant to apply, since
// the ID must be reachable before any relay credits have been granted.
app.get("/api/install-id", (_req, res) => {
  const installId = getInstallId();
  res.json({ installId });
});
// GET /api/relay/policy — proxies the relay's /relay/policy endpoint so the
// UI can render dynamic copy (e.g. "N relay credits" on the activation
// screen follows whatever quota the operator has configured on the relay
// for Core users, with no Recap update needed). A successful response is
// kept in memory for five minutes so a busy UI doesn't hammer the relay;
// failures are never cached.
let __cachedRelayPolicy = { at: 0, body: null };
app.get("/api/relay/policy", async (_req, res) => {
  const relayBase = relayDefault.getRelayBaseURL();
  if (!relayBase) {
    res.json({ configured: false, tiers: null });
    return;
  }

  const CACHE_TTL_MS = 5 * 60 * 1000;
  const { at: cachedAt, body: cachedBody } = __cachedRelayPolicy;
  if (cachedBody && Date.now() - cachedAt < CACHE_TTL_MS) {
    res.json({ configured: true, ...cachedBody });
    return;
  }

  // Shape shared by every "relay configured but policy unavailable" reply.
  const replyUnavailable = (error) => {
    res.json({ configured: true, tiers: null, error });
  };

  try {
    const policyUrl = `${relayBase.replace(/\/$/, "")}/relay/policy`;
    const upstream = await fetch(policyUrl, {
      signal: AbortSignal.timeout(5000),
    });
    if (!upstream.ok) {
      replyUnavailable(`HTTP ${upstream.status}`);
      return;
    }
    const policy = await upstream.json();
    __cachedRelayPolicy = { at: Date.now(), body: policy };
    res.json({ configured: true, ...policy });
  } catch (err) {
    replyUnavailable(err?.message || String(err));
  }
});
// GET /api/relay/status — last-known relay state (credits + tier) from the
// in-process cache in ./relay-state.js. The UI polls this for its
// "N credits remaining · Tier: X" banner.
//
// First-paint behaviour: when the cache holds neither a credit count nor a
// tier, the handler pings the relay's balance endpoint (5 s timeout) so the
// banner can show real numbers on first load. `?refresh=1` (or `true`)
// forces that ping even when a value is cached. The ping is best effort —
// on failure the error is logged and recorded, and whatever the cache then
// holds is returned.
//
// `configured` is true when relay-default.js has a non-empty URL —
// operator-controlled at build time, never user-configurable.
app.get("/api/relay/status", async (req, res) => {
  const configured = Boolean(relayDefault.getRelayBaseURL());
  let snapshot = relayState.getRelayState();

  const { refresh } = req.query;
  const forceRefresh = refresh === "1" || refresh === "true";
  const nothingCached =
    snapshot.creditsRemaining === null && snapshot.tier === null;
  const shouldPing = configured && (nothingCached || forceRefresh);

  if (shouldPing) {
    try {
      const cfg = await getConfigSnapshot();
      const relayOpts = resolveProviderOpts("relay", { config: cfg, clientOpts: {} });
      const relay = getProvider("relay", relayOpts);
      await relay.pingBalance({ timeoutMs: 5000 });
      snapshot = relayState.getRelayState();
    } catch (err) {
      // Log + record so the UI shows a real error instead of a silent
      // "balance unknown". The provider records failures that reach the
      // network itself; this covers earlier ones (e.g. the provider
      // failing to construct) where lastError would otherwise stay unset.
      const reason = err?.message || err;
      const stack = err?.stack || "n/a";
      console.error(`[relay/status] ping failed: ${reason} (stack: ${stack})`);
      relayState.recordRelayError(err?.message || String(err));
      snapshot = relayState.getRelayState();
    }
  }

  res.json({ ...snapshot, configured });
});
// ── Status endpoints ───────────────────────────────────────────────────────
// POST /api/heartbeat — fixed liveness reply; nothing is read from the request.
app.post("/api/heartbeat", (_req, res) => {
  const payload = { ok: true, sleeping: false };
  res.json(payload);
});
// GET /api/status — same liveness reply plus the process uptime in seconds.
app.get("/api/status", (_req, res) => {
  const uptime = process.uptime();
  res.json({ ok: true, sleeping: false, uptime });
});
// POST /api/shutdown — lets the macOS .app launcher stop the server cleanly.
// Unused on StartOS, which manages the container lifecycle itself.
// The reply is sent first; the process exits shortly afterwards so the
// response has a chance to reach the caller.
app.post("/api/shutdown", (_req, res) => {
  const EXIT_DELAY_MS = 300;
  res.json({ ok: true, message: "Server shutting down..." });
  console.log("\n Server shutdown requested from browser. Goodbye!\n");
  setTimeout(() => {
    process.exit(0);
  }, EXIT_DELAY_MS);
});
// ── Manual update endpoint ─────────────────────────────────────────────────
// POST /api/update-ytdlp — runs the yt-dlp updater, then re-checks the
// binary and returns both results merged (check fields win on key clashes).
app.post("/api/update-ytdlp", async (_req, res) => {
  const updateResult = await autoUpdateYtdlp(DATA_DIR);
  const ytdlpInfo = await checkYtdlp();
  const payload = { ...updateResult, ...ytdlpInfo };
  res.json(payload);
});
// ── Cookie management endpoints ───────────────────────────────────────────
// /api/cookies/* routes registered via setupCookieRoutes (./cookies.js)
setupCookieRoutes(app);
// ── Library export/import ──── moved to ./library.js ─────────
// Routes are registered on the shared Express app by setupLibraryRoutes.
setupLibraryRoutes(app);
// ── Subscriptions ─────────────────────────────────────────────────────────
// Subscriptions are persisted as { subscriptions: [...] } in the history dir.
const subsPath = path.join(historyDir, "subscriptions.json");

/**
 * Read the stored subscription list.
 * @returns {Promise<Array>} the stored list, or [] when the file is missing,
 *   unreadable, not valid JSON, or has no `subscriptions` value.
 */
async function loadSubscriptions() {
  try {
    const raw = await fs.readFile(subsPath, "utf-8");
    const { subscriptions } = JSON.parse(raw);
    return subscriptions || [];
  } catch {
    return [];
  }
}

/**
 * Overwrite the subscriptions file with the given list (pretty-printed).
 * @param {Array} subs - full subscription list to persist.
 */
async function saveSubscriptions(subs) {
  const payload = JSON.stringify({ subscriptions: subs }, null, 2);
  await fs.writeFile(subsPath, payload);
}
/**
 * List recent videos from a channel/playlist via yt-dlp without downloading.
 * Uses --flat-playlist for speed, so only id + title are available (no
 * upload_date in flat mode).
 *
 * @param {string} url - channel or playlist URL handed to yt-dlp.
 * @param {number} [limit=15] - passed to yt-dlp as --playlist-end.
 * @returns {Promise<Array<{id: string, title: string}>>} one entry per
 *   well-formed "id|title" output line.
 * @throws when yt-dlp exits with an error or exceeds the 60 s timeout.
 */
async function listChannelVideosFast(url, limit = 15) {
  const { stdout } = await execFileAsync("yt-dlp", [
    "--print", "%(id)s|%(title)s",
    "--no-download",
    "--playlist-end", String(limit),
    "--flat-playlist",
    ...ytCookieArgs(),
    ...ytExtraArgs(),
    url,
  ], { timeout: 60000 });

  const videos = [];
  for (const line of stdout.trim().split("\n").filter(Boolean)) {
    // Split on the FIRST "|" only — titles may contain the character.
    const sep = line.indexOf("|");
    // A line without a separator (or with an empty id) is not a video
    // record; slicing it anyway would yield an id with its last character
    // chopped off, so skip it.
    if (sep <= 0) continue;
    videos.push({ id: line.slice(0, sep), title: line.slice(sep + 1) });
  }
  return videos;
}
/**
 * Fetch upload_date for a list of video IDs via yt-dlp, in batches of 50
 * per invocation (each batch has a 45 s timeout).
 * Bails after 2 consecutive failed batches to avoid grinding through
 * blocked requests.
 *
 * @param {string[]} videoIds - video IDs to look up.
 * @returns {Promise<Object<string, string>>} map of videoId → upload_date
 *   exactly as printed by yt-dlp; IDs with no date (or "NA") are omitted.
 */
async function fetchUploadDates(videoIds) {
  if (videoIds.length === 0) return {};
  const dateMap = {};
  const batchSize = 50;
  let consecutiveFails = 0;
  for (let i = 0; i < videoIds.length; i += batchSize) {
    const batchNo = Math.floor(i / batchSize) + 1;
    const batch = videoIds.slice(i, i + batchSize);
    const urls = batch.map((id) => `https://www.youtube.com/watch?v=${id}`);
    try {
      const { stdout } = await execFileAsync("yt-dlp", [
        "--print", "%(id)s|%(upload_date)s",
        "--no-download",
        ...ytCookieArgs(),
        ...urls,
      ], { timeout: 45000 });
      let found = 0;
      for (const line of stdout.trim().split("\n").filter(Boolean)) {
        const [id, date] = line.split("|");
        if (id && date && date !== "NA") {
          dateMap[id] = date;
          found++;
        }
      }
      consecutiveFails = 0;
      // Report what was actually parsed, not just the size of the request.
      subLog(` Batch ${batchNo}: got dates for ${found} of ${batch.length} video(s)`);
    } catch (err) {
      consecutiveFails++;
      // Guard against thrown values that carry no string message.
      const reason = String(err?.message || err).slice(0, 80);
      subLog(` ⚠ Batch ${batchNo} failed: ${reason}`);
      if (consecutiveFails >= 2) {
        subLog(` ⚠ 2 consecutive failures — aborting yt-dlp date fetch (bot detection likely)`);
        break;
      }
    }
  }
  return dateMap;
}
// ── RSS-based date fetching (bypasses bot detection) ─────────────────────
// Fetch a URL and return the response body as a string
// fetchUrl moved to ./util.js
/**
 * Resolve the channel_id behind a YouTube channel/playlist URL using yt-dlp.
 *
 * Two attempts are made in order: a fast --flat-playlist probe (15 s
 * timeout, may print "NA"), then a slower non-flat probe of the first item
 * (30 s timeout) that reads full video metadata. A failed attempt simply
 * falls through to the next one.
 *
 * @param {string} url - channel or playlist URL.
 * @returns {Promise<string|null>} the channel id, or null if neither
 *   attempt printed a usable value.
 */
async function getChannelId(url) {
  const attempts = [
    { extraFlags: ["--flat-playlist"], timeout: 15000 },
    { extraFlags: [], timeout: 30000 },
  ];

  for (const { extraFlags, timeout } of attempts) {
    try {
      const { stdout } = await execFileAsync("yt-dlp", [
        "--print", "%(channel_id)s",
        "--no-download",
        "--playlist-items", "1",
        ...extraFlags,
        ...ytCookieArgs(),
        url,
      ], { timeout });
      const [candidate] = stdout.trim().split("\n");
      const looksValid = candidate && candidate !== "NA" && candidate.length > 5;
      if (looksValid) return candidate;
    } catch {
      // This attempt failed — try the next one (or give up below).
    }
  }

  return null;
}
/**
 * Fetch video publish dates from a channel's public YouTube RSS feed
 * (no auth, no bot detection; the feed covers roughly the 15 most recent
 * videos).
 *
 * @param {string|null} channelId - channel id; a falsy value yields {}.
 * @returns {Promise<Object<string, string>>} map of videoId → "YYYYMMDD".
 *   A fetch failure is logged via subLog and whatever was collected so far
 *   (normally nothing) is returned.
 */
async function fetchDatesFromRSS(channelId) {
  const dates = {};
  if (!channelId) return dates;

  try {
    const feedUrl = `https://www.youtube.com/feeds/videos.xml?channel_id=${channelId}`;
    const xml = await fetchUrl(feedUrl);
    const entryPattern = /<entry>[\s\S]*?<yt:videoId>([^<]+)<\/yt:videoId>[\s\S]*?<published>([^<]+)<\/published>[\s\S]*?<\/entry>/g;
    for (const [, videoId, published] of xml.matchAll(entryPattern)) {
      // published looks like "2025-12-01T18:00:00+00:00" → keep "20251201"
      dates[videoId] = published.slice(0, 10).replace(/-/g, "");
    }
  } catch (err) {
    subLog(` ⚠ RSS feed fetch failed: ${err.message}`);
  }

  return dates;
}
// ── Podcast RSS feed parsing ────────────────────────────────────────────────
/**
 * Heuristic: does this URL look like a podcast RSS feed?
 * The URL is trimmed and lower-cased, then checked for any of a fixed list
 * of substrings (generic feed markers plus known podcast hosts).
 *
 * @param {string} url - candidate URL; any falsy value returns false.
 * @returns {boolean} true when at least one marker occurs in the URL.
 */
function isPodcastFeedUrl(url) {
  if (!url) return false;

  const feedMarkers = [
    // Generic feed patterns
    "/feed", "/rss", "feeds.", ".xml",
    // Known podcast hosting providers
    "anchor.fm", "feeds.buzzsprout", "feeds.simplecast",
    "feeds.megaphone", "feeds.transistor", "feeds.libsyn",
    "feeds.podcastmirror", "feeds.acast", "feeds.fireside",
    "rss.art19", "podbean.com/feed",
  ];
  const normalized = url.trim().toLowerCase();
  return feedMarkers.some((marker) => normalized.includes(marker));
}
/**
 * Fetch and parse a podcast RSS feed with lightweight regex extraction
 * (not a full XML parser).
 *
 * @param {string} feedUrl - RSS feed URL, fetched via fetchUrl.
 * @param {number} [limit=200] - maximum number of episodes to return.
 * @returns {Promise<{title: string, episodes: Array<{id: string,
 *   title: string, date: string, audioUrl: string, duration: string}>}>}
 *   `date` is "YYYYMMDD" (UTC) or "" when <pubDate> is missing/unparseable.
 *   Items without an <enclosure> URL are skipped.
 * @throws whatever fetchUrl throws when the feed cannot be retrieved.
 */
async function parsePodcastRSS(feedUrl, limit = 200) {
  const xml = await fetchUrl(feedUrl);

  // Predefined XML entities plus numeric character references.
  const XML_ENTITIES = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" };
  const decodeXmlEntities = (text) =>
    text.replace(/&(#[xX][0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g, (whole, ref) => {
      if (ref[0] !== "#") return XML_ENTITIES[ref];
      const isHex = ref[1] === "x" || ref[1] === "X";
      const codePoint = Number.parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      // Leave out-of-range references exactly as written.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
    });

  // First capture group of `pattern` in `text`, trimmed; null if no match.
  const firstGroup = (text, pattern) => {
    const m = text.match(pattern);
    return m ? m[1].trim() : null;
  };

  // Extract podcast title
  const podcastTitle =
    firstGroup(xml, /<channel>[\s\S]*?<title>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?<\/title>/) ??
    "Unknown Podcast";

  // Extract episodes from <item> elements
  const episodes = [];
  const itemRegex = /<item>([\s\S]*?)<\/item>/g;
  let match;
  while ((match = itemRegex.exec(xml)) !== null && episodes.length < limit) {
    const item = match[1];

    // Audio enclosure URL — episodes without audio are skipped.
    const rawAudioUrl = firstGroup(item, /<enclosure[^>]+url=["']([^"']+)["']/);
    if (!rawAudioUrl) continue;
    // The URL comes from an XML attribute, where "&" must be written as
    // "&amp;". Decode it so multi-parameter URLs are requested correctly.
    const audioUrl = decodeXmlEntities(rawAudioUrl);

    // GUID (unique episode identifier)
    const guid = firstGroup(item, /<guid[^>]*>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?<\/guid>/);

    // Title (left exactly as extracted, as before)
    const epTitle =
      firstGroup(item, /<title>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?<\/title>/) ??
      "Untitled Episode";

    // Publish date → YYYYMMDD
    const pubDate = firstGroup(item, /<pubDate>([^<]+)<\/pubDate>/);
    let dateStr = "";
    if (pubDate) {
      const parsed = new Date(pubDate);
      if (!Number.isNaN(parsed.getTime())) {
        dateStr = parsed.toISOString().slice(0, 10).replace(/-/g, "");
      }
    }

    // Duration (itunes:duration)
    const duration = firstGroup(item, /<itunes:duration>([^<]+)<\/itunes:duration>/) ?? "";

    // Use guid, or fall back to the enclosure URL as unique ID. The RAW
    // (undecoded) URL is used here on purpose so ids stay identical to the
    // ones this function produced before entity decoding was added.
    const id = guid || rawAudioUrl;
    episodes.push({ id, title: epTitle, date: dateStr, audioUrl, duration });
  }
  return { title: podcastTitle, episodes };
}
// Download a podcast episode audio file via HTTP(S) to a local path
// downloadPodcastAudio moved to ./audio.js
/**
 * Best-effort lookup of a human-readable channel name for a URL.
 *
 * Tries, in order: a fast yt-dlp --flat-playlist probe (15 s timeout), a
 * slower non-flat probe that reads the channel from video metadata (30 s
 * timeout), and finally the "@handle" segment of the URL path. Every step
 * that fails is skipped silently.
 *
 * @param {string} url - channel or playlist URL.
 * @returns {Promise<string>} the channel name, "@handle", or
 *   "Unknown Channel" when nothing could be determined.
 */
async function fetchChannelName(url) {
  const probes = [
    { extraFlags: ["--flat-playlist"], timeout: 15000 },
    { extraFlags: [], timeout: 30000 },
  ];

  for (const { extraFlags, timeout } of probes) {
    try {
      const { stdout } = await execFileAsync("yt-dlp", [
        "--print", "%(channel)s",
        "--no-download",
        "--playlist-end", "1",
        ...extraFlags,
        ...ytCookieArgs(),
        url,
      ], { timeout });
      const [firstLine] = stdout.trim().split("\n");
      if (firstLine && firstLine !== "NA") return firstLine;
    } catch {
      // Probe failed — fall through to the next strategy.
    }
  }

  // Last resort: extract handle from URL
  try {
    const { pathname } = new URL(url);
    const handle = pathname.match(/\/@([^/]+)/);
    if (handle) return "@" + handle[1];
  } catch {
    // Not a parseable URL — use the generic label below.
  }

  return "Unknown Channel";
}
// ── In-process write serialization ────────────────────────────────────────
// Per-key promise chain to prevent lost-update races on read-modify-write
// state files (skip-list, seen-list). Without this, two concurrent DELETE
// handlers can each load the same snapshot, add their own id, and the
// second write overwrites the first — silently dropping entries.
const _fileLocks = new Map();

/**
 * Run `fn` after every previously queued task for `key` has settled.
 *
 * @param {string} key - lock identity (callers here use the file path).
 * @param {Function} fn - task to run; called with NO arguments. May be
 *   sync or async.
 * @returns {Promise<*>} settles with fn's own result or rejection. A
 *   rejection is reported to this caller only; it does not block or
 *   reject tasks queued after it.
 */
function withFileLock(key, fn) {
  const prev = _fileLocks.get(key) || Promise.resolve();
  // `prev` is always a swallowed tail (see below) and therefore never
  // rejects. Wrapping in an arrow keeps the previous task's result from
  // being passed to fn as an argument.
  const next = prev.then(() => fn());
  // The stored tail carries neither the result nor the error of `next`, so
  // later tasks only wait on it.
  const tail = next.then(() => {}, () => {});
  _fileLocks.set(key, tail);
  // Drop the entry once the chain is idle so keys do not accumulate; the
  // identity check leaves it alone if another task was queued meanwhile.
  tail.then(() => {
    if (_fileLocks.get(key) === tail) _fileLocks.delete(key);
  });
  return next;
}
// Skip list — videos deleted from history that subscriptions should not
// re-add. Stored on disk as { videoIds: [...] }.
const skipPath = path.join(historyDir, "skip-list.json");

/**
 * Load the skip list.
 * @returns {Promise<Set>} stored ids, or an empty Set when the file is
 *   missing, unreadable or malformed.
 */
async function loadSkipList() {
  try {
    const raw = await fs.readFile(skipPath, "utf-8");
    const { videoIds } = JSON.parse(raw);
    return new Set(videoIds || []);
  } catch {
    return new Set();
  }
}

/**
 * Add one id to the skip list. The read-modify-write runs under
 * withFileLock so concurrent additions cannot overwrite each other.
 * @param {string} videoId - id to record.
 */
async function addToSkipList(videoId) {
  const appendAndPersist = async () => {
    const ids = await loadSkipList();
    ids.add(videoId);
    const payload = JSON.stringify({ videoIds: Array.from(ids) });
    await fs.writeFile(skipPath, payload);
  };
  return withFileLock(skipPath, appendAndPersist);
}
// Seen list — videos already offered for approval (persists across
// restarts). Stored on disk as { videoIds: [...] }.
const seenPath = path.join(historyDir, "seen-list.json");

/**
 * Load the seen list.
 * @returns {Promise<Set>} stored ids, or an empty Set when the file is
 *   missing, unreadable or malformed.
 */
async function loadSeenList() {
  try {
    const raw = await fs.readFile(seenPath, "utf-8");
    const { videoIds } = JSON.parse(raw);
    return new Set(videoIds || []);
  } catch {
    return new Set();
  }
}

/**
 * Merge a batch of ids into the seen list. The read-modify-write runs
 * under withFileLock so concurrent updates cannot overwrite each other.
 * @param {Iterable<string>} videoIds - ids to record.
 */
async function addToSeenList(videoIds) {
  const mergeAndPersist = async () => {
    const existing = await loadSeenList();
    const merged = new Set([...existing, ...videoIds]);
    await fs.writeFile(seenPath, JSON.stringify({ videoIds: [...merged] }));
  };
  return withFileLock(seenPath, mergeAndPersist);
}
/**
 * Collect the videoId of every history entry on disk.
 *
 * Scans the history directory for *.json files, ignoring the bookkeeping
 * files that live alongside the entries, and reads each one in turn.
 * Unreadable or malformed files are skipped; if the directory itself cannot
 * be listed the result is an empty Set.
 *
 * @returns {Promise<Set>} the truthy `videoId` values found.
 */
async function getProcessedVideoIds() {
  const bookkeepingFiles = new Set([
    "_meta.json",
    "subscriptions.json",
    "skip-list.json",
    "seen-list.json",
    "auto-queue.json",
  ]);
  const found = new Set();

  let names;
  try {
    names = await fs.readdir(historyDir);
  } catch {
    return found;
  }

  for (const name of names) {
    if (!name.endsWith(".json") || bookkeepingFiles.has(name)) continue;
    try {
      const contents = await fs.readFile(path.join(historyDir, name), "utf-8");
      const { videoId } = JSON.parse(contents);
      if (videoId) found.add(videoId);
    } catch {
      // Skip entries that cannot be read or parsed.
    }
  }
  return found;
}
// Server-side auto-queue for subscription-discovered videos. Held in memory
// in `autoQueue` and persisted to disk as { items: [...] }.
const autoQueuePath = path.join(historyDir, "auto-queue.json");
let autoQueue = [];

/**
 * Read the persisted queue.
 * @returns {Promise<Array>} stored items, or [] when the file is missing,
 *   unreadable, malformed, or has no `items` value.
 */
async function loadAutoQueue() {
  try {
    const raw = await fs.readFile(autoQueuePath, "utf-8");
    return JSON.parse(raw).items || [];
  } catch {
    return [];
  }
}

/** Write the current in-memory queue to disk (pretty-printed). */
async function saveAutoQueue() {
  const payload = JSON.stringify({ items: autoQueue }, null, 2);
  await fs.writeFile(autoQueuePath, payload);
}

// Load persisted queue on startup, filtering out already-processed items.
// The file is only rewritten when something was actually dropped.
autoQueue = await loadAutoQueue();
{
  const alreadyProcessed = await getProcessedVideoIds();
  const stillQueued = autoQueue.filter((entry) => !alreadyProcessed.has(entry.videoId));
  const removedCount = autoQueue.length - stillQueued.length;
  autoQueue = stillQueued;
  if (removedCount > 0) {
    console.log(` Auto-queue: removed ${removedCount} already-processed item(s)`);
    await saveAutoQueue();
  }
}
// ── Background processing queue ──────────────────────────────────────────
// Processes "approved" auto-queue items sequentially with configurable delay
// between items to avoid hammering YouTube with rapid-fire downloads.
const processingConfigPath = path.join(configDir, "processing-config.json");

// User-tunable settings; the values below are the defaults used until (and
// unless) the config file overrides them.
let processingConfig = {
  // Seconds to wait between queue items (default 5 minutes)
  delaySeconds: 300,
  // Whether background processing is active
  enabled: true,
};

// Runtime state of the processor loop (not persisted).
let processingState = {
  // Is the processor loop currently active?
  running: false,
  // The item currently being processed (or null)
  currentItem: null,
  // Timestamp of last completed item
  lastCompleted: null,
  // If true, skip delay before next item
  rush: false,
  // Recent processing log entries
  log: [],
};

/**
 * Log a processor message to the console and to the in-memory log, which
 * is capped at the 100 most recent entries.
 * @param {string} msg - message text.
 */
function procLog(msg) {
  console.log(` [processor] ${msg}`);
  const { log } = processingState;
  log.push({ t: new Date().toISOString(), msg });
  if (log.length > 100) {
    log.shift();
  }
}

/**
 * Apply persisted settings on top of the defaults. Only correctly-typed
 * fields are taken; a missing or malformed file leaves the defaults as-is.
 */
async function loadProcessingConfig() {
  try {
    const raw = await fs.readFile(processingConfigPath, "utf-8");
    const { delaySeconds, enabled } = JSON.parse(raw);
    if (typeof delaySeconds === "number") {
      processingConfig.delaySeconds = delaySeconds;
    }
    if (typeof enabled === "boolean") {
      processingConfig.enabled = enabled;
    }
  } catch {
    // No usable config file — keep the defaults.
  }
}

/** Persist the current processing settings (pretty-printed). */
async function saveProcessingConfig() {
  const payload = JSON.stringify(processingConfig, null, 2);
  await fs.writeFile(processingConfigPath, payload);
}

await loadProcessingConfig();
/**
 * The background processor: picks the next "approved" auto-queue item,
 * processes it via an internal HTTP request, waits the configured delay,
 * then picks the next one. Returns when there is nothing approved left or
 * when processing is paused; kickProcessor() starts it again.
 *
 * Only one loop runs at a time (guarded by processingState.running). The
 * flag is cleared in a `finally` so that an unexpected error — e.g. a
 * failed queue save — cannot leave it stuck at true, which would prevent
 * the processor from ever being started again.
 *
 * @returns {Promise<void>}
 * @throws if persisting the queue fails; item-level processing errors are
 *   caught and recorded on the item instead.
 */
async function backgroundProcessor() {
  if (processingState.running) return; // Already running
  processingState.running = true;
  procLog("Background processor started");
  try {
    while (true) {
      // Find next approved item
      const item = autoQueue.find((q) => q.status === "approved");
      if (!item) {
        procLog("No approved items in queue — processor sleeping");
        return;
      }
      if (!processingConfig.enabled) {
        procLog("Processing is paused — processor sleeping");
        return;
      }
      // Wait the configured delay (unless rush mode or first item)
      if (processingState.lastCompleted && !processingState.rush) {
        const delaySec = processingConfig.delaySeconds;
        procLog(`Waiting ${delaySec}s before next item...`);
        await new Promise((r) => setTimeout(r, delaySec * 1000));
      }
      processingState.rush = false;
      // Processing may have been paused while we waited; loop back so the
      // pause check above handles it instead of starting another item.
      if (!processingConfig.enabled) continue;
      // Re-check the item hasn't been removed/changed while we waited
      const freshItem = autoQueue.find((q) => q.id === item.id);
      if (!freshItem || freshItem.status !== "approved") {
        procLog(`Item ${item.id} was removed or status changed — skipping`);
        continue;
      }
      // Mark as processing
      freshItem.status = "processing";
      processingState.currentItem = freshItem;
      await saveAutoQueue();
      procLog(`Processing: ${freshItem.title} (${freshItem.url})`);
      try {
        // Call the existing /api/process endpoint via internal HTTP request
        const result = await processItemInternally(freshItem);
        freshItem.status = "completed";
        freshItem.completedAt = new Date().toISOString();
        freshItem.historyId = result.historyId || null;
        processingState.lastCompleted = new Date().toISOString();
        procLog(`✓ Completed: ${freshItem.title}`);
      } catch (err) {
        freshItem.status = "failed";
        freshItem.error = String(err?.message || err);
        freshItem.failedAt = new Date().toISOString();
        procLog(`✗ Failed: ${freshItem.title} — ${freshItem.error.slice(0, 200)}`);
      }
      processingState.currentItem = null;
      await saveAutoQueue();
    }
  } finally {
    // Runs on every exit path, including thrown errors.
    processingState.running = false;
    processingState.currentItem = null;
  }
}
/**
 * Process one queue item by POSTing to this server's own /api/process
 * endpoint and consuming its SSE stream until it ends. This reuses the
 * existing pipeline without any code duplication.
 *
 * "log" events are forwarded to procLog; the last "result" and "error"
 * events are remembered and decide the outcome when the stream ends.
 *
 * @param {object} item - auto-queue item; reads url, type, title,
 *   uploadDate and videoId.
 * @returns {Promise<object>} payload of the last "result" event.
 * @throws {Error} when no server API key is configured, the stream carries
 *   an "error" event, the stream ends without a result, the connection
 *   fails, or the request exceeds 30 minutes.
 */
function processItemInternally(item) {
  return new Promise((resolve, reject) => {
    // NOTE(review): this guard and its message are Gemini-specific; confirm
    // it is still wanted for installs that rely on other providers.
    const apiKey = resolveApiKey(null); // Use server's stored key
    if (!apiKey) {
      return reject(new Error("No API key configured. Set your Gemini API key in Settings."));
    }
    const body = JSON.stringify({
      url: item.url,
      apiKey: "USE_SERVER_KEY",
      type: item.type || undefined,
      title: item.title || undefined,
      uploadDate: item.uploadDate || undefined,
      episodeId: item.videoId || undefined,
    });
    const req = http.request({
      hostname: "127.0.0.1",
      port: PORT,
      path: "/api/process",
      method: "POST",
      headers: { "Content-Type": "application/json", "Content-Length": Buffer.byteLength(body) },
      timeout: 1800000, // 30 minutes max for very long videos
    }, (res) => {
      // Decode as UTF-8 at the stream level so a multi-byte character that
      // straddles two chunks is not corrupted.
      res.setEncoding("utf8");
      let buffer = "";
      // Parser state lives OUTSIDE the data handler: an "event:" line and
      // its "data:" line can arrive in different chunks (large results make
      // this likely), and the event name must survive between them.
      let currentEvent = null;
      let lastResult = null;
      let lastError = null;
      res.on("data", (chunk) => {
        buffer += chunk;
        // Parse SSE events from the buffer
        const lines = buffer.split("\n");
        buffer = lines.pop() || ""; // Keep incomplete line in buffer
        for (const rawLine of lines) {
          // Tolerate CRLF line endings.
          const line = rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine;
          if (line.startsWith("event: ")) {
            currentEvent = line.slice(7).trim();
          } else if (line.startsWith("data: ") && currentEvent) {
            try {
              const data = JSON.parse(line.slice(6));
              if (currentEvent === "result") lastResult = data;
              if (currentEvent === "error") lastError = data;
              if (currentEvent === "log") {
                procLog(` [${data.elapsed || "?"}s] ${data.message}`);
              }
            } catch {
              // Ignore data lines that are not valid JSON.
            }
            currentEvent = null;
          } else if (line === "") {
            currentEvent = null;
          }
        }
      });
      res.on("end", () => {
        if (lastError) {
          reject(new Error(lastError.message || "Processing failed"));
        } else if (lastResult) {
          resolve(lastResult);
        } else if (res.statusCode === 200) {
          reject(new Error("Processing ended without result"));
        } else {
          // A non-200 reply is not an SSE stream; surface the status so the
          // failure can be diagnosed from the queue item's error text.
          reject(new Error(`Processing ended without result (HTTP ${res.statusCode})`));
        }
      });
      res.on("error", reject);
    });
    req.on("error", reject);
    req.on("timeout", () => {
      req.destroy();
      reject(new Error("Processing timed out after 30 minutes"));
    });
    req.write(body);
    req.end();
  });
}
/**
 * Start the background processor if it is idle, processing is enabled and
 * at least one queue item is "approved". Fire-and-forget: a processor
 * failure is written to the processor log rather than thrown.
 */
function kickProcessor() {
  if (processingState.running) return;
  if (!processingConfig.enabled) return;

  const hasApproved = autoQueue.some((entry) => entry.status === "approved");
  if (!hasApproved) return;

  backgroundProcessor().catch((err) => {
    procLog(`Processor error: ${err.message}`);
  });
}
// State for the subscription checker: whether a check is in flight, the
// promise of that check, and the recent check log for the debug endpoint.
let subCheckRunning = false;
let subCheckPromise = null;
let subCheckLog = [];

/**
 * Log a subscription-check message to the console and to the in-memory
 * check log, which is capped at the 200 most recent entries.
 * @param {string} msg - message text.
 */
function subLog(msg) {
  console.log(msg);
  subCheckLog.push({ t: new Date().toISOString(), msg });
  if (subCheckLog.length > 200) {
    subCheckLog.shift();
  }
}

/**
 * Run a subscription check, never overlapping with another one. If a check
 * is already in flight the caller waits for it to finish and then tries
 * again; otherwise a new check is started.
 * @returns {Promise<void>} settles when the check started on behalf of
 *   this caller has finished.
 */
async function checkSubscriptions() {
  if (!subCheckRunning) {
    subCheckRunning = true;
    subCheckPromise = _checkSubscriptionsInner().finally(() => {
      subCheckRunning = false;
      subCheckPromise = null;
    });
    return subCheckPromise;
  }

  // Wait for current check to finish, then run again
  if (subCheckPromise) {
    await subCheckPromise;
  }
  return checkSubscriptions();
}
// One full pass over every stored subscription. For each non-paused
// subscription it discovers podcast episodes (via the RSS feed) or
// YouTube videos (via the channel listing plus upload dates), drops
// anything already in the library / queue / skip list / seen list, and
// pushes items dated on or after the subscription's cutoff onto
// autoQueue as "approved" (when sub.autoDownload is set) or "pending".
// Items proven older than the cutoff are added to the seen list.
// A failure on one subscription is logged and does not stop the others.
// At the end the subscriptions (if changed) and the auto-queue are
// saved, and the background processor is kicked when approved items exist.
async function _checkSubscriptionsInner() {
// Pro-tier feature: skip silently when not entitled. The HTTP gate above
// returns 402 to callers; this guards the background timer + manual paths.
if (!licenseMW.LIC.entitlements.has("subscriptions")) {
subCheckLog = [];
subLog("Skipped: subscriptions require a Pro license.");
return;
}
subCheckLog = []; // Clear logs for fresh check
const subs = await loadSubscriptions();
if (subs.length === 0) { subLog("No subscriptions found"); return; }
const processedIds = await getProcessedVideoIds();
const skippedIds = await loadSkipList();
const seenIds = await loadSeenList();
const queuedIds = new Set(autoQueue.map(q => {
// For YouTube: extract video ID from URL; for podcasts: use stored videoId (GUID)
if (q.videoId) return q.videoId;
const m = q.url.match(/[?&]v=([a-zA-Z0-9_-]{11})/);
return m ? m[1] : null;
}).filter(Boolean));
subLog(`${processedIds.size} in library, ${skippedIds.size} skipped, ${seenIds.size} seen, ${queuedIds.size} in queue`);
// Set whenever a subscription object is mutated so it gets persisted at the end.
let changed = false;
for (const sub of subs) {
if (sub.paused) { subLog(`${sub.name} — paused, skipping`); continue; }
try {
const icon = sub.type === "podcast" ? "🎙" : "📡";
subLog(`${icon} Checking: ${sub.name} (${sub.url})`);
// Strip the separators from createdAt and keep the first 8 characters
// so the cutoff can be compared as a string against the 8-character
// item dates below (assumes createdAt is an ISO timestamp, giving YYYYMMDD).
const cutoffDate = sub.createdAt.replace(/[-T:\.Z]/g, "").slice(0, 8);
if (sub.type === "podcast") {
// ── Podcast subscription: discover episodes from RSS feed ──
const { episodes } = await parsePodcastRSS(sub.url, 200);
subLog(` Found ${episodes.length} episode(s) in RSS feed`);
if (episodes.length === 0) {
sub.lastChecked = new Date().toISOString();
changed = true;
continue;
}
// Filter out already-known episodes
const unknowns = episodes.filter(ep => {
if (processedIds.has(ep.id)) return false;
if (queuedIds.has(ep.id)) return false;
if (skippedIds.has(ep.id)) return false;
if (seenIds.has(ep.id)) return false;
return true;
});
const filtered = episodes.length - unknowns.length;
subLog(` ${unknowns.length} to check, ${filtered} already known`);
if (unknowns.length === 0) {
sub.lastChecked = new Date().toISOString();
changed = true;
continue;
}
subLog(` Cutoff date: ${cutoffDate}`);
let newCount = 0;
// Episode IDs dated before the cutoff; recorded so later checks skip them.
const seenNow = [];
for (const ep of unknowns) {
if (!ep.date || ep.date.length !== 8) continue; // skip undated
if (ep.date < cutoffDate) {
seenNow.push(ep.id);
continue; // before cutoff
}
subLog(`${ep.date}${ep.title.slice(0, 60)}`);
const itemStatus = sub.autoDownload ? "approved" : "pending";
autoQueue.push({
id: `auto-${Date.now()}-${Buffer.from(ep.id).toString("base64url").slice(0, 16)}`,
videoId: ep.id, // episode GUID
url: ep.audioUrl,
title: ep.title,
uploadDate: ep.date,
subscriptionId: sub.id,
subscriptionName: sub.name,
status: itemStatus,
type: "podcast",
duration: ep.duration || "",
});
queuedIds.add(ep.id);
newCount++;
}
if (seenNow.length > 0) await addToSeenList(seenNow);
sub.lastChecked = new Date().toISOString();
subLog(`${newCount} episode(s) queued for approval from ${sub.name}`);
changed = true;
} else {
// ── YouTube subscription: discover videos via yt-dlp + RSS dates ──
// Scale fetch limit based on how far back the subscription date goes
const daysSinceSub = Math.max(1, Math.ceil((Date.now() - new Date(sub.createdAt).getTime()) / 86400000));
// Budget 4 videos per (started) week since the cutoff, clamped to 15..200.
const estimatedVideos = Math.ceil(daysSinceSub / 7) * 4;
const fetchLimit = Math.min(Math.max(estimatedVideos, 15), 200);
subLog(` Subscription age: ${daysSinceSub}d → fetching up to ${fetchLimit} videos`);
const candidates = await listChannelVideosFast(sub.url, fetchLimit);
subLog(` Found ${candidates.length} recent video(s)`);
if (candidates.length === 0) {
subLog(` ⚠ No videos returned from yt-dlp for this channel`);
sub.lastChecked = new Date().toISOString();
changed = true;
continue;
}
const unknowns = candidates.filter(v => {
if (processedIds.has(v.id)) return false;
if (queuedIds.has(v.id)) return false;
if (skippedIds.has(v.id)) return false;
if (seenIds.has(v.id)) return false;
return true;
});
const filtered = candidates.length - unknowns.length;
subLog(` ${unknowns.length} to check, ${filtered} already known`);
if (unknowns.length === 0) {
sub.lastChecked = new Date().toISOString();
changed = true;
continue;
}
subLog(` Cutoff date: ${cutoffDate} — fetching upload dates...`);
// Step 1: Get channel ID (cached on subscription, or fetch once)
if (!sub.channelId) {
subLog(` Resolving channel ID...`);
sub.channelId = await getChannelId(sub.url);
if (sub.channelId) {
subLog(` Resolved channel ID: ${sub.channelId}`);
changed = true;
} else {
subLog(` ⚠ Could not resolve channel ID — RSS will be skipped`);
}
}
// Step 2: RSS feed — fast, no bot detection, covers ~15 most recent
const dateMap = sub.channelId ? await fetchDatesFromRSS(sub.channelId) : {};
const rssCount = Object.keys(dateMap).length;
if (sub.channelId) subLog(` RSS feed: got dates for ${rssCount} videos`);
// Step 3: Smart early termination
// If the oldest RSS-dated video is already before the cutoff, every
// video missing from the RSS feed is treated as older still, so the
// per-video yt-dlp date lookup is skipped.
const needDates = unknowns.filter(v => !dateMap[v.id]);
let earlyTermination = false;
if (needDates.length > 0 && rssCount > 0) {
const rssDates = Object.values(dateMap).sort();
const oldestRssDate = rssDates[0];
if (oldestRssDate < cutoffDate) {
earlyTermination = true;
subLog(` Oldest RSS video (${oldestRssDate}) is before cutoff — ${needDates.length} older video(s) are definitely too old, skipping yt-dlp`);
} else {
subLog(` ${needDates.length} video(s) not in RSS — trying yt-dlp for dates...`);
const ytDates = await fetchUploadDates(needDates.map(v => v.id));
Object.assign(dateMap, ytDates);
const ytCount = Object.keys(ytDates).length;
if (ytCount > 0) subLog(` yt-dlp added dates for ${ytCount} more video(s)`);
}
} else if (needDates.length > 0 && rssCount === 0) {
subLog(` ${needDates.length} video(s) need dates — trying yt-dlp...`);
const ytDates = await fetchUploadDates(needDates.map(v => v.id));
Object.assign(dateMap, ytDates);
const ytCount = Object.keys(ytDates).length;
if (ytCount > 0) subLog(` yt-dlp added dates for ${ytCount} more video(s)`);
}
const gotDates = Object.keys(dateMap).length;
if (gotDates > 0 || needDates.length === 0) {
subLog(` Total dates: ${gotDates} of ${unknowns.length} videos`);
} else {
subLog(` ⚠ No dates available — skipping. Try setting YT_COOKIES_FROM in .env`);
sub.lastChecked = new Date().toISOString();
changed = true;
continue;
}
let newCount = 0;
for (const video of unknowns) {
const uploadDate = dateMap[video.id];
// Videos without a well-formed 8-character date are neither queued nor marked seen here.
if (!uploadDate || uploadDate.length !== 8) {
continue;
}
if (uploadDate < cutoffDate) {
subLog(`${video.id} (${uploadDate}) — before cutoff`);
continue;
}
subLog(`${video.id}${uploadDate ? ` (${uploadDate})` : ""}${video.title.slice(0,50)}`);
const ytItemStatus = sub.autoDownload ? "approved" : "pending";
autoQueue.push({
id: `auto-${Date.now()}-${video.id}`,
videoId: video.id,
url: `https://www.youtube.com/watch?v=${video.id}`,
title: video.title,
uploadDate: uploadDate || null,
subscriptionId: sub.id,
subscriptionName: sub.name,
status: ytItemStatus,
});
queuedIds.add(video.id);
newCount++;
}
// Only add to seen list when we can PROVE a video is too old
const seenNow = unknowns.filter(v => {
if (queuedIds.has(v.id)) return false;
const d = dateMap[v.id];
if (d && d.length === 8 && d < cutoffDate) return true;
if (d && d.length === 8 && d >= cutoffDate) return false;
// No usable date: only "seen" when early termination established it is older than the RSS window.
return earlyTermination;
}).map(v => v.id);
if (seenNow.length > 0) await addToSeenList(seenNow);
sub.lastChecked = new Date().toISOString();
subLog(`${newCount} video(s) queued for approval from ${sub.name}`);
changed = true;
}
} catch (err) {
// Per-subscription failure: log it and move on to the next subscription.
subLog(` ⚠ FAILED for ${sub.name}: ${err.message}`);
}
}
const pendingCount = autoQueue.filter(q => q.status === "pending").length;
const approvedCount = autoQueue.filter(q => q.status === "approved").length;
subLog(`Done. ${pendingCount} pending, ${approvedCount} approved in auto-queue.`);
// NOTE(review): `subs` is the snapshot loaded at the top of this function;
// presumably subscription edits made through the HTTP endpoints while the
// check was running would be overwritten by this save — confirm.
if (changed) await saveSubscriptions(subs);
await saveAutoQueue();
// Wake up the background processor if there are approved items
if (approvedCount > 0) {
subLog(`Kicking background processor for ${approvedCount} approved item(s)...`);
kickProcessor();
}
}
// CRUD endpoints
// List every stored subscription.
app.get("/api/subscriptions", async (req, res) => {
  const subscriptions = await loadSubscriptions();
  res.json({ subscriptions });
});
// Extract a normalized channel key from a YouTube URL for dedup.
//
// The key is derived from the lowercased path with trailing slashes removed:
//   /@handle/videos        → "@handle"
//   /channel/UCxxx         → "channel/ucxxx"
//   /c/name, /user/name    → "/c/name", "/user/name" (leading slash kept)
//   /playlist?list=PLxxx   → "playlist/PLxxx" (list id keeps its case)
//   anything else          → the normalized path itself
//
// Input that does not parse as a URL falls back to the trimmed,
// lowercased input with trailing slashes removed. Non-string input
// (null, undefined, numbers) is coerced to a string first instead of
// throwing, so callers comparing stored values never crash on bad data.
function channelKeyFromUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return String(url ?? "").trim().toLowerCase().replace(/\/+$/, "");
  }
  const pathname = parsed.pathname.toLowerCase().replace(/\/+$/, "");
  // /@handle/videos → @handle, /@handle/streams → @handle
  const handleMatch = pathname.match(/\/(@[^/]+)/);
  if (handleMatch) return handleMatch[1];
  // /channel/UCxxx → channel/UCxxx
  const channelMatch = pathname.match(/\/(channel\/[^/]+)/);
  if (channelMatch) return channelMatch[1];
  // /c/name or /user/name
  const legacyMatch = pathname.match(/\/(c|user)\/([^/]+)/);
  if (legacyMatch) return legacyMatch[0];
  // /playlist?list=PLxxx
  const list = parsed.searchParams.get("list");
  if (list) return `playlist/${list}`;
  return pathname; // fallback
}
// Create a subscription (YouTube channel/playlist or podcast feed).
//
// Body: { url, since?, type?, autoDownload? }
//   - url          required string
//   - since        optional cutoff date; defaults to "now"
//   - type         "podcast" forces podcast handling; otherwise detected from the URL
//   - autoDownload only the literal `true` enables it
//
// Responds 400 on a missing/invalid url or an unparseable `since`,
// 409 when an equivalent subscription already exists, otherwise the
// new subscription object. The display name (and channel ID for
// YouTube) is resolved in the background after the response is sent,
// followed by a subscription check.
app.post("/api/subscriptions", async (req, res) => {
  const { url, since, type, autoDownload } = req.body || {};
  if (!url) return res.status(400).json({ error: "Missing url" });
  // A non-string url would crash on .trim() below inside an async
  // handler, leaving the request hanging.
  if (typeof url !== "string") return res.status(400).json({ error: "Invalid url" });

  // Use provided cutoff date, or default to right now. Validate it up
  // front: toISOString() throws a RangeError on an invalid date.
  const cutoffDate = since ? new Date(since) : new Date();
  if (Number.isNaN(cutoffDate.getTime())) {
    return res.status(400).json({ error: "Invalid 'since' date" });
  }
  const cutoff = cutoffDate.toISOString();

  const isPodcast = type === "podcast" || isPodcastFeedUrl(url);
  const subs = await loadSubscriptions();
  // Prevent duplicates
  if (isPodcast) {
    const normalizedUrl = url.trim().toLowerCase().replace(/\/+$/, "");
    if (subs.find(s => s.url.trim().toLowerCase().replace(/\/+$/, "") === normalizedUrl)) {
      return res.status(409).json({ error: "Already subscribed to this podcast" });
    }
  } else {
    const newKey = channelKeyFromUrl(url);
    if (subs.find(s => channelKeyFromUrl(s.url) === newKey)) {
      return res.status(409).json({ error: "Already subscribed to this channel" });
    }
  }

  const sub = {
    id: `sub-${Date.now()}`,
    url,
    name: "Loading...",
    type: isPodcast ? "podcast" : "youtube",
    channelId: null,
    createdAt: cutoff,
    lastChecked: null,
    paused: false,
    autoDownload: autoDownload === true,
  };
  subs.push(sub);
  await saveSubscriptions(subs);
  // Respond immediately so the UI isn't blocked
  res.json(sub);

  // Background: resolve name and check for new content. Subscriptions
  // are re-loaded before patching so the write uses the latest stored list.
  (async () => {
    try {
      if (isPodcast) {
        // Fetch podcast title from RSS feed
        const { title } = await parsePodcastRSS(url, 1);
        const freshSubs = await loadSubscriptions();
        const s = freshSubs.find(x => x.id === sub.id);
        if (s) {
          s.name = title || url;
          await saveSubscriptions(freshSubs);
        }
        console.log(` 🎙 New podcast subscription: ${title} — checking for episodes...`);
      } else {
        const [name, channelId] = await Promise.all([
          fetchChannelName(url),
          getChannelId(url),
        ]);
        const freshSubs = await loadSubscriptions();
        const s = freshSubs.find(x => x.id === sub.id);
        if (s) {
          s.name = name;
          if (channelId) s.channelId = channelId;
          await saveSubscriptions(freshSubs);
        }
        console.log(` 📡 New subscription: ${name} — checking for recent videos...`);
      }
      await checkSubscriptions();
    } catch (err) {
      console.error(" ⚠ Background subscription setup error:", err.message);
    }
  })();
});
// Remove a subscription and drop every auto-queue item that came from it
// (regardless of the item's status).
app.delete("/api/subscriptions/:id", async (req, res) => {
  const stored = await loadSubscriptions();
  const remaining = stored.filter((entry) => entry.id !== req.params.id);
  await saveSubscriptions(remaining);

  autoQueue = autoQueue.filter((item) => item.subscriptionId !== req.params.id);
  await saveAutoQueue();

  res.json({ ok: true });
});
// Toggle a subscription's paused flag and return the updated record.
app.put("/api/subscriptions/:id/pause", async (req, res) => {
  const allSubs = await loadSubscriptions();
  const target = allSubs.find((entry) => entry.id === req.params.id);
  if (!target) {
    return res.status(404).json({ error: "Subscription not found" });
  }
  target.paused = !target.paused;
  await saveSubscriptions(allSubs);
  res.json(target);
});
// Change a subscription's cutoff date (stored as `createdAt`).
// Body: { since } — any value `new Date()` can parse.
// Responds 400 when `since` is missing or not a valid date, 404 when
// the subscription does not exist, otherwise the updated subscription.
app.put("/api/subscriptions/:id/since", async (req, res) => {
  const { since } = req.body || {};
  if (!since) return res.status(400).json({ error: "Missing 'since' date" });
  // Validate before use: toISOString() throws a RangeError on an
  // invalid date, which would otherwise escape this async handler.
  const cutoff = new Date(since);
  if (Number.isNaN(cutoff.getTime())) {
    return res.status(400).json({ error: "Invalid 'since' date" });
  }
  const subs = await loadSubscriptions();
  const sub = subs.find(s => s.id === req.params.id);
  if (!sub) return res.status(404).json({ error: "Subscription not found" });
  sub.createdAt = cutoff.toISOString();
  await saveSubscriptions(subs);
  res.json(sub);
});
// Debug: expose the latest subscription-check log plus a trimmed view
// of the auto-queue (viewable in-app).
app.get("/api/sub-check-log", (req, res) => {
  const queueSummary = autoQueue.map((item) => ({
    id: item.id,
    videoId: item.videoId,
    title: item.title,
    status: item.status,
    sub: item.subscriptionName,
  }));
  res.json({
    log: subCheckLog,
    autoQueueCount: autoQueue.length,
    autoQueue: queueSummary,
  });
});
// Auto-queue endpoints (frontend polls these)
// Returns the queue items, whether a subscription check is running,
// and per-status counts. By default only pending/approved/processing
// items are listed; `?all=true` returns every item.
app.get("/api/auto-queue", (req, res) => {
  const activeStatuses = ["pending", "approved", "processing"];
  const items = req.query.all === "true"
    ? autoQueue
    : autoQueue.filter((entry) => activeStatuses.includes(entry.status));

  // Tally all tracked statuses in a single pass over the queue.
  const counts = { pending: 0, approved: 0, processing: 0, completed: 0, failed: 0 };
  for (const entry of autoQueue) {
    if (Object.hasOwn(counts, entry.status)) counts[entry.status] += 1;
  }

  res.json({ items, checkRunning: subCheckRunning, counts });
});
// Remove one item from the auto-queue and persist the queue.
app.delete("/api/auto-queue/:id", async (req, res) => {
  const { id: targetId } = req.params;
  autoQueue = autoQueue.filter((entry) => entry.id !== targetId);
  await saveAutoQueue();
  res.json({ ok: true });
});
// Skip an item: record its videoId in the skip list (when it has one)
// and remove it from the auto-queue.
app.post("/api/auto-queue/:id/skip", async (req, res) => {
  const { id: targetId } = req.params;
  const skipped = autoQueue.find((entry) => entry.id === targetId);
  if (skipped?.videoId) {
    await addToSkipList(skipped.videoId);
  }
  autoQueue = autoQueue.filter((entry) => entry.id !== targetId);
  await saveAutoQueue();
  res.json({ ok: true });
});
// Approve one pending auto-queue item for background processing.
// 404 when the item is unknown, 400 when it is not in "pending" state.
app.post("/api/auto-queue/:id/approve", async (req, res) => {
  const target = autoQueue.find((entry) => entry.id === req.params.id);
  if (!target) {
    return res.status(404).json({ error: "Item not found" });
  }
  if (target.status !== "pending") {
    return res.status(400).json({ error: `Cannot approve item with status '${target.status}'` });
  }
  target.status = "approved";
  await saveAutoQueue();
  kickProcessor();
  res.json({ ok: true, item: target });
});
// Approve every pending item in one call. The queue is saved and the
// processor kicked only when at least one item actually changed.
app.post("/api/auto-queue/approve-all", async (req, res) => {
  const pendingItems = autoQueue.filter((entry) => entry.status === "pending");
  for (const entry of pendingItems) {
    entry.status = "approved";
  }
  if (pendingItems.length > 0) {
    await saveAutoQueue();
    kickProcessor();
  }
  res.json({ ok: true, approved: pendingItems.length });
});
// Re-queue a failed item: reset it to "approved", clear its error
// fields, persist, and wake the processor.
app.post("/api/auto-queue/:id/retry", async (req, res) => {
  const target = autoQueue.find((entry) => entry.id === req.params.id);
  if (!target) {
    return res.status(404).json({ error: "Item not found" });
  }
  if (target.status !== "failed") {
    return res.status(400).json({ error: `Cannot retry item with status '${target.status}'` });
  }
  Object.assign(target, { status: "approved", error: undefined, failedAt: undefined });
  await saveAutoQueue();
  kickProcessor();
  res.json({ ok: true, item: target });
});
// Drop completed and failed items from the queue; reports how many
// were removed and only persists when something changed.
app.post("/api/auto-queue/clear-finished", async (req, res) => {
  const finishedStatuses = ["completed", "failed"];
  const kept = autoQueue.filter((entry) => !finishedStatuses.includes(entry.status));
  const removed = autoQueue.length - kept.length;
  autoQueue = kept;
  if (removed > 0) {
    await saveAutoQueue();
  }
  res.json({ ok: true, removed });
});
// Manually trigger a subscription check. The check runs in the
// background; the response only reports whether one was started or
// was already in progress.
app.post("/api/subscriptions/check-now", async (req, res) => {
  if (subCheckRunning) {
    return res.json({ ok: true, message: "Already checking" });
  }
  checkSubscriptions().catch((err) => {
    console.error(" ⚠ Manual subscription check error:", err.message);
  });
  res.json({ ok: true, message: "Check started" });
});
// ── Auto-download toggle per subscription ──────────────────────────────────
// Body: { enabled } — only the literal `true` turns auto-download on.
app.put("/api/subscriptions/:id/auto-download", async (req, res) => {
  const allSubs = await loadSubscriptions();
  const target = allSubs.find((entry) => entry.id === req.params.id);
  if (!target) {
    return res.status(404).json({ error: "Subscription not found" });
  }
  target.autoDownload = req.body.enabled === true;
  await saveSubscriptions(allSubs);
  res.json({ ok: true, subscription: target });
});
// ── Background processing status & configuration ───────────────────────────
// Snapshot of the background processor: running flag, a trimmed view
// of the current item, last completed item, config, queue counts, and
// the 20 most recent log entries.
app.get("/api/processing/status", (req, res) => {
  const countWithStatus = (status) =>
    autoQueue.filter((entry) => entry.status === status).length;

  const active = processingState.currentItem;
  const currentItem = active
    ? { id: active.id, title: active.title, url: active.url }
    : null;

  res.json({
    running: processingState.running,
    currentItem,
    lastCompleted: processingState.lastCompleted,
    config: processingConfig,
    counts: {
      approved: countWithStatus("approved"),
      processing: countWithStatus("processing"),
      pending: countWithStatus("pending"),
    },
    log: processingState.log.slice(-20),
  });
});
// Update the processing configuration.
// Body: { delaySeconds?, enabled? } — delaySeconds must be a
// non-negative number (capped at 3600); enabled must be a boolean.
// Fields of any other type are ignored. Enabling kicks the processor.
app.put("/api/processing/config", async (req, res) => {
  const { delaySeconds, enabled } = req.body;

  if (typeof delaySeconds === "number" && delaySeconds >= 0) {
    processingConfig.delaySeconds = Math.min(3600, Math.max(0, delaySeconds));
  }

  if (typeof enabled === "boolean") {
    processingConfig.enabled = enabled;
    // Turning processing back on should pick up any approved items.
    if (enabled) kickProcessor();
  }

  await saveProcessingConfig();
  res.json({ ok: true, config: processingConfig });
});
// Rush: set the rush flag so the delay before the next queue item is
// skipped, and start the processor if it is idle with approved work.
app.post("/api/processing/rush", (req, res) => {
  processingState.rush = true;
  kickProcessor();
  res.json({
    ok: true,
    message: "Rush mode enabled — next item will process immediately",
  });
});
// Full processing log (for debug/monitoring).
app.get("/api/processing/log", (req, res) => {
  const { log } = processingState;
  res.json({ log });
});
// ── Full pipeline: URL → audio → transcript → topic analysis ──────────────
// Runs yt-dlp once in JSON mode (no download) and returns a trimmed
// metadata object: title, uploadDate, channel, description (first 2000
// characters, to keep prompt size sane), chapters, duration, and two
// booleans saying whether manual / automatic captions are listed.
// Any failure — yt-dlp error, timeout, unparseable output — yields null.
async function fetchYouTubeMetadata(videoId) {
  const hasEntries = (obj) => Boolean(obj && Object.keys(obj).length > 0);
  try {
    const watchUrl = `https://www.youtube.com/watch?v=${videoId}`;
    const ytdlpArgs = ["-j", "--no-warnings", "--skip-download", watchUrl];
    const execOptions = { timeout: 30000, maxBuffer: 10 * 1024 * 1024 };
    const { stdout } = await execFileAsync("yt-dlp", ytdlpArgs, execOptions);
    const info = JSON.parse(stdout);
    return {
      title: info.title || "",
      uploadDate: info.upload_date || "",
      channel: info.channel || info.uploader || "",
      description: (info.description || "").slice(0, 2000),
      chapters: Array.isArray(info.chapters) ? info.chapters : [],
      duration: typeof info.duration === "number" ? info.duration : 0,
      hasManualCaptions: hasEntries(info.subtitles),
      hasAutoCaptions: hasEntries(info.automatic_captions),
    };
  } catch {
    return null;
  }
}
// Pulls YouTube captions for the given video and parses them into the
// app's standard { offset, text, duration } entries shape (seconds).
//
// Parameters:
//   videoId  YouTube video ID
//   tmpDir   scratch directory; caption files are written here as
//            `captions.<lang>.json3`
//   opts     { hasManual, hasAuto } — which caption kinds the video's
//            metadata advertises
//   log      logger called as log(1, message) on download/parse failures
//
// Manual captions are tried first (cleaner — punctuated, no fragmented
// words). If that attempt fails, produces no English file, cannot be
// parsed, or yields fewer than 5 segments, auto-generated captions are
// tried next when available. Previously a video whose manual captions
// were unusable (e.g. only in a non-English language) never reached the
// auto-caption path.
//
// Returns { entries, source } where source is "manual" or "auto"
// according to the attempt that actually produced the entries, or null
// when no usable captions could be obtained.
//
// Output is the same shape that `parseTimestampedTranscript()` produces
// from a Gemini transcription, so downstream code (the analysis step,
// the chunk renderer) doesn't care how the transcript got here.
async function tryFetchYouTubeCaptions(videoId, tmpDir, opts, log) {
  const { hasManual, hasAuto } = opts;
  if (!hasManual && !hasAuto) return null;
  const url = `https://www.youtube.com/watch?v=${videoId}`;
  const captionsBase = path.join(tmpDir, "captions");
  const langs = "en.*,en";

  const attempts = [];
  if (hasManual) attempts.push({ source: "manual", flag: "--write-subs" });
  if (hasAuto) attempts.push({ source: "auto", flag: "--write-auto-subs" });

  for (const { source, flag } of attempts) {
    try {
      await execFileAsync(
        "yt-dlp",
        ["--skip-download", flag, "--sub-langs", langs,
          "--sub-format", "json3", "--no-warnings", "-o", captionsBase, url],
        { timeout: 30000 }
      );
    } catch (err) {
      log(1, `⚠ Caption download failed: ${(err.message || "").slice(0, 200)}`);
      continue;
    }

    // yt-dlp names the file like `captions.en.json3` or `captions.en-US.json3`.
    let captionPath = null;
    try {
      const files = await fs.readdir(tmpDir);
      const match = files.find((f) => f.startsWith("captions.") && f.endsWith(".json3"));
      if (match) captionPath = path.join(tmpDir, match);
    } catch {
      // Best effort: an unreadable tmpDir is treated as "no caption file".
    }
    if (!captionPath) continue;

    let parsed = null;
    try {
      const content = await fs.readFile(captionPath, "utf-8");
      parsed = JSON.parse(content);
    } catch (err) {
      log(1, `⚠ Caption parse failed: ${(err.message || "").slice(0, 200)}`);
    }

    const rawEvents = Array.isArray(parsed?.events) ? parsed.events : [];
    const entries = [];
    for (const evt of rawEvents) {
      const text = (Array.isArray(evt.segs) ? evt.segs : [])
        .map((s) => s.utf8 || "")
        .join("")
        .replace(/\n+/g, " ")
        .trim();
      if (!text) continue;
      entries.push({
        offset: (evt.tStartMs || 0) / 1000,
        text,
        duration: (evt.dDurationMs || 0) / 1000,
      });
    }
    // Too few segments to be useful — treat as "no captions".
    if (entries.length >= 5) return { entries, source };

    // Remove the unusable file so the next attempt's directory scan
    // cannot pick it up again.
    await fs.unlink(captionPath).catch(() => {});
  }
  return null;
}
// Coalesce a list of timestamped entries into coarser chunks. Used
// after pulling auto-captions (which are typically 1–3-word fragments
// every 1–3 seconds) so the downstream analysis prompt has ~100
// medium-sized segments instead of ~900 tiny ones.
//
// Entries whose median duration is already 8 seconds or more (manual
// captions and Gemini-transcribed entries usually average 10–30s) are
// returned unchanged, as is any non-array or empty input. Otherwise
// consecutive entries are merged until `targetSeconds` have elapsed
// since the start of the current chunk; blank-text entries are dropped.
function coalesceTranscriptEntries(entries, targetSeconds = 15) {
  if (!Array.isArray(entries) || entries.length === 0) return entries;

  // Only coalesce when the typical entry is too fine-grained.
  const sortedDurations = entries
    .map((entry) => entry.duration || 0)
    .sort((x, y) => x - y);
  const medianDuration = sortedDurations[Math.floor(sortedDurations.length / 2)];
  if (medianDuration >= 8) return entries;

  const chunks = [];
  let open = null;
  for (const entry of entries) {
    const text = (entry.text || "").trim();
    if (!text) continue;

    const startsNewChunk =
      open === null || (entry.offset || 0) - open.offset >= targetSeconds;
    if (startsNewChunk) {
      if (open !== null) chunks.push(open);
      open = { offset: entry.offset, text, duration: entry.duration || 0 };
    } else {
      open.text = `${open.text} ${text}`.replace(/\s+/g, " ").trim();
      // Chunk duration runs from its start to the end of the latest entry.
      open.duration = ((entry.offset || 0) + (entry.duration || 0)) - open.offset;
    }
  }
  if (open !== null) chunks.push(open);
  return chunks;
}
// Reduce an over-long transcript down to roughly `maxEntries` segments
// so the analysis prompt fits in smaller model contexts (typical local
// models are 16k–32k tokens — a 2-hour podcast at Parakeet's ~5s
// granularity easily blows past that). Bucket size is computed from
// total audio duration (end of the last entry divided by `maxEntries`),
// with a 15-second floor. Unlike coalesceTranscriptEntries this is
// purely count-driven — there is no median-duration guard, because the
// failure mode is "context exceeded" not "bad UX from over-fragmented
// entries".
//
// Returns { coalesced, indexMap }. indexMap[i] = { startOrig, endOrig }
// maps coalesced entry i back to a range of original-entry indices, so
// the caller can translate section boundaries returned by the analyzer
// back into the original entries array and keep the displayed
// transcript at full granularity. When nothing needs coalescing (input
// is not an array, is within `maxEntries`, or has no positive total
// duration) the input is returned as-is with indexMap = null.
//
// FLAGGED TO WATCH: ship 0.2.28. Auto-coalesce may degrade analysis
// quality on borderline content (the LLM sees fewer, longer segments
// — section boundaries get blurrier). If users report missed topics or
// imprecise section starts on long content, the alternative is real
// chunked analysis (split into overlapping windows, analyze each,
// stitch sections at boundaries) — significantly more involved.
function coalesceForAnalysis(entries, maxEntries = 400) {
  const passThrough = { coalesced: entries, indexMap: null };
  if (!Array.isArray(entries) || entries.length <= maxEntries) return passThrough;

  const tail = entries[entries.length - 1];
  const totalDuration = (tail.offset || 0) + (tail.duration || 0);
  if (totalDuration <= 0) return passThrough;

  // Never bucket tighter than 15s — going much smaller would defeat
  // the point of coalescing.
  const bucketSeconds = Math.max(15, Math.ceil(totalDuration / maxEntries));

  const coalesced = [];
  const indexMap = [];
  let open = null;
  let openStartIndex = 0;
  entries.forEach((entry, index) => {
    const text = (entry.text || "").trim();
    if (!text) return;

    const startsNewBucket =
      open === null || (entry.offset || 0) - open.offset >= bucketSeconds;
    if (!startsNewBucket) {
      open.text = `${open.text} ${text}`.replace(/\s+/g, " ").trim();
      open.duration = ((entry.offset || 0) + (entry.duration || 0)) - open.offset;
      return;
    }
    if (open !== null) {
      coalesced.push(open);
      indexMap.push({ startOrig: openStartIndex, endOrig: index - 1 });
    }
    open = { offset: entry.offset, text, duration: entry.duration || 0 };
    openStartIndex = index;
  });

  if (open !== null) {
    coalesced.push(open);
    indexMap.push({ startOrig: openStartIndex, endOrig: entries.length - 1 });
  }
  return { coalesced, indexMap };
}
// ── In-flight free-tier job status + cancel ─────────────────────────────────
// Lets the web UI render a "Currently processing X — Cancel" banner after
// a browser refresh, when the SSE stream from the original /api/process
// call is no longer attached. Only the free-tier slot is tracked today;
// paid-tier batch queueing happens client-side.
//
// `?logs=1` (or `?logs=true`) also returns the in-flight job's
// accumulated log buffer so a refresh mid-pipeline can repopulate the
// activity log. Without it the lightweight header-only shape used by
// the 5s banner poll is returned.
app.get("/api/process/current", (req, res) => {
  const { logs } = req.query;
  const includeLogs = logs === "1" || logs === "true";
  const job = getCurrentFreeJob({ includeLogs });
  res.json({ job });
});
// Request cancellation of the in-flight free-tier job. `cancelled`
// reports whether there was a job to abort.
//
// The in-flight yt-dlp / model API call is not killed here — the
// pipeline polls isFreeJobAborted() at each major step and throws
// early, landing in the request handler's finally block where the slot
// is released. Cancellation latency is therefore bounded by the time
// until the next checkpoint (a few seconds in practice, up to the
// length of one outstanding model call).
app.post("/api/process/cancel", (_req, res) => {
  const cancelled = abortCurrentFreeJob() ? true : false;
  res.json({ ok: true, cancelled });
});
// ── Auto-discovery of provider connection info ──────────────────────────────
// The picker UI hits this on boot to pre-fill placeholders for providers
// that have a server-detectable default — most notably Ollama on
// StartOS, reachable at the documented `http://<package-id>.startos:<port>`
// internal hostname when installed alongside Recap.
//
// Response: {} when nothing was found, otherwise
// { ollama: { baseURL, source, models } } where source is "config"
// (operator-set URL) or "startos-dep" (probed service hostname).
// Discovery is best effort: every failure degrades to a smaller response.
app.get("/api/providers/discover", async (_req, res) => {
  const out = {};
  try {
    const cfg = await config.getConfigSnapshot();
    const configuredUrl = (cfg.ollama_base_url || "").trim();

    let ollamaUrl = null;
    let source = null;
    if (configuredUrl) {
      // The operator's explicit setting wins; no reachability probe.
      ollamaUrl = configuredUrl;
      source = "config";
    } else {
      // Otherwise try the canonical StartOS service-discovery hostname.
      const candidate = "http://ollama.startos:11434";
      let reachable = false;
      try {
        const probe = await fetch(`${candidate}/api/tags`, {
          signal: AbortSignal.timeout(1500),
        });
        reachable = probe.ok;
      } catch {
        reachable = false;
      }
      if (reachable) {
        ollamaUrl = candidate;
        source = "startos-dep";
      }
    }

    if (ollamaUrl) {
      // List installed models so the picker can offer them in a dropdown.
      let models = [];
      try {
        const tagsUrl = `${ollamaUrl.replace(/\/$/, "")}/api/tags`;
        const tagsRes = await fetch(tagsUrl, { signal: AbortSignal.timeout(2000) });
        if (tagsRes.ok) {
          const data = await tagsRes.json();
          models = (data.models || []).map((m) => m.name).filter(Boolean);
        }
      } catch {}
      out.ollama = { baseURL: ollamaUrl, source, models };
    }
  } catch {}
  res.json(out);
});
// Quick connection-test endpoint. The picker UI's Test button hits this
// to verify a provider+model+opts combo actually works before the user
// commits to using it.
//
// Body: { providerId, model, opts? }
// Validation and provider-construction problems return HTTP 400 with
// { ok: false, error }. Once a provider exists, the outcome is always
// HTTP 200: { ok: true, text, latencyMs, provider, model } on success
// or { ok: false, error, latencyMs } on failure (error capped at 300
// characters).
app.post("/api/providers/test", async (req, res) => {
  const { providerId, model, opts: clientOpts } = req.body || {};
  const badRequest = (error) => res.status(400).json({ ok: false, error });

  if (!providerId || typeof providerId !== "string") return badRequest("missing providerId");
  if (!model || typeof model !== "string") return badRequest("missing model");
  if (!PROVIDER_NAMES.includes(providerId)) return badRequest(`unknown provider: ${providerId}`);

  let provider;
  try {
    const cfg = await config.getConfigSnapshot();
    const usableClientOpts = clientOpts && typeof clientOpts === "object";
    const resolvedOpts = resolveProviderOpts(providerId, {
      config: cfg,
      clientOpts: usableClientOpts ? clientOpts : {},
    });
    provider = getProvider(providerId, resolvedOpts);
  } catch (err) {
    return badRequest(err.message);
  }
  if (!provider.capabilities.analyze) return badRequest(`${providerId} cannot analyze text`);

  const t0 = Date.now();
  const reportSuccess = (text, latencyMs) =>
    res.json({ ok: true, text, latencyMs, provider: providerId, model });
  const reportFailure = (err) =>
    res.json({
      ok: false,
      error: (err?.message || String(err)).slice(0, 300),
      latencyMs: Date.now() - t0,
    });

  // Special-case the relay: a real analyzeText round-trip would burn
  // one of the user's lifetime credits per click. pingBalance verifies
  // connectivity + auth and returns the current balance at no charge.
  if (providerId === "relay") {
    try {
      const env = await provider.pingBalance({ timeoutMs: 5000 });
      const latencyMs = Date.now() - t0;
      const credits = env?.credits_remaining;
      const tier = env?.tier || "core";
      let summary = `Connected · Tier: ${tier}`;
      if (credits != null) {
        summary = `Connected · Tier: ${tier} · ${credits} credit${credits === 1 ? "" : "s"} remaining`;
      }
      return reportSuccess(summary, latencyMs);
    } catch (err) {
      return reportFailure(err);
    }
  }

  // Every other provider: send a tiny prompt and echo back (at most 200
  // characters of) the model's reply.
  try {
    const result = await provider.analyzeText({
      prompt: "Respond with exactly three words confirming you received this prompt.",
      model,
      retries: 0,
    });
    const latencyMs = Date.now() - t0;
    return reportSuccess((result.text || "").trim().slice(0, 200), latencyMs);
  } catch (err) {
    return reportFailure(err);
  }
});
// Per-provider, per-field boolean indicating whether the StartOS
// config has a non-empty string value for each PROVIDER_KEY_FIELDS slot.
// The picker UI uses this to (a) show a "✓ Server-configured" hint
// under empty inputs, and (b) decide whether the Delete button is
// visible when localStorage is empty but the server has a value.
//
// Never returns the actual values — only booleans. Anything stored in
// startos-config.json could be secret and shouldn't surface in any
// response that could end up in a screenshot.
//
// Response: { status: { [providerId]: { [optName]: boolean } } }, or
// HTTP 500 { ok: false, error } when the provider registry or config
// cannot be read. The config is read through `config.getConfigSnapshot()`
// like the other provider endpoints.
app.get("/api/providers/credentials-status", async (_req, res) => {
  try {
    const { PROVIDER_KEY_FIELDS } = await import("./providers/index.js");
    const cfg = await config.getConfigSnapshot();
    const status = {};
    for (const [providerId, fields] of Object.entries(PROVIDER_KEY_FIELDS)) {
      status[providerId] = {};
      for (const [optName, cfgKey] of Object.entries(fields)) {
        const v = cfg[cfgKey];
        status[providerId][optName] =
          typeof v === "string" && v.trim().length > 0;
      }
    }
    res.json({ status });
  } catch (err) {
    // Without this, a rejected import/config read would escape the
    // async handler and leave the request without a response.
    res.status(500).json({
      ok: false,
      error: (err?.message || String(err)).slice(0, 300),
    });
  }
});
// Clear all server-side config fields for a provider (the StartOS
// action-set values). The picker UI's Delete button calls this in
// addition to wiping localStorage so credentials are gone from BOTH
// storage paths.
//
// Unknown provider IDs are rejected with HTTP 400. A known provider
// with no PROVIDER_KEY_FIELDS entry (e.g. relay, whose URL is
// hardcoded) clears nothing — still a 200, just with an empty
// `cleared` list. A failed config write returns HTTP 500.
app.post("/api/providers/:id/clear", async (req, res) => {
  const { id: providerId } = req.params;
  if (!PROVIDER_NAMES.includes(providerId)) {
    return res.status(400).json({ ok: false, error: `unknown provider: ${providerId}` });
  }

  // Build { config_field: "" } for each config key mapped to this provider.
  const { PROVIDER_KEY_FIELDS } = await import("./providers/index.js");
  const fieldMap = PROVIDER_KEY_FIELDS[providerId] || {};
  const patch = {};
  Object.values(fieldMap).forEach((cfgKey) => {
    if (typeof cfgKey === "string") {
      patch[cfgKey] = "";
    }
  });

  try {
    await config.mergeConfig(patch);
    return res.json({ ok: true, cleared: Object.keys(patch) });
  } catch (err) {
    const error = (err?.message || String(err)).slice(0, 300);
    return res.status(500).json({ ok: false, error });
  }
});
app.post("/api/process", async (req, res) => {
const {
url, apiKey: clientKey, model, type: itemType, title: itemTitle, uploadDate: itemUploadDate, episodeId,
transcriptionProvider: reqTransProvider,
transcriptionModel: reqTransModel,
analysisProvider: reqAnaProvider,
analysisModel: reqAnaModel,
providerOpts: reqProviderOpts,
useYouTubeCaptions: reqUseYTCaptions,
} = req.body;
// Default: use captions when available (huge speed/cost win). The
// picker-UI toggle lets users force a full transcription pass when
// they want speaker labels (captions don't have them) or when the
// auto-captions quality is too low.
const useYouTubeCaptions = reqUseYTCaptions !== false;
// Sentinel error message thrown by checkCancelled() when the user
// hits the in-flight banner's Cancel button. Declared at handler
// scope (not inside the try) so the catch block can compare against
// it — see the catch a few hundred lines below.
const CANCELLED_MARK = "__recap_cancelled__";
// Per-provider client-side opts: { gemini: {apiKey}, anthropic: {apiKey},
// openai: {apiKey}, "openai-compatible": {apiKey, baseURL}, ollama: {baseURL} }.
// Each provider's opts override that provider's config-stored values
// (set via the StartOS actions). Used by the picker UI to BYO keys
// per provider without round-tripping the StartOS dashboard.
const providerOpts = (reqProviderOpts && typeof reqProviderOpts === "object") ? reqProviderOpts : {};
// Provider selection: each pipeline step (transcribe + analyze) can
// independently target any registered provider. Both default to gemini
// so existing clients (which don't send provider fields) keep working.
const transcriptionProvider = reqTransProvider || "gemini";
const analysisProvider = reqAnaProvider || "gemini";
// Free tier: unlicensed users can summarize one video at a time. They
// still bring their own key — same as paid users today; the key can
// come from either the StartOS config action (server-side) or the
// web UI Settings panel (client-side). The future "bundled key" relay
// (paid users' requests proxied through the operator's service) isn't
// built yet, so there's nothing here that gates key sourcing by tier.
// AbortController for this request. Fired by abortCurrentFreeJob()
// when the user hits Cancel — passed through to every provider SDK
// call (transcription + analysis) so in-flight network requests
// reject immediately instead of running to completion.
const abortController = new AbortController();
// Stable identifier for this summarize job. Sent to the relay
// (when used) as `X-Recap-Job-Id`. The relay charges 1 credit on
// the first call with a given jobId and treats subsequent calls
// with the same id as free — so a full summary (transcribe +
// analyze) costs one credit regardless of which steps route
// through the relay. Non-relay providers ignore this opt.
const jobId = randomUUID();
const isFree = isFreeUser();
if (isFree) {
if (!tryAcquireFreeSlot({ url, title: itemTitle, abortController })) {
const current = getCurrentFreeJob();
const elapsedSec = current ? Math.round(current.elapsedMs / 1000) : 0;
const what = current?.title || current?.url || "another video";
return res.status(409).json({
error: "processing_in_progress",
message:
`A summary is already being processed (${what}, started ${elapsedSec}s ago). Free mode handles one video at a time — wait for it to finish, or cancel it from the status bar at the top of the app.`,
currentJob: current,
});
}
}
if (!url) {
if (isFree) releaseFreeSlot();
return res.status(400).json({ error: "Missing url" });
}
if (!PROVIDER_NAMES.includes(transcriptionProvider)) {
if (isFree) releaseFreeSlot();
return res.status(400).json({ error: "unknown_provider", message: `Unknown transcription provider: ${transcriptionProvider}` });
}
if (!PROVIDER_NAMES.includes(analysisProvider)) {
if (isFree) releaseFreeSlot();
return res.status(400).json({ error: "unknown_provider", message: `Unknown analysis provider: ${analysisProvider}` });
}
// Resolve per-provider construction opts from the StartOS config blob,
// overlaying any client-supplied opts (req.body.providerOpts[name]).
// For Gemini, the legacy single "apiKey" field on the request body
// also flows through as a fallback when providerOpts.gemini.apiKey
// isn't set — keeps pre-picker-UI clients working.
const cfg = await getConfigSnapshot();
/**
 * Pick the client-supplied construction opts for one provider.
 *
 * Reads `providerOpts[name]` from the request body; anything that is not a
 * truthy object is replaced by an empty object. For "gemini" only, when the
 * body opts carry no `apiKey`, the legacy request-level key is run through
 * resolveApiKey() and, if that yields a truthy value, merged in as `apiKey`
 * on a copy of the body opts.
 *
 * @param {string} name - provider id to look up in `providerOpts`
 * @returns {object} the body opts object itself, or a shallow copy extended
 *   with the resolved legacy `apiKey`
 */
function clientOptsFor(name) {
  const supplied = providerOpts[name];
  const bodyOpts = supplied && typeof supplied === "object" ? supplied : {};

  const needsLegacyKey = name === "gemini" && !bodyOpts.apiKey;
  if (!needsLegacyKey) return bodyOpts;

  const legacyKey = resolveApiKey(clientKey);
  return legacyKey ? { ...bodyOpts, apiKey: legacyKey } : bodyOpts;
}
const transcriptionOpts = resolveProviderOpts(transcriptionProvider, {
config: cfg,
clientOpts: clientOptsFor(transcriptionProvider),
});
const analysisOpts = resolveProviderOpts(analysisProvider, {
config: cfg,
clientOpts: clientOptsFor(analysisProvider),
});
let transcriber;
try {
transcriber = getProvider(transcriptionProvider, transcriptionOpts);
} catch (err) {
if (isFree) releaseFreeSlot();
return res.status(400).json({
error: "transcription_provider_not_configured",
message: `Transcription provider ${transcriptionProvider} is not configured: ${err.message}`,
});
}
if (!transcriber.capabilities.transcribe) {
if (isFree) releaseFreeSlot();
return res.status(400).json({
error: "transcription_unsupported",
message: `Provider ${transcriptionProvider} does not support audio transcription. Pick a different transcription provider (gemini or openai).`,
});
}
let analyzer;
try {
analyzer = transcriptionProvider === analysisProvider
? transcriber
: getProvider(analysisProvider, analysisOpts);
} catch (err) {
if (isFree) releaseFreeSlot();
return res.status(400).json({
error: "analysis_provider_not_configured",
message: `Analysis provider ${analysisProvider} is not configured: ${err.message}`,
});
}
if (!analyzer.capabilities.analyze) {
if (isFree) releaseFreeSlot();
return res.status(400).json({
error: "analysis_unsupported",
message: `Provider ${analysisProvider} does not support text analysis.`,
});
}
// Determine if this is a podcast episode or YouTube video
const isPodcast = itemType === "podcast" || /\.(mp3|m4a|ogg|opus|wav|aac)(\?|$)/i.test(url);
const videoId = isPodcast ? (episodeId || url) : extractVideoId(url);
if (!isPodcast && !videoId) {
if (isFree) releaseFreeSlot();
return res.status(400).json({ error: "Invalid YouTube URL" });
}
const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "yts-"));
const audioExt = isPodcast ? (url.match(/\.(mp3|m4a|ogg|opus|wav|aac)/i)?.[1] || "mp3") : "mp3";
const audioPath = path.join(tmpDir, `audio.${audioExt}`);
const mimeType = { mp3: "audio/mp3", m4a: "audio/mp4", ogg: "audio/ogg", opus: "audio/opus", wav: "audio/wav", aac: "audio/aac" }[audioExt] || "audio/mp3";
try {
const pipelineStart = Date.now();
// Set up SSE
res.writeHead(200, {
"Content-Type": "text/event-stream",
"Cache-Control": "no-cache",
Connection: "keep-alive",
});
// Activity-log helper. Each call stamps the message with the seconds
// elapsed since pipelineStart, echoes it to the server console, records
// it in this request's logHistory, hands it to appendCurrentJobLog (the
// free-tier in-flight job's server-side buffer — a no-op when there is
// no free job, e.g. licensed user — so a browser refresh during a long
// pipeline can re-hydrate the activity log from the server instead of
// starting blank), and finally emits a "status" event followed by a
// "log" event on the response stream.
const logHistory = [];
function log(step, message, detail) {
  const elapsedMs = Date.now() - pipelineStart;
  const elapsed = (elapsedMs / 1000).toFixed(1);
  console.log(` [${elapsed}s] ${message}`);

  // The same entry object goes to the history, the job buffer and the client.
  const entry = { elapsed, message, detail: detail || null };
  logHistory.push(entry);
  appendCurrentJobLog(entry);

  sendEvent(res, "status", { step, message });
  sendEvent(res, "log", entry);
}
// Cancellation checkpoint, called at the major pipeline boundaries.
// Throws an Error whose message is CANCELLED_MARK (declared at handler
// scope above) so the request unwinds through the catch path when either
//   - this request's abortController signal has been aborted — which also
//     covers an SDK call that swallowed the abort and returned cleanly — or
//   - the request is free-tier and isFreeJobAborted() reports true.
// The free-job probe is only consulted when the signal is not aborted.
function checkCancelled() {
  const cancelled =
    abortController.signal.aborted || (isFree && isFreeJobAborted());
  if (cancelled) {
    throw new Error(CANCELLED_MARK);
  }
}
let videoTitle = itemTitle || "Untitled";
let videoUploadDate = itemUploadDate || "";
// Rich metadata used to ground the transcription prompt (speaker
// names from the channel/description/chapters) and surfaced to
// downstream UI. Populated for YouTube videos only; podcasts have
// their own (lighter) metadata path.
let videoChannel = "";
let videoDescription = "";
let videoChapters = [];
// If captions came from YouTube we skip Step 1 (download audio) +
// Step 2 (transcribe) and jump straight to analysis. `entries` is
// also the post-Step-2 output of the transcription path, so this
// value just flows through.
let entries = null;
let captionSource = null; // "manual" | "auto" | null
let transcriptText = "";
let txCost = { inputTokens: 0, outputTokens: 0, thinkingTokens: 0, totalTokens: 0, totalCost: "0", totalCostDisplay: "$0.00" };
// Hoisted out of the audio-path block so Step 3 (analysis) can
// reference analysisModel even when the captions fast-path skips
// the entire audio + transcription block. transcriptionModel is
// only used inside the audio block but lives here for symmetry.
// Per-provider model defaults: caller's request → provider's first
// listed model → Gemini fallback (preserves the prior default for
// legacy clients that don't send model fields).
const transcriptionModel =
reqTransModel ||
transcriber.listTranscriptionModels()[0] ||
"gemini-3-flash-preview";
const analysisModel =
reqAnaModel ||
model ||
analyzer.listAnalysisModels()[0] ||
"gemini-3.1-pro-preview";
// ── Step 0 (YouTube only): metadata + captions fast path ──
if (!isPodcast && videoId) {
log(1, "Fetching video metadata...");
const meta = await fetchYouTubeMetadata(videoId);
if (meta) {
if (meta.title) videoTitle = meta.title;
if (meta.uploadDate) videoUploadDate = meta.uploadDate;
videoChannel = meta.channel;
videoDescription = meta.description;
videoChapters = meta.chapters;
log(1, `Video title: ${videoTitle}${videoChannel ? ` (${videoChannel})` : ""}`);
if ((meta.hasManualCaptions || meta.hasAutoCaptions) && !useYouTubeCaptions) {
log(1, `YouTube captions available but the user has the captions fast-path disabled — will transcribe audio directly.`);
} else if (meta.hasManualCaptions || meta.hasAutoCaptions) {
log(1, `YouTube captions available (${meta.hasManualCaptions ? "manual" : "auto"}) — attempting fast-path…`);
const cap = await tryFetchYouTubeCaptions(
videoId,
tmpDir,
{ hasManual: meta.hasManualCaptions, hasAuto: meta.hasAutoCaptions },
log
);
if (cap && cap.entries && cap.entries.length > 0) {
// Auto-captions fragment audio every 1-3 seconds (often only
// 1-3 words per entry). Sending hundreds of micro-segments
// to the analyzer balloons the prompt and overloads the
// model's index-tracking ("Section N must start at index
// K+1 of section M…"). Coalesce into ~15s blocks for a
// saner analysis prompt while keeping timestamps accurate.
const rawCount = cap.entries.length;
entries = coalesceTranscriptEntries(cap.entries, 15);
captionSource = cap.source;
log(
1,
`Using YouTube ${cap.source === "manual" ? "manual" : "auto-generated"} captions — ${rawCount} segments coalesced to ${entries.length}, skipping audio download + transcription`
);
} else {
log(1, "Captions unusable or failed to parse — falling back to audio transcription");
}
} else {
log(1, "No captions available — will download audio and transcribe");
}
}
}
checkCancelled();
// ── Step 1: Download audio (skipped when captions populated entries) ──
if (!entries) {
const dlStart = Date.now();
if (isPodcast) {
log(1, "Downloading podcast episode...");
await downloadPodcastAudio(url, audioPath);
const stats = await fs.stat(audioPath);
const sizeMB = (stats.size / (1024 * 1024)).toFixed(1);
const dlTime = ((Date.now() - dlStart) / 1000).toFixed(1);
log(1, `Episode downloaded (${sizeMB} MB) in ${dlTime}s`, `File size: ${sizeMB} MB`);
log(1, `Episode: ${videoTitle}`);
} else {
log(1, "Downloading audio from YouTube...");
const dlBaseArgs = [
"-x",
"--audio-format", "mp3",
"--audio-quality", "5",
"-o", audioPath,
"--no-playlist",
"--print", "%(title)s|||%(upload_date)s",
"--no-simulate",
];
const dlUrl = `https://www.youtube.com/watch?v=${videoId}`;
const cookieArgs = ytCookieArgs();
const hasCookies = cookieArgs.length > 0;
let usedCookies = false;
let dlStdout = "";
// Helper: one yt-dlp invocation with a 10-minute (600000 ms) timeout.
// Resolves to whatever yt-dlp wrote to stdout (empty string when there
// was none); a failed run rejects with the error from execFileAsync.
// `label` is accepted from the call sites but not used in here.
async function attemptDownload(args, label) {
  const { stdout } = await execFileAsync("yt-dlp", args, { timeout: 600000 });
  return stdout || "";
}
// Helper: check if error text looks like a bot-detection / rate-limit block.
// Returns true when the text contains one of the known phrases
// (case-insensitive). The 429 status code is matched as a standalone
// number only: a bare "429" substring also appears inside unrelated
// tokens (video IDs, byte counts), which would misclassify an ordinary
// failure as rate limiting and send the caller into its long retry waits.
function isBotBlock(errText) {
  return /Sign in|confirm you're not a bot|bot detection|JsChallengeProvider|js.*challenge|HTTP Error 403|Too Many Requests|\b429\b/i.test(errText);
}
// ── Smart download with retry ──
// Strategy: cookies → no-cookies → wait & retry (up to 3 attempts with increasing delays)
const MAX_RETRIES = 3;
const RETRY_DELAYS = [30, 60, 120]; // seconds — escalating backoff
let downloaded = false;
let lastError = "";
for (let attempt = 0; attempt <= MAX_RETRIES && !downloaded; attempt++) {
// On retry attempts, wait before trying again
if (attempt > 0) {
const waitSec = RETRY_DELAYS[Math.min(attempt - 1, RETRY_DELAYS.length - 1)];
log(1, `⏳ YouTube is rate-limiting. Waiting ${waitSec}s before retry ${attempt}/${MAX_RETRIES}...`);
sendEvent(res, "status", { step: 1, message: `Rate limited — retrying in ${waitSec}s (attempt ${attempt}/${MAX_RETRIES})` });
await new Promise(r => setTimeout(r, waitSec * 1000));
log(1, `Retrying download (attempt ${attempt}/${MAX_RETRIES})...`);
// Clean up any partial file from previous attempt
await fs.unlink(audioPath).catch(() => {});
}
// Try with cookies first
if (hasCookies && !usedCookies) {
try {
log(1, attempt === 0 ? "Trying download with browser cookies (ad-free)..." : "Retrying with cookies...");
dlStdout = await attemptDownload([...dlBaseArgs, ...cookieArgs, dlUrl], "cookies");
usedCookies = true;
downloaded = true;
break;
} catch (cookieErr) {
const cookieMsg = (cookieErr.stderr || "") + " " + (cookieErr.message || "");
if (attempt === 0) log(1, `⚠ Cookie download failed: ${cookieMsg.trim().slice(0, 200)}`);
log(1, "Retrying without cookies...");
await fs.unlink(audioPath).catch(() => {});
}
}
// Try without cookies
if (!downloaded) {
try {
dlStdout = await attemptDownload([...dlBaseArgs, dlUrl], "no-cookies");
downloaded = true;
break;
} catch (dlErr) {
lastError = (dlErr.stderr || "") + " " + (dlErr.stdout || "") + " " + (dlErr.message || "");
const blocked = isBotBlock(lastError);
if (blocked && attempt < MAX_RETRIES) {
log(1, `⚠ YouTube bot detection triggered`);
// Will loop back and wait
continue;
}
if (blocked && attempt === MAX_RETRIES) {
// Last resort: try yt-dlp auto-update in case there's a newer version that handles this
log(1, "All retries exhausted — attempting yt-dlp auto-update as last resort...");
const updateResult = await autoUpdateYtdlp(DATA_DIR);
if (updateResult.success) {
log(1, "yt-dlp updated! Final retry...");
try {
const retryResult = await attemptDownload([...dlBaseArgs, dlUrl], "post-update");
dlStdout = retryResult;
downloaded = true;
break;
} catch { /* fall through to error */ }
}
}
// Non-bot error or exhausted retries
if (!downloaded) {
log(1, `⚠ yt-dlp error: ${lastError.trim().slice(0, 300)}`);
}
}
}
}
if (!downloaded) {
const blocked = isBotBlock(lastError);
let hint = "";
if (blocked) {
hint = "\n\nYouTube is temporarily blocking downloads from your IP address. " +
"This is usually caused by:\n" +
"• Recent VPN use (YouTube flags VPN IPs)\n" +
"• Too many downloads in a short period\n" +
"• YouTube's general anti-bot measures\n\n" +
"What to try:\n" +
"• Wait 10-30 minutes and try again\n" +
"• Disconnect any VPN/proxy\n" +
"• Upload fresh cookies.txt via Settings\n" +
"• Try a different network (mobile hotspot, etc.)";
}
throw new Error(`Download failed after ${MAX_RETRIES} retries.${hint}\n\nLast error: ${lastError.trim().slice(0, 300)}`);
}
if (!usedCookies && hasCookies) {
log(1, "⚠ Downloaded without cookies — audio may contain ads");
}
const stats = await fs.stat(audioPath);
const sizeMB = (stats.size / (1024 * 1024)).toFixed(1);
const dlTime = ((Date.now() - dlStart) / 1000).toFixed(1);
log(1, `Audio downloaded (${sizeMB} MB) in ${dlTime}s`, `File size: ${sizeMB} MB`);
// Extract title from the --print output of the download command
const fallbackTitle = videoTitle !== "Untitled" ? videoTitle : null;
let gotTitle = false;
// First try: parse title from the download stdout (most reliable — same call that succeeded)
if (dlStdout) {
const printLines = dlStdout.split("\n").map(l => l.trim()).filter(Boolean);
for (const line of printLines) {
if (line.includes("|||")) {
const sep = line.indexOf("|||");
const t = line.slice(0, sep).trim();
const d = line.slice(sep + 3).trim();
if (t && t !== "NA") {
videoTitle = t;
if (d && d !== "NA") videoUploadDate = d;
gotTitle = true;
log(1, `Video title: ${videoTitle}`);
break;
}
}
}
}
// Second try: separate yt-dlp call (no cookies needed for public metadata)
if (!gotTitle) {
try {
const { stdout } = await execFileAsync("yt-dlp", [
"--print", "%(title)s|||%(upload_date)s",
"--no-download",
`https://www.youtube.com/watch?v=${videoId}`,
], { timeout: 15000 });
const raw = stdout.trim();
const sep = raw.indexOf("|||");
if (sep > 0) {
videoTitle = raw.slice(0, sep).trim() || fallbackTitle || "Untitled";
const d = raw.slice(sep + 3).trim();
if (d && d !== "NA") videoUploadDate = d || videoUploadDate;
} else {
videoTitle = raw || fallbackTitle || "Untitled";
}
gotTitle = videoTitle !== "Untitled";
if (gotTitle) log(1, `Video title: ${videoTitle}`);
} catch {
// Title fetch failed
}
}
// Third try: use the queue-provided title
if (!gotTitle && fallbackTitle) {
videoTitle = fallbackTitle;
log(1, `Using queue title: ${fallbackTitle}`);
gotTitle = true;
}
if (!gotTitle) {
log(1, "⚠ Could not fetch video title");
}
}
checkCancelled();
// ── Step 2: Transcribe audio ──
// Detect audio duration to choose strategy
const audioDuration = await getAudioDuration(audioPath);
const audioDurMin = audioDuration ? (audioDuration / 60).toFixed(1) : "unknown";
log(2, `Audio duration: ${audioDuration ? formatTime(Math.floor(audioDuration)) : "unknown"} (${audioDurMin} min)`);
// Strategy:
// < 60 min AND < 30 MB → Flash on full file (fast, cheap, reliable)
// ≥ 60 min OR ≥ 30 MB → go straight to chunked transcription with Flash (45-min chunks)
// If full-file transcription is truncated or empty → fall back to chunks
const CHUNK_TIME_THRESHOLD = 60 * 60; // 60 minutes
const CHUNK_SIZE_THRESHOLD = 30 * 1024 * 1024; // 30 MB
let audioFileSize = 0;
try { audioFileSize = (await fs.stat(audioPath)).size; } catch {}
const audioSizeMB = (audioFileSize / (1024 * 1024)).toFixed(1);
const needsChunking = (audioDuration && audioDuration >= CHUNK_TIME_THRESHOLD) || (audioFileSize >= CHUNK_SIZE_THRESHOLD);
if (needsChunking) {
const reason = audioDuration >= CHUNK_TIME_THRESHOLD ? `${audioDurMin} min` : `${audioSizeMB} MB`;
log(2, `Large audio (${reason}) — will use chunked transcription with ${transcriber.name}/${transcriptionModel}`);
}
// Transcription model fallback chain: user's chosen model first,
// then the rest of the provider's list. If Gemini 3 Flash hits a
// 503 capacity error, the wrapper transparently retries with
// 2.5 Flash. Matches the analysis fallback pattern below.
const transcriptionFallbacks = [
transcriptionModel,
...transcriber.listTranscriptionModels().filter((m) => m !== transcriptionModel),
];
// Thin wrapper around `transcriber.transcribeAudio` that keeps the
// call-site shape the chunking + main pipeline already use. The provider
// result is returned untouched ({ text, usage, cost, finishReason,
// blockReason, raw } per the provider contract), so callers read the
// same normalized shape whichever provider does the transcription.
//
// Models are tried in order: `modelName` first, then every other entry
// of transcriptionFallbacks. A failure is logged and the next model is
// tried; when every model fails the last error is rethrown. If the
// request was aborted (signal aborted, or the error is an AbortError)
// the walk stops at once with an Error carrying CANCELLED_MARK.
async function transcribeSingleFile(filePath, mType, titleHint, modelName, offsetSeconds = 0) {
  // Per-call chain rather than transcriptionFallbacks itself, so a caller
  // passing a different model (e.g. a per-chunk override) is honored.
  const candidates = [
    modelName,
    ...transcriptionFallbacks.filter((other) => other !== modelName),
  ];
  const finalCandidate = candidates[candidates.length - 1];
  const forwardProgress = (msg) => log(2, msg);

  let failure;
  for (const candidate of candidates) {
    try {
      return await transcriber.transcribeAudio({
        filePath,
        mimeType: mType,
        titleHint,
        // Rich-context hints for the provider's prompt builder (speaker
        // names etc.). Providers that don't care (e.g. Whisper) ignore them.
        channelHint: videoChannel,
        descriptionHint: videoDescription,
        chaptersHint: videoChapters,
        model: candidate,
        offsetSeconds,
        onProgress: forwardProgress,
        signal: abortController.signal,
        // Same id as the analyze call so the relay bundles both into one
        // credit charge. Non-relay providers ignore it.
        jobId,
      });
    } catch (err) {
      // Cancellation: stop right here, never move on to another model.
      const wasCancelled =
        abortController.signal.aborted || err?.name === "AbortError";
      if (wasCancelled) {
        throw new Error(CANCELLED_MARK);
      }
      failure = err;
      const reason = err?.message || String(err);
      log(2, `⚠ Transcription with ${candidate} failed: ${reason.slice(0, 150)}`);
      if (candidate !== finalCandidate) {
        log(2, `Falling back to next transcription model...`);
      }
    }
  }
  throw failure || new Error("All transcription models failed");
}
// ── Helper: chunked transcription for long audio ──
// Splits `srcPath` into 45-minute pieces with splitAudioFile, transcribes
// them one after another through transcribeSingleFile, and stitches the
// parsed entries into a single timeline.
//
//   srcPath   – audio file to split
//   srcMime   – accepted for call-site symmetry but unused: every chunk is
//               submitted as "audio/mpeg"
//   title     – title hint forwarded to each chunk's transcription
//   modelName – preferred transcription model for each chunk
//   logFn     – (message) => void progress logger
//
// Resolves to null when splitAudioFile yields one chunk or fewer (nothing
// to stitch), otherwise to { entries, cost } where `cost` carries summed
// input/output token counts (no price is computed here). Errors from
// splitting or from any chunk's transcription propagate to the caller; the
// scratch directory is removed either way.
async function transcribeChunked(srcPath, srcMime, title, modelName, logFn) {
  // mkdtemp hands out a directory unique to this call. A name derived from
  // the clock could be shared by two jobs that start in the same
  // millisecond, and the cleanup below of whichever finishes first would
  // then delete the other job's chunk files mid-transcription.
  const chunkDir = await fs.mkdtemp(path.join(os.tmpdir(), "yt-chunks-"));
  try {
    const audioChunks = await splitAudioFile(srcPath, chunkDir, 2700); // 45 min chunks
    if (!audioChunks || audioChunks.length <= 1) return null; // splitting not needed
    logFn(`Split audio into ${audioChunks.length} chunks for transcription`);
    const allEntries = [];
    let totalIn = 0;
    let totalOut = 0;
    for (const chunk of audioChunks) {
      logFn(`Transcribing chunk ${chunk.index + 1}/${audioChunks.length} (starts at ${formatTime(chunk.startOffset)})...`);
      const chunkResult = await transcribeSingleFile(
        chunk.path, "audio/mpeg", title,
        modelName,
        chunk.startOffset
      );
      // Provider's normalized result: text + usage + cost. A result without
      // token counts contributes zero rather than turning the totals into NaN.
      totalIn += chunkResult.cost?.inputTokens || 0;
      totalOut += chunkResult.cost?.outputTokens || 0;
      const chunkText = chunkResult.text;
      if (!chunkText) {
        logFn(`⚠ Chunk ${chunk.index + 1} returned empty response — skipping`);
        continue;
      }
      const chunkEntries = parseTimestampedTranscript(chunkText);
      // Programmatically adjust timestamps: the model transcribes from 0:00 relative
      // to the chunk, so add the chunk's startOffset to get real video timestamps
      if (chunk.startOffset > 0) {
        // Heuristic: if the first entry already sits near the chunk's start
        // offset (≥ 80% of it), assume the model applied the offset itself.
        const firstOffset = chunkEntries.length > 0 ? chunkEntries[0].offset : 0;
        const alreadyAdjusted = firstOffset >= chunk.startOffset * 0.8;
        if (!alreadyAdjusted) {
          for (const e of chunkEntries) {
            e.offset += chunk.startOffset;
          }
          logFn(`Adjusted chunk ${chunk.index + 1} timestamps by +${formatTime(chunk.startOffset)}`);
        }
      }
      logFn(`Chunk ${chunk.index + 1}: ${chunkEntries.length} segments, last timestamp ${chunkEntries.length > 0 ? formatTime(chunkEntries[chunkEntries.length - 1].offset) : "N/A"}`);
      // Merge: skip entries that overlap with already-transcribed content.
      // The cut-off is the last timestamp collected from earlier chunks.
      const lastExistingTime = allEntries.length > 0 ? allEntries[allEntries.length - 1].offset : -1;
      for (const e of chunkEntries) {
        if (e.offset > lastExistingTime) allEntries.push(e);
      }
    }
    // Recalculate durations as the gap to the next entry; the final entry
    // has no successor, so it gets a fixed 15 seconds.
    for (let i = 0; i < allEntries.length - 1; i++) {
      allEntries[i].duration = allEntries[i + 1].offset - allEntries[i].offset;
    }
    if (allEntries.length > 0) allEntries[allEntries.length - 1].duration = 15;
    logFn(`Chunked transcription complete: ${allEntries.length} total segments`);
    return {
      entries: allEntries,
      cost: {
        inputTokens: totalIn, outputTokens: totalOut, thinkingTokens: 0,
        totalTokens: totalIn + totalOut,
        totalCost: "0", totalCostDisplay: "",
      },
    };
  } finally {
    // Best-effort cleanup: a failure to remove the scratch directory must
    // not mask the transcription result or the original error.
    try { await fs.rm(chunkDir, { recursive: true, force: true }); } catch {}
  }
}
// entries / transcriptText / txCost are declared earlier (top of
// the request handler) since the captions-fast-path needs to
// populate them before this audio-transcription block runs.
const txStart = Date.now();
if (needsChunking) {
// ── Very long audio: go straight to chunked transcription ──
log(2, `Skipping full-file attempt — using chunked transcription for ${audioDurMin} min audio`);
const chunkedResult = await transcribeChunked(audioPath, mimeType, videoTitle, transcriptionModel, (msg) => log(2, msg));
if (chunkedResult && chunkedResult.entries.length > 0) {
entries = chunkedResult.entries;
txCost = chunkedResult.cost;
} else {
log(2, `⚠ Chunked transcription returned no entries, trying full file as last resort...`);
entries = null; // fall through to full-file attempt below
}
}
if (!entries) {
// ── Normal: transcribe full file ──
const transcriptResult = await transcribeSingleFile(audioPath, mimeType, videoTitle, transcriptionModel);
transcriptText = transcriptResult.text;
if (!transcriptText) {
log(2, `⚠ Full-file transcription returned empty — falling back to chunked transcription...`);
const chunkedResult = await transcribeChunked(audioPath, mimeType, videoTitle, transcriptionModel, (msg) => log(2, msg));
if (chunkedResult && chunkedResult.entries.length > 0) {
entries = chunkedResult.entries;
txCost = chunkedResult.cost;
} else {
throw new Error(`${transcriber.name} returned empty transcription for both full file and chunked attempts. Try again or use a shorter video.`);
}
} else {
txCost = transcriptResult.cost;
const txTime = ((Date.now() - txStart) / 1000).toFixed(1);
log(2, `Transcription complete in ${txTime}s`, `${transcriptText.length} chars received`);
entries = parseTimestampedTranscript(transcriptText);
log(2, `Parsed ${entries.length} transcript segments`);
}
// ── Single-segment expansion ──
// Whisper-API endpoints that don't return per-segment timestamps
// (e.g. NVIDIA Parakeet, some bare-Whisper wrappers) hand back a
// single text blob. parseTimestampedTranscript dumps that into
// one entry at [0:00], which (a) trips the truncation detector
// below and (b) leaves the analyzer with one giant segment so
// it can only produce one section. Synthesize sentence-based
// entries with interpolated timestamps so both code paths
// downstream work like they do for Gemini/Whisper-1.
if (entries.length === 1 && audioDuration && audioDuration > 30 && (entries[0].text || "").length > 100) {
const synthesized = synthesizeEntriesFromText(entries[0].text, audioDuration);
if (synthesized.length > 1) {
log(2, `Single-segment transcript expanded into ${synthesized.length} synthetic sentence-based entries with interpolated timestamps`);
entries = synthesized;
}
}
// ── Truncation detection → fall back to chunks ──
if (audioDuration && entries.length > 0) {
const lastEntryTime = entries[entries.length - 1].offset;
const coverageRatio = lastEntryTime / audioDuration;
const missingSeconds = audioDuration - lastEntryTime;
if (coverageRatio < 0.90 && missingSeconds > 120) {
log(2, `⚠ Transcript truncated — covers ${formatTime(lastEntryTime)} of ${formatTime(Math.floor(audioDuration))} (${(coverageRatio * 100).toFixed(0)}%)`);
log(2, `Falling back to chunked transcription...`);
const chunkedResult = await transcribeChunked(audioPath, mimeType, videoTitle, transcriptionModel, (msg) => log(2, msg));
if (chunkedResult && chunkedResult.entries.length > 0) {
entries = chunkedResult.entries;
txCost = chunkedResult.cost;
const finalCoverage = entries[entries.length - 1].offset;
log(2, `Coverage after chunking: ${formatTime(finalCoverage)} of ${formatTime(Math.floor(audioDuration))}`);
}
} else {
log(2, `Transcript coverage: ${formatTime(lastEntryTime)} of ${formatTime(Math.floor(audioDuration))} (${(coverageRatio * 100).toFixed(0)}%) — OK`);
}
}
}
const txTotalTime = ((Date.now() - txStart) / 1000).toFixed(1);
log(2, `Total transcription time: ${txTotalTime}s — ${entries.length} segments`);
if (!entries || entries.length === 0) {
const preview = (transcriptText || "").slice(0, 500).replace(/\n/g, " ↵ ");
log(2, `⚠ Transcript parse failed. Preview: ${preview}`);
sendEvent(res, "error", { message: "Failed to parse transcript." });
sendEvent(res, "result", { videoId, entries: [], chunks: [], rawTranscript: transcriptText });
res.end();
return;
}
} // end if (!entries) — close the audio + transcription block
if (!entries || entries.length === 0) {
throw new Error("No transcript available — neither captions nor audio transcription produced segments.");
}
checkCancelled();
// ── Step 3: Topic analysis with model fallback ──
// Fallback order: caller's chosen model first, then the rest of the
// analyzer provider's analysis-model list.
const providerModels = analyzer.listAnalysisModels();
const analysisFallbacks = [
analysisModel,
...providerModels.filter((m) => m !== analysisModel),
];
// ── Auto-coalesce input for the analysis LLM ──
// Long videos with fine-grained transcripts (Parakeet ~5s / segment
// on a 2-hour podcast = ~1400 entries) easily exceed local-model
// context windows (typical Gemma/Llama deployments are 16k-32k
// tokens). Cap the analyzer's view at ~400 segments; the displayed
// transcript keeps its full granularity because we translate the
// returned section indices back through indexMap before building
// the final chunks. FLAGGED TO WATCH — may impact section-boundary
// precision on borderline content.
const { coalesced: analysisEntries, indexMap: analysisIndexMap } =
coalesceForAnalysis(entries, 400);
if (analysisEntries !== entries) {
log(
3,
`⚠ AUTO-COALESCED ANALYSIS INPUT: ${entries.length}${analysisEntries.length} segments (large prompts can exceed local-model context limits). Transcript display keeps full granularity.`
);
}
const analysisPrompt = buildAnalysisPrompt(analysisEntries);
let analysisResult = null;
let usedAnalysisModel = analysisModel;
const anaStart = Date.now();
for (const tryModel of analysisFallbacks) {
try {
log(3, `Analyzing topics across ${analysisEntries.length} segments with ${analyzer.name}/${tryModel}...`);
analysisResult = await analyzer.analyzeText({
prompt: analysisPrompt,
model: tryModel,
onProgress: (msg) => log(3, msg),
signal: abortController.signal,
jobId,
});
usedAnalysisModel = tryModel;
break;
} catch (fallbackErr) {
// If the user hit Cancel mid-analysis, do not walk to the
// next model — the signal is shared across attempts and would
// just re-fail. Bubble the cancellation up to the request
// handler's catch.
if (abortController.signal.aborted || fallbackErr?.name === "AbortError") {
throw new Error(CANCELLED_MARK);
}
const msg = fallbackErr?.message || String(fallbackErr);
log(3, `${tryModel} failed: ${msg.slice(0, 150)}`);
if (tryModel !== analysisFallbacks[analysisFallbacks.length - 1]) {
log(3, `Falling back to next model...`);
}
}
}
if (!analysisResult) {
throw new Error("All analysis models failed. Please try again later.");
}
const analysisText = analysisResult.text;
if (!analysisText) {
throw new Error(`${analyzer.name} returned an empty analysis. The transcript may be too long for the model. Try again.`);
}
const anaTime = ((Date.now() - anaStart) / 1000).toFixed(1);
const anaCost = analysisResult.cost;
// Parse the analysis JSON
let analysisJson;
try {
let jsonStr = analysisText.trim();
const codeBlockMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
if (codeBlockMatch) jsonStr = codeBlockMatch[1].trim();
analysisJson = JSON.parse(jsonStr);
} catch (e) {
console.error("Failed to parse analysis JSON:", e.message);
sendEvent(res, "error", { message: "Topic analysis returned invalid JSON. Returning raw transcript." });
sendEvent(res, "result", { videoId, entries, chunks: [], rawTranscript: transcriptText });
res.end();
return;
}
// Build final chunks. When the analyzer saw a coalesced view of the
// transcript (analysisIndexMap is set), translate its section
// indices back to the original-entry index space so the rendered
// transcript keeps full granularity. Without translation, the
// section slice would walk over a much shorter array and miss
// 70%+ of the spoken content for very long videos.
const chunks = analysisJson.sections.map((section) => {
let start = Math.max(0, section.startIndex);
let end = section.endIndex;
if (analysisIndexMap) {
const mappedStart = analysisIndexMap[Math.max(0, Math.min(start, analysisIndexMap.length - 1))];
const mappedEnd = analysisIndexMap[Math.max(0, Math.min(end, analysisIndexMap.length - 1))];
start = mappedStart ? mappedStart.startOrig : 0;
end = mappedEnd ? mappedEnd.endOrig : entries.length - 1;
}
start = Math.max(0, start);
end = Math.min(entries.length - 1, end);
const sectionEntries = entries.slice(start, end + 1);
return {
title: section.title,
summary: section.summary,
entries: sectionEntries,
startTime: sectionEntries[0]?.offset || 0,
};
}).filter((c) => c.entries.length > 0);
const totalTime = ((Date.now() - pipelineStart) / 1000).toFixed(1);
log(3, `Topic analysis complete in ${anaTime}s — found ${chunks.length} topics`);
log(3, `Pipeline finished in ${totalTime}s`);
// Save to history. As of the library-for-everyone change, free
// users save too — the app feels broken otherwise (summarize a
// video, never find it again).
const contentType = isPodcast ? "podcast" : "youtube";
const historyId = await saveToHistory(videoId, url, videoTitle, chunks, entries, logHistory, videoUploadDate, contentType).catch(() => null);
sendEvent(res, "result", { videoId, videoTitle, entries, chunks, historyId, type: contentType });
res.end();
} catch (err) {
// Treat any AbortError or aborted-signal state as a user cancellation
// even if it didn't surface as CANCELLED_MARK upstream (e.g. an SDK
// throws AbortError before checkCancelled() runs).
const cancelled =
err?.message === CANCELLED_MARK ||
err?.name === "AbortError" ||
abortController.signal.aborted;
if (!cancelled) {
// Dump as much context as the runtime gives us. Generic
// messages like "Error in input stream" are usually wrappers
// around an underlying SDK / Node stream error; the cause +
// stack are what actually tells us what happened.
console.error("Pipeline error:", {
name: err?.name,
message: err?.message,
code: err?.code,
status: err?.status || err?.statusCode,
cause: err?.cause,
stack: err?.stack,
transcriptionProvider,
analysisProvider,
});
}
if (!res.headersSent) {
res.status(cancelled ? 499 : 500).json({
error: cancelled ? "cancelled" : err.message,
});
} else if (cancelled) {
sendEvent(res, "cancelled", { message: "Cancelled by user" });
res.end();
} else {
sendEvent(res, "error", { message: err.message });
res.end();
}
} finally {
if (isFree) releaseFreeSlot();
// Clean up temp files
try {
await fs.rm(tmpDir, { recursive: true, force: true });
} catch {}
}
});
// ── Helpers ────────────────────────────────────────────────────────────────
// getAudioDuration + splitAudioFile moved to ./audio.js
// sendEvent / extractVideoId / formatTime / parseTimestampedTranscript moved to ./util.js
// buildAnalysisPrompt moved to ./gemini-helpers.js
// ── Network mode ──────────────────────────────────────────────────────────
// Decide which interface the server binds to.
//   - StartOS: detected by DATA_DIR being set to something other than the
//     parent of this directory (e.g. /data). LAN mode is forced on so the
//     server listens on 0.0.0.0 (container networking).
//   - Local Mac dev: LAN mode is on only when LAN_MODE is exactly "true".
//     The .app launcher sets it (true = Home, false = Traveling); running
//     "npm start" directly leaves it unset, so we stay on localhost, which
//     is the safe choice on public Wi-Fi.
const isStartOS =
  process.env.DATA_DIR &&
  process.env.DATA_DIR !== path.join(__dirname, "..");
const lanMode = Boolean(isStartOS) || process.env.LAN_MODE === "true";
const BIND_HOST = lanMode ? "0.0.0.0" : "127.0.0.1";

// Reports the mode chosen above as `{ lan: <boolean> }`.
app.get("/api/network-mode", (_req, res) => {
  res.json({ lan: lanMode });
});
// ── Start server ───────────────────────────────────────────────────────────
// Bind the HTTP server, then run the one-time startup chores from the
// listen callback:
//   1. yt-dlp check, auto-updating when an update is reported;
//   2. an initial subscription poll (only when subscriptions exist);
//   3. re-queueing of items left in "processing" by a previous run, and
//      kicking the background processor if anything is approved;
//   4. scheduling the hourly subscription poll.
//
// The callback is async but nothing awaits the promise it returns, so each
// chore catches and logs its own failure. Otherwise a single rejected await
// would surface as an unhandled promise rejection and skip every later
// step, including the hourly timer.
app.listen(PORT, BIND_HOST, async () => {
  console.log(`\n Recap API running on http://${BIND_HOST}:${PORT}`);
  console.log(` Data directory: ${DATA_DIR}`);

  // ── 1. yt-dlp ──
  try {
    console.log(` Checking yt-dlp...`);
    const info = await checkYtdlp();
    if (!info.installed) {
      console.log(` ⚠ yt-dlp not found. Install it: pip install yt-dlp\n`);
    } else if (info.updateAvailable) {
      console.log(` ✓ yt-dlp ${info.version} found`);
      console.log(` ↑ Update available: ${info.latestVersion}`);
      console.log(` Auto-updating...`);
      const result = await autoUpdateYtdlp(DATA_DIR);
      if (result.success) {
        // Re-probe so the log shows the version that is actually installed now.
        const refreshed = await checkYtdlp();
        console.log(` ✓ yt-dlp updated to ${refreshed.version}\n`);
      } else {
        console.log(` ⚠ Auto-update failed. Run manually: yt-dlp -U\n`);
      }
    } else {
      console.log(` ✓ yt-dlp ${info.version} (up to date)\n`);
    }
  } catch (err) {
    console.error(" ⚠ yt-dlp check error:", err?.message || err);
  }

  // ── 2. Check subscriptions on startup ──
  try {
    const subs = await loadSubscriptions();
    if (subs.length > 0) {
      console.log(` 📡 Checking ${subs.length} subscription(s) for new videos...`);
      await checkSubscriptions().catch(err => console.error(" ⚠ Subscription check error:", err.message));
      const pending = autoQueue.filter(q => q.status === "pending").length;
      const approved = autoQueue.filter(q => q.status === "approved").length;
      if (pending > 0) console.log(`${pending} new video(s) queued from subscriptions`);
      if (approved > 0) console.log(`${approved} approved video(s) ready for processing`);
      if (pending === 0 && approved === 0) console.log(` ✓ No new videos from subscriptions`);
    }
  } catch (err) {
    console.error(" ⚠ Subscription startup error:", err?.message || err);
  }

  // ── 3. Resume work from a previous session ──
  try {
    // Items still marked "processing" were interrupted (e.g. by a crash);
    // flip them back to "approved" so they are picked up again.
    for (const item of autoQueue) {
      if (item.status === "processing") {
        console.log(` ⚠ Recovering stuck item: ${item.title}`);
        item.status = "approved"; // Re-queue for processing
      }
    }
    if (autoQueue.some(q => q.status === "approved")) {
      await saveAutoQueue();
      console.log(` 🔄 Starting background processor...`);
      // Delay slightly so the server is fully ready before internal HTTP calls
      setTimeout(() => kickProcessor(), 2000);
    }
  } catch (err) {
    console.error(" ⚠ Queue recovery error:", err?.message || err);
  }

  console.log(` ⚙ Processing config: ${processingConfig.delaySeconds}s delay, ${processingConfig.enabled ? "enabled" : "paused"}`);

  // ── 4. Check subscriptions every hour (runs continuously on StartOS) ──
  setInterval(() => {
    checkSubscriptions().catch(err => console.error(" ⚠ Subscription check error:", err.message));
  }, 60 * 60 * 1000);
});