// recap/server/index.js

import express from "express";
import cors from "cors";
import { execFile } from "child_process";
import { promisify } from "util";
import fs from "fs/promises";
import { createWriteStream } from "fs";
import path from "path";
import { fileURLToPath } from "url";
import os from "os";
import https from "https";
import http from "http";
import { randomUUID } from "crypto";
import * as license from "./license.js";
import {
  sendEvent,
  extractVideoId,
  formatTime,
  parseTimestampedTranscript,
  synthesizeEntriesFromText,
  safeText,
  retryGemini,
  fetchUrl,
} from "./util.js";
import { buildAnalysisPrompt } from "./gemini-helpers.js";
import { getProvider, resolveProviderOpts, PROVIDER_NAMES } from "./providers/index.js";
import {
  getAudioDuration,
  splitAudioFile,
  downloadPodcastAudio,
} from "./audio.js";
import { checkYtdlp, autoUpdateYtdlp } from "./ytdlp.js";
import {
  initCookies,
  ytCookieArgs,
  ytExtraArgs,
  ytCookieMethod,
  setupCookieRoutes,
  getCookieFilePath,
} from "./cookies.js";
import * as config from "./config.js";
import { initInstallId, getInstallId } from "./install-id.js";
import * as relayState from "./relay-state.js";
import * as relayDefault from "./relay-default.js";
import { resolveApiKey, getConfigSnapshot } from "./config.js";
import * as licenseMW from "./license-middleware.js";
import {
  setupLicenseMiddleware,
  setupLicenseRoutes,
  startLicenseRefresh,
  refreshLicenseOnline,
  isFreeUser,
  tryAcquireFreeSlot,
  releaseFreeSlot,
  getCurrentFreeJob,
  abortCurrentFreeJob,
  isFreeJobAborted,
  appendCurrentJobLog,
} from "./license-middleware.js";
import {
  initHistory,
  saveToHistory,
  loadMeta,
  saveMeta,
  setupHistoryRoutes,
  getHistoryDir,
} from "./history.js";
import { setupLibraryRoutes } from "./library.js";
import {
  initAdminAuth,
  setupAdminAuthMiddleware,
  setupAdminAuthRoutes,
} from "./admin-auth.js";

// Promise-returning wrapper around child_process.execFile (used for the yt-dlp calls below).
const execFileAsync = promisify(execFile);
const app = express();
// Listening port: the PORT environment variable when set and non-empty, otherwise 3001.
const PORT = process.env.PORT || 3001;

// ── Data directory (configurable for StartOS or local dev) ────────────────
// On StartOS: DATA_DIR=/data (persistent volume)
// On local dev: defaults to project root (parent of server/)
// Directory containing this module. fileURLToPath is used instead of
// URL#pathname because pathname stays percent-encoded (a space in the install
// path becomes "%20") and carries a leading "/" before the drive letter on
// Windows, so it is not a usable filesystem path.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// DATA_DIR env var wins; otherwise fall back to the parent of this directory.
const DATA_DIR = process.env.DATA_DIR || path.join(__dirname, "..");
const historyDir = path.join(DATA_DIR, "history");
const configDir = path.join(DATA_DIR, "config");
// Best effort: a failure to create either directory is deliberately ignored here.
await fs.mkdir(historyDir, { recursive: true }).catch(() => {});
await fs.mkdir(configDir, { recursive: true }).catch(() => {});

// History storage is initialised first, rooted at DATA_DIR (see ./history.js).
await initHistory({ dataDir: DATA_DIR });

// Per-install identity for the upcoming relay backend. Generated once
// on first boot, persisted in DATA_DIR. Surfaced via /api/health and
// /api/install-id (read-only). See ./install-id.js for details.
await initInstallId({ dataDir: DATA_DIR });

// API key + live reload moved to ./config.js
await config.initConfig({ dataDir: DATA_DIR });
// Env-file path reported by the config module; handed to initCookies below.
const envPath = config.getEnvPath();

// Cookies state + helpers + routes moved to ./cookies.js
await initCookies({ dataDir: DATA_DIR, envPath });

// Admin login gate state. Reads username + scrypt hash + session secret
// from /data/config/startos-config.json (set via the "Set Admin
// Password" StartOS action) and refreshes on the same poll cadence as
// config.js. When no hash is set, the gate is a no-op.
await initAdminAuth({ dataDir: DATA_DIR });

// resolveApiKey moved to ./config.js

// CORS with the cors package's default options, and JSON bodies up to 100mb.
app.use(cors());
app.use(express.json({ limit: "100mb" }));

// ── Admin login gate ───────────────────────────────────────────────────────
// MUST run before the license middleware: if an admin password is set,
// nobody (licensed or not) reaches the activation flow without first
// passing the login. Endpoints needed by the login UI itself
// (/api/admin/status, /api/admin/login, /api/admin/logout) and
// /api/health stay open.
setupAdminAuthMiddleware(app);
setupAdminAuthRoutes(app);

// ── Keysat licensing ────────────────────────────────────────────────────────
// All license-aware request handling (gate, Pro feature gates, /api/license
// routes, free-tier slot management, periodic online refresh) lives in
// ./license-middleware.js. Importers read the current state via
// licenseMW.LIC (a live binding).
setupLicenseMiddleware(app);
setupLicenseRoutes(app);
startLicenseRefresh();

// History storage + routes moved to ./history.js
// (saveToHistory, loadMeta, saveMeta are imported above)
// Late-bound addToSkipList — defined later in this file for now. The arrow
// wrapper defers the lookup until a history route actually invokes it.
setupHistoryRoutes(app, { addToSkipList: (id) => addToSkipList(id) });

// Serve the frontend from ../public, and ../assets under the /assets prefix
app.use(express.static(path.join(__dirname, "..", "public")));
app.use("/assets", express.static(path.join(__dirname, "..", "assets")));

// checkYtdlp + autoUpdateYtdlp moved to ./ytdlp.js

// PRICING + calcCost + buildAnalysisPrompt moved to ./gemini-helpers.js
// safeText + retryGemini moved to ./util.js

// ── Health check ───────────────────────────────────────────────────────────

app.get("/api/health", async (req, res) => {
  const ytdlpInfo = await checkYtdlp();

  // Report which cookie source is active; for cookies.txt also report how
  // old the file is so the UI can warn before it goes stale.
  const method = ytCookieMethod();
  const cookies = { method };
  if (method === "cookies.txt") {
    try {
      const { mtimeMs } = await fs.stat(getCookieFilePath());
      const elapsedMs = Date.now() - mtimeMs;
      const fileAgeDays = Math.floor(elapsedMs / (1000 * 60 * 60 * 24));
      cookies.fileAgeDays = fileAgeDays;
      cookies.fileExpiring = fileAgeDays > 12; // cookies typically expire after ~14 days
    } catch {}
  }

  // ytdlpInfo is spread last, so any key it shares with the fixed fields wins.
  res.json({
    ok: true,
    ytdlp: ytdlpInfo.installed,
    hasServerKey: !!config.serverApiKey,
    cookies,
    installId: getInstallId(),
    ...ytdlpInfo,
  });
});

// Read-only install identity. Used by the UI's settings panel so the
// operator can verify the install has been provisioned, and by the
// future relay client to attach the X-Recap-Install-Id header. Open
// path — license gate doesn't apply (the relay needs this ID to be
// reachable before any credits have been granted).
app.get("/api/install-id", (_req, res) => {
  // Read-only: returns whatever install ID the install-id module currently holds.
  const installId = getInstallId();
  res.json({ installId });
});

// Proxy through to the relay's /relay/policy endpoint. Used by the UI
// to render dynamic copy (e.g. "N relay credits" in the activation
// screen reflects whatever the operator currently has the relay
// configured to give Core users — no Recap update needed when the
// operator tunes tier quotas). Cached in-memory for a short window
// so a busy UI doesn't hammer the relay.
// In-memory cache of the last successful relay policy response.
let __cachedRelayPolicy = { at: 0, body: null };
app.get("/api/relay/policy", async (_req, res) => {
  const relayBase = relayDefault.getRelayBaseURL();
  if (!relayBase) {
    return res.json({ configured: false, tiers: null });
  }

  // Serve from cache while the last successful fetch is under five minutes old.
  const cacheTtlMs = 5 * 60 * 1000;
  const { at: cachedAt, body: cachedBody } = __cachedRelayPolicy;
  if (cachedBody && Date.now() - cachedAt < cacheTtlMs) {
    return res.json({ configured: true, ...cachedBody });
  }

  // Shared shape for "relay is configured but the policy could not be fetched".
  const replyUnavailable = (error) =>
    res.json({ configured: true, tiers: null, error });

  try {
    const policyUrl = `${relayBase.replace(/\/$/, "")}/relay/policy`;
    const upstream = await fetch(policyUrl, {
      signal: AbortSignal.timeout(5000),
    });
    if (!upstream.ok) {
      return replyUnavailable(`HTTP ${upstream.status}`);
    }
    const policy = await upstream.json();
    __cachedRelayPolicy = { at: Date.now(), body: policy };
    res.json({ configured: true, ...policy });
  } catch (err) {
    replyUnavailable(err?.message || String(err));
  }
});

// Last-known relay state (credits + tier) cached in-process. The UI
// polls this for the "N credits remaining · Tier: X" banner; the
// underlying numbers are refreshed every time a relay provider call
// lands.
//
// First-paint UX: when the cache is empty (no relay calls yet AND no
// prior ping has populated it), opportunistically hit the relay's
// /balance endpoint with a short timeout so the banner shows real
// numbers on first page load instead of "balance unknown". Best
// effort — if the relay is unreachable the cache stays null and the
// UI falls back to its "unknown" copy. `?refresh=1` forces a ping
// even when the cache already has a value.
//
// `configured` is true when relay-default.js has a non-empty URL —
// operator-controlled at build time, never user-configurable.
app.get("/api/relay/status", async (req, res) => {
  const configured = !!relayDefault.getRelayBaseURL();
  let snapshot = relayState.getRelayState();

  const refreshParam = req.query.refresh;
  const forceRefresh = refreshParam === "1" || refreshParam === "true";
  const nothingCached = snapshot.creditsRemaining === null && snapshot.tier === null;
  const shouldPing = configured && (nothingCached || forceRefresh);

  if (shouldPing) {
    try {
      // Build a relay provider from the current config and ask it for the balance.
      const currentConfig = await getConfigSnapshot();
      const providerOpts = resolveProviderOpts("relay", { config: currentConfig, clientOpts: {} });
      const relayProvider = getProvider("relay", providerOpts);
      await relayProvider.pingBalance({ timeoutMs: 5000 });
      snapshot = relayState.getRelayState();
    } catch (err) {
      // Log and record the failure so the UI can show a real error rather
      // than a silent "balance unknown"; this also covers failures that
      // happen before the ping reaches the network.
      console.error(
        `[relay/status] ping failed: ${err?.message || err} (stack: ${err?.stack || "n/a"})`
      );
      relayState.recordRelayError(err?.message || String(err));
      snapshot = relayState.getRelayState();
    }
  }

  res.json({ ...snapshot, configured });
});

// ── Status endpoints ───────────────────────────────────────────────────────

app.post("/api/heartbeat", (_req, res) => {
  // Always answers with the same "awake" payload.
  const payload = { ok: true, sleeping: false };
  res.json(payload);
});

app.get("/api/status", (_req, res) => {
  // Same payload as the heartbeat plus the process uptime in seconds.
  const uptime = process.uptime();
  res.json({ ok: true, sleeping: false, uptime });
});

// Shutdown: used by the macOS .app launcher to stop the server cleanly.
// On StartOS this endpoint is unused (StartOS manages the container lifecycle).
app.post("/api/shutdown", (_req, res) => {
  // Reply first, then exit after a short delay so the response can be flushed.
  const exitDelayMs = 300;
  res.json({ ok: true, message: "Server shutting down..." });
  console.log("\n  Server shutdown requested from browser. Goodbye!\n");
  setTimeout(() => {
    process.exit(0);
  }, exitDelayMs);
});

// ── Manual update endpoint ─────────────────────────────────────────────────

app.post("/api/update-ytdlp", async (_req, res) => {
  // Run the updater, then re-probe yt-dlp so the response reflects the
  // post-update state. Probe fields win over updater fields on key clashes.
  const updateOutcome = await autoUpdateYtdlp(DATA_DIR);
  const ytdlpInfo = await checkYtdlp();
  res.json({ ...updateOutcome, ...ytdlpInfo });
});

// ── Cookie management endpoints ───────────────────────────────────────────

// /api/cookies/* routes are registered by setupCookieRoutes (./cookies.js)
setupCookieRoutes(app);


// ── Library export/import ── routes registered by setupLibraryRoutes (./library.js) ──
setupLibraryRoutes(app);


// ── Subscriptions ─────────────────────────────────────────────────────────

// Subscriptions are persisted as a single JSON file inside the history directory.
const subsPath = path.join(historyDir, "subscriptions.json");

// Read the subscriptions file and return its `subscriptions` array.
// Any failure (missing file, invalid JSON, unexpected shape) yields [].
async function loadSubscriptions() {
  try {
    const raw = await fs.readFile(subsPath, "utf-8");
    const parsed = JSON.parse(raw);
    return parsed.subscriptions || [];
  } catch {
    return [];
  }
}
// Overwrite the subscriptions file with `subs`, pretty-printed (2-space indent).
async function saveSubscriptions(subs) {
  const payload = { subscriptions: subs };
  const serialized = JSON.stringify(payload, null, 2);
  await fs.writeFile(subsPath, serialized);
}

// List recent videos from a channel/playlist via yt-dlp (no download).
// Uses --flat-playlist for speed; returns [{ id, title }] (no upload_date in
// flat mode).
//   url   — channel or playlist URL passed straight to yt-dlp
//   limit — maximum number of playlist entries to request (default 15)
// Rejects if the yt-dlp invocation fails or exceeds the 60s timeout.
async function listChannelVideosFast(url, limit = 15) {
  const { stdout } = await execFileAsync("yt-dlp", [
    "--print", "%(id)s|%(title)s",
    "--no-download",
    "--playlist-end", String(limit),
    "--flat-playlist",
    ...ytCookieArgs(),
    ...ytExtraArgs(),
    url,
  ], { timeout: 60000 });

  const videos = [];
  // Split on LF or CRLF so a trailing "\r" never ends up inside a title.
  for (const line of stdout.trim().split(/\r?\n/)) {
    // Only the first "|" separates id from title; titles may contain "|".
    const sep = line.indexOf("|");
    // Skip blank lines and stray output that lacks the "id|title" shape.
    // Without this guard indexOf's -1 would chop the last character off the id.
    if (sep <= 0) continue;
    videos.push({ id: line.slice(0, sep), title: line.slice(sep + 1) });
  }
  return videos;
}

// Fetch upload_date for a list of video IDs, one yt-dlp invocation per batch.
//   videoIds  — array of YouTube video IDs
//   batchSize — IDs per yt-dlp call (default 50); anything that is not a
//               positive integer falls back to the default
// Returns { videoId: "YYYYMMDD", ... }; IDs whose date is missing or "NA" are
// omitted. A failed batch is logged and skipped; after 2 consecutive failures
// the remaining batches are abandoned to avoid grinding through blocked requests.
async function fetchUploadDates(videoIds, batchSize = 50) {
  if (videoIds.length === 0) return {};
  const size = Number.isInteger(batchSize) && batchSize > 0 ? batchSize : 50;
  const dateMap = {};
  let consecutiveFails = 0;
  for (let i = 0; i < videoIds.length; i += size) {
    const batch = videoIds.slice(i, i + size);
    const batchNumber = Math.floor(i / size) + 1;
    const urls = batch.map(id => `https://www.youtube.com/watch?v=${id}`);
    try {
      const { stdout } = await execFileAsync("yt-dlp", [
        "--print", "%(id)s|%(upload_date)s",
        "--no-download",
        ...ytCookieArgs(),
        ...urls,
      ], { timeout: 45000 });
      for (const line of stdout.trim().split("\n").filter(Boolean)) {
        const [id, date] = line.split("|");
        if (id && date && date !== "NA") dateMap[id] = date;
      }
      consecutiveFails = 0;
      subLog(`    Batch ${batchNumber}: got dates for ${batch.length} video(s)`);
    } catch (err) {
      consecutiveFails++;
      subLog(`    ⚠ Batch ${batchNumber} failed: ${err.message.slice(0, 80)}`);
      if (consecutiveFails >= 2) {
        subLog(`    ⚠ 2 consecutive failures — aborting yt-dlp date fetch (bot detection likely)`);
        break;
      }
    }
  }
  return dateMap;
}

// ── RSS-based date fetching (bypasses bot detection) ─────────────────────

// Fetch a URL and return the response body as a string
// fetchUrl moved to ./util.js

// Resolve the channel_id behind a YouTube channel/playlist URL using yt-dlp.
// Two attempts are made in order; the first usable answer wins:
//   1. --flat-playlist (fast, may print "NA"), 15s timeout
//   2. full metadata for the first playlist item (slower), 30s timeout
// Returns the id string, or null when neither attempt yields one. A failed
// attempt is swallowed and simply falls through to the next.
async function getChannelId(url) {
  const attempts = [
    { extraArgs: ["--flat-playlist"], timeout: 15000 },
    { extraArgs: [], timeout: 30000 },
  ];

  for (const { extraArgs, timeout } of attempts) {
    try {
      const args = [
        "--print", "%(channel_id)s",
        "--no-download",
        "--playlist-items", "1",
        ...extraArgs,
        ...ytCookieArgs(),
        url,
      ];
      const { stdout } = await execFileAsync("yt-dlp", args, { timeout });
      const [candidate] = stdout.trim().split("\n");
      // Reject empty output, yt-dlp's "NA" placeholder, and implausibly short ids.
      if (candidate && candidate !== "NA" && candidate.length > 5) return candidate;
    } catch {}
  }
  return null;
}

// Read publish dates from a channel's YouTube RSS feed.
// Returns { videoId: "YYYYMMDD", ... } for every <entry> found in the feed,
// or {} when channelId is falsy. A fetch/parse failure is logged via subLog
// and whatever was collected so far is returned.
async function fetchDatesFromRSS(channelId) {
  const dateMap = {};
  if (!channelId) return dateMap;

  // Captures the video id and the <published> timestamp of each <entry>.
  const entryPattern = /<entry>[\s\S]*?<yt:videoId>([^<]+)<\/yt:videoId>[\s\S]*?<published>([^<]+)<\/published>[\s\S]*?<\/entry>/g;
  try {
    const xml = await fetchUrl(`https://www.youtube.com/feeds/videos.xml?channel_id=${channelId}`);
    for (const [, videoId, published] of String(xml).matchAll(entryPattern)) {
      // "2025-12-01T18:00:00+00:00" -> "20251201"
      dateMap[videoId] = published.slice(0, 10).replace(/-/g, "");
    }
  } catch (err) {
    subLog(`    ⚠ RSS feed fetch failed: ${err.message}`);
  }
  return dateMap;
}

// ── Podcast RSS feed parsing ────────────────────────────────────────────────

// Heuristic: does this URL look like a podcast RSS feed?
// Returns true when the trimmed, lower-cased URL contains any known feed
// marker (generic path/extension hints or a known podcast host), otherwise
// false. Falsy input returns false.
function isPodcastFeedUrl(url) {
  if (!url) return false;

  const feedMarkers = [
    // Generic feed hints
    "/feed", "/rss", "feeds.", ".xml",
    // Known podcast hosting providers
    "anchor.fm", "feeds.buzzsprout", "feeds.simplecast",
    "feeds.megaphone", "feeds.transistor", "feeds.libsyn",
    "feeds.podcastmirror", "feeds.acast", "feeds.fireside",
    "rss.art19", "podbean.com/feed",
  ];
  const normalized = url.trim().toLowerCase();
  return feedMarkers.some((marker) => normalized.includes(marker));
}

// Decode the five predefined XML entities plus numeric character references.
// Used for attribute values (e.g. enclosure URLs), which feeds must escape.
function _decodeXmlEntities(text) {
  const named = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" };
  return text.replace(/&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g, (whole, ref) => {
    if (ref[0] !== "#") return named[ref];
    const code = ref[1] === "x"
      ? Number.parseInt(ref.slice(2), 16)
      : Number.parseInt(ref.slice(1), 10);
    // Leave out-of-range references untouched instead of throwing.
    return code <= 0x10ffff ? String.fromCodePoint(code) : whole;
  });
}

// Fetch and parse a podcast RSS feed.
//   feedUrl — URL of the RSS feed (fetched via fetchUrl)
//   limit   — maximum number of episodes to return (default 200)
// Returns { title, episodes: [{ id, title, date, audioUrl, duration }] } where
// date is "YYYYMMDD" (or "" when missing/unparseable) and duration is the raw
// itunes:duration text (or ""). Items without an <enclosure> are skipped and
// do not count towards the limit. Rejects if fetchUrl rejects.
async function parsePodcastRSS(feedUrl, limit = 200) {
  const xml = await fetchUrl(feedUrl);

  // Podcast title: first <title> after <channel>. [\s\S] (not ".") so a title
  // wrapped onto its own lines is still matched instead of being skipped in
  // favour of a later single-line <title>.
  const titleMatch = xml.match(/<channel>[\s\S]*?<title>(?:<!\[CDATA\[)?([\s\S]*?)(?:\]\]>)?<\/title>/);
  const podcastTitle = titleMatch ? titleMatch[1].trim() : "Unknown Podcast";

  // Extract episodes from <item> elements
  const episodes = [];
  const itemRegex = /<item>([\s\S]*?)<\/item>/g;
  let match;
  while ((match = itemRegex.exec(xml)) !== null && episodes.length < limit) {
    const item = match[1];

    // Audio enclosure URL, exactly as written in the feed (still XML-escaped)
    const enclosureMatch = item.match(/<enclosure[^>]+url=["']([^"']+)["']/);
    const rawAudioUrl = enclosureMatch ? enclosureMatch[1].trim() : null;
    if (!rawAudioUrl) continue; // Skip episodes without audio

    // GUID (unique episode identifier)
    const guidMatch = item.match(/<guid[^>]*>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?<\/guid>/);
    const guid = guidMatch ? guidMatch[1].trim() : null;

    // Title (multi-line tolerant, same reasoning as the podcast title)
    const epTitleMatch = item.match(/<title>(?:<!\[CDATA\[)?([\s\S]*?)(?:\]\]>)?<\/title>/);
    const epTitle = epTitleMatch ? epTitleMatch[1].trim() : "Untitled Episode";

    // Publish date -> YYYYMMDD (UTC); left empty when missing or unparseable
    const pubDateMatch = item.match(/<pubDate>([^<]+)<\/pubDate>/);
    let dateStr = "";
    if (pubDateMatch) {
      const parsed = new Date(pubDateMatch[1].trim());
      if (!Number.isNaN(parsed.getTime())) {
        dateStr = parsed.toISOString().slice(0, 10).replace(/-/g, "");
      }
    }

    // Duration (itunes:duration)
    const durMatch = item.match(/<itunes:duration>([^<]+)<\/itunes:duration>/);
    const duration = durMatch ? durMatch[1].trim() : "";

    // Attribute values are XML-escaped, so "&amp;" must become "&" before the
    // URL is requested; otherwise query parameters are corrupted.
    const audioUrl = _decodeXmlEntities(rawAudioUrl);

    // Use guid, or fall back to the enclosure URL as unique ID. The raw
    // (undecoded) URL is kept for the fallback so IDs match those produced
    // before decoding was introduced.
    const id = guid || rawAudioUrl;

    episodes.push({ id, title: epTitle, date: dateStr, audioUrl, duration });
  }

  return { title: podcastTitle, episodes };
}

// Download a podcast episode audio file via HTTP(S) to a local path
// downloadPodcastAudio moved to ./audio.js

// Resolve a human-readable channel name for a URL.
// Tries, in order:
//   1. yt-dlp with --flat-playlist (fast), 15s timeout
//   2. yt-dlp without --flat-playlist (slower, reads video metadata), 30s timeout
//   3. the "@handle" segment of the URL path
// and returns "Unknown Channel" when all of them fail. Errors from any step
// are swallowed so the next fallback can run.
async function fetchChannelName(url) {
  const ytdlpAttempts = [
    { extraArgs: ["--flat-playlist"], timeout: 15000 },
    { extraArgs: [], timeout: 30000 },
  ];

  for (const { extraArgs, timeout } of ytdlpAttempts) {
    try {
      const args = [
        "--print", "%(channel)s",
        "--no-download",
        "--playlist-end", "1",
        ...extraArgs,
        ...ytCookieArgs(),
        url,
      ];
      const { stdout } = await execFileAsync("yt-dlp", args, { timeout });
      const [firstLine] = stdout.trim().split("\n");
      if (firstLine && firstLine !== "NA") return firstLine;
    } catch {}
  }

  // Last resort: extract handle from URL
  try {
    const { pathname } = new URL(url);
    const handle = pathname.match(/\/@([^/]+)/);
    if (handle) return "@" + handle[1];
  } catch {}

  return "Unknown Channel";
}

// ── In-process write serialization ────────────────────────────────────────
// Per-file promise chain to prevent lost-update races on read-modify-write
// state files (skip-list, seen-list). Without this, two concurrent DELETE
// handlers can each load the same snapshot, add their own id, and the
// second write overwrites the first — silently dropping entries.
const _fileLocks = new Map();

// Run `fn` after every previously queued task for `key` has settled.
//   key — identifier of the chain (the callers in this file pass a file path)
//   fn  — zero-argument function, sync or async, to run exclusively
// Returns a promise that settles with fn's own result or error. A failure in
// one task never prevents later tasks on the same key from running.
function withFileLock(key, fn) {
  const prev = _fileLocks.get(key) || Promise.resolve();
  // Invoke fn with no arguments. Passing fn straight to then() would hand it
  // the previous task's result as its first parameter.
  const run = () => fn();
  const next = prev.then(run, run); // run fn whether prev resolved or rejected
  // Keep the chain alive but don't leak errors to the next caller
  _fileLocks.set(key, next.catch(() => {}));
  return next;
}

// Skip list — videos deleted from history that subscriptions should not re-add.
// Stored as a JSON file in the history directory.
const skipPath = path.join(historyDir, "skip-list.json");

// Read the skip list from disk as a Set of video ids.
// Any failure (missing file, invalid JSON, unexpected shape) yields an empty Set.
async function loadSkipList() {
  try {
    const raw = await fs.readFile(skipPath, "utf-8");
    const { videoIds } = JSON.parse(raw);
    return new Set(videoIds || []);
  } catch {
    return new Set();
  }
}

// Add one video id to the skip list on disk. The read-modify-write runs
// inside withFileLock keyed by the skip-list path.
async function addToSkipList(videoId) {
  const appendAndPersist = async () => {
    const current = await loadSkipList();
    current.add(videoId);
    const serialized = JSON.stringify({ videoIds: [...current] });
    await fs.writeFile(skipPath, serialized);
  };
  return withFileLock(skipPath, appendAndPersist);
}

// Seen list — videos already offered for approval (persists across restarts).
// Stored as a JSON file in the history directory.
const seenPath = path.join(historyDir, "seen-list.json");

// Read the seen list from disk as a Set of video ids.
// Any failure (missing file, invalid JSON, unexpected shape) yields an empty Set.
async function loadSeenList() {
  try {
    const raw = await fs.readFile(seenPath, "utf-8");
    const { videoIds } = JSON.parse(raw);
    return new Set(videoIds || []);
  } catch {
    return new Set();
  }
}

// Add several video ids to the seen list on disk. The read-modify-write runs
// inside withFileLock keyed by the seen-list path.
async function addToSeenList(videoIds) {
  const mergeAndPersist = async () => {
    const known = await loadSeenList();
    for (const videoId of videoIds) {
      known.add(videoId);
    }
    const serialized = JSON.stringify({ videoIds: [...known] });
    await fs.writeFile(seenPath, serialized);
  };
  return withFileLock(seenPath, mergeAndPersist);
}

// Collect the videoId of every history entry stored in historyDir.
// Returns a Set of ids. The bookkeeping JSON files that live in the same
// directory are excluded, unreadable or malformed entries are skipped, and an
// unreadable directory yields an empty Set.
async function getProcessedVideoIds() {
  const bookkeepingFiles = new Set([
    "_meta.json",
    "subscriptions.json",
    "skip-list.json",
    "seen-list.json",
    "auto-queue.json",
  ]);
  const ids = new Set();

  let names;
  try {
    names = await fs.readdir(historyDir);
  } catch {
    return ids;
  }

  const entryFiles = names.filter(
    (name) => name.endsWith(".json") && !bookkeepingFiles.has(name)
  );
  for (const name of entryFiles) {
    try {
      const raw = await fs.readFile(path.join(historyDir, name), "utf-8");
      const { videoId } = JSON.parse(raw);
      if (videoId) ids.add(videoId);
    } catch {}
  }
  return ids;
}

// Server-side auto-queue for subscription-discovered videos (persisted to disk).
// `autoQueue` is the in-memory copy; it is reassigned at startup from the
// persisted file and written back by saveAutoQueue().
const autoQueuePath = path.join(historyDir, "auto-queue.json");
let autoQueue = [];

// Read the persisted auto-queue and return its `items` array.
// Any failure (missing file, invalid JSON, unexpected shape) yields [].
async function loadAutoQueue() {
  try {
    const raw = await fs.readFile(autoQueuePath, "utf-8");
    const persisted = JSON.parse(raw);
    return persisted.items || [];
  } catch {
    return [];
  }
}
// Persist the current in-memory auto-queue, pretty-printed (2-space indent).
async function saveAutoQueue() {
  const payload = { items: autoQueue };
  const serialized = JSON.stringify(payload, null, 2);
  await fs.writeFile(autoQueuePath, serialized);
}

// Restore the persisted queue at startup and drop entries whose video is
// already in the library; the pruned queue is written back only if something
// was actually removed.
autoQueue = await loadAutoQueue();
{
  const alreadyProcessed = await getProcessedVideoIds();
  const stillPending = autoQueue.filter(entry => !alreadyProcessed.has(entry.videoId));
  const removedCount = autoQueue.length - stillPending.length;
  autoQueue = stillPending;
  if (removedCount > 0) {
    console.log(`  Auto-queue: removed ${removedCount} already-processed item(s)`);
    await saveAutoQueue();
  }
}

// ── Background processing queue ──────────────────────────────────────────
// Processes "approved" auto-queue items sequentially with configurable delay
// between items to avoid hammering YouTube with rapid-fire downloads.
// processingConfig is the persisted, user-tunable part (see
// loadProcessingConfig/saveProcessingConfig); processingState is runtime-only.
const processingConfigPath = path.join(configDir, "processing-config.json");
let processingConfig = {
  delaySeconds: 300,   // Default delay between processing queue items (5 minutes)
  enabled: true,       // Whether background processing is active
};
let processingState = {
  running: false,       // Is the processor loop currently active?
  currentItem: null,    // The item currently being processed (or null)
  lastCompleted: null,  // ISO timestamp of the last completed item (null until one completes)
  rush: false,          // If true, skip delay before next item
  log: [],              // Recent processing log entries ({ t, msg }), capped by procLog
};

// Log a processor message to the console and to the in-memory processing
// log, which keeps only the most recent 100 entries.
function procLog(msg) {
  const maxEntries = 100;
  console.log(`  [processor] ${msg}`);
  const { log } = processingState;
  log.push({ t: new Date().toISOString(), msg });
  if (log.length > maxEntries) {
    log.shift();
  }
}

// Merge persisted settings into processingConfig. Only correctly typed fields
// are applied; a missing or unreadable file leaves the current values untouched.
async function loadProcessingConfig() {
  try {
    const raw = await fs.readFile(processingConfigPath, "utf-8");
    const { delaySeconds, enabled } = JSON.parse(raw);
    if (typeof delaySeconds === "number") {
      processingConfig.delaySeconds = delaySeconds;
    }
    if (typeof enabled === "boolean") {
      processingConfig.enabled = enabled;
    }
  } catch {}
}
// Persist the current processingConfig, pretty-printed (2-space indent).
async function saveProcessingConfig() {
  const serialized = JSON.stringify(processingConfig, null, 2);
  await fs.writeFile(processingConfigPath, serialized);
}
// Apply any persisted settings once at startup.
await loadProcessingConfig();

// The background processor: picks "approved" items one at a time, runs each
// through processItemInternally, waits the configured delay, then picks the
// next one. It returns (goes to sleep) when no approved item is left or when
// processing is paused; kickProcessor() starts it again.
//
// Only one loop runs at a time, guarded by processingState.running. That flag
// and currentItem are reset in a `finally`, so an unexpected failure (e.g.
// saveAutoQueue rejecting) cannot leave the processor marked as running
// forever. Such a failure still propagates to the caller.
async function backgroundProcessor() {
  if (processingState.running) return; // Already running
  processingState.running = true;
  procLog("Background processor started");

  try {
    while (true) {
      // Find next approved item
      const item = autoQueue.find(q => q.status === "approved");
      if (!item) {
        procLog("No approved items in queue — processor sleeping");
        return;
      }

      if (!processingConfig.enabled) {
        procLog("Processing is paused — processor sleeping");
        return;
      }

      // Wait the configured delay (unless rush mode or first item)
      if (processingState.lastCompleted && !processingState.rush) {
        const delaySec = processingConfig.delaySeconds;
        procLog(`Waiting ${delaySec}s before next item...`);
        await new Promise(r => setTimeout(r, delaySec * 1000));

        // Processing may have been paused while we were waiting; honour it
        // before starting another item.
        if (!processingConfig.enabled) {
          procLog("Processing is paused — processor sleeping");
          return;
        }
      }
      processingState.rush = false;

      // Re-check the item hasn't been removed/changed while we waited
      const freshItem = autoQueue.find(q => q.id === item.id);
      if (!freshItem || freshItem.status !== "approved") {
        procLog(`Item ${item.id} was removed or status changed — skipping`);
        continue;
      }

      // Mark as processing
      freshItem.status = "processing";
      processingState.currentItem = freshItem;
      await saveAutoQueue();
      procLog(`Processing: ${freshItem.title} (${freshItem.url})`);

      try {
        // Call the existing /api/process endpoint via internal HTTP request
        const result = await processItemInternally(freshItem);
        freshItem.status = "completed";
        freshItem.completedAt = new Date().toISOString();
        freshItem.historyId = result.historyId || null;
        processingState.lastCompleted = new Date().toISOString();
        procLog(`✓ Completed: ${freshItem.title}`);
      } catch (err) {
        // A failed item is recorded on the item itself; the loop carries on.
        freshItem.status = "failed";
        freshItem.error = err.message || String(err);
        freshItem.failedAt = new Date().toISOString();
        procLog(`✗ Failed: ${freshItem.title} — ${freshItem.error.slice(0, 200)}`);
      }

      processingState.currentItem = null;
      await saveAutoQueue();
    }
  } finally {
    processingState.running = false;
    processingState.currentItem = null;
  }
}

// Internal HTTP request to /api/process — consumes the SSE stream and
// waits for the "result" or "error" event. This reuses the entire existing
// pipeline without any code duplication.
//   item — auto-queue entry; url, type, title, uploadDate and videoId are forwarded
// Returns a promise that resolves with the payload of the last "result" event.
// It rejects when no server API key is configured, when an "error" event was
// received, when the stream ends without a result, on a socket error, or after
// the 30 minute timeout. "log" events are forwarded to procLog.
function processItemInternally(item) {
  return new Promise((resolve, reject) => {
    const apiKey = resolveApiKey(null); // Use server's stored key
    if (!apiKey) {
      return reject(new Error("No API key configured. Set your Gemini API key in Settings."));
    }

    const body = JSON.stringify({
      url: item.url,
      apiKey: "USE_SERVER_KEY",
      type: item.type || undefined,
      title: item.title || undefined,
      uploadDate: item.uploadDate || undefined,
      episodeId: item.videoId || undefined,
    });

    const req = http.request({
      hostname: "127.0.0.1",
      port: PORT,
      path: "/api/process",
      method: "POST",
      headers: { "Content-Type": "application/json", "Content-Length": Buffer.byteLength(body) },
      timeout: 1800000, // 30 minutes max for very long videos
    }, (res) => {
      // Decode as UTF-8 at the stream level so a multi-byte character split
      // across two chunks is not corrupted.
      res.setEncoding("utf8");

      let buffer = "";
      // The event name must survive across chunks: "event: result" and its
      // (often large) "data: ..." line routinely arrive in different chunks.
      let currentEvent = null;
      let lastResult = null;
      let lastError = null;

      // Interpret one complete SSE line.
      const handleLine = (rawLine) => {
        const line = rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine;
        if (line.startsWith("event: ")) {
          currentEvent = line.slice(7).trim();
        } else if (line.startsWith("data: ") && currentEvent) {
          try {
            const data = JSON.parse(line.slice(6));
            if (currentEvent === "result") lastResult = data;
            if (currentEvent === "error") lastError = data;
            if (currentEvent === "log") {
              procLog(`  [${data.elapsed || "?"}s] ${data.message}`);
            }
          } catch {}
          currentEvent = null;
        } else if (line === "") {
          // Blank line terminates the current event
          currentEvent = null;
        }
      };

      res.on("data", (chunk) => {
        buffer += chunk;
        const lines = buffer.split("\n");
        buffer = lines.pop() || ""; // Keep incomplete line in buffer
        for (const line of lines) handleLine(line);
      });

      res.on("end", () => {
        // Flush a final line that was not newline-terminated.
        if (buffer) handleLine(buffer);

        if (lastError) {
          reject(new Error(lastError.message || "Processing failed"));
        } else if (lastResult) {
          resolve(lastResult);
        } else {
          // Surface the HTTP status when the request was refused outright.
          const detail = res.statusCode >= 400 ? ` (HTTP ${res.statusCode})` : "";
          reject(new Error(`Processing ended without result${detail}`));
        }
      });

      res.on("error", reject);
    });

    req.on("error", reject);
    req.on("timeout", () => {
      req.destroy();
      reject(new Error("Processing timed out after 30 minutes"));
    });

    req.write(body);
    req.end();
  });
}

// Start the background processor if it is idle, processing is enabled, and
// at least one queue item is approved. Fire-and-forget: a rejection from the
// processor is reported through procLog rather than thrown.
function kickProcessor() {
  if (processingState.running || !processingConfig.enabled) return;

  const approvedWaiting = autoQueue.some(entry => entry.status === "approved");
  if (!approvedWaiting) return;

  backgroundProcessor().catch(err => procLog(`Processor error: ${err.message}`));
}

// Subscription-check state: whether a check is in flight, the promise for
// that check, and a rolling log of recent check output for the debug endpoint.
let subCheckRunning = false;
let subCheckPromise = null;
let subCheckLog = []; // Stores recent check logs for debug endpoint

// Print a subscription-check message and append it to the rolling log,
// which keeps only the most recent 200 entries.
function subLog(msg) {
  console.log(msg);
  const entry = { t: new Date().toISOString(), msg };
  subCheckLog.push(entry);
  if (subCheckLog.length > 200) {
    subCheckLog.shift();
  }
}

// Run a subscription check, never overlapping with another one.
// If a check is already in flight, wait for it to finish and then run a new
// check; otherwise start one immediately. The running flag and the in-flight
// promise are cleared when the check settles, whether it succeeded or failed.
async function checkSubscriptions() {
  if (!subCheckRunning) {
    subCheckRunning = true;
    const clearInFlight = () => {
      subCheckRunning = false;
      subCheckPromise = null;
    };
    subCheckPromise = _checkSubscriptionsInner().finally(clearInFlight);
    return subCheckPromise;
  }

  // Wait for current check to finish, then run again
  if (subCheckPromise) await subCheckPromise;
  return checkSubscriptions();
}

/**
 * Perform one pass over all subscriptions and queue newly discovered items.
 *
 * For each non-paused subscription it discovers candidates (podcast RSS, or
 * yt-dlp listing plus RSS / yt-dlp upload dates for YouTube), drops anything
 * already in the library, queue, skip list or seen list, and pushes items
 * dated on/after the subscription's cutoff (`createdAt`, compared as
 * YYYYMMDD) onto `autoQueue` as "approved" (autoDownload) or "pending".
 * Items proven older than the cutoff are recorded in the seen list.
 * A failure in one subscription is logged and does not stop the others.
 *
 * Side effects: resets and appends to `subCheckLog`, mutates `autoQueue`,
 * persists subscriptions (when changed) and the auto-queue, and kicks the
 * background processor when approved items exist.
 *
 * @returns {Promise<void>}
 */
async function _checkSubscriptionsInner() {
  // Pro-tier feature: skip silently when not entitled. The HTTP gate above
  // returns 402 to callers; this guards the background timer + manual paths.
  if (!licenseMW.LIC.entitlements.has("subscriptions")) {
    subCheckLog = [];
    subLog("Skipped: subscriptions require a Pro license.");
    return;
  }
  subCheckLog = []; // Clear logs for fresh check
  const subs = await loadSubscriptions();
  if (subs.length === 0) { subLog("No subscriptions found"); return; }

  const processedIds = await getProcessedVideoIds();
  const skippedIds = await loadSkipList();
  const seenIds = await loadSeenList();
  const queuedIds = new Set(autoQueue.map(q => {
    // For YouTube: extract video ID from URL; for podcasts: use stored videoId (GUID)
    if (q.videoId) return q.videoId;
    const m = q.url.match(/[?&]v=([a-zA-Z0-9_-]{11})/);
    return m ? m[1] : null;
  }).filter(Boolean));

  subLog(`${processedIds.size} in library, ${skippedIds.size} skipped, ${seenIds.size} seen, ${queuedIds.size} in queue`);

  // Set whenever a subscription object was mutated and must be persisted.
  let changed = false;

  for (const sub of subs) {
    if (sub.paused) { subLog(`⏸ ${sub.name} — paused, skipping`); continue; }

    try {
      const icon = sub.type === "podcast" ? "🎙" : "📡";
      subLog(`${icon} Checking: ${sub.name} (${sub.url})`);

      // ISO timestamp → "YYYYMMDD" so it compares lexicographically with
      // the 8-character upload dates used below.
      const cutoffDate = sub.createdAt.replace(/[-T:\.Z]/g, "").slice(0, 8);

      if (sub.type === "podcast") {
        // ── Podcast subscription: discover episodes from RSS feed ──
        const { episodes } = await parsePodcastRSS(sub.url, 200);
        subLog(`  Found ${episodes.length} episode(s) in RSS feed`);

        if (episodes.length === 0) {
          sub.lastChecked = new Date().toISOString();
          changed = true;
          continue;
        }

        // Filter out already-known episodes
        const unknowns = episodes.filter(ep => {
          if (processedIds.has(ep.id)) return false;
          if (queuedIds.has(ep.id)) return false;
          if (skippedIds.has(ep.id)) return false;
          if (seenIds.has(ep.id)) return false;
          return true;
        });
        const filtered = episodes.length - unknowns.length;
        subLog(`  ${unknowns.length} to check, ${filtered} already known`);

        if (unknowns.length === 0) {
          sub.lastChecked = new Date().toISOString();
          changed = true;
          continue;
        }

        subLog(`  Cutoff date: ${cutoffDate}`);

        let newCount = 0;
        const seenNow = [];
        for (const ep of unknowns) {
          if (!ep.date || ep.date.length !== 8) continue; // skip undated
          if (ep.date < cutoffDate) {
            seenNow.push(ep.id);
            continue; // before cutoff
          }
          subLog(`  ✅ ${ep.date} — ${ep.title.slice(0, 60)}`);

          const itemStatus = sub.autoDownload ? "approved" : "pending";
          autoQueue.push({
            // Queue ids must be unique: the per-item endpoints look items up
            // and delete them by id. A prefix of the encoded GUID is not
            // enough — URL-style GUIDs from one feed share their leading
            // bytes, and several episodes are routinely queued within the
            // same millisecond — so use a random UUID as the suffix.
            id: `auto-${Date.now()}-${randomUUID()}`,
            videoId: ep.id, // episode GUID
            url: ep.audioUrl,
            title: ep.title,
            uploadDate: ep.date,
            subscriptionId: sub.id,
            subscriptionName: sub.name,
            status: itemStatus,
            type: "podcast",
            duration: ep.duration || "",
          });
          queuedIds.add(ep.id);
          newCount++;
        }

        if (seenNow.length > 0) await addToSeenList(seenNow);

        sub.lastChecked = new Date().toISOString();
        subLog(`  → ${newCount} episode(s) queued for approval from ${sub.name}`);
        changed = true;

      } else {
        // ── YouTube subscription: discover videos via yt-dlp + RSS dates ──

        // Scale fetch limit based on how far back the subscription date goes
        const daysSinceSub = Math.max(1, Math.ceil((Date.now() - new Date(sub.createdAt).getTime()) / 86400000));
        const estimatedVideos = Math.ceil(daysSinceSub / 7) * 4;
        const fetchLimit = Math.min(Math.max(estimatedVideos, 15), 200);
        subLog(`  Subscription age: ${daysSinceSub}d → fetching up to ${fetchLimit} videos`);

        const candidates = await listChannelVideosFast(sub.url, fetchLimit);
        subLog(`  Found ${candidates.length} recent video(s)`);

        if (candidates.length === 0) {
          subLog(`  ⚠ No videos returned from yt-dlp for this channel`);
          sub.lastChecked = new Date().toISOString();
          changed = true;
          continue;
        }

        const unknowns = candidates.filter(v => {
          if (processedIds.has(v.id)) return false;
          if (queuedIds.has(v.id)) return false;
          if (skippedIds.has(v.id)) return false;
          if (seenIds.has(v.id)) return false;
          return true;
        });
        const filtered = candidates.length - unknowns.length;
        subLog(`  ${unknowns.length} to check, ${filtered} already known`);

        if (unknowns.length === 0) {
          sub.lastChecked = new Date().toISOString();
          changed = true;
          continue;
        }

        subLog(`  Cutoff date: ${cutoffDate} — fetching upload dates...`);

        // Step 1: Get channel ID (cached on subscription, or fetch once)
        if (!sub.channelId) {
          subLog(`  Resolving channel ID...`);
          sub.channelId = await getChannelId(sub.url);
          if (sub.channelId) {
            subLog(`  Resolved channel ID: ${sub.channelId}`);
            changed = true;
          } else {
            subLog(`  ⚠ Could not resolve channel ID — RSS will be skipped`);
          }
        }

        // Step 2: RSS feed — fast, no bot detection, covers ~15 most recent
        const dateMap = sub.channelId ? await fetchDatesFromRSS(sub.channelId) : {};
        const rssCount = Object.keys(dateMap).length;
        if (sub.channelId) subLog(`  RSS feed: got dates for ${rssCount} videos`);

        // Step 3: Smart early termination
        const needDates = unknowns.filter(v => !dateMap[v.id]);
        let earlyTermination = false;
        if (needDates.length > 0 && rssCount > 0) {
          const rssDates = Object.values(dateMap).sort();
          const oldestRssDate = rssDates[0];
          if (oldestRssDate < cutoffDate) {
            earlyTermination = true;
            subLog(`  Oldest RSS video (${oldestRssDate}) is before cutoff — ${needDates.length} older video(s) are definitely too old, skipping yt-dlp`);
          } else {
            subLog(`  ${needDates.length} video(s) not in RSS — trying yt-dlp for dates...`);
            const ytDates = await fetchUploadDates(needDates.map(v => v.id));
            Object.assign(dateMap, ytDates);
            const ytCount = Object.keys(ytDates).length;
            if (ytCount > 0) subLog(`  yt-dlp added dates for ${ytCount} more video(s)`);
          }
        } else if (needDates.length > 0 && rssCount === 0) {
          subLog(`  ${needDates.length} video(s) need dates — trying yt-dlp...`);
          const ytDates = await fetchUploadDates(needDates.map(v => v.id));
          Object.assign(dateMap, ytDates);
          const ytCount = Object.keys(ytDates).length;
          if (ytCount > 0) subLog(`  yt-dlp added dates for ${ytCount} more video(s)`);
        }

        const gotDates = Object.keys(dateMap).length;
        if (gotDates > 0 || needDates.length === 0) {
          subLog(`  Total dates: ${gotDates} of ${unknowns.length} videos`);
        } else {
          subLog(`  ⚠ No dates available — skipping. Try setting YT_COOKIES_FROM in .env`);
          sub.lastChecked = new Date().toISOString();
          changed = true;
          continue;
        }

        let newCount = 0;
        for (const video of unknowns) {
          const uploadDate = dateMap[video.id];
          // Undated videos are neither queued nor marked seen, so they are
          // re-examined on the next check.
          if (!uploadDate || uploadDate.length !== 8) {
            continue;
          }
          if (uploadDate < cutoffDate) {
            subLog(`  ⏭ ${video.id} (${uploadDate}) — before cutoff`);
            continue;
          }
          subLog(`  ✅ ${video.id}${uploadDate ? ` (${uploadDate})` : ""} — ${video.title.slice(0,50)}`);

          const ytItemStatus = sub.autoDownload ? "approved" : "pending";
          autoQueue.push({
            id: `auto-${Date.now()}-${video.id}`,
            videoId: video.id,
            url: `https://www.youtube.com/watch?v=${video.id}`,
            title: video.title,
            uploadDate: uploadDate || null,
            subscriptionId: sub.id,
            subscriptionName: sub.name,
            status: ytItemStatus,
          });
          queuedIds.add(video.id);
          newCount++;
        }

        // Only add to seen list when we can PROVE a video is too old
        const seenNow = unknowns.filter(v => {
          if (queuedIds.has(v.id)) return false;
          const d = dateMap[v.id];
          if (d && d.length === 8 && d < cutoffDate) return true;
          if (d && d.length === 8 && d >= cutoffDate) return false;
          // No usable date: only "proven old" when the RSS window already
          // reached past the cutoff.
          return earlyTermination;
        }).map(v => v.id);
        if (seenNow.length > 0) await addToSeenList(seenNow);

        sub.lastChecked = new Date().toISOString();
        subLog(`  → ${newCount} video(s) queued for approval from ${sub.name}`);
        changed = true;
      }
    } catch (err) {
      // Isolate failures per subscription so one bad feed cannot block the rest.
      subLog(`  ⚠ FAILED for ${sub.name}: ${err.message}`);
    }
  }

  const pendingCount = autoQueue.filter(q => q.status === "pending").length;
  const approvedCount = autoQueue.filter(q => q.status === "approved").length;
  subLog(`Done. ${pendingCount} pending, ${approvedCount} approved in auto-queue.`);
  if (changed) await saveSubscriptions(subs);
  await saveAutoQueue();

  // Wake up the background processor if there are approved items
  if (approvedCount > 0) {
    subLog(`Kicking background processor for ${approvedCount} approved item(s)...`);
    kickProcessor();
  }
}

// CRUD endpoints
// List every stored subscription as { subscriptions: [...] }.
app.get("/api/subscriptions", async (req, res) => {
  res.json({ subscriptions: await loadSubscriptions() });
});

// Derive a normalized dedup key from a YouTube URL. The lower-cased path
// (trailing slashes removed) is matched against the known channel URL shapes,
// first match wins:
//   /@handle[/...]       → "@handle"
//   /channel/UCxxx[/...] → "channel/ucxxx"
//   /c/name, /user/name  → "/c/name", "/user/name" (leading slash kept)
//   ?list=PLxxx          → "playlist/PLxxx" (query value keeps its case)
// Anything else falls back to the normalized path. Input that is not a
// parseable URL falls back to the lower-cased string without trailing slashes.
function channelKeyFromUrl(url) {
  try {
    const parsed = new URL(url);
    const pathname = parsed.pathname.toLowerCase().replace(/\/+$/, "");

    const handle = /\/(@[^/]+)/.exec(pathname);
    if (handle !== null) return handle[1];

    const channel = /\/(channel\/[^/]+)/.exec(pathname);
    if (channel !== null) return channel[1];

    const legacy = /\/(c|user)\/([^/]+)/.exec(pathname);
    if (legacy !== null) return legacy[0];

    const listId = parsed.searchParams.get("list");
    return listId ? `playlist/${listId}` : pathname;
  } catch {
    return url.toLowerCase().replace(/\/+$/, "");
  }
}

// Create a subscription (YouTube channel/playlist or podcast feed).
//
// Body: { url, since?, type?, autoDownload? }
//   url          — required, non-empty string
//   since        — optional cutoff date; anything `new Date()` can parse.
//                  Defaults to "now". Unparseable values are rejected with 400
//                  (previously `toISOString()` threw a RangeError inside the
//                  async handler).
//   type         — "podcast" forces podcast handling; otherwise detected
//                  via isPodcastFeedUrl(url)
//   autoDownload — only literal `true` enables auto-approval
//
// Responds 400 on bad input, 409 on duplicates, otherwise with the new
// subscription immediately; name/channel resolution and the first content
// check run afterwards in the background.
app.post("/api/subscriptions", async (req, res) => {
  const { url, since, type, autoDownload } = req.body || {};
  // Reject missing, blank, or non-string URLs up front — later code calls
  // string methods on `url`.
  if (typeof url !== "string" || !url.trim()) {
    return res.status(400).json({ error: "Missing url" });
  }

  // Use provided cutoff date, or default to right now
  const cutoffDate = since ? new Date(since) : new Date();
  if (Number.isNaN(cutoffDate.getTime())) {
    return res.status(400).json({ error: "Invalid 'since' date" });
  }
  const cutoff = cutoffDate.toISOString();

  const isPodcast = type === "podcast" || isPodcastFeedUrl(url);

  const subs = await loadSubscriptions();

  // Prevent duplicates
  if (isPodcast) {
    const normalizedUrl = url.trim().toLowerCase().replace(/\/+$/, "");
    if (subs.find(s => s.url.trim().toLowerCase().replace(/\/+$/, "") === normalizedUrl)) {
      return res.status(409).json({ error: "Already subscribed to this podcast" });
    }
  } else {
    const newKey = channelKeyFromUrl(url);
    if (subs.find(s => channelKeyFromUrl(s.url) === newKey)) {
      return res.status(409).json({ error: "Already subscribed to this channel" });
    }
  }

  const sub = {
    id: `sub-${Date.now()}`,
    url,
    name: "Loading...", // placeholder until the background lookup resolves it
    type: isPodcast ? "podcast" : "youtube",
    channelId: null,
    createdAt: cutoff,
    lastChecked: null,
    paused: false,
    autoDownload: autoDownload === true,
  };
  subs.push(sub);
  await saveSubscriptions(subs);

  // Respond immediately so the UI isn't blocked
  res.json(sub);

  // Background: resolve name and check for new content
  (async () => {
    try {
      if (isPodcast) {
        // Fetch podcast title from RSS feed
        const { title } = await parsePodcastRSS(url, 1);
        // Reload before writing so edits made while the lookup was in flight
        // are not overwritten with the stale `subs` array.
        const freshSubs = await loadSubscriptions();
        const s = freshSubs.find(x => x.id === sub.id);
        if (s) {
          s.name = title || url;
          await saveSubscriptions(freshSubs);
        }
        console.log(`  🎙  New podcast subscription: ${title} — checking for episodes...`);
      } else {
        const [name, channelId] = await Promise.all([
          fetchChannelName(url),
          getChannelId(url),
        ]);
        const freshSubs = await loadSubscriptions();
        const s = freshSubs.find(x => x.id === sub.id);
        if (s) {
          s.name = name;
          if (channelId) s.channelId = channelId;
          await saveSubscriptions(freshSubs);
        }
        console.log(`  📡  New subscription: ${name} — checking for recent videos...`);
      }
      await checkSubscriptions();
    } catch (err) {
      // The HTTP response has already been sent; log only.
      console.error("  ⚠  Background subscription setup error:", err.message);
    }
  })();
});

// Remove a subscription and every auto-queue item that originated from it
// (regardless of the item's status). Always responds { ok: true }.
app.delete("/api/subscriptions/:id", async (req, res) => {
  const { id } = req.params;

  const remaining = (await loadSubscriptions()).filter((s) => s.id !== id);
  await saveSubscriptions(remaining);

  autoQueue = autoQueue.filter((q) => q.subscriptionId !== id);
  await saveAutoQueue();

  res.json({ ok: true });
});

// Toggle the paused flag of a subscription and return the updated record.
// Responds 404 when the id is unknown.
app.put("/api/subscriptions/:id/pause", async (req, res) => {
  const subs = await loadSubscriptions();
  const target = subs.find((s) => s.id === req.params.id);
  if (!target) {
    return res.status(404).json({ error: "Subscription not found" });
  }

  target.paused = !target.paused;
  await saveSubscriptions(subs);
  res.json(target);
});

// Change a subscription's cutoff date (stored in `createdAt`).
//
// Body: { since } — anything `new Date()` can parse. Responds 400 when
// `since` is missing or unparseable (an invalid date would otherwise make
// `toISOString()` throw a RangeError inside the async handler), 404 when the
// subscription id is unknown, otherwise the updated subscription.
app.put("/api/subscriptions/:id/since", async (req, res) => {
  const { since } = req.body || {};
  if (!since) return res.status(400).json({ error: "Missing 'since' date" });

  const sinceDate = new Date(since);
  if (Number.isNaN(sinceDate.getTime())) {
    return res.status(400).json({ error: "Invalid 'since' date" });
  }

  const subs = await loadSubscriptions();
  const sub = subs.find(s => s.id === req.params.id);
  if (!sub) return res.status(404).json({ error: "Subscription not found" });
  sub.createdAt = sinceDate.toISOString();
  await saveSubscriptions(subs);
  res.json(sub);
});

// Debug: expose the most recent subscription-check log plus a compact
// summary of the auto-queue (viewable in-app).
app.get("/api/sub-check-log", (req, res) => {
  const queueSummary = autoQueue.map(({ id, videoId, title, status, subscriptionName }) => ({
    id,
    videoId,
    title,
    status,
    sub: subscriptionName,
  }));
  res.json({
    log: subCheckLog,
    autoQueueCount: autoQueue.length,
    autoQueue: queueSummary,
  });
});

// Auto-queue endpoints (frontend polls these)
// Returns queue items plus per-status counts. By default only items that
// still need attention (pending / approved / processing) are listed;
// `?all=true` returns every item. Counts always cover the whole queue.
app.get("/api/auto-queue", (req, res) => {
  const activeStatuses = ["pending", "approved", "processing"];
  const countOf = (status) => autoQueue.filter((q) => q.status === status).length;

  let items = autoQueue;
  if (req.query.all !== "true") {
    items = autoQueue.filter((q) => activeStatuses.includes(q.status));
  }

  res.json({
    items,
    checkRunning: subCheckRunning,
    counts: {
      pending: countOf("pending"),
      approved: countOf("approved"),
      processing: countOf("processing"),
      completed: countOf("completed"),
      failed: countOf("failed"),
    },
  });
});

// Drop a single item from the auto-queue and persist the queue.
// Responds { ok: true } whether or not the id matched anything.
app.delete("/api/auto-queue/:id", async (req, res) => {
  const { id } = req.params;
  autoQueue = autoQueue.filter((q) => q.id !== id);
  await saveAutoQueue();
  res.json({ ok: true });
});

// Skip an item: record its videoId (when it has one) in the skip list,
// then remove it from the auto-queue and persist the queue.
app.post("/api/auto-queue/:id/skip", async (req, res) => {
  const { id } = req.params;

  const target = autoQueue.find((q) => q.id === id);
  if (target?.videoId) {
    await addToSkipList(target.videoId);
  }

  autoQueue = autoQueue.filter((q) => q.id !== id);
  await saveAutoQueue();
  res.json({ ok: true });
});

// Move one pending auto-queue item to "approved" and wake the background
// processor. 404 for an unknown id, 400 when the item is not pending.
app.post("/api/auto-queue/:id/approve", async (req, res) => {
  const target = autoQueue.find((q) => q.id === req.params.id);
  if (!target) {
    return res.status(404).json({ error: "Item not found" });
  }
  if (target.status !== "pending") {
    return res.status(400).json({ error: `Cannot approve item with status '${target.status}'` });
  }

  target.status = "approved";
  await saveAutoQueue();
  kickProcessor();
  res.json({ ok: true, item: target });
});

// Approve every pending item in one call. The queue is only persisted and
// the processor only woken when at least one item changed state.
app.post("/api/auto-queue/approve-all", async (req, res) => {
  const pendingItems = autoQueue.filter((q) => q.status === "pending");
  for (const entry of pendingItems) {
    entry.status = "approved";
  }

  if (pendingItems.length > 0) {
    await saveAutoQueue();
    kickProcessor();
  }
  res.json({ ok: true, approved: pendingItems.length });
});

// Re-approve a failed item: clears its error fields, marks it "approved"
// and wakes the background processor. 404 for an unknown id, 400 when the
// item is not in the "failed" state.
app.post("/api/auto-queue/:id/retry", async (req, res) => {
  const target = autoQueue.find((q) => q.id === req.params.id);
  if (!target) {
    return res.status(404).json({ error: "Item not found" });
  }
  if (target.status !== "failed") {
    return res.status(400).json({ error: `Cannot retry item with status '${target.status}'` });
  }

  Object.assign(target, { status: "approved", error: undefined, failedAt: undefined });
  await saveAutoQueue();
  kickProcessor();
  res.json({ ok: true, item: target });
});

// Remove all completed and failed items from the queue and report how many
// were dropped. The queue is persisted only when something was removed.
app.post("/api/auto-queue/clear-finished", async (req, res) => {
  const unfinished = autoQueue.filter(
    (q) => q.status !== "completed" && q.status !== "failed"
  );
  const removed = autoQueue.length - unfinished.length;
  autoQueue = unfinished;

  if (removed > 0) {
    await saveAutoQueue();
  }
  res.json({ ok: true, removed });
});

// Trigger a subscription check without waiting for it to finish. When a
// check is already in progress nothing new is started.
app.post("/api/subscriptions/check-now", async (req, res) => {
  if (subCheckRunning) {
    return res.json({ ok: true, message: "Already checking" });
  }

  // Fire and forget: errors are logged, the response does not wait.
  checkSubscriptions().catch((err) => {
    console.error("  ⚠  Manual subscription check error:", err.message);
  });
  res.json({ ok: true, message: "Check started" });
});

// ── Auto-download toggle per subscription ──────────────────────────────────
// Body: { enabled } — only the literal boolean `true` switches auto-download
// on; any other value switches it off. 404 when the id is unknown.
app.put("/api/subscriptions/:id/auto-download", async (req, res) => {
  const subs = await loadSubscriptions();
  const target = subs.find((s) => s.id === req.params.id);
  if (!target) {
    return res.status(404).json({ error: "Subscription not found" });
  }

  target.autoDownload = req.body.enabled === true;
  await saveSubscriptions(subs);
  res.json({ ok: true, subscription: target });
});

// ── Background processing status & configuration ───────────────────────────

// Snapshot of the background processor: whether it is running, a trimmed
// view of the item in flight, the last completed item, the current config,
// queue counts by status, and the 20 most recent log lines.
app.get("/api/processing/status", (req, res) => {
  const { running, currentItem, lastCompleted } = processingState;
  const countOf = (status) => autoQueue.filter((q) => q.status === status).length;

  let current = null;
  if (currentItem) {
    current = { id: currentItem.id, title: currentItem.title, url: currentItem.url };
  }

  res.json({
    running,
    currentItem: current,
    lastCompleted,
    config: processingConfig,
    counts: {
      approved: countOf("approved"),
      processing: countOf("processing"),
      pending: countOf("pending"),
    },
    log: processingState.log.slice(-20),
  });
});

// Update the processor configuration. Body fields are optional and applied
// independently:
//   delaySeconds — non-negative number, clamped to the range 0..3600
//   enabled      — boolean; switching on also wakes the processor
// The config is persisted on every call and echoed back.
app.put("/api/processing/config", async (req, res) => {
  const { delaySeconds, enabled } = req.body;

  if (typeof delaySeconds === "number" && delaySeconds >= 0) {
    processingConfig.delaySeconds = Math.max(0, Math.min(3600, delaySeconds));
  }

  if (typeof enabled === "boolean") {
    processingConfig.enabled = enabled;
    if (enabled) {
      kickProcessor();
    }
  }

  await saveProcessingConfig();
  res.json({ ok: true, config: processingConfig });
});

// Rush: set the rush flag on the processing state so the delay before the
// next queue item is skipped, and wake the processor in case it is idle
// with approved items waiting.
app.post("/api/processing/rush", (req, res) => {
  processingState.rush = true;
  kickProcessor();
  res.json({
    ok: true,
    message: "Rush mode enabled — next item will process immediately",
  });
});

// Full processing log, unabridged (for debug/monitoring).
app.get("/api/processing/log", (req, res) => {
  const { log } = processingState;
  res.json({ log });
});

// ── Full pipeline: URL → audio → transcript → topic analysis ──────────────

// Fetch a video's yt-dlp info-dict in a single call (`yt-dlp -j
// --skip-download`) and reduce it to the fields the pipeline needs: title,
// upload date, channel, description (first 2000 characters), chapters,
// duration, and whether manual / automatic captions are listed.
//
// Returns null on any failure — yt-dlp error, timeout (30s), or output that
// is not a usable JSON object — so callers can treat metadata as optional.
async function fetchYouTubeMetadata(videoId) {
  const watchUrl = `https://www.youtube.com/watch?v=${videoId}`;
  const ytdlpArgs = ["-j", "--no-warnings", "--skip-download", watchUrl];
  const hasEntries = (table) => Boolean(table && Object.keys(table).length > 0);

  try {
    const result = await execFileAsync("yt-dlp", ytdlpArgs, {
      timeout: 30000,
      maxBuffer: 10 * 1024 * 1024,
    });
    const info = JSON.parse(result.stdout);

    // Descriptions can run to thousands of characters; cap them so the
    // prompt built from this metadata stays a reasonable size.
    const description = (info.description || "").slice(0, 2000);

    return {
      title: info.title || "",
      uploadDate: info.upload_date || "",
      channel: info.channel || info.uploader || "",
      description,
      chapters: Array.isArray(info.chapters) ? info.chapters : [],
      duration: typeof info.duration === "number" ? info.duration : 0,
      hasManualCaptions: hasEntries(info.subtitles),
      hasAutoCaptions: hasEntries(info.automatic_captions),
    };
  } catch {
    return null;
  }
}

// Pulls YouTube captions for the given video and parses them into the
// app's standard { offset, text, duration } entries shape. Prefers
// manual captions over auto-generated, and falls back to auto-generated
// captions when the manual request produces no caption file (e.g. the
// video's manual tracks do not include any of the requested English
// languages). Returns null if no captions are available or parsing
// produces too few segments to be useful.
//
// Parameters:
//   videoId — YouTube video id, interpolated into the watch URL
//   tmpDir  — existing directory where yt-dlp writes `captions.<lang>.json3`
//   opts    — { hasManual, hasAuto } availability flags
//   log     — callback invoked as log(1, message) for warnings
//
// Returns { entries, source } where source is "manual" or "auto" and
// reflects which request actually produced the caption file, or null.
//
// Output is the same shape that `parseTimestampedTranscript()` produces
// from a Gemini transcription, so downstream code (the analysis step,
// the chunk renderer) doesn't care how the transcript got here.
async function tryFetchYouTubeCaptions(videoId, tmpDir, opts, log) {
  const { hasManual, hasAuto } = opts;
  if (!hasManual && !hasAuto) return null;
  const url = `https://www.youtube.com/watch?v=${videoId}`;
  const captionsBase = path.join(tmpDir, "captions");

  // Try manual first (cleaner — punctuated, no fragmented words);
  // fall back to auto-generated if the manual request yields no file.
  const langs = "en.*,en";
  const attempts = [];
  if (hasManual) attempts.push({ flag: "--write-subs", source: "manual" });
  if (hasAuto) attempts.push({ flag: "--write-auto-subs", source: "auto" });

  let captionPath = null;
  let source = null;
  for (const attempt of attempts) {
    try {
      await execFileAsync(
        "yt-dlp",
        ["--skip-download", attempt.flag, "--sub-langs", langs,
         "--sub-format", "json3", "--no-warnings", "-o", captionsBase, url],
        { timeout: 30000 }
      );
    } catch (err) {
      // A hard yt-dlp failure (network, bot detection, timeout) is unlikely
      // to succeed on a second request, so give up rather than retrying.
      log(1, `⚠ Caption download failed: ${(err.message || "").slice(0, 200)}`);
      return null;
    }

    // yt-dlp names the file like `captions.en.json3` or `captions.en-US.json3`.
    try {
      const files = await fs.readdir(tmpDir);
      const match = files.find((f) => f.startsWith("captions.") && f.endsWith(".json3"));
      if (match) captionPath = path.join(tmpDir, match);
    } catch {
      // Unreadable tmpDir is treated the same as "no caption file written".
    }
    if (captionPath) {
      source = attempt.source;
      break;
    }
  }
  if (!captionPath) return null;

  let parsed;
  try {
    const content = await fs.readFile(captionPath, "utf-8");
    parsed = JSON.parse(content);
  } catch (err) {
    log(1, `⚠ Caption parse failed: ${(err.message || "").slice(0, 200)}`);
    return null;
  }

  const rawEvents = Array.isArray(parsed.events) ? parsed.events : [];
  const entries = [];
  for (const evt of rawEvents) {
    // Each event carries its text as a list of segments; join them and
    // flatten embedded newlines into single spaces.
    const text = (Array.isArray(evt.segs) ? evt.segs : [])
      .map((s) => s.utf8 || "")
      .join("")
      .replace(/\n+/g, " ")
      .trim();
    if (!text) continue;
    entries.push({
      offset: (evt.tStartMs || 0) / 1000, // ms → seconds
      text,
      duration: (evt.dDurationMs || 0) / 1000, // ms → seconds
    });
  }
  // Fewer than 5 usable segments is treated as "no captions".
  if (entries.length < 5) return null;
  return { entries, source };
}

// Merge fine-grained timestamped entries into coarser chunks of roughly
// `targetSeconds` each. Intended for auto-captions, which arrive as short
// fragments; entries whose median duration is already 8 seconds or more are
// returned untouched (same array). Non-array or empty input is returned
// as-is. Entries with blank text are dropped.
//
// A chunk starts at the first entry after the previous chunk closed and is
// closed once an entry begins `targetSeconds` or more after the chunk's
// start; that entry opens the next chunk. A chunk's duration spans from its
// start to the end of the last entry merged into it.
function coalesceTranscriptEntries(entries, targetSeconds = 15) {
  if (!Array.isArray(entries) || entries.length === 0) return entries;

  // Median duration decides whether coalescing is needed at all.
  const sortedDurations = entries
    .map((entry) => entry.duration || 0)
    .sort((x, y) => x - y);
  const medianDuration = sortedDurations[Math.floor(sortedDurations.length / 2)];
  if (medianDuration >= 8) return entries;

  const openChunk = (entry, text) => ({
    offset: entry.offset,
    text,
    duration: entry.duration || 0,
  });

  const chunks = [];
  let open = null;
  for (const entry of entries) {
    const text = (entry.text || "").trim();
    if (text === "") continue;

    if (open === null) {
      open = openChunk(entry, text);
      continue;
    }

    const entryStart = entry.offset || 0;
    const sinceChunkStart = entryStart - open.offset;
    if (sinceChunkStart >= targetSeconds) {
      chunks.push(open);
      open = openChunk(entry, text);
      continue;
    }

    open.text = `${open.text} ${text}`.replace(/\s+/g, " ").trim();
    open.duration = (entryStart + (entry.duration || 0)) - open.offset;
  }
  if (open !== null) chunks.push(open);
  return chunks;
}

// Shrink an over-long transcript to roughly `maxEntries` segments so the
// analysis prompt fits smaller model contexts. Unlike
// coalesceTranscriptEntries there is no median-duration guard: the decision
// is purely count-driven. Input that is not an array, is already within
// `maxEntries`, or has no measurable total duration is passed through with
// `indexMap: null`.
//
// The bucket length is the total duration (end of the last entry) divided
// by `maxEntries`, rounded up, and never below 15 seconds. Entries with
// blank text are dropped from the coalesced view.
//
// Returns { coalesced, indexMap }. indexMap[i] = { startOrig, endOrig } gives
// the inclusive range of original-entry indices behind coalesced entry i, so
// the caller can translate section indices reported against the coarse view
// back onto the full-granularity transcript.
//
// FLAGGED TO WATCH: ship 0.2.28. Coarser segments may blur section
// boundaries on borderline content. If users report missed topics or
// imprecise section starts on long content, the alternative is real chunked
// analysis (overlapping windows, analyzed separately, stitched at the
// boundaries) — significantly more involved.
function coalesceForAnalysis(entries, maxEntries = 400) {
  const passThrough = { coalesced: entries, indexMap: null };
  if (!Array.isArray(entries) || entries.length <= maxEntries) {
    return passThrough;
  }

  const tail = entries[entries.length - 1];
  const totalDuration = (tail.offset || 0) + (tail.duration || 0);
  if (totalDuration <= 0) {
    return passThrough;
  }

  const bucketSeconds = Math.max(15, Math.ceil(totalDuration / maxEntries));

  const coalesced = [];
  const indexMap = [];
  let open = null; // bucket currently being filled
  let openStartIndex = 0; // original index of the bucket's first entry

  entries.forEach((entry, index) => {
    const text = (entry.text || "").trim();
    if (text === "") return;

    const entryStart = entry.offset || 0;
    const startsNewBucket =
      open === null || entryStart - open.offset >= bucketSeconds;

    if (!startsNewBucket) {
      open.text = `${open.text} ${text}`.replace(/\s+/g, " ").trim();
      open.duration = (entryStart + (entry.duration || 0)) - open.offset;
      return;
    }

    if (open !== null) {
      coalesced.push(open);
      indexMap.push({ startOrig: openStartIndex, endOrig: index - 1 });
    }
    open = { offset: entry.offset, text, duration: entry.duration || 0 };
    openStartIndex = index;
  });

  if (open !== null) {
    coalesced.push(open);
    indexMap.push({ startOrig: openStartIndex, endOrig: entries.length - 1 });
  }
  return { coalesced, indexMap };
}

// ── In-flight free-tier job status + cancel ─────────────────────────────────
// Reports the job currently occupying the free-tier slot so the web UI can
// show a "Currently processing X — Cancel" banner after a browser refresh,
// when the original /api/process SSE stream is gone. Only the free-tier
// slot is tracked today; paid-tier batch queueing happens client-side.
//
// `?logs=1` (or `?logs=true`) additionally asks for the job's accumulated
// log buffer so a refreshed page can repopulate its activity log. Without
// it the lightweight header-only shape used by the 5s banner poll is
// requested.
app.get("/api/process/current", (req, res) => {
  const { logs } = req.query;
  const includeLogs = logs === "1" || logs === "true";
  const job = getCurrentFreeJob({ includeLogs });
  res.json({ job });
});

// Request cancellation of the in-flight free-tier job. `cancelled` reports
// whether abortCurrentFreeJob() indicated there was a job to abort.
//
// The in-flight yt-dlp / model API call is not killed here — the pipeline
// polls isFreeJobAborted() at each major step and throws early, landing in
// the request handler's finally block where the slot is released. So
// cancellation latency is bounded by the time until the next checkpoint
// (a few seconds in practice, up to the length of one outstanding model
// call).
app.post("/api/process/cancel", (_req, res) => {
  const cancelled = Boolean(abortCurrentFreeJob());
  res.json({ ok: true, cancelled });
});

// ── Auto-discovery of provider connection info ──────────────────────────────
// The picker UI hits this on boot to pre-fill placeholders for providers
// that have a server-detectable default — currently only Ollama, which on
// StartOS is reachable at the internal `http://ollama.startos:11434`
// hostname when installed alongside Recap.
//
// Response: {} when nothing was found, otherwise
//   { ollama: { baseURL, source: "config" | "startos-dep", models: [...] } }
// Every failure along the way is swallowed on purpose: discovery is a
// best-effort convenience and must never fail the request.
app.get("/api/providers/discover", async (_req, res) => {
  const discovered = {};

  try {
    const snapshot = await config.getConfigSnapshot();
    const configuredUrl = (snapshot.ollama_base_url || "").trim();

    // Resolution order: operator-configured URL first, then the StartOS
    // service-discovery hostname if it answers within 1.5s.
    let ollamaUrl = null;
    let source = null;
    if (configuredUrl) {
      ollamaUrl = configuredUrl;
      source = "config";
    } else {
      const candidate = "http://ollama.startos:11434";
      let reachable;
      try {
        const probe = await fetch(`${candidate}/api/tags`, {
          signal: AbortSignal.timeout(1500),
        });
        reachable = probe.ok;
      } catch {
        reachable = false;
      }
      if (reachable) {
        ollamaUrl = candidate;
        source = "startos-dep";
      }
    }

    if (ollamaUrl) {
      // List installed models via /api/tags so the picker can offer them as
      // dropdown options; an unreachable server still yields an entry with
      // an empty model list.
      let models = [];
      try {
        const tagsUrl = `${ollamaUrl.replace(/\/$/, "")}/api/tags`;
        const tagsRes = await fetch(tagsUrl, { signal: AbortSignal.timeout(2000) });
        if (tagsRes.ok) {
          const payload = await tagsRes.json();
          models = (payload.models || []).map((m) => m.name).filter(Boolean);
        }
      } catch {}
      discovered.ollama = { baseURL: ollamaUrl, source, models };
    }
  } catch {}

  res.json(discovered);
});

// Quick connection-test endpoint. The picker UI Test button hits this
// to verify a provider+model+opts combo actually works before the user
// commits to using it. Sends a tiny prompt and returns the model's
// 3-word answer + latency, or a clear error string.
app.post("/api/providers/test", async (req, res) => {
  const body = req.body || {};
  const providerId = body.providerId;
  const model = body.model;
  const clientOpts = body.opts;

  // Request-shape problems are answered with HTTP 400.
  const reject = (message) => res.status(400).json({ ok: false, error: message });

  if (!providerId || typeof providerId !== "string") return reject("missing providerId");
  if (!model || typeof model !== "string") return reject("missing model");
  if (!PROVIDER_NAMES.includes(providerId)) return reject(`unknown provider: ${providerId}`);

  // Build the provider from stored config overlaid with whatever opts
  // the client sent; a construction failure is reported as a 400.
  let provider;
  try {
    const cfg = await config.getConfigSnapshot();
    const usableClientOpts = clientOpts && typeof clientOpts === "object" ? clientOpts : {};
    const resolvedOpts = resolveProviderOpts(providerId, {
      config: cfg,
      clientOpts: usableClientOpts,
    });
    provider = getProvider(providerId, resolvedOpts);
  } catch (err) {
    return reject(err.message);
  }
  if (!provider.capabilities.analyze) return reject(`${providerId} cannot analyze text`);

  const t0 = Date.now();

  // Outcome of the connectivity check itself. Both success and failure
  // are sent with the default 200 status; `ok` carries the verdict.
  const succeed = (text, latencyMs) =>
    res.json({ ok: true, text, latencyMs, provider: providerId, model });
  const fail = (err) =>
    res.json({
      ok: false,
      error: (err?.message || String(err)).slice(0, 300),
      latencyMs: Date.now() - t0,
    });

  try {
    // The relay is checked with pingBalance rather than a real
    // analyzeText round-trip, which would spend one of the user's
    // credits on every click of the Test button.
    if (providerId === "relay") {
      const env = await provider.pingBalance({ timeoutMs: 5000 });
      const latencyMs = Date.now() - t0;
      const credits = env?.credits_remaining;
      const tier = env?.tier || "core";
      let summary = `Connected · Tier: ${tier}`;
      if (credits != null) {
        summary = `Connected · Tier: ${tier} · ${credits} credit${credits === 1 ? "" : "s"} remaining`;
      }
      return succeed(summary, latencyMs);
    }

    // Every other provider gets a tiny prompt with provider-side
    // retries disabled, so a broken setup fails fast.
    const result = await provider.analyzeText({
      prompt: "Respond with exactly three words confirming you received this prompt.",
      model,
      retries: 0,
    });
    const latencyMs = Date.now() - t0;
    return succeed((result.text || "").trim().slice(0, 200), latencyMs);
  } catch (err) {
    return fail(err);
  }
});

// Per-provider, per-field boolean indicating whether the StartOS
// config has a non-empty value for each PROVIDER_KEY_FIELDS slot.
// The picker UI uses this to (a) show a "✓ Server-configured" hint
// under empty inputs, and (b) decide whether the Delete button is
// visible when localStorage is empty but the server has a value.
//
// Never returns the actual values — only booleans. Anything stored in
// startos-config.json could be secret and shouldn't surface in any
// response that could end up in a screenshot.
app.get("/api/providers/credentials-status", async (_req, res) => {
  // Everything awaited here can reject (module load, config read). The
  // whole body is wrapped so a failure becomes a JSON 500 instead of an
  // unhandled promise rejection that leaves the request hanging.
  try {
    const { PROVIDER_KEY_FIELDS } = await import("./providers/index.js");
    const cfg = await getConfigSnapshot();

    // status[providerId][optName] is true when the mapped config field
    // holds a non-blank string. Only booleans are emitted, never values.
    const status = {};
    for (const [providerId, fields] of Object.entries(PROVIDER_KEY_FIELDS)) {
      status[providerId] = {};
      for (const [optName, cfgKey] of Object.entries(fields)) {
        const v = cfg[cfgKey];
        status[providerId][optName] =
          typeof v === "string" && v.trim().length > 0;
      }
    }
    return res.json({ status });
  } catch (err) {
    return res.status(500).json({
      ok: false,
      error: (err?.message || String(err)).slice(0, 300),
    });
  }
});

// Clear all server-side config fields for a provider (the StartOS
// action-set values). The picker UI's Delete button calls this in
// addition to wiping localStorage so credentials are gone from BOTH
// storage paths. Returns the list of fields that were cleared.
app.post("/api/providers/:id/clear", async (req, res) => {
  const providerId = req.params.id;
  // Unknown providers are rejected up front with a 400.
  if (!PROVIDER_NAMES.includes(providerId)) {
    return res.status(400).json({ ok: false, error: `unknown provider: ${providerId}` });
  }
  try {
    // The dynamic import lives inside the try so a module-load failure
    // is reported as a 500 rather than escaping as an unhandled
    // rejection from this async handler.
    const { PROVIDER_KEY_FIELDS } = await import("./providers/index.js");

    // Build a patch of { config_field: "" } for every PROVIDER_KEY_FIELDS
    // entry mapped to this provider. Known providers with no server-side
    // fields (e.g. relay, whose URL is hardcoded) end up clearing
    // nothing — still a 200, just with empty `cleared`.
    const fields = PROVIDER_KEY_FIELDS[providerId] || {};
    const patch = {};
    for (const cfgKey of Object.values(fields)) {
      if (typeof cfgKey === "string") patch[cfgKey] = "";
    }

    await config.mergeConfig(patch);
    return res.json({ ok: true, cleared: Object.keys(patch) });
  } catch (err) {
    return res.status(500).json({
      ok: false,
      error: (err?.message || String(err)).slice(0, 300),
    });
  }
});

app.post("/api/process", async (req, res) => {
  const {
    url, apiKey: clientKey, model, type: itemType, title: itemTitle, uploadDate: itemUploadDate, episodeId,
    transcriptionProvider: reqTransProvider,
    transcriptionModel: reqTransModel,
    analysisProvider: reqAnaProvider,
    analysisModel: reqAnaModel,
    providerOpts: reqProviderOpts,
    useYouTubeCaptions: reqUseYTCaptions,
  } = req.body;
  // Default: use captions when available (huge speed/cost win). The
  // picker-UI toggle lets users force a full transcription pass when
  // they want speaker labels (captions don't have them) or when the
  // auto-captions quality is too low.
  const useYouTubeCaptions = reqUseYTCaptions !== false;
  // Sentinel error message thrown by checkCancelled() when the user
  // hits the in-flight banner's Cancel button. Declared at handler
  // scope (not inside the try) so the catch block can compare against
  // it — see the catch a few hundred lines below.
  const CANCELLED_MARK = "__recap_cancelled__";
  // Per-provider client-side opts: { gemini: {apiKey}, anthropic: {apiKey},
  //   openai: {apiKey}, "openai-compatible": {apiKey, baseURL}, ollama: {baseURL} }.
  // Each provider's opts override that provider's config-stored values
  // (set via the StartOS actions). Used by the picker UI to BYO keys
  // per provider without round-tripping the StartOS dashboard.
  const providerOpts = (reqProviderOpts && typeof reqProviderOpts === "object") ? reqProviderOpts : {};

  // Provider selection: each pipeline step (transcribe + analyze) can
  // independently target any registered provider. Both default to gemini
  // so existing clients (which don't send provider fields) keep working.
  const transcriptionProvider = reqTransProvider || "gemini";
  const analysisProvider = reqAnaProvider || "gemini";

  // Free tier: unlicensed users can summarize one video at a time. They
  // still bring their own key — same as paid users today; the key can
  // come from either the StartOS config action (server-side) or the
  // web UI Settings panel (client-side). The future "bundled key" relay
  // (paid users' requests proxied through the operator's service) isn't
  // built yet, so there's nothing here that gates key sourcing by tier.
  //
  // AbortController for this request. Fired by abortCurrentFreeJob()
  // when the user hits Cancel — passed through to every provider SDK
  // call (transcription + analysis) so in-flight network requests
  // reject immediately instead of running to completion.
  const abortController = new AbortController();

  // Stable identifier for this summarize job. Sent to the relay
  // (when used) as `X-Recap-Job-Id`. The relay charges 1 credit on
  // the first call with a given jobId and treats subsequent calls
  // with the same id as free — so a full summary (transcribe +
  // analyze) costs one credit regardless of which steps route
  // through the relay. Non-relay providers ignore this opt.
  const jobId = randomUUID();

  const isFree = isFreeUser();
  if (isFree) {
    if (!tryAcquireFreeSlot({ url, title: itemTitle, abortController })) {
      const current = getCurrentFreeJob();
      const elapsedSec = current ? Math.round(current.elapsedMs / 1000) : 0;
      const what = current?.title || current?.url || "another video";
      return res.status(409).json({
        error: "processing_in_progress",
        message:
          `A summary is already being processed (${what}, started ${elapsedSec}s ago). Free mode handles one video at a time — wait for it to finish, or cancel it from the status bar at the top of the app.`,
        currentJob: current,
      });
    }
  }

  if (!url) {
    if (isFree) releaseFreeSlot();
    return res.status(400).json({ error: "Missing url" });
  }
  if (!PROVIDER_NAMES.includes(transcriptionProvider)) {
    if (isFree) releaseFreeSlot();
    return res.status(400).json({ error: "unknown_provider", message: `Unknown transcription provider: ${transcriptionProvider}` });
  }
  if (!PROVIDER_NAMES.includes(analysisProvider)) {
    if (isFree) releaseFreeSlot();
    return res.status(400).json({ error: "unknown_provider", message: `Unknown analysis provider: ${analysisProvider}` });
  }

  // Resolve per-provider construction opts from the StartOS config blob,
  // overlaying any client-supplied opts (req.body.providerOpts[name]).
  // For Gemini, the legacy single "apiKey" field on the request body
  // also flows through as a fallback when providerOpts.gemini.apiKey
  // isn't set — keeps pre-picker-UI clients working.
  const cfg = await getConfigSnapshot();
  function clientOptsFor(name) {
    // Opts the client sent for this provider; anything that is not an
    // object is treated as "nothing sent".
    const supplied = providerOpts[name];
    const bodyOpts = supplied && typeof supplied === "object" ? supplied : {};

    // Only Gemini without its own apiKey falls back to the legacy
    // top-level `apiKey` request field.
    const wantsLegacyKey = name === "gemini" && !bodyOpts.apiKey;
    if (!wantsLegacyKey) return bodyOpts;

    const legacyKey = resolveApiKey(clientKey);
    return legacyKey ? { ...bodyOpts, apiKey: legacyKey } : bodyOpts;
  }
  const transcriptionOpts = resolveProviderOpts(transcriptionProvider, {
    config: cfg,
    clientOpts: clientOptsFor(transcriptionProvider),
  });
  const analysisOpts = resolveProviderOpts(analysisProvider, {
    config: cfg,
    clientOpts: clientOptsFor(analysisProvider),
  });

  let transcriber;
  try {
    transcriber = getProvider(transcriptionProvider, transcriptionOpts);
  } catch (err) {
    if (isFree) releaseFreeSlot();
    return res.status(400).json({
      error: "transcription_provider_not_configured",
      message: `Transcription provider ${transcriptionProvider} is not configured: ${err.message}`,
    });
  }
  if (!transcriber.capabilities.transcribe) {
    if (isFree) releaseFreeSlot();
    return res.status(400).json({
      error: "transcription_unsupported",
      message: `Provider ${transcriptionProvider} does not support audio transcription. Pick a different transcription provider (gemini or openai).`,
    });
  }

  let analyzer;
  try {
    analyzer = transcriptionProvider === analysisProvider
      ? transcriber
      : getProvider(analysisProvider, analysisOpts);
  } catch (err) {
    if (isFree) releaseFreeSlot();
    return res.status(400).json({
      error: "analysis_provider_not_configured",
      message: `Analysis provider ${analysisProvider} is not configured: ${err.message}`,
    });
  }
  if (!analyzer.capabilities.analyze) {
    if (isFree) releaseFreeSlot();
    return res.status(400).json({
      error: "analysis_unsupported",
      message: `Provider ${analysisProvider} does not support text analysis.`,
    });
  }

  // Determine if this is a podcast episode or YouTube video
  const isPodcast = itemType === "podcast" || /\.(mp3|m4a|ogg|opus|wav|aac)(\?|$)/i.test(url);
  const videoId = isPodcast ? (episodeId || url) : extractVideoId(url);

  if (!isPodcast && !videoId) {
    if (isFree) releaseFreeSlot();
    return res.status(400).json({ error: "Invalid YouTube URL" });
  }

  const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "yts-"));
  const audioExt = isPodcast ? (url.match(/\.(mp3|m4a|ogg|opus|wav|aac)/i)?.[1] || "mp3") : "mp3";
  const audioPath = path.join(tmpDir, `audio.${audioExt}`);
  const mimeType = { mp3: "audio/mp3", m4a: "audio/mp4", ogg: "audio/ogg", opus: "audio/opus", wav: "audio/wav", aac: "audio/aac" }[audioExt] || "audio/mp3";

  try {
    const pipelineStart = Date.now();

    // Set up SSE
    res.writeHead(200, {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
      Connection: "keep-alive",
    });

    // Helper to send log entries with elapsed time. Also pushes onto the
    // free-tier in-flight job's server-side buffer (no-op when there is
    // no free job, e.g. licensed user) so a browser refresh during a
    // long pipeline can re-hydrate the activity log from the server
    // instead of starting blank.
    const logHistory = [];
    function log(step, message, detail) {
      // Seconds since the pipeline started, one decimal place.
      const elapsed = ((Date.now() - pipelineStart) / 1000).toFixed(1);
      console.log(`  [${elapsed}s] ${message}`);

      // One entry object is shared by the in-request history, the
      // server-side job buffer and the SSE "log" event.
      const record = { elapsed, message, detail: detail || null };
      logHistory.push(record);
      appendCurrentJobLog(record);

      // "status" drives the step indicator; "log" feeds the activity log.
      sendEvent(res, "status", { step, message });
      sendEvent(res, "log", record);
    }

    // Cancellation checkpoint. Polled at each major pipeline boundary;
    // throws CANCELLED_MARK (declared at handler scope above) when the
    // user has hit Cancel on the status banner so the request bails
    // out cleanly via the catch path. Also catches abort signals fired
    // by abortCurrentFreeJob() in case an SDK call swallowed the abort
    // and returned cleanly.
    function checkCancelled() {
      // Cancelled when this request's abort signal has fired or, for
      // free-tier jobs, when the shared free job is flagged as aborted.
      // The free-job flag is only consulted if the signal has not fired.
      const cancelled =
        abortController.signal.aborted || (isFree && isFreeJobAborted());
      if (cancelled) {
        throw new Error(CANCELLED_MARK);
      }
    }

    let videoTitle = itemTitle || "Untitled";
    let videoUploadDate = itemUploadDate || "";
    // Rich metadata used to ground the transcription prompt (speaker
    // names from the channel/description/chapters) and surfaced to
    // downstream UI. Populated for YouTube videos only; podcasts have
    // their own (lighter) metadata path.
    let videoChannel = "";
    let videoDescription = "";
    let videoChapters = [];
    // If captions came from YouTube we skip Step 1 (download audio) +
    // Step 2 (transcribe) and jump straight to analysis. `entries` is
    // also the post-Step-2 output of the transcription path, so this
    // value just flows through.
    let entries = null;
    let captionSource = null; // "manual" | "auto" | null
    let transcriptText = "";
    let txCost = { inputTokens: 0, outputTokens: 0, thinkingTokens: 0, totalTokens: 0, totalCost: "0", totalCostDisplay: "$0.00" };
    // Hoisted out of the audio-path block so Step 3 (analysis) can
    // reference analysisModel even when the captions fast-path skips
    // the entire audio + transcription block. transcriptionModel is
    // only used inside the audio block but lives here for symmetry.
    // Per-provider model defaults: caller's request → provider's first
    // listed model → Gemini fallback (preserves the prior default for
    // legacy clients that don't send model fields).
    const transcriptionModel =
      reqTransModel ||
      transcriber.listTranscriptionModels()[0] ||
      "gemini-3-flash-preview";
    const analysisModel =
      reqAnaModel ||
      model ||
      analyzer.listAnalysisModels()[0] ||
      "gemini-3.1-pro-preview";

    // ── Step 0 (YouTube only): metadata + captions fast path ──
    if (!isPodcast && videoId) {
      log(1, "Fetching video metadata...");
      const meta = await fetchYouTubeMetadata(videoId);
      if (meta) {
        if (meta.title) videoTitle = meta.title;
        if (meta.uploadDate) videoUploadDate = meta.uploadDate;
        videoChannel = meta.channel;
        videoDescription = meta.description;
        videoChapters = meta.chapters;
        log(1, `Video title: ${videoTitle}${videoChannel ? ` (${videoChannel})` : ""}`);
        if ((meta.hasManualCaptions || meta.hasAutoCaptions) && !useYouTubeCaptions) {
          log(1, `YouTube captions available but the user has the captions fast-path disabled — will transcribe audio directly.`);
        } else if (meta.hasManualCaptions || meta.hasAutoCaptions) {
          log(1, `YouTube captions available (${meta.hasManualCaptions ? "manual" : "auto"}) — attempting fast-path…`);
          const cap = await tryFetchYouTubeCaptions(
            videoId,
            tmpDir,
            { hasManual: meta.hasManualCaptions, hasAuto: meta.hasAutoCaptions },
            log
          );
          if (cap && cap.entries && cap.entries.length > 0) {
            // Auto-captions fragment audio every 1–3 seconds (often only
            // 1–3 words per entry). Sending hundreds of micro-segments
            // to the analyzer balloons the prompt and overloads the
            // model's index-tracking ("Section N must start at index
            // K+1 of section M…"). Coalesce into ~15s blocks for a
            // saner analysis prompt while keeping timestamps accurate.
            const rawCount = cap.entries.length;
            entries = coalesceTranscriptEntries(cap.entries, 15);
            captionSource = cap.source;
            log(
              1,
              `Using YouTube ${cap.source === "manual" ? "manual" : "auto-generated"} captions — ${rawCount} segments coalesced to ${entries.length}, skipping audio download + transcription`
            );
          } else {
            log(1, "Captions unusable or failed to parse — falling back to audio transcription");
          }
        } else {
          log(1, "No captions available — will download audio and transcribe");
        }
      }
    }

    checkCancelled();

    // ── Step 1: Download audio (skipped when captions populated entries) ──
   if (!entries) {
    const dlStart = Date.now();

    if (isPodcast) {
      log(1, "Downloading podcast episode...");
      await downloadPodcastAudio(url, audioPath);

      const stats = await fs.stat(audioPath);
      const sizeMB = (stats.size / (1024 * 1024)).toFixed(1);
      const dlTime = ((Date.now() - dlStart) / 1000).toFixed(1);
      log(1, `Episode downloaded (${sizeMB} MB) in ${dlTime}s`, `File size: ${sizeMB} MB`);
      log(1, `Episode: ${videoTitle}`);

    } else {
      log(1, "Downloading audio from YouTube...");

      const dlBaseArgs = [
        "-x",
        "--audio-format", "mp3",
        "--audio-quality", "5",
        "-o", audioPath,
        "--no-playlist",
        "--print", "%(title)s|||%(upload_date)s",
        "--no-simulate",
      ];
      const dlUrl = `https://www.youtube.com/watch?v=${videoId}`;
      const cookieArgs = ytCookieArgs();
      const hasCookies = cookieArgs.length > 0;
      let usedCookies = false;
      let dlStdout = "";

      // Helper: attempt a single yt-dlp download
      async function attemptDownload(args, label) {
        // Runs yt-dlp once with a 10-minute timeout and resolves to its
        // stdout ("" when there is none). A failed run rejects; callers
        // decide whether to retry. `label` is accepted but not read here.
        const { stdout } = await execFileAsync("yt-dlp", args, { timeout: 600000 });
        return stdout || "";
      }

      // Helper: check if error is a bot detection / rate limit block
      function isBotBlock(errText) {
        // Case-insensitive fragments of yt-dlp output that are treated
        // as bot detection or rate limiting.
        const blockSignatures = [
          /Sign in/i,
          /confirm you're not a bot/i,
          /bot detection/i,
          /JsChallengeProvider/i,
          /js.*challenge/i,
          /HTTP Error 403/i,
          /Too Many Requests/i,
          /429/i,
        ];
        return blockSignatures.some((signature) => signature.test(errText));
      }

      // ── Smart download with retry ──
      // Strategy: cookies → no-cookies → wait & retry (up to 3 attempts with increasing delays)
      const MAX_RETRIES = 3;
      const RETRY_DELAYS = [30, 60, 120]; // seconds — escalating backoff
      let downloaded = false;
      let lastError = "";

      for (let attempt = 0; attempt <= MAX_RETRIES && !downloaded; attempt++) {
        // On retry attempts, wait before trying again
        if (attempt > 0) {
          const waitSec = RETRY_DELAYS[Math.min(attempt - 1, RETRY_DELAYS.length - 1)];
          log(1, `⏳ YouTube is rate-limiting. Waiting ${waitSec}s before retry ${attempt}/${MAX_RETRIES}...`);
          sendEvent(res, "status", { step: 1, message: `Rate limited — retrying in ${waitSec}s (attempt ${attempt}/${MAX_RETRIES})` });
          await new Promise(r => setTimeout(r, waitSec * 1000));
          log(1, `Retrying download (attempt ${attempt}/${MAX_RETRIES})...`);
          // Clean up any partial file from previous attempt
          await fs.unlink(audioPath).catch(() => {});
        }

        // Try with cookies first
        if (hasCookies && !usedCookies) {
          try {
            log(1, attempt === 0 ? "Trying download with browser cookies (ad-free)..." : "Retrying with cookies...");
            dlStdout = await attemptDownload([...dlBaseArgs, ...cookieArgs, dlUrl], "cookies");
            usedCookies = true;
            downloaded = true;
            break;
          } catch (cookieErr) {
            const cookieMsg = (cookieErr.stderr || "") + " " + (cookieErr.message || "");
            if (attempt === 0) log(1, `⚠ Cookie download failed: ${cookieMsg.trim().slice(0, 200)}`);
            log(1, "Retrying without cookies...");
            await fs.unlink(audioPath).catch(() => {});
          }
        }

        // Try without cookies
        if (!downloaded) {
          try {
            dlStdout = await attemptDownload([...dlBaseArgs, dlUrl], "no-cookies");
            downloaded = true;
            break;
          } catch (dlErr) {
            lastError = (dlErr.stderr || "") + " " + (dlErr.stdout || "") + " " + (dlErr.message || "");
            const blocked = isBotBlock(lastError);

            if (blocked && attempt < MAX_RETRIES) {
              log(1, `⚠ YouTube bot detection triggered`);
              // Will loop back and wait
              continue;
            }

            if (blocked && attempt === MAX_RETRIES) {
              // Last resort: try yt-dlp auto-update in case there's a newer version that handles this
              log(1, "All retries exhausted — attempting yt-dlp auto-update as last resort...");
              const updateResult = await autoUpdateYtdlp(DATA_DIR);
              if (updateResult.success) {
                log(1, "yt-dlp updated! Final retry...");
                try {
                  const retryResult = await attemptDownload([...dlBaseArgs, dlUrl], "post-update");
                  dlStdout = retryResult;
                  downloaded = true;
                  break;
                } catch { /* fall through to error */ }
              }
            }

            // Non-bot error or exhausted retries
            if (!downloaded) {
              log(1, `⚠ yt-dlp error: ${lastError.trim().slice(0, 300)}`);
            }
          }
        }
      }

      if (!downloaded) {
        const blocked = isBotBlock(lastError);
        let hint = "";
        if (blocked) {
          hint = "\n\nYouTube is temporarily blocking downloads from your IP address. " +
            "This is usually caused by:\n" +
            "• Recent VPN use (YouTube flags VPN IPs)\n" +
            "• Too many downloads in a short period\n" +
            "• YouTube's general anti-bot measures\n\n" +
            "What to try:\n" +
            "• Wait 10-30 minutes and try again\n" +
            "• Disconnect any VPN/proxy\n" +
            "• Upload fresh cookies.txt via Settings\n" +
            "• Try a different network (mobile hotspot, etc.)";
        }
        throw new Error(`Download failed after ${MAX_RETRIES} retries.${hint}\n\nLast error: ${lastError.trim().slice(0, 300)}`);
      }

      if (!usedCookies && hasCookies) {
        log(1, "⚠ Downloaded without cookies — audio may contain ads");
      }

      const stats = await fs.stat(audioPath);
      const sizeMB = (stats.size / (1024 * 1024)).toFixed(1);
      const dlTime = ((Date.now() - dlStart) / 1000).toFixed(1);
      log(1, `Audio downloaded (${sizeMB} MB) in ${dlTime}s`, `File size: ${sizeMB} MB`);

      // Extract title from the --print output of the download command
      const fallbackTitle = videoTitle !== "Untitled" ? videoTitle : null;
      let gotTitle = false;

      // First try: parse title from the download stdout (most reliable — same call that succeeded)
      if (dlStdout) {
        const printLines = dlStdout.split("\n").map(l => l.trim()).filter(Boolean);
        for (const line of printLines) {
          if (line.includes("|||")) {
            const sep = line.indexOf("|||");
            const t = line.slice(0, sep).trim();
            const d = line.slice(sep + 3).trim();
            if (t && t !== "NA") {
              videoTitle = t;
              if (d && d !== "NA") videoUploadDate = d;
              gotTitle = true;
              log(1, `Video title: ${videoTitle}`);
              break;
            }
          }
        }
      }

      // Second try: separate yt-dlp call (no cookies needed for public metadata)
      if (!gotTitle) {
        try {
          const { stdout } = await execFileAsync("yt-dlp", [
            "--print", "%(title)s|||%(upload_date)s",
            "--no-download",
            `https://www.youtube.com/watch?v=${videoId}`,
          ], { timeout: 15000 });
          const raw = stdout.trim();
          const sep = raw.indexOf("|||");
          if (sep > 0) {
            videoTitle = raw.slice(0, sep).trim() || fallbackTitle || "Untitled";
            const d = raw.slice(sep + 3).trim();
            if (d && d !== "NA") videoUploadDate = d || videoUploadDate;
          } else {
            videoTitle = raw || fallbackTitle || "Untitled";
          }
          gotTitle = videoTitle !== "Untitled";
          if (gotTitle) log(1, `Video title: ${videoTitle}`);
        } catch {
          // Title fetch failed
        }
      }

      // Third try: use the queue-provided title
      if (!gotTitle && fallbackTitle) {
        videoTitle = fallbackTitle;
        log(1, `Using queue title: ${fallbackTitle}`);
        gotTitle = true;
      }

      if (!gotTitle) {
        log(1, "⚠ Could not fetch video title");
      }
    }

    checkCancelled();

    // ── Step 2: Transcribe audio ──
    // Detect audio duration to choose strategy
    const audioDuration = await getAudioDuration(audioPath);
    const audioDurMin = audioDuration ? (audioDuration / 60).toFixed(1) : "unknown";
    log(2, `Audio duration: ${audioDuration ? formatTime(Math.floor(audioDuration)) : "unknown"} (${audioDurMin} min)`);

    // Strategy:
    //   < 60 min AND < 30 MB  → Flash on full file (fast, cheap, reliable)
    //   ≥ 60 min OR ≥ 30 MB   → go straight to chunked transcription with Flash (45-min chunks)
    //   If full-file transcription is truncated or empty → fall back to chunks
    const CHUNK_TIME_THRESHOLD = 60 * 60; // 60 minutes
    const CHUNK_SIZE_THRESHOLD = 30 * 1024 * 1024; // 30 MB
    let audioFileSize = 0;
    try { audioFileSize = (await fs.stat(audioPath)).size; } catch {}
    const audioSizeMB = (audioFileSize / (1024 * 1024)).toFixed(1);
    const needsChunking = (audioDuration && audioDuration >= CHUNK_TIME_THRESHOLD) || (audioFileSize >= CHUNK_SIZE_THRESHOLD);
    if (needsChunking) {
      const reason = audioDuration >= CHUNK_TIME_THRESHOLD ? `${audioDurMin} min` : `${audioSizeMB} MB`;
      log(2, `Large audio (${reason}) — will use chunked transcription with ${transcriber.name}/${transcriptionModel}`);
    }

    // Transcription model fallback chain: user's chosen model first,
    // then the rest of the provider's list. If Gemini 3 Flash hits a
    // 503 capacity error, the wrapper transparently retries with
    // 2.5 Flash. Matches the analysis fallback pattern below.
    const transcriptionFallbacks = [
      transcriptionModel,
      ...transcriber.listTranscriptionModels().filter((m) => m !== transcriptionModel),
    ];

    // Thin wrapper: keeps the call-site shape the chunking + main
    // pipeline already use. `transcriber.transcribeAudio` returns
    // { text, usage, cost, finishReason, blockReason, raw } — callers
    // read off that normalized shape regardless of which provider is
    // doing the transcription. On hard failure (after the provider's
    // own retry loop), walks the fallback chain to the next model.
    async function transcribeSingleFile(filePath, mType, titleHint, modelName, offsetSeconds = 0) {
      // Candidate order for this call: the requested model first, then
      // every other entry of transcriptionFallbacks in list order.
      const candidates = [modelName].concat(
        transcriptionFallbacks.filter((name) => name !== modelName)
      );
      const finalCandidate = candidates[candidates.length - 1];
      let failure;

      for (let i = 0; i < candidates.length; i += 1) {
        const candidate = candidates[i];
        try {
          const request = {
            filePath,
            mimeType: mType,
            titleHint,
            // Context hints handed to the provider alongside the audio;
            // they come from the video metadata gathered earlier.
            channelHint: videoChannel,
            descriptionHint: videoDescription,
            chaptersHint: videoChapters,
            model: candidate,
            offsetSeconds,
            onProgress: (msg) => log(2, msg),
            signal: abortController.signal,
            // Same id the analysis step sends, so both calls belong to
            // one job from the provider's point of view.
            jobId,
          };
          return await transcriber.transcribeAudio(request);
        } catch (err) {
          // A cancellation ends the whole chain; no further model is tried.
          const cancelled = abortController.signal.aborted || err?.name === "AbortError";
          if (cancelled) {
            throw new Error(CANCELLED_MARK);
          }
          failure = err;
          const reason = (err?.message || String(err)).slice(0, 150);
          log(2, `⚠ Transcription with ${candidate} failed: ${reason}`);
          if (candidate !== finalCandidate) {
            log(2, `Falling back to next transcription model...`);
          }
        }
      }

      // Every candidate failed: surface the most recent error.
      throw failure || new Error("All transcription models failed");
    }

    // ── Helper: chunked transcription for long audio ──
    /**
     * Transcribe long audio by splitting it into pieces, transcribing
     * each piece in order, and stitching the results into one timeline.
     *
     * @param {string} srcPath   Path of the audio file to split.
     * @param {string} srcMime   Accepted for call-site symmetry; not read
     *                           here — every chunk is sent as "audio/mpeg".
     * @param {string} title     Title hint forwarded to each chunk call.
     * @param {string} modelName Preferred transcription model.
     * @param {(msg: string) => void} logFn Progress logger.
     * @returns {Promise<null | {entries: object[], cost: object}>}
     *   `null` when splitAudioFile produced one chunk or none (the caller
     *   is expected to handle the file whole); otherwise the merged
     *   entries plus summed input/output token counts.
     * @throws Whatever transcribeSingleFile throws for a chunk, including
     *   its cancellation error.
     */
    async function transcribeChunked(srcPath, srcMime, title, modelName, logFn) {
      // mkdtemp creates a uniquely named directory per call. A name built
      // from Date.now() can be shared by two jobs that start in the same
      // millisecond, and the first one to finish would then delete the
      // other job's chunk files in its `finally`.
      const chunkDir = await fs.mkdtemp(path.join(os.tmpdir(), "yt-chunks-"));

      try {
        const audioChunks = await splitAudioFile(srcPath, chunkDir, 2700); // 45 min chunks
        if (!audioChunks || audioChunks.length <= 1) return null; // splitting not needed

        logFn(`Split audio into ${audioChunks.length} chunks for transcription`);
        const allEntries = [];
        let totalIn = 0;
        let totalOut = 0;

        for (const chunk of audioChunks) {
          logFn(`Transcribing chunk ${chunk.index + 1}/${audioChunks.length} (starts at ${formatTime(chunk.startOffset)})...`);
          const chunkResult = await transcribeSingleFile(
            chunk.path, "audio/mpeg", title,
            modelName,
            chunk.startOffset
          );

          // Sum token usage; a provider that reports no cost block (or a
          // partial one) contributes 0 instead of producing NaN or a throw.
          totalIn += chunkResult.cost?.inputTokens ?? 0;
          totalOut += chunkResult.cost?.outputTokens ?? 0;

          const chunkText = chunkResult.text;
          if (!chunkText) {
            logFn(`⚠ Chunk ${chunk.index + 1} returned empty response — skipping`);
            continue;
          }
          const chunkEntries = parseTimestampedTranscript(chunkText);

          // Programmatically adjust timestamps: the model transcribes from 0:00 relative
          // to the chunk, so add the chunk's startOffset to get real video timestamps
          if (chunk.startOffset > 0) {
            // Heuristic: if the first entry already sits near the chunk's
            // start offset (>= 80% of it), assume the model applied the
            // offset itself and leave the timestamps alone.
            const firstOffset = chunkEntries.length > 0 ? chunkEntries[0].offset : 0;
            const alreadyAdjusted = firstOffset >= chunk.startOffset * 0.8;
            if (!alreadyAdjusted) {
              for (const e of chunkEntries) {
                e.offset += chunk.startOffset;
              }
              logFn(`Adjusted chunk ${chunk.index + 1} timestamps by +${formatTime(chunk.startOffset)}`);
            }
          }

          logFn(`Chunk ${chunk.index + 1}: ${chunkEntries.length} segments, last timestamp ${chunkEntries.length > 0 ? formatTime(chunkEntries[chunkEntries.length - 1].offset) : "N/A"}`);

          // Merge: skip entries that overlap with already-transcribed content.
          // The cut-off is the last timestamp collected before this chunk.
          const lastExistingTime = allEntries.length > 0 ? allEntries[allEntries.length - 1].offset : -1;
          for (const e of chunkEntries) {
            if (e.offset > lastExistingTime) allEntries.push(e);
          }
        }

        // Recalculate durations as the gap to the next entry; the final
        // entry has no successor and gets a fixed 15 seconds.
        for (let i = 0; i < allEntries.length - 1; i++) {
          allEntries[i].duration = allEntries[i + 1].offset - allEntries[i].offset;
        }
        if (allEntries.length > 0) allEntries[allEntries.length - 1].duration = 15;

        logFn(`Chunked transcription complete: ${allEntries.length} total segments`);
        return {
          entries: allEntries,
          cost: {
            inputTokens: totalIn, outputTokens: totalOut, thinkingTokens: 0,
            totalTokens: totalIn + totalOut,
            totalCost: "0", totalCostDisplay: "",
          },
        };
      } finally {
        // Best-effort cleanup: a failed removal must not mask the result
        // or the original error.
        await fs.rm(chunkDir, { recursive: true, force: true }).catch(() => {});
      }
    }

    // entries / transcriptText / txCost are declared earlier (top of
    // the request handler) since the captions-fast-path needs to
    // populate them before this audio-transcription block runs.
    const txStart = Date.now();

    if (needsChunking) {
      // ── Very long audio: go straight to chunked transcription ──
      log(2, `Skipping full-file attempt — using chunked transcription for ${audioDurMin} min audio`);
      const chunkedResult = await transcribeChunked(audioPath, mimeType, videoTitle, transcriptionModel, (msg) => log(2, msg));
      if (chunkedResult && chunkedResult.entries.length > 0) {
        entries = chunkedResult.entries;
        txCost = chunkedResult.cost;
      } else {
        log(2, `⚠ Chunked transcription returned no entries, trying full file as last resort...`);
        entries = null; // fall through to full-file attempt below
      }
    }

    if (!entries) {
      // ── Normal: transcribe full file ──
      const transcriptResult = await transcribeSingleFile(audioPath, mimeType, videoTitle, transcriptionModel);

      transcriptText = transcriptResult.text;
      if (!transcriptText) {
        log(2, `⚠ Full-file transcription returned empty — falling back to chunked transcription...`);
        const chunkedResult = await transcribeChunked(audioPath, mimeType, videoTitle, transcriptionModel, (msg) => log(2, msg));
        if (chunkedResult && chunkedResult.entries.length > 0) {
          entries = chunkedResult.entries;
          txCost = chunkedResult.cost;
        } else {
          throw new Error(`${transcriber.name} returned empty transcription for both full file and chunked attempts. Try again or use a shorter video.`);
        }
      } else {
        txCost = transcriptResult.cost;
        const txTime = ((Date.now() - txStart) / 1000).toFixed(1);
        log(2, `Transcription complete in ${txTime}s`, `${transcriptText.length} chars received`);

        entries = parseTimestampedTranscript(transcriptText);
        log(2, `Parsed ${entries.length} transcript segments`);
      }

      // ── Single-segment expansion ──
      // Whisper-API endpoints that don't return per-segment timestamps
      // (e.g. NVIDIA Parakeet, some bare-Whisper wrappers) hand back a
      // single text blob. parseTimestampedTranscript dumps that into
      // one entry at [0:00], which (a) trips the truncation detector
      // below and (b) leaves the analyzer with one giant segment so
      // it can only produce one section. Synthesize sentence-based
      // entries with interpolated timestamps so both code paths
      // downstream work like they do for Gemini/Whisper-1.
      if (entries.length === 1 && audioDuration && audioDuration > 30 && (entries[0].text || "").length > 100) {
        const synthesized = synthesizeEntriesFromText(entries[0].text, audioDuration);
        if (synthesized.length > 1) {
          log(2, `Single-segment transcript expanded into ${synthesized.length} synthetic sentence-based entries with interpolated timestamps`);
          entries = synthesized;
        }
      }

      // ── Truncation detection → fall back to chunks ──
      if (audioDuration && entries.length > 0) {
        const lastEntryTime = entries[entries.length - 1].offset;
        const coverageRatio = lastEntryTime / audioDuration;
        const missingSeconds = audioDuration - lastEntryTime;

        if (coverageRatio < 0.90 && missingSeconds > 120) {
          log(2, `⚠ Transcript truncated — covers ${formatTime(lastEntryTime)} of ${formatTime(Math.floor(audioDuration))} (${(coverageRatio * 100).toFixed(0)}%)`);
          log(2, `Falling back to chunked transcription...`);
          const chunkedResult = await transcribeChunked(audioPath, mimeType, videoTitle, transcriptionModel, (msg) => log(2, msg));
          if (chunkedResult && chunkedResult.entries.length > 0) {
            entries = chunkedResult.entries;
            txCost = chunkedResult.cost;
            const finalCoverage = entries[entries.length - 1].offset;
            log(2, `Coverage after chunking: ${formatTime(finalCoverage)} of ${formatTime(Math.floor(audioDuration))}`);
          }
        } else {
          log(2, `Transcript coverage: ${formatTime(lastEntryTime)} of ${formatTime(Math.floor(audioDuration))} (${(coverageRatio * 100).toFixed(0)}%) — OK`);
        }
      }
    }

    const txTotalTime = ((Date.now() - txStart) / 1000).toFixed(1);
    log(2, `Total transcription time: ${txTotalTime}s — ${entries.length} segments`);

    if (!entries || entries.length === 0) {
      const preview = (transcriptText || "").slice(0, 500).replace(/\n/g, " ↵ ");
      log(2, `⚠ Transcript parse failed. Preview: ${preview}`);
      sendEvent(res, "error", { message: "Failed to parse transcript." });
      sendEvent(res, "result", { videoId, entries: [], chunks: [], rawTranscript: transcriptText });
      res.end();
      return;
    }

    } // end if (!entries) — close the audio + transcription block

    if (!entries || entries.length === 0) {
      throw new Error("No transcript available — neither captions nor audio transcription produced segments.");
    }

    checkCancelled();

    // ── Step 3: Topic analysis with model fallback ──
    // Fallback order: caller's chosen model first, then the rest of the
    // analyzer provider's analysis-model list.
    const providerModels = analyzer.listAnalysisModels();
    const analysisFallbacks = [
      analysisModel,
      ...providerModels.filter((m) => m !== analysisModel),
    ];

    // ── Auto-coalesce input for the analysis LLM ──
    // Long videos with fine-grained transcripts (Parakeet ~5s / segment
    // on a 2-hour podcast = ~1400 entries) easily exceed local-model
    // context windows (typical Gemma/Llama deployments are 16k–32k
    // tokens). Cap the analyzer's view at ~400 segments; the displayed
    // transcript keeps its full granularity because we translate the
    // returned section indices back through indexMap before building
    // the final chunks. FLAGGED TO WATCH — may impact section-boundary
    // precision on borderline content.
    const { coalesced: analysisEntries, indexMap: analysisIndexMap } =
      coalesceForAnalysis(entries, 400);
    if (analysisEntries !== entries) {
      log(
        3,
        `⚠ AUTO-COALESCED ANALYSIS INPUT: ${entries.length} → ${analysisEntries.length} segments (large prompts can exceed local-model context limits). Transcript display keeps full granularity.`
      );
    }

    const analysisPrompt = buildAnalysisPrompt(analysisEntries);
    let analysisResult = null;
    let usedAnalysisModel = analysisModel;
    const anaStart = Date.now();

    for (const tryModel of analysisFallbacks) {
      try {
        log(3, `Analyzing topics across ${analysisEntries.length} segments with ${analyzer.name}/${tryModel}...`);
        analysisResult = await analyzer.analyzeText({
          prompt: analysisPrompt,
          model: tryModel,
          onProgress: (msg) => log(3, msg),
          signal: abortController.signal,
          jobId,
        });
        usedAnalysisModel = tryModel;
        break;
      } catch (fallbackErr) {
        // If the user hit Cancel mid-analysis, do not walk to the
        // next model — the signal is shared across attempts and would
        // just re-fail. Bubble the cancellation up to the request
        // handler's catch.
        if (abortController.signal.aborted || fallbackErr?.name === "AbortError") {
          throw new Error(CANCELLED_MARK);
        }
        const msg = fallbackErr?.message || String(fallbackErr);
        log(3, `⚠ ${tryModel} failed: ${msg.slice(0, 150)}`);
        if (tryModel !== analysisFallbacks[analysisFallbacks.length - 1]) {
          log(3, `Falling back to next model...`);
        }
      }
    }

    if (!analysisResult) {
      throw new Error("All analysis models failed. Please try again later.");
    }

    const analysisText = analysisResult.text;
    if (!analysisText) {
      throw new Error(`${analyzer.name} returned an empty analysis. The transcript may be too long for the model. Try again.`);
    }
    const anaTime = ((Date.now() - anaStart) / 1000).toFixed(1);
    const anaCost = analysisResult.cost;

    // Parse the analysis JSON
    let analysisJson;
    try {
      let jsonStr = analysisText.trim();
      const codeBlockMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
      if (codeBlockMatch) jsonStr = codeBlockMatch[1].trim();
      analysisJson = JSON.parse(jsonStr);
    } catch (e) {
      console.error("Failed to parse analysis JSON:", e.message);
      sendEvent(res, "error", { message: "Topic analysis returned invalid JSON. Returning raw transcript." });
      sendEvent(res, "result", { videoId, entries, chunks: [], rawTranscript: transcriptText });
      res.end();
      return;
    }

    // Build final chunks. When the analyzer saw a coalesced view of the
    // transcript (analysisIndexMap is set), translate its section
    // indices back to the original-entry index space so the rendered
    // transcript keeps full granularity. Without translation, the
    // section slice would walk over a much shorter array and miss
    // 70%+ of the spoken content for very long videos.
    const chunks = analysisJson.sections.map((section) => {
      let start = Math.max(0, section.startIndex);
      let end = section.endIndex;
      if (analysisIndexMap) {
        const mappedStart = analysisIndexMap[Math.max(0, Math.min(start, analysisIndexMap.length - 1))];
        const mappedEnd = analysisIndexMap[Math.max(0, Math.min(end, analysisIndexMap.length - 1))];
        start = mappedStart ? mappedStart.startOrig : 0;
        end = mappedEnd ? mappedEnd.endOrig : entries.length - 1;
      }
      start = Math.max(0, start);
      end = Math.min(entries.length - 1, end);
      const sectionEntries = entries.slice(start, end + 1);
      return {
        title: section.title,
        summary: section.summary,
        entries: sectionEntries,
        startTime: sectionEntries[0]?.offset || 0,
      };
    }).filter((c) => c.entries.length > 0);

    const totalTime = ((Date.now() - pipelineStart) / 1000).toFixed(1);
    log(3, `Topic analysis complete in ${anaTime}s — found ${chunks.length} topics`);
    log(3, `Pipeline finished in ${totalTime}s`);

    // Save to history. As of the library-for-everyone change, free
    // users save too — the app feels broken otherwise (summarize a
    // video, never find it again).
    const contentType = isPodcast ? "podcast" : "youtube";
    const historyId = await saveToHistory(videoId, url, videoTitle, chunks, entries, logHistory, videoUploadDate, contentType).catch(() => null);

    sendEvent(res, "result", { videoId, videoTitle, entries, chunks, historyId, type: contentType });
    res.end();

  } catch (err) {
    // Treat any AbortError or aborted-signal state as a user cancellation
    // even if it didn't surface as CANCELLED_MARK upstream (e.g. an SDK
    // throws AbortError before checkCancelled() runs).
    const cancelled =
      err?.message === CANCELLED_MARK ||
      err?.name === "AbortError" ||
      abortController.signal.aborted;
    if (!cancelled) {
      // Dump as much context as the runtime gives us. Generic
      // messages like "Error in input stream" are usually wrappers
      // around an underlying SDK / Node stream error; the cause +
      // stack are what actually tells us what happened.
      console.error("Pipeline error:", {
        name: err?.name,
        message: err?.message,
        code: err?.code,
        status: err?.status || err?.statusCode,
        cause: err?.cause,
        stack: err?.stack,
        transcriptionProvider,
        analysisProvider,
      });
    }
    if (!res.headersSent) {
      res.status(cancelled ? 499 : 500).json({
        error: cancelled ? "cancelled" : err.message,
      });
    } else if (cancelled) {
      sendEvent(res, "cancelled", { message: "Cancelled by user" });
      res.end();
    } else {
      sendEvent(res, "error", { message: err.message });
      res.end();
    }
  } finally {
    if (isFree) releaseFreeSlot();
    // Clean up temp files
    try {
      await fs.rm(tmpDir, { recursive: true, force: true });
    } catch {}
  }
});

// ── Helpers ────────────────────────────────────────────────────────────────

// getAudioDuration + splitAudioFile moved to ./audio.js

// sendEvent / extractVideoId / formatTime / parseTimestampedTranscript moved to ./util.js

// buildAnalysisPrompt moved to ./gemini-helpers.js

// ── Network mode ──────────────────────────────────────────────────────────
// On StartOS (DATA_DIR=/data): always bind to 0.0.0.0 (container networking)
// On local Mac dev: default to localhost (safe on public Wi-Fi)
//   - Your .app launcher sets LAN_MODE=true (Home) or false (Traveling)
//   - Running "npm start" directly defaults to localhost
// Truthy only when DATA_DIR is set and differs from the parent of __dirname.
const isStartOS =
  process.env.DATA_DIR &&
  process.env.DATA_DIR !== path.join(__dirname, "..");
// StartOS always runs in LAN mode; elsewhere LAN_MODE must be exactly "true".
const lanMode = Boolean(isStartOS) || process.env.LAN_MODE === "true";
// LAN mode listens on every interface, otherwise loopback only.
const BIND_HOST = !lanMode ? "127.0.0.1" : "0.0.0.0";

// Reports whether the server is running in LAN mode.
app.get("/api/network-mode", (_req, res) => {
  res.json({ lan: lanMode });
});

// ── Start server ───────────────────────────────────────────────────────────

// Bind the HTTP server, then run one-off startup housekeeping: verify (and
// auto-update) yt-dlp, check subscriptions, resume the auto-queue, and
// schedule the hourly subscription check.
app.listen(PORT, BIND_HOST, async () => {
  console.log(`\n  Recap API running on http://${BIND_HOST}:${PORT}`);
  console.log(`  Data directory: ${DATA_DIR}`);

  // Each startup phase is isolated in its own try/catch. This callback is
  // async, so an uncaught throw would surface as an unhandled promise
  // rejection and would also skip every later phase, including the hourly
  // subscription timer registered at the bottom.

  // ── Phase 1: yt-dlp presence / version ──
  try {
    console.log(`  Checking yt-dlp...`);

    const info = await checkYtdlp();
    if (!info.installed) {
      console.log(`  ⚠  yt-dlp not found. Install it: pip install yt-dlp\n`);
    } else if (info.updateAvailable) {
      console.log(`  ✓  yt-dlp ${info.version} found`);
      console.log(`  ↑  Update available: ${info.latestVersion}`);
      console.log(`     Auto-updating...`);
      const result = await autoUpdateYtdlp(DATA_DIR);
      if (result.success) {
        // Re-check so the log shows the version actually installed now.
        const refreshed = await checkYtdlp();
        console.log(`  ✓  yt-dlp updated to ${refreshed.version}\n`);
      } else {
        console.log(`  ⚠  Auto-update failed. Run manually: yt-dlp -U\n`);
      }
    } else {
      console.log(`  ✓  yt-dlp ${info.version} (up to date)\n`);
    }
  } catch (err) {
    console.error("  ⚠  yt-dlp check error:", err?.message || String(err));
  }

  // ── Phase 2: check subscriptions on startup ──
  try {
    const subs = await loadSubscriptions();
    if (subs.length > 0) {
      console.log(`  📡  Checking ${subs.length} subscription(s) for new videos...`);
      // Caught inline so the queue counts below are still reported when the
      // check itself fails.
      await checkSubscriptions().catch(err => console.error("  ⚠  Subscription check error:", err.message));
      const pending = autoQueue.filter(q => q.status === "pending").length;
      const approved = autoQueue.filter(q => q.status === "approved").length;
      if (pending > 0) console.log(`  ✚  ${pending} new video(s) queued from subscriptions`);
      if (approved > 0) console.log(`  ⏳  ${approved} approved video(s) ready for processing`);
      if (pending === 0 && approved === 0) console.log(`  ✓  No new videos from subscriptions`);
    }
  } catch (err) {
    console.error("  ⚠  Subscription check error:", err?.message || String(err));
  }

  // ── Phase 3: resume approved items from a previous session ──
  try {
    // Recover any items stuck in "processing" state from a crash
    for (const item of autoQueue) {
      if (item.status === "processing") {
        console.log(`  ⚠  Recovering stuck item: ${item.title}`);
        item.status = "approved"; // Re-queue for processing
      }
    }
    if (autoQueue.some(q => q.status === "approved")) {
      await saveAutoQueue();
      console.log(`  🔄  Starting background processor...`);
      // Delay slightly so the server is fully ready before internal HTTP calls
      setTimeout(() => kickProcessor(), 2000);
    }
  } catch (err) {
    console.error("  ⚠  Queue resume error:", err?.message || String(err));
  }

  console.log(`  ⚙  Processing config: ${processingConfig.delaySeconds}s delay, ${processingConfig.enabled ? "enabled" : "paused"}`);

  // Check subscriptions every hour (runs continuously on StartOS)
  setInterval(() => {
    checkSubscriptions().catch(err => console.error("  ⚠  Subscription check error:", err.message));
  }, 60 * 60 * 1000);
});