Files
recap/server/index.js
T
Keysat a226113a10 Drop transcription/analysis cost lines from logs
The cost breakdowns (token counts + dollar figures) are tied to the
hardcoded Gemini PRICING table, which won't make sense once we add
OpenAI/Claude/local providers — different APIs report tokens
differently, some are free, and the pricing table can't keep up. Drop
the cost and token-count lines from the activity log so the per-step
timing is the durable signal we keep.

Removed:
  • "Total cost: <in> in / <out> out — cost: $X" (chunked transcription)
  • "Transcription tokens: <in>/<out> — cost: $X" (single-shot)
  • "Analysis tokens: <in>/<out>/<thinking> — cost: $X"
  • "Pipeline finished in Ns — total cost: $X (Y tokens)" → now just
    "Pipeline finished in Ns"

Cost calculation helpers (calcCost, PRICING) stay in the codebase for
now — they may come back via a per-provider plugin layer later. They
just no longer write to the activity-log stream.
2026-05-08 12:50:10 -05:00

2883 lines
110 KiB
JavaScript

import { execFile } from "child_process";
import { createWriteStream } from "fs";
import fs from "fs/promises";
import http from "http";
import https from "https";
import os from "os";
import path from "path";
import { fileURLToPath } from "url";
import { promisify } from "util";
import express from "express";
import cors from "cors";
import { GoogleGenAI } from "@google/genai";
import * as license from "./license.js";
// Promise-returning execFile, the Express app instance, and the listen port.
const execFileAsync = promisify(execFile);
const app = express();
const PORT = process.env.PORT || 3001;
// ── Data directory (configurable for StartOS or local dev) ────────────────
// On StartOS: DATA_DIR=/data (persistent volume)
// On local dev: defaults to project root (parent of server/)
// fileURLToPath (instead of URL#pathname) decodes percent-escapes such as
// "%20" and yields a native path on Windows, so an install directory that
// contains spaces or non-ASCII characters still resolves correctly.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const DATA_DIR = process.env.DATA_DIR || path.join(__dirname, "..");
const historyDir = path.join(DATA_DIR, "history");
const configDir = path.join(DATA_DIR, "config");
// Best-effort creation: a failure here is ignored and surfaces later when
// the directories are actually read or written.
await fs.mkdir(historyDir, { recursive: true }).catch(() => {});
await fs.mkdir(configDir, { recursive: true }).catch(() => {});
// ── Server-side API key (shared across all clients) ───────────────────────
// Lookup order: GEMINI_API_KEY env var, then the StartOS config file, then
// a GEMINI_API_KEY=... line in DATA_DIR/.env. The first non-empty hit wins.
let serverApiKey = process.env.GEMINI_API_KEY || "";
const envPath = path.join(DATA_DIR, ".env");
if (!serverApiKey) {
  // StartOS config file; a missing file reads as "{}" and any parse error
  // is ignored so the next source gets a chance.
  try {
    const startosConfigFile = path.join(configDir, "startos-config.json");
    const rawConfig = await fs.readFile(startosConfigFile, "utf-8").catch(() => "{}");
    const { gemini_api_key: configuredKey } = JSON.parse(rawConfig);
    if (configuredKey) serverApiKey = configuredKey;
  } catch {}
}
if (!serverApiKey) {
  // .env file; the value is trimmed and one surrounding quote on each side
  // is stripped.
  try {
    const dotenvText = await fs.readFile(envPath, "utf-8").catch(() => "");
    const keyLine = /^GEMINI_API_KEY=(.+)$/m.exec(dotenvText);
    if (keyLine) serverApiKey = keyLine[1].trim().replace(/^["']|["']$/g, "");
  } catch {}
}
// ── YouTube cookies (bypass bot detection) ──────────────────────────────
// Sources, in priority order: a cookies.txt file in DATA_DIR, then a browser
// named by YT_COOKIES_FROM in the .env file (local dev only), then nothing.
let ytCookiesBrowser = "";
const ytCookiesFilePath = path.join(DATA_DIR, "cookies.txt");
let ytCookiesFileExists = false;
try {
  // The browser name is trimmed, unquoted and lower-cased.
  const dotenvText = await fs.readFile(envPath, "utf-8").catch(() => "");
  const browserLine = /^YT_COOKIES_FROM=(.+)$/m.exec(dotenvText);
  if (browserLine) {
    ytCookiesBrowser = browserLine[1].trim().replace(/^["']|["']$/g, "").toLowerCase();
  }
} catch {}
// fs.access rejects when the file is missing or unreadable.
ytCookiesFileExists = await fs.access(ytCookiesFilePath).then(() => true, () => false);
if (ytCookiesFileExists) {
  console.log(" 🍪 Found cookies.txt — will use for YouTube authentication");
}
/**
 * Build the yt-dlp cookie arguments for the current configuration.
 * @returns {string[]} `--cookies <file>` when cookies.txt is present,
 *   `--cookies-from-browser <name>` when a browser is configured, else `[]`.
 */
function ytCookieArgs() {
  // The cookies.txt file takes precedence over live browser cookies.
  if (ytCookiesFileExists) {
    return ["--cookies", ytCookiesFilePath];
  }
  return ytCookiesBrowser ? ["--cookies-from-browser", ytCookiesBrowser] : [];
}
/**
 * Extra yt-dlp arguments used for robustness: sleep 1–3 seconds between
 * requests to reduce the chance of rate limiting during batch operations.
 * @returns {string[]} A fresh argument array on every call.
 */
function ytExtraArgs() {
  return ["--sleep-interval", "1", "--max-sleep-interval", "3"];
}
/**
 * Name the cookie source currently in effect.
 * @returns {string} "cookies.txt", the configured browser name, or "none".
 */
function ytCookieMethod() {
  return ytCookiesFileExists ? "cookies.txt" : ytCookiesBrowser || "none";
}
/**
 * Pick the API key to use for a request.
 * @param {string} [clientKey] Key sent by the client; empty/missing or the
 *   sentinel "USE_SERVER_KEY" selects the server's stored key.
 * @returns {string} The client's key, or the server key when requested.
 */
function resolveApiKey(clientKey) {
  const wantsServerKey = !clientKey || clientKey === "USE_SERVER_KEY";
  return wantsServerKey ? serverApiKey : clientKey;
}
// Global middleware: CORS for every route, JSON bodies up to 100mb.
app.use(cors());
app.use(express.json({ limit: "100mb" }));
// ── Keysat licensing (hard-gate / activate-screen flavor) ─────────────────
// Every /api/* route answers 402 until a valid license is activated, apart
// from a small allowlist that lets the frontend render its activation UI.
// The verifier itself lives in server/license.js.
let LIC = license.checkLicense();
{
  // One-line summary of the license state found at boot.
  const entitlementList = [...LIC.entitlements].join(",");
  const reasonSuffix = LIC.reason ? ` reason=${LIC.reason}` : "";
  console.log(`[license] state=${LIC.state} entitlements=[${entitlementList}]${reasonSuffix}`);
}
// Periodic online validation against licensing.keysat.xyz. Catches
// revocations, suspensions, and expirations that happen after activation —
// without it, a revoked key keeps working until the customer reinstalls.
//
// On network errors we keep the prior state up to MAX_OFFLINE_DAYS (see
// server/license.js); past the ceiling we lock out.

/**
 * Parse a millisecond interval from an environment-style string.
 * @param {string|undefined} raw Raw value (may be missing or malformed).
 * @param {number} fallbackMs Value to use when `raw` is not a positive integer.
 * @returns {number} The parsed interval, or `fallbackMs`.
 */
function parsePositiveIntervalMs(raw, fallbackMs) {
  const parsed = Number.parseInt(raw ?? "", 10);
  // NaN, zero and negatives would make setInterval fire roughly every
  // millisecond, so they are rejected in favour of the fallback.
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallbackMs;
}
// Defaults to six hours; override with YT_SUMMARIZER_VALIDATE_INTERVAL_MS.
const VALIDATE_INTERVAL_MS = parsePositiveIntervalMs(
  process.env.YT_SUMMARIZER_VALIDATE_INTERVAL_MS,
  6 * 60 * 60 * 1000
);
// Upper bound on how long activation waits for the online confirmation.
const ACTIVATE_VALIDATE_TIMEOUT_MS = 8000;
/**
 * Re-validate the license online and replace the module-level LIC state.
 * Logs only when the state or reason changed; never rejects on a
 * validation failure (the previous state is kept instead).
 * @param {string} reason Label for the log line (e.g. "startup").
 * @returns {Promise<void>}
 */
async function refreshLicenseOnline(reason) {
  const before = LIC;
  try {
    LIC = await license.validateOnline();
  } catch (e) {
    // validateOnline is not expected to throw; this is purely defensive.
    console.error(`[license] refresh threw (${reason}):`, e?.message || e);
    return;
  }
  const unchanged = LIC.state === before.state && LIC.reason === before.reason;
  if (unchanged) return;
  const reasonPart = LIC.reason ? ` reason=${LIC.reason}` : "";
  const entitlementList = [...LIC.entitlements].join(",");
  console.log(
    `[license] refresh (${reason}): state=${LIC.state}${reasonPart} entitlements=[${entitlementList}]`
  );
}
// Kick off the first online check without awaiting it, so the server can
// start listening immediately; then repeat every VALIDATE_INTERVAL_MS.
// Rejections are swallowed in both cases.
void refreshLicenseOnline("startup").catch(() => {});
setInterval(() => {
  void refreshLicenseOnline("scheduled").catch(() => {});
}, VALIDATE_INTERVAL_MS);
// Allowlist of endpoints that work without a license — deliberately small.
// /api/process is included so unlicensed (free-tier) users can summarize
// one video at a time with their own Gemini key; the route handler itself
// enforces BYO-key and the one-at-a-time lock for those users.
const LICENSE_OPEN_PATHS = new Set();
for (const openPath of [
  "/api/health",
  "/api/heartbeat",
  "/api/status",
  "/api/license-status",
  "/api/license/activate",
  "/api/license/deactivate",
  "/api/process",
]) {
  LICENSE_OPEN_PATHS.add(openPath);
}
// Activation-screen gate: any /api/* request without a valid license is
// rejected with 402, except the allowlist above. Non-/api requests
// (the static frontend, /assets, etc.) pass through so the UI can load.
app.use((req, res, next) => {
  // Express matches routes case-insensitively and tolerates a trailing
  // slash unless configured otherwise, so the gate compares a normalized
  // path. Without this, a request to "/API/history" would skip the gate
  // yet still reach the "/api/history" handler.
  const loweredPath = req.path.toLowerCase();
  if (!loweredPath.startsWith("/api/")) return next();
  const routeKey = loweredPath.replace(/\/+$/, "");
  if (LICENSE_OPEN_PATHS.has(routeKey)) return next();
  if (LIC.state === "licensed" && LIC.entitlements.has("core")) return next();
  return res.status(402).json({
    error: "license_required",
    message:
      LIC.state === "licensed"
        ? "Your license is missing the 'core' entitlement. Contact the seller."
        : "This feature requires a Keysat license. Upgrade to unlock.",
    state: LIC.state,
    reason: LIC.reason,
    activate_url: "/#activate",
    keysat_base_url: license.KEYSAT_BASE_URL,
    product_slug: license.PRODUCT_SLUG,
  });
});
// Free-tier concurrency lock: true while an unlicensed user's job is
// running. A second submission in that window gets a 409; the flag is
// released in a finally block at the bottom of /api/process.
let freeJobInFlight = false;
// Pro-tier feature gates: URL prefixes mapped to the entitlement they need.
// The first matching entry wins. A licensed user lacking the entitlement
// receives a 402 feature_not_in_tier rather than the generic activation
// response. Each tuple is [prefixes, entitlement, message]; the reported
// feature name is the same string as the entitlement.
const PRO_FEATURE_GATES = [
  [
    ["/api/subscriptions", "/api/auto-queue", "/api/sub-check-log"],
    "subscriptions",
    "Channel subscriptions and auto-queue require a Pro license. Upgrade to unlock.",
  ],
  [
    ["/api/history"],
    "history",
    "Summary history requires a Pro license. Upgrade to unlock.",
  ],
  [
    ["/api/library"],
    "library",
    "Library import/export requires a Pro license. Upgrade to unlock.",
  ],
].map(([prefixes, entitlement, message]) => ({
  prefixes,
  entitlement,
  feature: entitlement,
  message,
}));
// Pro-feature gate middleware: the first PRO_FEATURE_GATES entry whose
// prefix matches the request path decides; a missing entitlement yields
// 402 feature_not_in_tier, everything else continues down the chain.
app.use((req, res, next) => {
  // Compare in lower case: Express matches routes case-insensitively by
  // default, so "/API/History" must hit the same gate as "/api/history".
  const requestPath = req.path.toLowerCase();
  const gate = PRO_FEATURE_GATES.find(({ prefixes }) =>
    prefixes.some((prefix) => requestPath.startsWith(prefix))
  );
  if (!gate || LIC.entitlements.has(gate.entitlement)) return next();
  return res.status(402).json({
    error: "feature_not_in_tier",
    feature: gate.feature,
    message: gate.message,
    keysat_base_url: license.KEYSAT_BASE_URL,
    product_slug: license.PRODUCT_SLUG,
  });
});
// License management endpoints — reachable without a license because they
// are listed in LICENSE_OPEN_PATHS.
// GET /api/license-status: the public view of the current license state.
app.get("/api/license-status", (_req, res) => {
  const view = license.publicView(LIC);
  res.json(view);
});
// POST /api/license/activate: verify the submitted key offline, then
// confirm it online (bounded wait) and report the resulting state.
app.post("/api/license/activate", async (req, res) => {
  // Shared 400 response describing why the current LIC state is not usable.
  const rejectCurrentState = () =>
    res.status(400).json({
      ok: false,
      error: LIC.reason || "invalid",
      ...license.publicView(LIC),
    });
  try {
    LIC = license.activate(req.body && req.body.license_key);
  } catch (e) {
    if (e && e.code === "bad_format") {
      return res.status(400).json({
        error: "bad_format",
        message: "Expected a license key starting with 'LIC1-'.",
      });
    }
    return res.status(500).json({ error: "activation_failed", message: e?.message });
  }
  // The offline signature check failed, so contacting the server is pointless.
  if (LIC.state !== "licensed") return rejectCurrentState();
  // Offline check passed: ask the licensing server too, so a key revoked
  // before activation is refused right away. The wait is capped; on timeout
  // the offline-verified state stands and the periodic poll catches up.
  const onlineCheck = refreshLicenseOnline("activation");
  const deadline = new Promise((resolve) => setTimeout(resolve, ACTIVATE_VALIDATE_TIMEOUT_MS));
  await Promise.race([onlineCheck, deadline]);
  if (LIC.state !== "licensed") return rejectCurrentState();
  return res.json({ ok: true, ...license.publicView(LIC) });
});
// POST /api/license/deactivate: drop the active license and return the
// public view of the state that results.
app.post("/api/license/deactivate", (_req, res) => {
  LIC = license.deactivate();
  const view = license.publicView(LIC);
  res.json({ ok: true, ...view });
});
// ── History storage ───────────────────────────────────────────────────────
/**
 * Persist one processed item as `<id>.json` inside the history directory.
 * @param {string} videoId Source identifier (for podcasts possibly a long GUID/URL).
 * @param {string} url Source URL.
 * @param {string} title Display title; stored as "Untitled" when empty.
 * @param {Array} chunks Topic chunks (its length is stored as topicCount).
 * @param {Array} entries Transcript entries (its length is stored as segmentCount).
 * @param {*} logs Activity log stored alongside the record.
 * @param {string} [uploadDate] Upload date; stored as "" when missing.
 * @param {string} [type] "youtube" (default) or "podcast".
 * @returns {Promise<string>} The generated session id.
 */
async function saveToHistory(videoId, url, title, chunks, entries, logs, uploadDate, type) {
  // Podcast identifiers can be long, so the filename uses the first 16
  // characters of their base64url encoding instead of the raw value.
  let idSuffix = videoId;
  if (type === "podcast") {
    idSuffix = Buffer.from(videoId).toString("base64url").slice(0, 16);
  }
  const id = `${Date.now()}-${idSuffix}`;
  const targetFile = path.join(historyDir, `${id}.json`);
  const payload = JSON.stringify({
    id,
    videoId,
    url,
    title: title || "Untitled",
    type: type || "youtube", // "youtube" or "podcast"
    topicCount: chunks.length,
    segmentCount: entries.length,
    createdAt: new Date().toISOString(),
    uploadDate: uploadDate || "",
    chunks,
    entries,
    logs,
  });
  await fs.writeFile(targetFile, payload);
  return id;
}
// Static files: the frontend from ../public at the site root, and
// ../assets under the /assets prefix.
{
  const projectRoot = path.join(__dirname, "..");
  app.use(express.static(path.join(projectRoot, "public")));
  app.use("/assets", express.static(path.join(projectRoot, "assets")));
}
// ── yt-dlp auto-update ─────────────────────────────────────────────────────
let ytdlpVersion = null; // last version string reported by `yt-dlp --version`
let ytdlpLastCheck = 0; // epoch ms of the most recent release lookup
const UPDATE_CHECK_INTERVAL = 24 * 60 * 60 * 1000; // 24 hours
/**
 * Report whether yt-dlp is installed and, at most once per
 * UPDATE_CHECK_INTERVAL, whether GitHub lists a different latest release.
 * Calls made inside that window leave `latestVersion` null and
 * `updateAvailable` false.
 * @returns {Promise<{installed: boolean, version: ?string, updateAvailable: boolean, latestVersion: ?string}>}
 */
async function checkYtdlp() {
  const info = { installed: false, version: null, updateAvailable: false, latestVersion: null };
  try {
    const versionRun = await execFileAsync("yt-dlp", ["--version"]);
    info.installed = true;
    info.version = versionRun.stdout.trim();
    ytdlpVersion = info.version;
  } catch {
    // yt-dlp could not be run: report it as not installed.
    return info;
  }
  const now = Date.now();
  const checkedRecently = now - ytdlpLastCheck <= UPDATE_CHECK_INTERVAL;
  if (checkedRecently) return info;
  // Stamp before the request so a failed lookup also waits for the next window.
  ytdlpLastCheck = now;
  try {
    const resp = await fetch("https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest", {
      headers: { "Accept": "application/vnd.github.v3+json" },
      signal: AbortSignal.timeout(5000),
    });
    if (resp.ok) {
      const release = await resp.json();
      info.latestVersion = release.tag_name?.replace(/^v/, "") || null;
      info.updateAvailable = Boolean(info.latestVersion) && info.version !== info.latestVersion;
    }
  } catch {} // Network errors are fine, just skip the check
  return info;
}
/**
 * Try to update yt-dlp, falling through a list of strategies until one
 * succeeds: built-in self-update, pip3/pip, Homebrew, then a direct binary
 * download into DATA_DIR/bin.
 * @returns {Promise<{success: boolean, message: string}>} Never rejects;
 *   `success` is false when every strategy failed.
 */
async function autoUpdateYtdlp() {
  console.log(" ↻ Updating yt-dlp...");
  // Strategy 1: yt-dlp's built-in self-update
  try {
    const { stdout } = await execFileAsync("yt-dlp", ["-U"], { timeout: 60000 });
    console.log(` ${stdout.trim()}`);
    return { success: true, message: stdout.trim() };
  } catch {
    console.log(" … Self-update failed, trying pip...");
  }
  // Strategy 2: pip3 / pip (works in containers and on macOS)
  for (const pip of ["pip3", "pip"]) {
    try {
      await execFileAsync(pip, ["install", "-U", "yt-dlp"], { timeout: 120000 });
      console.log(` ✓ Updated via ${pip}`);
      return { success: true, message: `Updated via ${pip}` };
    } catch {}
  }
  // Strategy 3: Homebrew (macOS local dev only)
  try {
    await execFileAsync("brew", ["upgrade", "yt-dlp"], { timeout: 120000 });
    console.log(" ✓ Updated via Homebrew");
    return { success: true, message: "Updated via Homebrew" };
  } catch {}
  // Strategy 4: Direct binary download to persistent storage (StartOS fallback)
  const binDir = path.join(DATA_DIR, "bin");
  const binPath = path.join(binDir, "yt-dlp");
  const downloadPath = `${binPath}.download`;
  try {
    await fs.mkdir(binDir, { recursive: true });
    console.log(" … Trying direct binary download...");
    // Arguments are passed as an array (no shell), so unusual characters in
    // DATA_DIR cannot alter the command. --fail makes curl exit non-zero on
    // an HTTP error instead of saving the error page as the "binary".
    await execFileAsync("curl", [
      "--fail", "-L", "-o", downloadPath,
      "https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp",
    ], { timeout: 120000 });
    await fs.chmod(downloadPath, 0o755);
    // Verify the download runs before it replaces any existing binary.
    const { stdout } = await execFileAsync(downloadPath, ["--version"], { timeout: 120000 });
    await fs.rename(downloadPath, binPath);
    console.log(` ✓ Downloaded yt-dlp binary: ${stdout.trim()}`);
    return { success: true, message: `Downloaded binary: ${stdout.trim()}` };
  } catch {
    // Best-effort cleanup of a partial or broken download.
    await fs.rm(downloadPath, { force: true }).catch(() => {});
    console.log(" ✗ All update strategies failed");
    return { success: false, message: "Auto-update failed. Try updating manually from the StartOS Actions menu." };
  }
}
// ── Pricing (per 1M tokens) ───────────────────────────────────────────────
const PRICING = {
  "gemini-3-flash-preview": { input: 0.50, output: 3.00, thinking: 3.00 },
  "gemini-3-pro-preview": { input: 2.00, output: 12.00, thinking: 12.00 },
  "gemini-3.1-pro-preview": { input: 2.00, output: 12.00, thinking: 12.00 },
  "gemini-2.5-flash": { input: 0.15, output: 0.60, thinking: 0.60 },
  // Fallback for unknown models
  "default": { input: 1.00, output: 5.00, thinking: 5.00 },
};
/**
 * Compute token counts and dollar cost for one model call.
 * @param {string} modelName Key into PRICING; unknown names use the "default" rates.
 * @param {object} [usage] Usage metadata with optional promptTokenCount,
 *   candidatesTokenCount, thoughtsTokenCount and totalTokenCount fields.
 *   A missing object is treated as all-zero usage.
 * @returns {{inputTokens: number, outputTokens: number, thinkingTokens: number,
 *   totalTokens: number, inputCost: string, outputCost: string,
 *   thinkingCost: string, totalCost: string, totalCostDisplay: string}}
 *   Costs are dollar amounts formatted to six decimals; totalCostDisplay is
 *   cents ("0.015¢") below one cent and dollars ("$0.4500") otherwise.
 */
function calcCost(modelName, usage) {
  // Own-property lookup so names like "constructor" cannot resolve to
  // Object.prototype members and produce NaN costs.
  const rates = Object.hasOwn(PRICING, modelName) ? PRICING[modelName] : PRICING["default"];
  const counts = usage ?? {};
  const inputTokens = counts.promptTokenCount || 0;
  const outputTokens = counts.candidatesTokenCount || 0;
  const thinkingTokens = counts.thoughtsTokenCount || 0;
  const inputCost = (inputTokens / 1_000_000) * rates.input;
  const outputCost = (outputTokens / 1_000_000) * rates.output;
  const thinkingCost = (thinkingTokens / 1_000_000) * rates.thinking;
  const totalCost = inputCost + outputCost + thinkingCost;
  return {
    inputTokens,
    outputTokens,
    thinkingTokens,
    totalTokens: counts.totalTokenCount || (inputTokens + outputTokens + thinkingTokens),
    inputCost: inputCost.toFixed(6),
    outputCost: outputCost.toFixed(6),
    thinkingCost: thinkingCost.toFixed(6),
    totalCost: totalCost.toFixed(6),
    // Sub-cent amounts are shown in cents only; the value is no longer
    // prefixed with "$" as well as suffixed with "¢".
    totalCostDisplay: totalCost < 0.01 ? `${(totalCost * 100).toFixed(3)}¢` : `$${totalCost.toFixed(4)}`,
  };
}
// ── Safe text extraction from Gemini responses ──────────────────────────
/**
 * Pull the text out of a Gemini response without ever throwing.
 * @param {object} result Response object from the SDK.
 * @returns {string} `result.text` when readable and non-empty, otherwise the
 *   concatenated `text` of the first candidate's parts, otherwise "".
 */
function safeText(result) {
  // The SDK's .text getter may throw or come back undefined.
  try {
    const direct = result.text;
    if (direct) return direct;
  } catch {}
  // Second attempt: walk the first candidate's parts by hand.
  try {
    const parts = result?.candidates?.[0]?.content?.parts;
    if (parts) {
      return parts.reduce((joined, part) => joined + (part.text || ""), "");
    }
  } catch {}
  return "";
}
// ── Retry helper for transient Gemini API errors ──────────────────────────
/**
 * Run `fn`, retrying on errors that look transient with a linearly growing
 * delay (delayMs × attempt number).
 * @param {() => Promise<*>} fn The operation to attempt.
 * @param {object} [options]
 * @param {number} [options.retries=3] Total number of attempts.
 * @param {number} [options.delayMs=3000] Base delay between attempts.
 * @param {string} [options.label="Gemini call"] Name used in log messages.
 * @param {(msg: string) => void} [options.log] Optional logger for retry notices.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 * @throws The last error, or any error that is not considered retryable.
 */
async function retryGemini(fn, { retries = 3, delayMs = 3000, label = "Gemini call", log: logFn } = {}) {
  let attempt = 1;
  while (attempt <= retries) {
    try {
      return await fn();
    } catch (err) {
      const msg = err?.message || String(err);
      const status = err?.status || err?.httpStatusCode || 0;
      // Retryable: HTTP 503/429, or a message that reads like overload or a
      // network hiccup.
      const isRetryable =
        status === 503 ||
        status === 429 ||
        /overloaded|unavailable|capacity|high demand|rate limit|fetch failed|ECONNRESET|ETIMEDOUT|socket hang up|network/i.test(msg);
      if (!isRetryable || attempt >= retries) {
        throw err;
      }
      const backoffMs = delayMs * attempt;
      const waitSec = (backoffMs / 1000).toFixed(0);
      if (logFn) {
        logFn(`${label} failed (${status || "error"}), retrying in ${waitSec}s... (attempt ${attempt}/${retries})`);
      }
      await new Promise((wake) => setTimeout(wake, backoffMs));
    }
    attempt += 1;
  }
}
// ── Health check ───────────────────────────────────────────────────────────
// GET /api/health: yt-dlp status, whether a server key is configured, and
// the cookie source (with file age when cookies.txt is in use).
app.get("/api/health", async (_req, res) => {
  const info = await checkYtdlp();
  const cookieInfo = { method: ytCookieMethod() };
  if (ytCookiesFileExists) {
    try {
      const MS_PER_DAY = 1000 * 60 * 60 * 24;
      const { mtimeMs } = await fs.stat(ytCookiesFilePath);
      const ageDays = Math.floor((Date.now() - mtimeMs) / MS_PER_DAY);
      cookieInfo.fileAgeDays = ageDays;
      // Flag the file once it is older than 12 days (cookies typically
      // expire after ~14 days).
      cookieInfo.fileExpiring = ageDays > 12;
    } catch {}
  }
  res.json({
    ok: true,
    ytdlp: info.installed,
    hasServerKey: Boolean(serverApiKey),
    cookies: cookieInfo,
    ...info,
  });
});
// ── Status endpoints ───────────────────────────────────────────────────────
// POST /api/heartbeat: fixed "awake" acknowledgement.
app.post("/api/heartbeat", (_req, res) => {
  res.json({ ok: true, sleeping: false });
});
// GET /api/status: same acknowledgement plus the process uptime in seconds.
app.get("/api/status", (_req, res) => {
  const uptime = process.uptime();
  res.json({ ok: true, sleeping: false, uptime });
});
// POST /api/shutdown: lets the macOS .app launcher stop the server cleanly.
// Not used on StartOS, where the container lifecycle is managed externally.
app.post("/api/shutdown", (_req, res) => {
  res.json({ ok: true, message: "Server shutting down..." });
  console.log("\n Server shutdown requested from browser. Goodbye!\n");
  // Exit 300 ms after the reply is queued.
  setTimeout(() => {
    process.exit(0);
  }, 300);
});
// ── Manual update endpoint ─────────────────────────────────────────────────
// POST /api/update-ytdlp: run the updater, then report its outcome merged
// with a fresh version check (version-check fields win on key clashes).
app.post("/api/update-ytdlp", async (_req, res) => {
  const updateResult = await autoUpdateYtdlp();
  const versionInfo = await checkYtdlp();
  res.json(Object.assign({}, updateResult, versionInfo));
});
// ── Cookie management endpoints ───────────────────────────────────────────
// POST /api/cookies/upload: accept the raw text of a cookies.txt file (any
// content type, up to 2mb), sanity-check it and store it in DATA_DIR.
app.post("/api/cookies/upload", express.text({ type: "*/*", limit: "2mb" }), async (req, res) => {
  try {
    const content = req.body;
    const isUsableText = typeof content === "string" && content.length >= 20;
    if (!isUsableText) {
      return res.status(400).json({ error: "Invalid cookie file content" });
    }
    // Loose Netscape-format check: either a recognisable first line or at
    // least one tab-delimited TRUE/FALSE column somewhere in the file.
    const [firstLine] = content.split("\n");
    const headerLooksRight = /^#.*cookie|^#.*http|^\./i.test(firstLine.trim());
    const hasCookieRows = content.includes("\tTRUE\t") || content.includes("\tFALSE\t");
    if (!headerLooksRight && !hasCookieRows) {
      return res.status(400).json({ error: "File doesn't look like a valid Netscape cookies.txt file. The first line should start with '# Netscape HTTP Cookie File' or '# HTTP Cookie File'." });
    }
    await fs.writeFile(ytCookiesFilePath, content, "utf-8");
    ytCookiesFileExists = true;
    console.log(" 🍪 cookies.txt uploaded via web UI");
    res.json({ ok: true, message: "Cookies saved successfully" });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
// POST /api/cookies/delete: remove cookies.txt (a missing file is fine) and
// stop using it for yt-dlp calls.
app.post("/api/cookies/delete", async (_req, res) => {
  try {
    const ignoreFailure = () => {};
    await fs.unlink(ytCookiesFilePath).catch(ignoreFailure);
    ytCookiesFileExists = false;
    console.log(" 🍪 cookies.txt deleted via web UI");
    res.json({ ok: true });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
// POST /api/cookies/test: check the configured cookies by asking yt-dlp for
// the title of a well-known video (metadata only, nothing is downloaded).
// Always answers 200; the outcome is carried in `ok`.
app.post("/api/cookies/test", async (_req, res) => {
  try {
    const cookieArgs = ytCookieArgs();
    if (cookieArgs.length === 0) {
      return res.json({ ok: false, error: "No cookies configured" });
    }
    const probeArgs = [
      "--print", "%(title)s",
      "--no-download",
      ...cookieArgs,
      "https://www.youtube.com/watch?v=dQw4w9WgXcQ", // well-known test video
    ];
    const probe = await execFileAsync("yt-dlp", probeArgs, { timeout: 20000 });
    const title = probe.stdout.trim();
    if (!title) {
      res.json({ ok: false, error: "yt-dlp returned empty response" });
      return;
    }
    res.json({ ok: true, message: `Cookies working! Fetched: "${title}"` });
  } catch (err) {
    const msg = `${err.stderr || ""} ${err.message || ""}`;
    // Sign-in / bot / 403 wording is reported as an authentication failure;
    // anything else is returned truncated to 300 characters.
    const isAuth = /Sign in|bot|403/i.test(msg);
    const error = isAuth
      ? "Cookies are expired or invalid — YouTube still requires sign-in"
      : msg.slice(0, 300);
    res.json({ ok: false, error });
  }
});
// GET /api/cookies/status: the cookie source in use and, for cookies.txt,
// its age in days, an "expiring" flag (older than 12 days) and its size.
app.get("/api/cookies/status", async (_req, res) => {
  const info = { method: ytCookieMethod() };
  if (ytCookiesFileExists) {
    try {
      const MS_PER_DAY = 1000 * 60 * 60 * 24;
      const { mtimeMs, size } = await fs.stat(ytCookiesFilePath);
      info.fileAgeDays = Math.floor((Date.now() - mtimeMs) / MS_PER_DAY);
      info.fileExpiring = info.fileAgeDays > 12;
      info.fileSize = size;
    } catch {}
  }
  res.json(info);
});
// ── History endpoints ─────────────────────────────────────────────────────
const metaPath = path.join(historyDir, "_meta.json");
// meta.json structure: { folders: [ { id, name, order, collapsed, items: [sessionId, ...] } ], uncategorized: [sessionId, ...] }
/**
 * Load the history folder structure from _meta.json.
 * Always returns an object whose `folders` is an array of objects with an
 * `items` array and whose `uncategorized` is an array, so callers can
 * iterate without re-checking. A missing or unparseable file yields the
 * empty structure; missing or malformed fields are replaced with empty
 * arrays.
 * @returns {Promise<{folders: object[], uncategorized: Array}>}
 */
async function loadMeta() {
  let parsed;
  try {
    parsed = JSON.parse(await fs.readFile(metaPath, "utf-8"));
  } catch {
    return { folders: [], uncategorized: [] };
  }
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
    return { folders: [], uncategorized: [] };
  }
  // Valid JSON is not necessarily a valid structure (PUT /api/history/meta
  // stores whatever the client sent), so repair the shape here.
  const folders = Array.isArray(parsed.folders)
    ? parsed.folders
        .filter((folder) => folder && typeof folder === "object")
        .map((folder) => ({ ...folder, items: Array.isArray(folder.items) ? folder.items : [] }))
    : [];
  const uncategorized = Array.isArray(parsed.uncategorized) ? parsed.uncategorized : [];
  return { ...parsed, folders, uncategorized };
}
/**
 * Write the folder structure to _meta.json (pretty-printed).
 * @param {object} meta Structure to persist.
 * @returns {Promise<void>}
 */
async function saveMeta(meta) {
  await fs.writeFile(metaPath, JSON.stringify(meta, null, 2));
}
// GET /api/history: summaries of every stored session plus the folder
// structure. As a side effect the structure is reconciled with the files on
// disk (stale references dropped, unreferenced sessions added) and saved.
app.get("/api/history", async (_req, res) => {
  try {
    // Bookkeeping files that live in the history directory but are not sessions.
    const nonSessionFiles = new Set([
      "subscriptions.json",
      "skip-list.json",
      "seen-list.json",
      "auto-queue.json",
    ]);
    const files = await fs.readdir(historyDir);
    const sessionFiles = files.filter(
      (name) => name.endsWith(".json") && !name.startsWith("_") && !nonSessionFiles.has(name)
    );
    const sessionsMap = {};
    for (const name of sessionFiles) {
      try {
        const data = JSON.parse(await fs.readFile(path.join(historyDir, name), "utf-8"));
        sessionsMap[data.id] = {
          id: data.id,
          videoId: data.videoId,
          url: data.url,
          title: data.title,
          topicCount: data.topicCount,
          type: data.type || "youtube",
          segmentCount: data.segmentCount,
          createdAt: data.createdAt,
          uploadDate: data.uploadDate || "",
        };
      } catch {}
    }
    const meta = await loadMeta();
    // Drop references to sessions whose files no longer exist.
    const stillExists = (id) => sessionsMap[id];
    for (const folder of meta.folders) {
      folder.items = folder.items.filter(stillExists);
    }
    meta.uncategorized = meta.uncategorized.filter(stillExists);
    // Sessions nobody references yet (newly created) go to the front of
    // uncategorized, newest first.
    const referenced = new Set(meta.uncategorized);
    for (const folder of meta.folders) {
      for (const id of folder.items) referenced.add(id);
    }
    const createdAtOf = (id) => new Date(sessionsMap[id].createdAt);
    const orphans = Object.keys(sessionsMap)
      .filter((id) => !referenced.has(id))
      .sort((a, b) => createdAtOf(b) - createdAtOf(a));
    meta.uncategorized = [...orphans, ...meta.uncategorized];
    await saveMeta(meta);
    res.json({ sessions: sessionsMap, meta });
  } catch (err) {
    // Any failure is reported to the client as an empty library.
    res.json({ sessions: {}, meta: { folders: [], uncategorized: [] } });
  }
});
// GET /api/history/:id: return the full stored record for one session.
app.get("/api/history/:id", async (req, res) => {
  const { id } = req.params;
  // Route params arrive URL-decoded, so "%2F" becomes "/". Refuse ids that
  // contain a path separator or NUL; otherwise "../" sequences could read
  // any .json file outside the history directory.
  if (/[\/\\\0]/.test(id)) {
    return res.status(404).json({ error: "Session not found" });
  }
  try {
    const raw = await fs.readFile(path.join(historyDir, `${id}.json`), "utf-8");
    res.json(JSON.parse(raw));
  } catch {
    res.status(404).json({ error: "Session not found" });
  }
});
// PUT /api/history/:id/title: change a session's title. An empty or missing
// title leaves the stored one untouched.
app.put("/api/history/:id/title", async (req, res) => {
  const { id } = req.params;
  // Route params arrive URL-decoded; reject path separators and NUL so the
  // id cannot point at (and rewrite) a .json file outside the history
  // directory.
  if (/[\/\\\0]/.test(id)) {
    return res.status(404).json({ error: "Session not found" });
  }
  try {
    const filePath = path.join(historyDir, `${id}.json`);
    const raw = await fs.readFile(filePath, "utf-8");
    const data = JSON.parse(raw);
    data.title = req.body.title || data.title;
    await fs.writeFile(filePath, JSON.stringify(data));
    res.json({ ok: true, title: data.title });
  } catch {
    res.status(404).json({ error: "Session not found" });
  }
});
// DELETE /api/history/:id: remove a session file, add its videoId to the
// skip list so subscriptions do not re-queue it, and drop it from the
// folder structure.
app.delete("/api/history/:id", async (req, res) => {
  const { id } = req.params;
  // Route params arrive URL-decoded; reject path separators and NUL so the
  // id cannot be used to unlink a .json file outside the history directory.
  if (/[\/\\\0]/.test(id)) {
    return res.status(404).json({ error: "Session not found" });
  }
  try {
    // Read the file first to get the videoId for the skip list
    const filePath = path.join(historyDir, `${id}.json`);
    let videoId = null;
    try {
      const raw = await fs.readFile(filePath, "utf-8");
      videoId = JSON.parse(raw).videoId;
    } catch {}
    await fs.unlink(filePath);
    // Add to skip list so subscriptions don't re-queue it
    if (videoId) {
      await addToSkipList(videoId);
    }
    // Remove from meta
    const meta = await loadMeta();
    meta.uncategorized = meta.uncategorized.filter((entry) => entry !== id);
    for (const folder of meta.folders) {
      folder.items = folder.items.filter((entry) => entry !== id);
    }
    await saveMeta(meta);
    res.json({ ok: true });
  } catch {
    res.status(404).json({ error: "Session not found" });
  }
});
// PUT /api/history/meta: replace the folder structure with the one sent by
// the frontend. The body must have the documented shape
// { folders: [{ ..., items: [...] }], uncategorized: [...] }; anything else
// is rejected with 400 so a malformed request cannot corrupt _meta.json.
app.put("/api/history/meta", async (req, res) => {
  try {
    const meta = req.body;
    const hasValidShape =
      meta !== null &&
      typeof meta === "object" &&
      Array.isArray(meta.folders) &&
      Array.isArray(meta.uncategorized) &&
      meta.folders.every(
        (folder) => folder !== null && typeof folder === "object" && Array.isArray(folder.items)
      );
    if (!hasValidShape) {
      return res.status(400).json({
        error: "Invalid meta — expected { folders: [{ items: [...] }], uncategorized: [...] }",
      });
    }
    await saveMeta(meta);
    res.json({ ok: true });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
// POST /api/history/folders: append a new, empty, expanded folder. The id is
// derived from the current time; the name defaults to "New Folder".
app.post("/api/history/folders", async (req, res) => {
  try {
    const meta = await loadMeta();
    const folder = {
      id: `folder-${Date.now()}`,
      name: req.body.name || "New Folder",
      collapsed: false,
      items: [],
    };
    meta.folders.push(folder);
    await saveMeta(meta);
    res.json(folder);
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
// PUT /api/history/folders/:id: rename a folder; an empty or missing name
// keeps the current one. Responds with the folder, or 404 if unknown.
app.put("/api/history/folders/:id", async (req, res) => {
  try {
    const meta = await loadMeta();
    const wantedId = req.params.id;
    const folder = meta.folders.find((candidate) => candidate.id === wantedId);
    if (!folder) {
      return res.status(404).json({ error: "Folder not found" });
    }
    folder.name = req.body.name || folder.name;
    await saveMeta(meta);
    res.json(folder);
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
// PUT /api/history/folders/:id/collapsed: store the folder's collapsed flag
// (coerced to a boolean). 404 if the folder is unknown.
app.put("/api/history/folders/:id/collapsed", async (req, res) => {
  try {
    const meta = await loadMeta();
    const wantedId = req.params.id;
    const folder = meta.folders.find((candidate) => candidate.id === wantedId);
    if (!folder) {
      return res.status(404).json({ error: "Folder not found" });
    }
    folder.collapsed = Boolean(req.body.collapsed);
    await saveMeta(meta);
    res.json({ ok: true, collapsed: folder.collapsed });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
// DELETE /api/history/folders/:id: remove a folder; the sessions it held
// move to the front of uncategorized. 404 if the folder is unknown.
app.delete("/api/history/folders/:id", async (req, res) => {
  try {
    const meta = await loadMeta();
    const position = meta.folders.findIndex((candidate) => candidate.id === req.params.id);
    if (position < 0) {
      return res.status(404).json({ error: "Folder not found" });
    }
    const removed = meta.folders.splice(position, 1)[0];
    meta.uncategorized = [...removed.items, ...meta.uncategorized];
    await saveMeta(meta);
    res.json({ ok: true });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
// PUT /api/history/move: move a session into a folder, or into
// uncategorized when folderId is null/absent. `index` optionally sets the
// insert position (default: end of a folder, front of uncategorized).
// Responds 400 when sessionId is missing and 404 when the target folder
// does not exist; in both cases nothing is changed on disk.
app.put("/api/history/move", async (req, res) => {
  try {
    const { sessionId, folderId, index } = req.body ?? {};
    if (typeof sessionId !== "string" || sessionId === "") {
      return res.status(400).json({ error: "sessionId is required" });
    }
    const meta = await loadMeta();
    // Resolve the destination before touching anything, so an unknown
    // folder cannot leave the session removed from every list.
    const targetFolder = folderId ? meta.folders.find((f) => f.id === folderId) : null;
    if (folderId && !targetFolder) {
      return res.status(404).json({ error: "Folder not found" });
    }
    // Remove from current location
    meta.uncategorized = meta.uncategorized.filter((id) => id !== sessionId);
    for (const folder of meta.folders) {
      folder.items = folder.items.filter((id) => id !== sessionId);
    }
    // Add to new location
    if (targetFolder) {
      const at = typeof index === "number" ? index : targetFolder.items.length;
      targetFolder.items.splice(at, 0, sessionId);
    } else {
      const at = typeof index === "number" ? index : 0;
      meta.uncategorized.splice(at, 0, sessionId);
    }
    await saveMeta(meta);
    res.json({ ok: true });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
// ── Library export/import ────────────────────────────────────────────────
// GET /api/library/export: download one JSON document containing the folder
// structure, every stored .json record in the history directory (except
// _meta, subscriptions and auto-queue) and the subscription list.
app.get("/api/library/export", async (_req, res) => {
  try {
    const meta = await loadMeta();
    const excluded = ["_meta.json", "subscriptions.json", "auto-queue.json"];
    const sessions = {};
    for (const file of await fs.readdir(historyDir)) {
      const isExportable = file.endsWith(".json") && !excluded.includes(file);
      if (!isExportable) continue;
      try {
        const raw = await fs.readFile(path.join(historyDir, file), "utf-8");
        sessions[file.replace(".json", "")] = JSON.parse(raw);
      } catch {}
    }
    // Subscriptions are optional: a missing or broken file exports as [].
    let subscriptions = [];
    try {
      const subsRaw = await fs.readFile(path.join(historyDir, "subscriptions.json"), "utf-8");
      subscriptions = JSON.parse(subsRaw).subscriptions || [];
    } catch {}
    res.setHeader("Content-Type", "application/json");
    res.setHeader("Content-Disposition", 'attachment; filename="youtube-summarizer-library.json"');
    res.json({
      version: 1,
      exportedAt: new Date().toISOString(),
      meta,
      sessions,
      subscriptions,
    });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
// POST /api/library/import: merge an exported library into this install.
// Existing sessions, folders and subscriptions are never overwritten; only
// missing ones are added. Responds with imported/skipped/total counts.
// NOTE(review): the 200mb limit here may never take effect, because the
// global express.json({ limit: "100mb" }) middleware appears to parse the
// body first — confirm and adjust the middleware order if large imports fail.
app.post("/api/library/import", express.json({ limit: "200mb" }), async (req, res) => {
  try {
    const data = req.body;
    if (!data || !data.sessions) {
      return res.status(400).json({ error: "Invalid library file — missing sessions data" });
    }
    let imported = 0;
    let skipped = 0;
    // Import sessions
    for (const [id, session] of Object.entries(data.sessions)) {
      // Ids come from the uploaded file and become filenames. Skip any that
      // are empty or contain a path separator / NUL, so an import cannot
      // write outside the history directory.
      if (id === "" || /[\/\\\0]/.test(id)) {
        skipped++;
        continue;
      }
      const filePath = path.join(historyDir, `${id}.json`);
      // Skip if session already exists (don't overwrite)
      try {
        await fs.access(filePath);
        skipped++;
        continue;
      } catch {}
      await fs.writeFile(filePath, JSON.stringify(session));
      imported++;
    }
    // Merge meta (add imported sessions to uncategorized if not already placed)
    if (data.meta) {
      const existingMeta = await loadMeta();
      const allExistingIds = new Set([
        ...existingMeta.uncategorized,
        ...existingMeta.folders.flatMap((f) => f.items),
      ]);
      // Import folders that don't exist
      if (Array.isArray(data.meta.folders)) {
        for (const folder of data.meta.folders) {
          if (!folder || typeof folder !== "object") continue;
          const existingFolder = existingMeta.folders.find((f) => f.id === folder.id);
          if (!existingFolder) {
            // Guarantee an items array so later readers can iterate it.
            const items = Array.isArray(folder.items) ? folder.items : [];
            existingMeta.folders.push({ ...folder, items });
            items.forEach((itemId) => allExistingIds.add(itemId));
          }
        }
      }
      // Add any uncategorized items that aren't already placed
      if (Array.isArray(data.meta.uncategorized)) {
        for (const id of data.meta.uncategorized) {
          if (!allExistingIds.has(id)) {
            existingMeta.uncategorized.unshift(id);
          }
        }
      }
      await saveMeta(existingMeta);
    }
    // Import subscriptions (merge, don't duplicate)
    if (Array.isArray(data.subscriptions) && data.subscriptions.length > 0) {
      const subsFile = path.join(historyDir, "subscriptions.json");
      let existingSubs = [];
      try {
        existingSubs = JSON.parse(await fs.readFile(subsFile, "utf-8")).subscriptions || [];
      } catch {}
      // Subscriptions are considered duplicates when their url matches.
      const existingUrls = new Set(existingSubs.map((s) => s.url));
      for (const sub of data.subscriptions) {
        if (!existingUrls.has(sub.url)) {
          existingSubs.push(sub);
        }
      }
      await fs.writeFile(subsFile, JSON.stringify({ subscriptions: existingSubs }));
    }
    res.json({ ok: true, imported, skipped, total: Object.keys(data.sessions).length });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
// ── Subscriptions ─────────────────────────────────────────────────────────
const subsPath = path.join(historyDir, "subscriptions.json");
/**
 * Read the subscription list from subscriptions.json.
 * @returns {Promise<Array>} The stored `subscriptions` array, or [] when the
 *   file is missing, unparseable, or has no such field.
 */
async function loadSubscriptions() {
  try {
    const raw = await fs.readFile(subsPath, "utf-8");
    const { subscriptions } = JSON.parse(raw);
    return subscriptions || [];
  } catch {
    return [];
  }
}
/**
 * Write the subscription list to subscriptions.json (pretty-printed).
 * @param {Array} subs Subscriptions to persist.
 * @returns {Promise<void>}
 */
async function saveSubscriptions(subs) {
  const payload = JSON.stringify({ subscriptions: subs }, null, 2);
  await fs.writeFile(subsPath, payload);
}
/**
 * List the most recent videos of a channel or playlist through yt-dlp,
 * without downloading anything. Runs with --flat-playlist for speed, so
 * only id and title are returned (no upload date in that mode).
 * @param {string} url Channel or playlist URL.
 * @param {number} [limit=15] Maximum number of entries to request.
 * @returns {Promise<Array<{id: string, title: string}>>}
 */
async function listChannelVideosFast(url, limit = 15) {
  const args = [
    "--print", "%(id)s|%(title)s",
    "--no-download",
    "--playlist-end", String(limit),
    "--flat-playlist",
    ...ytCookieArgs(),
    ...ytExtraArgs(),
    url,
  ];
  const { stdout } = await execFileAsync("yt-dlp", args, { timeout: 60000 });
  const videos = [];
  for (const line of stdout.trim().split("\n")) {
    if (!line) continue;
    // Split on the first "|" only; titles may contain the character too.
    const sep = line.indexOf("|");
    videos.push({ id: line.slice(0, sep), title: line.slice(sep + 1) });
  }
  return videos;
}
/**
 * Look up upload dates for a list of video ids with yt-dlp, one call per
 * batch of up to `batchSize` (50) ids. Stops early after two consecutive
 * failed batches, which usually indicates the requests are being blocked.
 * @param {string[]} videoIds Video ids to query.
 * @returns {Promise<Object<string, string>>} Map of id to the upload_date
 *   string printed by yt-dlp; ids with no date (or "NA") are omitted.
 */
async function fetchUploadDates(videoIds) {
  const dateMap = {};
  if (videoIds.length === 0) return dateMap;
  const batchSize = 50;
  let consecutiveFails = 0;
  for (let start = 0; start < videoIds.length; start += batchSize) {
    const batchNo = Math.floor(start / batchSize) + 1;
    const batch = videoIds.slice(start, start + batchSize);
    const args = [
      "--print", "%(id)s|%(upload_date)s",
      "--no-download",
      ...ytCookieArgs(),
      ...batch.map((id) => `https://www.youtube.com/watch?v=${id}`),
    ];
    try {
      const { stdout } = await execFileAsync("yt-dlp", args, { timeout: 45000 });
      stdout.trim().split("\n").filter(Boolean).forEach((line) => {
        const fields = line.split("|");
        const id = fields[0];
        const date = fields[1];
        if (id && date && date !== "NA") dateMap[id] = date;
      });
      consecutiveFails = 0;
      subLog(` Batch ${batchNo}: got dates for ${batch.length} video(s)`);
    } catch (err) {
      consecutiveFails += 1;
      subLog(` ⚠ Batch ${batchNo} failed: ${err.message.slice(0, 80)}`);
      if (consecutiveFails >= 2) {
        subLog(` ⚠ 2 consecutive failures — aborting yt-dlp date fetch (bot detection likely)`);
        break;
      }
    }
  }
  return dateMap;
}
// ── RSS-based date fetching (bypasses bot detection) ─────────────────────
/**
 * Fetch a URL over HTTP(S) and return the response body as a UTF-8 string.
 *
 * Redirects (3xx with a Location header) are followed, including relative
 * Location values and redirects that switch between http and https. Any
 * other status code resolves with whatever body the server sent — callers
 * are expected to cope with non-feed content.
 *
 * @param {string} url - Absolute http(s) URL.
 * @param {number} [redirectsLeft=5] - Remaining redirect hops before giving up.
 * @returns {Promise<string>} Response body.
 * @throws Rejects on network errors, malformed URLs, unsupported protocols,
 *   or when the redirect budget is exhausted.
 */
function fetchUrl(url, redirectsLeft = 5) {
  return new Promise((resolve, reject) => {
    // A throw inside the executor (bad URL, unsupported protocol) rejects the promise
    const target = new URL(url);
    const client = target.protocol === "http:" ? http : https;
    client.get(target, (res) => {
      const { statusCode, headers } = res;
      if (statusCode >= 300 && statusCode < 400 && headers.location) {
        res.resume(); // drain the redirect response so its socket is released
        if (redirectsLeft <= 0) {
          reject(new Error(`Too many redirects fetching ${url}`));
          return;
        }
        let next;
        try {
          // Location may be relative — resolve it against the current URL
          next = new URL(headers.location, target).href;
        } catch (err) {
          reject(err);
          return;
        }
        fetchUrl(next, redirectsLeft - 1).then(resolve, reject);
        return;
      }
      // Decode as a stream so multi-byte characters split across chunks survive
      res.setEncoding("utf8");
      let body = "";
      res.on("data", (chunk) => { body += chunk; });
      res.on("end", () => resolve(body));
      res.on("error", reject);
    }).on("error", reject);
  });
}
/**
 * Resolve the channel_id for a YouTube channel/playlist URL using yt-dlp.
 *
 * Two attempts are made, in order:
 *   1. --flat-playlist (fast, but may print "NA")
 *   2. full metadata for the first playlist item (slower)
 * A yt-dlp failure in either attempt is ignored and the next one is tried.
 *
 * @param {string} url
 * @returns {Promise<string|null>} The channel id, or null if neither attempt
 *   produced a plausible value.
 */
async function getChannelId(url) {
  const attempts = [
    { flat: true, timeout: 15000 },
    { flat: false, timeout: 30000 },
  ];
  for (const { flat, timeout } of attempts) {
    try {
      const args = ["--print", "%(channel_id)s", "--no-download", "--playlist-items", "1"];
      if (flat) args.push("--flat-playlist");
      args.push(...ytCookieArgs(), url);
      const { stdout } = await execFileAsync("yt-dlp", args, { timeout });
      const [candidate] = stdout.trim().split("\n");
      // Reject empty output, yt-dlp's "NA" placeholder, and implausibly short values
      if (candidate && candidate !== "NA" && candidate.length > 5) return candidate;
    } catch {
      // Deliberately ignored — fall through to the next attempt
    }
  }
  return null;
}
/**
 * Read publish dates from a channel's YouTube RSS feed.
 * @param {string|null} channelId - Channel id; a falsy value yields an empty map.
 * @returns {Promise<Object<string, string>>} Map of videoId → "YYYYMMDD" for
 *   every <entry> found in the feed. Fetch errors are logged and produce
 *   whatever was collected so far (normally an empty map).
 */
async function fetchDatesFromRSS(channelId) {
  const dateMap = {};
  if (!channelId) return dateMap;
  try {
    const xml = await fetchUrl(`https://www.youtube.com/feeds/videos.xml?channel_id=${channelId}`);
    const entryPattern = /<entry>[\s\S]*?<yt:videoId>([^<]+)<\/yt:videoId>[\s\S]*?<published>([^<]+)<\/published>[\s\S]*?<\/entry>/g;
    for (const [, videoId, published] of xml.matchAll(entryPattern)) {
      // published looks like "2025-12-01T18:00:00+00:00" → keep the date part as "20251201"
      dateMap[videoId] = published.slice(0, 10).replace(/-/g, "");
    }
  } catch (err) {
    subLog(` ⚠ RSS feed fetch failed: ${err.message}`);
  }
  return dateMap;
}
// ── Podcast RSS feed parsing ────────────────────────────────────────────────
/**
 * Heuristically decide whether a URL looks like a podcast RSS feed.
 * The check is a case-insensitive substring match against generic feed
 * markers and a list of well-known podcast hosting domains.
 * @param {string} url
 * @returns {boolean} true when any marker occurs in the URL; false for a
 *   falsy url or when nothing matches.
 */
function isPodcastFeedUrl(url) {
  if (!url) return false;
  const normalized = url.trim().toLowerCase();
  const feedMarkers = [
    // Generic feed patterns
    "/feed", "/rss", "feeds.", ".xml",
    // Known podcast hosts
    "anchor.fm", "feeds.buzzsprout", "feeds.simplecast",
    "feeds.megaphone", "feeds.transistor", "feeds.libsyn",
    "feeds.podcastmirror", "feeds.acast", "feeds.fireside",
    "rss.art19", "podbean.com/feed",
  ];
  return feedMarkers.some(marker => normalized.includes(marker));
}
/**
 * Fetch a podcast RSS feed and extract its title and episodes using
 * regex-based parsing (no XML parser).
 *
 * Items without an <enclosure url="..."> are skipped and do not count
 * towards `limit`. Each episode's id is its <guid>, falling back to the
 * audio URL when the guid is missing or empty.
 *
 * @param {string} feedUrl
 * @param {number} [limit=200] - Maximum number of episodes to return.
 * @returns {Promise<{title: string, episodes: Array<{id: string, title: string, date: string, audioUrl: string, duration: string}>}>}
 *   `date` is "YYYYMMDD" (UTC) or "" when <pubDate> is absent/unparseable.
 */
async function parsePodcastRSS(feedUrl, limit = 200) {
  const xml = await fetchUrl(feedUrl);

  // First capture group of `pattern` within `text`, trimmed; `fallback` when there is no match
  const capture = (text, pattern, fallback) => {
    const found = text.match(pattern);
    return found ? found[1].trim() : fallback;
  };

  const podcastTitle = capture(
    xml,
    /<channel>[\s\S]*?<title>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?<\/title>/,
    "Unknown Podcast",
  );

  const episodes = [];
  for (const [, item] of xml.matchAll(/<item>([\s\S]*?)<\/item>/g)) {
    if (!(episodes.length < limit)) break;

    const audioUrl = capture(item, /<enclosure[^>]+url=["']([^"']+)["']/, null);
    if (!audioUrl) continue; // no audio → not a playable episode

    const guid = capture(item, /<guid[^>]*>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?<\/guid>/, null);
    const epTitle = capture(item, /<title>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?<\/title>/, "Untitled Episode");
    const duration = capture(item, /<itunes:duration>([^<]+)<\/itunes:duration>/, "");

    // Convert <pubDate> to YYYYMMDD; leave empty when it cannot be parsed
    const pubDate = capture(item, /<pubDate>([^<]+)<\/pubDate>/, null);
    let dateStr = "";
    if (pubDate) {
      try {
        const parsed = new Date(pubDate);
        if (!isNaN(parsed.getTime())) {
          dateStr = parsed.toISOString().slice(0, 10).replace(/-/g, "");
        }
      } catch {}
    }

    episodes.push({
      id: guid || audioUrl,
      title: epTitle,
      date: dateStr,
      audioUrl,
      duration,
    });
  }
  return { title: podcastTitle, episodes };
}
/**
 * Download a podcast episode audio file via HTTP(S) to a local path.
 *
 * Redirects are followed (absolute or relative Location, http ↔ https) up to
 * `maxRedirects` hops. Only a final 200 response is written to disk.
 *
 * @param {string} audioUrl - Absolute http(s) URL of the audio file.
 * @param {string} destPath - Local file path to write to.
 * @param {number} [maxRedirects=10] - Redirect hops allowed before failing.
 * @returns {Promise<void>} Resolves once the file has been fully written and closed.
 * @throws Rejects on network/file errors, a non-200 final status, too many
 *   redirects, a malformed URL, or a connection that closes mid-download.
 */
function downloadPodcastAudio(audioUrl, destPath, maxRedirects = 10) {
  return new Promise((resolve, reject) => {
    const doFetch = (url, redirectsLeft) => {
      let req;
      try {
        const target = new URL(url);
        const getter = target.protocol === "https:" ? https : http;
        req = getter.get(target, (res) => {
          const { statusCode, headers } = res;
          if (statusCode >= 300 && statusCode < 400 && headers.location) {
            res.resume(); // drain so the socket is released
            if (redirectsLeft <= 0) {
              return reject(new Error("Too many redirects downloading podcast audio"));
            }
            let next;
            try {
              // Location may be relative — resolve against the URL we just requested
              next = new URL(headers.location, target).href;
            } catch (err) {
              return reject(err);
            }
            return doFetch(next, redirectsLeft - 1);
          }
          if (statusCode !== 200) {
            res.resume();
            return reject(new Error(`HTTP ${statusCode} downloading podcast audio`));
          }
          const fileStream = createWriteStream(destPath);
          res.on("error", (err) => {
            fileStream.destroy();
            reject(err);
          });
          // pipe() does not end the destination when the source dies early,
          // so detect a truncated response explicitly instead of hanging.
          res.on("close", () => {
            if (!res.complete) {
              fileStream.destroy();
              reject(new Error("Connection closed before podcast audio finished downloading"));
            }
          });
          res.pipe(fileStream);
          fileStream.on("finish", () => {
            fileStream.close((err) => (err ? reject(err) : resolve()));
          });
          fileStream.on("error", reject);
        });
      } catch (err) {
        // new URL() / get() throw synchronously on malformed or unsupported URLs;
        // on a redirect hop this runs inside a callback, so it must be caught here.
        reject(err);
        return;
      }
      req.on("error", reject);
    };
    doFetch(audioUrl, maxRedirects);
  });
}
/**
 * Determine a display name for a channel URL.
 *
 * Resolution order:
 *   1. yt-dlp with --flat-playlist (fast)
 *   2. yt-dlp without --flat-playlist (slower; reads video metadata)
 *   3. the "@handle" segment of the URL path, if present
 *   4. the literal "Unknown Channel"
 * Failures in any step are ignored and the next step is tried.
 *
 * @param {string} url
 * @returns {Promise<string>}
 */
async function fetchChannelName(url) {
  const attempts = [
    { flat: true, timeout: 15000 },
    { flat: false, timeout: 30000 },
  ];
  for (const { flat, timeout } of attempts) {
    try {
      const args = ["--print", "%(channel)s", "--no-download", "--playlist-end", "1"];
      if (flat) args.push("--flat-playlist");
      args.push(...ytCookieArgs(), url);
      const { stdout } = await execFileAsync("yt-dlp", args, { timeout });
      const [candidate] = stdout.trim().split("\n");
      if (candidate && candidate !== "NA") return candidate;
    } catch {
      // Deliberately ignored — try the next strategy
    }
  }

  // Last resort: pull the handle straight out of the URL
  try {
    const { pathname } = new URL(url);
    const handle = pathname.match(/\/@([^/]+)/);
    if (handle) return `@${handle[1]}`;
  } catch {}

  return "Unknown Channel";
}
// ── In-process write serialization ────────────────────────────────────────
// Per-file promise chain to prevent lost-update races on read-modify-write
// state files (skip-list, seen-list). Without this, two concurrent DELETE
// handlers can each load the same snapshot, add their own id, and the
// second write overwrites the first — silently dropping entries.
const _fileLocks = new Map();

/**
 * Run `fn` after every previously queued task for `key` has settled.
 *
 * Tasks for the same key run strictly one at a time, in call order. A task
 * that throws/rejects does not block or poison later tasks.
 *
 * @param {string} key - Serialization key (typically the file path).
 * @param {() => (any|Promise<any>)} fn - Task to run; always called with no arguments.
 * @returns {Promise<any>} Settles with the outcome of `fn`.
 */
function withFileLock(key, fn) {
  const prev = _fileLocks.get(key) || Promise.resolve();
  // Call fn with NO arguments: the previous task's result must not leak into it
  const run = () => fn();
  const next = prev.then(run, run); // run whether prev resolved or rejected
  // Keep the chain alive but don't leak errors to the next caller
  const tail = next.catch(() => {});
  _fileLocks.set(key, tail);
  // Once this task is the last one and has settled, drop the entry so the
  // map does not retain a settled chain per key forever.
  tail.then(() => {
    if (_fileLocks.get(key) === tail) _fileLocks.delete(key);
  });
  return next;
}
// Skip list — videos deleted from history that subscriptions should not re-add
const skipPath = path.join(historyDir, "skip-list.json");

/**
 * Load the skip list from disk.
 * @returns {Promise<Set<string>>} Skipped video ids; an empty Set when the
 *   file is missing or cannot be parsed.
 */
async function loadSkipList() {
  try {
    const raw = await fs.readFile(skipPath, "utf-8");
    const { videoIds } = JSON.parse(raw);
    return new Set(videoIds || []);
  } catch {
    return new Set();
  }
}
/**
 * Add one video id to the persisted skip list.
 * The read-modify-write cycle runs under the per-file lock so concurrent
 * callers cannot overwrite each other's additions.
 * @param {string} videoId
 * @returns {Promise<void>}
 */
async function addToSkipList(videoId) {
  const appendAndSave = async () => {
    const current = await loadSkipList();
    current.add(videoId);
    const payload = JSON.stringify({ videoIds: [...current] });
    await fs.writeFile(skipPath, payload);
  };
  return withFileLock(skipPath, appendAndSave);
}
// Seen list — videos already offered for approval (persists across restarts)
const seenPath = path.join(historyDir, "seen-list.json");

/**
 * Load the seen list from disk.
 * @returns {Promise<Set<string>>} Seen video ids; an empty Set when the file
 *   is missing or cannot be parsed.
 */
async function loadSeenList() {
  try {
    const raw = await fs.readFile(seenPath, "utf-8");
    const { videoIds } = JSON.parse(raw);
    return new Set(videoIds || []);
  } catch {
    return new Set();
  }
}
/**
 * Add several video ids to the persisted seen list in one locked
 * read-modify-write cycle.
 * @param {Iterable<string>} videoIds
 * @returns {Promise<void>}
 */
async function addToSeenList(videoIds) {
  const mergeAndSave = async () => {
    const current = await loadSeenList();
    for (const videoId of videoIds) {
      current.add(videoId);
    }
    const payload = JSON.stringify({ videoIds: [...current] });
    await fs.writeFile(seenPath, payload);
  };
  return withFileLock(seenPath, mergeAndSave);
}
/**
 * Collect the videoId of every session stored in the history directory.
 * Each *.json file other than the known state files is parsed and its
 * `videoId` (when truthy) is added. Unreadable or malformed files are
 * skipped; an unreadable directory yields an empty Set.
 * @returns {Promise<Set<string>>}
 */
async function getProcessedVideoIds() {
  // Bookkeeping files that live next to the session files but are not sessions
  const stateFiles = new Set([
    "_meta.json",
    "subscriptions.json",
    "skip-list.json",
    "seen-list.json",
    "auto-queue.json",
  ]);
  const ids = new Set();

  let entries;
  try {
    entries = await fs.readdir(historyDir);
  } catch {
    return ids;
  }

  for (const name of entries) {
    if (!name.endsWith(".json") || stateFiles.has(name)) continue;
    try {
      const record = JSON.parse(await fs.readFile(path.join(historyDir, name), "utf-8"));
      if (record.videoId) ids.add(record.videoId);
    } catch {
      // Skip files that cannot be read or parsed
    }
  }
  return ids;
}
// Server-side auto-queue for subscription-discovered videos (persisted to disk)
const autoQueuePath = path.join(historyDir, "auto-queue.json");
let autoQueue = [];

/**
 * Read the persisted auto-queue.
 * @returns {Promise<Array<object>>} Stored queue items, or [] when the file
 *   is missing, unreadable, or malformed.
 */
async function loadAutoQueue() {
  try {
    const raw = await fs.readFile(autoQueuePath, "utf-8");
    const { items } = JSON.parse(raw);
    return items || [];
  } catch {
    return [];
  }
}
/**
 * Write the current in-memory auto-queue to disk (pretty-printed JSON).
 * @returns {Promise<void>}
 */
async function saveAutoQueue() {
  const payload = JSON.stringify({ items: autoQueue }, null, 2);
  await fs.writeFile(autoQueuePath, payload);
}
// Restore the persisted queue at startup, then drop every entry whose video
// already exists in history so it is not offered or processed again.
autoQueue = await loadAutoQueue();
{
  const alreadyDone = await getProcessedVideoIds();
  const stillQueued = autoQueue.filter(q => !alreadyDone.has(q.videoId));
  const dropped = autoQueue.length - stillQueued.length;
  autoQueue = stillQueued;
  if (dropped > 0) {
    console.log(` Auto-queue: removed ${dropped} already-processed item(s)`);
    await saveAutoQueue();
  }
}
// ── Background processing queue ──────────────────────────────────────────
// Processes "approved" auto-queue items sequentially with configurable delay
// between items to avoid hammering YouTube with rapid-fire downloads.
// Location of the persisted processing settings (read by loadProcessingConfig,
// written by saveProcessingConfig).
const processingConfigPath = path.join(configDir, "processing-config.json");
// User-tunable settings. These are the defaults; values found in
// processing-config.json override them when loadProcessingConfig() runs.
let processingConfig = {
delaySeconds: 300, // Default delay between processing queue items (5 minutes)
enabled: true, // Whether background processing is active
};
// Runtime-only state of the background processor. Initialised here on module
// load; nothing in this section writes it to disk.
let processingState = {
running: false, // Is the processor loop currently active?
currentItem: null, // The item currently being processed (or null)
lastCompleted: null, // Timestamp of last completed item
rush: false, // If true, skip delay before next item
log: [], // Recent processing log entries
};
/**
 * Log a background-processor message to the console and to the in-memory
 * processor log, which is capped at the 100 most recent entries.
 * @param {string} msg
 */
function procLog(msg) {
  console.log(` [processor] ${msg}`);
  const { log } = processingState;
  log.push({ t: new Date().toISOString(), msg });
  if (log.length > 100) {
    log.shift(); // drop the oldest entry
  }
}
/**
 * Overlay persisted settings onto `processingConfig`.
 * Only values of the expected type are applied; a missing or malformed
 * config file leaves the current values untouched.
 * @returns {Promise<void>}
 */
async function loadProcessingConfig() {
  try {
    const raw = await fs.readFile(processingConfigPath, "utf-8");
    const { delaySeconds, enabled } = JSON.parse(raw);
    if (typeof delaySeconds === "number") {
      processingConfig.delaySeconds = delaySeconds;
    }
    if (typeof enabled === "boolean") {
      processingConfig.enabled = enabled;
    }
  } catch {
    // No usable config file — keep defaults
  }
}
/**
 * Persist the current `processingConfig` (pretty-printed JSON).
 * @returns {Promise<void>}
 */
async function saveProcessingConfig() {
  const payload = JSON.stringify(processingConfig, null, 2);
  await fs.writeFile(processingConfigPath, payload);
}

// Apply any persisted settings before the processor can start
await loadProcessingConfig();
// The background processor: picks "approved" items, processes via internal HTTP,
// waits the configured delay, then picks the next one. Runs continuously.
//
// Lifecycle: the loop exits (and clears `running`) as soon as there is no
// approved item left or processing has been disabled; kickProcessor() starts
// it again later. Only one loop runs at a time — a second call returns
// immediately while `running` is set.
//
// Each item ends in status "completed" or "failed"; the queue is saved to
// disk both when an item starts and when it finishes.
async function backgroundProcessor() {
if (processingState.running) return; // Already running
processingState.running = true;
procLog("Background processor started");
while (true) {
// Find next approved item
const item = autoQueue.find(q => q.status === "approved");
if (!item) {
procLog("No approved items in queue — processor sleeping");
processingState.running = false;
processingState.currentItem = null;
return;
}
if (!processingConfig.enabled) {
procLog("Processing is paused — processor sleeping");
processingState.running = false;
processingState.currentItem = null;
return;
}
// Wait the configured delay (unless rush mode or first item)
// "First item" here means no item has completed yet: lastCompleted is only
// set on success below, so failures alone never trigger the delay.
if (processingState.lastCompleted && !processingState.rush) {
const delaySec = processingConfig.delaySeconds;
procLog(`Waiting ${delaySec}s before next item...`);
await new Promise(r => setTimeout(r, delaySec * 1000));
}
// Rush applies to a single item only — reset it on every pass
processingState.rush = false;
// Re-check the item hasn't been removed/changed while we waited
const freshItem = autoQueue.find(q => q.id === item.id);
if (!freshItem || freshItem.status !== "approved") {
procLog(`Item ${item.id} was removed or status changed — skipping`);
continue;
}
// Mark as processing
freshItem.status = "processing";
processingState.currentItem = freshItem;
await saveAutoQueue();
procLog(`Processing: ${freshItem.title} (${freshItem.url})`);
try {
// Call the existing /api/process endpoint via internal HTTP request
const result = await processItemInternally(freshItem);
freshItem.status = "completed";
freshItem.completedAt = new Date().toISOString();
freshItem.historyId = result.historyId || null;
processingState.lastCompleted = new Date().toISOString();
procLog(`✓ Completed: ${freshItem.title}`);
} catch (err) {
// A failure is recorded on the item and the loop moves on to the next one
freshItem.status = "failed";
freshItem.error = err.message || String(err);
freshItem.failedAt = new Date().toISOString();
procLog(`✗ Failed: ${freshItem.title}${freshItem.error.slice(0, 200)}`);
}
processingState.currentItem = null;
await saveAutoQueue();
}
}
// Internal HTTP request to /api/process — consumes the SSE stream and
// waits for the "result" or "error" event. This reuses the entire existing
// pipeline without any code duplication.
/**
 * Process one auto-queue item by POSTing it to this server's own
 * /api/process endpoint and reading the Server-Sent-Events response.
 *
 * "log" events are forwarded to the processor log. When the stream ends the
 * promise settles with the LAST "error" event (rejects) or, failing that, the
 * LAST "result" event (resolves).
 *
 * @param {object} item - Auto-queue item; `url` is required, `type`, `title`,
 *   `uploadDate` and `videoId` are forwarded when present.
 * @returns {Promise<object>} Payload of the "result" event.
 * @throws Rejects when no API key is configured, on request/stream errors,
 *   on an "error" event, on timeout, or when the stream ends without a result.
 */
function processItemInternally(item) {
  return new Promise((resolve, reject) => {
    const apiKey = resolveApiKey(null); // Use server's stored key
    if (!apiKey) {
      return reject(new Error("No API key configured. Set your Gemini API key in Settings."));
    }
    const body = JSON.stringify({
      url: item.url,
      apiKey: "USE_SERVER_KEY",
      type: item.type || undefined,
      title: item.title || undefined,
      uploadDate: item.uploadDate || undefined,
      episodeId: item.videoId || undefined,
    });
    const req = http.request({
      hostname: "127.0.0.1",
      port: PORT,
      path: "/api/process",
      method: "POST",
      headers: { "Content-Type": "application/json", "Content-Length": Buffer.byteLength(body) },
      timeout: 1800000, // 30 minutes max for very long videos
    }, (res) => {
      // Decode as a UTF-8 stream so multi-byte characters split across chunks survive
      res.setEncoding("utf8");
      let buffer = "";
      // Parser state must outlive a single chunk: a large "data:" line usually
      // arrives in a later chunk than its "event:" line. Resetting this per
      // chunk would silently drop the event (typically the big "result").
      let currentEvent = null;
      let lastResult = null;
      let lastError = null;
      res.on("data", (chunk) => {
        buffer += chunk;
        // Parse SSE events from the buffer
        const lines = buffer.split("\n");
        buffer = lines.pop() || ""; // Keep incomplete line in buffer
        for (const rawLine of lines) {
          // Tolerate CRLF line endings
          const line = rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine;
          if (line.startsWith("event: ")) {
            currentEvent = line.slice(7).trim();
          } else if (line.startsWith("data: ") && currentEvent) {
            try {
              const data = JSON.parse(line.slice(6));
              if (currentEvent === "result") lastResult = data;
              if (currentEvent === "error") lastError = data;
              if (currentEvent === "log") {
                procLog(` [${data.elapsed || "?"}s] ${data.message}`);
              }
            } catch {
              // Unparseable payload — ignore this event
            }
            currentEvent = null;
          } else if (line === "") {
            // Blank line terminates the current event
            currentEvent = null;
          }
        }
      });
      res.on("end", () => {
        if (lastError) {
          reject(new Error(lastError.message || "Processing failed"));
        } else if (lastResult) {
          resolve(lastResult);
        } else {
          reject(new Error("Processing ended without result"));
        }
      });
      res.on("error", reject);
    });
    req.on("error", reject);
    req.on("timeout", () => {
      req.destroy();
      reject(new Error("Processing timed out after 30 minutes"));
    });
    req.write(body);
    req.end();
  });
}
/**
 * Start the background processor if it is idle, processing is enabled, and
 * at least one queue item is approved. Does nothing otherwise. Errors from
 * the processor loop are written to the processor log.
 */
function kickProcessor() {
  if (processingState.running || !processingConfig.enabled) return;
  if (!autoQueue.some(q => q.status === "approved")) return;
  backgroundProcessor().catch(err => procLog(`Processor error: ${err.message}`));
}
let subCheckRunning = false;
let subCheckPromise = null;
let subCheckLog = []; // Stores recent check logs for debug endpoint

/**
 * Log a subscription-check message to the console and to `subCheckLog`,
 * which is capped at the 200 most recent entries.
 * @param {string} msg
 */
function subLog(msg) {
  console.log(msg);
  subCheckLog.push({ t: new Date().toISOString(), msg });
  if (subCheckLog.length > 200) {
    subCheckLog.shift();
  }
}
/**
 * Run a subscription check, never concurrently with another one.
 * If a check is already in flight, this call waits for it to finish and
 * then starts a fresh check of its own.
 * @returns {Promise<void>} Settles when the check started by this call is done.
 */
async function checkSubscriptions() {
  if (!subCheckRunning) {
    subCheckRunning = true;
    const release = () => {
      subCheckRunning = false;
      subCheckPromise = null;
    };
    subCheckPromise = _checkSubscriptionsInner().finally(release);
    return subCheckPromise;
  }
  // A check is in flight: wait for it, then go again
  if (subCheckPromise) await subCheckPromise;
  return checkSubscriptions();
}
// One full pass over all subscriptions: discover new podcast episodes /
// YouTube videos published on or after each subscription's cutoff date and
// push them onto the auto-queue ("approved" when the subscription has
// autoDownload set, otherwise "pending").
//
// Candidates are ignored when they are already in the library, already in
// the queue, on the skip list, or on the seen list. Items proven to be older
// than the cutoff are added to the seen list so later checks skip them early.
//
// A failure in one subscription is logged and does not stop the others.
// At the end the queue is always saved; the subscription list is saved only
// when something on it changed.
// NOTE(review): the final saveSubscriptions(subs) writes the snapshot loaded
// at the top of this function — subscriptions edited through the HTTP
// endpoints while a check is running may be overwritten; confirm.
async function _checkSubscriptionsInner() {
// Pro-tier feature: skip silently when not entitled. The HTTP gate above
// returns 402 to callers; this guards the background timer + manual paths.
if (!LIC.entitlements.has("subscriptions")) {
subCheckLog = [];
subLog("Skipped: subscriptions require a Pro license.");
return;
}
subCheckLog = []; // Clear logs for fresh check
const subs = await loadSubscriptions();
if (subs.length === 0) { subLog("No subscriptions found"); return; }
// Snapshots of everything we already know about, used to filter candidates
const processedIds = await getProcessedVideoIds();
const skippedIds = await loadSkipList();
const seenIds = await loadSeenList();
const queuedIds = new Set(autoQueue.map(q => {
// For YouTube: extract video ID from URL; for podcasts: use stored videoId (GUID)
if (q.videoId) return q.videoId;
const m = q.url.match(/[?&]v=([a-zA-Z0-9_-]{11})/);
return m ? m[1] : null;
}).filter(Boolean));
subLog(`${processedIds.size} in library, ${skippedIds.size} skipped, ${seenIds.size} seen, ${queuedIds.size} in queue`);
// Set whenever a subscription object was modified and needs persisting
let changed = false;
for (const sub of subs) {
if (sub.paused) { subLog(`${sub.name} — paused, skipping`); continue; }
try {
const icon = sub.type === "podcast" ? "🎙" : "📡";
subLog(`${icon} Checking: ${sub.name} (${sub.url})`);
// createdAt is an ISO timestamp; strip the separators and keep YYYYMMDD so
// it can be compared as a string against the YYYYMMDD dates below
const cutoffDate = sub.createdAt.replace(/[-T:\.Z]/g, "").slice(0, 8);
if (sub.type === "podcast") {
// ── Podcast subscription: discover episodes from RSS feed ──
const { episodes } = await parsePodcastRSS(sub.url, 200);
subLog(` Found ${episodes.length} episode(s) in RSS feed`);
if (episodes.length === 0) {
sub.lastChecked = new Date().toISOString();
changed = true;
continue;
}
// Filter out already-known episodes
const unknowns = episodes.filter(ep => {
if (processedIds.has(ep.id)) return false;
if (queuedIds.has(ep.id)) return false;
if (skippedIds.has(ep.id)) return false;
if (seenIds.has(ep.id)) return false;
return true;
});
const filtered = episodes.length - unknowns.length;
subLog(` ${unknowns.length} to check, ${filtered} already known`);
if (unknowns.length === 0) {
sub.lastChecked = new Date().toISOString();
changed = true;
continue;
}
subLog(` Cutoff date: ${cutoffDate}`);
let newCount = 0;
// Episodes proven older than the cutoff; remembered so they are not re-examined
const seenNow = [];
for (const ep of unknowns) {
if (!ep.date || ep.date.length !== 8) continue; // skip undated
if (ep.date < cutoffDate) {
seenNow.push(ep.id);
continue; // before cutoff
}
subLog(`${ep.date}${ep.title.slice(0, 60)}`);
const itemStatus = sub.autoDownload ? "approved" : "pending";
autoQueue.push({
id: `auto-${Date.now()}-${Buffer.from(ep.id).toString("base64url").slice(0, 16)}`,
videoId: ep.id, // episode GUID
url: ep.audioUrl,
title: ep.title,
uploadDate: ep.date,
subscriptionId: sub.id,
subscriptionName: sub.name,
status: itemStatus,
type: "podcast",
duration: ep.duration || "",
});
queuedIds.add(ep.id);
newCount++;
}
if (seenNow.length > 0) await addToSeenList(seenNow);
sub.lastChecked = new Date().toISOString();
subLog(`${newCount} episode(s) queued for approval from ${sub.name}`);
changed = true;
} else {
// ── YouTube subscription: discover videos via yt-dlp + RSS dates ──
// Scale fetch limit based on how far back the subscription date goes
// (4 videos per started week since the cutoff, clamped to 15..200)
const daysSinceSub = Math.max(1, Math.ceil((Date.now() - new Date(sub.createdAt).getTime()) / 86400000));
const estimatedVideos = Math.ceil(daysSinceSub / 7) * 4;
const fetchLimit = Math.min(Math.max(estimatedVideos, 15), 200);
subLog(` Subscription age: ${daysSinceSub}d → fetching up to ${fetchLimit} videos`);
const candidates = await listChannelVideosFast(sub.url, fetchLimit);
subLog(` Found ${candidates.length} recent video(s)`);
if (candidates.length === 0) {
subLog(` ⚠ No videos returned from yt-dlp for this channel`);
sub.lastChecked = new Date().toISOString();
changed = true;
continue;
}
const unknowns = candidates.filter(v => {
if (processedIds.has(v.id)) return false;
if (queuedIds.has(v.id)) return false;
if (skippedIds.has(v.id)) return false;
if (seenIds.has(v.id)) return false;
return true;
});
const filtered = candidates.length - unknowns.length;
subLog(` ${unknowns.length} to check, ${filtered} already known`);
if (unknowns.length === 0) {
sub.lastChecked = new Date().toISOString();
changed = true;
continue;
}
subLog(` Cutoff date: ${cutoffDate} — fetching upload dates...`);
// Step 1: Get channel ID (cached on subscription, or fetch once)
if (!sub.channelId) {
subLog(` Resolving channel ID...`);
sub.channelId = await getChannelId(sub.url);
if (sub.channelId) {
subLog(` Resolved channel ID: ${sub.channelId}`);
changed = true;
} else {
subLog(` ⚠ Could not resolve channel ID — RSS will be skipped`);
}
}
// Step 2: RSS feed — fast, no bot detection, covers ~15 most recent
const dateMap = sub.channelId ? await fetchDatesFromRSS(sub.channelId) : {};
const rssCount = Object.keys(dateMap).length;
if (sub.channelId) subLog(` RSS feed: got dates for ${rssCount} videos`);
// Step 3: Smart early termination
// If the oldest video in the RSS feed already predates the cutoff, the
// videos missing from RSS are treated as older still, and the slower
// yt-dlp date lookup is skipped.
const needDates = unknowns.filter(v => !dateMap[v.id]);
let earlyTermination = false;
if (needDates.length > 0 && rssCount > 0) {
const rssDates = Object.values(dateMap).sort();
const oldestRssDate = rssDates[0];
if (oldestRssDate < cutoffDate) {
earlyTermination = true;
subLog(` Oldest RSS video (${oldestRssDate}) is before cutoff — ${needDates.length} older video(s) are definitely too old, skipping yt-dlp`);
} else {
subLog(` ${needDates.length} video(s) not in RSS — trying yt-dlp for dates...`);
const ytDates = await fetchUploadDates(needDates.map(v => v.id));
Object.assign(dateMap, ytDates);
const ytCount = Object.keys(ytDates).length;
if (ytCount > 0) subLog(` yt-dlp added dates for ${ytCount} more video(s)`);
}
} else if (needDates.length > 0 && rssCount === 0) {
subLog(` ${needDates.length} video(s) need dates — trying yt-dlp...`);
const ytDates = await fetchUploadDates(needDates.map(v => v.id));
Object.assign(dateMap, ytDates);
const ytCount = Object.keys(ytDates).length;
if (ytCount > 0) subLog(` yt-dlp added dates for ${ytCount} more video(s)`);
}
const gotDates = Object.keys(dateMap).length;
if (gotDates > 0 || needDates.length === 0) {
subLog(` Total dates: ${gotDates} of ${unknowns.length} videos`);
} else {
subLog(` ⚠ No dates available — skipping. Try setting YT_COOKIES_FROM in .env`);
sub.lastChecked = new Date().toISOString();
changed = true;
continue;
}
let newCount = 0;
for (const video of unknowns) {
const uploadDate = dateMap[video.id];
// Videos without a usable date are neither queued nor marked seen here,
// so they are looked at again on the next check
if (!uploadDate || uploadDate.length !== 8) {
continue;
}
if (uploadDate < cutoffDate) {
subLog(`${video.id} (${uploadDate}) — before cutoff`);
continue;
}
subLog(`${video.id}${uploadDate ? ` (${uploadDate})` : ""}${video.title.slice(0,50)}`);
const ytItemStatus = sub.autoDownload ? "approved" : "pending";
autoQueue.push({
id: `auto-${Date.now()}-${video.id}`,
videoId: video.id,
url: `https://www.youtube.com/watch?v=${video.id}`,
title: video.title,
uploadDate: uploadDate || null,
subscriptionId: sub.id,
subscriptionName: sub.name,
status: ytItemStatus,
});
queuedIds.add(video.id);
newCount++;
}
// Only add to seen list when we can PROVE a video is too old
// (dated before the cutoff, or undated while early termination applied)
const seenNow = unknowns.filter(v => {
if (queuedIds.has(v.id)) return false;
const d = dateMap[v.id];
if (d && d.length === 8 && d < cutoffDate) return true;
if (d && d.length === 8 && d >= cutoffDate) return false;
return earlyTermination;
}).map(v => v.id);
if (seenNow.length > 0) await addToSeenList(seenNow);
sub.lastChecked = new Date().toISOString();
subLog(`${newCount} video(s) queued for approval from ${sub.name}`);
changed = true;
}
} catch (err) {
// Isolate failures per subscription so one bad feed/channel cannot abort the pass
subLog(` ⚠ FAILED for ${sub.name}: ${err.message}`);
}
}
const pendingCount = autoQueue.filter(q => q.status === "pending").length;
const approvedCount = autoQueue.filter(q => q.status === "approved").length;
subLog(`Done. ${pendingCount} pending, ${approvedCount} approved in auto-queue.`);
if (changed) await saveSubscriptions(subs);
await saveAutoQueue();
// Wake up the background processor if there are approved items
if (approvedCount > 0) {
subLog(`Kicking background processor for ${approvedCount} approved item(s)...`);
kickProcessor();
}
}
// CRUD endpoints
// GET /api/subscriptions → { subscriptions: [...] } with every stored subscription
app.get("/api/subscriptions", async (req, res) => {
  res.json({ subscriptions: await loadSubscriptions() });
});
/**
 * Derive a normalized key from a YouTube URL so that different URLs pointing
 * at the same channel/playlist can be recognised as duplicates.
 *
 * Recognised shapes (path is lower-cased, trailing slashes removed):
 *   /@handle/...       → "@handle"
 *   /channel/UCxxx     → "channel/ucxxx"
 *   /c/name, /user/name → "/c/name", "/user/name"
 *   ?list=PLxxx        → "playlist/PLxxx" (list id keeps its case)
 * Anything else falls back to the normalized path; a string that is not a
 * valid URL falls back to the lower-cased input without trailing slashes.
 *
 * @param {string} url
 * @returns {string}
 */
function channelKeyFromUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return url.toLowerCase().replace(/\/+$/, "");
  }
  const pathname = parsed.pathname.toLowerCase().replace(/\/+$/, "");

  const byHandle = pathname.match(/\/(@[^/]+)/);
  if (byHandle) return byHandle[1];

  const byChannel = pathname.match(/\/(channel\/[^/]+)/);
  if (byChannel) return byChannel[1];

  const byLegacyName = pathname.match(/\/(c|user)\/([^/]+)/);
  if (byLegacyName) return byLegacyName[0];

  const listId = parsed.searchParams.get("list");
  return listId ? `playlist/${listId}` : pathname;
}
// POST /api/subscriptions — create a subscription.
// Body: { url, since?, type?, autoDownload? }
//   url          channel/playlist URL or podcast RSS feed URL (required string)
//   since        optional cutoff date; content older than this is ignored
//   type         "podcast" forces podcast handling; otherwise detected from url
//   autoDownload when exactly true, discovered items are queued as "approved"
// Responds 400 on a missing/non-string url or an unparseable `since`,
// 409 when an equivalent subscription already exists, otherwise the new
// subscription object. Name resolution and the first content check run in
// the background after the response has been sent.
app.post("/api/subscriptions", async (req, res) => {
  const { url, since, type, autoDownload } = req.body;
  // Non-string values would throw further down (url.trim / new URL)
  if (!url || typeof url !== "string") return res.status(400).json({ error: "Missing url" });
  // Validate the cutoff up front: toISOString() throws a RangeError on an
  // invalid Date, which would otherwise escape this async handler.
  let cutoff;
  if (since) {
    const sinceDate = new Date(since);
    if (Number.isNaN(sinceDate.getTime())) {
      return res.status(400).json({ error: "Invalid 'since' date" });
    }
    cutoff = sinceDate.toISOString();
  } else {
    // No cutoff provided — default to right now
    cutoff = new Date().toISOString();
  }
  const isPodcast = type === "podcast" || isPodcastFeedUrl(url);
  const subs = await loadSubscriptions();
  // Prevent duplicates
  if (isPodcast) {
    const normalizedUrl = url.trim().toLowerCase().replace(/\/+$/, "");
    if (subs.find(s => s.url.trim().toLowerCase().replace(/\/+$/, "") === normalizedUrl)) {
      return res.status(409).json({ error: "Already subscribed to this podcast" });
    }
  } else {
    const newKey = channelKeyFromUrl(url);
    if (subs.find(s => channelKeyFromUrl(s.url) === newKey)) {
      return res.status(409).json({ error: "Already subscribed to this channel" });
    }
  }
  const sub = {
    id: `sub-${Date.now()}`,
    url,
    name: "Loading...", // placeholder until the background task resolves the real name
    type: isPodcast ? "podcast" : "youtube",
    channelId: null,
    createdAt: cutoff,
    lastChecked: null,
    paused: false,
    autoDownload: autoDownload === true,
  };
  subs.push(sub);
  await saveSubscriptions(subs);
  // Respond immediately so the UI isn't blocked
  res.json(sub);
  // Background: resolve name and check for new content
  (async () => {
    try {
      if (isPodcast) {
        // Fetch podcast title from RSS feed
        const { title } = await parsePodcastRSS(url, 1);
        // Re-load before writing: the list may have changed while we were fetching
        const freshSubs = await loadSubscriptions();
        const s = freshSubs.find(x => x.id === sub.id);
        if (s) {
          s.name = title || url;
          await saveSubscriptions(freshSubs);
        }
        console.log(` 🎙 New podcast subscription: ${title} — checking for episodes...`);
      } else {
        const [name, channelId] = await Promise.all([
          fetchChannelName(url),
          getChannelId(url),
        ]);
        const freshSubs = await loadSubscriptions();
        const s = freshSubs.find(x => x.id === sub.id);
        if (s) {
          s.name = name;
          if (channelId) s.channelId = channelId;
          await saveSubscriptions(freshSubs);
        }
        console.log(` 📡 New subscription: ${name} — checking for recent videos...`);
      }
      await checkSubscriptions();
    } catch (err) {
      console.error(" ⚠ Background subscription setup error:", err.message);
    }
  })();
});
// DELETE /api/subscriptions/:id — remove a subscription together with every
// auto-queue item that originated from it (regardless of the item's status).
// Always answers { ok: true }, even when the id did not exist.
app.delete("/api/subscriptions/:id", async (req, res) => {
  const { id } = req.params;
  const remaining = (await loadSubscriptions()).filter(s => s.id !== id);
  await saveSubscriptions(remaining);

  autoQueue = autoQueue.filter(q => q.subscriptionId !== id);
  await saveAutoQueue();

  res.json({ ok: true });
});
// PUT /api/subscriptions/:id/pause — toggle the paused flag.
// Responds with the updated subscription, or 404 when the id is unknown.
app.put("/api/subscriptions/:id/pause", async (req, res) => {
  const allSubs = await loadSubscriptions();
  const target = allSubs.find(s => s.id === req.params.id);
  if (!target) {
    return res.status(404).json({ error: "Subscription not found" });
  }
  target.paused = !target.paused;
  await saveSubscriptions(allSubs);
  res.json(target);
});
// PUT /api/subscriptions/:id/since — change a subscription's cutoff date.
// Body: { since } — any value accepted by `new Date()`.
// Responds 400 when `since` is missing or not a valid date, 404 when the
// subscription does not exist, otherwise the updated subscription.
app.put("/api/subscriptions/:id/since", async (req, res) => {
  const { since } = req.body;
  if (!since) return res.status(400).json({ error: "Missing 'since' date" });
  // toISOString() throws a RangeError on an invalid Date — reject bad input
  // explicitly instead of letting the exception escape the async handler.
  const sinceDate = new Date(since);
  if (Number.isNaN(sinceDate.getTime())) {
    return res.status(400).json({ error: "Invalid 'since' date" });
  }
  const subs = await loadSubscriptions();
  const sub = subs.find(s => s.id === req.params.id);
  if (!sub) return res.status(404).json({ error: "Subscription not found" });
  sub.createdAt = sinceDate.toISOString();
  await saveSubscriptions(subs);
  res.json(sub);
});
// Debug: subscription check logs (viewable in-app)
// Returns the log of the most recent check plus a compact view of the queue.
app.get("/api/sub-check-log", (req, res) => {
  const queueSummary = autoQueue.map(({ id, videoId, title, status, subscriptionName }) => ({
    id,
    videoId,
    title,
    status,
    sub: subscriptionName,
  }));
  res.json({
    log: subCheckLog,
    autoQueueCount: autoQueue.length,
    autoQueue: queueSummary,
  });
});
// Auto-queue endpoints (frontend polls these)
// GET /api/auto-queue[?all=true]
//   items        active items (pending/approved/processing), or every item with ?all=true
//   checkRunning whether a subscription check is currently in progress
//   counts       number of queue items per status
app.get("/api/auto-queue", (req, res) => {
  const activeStatuses = ["pending", "approved", "processing"];
  const countByStatus = (status) => autoQueue.filter(q => q.status === status).length;

  let items = autoQueue;
  if (req.query.all !== "true") {
    items = autoQueue.filter(q => activeStatuses.includes(q.status));
  }

  res.json({
    items,
    checkRunning: subCheckRunning,
    counts: {
      pending: countByStatus("pending"),
      approved: countByStatus("approved"),
      processing: countByStatus("processing"),
      completed: countByStatus("completed"),
      failed: countByStatus("failed"),
    },
  });
});
// DELETE /api/auto-queue/:id — remove one item from the queue.
// Always answers { ok: true }, even when the id did not exist.
app.delete("/api/auto-queue/:id", async (req, res) => {
  const { id } = req.params;
  autoQueue = autoQueue.filter(q => q.id !== id);
  await saveAutoQueue();
  res.json({ ok: true });
});
// POST /api/auto-queue/:id/skip — remove an item from the queue and put its
// videoId on the skip list so subscription checks do not queue it again.
app.post("/api/auto-queue/:id/skip", async (req, res) => {
  const { id } = req.params;
  const target = autoQueue.find(q => q.id === id);
  if (target?.videoId) {
    await addToSkipList(target.videoId);
  }
  autoQueue = autoQueue.filter(q => q.id !== id);
  await saveAutoQueue();
  res.json({ ok: true });
});
// POST /api/auto-queue/:id/approve — move one pending item to "approved" and
// wake the background processor. 404 for an unknown id, 400 when the item is
// not currently pending.
app.post("/api/auto-queue/:id/approve", async (req, res) => {
  const target = autoQueue.find(q => q.id === req.params.id);
  if (!target) {
    return res.status(404).json({ error: "Item not found" });
  }
  if (target.status !== "pending") {
    return res.status(400).json({ error: `Cannot approve item with status '${target.status}'` });
  }
  target.status = "approved";
  await saveAutoQueue();
  kickProcessor();
  res.json({ ok: true, item: target });
});
// POST /api/auto-queue/approve-all — approve every pending item in one go.
// Saves and wakes the processor only when something actually changed.
app.post("/api/auto-queue/approve-all", async (req, res) => {
  const pendingItems = autoQueue.filter(q => q.status === "pending");
  for (const entry of pendingItems) {
    entry.status = "approved";
  }
  const approved = pendingItems.length;
  if (approved > 0) {
    await saveAutoQueue();
    kickProcessor();
  }
  res.json({ ok: true, approved });
});
// POST /api/auto-queue/:id/retry — put a failed item back to "approved",
// clear its failure details, and wake the processor. 404 for an unknown id,
// 400 when the item is not in the "failed" state.
app.post("/api/auto-queue/:id/retry", async (req, res) => {
  const target = autoQueue.find(q => q.id === req.params.id);
  if (!target) {
    return res.status(404).json({ error: "Item not found" });
  }
  if (target.status !== "failed") {
    return res.status(400).json({ error: `Cannot retry item with status '${target.status}'` });
  }
  Object.assign(target, { status: "approved", error: undefined, failedAt: undefined });
  await saveAutoQueue();
  kickProcessor();
  res.json({ ok: true, item: target });
});
// Drop every "completed" or "failed" item from the queue and report how many
// were removed. The queue file is only rewritten when something was removed.
app.post("/api/auto-queue/clear-finished", async (req, res) => {
  const unfinished = autoQueue.filter(
    (entry) => entry.status !== "completed" && entry.status !== "failed"
  );
  const removed = autoQueue.length - unfinished.length;
  autoQueue = unfinished;
  if (removed > 0) {
    await saveAutoQueue();
  }
  res.json({ ok: true, removed });
});
// Trigger a subscription check on demand. The check runs in the background:
// the response is sent right away and failures are only logged.
app.post("/api/subscriptions/check-now", async (req, res) => {
  if (subCheckRunning) {
    res.json({ ok: true, message: "Already checking" });
    return;
  }
  // Deliberately not awaited — the request must not block on the check.
  checkSubscriptions().catch((err) => {
    console.error(" ⚠ Manual subscription check error:", err.message);
  });
  res.json({ ok: true, message: "Check started" });
});
// ── Auto-download toggle per subscription ──────────────────────────────────
// Body: { enabled }. Only a literal `true` switches auto-download on; any
// other value (or a missing field) switches it off.
app.put("/api/subscriptions/:id/auto-download", async (req, res) => {
  const allSubs = await loadSubscriptions();
  const subscription = allSubs.find((candidate) => candidate.id === req.params.id);
  if (!subscription) {
    res.status(404).json({ error: "Subscription not found" });
    return;
  }
  subscription.autoDownload = req.body.enabled === true;
  await saveSubscriptions(allSubs);
  res.json({ ok: true, subscription });
});
// ── Background processing status & configuration ───────────────────────────
// Snapshot of the background processor: whether it is running, a trimmed view
// of the current item, the last completed item, the active config, queue
// counts per status, and the 20 most recent log entries.
app.get("/api/processing/status", (req, res) => {
  const countWithStatus = (status) =>
    autoQueue.filter((entry) => entry.status === status).length;

  // Expose only id/title/url of the item being processed.
  const active = processingState.currentItem;
  let currentItem = null;
  if (active) {
    currentItem = { id: active.id, title: active.title, url: active.url };
  }

  res.json({
    running: processingState.running,
    currentItem,
    lastCompleted: processingState.lastCompleted,
    config: processingConfig,
    counts: {
      approved: countWithStatus("approved"),
      processing: countWithStatus("processing"),
      pending: countWithStatus("pending"),
    },
    log: processingState.log.slice(-20),
  });
});
// Update the processor configuration. Body fields are optional:
//   delaySeconds — non-negative number, clamped to the range 0..3600
//   enabled      — boolean; switching it on also kicks the processor
// Fields of any other type are ignored. The config is saved on every call.
app.put("/api/processing/config", async (req, res) => {
  const { delaySeconds, enabled } = req.body;

  if (typeof delaySeconds === "number" && delaySeconds >= 0) {
    const capped = Math.min(3600, delaySeconds);
    processingConfig.delaySeconds = Math.max(0, capped);
  }

  if (typeof enabled === "boolean") {
    processingConfig.enabled = enabled;
    // Start working through already-approved items as soon as we're enabled.
    if (enabled) {
      kickProcessor();
    }
  }

  await saveProcessingConfig();
  res.json({ ok: true, config: processingConfig });
});
// Rush: set the rush flag so the delay before the next queued item is skipped,
// then kick the processor in case it is idle while approved items are waiting.
app.post("/api/processing/rush", (req, res) => {
  processingState.rush = true;
  kickProcessor();
  const message = "Rush mode enabled — next item will process immediately";
  res.json({ ok: true, message });
});
// Full processing log, untruncated (for debugging / monitoring).
app.get("/api/processing/log", (req, res) => {
  const { log } = processingState;
  res.json({ log });
});
// ── Full pipeline: URL → audio → transcript → topic analysis ──────────────
// POST /api/process
//
// Request body: { url, apiKey?, model?, type?, title?, uploadDate?, episodeId? }
//   url        — YouTube URL / bare video id, or a direct podcast audio URL
//   apiKey     — client-supplied Gemini key (resolved through resolveApiKey)
//   model      — analysis model override (transcription always uses Flash)
//   type       — "podcast" forces the podcast path; otherwise inferred from
//                the URL's file extension
//   title / uploadDate / episodeId — optional metadata supplied by the caller
//
// Validation failures are answered with plain JSON (400/409). Once validation
// passes the response becomes a Server-Sent Events stream carrying "status",
// "log", "error" and finally "result" events.
//
// Everything after the free-tier slot is claimed runs inside one try/finally
// so that the slot and the temp directory are released on every exit path,
// including unexpected throws during validation or temp-dir creation.
app.post("/api/process", async (req, res) => {
  const { url, apiKey: clientKey, model, type: itemType, title: itemTitle, uploadDate: itemUploadDate, episodeId } = req.body;

  // Free tier: unlicensed users can summarize one video at a time. They
  // still bring their own Gemini key — same as paid users today; the key
  // can come from either the StartOS config action (server-side) or the
  // web UI Settings panel (client-side). The future "bundled key" relay
  // (paid users' requests proxied through the operator's service) isn't
  // built yet, so there's nothing here that gates key sourcing by tier.
  const isFreeUser = !(LIC.state === "licensed" && LIC.entitlements.has("core"));
  if (isFreeUser) {
    if (freeJobInFlight) {
      return res.status(409).json({
        error: "processing_in_progress",
        message:
          "A summary is already being processed. Free mode handles one video at a time — wait for the current one to finish.",
      });
    }
    freeJobInFlight = true;
  }

  // Created inside the try block; stays null if we bail out before that.
  let tmpDir = null;

  try {
    const apiKey = resolveApiKey(clientKey);
    // A non-string url would make the string helpers below throw, so treat it
    // the same as a missing one.
    if (!url || typeof url !== "string") {
      return res.status(400).json({ error: "Missing url" });
    }
    if (!apiKey) {
      return res.status(400).json({ error: "No API key provided. Set GEMINI_API_KEY in .env or enter one in Settings." });
    }

    // Determine if this is a podcast episode or YouTube video
    const isPodcast = itemType === "podcast" || /\.(mp3|m4a|ogg|opus|wav|aac)(\?|$)/i.test(url);
    const videoId = isPodcast ? (episodeId || url) : extractVideoId(url);
    if (!isPodcast && !videoId) {
      return res.status(400).json({ error: "Invalid YouTube URL" });
    }

    tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "yts-"));
    const audioExt = isPodcast ? (url.match(/\.(mp3|m4a|ogg|opus|wav|aac)/i)?.[1] || "mp3") : "mp3";
    const audioPath = path.join(tmpDir, `audio.${audioExt}`);
    const mimeType = { mp3: "audio/mp3", m4a: "audio/mp4", ogg: "audio/ogg", opus: "audio/opus", wav: "audio/wav", aac: "audio/aac" }[audioExt] || "audio/mp3";

    const pipelineStart = Date.now();

    // Set up SSE
    res.writeHead(200, {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
      Connection: "keep-alive",
    });

    // Helper to send log entries with elapsed time.
    // Each call prints to the console, is remembered in logHistory (saved with
    // the history record) and is streamed as both a "status" and a "log" event.
    const logHistory = [];
    function log(step, message, detail) {
      const elapsed = ((Date.now() - pipelineStart) / 1000).toFixed(1);
      const logMsg = `[${elapsed}s] ${message}`;
      console.log(` ${logMsg}`);
      logHistory.push({ elapsed, message, detail: detail || null });
      sendEvent(res, "status", { step, message });
      sendEvent(res, "log", { elapsed, message, detail: detail || null });
    }

    // ── Step 1: Download audio ──
    const dlStart = Date.now();
    let videoTitle = itemTitle || "Untitled";
    let videoUploadDate = itemUploadDate || "";

    if (isPodcast) {
      log(1, "Downloading podcast episode...");
      await downloadPodcastAudio(url, audioPath);
      const stats = await fs.stat(audioPath);
      const sizeMB = (stats.size / (1024 * 1024)).toFixed(1);
      const dlTime = ((Date.now() - dlStart) / 1000).toFixed(1);
      log(1, `Episode downloaded (${sizeMB} MB) in ${dlTime}s`, `File size: ${sizeMB} MB`);
      log(1, `Episode: ${videoTitle}`);
    } else {
      log(1, "Downloading audio from YouTube...");
      const dlBaseArgs = [
        "-x",
        "--audio-format", "mp3",
        "--audio-quality", "5",
        "-o", audioPath,
        "--no-playlist",
        "--print", "%(title)s|||%(upload_date)s",
        "--no-simulate",
      ];
      const dlUrl = `https://www.youtube.com/watch?v=${videoId}`;
      const cookieArgs = ytCookieArgs();
      const hasCookies = cookieArgs.length > 0;
      let usedCookies = false;
      let dlStdout = "";

      // Helper: attempt a single yt-dlp download
      async function attemptDownload(args, label) {
        const result = await execFileAsync("yt-dlp", args, { timeout: 600000 });
        return result.stdout || "";
      }

      // Helper: check if error is a bot detection / rate limit block
      function isBotBlock(errText) {
        return /Sign in|confirm you're not a bot|bot detection|JsChallengeProvider|js.*challenge|HTTP Error 403|Too Many Requests|429/i.test(errText);
      }

      // ── Smart download with retry ──
      // Strategy: cookies → no-cookies → wait & retry (up to 3 attempts with increasing delays)
      // Only bot-detection / rate-limit failures are retried; any other yt-dlp
      // error ends the loop immediately (see the catch block below).
      const MAX_RETRIES = 3;
      const RETRY_DELAYS = [30, 60, 120]; // seconds — escalating backoff
      let downloaded = false;
      let lastError = "";

      for (let attempt = 0; attempt <= MAX_RETRIES && !downloaded; attempt++) {
        // On retry attempts, wait before trying again
        if (attempt > 0) {
          const waitSec = RETRY_DELAYS[Math.min(attempt - 1, RETRY_DELAYS.length - 1)];
          log(1, `⏳ YouTube is rate-limiting. Waiting ${waitSec}s before retry ${attempt}/${MAX_RETRIES}...`);
          sendEvent(res, "status", { step: 1, message: `Rate limited — retrying in ${waitSec}s (attempt ${attempt}/${MAX_RETRIES})` });
          await new Promise(r => setTimeout(r, waitSec * 1000));
          log(1, `Retrying download (attempt ${attempt}/${MAX_RETRIES})...`);
          // Clean up any partial file from previous attempt
          await fs.unlink(audioPath).catch(() => {});
        }

        // Try with cookies first
        if (hasCookies && !usedCookies) {
          try {
            log(1, attempt === 0 ? "Trying download with browser cookies (ad-free)..." : "Retrying with cookies...");
            dlStdout = await attemptDownload([...dlBaseArgs, ...cookieArgs, dlUrl], "cookies");
            usedCookies = true;
            downloaded = true;
            break;
          } catch (cookieErr) {
            const cookieMsg = (cookieErr.stderr || "") + " " + (cookieErr.message || "");
            if (attempt === 0) log(1, `⚠ Cookie download failed: ${cookieMsg.trim().slice(0, 200)}`);
            log(1, "Retrying without cookies...");
            await fs.unlink(audioPath).catch(() => {});
          }
        }

        // Try without cookies
        if (!downloaded) {
          try {
            dlStdout = await attemptDownload([...dlBaseArgs, dlUrl], "no-cookies");
            downloaded = true;
            break;
          } catch (dlErr) {
            lastError = (dlErr.stderr || "") + " " + (dlErr.stdout || "") + " " + (dlErr.message || "");
            const blocked = isBotBlock(lastError);
            if (blocked && attempt < MAX_RETRIES) {
              log(1, `⚠ YouTube bot detection triggered`);
              // Will loop back and wait
              continue;
            }
            if (blocked && attempt === MAX_RETRIES) {
              // Last resort: try yt-dlp auto-update in case there's a newer version that handles this
              log(1, "All retries exhausted — attempting yt-dlp auto-update as last resort...");
              const updateResult = await autoUpdateYtdlp();
              if (updateResult.success) {
                log(1, "yt-dlp updated! Final retry...");
                try {
                  const retryResult = await attemptDownload([...dlBaseArgs, dlUrl], "post-update");
                  dlStdout = retryResult;
                  downloaded = true;
                  break;
                } catch { /* fall through to error */ }
              }
            }
            // Non-bot error or exhausted retries
            if (!downloaded) {
              log(1, `⚠ yt-dlp error: ${lastError.trim().slice(0, 300)}`);
              // Stop here. Without this break a non-bot failure (private or
              // removed video, bad id, ...) would be retried MAX_RETRIES times
              // behind a misleading "rate-limiting" wait message.
              break;
            }
          }
        }
      }

      if (!downloaded) {
        const blocked = isBotBlock(lastError);
        let hint = "";
        if (blocked) {
          hint = "\n\nYouTube is temporarily blocking downloads from your IP address. " +
            "This is usually caused by:\n" +
            "• Recent VPN use (YouTube flags VPN IPs)\n" +
            "• Too many downloads in a short period\n" +
            "• YouTube's general anti-bot measures\n\n" +
            "What to try:\n" +
            "• Wait 10-30 minutes and try again\n" +
            "• Disconnect any VPN/proxy\n" +
            "• Upload fresh cookies.txt via Settings\n" +
            "• Try a different network (mobile hotspot, etc.)";
        }
        // Only the bot-block path actually goes through all retries.
        const retryNote = blocked ? ` after ${MAX_RETRIES} retries` : "";
        throw new Error(`Download failed${retryNote}.${hint}\n\nLast error: ${lastError.trim().slice(0, 300)}`);
      }

      if (!usedCookies && hasCookies) {
        log(1, "⚠ Downloaded without cookies — audio may contain ads");
      }

      const stats = await fs.stat(audioPath);
      const sizeMB = (stats.size / (1024 * 1024)).toFixed(1);
      const dlTime = ((Date.now() - dlStart) / 1000).toFixed(1);
      log(1, `Audio downloaded (${sizeMB} MB) in ${dlTime}s`, `File size: ${sizeMB} MB`);

      // Extract title from the --print output of the download command
      const fallbackTitle = videoTitle !== "Untitled" ? videoTitle : null;
      let gotTitle = false;

      // First try: parse title from the download stdout (most reliable — same call that succeeded)
      if (dlStdout) {
        const printLines = dlStdout.split("\n").map(l => l.trim()).filter(Boolean);
        for (const line of printLines) {
          if (line.includes("|||")) {
            const sep = line.indexOf("|||");
            const t = line.slice(0, sep).trim();
            const d = line.slice(sep + 3).trim();
            if (t && t !== "NA") {
              videoTitle = t;
              if (d && d !== "NA") videoUploadDate = d;
              gotTitle = true;
              log(1, `Video title: ${videoTitle}`);
              break;
            }
          }
        }
      }

      // Second try: separate yt-dlp call (no cookies needed for public metadata)
      if (!gotTitle) {
        try {
          const { stdout } = await execFileAsync("yt-dlp", [
            "--print", "%(title)s|||%(upload_date)s",
            "--no-download",
            `https://www.youtube.com/watch?v=${videoId}`,
          ], { timeout: 15000 });
          const raw = stdout.trim();
          const sep = raw.indexOf("|||");
          if (sep > 0) {
            videoTitle = raw.slice(0, sep).trim() || fallbackTitle || "Untitled";
            const d = raw.slice(sep + 3).trim();
            if (d && d !== "NA") videoUploadDate = d || videoUploadDate;
          } else {
            videoTitle = raw || fallbackTitle || "Untitled";
          }
          gotTitle = videoTitle !== "Untitled";
          if (gotTitle) log(1, `Video title: ${videoTitle}`);
        } catch {
          // Title fetch failed
        }
      }

      // Third try: use the queue-provided title
      if (!gotTitle && fallbackTitle) {
        videoTitle = fallbackTitle;
        log(1, `Using queue title: ${fallbackTitle}`);
        gotTitle = true;
      }
      if (!gotTitle) {
        log(1, "⚠ Could not fetch video title");
      }
    }

    // ── Step 2: Transcribe audio ──
    // Detect audio duration to choose strategy
    const audioDuration = await getAudioDuration(audioPath);
    const audioDurMin = audioDuration ? (audioDuration / 60).toFixed(1) : "unknown";
    log(2, `Audio duration: ${audioDuration ? formatTime(Math.floor(audioDuration)) : "unknown"} (${audioDurMin} min)`);

    // Strategy:
    // < 60 min AND < 30 MB → Flash on full file (fast, cheap, reliable)
    // ≥ 60 min OR ≥ 30 MB → go straight to chunked transcription with Flash (45-min chunks)
    // If full-file transcription is truncated or empty → fall back to chunks
    const CHUNK_TIME_THRESHOLD = 60 * 60; // 60 minutes
    const CHUNK_SIZE_THRESHOLD = 30 * 1024 * 1024; // 30 MB
    let audioFileSize = 0;
    try { audioFileSize = (await fs.stat(audioPath)).size; } catch {}
    const audioSizeMB = (audioFileSize / (1024 * 1024)).toFixed(1);
    const needsChunking = (audioDuration && audioDuration >= CHUNK_TIME_THRESHOLD) || (audioFileSize >= CHUNK_SIZE_THRESHOLD);
    const transcriptionModel = "gemini-3-flash-preview"; // Flash handles transcription best
    const analysisModel = model || "gemini-3.1-pro-preview";
    if (needsChunking) {
      const reason = audioDuration >= CHUNK_TIME_THRESHOLD ? `${audioDurMin} min` : `${audioSizeMB} MB`;
      log(2, `Large audio (${reason}) — will use chunked transcription with ${transcriptionModel}`);
    }

    const ai = new GoogleGenAI({ apiKey, httpOptions: { timeout: 900_000, headersTimeout: 900_000 } });

    // NOTE: the template literal body below is kept flush-left on purpose —
    // any indentation would become part of the prompt text.
    const transcriptionPrompt = (titleHint) => `${titleHint ? `Video title: "${titleHint}"\n\n` : ""}Transcribe this audio completely and verbatim. Include timestamps at regular intervals (every 15-30 seconds or at natural pauses).
Format each line as:
[MM:SS] The spoken text here...
Rules:
- Transcribe EVERY word spoken, do not skip or summarize anything.
- Use [MM:SS] or [H:MM:SS] timestamp format at the start of each line.
- Start a new timestamped line every 15-30 seconds or at natural speech pauses.
- Include filler words (um, uh, you know) for accuracy.
- If multiple speakers are distinguishable, identify them by name when possible (from introductions, context clues, or how they address each other). Format as: [MM:SS] Name: text. If you cannot determine a name, use descriptive labels like "Host" and "Guest" rather than "Speaker 1" and "Speaker 2".
Return ONLY the timestamped transcript, nothing else.`;

    // Helper: upload a single audio file and transcribe it.
    // Returns the raw generateContent result (possibly with empty text after
    // all empty-response retries). The uploaded file is deleted from the File
    // API on every exit path, including failures.
    async function transcribeSingleFile(filePath, mType, titleHint, modelName, offsetSeconds = 0) {
      const upStart = Date.now();
      log(2, `Uploading audio${offsetSeconds > 0 ? ` (offset ${formatTime(offsetSeconds)})` : ""} to Gemini File API...`);
      const uploaded = await ai.files.upload({ file: filePath, config: { mimeType: mType } });
      const upTime = ((Date.now() - upStart) / 1000).toFixed(1);
      log(2, `Audio uploaded in ${upTime}s`, `File ref: ${uploaded.name}`);

      let f = uploaded;
      try {
        // Wait for processing
        const pStart = Date.now();
        while (f.state === "PROCESSING") {
          const ws = ((Date.now() - pStart) / 1000).toFixed(0);
          log(2, `Waiting for Gemini to process audio... (${ws}s)`);
          await new Promise(r => setTimeout(r, 3000));
          f = await ai.files.get({ name: f.name });
        }
        if (f.state === "FAILED") throw new Error("Gemini failed to process audio file.");
        const pTime = ((Date.now() - pStart) / 1000).toFixed(1);
        log(2, `Audio processed in ${pTime}s. Transcribing with ${modelName}...`);

        const prompt = transcriptionPrompt(titleHint);
        // Only use thinkingConfig for Flash models — Pro doesn't support "minimal"
        const txConfig = modelName.includes("flash")
          ? { thinkingConfig: { thinkingLevel: "minimal" } }
          : {};

        // Retry logic that handles both API errors (via retryGemini) AND empty responses
        const EMPTY_RETRIES = 3;
        let result;
        for (let emptyAttempt = 0; emptyAttempt < EMPTY_RETRIES; emptyAttempt++) {
          result = await retryGemini(
            () => ai.models.generateContent({
              model: modelName,
              config: {
                ...txConfig,
                // Disable safety filters to prevent content blocking
                safetySettings: [
                  { category: "HARM_CATEGORY_HARASSMENT", threshold: "BLOCK_NONE" },
                  { category: "HARM_CATEGORY_HATE_SPEECH", threshold: "BLOCK_NONE" },
                  { category: "HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold: "BLOCK_NONE" },
                  { category: "HARM_CATEGORY_DANGEROUS_CONTENT", threshold: "BLOCK_NONE" },
                ],
              },
              contents: [{
                role: "user",
                parts: [
                  { fileData: { fileUri: f.uri, mimeType: mType } },
                  { text: prompt },
                ],
              }],
            }),
            { retries: 3, delayMs: 5000, label: `Transcription${offsetSeconds > 0 ? ` (chunk@${formatTime(offsetSeconds)})` : ""}`, log: (msg) => log(2, msg) }
          );
          const responseText = safeText(result);
          if (responseText) break; // Got actual content, done

          // Log why it's empty
          const candidate = result?.candidates?.[0];
          const finishReason = candidate?.finishReason || "UNKNOWN";
          const blockReason = result?.promptFeedback?.blockReason || "none";
          log(2, `⚠ Empty response (attempt ${emptyAttempt + 1}/${EMPTY_RETRIES}) — finishReason: ${finishReason}, blockReason: ${blockReason}`);
          if (emptyAttempt < EMPTY_RETRIES - 1) {
            const waitSec = 10 * (emptyAttempt + 1);
            log(2, `Waiting ${waitSec}s before retry...`);
            await new Promise(r => setTimeout(r, waitSec * 1000));
          }
        }
        return result;
      } finally {
        // Clean up uploaded file — best effort, a failed delete must not mask
        // the transcription result or the original error.
        try { await ai.files.delete({ name: f.name }); } catch {}
      }
    }

    // ── Helper: chunked transcription for long audio ──
    // Splits the source into 45-minute chunks, transcribes each one and merges
    // the entries on a single timeline. Returns null when splitting produced
    // at most one chunk, otherwise { entries, cost }.
    async function transcribeChunked(srcPath, srcMime, title, modelName, logFn) {
      const chunkDir = path.join(os.tmpdir(), `yt-chunks-${Date.now()}`);
      await fs.mkdir(chunkDir, { recursive: true });
      try {
        const audioChunks = await splitAudioFile(srcPath, chunkDir, 2700); // 45 min chunks
        if (!audioChunks || audioChunks.length <= 1) return null; // splitting not needed
        logFn(`Split audio into ${audioChunks.length} chunks for transcription`);

        const allEntries = [];
        let totalIn = 0, totalOut = 0;
        for (const chunk of audioChunks) {
          logFn(`Transcribing chunk ${chunk.index + 1}/${audioChunks.length} (starts at ${formatTime(chunk.startOffset)})...`);
          const chunkResult = await transcribeSingleFile(
            chunk.path, "audio/mpeg", title,
            modelName,
            chunk.startOffset
          );
          const chunkUsage = chunkResult.usageMetadata || {};
          const chunkCost = calcCost(modelName, chunkUsage);
          totalIn += chunkCost.inputTokens;
          totalOut += chunkCost.outputTokens;

          const chunkText = safeText(chunkResult);
          if (!chunkText) {
            logFn(`⚠ Chunk ${chunk.index + 1} returned empty response — skipping`);
            continue;
          }
          const chunkEntries = parseTimestampedTranscript(chunkText);

          // Programmatically adjust timestamps: the model transcribes from 0:00 relative
          // to the chunk, so add the chunk's startOffset to get real video timestamps
          if (chunk.startOffset > 0) {
            // Check if the model already adjusted (first entry near the startOffset)
            const firstOffset = chunkEntries.length > 0 ? chunkEntries[0].offset : 0;
            const alreadyAdjusted = firstOffset >= chunk.startOffset * 0.8;
            if (!alreadyAdjusted) {
              for (const e of chunkEntries) {
                e.offset += chunk.startOffset;
              }
              logFn(`Adjusted chunk ${chunk.index + 1} timestamps by +${formatTime(chunk.startOffset)}`);
            }
          }
          logFn(`Chunk ${chunk.index + 1}: ${chunkEntries.length} segments, last timestamp ${chunkEntries.length > 0 ? formatTime(chunkEntries[chunkEntries.length - 1].offset) : "N/A"}`);

          // Merge: skip entries that overlap with already-transcribed content
          const lastExistingTime = allEntries.length > 0 ? allEntries[allEntries.length - 1].offset : -1;
          for (const e of chunkEntries) {
            if (e.offset > lastExistingTime) allEntries.push(e);
          }
        }

        // Recalculate durations
        for (let i = 0; i < allEntries.length - 1; i++) {
          allEntries[i].duration = allEntries[i + 1].offset - allEntries[i].offset;
        }
        if (allEntries.length > 0) allEntries[allEntries.length - 1].duration = 15;

        logFn(`Chunked transcription complete: ${allEntries.length} total segments`);
        return {
          entries: allEntries,
          cost: {
            inputTokens: totalIn, outputTokens: totalOut, thinkingTokens: 0,
            totalTokens: totalIn + totalOut,
            totalCost: "0", totalCostDisplay: "",
          },
        };
      } finally {
        try { await fs.rm(chunkDir, { recursive: true, force: true }); } catch {}
      }
    }

    let entries;
    let transcriptText = "";
    let txCost = { inputTokens: 0, outputTokens: 0, thinkingTokens: 0, totalTokens: 0, totalCost: "0", totalCostDisplay: "$0.00" };
    const txStart = Date.now();

    if (needsChunking) {
      // ── Very long audio: go straight to chunked transcription ──
      log(2, `Skipping full-file attempt — using chunked transcription for ${audioDurMin} min audio`);
      const chunkedResult = await transcribeChunked(audioPath, mimeType, videoTitle, transcriptionModel, (msg) => log(2, msg));
      if (chunkedResult && chunkedResult.entries.length > 0) {
        entries = chunkedResult.entries;
        txCost = chunkedResult.cost;
      } else {
        log(2, `⚠ Chunked transcription returned no entries, trying full file as last resort...`);
        entries = null; // fall through to full-file attempt below
      }
    }

    if (!entries) {
      // ── Normal: transcribe full file ──
      const transcriptResult = await transcribeSingleFile(audioPath, mimeType, videoTitle, transcriptionModel);
      transcriptText = safeText(transcriptResult);
      if (!transcriptText) {
        log(2, `⚠ Full-file transcription returned empty — falling back to chunked transcription...`);
        const chunkedResult = await transcribeChunked(audioPath, mimeType, videoTitle, transcriptionModel, (msg) => log(2, msg));
        if (chunkedResult && chunkedResult.entries.length > 0) {
          entries = chunkedResult.entries;
          txCost = chunkedResult.cost;
        } else {
          throw new Error("Gemini returned empty transcription for both full file and chunked attempts. Try again or use a shorter video.");
        }
      } else {
        const txUsage = transcriptResult.usageMetadata || {};
        txCost = calcCost(transcriptionModel, txUsage);
        const txTime = ((Date.now() - txStart) / 1000).toFixed(1);
        log(2, `Transcription complete in ${txTime}s`, `${transcriptText.length} chars received`);
        entries = parseTimestampedTranscript(transcriptText);
        log(2, `Parsed ${entries.length} transcript segments`);
      }

      // ── Truncation detection → fall back to chunks ──
      if (audioDuration && entries.length > 0) {
        const lastEntryTime = entries[entries.length - 1].offset;
        const coverageRatio = lastEntryTime / audioDuration;
        const missingSeconds = audioDuration - lastEntryTime;
        if (coverageRatio < 0.90 && missingSeconds > 120) {
          log(2, `⚠ Transcript truncated — covers ${formatTime(lastEntryTime)} of ${formatTime(Math.floor(audioDuration))} (${(coverageRatio * 100).toFixed(0)}%)`);
          log(2, `Falling back to chunked transcription...`);
          const chunkedResult = await transcribeChunked(audioPath, mimeType, videoTitle, transcriptionModel, (msg) => log(2, msg));
          if (chunkedResult && chunkedResult.entries.length > 0) {
            entries = chunkedResult.entries;
            txCost = chunkedResult.cost;
            const finalCoverage = entries[entries.length - 1].offset;
            log(2, `Coverage after chunking: ${formatTime(finalCoverage)} of ${formatTime(Math.floor(audioDuration))}`);
          }
        } else {
          log(2, `Transcript coverage: ${formatTime(lastEntryTime)} of ${formatTime(Math.floor(audioDuration))} (${(coverageRatio * 100).toFixed(0)}%) — OK`);
        }
      }
    }

    const txTotalTime = ((Date.now() - txStart) / 1000).toFixed(1);
    log(2, `Total transcription time: ${txTotalTime}s — ${entries?.length ?? 0} segments`);

    if (!entries || entries.length === 0) {
      const preview = (transcriptText || "").slice(0, 500).replace(/\n/g, " ↵ ");
      log(2, `⚠ Transcript parse failed. Preview: ${preview}`);
      sendEvent(res, "error", { message: "Failed to parse transcript." });
      sendEvent(res, "result", { videoId, entries: [], chunks: [], rawTranscript: transcriptText });
      res.end();
      return;
    }

    // ── Step 3: Topic analysis with model fallback ──
    // The requested model is tried first, then the remaining models in order.
    const analysisFallbacks = [
      analysisModel,
      ...[
        "gemini-3.1-pro-preview",
        "gemini-3-pro-preview",
        "gemini-3-flash-preview",
        "gemini-2.5-flash",
      ].filter(m => m !== analysisModel),
    ];
    const analysisPrompt = buildAnalysisPrompt(entries);
    let analysisResult = null;
    let usedAnalysisModel = analysisModel;
    const anaStart = Date.now();

    for (const tryModel of analysisFallbacks) {
      try {
        log(3, `Analyzing topics across ${entries.length} segments with ${tryModel}...`);
        analysisResult = await retryGemini(
          () => ai.models.generateContent({
            model: tryModel,
            contents: [
              {
                role: "user",
                parts: [{ text: analysisPrompt }],
              },
            ],
          }),
          { retries: 2, delayMs: 5000, label: "Analysis", log: (msg) => log(3, msg) }
        );
        usedAnalysisModel = tryModel;
        break;
      } catch (fallbackErr) {
        const msg = fallbackErr?.message || String(fallbackErr);
        log(3, `${tryModel} failed: ${msg.slice(0, 150)}`);
        if (tryModel !== analysisFallbacks[analysisFallbacks.length - 1]) {
          log(3, `Falling back to next model...`);
        }
      }
    }

    if (!analysisResult) {
      throw new Error("All analysis models failed. Please try again later.");
    }
    const analysisText = safeText(analysisResult);
    if (!analysisText) {
      throw new Error("Gemini returned an empty analysis. The transcript may be too long for the model. Try again.");
    }
    const anaTime = ((Date.now() - anaStart) / 1000).toFixed(1);
    const anaUsage = analysisResult.usageMetadata || {};
    const anaCost = calcCost(usedAnalysisModel, anaUsage);

    // Parse the analysis JSON (optionally wrapped in a ``` code fence)
    let analysisJson;
    try {
      let jsonStr = analysisText.trim();
      const codeBlockMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
      if (codeBlockMatch) jsonStr = codeBlockMatch[1].trim();
      analysisJson = JSON.parse(jsonStr);
      // Well-formed JSON with the wrong shape is handled like invalid JSON, so
      // the client still receives the raw transcript instead of a bare error.
      if (!Array.isArray(analysisJson?.sections)) {
        throw new Error("response has no \"sections\" array");
      }
    } catch (e) {
      console.error("Failed to parse analysis JSON:", e.message);
      sendEvent(res, "error", { message: "Topic analysis returned invalid JSON. Returning raw transcript." });
      sendEvent(res, "result", { videoId, entries, chunks: [], rawTranscript: transcriptText });
      res.end();
      return;
    }

    // Build final chunks: clamp each section's index range to the transcript
    // and drop sections that end up without any entries.
    const chunks = analysisJson.sections.map((section) => {
      const start = Math.max(0, section.startIndex);
      const end = Math.min(entries.length - 1, section.endIndex);
      const sectionEntries = entries.slice(start, end + 1);
      return {
        title: section.title,
        summary: section.summary,
        entries: sectionEntries,
        startTime: sectionEntries[0]?.offset || 0,
      };
    }).filter((c) => c.entries.length > 0);

    const totalTime = ((Date.now() - pipelineStart) / 1000).toFixed(1);
    log(3, `Topic analysis complete in ${anaTime}s — found ${chunks.length} topics`);
    log(3, `Pipeline finished in ${totalTime}s`);

    // Save to history — skipped for free-tier users so we don't pollute
    // the host's library with anonymous summaries. The result still streams
    // back so the UI can render it; it just isn't persisted.
    const contentType = isPodcast ? "podcast" : "youtube";
    const historyId = isFreeUser
      ? null
      : await saveToHistory(videoId, url, videoTitle, chunks, entries, logHistory, videoUploadDate, contentType).catch(() => null);

    sendEvent(res, "result", { videoId, videoTitle, entries, chunks, historyId, type: contentType });
    res.end();
  } catch (err) {
    console.error("Pipeline error:", err);
    // Before the SSE headers go out we can still answer with plain JSON;
    // afterwards the failure has to travel as an "error" event.
    if (!res.headersSent) {
      res.status(500).json({ error: err.message });
    } else {
      sendEvent(res, "error", { message: err.message });
      res.end();
    }
  } finally {
    if (isFreeUser) freeJobInFlight = false;
    // Clean up temp files (only if the temp directory was actually created)
    if (tmpDir) {
      try {
        await fs.rm(tmpDir, { recursive: true, force: true });
      } catch {}
    }
  }
});
// ── Helpers ────────────────────────────────────────────────────────────────
// ── Audio duration helper (ffprobe) ─────────────────────────────────────
// Asks ffprobe for the container duration of `filePath`.
// Resolves to the duration in seconds, or null when ffprobe fails, times out
// (15 s) or prints something that is not a number.
async function getAudioDuration(filePath) {
  const probeArgs = [
    "-v", "error",
    "-show_entries", "format=duration",
    "-of", "default=noprint_wrappers=1:nokey=1",
    filePath,
  ];
  try {
    const probe = await execFileAsync("ffprobe", probeArgs, { timeout: 15000 });
    const seconds = Number.parseFloat(probe.stdout.trim());
    if (Number.isNaN(seconds)) {
      return null;
    }
    return seconds;
  } catch {
    return null;
  }
}
// ── Split audio into chunks with ffmpeg ─────────────────────────────────
// Cuts `inputPath` into consecutive `chunk_<i>.mp3` files of at most
// `chunkSeconds` seconds each inside `outputDir`.
//
// Returns null when the duration is unknown or the file already fits in one
// chunk; otherwise an array of { path, startOffset, index } in playback
// order, where startOffset is the chunk's start time in seconds.
//
// Chunks are always written as .mp3. MP3 input is stream-copied (fast,
// lossless). Any other input (m4a/ogg/opus/wav/aac podcasts) is re-encoded
// with ffmpeg's default encoder for the .mp3 output, because ffmpeg's MP3
// muxer refuses stream-copied non-MP3 audio. The input type is judged by file
// extension only.
//
// Throws RangeError when `chunkSeconds` is not a positive number (the loop
// below would otherwise never advance), and rethrows any ffmpeg failure.
async function splitAudioFile(inputPath, outputDir, chunkSeconds = 2700) {
  const duration = await getAudioDuration(inputPath);
  if (!duration || duration <= chunkSeconds) return null; // no split needed
  if (!(chunkSeconds > 0)) {
    throw new RangeError(`chunkSeconds must be a positive number, got ${chunkSeconds}`);
  }

  const isMp3Input = path.extname(inputPath).toLowerCase() === ".mp3";
  // -vn on the re-encode path keeps embedded cover art out of the chunk.
  const codecArgs = isMp3Input ? ["-acodec", "copy"] : ["-vn"];
  // Re-encoding is much slower than a stream copy, so allow it more time.
  const timeout = isMp3Input ? 120000 : 600000;

  const chunks = [];
  let startSec = 0;
  let i = 0;
  while (startSec < duration) {
    const chunkPath = path.join(outputDir, `chunk_${i}.mp3`);
    const segLen = Math.min(chunkSeconds, duration - startSec);
    await execFileAsync("ffmpeg", [
      "-y", "-i", inputPath,
      "-ss", String(startSec),
      "-t", String(segLen),
      ...codecArgs,
      chunkPath,
    ], { timeout });
    chunks.push({ path: chunkPath, startOffset: startSec, index: i });
    startSec += chunkSeconds;
    i++;
  }
  return chunks;
}
// Write one Server-Sent Events frame to `res`: an `event:` line, a `data:`
// line holding the JSON-serialised payload, and the blank line that ends it.
function sendEvent(res, event, data) {
  const frameLines = [`event: ${event}`, `data: ${JSON.stringify(data)}`];
  res.write(`${frameLines.join("\n")}\n\n`);
}
/**
 * Extract the 11-character YouTube video id from a URL or a bare id.
 *
 * Recognised forms:
 *   - youtube.com/watch?v=ID (also when `v` is not the first query parameter)
 *   - youtu.be/ID
 *   - youtube.com/embed/ID, youtube-nocookie.com/embed/ID, youtube.com/v/ID
 *   - youtube.com/shorts/ID, youtube.com/live/ID
 *   - a bare 11-character id (surrounding whitespace is ignored)
 *
 * @param {string} url - URL or id as typed/pasted by the user.
 * @returns {string|null} The video id, or null when `url` is not a string or
 *   contains no recognisable id.
 */
function extractVideoId(url) {
  if (typeof url !== "string") return null;
  const input = url.trim();
  if (!input) return null;

  // Order matters: the first pattern is the original one, so every input that
  // matched before still yields exactly the same id.
  const patterns = [
    /(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/|youtube\.com\/v\/)([a-zA-Z0-9_-]{11})/,
    // `v` preceded by other query parameters, e.g. watch?feature=share&v=ID
    /youtube\.com\/watch\?(?:[^#\s]*&)?v=([a-zA-Z0-9_-]{11})/,
    // Shorts, live streams and privacy-enhanced embeds
    /(?:youtube\.com\/(?:shorts|live)\/|youtube-nocookie\.com\/embed\/)([a-zA-Z0-9_-]{11})/,
    /^([a-zA-Z0-9_-]{11})$/,
  ];
  for (const pattern of patterns) {
    const match = input.match(pattern);
    if (match) return match[1];
  }
  return null;
}
// Render a duration in seconds as "M:SS", or "H:MM:SS" once it reaches an
// hour. Fractional seconds are floored.
function formatTime(seconds) {
  const whole = Math.floor(seconds);
  const hours = Math.floor(whole / 3600);
  const minutes = Math.floor((whole % 3600) / 60);
  const twoDigits = (value) => String(value).padStart(2, "0");
  const tail = twoDigits(whole % 60);
  return hours > 0
    ? `${hours}:${twoDigits(minutes)}:${tail}`
    : `${minutes}:${tail}`;
}
// Turn a timestamped transcript (one "[MM:SS] text" style line per segment)
// into an array of { text, offset, duration } objects.
//   offset   — segment start in seconds
//   duration — gap to the next segment's offset; the last segment gets 15
// Lines without a recognisable timestamp, or with no text after it, are dropped.
function parseTimestampedTranscript(text) {
  // Match timestamps in various formats:
  // [0:00], [00:00], [0:00:00], (0:00), 0:00 -, **0:00**, etc.
  // Optionally preceded by speaker labels, markdown bold, etc.
  const tsRegex = /^(?:[*_]*)?(?:\[?\(?)(\d{1,2}):(\d{2})(?::(\d{2}))?[\])]?(?:[*_]*)?\s*[-–—:]?\s*(.*)/;
  // Also match lines like "Speaker 1 [0:00]: text" or "**[0:00]** text"
  const altRegex = /^(?:.*?)[\[(\s](\d{1,2}):(\d{2})(?::(\d{2}))?[\])]\s*[-–—:]?\s*(.*)/;
  const toInt = (digits) => Number.parseInt(digits, 10);

  const entries = [];
  for (const rawLine of text.trim().split("\n")) {
    if (!rawLine) continue;
    const candidate = rawLine.trim();
    const match = candidate.match(tsRegex) ?? candidate.match(altRegex);
    if (!match) continue;

    // Two numeric groups mean MM:SS; a third one means H:MM:SS.
    const [, first, second, third, rest] = match;
    const offset = third !== undefined
      ? toInt(first) * 3600 + toInt(second) * 60 + toInt(third)
      : 0 * 3600 + toInt(first) * 60 + toInt(second);

    // Strip any leftover markdown or speaker prefix from the text
    const lineText = rest.replace(/^\*\*\s*/, "").replace(/\s*\*\*$/, "").trim();
    if (lineText) {
      entries.push({ text: lineText, offset, duration: 0 });
    }
  }

  // Durations come from the gap to the following entry; the final entry has
  // no successor, so it gets a default of 15 seconds.
  entries.forEach((entry, index) => {
    const next = entries[index + 1];
    entry.duration = next ? next.offset - entry.offset : 15;
  });
  return entries;
}
// Build the topic-analysis prompt for a parsed transcript. Every entry is
// listed as "[index] (timestamp) text", and the model is asked to answer with
// JSON of the shape { sections: [{ title, summary, startIndex, endIndex }] }.
// The template body is flush-left on purpose: indentation would end up in
// the prompt text.
function buildAnalysisPrompt(entries) {
  const lastIndex = entries.length - 1;
  const transcriptLines = [];
  entries.forEach((entry, index) => {
    transcriptLines.push(`[${index}] (${formatTime(entry.offset)}) ${entry.text}`);
  });
  const numbered = transcriptLines.join("\n");
  return `You are analyzing a video transcript. Your job is to identify natural topic boundaries and group the transcript into discussion-based sections.
TRANSCRIPT (each line is numbered with a timestamp):
${numbered}
INSTRUCTIONS:
1. Read the entire transcript carefully.
2. Identify where the discussion naturally shifts from one topic to another.
3. Group consecutive transcript segments by topic. Some sections may be short (a quick aside) and some may be long (an extended deep-dive). Let the content dictate the length.
4. For each section, write:
- A short, specific topic title (3-8 words)
- A 1-3 sentence summary of what's discussed
- The start and end segment indices (inclusive)
IMPORTANT:
- Sections must be chronological and non-overlapping.
- Every segment index from 0 to ${lastIndex} must belong to exactly one section.
- startIndex of section N+1 must equal endIndex of section N plus 1.
- Create as many or as few sections as the content naturally requires.
- Titles should be descriptive and specific, not generic like "Introduction" unless it truly is one.
Respond with ONLY valid JSON in this exact format, no other text:
{
"sections": [
{
"title": "Brief Topic Title",
"summary": "1-3 sentence summary of this discussion section.",
"startIndex": 0,
"endIndex": 15
}
]
}`;
}
// ── Network mode ──────────────────────────────────────────────────────────
// Chooses the interface the server binds to.
//   • StartOS (DATA_DIR set to something other than the project root):
//     always 0.0.0.0, as container networking requires.
//   • Local Mac dev: 127.0.0.1 by default (safe on public Wi-Fi).
//       - The .app launcher sets LAN_MODE=true (Home) or false (Traveling)
//       - Running "npm start" directly leaves LAN_MODE unset → localhost
const isStartOS = process.env.DATA_DIR && process.env.DATA_DIR !== path.join(__dirname, "..");
const lanMode = Boolean(isStartOS) || process.env.LAN_MODE === "true";
const BIND_HOST = lanMode ? "0.0.0.0" : "127.0.0.1";

// Lets the UI find out whether the server is reachable from the LAN.
app.get("/api/network-mode", (req, res) => {
  res.json({ lan: lanMode });
});
// ── Start server ───────────────────────────────────────────────────────────
// Binds to BIND_HOST:PORT, then runs the startup chores in order:
//   1. check yt-dlp and auto-update it when a newer version is reported,
//   2. check subscriptions (only when at least one exists),
//   3. re-queue items left in "processing" and kick the background processor,
//   4. schedule the hourly subscription check.
app.listen(PORT, BIND_HOST, async () => {
  console.log(`\n YouTube Summarizer API running on http://${BIND_HOST}:${PORT}`);
  console.log(` Data directory: ${DATA_DIR}`);

  // Nothing awaits the promise returned by this callback, so an error escaping
  // it would be an unhandled rejection. Catch it here so the server keeps
  // serving and the hourly check below is still scheduled.
  try {
    console.log(` Checking yt-dlp...`);
    const info = await checkYtdlp();
    if (!info.installed) {
      console.log(` ⚠ yt-dlp not found. Install it: pip install yt-dlp\n`);
    } else if (info.updateAvailable) {
      console.log(` ✓ yt-dlp ${info.version} found`);
      console.log(` ↑ Update available: ${info.latestVersion}`);
      console.log(` Auto-updating...`);
      const result = await autoUpdateYtdlp();
      if (result.success) {
        // Re-check so the log shows the version that is actually installed now.
        const refreshed = await checkYtdlp();
        console.log(` ✓ yt-dlp updated to ${refreshed.version}\n`);
      } else {
        console.log(` ⚠ Auto-update failed. Run manually: yt-dlp -U\n`);
      }
    } else {
      console.log(` ✓ yt-dlp ${info.version} (up to date)\n`);
    }

    // Check subscriptions on startup
    const subs = await loadSubscriptions();
    if (subs.length > 0) {
      console.log(` 📡 Checking ${subs.length} subscription(s) for new videos...`);
      await checkSubscriptions().catch(err => console.error(" ⚠ Subscription check error:", err.message));
      const pending = autoQueue.filter(q => q.status === "pending").length;
      const approved = autoQueue.filter(q => q.status === "approved").length;
      if (pending > 0) console.log(`${pending} new video(s) queued from subscriptions`);
      if (approved > 0) console.log(`${approved} approved video(s) ready for processing`);
      if (pending === 0 && approved === 0) console.log(` ✓ No new videos from subscriptions`);
    }

    // Recover any items stuck in "processing" state from a crash
    for (const item of autoQueue) {
      if (item.status === "processing") {
        console.log(` ⚠ Recovering stuck item: ${item.title}`);
        item.status = "approved"; // Re-queue for processing
      }
    }

    // Resume processing any approved items (from a previous session or just recovered)
    if (autoQueue.some(q => q.status === "approved")) {
      await saveAutoQueue();
      console.log(` 🔄 Starting background processor...`);
      // Delay slightly so the server is fully ready before internal HTTP calls
      setTimeout(() => kickProcessor(), 2000);
    }

    console.log(` ⚙ Processing config: ${processingConfig.delaySeconds}s delay, ${processingConfig.enabled ? "enabled" : "paused"}`);
  } catch (err) {
    console.error(" ⚠ Startup checks failed:", err?.message ?? err);
  }

  // Check subscriptions every hour (runs continuously on StartOS)
  setInterval(() => {
    checkSubscriptions().catch(err => console.error(" ⚠ Subscription check error:", err.message));
  }, 60 * 60 * 1000);
});