2622 lines
102 KiB
JavaScript
2622 lines
102 KiB
JavaScript
import { execFile } from "child_process";
import { createWriteStream } from "fs";
import fs from "fs/promises";
import http from "http";
import https from "https";
import os from "os";
import path from "path";
import { fileURLToPath } from "url";
import { promisify } from "util";

import cors from "cors";
import express from "express";
import { GoogleGenAI } from "@google/genai";
|
|
|
|
// Promise-returning form of child_process.execFile, used below to run
// external commands such as yt-dlp.
const execFileAsync = promisify(execFile);

// The Express application; middleware and routes are registered on it below.
const app = express();

// Listening port: PORT from the environment when set, otherwise 3001.
const PORT = process.env.PORT || 3001;
|
|
|
|
// ── Data directory (configurable for StartOS or local dev) ────────────────
|
|
// On StartOS: DATA_DIR=/data (persistent volume)
|
|
// On local dev: defaults to project root (parent of server/)
|
|
// Directory containing this module. fileURLToPath (rather than URL#pathname)
// decodes percent-escapes such as %20 and produces a valid path on Windows,
// where URL#pathname would yield "/C:/...".
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Root for persistent data: DATA_DIR from the environment when set, otherwise
// the parent of this module's directory.
const DATA_DIR = process.env.DATA_DIR || path.join(__dirname, "..");
const historyDir = path.join(DATA_DIR, "history");
const configDir = path.join(DATA_DIR, "config");

// Best-effort creation: a failure here is deliberately ignored and will
// surface when the directory is first read or written.
await fs.mkdir(historyDir, { recursive: true }).catch(() => {});
await fs.mkdir(configDir, { recursive: true }).catch(() => {});
|
|
|
|
// ── Server-side API key (shared across all clients) ───────────────────────
|
|
// Priority: GEMINI_API_KEY env var → StartOS config → .env file
|
|
// Resolve the shared Gemini key once at startup. Sources are consulted in
// order and the first non-empty value wins:
//   1. GEMINI_API_KEY environment variable
//   2. "gemini_api_key" in <configDir>/startos-config.json
//   3. a GEMINI_API_KEY=... line in <DATA_DIR>/.env
let serverApiKey = process.env.GEMINI_API_KEY || "";
const envPath = path.join(DATA_DIR, ".env");

if (!serverApiKey) {
  try {
    const startosConfigPath = path.join(configDir, "startos-config.json");
    // An unreadable file is treated as an empty config object.
    const rawConfig = await fs.readFile(startosConfigPath, "utf-8").catch(() => "{}");
    const { gemini_api_key: configuredKey } = JSON.parse(rawConfig);
    if (configuredKey) {
      serverApiKey = configuredKey;
    }
  } catch {}
}

if (!serverApiKey) {
  try {
    // An unreadable .env is treated as empty text.
    const dotenvText = await fs.readFile(envPath, "utf-8").catch(() => "");
    const keyLine = dotenvText.match(/^GEMINI_API_KEY=(.+)$/m);
    if (keyLine) {
      // Trim whitespace, then drop one leading and one trailing quote character.
      serverApiKey = keyLine[1].trim().replace(/^["']|["']$/g, "");
    }
  } catch {}
}
|
|
|
|
// ── YouTube cookies (bypass bot detection) ──────────────────────────────
|
|
// Priority: 1) cookies.txt file 2) --cookies-from-browser (local dev only) 3) no cookies
|
|
// On StartOS: cookies.txt lives in DATA_DIR
|
|
// On local dev: cookies.txt in project root, or --cookies-from-browser via YT_COOKIES_FROM
|
|
// Cookie configuration, filled in once at startup:
//   ytCookiesBrowser    – browser name from a YT_COOKIES_FROM=... line in .env ("" when absent)
//   ytCookiesFilePath   – location of cookies.txt inside DATA_DIR
//   ytCookiesFileExists – whether that file was accessible at startup
let ytCookiesBrowser = "";
const ytCookiesFilePath = path.join(DATA_DIR, "cookies.txt");
let ytCookiesFileExists = false;

try {
  const dotenvForCookies = await fs.readFile(envPath, "utf-8").catch(() => "");
  const browserLine = dotenvForCookies.match(/^YT_COOKIES_FROM=(.+)$/m);
  if (browserLine) {
    // Trim, strip one surrounding quote on each side, and lower-case the name.
    ytCookiesBrowser = browserLine[1].trim().replace(/^["']|["']$/g, "").toLowerCase();
  }
} catch {}

ytCookiesFileExists = await fs.access(ytCookiesFilePath).then(
  () => true,
  () => false,
);
if (ytCookiesFileExists) {
  console.log(" 🍪 Found cookies.txt — will use for YouTube authentication");
}
|
|
|
|
/**
 * Build the yt-dlp command-line arguments that supply YouTube cookies.
 *
 * cookies.txt takes precedence; the browser named by ytCookiesBrowser is the
 * fallback; with neither configured no arguments are added.
 *
 * @returns {string[]} Arguments to splice into a yt-dlp invocation (possibly empty).
 */
function ytCookieArgs() {
  if (ytCookiesFileExists) {
    return ["--cookies", ytCookiesFilePath];
  }
  return ytCookiesBrowser ? ["--cookies-from-browser", ytCookiesBrowser] : [];
}
|
|
|
|
// Extra yt-dlp args for robustness (browser impersonation, rate limiting)
|
|
/**
 * Extra yt-dlp arguments applied for robustness.
 *
 * @returns {string[]} A fresh array on every call: Chrome impersonation
 *   followed by a 1–3 second sleep interval between requests.
 */
function ytExtraArgs() {
  return [
    // Impersonate Chrome to avoid TLS-fingerprint detection.
    "--impersonate", "chrome",
    // Pause between requests to stay under rate limits during batch work.
    "--sleep-interval", "1",
    "--max-sleep-interval", "3",
  ];
}
|
|
|
|
/**
 * Name the cookie source currently in effect.
 *
 * @returns {string} "cookies.txt" when the cookie file exists, otherwise the
 *   configured browser name, otherwise "none".
 */
function ytCookieMethod() {
  if (ytCookiesFileExists) {
    return "cookies.txt";
  }
  return ytCookiesBrowser || "none";
}
|
|
|
|
/**
 * Choose which Gemini API key a request should use.
 *
 * @param {string} [clientKey] Key supplied by the client.
 * @returns {string} The server's stored key when clientKey is empty or the
 *   sentinel "USE_SERVER_KEY"; otherwise clientKey unchanged.
 */
function resolveApiKey(clientKey) {
  const wantsServerKey = !clientKey || clientKey === "USE_SERVER_KEY";
  return wantsServerKey ? serverApiKey : clientKey;
}
|
|
|
|
// Global middleware, applied to every request in this order.
// 1) CORS handling with the cors package's default options.
app.use(cors());
// 2) JSON body parsing; bodies up to 100mb are accepted.
app.use(express.json({ limit: "100mb" }));
|
|
|
|
// ── History storage ───────────────────────────────────────────────────────
|
|
|
|
/**
 * Persist one processed session as `<historyDir>/<id>.json`.
 *
 * The id is `<current epoch ms>-<suffix>`. For type "podcast" the suffix is
 * the first 16 characters of the base64url encoding of videoId (podcast ids
 * may be long GUIDs/URLs); for anything else it is videoId itself.
 *
 * @param {string} videoId Source identifier.
 * @param {string} url Source URL.
 * @param {string} title Title; stored as "Untitled" when falsy.
 * @param {Array} chunks Topic chunks; its length is stored as topicCount.
 * @param {Array} entries Transcript entries; its length is stored as segmentCount.
 * @param {*} logs Stored as-is.
 * @param {string} uploadDate Stored as "" when falsy.
 * @param {string} type Stored as "youtube" when falsy.
 * @returns {Promise<string>} The generated session id.
 */
async function saveToHistory(videoId, url, title, chunks, entries, logs, uploadDate, type) {
  const isPodcast = type === "podcast";
  const fileSafeSuffix = isPodcast
    ? Buffer.from(videoId).toString("base64url").slice(0, 16)
    : videoId;
  const id = `${Date.now()}-${fileSafeSuffix}`;

  const record = {
    id,
    videoId,
    url,
    title: title || "Untitled",
    type: type || "youtube", // "youtube" or "podcast"
    topicCount: chunks.length,
    segmentCount: entries.length,
    createdAt: new Date().toISOString(),
    uploadDate: uploadDate || "",
    chunks,
    entries,
    logs,
  };

  const targetFile = path.join(historyDir, `${id}.json`);
  await fs.writeFile(targetFile, JSON.stringify(record));
  return id;
}
|
|
|
|
// Serve the frontend from ../public
|
|
// Static assets: the frontend in ../public is served from the site root and
// ../assets is served under the /assets prefix (both relative to this module).
app.use(express.static(path.join(__dirname, "..", "public")));
app.use("/assets", express.static(path.join(__dirname, "..", "assets")));
|
|
|
|
// ── yt-dlp auto-update ─────────────────────────────────────────────────────
|
|
|
|
let ytdlpVersion = null;
let ytdlpLastCheck = 0;
// Latest release seen by the most recent successful GitHub lookup. Kept
// between calls so results stay accurate while the lookup itself is throttled.
let ytdlpLatestVersion = null;
const UPDATE_CHECK_INTERVAL = 24 * 60 * 60 * 1000; // 24 hours

/**
 * Report whether yt-dlp is installed and whether a newer release exists.
 *
 * The installed version is read with `yt-dlp --version` on every call. The
 * latest release is looked up on GitHub at most once per UPDATE_CHECK_INTERVAL;
 * between lookups the last successful result is reused, so `latestVersion`
 * and `updateAvailable` no longer reset to null/false on throttled calls.
 *
 * @returns {Promise<{installed: boolean, version: (string|null),
 *   updateAvailable: boolean, latestVersion: (string|null)}>}
 *   Never rejects: a missing yt-dlp yields `installed: false`, and lookup
 *   failures leave the previously known latest version in place.
 */
async function checkYtdlp() {
  const info = { installed: false, version: null, updateAvailable: false, latestVersion: null };
  try {
    const { stdout } = await execFileAsync("yt-dlp", ["--version"]);
    info.installed = true;
    info.version = stdout.trim();
    ytdlpVersion = info.version;
  } catch {
    return info;
  }

  const now = Date.now();
  if (now - ytdlpLastCheck > UPDATE_CHECK_INTERVAL) {
    // Stamp before the request so a failing network is also retried at most
    // once per interval.
    ytdlpLastCheck = now;
    try {
      const resp = await fetch("https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest", {
        headers: { "Accept": "application/vnd.github.v3+json" },
        signal: AbortSignal.timeout(5000),
      });
      if (resp.ok) {
        const data = await resp.json();
        ytdlpLatestVersion = data.tag_name?.replace(/^v/, "") || null;
      }
    } catch {} // Network errors are fine, just skip the check
  }

  info.latestVersion = ytdlpLatestVersion;
  info.updateAvailable = Boolean(info.latestVersion && info.version !== info.latestVersion);
  return info;
}
|
|
|
|
/**
 * Update yt-dlp, trying progressively more invasive strategies until one works:
 *   1. yt-dlp's own self-update (`yt-dlp -U`)
 *   2. `pip3` / `pip install -U yt-dlp`
 *   3. `brew upgrade yt-dlp`
 *   4. download the release binary into `<DATA_DIR>/bin/yt-dlp`
 *
 * Strategy 4 runs curl directly (no shell, so the path is never interpreted
 * by `sh`), passes `-f` so an HTTP error page is not saved as the binary, and
 * downloads to a temporary name that replaces the real binary only after
 * `--version` succeeds.
 *
 * @returns {Promise<{success: boolean, message: string}>} Never rejects.
 */
async function autoUpdateYtdlp() {
  console.log(" ↻ Updating yt-dlp...");

  // Strategy 1: yt-dlp's built-in self-update
  try {
    const { stdout } = await execFileAsync("yt-dlp", ["-U"], { timeout: 60000 });
    console.log(` ${stdout.trim()}`);
    return { success: true, message: stdout.trim() };
  } catch {
    console.log(" … Self-update failed, trying pip...");
  }

  // Strategy 2: pip3 / pip (works in containers and on macOS)
  for (const pip of ["pip3", "pip"]) {
    try {
      await execFileAsync(pip, ["install", "-U", "yt-dlp"], { timeout: 120000 });
      console.log(` ✓ Updated via ${pip}`);
      return { success: true, message: `Updated via ${pip}` };
    } catch {}
  }

  // Strategy 3: Homebrew (macOS local dev only)
  try {
    await execFileAsync("brew", ["upgrade", "yt-dlp"], { timeout: 120000 });
    console.log(" ✓ Updated via Homebrew");
    return { success: true, message: "Updated via Homebrew" };
  } catch {}

  // Strategy 4: Direct binary download to persistent storage (StartOS fallback)
  const binDir = path.join(DATA_DIR, "bin");
  const binPath = path.join(binDir, "yt-dlp");
  const downloadPath = `${binPath}.download`;
  try {
    await fs.mkdir(binDir, { recursive: true });
    console.log(" … Trying direct binary download...");
    await execFileAsync("curl", [
      "-fL",
      "-o", downloadPath,
      "https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp",
    ], { timeout: 120000 });
    await fs.chmod(downloadPath, 0o755);
    // Verify the download actually runs before it replaces any existing binary.
    const { stdout } = await execFileAsync(downloadPath, ["--version"], { timeout: 30000 });
    await fs.rename(downloadPath, binPath);
    console.log(` ✓ Downloaded yt-dlp binary: ${stdout.trim()}`);
    return { success: true, message: `Downloaded binary: ${stdout.trim()}` };
  } catch {
    // Best-effort cleanup of a partial or unusable download.
    await fs.rm(downloadPath, { force: true }).catch(() => {});
    console.log(" ✗ All update strategies failed");
    return { success: false, message: "Auto-update failed. Try updating manually from the StartOS Actions menu." };
  }
}
|
|
|
|
// ── Pricing (per 1M tokens) ───────────────────────────────────────────────
|
|
|
|
// USD price per 1M tokens, keyed by model name. "default" is used for any
// model not listed here.
const PRICING = {
  "gemini-3-flash-preview": { input: 0.50, output: 3.00, thinking: 3.00 },
  "gemini-3-pro-preview": { input: 2.00, output: 12.00, thinking: 12.00 },
  "gemini-3.1-pro-preview": { input: 2.00, output: 12.00, thinking: 12.00 },
  "gemini-2.5-flash": { input: 0.15, output: 0.60, thinking: 0.60 },
  // Fallback for unknown models
  "default": { input: 1.00, output: 5.00, thinking: 5.00 },
};

/**
 * Turn a Gemini usage record into token counts and dollar costs.
 *
 * @param {string} modelName Key into PRICING; unknown names use PRICING.default.
 * @param {object} [usage] Usage metadata; reads promptTokenCount,
 *   candidatesTokenCount, thoughtsTokenCount and totalTokenCount. Missing
 *   fields (or a missing object) count as zero.
 * @returns {{inputTokens: number, outputTokens: number, thinkingTokens: number,
 *   totalTokens: number, inputCost: string, outputCost: string,
 *   thinkingCost: string, totalCost: string, totalCostDisplay: string}}
 *   Costs are dollar amounts formatted with 6 decimals. totalCostDisplay is
 *   cents (e.g. "0.150¢") below one cent and dollars (e.g. "$0.7500") otherwise.
 */
function calcCost(modelName, usage) {
  // Own-property check so names like "constructor" cannot resolve to
  // something inherited from Object.prototype.
  const rates = Object.hasOwn(PRICING, modelName) ? PRICING[modelName] : PRICING["default"];
  const counts = usage ?? {};
  const inputTokens = counts.promptTokenCount || 0;
  const outputTokens = counts.candidatesTokenCount || 0;
  const thinkingTokens = counts.thoughtsTokenCount || 0;

  const inputCost = (inputTokens / 1_000_000) * rates.input;
  const outputCost = (outputTokens / 1_000_000) * rates.output;
  const thinkingCost = (thinkingTokens / 1_000_000) * rates.thinking;
  const totalCost = inputCost + outputCost + thinkingCost;

  return {
    inputTokens,
    outputTokens,
    thinkingTokens,
    totalTokens: counts.totalTokenCount || (inputTokens + outputTokens + thinkingTokens),
    inputCost: inputCost.toFixed(6),
    outputCost: outputCost.toFixed(6),
    thinkingCost: thinkingCost.toFixed(6),
    totalCost: totalCost.toFixed(6),
    // Sub-cent totals are shown in cents only; the previous "$…¢" form mixed both units.
    totalCostDisplay: totalCost < 0.01 ? `${(totalCost * 100).toFixed(3)}¢` : `$${totalCost.toFixed(4)}`,
  };
}
|
|
|
|
// ── Safe text extraction from Gemini responses ──────────────────────────
|
|
|
|
/**
 * Extract the text of a Gemini response without ever throwing.
 *
 * @param {object} result Response object from the SDK.
 * @returns {string} `result.text` when it is readable and truthy; otherwise
 *   the concatenated `text` of the first candidate's content parts (parts
 *   without text contribute ""); otherwise "".
 */
function safeText(result) {
  // The SDK's .text getter may throw or come back undefined.
  try {
    if (result.text) {
      return result.text;
    }
  } catch {}

  // Fall back to walking the candidate structure by hand.
  try {
    const firstParts = result?.candidates?.[0]?.content?.parts;
    if (firstParts) {
      const pieces = firstParts.map((part) => part.text || "");
      return pieces.join("");
    }
  } catch {}

  return "";
}
|
|
|
|
// ── Retry helper for transient Gemini API errors ──────────────────────────
|
|
|
|
/**
 * Run `fn`, retrying transient failures with a linearly growing delay.
 *
 * A failure is transient when its status is 503 or 429, or when its message
 * mentions overload, unavailability, rate limiting or a network-level error.
 * Before attempt k+1 the helper waits `delayMs * k` milliseconds. Any other
 * failure, or a transient one on the last attempt, is rethrown unchanged.
 *
 * @param {() => Promise<*>} fn Operation to run.
 * @param {object} [options]
 * @param {number} [options.retries=3] Maximum number of attempts.
 * @param {number} [options.delayMs=3000] Base delay in milliseconds.
 * @param {string} [options.label="Gemini call"] Name used in log lines.
 * @param {(msg: string) => void} [options.log] Receives one line per retry.
 * @returns {Promise<*>} Whatever `fn` resolves to (undefined when retries < 1,
 *   since `fn` is then never called).
 */
async function retryGemini(fn, { retries = 3, delayMs = 3000, label = "Gemini call", log: logFn } = {}) {
  const transientMessage = /overloaded|unavailable|capacity|high demand|rate limit|fetch failed|ECONNRESET|ETIMEDOUT|socket hang up|network/i;

  let attempt = 1;
  while (attempt <= retries) {
    try {
      return await fn();
    } catch (err) {
      const msg = err?.message || String(err);
      const status = err?.status || err?.httpStatusCode || 0;
      const transient = status === 503 || status === 429 || transientMessage.test(msg);
      if (!transient || attempt >= retries) {
        throw err;
      }

      const backoffMs = delayMs * attempt;
      if (logFn) {
        const waitSec = (backoffMs / 1000).toFixed(0);
        logFn(`⚠ ${label} failed (${status || "error"}), retrying in ${waitSec}s... (attempt ${attempt}/${retries})`);
      }
      await new Promise((wake) => setTimeout(wake, backoffMs));
    }
    attempt += 1;
  }
}
|
|
|
|
// ── Health check ───────────────────────────────────────────────────────────
|
|
|
|
// GET /api/health — reports yt-dlp availability, whether a server-side key is
// configured, and how cookies are being supplied (with the age of cookies.txt
// when that file is in use).
app.get("/api/health", async (req, res) => {
  const info = await checkYtdlp();

  const cookieInfo = { method: ytCookieMethod() };
  if (ytCookiesFileExists) {
    try {
      const MS_PER_DAY = 1000 * 60 * 60 * 24;
      const { mtimeMs } = await fs.stat(ytCookiesFilePath);
      const ageDays = Math.floor((Date.now() - mtimeMs) / MS_PER_DAY);
      cookieInfo.fileAgeDays = ageDays;
      // cookies typically expire after ~14 days, so flag them a little early
      cookieInfo.fileExpiring = ageDays > 12;
    } catch {}
  }

  res.json({
    ok: true,
    ytdlp: info.installed,
    hasServerKey: !!serverApiKey,
    cookies: cookieInfo,
    ...info,
  });
});
|
|
|
|
// ── Status endpoints ───────────────────────────────────────────────────────
|
|
|
|
// POST /api/heartbeat — liveness ping; always answers that the server is awake.
app.post("/api/heartbeat", (req, res) => {
  const payload = { ok: true, sleeping: false };
  res.json(payload);
});

// GET /api/status — same as the heartbeat plus the process uptime in seconds.
app.get("/api/status", (req, res) => {
  const payload = { ok: true, sleeping: false, uptime: process.uptime() };
  res.json(payload);
});
|
|
|
|
// Shutdown: used by the macOS .app launcher to stop the server cleanly.
|
|
// On StartOS this endpoint is unused (StartOS manages the container lifecycle).
|
|
// POST /api/shutdown — acknowledges the request, logs it, then exits the
// process shortly afterwards so the response can be delivered first.
// NOTE(review): this route has no authentication and CORS is enabled for the
// whole app; confirm that any page being able to stop the server is acceptable.
app.post("/api/shutdown", (req, res) => {
  const EXIT_DELAY_MS = 300;
  res.json({ ok: true, message: "Server shutting down..." });
  console.log("\n Server shutdown requested from browser. Goodbye!\n");
  setTimeout(() => {
    process.exit(0);
  }, EXIT_DELAY_MS);
});
|
|
|
|
// ── Manual update endpoint ─────────────────────────────────────────────────
|
|
|
|
// POST /api/update-ytdlp — runs the updater, then re-reads the installed
// version and returns both results merged into one object (version info wins
// on key clashes).
app.post("/api/update-ytdlp", async (req, res) => {
  const updateResult = await autoUpdateYtdlp();
  const versionInfo = await checkYtdlp();
  res.json({ ...updateResult, ...versionInfo });
});
|
|
|
|
// ── Cookie management endpoints ───────────────────────────────────────────
|
|
|
|
// Upload cookies.txt content
|
|
// POST /api/cookies/upload — body is the raw text of a cookies.txt file (any
// content type, up to 2mb). After a light plausibility check the text is
// written to ytCookiesFilePath and the cookie file is marked as present.
app.post("/api/cookies/upload", express.text({ type: "*/*", limit: "2mb" }), async (req, res) => {
  try {
    const content = req.body;
    const isUsableText = typeof content === "string" && content.length >= 20;
    if (!isUsableText) {
      return res.status(400).json({ error: "Invalid cookie file content" });
    }

    // Plausibility check for the Netscape format: a recognisable first line,
    // or at least one tab-delimited TRUE/FALSE field anywhere in the file.
    const [rawFirstLine] = content.split("\n");
    const headerLooksRight = /^#.*cookie|^#.*http|^\./i.test(rawFirstLine.trim());
    const hasFlagColumn = content.includes("\tTRUE\t") || content.includes("\tFALSE\t");
    if (!headerLooksRight && !hasFlagColumn) {
      return res.status(400).json({ error: "File doesn't look like a valid Netscape cookies.txt file. The first line should start with '# Netscape HTTP Cookie File' or '# HTTP Cookie File'." });
    }

    await fs.writeFile(ytCookiesFilePath, content, "utf-8");
    ytCookiesFileExists = true;
    console.log(" 🍪 cookies.txt uploaded via web UI");
    res.json({ ok: true, message: "Cookies saved successfully" });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
|
|
|
|
// Delete cookies.txt
|
|
// POST /api/cookies/delete — removes cookies.txt (a failed unlink, e.g. because
// the file is already gone, is ignored) and marks the cookie file as absent.
app.post("/api/cookies/delete", async (req, res) => {
  try {
    const ignoreUnlinkFailure = () => {};
    await fs.unlink(ytCookiesFilePath).catch(ignoreUnlinkFailure);
    ytCookiesFileExists = false;
    console.log(" 🍪 cookies.txt deleted via web UI");
    res.json({ ok: true });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
|
|
|
|
// Test if cookies work by trying a lightweight yt-dlp operation
|
|
// POST /api/cookies/test — checks the configured cookies by asking yt-dlp for
// the title of a fixed, well-known video (metadata only, nothing downloaded).
// Always answers with JSON `{ ok, message | error }`; failures are reported in
// the body rather than through the HTTP status.
app.post("/api/cookies/test", async (req, res) => {
  try {
    const cookieArgs = ytCookieArgs();
    if (cookieArgs.length === 0) {
      return res.json({ ok: false, error: "No cookies configured" });
    }

    const probeArgs = [
      "--print", "%(title)s",
      "--no-download",
      ...cookieArgs,
      "https://www.youtube.com/watch?v=dQw4w9WgXcQ", // well-known test video
    ];
    const { stdout } = await execFileAsync("yt-dlp", probeArgs, { timeout: 20000 });

    const title = stdout.trim();
    if (!title) {
      res.json({ ok: false, error: "yt-dlp returned empty response" });
      return;
    }
    res.json({ ok: true, message: `Cookies working! Fetched: "${title}"` });
  } catch (err) {
    const msg = `${err.stderr || ""} ${err.message || ""}`;
    // Sign-in / bot / 403 wording in the output is treated as bad cookies.
    const isAuth = /Sign in|bot|403/i.test(msg);
    const error = isAuth
      ? "Cookies are expired or invalid — YouTube still requires sign-in"
      : msg.slice(0, 300);
    res.json({ ok: false, error });
  }
});
|
|
|
|
// Get cookie status
|
|
// GET /api/cookies/status — reports the active cookie method and, when
// cookies.txt is in use, its age in whole days, an "expiring" flag (older than
// 12 days) and its size in bytes. A failed stat leaves those fields out.
app.get("/api/cookies/status", async (req, res) => {
  const info = { method: ytCookieMethod() };

  if (ytCookiesFileExists) {
    try {
      const MS_PER_DAY = 1000 * 60 * 60 * 24;
      const { mtimeMs, size } = await fs.stat(ytCookiesFilePath);
      const ageDays = Math.floor((Date.now() - mtimeMs) / MS_PER_DAY);
      info.fileAgeDays = ageDays;
      info.fileExpiring = ageDays > 12;
      info.fileSize = size;
    } catch {}
  }

  res.json(info);
});
|
|
|
|
// ── OAuth2 YouTube authentication (headless / StartOS) ────────────────────
|
|
// Uses yt-dlp's built-in OAuth2 device flow: user enters a code on any browser,
|
|
// token is cached in yt-dlp's cache dir (persisted on /data volume).
|
|
|
|
// State of the (single) OAuth device flow, shared by the start and status routes.
let oauthInProgress = false;
let oauthDeviceCode = null;
let oauthVerifyUrl = null;

// GET /api/auth/oauth/status — reports whether a token-like file exists in the
// yt-dlp cache, how cookies are supplied, and the device code / verification
// URL while a flow is running.
app.get("/api/auth/oauth/status", async (req, res) => {
  // Cache root: XDG_CACHE_HOME when set, otherwise <DATA_DIR>/ytdlp-cache.
  const cacheRoot = process.env.XDG_CACHE_HOME || path.join(DATA_DIR, "ytdlp-cache");

  let hasToken = false;
  try {
    const cachedNames = await fs
      .readdir(path.join(cacheRoot, "yt-dlp"), { recursive: true })
      .catch(() => []);
    // Any entry whose name mentions "oauth" or "token" counts as a cached token.
    hasToken = cachedNames.some((name) => name.includes("oauth") || name.includes("token"));
  } catch {}

  const running = oauthInProgress;
  res.json({
    hasToken,
    hasCookies: ytCookiesFileExists,
    cookieMethod: ytCookieMethod(),
    inProgress: running,
    deviceCode: running ? oauthDeviceCode : null,
    verifyUrl: running ? oauthVerifyUrl : null,
  });
});
|
|
|
|
// Initiate OAuth2 device code flow
|
|
// POST /api/auth/oauth/start — begins the yt-dlp OAuth device flow in the
// background. The response is sent immediately; the device code and
// verification URL appear in the shared oauth* state once yt-dlp prints them
// to stderr. A second request while a flow is running returns the current state.
app.post("/api/auth/oauth/start", async (req, res) => {
  if (oauthInProgress) {
    return res.json({
      ok: true,
      message: "OAuth flow already in progress",
      deviceCode: oauthDeviceCode,
      verifyUrl: oauthVerifyUrl,
    });
  }

  oauthInProgress = true;
  oauthDeviceCode = null;
  oauthVerifyUrl = null;

  res.json({ ok: true, message: "OAuth flow started. Check /api/auth/oauth/status for the device code." });

  // Runs detached from the request; every failure is caught and logged, and
  // the in-progress flag is always cleared at the end.
  const runDeviceFlow = async () => {
    try {
      const cacheDir = path.join(DATA_DIR, "ytdlp-cache");
      await fs.mkdir(cacheDir, { recursive: true });

      const ytArgs = [
        "--username", "oauth",
        "--password", "",
        "--cache-dir", path.join(cacheDir, "yt-dlp"),
        "--print", "%(title)s",
        "--no-download",
        "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
      ];
      const child = execFile("yt-dlp", ytArgs, {
        timeout: 300000,
        env: { ...process.env, XDG_CACHE_HOME: cacheDir },
      });

      // Accumulate stderr and re-scan the whole buffer on every chunk, because
      // the "go to <url> ... enter code <code>" prompt may arrive in pieces.
      let stderrBuf = "";
      child.stderr?.on("data", (chunk) => {
        stderrBuf += chunk.toString();
        const prompt = stderrBuf.match(/go to\s+(https?:\/\/\S+)\s+.*?enter.*?code[:\s]+([A-Z0-9-]+)/i);
        if (!prompt) return;
        oauthVerifyUrl = prompt[1];
        oauthDeviceCode = prompt[2];
        console.log(` 🔑 OAuth device code: ${oauthDeviceCode} — verify at ${oauthVerifyUrl}`);
      });

      await new Promise((resolve, reject) => {
        child.on("close", (code) => {
          if (code !== 0) {
            reject(new Error(`yt-dlp exited with code ${code}: ${stderrBuf.slice(-300)}`));
            return;
          }
          resolve();
        });
        child.on("error", reject);
      });

      console.log(" ✓ OAuth token cached successfully");
    } catch (err) {
      console.error(" ⚠ OAuth flow failed:", err.message);
    } finally {
      oauthInProgress = false;
    }
  };
  runDeviceFlow();
});
|
|
|
|
// ── History endpoints ─────────────────────────────────────────────────────
|
|
|
|
// Location of the history metadata file (folder structure and ordering),
// stored inside historyDir next to the per-session JSON files.
const metaPath = path.join(historyDir, "_meta.json");
|
|
|
|
// meta.json structure: { folders: [ { id, name, order, items: [sessionId, ...] } ], uncategorized: [sessionId, ...] }
|
|
/**
 * Load the history metadata from metaPath.
 *
 * The result always has the shape callers rely on:
 * `{ folders: [{ ..., items: [] }], uncategorized: [] }`. A missing or
 * unparseable file yields the empty structure. A parseable file with missing
 * or wrongly typed parts is repaired instead of making callers throw:
 * non-array `folders` / `uncategorized` / `items` become `[]`, and folder
 * entries that are not objects are dropped. Other properties are preserved.
 *
 * @returns {Promise<{folders: object[], uncategorized: string[]}>}
 */
async function loadMeta() {
  let parsed;
  try {
    parsed = JSON.parse(await fs.readFile(metaPath, "utf-8"));
  } catch {
    return { folders: [], uncategorized: [] };
  }

  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    return { folders: [], uncategorized: [] };
  }

  const rawFolders = Array.isArray(parsed.folders) ? parsed.folders : [];
  const folders = rawFolders
    .filter((folder) => folder !== null && typeof folder === "object")
    .map((folder) => ({ ...folder, items: Array.isArray(folder.items) ? folder.items : [] }));
  const uncategorized = Array.isArray(parsed.uncategorized) ? parsed.uncategorized : [];

  return { ...parsed, folders, uncategorized };
}
|
|
/**
 * Write the history metadata to metaPath as JSON indented by two spaces.
 *
 * @param {object} meta Metadata object to persist.
 * @returns {Promise<void>}
 */
async function saveMeta(meta) {
  const serialized = JSON.stringify(meta, null, 2);
  await fs.writeFile(metaPath, serialized);
}
|
|
|
|
// Get all history: sessions + folder structure
|
|
// GET /api/history — returns a summary of every stored session plus the folder
// metadata. As a side effect the metadata is reconciled with the files on disk
// (references to missing sessions removed, unreferenced sessions prepended to
// "uncategorized", newest first) and written back.
// Any failure yields an empty result rather than an error status.
app.get("/api/history", async (req, res) => {
  try {
    // Bookkeeping files that live in historyDir but are not sessions.
    const nonSessionFiles = new Set(["subscriptions.json", "skip-list.json", "seen-list.json", "auto-queue.json"]);
    const dirEntries = await fs.readdir(historyDir);
    const sessionFiles = dirEntries.filter(
      (name) => name.endsWith(".json") && !name.startsWith("_") && !nonSessionFiles.has(name),
    );

    const sessionsMap = {};
    for (const file of sessionFiles) {
      try {
        const data = JSON.parse(await fs.readFile(path.join(historyDir, file), "utf-8"));
        const { id, videoId, url, title, topicCount, type, segmentCount, createdAt, uploadDate } = data;
        sessionsMap[id] = {
          id,
          videoId,
          url,
          title,
          topicCount,
          type: type || "youtube",
          segmentCount,
          createdAt,
          uploadDate: uploadDate || "",
        };
      } catch {}
    }

    const meta = await loadMeta();

    // Drop references to sessions whose files no longer exist.
    const stillExists = (id) => sessionsMap[id];
    meta.folders.forEach((folder) => {
      folder.items = folder.items.filter(stillExists);
    });
    meta.uncategorized = meta.uncategorized.filter(stillExists);

    // Sessions not referenced anywhere yet (newly created) go to the front of
    // "uncategorized", most recently created first.
    const referenced = new Set(meta.uncategorized);
    for (const folder of meta.folders) {
      for (const id of folder.items) referenced.add(id);
    }
    const createdAtOf = (id) => new Date(sessionsMap[id].createdAt);
    const orphans = Object.keys(sessionsMap)
      .filter((id) => !referenced.has(id))
      .sort((a, b) => createdAtOf(b) - createdAtOf(a));
    meta.uncategorized = [...orphans, ...meta.uncategorized];

    await saveMeta(meta);
    res.json({ sessions: sessionsMap, meta });
  } catch (err) {
    res.json({ sessions: {}, meta: { folders: [], uncategorized: [] } });
  }
});
|
|
|
|
// Get a single session (full data)
|
|
// GET /api/history/:id — returns the full stored record of one session.
// Responds 404 when the id is not a plain file name or the record cannot be
// read or parsed.
app.get("/api/history/:id", async (req, res) => {
  const { id } = req.params;
  // Route params arrive URL-decoded, so "%2F" becomes "/". Accept only a bare
  // file name so the lookup cannot escape historyDir (e.g. "../config/…").
  const isPlainName =
    typeof id === "string" &&
    id.length > 0 &&
    id === path.basename(id) &&
    !id.includes("\\") &&
    !id.includes("\0");
  if (!isPlainName) {
    return res.status(404).json({ error: "Session not found" });
  }

  try {
    const raw = await fs.readFile(path.join(historyDir, `${id}.json`), "utf-8");
    res.json(JSON.parse(raw));
  } catch {
    res.status(404).json({ error: "Session not found" });
  }
});
|
|
|
|
// Rename a session title
|
|
// PUT /api/history/:id/title — body `{ title }`. Stores the new title when it
// is a non-empty string; otherwise the existing title is kept. Responds 404
// when the id is not a plain file name or the record cannot be read/written.
app.put("/api/history/:id/title", async (req, res) => {
  const { id } = req.params;
  // Route params arrive URL-decoded, so "%2F" becomes "/". Accept only a bare
  // file name so the write cannot land outside historyDir.
  const isPlainName =
    typeof id === "string" &&
    id.length > 0 &&
    id === path.basename(id) &&
    !id.includes("\\") &&
    !id.includes("\0");
  if (!isPlainName) {
    return res.status(404).json({ error: "Session not found" });
  }

  try {
    const filePath = path.join(historyDir, `${id}.json`);
    const data = JSON.parse(await fs.readFile(filePath, "utf-8"));
    const requestedTitle = req.body?.title;
    // Only a non-empty string may replace the title; anything else is ignored
    // so a number or object can never be persisted as the title.
    if (typeof requestedTitle === "string" && requestedTitle) {
      data.title = requestedTitle;
    }
    await fs.writeFile(filePath, JSON.stringify(data));
    res.json({ ok: true, title: data.title });
  } catch {
    res.status(404).json({ error: "Session not found" });
  }
});
|
|
|
|
// Delete a session
|
|
// DELETE /api/history/:id — deletes one session file, adds its videoId to the
// skip list (so subscriptions don't re-queue it) and removes the id from the
// folder metadata. Responds 404 when the id is not a valid session name or
// any step fails.
app.delete("/api/history/:id", async (req, res) => {
  const { id } = req.params;
  // Route params arrive URL-decoded, so "%2F" becomes "/". Accept only a bare
  // file name so the unlink cannot reach outside historyDir.
  const isPlainName =
    typeof id === "string" &&
    id.length > 0 &&
    id === path.basename(id) &&
    !id.includes("\\") &&
    !id.includes("\0");
  // Bookkeeping files share historyDir with sessions and must never be
  // deleted through this route (same exclusions the history listing applies).
  const isReserved =
    isPlainName &&
    (id.startsWith("_") || ["subscriptions", "skip-list", "seen-list", "auto-queue"].includes(id));
  if (!isPlainName || isReserved) {
    return res.status(404).json({ error: "Session not found" });
  }

  try {
    const filePath = path.join(historyDir, `${id}.json`);

    // Read the record first: its videoId is needed for the skip list. An
    // unreadable record is not fatal here; the unlink below decides the outcome.
    let videoId = null;
    try {
      videoId = JSON.parse(await fs.readFile(filePath, "utf-8")).videoId;
    } catch {}

    await fs.unlink(filePath);

    // Add to skip list so subscriptions don't re-queue it
    if (videoId) {
      await addToSkipList(videoId);
    }

    // Remove from meta
    const meta = await loadMeta();
    const isOther = (entry) => entry !== id;
    meta.uncategorized = meta.uncategorized.filter(isOther);
    for (const folder of meta.folders) {
      folder.items = folder.items.filter(isOther);
    }
    await saveMeta(meta);
    res.json({ ok: true });
  } catch {
    res.status(404).json({ error: "Session not found" });
  }
});
|
|
|
|
// Update meta (folders, ordering) — the frontend sends the full structure
|
|
// PUT /api/history/meta — the frontend sends the full metadata structure
// (folders and ordering) and it is persisted as-is. The body must have the
// shape `{ folders: [{ items: [...] , ...}], uncategorized: [...] }`; anything
// else is rejected with 400 so a malformed request cannot corrupt _meta.json.
app.put("/api/history/meta", async (req, res) => {
  try {
    const meta = req.body;
    const hasValidShape =
      meta !== null &&
      typeof meta === "object" &&
      Array.isArray(meta.folders) &&
      Array.isArray(meta.uncategorized) &&
      meta.folders.every(
        (folder) => folder !== null && typeof folder === "object" && Array.isArray(folder.items),
      );
    if (!hasValidShape) {
      return res.status(400).json({ error: "Invalid meta structure" });
    }

    await saveMeta(meta);
    res.json({ ok: true });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
|
|
|
|
// Create a folder
|
|
// POST /api/history/folders — body `{ name }`. Appends a new, empty folder to
// the metadata (id "folder-<epoch ms>", name defaulting to "New Folder") and
// returns it.
app.post("/api/history/folders", async (req, res) => {
  try {
    const meta = await loadMeta();
    const created = {
      id: `folder-${Date.now()}`,
      name: req.body.name || "New Folder",
      items: [],
    };
    meta.folders.push(created);
    await saveMeta(meta);
    res.json(created);
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
|
|
|
|
// Rename a folder
|
|
// PUT /api/history/folders/:id — body `{ name }`. Renames the folder (a falsy
// name leaves it unchanged) and returns the folder; 404 when the id is unknown.
app.put("/api/history/folders/:id", async (req, res) => {
  try {
    const meta = await loadMeta();
    const wantedId = req.params.id;
    const target = meta.folders.find((candidate) => candidate.id === wantedId);
    if (!target) {
      return res.status(404).json({ error: "Folder not found" });
    }

    if (req.body.name) {
      target.name = req.body.name;
    }
    await saveMeta(meta);
    res.json(target);
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
|
|
|
|
// Delete a folder (items move to uncategorized)
|
|
// DELETE /api/history/folders/:id — removes the folder and moves its sessions
// to the front of "uncategorized"; 404 when the id is unknown.
app.delete("/api/history/folders/:id", async (req, res) => {
  try {
    const meta = await loadMeta();
    const position = meta.folders.findIndex((candidate) => candidate.id === req.params.id);
    if (position === -1) {
      return res.status(404).json({ error: "Folder not found" });
    }

    const removed = meta.folders.splice(position, 1)[0];
    meta.uncategorized = [...removed.items, ...meta.uncategorized];
    await saveMeta(meta);
    res.json({ ok: true });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
|
|
|
|
// Move a session to a folder (or uncategorized if folderId is null)
|
|
// PUT /api/history/move — body `{ sessionId, folderId, index }`. Moves a
// session into the given folder, or into "uncategorized" when folderId is
// falsy. `index` is the insert position; when it is not a number the session
// is appended to a folder or prepended to "uncategorized".
// Responds 400 without a usable sessionId and 404 for an unknown folderId.
// Both are checked before anything is modified, so a bad request can no longer
// remove the session from every list.
app.put("/api/history/move", async (req, res) => {
  try {
    const { sessionId, folderId, index } = req.body ?? {};
    if (typeof sessionId !== "string" || sessionId.length === 0) {
      return res.status(400).json({ error: "sessionId is required" });
    }

    const meta = await loadMeta();

    // Resolve the destination first; nothing has been changed yet.
    const targetFolder = folderId ? meta.folders.find((f) => f.id === folderId) : null;
    if (folderId && !targetFolder) {
      return res.status(404).json({ error: "Folder not found" });
    }

    // Remove from current location
    const isOther = (id) => id !== sessionId;
    meta.uncategorized = meta.uncategorized.filter(isOther);
    for (const folder of meta.folders) {
      folder.items = folder.items.filter(isOther);
    }

    // Add to new location (read the list only after the removal above, since
    // that step replaces the arrays).
    const destination = targetFolder ? targetFolder.items : meta.uncategorized;
    const defaultIndex = targetFolder ? destination.length : 0;
    const insertAt = typeof index === "number" ? index : defaultIndex;
    destination.splice(insertAt, 0, sessionId);

    await saveMeta(meta);
    res.json({ ok: true });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
|
|
|
|
// ── Subscriptions ─────────────────────────────────────────────────────────
|
|
|
|
// Location of the JSON file holding the subscription list (kept in historyDir).
const subsPath = path.join(historyDir, "subscriptions.json");
|
|
|
|
/**
 * Read the subscription list from subsPath.
 *
 * @returns {Promise<Array>} The file's `subscriptions` value, or `[]` when the
 *   file is missing, unparseable, or has no truthy `subscriptions` property.
 */
async function loadSubscriptions() {
  try {
    const raw = await fs.readFile(subsPath, "utf-8");
    const { subscriptions } = JSON.parse(raw);
    return subscriptions || [];
  } catch {
    return [];
  }
}
|
|
/**
 * Write the subscription list to subsPath as `{ "subscriptions": [...] }`,
 * indented by two spaces.
 *
 * @param {Array} subs Subscriptions to persist.
 * @returns {Promise<void>}
 */
async function saveSubscriptions(subs) {
  const payload = { subscriptions: subs };
  await fs.writeFile(subsPath, JSON.stringify(payload, null, 2));
}
|
|
|
|
// List recent videos from a channel/playlist via yt-dlp (no download)
|
|
// Uses --flat-playlist for speed; returns id + title (no upload_date in flat mode)
|
|
/**
 * List the first `limit` videos of a channel or playlist using yt-dlp's flat
 * playlist mode (metadata only, nothing is downloaded).
 *
 * @param {string} url Channel or playlist URL.
 * @param {number} [limit=15] Maximum number of playlist entries to request.
 * @returns {Promise<Array<{id: string, title: string}>>} One entry per
 *   non-empty output line, split at the first "|".
 * @throws Whatever execFileAsync rejects with (yt-dlp failure, 60 s timeout).
 */
async function listChannelVideosFast(url, limit = 15) {
  const ytArgs = [
    "--print", "%(id)s|%(title)s",
    "--no-download",
    "--playlist-end", String(limit),
    "--flat-playlist",
    ...ytCookieArgs(),
    ...ytExtraArgs(),
    url,
  ];
  const { stdout } = await execFileAsync("yt-dlp", ytArgs, { timeout: 60000 });

  const videos = [];
  for (const line of stdout.trim().split("\n")) {
    if (!line) continue;
    // Titles may themselves contain "|", so only the first one separates.
    const sep = line.indexOf("|");
    videos.push({ id: line.slice(0, sep), title: line.slice(sep + 1) });
  }
  return videos;
}
|
|
|
|
// Fetch upload_date for a list of video IDs, one yt-dlp call per batch
// (50 IDs per batch by default).
// Bails after 2 consecutive failures to avoid grinding through blocked requests
/**
 * @param {string[]} videoIds YouTube video ids.
 * @param {number} [batchSize=50] How many ids are passed to one yt-dlp call.
 * @returns {Promise<Object<string, string>>} Map of video id → upload date as
 *   printed by yt-dlp. Ids whose date is missing or "NA" are omitted, as are
 *   ids in batches that failed. Never rejects.
 */
async function fetchUploadDates(videoIds, batchSize = 50) {
  if (videoIds.length === 0) return {};
  const dateMap = {};
  let consecutiveFails = 0;

  for (let i = 0; i < videoIds.length; i += batchSize) {
    const batchNumber = Math.floor(i / batchSize) + 1;
    const batch = videoIds.slice(i, i + batchSize);
    const urls = batch.map((id) => `https://www.youtube.com/watch?v=${id}`);
    try {
      const { stdout } = await execFileAsync("yt-dlp", [
        "--print", "%(id)s|%(upload_date)s",
        "--no-download",
        ...ytCookieArgs(),
        ...urls,
      ], { timeout: 45000 });

      let found = 0;
      for (const line of stdout.trim().split("\n").filter(Boolean)) {
        const [id, date] = line.split("|");
        if (id && date && date !== "NA") {
          dateMap[id] = date;
          found += 1;
        }
      }
      consecutiveFails = 0;
      // Report what was actually parsed, not the size of the request.
      subLog(` Batch ${batchNumber}: got dates for ${found} of ${batch.length} video(s)`);
    } catch (err) {
      consecutiveFails++;
      const reason = String(err?.message ?? err).slice(0, 80);
      subLog(` ⚠ Batch ${batchNumber} failed: ${reason}`);
      if (consecutiveFails >= 2) {
        subLog(` ⚠ 2 consecutive failures — aborting yt-dlp date fetch (bot detection likely)`);
        break;
      }
    }
  }
  return dateMap;
}
|
|
|
|
// ── RSS-based date fetching (bypasses bot detection) ─────────────────────
|
|
|
|
// Fetch a URL and return the response body as a string
|
|
/**
 * GET a URL and resolve with the response body decoded as UTF-8 text.
 *
 * Both http: and https: URLs are supported. Redirects (3xx with a Location
 * header) are followed, including relative Location values, up to
 * `redirectsLeft` hops. The body is returned whatever the final status code
 * is; callers that care must inspect the content.
 *
 * @param {string} url Absolute URL to fetch.
 * @param {number} [redirectsLeft=5] Remaining redirect hops allowed.
 * @returns {Promise<string>} Response body.
 * @throws Rejects on an invalid URL, an unsupported protocol, a network
 *   error, too many redirects, or 30 s of socket inactivity.
 */
function fetchUrl(url, redirectsLeft = 5) {
  return new Promise((resolve, reject) => {
    let target;
    try {
      target = new URL(url);
    } catch (err) {
      reject(err);
      return;
    }

    const client = target.protocol === "http:" ? http : https;
    const request = client.get(target, (res) => {
      const status = res.statusCode ?? 0;
      const { location } = res.headers;

      if (status >= 300 && status < 400 && location) {
        // Discard the redirect body so the socket is released.
        res.resume();
        if (redirectsLeft <= 0) {
          reject(new Error(`Too many redirects while fetching ${url}`));
          return;
        }
        let nextUrl;
        try {
          // Location may be relative; resolve it against the current URL.
          nextUrl = new URL(location, target).href;
        } catch (err) {
          reject(err);
          return;
        }
        fetchUrl(nextUrl, redirectsLeft - 1).then(resolve, reject);
        return;
      }

      // Decode through a streaming decoder so multi-byte characters that
      // straddle two chunks are not corrupted.
      res.setEncoding("utf8");
      let body = "";
      res.on("data", (chunk) => {
        body += chunk;
      });
      res.on("end", () => resolve(body));
      res.on("error", reject);
    });

    request.on("error", reject);
    request.setTimeout(30000, () => {
      request.destroy(new Error(`Timed out while fetching ${url}`));
    });
  });
}
|
|
|
|
// Get channel_id from a YouTube channel/playlist URL using yt-dlp
|
|
/**
 * Resolve the channel id behind a YouTube channel/playlist URL with yt-dlp.
 *
 * Two attempts are made on the first playlist item: a fast flat-playlist query
 * (15 s timeout), then a full-metadata query (30 s timeout). An attempt counts
 * only when the first output line is non-empty, not "NA" and longer than five
 * characters; a failed attempt simply falls through to the next one.
 *
 * @param {string} url Channel or playlist URL.
 * @returns {Promise<(string|null)>} The channel id, or null when both attempts fail.
 */
async function getChannelId(url) {
  const attempts = [
    // Method 1: flat-playlist channel_id (fast, may return NA)
    { modeArgs: ["--flat-playlist"], timeout: 15000 },
    // Method 2: non-flat single video (slower but gets full metadata)
    { modeArgs: [], timeout: 30000 },
  ];

  for (const { modeArgs, timeout } of attempts) {
    try {
      const { stdout } = await execFileAsync("yt-dlp", [
        "--print", "%(channel_id)s",
        "--no-download",
        "--playlist-items", "1",
        ...modeArgs,
        ...ytCookieArgs(),
        url,
      ], { timeout });
      const [firstLine] = stdout.trim().split("\n");
      if (firstLine && firstLine !== "NA" && firstLine.length > 5) {
        return firstLine;
      }
    } catch {}
  }
  return null;
}
|
|
|
|
// Fetch video dates from YouTube RSS feed (no auth, no bot detection)
|
|
// Returns { videoId: "YYYYMMDD", ... } for up to 15 most recent videos
|
|
/**
 * Read publish dates from a channel's YouTube RSS feed.
 *
 * @param {string} channelId YouTube channel id; a falsy value yields `{}`
 *   without any request being made.
 * @returns {Promise<Object<string, string>>} Map of video id → "YYYYMMDD",
 *   taken from the first ten characters of each entry's <published> value.
 *   A failed fetch is logged through subLog and yields whatever was collected
 *   (normally `{}`).
 */
async function fetchDatesFromRSS(channelId) {
  const dateMap = {};
  if (!channelId) return dateMap;

  try {
    const xml = await fetchUrl(`https://www.youtube.com/feeds/videos.xml?channel_id=${channelId}`);
    const entryPattern = /<entry>[\s\S]*?<yt:videoId>([^<]+)<\/yt:videoId>[\s\S]*?<published>([^<]+)<\/published>[\s\S]*?<\/entry>/g;
    for (const [, videoId, published] of xml.matchAll(entryPattern)) {
      // e.g. "2025-12-01T18:00:00+00:00" → "20251201"
      dateMap[videoId] = published.slice(0, 10).replace(/-/g, "");
    }
  } catch (err) {
    subLog(` ⚠ RSS feed fetch failed: ${err.message}`);
  }
  return dateMap;
}
|
|
|
|
// ── Podcast RSS feed parsing ────────────────────────────────────────────────
|
|
|
|
// Detect if a URL looks like a podcast RSS feed
|
|
/**
 * Heuristically decide whether a URL points at a podcast RSS feed.
 *
 * The URL is trimmed and lower-cased, then checked for any of a fixed list of
 * substrings (generic feed markers plus well-known podcast hosts).
 *
 * @param {string} url URL to inspect.
 * @returns {boolean} true when any marker occurs in the URL; false for a
 *   falsy url or when nothing matches.
 */
function isPodcastFeedUrl(url) {
  if (!url) return false;
  const haystack = url.trim().toLowerCase();

  const feedMarkers = [
    // Common podcast RSS feed patterns
    "/feed", "/rss", "feeds.", ".xml",
    // Known hosting providers
    "anchor.fm", "feeds.buzzsprout", "feeds.simplecast",
    "feeds.megaphone", "feeds.transistor", "feeds.libsyn",
    "feeds.podcastmirror", "feeds.acast", "feeds.fireside",
    "rss.art19", "podbean.com/feed",
  ];
  return feedMarkers.some((marker) => haystack.includes(marker));
}
|
|
|
|
// Fetch and parse a podcast RSS feed → returns { title, episodes: [{ id, title, date, audioUrl, duration }] }
|
|
/**
 * Fetch a podcast RSS feed and extract its title and episodes.
 *
 * Differences from a plain regex scrape that matter to callers:
 *  - titles may span several lines and may be wrapped in CDATA;
 *  - XML entities (&amp;, &lt;, &#39;, …) in titles and enclosure URLs are
 *    decoded, so `audioUrl` is directly usable for downloading;
 *  - `<item>` elements that carry attributes are recognised.
 * Episode ids are intentionally left undecoded (raw guid text, or the raw
 * enclosure URL when there is no guid) so ids stay identical to those
 * produced before entity decoding was introduced.
 *
 * @param {string} feedUrl URL of the RSS feed.
 * @param {number} [limit=200] Maximum number of episodes to return.
 * @returns {Promise<{title: string, episodes: Array<{id: string, title: string,
 *   date: string, audioUrl: string, duration: string}>}>} `date` is "YYYYMMDD"
 *   (UTC) or "" when pubDate is missing/unparseable. Items without an
 *   enclosure URL are skipped.
 * @throws Whatever fetchUrl rejects with.
 */
async function parsePodcastRSS(feedUrl, limit = 200) {
  const xml = await fetchUrl(feedUrl);

  const namedEntities = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" };
  // Decode the predefined XML entities and numeric character references in a
  // single pass (so "&amp;lt;" correctly becomes "&lt;").
  const decodeXml = (text) =>
    text.replace(/&(#x[0-9a-fA-F]+|#[0-9]+|amp|lt|gt|quot|apos);/g, (whole, entity) => {
      if (entity[0] !== "#") return namedEntities[entity];
      const codePoint = entity[1] === "x"
        ? Number.parseInt(entity.slice(2), 16)
        : Number.parseInt(entity.slice(1), 10);
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
    });

  // Text of the first <title> in `fragment`, or null when there is none.
  // CDATA content is taken literally; other content is entity-decoded.
  const readTitle = (fragment) => {
    const found = fragment.match(/<title(?:\s[^>]*)?>([\s\S]*?)<\/title>/);
    if (!found) return null;
    const inner = found[1].trim();
    const cdata = inner.match(/^<!\[CDATA\[([\s\S]*)\]\]>$/);
    return cdata ? cdata[1].trim() : decodeXml(inner);
  };

  // Extract podcast title: the first <title> after the opening <channel> tag.
  const channelStart = xml.search(/<channel(?:\s[^>]*)?>/);
  const channelTitle = channelStart === -1 ? null : readTitle(xml.slice(channelStart));
  const podcastTitle = channelTitle ?? "Unknown Podcast";

  // Extract episodes from <item> elements
  const episodes = [];
  for (const [, item] of xml.matchAll(/<item(?:\s[^>]*)?>([\s\S]*?)<\/item>/g)) {
    if (episodes.length >= limit) break;

    // Audio enclosure URL — items without audio are skipped.
    const enclosureMatch = item.match(/<enclosure[^>]+url=["']([^"']+)["']/);
    if (!enclosureMatch) continue;
    const rawAudioUrl = enclosureMatch[1].trim();
    if (!rawAudioUrl) continue;

    // GUID (unique episode identifier), kept as raw text.
    const guidMatch = item.match(/<guid[^>]*>(?:<!\[CDATA\[)?(.*?)(?:\]\]>)?<\/guid>/);
    const guid = guidMatch ? guidMatch[1].trim() : null;

    // Title
    const epTitle = readTitle(item) ?? "Untitled Episode";

    // Publish date → YYYYMMDD
    const pubDateMatch = item.match(/<pubDate>([^<]+)<\/pubDate>/);
    let dateStr = "";
    if (pubDateMatch) {
      const parsedDate = new Date(pubDateMatch[1].trim());
      if (!Number.isNaN(parsedDate.getTime())) {
        dateStr = parsedDate.toISOString().slice(0, 10).replace(/-/g, "");
      }
    }

    // Duration (itunes:duration)
    const durMatch = item.match(/<itunes:duration>([^<]+)<\/itunes:duration>/);
    const duration = durMatch ? durMatch[1].trim() : "";

    episodes.push({
      // Use guid, or fall back to the (undecoded) enclosure URL as unique ID
      id: guid || rawAudioUrl,
      title: epTitle,
      date: dateStr,
      audioUrl: decodeXml(rawAudioUrl),
      duration,
    });
  }

  return { title: podcastTitle, episodes };
}
|
|
|
|
// Download a podcast episode audio file via HTTP(S) to a local path.
//
// Follows up to 10 redirects; a relative `Location` header is resolved
// against the URL that produced it. Resolves (with no value) once the file
// has been fully written and closed. Rejects on a non-200 final status, too
// many redirects, a network error, a connection dropped mid-download, or a
// file write error. When a download fails after the destination file was
// opened, the partial file is removed (best effort).
function downloadPodcastAudio(audioUrl, destPath) {
  const MAX_REDIRECTS = 10;

  return new Promise((resolve, reject) => {
    const doFetch = (url, redirectsLeft) => {
      const getter = url.startsWith("https") ? https : http;

      const onResponse = (res) => {
        const { statusCode, headers } = res;

        if (statusCode >= 300 && statusCode < 400 && headers.location) {
          res.resume(); // drain the redirect body so the socket is released
          if (redirectsLeft <= 0) {
            return reject(new Error("Too many redirects downloading podcast audio"));
          }
          let nextUrl;
          try {
            nextUrl = new URL(headers.location, url).href;
          } catch {
            return reject(new Error(`Invalid redirect location: ${headers.location}`));
          }
          return doFetch(nextUrl, redirectsLeft - 1);
        }

        if (statusCode !== 200) {
          res.resume();
          return reject(new Error(`HTTP ${statusCode} downloading podcast audio`));
        }

        const fileStream = createWriteStream(destPath);
        let settled = false;

        const fail = (err) => {
          if (settled) return;
          settled = true;
          res.destroy();
          // Remove the partial file once the descriptor has been released.
          fileStream.once("close", () => {
            fs.unlink(destPath).catch(() => {});
          });
          fileStream.destroy();
          reject(err);
        };

        res.on("error", fail);
        // pipe() does not end the destination when the source is torn down
        // early, so without this check the promise would never settle.
        res.on("close", () => {
          if (!res.complete) {
            fail(new Error("Connection closed before podcast audio finished downloading"));
          }
        });
        fileStream.on("error", fail);
        fileStream.on("finish", () => {
          fileStream.close((closeErr) => {
            if (closeErr) return fail(closeErr);
            settled = true;
            resolve();
          });
        });

        res.pipe(fileStream);
      };

      let request;
      try {
        // get() throws synchronously for unsupported protocols or bad URLs;
        // on a redirect hop that would otherwise escape as an uncaught exception.
        request = getter.get(url, onResponse);
      } catch (err) {
        return reject(err);
      }
      request.on("error", reject);
    };

    doFetch(audioUrl, MAX_REDIRECTS);
  });
}
|
|
|
|
// Resolve a display name for a channel URL.
// Tries yt-dlp twice — first with --flat-playlist (15s timeout), then without
// it (30s timeout) — and takes the first printed line unless it is empty or
// "NA". If both attempts fail, falls back to the "@handle" in the URL path,
// and finally to "Unknown Channel".
async function fetchChannelName(url) {
  const attempts = [
    { extraArgs: ["--flat-playlist"], timeout: 15000 },
    { extraArgs: [], timeout: 30000 },
  ];

  for (const { extraArgs, timeout } of attempts) {
    try {
      const { stdout } = await execFileAsync("yt-dlp", [
        "--print", "%(channel)s",
        "--no-download",
        "--playlist-end", "1",
        ...extraArgs,
        ...ytCookieArgs(),
        url,
      ], { timeout });
      const [firstLine] = stdout.trim().split("\n");
      if (firstLine && firstLine !== "NA") return firstLine;
    } catch {
      // yt-dlp failed or timed out — fall through to the next strategy.
    }
  }

  try {
    const handle = new URL(url).pathname.match(/\/@([^/]+)/);
    if (handle) return "@" + handle[1];
  } catch {
    // Not a parseable URL — use the generic fallback below.
  }
  return "Unknown Channel";
}
|
|
|
|
// Skip list — videos deleted from history that subscriptions should not re-add
const skipPath = path.join(historyDir, "skip-list.json");

// Reads skip-list.json and returns its `videoIds` as a Set.
// A missing, unreadable or malformed file yields an empty Set.
async function loadSkipList() {
  try {
    const raw = await fs.readFile(skipPath, "utf-8");
    const { videoIds } = JSON.parse(raw);
    return new Set(videoIds || []);
  } catch {
    return new Set();
  }
}
|
|
|
|
// Adds one video ID to the persisted skip list (load → add → rewrite file).
async function addToSkipList(videoId) {
  const current = await loadSkipList();
  const payload = { videoIds: [...current.add(videoId)] };
  await fs.writeFile(skipPath, JSON.stringify(payload));
}
|
|
|
|
// Seen list — videos already offered for approval (persists across restarts)
const seenPath = path.join(historyDir, "seen-list.json");

// Reads seen-list.json and returns its `videoIds` as a Set.
// A missing, unreadable or malformed file yields an empty Set.
async function loadSeenList() {
  try {
    const raw = await fs.readFile(seenPath, "utf-8");
    const { videoIds } = JSON.parse(raw);
    return new Set(videoIds || []);
  } catch {
    return new Set();
  }
}
|
|
|
|
// Merges the given video IDs into the persisted seen list and rewrites the file.
async function addToSeenList(videoIds) {
  const merged = await loadSeenList();
  for (const videoId of videoIds) {
    merged.add(videoId);
  }
  const payload = { videoIds: Array.from(merged) };
  await fs.writeFile(seenPath, JSON.stringify(payload));
}
|
|
|
|
// Collects the `videoId` of every history record in historyDir.
// Bookkeeping files are ignored, and files that cannot be read or parsed are
// skipped. If the directory itself cannot be listed the result is empty.
async function getProcessedVideoIds() {
  const bookkeepingFiles = new Set([
    "_meta.json",
    "subscriptions.json",
    "skip-list.json",
    "seen-list.json",
    "auto-queue.json",
  ]);
  const ids = new Set();

  let names;
  try {
    names = await fs.readdir(historyDir);
  } catch {
    return ids;
  }

  for (const name of names) {
    if (!name.endsWith(".json") || bookkeepingFiles.has(name)) continue;
    try {
      const record = JSON.parse(await fs.readFile(path.join(historyDir, name), "utf-8"));
      if (record.videoId) ids.add(record.videoId);
    } catch {
      // Unreadable or malformed history file — ignore it.
    }
  }
  return ids;
}
|
|
|
|
// Server-side auto-queue for subscription-discovered videos (persisted to disk)
const autoQueuePath = path.join(historyDir, "auto-queue.json");
let autoQueue = [];

// Reads auto-queue.json and returns its `items` array.
// Returns [] when the file is missing, unreadable, malformed or has no items.
async function loadAutoQueue() {
  try {
    const raw = await fs.readFile(autoQueuePath, "utf-8");
    const { items } = JSON.parse(raw);
    return items || [];
  } catch {
    return [];
  }
}
|
|
// Writes the in-memory auto-queue to disk as pretty-printed JSON ({ items }).
async function saveAutoQueue() {
  const serialized = JSON.stringify({ items: autoQueue }, null, 2);
  await fs.writeFile(autoQueuePath, serialized);
}
|
|
|
|
// Startup: restore the persisted queue, then drop every entry whose videoId
// already exists in history. The file is only rewritten if something was removed.
autoQueue = await loadAutoQueue();
{
  const alreadyProcessed = await getProcessedVideoIds();
  const remaining = autoQueue.filter((entry) => !alreadyProcessed.has(entry.videoId));
  const dropped = autoQueue.length - remaining.length;
  autoQueue = remaining;
  if (dropped > 0) {
    console.log(` Auto-queue: removed ${dropped} already-processed item(s)`);
    await saveAutoQueue();
  }
}
|
|
|
|
// ── Background processing queue ──────────────────────────────────────────
// "Approved" auto-queue items are processed one after another, with a
// configurable pause between items so YouTube is not hit with rapid-fire
// downloads.
const processingConfigPath = path.join(configDir, "processing-config.json");

// Settings for the processor; loaded from / saved to processingConfigPath.
let processingConfig = {
  // Delay between processing queue items, in seconds (default: 5 minutes)
  delaySeconds: 300,
  // Whether background processing is active
  enabled: true,
};

// Runtime state of the processor.
let processingState = {
  // Is the processor loop currently active?
  running: false,
  // The item currently being processed (or null)
  currentItem: null,
  // Timestamp of last completed item
  lastCompleted: null,
  // If true, skip delay before next item
  rush: false,
  // Recent processing log entries
  log: [],
};
|
|
|
|
// Logs a processor message to the console and appends it, timestamped, to
// processingState.log, which is capped at the 100 most recent entries.
function procLog(msg) {
  console.log(` [processor] ${msg}`);
  const { log } = processingState;
  log.push({ t: new Date().toISOString(), msg });
  if (log.length > 100) {
    log.shift();
  }
}
|
|
|
|
// Overlays persisted settings onto processingConfig. Only correctly typed
// fields are applied; a missing or malformed file leaves the defaults intact.
async function loadProcessingConfig() {
  try {
    const raw = await fs.readFile(processingConfigPath, "utf-8");
    const { delaySeconds, enabled } = JSON.parse(raw);
    if (typeof delaySeconds === "number") {
      processingConfig.delaySeconds = delaySeconds;
    }
    if (typeof enabled === "boolean") {
      processingConfig.enabled = enabled;
    }
  } catch {
    // No usable config file — keep current values.
  }
}
|
|
// Writes processingConfig to disk as pretty-printed JSON.
async function saveProcessingConfig() {
  const serialized = JSON.stringify(processingConfig, null, 2);
  await fs.writeFile(processingConfigPath, serialized);
}

// Apply any persisted settings before the server starts handling requests.
await loadProcessingConfig();
|
|
|
|
// The background processor: picks "approved" items one at a time, runs each
// through processItemInternally, waits the configured delay, then picks the
// next one. Returns when there is nothing approved left or processing has
// been paused; kickProcessor() starts it again.
//
// - Only one loop runs at a time (guarded by processingState.running).
// - `running` and `currentItem` are reset in a `finally`, so an unexpected
//   error (e.g. the queue file cannot be written) no longer leaves the
//   processor flagged as running forever. The error is still propagated.
// - The inter-item delay is waited in slices of at most one second so that a
//   rush request or a pause issued during the wait takes effect promptly.
// - A failing item is marked "failed" with its error message; it does not
//   stop the loop.
async function backgroundProcessor() {
  if (processingState.running) return; // Already running
  processingState.running = true;
  procLog("Background processor started");

  try {
    while (true) {
      // Find next approved item
      const item = autoQueue.find((q) => q.status === "approved");
      if (!item) {
        procLog("No approved items in queue — processor sleeping");
        return;
      }

      if (!processingConfig.enabled) {
        procLog("Processing is paused — processor sleeping");
        return;
      }

      // Wait the configured delay (unless rush mode or first item)
      if (processingState.lastCompleted && !processingState.rush) {
        const delaySec = processingConfig.delaySeconds;
        procLog(`Waiting ${delaySec}s before next item...`);
        const deadline = Date.now() + delaySec * 1000;
        while (!processingState.rush && processingConfig.enabled) {
          const remainingMs = deadline - Date.now();
          if (!(remainingMs > 0)) break; // also exits on NaN
          await new Promise((r) => setTimeout(r, Math.min(remainingMs, 1000)));
        }
        // Paused while waiting: loop around so the pause branch above exits.
        if (!processingConfig.enabled) continue;
      }
      processingState.rush = false;

      // Re-check the item hasn't been removed/changed while we waited
      const freshItem = autoQueue.find((q) => q.id === item.id);
      if (!freshItem || freshItem.status !== "approved") {
        procLog(`Item ${item.id} was removed or status changed — skipping`);
        continue;
      }

      // Mark as processing
      freshItem.status = "processing";
      processingState.currentItem = freshItem;
      try {
        await saveAutoQueue();
      } catch (err) {
        // Nothing was started yet: hand the item back so a later run retries it.
        freshItem.status = "approved";
        throw err;
      }
      procLog(`Processing: ${freshItem.title} (${freshItem.url})`);

      try {
        const result = await processItemInternally(freshItem);
        freshItem.status = "completed";
        freshItem.completedAt = new Date().toISOString();
        freshItem.historyId = result.historyId || null;
        processingState.lastCompleted = new Date().toISOString();
        procLog(`✓ Completed: ${freshItem.title}`);
      } catch (err) {
        freshItem.status = "failed";
        freshItem.error = err.message || String(err);
        freshItem.failedAt = new Date().toISOString();
        procLog(`✗ Failed: ${freshItem.title} — ${freshItem.error.slice(0, 200)}`);
      }

      processingState.currentItem = null;
      await saveAutoQueue();
    }
  } finally {
    processingState.running = false;
    processingState.currentItem = null;
  }
}
|
|
|
|
// Internal HTTP request to /api/process — consumes the SSE stream and
// settles on the last "result" or "error" event once the response ends.
// This reuses the entire existing pipeline without any code duplication.
//
// Resolves with the parsed data of the last "result" event. Rejects when no
// server API key is configured, when an "error" event was received, when a
// non-200 response carries a JSON `{ error }` body, when the stream ends
// without a result, on a request error, or when the socket is idle for 30
// minutes. "log" events are forwarded to procLog.
//
// Parser state (partial line + current event name) lives across chunks: an
// `event:` line and its `data:` line routinely arrive in different chunks
// for large payloads, and resetting the event name per chunk dropped them.
function processItemInternally(item) {
  return new Promise((resolve, reject) => {
    const apiKey = resolveApiKey(null); // Use server's stored key
    if (!apiKey) {
      return reject(new Error("No API key configured. Set your Gemini API key in Settings."));
    }

    const body = JSON.stringify({
      url: item.url,
      apiKey: "USE_SERVER_KEY",
      type: item.type || undefined,
      title: item.title || undefined,
      uploadDate: item.uploadDate || undefined,
      episodeId: item.videoId || undefined,
    });

    const req = http.request({
      hostname: "127.0.0.1",
      port: PORT,
      path: "/api/process",
      method: "POST",
      headers: { "Content-Type": "application/json", "Content-Length": Buffer.byteLength(body) },
      timeout: 1800000, // 30 minutes of socket inactivity
    }, (res) => {
      // Decode as UTF-8 on the stream so multi-byte characters split across
      // chunk boundaries are reassembled correctly.
      res.setEncoding("utf8");

      const MAX_ERROR_BODY = 65536;
      let pendingLine = "";
      let currentEvent = null;
      let lastResult = null;
      let lastError = null;
      let errorBody = ""; // only collected for non-200 responses

      const handleLine = (rawLine) => {
        const line = rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine;
        if (line.startsWith("event: ")) {
          currentEvent = line.slice(7).trim();
        } else if (line.startsWith("data: ") && currentEvent) {
          try {
            const data = JSON.parse(line.slice(6));
            if (currentEvent === "result") lastResult = data;
            if (currentEvent === "error") lastError = data;
            if (currentEvent === "log") {
              procLog(` [${data.elapsed || "?"}s] ${data.message}`);
            }
          } catch {
            // Malformed event payload — ignore it, as before.
          }
          currentEvent = null;
        } else if (line === "") {
          currentEvent = null;
        }
      };

      res.on("data", (chunk) => {
        if (res.statusCode !== 200 && errorBody.length < MAX_ERROR_BODY) {
          errorBody += chunk;
        }
        const lines = (pendingLine + chunk).split("\n");
        pendingLine = lines.pop(); // keep the incomplete trailing line
        for (const line of lines) handleLine(line);
      });

      res.on("end", () => {
        // A stream that ends without a trailing newline still has one line left.
        if (pendingLine) handleLine(pendingLine);

        if (lastError) {
          return reject(new Error(lastError.message || lastError.error || "Processing failed"));
        }
        if (lastResult) {
          return resolve(lastResult);
        }
        if (res.statusCode !== 200) {
          let detail = "";
          try {
            const parsed = JSON.parse(errorBody);
            detail = (parsed && (parsed.error || parsed.message)) || "";
          } catch {
            // Body was not JSON — report the status code only.
          }
          return reject(new Error(`HTTP ${res.statusCode} from /api/process${detail ? `: ${detail}` : ""}`));
        }
        reject(new Error("Processing ended without result"));
      });

      res.on("error", reject);
    });

    req.on("error", reject);
    req.on("timeout", () => {
      req.destroy();
      reject(new Error("Processing timed out after 30 minutes"));
    });

    req.write(body);
    req.end();
  });
}
|
|
|
|
// Starts the background processor if it is idle, processing is enabled, and
// at least one queue item is "approved". Errors from the processor loop are
// reported through procLog rather than thrown.
function kickProcessor() {
  if (processingState.running || !processingConfig.enabled) return;
  if (!autoQueue.some((entry) => entry.status === "approved")) return;
  backgroundProcessor().catch((err) => procLog(`Processor error: ${err.message}`));
}
|
|
|
|
// State of the subscription check.
let subCheckRunning = false;
let subCheckPromise = null;
// Stores recent check logs for debug endpoint
let subCheckLog = [];

// Prints a message and records it, timestamped, in subCheckLog, which is
// capped at the 200 most recent entries.
function subLog(msg) {
  console.log(msg);
  subCheckLog.push({ t: new Date().toISOString(), msg });
  if (subCheckLog.length > 200) {
    subCheckLog.shift();
  }
}
|
|
|
|
// Runs a subscription check, serialised against any check already in flight.
// A caller arriving during a running check waits for it to finish and then
// starts a fresh check of its own. If the in-flight check rejects, the
// waiting caller rejects with that error.
async function checkSubscriptions() {
  while (subCheckRunning) {
    // Wait for current check to finish, then try again
    if (subCheckPromise) await subCheckPromise;
  }

  subCheckRunning = true;
  const releaseLock = () => {
    subCheckRunning = false;
    subCheckPromise = null;
  };
  subCheckPromise = _checkSubscriptionsInner().finally(releaseLock);
  return subCheckPromise;
}
|
|
|
|
// One full pass over all subscriptions: discovers new podcast episodes /
// YouTube videos published on or after each subscription's cutoff date
// (sub.createdAt) and appends them to the auto-queue — as "approved" when
// the subscription has autoDownload set, otherwise as "pending".
//
// Items already in the library, queue, skip list or seen list are ignored.
// Items proven to be older than the cutoff are added to the seen list so
// they are not re-examined. A failure in one subscription is logged and does
// not stop the others. Afterwards subscriptions (if changed) and the queue
// are saved, and the background processor is kicked when approved items exist.
//
// Queue item ids for podcasts combine the timestamp, the queue position and
// the tail of the encoded GUID: the GUID's head is often a URL prefix shared
// by every episode, which made ids collide within one millisecond.
async function _checkSubscriptionsInner() {
  subCheckLog = []; // Clear logs for fresh check
  const subs = await loadSubscriptions();
  if (subs.length === 0) { subLog("No subscriptions found"); return; }

  const processedIds = await getProcessedVideoIds();
  const skippedIds = await loadSkipList();
  const seenIds = await loadSeenList();
  const queuedIds = new Set(autoQueue.map((q) => {
    // For YouTube: extract video ID from URL; for podcasts: use stored videoId (GUID)
    if (q.videoId) return q.videoId;
    const m = q.url.match(/[?&]v=([a-zA-Z0-9_-]{11})/);
    return m ? m[1] : null;
  }).filter(Boolean));

  subLog(`${processedIds.size} in library, ${skippedIds.size} skipped, ${seenIds.size} seen, ${queuedIds.size} in queue`);

  let changed = false;

  const isUnknown = (id) =>
    !processedIds.has(id) && !queuedIds.has(id) && !skippedIds.has(id) && !seenIds.has(id);

  const markChecked = (sub) => {
    sub.lastChecked = new Date().toISOString();
    changed = true;
  };

  // Podcast subscription: discover episodes from the RSS feed.
  const checkPodcast = async (sub, cutoffDate) => {
    const { episodes } = await parsePodcastRSS(sub.url, 200);
    subLog(` Found ${episodes.length} episode(s) in RSS feed`);
    if (episodes.length === 0) return markChecked(sub);

    const unknowns = episodes.filter((ep) => isUnknown(ep.id));
    subLog(` ${unknowns.length} to check, ${episodes.length - unknowns.length} already known`);
    if (unknowns.length === 0) return markChecked(sub);

    subLog(` Cutoff date: ${cutoffDate}`);

    let newCount = 0;
    const seenNow = [];
    for (const ep of unknowns) {
      if (!ep.date || ep.date.length !== 8) continue; // skip undated
      if (queuedIds.has(ep.id)) continue; // same GUID listed twice in this feed
      if (ep.date < cutoffDate) {
        seenNow.push(ep.id);
        continue; // before cutoff
      }
      subLog(` ✅ ${ep.date} — ${ep.title.slice(0, 60)}`);

      const idTail = Buffer.from(ep.id).toString("base64url").slice(-16);
      autoQueue.push({
        id: `auto-${Date.now()}-${autoQueue.length}-${idTail}`,
        videoId: ep.id, // episode GUID
        url: ep.audioUrl,
        title: ep.title,
        uploadDate: ep.date,
        subscriptionId: sub.id,
        subscriptionName: sub.name,
        status: sub.autoDownload ? "approved" : "pending",
        type: "podcast",
        duration: ep.duration || "",
      });
      queuedIds.add(ep.id);
      newCount++;
    }

    if (seenNow.length > 0) await addToSeenList(seenNow);

    markChecked(sub);
    subLog(` → ${newCount} episode(s) queued for approval from ${sub.name}`);
  };

  // YouTube subscription: discover videos via yt-dlp, dates via RSS then yt-dlp.
  const checkYouTube = async (sub, cutoffDate) => {
    // Scale fetch limit based on how far back the subscription date goes
    const daysSinceSub = Math.max(1, Math.ceil((Date.now() - new Date(sub.createdAt).getTime()) / 86400000));
    const estimatedVideos = Math.ceil(daysSinceSub / 7) * 4;
    const fetchLimit = Math.min(Math.max(estimatedVideos, 15), 200);
    subLog(` Subscription age: ${daysSinceSub}d → fetching up to ${fetchLimit} videos`);

    const candidates = await listChannelVideosFast(sub.url, fetchLimit);
    subLog(` Found ${candidates.length} recent video(s)`);
    if (candidates.length === 0) {
      subLog(` ⚠ No videos returned from yt-dlp for this channel`);
      return markChecked(sub);
    }

    const unknowns = candidates.filter((v) => isUnknown(v.id));
    subLog(` ${unknowns.length} to check, ${candidates.length - unknowns.length} already known`);
    if (unknowns.length === 0) return markChecked(sub);

    subLog(` Cutoff date: ${cutoffDate} — fetching upload dates...`);

    // Step 1: Get channel ID (cached on subscription, or fetch once)
    if (!sub.channelId) {
      subLog(` Resolving channel ID...`);
      sub.channelId = await getChannelId(sub.url);
      if (sub.channelId) {
        subLog(` Resolved channel ID: ${sub.channelId}`);
        changed = true;
      } else {
        subLog(` ⚠ Could not resolve channel ID — RSS will be skipped`);
      }
    }

    // Step 2: RSS feed — fast, no bot detection, covers ~15 most recent
    const dateMap = sub.channelId ? await fetchDatesFromRSS(sub.channelId) : {};
    const rssCount = Object.keys(dateMap).length;
    if (sub.channelId) subLog(` RSS feed: got dates for ${rssCount} videos`);

    // Step 3: Smart early termination — if even the oldest RSS entry predates
    // the cutoff, videos missing from RSS are treated as older still.
    const needDates = unknowns.filter((v) => !dateMap[v.id]);
    let earlyTermination = false;
    if (needDates.length > 0) {
      const oldestRssDate = rssCount > 0 ? Object.values(dateMap).sort()[0] : null;
      if (oldestRssDate !== null && oldestRssDate < cutoffDate) {
        earlyTermination = true;
        subLog(` Oldest RSS video (${oldestRssDate}) is before cutoff — ${needDates.length} older video(s) are definitely too old, skipping yt-dlp`);
      } else {
        subLog(rssCount > 0
          ? ` ${needDates.length} video(s) not in RSS — trying yt-dlp for dates...`
          : ` ${needDates.length} video(s) need dates — trying yt-dlp...`);
        const ytDates = await fetchUploadDates(needDates.map((v) => v.id));
        Object.assign(dateMap, ytDates);
        const ytCount = Object.keys(ytDates).length;
        if (ytCount > 0) subLog(` yt-dlp added dates for ${ytCount} more video(s)`);
      }
    }

    const gotDates = Object.keys(dateMap).length;
    if (gotDates === 0 && needDates.length > 0) {
      subLog(` ⚠ No dates available — skipping. Try setting YT_COOKIES_FROM in .env`);
      return markChecked(sub);
    }
    subLog(` Total dates: ${gotDates} of ${unknowns.length} videos`);

    let newCount = 0;
    for (const video of unknowns) {
      const uploadDate = dateMap[video.id];
      if (!uploadDate || uploadDate.length !== 8) continue;
      if (queuedIds.has(video.id)) continue; // listed twice by yt-dlp
      if (uploadDate < cutoffDate) {
        subLog(` ⏭ ${video.id} (${uploadDate}) — before cutoff`);
        continue;
      }
      subLog(` ✅ ${video.id} (${uploadDate}) — ${video.title.slice(0,50)}`);

      autoQueue.push({
        id: `auto-${Date.now()}-${video.id}`,
        videoId: video.id,
        url: `https://www.youtube.com/watch?v=${video.id}`,
        title: video.title,
        uploadDate,
        subscriptionId: sub.id,
        subscriptionName: sub.name,
        status: sub.autoDownload ? "approved" : "pending",
      });
      queuedIds.add(video.id);
      newCount++;
    }

    // Only add to seen list when we can PROVE a video is too old
    const seenNow = unknowns.filter((v) => {
      if (queuedIds.has(v.id)) return false;
      const d = dateMap[v.id];
      if (d && d.length === 8) return d < cutoffDate;
      return earlyTermination;
    }).map((v) => v.id);
    if (seenNow.length > 0) await addToSeenList(seenNow);

    markChecked(sub);
    subLog(` → ${newCount} video(s) queued for approval from ${sub.name}`);
  };

  for (const sub of subs) {
    if (sub.paused) { subLog(`⏸ ${sub.name} — paused, skipping`); continue; }

    try {
      const icon = sub.type === "podcast" ? "🎙" : "📡";
      subLog(`${icon} Checking: ${sub.name} (${sub.url})`);

      // ISO timestamp → "YYYYMMDD"
      const cutoffDate = sub.createdAt.replace(/[-T:\.Z]/g, "").slice(0, 8);

      if (sub.type === "podcast") {
        await checkPodcast(sub, cutoffDate);
      } else {
        await checkYouTube(sub, cutoffDate);
      }
    } catch (err) {
      subLog(` ⚠ FAILED for ${sub.name}: ${err.message}`);
    }
  }

  const pendingCount = autoQueue.filter((q) => q.status === "pending").length;
  const approvedCount = autoQueue.filter((q) => q.status === "approved").length;
  subLog(`Done. ${pendingCount} pending, ${approvedCount} approved in auto-queue.`);
  if (changed) await saveSubscriptions(subs);
  await saveAutoQueue();

  // Wake up the background processor if there are approved items
  if (approvedCount > 0) {
    subLog(`Kicking background processor for ${approvedCount} approved item(s)...`);
    kickProcessor();
  }
}
|
|
|
|
// CRUD endpoints

// List all subscriptions.
app.get("/api/subscriptions", async (req, res) => {
  const subscriptions = await loadSubscriptions();
  res.json({ subscriptions });
});
|
|
|
|
// Derive a normalized key from a YouTube URL so that different URLs for the
// same channel/playlist compare equal (used for duplicate detection).
// The path is lower-cased and stripped of trailing slashes, then matched in
// this order: "@handle", "channel/<id>", "/c/<name>" or "/user/<name>",
// "playlist/<list>" (from the `list` query parameter), and finally the path
// itself. Input that is not a valid URL is lower-cased and stripped of
// trailing slashes.
function channelKeyFromUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return url.toLowerCase().replace(/\/+$/, "");
  }

  const pathname = parsed.pathname.toLowerCase().replace(/\/+$/, "");
  const pathRules = [
    // /@handle/videos → @handle, /@handle/streams → @handle
    { pattern: /\/(@[^/]+)/, pick: (m) => m[1] },
    // /channel/UCxxx → channel/UCxxx
    { pattern: /\/(channel\/[^/]+)/, pick: (m) => m[1] },
    // /c/name or /user/name (leading slash is kept)
    { pattern: /\/(c|user)\/([^/]+)/, pick: (m) => m[0] },
  ];
  for (const { pattern, pick } of pathRules) {
    const hit = pathname.match(pattern);
    if (hit) return pick(hit);
  }

  // /playlist?list=PLxxx
  const listId = parsed.searchParams.get("list");
  return listId ? `playlist/${listId}` : pathname;
}
|
|
|
|
// Create a subscription. Body: { url, since?, type? }.
//   url   — YouTube channel/playlist URL or podcast RSS feed URL (required)
//   since — cutoff date; only content published on/after it is queued
//           (defaults to "now")
//   type  — "podcast" forces podcast handling; otherwise the URL is sniffed
// Responds 400 for a missing/non-string url or an unparseable `since`
// (previously an invalid date threw inside the handler), 409 when an
// equivalent subscription exists, otherwise the new subscription with the
// placeholder name "Loading...". The real name (and channel ID for YouTube)
// is resolved in the background, followed by a subscription check.
app.post("/api/subscriptions", async (req, res) => {
  const { url, since, type } = req.body;
  if (!url || typeof url !== "string") {
    return res.status(400).json({ error: "Missing url" });
  }

  // Use provided cutoff date, or default to right now
  let cutoff = new Date();
  if (since) {
    cutoff = new Date(since);
    if (Number.isNaN(cutoff.getTime())) {
      return res.status(400).json({ error: "Invalid 'since' date" });
    }
  }

  const isPodcast = type === "podcast" || isPodcastFeedUrl(url);
  const subs = await loadSubscriptions();

  // Prevent duplicates
  if (isPodcast) {
    const normalizeFeedUrl = (u) => u.trim().toLowerCase().replace(/\/+$/, "");
    const wanted = normalizeFeedUrl(url);
    if (subs.some((s) => normalizeFeedUrl(s.url) === wanted)) {
      return res.status(409).json({ error: "Already subscribed to this podcast" });
    }
  } else {
    const newKey = channelKeyFromUrl(url);
    if (subs.some((s) => channelKeyFromUrl(s.url) === newKey)) {
      return res.status(409).json({ error: "Already subscribed to this channel" });
    }
  }

  const sub = {
    id: `sub-${Date.now()}`,
    url,
    name: "Loading...",
    type: isPodcast ? "podcast" : "youtube",
    channelId: null,
    createdAt: cutoff.toISOString(),
    lastChecked: null,
    paused: false,
  };
  subs.push(sub);
  await saveSubscriptions(subs);

  // Respond immediately so the UI isn't blocked
  res.json(sub);

  // Re-read the stored list before patching so changes made while the name
  // lookup was in flight are not overwritten.
  const patchStoredSub = async (apply) => {
    const freshSubs = await loadSubscriptions();
    const stored = freshSubs.find((x) => x.id === sub.id);
    if (stored) {
      apply(stored);
      await saveSubscriptions(freshSubs);
    }
  };

  // Background: resolve name and check for new content
  (async () => {
    try {
      if (isPodcast) {
        // Fetch podcast title from RSS feed
        const { title } = await parsePodcastRSS(url, 1);
        await patchStoredSub((stored) => {
          stored.name = title || url;
        });
        console.log(` 🎙 New podcast subscription: ${title} — checking for episodes...`);
      } else {
        const [name, channelId] = await Promise.all([
          fetchChannelName(url),
          getChannelId(url),
        ]);
        await patchStoredSub((stored) => {
          stored.name = name;
          if (channelId) stored.channelId = channelId;
        });
        console.log(` 📡 New subscription: ${name} — checking for recent videos...`);
      }
      await checkSubscriptions();
    } catch (err) {
      console.error(" ⚠ Background subscription setup error:", err.message);
    }
  })();
});
|
|
|
|
// Delete a subscription and every auto-queue item that came from it
// (regardless of the item's status).
app.delete("/api/subscriptions/:id", async (req, res) => {
  const { id } = req.params;

  const remainingSubs = (await loadSubscriptions()).filter((s) => s.id !== id);
  await saveSubscriptions(remainingSubs);

  autoQueue = autoQueue.filter((q) => q.subscriptionId !== id);
  await saveAutoQueue();

  res.json({ ok: true });
});
|
|
|
|
// Toggle a subscription's `paused` flag. Responds with the updated
// subscription, or 404 when the id is unknown.
app.put("/api/subscriptions/:id/pause", async (req, res) => {
  const subs = await loadSubscriptions();
  const target = subs.find((s) => s.id === req.params.id);
  if (!target) {
    return res.status(404).json({ error: "Subscription not found" });
  }

  target.paused = !target.paused;
  await saveSubscriptions(subs);
  res.json(target);
});
|
|
|
|
// Change a subscription's cutoff date (stored as `createdAt`).
// Body: { since } — anything `new Date()` can parse.
// Responds 400 when `since` is missing or not a valid date (an invalid date
// previously threw inside the handler instead of producing a response),
// 404 when the id is unknown, otherwise the updated subscription.
app.put("/api/subscriptions/:id/since", async (req, res) => {
  const { since } = req.body;
  if (!since) return res.status(400).json({ error: "Missing 'since' date" });

  const cutoff = new Date(since);
  if (Number.isNaN(cutoff.getTime())) {
    return res.status(400).json({ error: "Invalid 'since' date" });
  }

  const subs = await loadSubscriptions();
  const sub = subs.find((s) => s.id === req.params.id);
  if (!sub) return res.status(404).json({ error: "Subscription not found" });

  sub.createdAt = cutoff.toISOString();
  await saveSubscriptions(subs);
  res.json(sub);
});
|
|
|
|
// Debug: subscription check logs (viewable in-app), plus a condensed view of
// the auto-queue.
app.get("/api/sub-check-log", (req, res) => {
  const queueSummary = autoQueue.map(({ id, videoId, title, status, subscriptionName }) => ({
    id,
    videoId,
    title,
    status,
    sub: subscriptionName,
  }));
  res.json({ log: subCheckLog, autoQueueCount: autoQueue.length, autoQueue: queueSummary });
});
|
|
|
|
// Auto-queue endpoints (frontend polls these)

// Returns queue items plus per-status counts. By default only active items
// (pending / approved / processing) are listed; `?all=true` lists everything.
app.get("/api/auto-queue", (req, res) => {
  const activeStatuses = ["pending", "approved", "processing"];
  const countWithStatus = (status) => autoQueue.filter((q) => q.status === status).length;

  const items = req.query.all === "true"
    ? autoQueue
    : autoQueue.filter((q) => activeStatuses.includes(q.status));

  res.json({
    items,
    checkRunning: subCheckRunning,
    counts: {
      pending: countWithStatus("pending"),
      approved: countWithStatus("approved"),
      processing: countWithStatus("processing"),
      completed: countWithStatus("completed"),
      failed: countWithStatus("failed"),
    },
  });
});
|
|
|
|
// Remove an item from the auto-queue. Always responds { ok: true }, whether
// or not the id existed.
app.delete("/api/auto-queue/:id", async (req, res) => {
  const { id } = req.params;
  autoQueue = autoQueue.filter((entry) => entry.id !== id);
  await saveAutoQueue();
  res.json({ ok: true });
});
|
|
|
|
// Skip an item: remember its videoId in the skip list (when the item exists
// and has one) and remove it from the queue.
app.post("/api/auto-queue/:id/skip", async (req, res) => {
  const { id } = req.params;
  const target = autoQueue.find((entry) => entry.id === id);
  if (target && target.videoId) {
    await addToSkipList(target.videoId);
  }

  autoQueue = autoQueue.filter((entry) => entry.id !== id);
  await saveAutoQueue();
  res.json({ ok: true });
});
|
|
|
|
// Approve a single pending auto-queue item for background processing.
// 404 when the id is unknown, 400 when the item is not "pending".
app.post("/api/auto-queue/:id/approve", async (req, res) => {
  const target = autoQueue.find((entry) => entry.id === req.params.id);
  if (!target) {
    return res.status(404).json({ error: "Item not found" });
  }
  if (target.status !== "pending") {
    return res.status(400).json({ error: `Cannot approve item with status '${target.status}'` });
  }

  target.status = "approved";
  await saveAutoQueue();
  kickProcessor();
  res.json({ ok: true, item: target });
});
|
|
|
|
// Approve every pending item in one call. The queue is saved and the
// processor kicked only when at least one item changed.
app.post("/api/auto-queue/approve-all", async (req, res) => {
  const pendingItems = autoQueue.filter((entry) => entry.status === "pending");
  pendingItems.forEach((entry) => {
    entry.status = "approved";
  });

  const approved = pendingItems.length;
  if (approved > 0) {
    await saveAutoQueue();
    kickProcessor();
  }
  res.json({ ok: true, approved });
});
|
|
|
|
// Retry a failed item: clear its failure details and put it back to
// "approved". 404 when the id is unknown, 400 when the item is not "failed".
app.post("/api/auto-queue/:id/retry", async (req, res) => {
  const target = autoQueue.find((entry) => entry.id === req.params.id);
  if (!target) {
    return res.status(404).json({ error: "Item not found" });
  }
  if (target.status !== "failed") {
    return res.status(400).json({ error: `Cannot retry item with status '${target.status}'` });
  }

  Object.assign(target, { status: "approved", error: undefined, failedAt: undefined });
  await saveAutoQueue();
  kickProcessor();
  res.json({ ok: true, item: target });
});
|
|
|
|
// Clear completed and failed items from the queue. Responds with how many
// were removed; the queue file is only rewritten when that is non-zero.
app.post("/api/auto-queue/clear-finished", async (req, res) => {
  const finishedStatuses = ["completed", "failed"];
  const kept = autoQueue.filter((entry) => !finishedStatuses.includes(entry.status));
  const removed = autoQueue.length - kept.length;
  autoQueue = kept;

  if (removed > 0) {
    await saveAutoQueue();
  }
  res.json({ ok: true, removed });
});
|
|
|
|
// Start a subscription check without waiting for it to finish. If one is
// already running, report that instead of starting another.
app.post("/api/subscriptions/check-now", async (req, res) => {
  if (subCheckRunning) {
    return res.json({ ok: true, message: "Already checking" });
  }

  checkSubscriptions().catch((err) => {
    console.error(" ⚠ Manual subscription check error:", err.message);
  });
  res.json({ ok: true, message: "Check started" });
});
|
|
|
|
// ── Auto-download toggle per subscription ──────────────────────────────────
// Body: { enabled }. Only the boolean `true` turns auto-download on; any
// other value turns it off. 404 when the subscription id is unknown.
app.put("/api/subscriptions/:id/auto-download", async (req, res) => {
  const subs = await loadSubscriptions();
  const subscription = subs.find((s) => s.id === req.params.id);
  if (!subscription) {
    return res.status(404).json({ error: "Subscription not found" });
  }

  subscription.autoDownload = req.body.enabled === true;
  await saveSubscriptions(subs);
  res.json({ ok: true, subscription });
});
|
|
|
|
// ── Background processing status & configuration ───────────────────────────
|
|
|
|
// Get current processing status: whether the processor is running, a trimmed
// view of the current item, the config, queue counts, and the 20 most recent
// log entries.
app.get("/api/processing/status", (req, res) => {
  const { running, currentItem, lastCompleted, log } = processingState;
  const countWithStatus = (status) => autoQueue.filter((q) => q.status === status).length;

  let currentSummary = null;
  if (currentItem) {
    const { id, title, url } = currentItem;
    currentSummary = { id, title, url };
  }

  res.json({
    running,
    currentItem: currentSummary,
    lastCompleted,
    config: processingConfig,
    counts: {
      approved: countWithStatus("approved"),
      processing: countWithStatus("processing"),
      pending: countWithStatus("pending"),
    },
    log: log.slice(-20),
  });
});
|
|
|
|
// Update the background-processing configuration.
// Body fields (both optional):
//   delaySeconds — non-negative number, clamped to the range 0..3600
//   enabled      — boolean; switching to true also kicks the processor
// Fields of the wrong type are ignored. The config is saved on every call.
app.put("/api/processing/config", async (req, res) => {
  const { delaySeconds, enabled } = req.body;

  const hasValidDelay = typeof delaySeconds === "number" && delaySeconds >= 0;
  if (hasValidDelay) {
    processingConfig.delaySeconds = Math.min(Math.max(delaySeconds, 0), 3600);
  }

  if (typeof enabled === "boolean") {
    processingConfig.enabled = enabled;
    // When turning processing on, start working through any approved items right away.
    if (enabled) {
      kickProcessor();
    }
  }

  await saveProcessingConfig();
  res.json({ ok: true, config: processingConfig });
});
|
|
|
|
// Rush mode: flag the processor to skip the configured delay before the next
// queue item, then kick it so processing starts even if it is currently idle.
app.post("/api/processing/rush", (req, res) => {
  processingState.rush = true;
  kickProcessor();

  const message = "Rush mode enabled — next item will process immediately";
  res.json({ ok: true, message });
});
|
|
|
|
// Return the complete processing log (debug/monitoring); the status endpoint
// only exposes the most recent entries.
app.get("/api/processing/log", (req, res) => {
  const { log } = processingState;
  res.json({ log });
});
|
|
|
|
// ── Full pipeline: URL → audio → transcript → topic analysis ──────────────
//
// POST /api/process
// Body: { url, apiKey?, model?, type?, title?, uploadDate?, episodeId? }
//   url        — YouTube URL/ID, or a direct audio URL for podcasts
//   apiKey     — optional client key, resolved through resolveApiKey()
//   model      — optional analysis model (transcription always uses Flash)
//   type       — "podcast" forces podcast handling; otherwise inferred from the URL extension
//   title / uploadDate / episodeId — optional metadata supplied by the queue
//
// Validation failures are answered with a plain 400 JSON response. After that the
// response switches to Server-Sent Events and emits:
//   "status" { step, message }            — coarse progress (step 1 download, 2 transcribe, 3 analyze)
//   "log"    { elapsed, message, detail } — detailed log line
//   "error"  { message }                  — failure (the stream is then closed)
//   "result" { videoId, entries, chunks, ... } — final payload
app.post("/api/process", async (req, res) => {
  const { url, apiKey: clientKey, model, type: itemType, title: itemTitle, uploadDate: itemUploadDate, episodeId } = req.body;
  const apiKey = resolveApiKey(clientKey);

  if (!url) {
    return res.status(400).json({ error: "Missing url" });
  }
  if (!apiKey) {
    return res.status(400).json({ error: "No API key provided. Set GEMINI_API_KEY in .env or enter one in Settings." });
  }

  // Determine if this is a podcast episode or YouTube video
  const isPodcast = itemType === "podcast" || /\.(mp3|m4a|ogg|opus|wav|aac)(\?|$)/i.test(url);
  const videoId = isPodcast ? (episodeId || url) : extractVideoId(url);

  if (!isPodcast && !videoId) {
    return res.status(400).json({ error: "Invalid YouTube URL" });
  }

  const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "yts-"));
  // The URL match is case-insensitive, so normalise the extension before using it
  // as a key into the MIME table (otherwise "M4A" would fall back to audio/mp3).
  const audioExt = isPodcast ? (url.match(/\.(mp3|m4a|ogg|opus|wav|aac)/i)?.[1]?.toLowerCase() || "mp3") : "mp3";
  const audioPath = path.join(tmpDir, `audio.${audioExt}`);
  const mimeType = { mp3: "audio/mp3", m4a: "audio/mp4", ogg: "audio/ogg", opus: "audio/opus", wav: "audio/wav", aac: "audio/aac" }[audioExt] || "audio/mp3";

  try {
    const pipelineStart = Date.now();

    // Set up SSE
    res.writeHead(200, {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
      Connection: "keep-alive",
    });

    // Helper to send log entries with elapsed time.
    // Every call is echoed to the console, kept in logHistory (saved with the
    // history record) and streamed to the client as a "status" + "log" event pair.
    const logHistory = [];
    function log(step, message, detail) {
      const elapsed = ((Date.now() - pipelineStart) / 1000).toFixed(1);
      const logMsg = `[${elapsed}s] ${message}`;
      console.log(` ${logMsg}`);
      logHistory.push({ elapsed, message, detail: detail || null });
      sendEvent(res, "status", { step, message });
      sendEvent(res, "log", { elapsed, message, detail: detail || null });
    }

    // ── Step 1: Download audio ──
    const dlStart = Date.now();
    let videoTitle = itemTitle || "Untitled";
    let videoUploadDate = itemUploadDate || "";

    if (isPodcast) {
      log(1, "Downloading podcast episode...");
      await downloadPodcastAudio(url, audioPath);

      const stats = await fs.stat(audioPath);
      const sizeMB = (stats.size / (1024 * 1024)).toFixed(1);
      const dlTime = ((Date.now() - dlStart) / 1000).toFixed(1);
      log(1, `Episode downloaded (${sizeMB} MB) in ${dlTime}s`, `File size: ${sizeMB} MB`);
      log(1, `Episode: ${videoTitle}`);
    } else {
      log(1, "Downloading audio from YouTube...");

      const dlBaseArgs = [
        "-x",
        "--audio-format", "mp3",
        "--audio-quality", "5",
        "-o", audioPath,
        "--no-playlist",
        "--print", "%(title)s|||%(upload_date)s",
        "--no-simulate",
      ];
      const dlUrl = `https://www.youtube.com/watch?v=${videoId}`;
      const cookieArgs = ytCookieArgs();
      const hasCookies = cookieArgs.length > 0;
      let usedCookies = false;
      let dlStdout = "";

      // Helper: attempt a single yt-dlp download and return its stdout
      // (which carries the "--print" title/date line).
      async function attemptDownload(args) {
        const result = await execFileAsync("yt-dlp", args, { timeout: 600000 });
        return result.stdout || "";
      }

      // Helper: check if error is a bot detection / rate limit block
      function isBotBlock(errText) {
        return /Sign in|confirm you're not a bot|bot detection|JsChallengeProvider|js.*challenge|HTTP Error 403|Too Many Requests|429/i.test(errText);
      }

      // ── Smart download with retry ──
      // Strategy: cookies → no-cookies → wait & retry (up to 3 attempts with increasing delays).
      // Only bot-detection / rate-limit failures are retried; any other yt-dlp
      // error ends the loop immediately instead of sleeping through the backoff.
      const MAX_RETRIES = 3;
      const RETRY_DELAYS = [30, 60, 120]; // seconds — escalating backoff
      let downloaded = false;
      let lastError = "";

      for (let attempt = 0; attempt <= MAX_RETRIES && !downloaded; attempt++) {
        // On retry attempts, wait before trying again
        if (attempt > 0) {
          const waitSec = RETRY_DELAYS[Math.min(attempt - 1, RETRY_DELAYS.length - 1)];
          log(1, `⏳ YouTube is rate-limiting. Waiting ${waitSec}s before retry ${attempt}/${MAX_RETRIES}...`);
          sendEvent(res, "status", { step: 1, message: `Rate limited — retrying in ${waitSec}s (attempt ${attempt}/${MAX_RETRIES})` });
          await new Promise(r => setTimeout(r, waitSec * 1000));
          log(1, `Retrying download (attempt ${attempt}/${MAX_RETRIES})...`);
          // Clean up any partial file from previous attempt
          await fs.unlink(audioPath).catch(() => {});
        }

        // Try with cookies first
        if (hasCookies && !usedCookies) {
          try {
            log(1, attempt === 0 ? "Trying download with browser cookies (ad-free)..." : "Retrying with cookies...");
            dlStdout = await attemptDownload([...dlBaseArgs, ...cookieArgs, dlUrl]);
            usedCookies = true;
            downloaded = true;
            break;
          } catch (cookieErr) {
            const cookieMsg = (cookieErr.stderr || "") + " " + (cookieErr.message || "");
            if (attempt === 0) log(1, `⚠ Cookie download failed: ${cookieMsg.trim().slice(0, 200)}`);
            log(1, "Retrying without cookies...");
            await fs.unlink(audioPath).catch(() => {});
          }
        }

        // Try without cookies
        try {
          dlStdout = await attemptDownload([...dlBaseArgs, dlUrl]);
          downloaded = true;
          break;
        } catch (dlErr) {
          lastError = (dlErr.stderr || "") + " " + (dlErr.stdout || "") + " " + (dlErr.message || "");
          const blocked = isBotBlock(lastError);

          if (blocked && attempt < MAX_RETRIES) {
            log(1, `⚠ YouTube bot detection triggered`);
            // Will loop back and wait
            continue;
          }

          if (blocked) {
            // Last resort: try yt-dlp auto-update in case there's a newer version that handles this
            log(1, "All retries exhausted — attempting yt-dlp auto-update as last resort...");
            const updateResult = await autoUpdateYtdlp();
            if (updateResult.success) {
              log(1, "yt-dlp updated! Final retry...");
              try {
                dlStdout = await attemptDownload([...dlBaseArgs, dlUrl]);
                downloaded = true;
                break;
              } catch { /* fall through to error */ }
            }
          }

          // Non-bot error or exhausted retries: waiting will not help, so stop here.
          log(1, `⚠ yt-dlp error: ${lastError.trim().slice(0, 300)}`);
          break;
        }
      }

      if (!downloaded) {
        const blocked = isBotBlock(lastError);
        let hint = "";
        if (blocked) {
          hint = "\n\nYouTube is temporarily blocking downloads from your IP address. " +
            "This is usually caused by:\n" +
            "• Recent VPN use (YouTube flags VPN IPs)\n" +
            "• Too many downloads in a short period\n" +
            "• YouTube's general anti-bot measures\n\n" +
            "What to try:\n" +
            "• Wait 10-30 minutes and try again\n" +
            "• Disconnect any VPN/proxy\n" +
            "• Upload fresh cookies.txt via Settings\n" +
            "• Try a different network (mobile hotspot, etc.)";
        }
        // Only blocked downloads actually went through the retry cycle.
        const summary = blocked ? `Download failed after ${MAX_RETRIES} retries.` : "Download failed.";
        throw new Error(`${summary}${hint}\n\nLast error: ${lastError.trim().slice(0, 300)}`);
      }

      if (!usedCookies && hasCookies) {
        log(1, "⚠ Downloaded without cookies — audio may contain ads");
      }

      const stats = await fs.stat(audioPath);
      const sizeMB = (stats.size / (1024 * 1024)).toFixed(1);
      const dlTime = ((Date.now() - dlStart) / 1000).toFixed(1);
      log(1, `Audio downloaded (${sizeMB} MB) in ${dlTime}s`, `File size: ${sizeMB} MB`);

      // Extract title from the --print output of the download command
      const fallbackTitle = videoTitle !== "Untitled" ? videoTitle : null;
      let gotTitle = false;

      // First try: parse title from the download stdout (most reliable — same call that succeeded)
      if (dlStdout) {
        const printLines = dlStdout.split("\n").map(l => l.trim()).filter(Boolean);
        for (const line of printLines) {
          if (line.includes("|||")) {
            const sep = line.indexOf("|||");
            const t = line.slice(0, sep).trim();
            const d = line.slice(sep + 3).trim();
            if (t && t !== "NA") {
              videoTitle = t;
              if (d && d !== "NA") videoUploadDate = d;
              gotTitle = true;
              log(1, `Video title: ${videoTitle}`);
              break;
            }
          }
        }
      }

      // Second try: separate yt-dlp call (no cookies needed for public metadata)
      if (!gotTitle) {
        try {
          const { stdout } = await execFileAsync("yt-dlp", [
            "--print", "%(title)s|||%(upload_date)s",
            "--no-download",
            `https://www.youtube.com/watch?v=${videoId}`,
          ], { timeout: 15000 });
          const raw = stdout.trim();
          const sep = raw.indexOf("|||");
          if (sep > 0) {
            videoTitle = raw.slice(0, sep).trim() || fallbackTitle || "Untitled";
            const d = raw.slice(sep + 3).trim();
            if (d && d !== "NA") videoUploadDate = d;
          } else {
            videoTitle = raw || fallbackTitle || "Untitled";
          }
          gotTitle = videoTitle !== "Untitled";
          if (gotTitle) log(1, `Video title: ${videoTitle}`);
        } catch {
          // Title fetch failed — fall through to the queue-provided title below
        }
      }

      // Third try: use the queue-provided title
      if (!gotTitle && fallbackTitle) {
        videoTitle = fallbackTitle;
        log(1, `Using queue title: ${fallbackTitle}`);
        gotTitle = true;
      }

      if (!gotTitle) {
        log(1, "⚠ Could not fetch video title");
      }
    }

    // ── Step 2: Transcribe audio ──
    // Detect audio duration to choose strategy
    const audioDuration = await getAudioDuration(audioPath);
    const audioDurMin = audioDuration ? (audioDuration / 60).toFixed(1) : "unknown";
    log(2, `Audio duration: ${audioDuration ? formatTime(Math.floor(audioDuration)) : "unknown"} (${audioDurMin} min)`);

    // Strategy:
    //   < 60 min AND < 30 MB → Flash on full file (fast, cheap, reliable)
    //   ≥ 60 min OR ≥ 30 MB → go straight to chunked transcription with Flash (45-min chunks)
    //   If full-file transcription is truncated or empty → fall back to chunks
    const CHUNK_TIME_THRESHOLD = 60 * 60; // 60 minutes
    const CHUNK_SIZE_THRESHOLD = 30 * 1024 * 1024; // 30 MB
    let audioFileSize = 0;
    try { audioFileSize = (await fs.stat(audioPath)).size; } catch {}
    const audioSizeMB = (audioFileSize / (1024 * 1024)).toFixed(1);
    const needsChunking = (audioDuration && audioDuration >= CHUNK_TIME_THRESHOLD) || (audioFileSize >= CHUNK_SIZE_THRESHOLD);
    const transcriptionModel = "gemini-3-flash-preview"; // Flash handles transcription best
    const analysisModel = model || "gemini-3.1-pro-preview";

    if (needsChunking) {
      const reason = audioDuration >= CHUNK_TIME_THRESHOLD ? `${audioDurMin} min` : `${audioSizeMB} MB`;
      log(2, `Large audio (${reason}) — will use chunked transcription with ${transcriptionModel}`);
    }

    const ai = new GoogleGenAI({ apiKey, httpOptions: { timeout: 900_000, headersTimeout: 900_000 } });

    // NOTE: the template content below is part of the prompt sent to the model,
    // so its lines intentionally start at column 0.
    const transcriptionPrompt = (titleHint) => `${titleHint ? `Video title: "${titleHint}"\n\n` : ""}Transcribe this audio completely and verbatim. Include timestamps at regular intervals (every 15-30 seconds or at natural pauses).

Format each line as:
[MM:SS] The spoken text here...

Rules:
- Transcribe EVERY word spoken, do not skip or summarize anything.
- Use [MM:SS] or [H:MM:SS] timestamp format at the start of each line.
- Start a new timestamped line every 15-30 seconds or at natural speech pauses.
- Include filler words (um, uh, you know) for accuracy.
- If multiple speakers are distinguishable, identify them by name when possible (from introductions, context clues, or how they address each other). Format as: [MM:SS] Name: text. If you cannot determine a name, use descriptive labels like "Host" and "Guest" rather than "Speaker 1" and "Speaker 2".

Return ONLY the timestamped transcript, nothing else.`;

    // Helper: upload a single audio file and transcribe it.
    // Returns the raw generateContent result (possibly with empty text after all
    // empty-response retries). The uploaded file is always deleted afterwards,
    // including when processing or generation throws.
    async function transcribeSingleFile(filePath, mType, titleHint, modelName, offsetSeconds = 0) {
      const upStart = Date.now();
      log(2, `Uploading audio${offsetSeconds > 0 ? ` (offset ${formatTime(offsetSeconds)})` : ""} to Gemini File API...`);
      const uploaded = await ai.files.upload({ file: filePath, config: { mimeType: mType } });
      const upTime = ((Date.now() - upStart) / 1000).toFixed(1);
      log(2, `Audio uploaded in ${upTime}s`, `File ref: ${uploaded.name}`);

      try {
        // Wait for processing
        let f = uploaded;
        const pStart = Date.now();
        while (f.state === "PROCESSING") {
          const ws = ((Date.now() - pStart) / 1000).toFixed(0);
          log(2, `Waiting for Gemini to process audio... (${ws}s)`);
          await new Promise(r => setTimeout(r, 3000));
          f = await ai.files.get({ name: f.name });
        }
        if (f.state === "FAILED") throw new Error("Gemini failed to process audio file.");

        const pTime = ((Date.now() - pStart) / 1000).toFixed(1);
        log(2, `Audio processed in ${pTime}s. Transcribing with ${modelName}...`);

        const prompt = transcriptionPrompt(titleHint);

        // Only use thinkingConfig for Flash models — Pro doesn't support "minimal"
        const txConfig = modelName.includes("flash")
          ? { thinkingConfig: { thinkingLevel: "minimal" } }
          : {};

        // Retry logic that handles both API errors (via retryGemini) AND empty responses
        const EMPTY_RETRIES = 3;
        let result;
        for (let emptyAttempt = 0; emptyAttempt < EMPTY_RETRIES; emptyAttempt++) {
          result = await retryGemini(
            () => ai.models.generateContent({
              model: modelName,
              config: {
                ...txConfig,
                // Disable safety filters to prevent content blocking
                safetySettings: [
                  { category: "HARM_CATEGORY_HARASSMENT", threshold: "BLOCK_NONE" },
                  { category: "HARM_CATEGORY_HATE_SPEECH", threshold: "BLOCK_NONE" },
                  { category: "HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold: "BLOCK_NONE" },
                  { category: "HARM_CATEGORY_DANGEROUS_CONTENT", threshold: "BLOCK_NONE" },
                ],
              },
              contents: [{
                role: "user",
                parts: [
                  { fileData: { fileUri: f.uri, mimeType: mType } },
                  { text: prompt },
                ],
              }],
            }),
            { retries: 3, delayMs: 5000, label: `Transcription${offsetSeconds > 0 ? ` (chunk@${formatTime(offsetSeconds)})` : ""}`, log: (msg) => log(2, msg) }
          );

          const responseText = safeText(result);
          if (responseText) break; // Got actual content, done

          // Log why it's empty
          const candidate = result?.candidates?.[0];
          const finishReason = candidate?.finishReason || "UNKNOWN";
          const blockReason = result?.promptFeedback?.blockReason || "none";
          log(2, `⚠ Empty response (attempt ${emptyAttempt + 1}/${EMPTY_RETRIES}) — finishReason: ${finishReason}, blockReason: ${blockReason}`);

          if (emptyAttempt < EMPTY_RETRIES - 1) {
            const waitSec = 10 * (emptyAttempt + 1);
            log(2, `Waiting ${waitSec}s before retry...`);
            await new Promise(r => setTimeout(r, waitSec * 1000));
          }
        }

        return result;
      } finally {
        // Clean up uploaded file (best effort) on success and on failure alike
        try { await ai.files.delete({ name: uploaded.name }); } catch {}
      }
    }

    // ── Helper: chunked transcription for long audio ──
    // Splits the source into 45-minute chunks, transcribes each one, shifts the
    // timestamps to absolute positions and merges the results.
    // Returns { entries, cost }, or null when the audio could not / need not be split.
    async function transcribeChunked(srcPath, srcMime, title, modelName, logFn) {
      // mkdtemp guarantees a unique directory even for requests starting in the same millisecond
      const chunkDir = await fs.mkdtemp(path.join(os.tmpdir(), "yt-chunks-"));

      try {
        const audioChunks = await splitAudioFile(srcPath, chunkDir, 2700); // 45 min chunks
        if (!audioChunks || audioChunks.length <= 1) return null; // splitting not needed

        logFn(`Split audio into ${audioChunks.length} chunks for transcription`);
        const allEntries = [];
        let totalIn = 0;
        let totalOut = 0;

        for (const chunk of audioChunks) {
          logFn(`Transcribing chunk ${chunk.index + 1}/${audioChunks.length} (starts at ${formatTime(chunk.startOffset)})...`);
          const chunkResult = await transcribeSingleFile(
            chunk.path, "audio/mpeg", title,
            modelName,
            chunk.startOffset
          );

          const chunkUsage = chunkResult.usageMetadata || {};
          const chunkCost = calcCost(modelName, chunkUsage);
          totalIn += chunkCost.inputTokens;
          totalOut += chunkCost.outputTokens;

          const chunkText = safeText(chunkResult);
          if (!chunkText) {
            logFn(`⚠ Chunk ${chunk.index + 1} returned empty response — skipping`);
            continue;
          }
          const chunkEntries = parseTimestampedTranscript(chunkText);

          // Programmatically adjust timestamps: the model transcribes from 0:00 relative
          // to the chunk, so add the chunk's startOffset to get real video timestamps
          if (chunk.startOffset > 0) {
            // Check if the model already adjusted (first entry near the startOffset)
            const firstOffset = chunkEntries.length > 0 ? chunkEntries[0].offset : 0;
            const alreadyAdjusted = firstOffset >= chunk.startOffset * 0.8;
            if (!alreadyAdjusted) {
              for (const e of chunkEntries) {
                e.offset += chunk.startOffset;
              }
              logFn(`Adjusted chunk ${chunk.index + 1} timestamps by +${formatTime(chunk.startOffset)}`);
            }
          }

          logFn(`Chunk ${chunk.index + 1}: ${chunkEntries.length} segments, last timestamp ${chunkEntries.length > 0 ? formatTime(chunkEntries[chunkEntries.length - 1].offset) : "N/A"}`);

          // Merge: skip entries that overlap with already-transcribed content
          const lastExistingTime = allEntries.length > 0 ? allEntries[allEntries.length - 1].offset : -1;
          for (const e of chunkEntries) {
            if (e.offset > lastExistingTime) allEntries.push(e);
          }
        }

        // Recalculate durations
        for (let i = 0; i < allEntries.length - 1; i++) {
          allEntries[i].duration = allEntries[i + 1].offset - allEntries[i].offset;
        }
        if (allEntries.length > 0) allEntries[allEntries.length - 1].duration = 15;

        // Calculate total cost for chunked transcription
        const rates = PRICING[modelName] || PRICING["default"];
        const totalCostNum = (totalIn / 1e6) * rates.input + (totalOut / 1e6) * rates.output;
        const costDisplay = totalCostNum < 0.01 ? `$${(totalCostNum * 100).toFixed(3)}¢` : `$${totalCostNum.toFixed(4)}`;

        logFn(`Chunked transcription complete: ${allEntries.length} total segments`);
        logFn(`Total cost: ${totalIn.toLocaleString()} in / ${totalOut.toLocaleString()} out — cost: ${costDisplay}`);
        return {
          entries: allEntries,
          cost: {
            inputTokens: totalIn, outputTokens: totalOut, thinkingTokens: 0,
            totalTokens: totalIn + totalOut,
            totalCost: String(totalCostNum), totalCostDisplay: costDisplay,
          },
        };
      } finally {
        try { await fs.rm(chunkDir, { recursive: true, force: true }); } catch {}
      }
    }

    let entries;
    let transcriptText = "";
    let txCost = { inputTokens: 0, outputTokens: 0, thinkingTokens: 0, totalTokens: 0, totalCost: "0", totalCostDisplay: "$0.00" };
    const txStart = Date.now();

    if (needsChunking) {
      // ── Very long audio: go straight to chunked transcription ──
      log(2, `Skipping full-file attempt — using chunked transcription for ${audioDurMin} min audio`);
      const chunkedResult = await transcribeChunked(audioPath, mimeType, videoTitle, transcriptionModel, (msg) => log(2, msg));
      if (chunkedResult && chunkedResult.entries.length > 0) {
        entries = chunkedResult.entries;
        txCost = chunkedResult.cost;
      } else {
        log(2, `⚠ Chunked transcription returned no entries, trying full file as last resort...`);
        entries = null; // fall through to full-file attempt below
      }
    }

    if (!entries) {
      // ── Normal: transcribe full file ──
      const transcriptResult = await transcribeSingleFile(audioPath, mimeType, videoTitle, transcriptionModel);

      transcriptText = safeText(transcriptResult);
      if (!transcriptText) {
        log(2, `⚠ Full-file transcription returned empty — falling back to chunked transcription...`);
        const chunkedResult = await transcribeChunked(audioPath, mimeType, videoTitle, transcriptionModel, (msg) => log(2, msg));
        if (chunkedResult && chunkedResult.entries.length > 0) {
          entries = chunkedResult.entries;
          txCost = chunkedResult.cost;
        } else {
          throw new Error("Gemini returned empty transcription for both full file and chunked attempts. Try again or use a shorter video.");
        }
      } else {
        const txUsage = transcriptResult.usageMetadata || {};
        txCost = calcCost(transcriptionModel, txUsage);
        const txTime = ((Date.now() - txStart) / 1000).toFixed(1);
        log(2, `Transcription complete in ${txTime}s`, `${transcriptText.length} chars received`);
        log(2, `Transcription tokens: ${txCost.inputTokens.toLocaleString()} in / ${txCost.outputTokens.toLocaleString()} out${txCost.thinkingTokens ? ` / ${txCost.thinkingTokens.toLocaleString()} thinking` : ""} — cost: ${txCost.totalCostDisplay}`);

        entries = parseTimestampedTranscript(transcriptText);
        log(2, `Parsed ${entries.length} transcript segments`);
      }

      // ── Truncation detection → fall back to chunks ──
      if (audioDuration && entries.length > 0) {
        const lastEntryTime = entries[entries.length - 1].offset;
        const coverageRatio = lastEntryTime / audioDuration;
        const missingSeconds = audioDuration - lastEntryTime;

        if (coverageRatio < 0.90 && missingSeconds > 120) {
          log(2, `⚠ Transcript truncated — covers ${formatTime(lastEntryTime)} of ${formatTime(Math.floor(audioDuration))} (${(coverageRatio * 100).toFixed(0)}%)`);
          log(2, `Falling back to chunked transcription...`);
          const chunkedResult = await transcribeChunked(audioPath, mimeType, videoTitle, transcriptionModel, (msg) => log(2, msg));
          if (chunkedResult && chunkedResult.entries.length > 0) {
            entries = chunkedResult.entries;
            txCost = chunkedResult.cost;
            const finalCoverage = entries[entries.length - 1].offset;
            log(2, `Coverage after chunking: ${formatTime(finalCoverage)} of ${formatTime(Math.floor(audioDuration))}`);
          }
        } else {
          log(2, `Transcript coverage: ${formatTime(lastEntryTime)} of ${formatTime(Math.floor(audioDuration))} (${(coverageRatio * 100).toFixed(0)}%) — OK`);
        }
      }
    }

    const txTotalTime = ((Date.now() - txStart) / 1000).toFixed(1);
    log(2, `Total transcription time: ${txTotalTime}s — ${entries.length} segments`);

    if (!entries || entries.length === 0) {
      const preview = (transcriptText || "").slice(0, 500).replace(/\n/g, " ↵ ");
      log(2, `⚠ Transcript parse failed. Preview: ${preview}`);
      sendEvent(res, "error", { message: "Failed to parse transcript." });
      sendEvent(res, "result", { videoId, entries: [], chunks: [], rawTranscript: transcriptText });
      res.end();
      return;
    }

    // ── Step 3: Topic analysis with model fallback ──
    // The requested model is tried first, then the remaining models in order.
    const analysisFallbacks = [
      analysisModel,
      ...[
        "gemini-3.1-pro-preview",
        "gemini-3-pro-preview",
        "gemini-3-flash-preview",
        "gemini-2.5-flash",
      ].filter(m => m !== analysisModel),
    ];

    const analysisPrompt = buildAnalysisPrompt(entries);
    let analysisResult = null;
    let usedAnalysisModel = analysisModel;
    const anaStart = Date.now();

    for (const tryModel of analysisFallbacks) {
      try {
        log(3, `Analyzing topics across ${entries.length} segments with ${tryModel}...`);
        analysisResult = await retryGemini(
          () => ai.models.generateContent({
            model: tryModel,
            contents: [
              {
                role: "user",
                parts: [{ text: analysisPrompt }],
              },
            ],
          }),
          { retries: 2, delayMs: 5000, label: "Analysis", log: (msg) => log(3, msg) }
        );
        usedAnalysisModel = tryModel;
        break;
      } catch (fallbackErr) {
        const msg = fallbackErr?.message || String(fallbackErr);
        log(3, `⚠ ${tryModel} failed: ${msg.slice(0, 150)}`);
        if (tryModel !== analysisFallbacks[analysisFallbacks.length - 1]) {
          log(3, `Falling back to next model...`);
        }
      }
    }

    if (!analysisResult) {
      throw new Error("All analysis models failed. Please try again later.");
    }

    const analysisText = safeText(analysisResult);
    if (!analysisText) {
      throw new Error("Gemini returned an empty analysis. The transcript may be too long for the model. Try again.");
    }
    const anaTime = ((Date.now() - anaStart) / 1000).toFixed(1);
    const anaUsage = analysisResult.usageMetadata || {};
    const anaCost = calcCost(usedAnalysisModel, anaUsage);

    // Parse the analysis JSON (optionally wrapped in a Markdown code fence).
    // A well-formed document without a "sections" array is treated like invalid
    // JSON so the client still receives the transcript instead of a TypeError.
    let analysisJson;
    try {
      let jsonStr = analysisText.trim();
      const codeBlockMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
      if (codeBlockMatch) jsonStr = codeBlockMatch[1].trim();
      analysisJson = JSON.parse(jsonStr);
      if (!Array.isArray(analysisJson?.sections)) {
        throw new Error('missing "sections" array');
      }
    } catch (e) {
      console.error("Failed to parse analysis JSON:", e.message);
      sendEvent(res, "error", { message: "Topic analysis returned invalid JSON. Returning raw transcript." });
      sendEvent(res, "result", { videoId, entries, chunks: [], rawTranscript: transcriptText });
      res.end();
      return;
    }

    // Build final chunks: clamp each section's index range to the transcript and
    // drop malformed or empty sections.
    const chunks = analysisJson.sections
      .filter((section) => section !== null && typeof section === "object")
      .map((section) => {
        const start = Math.max(0, section.startIndex);
        const end = Math.min(entries.length - 1, section.endIndex);
        const sectionEntries = entries.slice(start, end + 1);
        return {
          title: section.title,
          summary: section.summary,
          entries: sectionEntries,
          startTime: sectionEntries[0]?.offset || 0,
        };
      })
      .filter((c) => c.entries.length > 0);

    const totalTime = ((Date.now() - pipelineStart) / 1000).toFixed(1);
    log(3, `Topic analysis complete in ${anaTime}s — found ${chunks.length} topics`);
    log(3, `Analysis tokens: ${anaCost.inputTokens.toLocaleString()} in / ${anaCost.outputTokens.toLocaleString()} out${anaCost.thinkingTokens ? ` / ${anaCost.thinkingTokens.toLocaleString()} thinking` : ""} — cost: ${anaCost.totalCostDisplay}`);

    // Total cost summary
    const totalCostNum = parseFloat(txCost.totalCost) + parseFloat(anaCost.totalCost);
    const totalCostDisplay = totalCostNum < 0.01 ? `$${(totalCostNum * 100).toFixed(3)}¢` : `$${totalCostNum.toFixed(4)}`;
    const totalTokens = (txCost.totalTokens + anaCost.totalTokens).toLocaleString();
    log(3, `Pipeline finished in ${totalTime}s — total cost: ${totalCostDisplay} (${totalTokens} tokens)`);

    // Save to history (best effort: a failed save yields historyId === null)
    const contentType = isPodcast ? "podcast" : "youtube";
    const historyId = await saveToHistory(videoId, url, videoTitle, chunks, entries, logHistory, videoUploadDate, contentType).catch(() => null);

    sendEvent(res, "result", { videoId, videoTitle, entries, chunks, historyId, type: contentType });
    res.end();

  } catch (err) {
    console.error("Pipeline error:", err);
    // Before the SSE headers are sent a regular JSON error is still possible;
    // afterwards the failure has to be reported as an SSE "error" event.
    if (!res.headersSent) {
      res.status(500).json({ error: err.message });
    } else {
      sendEvent(res, "error", { message: err.message });
      res.end();
    }
  } finally {
    // Clean up temp files
    try {
      await fs.rm(tmpDir, { recursive: true, force: true });
    } catch {}
  }
});
|
|
|
|
// ── Helpers ────────────────────────────────────────────────────────────────
|
|
|
|
// ── Audio duration helper (ffprobe) ─────────────────────────────────────
/**
 * Read the duration of a media file with ffprobe.
 *
 * @param {string} filePath - Path of the file to probe.
 * @returns {Promise<number|null>} Duration in seconds, or null when ffprobe
 *   fails, times out (15 s) or prints something that is not a number.
 */
async function getAudioDuration(filePath) {
  const probeArgs = [
    "-v", "error",
    "-show_entries", "format=duration",
    "-of", "default=noprint_wrappers=1:nokey=1",
    filePath,
  ];

  try {
    const probe = await execFileAsync("ffprobe", probeArgs, { timeout: 15000 });
    const seconds = Number.parseFloat(probe.stdout.trim());
    if (Number.isNaN(seconds)) {
      return null;
    }
    return seconds;
  } catch {
    // Any probing failure is reported as "duration unknown"
    return null;
  }
}
|
|
|
|
// ── Split audio into chunks with ffmpeg ─────────────────────────────────
/**
 * Split an audio file into consecutive MP3 chunks of at most `chunkSeconds`.
 *
 * Chunks are always written as `chunk_<n>.mp3`. A plain stream copy is only
 * valid when the source already is MP3 (the MP3 container accepts nothing
 * else), so other inputs (m4a, ogg, opus, wav, aac, ...) are re-encoded to MP3.
 *
 * @param {string} inputPath - Source audio file.
 * @param {string} outputDir - Existing directory that receives the chunk files.
 * @param {number} [chunkSeconds=2700] - Maximum chunk length in seconds.
 * @returns {Promise<Array<{path: string, startOffset: number, index: number}>|null>}
 *   Chunk descriptors in playback order, or null when the duration is unknown
 *   or the file is no longer than one chunk (no split needed).
 * @throws When an ffmpeg invocation fails or times out.
 */
async function splitAudioFile(inputPath, outputDir, chunkSeconds = 2700) {
  const duration = await getAudioDuration(inputPath);
  if (!duration || duration <= chunkSeconds) return null; // no split needed

  const isMp3Source = path.extname(inputPath).toLowerCase() === ".mp3";
  // Copy MP3 frames as-is; transcode anything else. "-vn" drops embedded cover
  // art, "-q:a 5" mirrors the VBR quality used for the yt-dlp downloads.
  // NOTE(review): assumes the installed ffmpeg provides libmp3lame.
  const codecArgs = isMp3Source
    ? ["-acodec", "copy"]
    : ["-vn", "-acodec", "libmp3lame", "-q:a", "5"];
  // Re-encoding is much slower than a stream copy, so allow it more time.
  const timeoutMs = isMp3Source ? 120000 : 600000;

  const chunks = [];
  for (let index = 0, startSec = 0; startSec < duration; index++, startSec += chunkSeconds) {
    const chunkPath = path.join(outputDir, `chunk_${index}.mp3`);
    const segLen = Math.min(chunkSeconds, duration - startSec);
    await execFileAsync("ffmpeg", [
      "-y", "-i", inputPath,
      "-ss", String(startSec),
      "-t", String(segLen),
      ...codecArgs,
      chunkPath,
    ], { timeout: timeoutMs });
    chunks.push({ path: chunkPath, startOffset: startSec, index });
  }
  return chunks;
}
|
|
|
|
/**
 * Write one Server-Sent Event to the response stream.
 *
 * The payload is JSON-encoded, so it always fits on a single `data:` line.
 * Nothing is written once the response has been ended or destroyed, which
 * avoids write-after-end errors from late pipeline callbacks.
 *
 * @param {object} res - Response-like object exposing `write()`.
 * @param {string} event - SSE event name.
 * @param {*} data - JSON-serialisable payload.
 */
function sendEvent(res, event, data) {
  if (res.writableEnded || res.destroyed) return;
  res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);
}
|
|
|
|
/**
 * Extract the 11-character YouTube video id from a URL or a bare id.
 *
 * Recognised forms: `youtube.com/watch?v=ID` (with `v` anywhere in the query
 * string), `youtu.be/ID`, `youtube.com/embed/ID`, `youtube.com/v/ID`,
 * `youtube.com/shorts/ID`, `youtube.com/live/ID`,
 * `youtube-nocookie.com/embed/ID`, and a bare `ID`.
 *
 * @param {string} url - URL or video id; surrounding whitespace is ignored.
 * @returns {string|null} The video id, or null when none can be found
 *   (including for empty or non-string input).
 */
function extractVideoId(url) {
  if (typeof url !== "string") return null;
  const candidate = url.trim();
  if (!candidate) return null;

  // Order matters: the original patterns come first so previously accepted
  // inputs keep resolving exactly as before.
  const patterns = [
    /(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/|youtube\.com\/v\/)([a-zA-Z0-9_-]{11})/,
    /^([a-zA-Z0-9_-]{11})$/,
    // `v` preceded by other query parameters, e.g. watch?feature=share&v=ID
    /youtube\.com\/watch\?(?:[^#\s]*&)?v=([a-zA-Z0-9_-]{11})/,
    /youtube\.com\/(?:shorts|live)\/([a-zA-Z0-9_-]{11})/,
    /youtube-nocookie\.com\/embed\/([a-zA-Z0-9_-]{11})/,
  ];

  for (const pattern of patterns) {
    const match = candidate.match(pattern);
    if (match) return match[1];
  }
  return null;
}
|
|
|
|
/**
 * Format a number of seconds as `M:SS`, or `H:MM:SS` from one hour upwards.
 *
 * Fractions are truncated. Negative, NaN, infinite or otherwise non-numeric
 * input is treated as 0 so the result is always a well-formed timestamp.
 *
 * @param {number} seconds - Time in seconds.
 * @returns {string} Formatted timestamp, e.g. "1:05" or "1:01:01".
 */
function formatTime(seconds) {
  const numeric = Number(seconds);
  const total = Number.isFinite(numeric) && numeric > 0 ? Math.floor(numeric) : 0;

  const hours = Math.floor(total / 3600);
  const minutes = Math.floor((total % 3600) / 60);
  const secs = String(total % 60).padStart(2, "0");

  if (hours > 0) {
    return `${hours}:${String(minutes).padStart(2, "0")}:${secs}`;
  }
  return `${minutes}:${secs}`;
}
|
|
|
|
/**
 * Parse a timestamped transcript into ordered entries.
 *
 * Each line is expected to carry a `MM:SS` or `H:MM:SS` timestamp, optionally
 * wrapped in brackets/parentheses/markdown emphasis and optionally preceded by
 * a speaker label. Lines without a recognisable timestamp or without text are
 * skipped. Minutes may have up to three digits so stamps such as `[100:05]`
 * (long audio without an hour field) are kept.
 *
 * @param {string} text - Raw transcript text.
 * @returns {Array<{text: string, offset: number, duration: number}>} Entries
 *   with `offset` in seconds. `duration` is the gap to the next entry (never
 *   negative, even if timestamps go backwards); the last entry gets 15.
 *   Non-string input yields an empty array.
 */
function parseTimestampedTranscript(text) {
  if (typeof text !== "string") return [];

  // Match timestamps in various formats:
  // [0:00], [00:00], [0:00:00], (0:00), 0:00 -, **0:00**, etc.
  const tsRegex = /^(?:[*_]*)?(?:\[?\(?)(\d{1,3}):(\d{2})(?::(\d{2}))?[\])]?(?:[*_]*)?\s*[-–—:]?\s*(.*)/;
  // Also match lines like "Speaker 1 [0:00]: text" or "**[0:00]** text"
  const altRegex = /^(?:.*?)[\[(\s](\d{1,3}):(\d{2})(?::(\d{2}))?[\])]\s*[-–—:]?\s*(.*)/;

  const entries = [];
  for (const rawLine of text.split("\n")) {
    const line = rawLine.trim();
    if (!line) continue;

    const m = line.match(tsRegex) ?? line.match(altRegex);
    if (!m) continue;

    // With three numeric groups the stamp is H:MM:SS, otherwise MM:SS.
    const hasHours = m[3] !== undefined;
    const hours = hasHours ? Number.parseInt(m[1], 10) : 0;
    const mins = Number.parseInt(hasHours ? m[2] : m[1], 10);
    const secs = Number.parseInt(hasHours ? m[3] : m[2], 10);
    const offset = hours * 3600 + mins * 60 + secs;

    // Strip any leftover markdown bold markers from the text
    const lineText = m[4].replace(/^\*\*\s*/, "").replace(/\s*\*\*$/, "").trim();
    if (lineText) entries.push({ text: lineText, offset, duration: 0 });
  }

  // Calculate durations from gaps between entries; the last segment has no
  // successor and gets a default length.
  entries.forEach((entry, i) => {
    const next = entries[i + 1];
    entry.duration = next ? Math.max(0, next.offset - entry.offset) : 15;
  });

  return entries;
}
|
|
|
|
/**
 * Build the topic-analysis prompt for a parsed transcript.
 *
 * Every entry is rendered as `[index] (timestamp) text` so the model can refer
 * to segments by index; the prompt asks for a JSON object with a `sections`
 * array of { title, summary, startIndex, endIndex }.
 *
 * @param {Array<{text: string, offset: number}>} entries - Transcript entries.
 * @returns {string} The complete prompt text.
 */
function buildAnalysisPrompt(entries) {
  const lastIndex = entries.length - 1;

  const transcriptLines = [];
  entries.forEach((entry, index) => {
    transcriptLines.push(`[${index}] (${formatTime(entry.offset)}) ${entry.text}`);
  });
  const numberedTranscript = transcriptLines.join("\n");

  // The template content is sent to the model verbatim, hence the column-0 lines.
  return `You are analyzing a video transcript. Your job is to identify natural topic boundaries and group the transcript into discussion-based sections.

TRANSCRIPT (each line is numbered with a timestamp):
${numberedTranscript}

INSTRUCTIONS:
1. Read the entire transcript carefully.
2. Identify where the discussion naturally shifts from one topic to another.
3. Group consecutive transcript segments by topic. Some sections may be short (a quick aside) and some may be long (an extended deep-dive). Let the content dictate the length.
4. For each section, write:
- A short, specific topic title (3-8 words)
- A 1-3 sentence summary of what's discussed
- The start and end segment indices (inclusive)

IMPORTANT:
- Sections must be chronological and non-overlapping.
- Every segment index from 0 to ${lastIndex} must belong to exactly one section.
- startIndex of section N+1 must equal endIndex of section N plus 1.
- Create as many or as few sections as the content naturally requires.
- Titles should be descriptive and specific, not generic like "Introduction" unless it truly is one.

Respond with ONLY valid JSON in this exact format, no other text:
{
"sections": [
{
"title": "Brief Topic Title",
"summary": "1-3 sentence summary of this discussion section.",
"startIndex": 0,
"endIndex": 15
}
]
}`;
}
|
|
|
|
// ── Network mode ──────────────────────────────────────────────────────────
|
|
// On StartOS (DATA_DIR=/data): always bind to 0.0.0.0 (container networking)
|
|
// On local Mac dev: default to localhost (safe on public Wi-Fi)
|
|
// - Your .app launcher sets LAN_MODE=true (Home) or false (Traveling)
|
|
// - Running "npm start" directly defaults to localhost
|
|
// Treat the process as running on StartOS only when DATA_DIR is set AND points
// somewhere other than the local-dev default (the parent of server/). Both
// sides are resolved to absolute, normalized paths so that a trailing slash or
// a relative DATA_DIR naming the default directory cannot flip the server into
// LAN mode (0.0.0.0) by accident.
const isStartOS =
  Boolean(process.env.DATA_DIR) &&
  path.resolve(process.env.DATA_DIR) !== path.resolve(__dirname, "..");

// StartOS always listens on all interfaces; local dev opts in via LAN_MODE=true.
const lanMode = isStartOS || process.env.LAN_MODE === "true";
const BIND_HOST = lanMode ? "0.0.0.0" : "127.0.0.1";

// Lets a client discover whether the server is reachable over the LAN.
app.get("/api/network-mode", (req, res) => {
  res.json({ lan: lanMode });
});
|
|
|
|
// ── Start server ───────────────────────────────────────────────────────────
|
|
|
|
// Start the HTTP server, then run one-time startup tasks: report/update
// yt-dlp, check subscriptions, re-queue items left in "processing" by a crash,
// and kick the background processor. Startup tasks are best-effort: a failure
// is logged instead of becoming an unhandled rejection, and the hourly
// subscription poll is scheduled regardless.
app.listen(PORT, BIND_HOST, async () => {
  console.log(`\n YouTube Summarizer API running on http://${BIND_HOST}:${PORT}`);
  console.log(` Data directory: ${DATA_DIR}`);
  console.log(` Checking yt-dlp...`);

  try {
    const info = await checkYtdlp();
    if (!info.installed) {
      console.log(` ⚠ yt-dlp not found. Install it: pip install yt-dlp\n`);
    } else if (info.updateAvailable) {
      console.log(` ✓ yt-dlp ${info.version} found`);
      console.log(` ↑ Update available: ${info.latestVersion}`);
      console.log(` Auto-updating...`);
      const result = await autoUpdateYtdlp();
      if (result.success) {
        // Re-check so the log shows the version that is actually installed now.
        const refreshed = await checkYtdlp();
        console.log(` ✓ yt-dlp updated to ${refreshed.version}\n`);
      } else {
        console.log(` ⚠ Auto-update failed. Run manually: yt-dlp -U\n`);
      }
    } else {
      console.log(` ✓ yt-dlp ${info.version} (up to date)\n`);
    }

    // Check subscriptions on startup
    const subs = await loadSubscriptions();
    if (subs.length > 0) {
      console.log(` 📡 Checking ${subs.length} subscription(s) for new videos...`);
      await checkSubscriptions().catch(err => console.error(" ⚠ Subscription check error:", err.message));
      const pending = autoQueue.filter(q => q.status === "pending").length;
      const approved = autoQueue.filter(q => q.status === "approved").length;
      if (pending > 0) console.log(` ✚ ${pending} new video(s) queued from subscriptions`);
      if (approved > 0) console.log(` ⏳ ${approved} approved video(s) ready for processing`);
      if (pending === 0 && approved === 0) console.log(` ✓ No new videos from subscriptions`);
    }

    // Recover any items stuck in "processing" state from a crash by putting
    // them back into the approved queue.
    for (const item of autoQueue) {
      if (item.status === "processing") {
        console.log(` ⚠ Recovering stuck item: ${item.title}`);
        item.status = "approved"; // Re-queue for processing
      }
    }

    // Resume processing any approved items (from a previous session or just recovered)
    if (autoQueue.some(q => q.status === "approved")) {
      await saveAutoQueue();
      console.log(` 🔄 Starting background processor...`);
      // Delay slightly so the server is fully ready before internal HTTP calls
      setTimeout(() => kickProcessor(), 2000);
    }

    console.log(` ⚙ Processing config: ${processingConfig.delaySeconds}s delay, ${processingConfig.enabled ? "enabled" : "paused"}`);
  } catch (err) {
    // The promise returned by this callback is not awaited by the caller, so
    // an uncaught rejection here would surface as an unhandled rejection.
    console.error(" ⚠ Startup tasks failed:", err?.message ?? err);
  }

  // Check subscriptions every hour (runs continuously on StartOS)
  setInterval(() => {
    checkSubscriptions().catch(err => console.error(" ⚠ Subscription check error:", err.message));
  }, 60 * 60 * 1000);
});
|