// Stateless helpers — no module-scoped state, no Express, no
// initialization side effects. Anything in here is safe to import from
// any other module without worrying about ordering. A few helpers do
// I/O (fetchUrl) but only when called.

import https from "https";

// ── SSE helper ──────────────────────────────────────────────────────────────
// Writes a single Server-Sent Events frame: `event: X\ndata: Y\n\n`.
// Each call writes one event. Caller is responsible for `res.writeHead`
// and `res.end()`.
//
//   res   — any object with a `write(string)` method (e.g. an HTTP response).
//   event — event name placed on the `event:` line.
//   data  — payload; serialized with JSON.stringify onto the `data:` line.
export function sendEvent(res, event, data) {
  res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);
}

// ── YouTube video-id extraction ─────────────────────────────────────────────
// Accepts watch URLs (with `v=` anywhere in the query string), youtu.be,
// /embed/, /v/, or a bare 11-char id. Surrounding whitespace is ignored.
// Returns the 11-character id, or null when no id can be extracted
// (including when `url` is not a string).
export function extractVideoId(url) {
  if (typeof url !== "string") return null;
  const candidate = url.trim();
  if (!candidate) return null;

  const patterns = [
    // `(?:[^#\s]*&)?` lets `v=` follow other query parameters, e.g.
    // `watch?feature=share&v=ID`, while still requiring `v` to be a whole
    // parameter name (preceded by `?` or `&`).
    /(?:youtube\.com\/watch\?(?:[^#\s]*&)?v=|youtu\.be\/|youtube\.com\/embed\/|youtube\.com\/v\/)([a-zA-Z0-9_-]{11})/,
    /^([a-zA-Z0-9_-]{11})$/,
  ];
  for (const pattern of patterns) {
    const match = candidate.match(pattern);
    if (match) return match[1];
  }
  return null;
}

// ── Time formatting ─────────────────────────────────────────────────────────
// Seconds → "M:SS" or "H:MM:SS" (auto-promotes to hours when needed).
// Fractional seconds are floored. Negative, NaN, or otherwise non-finite
// input is treated as 0 so the result is always a well-formed timestamp.
export function formatTime(seconds) {
  const numeric = Number(seconds);
  const total = Number.isFinite(numeric) && numeric > 0 ? Math.floor(numeric) : 0;

  const h = Math.floor(total / 3600);
  const m = Math.floor((total % 3600) / 60);
  const sec = total % 60;
  const ss = String(sec).padStart(2, "0");

  if (h > 0) return `${h}:${String(m).padStart(2, "0")}:${ss}`;
  return `${m}:${ss}`;
}

// ── Transcript parsing ──────────────────────────────────────────────────────
// Parses Gemini's timestamped transcript output into structured entries.
// Tolerates several formats: [0:00], (0:00), 0:00, **0:00**, with optional
// speaker prefixes and markdown noise. Each entry has { text, offset,
// duration }, where duration is computed from the gap to the next entry
// (last entry defaults to 15 s).
export function parseTimestampedTranscript(text) {
  // Non-string input (null/undefined from a failed upstream call) yields no
  // entries instead of throwing on `.trim()`.
  if (typeof text !== "string") return [];

  const lines = text.trim().split("\n").filter(Boolean);
  const entries = [];

  // Primary: timestamp at the start of the line.
  const tsRegex = /^(?:[*_]*)?(?:\[?\(?)(\d{1,2}):(\d{2})(?::(\d{2}))?[\])]?(?:[*_]*)?\s*[-–—:]?\s*(.*)/;
  // Secondary: timestamp anywhere on the line, e.g. "Speaker 1 [0:00]: text".
  const altRegex = /^(?:.*?)[\[(\s](\d{1,2}):(\d{2})(?::(\d{2}))?[\])]\s*[-–—:]?\s*(.*)/;

  for (const line of lines) {
    const trimmed = line.trim();
    const m = trimmed.match(tsRegex) ?? trimmed.match(altRegex);
    if (!m) continue;

    const [, first, second, third, rest] = m;
    // Three numeric groups mean H:MM:SS; two mean M:SS.
    const hasHours = third !== undefined;
    const hours = hasHours ? Number.parseInt(first, 10) : 0;
    const mins = Number.parseInt(hasHours ? second : first, 10);
    const secs = Number.parseInt(hasHours ? third : second, 10);
    const offset = hours * 3600 + mins * 60 + secs;

    // Strip a leading/trailing markdown bold marker left around the text.
    const lineText = rest.replace(/^\*\*\s*/, "").replace(/\s*\*\*$/, "").trim();
    if (lineText) entries.push({ text: lineText, offset, duration: 0 });
  }

  // Duration is the gap to the next entry; clamp at 0 so out-of-order
  // timestamps never produce a negative duration.
  for (let i = 0; i < entries.length - 1; i++) {
    entries[i].duration = Math.max(0, entries[i + 1].offset - entries[i].offset);
  }
  // The last entry has no successor to measure against; default to 15 s.
  if (entries.length > 0) {
    entries[entries.length - 1].duration = 15;
  }
  return entries;
}

// ── Safe text extraction from Gemini responses ──────────────────────────────
// The Gemini SDK's .text getter can throw or return undefined depending on
// response shape — fall back to digging into candidates manually.
// Returns "" when no text can be found; never throws.
export function safeText(result) {
  try {
    if (result.text) return result.text;
  } catch {
    // Deliberate best-effort: a throwing getter (or null `result`) just
    // means we try the candidates path below.
  }
  try {
    const parts = result?.candidates?.[0]?.content?.parts;
    // `p?.text` so one null/undefined part does not discard the text of
    // every other part.
    if (parts) return parts.map((p) => p?.text || "").join("");
  } catch {
    // Deliberate best-effort: unexpected shapes fall through to "".
  }
  return "";
}

// ── HTTP GET with redirect following ────────────────────────────────────────
// Returns the response body as a string. Follows HTTP 3xx redirects by
// re-requesting the URL given in the Location header. Used for fetching
// RSS feeds, channel pages, etc.
//
// For binary downloads (e.g.
// podcast audio), use audio.downloadPodcastAudio
// — it streams to disk instead of buffering in memory.
//
//   url          — https URL to GET.
//   maxRedirects — how many 3xx hops may still be followed (default 5).
//                  When exhausted the promise rejects, so a redirect loop
//                  cannot recurse forever.
//
// Resolves with the body for any non-redirect status (the status code is
// not inspected beyond 3xx). Rejects on request/response errors.
export function fetchUrl(url, maxRedirects = 5) {
  return new Promise((resolve, reject) => {
    https
      .get(url, (res) => {
        const { statusCode, headers } = res;

        if (statusCode >= 300 && statusCode < 400 && headers.location) {
          // Discard the redirect body so the socket is released.
          res.resume();

          if (maxRedirects <= 0) {
            reject(new Error(`Too many redirects while fetching ${url}`));
            return;
          }

          // Location may be relative; resolve it against the current URL.
          let nextUrl;
          try {
            nextUrl = new URL(headers.location, url).href;
          } catch {
            // Could not resolve against `url`; use the header verbatim and
            // let the next request report any problem with it.
            nextUrl = headers.location;
          }
          fetchUrl(nextUrl, maxRedirects - 1).then(resolve, reject);
          return;
        }

        // Decode as a stream so multi-byte UTF-8 sequences split across
        // chunk boundaries are not corrupted by per-chunk stringification.
        res.setEncoding("utf8");
        let data = "";
        res.on("data", (chunk) => {
          data += chunk;
        });
        res.on("end", () => resolve(data));
        res.on("error", reject);
      })
      .on("error", reject);
  });
}

// ── Retry helper for transient API errors ──────────────────────────────────
// Retries on 503/429/529 and on common transient network errors. Linear
// backoff (delayMs * attempt). The optional `log` callback receives a
// one-line status message per retry — useful for streaming progress to a
// UI. Provider-neutral: error shapes from @google/genai, @anthropic-ai/sdk,
// openai, and raw fetch all expose `.status` (or message text) we can match.
//
//   fn      — function to call; its (awaited) result is returned.
//   retries — maximum number of attempts in total. Values below 1 (or
//             non-numeric) are treated as 1 so `fn` always runs once.
//   delayMs — base backoff; attempt N waits delayMs * N before retrying.
//   label   — name used in log lines.
//   log     — optional callback receiving one-line progress messages.
//
// Throws the original error from `fn` when it is an abort, is not
// retryable, or the attempts are exhausted.
export async function retryAPI(fn, { retries = 3, delayMs = 3000, label = "API call", log: logFn } = {}) {
  const maxAttempts = Math.max(1, Math.trunc(Number(retries)) || 1);
  let lastErr;

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    // Announce each retry as it starts (attempts after the first) so the
    // user sees activity between the "failed, retrying in Ns" message and
    // the final outcome.
    if (attempt > 1 && logFn) {
      logFn(`Retrying ${label}... (attempt ${attempt}/${maxAttempts})`);
    }

    try {
      return await fn();
    } catch (err) {
      // User-cancelled requests must not be retried — re-throw so the
      // outer handler can treat it as a clean cancellation rather than
      // letting the retry loop log noise and burn time.
      if (err?.name === "AbortError" || /aborted|operation was aborted/i.test(err?.message || "")) {
        throw err;
      }
      lastErr = err;

      const msg = err?.message || String(err);
      const status = err?.status || err?.httpStatusCode || 0;
      const isRetryable =
        status === 503 ||
        status === 429 ||
        status === 529 ||
        /overloaded|unavailable|capacity|high demand|rate limit|fetch failed|ECONNRESET|ETIMEDOUT|socket hang up|network/i.test(msg);

      // Dump every detail we can pry out of the error so generic
      // messages like "500 status code (no body)" become debuggable
      // server-side. Anthropic/OpenAI SDK errors expose .response,
      // .body, .headers, .cause; Node stream errors expose .code.
      const richDetail = {
        status,
        code: err?.code,
        type: err?.type,
        body: err?.body || err?.response?.body || err?.error,
        cause: err?.cause?.message || err?.cause?.code || err?.cause,
      };

      // Serialization is diagnostic only: a circular or otherwise
      // unserializable detail must not replace the real error.
      let detailJson;
      try {
        detailJson = JSON.stringify(richDetail, (_k, v) => (typeof v === "bigint" ? v.toString() : v));
      } catch (serializeErr) {
        detailJson = `[unserializable error detail: ${serializeErr?.message || serializeErr}]`;
      }
      console.error(
        `[retryAPI] ${label} failed (attempt ${attempt}/${maxAttempts}, status=${status || "n/a"}): ${msg}`,
        detailJson
      );

      if (!isRetryable || attempt >= maxAttempts) {
        throw err;
      }

      const waitMs = delayMs * attempt;
      const waitSec = (waitMs / 1000).toFixed(0);
      if (logFn) {
        logFn(`⚠ ${label} failed (${status || "error"}), retrying in ${waitSec}s... (attempt ${attempt}/${maxAttempts})`);
      }
      await new Promise((r) => setTimeout(r, waitMs));
    }
  }

  // Unreachable in practice (every failing path above throws); kept as a
  // safeguard so the function never resolves to undefined after a failure.
  throw lastErr;
}

// Back-compat alias: pre-existing call sites used `retryGemini`. Keep
// the name working so this rename is non-breaking.
export const retryGemini = retryAPI;

// Split a plain-text transcript into synthetic sentence-based entries
// with interpolated timestamps. Used when a transcription provider
// returns just text (no per-segment timing) — e.g. NVIDIA Parakeet
// behind an OpenAI-compatible wrapper. Without this, the entire
// transcript lands in one entry at [0:00] and the analyzer can only
// produce a single section spanning the whole audio.
//
// Strategy:
//   1. Split on sentence terminators (. ! ?).
//      Keep the punctuation.
//   2. If no terminators (very rare in real speech), fall back to
//      30-word chunks.
//   3. Distribute timestamps proportionally by character count —
//      sentence N starts at (cum_chars_so_far / total_chars) *
//      audio_duration. Not perfectly accurate, but good enough to
//      let the analyzer carve out coherent topic sections.
//
//   text                 — plain transcript text (falsy → treated as "").
//   totalDurationSeconds — audio length in seconds.
//
// Returns an array of { offset, text, duration }. When the text is empty
// or the duration is not a positive finite number, a single entry at
// offset 0 holding the whole text is returned.
export function synthesizeEntriesFromText(text, totalDurationSeconds) {
  const t = String(text || "").trim();
  const total = Number(totalDurationSeconds);
  const hasDuration = Number.isFinite(total) && total > 0;
  if (!t || !hasDuration) {
    return [{ offset: 0, text: t, duration: hasDuration ? total : 0 }];
  }

  // Sentence split — keep the terminator on each sentence. A newline also
  // ends a chunk, so a line with no terminator before a line break is
  // kept as its own chunk instead of being dropped from the output.
  const sentenceMatches = t.match(/[^.!?\n]+[.!?]*/g) || [];
  let chunks = sentenceMatches.map((s) => s.trim()).filter(Boolean);

  // If we couldn't find sentence boundaries (unpunctuated transcript),
  // fall back to fixed-size word chunks.
  if (chunks.length <= 1) {
    const words = t.split(/\s+/).filter(Boolean);
    if (words.length <= 1) {
      return [{ offset: 0, text: t, duration: total }];
    }
    const wordsPerChunk = 30;
    chunks = [];
    for (let i = 0; i < words.length; i += wordsPerChunk) {
      chunks.push(words.slice(i, i + wordsPerChunk).join(" "));
    }
  }

  // Coalesce extremely short sentences (single words like "Yeah." or
  // "Right.") so we don't end up with hundreds of useless 5-char entries:
  // while the most recent chunk is still under the minimum, the next
  // chunk is appended to it.
  const COALESCE_MIN_CHARS = 40;
  const coalesced = [];
  for (const chunk of chunks) {
    const lastIndex = coalesced.length - 1;
    if (lastIndex >= 0 && coalesced[lastIndex].length < COALESCE_MIN_CHARS) {
      coalesced[lastIndex] = `${coalesced[lastIndex]} ${chunk}`.trim();
    } else {
      coalesced.push(chunk);
    }
  }

  // Distribute timestamps proportionally by character length.
  const totalChars = coalesced.reduce((sum, c) => sum + c.length, 0) || 1;
  const entries = [];
  let cumChars = 0;
  for (const chunk of coalesced) {
    const startRatio = cumChars / totalChars;
    cumChars += chunk.length;
    const endRatio = cumChars / totalChars;
    entries.push({
      offset: startRatio * total,
      text: chunk,
      // Floor of 0.1 s so no entry has a zero-length span.
      duration: Math.max(0.1, (endRatio - startRatio) * total),
    });
  }
  return entries;
}