// recap/server/util.js

// Stateless helpers — no module-scoped state, no Express, no
// initialization side effects. Anything in here is safe to import from
// any other module without worrying about ordering. A few helpers do
// I/O (fetchUrl) but only when called.

import https from "https";

// ── SSE helper ──────────────────────────────────────────────────────────────
// Emits exactly one Server-Sent Events frame on `res`, shaped as
// `event: X\ndata: Y\n\n`, where Y is the JSON serialization of `data`.
// The frame is handed to `res.write` in a single call. This helper never
// calls `res.writeHead` or `res.end()` — both remain the caller's job.
export function sendEvent(res, event, data) {
  const eventLine = `event: ${event}`;
  const dataLine = `data: ${JSON.stringify(data)}`;
  // The two trailing empty strings become the blank line that ends the frame.
  res.write([eventLine, dataLine, "", ""].join("\n"));
}

// ── YouTube video-id extraction ─────────────────────────────────────────────
// Accepts watch URLs (with `v=` anywhere in the query string), youtu.be,
// /embed/, /v/, /shorts/, /live/, or a bare 11-char id. Leading and
// trailing whitespace is ignored.
// Returns null when no id can be extracted, including for non-string input.
export function extractVideoId(url) {
  if (typeof url !== "string") return null;
  const input = url.trim();
  if (!input) return null;

  const patterns = [
    // Long-standing forms come first so inputs that already resolved keep
    // resolving to exactly the same id.
    /(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/|youtube\.com\/v\/)([a-zA-Z0-9_-]{11})/,
    // Watch URL where `v` is not the first query parameter, e.g.
    // `watch?feature=share&v=ID`. Requiring `?` or `&` directly before `v=`
    // keeps parameters such as `av=` from matching.
    /youtube\.com\/watch\?(?:[^#\s]*?&)?v=([a-zA-Z0-9_-]{11})/,
    // Shorts and live-stream permalinks.
    /youtube\.com\/(?:shorts|live)\/([a-zA-Z0-9_-]{11})/,
    // Bare id.
    /^([a-zA-Z0-9_-]{11})$/,
  ];
  for (const pattern of patterns) {
    const match = input.match(pattern);
    if (match) return match[1];
  }
  return null;
}

// ── Time formatting ─────────────────────────────────────────────────────────
// Seconds → "M:SS" or "H:MM:SS" (auto-promotes to hours when needed).
// Fractional seconds are floored. Input that is not a finite, positive
// number (NaN, undefined, Infinity, negatives) renders as "0:00" instead of
// leaking strings like "NaN:NaN" or "-1:-5" into the UI.
export function formatTime(seconds) {
  const value = Number(seconds);
  const total = Number.isFinite(value) && value > 0 ? Math.floor(value) : 0;
  const hours = Math.floor(total / 3600);
  const minutes = Math.floor((total % 3600) / 60);
  const secs = total % 60;
  const pad = (n) => String(n).padStart(2, "0");
  // Minutes are zero-padded only when an hours component precedes them.
  return hours > 0 ? `${hours}:${pad(minutes)}:${pad(secs)}` : `${minutes}:${pad(secs)}`;
}

// ── Transcript parsing ──────────────────────────────────────────────────────
// Parses Gemini's timestamped transcript output into structured entries.
// Tolerates several formats: [0:00], (0:00), 0:00, **0:00**, with optional
// speaker prefixes and markdown noise. Each entry has { text, offset,
// duration }, where offset is in seconds and duration is the gap to the
// next entry (last entry defaults to 15 s). Lines without a recognizable
// timestamp, or with a timestamp but no text, are skipped.
// Non-string input yields an empty array rather than throwing.
export function parseTimestampedTranscript(text) {
  if (typeof text !== "string") return [];

  const lines = text.trim().split("\n").filter(Boolean);
  const entries = [];
  // Primary: timestamp at the start of the line.
  const tsRegex = /^(?:[*_]*)?(?:\[?\(?)(\d{1,2}):(\d{2})(?::(\d{2}))?[\])]?(?:[*_]*)?\s*[-–—:]?\s*(.*)/;
  // Secondary: timestamp anywhere on the line, e.g. "Speaker 1 [0:00]: text".
  const altRegex = /^(?:.*?)[\[(\s](\d{1,2}):(\d{2})(?::(\d{2}))?[\])]\s*[-–—:]?\s*(.*)/;

  for (const line of lines) {
    const trimmed = line.trim();
    const m = trimmed.match(tsRegex) || trimmed.match(altRegex);
    if (!m) continue;

    // A third numeric group means H:MM:SS; otherwise the first two groups
    // are M:SS.
    const hasHours = m[3] !== undefined;
    const hours = hasHours ? Number.parseInt(m[1], 10) : 0;
    const mins = Number.parseInt(hasHours ? m[2] : m[1], 10);
    const secs = Number.parseInt(hasHours ? m[3] : m[2], 10);
    const offset = hours * 3600 + mins * 60 + secs;

    // Strip bold markers left wrapping the text itself.
    const lineText = m[4].replace(/^\*\*\s*/, "").replace(/\s*\*\*$/, "").trim();
    if (lineText) entries.push({ text: lineText, offset, duration: 0 });
  }

  for (let i = 0; i < entries.length - 1; i++) {
    // Clamp at zero: model output occasionally has out-of-order timestamps,
    // and a negative duration would put an entry's end before its start.
    entries[i].duration = Math.max(0, entries[i + 1].offset - entries[i].offset);
  }
  if (entries.length > 0) {
    entries[entries.length - 1].duration = 15;
  }

  return entries;
}

// ── Safe text extraction from Gemini responses ──────────────────────────────
// The Gemini SDK's .text getter can throw or return undefined depending on
// response shape — fall back to digging into candidates manually.
// Always returns without throwing; yields "" when no text can be found.
export function safeText(result) {
  try {
    // Read the getter once: it may be expensive or have side effects, and a
    // second read is not guaranteed to behave like the first.
    const direct = result.text;
    if (direct) return direct;
  } catch {
    // Deliberate best-effort: fall through to the candidates fallback.
  }
  try {
    const parts = result?.candidates?.[0]?.content?.parts;
    // `part?.text` lets null or text-less parts contribute "" rather than
    // throwing and discarding the text of every other part.
    if (parts) return parts.map((part) => part?.text || "").join("");
  } catch {
    // Deliberate best-effort: unexpected response shape, report no text.
  }
  return "";
}

// ── HTTP GET with redirect following ────────────────────────────────────────
// Returns the response body as a UTF-8 string. Node's https module does not
// follow redirects on its own, so 3xx responses carrying a Location header
// are followed here, at most `maxRedirects` times (default 5); past that
// the promise rejects. Relative Location values are resolved against the
// URL that produced them. Any non-redirect response resolves with its body,
// whatever the status code. Used for fetching RSS feeds, channel pages, etc.
//
// For binary downloads (e.g. podcast audio), use audio.downloadPodcastAudio
// — it streams to disk instead of buffering in memory.
export function fetchUrl(url, maxRedirects = 5) {
  return new Promise((resolve, reject) => {
    https
      .get(url, (res) => {
        const { statusCode, headers } = res;
        const location = headers.location;

        if (statusCode >= 300 && statusCode < 400 && location) {
          // Discard the redirect body so the socket is released.
          res.resume();
          if (maxRedirects <= 0) {
            reject(new Error(`Too many redirects while fetching ${url}`));
            return;
          }
          let nextUrl = location;
          try {
            nextUrl = new URL(location, url).href;
          } catch {
            // `url` was not usable as a base: keep the raw header value. If
            // it is not a valid absolute URL the next request rejects.
          }
          fetchUrl(nextUrl, maxRedirects - 1).then(resolve, reject);
          return;
        }

        // Collect raw bytes and decode once at the end: decoding chunk by
        // chunk corrupts multi-byte characters split across chunk boundaries.
        const chunks = [];
        res.on("data", (chunk) => {
          chunks.push(typeof chunk === "string" ? Buffer.from(chunk) : chunk);
        });
        res.on("end", () => resolve(Buffer.concat(chunks).toString("utf8")));
        res.on("error", reject);
      })
      .on("error", reject);
  });
}

// ── Retry helper for transient API errors ──────────────────────────────────
// Retries on 503/429/529 and on common transient network errors. Linear
// backoff (delayMs * attempt). The optional `log` callback receives a
// one-line status message per retry — useful for streaming progress to a
// UI. Provider-neutral: error shapes from @google/genai, @anthropic-ai/sdk,
// openai, and raw fetch all expose `.status` (or message text) we can match.
//
// `retries` is the total number of attempts; `fn` is always invoked at
// least once, even when `retries` is 0 or not a number. Resolves with the
// first successful result of `fn`. Abort errors, non-retryable errors, and
// the error from the final attempt are re-thrown unchanged.

// Serializes error diagnostics for the server log. Never throws: SDK error
// bodies and causes can be circular, and a failure while logging must not
// replace the real error or cut the retry loop short.
function stringifyErrorDetail(detail) {
  try {
    return JSON.stringify(detail, (_key, value) => (typeof value === "bigint" ? value.toString() : value));
  } catch (serializeErr) {
    return `[unserializable error detail: ${serializeErr?.message || serializeErr}]`;
  }
}

export async function retryAPI(fn, { retries = 3, delayMs = 3000, label = "API call", log: logFn } = {}) {
  // Guarantee at least one attempt so `retries: 0` still runs `fn` instead
  // of falling straight through and throwing `undefined`.
  const maxAttempts = Math.max(1, Math.floor(Number(retries)) || 1);
  let lastErr;
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    // Announce each retry as it starts (not the first attempt) so the
    // activity log shows progress between the "failed, retrying in Ns"
    // message and the final outcome.
    if (attempt > 1 && logFn) {
      logFn(`Retrying ${label}... (attempt ${attempt}/${maxAttempts})`);
    }
    try {
      return await fn();
    } catch (err) {
      // User-cancelled requests must not be retried — re-throw so the
      // outer handler can treat it as a clean cancellation rather than
      // letting the retry loop log noise and burn time.
      if (err?.name === "AbortError" || /aborted|operation was aborted/i.test(err?.message || "")) {
        throw err;
      }
      lastErr = err;
      const msg = err?.message || String(err);
      const status = err?.status || err?.httpStatusCode || 0;
      const isRetryable = status === 503 || status === 429 || status === 529 || /overloaded|unavailable|capacity|high demand|rate limit|fetch failed|ECONNRESET|ETIMEDOUT|socket hang up|network/i.test(msg);
      // Dump every detail we can pry out of the error so generic
      // messages like "500 status code (no body)" become debuggable
      // server-side. Anthropic/OpenAI SDK errors expose .response,
      // .body, .headers, .cause; Node stream errors expose .code.
      const richDetail = {
        status,
        code: err?.code,
        type: err?.type,
        body: err?.body || err?.response?.body || err?.error,
        cause: err?.cause?.message || err?.cause?.code || err?.cause,
      };
      console.error(
        `[retryAPI] ${label} failed (attempt ${attempt}/${maxAttempts}, status=${status || "n/a"}): ${msg}`,
        stringifyErrorDetail(richDetail)
      );
      if (!isRetryable || attempt >= maxAttempts) {
        throw err;
      }
      const waitMs = delayMs * attempt;
      const waitSec = (waitMs / 1000).toFixed(0);
      if (logFn) logFn(`⚠ ${label} failed (${status || "error"}), retrying in ${waitSec}s... (attempt ${attempt}/${maxAttempts})`);
      await new Promise((resolveWait) => setTimeout(resolveWait, waitMs));
    }
  }
  // Defensive: every iteration above either returns or throws.
  throw lastErr;
}

// Back-compat alias: pre-existing call sites used `retryGemini`. Keep
// the name working so this rename is non-breaking. Both names are bound
// to the same function, so arguments, options, and retry behavior are
// identical whichever one is imported.
export const retryGemini = retryAPI;

// Split a plain-text transcript into synthetic sentence-based entries
// with interpolated timestamps. Used when a transcription provider
// returns just text (no per-segment timing) — e.g. NVIDIA Parakeet
// behind an OpenAI-compatible wrapper. Without this, the entire
// transcript lands in one entry at [0:00] and the analyzer can only
// produce a single section spanning the whole audio.
//
// Returns an array of { offset, text, duration } (seconds, fractional).
// When the text is empty or the duration is missing or non-positive, a
// single entry at offset 0 carrying the whole text is returned.
//
// Strategy:
//   1. Split on sentence terminators (. ! ?) and on newlines. Keep the
//      punctuation. A line that ends without a terminator is kept as
//      its own chunk rather than dropped.
//   2. If that yields at most one chunk (e.g. an unpunctuated
//      transcript), fall back to 30-word chunks.
//   3. Merge short chunks forward: a chunk under 40 chars absorbs the
//      chunks that follow it until it reaches that length. Only the
//      final entry can end up shorter.
//   4. Distribute timestamps proportionally by character count —
//      chunk N starts at (cum_chars_so_far / total_chars) *
//      audio_duration. Not perfectly accurate, but good enough to
//      let the analyzer carve out coherent topic sections.
export function synthesizeEntriesFromText(text, totalDurationSeconds) {
  const t = (text || "").trim();
  if (!t || !totalDurationSeconds || totalDurationSeconds <= 0) {
    return [{ offset: 0, text: t, duration: totalDurationSeconds || 0 }];
  }

  // Sentence split — keep the terminator on each sentence. The `m` flag
  // lets `$` match at every line end; without it, unterminated text that
  // precedes a newline matches neither alternative and is silently lost.
  const sentenceMatches = t.match(/[^.!?\n]+[.!?]+|[^.!?\n]+$/gm) || [];
  let chunks = sentenceMatches.map((s) => s.trim()).filter(Boolean);

  // If we couldn't find sentence boundaries (unpunctuated transcript),
  // fall back to fixed-size word chunks.
  if (chunks.length <= 1) {
    const words = t.split(/\s+/).filter(Boolean);
    if (words.length <= 1) {
      return [{ offset: 0, text: t, duration: totalDurationSeconds }];
    }
    const wordsPerChunk = 30;
    chunks = [];
    for (let i = 0; i < words.length; i += wordsPerChunk) {
      chunks.push(words.slice(i, i + wordsPerChunk).join(" "));
    }
  }

  // Coalesce extremely short sentences (single words like "Yeah." or
  // "Right.") so we don't end up with hundreds of useless 5-char entries:
  // while the most recent chunk is still below the minimum, the next
  // chunk is appended to it instead of starting a new entry.
  const COALESCE_MIN_CHARS = 40;
  const coalesced = [];
  for (const chunk of chunks) {
    const lastIndex = coalesced.length - 1;
    if (lastIndex >= 0 && coalesced[lastIndex].length < COALESCE_MIN_CHARS) {
      coalesced[lastIndex] = `${coalesced[lastIndex]} ${chunk}`.trim();
    } else {
      coalesced.push(chunk);
    }
  }

  // Distribute timestamps proportionally by character length.
  const totalChars = coalesced.reduce((sum, c) => sum + c.length, 0) || 1;
  const entries = [];
  let cumChars = 0;
  for (const chunk of coalesced) {
    const startRatio = cumChars / totalChars;
    cumChars += chunk.length;
    const endRatio = cumChars / totalChars;
    entries.push({
      offset: startRatio * totalDurationSeconds,
      text: chunk,
      // Floor at 0.1 s so no entry has a zero-length span.
      duration: Math.max(0.1, (endRatio - startRatio) * totalDurationSeconds),
    });
  }
  return entries;
}