cb961cd2d9
The video-id regex only matched /watch?v=, youtu.be, /embed/, and /v/ forms, so youtube.com/live/<id> and youtube.com/shorts/<id> links were rejected with "Invalid YouTube URL". Add both forms to the server and frontend extractors (kept in sync) and cover them with tests. Ship as 0.2.159.
244 lines
11 KiB
JavaScript
244 lines
11 KiB
JavaScript
// Stateless helpers — no module-scoped state, no Express, no
|
|
// initialization side effects. Anything in here is safe to import from
|
|
// any other module without worrying about ordering. A few helpers do
|
|
// I/O (fetchUrl) but only when called.
|
|
|
|
import https from "https";
|
|
|
|
// ── SSE helper ──────────────────────────────────────────────────────────────
|
|
// Writes a single Server-Sent Events frame: `event: X\ndata: Y\n\n`.
|
|
// Each call flushes one event. Caller is responsible for `res.writeHead`
|
|
// and `res.end()`.
|
|
/**
 * Write one Server-Sent Events frame to `res`.
 *
 * @param {{ write: (chunk: string) => unknown }} res - Any object exposing `write`.
 * @param {string} event - Value placed in the `event:` field.
 * @param {*} data - Payload; serialized with `JSON.stringify` into the `data:` field.
 * @returns {void}
 */
export function sendEvent(res, event, data) {
  const eventLine = `event: ${event}\n`;
  const dataLine = `data: ${JSON.stringify(data)}\n`;
  // The trailing blank line terminates the frame.
  res.write(`${eventLine}${dataLine}\n`);
}
|
|
|
|
// ── YouTube video-id extraction ─────────────────────────────────────────────
|
|
// Accepts watch URLs, youtu.be, /embed/, /v/, /live/, /shorts/, or a bare
|
|
// 11-char id. Returns null when no id can be extracted.
|
|
/**
 * Extract the 11-character YouTube video id from a URL or a bare id.
 *
 * @param {*} url - Candidate URL or id. Anything that is not a non-empty
 *   string yields `null`.
 * @returns {string|null} The id, or `null` when none can be extracted.
 */
export function extractVideoId(url) {
  // Non-string values have no usable `.match`; report "no id" instead of
  // throwing a TypeError.
  if (typeof url !== "string") return null;

  // Pasted links often carry stray whitespace or a trailing newline, which
  // would defeat the anchored bare-id pattern below.
  const candidate = url.trim();
  if (!candidate) return null;

  const patterns = [
    // Path-style forms, plus watch URLs where `v` is the first parameter.
    /(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/|youtube\.com\/v\/|youtube\.com\/live\/|youtube\.com\/shorts\/)([a-zA-Z0-9_-]{11})/,
    // Watch URLs where other parameters precede `v`,
    // e.g. /watch?app=desktop&v=<id> or /watch?list=...&v=<id>.
    /youtube\.com\/watch\?(?:[^#\s]*&)?v=([a-zA-Z0-9_-]{11})/,
    // A bare 11-character id.
    /^([a-zA-Z0-9_-]{11})$/,
  ];

  for (const pattern of patterns) {
    const match = candidate.match(pattern);
    if (match) return match[1];
  }
  return null;
}
|
|
|
|
// ── Time formatting ─────────────────────────────────────────────────────────
|
|
// Seconds → "M:SS" or "H:MM:SS" (auto-promotes to hours when needed).
|
|
/**
 * Format a duration in seconds as "M:SS", or "H:MM:SS" once it reaches an hour.
 *
 * @param {number} seconds - Duration; any fractional part is floored away.
 * @returns {string} The formatted time.
 */
export function formatTime(seconds) {
  const pad2 = (value) => String(value).padStart(2, "0");

  const whole = Math.floor(seconds);
  const hours = Math.floor(whole / 3600);
  const minutes = Math.floor((whole % 3600) / 60);
  const tail = pad2(whole % 60);

  // Minutes are zero-padded only when an hours field precedes them.
  return hours > 0 ? `${hours}:${pad2(minutes)}:${tail}` : `${minutes}:${tail}`;
}
|
|
|
|
// ── Transcript parsing ──────────────────────────────────────────────────────
|
|
// Parses Gemini's timestamped transcript output into structured entries.
|
|
// Tolerates several formats: [0:00], (0:00), 0:00, **0:00**, with optional
|
|
// speaker prefixes and markdown noise. Each entry has { text, offset,
|
|
// duration }, where duration is computed from the gap to the next entry
|
|
// (last entry defaults to 15 s).
|
|
/**
 * Parse timestamped transcript text into `{ text, offset, duration }` entries.
 *
 * @param {string} text - Transcript with one timestamped utterance per line.
 * @returns {Array<{ text: string, offset: number, duration: number }>}
 *   Entries in input order. `offset` is in seconds; `duration` is the gap to
 *   the next entry's offset, and 15 for the last entry. Lines with no
 *   recognizable timestamp, or with no text after it, are skipped.
 */
export function parseTimestampedTranscript(text) {
  // Timestamp leading the line, optionally wrapped in brackets/parens/markdown.
  const tsRegex = /^(?:[*_]*)?(?:\[?\(?)(\d{1,2}):(\d{2})(?::(\d{2}))?[\])]?(?:[*_]*)?\s*[-–—:]?\s*(.*)/;
  // Timestamp later on the line, e.g. "Speaker 1 [0:00]: text".
  const altRegex = /^(?:.*?)[\[(\s](\d{1,2}):(\d{2})(?::(\d{2}))?[\])]\s*[-–—:]?\s*(.*)/;

  const entries = [];
  const rawLines = text.trim().split("\n").filter(Boolean);

  rawLines.forEach((rawLine) => {
    const candidate = rawLine.trim();
    const match = candidate.match(tsRegex) || candidate.match(altRegex);
    if (!match) return;

    const [, first, second, third, rest] = match;
    // Two captured fields mean M:SS, three mean H:MM:SS. Folding base-60
    // left to right yields total seconds for both shapes.
    const offset = [first, second, third]
      .filter((field) => field !== undefined)
      .map((field) => Number.parseInt(field, 10))
      .reduce((total, field) => total * 60 + field, 0);

    // Strip a wrapping markdown bold marker from the utterance.
    const lineText = rest.replace(/^\*\*\s*/, "").replace(/\s*\*\*$/, "").trim();
    if (!lineText) return;

    entries.push({ text: lineText, offset, duration: 0 });
  });

  // Each entry lasts until the next one starts; the final entry gets 15 s.
  entries.forEach((entry, index) => {
    const following = entries[index + 1];
    entry.duration = following ? following.offset - entry.offset : 15;
  });

  return entries;
}
|
|
|
|
// ── Safe text extraction from Gemini responses ──────────────────────────────
|
|
// The Gemini SDK's .text getter can throw or return undefined depending on
|
|
// response shape — fall back to digging into candidates manually.
|
|
/**
 * Pull the text out of a model response without ever throwing.
 *
 * @param {*} result - Response object; may be null/undefined or oddly shaped.
 * @returns {string} `result.text` when it is truthy; otherwise the
 *   concatenated `text` of the first candidate's parts; otherwise "".
 */
export function safeText(result) {
  // First choice: the response's own `.text` accessor. A throw here is
  // deliberately ignored so the manual fallback below still runs.
  try {
    const hasDirectText = Boolean(result.text);
    if (hasDirectText) {
      return result.text;
    }
  } catch {}

  // Fallback: read the first candidate's parts by hand. Any shape problem
  // (non-array parts, null part, ...) lands in the catch and yields "".
  try {
    const firstCandidate = result?.candidates?.[0];
    const parts = firstCandidate?.content?.parts;
    if (parts) {
      const pieces = parts.map((part) => part.text || "");
      return pieces.join("");
    }
  } catch {}

  return "";
}
|
|
|
|
// ── HTTP GET with redirect following ────────────────────────────────────────
|
|
// Returns the response body as a string. Node's https module does not
// follow redirects on its own, so a 3xx response carrying a Location header
// is handled by calling fetchUrl again on the redirect target. Used for
// fetching RSS feeds, channel pages, etc.
|
|
//
|
|
// For binary downloads (e.g. podcast audio), use audio.downloadPodcastAudio
|
|
// — it streams to disk instead of buffering in memory.
|
|
/**
 * GET `url` over HTTPS and resolve with the response body decoded as UTF-8.
 *
 * @param {string|URL} url - Absolute `https:` URL to fetch.
 * @param {number} [maxRedirects=10] - Redirect hops allowed before the
 *   promise rejects, so a redirect loop cannot recurse forever.
 * @returns {Promise<string>} The body text. Apart from redirects the status
 *   code is not inspected, so error pages resolve as well.
 */
export function fetchUrl(url, maxRedirects = 10) {
  return new Promise((resolve, reject) => {
    https.get(url, (res) => {
      const { statusCode, headers } = res;

      if (statusCode >= 300 && statusCode < 400 && headers.location) {
        // Discard the redirect response body so its socket is released.
        res.resume();

        if (maxRedirects <= 0) {
          reject(new Error(`Too many redirects while fetching ${url}`));
          return;
        }

        // Location may be relative ("/feed.xml"); resolve it against the URL
        // just requested. If that is not possible, fall back to the raw
        // header value and let https.get reject it.
        let nextUrl = headers.location;
        try {
          nextUrl = new URL(headers.location, url).href;
        } catch {
          // keep the raw Location value
        }

        // A non-https target makes https.get throw inside the nested promise
        // executor, which surfaces here as a rejection.
        fetchUrl(nextUrl, maxRedirects - 1).then(resolve, reject);
        return;
      }

      // Decode as a stream so a multi-byte UTF-8 character split across two
      // chunks is not corrupted by per-chunk Buffer-to-string conversion.
      res.setEncoding("utf8");
      let data = "";
      res.on("data", (chunk) => {
        data += chunk;
      });
      res.on("end", () => resolve(data));
      res.on("error", reject);
    }).on("error", reject);
  });
}
|
|
|
|
// ── Retry helper for transient API errors ──────────────────────────────────
|
|
// Retries on 503/429/529 and on common transient network errors. Linear
|
|
// backoff (delayMs * attempt). The optional `log` callback receives a
|
|
// one-line status message per retry — useful for streaming progress to a
|
|
// UI. Provider-neutral: error shapes from @google/genai, @anthropic-ai/sdk,
|
|
// openai, and raw fetch all expose `.status` (or message text) we can match.
|
|
/**
 * Run `fn`, retrying with linear backoff when it fails with a transient error.
 *
 * @param {() => any} fn - Operation to attempt; its awaited result is returned.
 * @param {object} [options]
 * @param {number} [options.retries=3] - Total number of attempts. Values
 *   below 1 (or NaN) are treated as 1 so `fn` always runs at least once.
 * @param {number} [options.delayMs=3000] - Base delay; the wait after a
 *   failed attempt is `delayMs * attempt`.
 * @param {string} [options.label="API call"] - Name used in log lines.
 * @param {(message: string) => void} [options.log] - Receives one-line
 *   progress messages about retries.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 * @throws The error thrown by `fn` when it is an abort, is not retryable, or
 *   the final attempt failed.
 */
export async function retryAPI(fn, { retries = 3, delayMs = 3000, label = "API call", log: logFn } = {}) {
  // Guarantee one attempt. With `retries <= 0` the loop would otherwise never
  // call `fn` and the function would end by throwing `undefined`.
  const maxAttempts = retries >= 1 ? retries : 1;
  let lastErr;

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    // Announce each retry as it starts (not the first attempt), so the
    // activity log does not look frozen between the "failed, retrying in
    // Ns" line and the final outcome.
    if (attempt > 1 && logFn) {
      logFn(`Retrying ${label}... (attempt ${attempt}/${maxAttempts})`);
    }

    try {
      return await fn();
    } catch (err) {
      // User-cancelled requests must not be retried — re-throw so the
      // outer handler can treat it as a clean cancellation rather than
      // letting the retry loop log noise and burn time.
      if (err?.name === "AbortError" || /aborted|operation was aborted/i.test(err?.message || "")) {
        throw err;
      }

      lastErr = err;
      const msg = err?.message || String(err);
      const status = err?.status || err?.httpStatusCode || 0;
      const isRetryable = status === 503 || status === 429 || status === 529 || /overloaded|unavailable|capacity|high demand|rate limit|fetch failed|ECONNRESET|ETIMEDOUT|socket hang up|network/i.test(msg);

      // Dump every detail we can pry out of the error so generic messages
      // like "500 status code (no body)" become debuggable server-side.
      const richDetail = {
        status,
        code: err?.code,
        type: err?.type,
        body: err?.body || err?.response?.body || err?.error,
        cause: err?.cause?.message || err?.cause?.code || err?.cause,
      };

      // Serialization is best-effort: a circular body/cause would make
      // JSON.stringify throw and replace the real error with a TypeError.
      let detailJson;
      try {
        detailJson = JSON.stringify(richDetail, (_k, v) => (typeof v === "bigint" ? v.toString() : v));
      } catch (serializeErr) {
        detailJson = `[unserializable error detail: ${serializeErr?.message || serializeErr}]`;
      }

      console.error(
        `[retryAPI] ${label} failed (attempt ${attempt}/${maxAttempts}, status=${status || "n/a"}): ${msg}`,
        detailJson
      );

      if (!isRetryable || attempt >= maxAttempts) {
        throw err;
      }

      // Linear backoff: wait longer after each failed attempt.
      const waitMs = delayMs * attempt;
      const waitSec = (waitMs / 1000).toFixed(0);
      if (logFn) logFn(`⚠ ${label} failed (${status || "error"}), retrying in ${waitSec}s... (attempt ${attempt}/${maxAttempts})`);
      await new Promise((r) => setTimeout(r, waitMs));
    }
  }

  // Safety net; every loop iteration above either returns or throws.
  throw lastErr;
}
|
|
|
|
// Back-compat alias: pre-existing call sites used `retryGemini`. Keep
|
|
// the name working so this rename is non-breaking.
|
|
// Alias only: `retryGemini` and `retryAPI` are the same function object.
export const retryGemini = retryAPI;
|
|
|
|
// Split a plain-text transcript into synthetic sentence-based entries
|
|
// with interpolated timestamps. Used when a transcription provider
|
|
// returns just text (no per-segment timing) — e.g. NVIDIA Parakeet
|
|
// behind an OpenAI-compatible wrapper. Without this, the entire
|
|
// transcript lands in one entry at [0:00] and the analyzer can only
|
|
// produce a single section spanning the whole audio.
|
|
//
|
|
// Strategy:
|
|
// 1. Split on sentence terminators (. ! ?). Keep the punctuation.
|
|
// 2. If no terminators (very rare in real speech), fall back to
|
|
// 30-word chunks.
|
|
// 3. Distribute timestamps proportionally by character count —
|
|
// sentence N starts at (cum_chars_so_far / total_chars) *
|
|
// audio_duration. Not perfectly accurate, but good enough to
|
|
// let the analyzer carve out coherent topic sections.
|
|
/**
 * Split an untimed transcript into entries with interpolated timestamps.
 *
 * @param {string} text - Plain transcript text; null/undefined is treated as "".
 * @param {number} totalDurationSeconds - Length of the audio in seconds.
 * @returns {Array<{ offset: number, text: string, duration: number }>}
 *   One entry per sentence or chunk. Offsets and durations are proportional
 *   to character counts. With empty text or a missing/non-positive duration,
 *   a single entry at offset 0 is returned.
 */
export function synthesizeEntriesFromText(text, totalDurationSeconds) {
  const t = (text || "").trim();
  if (!t || !totalDurationSeconds || totalDurationSeconds <= 0) {
    // Never report a negative or NaN duration.
    const safeDuration = totalDurationSeconds > 0 ? totalDurationSeconds : 0;
    return [{ offset: 0, text: t, duration: safeDuration }];
  }

  // Sentence split — keep the terminator on each sentence. The `m` flag lets
  // `$` match at every line end; without it, a line that ends without
  // punctuation and is followed by a newline matches neither alternative
  // and its text is dropped.
  const sentenceMatches = t.match(/[^.!?\n]+[.!?]+|[^.!?\n]+$/gm) || [];
  let chunks = sentenceMatches.map((s) => s.trim()).filter(Boolean);

  // With at most one sentence boundary (e.g. an unpunctuated transcript),
  // fall back to fixed-size word chunks.
  if (chunks.length <= 1) {
    const words = t.split(/\s+/).filter(Boolean);
    if (words.length <= 1) {
      return [{ offset: 0, text: t, duration: totalDurationSeconds }];
    }
    const wordsPerChunk = 30;
    chunks = [];
    for (let i = 0; i < words.length; i += wordsPerChunk) {
      chunks.push(words.slice(i, i + wordsPerChunk).join(" "));
    }
  }

  // Coalesce very short chunks ("Yeah.", "Right."): while the most recent
  // entry is still under the minimum length, the next chunk is appended to
  // it instead of starting a new entry. Only the final entry can stay short.
  const COALESCE_MIN_CHARS = 40;
  const coalesced = [];
  for (const chunk of chunks) {
    const lastIndex = coalesced.length - 1;
    if (lastIndex >= 0 && coalesced[lastIndex].length < COALESCE_MIN_CHARS) {
      coalesced[lastIndex] = `${coalesced[lastIndex]} ${chunk}`.trim();
    } else {
      coalesced.push(chunk);
    }
  }

  // Distribute timestamps proportionally by character length.
  const totalChars = coalesced.reduce((sum, c) => sum + c.length, 0) || 1;
  const entries = [];
  let cumChars = 0;
  for (const c of coalesced) {
    const startRatio = cumChars / totalChars;
    cumChars += c.length;
    const endRatio = cumChars / totalChars;
    entries.push({
      offset: startRatio * totalDurationSeconds,
      text: c,
      // Floor of 0.1 s so no entry has zero length.
      duration: Math.max(0.1, (endRatio - startRatio) * totalDurationSeconds),
    });
  }
  return entries;
}
|