// Phase 2 of Path 2A — meeting extras analysis.
//
// Runs a single LLM pass AFTER transcribe → diarize → cluster →
// analyze → name-inference → summary-polish complete. Pulls out five
// categories of structured information that operators consistently
// want at the top of an internal meeting recap:
//
//   - tldr           : 2-4 sentence executive summary + primary speakers
//   - decisions      : what was agreed on (with the offset where it was settled)
//   - action_items   : who owes what, by when (best-effort due_hint)
//   - open_questions : questions raised that didn't get resolved
//   - key_quotes     : notable statements worth surfacing verbatim
//
// Decisions, action items and key quotes carry a `supporting_offset`
// (or `offset`) in seconds so the dashboard can render the timestamp
// as a clickable jump to the corresponding transcript line. Items also
// carry speaker IDs (cluster ids like Speaker_A) so the renderer can
// show the speaker's colored chip + display name, and so an
// operator-rename or per-line override propagates here too.
//
// Returns:
//   {
//     tldr:           { summary, primary_speakers[] } | null,
//     decisions:      [{ statement, agreed_by[], supporting_offset }],
//     action_items:   [{ description, owner, due_hint, supporting_offset }],
//     open_questions: [{ question, raised_by, answered }],
//     key_quotes:     [{ speaker, offset, quote, why_notable }],
//   }
//
// or null on total failure. Failure is non-fatal — the meeting still
// saves with rec.extras = null and the dashboard just hides the
// extras section.

import { recordCall } from "./audit-log.js";

// Maximum number of LLM calls made for one extras pass before giving up.
const EXTRAS_MAX_ATTEMPTS = 3;

// Default prompt. `{{name}}` placeholders are substituted by
// fillTemplate(); `{{operatorContext}}` is either empty or a block that
// already ends in a blank line, which is why it sits directly in front
// of "SPEAKERS". The transcript line format described here must stay in
// sync with what formatLabeledTranscript() emits.
export const DEFAULT_MEETING_EXTRAS_PROMPT_TEMPLATE = `You are extracting structured information from an internal team meeting transcript. The transcript below has been pre-tagged with speaker labels like [A], [B], [C] (anonymous voice-clustering labels) and inferred real names where available.

MEETING METADATA:
- Title: {{title}}
- Duration: {{duration}}

{{operatorContext}}SPEAKERS (from voice clustering, with operator-confirmed names where present):
{{speakerRoster}}

TOPIC SUMMARIES (already produced — for context only, do not duplicate):
{{topics}}

TRANSCRIPT (each line is "[<letter> <MM:SS>] text"):
{{transcript}}

INSTRUCTIONS:

Extract FIVE categories of information from the meeting. Return EMPTY ARRAYS for categories that don't apply — do NOT invent items.

1. TLDR — A 2-4 sentence executive summary of the entire meeting: what it was about, the key discussion arc, and the bottom-line outcome. Write in past tense, third person. Keep it dense — every clause should carry information. Skip pleasantries and procedural opening/closing chatter. If a meeting was genuinely substanceless (a 3-minute check-in, audio test, etc.), write one factual descriptor sentence instead of padding. This is the only required category — even the most trivial meeting gets a one-sentence TLDR.
   - summary: the 2-4 sentence executive summary
   - primary_speakers: array of Speaker_X ids who drove the conversation (the 1-3 people most central to the discussion, in rough order of contribution). Empty array if unclear.

2. DECISIONS — Things explicitly decided / agreed during the meeting. Include only clear commitments ("we will do X", "let's go with Y"), not casual mentions. For each:
   - statement: the decision in one sentence
   - agreed_by: array of Speaker_X ids who explicitly agreed (use the chip-letter notation, e.g. ["Speaker_A", "Speaker_C"]). Empty array if unclear.
   - supporting_offset: integer SECONDS where this decision was made (use the [<letter> <MM:SS>] timestamp from the most relevant transcript line — convert MM:SS to total seconds)

3. ACTION_ITEMS — Specific commitments where someone said they would do something. Include only explicit ownership ("I'll send the doc", "Matt will follow up"), not vague "someone should...". For each:
   - description: the action in imperative form
   - owner: the Speaker_X id of the person taking it on (e.g. "Speaker_A"), or null if unclear
   - due_hint: the deadline as a string if mentioned ("by Friday", "end of week", "before next call"), or null
   - supporting_offset: integer seconds where the commitment was made

4. OPEN_QUESTIONS — Questions raised that were NOT clearly answered during the meeting. Skip rhetorical questions and questions that got direct answers. For each:
   - question: the question, rephrased to be self-contained
   - raised_by: the Speaker_X id who asked (or null if unclear)
   - answered: false (always — if it was answered, don't include it)

5. KEY_QUOTES — Statements worth surfacing verbatim because they are pivotal, particularly insightful, or capture a strong opinion. Limit to 3-6 quotes max. Skip filler and conversational text. For each:
   - speaker: the Speaker_X id of the speaker
   - offset: integer seconds where the quote occurs
   - quote: the verbatim quote (trim to the substantive sentence, 4-30 words)
   - why_notable: one short clause on why this is worth surfacing

Be conservative across all five. Better to return an empty array (or for TLDR, a single factual sentence) than to fabricate. A 5-minute small-talk call may legitimately have 0 decisions, 0 action items, 0 open questions, 0 key quotes — but it still gets a TLDR.

Respond with ONLY valid JSON in this exact shape, no other text:

{
  "tldr": {"summary": "...", "primary_speakers": ["Speaker_A", "Speaker_B"]},
  "decisions": [{"statement": "...", "agreed_by": ["Speaker_A"], "supporting_offset": 123}],
  "action_items": [{"description": "...", "owner": "Speaker_B", "due_hint": "by Friday", "supporting_offset": 234}],
  "open_questions": [{"question": "...", "raised_by": "Speaker_C", "answered": false}],
  "key_quotes": [{"speaker": "Speaker_A", "offset": 345, "quote": "...", "why_notable": "..."}]
}`;

/**
 * Substitute `{{ name }}` placeholders in a template string.
 *
 * Only OWN properties of `vars` are substituted. The template can be an
 * operator-supplied override, so a placeholder such as `{{constructor}}`
 * or `{{toString}}` must not resolve through Object.prototype.
 * Placeholders with no matching key are left in place, normalized to
 * `{{name}}`.
 *
 * @param {string} template - template text; falsy values are treated as "".
 * @param {Object} vars - placeholder name → value (stringified with String()).
 * @returns {string} the rendered text.
 */
function fillTemplate(template, vars) {
  const values = vars ?? {};
  return String(template || "").replace(/\{\{\s*(\w+)\s*\}\}/g, (_match, key) =>
    Object.prototype.hasOwnProperty.call(values, key)
      ? String(values[key])
      : `{{${key}}}`,
  );
}

/**
 * Render a duration as "1h 2m 3s" / "2m 3s" / "3s".
 *
 * @param {number} seconds - duration in seconds; negative, missing or
 *   non-finite values are rendered as "0s".
 * @returns {string} the human-readable duration.
 */
function formatDuration(seconds) {
  const numeric = Number(seconds);
  const total = Number.isFinite(numeric) ? Math.max(0, Math.floor(numeric)) : 0;
  const hours = Math.floor(total / 3600);
  const minutes = Math.floor((total % 3600) / 60);
  const secs = total % 60;
  if (hours > 0) return `${hours}h ${minutes}m ${secs}s`;
  if (minutes > 0) return `${minutes}m ${secs}s`;
  return `${secs}s`;
}

/**
 * Render transcript segments as one "[<letter> <M:SS>] text" line each.
 *
 * The letter is taken from a `Speaker_<LETTERS>` id and is "?" for any
 * other speaker value. Segments that are not objects, or whose text is
 * empty / not a string, are skipped. A missing, negative or non-finite
 * start time is rendered as 0:00.
 *
 * @param {Array<{text: string, start: number, speaker: string}>} segments
 * @returns {string} newline-joined transcript, or "" when there is nothing to render.
 */
function formatLabeledTranscript(segments) {
  if (!Array.isArray(segments) || segments.length === 0) return "";
  const lines = [];
  for (const seg of segments) {
    const text = typeof seg?.text === "string" ? seg.text.trim() : "";
    if (!text) continue;

    const speakerMatch = String(seg.speaker || "").match(/^Speaker_([A-Z]+)$/);
    const letter = speakerMatch ? speakerMatch[1] : "?";

    const start = Number(seg.start);
    const wholeSeconds = Number.isFinite(start) ? Math.max(0, Math.floor(start)) : 0;
    const mm = Math.floor(wholeSeconds / 60);
    const ss = String(wholeSeconds % 60).padStart(2, "0");

    lines.push(`[${letter} ${mm}:${ss}] ${text}`);
  }
  return lines.join("\n");
}

// Trim a too-large transcript by keeping the head and tail. Keeps
// the meeting's opening (introductions, agenda) AND closing (wrap-up,
// next steps) which are where most extras-worthy content lives.
/**
 * Cap `text` at roughly `maxChars` by keeping its head and tail and
 * replacing the middle with a truncation marker.
 *
 * Each kept half is `floor(maxChars / 2) - 50` characters, which leaves
 * room for the marker. When the budget is too small for that (the half
 * would be zero or negative) the text is simply cut to its first
 * `maxChars` characters, so the result is never longer than the input.
 *
 * @param {string} text
 * @param {number} maxChars
 * @returns {string}
 */
function capTranscript(text, maxChars) {
  if (text.length <= maxChars) return text;
  const half = Math.floor(maxChars / 2) - 50;
  if (!(half > 0)) {
    // Negative slice bounds would count from the other end and return
    // MORE than the input; fall back to a plain head cut.
    return text.slice(0, Math.max(0, Math.floor(maxChars)));
  }
  return (
    text.slice(0, half) +
    "\n\n…[middle truncated for prompt length]…\n\n" +
    text.slice(-half)
  );
}

// Cluster ids as produced by voice clustering: Speaker_A, Speaker_B, … Speaker_AA.
const SPEAKER_ID_RE = /^Speaker_[A-Z]+$/;

/** True when `value` is a string of the form Speaker_<LETTERS>. */
function isSpeakerId(value) {
  return typeof value === "string" && SPEAKER_ID_RE.test(value);
}

/** Keep only valid speaker ids from `value` (if it is an array), capped at `limit`. */
function speakerIdList(value, limit) {
  return Array.isArray(value) ? value.filter((x) => isSpeakerId(x)).slice(0, limit) : [];
}

/** Trimmed string, or "" when `value` is not a string. */
function trimmedString(value) {
  return typeof value === "string" ? value.trim() : "";
}

/**
 * Coerce an LLM-supplied offset to whole, non-negative seconds.
 *
 * Accepts finite numbers, plain numeric strings ("123", "12.5") and
 * clock strings ("2:03", "1:02:03"). Anything else yields null.
 */
function toOffsetSeconds(value) {
  let seconds = null;
  if (typeof value === "number") {
    seconds = value;
  } else if (typeof value === "string") {
    const s = value.trim();
    const clock = s.match(/^(?:(\d+):)?(\d+):([0-5]?\d)$/);
    if (clock) {
      const [, h = "0", m, sec] = clock;
      seconds = Number(h) * 3600 + Number(m) * 60 + Number(sec);
    } else if (/^\d+(?:\.\d+)?$/.test(s)) {
      seconds = Number(s);
    }
  }
  return Number.isFinite(seconds) ? Math.max(0, Math.floor(seconds)) : null;
}

/**
 * Parse JSON from an LLM reply. Tries the text as-is first, then the
 * span from the first "{" to the last "}" so a reply with stray prose
 * around the object does not cost a retry. Returns null when neither parses.
 */
function parseJsonLoosely(s) {
  try {
    return JSON.parse(s);
  } catch {
    // Not clean JSON — try the embedded-object fallback below.
  }
  const start = s.indexOf("{");
  const end = s.lastIndexOf("}");
  if (start === -1 || end <= start) return null;
  try {
    return JSON.parse(s.slice(start, end + 1));
  } catch {
    return null;
  }
}

/**
 * Parse and sanitize the raw LLM response for the extras pass.
 *
 * A ```-fenced payload is unwrapped first. The top-level value must be
 * a JSON object; arrays and primitives are rejected so the caller
 * retries instead of recording an empty "success". Within the object,
 * each category is coerced and clamped independently: malformed entries
 * are dropped rather than failing the whole pass, strings are trimmed
 * and length-capped, and speaker references that are not Speaker_X ids
 * become null / are removed.
 *
 * @param {string} text - raw model output.
 * @returns {{tldr: ?Object, decisions: Object[], action_items: Object[],
 *   open_questions: Object[], key_quotes: Object[]}|null} sanitized
 *   extras, or null when no JSON object could be parsed.
 */
function safeParseExtras(text) {
  if (!text || typeof text !== "string") return null;
  let s = text.trim();
  const fence = s.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (fence) s = fence[1].trim();

  const parsed = parseJsonLoosely(s);
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return null;

  const asArray = (v) => (Array.isArray(v) ? v : []);
  const isRecord = (v) => Boolean(v) && typeof v === "object";

  // TLDR — exactly one object (not an array). If the LLM omitted it or
  // gave an empty summary it stays null so the renderer can show
  // "TLDR unavailable" rather than fabricating.
  let tldr = null;
  if (isRecord(parsed.tldr) && !Array.isArray(parsed.tldr)) {
    const summary = trimmedString(parsed.tldr.summary);
    if (summary) {
      tldr = {
        summary: summary.slice(0, 800),
        primary_speakers: speakerIdList(parsed.tldr.primary_speakers, 5),
      };
    }
  }

  const decisions = asArray(parsed.decisions)
    .map((d) => {
      if (!isRecord(d)) return null;
      const statement = trimmedString(d.statement);
      if (!statement) return null;
      return {
        statement: statement.slice(0, 400),
        agreed_by: speakerIdList(d.agreed_by, 10),
        supporting_offset: toOffsetSeconds(d.supporting_offset),
      };
    })
    .filter(Boolean)
    .slice(0, 20);

  const action_items = asArray(parsed.action_items)
    .map((a) => {
      if (!isRecord(a)) return null;
      const description = trimmedString(a.description);
      if (!description) return null;
      const dueHint = trimmedString(a.due_hint);
      return {
        description: description.slice(0, 400),
        owner: isSpeakerId(a.owner) ? a.owner : null,
        due_hint: dueHint ? dueHint.slice(0, 80) : null,
        supporting_offset: toOffsetSeconds(a.supporting_offset),
      };
    })
    .filter(Boolean)
    .slice(0, 30);

  const open_questions = asArray(parsed.open_questions)
    .map((q) => {
      if (!isRecord(q)) return null;
      const question = trimmedString(q.question);
      if (!question) return null;
      return {
        question: question.slice(0, 400),
        raised_by: isSpeakerId(q.raised_by) ? q.raised_by : null,
        answered: q.answered === true,
      };
    })
    .filter(Boolean)
    .slice(0, 20);

  const key_quotes = asArray(parsed.key_quotes)
    .map((q) => {
      if (!isRecord(q)) return null;
      const quote = trimmedString(q.quote);
      if (!quote) return null;
      return {
        speaker: isSpeakerId(q.speaker) ? q.speaker : null,
        offset: toOffsetSeconds(q.offset),
        quote: quote.slice(0, 400),
        why_notable: trimmedString(q.why_notable).slice(0, 200),
      };
    })
    .filter(Boolean)
    .slice(0, 10);

  return { tldr, decisions, action_items, open_questions, key_quotes };
}

/** Build the roster lines — `- Speaker_A (chip [A], 12m 34s speaking, 7 turns): "Matt Hill"`. */
function buildSpeakerRoster(speakers, speakerNames) {
  return Object.keys(speakers || {})
    .filter((k) => SPEAKER_ID_RE.test(k))
    .sort()
    .map((k) => {
      const stats = speakers[k] || {};
      const secs = Math.round(stats.total_speaking_seconds || 0);
      const mins = Math.floor(secs / 60);
      const rem = secs % 60;
      const timeStr = mins > 0 ? `${mins}m ${rem}s` : `${rem}s`;
      const letter = k.replace("Speaker_", "");
      const name = speakerNames && speakerNames[k] ? `"${speakerNames[k]}"` : "(unknown)";
      return `- ${k} (chip [${letter}], ${timeStr} speaking, ${stats.turns || 0} turns): ${name}`;
    })
    .join("\n");
}

/** Build the numbered topic list, or "(no topics)" when there are none. */
function buildTopicsBlock(topics) {
  if (!Array.isArray(topics) || topics.length === 0) return "(no topics)";
  return topics
    .map((t, i) => {
      // `t?.` — a null/undefined entry renders as an untitled topic
      // instead of throwing while the prompt is being built.
      const startSec = t?.startTime || 0;
      const mm = Math.floor(startSec / 60);
      const ss = Math.floor(startSec % 60);
      const tStr = `${mm}:${String(ss).padStart(2, "0")}`;
      return `${i + 1}. [${tStr}] ${t?.title || "(untitled)"} — ${t?.summary || ""}`;
    })
    .join("\n");
}

/**
 * Compose the OPERATOR HINTS block. Returns "" when no hints were
 * supplied; otherwise the block ends with a blank line so it can be
 * placed directly in front of the next prompt section.
 */
function buildOperatorContext(participantHints, operatorNotes) {
  const hintsParts = [];
  if (participantHints && String(participantHints).trim()) {
    hintsParts.push(
      `Possible participants in this meeting (operator-supplied — may be incomplete):\n${String(participantHints).trim()}`,
    );
  }
  if (operatorNotes && String(operatorNotes).trim()) {
    const trimmed = String(operatorNotes).trim().slice(0, 4000);
    hintsParts.push(
      `Operator notes (may describe who said what — use as soft context, verify against the transcript before extracting decisions / action items / quotes):\n${trimmed}`,
    );
  }
  return hintsParts.length
    ? `OPERATOR HINTS (treat as suggestions only — verify against the transcript):\n\n${hintsParts.join("\n\n")}\n\n`
    : "";
}

/**
 * Run the meeting-extras LLM pass and return the sanitized result.
 *
 * Builds the prompt from the meeting metadata, speaker roster, topic
 * summaries and (capped) transcript, calls `backend.analyzeText` up to
 * EXTRAS_MAX_ATTEMPTS times until the reply parses, records one audit
 * entry via recordCall, and returns the parsed extras.
 *
 * Returns null without calling the backend or the audit log when
 * `backend` is missing or `transcriptSegments` is empty / not an array;
 * returns null after recording an "error" audit entry when every
 * attempt fails.
 *
 * Cost details are computed only for a successful pass, from the final
 * response; otherwise zeros are recorded.
 * NOTE(review): usage from earlier failed attempts is not included —
 * confirm that is the intended accounting.
 * NOTE(review): a rejection from recordCall propagates to the caller —
 * confirm recordCall is expected never to throw.
 *
 * @returns {Promise<Object|null>} `{ tldr, decisions, action_items,
 *   open_questions, key_quotes }` or null.
 */
export async function runMeetingExtras({
  title,
  audioSec,
  speakers,
  speakerNames,
  transcriptSegments,
  topics, // array of { title, summary, startTime } from analyze-then-polish
  promptOverride = "",
  // Operator-supplied hints (internal meetings only). participantHints
  // is a CSV-ish string of expected attendees; operatorNotes is free-
  // form prose describing who-said-what. Both are framed as hints in
  // the rendered prompt — the LLM is instructed to use them as soft
  // signals and verify against the transcript before quoting or
  // attributing. Empty → no OPERATOR HINTS block appears.
  participantHints = "",
  operatorNotes = "",
  backend,
  pipelineBackend,
  jobId,
  installId,
  licenseFingerprint = null,
  source,
  computeCostDetails,
}) {
  if (!backend) return null;
  if (!Array.isArray(transcriptSegments) || transcriptSegments.length === 0) return null;

  const speakerRoster = buildSpeakerRoster(speakers, speakerNames);
  const topicsBlock = buildTopicsBlock(topics);
  const cappedTranscript = capTranscript(formatLabeledTranscript(transcriptSegments), 25000);
  const operatorContextBlock = buildOperatorContext(participantHints, operatorNotes);

  const templateSource =
    typeof promptOverride === "string" && promptOverride.trim()
      ? promptOverride
      : DEFAULT_MEETING_EXTRAS_PROMPT_TEMPLATE;
  const prompt = fillTemplate(templateSource, {
    title: title || "(untitled)",
    duration: formatDuration(audioSec),
    operatorContext: operatorContextBlock,
    speakerRoster: speakerRoster || "(no speakers identified)",
    topics: topicsBlock,
    transcript: cappedTranscript || "(empty)",
  });

  const t0 = Date.now();
  let r = null;
  let parsed = null;
  let lastErr = null;
  for (let attempt = 0; attempt < EXTRAS_MAX_ATTEMPTS; attempt++) {
    try {
      r = await backend.analyzeText({ prompt });
      parsed = safeParseExtras(r.text);
      if (parsed) {
        lastErr = null;
        break;
      }
      // Keep `r` so the model name is still recorded for a reply that
      // arrived but could not be parsed.
      lastErr = "invalid JSON in extras response";
    } catch (err) {
      lastErr = (err?.message || String(err)).slice(0, 280);
      r = null;
    }
    if (attempt < EXTRAS_MAX_ATTEMPTS - 1) {
      console.warn(
        `[meeting-extras] attempt ${attempt + 1} failed (${lastErr}) — retrying`
      );
    }
  }
  const dur = Date.now() - t0;

  // A missing cost callback must not turn a successful (and already
  // paid-for) extraction into a thrown TypeError — fall back to zeros.
  const cost =
    parsed && r && typeof computeCostDetails === "function"
      ? computeCostDetails(r.model, r.usage)
      : { input_tokens: 0, output_tokens: 0, thinking_tokens: 0, cost_usd: 0 };

  await recordCall({
    install_id: installId,
    license_fingerprint: licenseFingerprint,
    tier: "core",
    pipeline: "meeting_extras",
    backend: pipelineBackend,
    model: r?.model || null,
    status: parsed ? "success" : "error",
    duration_ms: dur,
    audio_seconds: 0,
    job_id: jobId,
    batch_id: null,
    source,
    media_url: null,
    error: parsed ? null : lastErr || "extras analysis failed",
    ...cost,
  });

  if (!parsed) {
    console.warn(
      `[meeting-extras] all ${EXTRAS_MAX_ATTEMPTS} attempts failed (${lastErr}) — extras unavailable`
    );
    return null;
  }
  console.log(
    `[meeting-extras] extracted ${parsed.tldr ? "tldr + " : "(no tldr) + "}${parsed.decisions.length} decision(s), ${parsed.action_items.length} action(s), ${parsed.open_questions.length} question(s), ${parsed.key_quotes.length} quote(s) in ${(dur / 1000).toFixed(1)}s`
  );
  return parsed;
}