Add internal-meetings pipeline and post-hoc speaker tools

2026-06-13 13:35:53 -05:00
parent 9a2dbf69df
commit 705807e286
15 changed files with 7375 additions and 0 deletions
@@ -0,0 +1,376 @@
+// Phase 2 of Path 2A — meeting extras analysis.
+//
+// Runs a single LLM pass AFTER transcribe → diarize → cluster →
+// analyze → name-inference → summary-polish complete. Pulls out five
+// categories of structured information that operators consistently
+// want at the top of an internal meeting recap:
+//
+//   - tldr            : short executive summary plus the speakers who drove the meeting
+//   - decisions       : what was agreed on (with the offset where it was settled)
+//   - action_items    : who owes what, by when (best-effort due_hint)
+//   - open_questions  : questions raised that didn't get resolved
+//   - key_quotes      : notable statements worth surfacing verbatim
+//
+// Decisions and action items carry a `supporting_offset`, and key
+// quotes an `offset`, in seconds (null when the model supplied no
+// usable value) so the dashboard can render the timestamp as a
+// clickable jump to the corresponding transcript line. Open questions
+// carry no offset. Items also carry speaker IDs (cluster ids like
+// Speaker_A, or null / an empty array when unclear) so the renderer
+// can show the speaker's colored chip + display name, and so an
+// operator-rename or per-line override propagates here too.
+//
+// Returns:
+//   {
+//     tldr:           { summary, primary_speakers[] } | null,
+//     decisions:      [{ statement, agreed_by[], supporting_offset }],
+//     action_items:   [{ description, owner, due_hint, supporting_offset }],
+//     open_questions: [{ question, raised_by, answered }],
+//     key_quotes:     [{ speaker, offset, quote, why_notable }],
+//   }
+//
+// or null on total failure. Failure is non-fatal — the meeting still
+// saves with rec.extras = null and the dashboard just hides the
+// extras section.
+
+import { recordCall } from "./audit-log.js";
+
+// Upper bound on backend.analyzeText calls (the first try included) that
+// runMeetingExtras makes before it gives up and returns null.
+const EXTRAS_MAX_ATTEMPTS = 3;
+
+// Default prompt for the extras pass; runMeetingExtras uses it whenever
+// no non-blank promptOverride is supplied. fillTemplate substitutes the
+// placeholders {{title}}, {{duration}}, {{operatorContext}},
+// {{speakerRoster}}, {{topics}} and {{transcript}}. {{operatorContext}}
+// is either empty or a complete block ending in a blank line, which is
+// why it sits directly in front of "SPEAKERS" with no separator. The
+// JSON shape requested at the bottom is what safeParseExtras validates.
+export const DEFAULT_MEETING_EXTRAS_PROMPT_TEMPLATE = `You are extracting structured information from an internal team meeting transcript. The transcript below has been pre-tagged with speaker labels like [A], [B], [C] (anonymous voice-clustering labels) and inferred real names where available.
+
+MEETING METADATA:
+- Title: {{title}}
+- Duration: {{duration}}
+
+{{operatorContext}}SPEAKERS (from voice clustering, with operator-confirmed names where present):
+{{speakerRoster}}
+
+TOPIC SUMMARIES (already produced — for context only, do not duplicate):
+{{topics}}
+
+TRANSCRIPT (each line is "[<letter> <MM:SS>] text"):
+{{transcript}}
+
+INSTRUCTIONS:
+Extract FIVE categories of information from the meeting. Return EMPTY ARRAYS for categories that don't apply — do NOT invent items.
+
+1. TLDR — A 2-4 sentence executive summary of the entire meeting: what it was about, the key discussion arc, and the bottom-line outcome. Write in past tense, third person. Keep it dense — every clause should carry information. Skip pleasantries and procedural opening/closing chatter. If a meeting was genuinely substanceless (a 3-minute check-in, audio test, etc.), write one factual descriptor sentence instead of padding. This is the only required category — even the most trivial meeting gets a one-sentence TLDR.
+   - summary: the 2-4 sentence executive summary
+   - primary_speakers: array of Speaker_X ids who drove the conversation (the 1-3 people most central to the discussion, in rough order of contribution). Empty array if unclear.
+
+2. DECISIONS — Things explicitly decided / agreed during the meeting. Include only clear commitments ("we will do X", "let's go with Y"), not casual mentions. For each:
+   - statement: the decision in one sentence
+   - agreed_by: array of Speaker_X ids who explicitly agreed (use the chip-letter notation, e.g. ["Speaker_A", "Speaker_C"]). Empty array if unclear.
+   - supporting_offset: integer SECONDS where this decision was made (use the [<letter> <MM:SS>] timestamp from the most relevant transcript line — convert MM:SS to total seconds)
+
+3. ACTION_ITEMS — Specific commitments where someone said they would do something. Include only explicit ownership ("I'll send the doc", "Matt will follow up"), not vague "someone should...". For each:
+   - description: the action in imperative form
+   - owner: the Speaker_X id of the person taking it on (e.g. "Speaker_A"), or null if unclear
+   - due_hint: the deadline as a string if mentioned ("by Friday", "end of week", "before next call"), or null
+   - supporting_offset: integer seconds where the commitment was made
+
+4. OPEN_QUESTIONS — Questions raised that were NOT clearly answered during the meeting. Skip rhetorical questions and questions that got direct answers. For each:
+   - question: the question, rephrased to be self-contained
+   - raised_by: the Speaker_X id who asked (or null if unclear)
+   - answered: false (always — if it was answered, don't include it)
+
+5. KEY_QUOTES — Statements worth surfacing verbatim because they are pivotal, particularly insightful, or capture a strong opinion. Limit to 3-6 quotes max. Skip filler and conversational text. For each:
+   - speaker: the Speaker_X id of the speaker
+   - offset: integer seconds where the quote occurs
+   - quote: the verbatim quote (trim to the substantive sentence, 4-30 words)
+   - why_notable: one short clause on why this is worth surfacing
+
+Be conservative across all five. Better to return an empty array (or for TLDR, a single factual sentence) than to fabricate. A 5-minute small-talk call may legitimately have 0 decisions, 0 action items, 0 open questions, 0 key quotes — but it still gets a TLDR.
+
+Respond with ONLY valid JSON in this exact shape, no other text:
+{
+  "tldr": {"summary": "...", "primary_speakers": ["Speaker_A", "Speaker_B"]},
+  "decisions": [{"statement": "...", "agreed_by": ["Speaker_A"], "supporting_offset": 123}],
+  "action_items": [{"description": "...", "owner": "Speaker_B", "due_hint": "by Friday", "supporting_offset": 234}],
+  "open_questions": [{"question": "...", "raised_by": "Speaker_C", "answered": false}],
+  "key_quotes": [{"speaker": "Speaker_A", "offset": 345, "quote": "...", "why_notable": "..."}]
+}`;
+
+/**
+ * Substitute `{{name}}` placeholders in a prompt template.
+ *
+ * Whitespace inside the braces is tolerated (`{{ title }}`). A
+ * placeholder whose name is not an OWN property of `vars` is re-emitted
+ * as `{{name}}`, so a typo in an operator-supplied template stays
+ * visible in the rendered prompt instead of silently disappearing.
+ * Substituted values are not re-scanned for further placeholders.
+ *
+ * @param {string} template - Template text; falsy values are treated as "".
+ * @param {Object} [vars] - Placeholder name → value; null/undefined means "no values".
+ * @returns {string} The template with every known placeholder replaced by String(value).
+ */
+function fillTemplate(template, vars) {
+  const values = vars ?? {};
+  return String(template || "").replace(/\{\{\s*(\w+)\s*\}\}/g, (_match, key) => {
+    // Own-property check only: `in` would also match inherited names such
+    // as `toString` or `constructor` and paste a function's source text
+    // into the prompt.
+    return Object.prototype.hasOwnProperty.call(values, key)
+      ? String(values[key])
+      : `{{${key}}}`;
+  });
+}
+
+/**
+ * Render a length in seconds as "Xh Ym Zs", "Ym Zs" or "Zs".
+ *
+ * The input is floored to whole seconds and clamped at zero; falsy
+ * input counts as 0. Leading units that are zero are omitted, but once
+ * the hour unit is shown the minute unit is always shown as well.
+ *
+ * @param {number} seconds - Duration in seconds (may be fractional).
+ * @returns {string} Human-readable duration.
+ */
+function formatDuration(seconds) {
+  const total = Math.max(0, Math.floor(seconds || 0));
+  const hours = Math.floor(total / 3600);
+  const minutes = Math.floor((total % 3600) / 60);
+  // Build from the smallest unit outwards, prepending larger units as needed.
+  const parts = [`${total % 60}s`];
+  if (hours > 0 || minutes > 0) parts.unshift(`${minutes}m`);
+  if (hours > 0) parts.unshift(`${hours}h`);
+  return parts.join(" ");
+}
+
+/**
+ * Render transcript segments as prompt lines of the form
+ * "[<letter> <M:SS>] text", joined with newlines.
+ *
+ * - <letter> is the suffix of a `Speaker_<LETTERS>` id, or "?" when the
+ *   segment's speaker is missing or not in that form.
+ * - <M:SS> is the segment start floored to whole seconds; minutes are
+ *   not wrapped at 60, so a start of 3725s renders as "62:05".
+ * - Entries that are not objects, or whose text is missing, not a
+ *   string, or blank, are skipped instead of throwing, so one malformed
+ *   segment cannot abort the whole pass.
+ * - A missing, negative, or non-numeric start renders as 0:00.
+ *
+ * @param {Array<{text?: string, start?: number, speaker?: string}>} segments
+ * @returns {string} One line per usable segment; "" when there are none.
+ */
+function formatLabeledTranscript(segments) {
+  if (!Array.isArray(segments) || segments.length === 0) return "";
+  const lines = [];
+  for (const seg of segments) {
+    if (!seg || typeof seg !== "object") continue;
+    const text = typeof seg.text === "string" ? seg.text.trim() : "";
+    if (!text) continue;
+    const start = Number(seg.start);
+    const secInt = Number.isFinite(start) && start > 0 ? Math.floor(start) : 0;
+    const m = String(seg.speaker || "").match(/^Speaker_([A-Z]+)$/);
+    const letter = m ? m[1] : "?";
+    const mm = Math.floor(secInt / 60);
+    const ss = secInt % 60;
+    lines.push(`[${letter} ${mm}:${String(ss).padStart(2, "0")}] ${text}`);
+  }
+  return lines.join("\n");
+}
+
+// Trim a too-large transcript by keeping the head and tail. Keeps
+// the meeting's opening (introductions, agenda) AND closing (wrap-up,
+// next steps) which are where most extras-worthy content lives.
+//
+// text     : the transcript string to cap.
+// maxChars : character budget. A non-finite budget means "no cap".
+//
+// Returns `text` unchanged when it already fits. Otherwise returns
+// head + marker + tail, where head and tail are each
+// floor(maxChars / 2) - 50 characters; the 100 characters held back
+// leave room for the marker, so the result never exceeds maxChars.
+// When the budget is too small for that layout (under 102 characters)
+// the text is simply cut at maxChars: with a zero-length half,
+// `text.slice(-0)` would otherwise return the ENTIRE string and the
+// "capped" result would be longer than the input.
+function capTranscript(text, maxChars) {
+  const limit = Number(maxChars);
+  if (!Number.isFinite(limit) || text.length <= limit) return text;
+  const half = Math.floor(limit / 2) - 50;
+  if (half <= 0) return text.slice(0, Math.max(0, Math.floor(limit)));
+  return (
+    text.slice(0, half) +
+    "\n\n…[middle truncated for prompt length]…\n\n" +
+    text.slice(-half)
+  );
+}
+
+// Cluster ids look like Speaker_A, Speaker_B, … Speaker_AA.
+const SPEAKER_ID_PATTERN = /^Speaker_[A-Z]+$/;
+
+// Return `value` when it is a well-formed cluster id, otherwise null.
+function asSpeakerId(value) {
+  return typeof value === "string" && SPEAKER_ID_PATTERN.test(value) ? value : null;
+}
+
+// Keep only the well-formed cluster ids of `value`, at most `limit` of
+// them. Anything that is not an array yields [].
+function asSpeakerIdList(value, limit) {
+  if (!Array.isArray(value)) return [];
+  return value.filter((x) => asSpeakerId(x) !== null).slice(0, limit);
+}
+
+// Trimmed string, or "" for anything that is not a string.
+function asTrimmedString(value) {
+  return typeof value === "string" ? value.trim() : "";
+}
+
+// Normalize a model-supplied offset to whole, non-negative seconds.
+// Accepts finite numbers, plain numeric strings ("123"), and the
+// "MM:SS" form the transcript lines use ("2:03" → 123), because models
+// frequently echo the timestamp instead of converting it. Anything
+// else yields null.
+function asOffsetSeconds(value) {
+  let seconds = value;
+  if (typeof value === "string") {
+    const raw = value.trim();
+    const clock = raw.match(/^(\d+):([0-5]?\d)$/);
+    if (clock) {
+      seconds = Number(clock[1]) * 60 + Number(clock[2]);
+    } else if (/^\d+(\.\d+)?$/.test(raw)) {
+      seconds = Number(raw);
+    }
+  }
+  return Number.isFinite(seconds) ? Math.max(0, Math.floor(seconds)) : null;
+}
+
+// JSON.parse that reports failure as undefined (JSON.parse itself never
+// returns undefined, so the value is unambiguous).
+function tryParseJson(text) {
+  try {
+    return JSON.parse(text);
+  } catch {
+    return undefined;
+  }
+}
+
+// Run `build` over the object entries of `value` and keep the first
+// `limit` non-null results. Non-arrays yield []; non-object entries and
+// entries `build` rejects are dropped rather than failing the pass.
+function collectItems(value, limit, build) {
+  if (!Array.isArray(value)) return [];
+  const out = [];
+  for (const item of value) {
+    if (out.length >= limit) break;
+    if (!item || typeof item !== "object") continue;
+    const built = build(item);
+    if (built) out.push(built);
+  }
+  return out;
+}
+
+/**
+ * Parse and sanitize the model's extras response.
+ *
+ * Accepts bare JSON, JSON inside a ``` / ```json fence, or a JSON object
+ * with stray prose before/after it. The top-level value must be a plain
+ * object: a top-level array is rejected (it used to pass the
+ * `typeof === "object"` check and be reported as a successful, empty
+ * extraction, which suppressed the retry).
+ *
+ * Every category is coerced and clamped; entries that fail validation
+ * are dropped instead of failing the whole pass:
+ *   - tldr           : null unless it is an object with a non-blank
+ *                      summary (≤ 800 chars); ≤ 5 primary_speakers
+ *   - decisions      : ≤ 20, statement ≤ 400 chars, ≤ 10 agreed_by ids
+ *   - action_items   : ≤ 30, description ≤ 400 chars, due_hint ≤ 80 chars
+ *   - open_questions : ≤ 20, question ≤ 400 chars
+ *   - key_quotes     : ≤ 10, quote ≤ 400 chars, why_notable ≤ 200 chars
+ * Speaker fields that are not Speaker_<LETTERS> ids become null (or are
+ * filtered out of id arrays); unusable offsets become null.
+ *
+ * @param {string} text - Raw model output.
+ * @returns {{tldr: ?Object, decisions: Object[], action_items: Object[],
+ *   open_questions: Object[], key_quotes: Object[]} | null}
+ *   Sanitized extras, or null when no JSON object could be parsed.
+ */
+function safeParseExtras(text) {
+  if (!text || typeof text !== "string") return null;
+  let s = text.trim();
+  const fence = s.match(/```(?:json)?\s*([\s\S]*?)```/);
+  if (fence) s = fence[1].trim();
+  let parsed = tryParseJson(s);
+  if (parsed === undefined) {
+    // Fallback for "Here is the JSON: {...}" style replies: retry on
+    // the span from the first "{" to the last "}".
+    const open = s.indexOf("{");
+    const close = s.lastIndexOf("}");
+    if (open !== -1 && close > open) parsed = tryParseJson(s.slice(open, close + 1));
+  }
+  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return null;
+
+  // TLDR — exactly one object (not an array). If the model omitted it
+  // or left the summary blank we keep null so the renderer can say so
+  // rather than showing a fabricated summary.
+  let tldr = null;
+  if (parsed.tldr && typeof parsed.tldr === "object" && !Array.isArray(parsed.tldr)) {
+    const summary = asTrimmedString(parsed.tldr.summary);
+    if (summary) {
+      tldr = {
+        summary: summary.slice(0, 800),
+        primary_speakers: asSpeakerIdList(parsed.tldr.primary_speakers, 5),
+      };
+    }
+  }
+
+  const decisions = collectItems(parsed.decisions, 20, (d) => {
+    const statement = asTrimmedString(d.statement);
+    if (!statement) return null;
+    return {
+      statement: statement.slice(0, 400),
+      agreed_by: asSpeakerIdList(d.agreed_by, 10),
+      supporting_offset: asOffsetSeconds(d.supporting_offset),
+    };
+  });
+
+  const action_items = collectItems(parsed.action_items, 30, (a) => {
+    const description = asTrimmedString(a.description);
+    if (!description) return null;
+    return {
+      description: description.slice(0, 400),
+      owner: asSpeakerId(a.owner),
+      // Blank or non-string hints collapse to null.
+      due_hint: asTrimmedString(a.due_hint).slice(0, 80) || null,
+      supporting_offset: asOffsetSeconds(a.supporting_offset),
+    };
+  });
+
+  const open_questions = collectItems(parsed.open_questions, 20, (q) => {
+    const question = asTrimmedString(q.question);
+    if (!question) return null;
+    return {
+      question: question.slice(0, 400),
+      raised_by: asSpeakerId(q.raised_by),
+      // Only a literal boolean true counts as answered.
+      answered: q.answered === true,
+    };
+  });
+
+  const key_quotes = collectItems(parsed.key_quotes, 10, (q) => {
+    const quote = asTrimmedString(q.quote);
+    if (!quote) return null;
+    return {
+      speaker: asSpeakerId(q.speaker),
+      offset: asOffsetSeconds(q.offset),
+      quote: quote.slice(0, 400),
+      why_notable: asTrimmedString(q.why_notable).slice(0, 200),
+    };
+  });
+
+  return { tldr, decisions, action_items, open_questions, key_quotes };
+}
+
+// Render one roster line per voice cluster, e.g.
+//   - Speaker_A (chip [A], 12m 34s speaking, 41 turns): "Matt Hill"
+// Keys of `speakers` that are not Speaker_<LETTERS> ids are ignored;
+// clusters without an entry in `speakerNames` are labelled (unknown).
+function buildSpeakerRoster(speakers, speakerNames) {
+  return Object.keys(speakers || {})
+    .filter((k) => /^Speaker_[A-Z]+$/.test(k))
+    .sort()
+    .map((k) => {
+      const stats = speakers[k] || {};
+      const secs = Math.round(stats.total_speaking_seconds || 0);
+      const mins = Math.floor(secs / 60);
+      const rem = secs % 60;
+      const timeStr = mins > 0 ? `${mins}m ${rem}s` : `${rem}s`;
+      const letter = k.replace("Speaker_", "");
+      const name = speakerNames && speakerNames[k] ? `"${speakerNames[k]}"` : "(unknown)";
+      return `- ${k} (chip [${letter}], ${timeStr} speaking, ${stats.turns || 0} turns): ${name}`;
+    })
+    .join("\n");
+}
+
+// Render the numbered topic list ("1. [M:SS] title — summary"), or
+// "(no topics)" when there is nothing to list. A null entry or a
+// missing / negative / non-numeric startTime renders as 0:00 instead of
+// throwing or printing "NaN:NaN", so a malformed topic cannot turn this
+// best-effort pass into a hard failure.
+function buildTopicsBlock(topics) {
+  if (!Array.isArray(topics) || topics.length === 0) return "(no topics)";
+  return topics
+    .map((t, i) => {
+      const raw = Number(t?.startTime);
+      const startSec = Number.isFinite(raw) && raw > 0 ? raw : 0;
+      const mm = Math.floor(startSec / 60);
+      const ss = Math.floor(startSec % 60);
+      const tStr = `${mm}:${String(ss).padStart(2, "0")}`;
+      return `${i + 1}. [${tStr}] ${t?.title || "(untitled)"} — ${t?.summary || ""}`;
+    })
+    .join("\n");
+}
+
+// Compose the OPERATOR HINTS block — same shape as the name-
+// inference pipeline so the LLM gets consistent framing across
+// both passes. Returns "" when no hints were supplied; otherwise the
+// block ends with a blank line so it can be dropped straight in front
+// of the next prompt section. Operator notes are cut at 4000 chars.
+function buildOperatorContext(participantHints, operatorNotes) {
+  const hintsParts = [];
+  const participants = participantHints ? String(participantHints).trim() : "";
+  if (participants) {
+    hintsParts.push(
+      `Possible participants in this meeting (operator-supplied — may be incomplete):\n${participants}`,
+    );
+  }
+  const notes = operatorNotes ? String(operatorNotes).trim().slice(0, 4000) : "";
+  if (notes) {
+    hintsParts.push(
+      `Operator notes (may describe who said what — use as soft context, verify against the transcript before extracting decisions / action items / quotes):\n${notes}`,
+    );
+  }
+  return hintsParts.length
+    ? `OPERATOR HINTS (treat as suggestions only — verify against the transcript):\n\n${hintsParts.join("\n\n")}\n\n`
+    : "";
+}
+
+/**
+ * Run the meeting-extras LLM pass and return the sanitized result.
+ *
+ * Builds the prompt (speaker roster, topic list, transcript capped at
+ * 25000 characters, optional operator hints), calls
+ * `backend.analyzeText({ prompt })` up to EXTRAS_MAX_ATTEMPTS times until
+ * safeParseExtras accepts the reply, writes exactly one audit record via
+ * recordCall for the whole pass, and returns the parsed extras.
+ *
+ * @param {Object} args
+ * @param {string} args.title - Meeting title; "(untitled)" when falsy.
+ * @param {number} args.audioSec - Meeting length in seconds (prompt metadata only).
+ * @param {Object} args.speakers - Speaker_X id → stats; `total_speaking_seconds` and `turns` are read.
+ * @param {Object} args.speakerNames - Speaker_X id → display name.
+ * @param {Array} args.transcriptSegments - Segments with `text`, `start`, `speaker`.
+ * @param {Array} args.topics - `{ title, summary, startTime }` entries.
+ * @param {string} [args.promptOverride] - Replacement template; blank → default template.
+ * @param {string} [args.participantHints] - Expected attendees, free text.
+ * @param {string} [args.operatorNotes] - Free-form operator notes.
+ * @param {Object} args.backend - Must expose async `analyzeText({ prompt })`; its result's
+ *   `text`, `model` and `usage` are read.
+ * @param {*} args.pipelineBackend - Stored as `backend` in the audit record.
+ * @param {*} args.jobId - Stored as `job_id` in the audit record.
+ * @param {*} args.installId - Stored as `install_id` in the audit record.
+ * @param {*} [args.licenseFingerprint] - Stored as `license_fingerprint` in the audit record.
+ * @param {*} args.source - Stored as `source` in the audit record.
+ * @param {Function} args.computeCostDetails - Called as `(model, usage)`; the returned object is
+ *   spread into the audit record (presumably token counts and cost_usd — confirm against the
+ *   implementation).
+ * @returns {Promise<Object|null>} Sanitized extras, or null when there is no backend, no
+ *   transcript, or every attempt failed.
+ */
+export async function runMeetingExtras({
+  title,
+  audioSec,
+  speakers,
+  speakerNames,
+  transcriptSegments,
+  topics, // array of { title, summary, startTime } from analyze-then-polish
+  promptOverride = "",
+  // Operator-supplied hints (internal meetings only). participantHints
+  // is a CSV-ish string of expected attendees; operatorNotes is free-
+  // form prose describing who-said-what. Both are framed as hints in
+  // the rendered prompt — the LLM is instructed to use them as soft
+  // signals and verify against the transcript before quoting or
+  // attributing. Empty → no OPERATOR HINTS block appears.
+  participantHints = "",
+  operatorNotes = "",
+  backend,
+  pipelineBackend,
+  jobId,
+  installId,
+  licenseFingerprint = null,
+  source,
+  computeCostDetails,
+}) {
+  if (!backend) return null;
+  if (!Array.isArray(transcriptSegments) || transcriptSegments.length === 0) return null;
+
+  const speakerRoster = buildSpeakerRoster(speakers, speakerNames);
+  const topicsBlock = buildTopicsBlock(topics);
+  const fullTranscript = formatLabeledTranscript(transcriptSegments);
+  const cappedTranscript = capTranscript(fullTranscript, 25000);
+  const operatorContextBlock = buildOperatorContext(participantHints, operatorNotes);
+
+  const templateSource =
+    typeof promptOverride === "string" && promptOverride.trim()
+      ? promptOverride
+      : DEFAULT_MEETING_EXTRAS_PROMPT_TEMPLATE;
+  const prompt = fillTemplate(templateSource, {
+    title: title || "(untitled)",
+    duration: formatDuration(audioSec),
+    operatorContext: operatorContextBlock,
+    speakerRoster: speakerRoster || "(no speakers identified)",
+    topics: topicsBlock,
+    transcript: cappedTranscript || "(empty)",
+  });
+
+  const t0 = Date.now();
+  let r = null;
+  let parsed = null;
+  let lastErr = null;
+  for (let attempt = 0; attempt < EXTRAS_MAX_ATTEMPTS; attempt++) {
+    try {
+      r = await backend.analyzeText({ prompt });
+      parsed = safeParseExtras(r.text);
+      if (parsed) {
+        lastErr = null;
+        break;
+      }
+      // The call succeeded but the reply was unusable: keep `r` so the
+      // audit record still names the model.
+      lastErr = "invalid JSON in extras response";
+    } catch (err) {
+      lastErr = (err?.message || String(err)).slice(0, 280);
+      r = null;
+    }
+    if (attempt < EXTRAS_MAX_ATTEMPTS - 1) {
+      console.warn(
+        `[meeting-extras] attempt ${attempt + 1} failed (${lastErr}) — retrying`
+      );
+    }
+  }
+  const dur = Date.now() - t0;
+  // Usage is only costed for the attempt that produced usable output;
+  // a pass that failed on every attempt is recorded with zero cost.
+  const cost =
+    parsed && r
+      ? computeCostDetails(r.model, r.usage)
+      : { input_tokens: 0, output_tokens: 0, thinking_tokens: 0, cost_usd: 0 };
+  await recordCall({
+    install_id: installId,
+    license_fingerprint: licenseFingerprint,
+    tier: "core",
+    pipeline: "meeting_extras",
+    backend: pipelineBackend,
+    model: r?.model || null,
+    status: parsed ? "success" : "error",
+    duration_ms: dur,
+    audio_seconds: 0,
+    job_id: jobId,
+    batch_id: null,
+    source,
+    media_url: null,
+    error: parsed ? null : lastErr || "extras analysis failed",
+    ...cost,
+  });
+  if (!parsed) {
+    console.warn(
+      `[meeting-extras] all ${EXTRAS_MAX_ATTEMPTS} attempts failed (${lastErr}) — extras unavailable`
+    );
+    return null;
+  }
+  console.log(
+    `[meeting-extras] extracted ${parsed.tldr ? "tldr + " : "(no tldr) + "}${parsed.decisions.length} decision(s), ${parsed.action_items.length} action(s), ${parsed.open_questions.length} question(s), ${parsed.key_quotes.length} quote(s) in ${(dur / 1000).toFixed(1)}s`
+  );
+  return parsed;
+}