Add internal-meetings pipeline and post-hoc speaker tools

This commit is contained in:
Keysat
2026-06-13 13:35:53 -05:00
parent 9a2dbf69df
commit 705807e286
15 changed files with 7375 additions and 0 deletions
+376
View File
@@ -0,0 +1,376 @@
// Phase 2 of Path 2A — meeting extras analysis.
//
// Runs a single LLM pass AFTER transcribe → diarize → cluster →
// analyze → name-inference → summary-polish complete. Pulls out five
// categories of structured information that operators consistently
// want at the top of an internal meeting recap:
//
// - tldr : 2-4 sentence executive summary plus the primary speakers
// - decisions : what was agreed on (with the offset where it was settled)
// - action_items : who owes what, by when (best-effort due_hint)
// - open_questions : questions raised that didn't get resolved
// - key_quotes : notable statements worth surfacing verbatim
//
// Decisions and action items carry a `supporting_offset`, and key
// quotes an `offset`, in seconds so the dashboard can render the
// timestamp as a clickable jump to the corresponding transcript line
// (open questions carry no offset). Items also carry speaker IDs
// (cluster ids like Speaker_A) so the renderer can show the speaker's
// colored chip + display name, and so an operator rename or per-line
// override propagates here too.
//
// Returns:
// {
// tldr: { summary, primary_speakers[] } | null,
// decisions: [{ statement, agreed_by[], supporting_offset }],
// action_items: [{ description, owner, due_hint, supporting_offset }],
// open_questions: [{ question, raised_by, answered }],
// key_quotes: [{ speaker, offset, quote, why_notable }],
// }
//
// or null on total failure. Failure is non-fatal — the meeting still
// saves with rec.extras = null and the dashboard just hides the
// extras section.
import { recordCall } from "./audit-log.js";
// Upper bound on backend calls per extras pass: runMeetingExtras loops
// up to this many times until a response parses, then gives up.
const EXTRAS_MAX_ATTEMPTS = 3;
// Default prompt for the extras pass, used by runMeetingExtras when no
// non-blank promptOverride is supplied. `{{name}}` placeholders are
// substituted by fillTemplate; runMeetingExtras provides title,
// duration, operatorContext, speakerRoster, topics and transcript.
// `{{operatorContext}}` sits directly in front of "SPEAKERS" because
// the rendered hints block ends with its own blank line (or is the
// empty string when no hints were supplied).
// The JSON shape requested at the end is the shape safeParseExtras
// validates — keep the two in sync when editing either.
export const DEFAULT_MEETING_EXTRAS_PROMPT_TEMPLATE = `You are extracting structured information from an internal team meeting transcript. The transcript below has been pre-tagged with speaker labels like [A], [B], [C] (anonymous voice-clustering labels) and inferred real names where available.
MEETING METADATA:
- Title: {{title}}
- Duration: {{duration}}
{{operatorContext}}SPEAKERS (from voice clustering, with operator-confirmed names where present):
{{speakerRoster}}
TOPIC SUMMARIES (already produced — for context only, do not duplicate):
{{topics}}
TRANSCRIPT (each line is "[<letter> <MM:SS>] text"):
{{transcript}}
INSTRUCTIONS:
Extract FIVE categories of information from the meeting. Return EMPTY ARRAYS for categories that don't apply — do NOT invent items.
1. TLDR — A 2-4 sentence executive summary of the entire meeting: what it was about, the key discussion arc, and the bottom-line outcome. Write in past tense, third person. Keep it dense — every clause should carry information. Skip pleasantries and procedural opening/closing chatter. If a meeting was genuinely substanceless (a 3-minute check-in, audio test, etc.), write one factual descriptor sentence instead of padding. This is the only required category — even the most trivial meeting gets a one-sentence TLDR.
- summary: the 2-4 sentence executive summary
- primary_speakers: array of Speaker_X ids who drove the conversation (the 1-3 people most central to the discussion, in rough order of contribution). Empty array if unclear.
2. DECISIONS — Things explicitly decided / agreed during the meeting. Include only clear commitments ("we will do X", "let's go with Y"), not casual mentions. For each:
- statement: the decision in one sentence
- agreed_by: array of Speaker_X ids who explicitly agreed (use the chip-letter notation, e.g. ["Speaker_A", "Speaker_C"]). Empty array if unclear.
- supporting_offset: integer SECONDS where this decision was made (use the [<letter> <MM:SS>] timestamp from the most relevant transcript line — convert MM:SS to total seconds)
3. ACTION_ITEMS — Specific commitments where someone said they would do something. Include only explicit ownership ("I'll send the doc", "Matt will follow up"), not vague "someone should...". For each:
- description: the action in imperative form
- owner: the Speaker_X id of the person taking it on (e.g. "Speaker_A"), or null if unclear
- due_hint: the deadline as a string if mentioned ("by Friday", "end of week", "before next call"), or null
- supporting_offset: integer seconds where the commitment was made
4. OPEN_QUESTIONS — Questions raised that were NOT clearly answered during the meeting. Skip rhetorical questions and questions that got direct answers. For each:
- question: the question, rephrased to be self-contained
- raised_by: the Speaker_X id who asked (or null if unclear)
- answered: false (always — if it was answered, don't include it)
5. KEY_QUOTES — Statements worth surfacing verbatim because they are pivotal, particularly insightful, or capture a strong opinion. Limit to 3-6 quotes max. Skip filler and conversational text. For each:
- speaker: the Speaker_X id of the speaker
- offset: integer seconds where the quote occurs
- quote: the verbatim quote (trim to the substantive sentence, 4-30 words)
- why_notable: one short clause on why this is worth surfacing
Be conservative across all five. Better to return an empty array (or for TLDR, a single factual sentence) than to fabricate. A 5-minute small-talk call may legitimately have 0 decisions, 0 action items, 0 open questions, 0 key quotes — but it still gets a TLDR.
Respond with ONLY valid JSON in this exact shape, no other text:
{
"tldr": {"summary": "...", "primary_speakers": ["Speaker_A", "Speaker_B"]},
"decisions": [{"statement": "...", "agreed_by": ["Speaker_A"], "supporting_offset": 123}],
"action_items": [{"description": "...", "owner": "Speaker_B", "due_hint": "by Friday", "supporting_offset": 234}],
"open_questions": [{"question": "...", "raised_by": "Speaker_C", "answered": false}],
"key_quotes": [{"speaker": "Speaker_A", "offset": 345, "quote": "...", "why_notable": "..."}]
}`;
/**
 * Substitute `{{name}}` placeholders in a prompt template.
 *
 * A placeholder is `{{`, optional whitespace, a `\w+` key, optional
 * whitespace, `}}`. Keys that are not own properties of `vars` are left
 * in the output as `{{key}}` so a typo in an operator-supplied template
 * stays visible instead of silently vanishing. Substitution is a single
 * pass: placeholder-looking text inside a substituted value is NOT
 * expanded again.
 *
 * @param {string} template - Template text; falsy values are treated as "".
 * @param {Object} [vars] - Placeholder values keyed by name; null/undefined
 *   is treated as "no values".
 * @returns {string} The rendered template.
 */
function fillTemplate(template, vars) {
  const values = vars ?? {};
  return String(template || "").replace(/\{\{\s*(\w+)\s*\}\}/g, (_match, key) =>
    // Own-property check on purpose: `key in values` would also match
    // inherited names such as "toString" or "constructor" and splice a
    // native function's source text into the prompt.
    Object.prototype.hasOwnProperty.call(values, key) ? String(values[key]) : `{{${key}}}`,
  );
}
/**
 * Render a duration as "Hh Mm Ss", omitting leading units that are zero
 * (e.g. 75 -> "1m 15s", 9 -> "9s"). Fractions are floored; negative or
 * missing input renders as "0s".
 *
 * @param {number} seconds - Duration in seconds.
 * @returns {string} Human-readable duration.
 */
function formatDuration(seconds) {
  const total = Math.max(0, Math.floor(seconds || 0));
  const hours = Math.floor(total / 3600);
  const minutes = Math.floor((total % 3600) / 60);
  const secs = total % 60;

  // Build right-to-left: seconds always, minutes once there is anything
  // bigger than seconds, hours only when non-zero.
  const parts = [`${secs}s`];
  if (hours > 0 || minutes > 0) parts.unshift(`${minutes}m`);
  if (hours > 0) parts.unshift(`${hours}h`);
  return parts.join(" ");
}
/**
 * Render transcript segments as prompt lines of the form
 * "[<letter> <M:SS>] text", one per segment, joined with newlines.
 *
 * The letter is the suffix of a `Speaker_<LETTERS>` id, or "?" when the
 * segment's speaker does not match that pattern. Segments whose text is
 * empty after trimming are skipped. A missing start time renders as 0:00.
 *
 * @param {Array<{text?: string, start?: number, speaker?: string}>} segments
 * @returns {string} The labeled transcript, or "" when there is nothing to render.
 */
function formatLabeledTranscript(segments) {
  if (!Array.isArray(segments) || segments.length === 0) return "";

  const speakerPattern = /^Speaker_([A-Z]+)$/;
  const rendered = [];
  for (const { text, start, speaker } of segments) {
    const body = (text || "").trim();
    if (body === "") continue;

    const labelMatch = speakerPattern.exec(String(speaker || ""));
    const chip = labelMatch ? labelMatch[1] : "?";

    // Minutes are not wrapped at 60, so long meetings render as e.g. 75:03.
    const wholeSeconds = Math.floor(start || 0);
    const minutes = Math.floor(wholeSeconds / 60);
    const paddedSeconds = String(wholeSeconds % 60).padStart(2, "0");

    rendered.push(`[${chip} ${minutes}:${paddedSeconds}] ${body}`);
  }
  return rendered.join("\n");
}
// Trim a too-large transcript by keeping the head and tail. Keeps
// the meeting's opening (introductions, agenda) AND closing (wrap-up,
// next steps) which are where most extras-worthy content lives.
//
// - text     : the full labeled transcript (one "[X M:SS] …" line per segment)
// - maxChars : character budget; the result never exceeds it. A budget
//              that is not a number (NaN / undefined) means "no cap".
//
// Cuts are snapped to line boundaries when one is available so the
// model never sees half a line (in particular a tail fragment with no
// "[letter MM:SS]" label); with no line break to snap to, the raw
// character cut is used.
function capTranscript(text, maxChars) {
  // Written as !(a > b) so a NaN budget falls through to "return as-is".
  if (!(text.length > maxChars)) return text;

  const marker = "\n\n…[middle truncated for prompt length]…\n\n";
  const half = Math.floor(maxChars / 2) - 50;
  // Budget too small to fit head + marker + tail. slice(-0) would
  // return the WHOLE text here, so fall back to a plain prefix cut.
  if (half <= 0) return text.slice(0, Math.max(0, maxChars));

  // Head: end on the last complete line that fits in `half` chars.
  let headEnd = half;
  if (text[headEnd] !== "\n") {
    const lastBreak = text.lastIndexOf("\n", headEnd - 1);
    if (lastBreak > 0) headEnd = lastBreak;
  }

  // Tail: start on the first complete line inside the last `half` chars.
  let tailStart = text.length - half;
  if (text[tailStart - 1] !== "\n") {
    const nextBreak = text.indexOf("\n", tailStart);
    if (nextBreak !== -1 && nextBreak < text.length - 1) tailStart = nextBreak + 1;
  }

  return text.slice(0, headEnd) + marker + text.slice(tailStart);
}
/**
 * Parse and sanitize the LLM's extras response.
 *
 * Parsing is attempted, in order, on: the trimmed text as-is, the body
 * of the first ``` fence (optionally tagged `json`), and the span from
 * the first "{" to the last "}" (for replies wrapped in chat prose).
 * The first candidate that is valid JSON decides the outcome: it must
 * be a plain object, otherwise the response is rejected. Top-level
 * arrays are rejected too, so the caller's retry loop gets another try.
 *
 * Each category is then coerced and clamped; malformed entries are
 * dropped individually rather than failing the whole pass.
 *
 * @param {string} text - Raw model output.
 * @returns {{tldr: ?{summary: string, primary_speakers: string[]},
 *   decisions: Object[], action_items: Object[], open_questions: Object[],
 *   key_quotes: Object[]} | null} Sanitized extras, or null when no JSON
 *   object could be recovered.
 */
function safeParseExtras(text) {
  if (!text || typeof text !== "string") return null;

  const speakerIdPattern = /^Speaker_[A-Z]+$/;

  // JSON.parse never yields undefined, so undefined safely means "not JSON".
  const tryParse = (candidate) => {
    try {
      return JSON.parse(candidate);
    } catch {
      return undefined;
    }
  };

  const trimmedText = text.trim();
  // Raw text goes first so valid JSON whose string values happen to
  // contain ``` is not mistaken for a fenced reply.
  const candidates = [trimmedText];
  const fence = trimmedText.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (fence) candidates.push(fence[1].trim());
  const firstBrace = trimmedText.indexOf("{");
  const lastBrace = trimmedText.lastIndexOf("}");
  if (firstBrace !== -1 && lastBrace > firstBrace) {
    candidates.push(trimmedText.slice(firstBrace, lastBrace + 1));
  }

  let parsed = null;
  for (const candidate of candidates) {
    const value = tryParse(candidate);
    if (value === undefined) continue;
    if (value && typeof value === "object" && !Array.isArray(value)) parsed = value;
    break;
  }
  if (!parsed) return null;

  // --- field-level sanitizers -------------------------------------
  const cleanString = (value, maxLen) =>
    typeof value === "string" ? value.trim().slice(0, maxLen) : "";

  const speakerId = (value) =>
    typeof value === "string" && speakerIdPattern.test(value) ? value : null;

  const speakerIds = (value, limit) =>
    Array.isArray(value) ? value.filter((x) => speakerId(x) !== null).slice(0, limit) : [];

  // Offsets are requested as integer seconds, but models sometimes send
  // them as numeric strings or echo the transcript's "M:SS" stamp.
  const toOffset = (value) => {
    let seconds = value;
    if (typeof value === "string") {
      const raw = value.trim();
      const clock = /^(\d+):([0-5]\d)$/.exec(raw);
      if (clock) seconds = Number(clock[1]) * 60 + Number(clock[2]);
      else seconds = raw === "" ? NaN : Number(raw);
    }
    return Number.isFinite(seconds) ? Math.max(0, Math.floor(seconds)) : null;
  };

  // Map every object entry through `toItem`, drop rejects, cap the count.
  const collect = (value, limit, toItem) =>
    (Array.isArray(value) ? value : [])
      .map((entry) => (entry && typeof entry === "object" ? toItem(entry) : null))
      .filter(Boolean)
      .slice(0, limit);

  // TLDR — exactly one object (not an array). Required category;
  // we accept any well-formed shape and clamp to safe bounds. If
  // the LLM omitted it entirely we leave it null so the renderer
  // can show "TLDR unavailable" rather than fabricating.
  let tldr = null;
  if (parsed.tldr && typeof parsed.tldr === "object" && !Array.isArray(parsed.tldr)) {
    const summary = cleanString(parsed.tldr.summary, 800);
    if (summary) {
      tldr = {
        summary,
        primary_speakers: speakerIds(parsed.tldr.primary_speakers, 5),
      };
    }
  }

  const decisions = collect(parsed.decisions, 20, (d) => {
    const statement = cleanString(d.statement, 400);
    if (!statement) return null;
    return {
      statement,
      agreed_by: speakerIds(d.agreed_by, 10),
      supporting_offset: toOffset(d.supporting_offset),
    };
  });

  const action_items = collect(parsed.action_items, 30, (a) => {
    const description = cleanString(a.description, 400);
    if (!description) return null;
    return {
      description,
      owner: speakerId(a.owner),
      due_hint: cleanString(a.due_hint, 80) || null,
      supporting_offset: toOffset(a.supporting_offset),
    };
  });

  const open_questions = collect(parsed.open_questions, 20, (q) => {
    const question = cleanString(q.question, 400);
    if (!question) return null;
    return {
      question,
      raised_by: speakerId(q.raised_by),
      // Only a literal `true` counts; anything else is "unanswered".
      answered: q.answered === true,
    };
  });

  const key_quotes = collect(parsed.key_quotes, 10, (q) => {
    const quote = cleanString(q.quote, 400);
    if (!quote) return null;
    return {
      speaker: speakerId(q.speaker),
      offset: toOffset(q.offset),
      quote,
      why_notable: cleanString(q.why_notable, 200),
    };
  });

  return { tldr, decisions, action_items, open_questions, key_quotes };
}
// Render the SPEAKERS prompt section: one row per `Speaker_<LETTERS>`
// key of `speakers`, sorted by id, showing chip letter, speaking time,
// turn count and the known display name (or "(unknown)").
function buildSpeakerRoster(speakers, speakerNames) {
  return Object.keys(speakers || {})
    .filter((k) => /^Speaker_[A-Z]+$/.test(k))
    .sort()
    .map((k) => {
      const stats = speakers[k] || {};
      const secs = Math.round(stats.total_speaking_seconds || 0);
      const mins = Math.floor(secs / 60);
      const rem = secs % 60;
      const timeStr = mins > 0 ? `${mins}m ${rem}s` : `${rem}s`;
      const letter = k.replace("Speaker_", "");
      const name = speakerNames && speakerNames[k] ? `"${speakerNames[k]}"` : "(unknown)";
      return `- ${k} (chip [${letter}], ${timeStr} speaking, ${stats.turns || 0} turns): ${name}`;
    })
    .join("\n");
}

// Render the TOPIC SUMMARIES prompt section as a numbered list of
// "[M:SS] title — summary" rows, or "(no topics)" when there are none.
function buildTopicsBlock(topics) {
  if (!Array.isArray(topics) || topics.length === 0) return "(no topics)";
  return topics
    .map((t, i) => {
      const startSec = t.startTime || 0;
      const mm = Math.floor(startSec / 60);
      const ss = Math.floor(startSec % 60);
      const tStr = `${mm}:${String(ss).padStart(2, "0")}`;
      // Separate title and summary; without this they ran together
      // ("Budget reviewThe team discussed…").
      const summary = t.summary ? ` — ${t.summary}` : "";
      return `${i + 1}. [${tStr}] ${t.title || "(untitled)"}${summary}`;
    })
    .join("\n");
}

// Compose the OPERATOR HINTS block — same shape as the name-
// inference pipeline so the LLM gets consistent framing across
// both passes. Empty string when no hints supplied; otherwise ends
// with a blank line so the next prompt section starts cleanly.
function buildOperatorContext(participantHints, operatorNotes) {
  const hintsParts = [];
  if (participantHints && String(participantHints).trim()) {
    hintsParts.push(
      `Possible participants in this meeting (operator-supplied — may be incomplete):\n${String(participantHints).trim()}`,
    );
  }
  if (operatorNotes && String(operatorNotes).trim()) {
    // Notes are free-form prose; cap them so they cannot crowd out the transcript.
    const trimmed = String(operatorNotes).trim().slice(0, 4000);
    hintsParts.push(
      `Operator notes (may describe who said what — use as soft context, verify against the transcript before extracting decisions / action items / quotes):\n${trimmed}`,
    );
  }
  return hintsParts.length
    ? `OPERATOR HINTS (treat as suggestions only — verify against the transcript):\n\n${hintsParts.join("\n\n")}\n\n`
    : "";
}

/**
 * Run the meeting-extras LLM pass and return the sanitized result.
 *
 * Builds the prompt (metadata, operator hints, speaker roster, topic
 * summaries, capped transcript), calls `backend.analyzeText` up to
 * EXTRAS_MAX_ATTEMPTS times until a response parses, writes one audit
 * row via `recordCall`, and returns the parsed extras.
 *
 * @param {Object} args
 * @param {string} [args.title] - Meeting title; "(untitled)" when falsy.
 * @param {number} [args.audioSec] - Meeting length in seconds (prompt metadata only).
 * @param {Object} [args.speakers] - Per-cluster stats keyed by `Speaker_<LETTERS>`;
 *   `total_speaking_seconds` and `turns` are read.
 * @param {Object} [args.speakerNames] - Display names keyed by the same ids.
 * @param {Array} args.transcriptSegments - Segments with `text`, `start`, `speaker`.
 * @param {Array} [args.topics] - `{ title, summary, startTime }` entries.
 * @param {string} [args.promptOverride] - Replaces the default template when non-blank.
 * @param {string} [args.participantHints] - Expected attendees, framed as a soft hint.
 * @param {string} [args.operatorNotes] - Free-form notes, framed as a soft hint (capped at 4000 chars).
 * @param {Object} args.backend - Must expose `analyzeText({ prompt })` resolving to
 *   an object with `text`, `model` and `usage`.
 * @param {*} args.pipelineBackend - Stored as `backend` on the audit row.
 * @param {*} args.jobId - Stored as `job_id` on the audit row.
 * @param {*} args.installId - Stored as `install_id` on the audit row.
 * @param {*} [args.licenseFingerprint] - Stored as `license_fingerprint` on the audit row.
 * @param {*} args.source - Stored as `source` on the audit row.
 * @param {Function} args.computeCostDetails - Called as `(model, usage)`; its result
 *   is spread into the audit row.
 * @returns {Promise<Object|null>} Sanitized extras, or null when there is no
 *   backend, no transcript, or every attempt failed.
 */
export async function runMeetingExtras({
  title,
  audioSec,
  speakers,
  speakerNames,
  transcriptSegments,
  topics, // array of { title, summary, startTime } from analyze-then-polish
  promptOverride = "",
  // Operator-supplied hints (internal meetings only). participantHints
  // is a CSV-ish string of expected attendees; operatorNotes is free-
  // form prose describing who-said-what. Both are framed as hints in
  // the rendered prompt — the LLM is instructed to use them as soft
  // signals and verify against the transcript before quoting or
  // attributing. Empty → no OPERATOR HINTS block appears.
  participantHints = "",
  operatorNotes = "",
  backend,
  pipelineBackend,
  jobId,
  installId,
  licenseFingerprint = null,
  source,
  computeCostDetails,
}) {
  if (!backend) return null;
  if (!Array.isArray(transcriptSegments) || transcriptSegments.length === 0) return null;

  const speakerRoster = buildSpeakerRoster(speakers, speakerNames);
  const topicsBlock = buildTopicsBlock(topics);
  const fullTranscript = formatLabeledTranscript(transcriptSegments);
  const cappedTranscript = capTranscript(fullTranscript, 25000);
  const operatorContextBlock = buildOperatorContext(participantHints, operatorNotes);

  const templateSource =
    typeof promptOverride === "string" && promptOverride.trim()
      ? promptOverride
      : DEFAULT_MEETING_EXTRAS_PROMPT_TEMPLATE;
  const prompt = fillTemplate(templateSource, {
    title: title || "(untitled)",
    duration: formatDuration(audioSec),
    operatorContext: operatorContextBlock,
    speakerRoster: speakerRoster || "(no speakers identified)",
    topics: topicsBlock,
    transcript: cappedTranscript || "(empty)",
  });

  const t0 = Date.now();
  let r = null;
  let parsed = null;
  let lastErr = null;
  // Retry on both transport errors and unparseable responses; the same
  // prompt is resent each time.
  for (let attempt = 0; attempt < EXTRAS_MAX_ATTEMPTS; attempt++) {
    try {
      r = await backend.analyzeText({ prompt });
      parsed = safeParseExtras(r.text);
      if (parsed) {
        lastErr = null;
        break;
      }
      lastErr = "invalid JSON in extras response";
    } catch (err) {
      lastErr = (err?.message || String(err)).slice(0, 280);
      r = null;
    }
    if (attempt < EXTRAS_MAX_ATTEMPTS - 1) {
      console.warn(
        `[meeting-extras] attempt ${attempt + 1} failed (${lastErr}) — retrying`
      );
    }
  }
  const dur = Date.now() - t0;

  // NOTE(review): cost is recorded only for the attempt that parsed.
  // Responses that came back but failed to parse (including earlier
  // retries of a run that eventually succeeded) are logged with zero
  // usage — confirm that under-counting is intended.
  const cost =
    parsed && r
      ? computeCostDetails(r.model, r.usage)
      : { input_tokens: 0, output_tokens: 0, thinking_tokens: 0, cost_usd: 0 };

  // One audit row per pass (not per attempt), success or failure.
  await recordCall({
    install_id: installId,
    license_fingerprint: licenseFingerprint,
    tier: "core",
    pipeline: "meeting_extras",
    backend: pipelineBackend,
    model: r?.model || null,
    status: parsed ? "success" : "error",
    duration_ms: dur,
    audio_seconds: 0,
    job_id: jobId,
    batch_id: null,
    source,
    media_url: null,
    error: parsed ? null : lastErr || "extras analysis failed",
    ...cost,
  });

  if (!parsed) {
    console.warn(
      `[meeting-extras] all ${EXTRAS_MAX_ATTEMPTS} attempts failed (${lastErr}) — extras unavailable`
    );
    return null;
  }
  console.log(
    `[meeting-extras] extracted ${parsed.tldr ? "tldr + " : "(no tldr) + "}${parsed.decisions.length} decision(s), ${parsed.action_items.length} action(s), ${parsed.open_questions.length} question(s), ${parsed.key_quotes.length} quote(s) in ${(dur / 1000).toFixed(1)}s`
  );
  return parsed;
}