// Phase 2 of Path 2A — meeting extras analysis.
//
// Runs a single LLM pass AFTER transcribe → diarize → cluster →
// analyze → name-inference → summary-polish complete. Pulls out five
// categories of structured information that operators consistently
// want at the top of an internal meeting recap:
//
//   - tldr           : 2-4 sentence executive summary plus the primary speakers
//   - decisions      : what was agreed on (with the offset where it was settled)
//   - action_items   : who owes what, by when (best-effort due_hint)
//   - open_questions : questions raised that didn't get resolved
//   - key_quotes     : notable statements worth surfacing verbatim
//
// Decisions, action items, and key quotes carry a `supporting_offset`
// (or `offset`) in seconds so the dashboard can render the timestamp as
// a clickable jump to the corresponding transcript line. Items also
// carry speaker IDs (cluster ids like Speaker_A) so the renderer can
// show the speaker's colored chip + display name, and so an
// operator-rename or per-line override propagates here too.
//
// Returns:
//   {
//     tldr: { summary, primary_speakers[] } | null,
//     decisions: [{ statement, agreed_by[], supporting_offset }],
//     action_items: [{ description, owner, due_hint, supporting_offset }],
//     open_questions: [{ question, raised_by, answered }],
//     key_quotes: [{ speaker, offset, quote, why_notable }],
//   }
//
// or null on total failure. Failure is non-fatal — the meeting still
// saves with rec.extras = null and the dashboard just hides the
// extras section.
import { recordCall } from "./audit-log.js";
// Maximum number of LLM calls runMeetingExtras makes before giving up
// on a meeting (each attempt re-sends the same prompt).
const EXTRAS_MAX_ATTEMPTS = 3;

/**
 * Default prompt for the meeting-extras pass.
 *
 * Placeholders use `{{name}}` syntax and are substituted by fillTemplate.
 * runMeetingExtras supplies: title, duration, operatorContext,
 * speakerRoster, topics, transcript. `{{operatorContext}}` is deliberately
 * glued to the SPEAKERS heading: it expands either to an empty string or
 * to a block that already ends with a blank line.
 *
 * The JSON shape requested at the bottom is the shape safeParseExtras
 * validates — keep the two in sync when editing either.
 */
export const DEFAULT_MEETING_EXTRAS_PROMPT_TEMPLATE = `You are extracting structured information from an internal team meeting transcript. The transcript below has been pre-tagged with speaker labels like [A], [B], [C] (anonymous voice-clustering labels) and inferred real names where available.

MEETING METADATA:
- Title: {{title}}
- Duration: {{duration}}

{{operatorContext}}SPEAKERS (from voice clustering, with operator-confirmed names where present):
{{speakerRoster}}

TOPIC SUMMARIES (already produced — for context only, do not duplicate):
{{topics}}

TRANSCRIPT (each line is "[<letter> <MM:SS>] text"):
{{transcript}}

INSTRUCTIONS:
Extract FIVE categories of information from the meeting. Return EMPTY ARRAYS for categories that don't apply — do NOT invent items.

1. TLDR — A 2-4 sentence executive summary of the entire meeting: what it was about, the key discussion arc, and the bottom-line outcome. Write in past tense, third person. Keep it dense — every clause should carry information. Skip pleasantries and procedural opening/closing chatter. If a meeting was genuinely substanceless (a 3-minute check-in, audio test, etc.), write one factual descriptor sentence instead of padding. This is the only required category — even the most trivial meeting gets a one-sentence TLDR.
- summary: the 2-4 sentence executive summary
- primary_speakers: array of Speaker_X ids who drove the conversation (the 1-3 people most central to the discussion, in rough order of contribution). Empty array if unclear.

2. DECISIONS — Things explicitly decided / agreed during the meeting. Include only clear commitments ("we will do X", "let's go with Y"), not casual mentions. For each:
- statement: the decision in one sentence
- agreed_by: array of Speaker_X ids who explicitly agreed (use the chip-letter notation, e.g. ["Speaker_A", "Speaker_C"]). Empty array if unclear.
- supporting_offset: integer SECONDS where this decision was made (use the [<letter> <MM:SS>] timestamp from the most relevant transcript line — convert MM:SS to total seconds)

3. ACTION_ITEMS — Specific commitments where someone said they would do something. Include only explicit ownership ("I'll send the doc", "Matt will follow up"), not vague "someone should...". For each:
- description: the action in imperative form
- owner: the Speaker_X id of the person taking it on (e.g. "Speaker_A"), or null if unclear
- due_hint: the deadline as a string if mentioned ("by Friday", "end of week", "before next call"), or null
- supporting_offset: integer seconds where the commitment was made

4. OPEN_QUESTIONS — Questions raised that were NOT clearly answered during the meeting. Skip rhetorical questions and questions that got direct answers. For each:
- question: the question, rephrased to be self-contained
- raised_by: the Speaker_X id who asked (or null if unclear)
- answered: false (always — if it was answered, don't include it)

5. KEY_QUOTES — Statements worth surfacing verbatim because they are pivotal, particularly insightful, or capture a strong opinion. Limit to 3-6 quotes max. Skip filler and conversational text. For each:
- speaker: the Speaker_X id of the speaker
- offset: integer seconds where the quote occurs
- quote: the verbatim quote (trim to the substantive sentence, 4-30 words)
- why_notable: one short clause on why this is worth surfacing

Be conservative across all five. Better to return an empty array (or for TLDR, a single factual sentence) than to fabricate. A 5-minute small-talk call may legitimately have 0 decisions, 0 action items, 0 open questions, 0 key quotes — but it still gets a TLDR.

Respond with ONLY valid JSON in this exact shape, no other text:
{
"tldr": {"summary": "...", "primary_speakers": ["Speaker_A", "Speaker_B"]},
"decisions": [{"statement": "...", "agreed_by": ["Speaker_A"], "supporting_offset": 123}],
"action_items": [{"description": "...", "owner": "Speaker_B", "due_hint": "by Friday", "supporting_offset": 234}],
"open_questions": [{"question": "...", "raised_by": "Speaker_C", "answered": false}],
"key_quotes": [{"speaker": "Speaker_A", "offset": 345, "quote": "...", "why_notable": "..."}]
}`;
/**
 * Substitute `{{name}}` placeholders in a prompt template.
 *
 * Only OWN properties of `vars` are substituted. The template may be an
 * operator-supplied override, so a placeholder such as `{{constructor}}`
 * or `{{toString}}` must not resolve to something inherited from
 * Object.prototype. Unknown placeholders are left in place (normalised to
 * `{{name}}` without inner whitespace) so a typo stays visible in the
 * rendered prompt.
 *
 * Substitution is single-pass: placeholder-looking text inside a
 * substituted value is not expanded again.
 *
 * @param {string} template - Template text; falsy values render as "".
 * @param {Object} vars - Placeholder name → value (stringified with String()).
 * @returns {string} The rendered template.
 */
function fillTemplate(template, vars) {
  const values = vars ?? {};
  return String(template || "").replace(/\{\{\s*(\w+)\s*\}\}/g, (_match, key) =>
    Object.prototype.hasOwnProperty.call(values, key) ? String(values[key]) : `{{${key}}}`
  );
}
/**
 * Render a duration in seconds as "1h 2m 5s", "2m 5s", or "5s".
 *
 * Fractions are floored. Anything that is not a finite, positive number
 * (undefined, null, NaN, ±Infinity, non-numeric strings, negatives)
 * renders as "0s" rather than leaking "NaN" into the prompt.
 *
 * @param {number} seconds - Duration in seconds.
 * @returns {string} Human-readable duration.
 */
function formatDuration(seconds) {
  const numeric = Number(seconds);
  const total = Number.isFinite(numeric) ? Math.max(0, Math.floor(numeric)) : 0;
  const hours = Math.floor(total / 3600);
  const minutes = Math.floor((total % 3600) / 60);
  const secs = total % 60;
  if (hours > 0) return `${hours}h ${minutes}m ${secs}s`;
  if (minutes > 0) return `${minutes}m ${secs}s`;
  return `${secs}s`;
}
/**
 * Render transcript segments as one "[<letter> <M:SS>] text" line each.
 *
 * - `<letter>` is the suffix of a `Speaker_<LETTERS>` id, or "?" when the
 *   segment's speaker is missing or not in that form.
 * - Minutes are not wrapped at 60 (an offset of 3723s renders as "62:03").
 * - Segments that are not objects, or whose text is missing, non-string,
 *   or blank, are skipped instead of throwing.
 * - A missing, non-finite, or negative start renders as 0:00.
 *
 * @param {Array<{speaker?: string, start?: number, text?: string}>} segments
 * @returns {string} Newline-joined transcript, or "" when nothing is usable.
 */
function formatLabeledTranscript(segments) {
  if (!Array.isArray(segments) || segments.length === 0) return "";
  const lines = [];
  for (const seg of segments) {
    if (!seg || typeof seg !== "object") continue;
    const text = typeof seg.text === "string" ? seg.text.trim() : "";
    if (!text) continue;

    const start = Number(seg.start);
    const totalSec = Number.isFinite(start) ? Math.max(0, Math.floor(start)) : 0;
    const mm = Math.floor(totalSec / 60);
    const ss = String(totalSec % 60).padStart(2, "0");

    const speakerMatch = /^Speaker_([A-Z]+)$/.exec(String(seg.speaker || ""));
    const letter = speakerMatch ? speakerMatch[1] : "?";

    lines.push(`[${letter} ${mm}:${ss}] ${text}`);
  }
  return lines.join("\n");
}
/**
 * Trim a too-large transcript by keeping the head and tail. Keeps the
 * meeting's opening (introductions, agenda) AND closing (wrap-up, next
 * steps), which are where most extras-worthy content lives.
 *
 * Each kept side is at most floor(maxChars / 2) - 50 characters; the 50
 * per side leaves room for the truncation marker, so the result never
 * exceeds `maxChars`.
 *
 * Cuts are snapped to line boundaries where possible, so the model never
 * sees a fragment with no "[<letter> <MM:SS>]" label in front of it. A
 * side that contains no newline at all is cut at the raw character offset.
 *
 * When `maxChars` is too small to fit head + marker + tail, the text is
 * hard-truncated to `maxChars`. A non-numeric `maxChars` means "no cap".
 *
 * @param {string} text - Full transcript text.
 * @param {number} maxChars - Maximum length of the returned string.
 * @returns {string} `text` unchanged when it fits, else head + marker + tail.
 */
function capTranscript(text, maxChars) {
  const source = typeof text === "string" ? text : String(text ?? "");
  if (typeof maxChars !== "number" || Number.isNaN(maxChars)) return source;
  if (source.length <= maxChars) return source;

  const marker = "\n\n…[middle truncated for prompt length]…\n\n";
  const half = Math.floor(maxChars / 2) - 50;
  if (half <= 0) {
    // Not enough budget for head + marker + tail. Note that slice(-0)
    // would return the WHOLE text, so never fall through with half === 0.
    return source.slice(0, Math.max(0, Math.floor(maxChars)));
  }

  let head = source.slice(0, half);
  if (source[half] !== "\n") {
    // The cut landed inside a line: back up to the last complete line.
    const lastBreak = head.lastIndexOf("\n");
    if (lastBreak > 0) head = head.slice(0, lastBreak);
  }

  const tailStart = source.length - half;
  let tail = source.slice(tailStart);
  if (source[tailStart - 1] !== "\n") {
    // The tail starts inside a line: skip ahead to the next line start.
    const firstBreak = tail.indexOf("\n");
    if (firstBreak !== -1 && firstBreak < tail.length - 1) {
      tail = tail.slice(firstBreak + 1);
    }
  }

  return head + marker + tail;
}
// Cluster ids accepted anywhere a speaker reference is expected.
const EXTRAS_SPEAKER_ID_RE = /^Speaker_[A-Z]+$/;

// True when `value` is a string of the form Speaker_<LETTERS>.
function isExtrasSpeakerId(value) {
  return typeof value === "string" && EXTRAS_SPEAKER_ID_RE.test(value);
}

// Keep only valid speaker ids from an array-ish value, capped at `limit`.
function pickExtrasSpeakerIds(value, limit) {
  return Array.isArray(value) ? value.filter((v) => isExtrasSpeakerId(v)).slice(0, limit) : [];
}

// Trim a string field and clamp its length; non-strings become "".
function clampExtrasText(value, maxLen) {
  return typeof value === "string" ? value.trim().slice(0, maxLen) : "";
}

// Coerce an offset to whole, non-negative seconds. Accepts finite numbers
// and plain numeric strings ("95", "12.5"); anything else yields null.
function toExtrasOffset(value) {
  let n = value;
  if (typeof value === "string" && /^\s*\d+(?:\.\d+)?\s*$/.test(value)) {
    n = Number(value);
  }
  return Number.isFinite(n) ? Math.max(0, Math.floor(n)) : null;
}

// Run `normalize` over the object entries of an array-ish value, dropping
// entries it rejects (returns null for) and stopping at `limit` items.
function normalizeExtrasList(value, limit, normalize) {
  if (!Array.isArray(value)) return [];
  const out = [];
  for (const entry of value) {
    if (out.length >= limit) break;
    if (!entry || typeof entry !== "object") continue;
    const item = normalize(entry);
    if (item) out.push(item);
  }
  return out;
}

// Find the JSON object in an LLM reply. Tries, in order: the contents of
// the first ``` fence, the whole trimmed reply, then the span from the
// first "{" to the last "}" (covers replies wrapped in unfenced prose).
// Returns the first candidate that parses to a plain (non-array) object.
function parseExtrasObject(raw) {
  const trimmed = raw.trim();
  const candidates = [];
  const fence = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (fence) candidates.push(fence[1].trim());
  candidates.push(trimmed);
  const open = trimmed.indexOf("{");
  const close = trimmed.lastIndexOf("}");
  if (open !== -1 && close > open) candidates.push(trimmed.slice(open, close + 1));

  for (const candidate of candidates) {
    try {
      const value = JSON.parse(candidate);
      if (value && typeof value === "object" && !Array.isArray(value)) return value;
    } catch {
      // Not valid JSON — fall through to the next candidate.
    }
  }
  return null;
}

/**
 * Parse and sanitise the LLM's meeting-extras reply.
 *
 * Returns null when no JSON object can be recovered from `text` — this
 * includes a top-level array or primitive — so the caller can retry.
 * Otherwise every category is coerced and clamped; individual entries
 * that fail validation are dropped rather than failing the whole pass.
 *
 * Limits applied:
 *   - tldr.summary ≤ 800 chars, ≤ 5 primary_speakers. tldr stays null
 *     when missing or empty so the renderer can show "TLDR unavailable"
 *     rather than fabricating.
 *   - decisions ≤ 20, action_items ≤ 30, open_questions ≤ 20,
 *     key_quotes ≤ 10; text fields ≤ 400 chars (due_hint ≤ 80,
 *     why_notable ≤ 200); agreed_by ≤ 10 ids.
 *   - speaker fields must match Speaker_<LETTERS>, otherwise null (or
 *     filtered out of arrays).
 *   - offsets are floored to whole seconds and clamped at 0; unusable
 *     values become null.
 *
 * @param {string} text - Raw model output.
 * @returns {{tldr: ?{summary: string, primary_speakers: string[]},
 *            decisions: Object[], action_items: Object[],
 *            open_questions: Object[], key_quotes: Object[]} | null}
 */
function safeParseExtras(text) {
  if (!text || typeof text !== "string") return null;
  const parsed = parseExtrasObject(text);
  if (!parsed) return null;

  let tldr = null;
  const rawTldr = parsed.tldr;
  if (rawTldr && typeof rawTldr === "object" && !Array.isArray(rawTldr)) {
    const summary = clampExtrasText(rawTldr.summary, 800);
    if (summary) {
      tldr = {
        summary,
        primary_speakers: pickExtrasSpeakerIds(rawTldr.primary_speakers, 5),
      };
    }
  }

  const decisions = normalizeExtrasList(parsed.decisions, 20, (d) => {
    const statement = clampExtrasText(d.statement, 400);
    if (!statement) return null;
    return {
      statement,
      agreed_by: pickExtrasSpeakerIds(d.agreed_by, 10),
      supporting_offset: toExtrasOffset(d.supporting_offset),
    };
  });

  const action_items = normalizeExtrasList(parsed.action_items, 30, (a) => {
    const description = clampExtrasText(a.description, 400);
    if (!description) return null;
    return {
      description,
      owner: isExtrasSpeakerId(a.owner) ? a.owner : null,
      due_hint: clampExtrasText(a.due_hint, 80) || null,
      supporting_offset: toExtrasOffset(a.supporting_offset),
    };
  });

  const open_questions = normalizeExtrasList(parsed.open_questions, 20, (q) => {
    const question = clampExtrasText(q.question, 400);
    if (!question) return null;
    return {
      question,
      raised_by: isExtrasSpeakerId(q.raised_by) ? q.raised_by : null,
      answered: q.answered === true,
    };
  });

  const key_quotes = normalizeExtrasList(parsed.key_quotes, 10, (q) => {
    const quote = clampExtrasText(q.quote, 400);
    if (!quote) return null;
    return {
      speaker: isExtrasSpeakerId(q.speaker) ? q.speaker : null,
      offset: toExtrasOffset(q.offset),
      quote,
      why_notable: clampExtrasText(q.why_notable, 200),
    };
  });

  return { tldr, decisions, action_items, open_questions, key_quotes };
}
// Upper bound on transcript characters embedded in the prompt.
const EXTRAS_TRANSCRIPT_MAX_CHARS = 25000;
// Upper bound on operator-notes characters embedded in the prompt.
const EXTRAS_OPERATOR_NOTES_MAX_CHARS = 4000;

// Order cluster ids A…Z, AA, AB… (shorter ids first) instead of plain
// lexicographic order, which would put Speaker_AA ahead of Speaker_B.
function compareExtrasSpeakerIds(a, b) {
  if (a.length !== b.length) return a.length - b.length;
  if (a < b) return -1;
  return a > b ? 1 : 0;
}

// Build the SPEAKERS roster, one line per Speaker_X key, e.g.
//   - Speaker_A (chip [A], 12m 34s speaking, 40 turns): "Matt Hill"
function buildExtrasSpeakerRoster(speakers, speakerNames) {
  return Object.keys(speakers || {})
    .filter((id) => /^Speaker_[A-Z]+$/.test(id))
    .sort(compareExtrasSpeakerIds)
    .map((id) => {
      const stats = speakers[id] || {};
      const secs = Math.round(stats.total_speaking_seconds || 0);
      const mins = Math.floor(secs / 60);
      const rem = secs % 60;
      const timeStr = mins > 0 ? `${mins}m ${rem}s` : `${rem}s`;
      const letter = id.replace("Speaker_", "");
      const name = speakerNames && speakerNames[id] ? `"${speakerNames[id]}"` : "(unknown)";
      return `- ${id} (chip [${letter}], ${timeStr} speaking, ${stats.turns || 0} turns): ${name}`;
    })
    .join("\n");
}

// Build the numbered TOPIC SUMMARIES block. Entries that are not objects
// are skipped; a missing/invalid startTime renders as 0:00.
function buildExtrasTopicsBlock(topics) {
  const usable = Array.isArray(topics) ? topics.filter((t) => t && typeof t === "object") : [];
  if (usable.length === 0) return "(no topics)";
  return usable
    .map((t, i) => {
      const raw = Number(t.startTime);
      const startSec = Number.isFinite(raw) && raw > 0 ? raw : 0;
      const mm = Math.floor(startSec / 60);
      const ss = Math.floor(startSec % 60);
      const tStr = `${mm}:${String(ss).padStart(2, "0")}`;
      return `${i + 1}. [${tStr}] ${t.title || "(untitled)"} — ${t.summary || ""}`;
    })
    .join("\n");
}

// Compose the OPERATOR HINTS block — same shape as the name-inference
// pipeline so the LLM gets consistent framing across both passes.
// Returns "" when no hints were supplied; otherwise the block ends with a
// blank line because the template glues it to the SPEAKERS heading.
function buildExtrasOperatorContext(participantHints, operatorNotes) {
  const parts = [];
  const participants = participantHints ? String(participantHints).trim() : "";
  if (participants) {
    parts.push(
      `Possible participants in this meeting (operator-supplied — may be incomplete):\n${participants}`
    );
  }
  const notes = operatorNotes ? String(operatorNotes).trim() : "";
  if (notes) {
    const trimmed = notes.slice(0, EXTRAS_OPERATOR_NOTES_MAX_CHARS);
    parts.push(
      `Operator notes (may describe who said what — use as soft context, verify against the transcript before extracting decisions / action items / quotes):\n${trimmed}`
    );
  }
  return parts.length
    ? `OPERATOR HINTS (treat as suggestions only — verify against the transcript):\n\n${parts.join("\n\n")}\n\n`
    : "";
}

/**
 * Run the meeting-extras LLM pass and return the sanitised result.
 *
 * Renders the prompt (operator override if non-blank, else the default
 * template), calls `backend.analyzeText({ prompt })` up to
 * EXTRAS_MAX_ATTEMPTS times until the reply parses, writes one audit
 * record via recordCall, and returns the parsed extras.
 *
 * Returns null WITHOUT calling the backend or writing an audit record
 * when there is no backend, no transcript segments, or no segment with
 * non-blank text (there is nothing to extract, and the prompt's
 * "TLDR is required" rule would only invite fabrication).
 *
 * Returns null AFTER writing an "error" audit record when every attempt
 * throws or yields unparseable output. Cost fields in the audit record
 * come from `computeCostDetails(model, usage)` for the successful attempt
 * and are zero on failure. Errors thrown by computeCostDetails or
 * recordCall propagate to the caller.
 *
 * @param {Object} args
 * @param {string} [args.title] - Meeting title for the prompt.
 * @param {number} [args.audioSec] - Meeting duration in seconds.
 * @param {Object} [args.speakers] - Map of Speaker_X id → stats; reads
 *   `total_speaking_seconds` and `turns`.
 * @param {Object} [args.speakerNames] - Map of Speaker_X id → display name.
 * @param {Array} args.transcriptSegments - Segments with speaker/start/text.
 * @param {Array} [args.topics] - Array of { title, summary, startTime }.
 * @param {string} [args.promptOverride] - Replacement prompt template.
 * @param {string} [args.participantHints] - Operator-supplied attendee list.
 * @param {string} [args.operatorNotes] - Operator free-form notes
 *   (first 4000 chars used).
 * @param {Object} args.backend - Must expose async `analyzeText({ prompt })`
 *   resolving to an object with `text`, `model`, and `usage`.
 * @param {*} args.pipelineBackend - Recorded as `backend` in the audit log.
 * @param {*} args.jobId - Recorded as `job_id`.
 * @param {*} args.installId - Recorded as `install_id`.
 * @param {*} [args.licenseFingerprint] - Recorded as `license_fingerprint`.
 * @param {*} args.source - Recorded as `source`.
 * @param {Function} args.computeCostDetails - `(model, usage) => object`
 *   whose fields are spread into the audit record.
 * @returns {Promise<Object|null>} Parsed extras, or null as described above.
 */
export async function runMeetingExtras({
  title,
  audioSec,
  speakers,
  speakerNames,
  transcriptSegments,
  topics, // array of { title, summary, startTime } from analyze-then-polish
  promptOverride = "",
  // Operator-supplied hints (internal meetings only). participantHints
  // is a CSV-ish string of expected attendees; operatorNotes is free-
  // form prose describing who-said-what. Both are framed as hints in
  // the rendered prompt — the LLM is instructed to use them as soft
  // signals and verify against the transcript before quoting or
  // attributing. Empty → no OPERATOR HINTS block appears.
  participantHints = "",
  operatorNotes = "",
  backend,
  pipelineBackend,
  jobId,
  installId,
  licenseFingerprint = null,
  source,
  computeCostDetails,
}) {
  if (!backend) return null;
  if (!Array.isArray(transcriptSegments) || transcriptSegments.length === 0) return null;

  const fullTranscript = formatLabeledTranscript(transcriptSegments);
  if (!fullTranscript) {
    // Segments exist but none carries text — skip the LLM call entirely.
    console.warn("[meeting-extras] transcript has no non-empty lines — skipping extras");
    return null;
  }
  const cappedTranscript = capTranscript(fullTranscript, EXTRAS_TRANSCRIPT_MAX_CHARS);

  const templateSource =
    typeof promptOverride === "string" && promptOverride.trim()
      ? promptOverride
      : DEFAULT_MEETING_EXTRAS_PROMPT_TEMPLATE;
  const prompt = fillTemplate(templateSource, {
    title: title || "(untitled)",
    duration: formatDuration(audioSec),
    operatorContext: buildExtrasOperatorContext(participantHints, operatorNotes),
    speakerRoster: buildExtrasSpeakerRoster(speakers, speakerNames) || "(no speakers identified)",
    topics: buildExtrasTopicsBlock(topics),
    transcript: cappedTranscript || "(empty)",
  });

  const t0 = Date.now();
  let r = null;
  let parsed = null;
  let lastErr = null;
  for (let attempt = 0; attempt < EXTRAS_MAX_ATTEMPTS; attempt++) {
    try {
      r = await backend.analyzeText({ prompt });
      parsed = safeParseExtras(r.text);
      if (parsed) {
        lastErr = null;
        break;
      }
      lastErr = "invalid JSON in extras response";
    } catch (err) {
      lastErr = (err?.message || String(err)).slice(0, 280);
      // Drop any earlier response so the audit record does not attribute
      // this failure to a model that never answered.
      r = null;
    }
    if (attempt < EXTRAS_MAX_ATTEMPTS - 1) {
      console.warn(
        `[meeting-extras] attempt ${attempt + 1} failed (${lastErr}) — retrying`
      );
    }
  }
  const dur = Date.now() - t0;

  const cost =
    parsed && r
      ? computeCostDetails(r.model, r.usage)
      : { input_tokens: 0, output_tokens: 0, thinking_tokens: 0, cost_usd: 0 };
  await recordCall({
    install_id: installId,
    license_fingerprint: licenseFingerprint,
    tier: "core",
    pipeline: "meeting_extras",
    backend: pipelineBackend,
    model: r?.model || null,
    status: parsed ? "success" : "error",
    duration_ms: dur,
    audio_seconds: 0,
    job_id: jobId,
    batch_id: null,
    source,
    media_url: null,
    error: parsed ? null : lastErr || "extras analysis failed",
    ...cost,
  });

  if (!parsed) {
    console.warn(
      `[meeting-extras] all ${EXTRAS_MAX_ATTEMPTS} attempts failed (${lastErr}) — extras unavailable`
    );
    return null;
  }
  console.log(
    `[meeting-extras] extracted ${parsed.tldr ? "tldr + " : "(no tldr) + "}${parsed.decisions.length} decision(s), ${parsed.action_items.length} action(s), ${parsed.open_questions.length} question(s), ${parsed.key_quotes.length} quote(s) in ${(dur / 1000).toFixed(1)}s`
  );
  return parsed;
}