Add internal-meetings pipeline and post-hoc speaker tools

2026-06-13 13:35:53 -05:00
parent 9a2dbf69df
commit 705807e286
15 changed files with 7375 additions and 0 deletions
@@ -0,0 +1,60 @@
+// Re-polish bug fix: the summary-polish pass must label each transcript
+// line with the operator's CORRECTED speaker name, so a re-polish after a
+// legend rename actually re-attributes statements to the new name (rather
+// than echoing the stale name baked into the original summaries).
+
+import { test, describe } from "node:test";
+import assert from "node:assert/strict";
+
+import { formatSpeakerLabeledTranscript } from "../post-cluster-polish.js";
+
+const SEGMENTS = [ // shared fixture for the first three tests below
+  { start: 5, speaker: "Speaker_A", text: "Let's get started." }, // given a name in two of the tests
+  { start: 12, speaker: "Speaker_B", text: "Sounds good." }, // given a name only in the speakerNames test
+  { start: 20, speaker: "Speaker_C", text: "One more thing." }, // never given a name in any test
+  { start: 30, speaker: "", text: "(crosstalk)" }, // empty speaker id; the tests expect a ? label
+];
+
+describe("formatSpeakerLabeledTranscript", () => { // covers label choice, time-window filtering, bracket stripping
+  test("without speakerNames: labels by chip letter (name-inference pass)", () => {
+    const out = formatSpeakerLabeledTranscript(SEGMENTS); // called with no options argument
+    assert.match(out, /\[A 0:05\] Let's get started\./);
+    assert.match(out, /\[B 0:12\] Sounds good\./);
+    assert.match(out, /\[C 0:20\] One more thing\./);
+    // The segment whose speaker is the empty string is expected to carry a "?" label.
+    assert.match(out, /\[\? 0:30\] \(crosstalk\)/);
+  });
+
+  test("with speakerNames: named speakers labeled by NAME, unnamed fall back to letter", () => {
+    const out = formatSpeakerLabeledTranscript(SEGMENTS, {
+      speakerNames: { Speaker_A: "Matt", Speaker_B: "Grant" }, // Speaker_C is left out on purpose
+    });
+    assert.match(out, /\[Matt 0:05\] Let's get started\./);
+    assert.match(out, /\[Grant 0:12\] Sounds good\./);
+    // Speaker_C has no entry in speakerNames, so its label is still the letter.
+    assert.match(out, /\[C 0:20\] One more thing\./);
+    // Regression guard for the re-polish fix: the letter labels of the named speakers must be gone.
+    assert.doesNotMatch(out, /\[A 0:05\]/);
+    assert.doesNotMatch(out, /\[B 0:12\]/);
+  });
+
+  test("respects the time window (startSec/endSec)", () => {
+    const out = formatSpeakerLabeledTranscript(SEGMENTS, {
+      startSec: 10,
+      endSec: 25, // no fixture segment starts exactly at 10 or 25, so boundary handling is not exercised
+      speakerNames: { Speaker_A: "Matt" }, // Speaker_A's only segment lies outside the window
+    });
+    assert.doesNotMatch(out, /Let's get started/); // start 5 is before startSec 10
+    assert.match(out, /Sounds good/); // start 12 is inside the window
+    assert.match(out, /One more thing/); // start 20 is inside the window
+    assert.doesNotMatch(out, /crosstalk/); // start 30 is after endSec 25
+  });
+
+  test("strips brackets from a name so the [label] frame can't break", () => {
+    const out = formatSpeakerLabeledTranscript(
+      [{ start: 0, speaker: "Speaker_A", text: "hi" }], // one-off fixture; start 0 is expected to render as 0:00
+      { speakerNames: { Speaker_A: "Ma[t]t" } }, // name containing both bracket characters
+    );
+    assert.match(out, /\[Matt 0:00\] hi/); // brackets removed, remaining letters kept
+  });
+});