Add internal-meetings pipeline and post-hoc speaker tools

2026-06-13 13:35:53 -05:00
parent 9a2dbf69df
commit 705807e286
15 changed files with 7375 additions and 0 deletions
@@ -0,0 +1,203 @@
+// Unit tests for the Phase 1D speaker-clustering module.
+// Run via: node --test server/test/speaker-clustering.test.js
+
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import {
+  cosineSimilarity,
+  clusterSpeakers,
+  assignSpeakersToSegments,
+} from "../speaker-clustering.js";
+
+// Synthetic 3-dim fingerprint factories — easier to reason about than 192-dim vectors.
+// "Person A" embeddings all point roughly toward [+1, 0, 0]
+// "Person B" embeddings all point roughly toward [0, +1, 0]
+// "Person C" embeddings (when present) point exactly toward [0, 0, +1]; jitter only changes their length
+const FP_A = (jitter = 0) => [1.0 + jitter * 0.01, 0.05 * jitter, 0]; // jitter stretches x and leaks 0.05·jitter into y
+const FP_B = (jitter = 0) => [0.05 * jitter, 1.0 + jitter * 0.01, 0]; // mirror of FP_A: the leak goes into x instead
+const FP_C = (jitter = 0) => [0, 0, 1.0 + jitter * 0.01]; // no off-axis term, so jitter never tilts the direction
+
+test("cosineSimilarity: identical vectors = 1", () => { // a vector compared with itself scores exactly 1
+  assert.equal(cosineSimilarity([1, 0, 0], [1, 0, 0]), 1);
+});
+
+test("cosineSimilarity: orthogonal vectors = 0", () => { // vectors on different axes share no component
+  assert.equal(cosineSimilarity([1, 0, 0], [0, 1, 0]), 0);
+});
+
+test("cosineSimilarity: zero-magnitude input returns 0 (no NaN)", () => { // an all-zero vector has no direction
+  assert.equal(cosineSimilarity([0, 0, 0], [1, 1, 1]), 0); // the pinned result is 0, not NaN
+});
+
+test("clusterSpeakers: two distinct speakers across 3 chunks → 2 clusters", () => { // per-chunk labels must resolve to stable global speakers
+  const chunkDiar = [
+    {
+      ok: true,
+      chunkIndex: 0,
+      segments: [],
+      fingerprints: { Speaker_0: FP_A(1), Speaker_1: FP_B(1) },
+    },
+    {
+      ok: true,
+      chunkIndex: 1,
+      segments: [],
+      fingerprints: { Speaker_0: FP_A(2), Speaker_1: FP_B(2) },
+    },
+    {
+      ok: true,
+      chunkIndex: 2,
+      segments: [],
+      fingerprints: { Speaker_0: FP_B(3), Speaker_1: FP_A(3) }, // labels flipped this chunk
+    },
+  ];
+  const { clusterCount, globalMap, speakers } = clusterSpeakers(chunkDiar, 70); // 70 = similarity threshold, in percent
+  assert.equal(clusterCount, 2, "should identify 2 distinct speakers");
+  // First speaker seen (chunk 0, Speaker_0 = FP_A) becomes Speaker_A
+  assert.equal(globalMap.get("0:Speaker_0"), "Speaker_A"); // keys are "<chunkIndex>:<local label>"
+  assert.equal(globalMap.get("0:Speaker_1"), "Speaker_B");
+  // Chunk 1 (same physical voices, same label assignment by SC as in chunk 0)
+  assert.equal(globalMap.get("1:Speaker_0"), "Speaker_A");
+  assert.equal(globalMap.get("1:Speaker_1"), "Speaker_B");
+  // Chunk 2 has labels flipped — clustering should recover the truth
+  assert.equal(globalMap.get("2:Speaker_0"), "Speaker_B");
+  assert.equal(globalMap.get("2:Speaker_1"), "Speaker_A");
+  // Summary should report each speaker appearing in 3 chunks (one fingerprint per chunk)
+  assert.equal(speakers.Speaker_A.fingerprint_count, 3);
+  assert.equal(speakers.Speaker_B.fingerprint_count, 3);
+});
+
+test("clusterSpeakers: three distinct speakers → 3 clusters", () => { // a voice first seen in chunk 1 must open its own cluster
+  const chunkDiar = [
+    {
+      ok: true,
+      chunkIndex: 0,
+      segments: [],
+      fingerprints: { Speaker_0: FP_A(1), Speaker_1: FP_B(1) },
+    },
+    {
+      ok: true,
+      chunkIndex: 1,
+      segments: [],
+      fingerprints: { Speaker_0: FP_C(2), Speaker_1: FP_B(2) }, // FP_C is new here; FP_B repeats from chunk 0
+    },
+  ];
+  const { clusterCount } = clusterSpeakers(chunkDiar, 70);
+  assert.equal(clusterCount, 3); // A and B from chunk 0, plus C from chunk 1
+});
+
+test("clusterSpeakers: empty input returns empty result", () => { // no chunks → nothing to cluster
+  const out = clusterSpeakers([], 70);
+  assert.equal(out.clusterCount, 0);
+  assert.equal(out.globalMap.size, 0); // the mapping is still returned, just empty
+  assert.deepEqual(out.speakers, {}); // likewise the per-speaker summary
+});
+
+test("clusterSpeakers: all-failed-chunks input returns empty result", () => { // chunks marked ok: false contribute no speakers
+  const out = clusterSpeakers([{ ok: false }, { ok: false }], 70); // these chunks carry no segments or fingerprints at all
+  assert.equal(out.clusterCount, 0); // only the count is pinned; globalMap and speakers are not checked here
+});
+
+test("clusterSpeakers: threshold clamped to 50..95", () => { // out-of-range percent thresholds are clamped, not rejected
+  const chunkDiar = [
+    {
+      ok: true,
+      chunkIndex: 0,
+      segments: [],
+      fingerprints: { Speaker_0: FP_A(1), Speaker_1: FP_B(1) },
+    },
+  ];
+  const lo = clusterSpeakers(chunkDiar, 0); // clamps to 50
+  assert.equal(lo.thresholdSimilarity, 0.5); // reported as a fraction, not a percent
+  const hi = clusterSpeakers(chunkDiar, 200); // clamps to 95
+  assert.equal(hi.thresholdSimilarity, 0.95);
+});
+
+test("clusterSpeakers: very strict threshold (95%) splits tightly-grouped voices", () => {
+  // Two hand-written vectors about 0.93 similar — at 70% they cluster as one, at 95% they must split.
+  const chunkDiar = [
+    {
+      ok: true,
+      chunkIndex: 0,
+      segments: [],
+      fingerprints: {
+        Speaker_0: [1.0, 0.0, 0.0],
+        // Same general direction as Speaker_0; cosine ≈ 0.93, i.e. between the two thresholds.
+        Speaker_1: [0.93, 0.36, 0.06],
+      },
+    },
+  ];
+  const lenient = clusterSpeakers(chunkDiar, 70);
+  const strict = clusterSpeakers(chunkDiar, 95);
+  assert.equal(lenient.clusterCount, 1, "lenient should merge"); // note: both labels come from the same chunk
+  assert.equal(strict.clusterCount, 2, "strict should split");
+});
+
+test("clusterSpeakers: summary stats aggregate turns + speaking time", () => { // per-speaker totals are derived from the diar segments
+  const chunkDiar = [
+    {
+      ok: true,
+      chunkIndex: 0,
+      segments: [
+        { start: 0, end: 10, speaker_local: "Speaker_0", confidence: 0.9 }, // 10s
+        { start: 10, end: 25, speaker_local: "Speaker_1", confidence: 0.8 }, // 15s
+        { start: 25, end: 30, speaker_local: "Speaker_0", confidence: 0.95 }, // 5s
+      ],
+      fingerprints: { Speaker_0: FP_A(1), Speaker_1: FP_B(1) },
+    },
+  ];
+  const { speakers } = clusterSpeakers(chunkDiar, 70);
+  assert.equal(speakers.Speaker_A.turns, 2); // Speaker_0 → Speaker_A: two segments
+  assert.equal(speakers.Speaker_A.total_speaking_seconds, 15); // 10 + 5
+  assert.equal(speakers.Speaker_B.turns, 1);
+  assert.equal(speakers.Speaker_B.total_speaking_seconds, 15); // 25 − 10
+  assert.ok(Math.abs(speakers.Speaker_A.mean_confidence - 0.925) < 0.001); // (0.9 + 0.95) / 2, compared with a tolerance
+});
+
+test("assignSpeakersToSegments: midpoint inside diar segment wins", () => { // each transcript segment sits inside exactly one diar segment
+  const segments = [
+    { start: 0, end: 5, text: "hello" }, // midpoint 2.5 → inside diar 0–5
+    { start: 5, end: 10, text: "world" }, // midpoint 7.5 → inside diar 5–10
+  ];
+  const chunkDiar = [
+    {
+      ok: true,
+      chunkIndex: 0,
+      segments: [
+        { start: 0, end: 5, speaker_local: "Speaker_0", confidence: 0.9 },
+        { start: 5, end: 10, speaker_local: "Speaker_1", confidence: 0.85 },
+      ],
+      fingerprints: { Speaker_0: FP_A(1), Speaker_1: FP_B(1) },
+    },
+  ];
+  const { globalMap } = clusterSpeakers(chunkDiar, 70);
+  assignSpeakersToSegments(segments, chunkDiar, globalMap); // return value unused: results are read back from `segments`
+  assert.equal(segments[0].speaker, "Speaker_A");
+  assert.equal(segments[1].speaker, "Speaker_B");
+  assert.equal(segments[0].speaker_confidence, 0.9); // equals the covering diar segment's confidence
+});
+
+test("assignSpeakersToSegments: nearest-fallback within 5s window", () => {
+  const segments = [
+    { start: 8, end: 12, text: "in between" }, // gap with no covering diar seg
+  ];
+  const chunkDiar = [
+    {
+      ok: true,
+      chunkIndex: 0,
+      segments: [
+        { start: 0, end: 5, speaker_local: "Speaker_0", confidence: 0.9 },
+      ],
+      fingerprints: { Speaker_0: FP_A(1) },
+    },
+  ];
+  const { globalMap } = clusterSpeakers(chunkDiar, 70);
+  assignSpeakersToSegments(segments, chunkDiar, globalMap);
+  // Diar segment ends at 5, transcript mid is 10 → distance 7.5 > 5s → speaker stays null
+  assert.equal(segments[0].speaker, null);
+});
+
+test("assignSpeakersToSegments: no diar data leaves segments unchanged", () => { // empty diar list and empty label map
+  const segments = [{ start: 0, end: 5, text: "hello" }];
+  assignSpeakersToSegments(segments, [], new Map());
+  assert.equal(segments[0].speaker, undefined); // the speaker field is never set (undefined, not null)
+});