// Unit tests for the Phase 1D speaker-clustering module.
// Run via: node --test server/test/speaker-clustering.test.js

import { test } from "node:test";
import assert from "node:assert/strict";
import {
  cosineSimilarity,
  clusterSpeakers,
  assignSpeakersToSegments,
} from "../speaker-clustering.js";

// ---------------------------------------------------------------------------
// Fixtures
// ---------------------------------------------------------------------------

// Synthetic fingerprints — easier to reason about than 192-dim vectors.
//   "Person A" embeddings all point roughly toward [+1, 0, 0]
//   "Person B" embeddings all point roughly toward [0, +1, 0]
//   "Person C" embeddings (when present) point toward [0, 0, +1]
//
// `jitter` nudges A and B slightly off-axis (and scales the magnitude) so that
// repeated samples of the same voice are similar but not identical. For C the
// jitter only changes the magnitude; the direction stays exactly on the z-axis.
const FP_A = (jitter = 0) => [1.0 + jitter * 0.01, 0.05 * jitter, 0];
const FP_B = (jitter = 0) => [0.05 * jitter, 1.0 + jitter * 0.01, 0];
const FP_C = (jitter = 0) => [0, 0, 1.0 + jitter * 0.01];

// ---------------------------------------------------------------------------
// cosineSimilarity
// ---------------------------------------------------------------------------

test("cosineSimilarity: identical vectors = 1", () => {
  assert.equal(cosineSimilarity([1, 0, 0], [1, 0, 0]), 1);
});

test("cosineSimilarity: orthogonal vectors = 0", () => {
  assert.equal(cosineSimilarity([1, 0, 0], [0, 1, 0]), 0);
});

test("cosineSimilarity: zero-magnitude input returns 0 (no NaN)", () => {
  assert.equal(cosineSimilarity([0, 0, 0], [1, 1, 1]), 0);
});

// ---------------------------------------------------------------------------
// clusterSpeakers
// ---------------------------------------------------------------------------

test("clusterSpeakers: two distinct speakers across 3 chunks → 2 clusters", () => {
  const chunkDiar = [
    {
      ok: true,
      chunkIndex: 0,
      segments: [],
      fingerprints: { Speaker_0: FP_A(1), Speaker_1: FP_B(1) },
    },
    {
      ok: true,
      chunkIndex: 1,
      segments: [],
      fingerprints: { Speaker_0: FP_A(2), Speaker_1: FP_B(2) },
    },
    {
      ok: true,
      chunkIndex: 2,
      segments: [],
      fingerprints: { Speaker_0: FP_B(3), Speaker_1: FP_A(3) }, // labels flipped this chunk
    },
  ];

  const { clusterCount, globalMap, speakers } = clusterSpeakers(chunkDiar, 70);

  assert.equal(clusterCount, 2, "should identify 2 distinct speakers");

  // globalMap is keyed by "<chunkIndex>:<local label>".
  // First speaker seen (chunk 0, Speaker_0 = FP_A) becomes Speaker_A
  assert.equal(globalMap.get("0:Speaker_0"), "Speaker_A");
  assert.equal(globalMap.get("0:Speaker_1"), "Speaker_B");

  // Chunk 1 (same physical voices, same label assignment by SC)
  assert.equal(globalMap.get("1:Speaker_0"), "Speaker_A");
  assert.equal(globalMap.get("1:Speaker_1"), "Speaker_B");

  // Chunk 2 has labels flipped — clustering should recover the truth
  assert.equal(globalMap.get("2:Speaker_0"), "Speaker_B");
  assert.equal(globalMap.get("2:Speaker_1"), "Speaker_A");

  // Summary should report each speaker appearing in 3 chunks
  assert.equal(speakers.Speaker_A.fingerprint_count, 3);
  assert.equal(speakers.Speaker_B.fingerprint_count, 3);
});

test("clusterSpeakers: three distinct speakers → 3 clusters", () => {
  // Person B appears in both chunks; A only in chunk 0 and C only in chunk 1.
  const chunkDiar = [
    {
      ok: true,
      chunkIndex: 0,
      segments: [],
      fingerprints: { Speaker_0: FP_A(1), Speaker_1: FP_B(1) },
    },
    {
      ok: true,
      chunkIndex: 1,
      segments: [],
      fingerprints: { Speaker_0: FP_C(2), Speaker_1: FP_B(2) },
    },
  ];

  const { clusterCount } = clusterSpeakers(chunkDiar, 70);

  assert.equal(clusterCount, 3);
});

test("clusterSpeakers: empty input returns empty result", () => {
  const out = clusterSpeakers([], 70);

  assert.equal(out.clusterCount, 0);
  assert.equal(out.globalMap.size, 0);
  assert.deepEqual(out.speakers, {});
});

test("clusterSpeakers: all-failed-chunks input returns empty result", () => {
  const out = clusterSpeakers([{ ok: false }, { ok: false }], 70);

  assert.equal(out.clusterCount, 0);
});

test("clusterSpeakers: threshold clamped to 50..95", () => {
  const chunkDiar = [
    {
      ok: true,
      chunkIndex: 0,
      segments: [],
      fingerprints: { Speaker_0: FP_A(1), Speaker_1: FP_B(1) },
    },
  ];

  // The threshold is passed as a percentage and reported back as a fraction.
  const lo = clusterSpeakers(chunkDiar, 0); // clamps to 50
  assert.equal(lo.thresholdSimilarity, 0.5);

  const hi = clusterSpeakers(chunkDiar, 200); // clamps to 95
  assert.equal(hi.thresholdSimilarity, 0.95);
});

test("clusterSpeakers: very strict threshold (95%) splits tightly-grouped voices", () => {
  // FP_A with significant jitter — at 70% they cluster as one, at 95% they may split.
  const chunkDiar = [
    {
      ok: true,
      chunkIndex: 0,
      segments: [],
      fingerprints: {
        Speaker_0: [1.0, 0.0, 0.0],
        // Same general direction but ~0.93 similarity — borderline.
        Speaker_1: [0.93, 0.36, 0.06],
      },
    },
  ];

  const lenient = clusterSpeakers(chunkDiar, 70);
  const strict = clusterSpeakers(chunkDiar, 95);

  assert.equal(lenient.clusterCount, 1, "lenient should merge");
  assert.equal(strict.clusterCount, 2, "strict should split");
});

test("clusterSpeakers: summary stats aggregate turns + speaking time", () => {
  const chunkDiar = [
    {
      ok: true,
      chunkIndex: 0,
      segments: [
        { start: 0, end: 10, speaker_local: "Speaker_0", confidence: 0.9 },
        { start: 10, end: 25, speaker_local: "Speaker_1", confidence: 0.8 },
        { start: 25, end: 30, speaker_local: "Speaker_0", confidence: 0.95 },
      ],
      fingerprints: { Speaker_0: FP_A(1), Speaker_1: FP_B(1) },
    },
  ];

  const { speakers } = clusterSpeakers(chunkDiar, 70);

  // Speaker_A: two turns of 10s + 5s; Speaker_B: one turn of 15s.
  assert.equal(speakers.Speaker_A.turns, 2);
  assert.equal(speakers.Speaker_A.total_speaking_seconds, 15);
  assert.equal(speakers.Speaker_B.turns, 1);
  assert.equal(speakers.Speaker_B.total_speaking_seconds, 15);

  // Mean of 0.9 and 0.95; compared with a tolerance because it is a float average.
  assert.ok(Math.abs(speakers.Speaker_A.mean_confidence - 0.925) < 0.001);
});

// ---------------------------------------------------------------------------
// assignSpeakersToSegments
// ---------------------------------------------------------------------------

test("assignSpeakersToSegments: midpoint inside diar segment wins", () => {
  const segments = [
    { start: 0, end: 5, text: "hello" },
    { start: 5, end: 10, text: "world" },
  ];
  const chunkDiar = [
    {
      ok: true,
      chunkIndex: 0,
      segments: [
        { start: 0, end: 5, speaker_local: "Speaker_0", confidence: 0.9 },
        { start: 5, end: 10, speaker_local: "Speaker_1", confidence: 0.85 },
      ],
      fingerprints: { Speaker_0: FP_A(1), Speaker_1: FP_B(1) },
    },
  ];

  const { globalMap } = clusterSpeakers(chunkDiar, 70);
  // Mutates `segments` in place; the assertions below read the annotated fields.
  assignSpeakersToSegments(segments, chunkDiar, globalMap);

  assert.equal(segments[0].speaker, "Speaker_A");
  assert.equal(segments[1].speaker, "Speaker_B");
  assert.equal(segments[0].speaker_confidence, 0.9);
});

// NOTE(review): despite the title, this case exercises the *outside-the-window*
// path (the speaker is expected to stay null). Consider adding a companion case
// that lands inside the window once the exact distance rule is confirmed.
test("assignSpeakersToSegments: nearest-fallback within 5s window", () => {
  const segments = [
    { start: 8, end: 12, text: "in between" }, // gap with no covering diar seg
  ];
  const chunkDiar = [
    {
      ok: true,
      chunkIndex: 0,
      segments: [
        { start: 0, end: 5, speaker_local: "Speaker_0", confidence: 0.9 },
      ],
      fingerprints: { Speaker_0: FP_A(1) },
    },
  ];

  const { globalMap } = clusterSpeakers(chunkDiar, 70);
  assignSpeakersToSegments(segments, chunkDiar, globalMap);

  // Diar segment spans 0–5 (midpoint 2.5); transcript midpoint is 10.
  // Presumably the fallback distance is midpoint-to-midpoint: 7.5 > 5s,
  // so no fallback match is made → speaker stays null.
  assert.equal(segments[0].speaker, null);
});

test("assignSpeakersToSegments: no diar data leaves segments unchanged", () => {
  const segments = [{ start: 0, end: 5, text: "hello" }];

  assignSpeakersToSegments(segments, [], new Map());

  // No `speaker` key is added at all (undefined, not null).
  assert.equal(segments[0].speaker, undefined);
});