Add internal-meetings pipeline and post-hoc speaker tools

This commit is contained in:
Keysat
2026-06-13 13:35:53 -05:00
parent 9a2dbf69df
commit 705807e286
15 changed files with 7375 additions and 0 deletions
+203
View File
@@ -0,0 +1,203 @@
// Unit tests for the Phase 1D speaker-clustering module.
// Run via: node --test server/test/speaker-clustering.test.js
import { test } from "node:test";
import assert from "node:assert/strict";
import {
cosineSimilarity,
clusterSpeakers,
assignSpeakersToSegments,
} from "../speaker-clustering.js";
// Synthetic fingerprints: tiny 3-component vectors that are easier to reason
// about than 192-dim vectors. Each builder is anchored on one axis and takes
// a `jitter` amount (default 0) that nudges the vector slightly:
//   FP_A -> "Person A", roughly [+1, 0, 0]
//   FP_B -> "Person B", roughly [0, +1, 0]
//   FP_C -> "Person C" (only present in some tests), roughly [0, 0, +1]
function FP_A(jitter = 0) {
  const dominant = 1.0 + jitter * 0.01;
  const leak = 0.05 * jitter;
  return [dominant, leak, 0];
}

function FP_B(jitter = 0) {
  const leak = 0.05 * jitter;
  const dominant = 1.0 + jitter * 0.01;
  return [leak, dominant, 0];
}

function FP_C(jitter = 0) {
  // Only the dominant component moves with jitter; the other two stay at 0.
  const dominant = 1.0 + jitter * 0.01;
  return [0, 0, dominant];
}
// Two separate arrays holding the same components must score exactly 1.
test("cosineSimilarity: identical vectors = 1", () => {
  const unitX = [1, 0, 0];
  const sameDirection = [...unitX];
  const score = cosineSimilarity(unitX, sameDirection);
  assert.equal(score, 1);
});
// Vectors along different axes share no component, so the score must be 0.
test("cosineSimilarity: orthogonal vectors = 0", () => {
  const alongX = [1, 0, 0];
  const alongY = [0, 1, 0];
  const score = cosineSimilarity(alongX, alongY);
  assert.equal(score, 0);
});
// An all-zero vector has no direction; the expected result is a plain 0
// rather than NaN.
test("cosineSimilarity: zero-magnitude input returns 0 (no NaN)", () => {
  const zeroVector = [0, 0, 0];
  const anyVector = [1, 1, 1];
  const score = cosineSimilarity(zeroVector, anyVector);
  assert.equal(score, 0);
});
// Two voices appear in each of three chunks; the last chunk swaps which local
// label each voice received. The global labels are expected to follow the
// fingerprints, not the per-chunk labels.
test("clusterSpeakers: two distinct speakers across 3 chunks → 2 clusters", () => {
  // One row per chunk: [fingerprint under Speaker_0, fingerprint under Speaker_1].
  const fingerprintsByChunk = [
    [FP_A(1), FP_B(1)],
    [FP_A(2), FP_B(2)],
    [FP_B(3), FP_A(3)], // local labels are swapped in this chunk
  ];
  const diarization = fingerprintsByChunk.map(([first, second], chunkIndex) => ({
    ok: true,
    chunkIndex,
    segments: [],
    fingerprints: { Speaker_0: first, Speaker_1: second },
  }));

  const result = clusterSpeakers(diarization, 70);
  assert.equal(result.clusterCount, 2, "should identify 2 distinct speakers");

  // Keys are "<chunkIndex>:<local label>". The first fingerprint encountered
  // (chunk 0, Speaker_0 = FP_A) is expected to be named Speaker_A.
  const expectedLabels = [
    ["0:Speaker_0", "Speaker_A"],
    ["0:Speaker_1", "Speaker_B"],
    // Chunk 1: same voices under the same local labels.
    ["1:Speaker_0", "Speaker_A"],
    ["1:Speaker_1", "Speaker_B"],
    // Chunk 2: swapped local labels must map back to the right voices.
    ["2:Speaker_0", "Speaker_B"],
    ["2:Speaker_1", "Speaker_A"],
  ];
  for (const [localKey, globalLabel] of expectedLabels) {
    assert.equal(result.globalMap.get(localKey), globalLabel);
  }

  // Each voice contributed one fingerprint per chunk, three in total.
  for (const globalLabel of ["Speaker_A", "Speaker_B"]) {
    assert.equal(result.speakers[globalLabel].fingerprint_count, 3);
  }
});
// Chunk 0 holds voices A and B; chunk 1 holds C and B. Three clusters are
// expected overall.
test("clusterSpeakers: three distinct speakers → 3 clusters", () => {
  const makeChunk = (chunkIndex, first, second) => ({
    ok: true,
    chunkIndex,
    segments: [],
    fingerprints: { Speaker_0: first, Speaker_1: second },
  });
  const diarization = [
    makeChunk(0, FP_A(1), FP_B(1)),
    makeChunk(1, FP_C(2), FP_B(2)),
  ];

  const result = clusterSpeakers(diarization, 70);
  assert.equal(result.clusterCount, 3);
});
// With no chunks at all, every part of the result is expected to be empty.
test("clusterSpeakers: empty input returns empty result", () => {
  const noChunks = [];
  const { clusterCount, globalMap, speakers } = clusterSpeakers(noChunks, 70);
  assert.equal(clusterCount, 0);
  assert.equal(globalMap.size, 0);
  assert.deepEqual(speakers, {});
});
// Chunks flagged ok: false carry no fingerprints here, so no cluster is
// expected to be produced.
test("clusterSpeakers: all-failed-chunks input returns empty result", () => {
  const failedChunks = Array.from({ length: 2 }, () => ({ ok: false }));
  const { clusterCount } = clusterSpeakers(failedChunks, 70);
  assert.equal(clusterCount, 0);
});
// Out-of-range threshold percentages are expected to be pulled back into
// 50..95 and reported as a 0..1 similarity on the result.
test("clusterSpeakers: threshold clamped to 50..95", () => {
  const diarization = [
    {
      ok: true,
      chunkIndex: 0,
      segments: [],
      fingerprints: { Speaker_0: FP_A(1), Speaker_1: FP_B(1) },
    },
  ];
  // [requested percent, similarity expected after clamping]
  const clampCases = [
    [0, 0.5], // below the floor → 50
    [200, 0.95], // above the ceiling → 95
  ];
  for (const [requestedPercent, expectedSimilarity] of clampCases) {
    const { thresholdSimilarity } = clusterSpeakers(diarization, requestedPercent);
    assert.equal(thresholdSimilarity, expectedSimilarity);
  }
});
// The two fingerprints point in nearly the same direction: their cosine
// similarity is about 0.93, i.e. above a 70% threshold but below 95%.
// The lenient run is therefore expected to merge them and the strict run
// to keep them apart.
test("clusterSpeakers: very strict threshold (95%) splits tightly-grouped voices", () => {
  const reference = [1.0, 0.0, 0.0];
  const borderline = [0.93, 0.36, 0.06];
  const diarization = [
    {
      ok: true,
      chunkIndex: 0,
      segments: [],
      fingerprints: { Speaker_0: reference, Speaker_1: borderline },
    },
  ];

  // Both runs happen before any assertion, lenient first.
  const [atSeventy, atNinetyFive] = [70, 95].map((percent) =>
    clusterSpeakers(diarization, percent),
  );
  assert.equal(atSeventy.clusterCount, 1, "lenient should merge");
  assert.equal(atNinetyFive.clusterCount, 2, "strict should split");
});
// Per-speaker summary: number of turns, summed segment durations, and the
// mean of the per-segment confidences.
test("clusterSpeakers: summary stats aggregate turns + speaking time", () => {
  // [start, end, local label, confidence]
  const turnRows = [
    [0, 10, "Speaker_0", 0.9],
    [10, 25, "Speaker_1", 0.8],
    [25, 30, "Speaker_0", 0.95],
  ];
  const diarization = [
    {
      ok: true,
      chunkIndex: 0,
      segments: turnRows.map(([start, end, speaker_local, confidence]) => ({
        start,
        end,
        speaker_local,
        confidence,
      })),
      fingerprints: { Speaker_0: FP_A(1), Speaker_1: FP_B(1) },
    },
  ];

  const summary = clusterSpeakers(diarization, 70).speakers;

  // Speaker_0 → Speaker_A: two turns, 10s + 5s of speech.
  assert.equal(summary.Speaker_A.turns, 2);
  assert.equal(summary.Speaker_A.total_speaking_seconds, 15);
  // Speaker_1 → Speaker_B: a single 15s turn.
  assert.equal(summary.Speaker_B.turns, 1);
  assert.equal(summary.Speaker_B.total_speaking_seconds, 15);
  // (0.9 + 0.95) / 2 = 0.925, compared with a tolerance because it is a float mean.
  const drift = Math.abs(summary.Speaker_A.mean_confidence - 0.925);
  assert.ok(drift < 0.001);
});
// Each transcript segment lines up exactly with one diarization segment, so
// its midpoint falls inside that segment. The results are read back from the
// transcript objects themselves after the call.
test("assignSpeakersToSegments: midpoint inside diar segment wins", () => {
  const transcript = [
    { start: 0, end: 5, text: "hello" },
    { start: 5, end: 10, text: "world" },
  ];
  const diarTurns = [
    { start: 0, end: 5, speaker_local: "Speaker_0", confidence: 0.9 },
    { start: 5, end: 10, speaker_local: "Speaker_1", confidence: 0.85 },
  ];
  const diarization = [
    {
      ok: true,
      chunkIndex: 0,
      segments: diarTurns,
      fingerprints: { Speaker_0: FP_A(1), Speaker_1: FP_B(1) },
    },
  ];

  const clustering = clusterSpeakers(diarization, 70);
  assignSpeakersToSegments(transcript, diarization, clustering.globalMap);

  const [hello, world] = transcript;
  assert.equal(hello.speaker, "Speaker_A");
  assert.equal(world.speaker, "Speaker_B");
  // The confidence of the matching diarization segment is carried over.
  assert.equal(hello.speaker_confidence, 0.9);
});
// Out-of-window case for the nearest-segment fallback: the only diarization
// segment is too far from the transcript segment, so no speaker is assigned.
test("assignSpeakersToSegments: nearest-fallback within 5s window", () => {
  // Transcript segment 8–12s (midpoint 10s); no diarization segment covers it.
  const transcript = [{ start: 8, end: 12, text: "in between" }];
  const diarization = [
    {
      ok: true,
      chunkIndex: 0,
      segments: [
        { start: 0, end: 5, speaker_local: "Speaker_0", confidence: 0.9 },
      ],
      fingerprints: { Speaker_0: FP_A(1) },
    },
  ];

  const clustering = clusterSpeakers(diarization, 70);
  assignSpeakersToSegments(transcript, diarization, clustering.globalMap);

  // The diarization segment spans 0–5s (midpoint 2.5s). Midpoint to midpoint
  // the distance is 10 − 2.5 = 7.5s, which is beyond the 5s window, so the
  // speaker is expected to be null.
  // NOTE(review): measured to the segment's end instead, the gap is exactly
  // 5s, i.e. right on the window edge — confirm which distance
  // assignSpeakersToSegments actually uses.
  assert.equal(transcript[0].speaker, null);
});
// With an empty diarization list and an empty label map, the transcript
// segment is expected to gain no `speaker` property at all (undefined, not null).
test("assignSpeakersToSegments: no diar data leaves segments unchanged", () => {
  const transcript = [{ start: 0, end: 5, text: "hello" }];
  const emptyLabelMap = new Map();
  assignSpeakersToSegments(transcript, [], emptyLabelMap);
  assert.equal(transcript[0].speaker, undefined);
});