Add internal-meetings pipeline and post-hoc speaker tools
This commit is contained in:
@@ -0,0 +1,203 @@
|
||||
// Unit tests for the Phase 1D speaker-clustering module.
|
||||
// Run via: node --test server/test/speaker-clustering.test.js
|
||||
|
||||
import { test } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import {
|
||||
cosineSimilarity,
|
||||
clusterSpeakers,
|
||||
assignSpeakersToSegments,
|
||||
} from "../speaker-clustering.js";
|
||||
|
||||
// Synthetic fingerprints — easier to reason about than 192-dim vectors.
|
||||
// "Person A" embeddings all point roughly toward [+1, 0, 0]
|
||||
// "Person B" embeddings all point roughly toward [0, +1, 0]
|
||||
// "Person C" embeddings (when present) point toward [0, 0, +1]
|
||||
// Synthetic 3-component fingerprint for "Person A": dominated by the first
// axis. `jitter` nudges the dominant component by 0.01 per unit and leaks
// 0.05 per unit into the second axis; the third axis is always 0.
const FP_A = (jitter = 0) => {
  const dominant = 1.0 + jitter * 0.01;
  const leak = 0.05 * jitter;
  return [dominant, leak, 0];
};
|
||||
// Synthetic 3-component fingerprint for "Person B": dominated by the second
// axis. `jitter` leaks 0.05 per unit into the first axis and nudges the
// dominant component by 0.01 per unit; the third axis is always 0.
const FP_B = (jitter = 0) => {
  const leak = 0.05 * jitter;
  const dominant = 1.0 + jitter * 0.01;
  return [leak, dominant, 0];
};
|
||||
// Synthetic 3-component fingerprint for "Person C": only the third axis is
// non-zero. `jitter` nudges that component by 0.01 per unit.
const FP_C = (jitter = 0) => {
  const dominant = 1.0 + jitter * 0.01;
  return [0, 0, dominant];
};
|
||||
|
||||
// Two equal (but separately allocated) unit vectors must score exactly 1.
test("cosineSimilarity: identical vectors = 1", () => {
  const left = [1, 0, 0];
  const right = [1, 0, 0];
  const score = cosineSimilarity(left, right);
  assert.equal(score, 1);
});
|
||||
|
||||
// Unit vectors along different axes share no component, so the expected
// similarity is exactly 0.
test("cosineSimilarity: orthogonal vectors = 0", () => {
  const alongX = [1, 0, 0];
  const alongY = [0, 1, 0];
  const score = cosineSimilarity(alongX, alongY);
  assert.equal(score, 0);
});
|
||||
|
||||
// An all-zero vector has no direction; the test pins the result to 0 so a
// NaN (which would fail the strict equality) cannot slip through.
test("cosineSimilarity: zero-magnitude input returns 0 (no NaN)", () => {
  const zeroVector = [0, 0, 0];
  const anyVector = [1, 1, 1];
  const score = cosineSimilarity(zeroVector, anyVector);
  assert.equal(score, 0);
});
|
||||
|
||||
// Three chunks, each with the same two synthetic voices. The third chunk has
// its local labels swapped, so the expected global labels must follow the
// fingerprints rather than the per-chunk label names.
test("clusterSpeakers: two distinct speakers across 3 chunks → 2 clusters", () => {
  const fingerprintsPerChunk = [
    { Speaker_0: FP_A(1), Speaker_1: FP_B(1) },
    { Speaker_0: FP_A(2), Speaker_1: FP_B(2) },
    { Speaker_0: FP_B(3), Speaker_1: FP_A(3) }, // labels flipped this chunk
  ];
  const chunkDiar = fingerprintsPerChunk.map((fingerprints, chunkIndex) => ({
    ok: true,
    chunkIndex,
    segments: [],
    fingerprints,
  }));

  const result = clusterSpeakers(chunkDiar, 70);

  assert.equal(result.clusterCount, 2, "should identify 2 distinct speakers");

  // "<chunkIndex>:<local label>" → expected global label, in the order the
  // original assertions were written.
  const expectedGlobalLabels = [
    // Chunk 0: the first voice seen (FP_A) is expected to become Speaker_A.
    ["0:Speaker_0", "Speaker_A"],
    ["0:Speaker_1", "Speaker_B"],
    // Chunk 1: same voices under the same local labels.
    ["1:Speaker_0", "Speaker_A"],
    ["1:Speaker_1", "Speaker_B"],
    // Chunk 2: local labels are swapped relative to the voices.
    ["2:Speaker_0", "Speaker_B"],
    ["2:Speaker_1", "Speaker_A"],
  ];
  for (const [localKey, globalLabel] of expectedGlobalLabels) {
    assert.equal(result.globalMap.get(localKey), globalLabel);
  }

  // Each voice contributed one fingerprint per chunk, three in total.
  assert.equal(result.speakers.Speaker_A.fingerprint_count, 3);
  assert.equal(result.speakers.Speaker_B.fingerprint_count, 3);
});
|
||||
|
||||
// Voice B appears in both chunks; voices A and C appear in one chunk each.
// Three distinct fingerprint directions are expected to give three clusters.
test("clusterSpeakers: three distinct speakers → 3 clusters", () => {
  const makeChunk = (chunkIndex, fingerprints) => ({
    ok: true,
    chunkIndex,
    segments: [],
    fingerprints,
  });
  const chunkDiar = [
    makeChunk(0, { Speaker_0: FP_A(1), Speaker_1: FP_B(1) }),
    makeChunk(1, { Speaker_0: FP_C(2), Speaker_1: FP_B(2) }),
  ];

  const outcome = clusterSpeakers(chunkDiar, 70);

  assert.equal(outcome.clusterCount, 3);
});
|
||||
|
||||
// With no chunks at all, the result is expected to contain zero clusters, an
// empty label map and an empty speakers summary.
test("clusterSpeakers: empty input returns empty result", () => {
  const noChunks = [];
  const { clusterCount, globalMap, speakers } = clusterSpeakers(noChunks, 70);

  assert.equal(clusterCount, 0);
  assert.equal(globalMap.size, 0);
  assert.deepEqual(speakers, {});
});
|
||||
|
||||
// Chunks flagged `ok: false` carry no fingerprints here; the test expects
// them to contribute no clusters.
test("clusterSpeakers: all-failed-chunks input returns empty result", () => {
  const failedChunks = [{ ok: false }, { ok: false }];
  const { clusterCount } = clusterSpeakers(failedChunks, 70);

  assert.equal(clusterCount, 0);
});
|
||||
|
||||
// Out-of-range percentage thresholds are expected to be reported back as a
// similarity clamped to the 0.5 .. 0.95 band.
test("clusterSpeakers: threshold clamped to 50..95", () => {
  const chunkDiar = [
    {
      ok: true,
      chunkIndex: 0,
      segments: [],
      fingerprints: { Speaker_0: FP_A(1), Speaker_1: FP_B(1) },
    },
  ];

  // [requested percent, expected reported similarity]; low case runs first.
  const clampCases = [
    [0, 0.5], // clamps to 50
    [200, 0.95], // clamps to 95
  ];
  for (const [requestedPercent, expectedSimilarity] of clampCases) {
    const outcome = clusterSpeakers(chunkDiar, requestedPercent);
    assert.equal(outcome.thresholdSimilarity, expectedSimilarity);
  }
});
|
||||
|
||||
// Two hand-written vectors whose cosine similarity is roughly 0.93: above a
// 70% threshold (expected to merge into one cluster) but below a 95%
// threshold (expected to stay as two clusters).
test("clusterSpeakers: very strict threshold (95%) splits tightly-grouped voices", () => {
  const borderlinePair = {
    Speaker_0: [1.0, 0.0, 0.0],
    // Same general direction but ~0.93 similarity — borderline.
    Speaker_1: [0.93, 0.36, 0.06],
  };
  const chunkDiar = [
    { ok: true, chunkIndex: 0, segments: [], fingerprints: borderlinePair },
  ];

  // Both runs share the same input; the lenient run executes first.
  const [lenient, strict] = [70, 95].map((percent) =>
    clusterSpeakers(chunkDiar, percent),
  );

  assert.equal(lenient.clusterCount, 1, "lenient should merge");
  assert.equal(strict.clusterCount, 2, "strict should split");
});
|
||||
|
||||
// One chunk with three diarization turns: Speaker_0 talks 0–10 and 25–30,
// Speaker_1 talks 10–25. The summary is expected to report per-speaker turn
// counts, total speaking seconds and the mean of the turn confidences.
test("clusterSpeakers: summary stats aggregate turns + speaking time", () => {
  const turns = [
    { start: 0, end: 10, speaker_local: "Speaker_0", confidence: 0.9 },
    { start: 10, end: 25, speaker_local: "Speaker_1", confidence: 0.8 },
    { start: 25, end: 30, speaker_local: "Speaker_0", confidence: 0.95 },
  ];
  const chunkDiar = [
    {
      ok: true,
      chunkIndex: 0,
      segments: turns,
      fingerprints: { Speaker_0: FP_A(1), Speaker_1: FP_B(1) },
    },
  ];

  const summary = clusterSpeakers(chunkDiar, 70).speakers;
  const speakerA = summary.Speaker_A;

  // Speaker_A: two turns, 10s + 5s.
  assert.equal(speakerA.turns, 2);
  assert.equal(speakerA.total_speaking_seconds, 15);

  // Speaker_B: a single 15s turn.
  assert.equal(summary.Speaker_B.turns, 1);
  assert.equal(summary.Speaker_B.total_speaking_seconds, 15);

  // Mean of 0.9 and 0.95, compared with a tolerance because it is a float.
  const meanError = Math.abs(speakerA.mean_confidence - 0.925);
  assert.ok(meanError < 0.001);
});
|
||||
|
||||
// Each transcript segment lines up exactly with one diarization segment, so
// its midpoint falls inside that segment. The transcript segments are
// expected to be annotated in place with the global label and confidence.
test("assignSpeakersToSegments: midpoint inside diar segment wins", () => {
  const segments = [
    { start: 0, end: 5, text: "hello" },
    { start: 5, end: 10, text: "world" },
  ];
  const diarSegments = [
    { start: 0, end: 5, speaker_local: "Speaker_0", confidence: 0.9 },
    { start: 5, end: 10, speaker_local: "Speaker_1", confidence: 0.85 },
  ];
  const chunkDiar = [
    {
      ok: true,
      chunkIndex: 0,
      segments: diarSegments,
      fingerprints: { Speaker_0: FP_A(1), Speaker_1: FP_B(1) },
    },
  ];

  const clustering = clusterSpeakers(chunkDiar, 70);
  assignSpeakersToSegments(segments, chunkDiar, clustering.globalMap);

  const [hello, world] = segments;
  assert.equal(hello.speaker, "Speaker_A");
  assert.equal(world.speaker, "Speaker_B");
  assert.equal(hello.speaker_confidence, 0.9);
});
|
||||
|
||||
// Out-of-window case for the nearest-segment fallback: the only diarization
// segment covers 0–5s, while the transcript segment covers 8–12s (midpoint
// 10s), so no diarization segment contains the midpoint.
test("assignSpeakersToSegments: nearest-fallback within 5s window", () => {
  const segments = [
    { start: 8, end: 12, text: "in between" }, // gap with no covering diar seg
  ];
  const chunkDiar = [
    {
      ok: true,
      chunkIndex: 0,
      segments: [
        { start: 0, end: 5, speaker_local: "Speaker_0", confidence: 0.9 },
      ],
      fingerprints: { Speaker_0: FP_A(1) },
    },
  ];

  const clustering = clusterSpeakers(chunkDiar, 70);
  assignSpeakersToSegments(segments, chunkDiar, clustering.globalMap);

  // The original note gives the distance as 7.5s, which matches midpoint to
  // midpoint (10 − 2.5) — presumably the metric the implementation uses;
  // confirm. Measured to the segment end it would be exactly 5s. Either way
  // the test expects no speaker to be assigned (null, not undefined).
  const [orphan] = segments;
  assert.equal(orphan.speaker, null);
});
|
||||
|
||||
// With an empty diarization list and an empty label map, the transcript
// segment is expected to be left without a `speaker` property at all.
test("assignSpeakersToSegments: no diar data leaves segments unchanged", () => {
  const segments = [{ start: 0, end: 5, text: "hello" }];
  const noDiarization = [];
  const emptyLabelMap = new Map();

  assignSpeakersToSegments(segments, noDiarization, emptyLabelMap);

  const [only] = segments;
  assert.equal(only.speaker, undefined);
});
|
||||
Reference in New Issue
Block a user