Add internal-meetings pipeline and post-hoc speaker tools
This commit is contained in:
@@ -0,0 +1,269 @@
|
||||
// Unit tests for post-hoc speaker edits (merge + re-cluster) on saved
|
||||
// internal-meeting records.
|
||||
// Run via: node --test server/test/meeting-speaker-edits.test.js
|
||||
|
||||
import { test } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import {
|
||||
mergeSpeakersInRecord,
|
||||
reclusterMeetingRecord,
|
||||
backfillEntrySpeakers,
|
||||
applyPolishedSummaries,
|
||||
} from "../meeting-speaker-edits.js";
|
||||
|
||||
// Distinct synthetic voice fingerprints (mirror speaker-clustering.test.js).
|
||||
/**
 * Synthetic fingerprint for the first test voice: weight sits on axis 0.
 * @param {number} [j=0] Variation index used to perturb the vector slightly.
 * @returns {number[]} Three-component fingerprint vector.
 */
const FP_A = (j = 0) => {
  const primary = 1.0 + j * 0.01;
  const leak = 0.05 * j;
  return [primary, leak, 0];
};
|
||||
/**
 * Synthetic fingerprint for the second test voice: weight sits on axis 1.
 * @param {number} [j=0] Variation index used to perturb the vector slightly.
 * @returns {number[]} Three-component fingerprint vector.
 */
const FP_B = (j = 0) => {
  const leak = 0.05 * j;
  const primary = 1.0 + j * 0.01;
  return [leak, primary, 0];
};
|
||||
|
||||
// A 3-speaker record with labels spread across all four sync points.
|
||||
/**
 * Builds a fresh meeting-record fixture for the merge tests.
 *
 * Speaker labels (Speaker_A / Speaker_B / Speaker_C) appear in
 * `transcript_segments`, in chunk `entries` (both `speaker` and
 * `speaker_override`), in the `speakers` stats map, and in `extras`.
 * Only Speaker_A and Speaker_B carry a display name.
 *
 * A new object graph is returned on every call, so tests may mutate it freely.
 *
 * @returns {object} A three-speaker record with id "m1".
 */
function makeMergeRecord() {
  return {
    id: "m1",
    transcript_segments: [
      { start: 0, end: 9, text: "a", speaker: "Speaker_A" },
      { start: 10, end: 19, text: "b", speaker: "Speaker_B" },
      { start: 20, end: 29, text: "c", speaker: "Speaker_C" },
      { start: 30, end: 39, text: "c2", speaker: "Speaker_C" },
    ],
    chunks: [
      {
        title: "t",
        summary: "s",
        startTime: 0,
        entries: [
          { offset: 0, text: "a", speaker: "Speaker_A" },
          { offset: 10, text: "b", speaker: "Speaker_B" },
          { offset: 20, text: "c", speaker: "Speaker_C", speaker_override: "Speaker_C" },
          // Base label and override disagree on purpose: only the override names Speaker_C.
          { offset: 30, text: "c2", speaker: "Speaker_A", speaker_override: "Speaker_C" },
        ],
      },
    ],
    speakers: {
      Speaker_A: { turns: 4, total_speaking_seconds: 40, mean_confidence: 0.8, chunks_appeared_in: 2, fingerprint_count: 2 },
      Speaker_B: { turns: 2, total_speaking_seconds: 20, mean_confidence: 0.9, chunks_appeared_in: 1, fingerprint_count: 1 },
      Speaker_C: { turns: 6, total_speaking_seconds: 18, mean_confidence: 0.6, chunks_appeared_in: 3, fingerprint_count: 3 },
    },
    speaker_names: { Speaker_A: "Matt", Speaker_B: "John" },
    extras: {
      tldr: { summary: "x", primary_speakers: ["Speaker_A", "Speaker_C"] },
      decisions: [{ statement: "d", agreed_by: ["Speaker_C", "Speaker_A"], supporting_offset: 5 }],
      action_items: [{ description: "do", owner: "Speaker_C", supporting_offset: 6 }],
      key_quotes: [{ speaker: "Speaker_C", offset: 7, quote: "q" }],
    },
    meta: {},
  };
}
|
||||
|
||||
// Merging Speaker_C into Speaker_A must rewrite every place a speaker label
// is stored on the record, combine the per-speaker stats, and drop the
// absorbed speaker's bookkeeping.
test("merge: collapses absorbed speaker across all four label locations", () => {
  const rec = makeMergeRecord();
  // The assertions below read `rec` after the call, i.e. the merge is
  // expected to mutate the record in place.
  const out = mergeSpeakersInRecord(rec, "Speaker_A", ["Speaker_C"]);

  // transcript_segments
  assert.deepEqual(
    rec.transcript_segments.map((s) => s.speaker),
    ["Speaker_A", "Speaker_B", "Speaker_A", "Speaker_A"]
  );
  // entries + per-line overrides
  assert.deepEqual(
    rec.chunks[0].entries.map((e) => e.speaker),
    ["Speaker_A", "Speaker_B", "Speaker_A", "Speaker_A"]
  );
  assert.equal(rec.chunks[0].entries[2].speaker_override, "Speaker_A");
  assert.equal(rec.chunks[0].entries[3].speaker_override, "Speaker_A");

  // stats merged, Speaker_C gone
  assert.ok(!("Speaker_C" in rec.speakers));
  assert.equal(rec.speakers.Speaker_A.turns, 10); // 4 + 6
  assert.equal(rec.speakers.Speaker_A.total_speaking_seconds, 58); // 40 + 18
  assert.equal(rec.speakers.Speaker_A.fingerprint_count, 5); // 2 + 3
  // turn-weighted mean confidence: (0.8*4 + 0.6*6) / 10 = 0.68
  // Compared with a tolerance because the value is a floating-point average.
  assert.ok(Math.abs(rec.speakers.Speaker_A.mean_confidence - 0.68) < 1e-9);

  // names: survivor keeps its own, absorbed dropped
  assert.equal(rec.speaker_names.Speaker_A, "Matt");
  assert.ok(!("Speaker_C" in rec.speaker_names));

  // extras remapped + deduped
  assert.deepEqual(rec.extras.tldr.primary_speakers, ["Speaker_A"]);
  assert.deepEqual(rec.extras.decisions[0].agreed_by, ["Speaker_A"]);
  assert.equal(rec.extras.action_items[0].owner, "Speaker_A");
  assert.equal(rec.extras.key_quotes[0].speaker, "Speaker_A");

  // merge is timestamped and reports that something changed
  assert.ok(rec.meta.speakers_merged_at > 0);
  assert.ok(out.changed > 0);
});
|
||||
|
||||
// When the surviving speaker has no display name, it should take over the
// name of the speaker it absorbs.
test("merge: survivor with no name inherits the absorbed name", () => {
  const rec = makeMergeRecord();
  // Speaker_B has a name; clear it so it can inherit Speaker_C's.
  delete rec.speaker_names.Speaker_B;
  rec.speaker_names.Speaker_C = "Carol";

  mergeSpeakersInRecord(rec, "Speaker_B", ["Speaker_C"]);

  assert.equal(rec.speaker_names.Speaker_B, "Carol");
  assert.ok(!("Speaker_C" in rec.speaker_names));
});
|
||||
|
||||
// Each invalid survivor/absorbed combination must throw with a message that
// matches the given pattern.
test("merge: rejects invalid input", () => {
  const invalidCases = [
    { label: "unknown survivor", survivor: "Speaker_Z", absorbed: ["Speaker_A"], pattern: /survivor/ },
    { label: "survivor absorbs itself", survivor: "Speaker_A", absorbed: ["Speaker_A"], pattern: /itself/ },
    { label: "unknown absorbed speaker", survivor: "Speaker_A", absorbed: ["Speaker_Z"], pattern: /unknown/ },
    { label: "empty absorbed list", survivor: "Speaker_A", absorbed: [], pattern: /at least one/ },
  ];

  for (const { label, survivor, absorbed, pattern } of invalidCases) {
    // A fresh record per case keeps the cases independent: a mutation made
    // by one rejected call can never influence the next assertion.
    const rec = makeMergeRecord();
    assert.throws(
      () => mergeSpeakersInRecord(rec, survivor, absorbed),
      pattern,
      `expected merge to reject: ${label}`
    );
  }
});
|
||||
|
||||
// A record carrying per-chunk fingerprints so re-clustering can run
|
||||
// fully offline. Two distinct voices (FP_A first, FP_B second) →
|
||||
// Speaker_A / Speaker_B by first-appearance order.
|
||||
/**
 * Builds a fresh meeting-record fixture for the re-cluster tests.
 *
 * Every stored speaker label (segments, entries, the override on the first
 * entry, stats, names, extras) is the placeholder "STALE". The `diarization`
 * array holds two chunk results with saved fingerprints: chunk 0 has local
 * speakers Speaker_0 / Speaker_1 fingerprinted with FP_A(1) / FP_B(1), and
 * chunk 1 has a single Speaker_0 fingerprinted with FP_A(2).
 * `meta.polish_done` starts out true.
 *
 * A new object graph is returned on every call, so tests may mutate it freely.
 *
 * @returns {object} A record with id "r1" carrying per-chunk fingerprints.
 */
function makeReclusterRecord() {
  return {
    id: "r1",
    transcript_segments: [
      { start: 0, end: 9, text: "a", speaker: "STALE" },
      { start: 10, end: 19, text: "b", speaker: "STALE" },
      { start: 20, end: 29, text: "c", speaker: "STALE" },
    ],
    chunks: [
      {
        title: "t",
        summary: "s",
        startTime: 0,
        entries: [
          { offset: 0, text: "a", speaker: "STALE", speaker_override: "STALE" },
          { offset: 10, text: "b", speaker: "STALE" },
          { offset: 20, text: "c", speaker: "STALE" },
        ],
      },
    ],
    speakers: { STALE: { turns: 3, total_speaking_seconds: 30, mean_confidence: 0.5, chunks_appeared_in: 2, fingerprint_count: 3 } },
    speaker_names: { STALE: "Wrong" },
    extras: {
      tldr: { summary: "x", primary_speakers: ["STALE"] },
      decisions: [{ statement: "d", agreed_by: ["STALE"], supporting_offset: 5 }],
      action_items: [{ description: "do", owner: "STALE", supporting_offset: 6 }],
      key_quotes: [{ speaker: "STALE", offset: 7, quote: "q" }],
    },
    diarization: [
      {
        ok: true,
        chunkIndex: 0,
        segments: [
          { start: 0, end: 10, speaker_local: "Speaker_0", confidence: 0.9 },
          { start: 10, end: 20, speaker_local: "Speaker_1", confidence: 0.9 },
        ],
        fingerprints: { Speaker_0: FP_A(1), Speaker_1: FP_B(1) },
      },
      {
        ok: true,
        chunkIndex: 1,
        // Same voice family as chunk 0's Speaker_0, with a slightly different vector.
        segments: [{ start: 20, end: 30, speaker_local: "Speaker_0", confidence: 0.8 }],
        fingerprints: { Speaker_0: FP_A(2) },
      },
    ],
    meta: { polish_done: true },
  };
}
|
||||
|
||||
// Re-clustering from the saved fingerprints must relabel segments and
// entries, drop per-line overrides, and wipe attribution data that was tied
// to the old labels.
test("recluster: re-stamps segments + entries and resets stale data", () => {
  const rec = makeReclusterRecord();
  const out = reclusterMeetingRecord(rec, { threshold: 70 });

  // Two distinct voices recovered (a missing speakers map counts as zero).
  const speakerKeys = out.speakers ? Object.keys(out.speakers) : [];
  const knownSpeakers = speakerKeys.filter((k) => k !== "Speaker_Unknown");
  assert.equal(knownSpeakers.length, 2);

  // Segments re-stamped: FP_A group = Speaker_A (first), FP_B = Speaker_B.
  assert.deepEqual(
    rec.transcript_segments.map((s) => s.speaker),
    ["Speaker_A", "Speaker_B", "Speaker_A"]
  );
  // Entries re-derived to match.
  assert.deepEqual(
    rec.chunks[0].entries.map((e) => e.speaker),
    ["Speaker_A", "Speaker_B", "Speaker_A"]
  );
  // Per-line override cleared.
  assert.ok(!("speaker_override" in rec.chunks[0].entries[0]));

  // Stale attribution data reset.
  assert.deepEqual(rec.speaker_names, {});
  assert.deepEqual(rec.extras.tldr.primary_speakers, []);
  assert.deepEqual(rec.extras.decisions[0].agreed_by, []);
  assert.equal(rec.extras.action_items[0].owner, null);
  assert.equal(rec.extras.key_quotes[0].speaker, null);
  // Decision text preserved.
  assert.equal(rec.extras.decisions[0].statement, "d");

  // Bookkeeping: timestamp, threshold used, and polish flag flipped back.
  assert.ok(rec.meta.reclustered_at > 0);
  assert.equal(rec.meta.recluster_threshold, 70);
  assert.equal(rec.meta.polish_done, false);
});
|
||||
|
||||
// Re-clustering needs saved fingerprints; without them it must fail with an
// error whose `code` is "NO_FINGERPRINTS".
test("recluster: throws NO_FINGERPRINTS when none are saved", () => {
  const hasNoFingerprintsCode = (e) => e.code === "NO_FINGERPRINTS";

  // Case 1: no diarization data at all.
  const rec = makeReclusterRecord();
  rec.diarization = null;
  assert.throws(() => reclusterMeetingRecord(rec, { threshold: 70 }), hasNoFingerprintsCode);

  // Case 2: a diarization result exists but its fingerprint map is empty.
  const rec2 = makeReclusterRecord();
  rec2.diarization = [{ ok: true, chunkIndex: 0, segments: [], fingerprints: {} }];
  assert.throws(() => reclusterMeetingRecord(rec2, { threshold: 70 }), hasNoFingerprintsCode);
});
|
||||
|
||||
// Polished summaries must land in both the analysis sections and the chunk
// cards, without touching the per-entry speaker labels.
test("applyPolishedSummaries: writes summaries to analysis + chunks, leaves entries", () => {
  const rec = {
    analysis: {
      sections: [
        { title: "Intro", summary: "OLD intro", startIndex: 0, endIndex: 1 },
        { title: "Plan", summary: "OLD plan", startIndex: 2, endIndex: 3 },
      ],
    },
    chunks: [
      { title: "Intro", summary: "OLD intro", entries: [{ offset: 0, speaker: "Speaker_A", speaker_override: "Speaker_B" }] },
      { title: "Plan", summary: "OLD plan", entries: [{ offset: 20, speaker: "Speaker_B" }] },
    ],
    meta: {},
  };
  const polished = [
    { title: "Intro", summary: "Matt opens the standup", startIndex: 0, endIndex: 1 },
    { title: "Plan", summary: "John lays out the Q3 plan", startIndex: 2, endIndex: 3 },
  ];

  const changed = applyPolishedSummaries(rec, polished);

  assert.equal(changed, 2);
  // analysis store updated
  assert.equal(rec.analysis.sections[0].summary, "Matt opens the standup");
  // chunk cards updated by title
  assert.equal(rec.chunks[0].summary, "Matt opens the standup");
  assert.equal(rec.chunks[1].summary, "John lays out the Q3 plan");
  // entries + per-line override untouched
  assert.equal(rec.chunks[0].entries[0].speaker, "Speaker_A");
  assert.equal(rec.chunks[0].entries[0].speaker_override, "Speaker_B");
});
|
||||
|
||||
// Two chunks sharing a title must each receive their own polished summary,
// matched by position among the duplicates.
test("applyPolishedSummaries: duplicate titles map in order", () => {
  const rec = {
    analysis: { sections: [] },
    chunks: [
      { title: "Discussion", summary: "old1", entries: [] },
      { title: "Discussion", summary: "old2", entries: [] },
    ],
  };
  const polished = [
    { title: "Discussion", summary: "new1" },
    { title: "Discussion", summary: "new2" },
  ];

  applyPolishedSummaries(rec, polished);

  assert.equal(rec.chunks[0].summary, "new1");
  assert.equal(rec.chunks[1].summary, "new2");
});
|
||||
|
||||
// Entries that already carry a speaker are only overwritten when the caller
// passes `force: true`.
test("backfillEntrySpeakers force re-stamps already-labeled entries", () => {
  const rec = {
    transcript_segments: [
      { start: 0, end: 9, text: "a", speaker: "Speaker_A" },
      { start: 10, end: 19, text: "b", speaker: "Speaker_B" },
    ],
    chunks: [{ entries: [{ offset: 0, speaker: "OLD" }, { offset: 10, speaker: "OLD" }] }],
  };

  // Without force, existing speakers are left alone.
  backfillEntrySpeakers(rec);
  assert.deepEqual(rec.chunks[0].entries.map((e) => e.speaker), ["OLD", "OLD"]);

  // With force, they are re-derived from the segments.
  backfillEntrySpeakers(rec, { force: true });
  assert.deepEqual(rec.chunks[0].entries.map((e) => e.speaker), ["Speaker_A", "Speaker_B"]);
});
|
||||
Reference in New Issue
Block a user