Files
recap-relay/server/meeting-speaker-edits.js
T

360 lines
14 KiB
JavaScript

// Post-hoc speaker edits for saved internal meetings.
//
// Two operator tools that mutate a saved meeting record in place,
// without re-uploading audio or hitting Spark Control:
//
// mergeSpeakersInRecord — fold one or more clusters that diarization
// mistakenly split apart into a single speaker.
// reclusterMeetingRecord — re-run the cross-chunk voice clustering at a
// new strictness threshold to separate two
// people who were over-merged into one cluster.
// Pure offline re-clustering off the persisted
// per-chunk fingerprints (rec.diarization).
//
// Both must keep the FOUR places a speaker label lives in sync:
// 1. rec.transcript_segments[].speaker
// 2. rec.chunks[].entries[].speaker (+ .speaker_override)
// 3. rec.speakers (per-cluster stats map)
// 4. rec.extras (tldr.primary_speakers, decisions.agreed_by,
// action_items.owner, key_quotes.speaker)
// plus rec.speaker_names (display-name map).
import {
clusterSpeakers,
assignSpeakersToSegments,
} from "./speaker-clustering.js";
// ─── Entry speaker backfill ─────────────────────────────────────────
// Re-derive each chunk entry's speaker from rec.transcript_segments by
// timestamp. Used (a) on load to repair pre-diarization records and
// (b) after a re-cluster re-stamps the segments. By default it only
// fills entries that LACK a speaker (the load-path use); pass
// { force: true } to re-stamp every entry (the re-cluster use, after
// the old labels have been cleared).
//
// Matching mirrors the pipeline's original offset→segment logic
// (internal-meetings.js build path): exact floored-start, then a
// containing segment within ±0.5s, then nearest preceding within 5s.
// Parameters:
//   rec    Saved meeting record, mutated in place. A missing record, or one
//          whose `chunks` / `transcript_segments` is not an array, is a no-op.
//   force  false (default): only entries without a speaker are stamped, and
//          nothing is done at all when no entry needs one.
//          true: every entry with a matching labelled segment is re-stamped.
// Returns nothing. Entries for which no labelled segment matches are left
// exactly as they were. Null / non-object chunks, entries and segments are
// skipped instead of being dereferenced.
export function backfillEntrySpeakers(rec, { force = false } = {}) {
  if (!rec || !Array.isArray(rec.chunks) || !Array.isArray(rec.transcript_segments)) {
    return;
  }
  const isObj = (v) => v !== null && typeof v === "object";

  if (!force) {
    // Cheap pre-check so fully-labelled records skip the sort + index below.
    const needsBackfill = rec.chunks.some(
      (c) =>
        isObj(c) &&
        Array.isArray(c.entries) &&
        c.entries.some((e) => isObj(e) && !e.speaker)
    );
    if (!needsBackfill) return;
  }

  // Sorted copy (filter already copies) so the caller's segment order is
  // never disturbed; holes / nulls in the persisted array are dropped.
  const segs = rec.transcript_segments
    .filter(isObj)
    .sort((a, b) => (a.start || 0) - (b.start || 0));

  // First segment seen for each whole-second start, for the exact-match step.
  const byFlooredStart = new Map();
  for (const seg of segs) {
    const k = Math.floor(seg.start || 0);
    if (!byFlooredStart.has(k)) byFlooredStart.set(k, seg);
  }

  // Resolve an entry offset `t` to a labelled segment, or null.
  const pickSpeaker = (t) => {
    // 1. Exact hit on a floored segment start.
    const exact = byFlooredStart.get(t);
    if (exact && exact.speaker) return exact;

    // 2. First labelled segment whose [start - 0.5, end + 0.5] contains t.
    for (const seg of segs) {
      if ((seg.start || 0) > t + 5) break;
      if ((seg.start || 0) - 0.5 <= t && t <= (seg.end || 0) + 0.5) {
        if (seg.speaker) return seg;
      }
    }

    // 3. Closest labelled segment starting at or before t, within 5s.
    let bestPrev = null;
    let bestDist = Infinity;
    for (const seg of segs) {
      if ((seg.start || 0) > t) break;
      const dist = t - (seg.start || 0);
      if (dist < bestDist && seg.speaker) {
        bestDist = dist;
        bestPrev = seg;
      }
    }
    if (bestPrev && bestDist <= 5) return bestPrev;
    return null;
  };

  for (const chunk of rec.chunks) {
    if (!isObj(chunk) || !Array.isArray(chunk.entries)) continue;
    for (const entry of chunk.entries) {
      // A null / primitive entry cannot carry a label; skip it rather than throw.
      if (!isObj(entry)) continue;
      if (!force && entry.speaker) continue;
      const found = pickSpeaker(entry.offset || 0);
      if (found && found.speaker) {
        entry.speaker = found.speaker;
        entry.speaker_confidence = found.speaker_confidence ?? null;
        entry.speaker_uncertain = !!found.speaker_uncertain;
      }
    }
  }
}
// ─── Merge speakers ─────────────────────────────────────────────────
// Fold each cluster in `absorbed` into `survivor`. Rewrites every label
// reference, sums the stats, inherits the absorbed display name only
// when the survivor has none, and rewrites extras attributions.
// Remaining letters are intentionally NOT renumbered — that would
// cascade through speaker_names + per-line overrides for no real gain.
//
// Returns { changed, speakers, speaker_names }. Throws on invalid input.
// Parameters:
//   rec       Saved meeting record; mutated in place.
//   survivor  Id (string) of the cluster that remains. Must be an own key of
//             rec.speakers with a truthy value.
//   absorbed  Array of cluster ids to fold into `survivor`; duplicates are
//             ignored. Each must be a string id that is an own key of
//             rec.speakers, and none may equal `survivor`.
// Returns { changed, speakers, speaker_names } where `changed` counts the
// rewritten segment labels, entry labels and entry overrides.
// Throws a BAD_REQUEST error (via badRequest) on any invalid argument; all
// validation happens before the first mutation.
export function mergeSpeakersInRecord(rec, survivor, absorbed) {
  if (!rec || typeof rec !== "object") {
    throw badRequest("record required");
  }
  const speakers = rec.speakers && typeof rec.speakers === "object" ? rec.speakers : {};

  // Ids are looked up as OWN properties only. A plain truthiness test would
  // accept inherited keys such as "constructor" or "__proto__" as valid ids.
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  const isKnownSpeaker = (id) =>
    typeof id === "string" && hasOwn(speakers, id) && Boolean(speakers[id]);

  const absorbList = Array.isArray(absorbed) ? [...new Set(absorbed)] : [];
  if (!isKnownSpeaker(survivor)) {
    throw badRequest("survivor must be an existing speaker id");
  }
  if (absorbList.length === 0) {
    throw badRequest("absorbed must list at least one speaker id");
  }
  for (const x of absorbList) {
    if (x === survivor) throw badRequest("cannot merge a speaker into itself");
    // Non-string ids are rejected too: a number could match a roster key by
    // coercion yet never match the string labels stored on segments/entries.
    if (!isKnownSpeaker(x)) throw badRequest(`unknown speaker id: ${String(x)}`);
  }
  // From here on the survivor is guaranteed to be an own roster key that is
  // not in the absorbed list, so no further "survivor remains" check is needed.

  const absorbedSet = new Set(absorbList);
  let changed = 0;

  // 1. transcript_segments
  for (const seg of rec.transcript_segments || []) {
    if (seg && absorbedSet.has(seg.speaker)) {
      seg.speaker = survivor;
      changed += 1;
    }
  }

  // 2. chunk entries (+ per-line overrides)
  for (const chunk of rec.chunks || []) {
    if (!chunk || !Array.isArray(chunk.entries)) continue;
    for (const entry of chunk.entries) {
      if (!entry) continue;
      if (absorbedSet.has(entry.speaker)) {
        entry.speaker = survivor;
        changed += 1;
      }
      if (absorbedSet.has(entry.speaker_override)) {
        entry.speaker_override = survivor;
        changed += 1;
      }
    }
  }

  // 3. stats + display name
  rec.speaker_names = rec.speaker_names && typeof rec.speaker_names === "object"
    ? rec.speaker_names
    : {};
  const names = rec.speaker_names;
  for (const x of absorbList) {
    mergeStats(speakers[survivor], speakers[x]);
    delete speakers[x];
    // Survivor inherits the absorbed name only if it has none of its own.
    const survivorName = hasOwn(names, survivor) ? names[survivor] : undefined;
    const absorbedName = hasOwn(names, x) ? names[x] : undefined;
    if (!survivorName && absorbedName) {
      names[survivor] = absorbedName;
    }
    if (hasOwn(names, x)) delete names[x];
  }

  // 4. extras attributions
  remapExtrasSpeakers(rec.extras, (id) => (absorbedSet.has(id) ? survivor : id));

  rec.meta = rec.meta || {};
  rec.meta.speakers_merged_at = Date.now();
  return { changed, speakers, speaker_names: names };
}
// ─── Re-cluster (re-run diarization) ────────────────────────────────
// Re-run cross-chunk clustering off the persisted per-chunk
// fingerprints at a new threshold (+ optional suppression knobs),
// re-stamp every segment + entry, then RESET the now-stale attribution
// data (inferred names, per-line overrides, extras speaker tags) so the
// operator re-labels from a clean slate. No LLM calls.
//
// Returns { speakers, clusterCount, threshold }. Throws a NO_FINGERPRINTS
// error (code on err) when the record has no usable fingerprint data.
// Parameters:
//   rec   Saved meeting record; mutated in place.
//   opts  Optional knobs: threshold, anchorMinSpeakingSec,
//         smallClusterMaxSpeakingSec, uncertainMarginPct. They are forwarded
//         to clusterSpeakers untouched. null or a non-object is treated the
//         same as an omitted argument.
// Returns { speakers, clusterCount, threshold } where `threshold` is the
// clamped value recorded in rec.meta.recluster_threshold.
// Throws BAD_REQUEST when `rec` is not an object, and an Error with
// code "NO_FINGERPRINTS" when no `ok` diarization item carries fingerprints.
export function reclusterMeetingRecord(rec, opts = {}) {
  if (!rec || typeof rec !== "object") throw badRequest("record required");
  // The default parameter only covers `undefined`; an explicit null would
  // otherwise throw a TypeError on the first property read.
  const options = opts && typeof opts === "object" ? opts : {};

  const diar = Array.isArray(rec.diarization) ? rec.diarization : [];
  // Count fingerprints across the diarization items flagged ok.
  const totalFps = diar.reduce(
    (n, d) => n + (d && d.ok ? Object.keys(d.fingerprints || {}).length : 0),
    0
  );
  if (totalFps === 0) {
    const err = new Error(
      "this meeting has no saved voice fingerprints — it predates fingerprint capture or was processed with diarization off, so it can't be re-clustered"
    );
    err.code = "NO_FINGERPRINTS";
    throw err;
  }

  // NOTE(review): the raw threshold is what clusterSpeakers receives, while
  // the value stored in meta below goes through clampPct. Confirm that
  // clusterSpeakers applies the same clamp/default so the two cannot diverge.
  const threshold = options.threshold;
  const { globalMap, uncertaintyMap, speakers, clusterCount } = clusterSpeakers(
    diar,
    threshold,
    {
      anchorMinSpeakingSec: options.anchorMinSpeakingSec,
      smallClusterMaxSpeakingSec: options.smallClusterMaxSpeakingSec,
      uncertainMarginPct: options.uncertainMarginPct,
    }
  );

  // Re-stamp the flat transcript segments off the new clustering...
  if (Array.isArray(rec.transcript_segments)) {
    assignSpeakersToSegments(rec.transcript_segments, diar, globalMap, uncertaintyMap);
  }

  // ...then clear + re-derive each chunk entry's speaker from them.
  // Null chunks / non-array entry lists are skipped by this loop: the
  // segments above are already mutated, so throwing here would leave the
  // record half-updated.
  for (const chunk of rec.chunks || []) {
    if (!chunk || !Array.isArray(chunk.entries)) continue;
    for (const entry of chunk.entries) {
      if (!entry) continue;
      entry.speaker = null;
      entry.speaker_confidence = null;
      entry.speaker_uncertain = false;
      if ("speaker_override" in entry) delete entry.speaker_override;
    }
  }
  backfillEntrySpeakers(rec, { force: true });

  // New roster; stale name/attribution data reset.
  rec.speakers = speakers;
  rec.speaker_names = {};
  resetExtrasSpeakers(rec.extras);

  rec.meta = rec.meta || {};
  rec.meta.reclustered_at = Date.now();
  rec.meta.recluster_threshold = clampPct(threshold);
  // Summaries were polished against the old speaker names; flag them stale.
  rec.meta.polish_done = false;
  return { speakers, clusterCount, threshold: rec.meta.recluster_threshold };
}
// ─── Apply re-polished summaries ────────────────────────────────────
// After a re-polish pass (runSummaryPolish with the operator's corrected
// names), write the new section summaries back into the saved record:
// - rec.analysis.sections — the canonical section store
// - rec.chunks[].summary — the rendered topic cards
// Chunk summaries are matched to sections BY TITLE (polish never changes
// titles), consumed in section order so duplicate titles still line up.
// Chunk ENTRIES and any per-line speaker_override are left untouched —
// only the summary text changes. Returns the count of chunk summaries
// actually changed.
// Parameters:
//   rec               Saved meeting record; mutated in place.
//   polishedSections  Array of { title, summary } sections from the polish pass.
// Returns the number of chunk summaries that were actually rewritten, or 0
// when `rec` is not an object or `polishedSections` is not an array.
export function applyPolishedSummaries(rec, polishedSections) {
  const haveRecord = Boolean(rec) && typeof rec === "object";
  if (!haveRecord || !Array.isArray(polishedSections)) return 0;

  // Canonical section store: reuse an existing analysis object, else create one.
  const analysisIsObject = Boolean(rec.analysis) && typeof rec.analysis === "object";
  if (analysisIsObject) {
    rec.analysis.sections = polishedSections;
  } else {
    rec.analysis = { sections: polishedSections };
  }

  // Non-string titles (and null items) all share the "" bucket.
  const titleOf = (item) => (item && typeof item.title === "string" ? item.title : "");

  // title → { texts: summaries in section order, cursor: how many consumed }
  const queues = new Map();
  for (const section of polishedSections) {
    const title = titleOf(section);
    let queue = queues.get(title);
    if (queue === undefined) {
      queue = { texts: [], cursor: 0 };
      queues.set(title, queue);
    }
    const text = section && typeof section.summary === "string" ? section.summary : "";
    queue.texts.push(text);
  }

  let rewritten = 0;
  for (const chunk of rec.chunks || []) {
    if (!chunk) continue;
    const queue = queues.get(titleOf(chunk));
    if (queue === undefined) continue;

    // Once a title's queue is exhausted, later chunks reuse its last summary.
    const position = Math.min(queue.cursor, queue.texts.length - 1);
    queue.cursor += 1;

    const text = queue.texts[position];
    // Blank summaries never overwrite; identical text is not counted.
    if (text === "" || chunk.summary === text) continue;
    chunk.summary = text;
    rewritten += 1;
  }
  return rewritten;
}
// ─── helpers ────────────────────────────────────────────────────────
// Build (but do not throw) an Error tagged with code "BAD_REQUEST", so
// callers can write `throw badRequest("...")`.
function badRequest(message) {
  return Object.assign(new Error(message), { code: "BAD_REQUEST" });
}
// Coerce `v` to a whole percentage in [50, 95]. Anything that does not
// coerce to a finite number (undefined, NaN, ±Infinity, junk strings)
// yields 70.
function clampPct(v) {
  const pct = Number(v);
  if (!Number.isFinite(pct)) return 70;
  const whole = Math.round(pct);
  if (whole <= 50) return 50;
  if (whole >= 95) return 95;
  return whole;
}
// Merge stats of `from` into `into` in place. turns / speaking-time /
// fingerprint-count sum; mean_confidence is turn-weighted across the
// clusters that have one; chunks_appeared_in uses max as a safe
// approximation (the raw per-cluster chunk sets aren't retained).
// Fold the stats object `from` into `into`, in place. No-op when either
// argument is missing. Missing numeric fields count as 0.
function mergeStats(into, from) {
  if (!into || !from) return;

  const intoTurns = into.turns || 0;
  const fromTurns = from.turns || 0;
  const totalTurns = intoTurns + fromTurns;

  // A confidence only participates when it is actually a number.
  const confidenceOf = (stats) =>
    typeof stats.mean_confidence === "number" ? stats.mean_confidence : null;
  const intoConf = confidenceOf(into);
  const fromConf = confidenceOf(from);

  let blended;
  if (intoConf === null) {
    // Covers "only the other side has one" and "neither has one" (null).
    blended = fromConf;
  } else if (fromConf === null) {
    blended = intoConf;
  } else if (totalTurns > 0) {
    // Turn-weighted mean.
    blended = (intoConf * intoTurns + fromConf * fromTurns) / totalTurns;
  } else {
    // No turns on either side to weight by: plain average.
    blended = (intoConf + fromConf) / 2;
  }

  const speakingSeconds =
    (into.total_speaking_seconds || 0) + (from.total_speaking_seconds || 0);

  Object.assign(into, {
    turns: totalTurns,
    // Kept to one decimal place.
    total_speaking_seconds: Math.round(speakingSeconds * 10) / 10,
    fingerprint_count: (into.fingerprint_count || 0) + (from.fingerprint_count || 0),
    // Larger of the two counts, not a sum.
    chunks_appeared_in: Math.max(into.chunks_appeared_in || 0, from.chunks_appeared_in || 0),
    mean_confidence: blended,
  });
}
// Rewrite every speaker id in the extras block through `map`.
// Parameters:
//   extras  The record's extras block; mutated in place. Ignored when it is
//           missing or not an object.
//   map     Function (id) => id applied to every speaker reference.
// Touches tldr.primary_speakers and decisions[].agreed_by (de-duplicated
// after mapping), action_items[].owner and key_quotes[].speaker. Falsy
// owners/speakers are left alone, and null items in any of the lists are
// skipped rather than dereferenced.
function remapExtrasSpeakers(extras, map) {
  if (!extras || typeof extras !== "object") return;
  // Call the mapper with the id only; Array#map would also pass index + array.
  const remapList = (ids) => dedupe(ids.map((id) => map(id)));

  const tldr = extras.tldr;
  if (tldr && Array.isArray(tldr.primary_speakers)) {
    tldr.primary_speakers = remapList(tldr.primary_speakers);
  }
  for (const d of arr(extras.decisions)) {
    if (d && Array.isArray(d.agreed_by)) d.agreed_by = remapList(d.agreed_by);
  }
  for (const a of arr(extras.action_items)) {
    if (a && a.owner) a.owner = map(a.owner);
  }
  for (const q of arr(extras.key_quotes)) {
    if (q && q.speaker) q.speaker = map(q.speaker);
  }
}
// Clear extras speaker attributions (keep the text). Used by re-cluster
// since cluster identities change and old ids would be meaningless.
// Blank every speaker attribution in the extras block, in place:
// tldr.primary_speakers and decisions[].agreed_by become [], while
// action_items[].owner and key_quotes[].speaker become null. No other
// field is touched. A missing / non-object extras block is ignored, and
// so are null or primitive items (assigning a property on them would throw).
function resetExtrasSpeakers(extras) {
  if (!extras || typeof extras !== "object") return;
  const isObj = (v) => v !== null && typeof v === "object";

  if (isObj(extras.tldr)) extras.tldr.primary_speakers = [];
  for (const d of arr(extras.decisions)) {
    if (isObj(d)) d.agreed_by = [];
  }
  for (const a of arr(extras.action_items)) {
    if (isObj(a)) a.owner = null;
  }
  for (const q of arr(extras.key_quotes)) {
    if (isObj(q)) q.speaker = null;
  }
}
// Return `v` itself when it is an array, otherwise a fresh empty array,
// so callers can iterate without checking first.
function arr(v) {
  if (Array.isArray(v)) return v;
  return [];
}
// New array holding the distinct values of `list`, in first-seen order.
function dedupe(list) {
  return Array.from(new Set(list));
}