360 lines
14 KiB
JavaScript
360 lines
14 KiB
JavaScript
// Post-hoc speaker edits for saved internal meetings.
|
|
//
|
|
// Two operator tools that mutate a saved meeting record in place,
|
|
// without re-uploading audio or hitting Spark Control:
|
|
//
|
|
// mergeSpeakersInRecord — fold one or more clusters that diarization
|
|
// mistakenly split apart into a single speaker.
|
|
// reclusterMeetingRecord — re-run the cross-chunk voice clustering at a
|
|
// new strictness threshold to separate two
|
|
// people who were over-merged into one cluster.
|
|
// Pure offline re-clustering off the persisted
|
|
// per-chunk fingerprints (rec.diarization).
|
|
//
|
|
// Both must keep the FOUR places a speaker label lives in sync:
|
|
// 1. rec.transcript_segments[].speaker
|
|
// 2. rec.chunks[].entries[].speaker (+ .speaker_override)
|
|
// 3. rec.speakers (per-cluster stats map)
|
|
// 4. rec.extras (tldr.primary_speakers, decisions.agreed_by,
|
|
// action_items.owner, key_quotes.speaker)
|
|
// plus rec.speaker_names (display-name map).
|
|
|
|
import {
|
|
clusterSpeakers,
|
|
assignSpeakersToSegments,
|
|
} from "./speaker-clustering.js";
|
|
|
|
// ─── Entry speaker backfill ─────────────────────────────────────────
|
|
// Re-derive each chunk entry's speaker from rec.transcript_segments by
|
|
// timestamp. Used (a) on load to repair pre-diarization records and
|
|
// (b) after a re-cluster re-stamps the segments. By default it only
|
|
// fills entries that LACK a speaker (the load-path use); pass
|
|
// { force: true } to re-stamp every entry (the re-cluster use, after
|
|
// the old labels have been cleared).
|
|
//
|
|
// Matching mirrors the pipeline's original offset→segment logic
|
|
// (internal-meetings.js build path): exact floored-start, then a
|
|
// containing segment within ±0.5s, then nearest preceding within 5s.
|
|
/**
 * Stamp chunk entries with the speaker of the transcript segment that best
 * matches each entry's `offset`.
 *
 * Mutates `rec.chunks[].entries[]` in place (speaker, speaker_confidence,
 * speaker_uncertain). Entries with no matching segment are left untouched.
 * Null/missing chunks, entries and segments are skipped instead of throwing.
 *
 * @param {object} rec - Meeting record; a no-op unless both `rec.chunks` and
 *   `rec.transcript_segments` are arrays.
 * @param {{ force?: boolean }} [options] - `force: true` re-stamps every
 *   entry; by default only entries without a speaker are filled.
 * @returns {void}
 */
export function backfillEntrySpeakers(rec, { force = false } = {}) {
  if (!rec || !Array.isArray(rec.chunks) || !Array.isArray(rec.transcript_segments)) {
    return;
  }
  if (!force) {
    // Cheap early exit: nothing to do when every real entry already has a speaker.
    const needsBackfill = rec.chunks.some(
      (c) => c && Array.isArray(c.entries) && c.entries.some((e) => e && !e.speaker)
    );
    if (!needsBackfill) return;
  }

  // filter() returns a fresh array, so sorting never reorders the record's own
  // segment list; it also drops null/undefined holes that would otherwise throw.
  const segs = rec.transcript_segments
    .filter(Boolean)
    .sort((a, b) => (a.start || 0) - (b.start || 0));

  // First segment seen for each whole-second start.
  const byFlooredStart = new Map();
  for (const seg of segs) {
    const k = Math.floor(seg.start || 0);
    if (!byFlooredStart.has(k)) byFlooredStart.set(k, seg);
  }

  // Returns the best-matching segment that carries a speaker, or null.
  const pickSpeaker = (t) => {
    // 1. Exact floored-start hit.
    // NOTE(review): the lookup key is the raw offset, so this step only hits
    // when entry offsets are whole seconds — confirm against the build path.
    const exact = byFlooredStart.get(t);
    if (exact && exact.speaker) return exact;

    // 2. A segment containing t, with 0.5s of slack on both sides.
    for (const seg of segs) {
      if ((seg.start || 0) > t + 5) break;
      if ((seg.start || 0) - 0.5 <= t && t <= (seg.end || 0) + 0.5) {
        if (seg.speaker) return seg;
      }
    }

    // 3. Nearest preceding segment with a speaker, no more than 5s back.
    let bestPrev = null;
    let bestDist = Infinity;
    for (const seg of segs) {
      if ((seg.start || 0) > t) break;
      const dist = t - (seg.start || 0);
      if (dist < bestDist && seg.speaker) {
        bestDist = dist;
        bestPrev = seg;
      }
    }
    if (bestPrev && bestDist <= 5) return bestPrev;
    return null;
  };

  for (const chunk of rec.chunks) {
    if (!chunk || !Array.isArray(chunk.entries)) continue;
    for (const entry of chunk.entries) {
      if (!entry) continue;
      if (!force && entry.speaker) continue;
      const found = pickSpeaker(entry.offset || 0);
      if (found && found.speaker) {
        entry.speaker = found.speaker;
        entry.speaker_confidence = found.speaker_confidence ?? null;
        entry.speaker_uncertain = !!found.speaker_uncertain;
      }
    }
  }
}
|
|
|
|
// ─── Merge speakers ─────────────────────────────────────────────────
|
|
// Fold each cluster in `absorbed` into `survivor`. Rewrites every label
|
|
// reference, sums the stats, inherits the absorbed display name only
|
|
// when the survivor has none, and rewrites extras attributions.
|
|
// Remaining letters are intentionally NOT renumbered — that would
|
|
// cascade through speaker_names + per-line overrides for no real gain.
|
|
//
|
|
// Returns { changed, speakers, speaker_names }. Throws on invalid input.
|
|
/**
 * Fold every cluster listed in `absorbed` into `survivor`, in place.
 *
 * All validation runs before the first mutation, so a rejected request
 * leaves the record untouched.
 *
 * @param {object} rec - Saved meeting record to mutate.
 * @param {string} survivor - Speaker id that remains; must be an own key of
 *   `rec.speakers` with a truthy stats value.
 * @param {string[]} absorbed - Speaker ids to fold into `survivor`
 *   (duplicates are ignored).
 * @returns {{ changed: number, speakers: object, speaker_names: object }}
 *   `changed` counts rewritten segment labels, entry labels and per-line
 *   overrides.
 * @throws {Error} with `code === "BAD_REQUEST"` on invalid input.
 */
export function mergeSpeakersInRecord(rec, survivor, absorbed) {
  if (!rec || typeof rec !== "object") {
    throw badRequest("record required");
  }
  const speakers = rec.speakers && typeof rec.speakers === "object" ? rec.speakers : {};
  const absorbList = Array.isArray(absorbed) ? [...new Set(absorbed)] : [];

  // Own-property check: inherited keys such as "constructor" must not pass as
  // speakers. Ids must be strings because labels are compared with strict
  // Set membership below — a numeric id would merge stats but match no label.
  const isKnownSpeaker = (id) =>
    typeof id === "string" &&
    Object.prototype.hasOwnProperty.call(speakers, id) &&
    Boolean(speakers[id]);

  if (!isKnownSpeaker(survivor)) {
    throw badRequest("survivor must be an existing speaker id");
  }
  if (absorbList.length === 0) {
    throw badRequest("absorbed must list at least one speaker id");
  }
  for (const x of absorbList) {
    // This also guarantees the survivor is never part of the absorbed set.
    if (x === survivor) throw badRequest("cannot merge a speaker into itself");
    if (!isKnownSpeaker(x)) throw badRequest(`unknown speaker id: ${String(x)}`);
  }

  const absorbedSet = new Set(absorbList);
  let changed = 0;

  // 1. transcript_segments
  for (const seg of arr(rec.transcript_segments)) {
    if (seg && absorbedSet.has(seg.speaker)) {
      seg.speaker = survivor;
      changed += 1;
    }
  }

  // 2. chunk entries (+ per-line overrides). Null chunks/entries are skipped
  // so a sparse record cannot abort the merge halfway through.
  for (const chunk of arr(rec.chunks)) {
    if (!chunk) continue;
    for (const entry of arr(chunk.entries)) {
      if (!entry) continue;
      if (absorbedSet.has(entry.speaker)) {
        entry.speaker = survivor;
        changed += 1;
      }
      if (absorbedSet.has(entry.speaker_override)) {
        entry.speaker_override = survivor;
        changed += 1;
      }
    }
  }

  // 3. stats + display name
  rec.speaker_names = rec.speaker_names && typeof rec.speaker_names === "object"
    ? rec.speaker_names
    : {};
  for (const x of absorbList) {
    mergeStats(speakers[survivor], speakers[x]);
    delete speakers[x];
    // Survivor inherits the absorbed name only if it has none of its own.
    if (!rec.speaker_names[survivor] && rec.speaker_names[x]) {
      rec.speaker_names[survivor] = rec.speaker_names[x];
    }
    if (x in rec.speaker_names) delete rec.speaker_names[x];
  }

  // 4. extras attributions
  remapExtrasSpeakers(rec.extras, (id) => (absorbedSet.has(id) ? survivor : id));

  rec.meta = rec.meta || {};
  rec.meta.speakers_merged_at = Date.now();

  return { changed, speakers: rec.speakers, speaker_names: rec.speaker_names };
}
|
|
|
|
// ─── Re-cluster (re-run diarization) ────────────────────────────────
|
|
// Re-run cross-chunk clustering off the persisted per-chunk
|
|
// fingerprints at a new threshold (+ optional suppression knobs),
|
|
// re-stamp every segment + entry, then RESET the now-stale attribution
|
|
// data (inferred names, per-line overrides, extras speaker tags) so the
|
|
// operator re-labels from a clean slate. No LLM calls.
|
|
//
|
|
// Returns { speakers, clusterCount, threshold }. Throws a NO_FINGERPRINTS
|
|
// error (code on err) when the record has no usable fingerprint data.
|
|
/**
 * Re-run cross-chunk speaker clustering from the fingerprints persisted in
 * `rec.diarization`, then re-stamp segments and chunk entries and reset the
 * attribution data that referred to the old cluster ids.
 *
 * @param {object} rec - Saved meeting record to mutate in place.
 * @param {object} [opts] - Clustering knobs; `null` is treated like `{}`.
 * @param {number} [opts.threshold] - Handed to `clusterSpeakers` as-is.
 * @param {number} [opts.anchorMinSpeakingSec]
 * @param {number} [opts.smallClusterMaxSpeakingSec]
 * @param {number} [opts.uncertainMarginPct]
 * @returns {{ speakers: object, clusterCount: number, threshold: number }}
 *   `threshold` is the clamped value recorded in `rec.meta`.
 * @throws {Error} `code === "BAD_REQUEST"` when `rec` is not an object;
 *   `code === "NO_FINGERPRINTS"` when no `ok` diarization item has fingerprints.
 */
export function reclusterMeetingRecord(rec, opts = {}) {
  if (!rec || typeof rec !== "object") throw badRequest("record required");
  // The default parameter only covers `undefined`; tolerate an explicit null.
  const options = opts ?? {};

  const diar = Array.isArray(rec.diarization) ? rec.diarization : [];
  // Only items flagged ok contribute fingerprints to the count.
  const totalFps = diar.reduce(
    (n, d) => n + (d && d.ok ? Object.keys(d.fingerprints || {}).length : 0),
    0
  );
  if (totalFps === 0) {
    const err = new Error(
      "this meeting has no saved voice fingerprints — it predates fingerprint capture or was processed with diarization off, so it can't be re-clustered"
    );
    err.code = "NO_FINGERPRINTS";
    throw err;
  }

  // NOTE(review): the raw threshold goes to clusterSpeakers while the clamped
  // value is what gets recorded in rec.meta below — confirm clusterSpeakers
  // applies the same clamp/default so the two cannot disagree.
  const threshold = options.threshold;
  const { globalMap, uncertaintyMap, speakers, clusterCount } = clusterSpeakers(
    diar,
    threshold,
    {
      anchorMinSpeakingSec: options.anchorMinSpeakingSec,
      smallClusterMaxSpeakingSec: options.smallClusterMaxSpeakingSec,
      uncertainMarginPct: options.uncertainMarginPct,
    }
  );

  // Re-stamp the flat transcript segments off the new clustering...
  if (Array.isArray(rec.transcript_segments)) {
    assignSpeakersToSegments(rec.transcript_segments, diar, globalMap, uncertaintyMap);
  }
  // ...then clear + re-derive each chunk entry's speaker from them. Null
  // chunks/entries are skipped: throwing here would leave the segments
  // re-stamped but the rest of the record stale.
  for (const chunk of arr(rec.chunks)) {
    if (!chunk) continue;
    for (const entry of arr(chunk.entries)) {
      if (!entry) continue;
      entry.speaker = null;
      entry.speaker_confidence = null;
      entry.speaker_uncertain = false;
      if ("speaker_override" in entry) delete entry.speaker_override;
    }
  }
  backfillEntrySpeakers(rec, { force: true });

  // New roster; stale name/attribution data reset.
  rec.speakers = speakers;
  rec.speaker_names = {};
  resetExtrasSpeakers(rec.extras);

  rec.meta = rec.meta || {};
  rec.meta.reclustered_at = Date.now();
  rec.meta.recluster_threshold = clampPct(threshold);
  rec.meta.polish_done = false;

  return { speakers, clusterCount, threshold: rec.meta.recluster_threshold };
}
|
|
|
|
// ─── Apply re-polished summaries ────────────────────────────────────
|
|
// After a re-polish pass (runSummaryPolish with the operator's corrected
|
|
// names), write the new section summaries back into the saved record:
|
|
// - rec.analysis.sections — the canonical section store
|
|
// - rec.chunks[].summary — the rendered topic cards
|
|
// Chunk summaries are matched to sections BY TITLE (polish never changes
|
|
// titles), consumed in section order so duplicate titles still line up.
|
|
// Chunk ENTRIES and any per-line speaker_override are left untouched —
|
|
// only the summary text changes. Returns the count of chunk summaries
|
|
// actually changed.
|
|
/**
 * Write re-polished section summaries back into a saved record.
 *
 * `rec.analysis.sections` is replaced by `polishedSections`; each chunk then
 * takes the next unused summary among the sections sharing its title (the
 * last one is reused once a title's queue is exhausted). Empty summaries never
 * overwrite existing text.
 *
 * @param {object} rec - Saved meeting record, mutated in place.
 * @param {Array<{title?: string, summary?: string}>} polishedSections
 * @returns {number} How many chunk summaries actually changed; 0 when the
 *   arguments are unusable.
 */
export function applyPolishedSummaries(rec, polishedSections) {
  const usable = rec && typeof rec === "object" && Array.isArray(polishedSections);
  if (!usable) return 0;

  // Canonical section store: keep an existing analysis object, else create one.
  const hasAnalysis = rec.analysis && typeof rec.analysis === "object";
  if (hasAnalysis) {
    rec.analysis.sections = polishedSections;
  } else {
    rec.analysis = { sections: polishedSections };
  }

  // Missing or non-string titles all share the "" bucket.
  const titleOf = (item) => (item && typeof item.title === "string" ? item.title : "");

  // title -> { texts: summaries in section order, next: how many were handed out }
  const queues = new Map();
  for (const section of polishedSections) {
    const text = section && typeof section.summary === "string" ? section.summary : "";
    const title = titleOf(section);
    const slot = queues.get(title);
    if (slot) {
      slot.texts.push(text);
    } else {
      queues.set(title, { texts: [text], next: 0 });
    }
  }

  let updated = 0;
  for (const chunk of rec.chunks || []) {
    const slot = chunk ? queues.get(titleOf(chunk)) : undefined;
    if (!slot) continue;

    // Past the end of the queue, keep reusing its final summary.
    const text = slot.texts[Math.min(slot.next, slot.texts.length - 1)];
    slot.next += 1;

    if (text === "" || chunk.summary === text) continue;
    chunk.summary = text;
    updated += 1;
  }
  return updated;
}
|
|
|
|
// ─── helpers ────────────────────────────────────────────────────────
|
|
|
|
// Build (not throw) an Error tagged with code "BAD_REQUEST"; callers throw it.
function badRequest(message) {
  return Object.assign(new Error(message), { code: "BAD_REQUEST" });
}
|
|
|
|
// Coerce `v` to a whole percentage in [50, 95]; anything that does not
// convert to a finite number falls back to 70.
function clampPct(v) {
  const pct = Number(v);
  if (Number.isFinite(pct) === false) return 70;

  const rounded = Math.round(pct);
  if (rounded < 50) return 50;
  if (rounded > 95) return 95;
  return rounded;
}
|
|
|
|
// Merge stats of `from` into `into` in place. turns / speaking-time /
|
|
// fingerprint-count sum; mean_confidence is turn-weighted across the
|
|
// clusters that have one; chunks_appeared_in uses max as a safe
|
|
// approximation (the raw per-cluster chunk sets aren't retained).
|
|
// Fold the stats object `from` into `into`, mutating `into`. No-op when
// either side is missing.
//   turns, total_speaking_seconds, fingerprint_count -> summed (seconds kept
//                                                       to one decimal place)
//   chunks_appeared_in                               -> larger of the two
//   mean_confidence                                  -> turn-weighted mean when
//     both sides have a numeric value (plain average if neither has turns),
//     otherwise whichever side has one, otherwise null
function mergeStats(into, from) {
  if (!(into && from)) return;

  const confidenceOf = (stats) =>
    typeof stats.mean_confidence === "number" ? stats.mean_confidence : null;

  const intoTurns = into.turns || 0;
  const fromTurns = from.turns || 0;
  const intoConf = confidenceOf(into);
  const fromConf = confidenceOf(from);

  let mergedConf;
  if (intoConf === null) {
    mergedConf = fromConf;
  } else if (fromConf === null) {
    mergedConf = intoConf;
  } else {
    const totalTurns = intoTurns + fromTurns;
    mergedConf =
      totalTurns > 0
        ? (intoConf * intoTurns + fromConf * fromTurns) / totalTurns
        : (intoConf + fromConf) / 2;
  }

  const seconds = (into.total_speaking_seconds || 0) + (from.total_speaking_seconds || 0);

  into.turns = intoTurns + fromTurns;
  into.total_speaking_seconds = Math.round(seconds * 10) / 10;
  into.fingerprint_count = (into.fingerprint_count || 0) + (from.fingerprint_count || 0);
  into.chunks_appeared_in = Math.max(into.chunks_appeared_in || 0, from.chunks_appeared_in || 0);
  into.mean_confidence = mergedConf;
}
|
|
|
|
// Rewrite every speaker id in the extras block through `map`.
|
|
// Rewrite every speaker id in the extras block through `map`, in place:
//   tldr.primary_speakers[] and decisions[].agreed_by[] are mapped, then
//   de-duplicated; action_items[].owner and key_quotes[].speaker are mapped
//   only when already set.
// `map` is always called with the id alone (never the index/array that
// Array.prototype.map would otherwise pass along). Null or missing items in
// any list are skipped instead of throwing.
function remapExtrasSpeakers(extras, map) {
  if (!extras || typeof extras !== "object") return;

  const remapList = (ids) => dedupe(ids.map((id) => map(id)));

  const tldr = extras.tldr;
  if (tldr && Array.isArray(tldr.primary_speakers)) {
    tldr.primary_speakers = remapList(tldr.primary_speakers);
  }
  for (const d of arr(extras.decisions)) {
    if (d && Array.isArray(d.agreed_by)) d.agreed_by = remapList(d.agreed_by);
  }
  for (const a of arr(extras.action_items)) {
    if (a && a.owner) a.owner = map(a.owner);
  }
  for (const q of arr(extras.key_quotes)) {
    if (q && q.speaker) q.speaker = map(q.speaker);
  }
}
|
|
|
|
// Clear extras speaker attributions (keep the text). Used by re-cluster
|
|
// since cluster identities change and old ids would be meaningless.
|
|
// Blank every speaker attribution in the extras block, in place, leaving all
// other fields alone:
//   tldr.primary_speakers -> [], decisions[].agreed_by -> [],
//   action_items[].owner -> null, key_quotes[].speaker -> null
// Anything that is not an object (null holes, stray strings/numbers) is
// skipped, since assigning a property on it would throw in module code.
function resetExtrasSpeakers(extras) {
  const isObject = (v) => v !== null && typeof v === "object";
  if (!isObject(extras)) return;

  if (isObject(extras.tldr)) extras.tldr.primary_speakers = [];
  for (const d of arr(extras.decisions)) {
    if (isObject(d)) d.agreed_by = [];
  }
  for (const a of arr(extras.action_items)) {
    if (isObject(a)) a.owner = null;
  }
  for (const q of arr(extras.key_quotes)) {
    if (isObject(q)) q.speaker = null;
  }
}
|
|
|
|
// Pass arrays through untouched; everything else becomes a fresh empty array.
function arr(v) {
  if (Array.isArray(v)) return v;
  return [];
}
|
|
|
|
// New array of the distinct values in `list`, in first-seen order.
function dedupe(list) {
  const seen = new Set(list);
  return Array.from(seen);
}
|