// recap-relay/server/meeting-speaker-edits.js

// Post-hoc speaker edits for saved internal meetings.
//
// Two operator tools that mutate a saved meeting record in place,
// without re-uploading audio or hitting Spark Control:
//
//   mergeSpeakersInRecord  — fold one or more clusters that diarization
//                            mistakenly split apart into a single speaker.
//   reclusterMeetingRecord — re-run the cross-chunk voice clustering at a
//                            new strictness threshold to separate two
//                            people who were over-merged into one cluster.
//                            Pure offline re-clustering off the persisted
//                            per-chunk fingerprints (rec.diarization).
//
// Both must keep the FOUR places a speaker label lives in sync:
//   1. rec.transcript_segments[].speaker
//   2. rec.chunks[].entries[].speaker  (+ .speaker_override)
//   3. rec.speakers          (per-cluster stats map)
//   4. rec.extras            (tldr.primary_speakers, decisions.agreed_by,
//                             action_items.owner, key_quotes.speaker)
// plus rec.speaker_names (display-name map).

import {
  clusterSpeakers,
  assignSpeakersToSegments,
} from "./speaker-clustering.js";

// ─── Entry speaker backfill ─────────────────────────────────────────
// Re-derive each chunk entry's speaker from rec.transcript_segments by
// timestamp. Used (a) on load to repair pre-diarization records and
// (b) after a re-cluster re-stamps the segments. By default it only
// fills entries that LACK a speaker (the load-path use); pass
// { force: true } to re-stamp every entry (the re-cluster use, after
// the old labels have been cleared).
//
// Matching mirrors the pipeline's original offset→segment logic
// (internal-meetings.js build path): exact floored-start, then a
// containing segment within ±0.5s, then nearest preceding within 5s.
/**
 * Stamp chunk entries with the speaker of the transcript segment matching
 * their offset. Mutates `rec` in place.
 *
 * Malformed holes (null / non-object chunks, entries or segments) are skipped
 * rather than dereferenced, so one bad element cannot abort the whole pass.
 *
 * @param {object} rec - Meeting record. No-op unless both `rec.chunks` and
 *   `rec.transcript_segments` are arrays.
 * @param {{force?: boolean}} [options] - `force: true` re-stamps every entry;
 *   the default only fills entries that have no speaker yet.
 * @returns {void}
 */
export function backfillEntrySpeakers(rec, { force = false } = {}) {
  if (!rec || !Array.isArray(rec.chunks) || !Array.isArray(rec.transcript_segments)) {
    return;
  }
  const isRecord = (value) => value !== null && typeof value === "object";

  // Only chunks that actually carry an entries array take part.
  const entryLists = [];
  for (const chunk of rec.chunks) {
    if (isRecord(chunk) && Array.isArray(chunk.entries)) entryLists.push(chunk.entries);
  }

  if (!force) {
    // Cheap exit for the load path: nothing to do when every real entry is
    // already labelled. Holes are ignored here because they can't be stamped.
    const needsBackfill = entryLists.some((entries) =>
      entries.some((entry) => isRecord(entry) && !entry.speaker)
    );
    if (!needsBackfill) return;
  }

  // Work on a sorted copy so the record's own segment order is untouched.
  const segs = rec.transcript_segments
    .filter(isRecord)
    .sort((a, b) => (a.start || 0) - (b.start || 0));

  // First segment seen for each whole second of start time.
  const byFlooredStart = new Map();
  for (const seg of segs) {
    const k = Math.floor(seg.start || 0);
    if (!byFlooredStart.has(k)) byFlooredStart.set(k, seg);
  }

  const pickSpeaker = (t) => {
    // 1. Exact hit on the floored start second.
    const exact = byFlooredStart.get(t);
    if (exact && exact.speaker) return exact;
    // 2. A labelled segment containing t, with 0.5s slack on both edges.
    //    Segments are sorted, so stop once starts are more than 5s past t.
    for (const seg of segs) {
      if ((seg.start || 0) > t + 5) break;
      if ((seg.start || 0) - 0.5 <= t && t <= (seg.end || 0) + 0.5) {
        if (seg.speaker) return seg;
      }
    }
    // 3. Nearest labelled segment starting at or before t, at most 5s back.
    let bestPrev = null;
    let bestDist = Infinity;
    for (const seg of segs) {
      if ((seg.start || 0) > t) break;
      const dist = t - (seg.start || 0);
      if (dist < bestDist && seg.speaker) {
        bestDist = dist;
        bestPrev = seg;
      }
    }
    if (bestPrev && bestDist <= 5) return bestPrev;
    return null;
  };

  for (const entries of entryLists) {
    for (const entry of entries) {
      if (!isRecord(entry)) continue;
      if (!force && entry.speaker) continue;
      const found = pickSpeaker(entry.offset || 0);
      // No match leaves the entry exactly as it was.
      if (found && found.speaker) {
        entry.speaker = found.speaker;
        entry.speaker_confidence = found.speaker_confidence ?? null;
        entry.speaker_uncertain = !!found.speaker_uncertain;
      }
    }
  }
}

// ─── Merge speakers ─────────────────────────────────────────────────
// Fold each cluster in `absorbed` into `survivor`. Rewrites every label
// reference, sums the stats, inherits the absorbed display name only
// when the survivor has none, and rewrites extras attributions.
// Remaining letters are intentionally NOT renumbered — that would
// cascade through speaker_names + per-line overrides for no real gain.
//
// Returns { changed, speakers, speaker_names }. Throws on invalid input.
/**
 * Fold the `absorbed` speaker clusters into `survivor`, mutating `rec` in place.
 *
 * @param {object} rec - Saved meeting record.
 * @param {string} survivor - Speaker id that remains; must be an own key of
 *   `rec.speakers`.
 * @param {string[]} absorbed - Speaker ids to fold into the survivor.
 *   Duplicates are ignored.
 * @returns {{changed: number, speakers: object, speaker_names: object}}
 *   `changed` counts rewritten segment labels, entry labels and entry
 *   overrides.
 * @throws {Error} with `code === "BAD_REQUEST"` on invalid input. All
 *   validation happens before the first mutation.
 */
export function mergeSpeakersInRecord(rec, survivor, absorbed) {
  if (!rec || typeof rec !== "object") {
    throw badRequest("record required");
  }
  const speakers = rec.speakers && typeof rec.speakers === "object" ? rec.speakers : {};
  const absorbList = Array.isArray(absorbed) ? [...new Set(absorbed)] : [];

  // Ids must be OWN keys of the roster. A plain `speakers[id]` lookup also
  // resolves inherited names such as "constructor" or "toString", which would
  // let a bogus id through validation and report a successful merge.
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  const isSpeakerId = (id) =>
    typeof id === "string" && hasOwn(speakers, id) && Boolean(speakers[id]);

  if (!isSpeakerId(survivor)) {
    throw badRequest("survivor must be an existing speaker id");
  }
  if (absorbList.length === 0) {
    throw badRequest("absorbed must list at least one speaker id");
  }
  for (const x of absorbList) {
    if (x === survivor) throw badRequest("cannot merge a speaker into itself");
    if (!isSpeakerId(x)) throw badRequest(`unknown speaker id: ${String(x)}`);
  }

  const absorbedSet = new Set(absorbList);
  let changed = 0;

  // 1. transcript_segments
  const segments = Array.isArray(rec.transcript_segments) ? rec.transcript_segments : [];
  for (const seg of segments) {
    if (seg && absorbedSet.has(seg.speaker)) {
      seg.speaker = survivor;
      changed += 1;
    }
  }

  // 2. chunk entries (+ per-line overrides); holes in either list are skipped.
  const chunks = Array.isArray(rec.chunks) ? rec.chunks : [];
  for (const chunk of chunks) {
    const entries = chunk && Array.isArray(chunk.entries) ? chunk.entries : [];
    for (const entry of entries) {
      if (!entry) continue;
      if (absorbedSet.has(entry.speaker)) {
        entry.speaker = survivor;
        changed += 1;
      }
      if (absorbedSet.has(entry.speaker_override)) {
        entry.speaker_override = survivor;
        changed += 1;
      }
    }
  }

  // 3. stats + display name
  const names = rec.speaker_names && typeof rec.speaker_names === "object"
    ? rec.speaker_names
    : {};
  rec.speaker_names = names;
  for (const x of absorbList) {
    mergeStats(speakers[survivor], speakers[x]);
    delete speakers[x];
    // Survivor inherits the absorbed name only if it has none of its own.
    const survivorNamed = hasOwn(names, survivor) && Boolean(names[survivor]);
    if (!survivorNamed && hasOwn(names, x) && names[x]) {
      names[survivor] = names[x];
    }
    delete names[x];
  }

  // 4. extras attributions
  remapExtrasSpeakers(rec.extras, (id) => (absorbedSet.has(id) ? survivor : id));

  rec.meta = rec.meta || {};
  rec.meta.speakers_merged_at = Date.now();

  return { changed, speakers: rec.speakers, speaker_names: rec.speaker_names };
}

// ─── Re-cluster (re-run diarization) ────────────────────────────────
// Re-run cross-chunk clustering off the persisted per-chunk
// fingerprints at a new threshold (+ optional suppression knobs),
// re-stamp every segment + entry, then RESET the now-stale attribution
// data (inferred names, per-line overrides, extras speaker tags) so the
// operator re-labels from a clean slate. No LLM calls.
//
// Returns { speakers, clusterCount, threshold }. Throws a NO_FINGERPRINTS
// error (code on err) when the record has no usable fingerprint data.
/**
 * Re-run speaker clustering from the record's saved fingerprints and reset
 * every attribution that depended on the old cluster ids. Mutates `rec`.
 *
 * @param {object} rec - Saved meeting record.
 * @param {object} [opts] - `threshold` plus the optional knobs
 *   `anchorMinSpeakingSec`, `smallClusterMaxSpeakingSec` and
 *   `uncertainMarginPct`, all forwarded to `clusterSpeakers` as given.
 * @returns {{speakers: object, clusterCount: number, threshold: number}}
 *   `threshold` is the clamped value recorded in `rec.meta`.
 * @throws {Error} `code === "BAD_REQUEST"` when `rec` is not an object;
 *   `code === "NO_FINGERPRINTS"` when no `ok` diarization entry carries a
 *   fingerprint.
 */
export function reclusterMeetingRecord(rec, opts = {}) {
  if (!rec || typeof rec !== "object") throw badRequest("record required");

  const diar = Array.isArray(rec.diarization) ? rec.diarization : [];

  // Only fingerprints from chunks whose diarization succeeded count.
  let fingerprintTotal = 0;
  for (const chunkDiar of diar) {
    if (chunkDiar && chunkDiar.ok) {
      fingerprintTotal += Object.keys(chunkDiar.fingerprints || {}).length;
    }
  }
  if (fingerprintTotal === 0) {
    throw Object.assign(
      new Error(
        "this meeting has no saved voice fingerprints — it predates fingerprint capture or was processed with diarization off, so it can't be re-clustered"
      ),
      { code: "NO_FINGERPRINTS" }
    );
  }

  const {
    threshold,
    anchorMinSpeakingSec,
    smallClusterMaxSpeakingSec,
    uncertainMarginPct,
  } = opts;
  const { globalMap, uncertaintyMap, speakers, clusterCount } = clusterSpeakers(
    diar,
    threshold,
    { anchorMinSpeakingSec, smallClusterMaxSpeakingSec, uncertainMarginPct }
  );

  // Segments first: the entries below are re-derived from them.
  if (Array.isArray(rec.transcript_segments)) {
    assignSpeakersToSegments(rec.transcript_segments, diar, globalMap, uncertaintyMap);
  }

  // Wipe every entry's old label and manual override before re-stamping.
  for (const chunk of rec.chunks || []) {
    for (const entry of chunk.entries || []) {
      if (entry) {
        entry.speaker = null;
        entry.speaker_confidence = null;
        entry.speaker_uncertain = false;
        delete entry.speaker_override;
      }
    }
  }
  backfillEntrySpeakers(rec, { force: true });

  // Install the new roster; names and extras tags keyed by old ids are dropped.
  rec.speakers = speakers;
  rec.speaker_names = {};
  resetExtrasSpeakers(rec.extras);

  const meta = rec.meta || {};
  rec.meta = meta;
  meta.reclustered_at = Date.now();
  meta.recluster_threshold = clampPct(threshold);
  meta.polish_done = false;

  return { speakers, clusterCount, threshold: meta.recluster_threshold };
}

// ─── Apply re-polished summaries ────────────────────────────────────
// After a re-polish pass (runSummaryPolish with the operator's corrected
// names), write the new section summaries back into the saved record:
//   - rec.analysis.sections — the canonical section store
//   - rec.chunks[].summary  — the rendered topic cards
// Chunk summaries are matched to sections BY TITLE (polish never changes
// titles), consumed in section order so duplicate titles still line up.
// Chunk ENTRIES and any per-line speaker_override are left untouched —
// only the summary text changes. Returns the count of chunk summaries
// actually changed.
/**
 * Store re-polished section summaries on the record and copy them onto the
 * chunk cards that share a title. Mutates `rec`.
 *
 * @param {object} rec - Saved meeting record.
 * @param {Array<{title?: string, summary?: string}>} polishedSections -
 *   Sections in order; becomes `rec.analysis.sections` by reference.
 * @returns {number} How many `chunk.summary` values actually changed; 0 when
 *   `rec` is not an object or `polishedSections` is not an array.
 */
export function applyPolishedSummaries(rec, polishedSections) {
  const usable = rec && typeof rec === "object" && Array.isArray(polishedSections);
  if (!usable) return 0;

  if (!rec.analysis || typeof rec.analysis !== "object") {
    rec.analysis = { sections: polishedSections };
  } else {
    rec.analysis.sections = polishedSections;
  }

  // Missing or non-string titles all share the "" bucket.
  const titleOf = (item) => (item && typeof item.title === "string" ? item.title : "");

  // title → { summaries in section order, index of the next one to hand out }
  const slots = new Map();
  for (let idx = 0; idx < polishedSections.length; idx += 1) {
    const section = polishedSections[idx];
    const text = section && typeof section.summary === "string" ? section.summary : "";
    const title = titleOf(section);
    const slot = slots.get(title);
    if (slot) {
      slot.summaries.push(text);
    } else {
      slots.set(title, { summaries: [text], next: 0 });
    }
  }

  let updated = 0;
  for (const chunk of rec.chunks || []) {
    if (!chunk) continue;
    const slot = slots.get(titleOf(chunk));
    if (!slot) continue;

    // Once a title's queue is exhausted, later chunks reuse its last summary.
    const last = slot.summaries.length - 1;
    const pick = slot.next < slot.summaries.length ? slot.next : last;
    const text = slot.summaries[pick];
    slot.next += 1;

    // Empty summaries never overwrite an existing card.
    if (text && chunk.summary !== text) {
      chunk.summary = text;
      updated += 1;
    }
  }
  return updated;
}

// ─── helpers ────────────────────────────────────────────────────────

/**
 * Build (not throw) an Error tagged for the caller to treat as invalid input.
 * @param {string} message - Human-readable reason.
 * @returns {Error} Error whose `code` is "BAD_REQUEST".
 */
function badRequest(message) {
  return Object.assign(new Error(message), { code: "BAD_REQUEST" });
}

/**
 * Coerce a value to a whole-number percentage within [50, 95].
 * @param {*} v - Anything `Number()` accepts.
 * @returns {number} 70 when `v` does not coerce to a finite number, otherwise
 *   the rounded value limited to the 50–95 range.
 */
function clampPct(v) {
  const pct = Number(v);
  if (!Number.isFinite(pct)) return 70;

  const rounded = Math.round(pct);
  if (rounded < 50) return 50;
  if (rounded > 95) return 95;
  return rounded;
}

// Merge stats of `from` into `into` in place. turns / speaking-time /
// fingerprint-count sum; mean_confidence is turn-weighted across the
// clusters that have one; chunks_appeared_in uses max as a safe
// approximation (the raw per-cluster chunk sets aren't retained).
/**
 * Add the per-cluster stats of `from` onto `into`, in place.
 *
 * @param {object} into - Stats object that receives the totals.
 * @param {object} from - Stats object being folded in; not modified.
 * @returns {void} Does nothing when either argument is falsy.
 */
function mergeStats(into, from) {
  if (!(into && from)) return;

  const confidenceOf = (stats) =>
    typeof stats.mean_confidence === "number" ? stats.mean_confidence : null;

  const intoTurns = into.turns || 0;
  const fromTurns = from.turns || 0;
  const intoConf = confidenceOf(into);
  const fromConf = confidenceOf(from);

  // Resolve the combined confidence before any field of `into` is rewritten.
  let combinedConf;
  if (intoConf === null) {
    combinedConf = fromConf; // still null when neither side has one
  } else if (fromConf === null) {
    combinedConf = intoConf;
  } else {
    const totalTurns = intoTurns + fromTurns;
    // Weight by turns; with no turns on either side fall back to a plain mean.
    combinedConf =
      totalTurns > 0
        ? (intoConf * intoTurns + fromConf * fromTurns) / totalTurns
        : (intoConf + fromConf) / 2;
  }

  const seconds = (into.total_speaking_seconds || 0) + (from.total_speaking_seconds || 0);
  const fingerprints = (into.fingerprint_count || 0) + (from.fingerprint_count || 0);
  // Max, not sum: the two clusters may have appeared in the same chunks.
  const chunkSpan = Math.max(into.chunks_appeared_in || 0, from.chunks_appeared_in || 0);

  into.turns = intoTurns + fromTurns;
  into.total_speaking_seconds = Math.round(seconds * 10) / 10; // one decimal place
  into.fingerprint_count = fingerprints;
  into.chunks_appeared_in = chunkSpan;
  into.mean_confidence = combinedConf;
}

// Rewrite every speaker id in the extras block through `map`.
/**
 * Pass every speaker id stored in the extras block through `map`, in place.
 *
 * Touches `tldr.primary_speakers`, `decisions[].agreed_by`,
 * `action_items[].owner` and `key_quotes[].speaker`. List fields are
 * de-duplicated after mapping. Null or non-object list elements are skipped
 * instead of dereferenced, so one malformed item cannot abort the caller
 * part-way through its own mutations.
 *
 * @param {object} extras - Extras block; anything that is not an object is ignored.
 * @param {(id: string) => string} map - Returns the replacement id.
 * @returns {void}
 */
function remapExtrasSpeakers(extras, map) {
  if (!extras || typeof extras !== "object") return;
  const isRecord = (value) => value !== null && typeof value === "object";

  if (extras.tldr && Array.isArray(extras.tldr.primary_speakers)) {
    extras.tldr.primary_speakers = dedupe(extras.tldr.primary_speakers.map(map));
  }
  for (const d of arr(extras.decisions)) {
    if (isRecord(d) && Array.isArray(d.agreed_by)) d.agreed_by = dedupe(d.agreed_by.map(map));
  }
  for (const a of arr(extras.action_items)) {
    // Unset owners stay unset; `map` is only consulted for real values.
    if (isRecord(a) && a.owner) a.owner = map(a.owner);
  }
  for (const q of arr(extras.key_quotes)) {
    if (isRecord(q) && q.speaker) q.speaker = map(q.speaker);
  }
}

// Clear extras speaker attributions (keep the text). Used by re-cluster
// since cluster identities change and old ids would be meaningless.
/**
 * Blank every speaker attribution in the extras block, in place, leaving the
 * surrounding text fields alone.
 *
 * Sets `tldr.primary_speakers` and each `decisions[].agreed_by` to `[]`, and
 * each `action_items[].owner` / `key_quotes[].speaker` to `null`. Only object
 * values are written to: this file is an ES module (strict mode), where
 * assigning a property on a primitive throws, and `null` elements cannot be
 * dereferenced at all.
 *
 * @param {object} extras - Extras block; anything that is not an object is ignored.
 * @returns {void}
 */
function resetExtrasSpeakers(extras) {
  if (!extras || typeof extras !== "object") return;
  const isRecord = (value) => value !== null && typeof value === "object";

  if (isRecord(extras.tldr)) extras.tldr.primary_speakers = [];
  for (const d of arr(extras.decisions)) {
    if (isRecord(d)) d.agreed_by = [];
  }
  for (const a of arr(extras.action_items)) {
    if (isRecord(a)) a.owner = null;
  }
  for (const q of arr(extras.key_quotes)) {
    if (isRecord(q)) q.speaker = null;
  }
}

/**
 * Iterate-safely helper: hand back `v` itself when it is an array, otherwise
 * a fresh empty array.
 * @param {*} v
 * @returns {Array}
 */
function arr(v) {
  if (Array.isArray(v)) return v;
  return [];
}

function dedupe(list) {
  return [...new Set(list)];
}