Speaker corrections: rename / merge / reassign + voice learning

Native editor to fix speaker-ID errors after transcription (modeled on recap-relay's
correction UX): rename a speaker in the legend, merge two speakers, or reassign an
individual transcript line. Saving rewrites speakers.json, re-renders transcript.md +
recap.html, and updates the voiceprint memory — so a correction compounds: naming an
"Unknown" speaker teaches that voice for future calls.

- SpeakerEditing (pure, tested): replaceSpeaker (rename = merge-onto-existing),
  reassign, netNameMap (compose ops), and remap (apply a name map to a recap's
  structured fields + whole-word free text, so summaries/extras update without re-LLM).
- RecapEditModel (@MainActor): loads speakers.json (+ optional recap.json +
  cluster_fingerprints.json); on save writes the resolved speakers.json, re-renders,
  and reconciles voiceprints — merge keeps the survivor's print; rename/name-an-Unknown
  enrolls the cluster's fingerprint under the new name.
- TranscriptEditorView (SwiftUI) + EditorWindow (AppKit window for the LSUIElement app);
  menu gains "Edit speakers".
- Pipeline now persists cluster_fingerprints.json (every cluster incl. Unknown) and
  recap.json (RecapFile) so the editor can learn voices + re-render offline.
- RecapModels made Codable; TranscriptAssembler exposes allFingerprints;
  VoiceprintStore gains enroll() + merge().

52/52 XCTest (6 new, incl. a full rename→artifacts→voiceprint round-trip on disk).
This commit is contained in:
Grant Gilliam
2026-06-06 15:12:23 -05:00
parent 85bfdf2b56
commit 4c086251d9
11 changed files with 569 additions and 16 deletions
+120
View File
@@ -0,0 +1,120 @@
import Foundation
/// Editable view-model for one session's speaker corrections. Loads `speakers.json`
/// (+ optional `recap.json` and `cluster_fingerprints.json`), applies rename / merge
/// / per-segment reassignment, and on save rewrites `speakers.json`, re-renders
/// `transcript.md` + `recap.html`, and updates the voiceprint store (learning a voice
/// when an Unknown speaker is named).
///
/// Every successful save becomes the baseline for the next one: the in-memory recap
/// and the "original" speaker list are advanced, so a later rename is composed on top
/// of the already-saved names instead of on top of the names that were first loaded.
@MainActor
final class RecapEditModel: ObservableObject {
    /// Session folder that holds `speakers.json` and the rendered artifacts.
    let folder: URL
    /// Display title: the recap's title when `recap.json` exists, else "<App> call".
    let title: String

    private let voiceprints: VoiceprintStore
    /// The `speakers.json` contents as loaded; source of the unchanged metadata on save.
    private let base: SpeakersFile
    /// Recap as loaded or as last saved (`nil` when the session has no `recap.json`).
    private var recapFile: RecapFile?
    /// Fingerprint vectors from `cluster_fingerprints.json`, keyed by the name stored there.
    private let clusterFingerprints: [String: [Float]]
    /// Speaker legend at load time or at the last successful save; `renameOps` are
    /// interpreted relative to this list.
    private var originalSpeakers: [String]
    /// Rename / merge operations performed since load or the last successful save.
    private var renameOps: [(from: String, to: String)] = []

    @Published private(set) var segments: [SpeakersFile.Segment]
    @Published private(set) var speakers: [String]
    /// `true` while there are edits that have not been saved successfully.
    @Published private(set) var dirty = false
    /// Outcome message of the last save; reset to `nil` by any edit.
    @Published private(set) var status: String?

    /// Loads the session in `folder`.
    ///
    /// Fails (returns `nil`) when `speakers.json` cannot be read or decoded, or when it
    /// contains no segments. `recap.json` and `cluster_fingerprints.json` are optional.
    init?(folder: URL, voiceprints: VoiceprintStore) {
        let speakersURL = folder.appendingPathComponent("speakers.json")
        guard let data = try? Data(contentsOf: speakersURL),
              let file = try? JSONDecoder().decode(SpeakersFile.self, from: data),
              !file.segments.isEmpty else { return nil }
        let recap = RecapFile.read(from: folder.appendingPathComponent("recap.json"))
        let legend = SpeakerEditing.orderedSpeakers(file.segments)
        self.folder = folder
        self.voiceprints = voiceprints
        self.base = file
        self.segments = file.segments
        self.speakers = legend
        self.originalSpeakers = legend
        self.recapFile = recap
        self.clusterFingerprints = Self.loadFingerprints(folder.appendingPathComponent("cluster_fingerprints.json"))
        self.title = recap?.title ?? file.app.capitalized + " call"
    }

    // MARK: - Edits

    /// Renames `name` to the trimmed `newName` across all segments. Ignored when the
    /// trimmed name is empty or equal to `name`.
    func rename(_ name: String, to newName: String) {
        let to = newName.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !to.isEmpty, to != name else { return }
        renameOps.append((from: name, to: to))
        segments = SpeakerEditing.replaceSpeaker(name, with: to, in: segments)
        refresh()
    }

    /// Merge is just a rename onto an existing speaker.
    func merge(_ absorbed: String, into survivor: String) {
        guard absorbed != survivor else { return }
        renameOps.append((from: absorbed, to: survivor))
        segments = SpeakerEditing.replaceSpeaker(absorbed, with: survivor, in: segments)
        refresh()
    }

    /// Reassigns the single segment at `index` to `speaker`. Not recorded as a rename.
    func reassign(_ index: Int, to speaker: String) {
        segments = SpeakerEditing.reassign(index, to: speaker, in: segments)
        refresh()
    }

    /// Recomputes the legend and marks the model as having unsaved edits.
    private func refresh() {
        speakers = SpeakerEditing.orderedSpeakers(segments)
        dirty = true
        status = nil
    }

    // MARK: - Save

    /// Persist corrections: rewrite speakers.json, re-render artifacts, update voiceprints.
    ///
    /// If any file write fails, the failure is reported through `status`, the model stays
    /// `dirty`, and neither the voiceprint store nor the pending edits are touched, so
    /// saving again retries the same work.
    func save() {
        let newSpeakers = buildSpeakerList()
        let file = SpeakersFile(sessionId: base.sessionId, app: base.app, durationSec: base.durationSec,
                                speakers: newSpeakers, segments: segments, models: base.models)
        let net = SpeakerEditing.netNameMap(originals: originalSpeakers, ops: renameOps)
        let result = recapFile.map { SpeakerEditing.remap($0.result, names: net) }
            ?? RecapResult(sections: [], extras: nil)
        // Only sessions that already had a recap.json get one written back.
        let updatedRecap = recapFile.map { _ in RecapFile(title: title, result: result) }

        do {
            try file.write(to: folder.appendingPathComponent("speakers.json"))
            if let updatedRecap = updatedRecap {
                try updatedRecap.write(to: folder.appendingPathComponent("recap.json"))
            }
            try RecapRenderer.write(file: file, result: result, title: title, to: folder)
        } catch {
            status = "Save failed: \(error.localizedDescription)"
            return
        }

        // Voiceprints: reconcile per the net rename/merge map.
        let stored = voiceprints.knownVoiceprints()
        for (orig, final) in net where !LabelMergeResponse.isUnknownName(final) {
            let finalHasPrint = clusterFingerprints[final] != nil || stored[final] != nil
            if finalHasPrint {
                // Merge into an existing identity — keep the survivor's print, drop the absorbed.
                if !LabelMergeResponse.isUnknownName(orig) { voiceprints.merge(orig, into: final) }
            } else if let vec = clusterFingerprints[orig] {
                // Rename / name an Unknown — learn this voice for the new name.
                voiceprints.enroll(name: final, vector: vec)
                if !LabelMergeResponse.isUnknownName(orig) { voiceprints.merge(orig, into: final) }
            } else {
                voiceprints.rename(orig, to: final)
            }
        }

        // Advance the baseline. Without this, the next save would remap the recap that
        // was originally loaded (undoing the names just written) and would compose new
        // renames against speaker names that no longer exist.
        recapFile = updatedRecap
        originalSpeakers = SpeakerEditing.orderedSpeakers(segments)
        renameOps.removeAll()
        dirty = false
        status = "Saved — recap.html & transcript.md updated."
    }

    /// Speaker roster from the edited segments: keep the original source where the
    /// name is unchanged; mark new (renamed/merged) names as user-set.
    private func buildSpeakerList() -> [SpeakersFile.Speaker] {
        let byName = Dictionary(base.speakers.map { ($0.name, $0) }, uniquingKeysWith: { a, _ in a })
        return SpeakerEditing.orderedSpeakers(segments).map { name in
            byName[name] ?? SpeakersFile.Speaker(name: name, source: "manual", overlapConfidence: nil, matchSimilarity: nil)
        }
    }

    /// Reads a JSON object of `name -> [number]` from `url`. Returns an empty map when
    /// the file is missing or has a different shape; non-numeric array elements are dropped.
    private static func loadFingerprints(_ url: URL) -> [String: [Float]] {
        guard let data = try? Data(contentsOf: url),
              let obj = try? JSONSerialization.jsonObject(with: data) as? [String: [Any]] else { return [:] }
        return obj.mapValues { $0.compactMap { ($0 as? NSNumber)?.floatValue } }
    }
}
+25 -8
View File
@@ -2,7 +2,7 @@ import Foundation
/// One topic section: a contiguous run of transcript entries `[startIndex...endIndex]`
/// (inclusive, indices into the canonical entries array) with an LLM title + summary.
struct TopicSection: Equatable {
struct TopicSection: Equatable, Codable {
var title: String
var summary: String
var startIndex: Int
@@ -12,12 +12,12 @@ struct TopicSection: Equatable {
/// Structured "meeting extras" extracted from the named transcript. Mirrors
/// recap-relay's schema; speakers are real names (we already have them from
/// label-merge), not anonymous cluster ids.
struct MeetingExtras: Equatable {
struct TLDR: Equatable { var summary: String; var primarySpeakers: [String] }
struct Decision: Equatable { var statement: String; var agreedBy: [String]; var supportingOffset: Int? }
struct ActionItem: Equatable { var description: String; var owner: String?; var dueHint: String?; var supportingOffset: Int? }
struct OpenQuestion: Equatable { var question: String; var raisedBy: String? }
struct KeyQuote: Equatable { var speaker: String?; var offset: Int?; var quote: String; var whyNotable: String }
struct MeetingExtras: Equatable, Codable {
struct TLDR: Equatable, Codable { var summary: String; var primarySpeakers: [String] }
struct Decision: Equatable, Codable { var statement: String; var agreedBy: [String]; var supportingOffset: Int? }
struct ActionItem: Equatable, Codable { var description: String; var owner: String?; var dueHint: String?; var supportingOffset: Int? }
struct OpenQuestion: Equatable, Codable { var question: String; var raisedBy: String? }
struct KeyQuote: Equatable, Codable { var speaker: String?; var offset: Int?; var quote: String; var whyNotable: String }
var tldr: TLDR
var decisions: [Decision]
@@ -32,7 +32,24 @@ struct MeetingExtras: Equatable {
/// The assembled recap for one session: the topic sections + (optional) extras,
/// over the session's transcript. Rendered to `transcript.md` / `recap.html`.
struct RecapResult: Equatable {
struct RecapResult: Equatable, Codable {
/// Topic sections of the recap.
var sections: [TopicSection]
/// Structured meeting extras; `nil` when there are none.
var extras: MeetingExtras?
}
/// Persisted `recap.json` — the recap result plus its title, so the speaker editor
/// can re-render `recap.html` / `transcript.md` after corrections without re-calling
/// the LLM (a "Regenerate" action re-runs analysis when the user wants fresh summaries).
struct RecapFile: Equatable, Codable {
    var title: String
    var result: RecapResult

    /// Encodes this value as pretty-printed JSON with sorted keys and writes it to `url`.
    /// - Throws: Any error raised while encoding or while writing the file.
    func write(to url: URL) throws {
        let encoder = JSONEncoder()
        encoder.outputFormatting = [.prettyPrinted, .sortedKeys]
        let payload = try encoder.encode(self)
        try payload.write(to: url)
    }

    /// Reads and decodes a `RecapFile` from `url`.
    /// - Returns: The decoded file, or `nil` when the file cannot be read or decoded.
    static func read(from url: URL) -> RecapFile? {
        do {
            let payload = try Data(contentsOf: url)
            return try JSONDecoder().decode(RecapFile.self, from: payload)
        } catch {
            return nil
        }
    }
}
@@ -0,0 +1,91 @@
import Foundation
/// Pure transforms for speaker corrections: rename, merge (rename onto an existing
/// name), and per-segment reassignment, plus remapping speaker names through a
/// recap's text/structured fields. No UI/IO — fully unit-testable.
enum SpeakerEditing {
    typealias Segment = SpeakersFile.Segment

    /// Distinct speakers in first-appearance order (the legend). Segments with an
    /// empty speaker name are ignored.
    static func orderedSpeakers(_ segments: [Segment]) -> [String] {
        var seen = Set<String>()
        var order: [String] = []
        for segment in segments where !segment.speaker.isEmpty {
            if seen.insert(segment.speaker).inserted { order.append(segment.speaker) }
        }
        return order
    }

    /// Replace every `from` with `to` across segments. Rename when `to` is new; a
    /// merge when `to` already exists — same primitive either way.
    /// Returns `segments` unchanged when `from == to` or `to` is empty.
    static func replaceSpeaker(_ from: String, with to: String, in segments: [Segment]) -> [Segment] {
        guard from != to, !to.isEmpty else { return segments }
        return segments.map { segment in
            guard segment.speaker == from else { return segment }
            return Segment(start: segment.start, end: segment.end, speaker: to, text: segment.text)
        }
    }

    /// Reassign a single segment to another speaker. Returns `segments` unchanged
    /// when `index` is out of range or `speaker` is empty.
    static func reassign(_ index: Int, to speaker: String, in segments: [Segment]) -> [Segment] {
        guard segments.indices.contains(index), !speaker.isEmpty else { return segments }
        var out = segments
        let old = out[index]
        out[index] = Segment(start: old.start, end: old.end, speaker: speaker, text: old.text)
        return out
    }

    /// Compose an ordered list of (from → to) rename/merge ops into the net
    /// original → final map (per-segment reassignments are NOT renames, so they don't
    /// appear here). Only entries that actually changed are returned.
    ///
    /// Duplicate names in `originals` are tolerated (they collapse to one entry).
    static func netNameMap(originals: [String], ops: [(from: String, to: String)]) -> [String: String] {
        var current = Dictionary(originals.map { ($0, $0) }, uniquingKeysWith: { first, _ in first })
        for op in ops {
            // Every original whose current name is `op.from` now resolves to `op.to`.
            current = current.mapValues { $0 == op.from ? op.to : $0 }
        }
        return current.filter { $0.key != $0.value }
    }

    // MARK: - Recap remapping

    /// Apply a name map to a recap's structured fields (exact) and free text
    /// (whole-word), so a rename/merge is reflected in summaries, the TLDR, and the
    /// extras attributions without re-running the LLM.
    static func remap(_ result: RecapResult, names map: [String: String]) -> RecapResult {
        guard !map.isEmpty else { return result }
        func exact(_ s: String?) -> String? { s.flatMap { map[$0] ?? $0 } }
        func exactList(_ a: [String]) -> [String] { a.map { map[$0] ?? $0 } }

        let sections = result.sections.map {
            TopicSection(title: replaceWords($0.title, map),
                         summary: replaceWords($0.summary, map),
                         startIndex: $0.startIndex, endIndex: $0.endIndex)
        }
        var extras = result.extras
        if let x = result.extras {
            extras = MeetingExtras(
                tldr: .init(summary: replaceWords(x.tldr.summary, map),
                            primarySpeakers: exactList(x.tldr.primarySpeakers)),
                decisions: x.decisions.map { .init(statement: replaceWords($0.statement, map),
                                                   agreedBy: exactList($0.agreedBy), supportingOffset: $0.supportingOffset) },
                actionItems: x.actionItems.map { .init(description: replaceWords($0.description, map),
                                                       owner: exact($0.owner), dueHint: $0.dueHint, supportingOffset: $0.supportingOffset) },
                openQuestions: x.openQuestions.map { .init(question: replaceWords($0.question, map), raisedBy: exact($0.raisedBy)) },
                keyQuotes: x.keyQuotes.map { .init(speaker: exact($0.speaker), offset: $0.offset,
                                                   quote: replaceWords($0.quote, map), whyNotable: replaceWords($0.whyNotable, map)) })
        }
        return RecapResult(sections: sections, extras: extras)
    }

    /// Whole-word replace each `from` → `to` in free text (case-sensitive). Used so a
    /// renamed speaker's name updates inside summaries without clobbering substrings.
    ///
    /// All entries are applied in a single pass over the ORIGINAL text, so one entry's
    /// output is never rewritten by another entry (`["Alice": "Bob", "Bob": "Carol"]`
    /// turns "Alice met Bob" into "Bob met Carol"), and the result does not depend on
    /// dictionary iteration order. Entries with an empty key or `from == to` are skipped.
    static func replaceWords(_ text: String, _ map: [String: String]) -> String {
        // Longest names first, so when two names could match at the same position the
        // longer one wins; ties are broken alphabetically to keep the pattern stable.
        let names = map
            .filter { !$0.key.isEmpty && $0.key != $0.value }
            .keys
            .sorted { $0.count != $1.count ? $0.count > $1.count : $0 < $1 }
        guard !names.isEmpty else { return text }

        let alternation = names.map { NSRegularExpression.escapedPattern(for: $0) }.joined(separator: "|")
        guard let re = try? NSRegularExpression(pattern: "\\b(?:" + alternation + ")\\b") else { return text }

        let source = text as NSString
        var out = ""
        var cursor = 0
        for match in re.matches(in: text, range: NSRange(location: 0, length: source.length)) {
            let hit = match.range
            // Copy the untouched text before this match, then the mapped name.
            out += source.substring(with: NSRange(location: cursor, length: hit.location - cursor))
            let found = source.substring(with: hit)
            out += map[found] ?? found
            cursor = hit.location + hit.length
        }
        out += source.substring(from: cursor)
        return out
    }
}