Speaker corrections: rename / merge / reassign + voice learning

Native editor to fix speaker-ID errors after transcription (modeled on recap-relay's
correction UX): rename a speaker in the legend, merge two speakers, or reassign an
individual transcript line. Saving rewrites speakers.json, re-renders transcript.md +
recap.html, and updates the voiceprint memory — so a correction compounds: naming an
"Unknown" speaker teaches that voice for future calls.

- SpeakerEditing (pure, tested): replaceSpeaker (rename = merge-onto-existing),
  reassign, netNameMap (compose ops), and remap (apply a name map to a recap's
  structured fields + whole-word free text, so summaries/extras update without re-LLM).
- RecapEditModel (@MainActor): loads speakers.json (+ optional recap.json +
  cluster_fingerprints.json); on save writes the resolved speakers.json, re-renders,
  and reconciles voiceprints — merge keeps the survivor's print; rename/name-an-Unknown
  enrolls the cluster's fingerprint under the new name.
- TranscriptEditorView (SwiftUI) + EditorWindow (AppKit window for the LSUIElement app);
  menu gains "Edit speakers".
- Pipeline now persists cluster_fingerprints.json (every cluster incl. Unknown) and
  recap.json (RecapFile) so the editor can learn voices + re-render offline.
- RecapModels made Codable; TranscriptAssembler exposes allFingerprints;
  VoiceprintStore gains enroll() + merge().

52/52 XCTest (6 new, incl. a full rename→artifacts→voiceprint round-trip on disk).
This commit is contained in:
Grant Gilliam
2026-06-06 15:12:23 -05:00
parent 85bfdf2b56
commit 4c086251d9
11 changed files with 569 additions and 16 deletions
+120
View File
@@ -0,0 +1,120 @@
import Foundation
/// Editable view-model for one session's speaker corrections.
///
/// Loads `speakers.json` (plus optional `recap.json` and `cluster_fingerprints.json`)
/// from the session folder, applies rename / merge / per-segment reassignment in
/// memory, and on `save()` rewrites `speakers.json`, re-renders the recap artifacts
/// through `RecapRenderer`, and reconciles the voiceprint store so that naming a
/// previously unknown speaker enrolls that cluster's fingerprint under the new name.
///
/// Edits are tracked relative to a baseline (speaker names + recap contents). The
/// baseline is the on-disk state at load time and is advanced after every
/// successful save, so several edit/save rounds in one session compose correctly.
@MainActor
final class RecapEditModel: ObservableObject {
    /// Session folder containing `speakers.json` and the rendered artifacts.
    let folder: URL
    /// Display title: the recap's title when `recap.json` was readable, otherwise
    /// "<App> call" derived from the speakers file.
    let title: String

    private let voiceprints: VoiceprintStore
    /// The speakers file as decoded at load time (session metadata + original roster).
    private let base: SpeakersFile
    /// Recap matching the current baseline; `nil` when no `recap.json` could be read.
    private var recapFile: RecapFile?
    /// Fingerprints keyed by the cluster labels present when the session was loaded.
    private let clusterFingerprints: [String: [Float]]
    /// Speaker names at the current baseline (load time, then the last successful save).
    private var originalSpeakers: [String]
    /// Rename/merge operations applied since the current baseline, in order.
    private var renameOps: [(from: String, to: String)] = []

    @Published private(set) var segments: [SpeakersFile.Segment]
    @Published private(set) var speakers: [String]
    @Published private(set) var dirty = false
    @Published private(set) var status: String?

    /// Loads the session stored in `folder`.
    ///
    /// Fails (returns `nil`) when `speakers.json` is missing, cannot be decoded, or
    /// contains no segments. `recap.json` and `cluster_fingerprints.json` are optional.
    init?(folder: URL, voiceprints: VoiceprintStore) {
        let speakersURL = folder.appendingPathComponent("speakers.json")
        guard let data = try? Data(contentsOf: speakersURL),
              let file = try? JSONDecoder().decode(SpeakersFile.self, from: data),
              !file.segments.isEmpty else { return nil }

        let roster = SpeakerEditing.orderedSpeakers(file.segments)
        let recap: RecapFile? = RecapFile.read(from: folder.appendingPathComponent("recap.json"))

        self.folder = folder
        self.voiceprints = voiceprints
        self.base = file
        self.segments = file.segments
        self.speakers = roster
        self.originalSpeakers = roster
        self.recapFile = recap
        self.clusterFingerprints = Self.loadFingerprints(
            folder.appendingPathComponent("cluster_fingerprints.json"))
        // Parenthesised for readability; `+` already binds tighter than `??`.
        self.title = recap?.title ?? (file.app.capitalized + " call")
    }

    // MARK: - Edits

    /// Renames `name` to `newName` (trimmed). A no-op when the trimmed name is empty
    /// or identical to `name`. Renaming onto an existing speaker acts as a merge.
    func rename(_ name: String, to newName: String) {
        let to = newName.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !to.isEmpty, to != name else { return }
        renameOps.append((from: name, to: to))
        segments = SpeakerEditing.replaceSpeaker(name, with: to, in: segments)
        refresh()
    }

    /// Merge is just a rename onto an existing speaker.
    func merge(_ absorbed: String, into survivor: String) {
        guard absorbed != survivor else { return }
        renameOps.append((from: absorbed, to: survivor))
        segments = SpeakerEditing.replaceSpeaker(absorbed, with: survivor, in: segments)
        refresh()
    }

    /// Reassigns the single segment at `index` to `speaker`.
    /// An out-of-range index is ignored and does not mark the model dirty.
    func reassign(_ index: Int, to speaker: String) {
        guard segments.indices.contains(index) else { return }
        segments = SpeakerEditing.reassign(index, to: speaker, in: segments)
        refresh()
    }

    /// Recomputes the roster from the edited segments and flags unsaved changes.
    private func refresh() {
        speakers = SpeakerEditing.orderedSpeakers(segments)
        dirty = true
        status = nil
    }

    // MARK: - Save

    /// Persist corrections: rewrite speakers.json, re-render artifacts, update voiceprints.
    ///
    /// If any file write fails, `status` reports the failure, the model stays dirty,
    /// pending rename operations are kept, and the voiceprint store is not touched,
    /// so the save can simply be retried. Files written before the failing step are
    /// left in place and are rewritten by the retry.
    func save() {
        let file = SpeakersFile(sessionId: base.sessionId, app: base.app, durationSec: base.durationSec,
                                speakers: buildSpeakerList(), segments: segments, models: base.models)
        // Net effect of the pending operations on the baseline names
        // (presumably baseline name -> final name; see SpeakerEditing.netNameMap).
        let net = SpeakerEditing.netNameMap(originals: originalSpeakers, ops: renameOps)
        let result = recapFile.map { SpeakerEditing.remap($0.result, names: net) }
            ?? RecapResult(sections: [], extras: nil)
        // recap.json is only rewritten when one was loaded in the first place.
        let updatedRecap = recapFile.map { _ in RecapFile(title: title, result: result) }

        do {
            try file.write(to: folder.appendingPathComponent("speakers.json"))
            if let updatedRecap = updatedRecap {
                try updatedRecap.write(to: folder.appendingPathComponent("recap.json"))
            }
            try RecapRenderer.write(file: file, result: result, title: title, to: folder)
        } catch {
            // Never claim success (or teach voices) for corrections that did not reach disk.
            status = "Save failed: \(error.localizedDescription)"
            return
        }

        // Voiceprints: reconcile per the net rename/merge map. Identity entries are
        // skipped so a speaker is never merged into / renamed onto itself.
        let stored = voiceprints.knownVoiceprints()
        for (source, target) in net where source != target && !LabelMergeResponse.isUnknownName(target) {
            let targetHasPrint = clusterFingerprints[target] != nil || stored[target] != nil
            if targetHasPrint {
                // Merge into an existing identity: keep the survivor's print, drop the absorbed.
                if !LabelMergeResponse.isUnknownName(source) { voiceprints.merge(source, into: target) }
            } else if let vector = clusterFingerprints[source] {
                // Rename / name an Unknown: learn this voice under the new name.
                voiceprints.enroll(name: target, vector: vector)
                if !LabelMergeResponse.isUnknownName(source) { voiceprints.merge(source, into: target) }
            } else {
                // No fingerprint for this cluster in the session: carry over any stored entry.
                voiceprints.rename(source, to: target)
            }
        }

        // Advance the baseline so operations recorded after this save are composed
        // against the names and recap text that are now on disk.
        recapFile = updatedRecap
        originalSpeakers = speakers
        renameOps.removeAll()
        dirty = false
        status = "Saved — recap.html & transcript.md updated."
    }

    /// Speaker roster from the edited segments: keep the original source where the
    /// name is unchanged; mark new (renamed/merged) names as user-set.
    private func buildSpeakerList() -> [SpeakersFile.Speaker] {
        // First entry wins if the loaded roster contains a duplicated name.
        let byName = Dictionary(base.speakers.map { ($0.name, $0) }, uniquingKeysWith: { first, _ in first })
        return SpeakerEditing.orderedSpeakers(segments).map { name in
            byName[name] ?? SpeakersFile.Speaker(name: name, source: "manual",
                                                 overlapConfidence: nil, matchSimilarity: nil)
        }
    }

    /// Reads a `{ label: [number, ...] }` JSON object into float vectors.
    /// Returns an empty map when the file is missing or not of that shape;
    /// non-numeric array elements are dropped.
    private static func loadFingerprints(_ url: URL) -> [String: [Float]] {
        guard let data = try? Data(contentsOf: url),
              let object = try? JSONSerialization.jsonObject(with: data) as? [String: [Any]] else { return [:] }
        return object.mapValues { values in
            values.compactMap { ($0 as? NSNumber)?.floatValue }
        }
    }
}