Speaker corrections: rename / merge / reassign + voice learning
Native editor to fix speaker-ID errors after transcription (modeled on recap-relay's correction UX): rename a speaker in the legend, merge two speakers, or reassign an individual transcript line. Saving rewrites speakers.json, re-renders transcript.md + recap.html, and updates the voiceprint memory — so a correction compounds: naming an "Unknown" speaker teaches that voice for future calls.

- SpeakerEditing (pure, tested): replaceSpeaker (rename = merge-onto-existing), reassign, netNameMap (compose ops), and remap (apply a name map to a recap's structured fields + whole-word free text, so summaries/extras update without re-LLM).
- RecapEditModel (@MainActor): loads speakers.json (+ optional recap.json + cluster_fingerprints.json); on save writes the resolved speakers.json, re-renders, and reconciles voiceprints — merge keeps the survivor's print; rename/name-an-Unknown enrolls the cluster's fingerprint under the new name.
- TranscriptEditorView (SwiftUI) + EditorWindow (AppKit window for the LSUIElement app); menu gains "Edit speakers".
- Pipeline now persists cluster_fingerprints.json (every cluster incl. Unknown) and recap.json (RecapFile) so the editor can learn voices + re-render offline.
- RecapModels made Codable; TranscriptAssembler exposes allFingerprints; VoiceprintStore gains enroll() + merge().

52/52 XCTest (6 new, incl. a full rename→artifacts→voiceprint round-trip on disk).
This commit is contained in:
@@ -406,6 +406,7 @@ final class SessionController: ObservableObject {
|
||||
guard let result = try? await analyzer.recap(file: speakers) else { return }
|
||||
let title = Self.recapTitle(app: inputs.app, sessionId: inputs.sessionId)
|
||||
try? RecapRenderer.write(file: speakers, result: result, title: title, to: inputs.folder)
|
||||
try? RecapFile(title: title, result: result).write(to: inputs.folder.appendingPathComponent("recap.json"))
|
||||
let url = inputs.folder.appendingPathComponent("recap.html")
|
||||
if FileManager.default.fileExists(atPath: url.path) { self.recapURL = url }
|
||||
}
|
||||
@@ -421,6 +422,21 @@ final class SessionController: ObservableObject {
|
||||
return "\(appName) call — \(date) \(time)".trimmingCharacters(in: .whitespaces)
|
||||
}
|
||||
|
||||
// MARK: - Speaker corrections
|
||||
|
||||
/// Whether the most recent session has a `speakers.json` on disk to correct.
///
/// Evaluates to `false` when `lastSession?.folder` yields no folder, or when
/// no file exists at `<folder>/speakers.json`.
var canEditSpeakers: Bool {
    if let sessionFolder = lastSession?.folder {
        let speakersURL = sessionFolder.appendingPathComponent("speakers.json")
        return FileManager.default.fileExists(atPath: speakersURL.path)
    }
    return false
}
|
||||
|
||||
/// Presents the speaker-correction editor for the most recent session.
///
/// Silently does nothing when `lastSession?.folder` yields no folder or when
/// `RecapEditModel(folder:voiceprints:)` returns `nil` for it.
func editLastSession() {
    if let sessionFolder = lastSession?.folder,
       let editModel = RecapEditModel(folder: sessionFolder, voiceprints: voiceprints) {
        EditorWindow.shared.show(model: editModel)
    }
}
|
||||
|
||||
private func fail(_ message: String) {
|
||||
recorder = nil
|
||||
visualCapture = nil // recorder.start() failed before visual started; nothing running
|
||||
|
||||
@@ -11,7 +11,8 @@ enum TranscriptAssembler {
|
||||
|
||||
/// Result of assembling per-chunk transcription output into one session.
struct Assembled {
    /// The merged speakers/segments document for the session.
    let speakersFile: SpeakersFile

    /// Fingerprints for confidently-named speakers only, keyed by speaker name;
    /// intended for `VoiceprintStore`.
    /// NOTE(review): an earlier comment described these vectors as 192-dim — confirm.
    let fingerprints: [String: [Float]]

    /// Fingerprints for EVERY cluster, including Unknown ones, keyed by cluster
    /// name — used by the editor for voice learning.
    let allFingerprints: [String: [Float]]
}
|
||||
|
||||
/// Source ranking when the same name appears across chunks with different sources.
|
||||
@@ -33,6 +34,7 @@ enum TranscriptAssembler {
|
||||
var segments: [SpeakersFile.Segment] = []
|
||||
var bestSpeaker: [String: SpeakersFile.Speaker] = [:]
|
||||
var fingerprints: [String: [Float]] = [:]
|
||||
var allFingerprints: [String: [Float]] = [:]
|
||||
var models: [String: String] = [:]
|
||||
var duration = 0.0
|
||||
|
||||
@@ -58,13 +60,14 @@ enum TranscriptAssembler {
|
||||
} else {
|
||||
bestSpeaker[sp.name] = candidate
|
||||
}
|
||||
// Collect named fingerprints only (never Unknown_N / Speaker_unknown).
|
||||
if !isUnknown(sp.name), let fp = sp.fingerprint, fp.count > 0 {
|
||||
fingerprints[sp.name] = fp
|
||||
if let fp = sp.fingerprint, fp.count > 0 {
|
||||
allFingerprints[sp.name] = fp // every cluster, for the editor
|
||||
if !isUnknown(sp.name) { fingerprints[sp.name] = fp } // named only, for the store
|
||||
}
|
||||
}
|
||||
for (name, fp) in chunk.response.fingerprints where !isUnknown(name) && fp.count > 0 {
|
||||
fingerprints[name] = fp
|
||||
for (name, fp) in chunk.response.fingerprints where fp.count > 0 {
|
||||
allFingerprints[name] = fp
|
||||
if !isUnknown(name) { fingerprints[name] = fp }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -75,6 +78,6 @@ enum TranscriptAssembler {
|
||||
let file = SpeakersFile(
|
||||
sessionId: sessionId, app: app, durationSec: duration,
|
||||
speakers: speakers, segments: segments, models: models)
|
||||
return Assembled(speakersFile: file, fingerprints: fingerprints)
|
||||
return Assembled(speakersFile: file, fingerprints: fingerprints, allFingerprints: allFingerprints)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -97,6 +97,13 @@ final class TranscriptPipeline {
|
||||
|
||||
let assembled = TranscriptAssembler.assemble(sessionId: sessionId, app: app, chunks: results)
|
||||
try assembled.speakersFile.write(to: sessionFolder.appendingPathComponent("speakers.json"))
|
||||
// Persist every cluster's voiceprint (incl. Unknown) so the speaker editor can
|
||||
// teach the store a voice when the user renames an Unknown to a real name.
|
||||
if !assembled.allFingerprints.isEmpty,
|
||||
let data = try? JSONSerialization.data(withJSONObject: assembled.allFingerprints.mapValues { $0.map(Double.init) },
|
||||
options: [.sortedKeys]) {
|
||||
try? data.write(to: sessionFolder.appendingPathComponent("cluster_fingerprints.json"))
|
||||
}
|
||||
return assembled.speakersFile
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user