Speaker corrections: rename / merge / reassign + voice learning
Native editor to fix speaker-ID errors after transcription (modeled on recap-relay's correction UX): rename a speaker in the legend, merge two speakers, or reassign an individual transcript line. Saving rewrites speakers.json, re-renders transcript.md + recap.html, and updates the voiceprint memory — so a correction compounds: naming an "Unknown" speaker teaches that voice for future calls. - SpeakerEditing (pure, tested): replaceSpeaker (rename = merge-onto-existing), reassign, netNameMap (compose ops), and remap (apply a name map to a recap's structured fields + whole-word free text, so summaries/extras update without re-LLM). - RecapEditModel (@MainActor): loads speakers.json (+ optional recap.json + cluster_fingerprints.json); on save writes the resolved speakers.json, re-renders, and reconciles voiceprints — merge keeps the survivor's print; rename/name-an-Unknown enrolls the cluster's fingerprint under the new name. - TranscriptEditorView (SwiftUI) + EditorWindow (AppKit window for the LSUIElement app); menu gains "Edit speakers". - Pipeline now persists cluster_fingerprints.json (every cluster incl. Unknown) and recap.json (RecapFile) so the editor can learn voices + re-render offline. - RecapModels made Codable; TranscriptAssembler exposes allFingerprints; VoiceprintStore gains enroll() + merge(). 52/52 XCTest (6 new, incl. a full rename→artifacts→voiceprint round-trip on disk).
This commit is contained in:
@@ -0,0 +1,120 @@
|
||||
import Foundation
|
||||
|
||||
/// Editable view-model for one session's speaker corrections.
///
/// Loads `speakers.json` (+ optional `recap.json` and `cluster_fingerprints.json`) from
/// the session folder, applies rename / merge / per-segment reassignment in memory, and
/// on `save()` rewrites `speakers.json`, re-renders the artifacts through
/// `RecapRenderer`, and updates the voiceprint store (learning a voice when an Unknown
/// speaker is named).
@MainActor
final class RecapEditModel: ObservableObject {
    /// Session folder that holds `speakers.json` and the other artifacts.
    let folder: URL
    /// Title used when re-rendering: the stored recap title, or "<App> call" as fallback.
    let title: String

    private let voiceprints: VoiceprintStore
    /// The `speakers.json` contents as loaded; the source of session metadata and of the
    /// original per-speaker entries.
    private let base: SpeakersFile
    /// The recap the next save remaps. Starts as the loaded `recap.json` (if any) and is
    /// replaced by the remapped recap after every successful save.
    private var recapFile: RecapFile?
    /// Fingerprints loaded from `cluster_fingerprints.json`, keyed by name.
    private let clusterFingerprints: [String: [Float]]
    /// Speaker names as of the last load or successful save: the baseline that
    /// `renameOps` are composed against.
    private var originalSpeakers: [String]
    /// Rename / merge operations performed since the last load or successful save.
    private var renameOps: [(from: String, to: String)] = []

    @Published private(set) var segments: [SpeakersFile.Segment]
    @Published private(set) var speakers: [String]
    @Published private(set) var dirty = false
    @Published private(set) var status: String?

    /// Loads the session in `folder`.
    ///
    /// Fails (returns `nil`) when `speakers.json` cannot be read or decoded, or when it
    /// contains no segments. `recap.json` and `cluster_fingerprints.json` are optional.
    init?(folder: URL, voiceprints: VoiceprintStore) {
        let speakersURL = folder.appendingPathComponent("speakers.json")
        guard let data = try? Data(contentsOf: speakersURL),
              let file = try? JSONDecoder().decode(SpeakersFile.self, from: data),
              !file.segments.isEmpty else { return nil }

        let legend = SpeakerEditing.orderedSpeakers(file.segments)
        let recap = RecapFile.read(from: folder.appendingPathComponent("recap.json"))

        self.folder = folder
        self.voiceprints = voiceprints
        self.base = file
        self.segments = file.segments
        self.speakers = legend
        self.originalSpeakers = legend
        self.recapFile = recap
        self.clusterFingerprints = Self.loadFingerprints(folder.appendingPathComponent("cluster_fingerprints.json"))
        self.title = recap?.title ?? file.app.capitalized + " call"
    }

    // MARK: - Edits

    /// Renames `name` to `newName` (trimmed). Ignored when the new name is empty or
    /// identical to the old one.
    func rename(_ name: String, to newName: String) {
        let to = newName.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !to.isEmpty, to != name else { return }
        renameOps.append((from: name, to: to))
        segments = SpeakerEditing.replaceSpeaker(name, with: to, in: segments)
        refresh()
    }

    /// Merge is just a rename onto an existing speaker. Ignored when both names are the
    /// same or the survivor is empty (mirrors the guard in `rename`).
    func merge(_ absorbed: String, into survivor: String) {
        guard absorbed != survivor, !survivor.isEmpty else { return }
        renameOps.append((from: absorbed, to: survivor))
        segments = SpeakerEditing.replaceSpeaker(absorbed, with: survivor, in: segments)
        refresh()
    }

    /// Reassigns the single segment at `index` to `speaker`. Not recorded as a rename op,
    /// so it never affects the recap remap or the voiceprints.
    func reassign(_ index: Int, to speaker: String) {
        segments = SpeakerEditing.reassign(index, to: speaker, in: segments)
        refresh()
    }

    /// Recomputes the legend and flags the model as having unsaved edits.
    private func refresh() {
        speakers = SpeakerEditing.orderedSpeakers(segments)
        dirty = true
        status = nil
    }

    // MARK: - Save

    /// Persist corrections: rewrite speakers.json, re-render artifacts, update voiceprints.
    ///
    /// If any write fails, the error is reported through `status`, the pending edits stay
    /// dirty, and the voiceprint store is left untouched so the save can be retried.
    func save() {
        let file = SpeakersFile(sessionId: base.sessionId, app: base.app, durationSec: base.durationSec,
                                speakers: buildSpeakerList(), segments: segments, models: base.models)
        let net = SpeakerEditing.netNameMap(originals: originalSpeakers, ops: renameOps)
        let remappedRecap = recapFile.map {
            RecapFile(title: title, result: SpeakerEditing.remap($0.result, names: net))
        }
        let result = remappedRecap?.result ?? RecapResult(sections: [], extras: nil)

        do {
            try file.write(to: folder.appendingPathComponent("speakers.json"))
            if let remappedRecap = remappedRecap {
                try remappedRecap.write(to: folder.appendingPathComponent("recap.json"))
            }
            try RecapRenderer.write(file: file, result: result, title: title, to: folder)
        } catch {
            status = "Save failed: \(error.localizedDescription)"
            return
        }

        reconcileVoiceprints(with: net)

        // Re-baseline: what was just written becomes the starting point for further edits
        // in this editor, so a later save composes on top of it instead of re-remapping
        // the load-time recap against load-time names.
        recapFile = remappedRecap
        originalSpeakers = SpeakerEditing.orderedSpeakers(segments)
        renameOps.removeAll()
        dirty = false
        status = "Saved — recap.html & transcript.md updated."
    }

    /// Voiceprints: reconcile per the net rename/merge map. Entries whose final name is
    /// an Unknown placeholder are skipped.
    private func reconcileVoiceprints(with net: [String: String]) {
        let stored = voiceprints.knownVoiceprints()
        for (orig, finalName) in net where !LabelMergeResponse.isUnknownName(finalName) {
            let finalHasPrint = clusterFingerprints[finalName] != nil || stored[finalName] != nil
            if finalHasPrint {
                // Merge into an existing identity → keep the survivor's print, drop the absorbed.
                if !LabelMergeResponse.isUnknownName(orig) { voiceprints.merge(orig, into: finalName) }
            } else if let vec = clusterFingerprints[orig] {
                // Rename / name an Unknown → learn this voice for the new name.
                voiceprints.enroll(name: finalName, vector: vec)
                if !LabelMergeResponse.isUnknownName(orig) { voiceprints.merge(orig, into: finalName) }
            } else {
                voiceprints.rename(orig, to: finalName)
            }
        }
    }

    /// Speaker roster from the edited segments: keep the original entry where the name
    /// is unchanged; mark new (renamed/merged) names as user-set (`source: "manual"`).
    private func buildSpeakerList() -> [SpeakersFile.Speaker] {
        // On duplicate names in the loaded roster the first entry wins.
        let byName = Dictionary(base.speakers.map { ($0.name, $0) }, uniquingKeysWith: { first, _ in first })
        return SpeakerEditing.orderedSpeakers(segments).map { name in
            byName[name] ?? SpeakersFile.Speaker(name: name, source: "manual", overlapConfidence: nil, matchSimilarity: nil)
        }
    }

    /// Reads a `{ name: [number, ...] }` JSON object. Returns an empty map when the file
    /// is missing or not of that shape; non-numeric array elements are dropped.
    private static func loadFingerprints(_ url: URL) -> [String: [Float]] {
        guard let data = try? Data(contentsOf: url),
              let obj = try? JSONSerialization.jsonObject(with: data) as? [String: [Any]] else { return [:] }
        return obj.mapValues { $0.compactMap { ($0 as? NSNumber)?.floatValue } }
    }
}
|
||||
@@ -2,7 +2,7 @@ import Foundation
|
||||
|
||||
/// One topic section: a contiguous run of transcript entries `[startIndex...endIndex]`
|
||||
/// (inclusive, indices into the canonical entries array) with an LLM title + summary.
|
||||
struct TopicSection: Equatable {
|
||||
struct TopicSection: Equatable, Codable {
|
||||
var title: String
|
||||
var summary: String
|
||||
var startIndex: Int
|
||||
@@ -12,12 +12,12 @@ struct TopicSection: Equatable {
|
||||
/// Structured "meeting extras" extracted from the named transcript. Mirrors
|
||||
/// recap-relay's schema; speakers are real names (we already have them from
|
||||
/// label-merge), not anonymous cluster ids.
|
||||
struct MeetingExtras: Equatable {
|
||||
struct TLDR: Equatable { var summary: String; var primarySpeakers: [String] }
|
||||
struct Decision: Equatable { var statement: String; var agreedBy: [String]; var supportingOffset: Int? }
|
||||
struct ActionItem: Equatable { var description: String; var owner: String?; var dueHint: String?; var supportingOffset: Int? }
|
||||
struct OpenQuestion: Equatable { var question: String; var raisedBy: String? }
|
||||
struct KeyQuote: Equatable { var speaker: String?; var offset: Int?; var quote: String; var whyNotable: String }
|
||||
struct MeetingExtras: Equatable, Codable {
|
||||
struct TLDR: Equatable, Codable { var summary: String; var primarySpeakers: [String] }
|
||||
struct Decision: Equatable, Codable { var statement: String; var agreedBy: [String]; var supportingOffset: Int? }
|
||||
struct ActionItem: Equatable, Codable { var description: String; var owner: String?; var dueHint: String?; var supportingOffset: Int? }
|
||||
struct OpenQuestion: Equatable, Codable { var question: String; var raisedBy: String? }
|
||||
struct KeyQuote: Equatable, Codable { var speaker: String?; var offset: Int?; var quote: String; var whyNotable: String }
|
||||
|
||||
var tldr: TLDR
|
||||
var decisions: [Decision]
|
||||
@@ -32,7 +32,24 @@ struct MeetingExtras: Equatable {
|
||||
|
||||
/// The assembled recap for one session: the topic sections + (optional) extras,
/// over the session's transcript. Rendered to `transcript.md` / `recap.html`.
/// `Codable` so it can be persisted inside `recap.json`.
struct RecapResult: Equatable, Codable {
    /// Topic sections of the recap.
    var sections: [TopicSection]
    /// Structured meeting extras; `nil` when the recap has none.
    var extras: MeetingExtras?
}
|
||||
|
||||
/// On-disk `recap.json` payload: the recap result together with its title. Keeping it
/// lets the speaker editor re-render `recap.html` / `transcript.md` after corrections
/// without another LLM call (a "Regenerate" action re-runs analysis when the user wants
/// fresh summaries).
struct RecapFile: Equatable, Codable {
    var title: String
    var result: RecapResult

    /// Encodes this value as pretty-printed JSON with sorted keys and writes it to `url`.
    /// - Throws: any encoding or file-write error.
    func write(to url: URL) throws {
        let encoder = JSONEncoder()
        encoder.outputFormatting = [.prettyPrinted, .sortedKeys]
        let payload = try encoder.encode(self)
        try payload.write(to: url)
    }

    /// Reads and decodes a `RecapFile` from `url`.
    /// - Returns: the decoded file, or `nil` when it cannot be read or decoded.
    static func read(from url: URL) -> RecapFile? {
        let contents = try? Data(contentsOf: url)
        return contents.flatMap { try? JSONDecoder().decode(RecapFile.self, from: $0) }
    }
}
|
||||
|
||||
@@ -0,0 +1,91 @@
|
||||
import Foundation
|
||||
|
||||
/// Pure transforms for speaker corrections: rename, merge (rename onto an existing
/// name), and per-segment reassignment, plus remapping speaker names through a
/// recap's text/structured fields. No UI/IO — fully unit-testable.
enum SpeakerEditing {
    typealias Segment = SpeakersFile.Segment

    /// Distinct speakers in first-appearance order (the legend). Segments with an empty
    /// speaker are ignored.
    static func orderedSpeakers(_ segments: [Segment]) -> [String] {
        var seen = Set<String>()
        var order: [String] = []
        for segment in segments where !segment.speaker.isEmpty {
            if seen.insert(segment.speaker).inserted { order.append(segment.speaker) }
        }
        return order
    }

    /// Replace every `from` with `to` across segments. Rename when `to` is new; a
    /// merge when `to` already exists — same primitive either way. Returns the input
    /// unchanged when `to` is empty or equal to `from`.
    static func replaceSpeaker(_ from: String, with to: String, in segments: [Segment]) -> [Segment] {
        guard from != to, !to.isEmpty else { return segments }
        return segments.map {
            $0.speaker == from ? Segment(start: $0.start, end: $0.end, speaker: to, text: $0.text) : $0
        }
    }

    /// Reassign a single segment to another speaker. Returns the input unchanged when
    /// `index` is out of range or `speaker` is empty.
    static func reassign(_ index: Int, to speaker: String, in segments: [Segment]) -> [Segment] {
        guard segments.indices.contains(index), !speaker.isEmpty else { return segments }
        var out = segments
        let s = out[index]
        out[index] = Segment(start: s.start, end: s.end, speaker: speaker, text: s.text)
        return out
    }

    /// Compose an ordered list of (from → to) rename/merge ops into the net
    /// original→final map (per-segment reassignments are NOT renames, so they don't
    /// appear here). Only entries that actually changed are returned.
    ///
    /// Duplicate names in `originals` are tolerated (they collapse to one entry).
    static func netNameMap(originals: [String], ops: [(from: String, to: String)]) -> [String: String] {
        var current = Dictionary(originals.map { ($0, $0) }, uniquingKeysWith: { first, _ in first })
        for op in ops {
            // Every original whose current name is `op.from` follows the rename.
            current = current.mapValues { $0 == op.from ? op.to : $0 }
        }
        return current.filter { $0.key != $0.value }
    }

    // MARK: - Recap remapping

    /// Apply a name map to a recap's structured fields (exact) and free text
    /// (whole-word), so a rename/merge is reflected in summaries, the TLDR, and the
    /// extras attributions without re-running the LLM. An empty map returns `result`
    /// unchanged.
    static func remap(_ result: RecapResult, names map: [String: String]) -> RecapResult {
        guard !map.isEmpty else { return result }
        func exact(_ s: String?) -> String? { s.map { map[$0] ?? $0 } }
        func exactList(_ a: [String]) -> [String] { a.map { map[$0] ?? $0 } }

        let sections = result.sections.map {
            TopicSection(title: replaceWords($0.title, map),
                         summary: replaceWords($0.summary, map),
                         startIndex: $0.startIndex, endIndex: $0.endIndex)
        }
        var extras = result.extras
        if let x = result.extras {
            extras = MeetingExtras(
                tldr: .init(summary: replaceWords(x.tldr.summary, map),
                            primarySpeakers: exactList(x.tldr.primarySpeakers)),
                decisions: x.decisions.map { .init(statement: replaceWords($0.statement, map),
                                                   agreedBy: exactList($0.agreedBy), supportingOffset: $0.supportingOffset) },
                actionItems: x.actionItems.map { .init(description: replaceWords($0.description, map),
                                                       owner: exact($0.owner), dueHint: $0.dueHint, supportingOffset: $0.supportingOffset) },
                openQuestions: x.openQuestions.map { .init(question: replaceWords($0.question, map), raisedBy: exact($0.raisedBy)) },
                keyQuotes: x.keyQuotes.map { .init(speaker: exact($0.speaker), offset: $0.offset,
                                                   quote: replaceWords($0.quote, map), whyNotable: replaceWords($0.whyNotable, map)) })
        }
        return RecapResult(sections: sections, extras: extras)
    }

    /// Whole-word replace each `from`→`to` in free text (case-sensitive). Used so a
    /// renamed speaker's name updates inside summaries without clobbering substrings.
    ///
    /// All names are replaced in a single pass over the original text, so chained or
    /// swapped maps (e.g. `A→B` together with `B→C`) never rewrite text that an earlier
    /// replacement produced, and the result does not depend on dictionary order.
    static func replaceWords(_ text: String, _ map: [String: String]) -> String {
        let active = map.filter { $0.key != $0.value && !$0.key.isEmpty }
        guard !active.isEmpty else { return text }

        // Longest name first so "Ann Lee" is preferred over "Ann" where both could match.
        let alternatives = active.keys
            .sorted { $0.count != $1.count ? $0.count > $1.count : $0 < $1 }
            .map { NSRegularExpression.escapedPattern(for: $0) }
            .joined(separator: "|")
        // Lookarounds instead of `\b`: same result for names with word characters at both
        // ends, but names that start or end with punctuation (e.g. "Jr.") still match.
        let pattern = "(?<!\\w)(?:" + alternatives + ")(?!\\w)"
        guard let re = try? NSRegularExpression(pattern: pattern) else { return text }

        let source = text as NSString
        var out = ""
        var cursor = 0
        for match in re.matches(in: text, range: NSRange(location: 0, length: source.length)) {
            let found = source.substring(with: match.range)
            out += source.substring(with: NSRange(location: cursor, length: match.range.location - cursor))
            out += active[found] ?? found
            cursor = match.range.location + match.range.length
        }
        out += source.substring(from: cursor)
        return out
    }
}
|
||||
Reference in New Issue
Block a user