Speaker corrections: rename / merge / reassign + voice learning
Native editor to fix speaker-ID errors after transcription (modeled on recap-relay's correction UX): rename a speaker in the legend, merge two speakers, or reassign an individual transcript line. Saving rewrites speakers.json, re-renders transcript.md + recap.html, and updates the voiceprint memory — so a correction compounds: naming an "Unknown" speaker teaches that voice for future calls. - SpeakerEditing (pure, tested): replaceSpeaker (rename = merge-onto-existing), reassign, netNameMap (compose ops), and remap (apply a name map to a recap's structured fields + whole-word free text, so summaries/extras update without re-LLM). - RecapEditModel (@MainActor): loads speakers.json (+ optional recap.json + cluster_fingerprints.json); on save writes the resolved speakers.json, re-renders, and reconciles voiceprints — merge keeps the survivor's print; rename/name-an-Unknown enrolls the cluster's fingerprint under the new name. - TranscriptEditorView (SwiftUI) + EditorWindow (AppKit window for the LSUIElement app); menu gains "Edit speakers". - Pipeline now persists cluster_fingerprints.json (every cluster incl. Unknown) and recap.json (RecapFile) so the editor can learn voices + re-render offline. - RecapModels made Codable; TranscriptAssembler exposes allFingerprints; VoiceprintStore gains enroll() + merge(). 52/52 XCTest (6 new, incl. a full rename→artifacts→voiceprint round-trip on disk).
This commit is contained in:
@@ -66,11 +66,34 @@ final class VoiceprintStore {
|
|||||||
|
|
||||||
func rename(_ old: String, to new: String) {
|
func rename(_ old: String, to new: String) {
|
||||||
lock.lock(); defer { lock.unlock() }
|
lock.lock(); defer { lock.unlock() }
|
||||||
guard let e = entriesStore.removeValue(forKey: old) else { return }
|
guard old != new, let e = entriesStore.removeValue(forKey: old) else { return }
|
||||||
entriesStore[new] = e
|
entriesStore[new] = e
|
||||||
save()
|
save()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Stores `vector` as the voiceprint for `name`, creating the entry when it does
/// not exist yet and bumping its call count either way. Used when the user gives
/// an "Unknown" speaker a real name, so that voice is remembered for future calls.
///
/// Does nothing when `name` is empty, `name` is an Unknown placeholder, or
/// `vector` is empty.
func enroll(name: String, vector: [Float]) {
    if name.isEmpty || Self.isUnknown(name) || vector.isEmpty { return }

    lock.lock()
    defer { lock.unlock() }

    let stamp = ISO8601DateFormatter().string(from: Date())
    if var existing = entriesStore[name] {
        // Refresh the stored print in place.
        existing.vector = vector
        existing.updated = stamp
        existing.calls += 1
        entriesStore[name] = existing
    } else {
        // First sighting of this name: the enrollment itself counts as one call.
        entriesStore[name] = Entry(vector: vector, updated: stamp, calls: 1)
    }
    save()
}
|
||||||
|
|
||||||
|
/// Folds `absorbed` into `survivor` after the user declared them the same person:
/// the absorbed entry is deleted and the survivor's entry is left untouched.
/// Merging a name into itself is a no-op (nothing is removed or saved).
func merge(_ absorbed: String, into survivor: String) {
    lock.lock()
    defer { lock.unlock() }

    if absorbed == survivor { return }
    entriesStore[absorbed] = nil
    save()
}
|
||||||
|
|
||||||
func remove(_ name: String) {
|
func remove(_ name: String) {
|
||||||
lock.lock(); defer { lock.unlock() }
|
lock.lock(); defer { lock.unlock() }
|
||||||
entriesStore.removeValue(forKey: name)
|
entriesStore.removeValue(forKey: name)
|
||||||
|
|||||||
@@ -0,0 +1,120 @@
|
|||||||
|
import Foundation
|
||||||
|
|
||||||
|
/// Editable view-model for one session's speaker corrections.
///
/// Loads `speakers.json` (required) plus `recap.json` and `cluster_fingerprints.json`
/// (both optional), applies rename / merge / per-segment reassignment in memory, and
/// on `save()` rewrites `speakers.json`, re-renders `transcript.md` + `recap.html`,
/// and updates the voiceprint store (learning a voice when an Unknown speaker is named).
///
/// The model can be saved repeatedly: after every successful save the in-memory
/// baseline (speaker names, recap, cluster fingerprints) is advanced to the state
/// just written, so later edits are composed on top of it.
@MainActor
final class RecapEditModel: ObservableObject {
    let folder: URL
    let title: String
    private let voiceprints: VoiceprintStore
    private let base: SpeakersFile
    /// Recap as of the last save (or load); `nil` when the session has no `recap.json`.
    private var recapFile: RecapFile?
    /// Cluster voiceprints keyed by the speaker names in effect at the last save (or load).
    private var clusterFingerprints: [String: [Float]]
    /// Speaker names in effect at the last save (or load): the baseline `renameOps` apply to.
    private var originalSpeakers: [String]
    /// Rename/merge operations made since the last save, in the order they were made.
    private var renameOps: [(from: String, to: String)] = []

    @Published private(set) var segments: [SpeakersFile.Segment]
    @Published private(set) var speakers: [String]
    @Published private(set) var dirty = false
    @Published private(set) var status: String?

    /// Fails (returns `nil`) when `speakers.json` is missing, undecodable, or has no segments.
    init?(folder: URL, voiceprints: VoiceprintStore) {
        let speakersURL = folder.appendingPathComponent("speakers.json")
        guard let data = try? Data(contentsOf: speakersURL),
              let file = try? JSONDecoder().decode(SpeakersFile.self, from: data),
              !file.segments.isEmpty else { return nil }
        let recap = RecapFile.read(from: folder.appendingPathComponent("recap.json"))
        self.folder = folder
        self.voiceprints = voiceprints
        self.base = file
        self.segments = file.segments
        self.speakers = SpeakerEditing.orderedSpeakers(file.segments)
        self.originalSpeakers = SpeakerEditing.orderedSpeakers(file.segments)
        self.recapFile = recap
        self.clusterFingerprints = Self.loadFingerprints(folder.appendingPathComponent("cluster_fingerprints.json"))
        self.title = recap?.title ?? file.app.capitalized + " call"
    }

    // MARK: - Edits

    /// Rename `name` to the trimmed `newName`. Ignored when the trimmed name is
    /// empty or equal to `name`. Renaming onto an existing speaker acts as a merge.
    func rename(_ name: String, to newName: String) {
        let to = newName.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !to.isEmpty, to != name else { return }
        renameOps.append((from: name, to: to))
        segments = SpeakerEditing.replaceSpeaker(name, with: to, in: segments)
        refresh()
    }

    /// Merge is just a rename onto an existing speaker.
    func merge(_ absorbed: String, into survivor: String) {
        guard absorbed != survivor else { return }
        renameOps.append((from: absorbed, to: survivor))
        segments = SpeakerEditing.replaceSpeaker(absorbed, with: survivor, in: segments)
        refresh()
    }

    /// Reassign the single segment at `index` to `speaker`. This is not recorded
    /// as a rename, so it never touches recap text or voiceprints.
    func reassign(_ index: Int, to speaker: String) {
        segments = SpeakerEditing.reassign(index, to: speaker, in: segments)
        refresh()
    }

    /// Recompute the legend and mark the model as having unsaved edits.
    private func refresh() {
        speakers = SpeakerEditing.orderedSpeakers(segments)
        dirty = true
        status = nil
    }

    // MARK: - Save

    /// Persist corrections: rewrite speakers.json, re-render artifacts, update voiceprints.
    ///
    /// If `speakers.json` cannot be written the save is abandoned: the edits stay
    /// pending (`dirty` remains true) and `status` carries the error. Failures while
    /// writing `recap.json` or re-rendering do not stop the save, but are named in
    /// `status` instead of being reported as success.
    func save() {
        let file = SpeakersFile(sessionId: base.sessionId, app: base.app, durationSec: base.durationSec,
                                speakers: buildSpeakerList(), segments: segments, models: base.models)
        do {
            try file.write(to: folder.appendingPathComponent("speakers.json"))
        } catch {
            status = "Save failed — could not write speakers.json: \(error.localizedDescription)"
            return
        }

        // Net rename/merge map relative to the last saved (or loaded) state.
        let net = SpeakerEditing.netNameMap(originals: originalSpeakers, ops: renameOps)
        let result = recapFile.map { SpeakerEditing.remap($0.result, names: net) } ?? RecapResult(sections: [], extras: nil)

        var notUpdated: [String] = []
        if recapFile != nil {
            do {
                try RecapFile(title: title, result: result).write(to: folder.appendingPathComponent("recap.json"))
            } catch {
                notUpdated.append("recap.json")
            }
        }
        do {
            try RecapRenderer.write(file: file, result: result, title: title, to: folder)
        } catch {
            notUpdated.append("recap.html / transcript.md")
        }

        // Voiceprints: reconcile per the net rename/merge map.
        let stored = voiceprints.knownVoiceprints()
        for (orig, target) in net where !LabelMergeResponse.isUnknownName(target) {
            let targetHasPrint = clusterFingerprints[target] != nil || stored[target] != nil
            if targetHasPrint {
                // Merge into an existing identity → keep the survivor's print, drop the absorbed.
                if !LabelMergeResponse.isUnknownName(orig) { voiceprints.merge(orig, into: target) }
            } else if let vec = clusterFingerprints[orig] {
                // Rename / name an Unknown → learn this voice for the new name.
                voiceprints.enroll(name: target, vector: vec)
                if !LabelMergeResponse.isUnknownName(orig) { voiceprints.merge(orig, into: target) }
            } else {
                voiceprints.rename(orig, to: target)
            }
        }

        // Advance the baseline to what was just written. Without this, a second
        // save would remap the *load-time* recap with a map that no longer starts
        // from the load-time names, reverting recap.json / recap.html to the
        // uncorrected names and losing the fingerprints of renamed speakers.
        if recapFile != nil { recapFile = RecapFile(title: title, result: result) }
        clusterFingerprints = Self.rekey(clusterFingerprints, by: net)
        originalSpeakers = SpeakerEditing.orderedSpeakers(segments)
        renameOps.removeAll()

        dirty = false
        status = notUpdated.isEmpty
            ? "Saved — recap.html & transcript.md updated."
            : "Saved speakers.json, but could not update \(notUpdated.joined(separator: ", "))."
    }

    /// Speaker roster from the edited segments: keep the original source where the
    /// name is unchanged; mark new (renamed/merged) names as user-set.
    private func buildSpeakerList() -> [SpeakersFile.Speaker] {
        let byName = Dictionary(base.speakers.map { ($0.name, $0) }, uniquingKeysWith: { a, _ in a })
        return SpeakerEditing.orderedSpeakers(segments).map { name in
            byName[name] ?? SpeakersFile.Speaker(name: name, source: "manual", overlapConfidence: nil, matchSimilarity: nil)
        }
    }

    /// Move fingerprints from their old names to their new names according to `net`.
    /// Names not mentioned in `net` keep their print; when a renamed speaker lands on
    /// a name that already has a print (a merge), the existing print wins.
    private static func rekey(_ prints: [String: [Float]], by net: [String: String]) -> [String: [Float]] {
        var out = prints.filter { net[$0.key] == nil }
        // Sorted so the outcome is deterministic when several names collapse onto one.
        for orig in net.keys.sorted() {
            guard let target = net[orig], let vec = prints[orig], out[target] == nil else { continue }
            out[target] = vec
        }
        return out
    }

    /// Read `cluster_fingerprints.json` as name → vector; returns an empty map when
    /// the file is missing or is not a JSON object of arrays. Non-numeric array
    /// elements are dropped.
    private static func loadFingerprints(_ url: URL) -> [String: [Float]] {
        guard let data = try? Data(contentsOf: url),
              let obj = try? JSONSerialization.jsonObject(with: data) as? [String: [Any]] else { return [:] }
        return obj.mapValues { $0.compactMap { ($0 as? NSNumber)?.floatValue } }
    }
}
|
||||||
@@ -2,7 +2,7 @@ import Foundation
|
|||||||
|
|
||||||
/// One topic section: a contiguous run of transcript entries `[startIndex...endIndex]`
|
/// One topic section: a contiguous run of transcript entries `[startIndex...endIndex]`
|
||||||
/// (inclusive, indices into the canonical entries array) with an LLM title + summary.
|
/// (inclusive, indices into the canonical entries array) with an LLM title + summary.
|
||||||
struct TopicSection: Equatable {
|
struct TopicSection: Equatable, Codable {
|
||||||
var title: String
|
var title: String
|
||||||
var summary: String
|
var summary: String
|
||||||
var startIndex: Int
|
var startIndex: Int
|
||||||
@@ -12,12 +12,12 @@ struct TopicSection: Equatable {
|
|||||||
/// Structured "meeting extras" extracted from the named transcript. Mirrors
|
/// Structured "meeting extras" extracted from the named transcript. Mirrors
|
||||||
/// recap-relay's schema; speakers are real names (we already have them from
|
/// recap-relay's schema; speakers are real names (we already have them from
|
||||||
/// label-merge), not anonymous cluster ids.
|
/// label-merge), not anonymous cluster ids.
|
||||||
struct MeetingExtras: Equatable {
|
struct MeetingExtras: Equatable, Codable {
|
||||||
struct TLDR: Equatable { var summary: String; var primarySpeakers: [String] }
|
struct TLDR: Equatable, Codable { var summary: String; var primarySpeakers: [String] }
|
||||||
struct Decision: Equatable { var statement: String; var agreedBy: [String]; var supportingOffset: Int? }
|
struct Decision: Equatable, Codable { var statement: String; var agreedBy: [String]; var supportingOffset: Int? }
|
||||||
struct ActionItem: Equatable { var description: String; var owner: String?; var dueHint: String?; var supportingOffset: Int? }
|
struct ActionItem: Equatable, Codable { var description: String; var owner: String?; var dueHint: String?; var supportingOffset: Int? }
|
||||||
struct OpenQuestion: Equatable { var question: String; var raisedBy: String? }
|
struct OpenQuestion: Equatable, Codable { var question: String; var raisedBy: String? }
|
||||||
struct KeyQuote: Equatable { var speaker: String?; var offset: Int?; var quote: String; var whyNotable: String }
|
struct KeyQuote: Equatable, Codable { var speaker: String?; var offset: Int?; var quote: String; var whyNotable: String }
|
||||||
|
|
||||||
var tldr: TLDR
|
var tldr: TLDR
|
||||||
var decisions: [Decision]
|
var decisions: [Decision]
|
||||||
@@ -32,7 +32,24 @@ struct MeetingExtras: Equatable {
|
|||||||
|
|
||||||
/// The assembled recap for one session: the topic sections + (optional) extras,
|
/// The assembled recap for one session: the topic sections + (optional) extras,
|
||||||
/// over the session's transcript. Rendered to `transcript.md` / `recap.html`.
|
/// over the session's transcript. Rendered to `transcript.md` / `recap.html`.
|
||||||
struct RecapResult: Equatable {
|
struct RecapResult: Equatable, Codable {
|
||||||
var sections: [TopicSection]
|
var sections: [TopicSection]
|
||||||
var extras: MeetingExtras?
|
var extras: MeetingExtras?
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Persisted `recap.json` — the recap result plus its title, so the speaker editor
/// can re-render `recap.html` / `transcript.md` after corrections without re-calling
/// the LLM (a "Regenerate" action re-runs analysis when the user wants fresh summaries).
struct RecapFile: Equatable, Codable {
    var title: String
    var result: RecapResult

    /// Encode as pretty-printed JSON with sorted keys and write it to `url`.
    ///
    /// The write is atomic, so an interrupted save leaves the previous file in
    /// place rather than a truncated one (which `read(from:)` would silently
    /// treat as "no recap").
    /// - Throws: any encoding or file-system error.
    func write(to url: URL) throws {
        let encoder = JSONEncoder()
        encoder.outputFormatting = [.prettyPrinted, .sortedKeys]
        try encoder.encode(self).write(to: url, options: .atomic)
    }

    /// Load a recap from `url`; returns `nil` when the file is unreadable or does
    /// not decode as a `RecapFile`.
    static func read(from url: URL) -> RecapFile? {
        guard let data = try? Data(contentsOf: url) else { return nil }
        return try? JSONDecoder().decode(RecapFile.self, from: data)
    }
}
|
||||||
|
|||||||
@@ -0,0 +1,91 @@
|
|||||||
|
import Foundation
|
||||||
|
|
||||||
|
/// Pure transforms for speaker corrections: rename, merge (rename onto an existing
/// name), and per-segment reassignment, plus remapping speaker names through a
/// recap's text/structured fields. No UI/IO — fully unit-testable.
enum SpeakerEditing {
    typealias Segment = SpeakersFile.Segment

    /// Distinct speakers in first-appearance order (the legend). Segments with an
    /// empty speaker are ignored.
    static func orderedSpeakers(_ segments: [Segment]) -> [String] {
        var seen = Set<String>(), order: [String] = []
        for s in segments where !s.speaker.isEmpty && seen.insert(s.speaker).inserted {
            order.append(s.speaker)
        }
        return order
    }

    /// Replace every `from` with `to` across segments. Rename when `to` is new; a
    /// merge when `to` already exists — same primitive either way. Returns the
    /// input unchanged when `from == to` or `to` is empty.
    static func replaceSpeaker(_ from: String, with to: String, in segments: [Segment]) -> [Segment] {
        guard from != to, !to.isEmpty else { return segments }
        return segments.map {
            $0.speaker == from ? Segment(start: $0.start, end: $0.end, speaker: to, text: $0.text) : $0
        }
    }

    /// Reassign a single segment to another speaker. Returns the input unchanged
    /// when `index` is out of range or `speaker` is empty.
    static func reassign(_ index: Int, to speaker: String, in segments: [Segment]) -> [Segment] {
        guard segments.indices.contains(index), !speaker.isEmpty else { return segments }
        var out = segments
        let s = out[index]
        out[index] = Segment(start: s.start, end: s.end, speaker: speaker, text: s.text)
        return out
    }

    /// Compose an ordered list of (from → to) rename/merge ops into the net
    /// original→final map (per-segment reassignments are NOT renames, so they don't
    /// appear here). Only entries that actually changed are returned.
    ///
    /// Duplicate names in `originals` are tolerated (collapsed) rather than trapping.
    static func netNameMap(originals: [String], ops: [(from: String, to: String)]) -> [String: String] {
        var cur = Dictionary(originals.map { ($0, $0) }, uniquingKeysWith: { first, _ in first })
        for op in ops {
            // Every original currently known as `op.from` is now known as `op.to`.
            for (k, v) in cur where v == op.from { cur[k] = op.to }
        }
        return cur.filter { $0.key != $0.value }
    }

    // MARK: - Recap remapping

    /// Apply a name map to a recap's structured fields (exact) and free text
    /// (whole-word), so a rename/merge is reflected in summaries, the TLDR, and the
    /// extras attributions without re-running the LLM.
    static func remap(_ result: RecapResult, names map: [String: String]) -> RecapResult {
        guard !map.isEmpty else { return result }
        func exact(_ s: String?) -> String? { s.flatMap { map[$0] ?? $0 } }
        func exactList(_ a: [String]) -> [String] { a.map { map[$0] ?? $0 } }

        let sections = result.sections.map {
            TopicSection(title: replaceWords($0.title, map),
                         summary: replaceWords($0.summary, map),
                         startIndex: $0.startIndex, endIndex: $0.endIndex)
        }
        var extras = result.extras
        if let x = result.extras {
            extras = MeetingExtras(
                tldr: .init(summary: replaceWords(x.tldr.summary, map),
                            primarySpeakers: exactList(x.tldr.primarySpeakers)),
                decisions: x.decisions.map { .init(statement: replaceWords($0.statement, map),
                                                   agreedBy: exactList($0.agreedBy), supportingOffset: $0.supportingOffset) },
                actionItems: x.actionItems.map { .init(description: replaceWords($0.description, map),
                                                       owner: exact($0.owner), dueHint: $0.dueHint, supportingOffset: $0.supportingOffset) },
                openQuestions: x.openQuestions.map { .init(question: replaceWords($0.question, map), raisedBy: exact($0.raisedBy)) },
                keyQuotes: x.keyQuotes.map { .init(speaker: exact($0.speaker), offset: $0.offset,
                                                   quote: replaceWords($0.quote, map), whyNotable: replaceWords($0.whyNotable, map)) })
        }
        return RecapResult(sections: sections, extras: extras)
    }

    /// Whole-word replace each `from`→`to` in free text (case-sensitive). Used so a
    /// renamed speaker's name updates inside summaries without clobbering substrings.
    ///
    /// All names are replaced in ONE pass over the original text, so the output of
    /// one replacement is never fed into another (a map such as `["B": "Bob", "A": "B"]`
    /// turns "A" into "B", not "Bob") and the result does not depend on dictionary
    /// iteration order. Longer names are tried first, so "Bob Smith" wins over "Bob".
    /// "Whole word" means the match is not adjacent to a `\w` character on either
    /// side, which also works for names that begin or end with punctuation.
    static func replaceWords(_ text: String, _ map: [String: String]) -> String {
        let names = map
            .filter { !$0.key.isEmpty && $0.key != $0.value }
            .keys
            .sorted { $0.count != $1.count ? $0.count > $1.count : $0 < $1 }
        guard !names.isEmpty else { return text }

        let alternation = names.map { NSRegularExpression.escapedPattern(for: $0) }.joined(separator: "|")
        guard let re = try? NSRegularExpression(pattern: "(?<!\\w)(?:" + alternation + ")(?!\\w)") else { return text }

        let source = text as NSString
        var out = ""
        var cursor = 0
        for match in re.matches(in: text, range: NSRange(location: 0, length: source.length)) {
            let hit = source.substring(with: match.range)
            // Copy the untouched text before this match, then the replacement.
            out += source.substring(with: NSRange(location: cursor, length: match.range.location - cursor))
            out += map[hit] ?? hit
            cursor = match.range.location + match.range.length
        }
        out += source.substring(from: cursor)
        return out
    }
}
|
||||||
@@ -406,6 +406,7 @@ final class SessionController: ObservableObject {
|
|||||||
guard let result = try? await analyzer.recap(file: speakers) else { return }
|
guard let result = try? await analyzer.recap(file: speakers) else { return }
|
||||||
let title = Self.recapTitle(app: inputs.app, sessionId: inputs.sessionId)
|
let title = Self.recapTitle(app: inputs.app, sessionId: inputs.sessionId)
|
||||||
try? RecapRenderer.write(file: speakers, result: result, title: title, to: inputs.folder)
|
try? RecapRenderer.write(file: speakers, result: result, title: title, to: inputs.folder)
|
||||||
|
try? RecapFile(title: title, result: result).write(to: inputs.folder.appendingPathComponent("recap.json"))
|
||||||
let url = inputs.folder.appendingPathComponent("recap.html")
|
let url = inputs.folder.appendingPathComponent("recap.html")
|
||||||
if FileManager.default.fileExists(atPath: url.path) { self.recapURL = url }
|
if FileManager.default.fileExists(atPath: url.path) { self.recapURL = url }
|
||||||
}
|
}
|
||||||
@@ -421,6 +422,21 @@ final class SessionController: ObservableObject {
|
|||||||
return "\(appName) call — \(date) \(time)".trimmingCharacters(in: .whitespaces)
|
return "\(appName) call — \(date) \(time)".trimmingCharacters(in: .whitespaces)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MARK: - Speaker corrections
|
||||||
|
|
||||||
|
/// Whether the last session's folder contains a `speakers.json` to correct.
/// `false` when there is no last session. Only the file's existence is checked.
var canEditSpeakers: Bool {
    if let dir = lastSession?.folder {
        let speakersPath = dir.appendingPathComponent("speakers.json").path
        return FileManager.default.fileExists(atPath: speakersPath)
    }
    return false
}
|
||||||
|
|
||||||
|
/// Shows the speaker-correction editor for the last session. Silently does
/// nothing when there is no last session or the edit model cannot be created
/// for its folder.
func editLastSession() {
    guard let folder = lastSession?.folder else { return }
    guard let editor = RecapEditModel(folder: folder, voiceprints: voiceprints) else { return }
    EditorWindow.shared.show(model: editor)
}
|
||||||
|
|
||||||
private func fail(_ message: String) {
|
private func fail(_ message: String) {
|
||||||
recorder = nil
|
recorder = nil
|
||||||
visualCapture = nil // recorder.start() failed before visual started; nothing running
|
visualCapture = nil // recorder.start() failed before visual started; nothing running
|
||||||
|
|||||||
@@ -11,7 +11,8 @@ enum TranscriptAssembler {
|
|||||||
|
|
||||||
struct Assembled {
|
struct Assembled {
|
||||||
let speakersFile: SpeakersFile
|
let speakersFile: SpeakersFile
|
||||||
let fingerprints: [String: [Float]] // name -> 192-dim, for VoiceprintStore
|
let fingerprints: [String: [Float]] // confidently-named only, for VoiceprintStore
|
||||||
|
let allFingerprints: [String: [Float]] // EVERY cluster incl. Unknown — for editor voice-learning
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Source ranking when the same name appears across chunks with different sources.
|
/// Source ranking when the same name appears across chunks with different sources.
|
||||||
@@ -33,6 +34,7 @@ enum TranscriptAssembler {
|
|||||||
var segments: [SpeakersFile.Segment] = []
|
var segments: [SpeakersFile.Segment] = []
|
||||||
var bestSpeaker: [String: SpeakersFile.Speaker] = [:]
|
var bestSpeaker: [String: SpeakersFile.Speaker] = [:]
|
||||||
var fingerprints: [String: [Float]] = [:]
|
var fingerprints: [String: [Float]] = [:]
|
||||||
|
var allFingerprints: [String: [Float]] = [:]
|
||||||
var models: [String: String] = [:]
|
var models: [String: String] = [:]
|
||||||
var duration = 0.0
|
var duration = 0.0
|
||||||
|
|
||||||
@@ -58,13 +60,14 @@ enum TranscriptAssembler {
|
|||||||
} else {
|
} else {
|
||||||
bestSpeaker[sp.name] = candidate
|
bestSpeaker[sp.name] = candidate
|
||||||
}
|
}
|
||||||
// Collect named fingerprints only (never Unknown_N / Speaker_unknown).
|
if let fp = sp.fingerprint, fp.count > 0 {
|
||||||
if !isUnknown(sp.name), let fp = sp.fingerprint, fp.count > 0 {
|
allFingerprints[sp.name] = fp // every cluster, for the editor
|
||||||
fingerprints[sp.name] = fp
|
if !isUnknown(sp.name) { fingerprints[sp.name] = fp } // named only, for the store
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (name, fp) in chunk.response.fingerprints where !isUnknown(name) && fp.count > 0 {
|
for (name, fp) in chunk.response.fingerprints where fp.count > 0 {
|
||||||
fingerprints[name] = fp
|
allFingerprints[name] = fp
|
||||||
|
if !isUnknown(name) { fingerprints[name] = fp }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -75,6 +78,6 @@ enum TranscriptAssembler {
|
|||||||
let file = SpeakersFile(
|
let file = SpeakersFile(
|
||||||
sessionId: sessionId, app: app, durationSec: duration,
|
sessionId: sessionId, app: app, durationSec: duration,
|
||||||
speakers: speakers, segments: segments, models: models)
|
speakers: speakers, segments: segments, models: models)
|
||||||
return Assembled(speakersFile: file, fingerprints: fingerprints)
|
return Assembled(speakersFile: file, fingerprints: fingerprints, allFingerprints: allFingerprints)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -97,6 +97,13 @@ final class TranscriptPipeline {
|
|||||||
|
|
||||||
let assembled = TranscriptAssembler.assemble(sessionId: sessionId, app: app, chunks: results)
|
let assembled = TranscriptAssembler.assemble(sessionId: sessionId, app: app, chunks: results)
|
||||||
try assembled.speakersFile.write(to: sessionFolder.appendingPathComponent("speakers.json"))
|
try assembled.speakersFile.write(to: sessionFolder.appendingPathComponent("speakers.json"))
|
||||||
|
// Persist every cluster's voiceprint (incl. Unknown) so the speaker editor can
|
||||||
|
// teach the store a voice when the user renames an Unknown to a real name.
|
||||||
|
if !assembled.allFingerprints.isEmpty,
|
||||||
|
let data = try? JSONSerialization.data(withJSONObject: assembled.allFingerprints.mapValues { $0.map(Double.init) },
|
||||||
|
options: [.sortedKeys]) {
|
||||||
|
try? data.write(to: sessionFolder.appendingPathComponent("cluster_fingerprints.json"))
|
||||||
|
}
|
||||||
return assembled.speakersFile
|
return assembled.speakersFile
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,32 @@
|
|||||||
|
import AppKit
|
||||||
|
import SwiftUI
|
||||||
|
|
||||||
|
/// Owns the single standalone, resizable window that hosts the speaker-correction
/// editor. A menu-bar (LSUIElement) app has no normal window scene, so the window
/// is created through AppKit and the app is activated to bring it to the front.
@MainActor
final class EditorWindow {
    static let shared = EditorWindow()
    /// The editor window, created on first use and reused afterwards.
    private var window: NSWindow?

    /// Present the editor for `model`, reusing the existing window when there is one
    /// (its content and title are swapped to the new model).
    func show(model: RecapEditModel) {
        let host = NSHostingController(rootView: TranscriptEditorView(model: model))
        let target: NSWindow

        if let existing = window {
            existing.contentViewController = host
            existing.title = model.title
            target = existing
        } else {
            let fresh = NSWindow(
                contentRect: NSRect(x: 0, y: 0, width: 640, height: 560),
                styleMask: [.titled, .closable, .resizable, .miniaturizable],
                backing: .buffered, defer: false)
            fresh.title = model.title
            // Keep the instance alive after the user closes it so it can be shown again.
            fresh.isReleasedWhenClosed = false
            fresh.center()
            fresh.contentViewController = host
            window = fresh
            target = fresh
        }

        NSApp.activate(ignoringOtherApps: true)
        target.makeKeyAndOrderFront(nil)
    }
}
|
||||||
@@ -94,6 +94,9 @@ struct MenuBarView: View {
|
|||||||
if let recap = session.recapURL {
|
if let recap = session.recapURL {
|
||||||
Button("Open recap") { NSWorkspace.shared.open(recap) }
|
Button("Open recap") { NSWorkspace.shared.open(recap) }
|
||||||
}
|
}
|
||||||
|
if session.canEditSpeakers {
|
||||||
|
Button("Edit speakers") { session.editLastSession() }
|
||||||
|
}
|
||||||
Spacer()
|
Spacer()
|
||||||
}
|
}
|
||||||
if !transcriptText.isEmpty {
|
if !transcriptText.isEmpty {
|
||||||
|
|||||||
@@ -0,0 +1,152 @@
|
|||||||
|
import SwiftUI
|
||||||
|
|
||||||
|
/// Speaker-correction editor: rename a speaker, merge two speakers, or reassign an
|
||||||
|
/// individual transcript line to a different speaker. Saving rewrites speakers.json,
|
||||||
|
/// re-renders the recap, and updates the voiceprint memory.
|
||||||
|
struct TranscriptEditorView: View {
|
||||||
|
@ObservedObject var model: RecapEditModel
|
||||||
|
|
||||||
|
@State private var renameTarget: String?
|
||||||
|
@State private var renameText = ""
|
||||||
|
@State private var reassignNewIndex: Int?
|
||||||
|
@State private var newSpeakerText = ""
|
||||||
|
|
||||||
|
var body: some View {
|
||||||
|
VStack(alignment: .leading, spacing: 0) {
|
||||||
|
header
|
||||||
|
Divider()
|
||||||
|
transcript
|
||||||
|
Divider()
|
||||||
|
footer
|
||||||
|
}
|
||||||
|
.frame(minWidth: 560, minHeight: 480)
|
||||||
|
.sheet(item: Binding(get: { renameTarget.map { IdentifiableString($0) } },
|
||||||
|
set: { renameTarget = $0?.value })) { item in
|
||||||
|
renameSheet(for: item.value)
|
||||||
|
}
|
||||||
|
.sheet(item: Binding(get: { reassignNewIndex.map { IdentifiableInt($0) } },
|
||||||
|
set: { reassignNewIndex = $0?.value })) { item in
|
||||||
|
newSpeakerSheet(for: item.value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MARK: - Header / legend
|
||||||
|
|
||||||
|
private var header: some View {
|
||||||
|
VStack(alignment: .leading, spacing: 8) {
|
||||||
|
Text(model.title).font(.headline)
|
||||||
|
Text("Fix speaker names, merge duplicates, or reassign a line. Saving updates the recap and remembers any names you set.")
|
||||||
|
.font(.caption).foregroundStyle(.secondary)
|
||||||
|
ScrollView(.horizontal, showsIndicators: false) {
|
||||||
|
HStack(spacing: 8) {
|
||||||
|
ForEach(model.speakers, id: \.self) { name in
|
||||||
|
Menu {
|
||||||
|
Button("Rename…") { renameText = name; renameTarget = name }
|
||||||
|
if model.speakers.count > 1 {
|
||||||
|
Menu("Merge into") {
|
||||||
|
ForEach(model.speakers.filter { $0 != name }, id: \.self) { other in
|
||||||
|
Button(other) { model.merge(name, into: other) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} label: {
|
||||||
|
Text(name).font(.caption).padding(.horizontal, 8).padding(.vertical, 3)
|
||||||
|
.background(Color.accentColor.opacity(0.18)).clipShape(Capsule())
|
||||||
|
}
|
||||||
|
.menuStyle(.borderlessButton).fixedSize()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
.padding(.vertical, 2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
.padding(12)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MARK: - Transcript
|
||||||
|
|
||||||
|
private var transcript: some View {
|
||||||
|
List {
|
||||||
|
ForEach(Array(model.segments.enumerated()), id: \.offset) { idx, seg in
|
||||||
|
HStack(alignment: .top, spacing: 10) {
|
||||||
|
Text(RecapAnalyzer.mmss(seg.start))
|
||||||
|
.font(.caption.monospacedDigit()).foregroundStyle(.secondary)
|
||||||
|
.frame(width: 52, alignment: .trailing)
|
||||||
|
Menu {
|
||||||
|
ForEach(model.speakers, id: \.self) { name in
|
||||||
|
Button(name) { model.reassign(idx, to: name) }
|
||||||
|
}
|
||||||
|
Divider()
|
||||||
|
Button("New name…") { newSpeakerText = ""; reassignNewIndex = idx }
|
||||||
|
} label: {
|
||||||
|
Text(seg.speaker).font(.caption.weight(.semibold))
|
||||||
|
.frame(width: 96, alignment: .leading)
|
||||||
|
}
|
||||||
|
.menuStyle(.borderlessButton).fixedSize()
|
||||||
|
Text(seg.text ?? "").font(.callout).textSelection(.enabled)
|
||||||
|
.frame(maxWidth: .infinity, alignment: .leading)
|
||||||
|
}
|
||||||
|
.padding(.vertical, 2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
.listStyle(.inset)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MARK: - Footer
|
||||||
|
|
||||||
|
private var footer: some View {
|
||||||
|
HStack {
|
||||||
|
Button("Save corrections") { model.save() }
|
||||||
|
.keyboardShortcut("s", modifiers: .command)
|
||||||
|
.disabled(!model.dirty)
|
||||||
|
if let s = model.status { Text(s).font(.caption).foregroundStyle(.green) }
|
||||||
|
Spacer()
|
||||||
|
if let recap = recapURL {
|
||||||
|
Button("Open recap") { NSWorkspace.shared.open(recap) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
.padding(12)
|
||||||
|
}
|
||||||
|
|
||||||
|
private var recapURL: URL? {
|
||||||
|
let u = model.folder.appendingPathComponent("recap.html")
|
||||||
|
return FileManager.default.fileExists(atPath: u.path) ? u : nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// MARK: - Sheets
|
||||||
|
|
||||||
|
private func renameSheet(for name: String) -> some View {
|
||||||
|
VStack(alignment: .leading, spacing: 12) {
|
||||||
|
Text("Rename “\(name)”").font(.headline)
|
||||||
|
TextField("New name", text: $renameText).textFieldStyle(.roundedBorder).frame(width: 260)
|
||||||
|
HStack {
|
||||||
|
Spacer()
|
||||||
|
Button("Cancel") { renameTarget = nil }
|
||||||
|
Button("Rename") {
|
||||||
|
model.rename(name, to: renameText)
|
||||||
|
renameTarget = nil
|
||||||
|
}.keyboardShortcut(.defaultAction).disabled(renameText.trimmingCharacters(in: .whitespaces).isEmpty)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
.padding(16)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builds the sheet that assigns a single transcript line to a newly typed
/// speaker name.
///
/// - Parameter index: Index of the line, forwarded to `model.reassign`.
/// - Returns: A text field bound to `newSpeakerText` plus Cancel/Assign
///   buttons. Both buttons clear `reassignNewIndex` when pressed.
private func newSpeakerSheet(for index: Int) -> some View {
    VStack(alignment: .leading, spacing: 12) {
        Text("Assign this line to a new speaker")
            .font(.headline)

        TextField("Speaker name", text: $newSpeakerText)
            .textFieldStyle(.roundedBorder)
            .frame(width: 260)

        HStack {
            Spacer()

            Button("Cancel") {
                reassignNewIndex = nil
            }

            Button("Assign") {
                // Runs after the reassignment (or the early exit), so the
                // index is cleared on every path.
                defer { reassignNewIndex = nil }
                let trimmedName = newSpeakerText.trimmingCharacters(in: .whitespaces)
                guard !trimmedName.isEmpty else { return }
                model.reassign(index, to: trimmedName)
            }
            .keyboardShortcut(.defaultAction)
            .disabled(newSpeakerText.trimmingCharacters(in: .whitespaces).isEmpty)
        }
    }
    .padding(16)
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Wraps a `String` so it can be used where an `Identifiable` value is
/// required; the wrapped string serves as its own `id`.
private struct IdentifiableString: Identifiable {
    let value: String

    init(_ wrapped: String) {
        self.value = wrapped
    }

    var id: String {
        return value
    }
}
|
||||||
|
/// Wraps an `Int` so it can be used where an `Identifiable` value is
/// required; the wrapped integer serves as its own `id`.
private struct IdentifiableInt: Identifiable {
    let value: Int

    init(_ wrapped: Int) {
        self.value = wrapped
    }

    var id: Int {
        return value
    }
}
|
||||||
@@ -0,0 +1,89 @@
|
|||||||
|
import XCTest
|
||||||
|
@testable import Ten31Transcripts
|
||||||
|
|
||||||
|
/// Tests for the `SpeakerEditing` helpers and for a full `RecapEditModel`
/// rename → save round-trip against files in a temporary directory.
final class SpeakerEditingTests: XCTestCase {
    /// Shorthand for building a transcript segment.
    private func seg(_ start: Double, _ end: Double, _ speaker: String, _ text: String) -> SpeakersFile.Segment {
        return SpeakersFile.Segment(start: start, end: end, speaker: speaker, text: text)
    }

    /// Replacing a speaker rewrites every segment attributed to that name.
    func testReplaceSpeakerRenamesAll() {
        let input = [
            seg(0, 1, "A", "x"),
            seg(1, 2, "B", "y"),
            seg(2, 3, "A", "z"),
        ]

        let renamed = SpeakerEditing.replaceSpeaker("A", with: "Alice", in: input)

        XCTAssertEqual(renamed.map(\.speaker), ["Alice", "B", "Alice"])
    }

    /// Replacing a speaker with a name that already exists merges the two.
    func testReplaceSpeakerMergesOntoExisting() {
        let input = [
            seg(0, 1, "A", "x"),
            seg(1, 2, "B", "y"),
        ]

        // merge B→A
        let merged = SpeakerEditing.replaceSpeaker("B", with: "A", in: input)

        XCTAssertEqual(SpeakerEditing.orderedSpeakers(merged), ["A"])
    }

    /// Reassigning by index changes only the segment at that index.
    func testReassignSingleSegment() {
        let input = [
            seg(0, 1, "A", "x"),
            seg(1, 2, "A", "y"),
        ]

        let reassigned = SpeakerEditing.reassign(1, to: "B", in: input)

        XCTAssertEqual(reassigned.map(\.speaker), ["A", "B"])
    }

    /// Chained operations (A→B, then B→C) compose into a net map onto the
    /// final name; a name that never changes has no entry.
    func testNetNameMapComposesChains() {
        let composed = SpeakerEditing.netNameMap(originals: ["A", "B", "C"], ops: [("A", "B"), ("B", "C")])

        XCTAssertEqual(composed["A"], "C")
        XCTAssertEqual(composed["B"], "C")
        XCTAssertNil(composed["C"])
    }

    /// `remap` updates structured speaker fields and whole-word matches in
    /// free text, leaving longer words that merely start with a name alone.
    func testRemapStructuredAndWordBoundaryText() {
        let original = RecapResult(
            sections: [TopicSection(title: "Grant intro", summary: "Grant and Unknown_0 talk; Grantham stays.", startIndex: 0, endIndex: 1)],
            extras: MeetingExtras(
                tldr: .init(summary: "Grant led.", primarySpeakers: ["Grant"]),
                decisions: [.init(statement: "ship", agreedBy: ["Grant", "Unknown_0"], supportingOffset: 1)],
                actionItems: [.init(description: "Unknown_0 sends doc", owner: "Unknown_0", dueHint: nil, supportingOffset: nil)],
                openQuestions: [],
                keyQuotes: [.init(speaker: "Unknown_0", offset: 2, quote: "go", whyNotable: "")]))
        let nameMap = ["Unknown_0": "Caitlyn", "Grant": "Grant Gilliam"]

        let remapped = SpeakerEditing.remap(original, names: nameMap)

        let firstSection = remapped.sections[0]
        XCTAssertEqual(firstSection.title, "Grant Gilliam intro")
        // word boundary keeps "Grantham"
        XCTAssertEqual(firstSection.summary, "Grant Gilliam and Caitlyn talk; Grantham stays.")
        XCTAssertEqual(remapped.extras?.tldr.primarySpeakers, ["Grant Gilliam"])
        XCTAssertEqual(remapped.extras?.decisions.first?.agreedBy, ["Grant Gilliam", "Caitlyn"])
        XCTAssertEqual(remapped.extras?.actionItems.first?.owner, "Caitlyn")
        XCTAssertEqual(remapped.extras?.keyQuotes.first?.speaker, "Caitlyn")
    }

    /// End-to-end: renaming "Unknown_0" in the edit model and saving rewrites
    /// speakers.json, produces recap.html and transcript.md, stores the
    /// cluster fingerprint under the new name, and remaps recap.json.
    @MainActor
    func testEditModelRenameSavesArtifactsAndLearnsVoice() throws {
        // Work in a throwaway directory that is removed when the test exits.
        let dir = FileManager.default.temporaryDirectory.appendingPathComponent("edit_\(UUID().uuidString)")
        try FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true)
        defer { try? FileManager.default.removeItem(at: dir) }

        let speakersURL = dir.appendingPathComponent("speakers.json")
        let recapJSONURL = dir.appendingPathComponent("recap.json")

        // Seed the three input files the edit model loads.
        let speakers = SpeakersFile(sessionId: "s", app: "meet", durationSec: 6,
                                    speakers: [.init(name: "Grant", source: "mic_channel", overlapConfidence: nil, matchSimilarity: nil),
                                               .init(name: "Unknown_0", source: "unmatched", overlapConfidence: nil, matchSimilarity: nil)],
                                    segments: [seg(0, 2, "Grant", "hi"), seg(3, 5, "Unknown_0", "hello there")], models: [:])
        try speakers.write(to: speakersURL)
        try RecapFile(title: "Meet call",
                      result: RecapResult(sections: [TopicSection(title: "Intro", summary: "Unknown_0 greets the room.", startIndex: 0, endIndex: 1)], extras: nil))
            .write(to: recapJSONURL)
        let fingerprintJSON = try JSONSerialization.data(withJSONObject: ["Unknown_0": [0.5, 0.6], "Grant": [0.1, 0.2]])
        try fingerprintJSON.write(to: dir.appendingPathComponent("cluster_fingerprints.json"))

        let store = VoiceprintStore(fileURL: dir.appendingPathComponent("voiceprints.json"))
        let model = try XCTUnwrap(RecapEditModel(folder: dir, voiceprints: store))

        // The rename is visible in memory before anything is saved.
        model.rename("Unknown_0", to: "Caitlyn")
        XCTAssertTrue(model.speakers.contains("Caitlyn"))
        XCTAssertFalse(model.speakers.contains("Unknown_0"))

        model.save()

        // The saved speakers.json carries the new name only.
        let savedSpeakersData = try Data(contentsOf: speakersURL)
        let reloaded = try JSONDecoder().decode(SpeakersFile.self, from: savedSpeakersData)
        XCTAssertTrue(reloaded.segments.contains(where: { $0.speaker == "Caitlyn" }))
        XCTAssertFalse(reloaded.segments.contains(where: { $0.speaker == "Unknown_0" }))

        // Both rendered artifacts exist after saving.
        let fileManager = FileManager.default
        XCTAssertTrue(fileManager.fileExists(atPath: dir.appendingPathComponent("recap.html").path))
        XCTAssertTrue(fileManager.fileExists(atPath: dir.appendingPathComponent("transcript.md").path))

        // The renamed Unknown taught the store a voice for "Caitlyn".
        XCTAssertEqual(store.knownVoiceprints()["Caitlyn"], [0.5, 0.6])

        // recap.json summary remapped Unknown_0 → Caitlyn.
        let savedRecap = try XCTUnwrap(RecapFile.read(from: recapJSONURL))
        XCTAssertTrue(savedRecap.result.sections[0].summary.contains("Caitlyn"))
    }
}
|
||||||
Reference in New Issue
Block a user