Files
Grant Gilliam 6d0c8be8c9 Speaker reconciliation + open/re-process any saved session
Reconciliation (the marry-the-signals layer): after transcription, before the recap,
SpeakerReconciler (1) MERGES non-self clusters whose voiceprints are highly similar
(cosine >= 0.82) — fixes a person split across chunks (the real 1-on-1 failure: one
remote came back as 'MH' + 'Unknown_0'); and (2) NAMES remaining non-self clusters
from transcript CONTENT via the gateway LLM (people addressed by name / self-intros),
conservative + confidence-gated, keeping the placeholder when unrevealed. The
mic-channel self is protected and never reassigned. Voice does the segmentation; the
fingerprint merge fixes splits; the LLM adds the content signal that the visual and voiceprint signals lack.

- SpeakerReconciler: pure cosine merge (tested) + LLM content-naming pass; rewrites
  speakers.json before recap. SessionController.finishBackend shares one model lookup
  for reconcile + recap. Gated by settings.reconcileSpeakers (default on).
- Open saved session: menu 'Open saved session…' → folder picker. Edits it if already
  transcribed, else reconstructs inputs from disk (visual_timeline vision segs +
  channel self-spans) and runs transcribe → reconcile → recap, then opens the editor.
  Lets you evaluate/correct ANY past call, not just the in-memory last one.

Note (from real Signal data): visual naming is unreliable on Signal (sparse, misread
initials, lowercase/center names), so reconciliation and the editor (which teaches
voiceprints on confirm) carry the naming; the editor remains the human arbiter. 59/59 XCTest cases pass.
2026-06-08 11:54:41 -05:00

179 lines
8.3 KiB
Swift

import Foundation
/// Editable view-model for one session's speaker corrections.
///
/// Loads `speakers.json` (plus the optional `recap.json` and `cluster_fingerprints.json`)
/// from the session folder, applies rename / merge / per-segment reassignment edits in
/// memory, and on save rewrites `speakers.json`, re-renders the recap outputs through
/// `RecapRenderer`, and updates the voiceprint store (learning a voice when an Unknown
/// speaker is named).
@MainActor
final class RecapEditModel: ObservableObject {
    /// Session folder holding `speakers.json` and the rendered outputs.
    let folder: URL
    /// Display title: the stored recap title when present, otherwise "<App> call".
    let title: String

    private let voiceprints: VoiceprintStore
    private let baseURL: String
    private let skipTLS: Bool
    /// The speakers file as loaded; supplies the session metadata when rewriting.
    private let base: SpeakersFile
    private var recapFile: RecapFile?
    /// Fingerprint vectors from `cluster_fingerprints.json`, keyed by speaker name.
    private let clusterFingerprints: [String: [Float]]
    /// Speaker names as of the last commit (the baseline that `renameOps` applies to).
    private var originalSpeakers: [String]
    /// Rename / merge operations recorded since the last commit, in order.
    private var renameOps: [(from: String, to: String)] = []

    /// Templates offered for regeneration (the built-ins when none were supplied).
    let templates: [RecapTemplate]
    @Published var selectedTemplateId: String
    @Published private(set) var segments: [SpeakersFile.Segment]
    @Published private(set) var speakers: [String]
    @Published private(set) var dirty = false
    @Published private(set) var regenerating = false
    @Published private(set) var hasRecap: Bool
    @Published private(set) var status: String?

    /// Loads the session in `folder`.
    ///
    /// Fails (returns `nil`) when `speakers.json` is missing, cannot be decoded, or has
    /// no segments.
    /// - Parameters:
    ///   - templates: Recap templates to offer; an empty list falls back to the built-ins.
    ///   - defaultTemplateId: Preferred initial selection. Used only when it names one of
    ///     the templates actually offered; otherwise the first offered template is selected.
    init?(folder: URL, voiceprints: VoiceprintStore, baseURL: String, skipTLS: Bool,
          templates: [RecapTemplate], defaultTemplateId: String) {
        let speakersURL = folder.appendingPathComponent("speakers.json")
        guard let data = try? Data(contentsOf: speakersURL),
              let file = try? JSONDecoder().decode(SpeakersFile.self, from: data),
              !file.segments.isEmpty else { return nil }

        // Resolve the default against the list that is actually offered, so a valid
        // built-in default is honored when the caller passes no templates, and never
        // force-unwrap an empty list.
        let offered = templates.isEmpty ? RecapTemplate.builtIns : templates
        let recap = RecapFile.read(from: folder.appendingPathComponent("recap.json"))
        let roster = SpeakerEditing.orderedSpeakers(file.segments)

        self.folder = folder
        self.voiceprints = voiceprints
        self.baseURL = baseURL
        self.skipTLS = skipTLS
        self.templates = offered
        self.selectedTemplateId = offered.first(where: { $0.id == defaultTemplateId })?.id
            ?? offered.first?.id
            ?? RecapTemplate.internalMeeting.id
        self.base = file
        self.segments = file.segments
        self.speakers = roster
        self.originalSpeakers = roster
        self.recapFile = recap
        self.hasRecap = recap != nil
        self.clusterFingerprints = Self.loadFingerprints(folder.appendingPathComponent("cluster_fingerprints.json"))
        self.title = recap?.title ?? file.app.capitalized + " call"
    }

    // MARK: - Edits

    /// Renames `name` to the trimmed `newName`. Ignored when the trimmed name is empty
    /// or equal to `name`.
    func rename(_ name: String, to newName: String) {
        let target = newName.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !target.isEmpty, target != name else { return }
        recordRename(from: name, to: target)
    }

    /// Merge is just a rename onto an existing speaker.
    func merge(_ absorbed: String, into survivor: String) {
        guard absorbed != survivor else { return }
        recordRename(from: absorbed, to: survivor)
    }

    /// Reassigns the segment at `index` to `speaker`.
    func reassign(_ index: Int, to speaker: String) {
        segments = SpeakerEditing.reassign(index, to: speaker, in: segments)
        refresh()
    }

    /// Logs a rename op, rewrites the affected segments, and refreshes derived state.
    private func recordRename(from old: String, to new: String) {
        renameOps.append((from: old, to: new))
        segments = SpeakerEditing.replaceSpeaker(old, with: new, in: segments)
        refresh()
    }

    /// Recomputes the speaker list after an edit, marks the model dirty, and clears
    /// any previous status message.
    private func refresh() {
        speakers = SpeakerEditing.orderedSpeakers(segments)
        dirty = true
        status = nil
    }

    // MARK: - Save

    /// Persist corrections: rewrite speakers.json, re-render the recap with names
    /// remapped in place (fast, no LLM), and update voiceprints.
    ///
    /// If any output cannot be written, the status names it and the model stays dirty
    /// so the save can be retried.
    func save() {
        let outcome = commitAndRender()
        status = outcome.failures.isEmpty
            ? "Saved — recap.html & transcript.md updated."
            : Self.writeFailureStatus(outcome.failures)
    }

    /// Re-run the LLM analysis on the CORRECTED transcript, so summaries/extras are
    /// freshly written with the corrected names (not just find-replaced). Commits the
    /// corrections first; needs the gateway LLM (no-op message if unavailable).
    func regenerate() async {
        guard !regenerating else { return }
        regenerating = true
        status = "Regenerating recap…"
        defer { regenerating = false }

        // Commit, remap the existing recap, and rebaseline BEFORE the first suspension
        // point. Edits made while the LLM call is in flight then keep their rename ops
        // and dirty flag, and a failed regeneration still leaves the on-disk recap
        // consistent with the corrected names.
        let committed = commitAndRender()
        guard committed.failures.isEmpty else {
            status = Self.writeFailureStatus(committed.failures)
            return
        }

        let template = templates.first { $0.id == selectedTemplateId } ?? templates.first ?? .internalMeeting
        let llm = GatewayLLMClient(baseURL: baseURL, skipTLS: skipTLS)
        guard let model = await llm.chatModelId() else {
            status = "No language model on the gateway — saved corrections only."
            return
        }
        let analyzer = RecapAnalyzer(llm: llm, model: model)
        guard let result = try? await analyzer.recap(file: committed.file, template: template) else {
            status = "Recap regeneration failed — corrections were saved."
            return
        }

        let fresh = RecapFile(title: title, result: result)
        recapFile = fresh
        hasRecap = true

        var failures: [String] = []
        do { try fresh.write(to: folder.appendingPathComponent("recap.json")) } catch {
            failures.append("recap.json")
        }
        do { try RecapRenderer.write(file: committed.file, result: result, title: title, to: folder) } catch {
            failures.append("recap.html / transcript.md")
        }
        if failures.isEmpty {
            status = "Recap regenerated with corrected names."
        } else {
            // Keep the model dirty so a plain save can retry writing the new recap.
            dirty = true
            status = Self.writeFailureStatus(failures)
        }
    }

    /// Shared commit path for save() and regenerate(): writes the corrected
    /// speakers.json, reconciles the voiceprint store, remaps the existing recap (if
    /// any) onto the corrected names, re-renders via `RecapRenderer`, and rebaselines.
    ///
    /// - Returns: The committed speakers file and the names of any outputs that could
    ///   not be written. When that list is non-empty the model is left dirty.
    private func commitAndRender() -> (file: SpeakersFile, failures: [String]) {
        // Resolve the net rename map first: rebaseline() below clears the op log.
        let net = SpeakerEditing.netNameMap(originals: originalSpeakers, ops: renameOps)
        var failures: [String] = []

        let file = SpeakersFile(sessionId: base.sessionId, app: base.app, durationSec: base.durationSec,
                                speakers: buildSpeakerList(), segments: segments, models: base.models)
        do { try file.write(to: folder.appendingPathComponent("speakers.json")) } catch {
            failures.append("speakers.json")
        }
        reconcileVoiceprints()

        let result: RecapResult
        if let existing = recapFile {
            result = SpeakerEditing.remap(existing.result, names: net)
            let remapped = RecapFile(title: title, result: result)
            recapFile = remapped
            do { try remapped.write(to: folder.appendingPathComponent("recap.json")) } catch {
                failures.append("recap.json")
            }
        } else {
            result = RecapResult(sections: [], extras: nil)
        }
        do { try RecapRenderer.write(file: file, result: result, title: title, to: folder) } catch {
            failures.append("recap.html / transcript.md")
        }

        rebaseline()
        // The in-memory state is already rebaselined, so a retry simply rewrites the
        // files; keeping `dirty` set makes that retry available.
        if !failures.isEmpty { dirty = true }
        return (file, failures)
    }

    /// Applies the pending renames to the voiceprint store. Must run before
    /// rebaseline(), which clears the rename ops this reads.
    private func reconcileVoiceprints() {
        let net = SpeakerEditing.netNameMap(originals: originalSpeakers, ops: renameOps)
        let stored = voiceprints.knownVoiceprints()
        for (orig, renamed) in net where !LabelMergeResponse.isUnknownName(renamed) {
            let targetHasPrint = clusterFingerprints[renamed] != nil || stored[renamed] != nil
            if targetHasPrint {
                // Merge into an existing identity: keep the survivor's print, drop the absorbed.
                if !LabelMergeResponse.isUnknownName(orig) { voiceprints.merge(orig, into: renamed) }
            } else if let vector = clusterFingerprints[orig] {
                // Rename / name an Unknown: learn this voice for the new name.
                voiceprints.enroll(name: renamed, vector: vector)
                if !LabelMergeResponse.isUnknownName(orig) { voiceprints.merge(orig, into: renamed) }
            } else {
                voiceprints.rename(orig, to: renamed)
            }
        }
    }

    /// Builds the user-facing status for outputs that could not be written.
    private static func writeFailureStatus(_ outputs: [String]) -> String {
        "Save incomplete — could not write \(outputs.joined(separator: ", "))."
    }

    /// After a commit, the corrected names become the new baseline so further edits
    /// map cleanly (and the now-baked-in recap isn't double-remapped).
    private func rebaseline() {
        originalSpeakers = SpeakerEditing.orderedSpeakers(segments)
        renameOps.removeAll()
        dirty = false
    }

    /// Speaker roster from the edited segments: keep the original source where the
    /// name is unchanged; mark new (renamed/merged) names as user-set.
    private func buildSpeakerList() -> [SpeakersFile.Speaker] {
        // On duplicate names in the loaded roster, the first entry wins.
        let known = Dictionary(base.speakers.map { ($0.name, $0) }, uniquingKeysWith: { first, _ in first })
        return SpeakerEditing.orderedSpeakers(segments).map { name in
            known[name] ?? SpeakersFile.Speaker(name: name, source: "manual", overlapConfidence: nil, matchSimilarity: nil)
        }
    }

    /// Reads a JSON object of `name -> [number]` from `url`.
    ///
    /// Returns an empty dictionary when the file is missing or is not a JSON object of
    /// arrays. Entries whose array is empty or contains a non-numeric element are
    /// dropped, so a truncated or malformed vector is never treated as a fingerprint.
    static func loadFingerprints(_ url: URL) -> [String: [Float]] {
        guard let data = try? Data(contentsOf: url),
              let object = try? JSONSerialization.jsonObject(with: data) as? [String: [Any]] else { return [:] }
        return object.compactMapValues { raw in
            let vector = raw.compactMap { ($0 as? NSNumber)?.floatValue }
            return (!vector.isEmpty && vector.count == raw.count) ? vector : nil
        }
    }
}