import Foundation

/// Concatenates per-chunk `label-merge` results into one global `speakers.json`:
/// segment times offset back to global seconds, speakers unified across chunks by
/// name, and fingerprints collected for the voiceprint store.
enum TranscriptAssembler {
    /// One chunk's `label-merge` response plus where that chunk sits on the global timeline.
    struct ChunkResult {
        /// Global seconds (the sliced window start); added to every segment time of this chunk.
        let chunkStart: Double
        /// The per-chunk response whose segments, speakers and fingerprints are merged.
        let response: LabelMergeResponse
        // The region this chunk OWNS; segments whose midpoint falls outside it are the
        // neighbour's (overlap margin) and are dropped here. Defaults keep everything
        // (no-overlap behaviour).
        var bodyStart: Double = -.greatestFiniteMagnitude
        var bodyEnd: Double = .greatestFiniteMagnitude
    }

    /// Result of `assemble`: the merged file plus the two fingerprint maps.
    struct Assembled {
        let speakersFile: SpeakersFile
        /// Confidently-named speakers only, for VoiceprintStore.
        let fingerprints: [String: [Float]]
        /// EVERY cluster incl. Unknown — for editor voice-learning.
        let allFingerprints: [String: [Float]]
    }

    /// Source ranking when the same name appears across chunks with different sources.
    /// `mic_channel` (the local user's own microphone) is the most authoritative.
    /// Any source string not listed here ranks lowest.
    private static func rank(_ source: String) -> Int {
        switch source {
        case "mic_channel": return 4
        case "visual": return 3
        case "voiceprint": return 2
        default: return 1  // unmatched
        }
    }

    /// Thin forwarder to `LabelMergeResponse.isUnknownName`.
    private static func isUnknown(_ name: String) -> Bool {
        LabelMergeResponse.isUnknownName(name)
    }

    /// Merges the per-chunk results into a single `SpeakersFile` and the fingerprint maps.
    ///
    /// Merge rules:
    /// - Segment times are shifted by the chunk's `chunkStart`; a segment is kept only when its
    ///   shifted midpoint lies in `[bodyStart, bodyEnd)` of its chunk. Kept segments are sorted
    ///   by start time.
    /// - Speakers are keyed by name. When a name recurs, the entry with the strictly higher
    ///   `rank(source)` wins; on a tie the first-seen entry is kept.
    /// - Fingerprints are keyed by name; empty vectors are ignored and a later non-empty value
    ///   overwrites an earlier one. Within a chunk the response-level `fingerprints` are applied
    ///   after the per-speaker ones.
    /// - `models` is taken from the last chunk (empty when `chunks` is empty).
    ///
    /// - Parameters:
    ///   - sessionId: Passed through to the resulting `SpeakersFile`.
    ///   - app: Passed through to the resulting `SpeakersFile`.
    ///   - chunks: Chunk results to merge; iterated in the given order.
    /// - Returns: The merged file together with the named-only and the complete fingerprint maps.
    static func assemble(sessionId: String, app: String, chunks: [ChunkResult]) -> Assembled {
        var segments: [SpeakersFile.Segment] = []
        var bestSpeaker: [String: SpeakersFile.Speaker] = [:]
        var fingerprints: [String: [Float]] = [:]
        var allFingerprints: [String: [Float]] = [:]
        var duration = 0.0

        // Stores one fingerprint: always in the complete map (every cluster, for the editor),
        // and additionally in the named-only map (for the store) unless the name is an
        // "unknown" one. Empty vectors are skipped.
        func record(_ fingerprint: [Float], for name: String) {
            guard !fingerprint.isEmpty else { return }
            allFingerprints[name] = fingerprint
            if !isUnknown(name) { fingerprints[name] = fingerprint }
        }

        for chunk in chunks {
            let offset = chunk.chunkStart
            // Body end bounds the real session length even on silent/all-unknown calls.
            duration = max(duration, min(chunk.bodyEnd, offset + chunk.response.duration))

            for seg in chunk.response.segments {
                let start = seg.startSeconds + offset
                let end = seg.endSeconds + offset
                // Overlap dedup: keep a segment only in the chunk that OWNS its midpoint;
                // the other chunk saw it only in its margin (for context) and drops it.
                let mid = (start + end) / 2
                guard mid >= chunk.bodyStart, mid < chunk.bodyEnd else { continue }
                segments.append(.init(start: start, end: end, speaker: seg.speaker, text: seg.text))
                // A kept segment may end past the chunk's reported duration / body end.
                duration = max(duration, end)
            }

            for sp in chunk.response.speakers {
                let candidate = SpeakersFile.Speaker(
                    name: sp.name,
                    source: sp.source,
                    overlapConfidence: sp.overlapConfidence,
                    matchSimilarity: sp.matchSimilarity)
                if let existing = bestSpeaker[sp.name] {
                    // Replace only on a strictly better source; ties keep the first-seen entry.
                    if rank(sp.source) > rank(existing.source) { bestSpeaker[sp.name] = candidate }
                } else {
                    bestSpeaker[sp.name] = candidate
                }
                if let fp = sp.fingerprint {
                    record(fp, for: sp.name)
                }
            }

            // Response-level fingerprints are applied last, so for a name present in both
            // places within this chunk they take precedence over the per-speaker value.
            for (name, fp) in chunk.response.fingerprints {
                record(fp, for: name)
            }
        }

        segments.sort { $0.start < $1.start }
        let speakers = bestSpeaker.values.sorted { $0.name < $1.name }
        let models: [String: String] = chunks.last?.response.models ?? [:]

        let file = SpeakersFile(
            sessionId: sessionId,
            app: app,
            durationSec: duration,
            speakers: speakers,
            segments: segments,
            models: models)
        return Assembled(
            speakersFile: file,
            fingerprints: fingerprints,
            allFingerprints: allFingerprints)
    }
}