diff --git a/Ten31Transcripts/Backend/SparkControlClient.swift b/Ten31Transcripts/Backend/SparkControlClient.swift index f474b6a..0d4f402 100644 --- a/Ten31Transcripts/Backend/SparkControlClient.swift +++ b/Ten31Transcripts/Backend/SparkControlClient.swift @@ -88,8 +88,7 @@ final class SparkControlClient { deinit { urlSession.finishTasksAndInvalidate() } - /// One `label-merge` call. `timeline` is the flat `[{start,end,name,confidence}]` - /// JSON (chunk-local seconds). Retries on `503 + Retry-After`. + /// Mono `label-merge`: one mixed-mono file + timeline. Retries on `503`. func labelMerge(audioURL: URL, timeline: Data, knownVoiceprints: [String: [Float]]?, @@ -97,14 +96,46 @@ final class SparkControlClient { minOverlap: Double? = nil, voiceprintThreshold: Double? = nil, maxRetries: Int = 3) async throws -> LabelMergeResponse { - guard let url = URL(string: baseURL + "/api/audio/label-merge") else { - throw SparkControlError.invalidHost - } + let fields = Self.commonFields(timeline: timeline, knownVoiceprints: knownVoiceprints, + transcribe: transcribe, minOverlap: minOverlap, + voiceprintThreshold: voiceprintThreshold) + let files = [(field: "file", filename: audioURL.lastPathComponent, data: try Data(contentsOf: audioURL))] + return try await perform(fields: fields, files: files, maxRetries: maxRetries) + } + /// Dual-channel `label-merge`: separate mic (local user) + system (remote) + /// tracks. The mic channel is attributed as `self_name`; `timeline` names only + /// the remote/system speakers; `selfVad` (optional) are the chunk-local windows + /// where the mic is genuinely the user (active and louder than system). + func labelMergeDual(micURL: URL, + systemURL: URL, + selfName: String, + selfVad: Data?, + timeline: Data, + knownVoiceprints: [String: [Float]]?, + transcribe: Bool, + minOverlap: Double? = nil, + voiceprintThreshold: Double? = nil, + maxRetries: Int = 3) async throws -> LabelMergeResponse { + var fields = Self.commonFields(timeline: timeline, knownVoiceprints: knownVoiceprints, + transcribe: transcribe, minOverlap: minOverlap, + voiceprintThreshold: voiceprintThreshold) + fields["self_name"] = selfName + if let selfVad, let str = String(data: selfVad, encoding: .utf8) { fields["self_vad"] = str } + let files = [ + (field: "mic_file", filename: micURL.lastPathComponent, data: try Data(contentsOf: micURL)), + (field: "system_file", filename: systemURL.lastPathComponent, data: try Data(contentsOf: systemURL)), + ] + return try await perform(fields: fields, files: files, maxRetries: maxRetries) + } + + // MARK: - Transport + + private static func commonFields(timeline: Data, knownVoiceprints: [String: [Float]]?, + transcribe: Bool, minOverlap: Double?, + voiceprintThreshold: Double?) -> [String: String] { var fields: [String: String] = ["transcribe": transcribe ? "true" : "false"] - if let timelineString = String(data: timeline, encoding: .utf8) { - fields["timeline"] = timelineString - } + if let timelineString = String(data: timeline, encoding: .utf8) { fields["timeline"] = timelineString } if let known = knownVoiceprints, !known.isEmpty, let data = try? JSONSerialization.data(withJSONObject: known.mapValues { $0.map { Double($0) } }), let str = String(data: data, encoding: .utf8) { @@ -112,11 +143,17 @@ final class SparkControlClient { } if let minOverlap { fields["min_overlap"] = String(minOverlap) } if let voiceprintThreshold { fields["voiceprint_threshold"] = String(voiceprintThreshold) } + return fields + } - let audio = try Data(contentsOf: audioURL) - // Body doesn't change between retries — build it once. - let (body, contentType) = Self.multipart(fields: fields, fileField: "file", - filename: audioURL.lastPathComponent, fileData: audio) + /// Shared POST + retry-on-503 + decode. Body is built once (constant across retries). + private func perform(fields: [String: String], + files: [(field: String, filename: String, data: Data)], + maxRetries: Int) async throws -> LabelMergeResponse { + guard let url = URL(string: baseURL + "/api/audio/label-merge") else { + throw SparkControlError.invalidHost + } + let (body, contentType) = Self.multipart(fields: fields, files: files) var attempt = 0 while true { @@ -158,8 +195,8 @@ final class SparkControlClient { return String(data: data, encoding: .utf8) ?? "unknown error" } - private static func multipart(fields: [String: String], fileField: String, - filename: String, fileData: Data) -> (Data, String) { + private static func multipart(fields: [String: String], + files: [(field: String, filename: String, data: Data)]) -> (Data, String) { let boundary = "Boundary-\(UUID().uuidString)" var body = Data() func append(_ s: String) { body.append(s.data(using: .utf8)!) } @@ -169,11 +206,14 @@ final class SparkControlClient { append("Content-Disposition: form-data; name=\"\(name)\"\r\n\r\n") append("\(value)\r\n") } - append("--\(boundary)\r\n") - append("Content-Disposition: form-data; name=\"\(fileField)\"; filename=\"\(filename)\"\r\n") - append("Content-Type: audio/wav\r\n\r\n") - body.append(fileData) - append("\r\n--\(boundary)--\r\n") + for file in files { + append("--\(boundary)\r\n") + append("Content-Disposition: form-data; name=\"\(file.field)\"; filename=\"\(file.filename)\"\r\n") + append("Content-Type: audio/wav\r\n\r\n") + body.append(file.data) + append("\r\n") + } + append("--\(boundary)--\r\n") return (body, "multipart/form-data; boundary=\(boundary)") } } diff --git a/Ten31Transcripts/Backend/VoiceprintStore.swift b/Ten31Transcripts/Backend/VoiceprintStore.swift index 143e154..3bdf33c 100644 --- a/Ten31Transcripts/Backend/VoiceprintStore.swift +++ b/Ten31Transcripts/Backend/VoiceprintStore.swift @@ -48,9 +48,10 @@ final class VoiceprintStore { guard !Self.isUnknown(sp.name) else { continue } let acceptable: Bool switch sp.source { - case "visual": acceptable = (sp.overlapConfidence ?? 0) >= minOverlapToStore - case "voiceprint": acceptable = true // already matched a known print - default: acceptable = false // unmatched + case "mic_channel": acceptable = true // the user's own clean mic voiceprint + case "visual": acceptable = (sp.overlapConfidence ?? 0) >= minOverlapToStore + case "voiceprint": acceptable = true // already matched a known print + default: acceptable = false // unmatched } guard acceptable, let vector = sp.fingerprint ?? response.fingerprints[sp.name], !vector.isEmpty else { continue } diff --git a/Ten31Transcripts/Session/SessionController.swift b/Ten31Transcripts/Session/SessionController.swift index efe9eb9..dbcf677 100644 --- a/Ten31Transcripts/Session/SessionController.swift +++ b/Ten31Transcripts/Session/SessionController.swift @@ -65,8 +65,13 @@ final class SessionController: ObservableObject { let folder: URL let sessionId: String let app: String + let micURL: URL + let systemURL: URL let mixedURL: URL - let timeline: [VisualTimeline.Segment] + let timeline: [VisualTimeline.Segment] // remote visual names; self handled via the mic channel + let selfSpans: [VADSpan] + let selfName: String + let systemHealthy: Bool } private var lastProcess: ProcessInputs? private var processTask: Task? @@ -275,19 +280,20 @@ final class SessionController: ObservableObject { /// ran, otherwise the mic-VAD self spans alone. `visualRan` reports whether the /// visual pipeline actually attached (for the after-session indicator). private func stopVisualAndTimeline(_ result: RecordingResult, folder: URL?) - async -> (timeline: [VisualTimeline.Segment], visualRan: Bool) { + async -> (timeline: [VisualTimeline.Segment], selfSpans: [VADSpan], visualRan: Bool) { let selfName = settings.selfName let selfSpans = await channelSelfSpans(result: result, folder: folder) if let vc = visualCapture, let folder { visualCapture = nil - let timeline = await vc.finish( + // Remote (vision) segments only; self travels separately as the mic channel. + let remote = await vc.finish( selfSpans: selfSpans, selfName: selfName, sessionId: folder.lastPathComponent, t0Unix: result.t0Unix, durationSec: result.duration, folder: folder) - return (timeline, true) + return (remote, selfSpans, true) } if let vc = visualCapture { await vc.cancel(); visualCapture = nil } - return (TranscriptPipeline.timeline(fromSelfSpans: selfSpans, selfName: selfName), false) + return ([], selfSpans, false) } /// Self spans for the backend timeline, identified by CHANNEL: the mic track is @@ -312,26 +318,29 @@ final class SessionController: ObservableObject { lifecycleTask = Task { let result = await recorder.stop() let visual = await self.stopVisualAndTimeline(result, folder: folder) - self.finish(result, timeline: visual.timeline, visualRan: visual.visualRan) + self.finish(result, timeline: visual.timeline, selfSpans: visual.selfSpans, visualRan: visual.visualRan) } } - private func finish(_ result: RecordingResult, timeline: [VisualTimeline.Segment], visualRan: Bool) { + private func finish(_ result: RecordingResult, timeline: [VisualTimeline.Segment], + selfSpans: [VADSpan], visualRan: Bool) { recorder = nil micLevel = 0 systemLevel = 0 warning = result.systemNote.map { "System audio stopped early: \($0)" } transcriptStatus = .idle if let folder = currentFolder { - writeSelfSpans(result, to: folder) - let visualCount = visualRan ? timeline.filter { $0.source == "vision" }.count : nil + writeSelfSpans(spans: selfSpans, result: result, to: folder) + let visualCount = visualRan ? timeline.count : nil // `timeline` is the remote vision segments lastSession = SessionInfo( folder: folder, mixedURL: result.mixedURL, - duration: result.duration, selfSpanCount: result.selfSpans.count, + duration: result.duration, selfSpanCount: selfSpans.count, visualSegmentCount: visualCount) lastProcess = ProcessInputs( folder: folder, sessionId: folder.lastPathComponent, app: currentLabel, - mixedURL: result.mixedURL, timeline: timeline) + micURL: result.micURL, systemURL: result.systemURL, mixedURL: result.mixedURL, + timeline: timeline, selfSpans: selfSpans, selfName: settings.selfName, + systemHealthy: result.systemNote == nil) } let autoSend = settings.autoSendOnStop currentFolder = nil @@ -360,11 +369,12 @@ final class SessionController: ObservableObject { baseURL: settings.backendBaseURL, skipTLS: settings.skipTLSVerification, voiceprints: voiceprints) - let timeline = inputs.timeline do { let speakers = try await pipeline.process( sessionFolder: inputs.folder, sessionId: inputs.sessionId, app: inputs.app, - mixedURL: inputs.mixedURL, timeline: timeline, + micURL: inputs.micURL, systemURL: inputs.systemURL, mixedURL: inputs.mixedURL, + timeline: inputs.timeline, selfSpans: inputs.selfSpans, selfName: inputs.selfName, + systemHealthy: inputs.systemHealthy, progress: { done, total in await MainActor.run { self.transcriptStatus = .processing(done, total) } }) @@ -411,7 +421,7 @@ final class SessionController: ObservableObject { let folder = currentFolder let result = await recorder.stop() let visual = await stopVisualAndTimeline(result, folder: folder) - finish(result, timeline: visual.timeline, visualRan: visual.visualRan) + finish(result, timeline: visual.timeline, selfSpans: visual.selfSpans, visualRan: visual.visualRan) } else if lifecycleGeneration == gen { break // settled: no new transition was spawned } @@ -461,15 +471,15 @@ final class SessionController: ObservableObject { return f.string(from: Date()) } - /// Phase-1 preview of the mic-VAD "self" spans (the eventual - /// `visual_timeline.json` `mic_vad` segments). Lets us eyeball VAD quality. - private func writeSelfSpans(_ result: RecordingResult, to folder: URL) { - let segments = result.selfSpans.map { span -> [String: Any] in + /// Debug artifact: the channel-verified "self" spans actually sent to the backend + /// as `self_vad` (mic active AND louder than system). Lets us eyeball self detection. + private func writeSelfSpans(spans: [VADSpan], result: RecordingResult, to folder: URL) { + let segments = spans.map { span -> [String: Any] in ["start": span.start, "end": span.end, "name": "self", - "confidence": span.confidence, "source": "mic_vad"] + "confidence": span.confidence, "source": "mic_channel"] } let object: [String: Any] = [ - "note": "Phase 1 mic-VAD self spans (preview of visual_timeline segments)", + "note": "channel-verified self spans (mic active and louder than system) — the self_vad sent to label-merge", "t0_unix": result.t0Unix, "duration_sec": result.duration, "self_spans": segments, diff --git a/Ten31Transcripts/Session/SessionPackager.swift b/Ten31Transcripts/Session/SessionPackager.swift index 473441c..cc8bb6a 100644 --- a/Ten31Transcripts/Session/SessionPackager.swift +++ b/Ten31Transcripts/Session/SessionPackager.swift @@ -46,6 +46,18 @@ enum SessionPackager { return try JSONSerialization.data(withJSONObject: flat, options: []) } + /// Clip self-VAD spans to `[start, end)` and rebase to chunk-local seconds, as + /// the `self_vad` array `[{start,end}]` for dual-channel `label-merge`. + static func rebasedSelfVadData(_ spans: [VADSpan], start: Double, end: Double) throws -> Data { + let flat: [[String: Any]] = spans.compactMap { span in + let s = max(span.start, start) + let e = min(span.end, end) + guard e > s else { return nil } + return ["start": s - start, "end": e - start] + } + return try JSONSerialization.data(withJSONObject: flat, options: []) + } + /// Slice `[startSec, endSec)` of a 16 kHz mono WAV into `dest`. static func sliceAudio(from source: URL, startSec: Double, endSec: Double, to dest: URL) throws { let input = try AVAudioFile(forReading: source) diff --git a/Ten31Transcripts/Session/TranscriptAssembler.swift b/Ten31Transcripts/Session/TranscriptAssembler.swift index bb44a60..da32325 100644 --- a/Ten31Transcripts/Session/TranscriptAssembler.swift +++ b/Ten31Transcripts/Session/TranscriptAssembler.swift @@ -15,8 +15,10 @@ enum TranscriptAssembler { } /// Source ranking when the same name appears across chunks with different sources. + /// `mic_channel` (the local user's own microphone) is the most authoritative. private static func rank(_ source: String) -> Int { switch source { + case "mic_channel": return 4 case "visual": return 3 case "voiceprint": return 2 default: return 1 // unmatched diff --git a/Ten31Transcripts/Session/TranscriptPipeline.swift b/Ten31Transcripts/Session/TranscriptPipeline.swift index 911033a..b985437 100644 --- a/Ten31Transcripts/Session/TranscriptPipeline.swift +++ b/Ten31Transcripts/Session/TranscriptPipeline.swift @@ -12,16 +12,30 @@ final class TranscriptPipeline { self.voiceprints = voiceprints } - /// Process `mixedURL` against `timeline` (visual + self spans). Writes - /// `speakers.json` into `sessionFolder` and returns it. `progress(done,total)` + /// Process a finished session. **Dual-channel** when the system track is healthy + /// and present: mic (the local user) + system (remote) go as separate files, the + /// `timeline` names only the remote speakers, and `selfSpans` become `self_vad`. + /// Otherwise falls back to the **mono** mixed file with self folded into the + /// timeline. Writes `speakers.json` into `sessionFolder`. `progress(done,total)` /// is called per chunk. func process(sessionFolder: URL, sessionId: String, app: String, + micURL: URL, + systemURL: URL, mixedURL: URL, timeline: [VisualTimeline.Segment], + selfSpans: [VADSpan], + selfName: String, + systemHealthy: Bool, progress: ((Int, Int) async -> Void)? = nil) async throws -> SpeakersFile { - let duration = SessionPackager.duration(of: mixedURL) + let fm = FileManager.default + let dual = systemHealthy + && fm.fileExists(atPath: micURL.path) && fm.fileExists(atPath: systemURL.path) + && SessionPackager.duration(of: systemURL) > 0 + let duration = dual + ? max(SessionPackager.duration(of: micURL), SessionPackager.duration(of: systemURL)) + : SessionPackager.duration(of: mixedURL) let plan = SessionPackager.planChunks(durationSec: duration) // Zero-duration / empty session → a valid empty speakers.json, no backend call. @@ -33,32 +47,51 @@ final class TranscriptPipeline { } let chunksDir = sessionFolder.appendingPathComponent("chunks", isDirectory: true) - try? FileManager.default.createDirectory(at: chunksDir, withIntermediateDirectories: true) - defer { try? FileManager.default.removeItem(at: chunksDir) } // cleanup on success OR throw + try? fm.createDirectory(at: chunksDir, withIntermediateDirectories: true) + defer { try? fm.removeItem(at: chunksDir) } // cleanup on success OR throw // Start from stored voiceprints; accumulate this call's prints across chunks // for within-call unification (the store only persists high-confidence ones). var known = voiceprints.knownVoiceprints() var results: [TranscriptAssembler.ChunkResult] = [] + // Mono fallback needs self folded into the timeline; dual sends it separately. + let monoTimeline = dual ? timeline + : timeline + Self.timeline(fromSelfSpans: selfSpans, selfName: selfName) for chunk in plan { try Task.checkCancellation() await progress?(chunk.index, plan.count) - let chunkURL = chunksDir.appendingPathComponent("chunk_\(String(format: "%03d", chunk.index)).wav") - try SessionPackager.sliceAudio(from: mixedURL, startSec: chunk.start, endSec: chunk.end, to: chunkURL) - guard FileManager.default.fileExists(atPath: chunkURL.path) else { continue } // empty slice → skip + let pad = String(format: "%03d", chunk.index) + let response: LabelMergeResponse - let timelineData = try SessionPackager.rebasedTimelineData(timeline, start: chunk.start, end: chunk.end) - let response = try await client.labelMerge( - audioURL: chunkURL, timeline: timelineData, - knownVoiceprints: known.isEmpty ? nil : known, transcribe: true) + if dual { + let micChunk = chunksDir.appendingPathComponent("chunk_\(pad)_mic.wav") + let sysChunk = chunksDir.appendingPathComponent("chunk_\(pad)_sys.wav") + try SessionPackager.sliceAudio(from: micURL, startSec: chunk.start, endSec: chunk.end, to: micChunk) + try SessionPackager.sliceAudio(from: systemURL, startSec: chunk.start, endSec: chunk.end, to: sysChunk) + guard fm.fileExists(atPath: micChunk.path), fm.fileExists(atPath: sysChunk.path) else { continue } + let timelineData = try SessionPackager.rebasedTimelineData(timeline, start: chunk.start, end: chunk.end) + let selfVadData = try SessionPackager.rebasedSelfVadData(selfSpans, start: chunk.start, end: chunk.end) + response = try await client.labelMergeDual( + micURL: micChunk, systemURL: sysChunk, selfName: selfName, selfVad: selfVadData, + timeline: timelineData, knownVoiceprints: known.isEmpty ? nil : known, transcribe: true) + try? fm.removeItem(at: micChunk); try? fm.removeItem(at: sysChunk) + } else { + let chunkURL = chunksDir.appendingPathComponent("chunk_\(pad).wav") + try SessionPackager.sliceAudio(from: mixedURL, startSec: chunk.start, endSec: chunk.end, to: chunkURL) + guard fm.fileExists(atPath: chunkURL.path) else { continue } // empty slice → skip + let timelineData = try SessionPackager.rebasedTimelineData(monoTimeline, start: chunk.start, end: chunk.end) + response = try await client.labelMerge( + audioURL: chunkURL, timeline: timelineData, + knownVoiceprints: known.isEmpty ? nil : known, transcribe: true) + try? fm.removeItem(at: chunkURL) + } for (name, fp) in response.fingerprints where !LabelMergeResponse.isUnknownName(name) { known[name] = fp } voiceprints.update(with: response) results.append(.init(chunkStart: chunk.start, response: response)) - try? FileManager.default.removeItem(at: chunkURL) } await progress?(plan.count, plan.count) diff --git a/Ten31Transcripts/Visual/VisualCapture.swift b/Ten31Transcripts/Visual/VisualCapture.swift index 6fa7f7b..638c9bb 100644 --- a/Ten31Transcripts/Visual/VisualCapture.swift +++ b/Ten31Transcripts/Visual/VisualCapture.swift @@ -55,29 +55,35 @@ final class VisualCapture { } } - /// Stop capture, fold in the mic-VAD self spans, write `visual_timeline.json` - /// into the session folder, and return the merged segments for `label-merge`. + /// Stop capture and write `visual_timeline.json` (the full human-readable picture: + /// remote visual segments + the mic-VAD self spans, merged). Returns ONLY the + /// remote (vision) segments — in dual-channel mode the backend names the system + /// track from these, while self is handled by the mic channel + `self_vad`. func finish(selfSpans: [VADSpan], selfName: String, sessionId: String, t0Unix: Double, durationSec: Double, folder: URL) async -> [VisualTimeline.Segment] { - observer.addSelfSpans(selfSpans, selfName: selfName) let (rawSegments, rawGaps) = await observer.stop() // The observer stops slightly after audio fixes `durationSec`, so a trailing // gap/segment can run past it. Clamp ends so the JSON is internally consistent // (and we never hand the backend a segment longer than the audio). - let segments = Self.clampSegments(rawSegments, to: durationSec) + let vision = Self.clampSegments(rawSegments, to: durationSec) // remote speakers let gaps = Self.clampGaps(rawGaps, to: durationSec) + let selfSegs = Self.clampSegments(selfSpans.map { + VisualTimeline.Segment(start: $0.start, end: $0.end, name: selfName, + confidence: $0.confidence, source: "mic_vad") + }, to: durationSec) - let names = Set(segments.map { $0.name }) + let artifact = (vision + selfSegs).sorted { $0.start < $1.start } + let names = Set(artifact.map { $0.name }) let participants = names.sorted().map { VisualTimeline.Participant(name: $0, isSelf: $0 == selfName ? true : nil, aliases: nil) } let timeline = VisualTimeline( sessionId: sessionId, app: app.label, adapterVersion: adapter.adapterVersion, t0Unix: t0Unix, durationSec: durationSec, fpsSampled: adapter.preferredFPS, - selfName: selfName, participants: participants, segments: segments, visualGaps: gaps) + selfName: selfName, participants: participants, segments: artifact, visualGaps: gaps) try? timeline.write(to: folder.appendingPathComponent("visual_timeline.json")) - return segments + return vision } } diff --git a/Ten31TranscriptsTests/Phase5Tests.swift b/Ten31TranscriptsTests/Phase5Tests.swift index bc52674..39012c6 100644 --- a/Ten31TranscriptsTests/Phase5Tests.swift +++ b/Ten31TranscriptsTests/Phase5Tests.swift @@ -29,6 +29,31 @@ final class Phase5Tests: XCTestCase { XCTAssertEqual(arr[1]["end"] as? Double, 110) } + func testRebaseSelfVadClipsAndRebases() throws { + let spans = [VADSpan(start: 140, end: 160, confidence: 0.9), + VADSpan(start: 200, end: 260, confidence: 0.8)] + let data = try SessionPackager.rebasedSelfVadData(spans, start: 150, end: 300) + let arr = try XCTUnwrap(JSONSerialization.jsonObject(with: data) as? [[String: Any]]) + XCTAssertEqual(arr.count, 2) + XCTAssertEqual(arr[0]["start"] as? Double, 0) + XCTAssertEqual(arr[0]["end"] as? Double, 10) // 160 clipped at 150 base → 0–10 + XCTAssertEqual(arr[1]["start"] as? Double, 50) + XCTAssertEqual(arr[1]["end"] as? Double, 110) + XCTAssertNil(arr[0]["name"]) // self_vad carries no name + } + + func testAssembleRanksMicChannelOverVisual() throws { + // Same person resolved by visual in one chunk and by the mic channel in + // another → the mic-channel attribution (the user's own track) wins. + let visual = #"{"duration":100,"speakers":[{"cluster":"Speaker_0","name":"Grant","source":"visual","overlap_confidence":0.99,"fingerprint":[0.1]}],"segments":[],"fingerprints":{},"models":{}}"# + let mic = #"{"duration":100,"speakers":[{"cluster":"mic","name":"Grant","source":"mic_channel","fingerprint":[0.2]}],"segments":[],"fingerprints":{"Grant":[0.2]},"models":{}}"# + let rv = try JSONDecoder().decode(LabelMergeResponse.self, from: Data(visual.utf8)) + let rm = try JSONDecoder().decode(LabelMergeResponse.self, from: Data(mic.utf8)) + let asm = TranscriptAssembler.assemble(sessionId: "s", app: "meet", + chunks: [.init(chunkStart: 0, response: rv), .init(chunkStart: 100, response: rm)]) + XCTAssertEqual(asm.speakersFile.speakers.first { $0.name == "Grant" }?.source, "mic_channel") + } + func testAssembleOffsetsAndUnifies() throws { let resp0 = #"{"duration":150,"speakers":[{"cluster":"Speaker_0","name":"Grant","source":"visual","overlap_confidence":0.99,"fingerprint":[0.1,0.2]}],"segments":[{"start_ms":1000,"end_ms":2000,"speaker":"Grant","text":"hi"}],"fingerprints":{"Grant":[0.1,0.2]},"models":{"diarization":"x"}}"# let resp1 = #"{"duration":100,"speakers":[{"cluster":"Speaker_0","name":"Sarah","source":"voiceprint","match_similarity":0.7,"fingerprint":[0.3,0.4]},{"cluster":"Speaker_1","name":"Unknown_0","source":"unmatched"}],"segments":[{"start_ms":500,"end_ms":1500,"speaker":"Sarah","text":"hello"}],"fingerprints":{"Sarah":[0.3,0.4]},"models":{"diarization":"x"}}"# diff --git a/Ten31TranscriptsTests/VoiceprintStoreTests.swift b/Ten31TranscriptsTests/VoiceprintStoreTests.swift index 7833dc0..d3d427f 100644 --- a/Ten31TranscriptsTests/VoiceprintStoreTests.swift +++ b/Ten31TranscriptsTests/VoiceprintStoreTests.swift @@ -23,6 +23,14 @@ final class VoiceprintStoreTests: XCTestCase { XCTAssertEqual(store.entries["Grant"]?.calls, 1) } + func testStoresMicChannelSelf() throws { + let url = tempURL(); defer { try? FileManager.default.removeItem(at: url) } + let store = VoiceprintStore(fileURL: url) + let json = #"{"duration":10,"speakers":[{"cluster":"mic","name":"Grant","source":"mic_channel","fingerprint":[0.5,0.6]}],"segments":[],"fingerprints":{"Grant":[0.5,0.6]},"models":{}}"# + store.update(with: try JSONDecoder().decode(LabelMergeResponse.self, from: Data(json.utf8))) + XCTAssertEqual(store.knownVoiceprints()["Grant"], [0.5, 0.6]) // clean self print stored + } + func testPersistsAcrossInstancesAndIncrementsCalls() throws { let url = tempURL(); defer { try? FileManager.default.removeItem(at: url) } let store = VoiceprintStore(fileURL: url)