Files
ten31-transcripts/Ten31TranscriptsTests/Phase5Tests.swift
T
Grant Gilliam 53d7fcdac0 Client: dual-channel label-merge (mic_file + system_file)
The backend shipped dual-channel mode; wire the client to it. We already capture
mic (you) and system (others) separately, so send them as two files instead of the
mono mix — fixing the misattribution at the source.

- SparkControlClient: labelMergeDual(mic_file, system_file, self_name, self_vad);
  multipart generalized to N files; shared POST/retry/decode extracted.
- SessionPackager.rebasedSelfVadData: chunk-local [{start,end}] for self_vad;
  sliceAudio reused for both tracks.
- TranscriptPipeline.process: dual-channel chunking (slice mic+system, rebase
  timeline + self_vad per chunk) when system audio is healthy; mono mixed-file
  fallback (self folded into the timeline) otherwise.
- VisualCapture.finish: write the full visual_timeline.json (remote + self merged)
  but return REMOTE (vision) segments only — self travels via the mic channel.
- TranscriptAssembler: rank mic_channel highest (the user's own track wins).
- VoiceprintStore: store the clean mic_channel self voiceprint.
- SessionController: pass mic/system URLs + remote timeline + channel self-spans +
  self_name + systemHealthy; self_vad.json now reflects the channel-verified spans.

Validated END-TO-END against the live backend on the real misattributing session:
'Go Bitcoin' (remote) is now attributed to Unknown_0, NOT the user; the user's own
lines come back source=mic_channel; per-channel ASR recovered fuller remote text.
36/36 XCTest (4 new: self_vad rebase, mic_channel ranking + voiceprint storage).
2026-06-06 13:15:29 -05:00

73 lines
4.5 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import XCTest
@testable import Ten31Transcripts
/// Tests for chunk planning, per-chunk rebasing of visual timelines and self-VAD
/// spans, and assembly of per-chunk label-merge responses.
///
/// Every test checks a collection's size before indexing into it. XCTest keeps
/// running a test after a failed assertion by default, so an unguarded subscript
/// after a failed `count` check would trap and abort the whole run instead of
/// reporting an ordinary test failure.
final class Phase5Tests: XCTestCase {

    // MARK: - Helpers

    /// Decodes a raw JSON string into a `LabelMergeResponse`.
    /// - Throws: Any decoding error raised by `JSONDecoder`.
    private func decodeResponse(_ json: String) throws -> LabelMergeResponse {
        return try JSONDecoder().decode(LabelMergeResponse.self, from: Data(json.utf8))
    }

    // MARK: - Chunk planning

    /// A 70-second duration with the default chunk length yields one chunk ending at 70.
    func testPlanChunksShort() throws {
        let chunks = SessionPackager.planChunks(durationSec: 70)
        XCTAssertEqual(chunks.count, 1)
        // Unwrap rather than subscript so an empty plan fails instead of trapping.
        let only = try XCTUnwrap(chunks.first)
        XCTAssertEqual(only.end, 70, accuracy: 0.001)
    }

    /// A 400-second duration with 150-second chunks yields three chunks.
    func testPlanChunksLong() {
        let chunks = SessionPackager.planChunks(durationSec: 400, chunkSeconds: 150)
        XCTAssertEqual(chunks.count, 3)
        // The count failure is already recorded above; bail out before indexing.
        guard chunks.count == 3 else { return }
        XCTAssertEqual(chunks[0].start, 0)
        XCTAssertEqual(chunks[0].end, 150)
        XCTAssertEqual(chunks[1].start, 150)
        XCTAssertEqual(chunks[2].end, 400)
    }

    // MARK: - Rebasing

    /// Timeline segments are clipped to the chunk window and shifted to chunk-local time.
    func testRebaseClipsAndRebases() throws {
        let segments = [
            VisualTimeline.Segment(start: 140, end: 160, name: "A", confidence: 0.9, source: "vision"),
            VisualTimeline.Segment(start: 200, end: 260, name: "B", confidence: 0.8, source: "vision"),
        ]
        let data = try SessionPackager.rebasedTimelineData(segments, start: 150, end: 300)
        let rebased = try XCTUnwrap(JSONSerialization.jsonObject(with: data) as? [[String: Any]])
        XCTAssertEqual(rebased.count, 2)
        guard rebased.count == 2 else { return }
        // 140...160 is clipped at the chunk start (150) and rebased to 0...10.
        XCTAssertEqual(rebased[0]["start"] as? Double, 0)
        XCTAssertEqual(rebased[0]["end"] as? Double, 10)
        // 200...260 lies fully inside the chunk and is only shifted by 150.
        XCTAssertEqual(rebased[1]["start"] as? Double, 50)
        XCTAssertEqual(rebased[1]["end"] as? Double, 110)
    }

    /// Self-VAD spans are clipped and rebased like timeline segments, but carry no name.
    func testRebaseSelfVadClipsAndRebases() throws {
        let spans = [
            VADSpan(start: 140, end: 160, confidence: 0.9),
            VADSpan(start: 200, end: 260, confidence: 0.8),
        ]
        let data = try SessionPackager.rebasedSelfVadData(spans, start: 150, end: 300)
        let rebased = try XCTUnwrap(JSONSerialization.jsonObject(with: data) as? [[String: Any]])
        XCTAssertEqual(rebased.count, 2)
        guard rebased.count == 2 else { return }
        XCTAssertEqual(rebased[0]["start"] as? Double, 0)
        XCTAssertEqual(rebased[0]["end"] as? Double, 10) // 160 clipped at the 150 base -> 0...10
        XCTAssertEqual(rebased[1]["start"] as? Double, 50)
        XCTAssertEqual(rebased[1]["end"] as? Double, 110)
        XCTAssertNil(rebased[0]["name"]) // self_vad carries no name
    }

    // MARK: - Assembly

    /// When one name is attributed by "visual" in one chunk and by "mic_channel"
    /// in another, the assembled speaker entry reports "mic_channel".
    func testAssembleRanksMicChannelOverVisual() throws {
        // Same person resolved by visual in one chunk and by the mic channel in
        // another - the mic-channel attribution (the user's own track) wins.
        let visual = #"{"duration":100,"speakers":[{"cluster":"Speaker_0","name":"Grant","source":"visual","overlap_confidence":0.99,"fingerprint":[0.1]}],"segments":[],"fingerprints":{},"models":{}}"#
        let mic = #"{"duration":100,"speakers":[{"cluster":"mic","name":"Grant","source":"mic_channel","fingerprint":[0.2]}],"segments":[],"fingerprints":{"Grant":[0.2]},"models":{}}"#
        let visualResponse = try decodeResponse(visual)
        let micResponse = try decodeResponse(mic)
        let assembled = TranscriptAssembler.assemble(
            sessionId: "s",
            app: "meet",
            chunks: [
                .init(chunkStart: 0, response: visualResponse),
                .init(chunkStart: 100, response: micResponse),
            ])
        let grant = assembled.speakersFile.speakers.first { $0.name == "Grant" }
        XCTAssertEqual(grant?.source, "mic_channel")
    }

    /// Segment times are offset by each chunk's start, speakers from all chunks
    /// are collected, and fingerprints are kept for the named speakers only.
    func testAssembleOffsetsAndUnifies() throws {
        let resp0 = #"{"duration":150,"speakers":[{"cluster":"Speaker_0","name":"Grant","source":"visual","overlap_confidence":0.99,"fingerprint":[0.1,0.2]}],"segments":[{"start_ms":1000,"end_ms":2000,"speaker":"Grant","text":"hi"}],"fingerprints":{"Grant":[0.1,0.2]},"models":{"diarization":"x"}}"#
        let resp1 = #"{"duration":100,"speakers":[{"cluster":"Speaker_0","name":"Sarah","source":"voiceprint","match_similarity":0.7,"fingerprint":[0.3,0.4]},{"cluster":"Speaker_1","name":"Unknown_0","source":"unmatched"}],"segments":[{"start_ms":500,"end_ms":1500,"speaker":"Sarah","text":"hello"}],"fingerprints":{"Sarah":[0.3,0.4]},"models":{"diarization":"x"}}"#
        let first = try decodeResponse(resp0)
        let second = try decodeResponse(resp1)
        let assembled = TranscriptAssembler.assemble(
            sessionId: "s",
            app: "meet",
            chunks: [
                .init(chunkStart: 0, response: first),
                .init(chunkStart: 150, response: second),
            ])

        let segments = assembled.speakersFile.segments
        XCTAssertEqual(segments.count, 2)
        // Only index into the segments when both are present; the remaining
        // assertions still run either way.
        if segments.count == 2 {
            // start_ms 1000 in the chunk starting at 0 -> 1.0
            XCTAssertEqual(segments[0].start, 1, accuracy: 0.001)
            // start_ms 500 in the chunk starting at 150 -> 150.5
            XCTAssertEqual(segments[1].start, 150.5, accuracy: 0.001)
        }

        XCTAssertEqual(assembled.speakersFile.speakers.count, 3)
        XCTAssertNotNil(assembled.fingerprints["Grant"])
        XCTAssertNotNil(assembled.fingerprints["Sarah"])
        XCTAssertNil(assembled.fingerprints["Unknown_0"])
    }
}