Files
ten31-transcripts/Ten31TranscriptsTests/Phase5Tests.swift
T
Grant Gilliam 1b6bb8ab67 Drop stuck whole-call visual spans at processing time
Defense-in-depth + salvage for sessions captured before the adapter fix: drop any
vision-source span whose single unbroken duration covers ≥60% of the call. No one
speaks that long without a break, so it's a stuck/false active-speaker cue that
would dominate backend name attribution. Self (mic_vad) spans are never dropped.
Applied to both the live and re-process paths. Test added; 66 pass.
2026-06-08 16:21:45 -05:00

107 lines
7.2 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import XCTest
@testable import Ten31Transcripts
/// Tests for chunk planning (`SessionPackager.planChunks`), timeline rebasing
/// (`SessionPackager.rebasedTimelineData` / `rebasedSelfVadData`), stuck-span
/// filtering (`TranscriptPipeline.dropStuckSpans`) and multi-chunk assembly
/// (`TranscriptAssembler.assemble`).
///
/// Every count assertion that is followed by subscripting is paired with a
/// `guard`: `XCTAssertEqual` records a failure but keeps executing, so without
/// the guard a wrong count would trap on the subscript and abort the test run.
final class Phase5Tests: XCTestCase {

    // MARK: - Helpers

    /// Decodes an inline JSON fixture into a `LabelMergeResponse`.
    /// - Throws: Any decoding error, which fails the calling test instead of crashing it.
    private func decodeResponse(_ json: String) throws -> LabelMergeResponse {
        try JSONDecoder().decode(LabelMergeResponse.self, from: Data(json.utf8))
    }

    /// Parses serialized data as a top-level JSON array of objects.
    /// - Throws: If the data is not valid JSON or the top level is not an array of objects.
    private func jsonObjects(from data: Data) throws -> [[String: Any]] {
        try XCTUnwrap(JSONSerialization.jsonObject(with: data) as? [[String: Any]])
    }

    // MARK: - Chunk planning

    /// A 70-second call, planned with `planChunks`' default chunk/overlap
    /// arguments, yields a single chunk that ends at 70.
    func testPlanChunksShort() {
        let chunks = SessionPackager.planChunks(durationSec: 70)
        XCTAssertEqual(chunks.count, 1)
        // Count failure is already recorded above; do not trap on the subscript.
        guard chunks.count >= 1 else { return }
        XCTAssertEqual(chunks[0].end, 70, accuracy: 0.001)
    }

    /// A 400-second call with 150-second chunks and a 15-second margin yields
    /// three chunks whose bodies are contiguous and whose windows overlap.
    func testPlanChunksLongOverlapsWindowsWithContiguousBodies() {
        let chunks = SessionPackager.planChunks(durationSec: 400, chunkSeconds: 150, overlapSeconds: 15)
        XCTAssertEqual(chunks.count, 3)
        guard chunks.count >= 3 else { return }

        // Owned bodies tile the call with no gaps/overlap.
        XCTAssertEqual(chunks[0].bodyStart, 0)
        XCTAssertEqual(chunks[0].bodyEnd, 150)
        XCTAssertEqual(chunks[1].bodyStart, 150)
        XCTAssertEqual(chunks[1].bodyEnd, 300)
        XCTAssertEqual(chunks[2].bodyEnd, 400)

        // Sliced windows overlap by the margin (and clamp at the ends).
        XCTAssertEqual(chunks[0].start, 0)
        XCTAssertEqual(chunks[0].end, 165)                   // +15 trailing
        XCTAssertEqual(chunks[1].start, 135)                 // -15 leading
        XCTAssertLessThan(chunks[1].start, chunks[0].end)    // windows overlap
        XCTAssertEqual(chunks[2].end, 400)                   // clamped
    }

    // MARK: - Assembly

    /// A segment reported by two overlapping chunks is kept exactly once.
    func testAssembleDedupsOverlapByBody() throws {
        // A segment at global 152-156 s sits in chunk1's body but also in chunk0's
        // trailing margin (overlap). It must be kept exactly once (by chunk1).
        // Chunk0 starts at 0, so it reports 152000-156000 ms; chunk1 starts at
        // 135, so it reports the same speech at 17000-21000 ms.
        let marginJSON = #"{"duration":165,"speakers":[{"cluster":"S0","name":"A","source":"visual","overlap_confidence":0.9}],"segments":[{"start_ms":152000,"end_ms":156000,"speaker":"A","text":"boundary"}],"fingerprints":{},"models":{}}"#
        let bodyJSON = #"{"duration":180,"speakers":[{"cluster":"S0","name":"A","source":"visual","overlap_confidence":0.9}],"segments":[{"start_ms":17000,"end_ms":21000,"speaker":"A","text":"boundary"}],"fingerprints":{},"models":{}}"#
        let marginResponse = try decodeResponse(marginJSON)
        let bodyResponse = try decodeResponse(bodyJSON)

        let assembled = TranscriptAssembler.assemble(sessionId: "s", app: "meet", chunks: [
            .init(chunkStart: 0, response: marginResponse, bodyStart: 0, bodyEnd: 150),
            .init(chunkStart: 135, response: bodyResponse, bodyStart: 150, bodyEnd: 300),
        ])

        let segments = assembled.speakersFile.segments
        XCTAssertEqual(segments.count, 1) // deduped
        guard segments.count >= 1 else { return }
        XCTAssertEqual(segments[0].start, 152, accuracy: 0.01)
    }

    // MARK: - Stuck-span filtering

    /// `dropStuckSpans` removes a vision span that covers nearly the whole call
    /// while leaving a short vision span and the `mic_vad` span in place.
    func testDropStuckSpansRemovesWholeCallCue() {
        let spans = [
            // Stuck whole-call tile.
            VisualTimeline.Segment(start: 0, end: 1900, name: "Grant Gilliam", confidence: 1, source: "vision"),
            // Real, short span.
            VisualTimeline.Segment(start: 100, end: 130, name: "Matt Odell", confidence: 0.9, source: "vision"),
            // Self span: keep, even though it is just as long as the stuck one.
            VisualTimeline.Segment(start: 0, end: 1900, name: "Grant", confidence: 1, source: "mic_vad"),
        ]

        let kept = TranscriptPipeline.dropStuckSpans(spans, duration: 1976)

        XCTAssertFalse(kept.contains { $0.name == "Grant Gilliam" }) // 1900 of 1976 (~96% of call) in one span: dropped
        XCTAssertTrue(kept.contains { $0.name == "Matt Odell" })     // short real span: kept
        XCTAssertTrue(kept.contains { $0.source == "mic_vad" })      // self never dropped
    }

    // MARK: - Rebasing

    /// Visual timeline segments are clipped to the [150, 300] window and
    /// re-expressed relative to the window start.
    func testRebaseClipsAndRebases() throws {
        let segments = [
            VisualTimeline.Segment(start: 140, end: 160, name: "A", confidence: 0.9, source: "vision"),
            VisualTimeline.Segment(start: 200, end: 260, name: "B", confidence: 0.8, source: "vision"),
        ]

        let data = try SessionPackager.rebasedTimelineData(segments, start: 150, end: 300)
        let rows = try jsonObjects(from: data)

        XCTAssertEqual(rows.count, 2)
        guard rows.count >= 2 else { return }
        XCTAssertEqual(rows[0]["start"] as? Double, 0)   // 140 clipped up to 150, rebased to 0
        XCTAssertEqual(rows[0]["end"] as? Double, 10)    // 160 - 150
        XCTAssertEqual(rows[1]["start"] as? Double, 50)  // 200 - 150
        XCTAssertEqual(rows[1]["end"] as? Double, 110)   // 260 - 150
    }

    /// Self-VAD spans are clipped and rebased the same way, and the serialized
    /// rows carry no `name` key.
    func testRebaseSelfVadClipsAndRebases() throws {
        let spans = [
            VADSpan(start: 140, end: 160, confidence: 0.9),
            VADSpan(start: 200, end: 260, confidence: 0.8),
        ]

        let data = try SessionPackager.rebasedSelfVadData(spans, start: 150, end: 300)
        let rows = try jsonObjects(from: data)

        XCTAssertEqual(rows.count, 2)
        guard rows.count >= 2 else { return }
        XCTAssertEqual(rows[0]["start"] as? Double, 0)
        XCTAssertEqual(rows[0]["end"] as? Double, 10) // 140-160 clipped to start at 150, rebased to 0-10
        XCTAssertEqual(rows[1]["start"] as? Double, 50)
        XCTAssertEqual(rows[1]["end"] as? Double, 110)
        XCTAssertNil(rows[0]["name"]) // self_vad carries no name
    }

    // MARK: - Speaker unification

    /// When one name is attributed by different sources in different chunks,
    /// the assembled speaker entry reports the `mic_channel` source.
    func testAssembleRanksMicChannelOverVisual() throws {
        // Same person resolved by visual in one chunk and by the mic channel in
        // another: the mic-channel attribution (the user's own track) wins.
        let visualJSON = #"{"duration":100,"speakers":[{"cluster":"Speaker_0","name":"Grant","source":"visual","overlap_confidence":0.99,"fingerprint":[0.1]}],"segments":[],"fingerprints":{},"models":{}}"#
        let micJSON = #"{"duration":100,"speakers":[{"cluster":"mic","name":"Grant","source":"mic_channel","fingerprint":[0.2]}],"segments":[],"fingerprints":{"Grant":[0.2]},"models":{}}"#
        let visualResponse = try decodeResponse(visualJSON)
        let micResponse = try decodeResponse(micJSON)

        let assembled = TranscriptAssembler.assemble(
            sessionId: "s",
            app: "meet",
            chunks: [
                .init(chunkStart: 0, response: visualResponse),
                .init(chunkStart: 100, response: micResponse),
            ]
        )

        let grant = assembled.speakersFile.speakers.first { $0.name == "Grant" }
        XCTAssertEqual(grant?.source, "mic_channel")
    }

    /// Segment times are shifted by each chunk's start, speakers from both
    /// chunks are collected, and fingerprints exist only for named speakers.
    func testAssembleOffsetsAndUnifies() throws {
        let firstJSON = #"{"duration":150,"speakers":[{"cluster":"Speaker_0","name":"Grant","source":"visual","overlap_confidence":0.99,"fingerprint":[0.1,0.2]}],"segments":[{"start_ms":1000,"end_ms":2000,"speaker":"Grant","text":"hi"}],"fingerprints":{"Grant":[0.1,0.2]},"models":{"diarization":"x"}}"#
        let secondJSON = #"{"duration":100,"speakers":[{"cluster":"Speaker_0","name":"Sarah","source":"voiceprint","match_similarity":0.7,"fingerprint":[0.3,0.4]},{"cluster":"Speaker_1","name":"Unknown_0","source":"unmatched"}],"segments":[{"start_ms":500,"end_ms":1500,"speaker":"Sarah","text":"hello"}],"fingerprints":{"Sarah":[0.3,0.4]},"models":{"diarization":"x"}}"#
        let firstResponse = try decodeResponse(firstJSON)
        let secondResponse = try decodeResponse(secondJSON)

        let assembled = TranscriptAssembler.assemble(
            sessionId: "s",
            app: "meet",
            chunks: [
                .init(chunkStart: 0, response: firstResponse),
                .init(chunkStart: 150, response: secondResponse),
            ]
        )

        let segments = assembled.speakersFile.segments
        XCTAssertEqual(segments.count, 2)
        if segments.count >= 2 {
            XCTAssertEqual(segments[0].start, 1, accuracy: 0.001)      // 1000 ms + chunk start 0
            XCTAssertEqual(segments[1].start, 150.5, accuracy: 0.001)  // 500 ms + chunk start 150
        }

        XCTAssertEqual(assembled.speakersFile.speakers.count, 3)
        XCTAssertNotNil(assembled.fingerprints["Grant"])
        XCTAssertNotNil(assembled.fingerprints["Sarah"])
        XCTAssertNil(assembled.fingerprints["Unknown_0"])
    }
}