Drop stuck whole-call visual spans at processing time

Defense-in-depth, plus salvage for sessions captured before the adapter fix: drop any
vision-source span whose single unbroken duration covers ≥60% of the call. No one
speaks that long without a break, so such a span is a stuck or false active-speaker cue
that would otherwise dominate backend name attribution. Self (mic_vad) spans are never
dropped. Applied to both the live and re-process paths. Test added; 66 tests pass.
This commit is contained in:
Grant Gilliam
2026-06-08 16:21:45 -05:00
parent 39beccf7f4
commit 1b6bb8ab67
2 changed files with 32 additions and 3 deletions
+12
View File
@@ -37,6 +37,18 @@ final class Phase5Tests: XCTestCase {
XCTAssertEqual(asm.speakersFile.segments[0].start, 152, accuracy: 0.01)
}
/// Checks `TranscriptPipeline.dropStuckSpans` against three fixtures: a vision span
/// covering almost the whole call (expected to be dropped), a short vision span
/// (expected to survive), and a `mic_vad` span as long as the stuck one (expected
/// to survive because self spans are exempt).
func testDropStuckSpansRemovesWholeCallCue() {
    // One unbroken vision span, 0...1900 of a 1976-long call (~96%): the stuck tile.
    let stuckTile = VisualTimeline.Segment(
        start: 0, end: 1900, name: "Grant Gilliam", confidence: 1, source: "vision")
    // A short, plausible vision span that must not be touched.
    let realSpan = VisualTimeline.Segment(
        start: 100, end: 130, name: "Matt Odell", confidence: 0.9, source: "vision")
    // Same extent as the stuck tile, but sourced from mic_vad (self), so it is kept.
    let selfSpan = VisualTimeline.Segment(
        start: 0, end: 1900, name: "Grant", confidence: 1, source: "mic_vad")

    let kept = TranscriptPipeline.dropStuckSpans([stuckTile, realSpan, selfSpan], duration: 1976)

    // The whole-call vision cue is gone.
    XCTAssertNil(kept.first(where: { $0.name == "Grant Gilliam" }))
    // The short real vision span is still present.
    XCTAssertNotNil(kept.first(where: { $0.name == "Matt Odell" }))
    // The self (mic_vad) span is never dropped, regardless of its length.
    XCTAssertNotNil(kept.first(where: { $0.source == "mic_vad" }))
}
func testRebaseClipsAndRebases() throws {
let segs = [
VisualTimeline.Segment(start: 140, end: 160, name: "A", confidence: 0.9, source: "vision"),