Drop stuck whole-call visual spans at processing time

Defense-in-depth + salvage for sessions captured before the adapter fix: drop any vision-source span whose single unbroken duration covers ≥60% of the call. No one speaks that long without a break, so it's a stuck/false active-speaker cue that would dominate backend name attribution. Self (mic_vad) spans are never dropped. Applied to both the live and re-process paths. Test added; 66 pass.
2026-06-08 16:21:45 -05:00
parent 39beccf7f4
commit 1b6bb8ab67
2 changed files with 32 additions and 3 deletions
@@ -37,6 +37,18 @@ final class Phase5Tests: XCTestCase {
        XCTAssertEqual(asm.speakersFile.segments[0].start, 152, accuracy: 0.01)
    }

+    /// One unbroken vision span covering ~96% of the call is a stuck cue and must be removed;
+    /// a short vision span and a self (mic_vad) span of the same extent must both survive.
+    func testDropStuckSpansRemovesWholeCallCue() {
+        let stuckTile = VisualTimeline.Segment(start: 0, end: 1900, name: "Grant Gilliam", confidence: 1, source: "vision")
+        let realSpeaker = VisualTimeline.Segment(start: 100, end: 130, name: "Matt Odell", confidence: 0.9, source: "vision")
+        let selfSpan = VisualTimeline.Segment(start: 0, end: 1900, name: "Grant", confidence: 1, source: "mic_vad")
+        let kept = TranscriptPipeline.dropStuckSpans([stuckTile, realSpeaker, selfSpan], duration: 1976)
+        XCTAssertFalse(kept.contains(where: { $0.name == "Grant Gilliam" }))  // 1900 s of a 1976 s call in one span → gone
+        XCTAssertTrue(kept.contains(where: { $0.name == "Matt Odell" }))      // 30 s vision span is a plausible speaker turn
+        XCTAssertTrue(kept.contains(where: { $0.source == "mic_vad" }))       // self span is exempt regardless of length
+    }
+
    func testRebaseClipsAndRebases() throws {
        let segs = [
            VisualTimeline.Segment(start: 140, end: 160, name: "A", confidence: 0.9, source: "vision"),