Files
Grant Gilliam 2191486506 Channel-verified self identity: the mic track is you
Grant's insight + proven on real session audio: we capture self (mic) and others
(system) as separate tracks, then throw the separation away by mixing to mono — so
the backend has to re-guess who's who. Analysis of a real call showed the channels
are cleanly separated (envelope corr 0.015, NO echo); Caitlyn's 'Go Bitcoin' was
11.8x louder in system than mic, yet the mono mix + noisy visual named it 'Grant'.

ChannelSelfVAD marks self-speech as windows where the mic is active AND louder than
system (mic > 1.5x system). Benefits: (1) self is identified by CHANNEL, not by the
on-screen name — set one name in Settings, no per-platform matching; (2) a remote
speaker (or room echo) can never be mislabeled as self. Computed at finalize from
the two finished WAVs; the live capture path is untouched. Falls back to mic-VAD if
tracks can't be read. SessionController feeds these spans to the backend timeline.

Validated on the real session: 16 self spans; 'Go Bitcoin' (72-74s) correctly
EXCLUDED, Grant's 49.9-53.3s / 62.6-64s correctly INCLUDED. 33/33 XCTest (5 new).
2026-06-06 12:24:29 -05:00

46 lines
2.2 KiB
Swift
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import XCTest
@testable import Ten31Transcripts
/// Tests for `ChannelSelfVAD.selfSpans`, driven by synthetic per-window RMS levels
/// for the mic and system channels.
///
/// Expectation under test: a window counts as self-speech only when the mic is
/// active AND louder than the system channel, so a remote speaker (heard in the
/// system channel) is never attributed to the local user.
final class ChannelSelfVADTests: XCTestCase {
    /// Window length, in seconds, passed to `selfSpans` by every test.
    private let windowDuration: Double = 0.05

    /// Builds `windows` consecutive RMS readings that all sit at `level`.
    private func flat(_ level: Float, windows: Int) -> [Float] {
        [Float](repeating: level, count: windows)
    }

    func testSelfSpanWhereMicDominates() {
        // Windows 0-9: local user talks (mic loud, system silent).
        // Windows 10-19: the other party talks (system loud, mic silent).
        let micLevels = flat(0.15, windows: 10) + flat(0.0, windows: 10)
        let systemLevels = flat(0.0, windows: 10) + flat(0.15, windows: 10)

        let detected = ChannelSelfVAD.selfSpans(
            micRMS: micLevels,
            systemRMS: systemLevels,
            windowSec: windowDuration
        )

        XCTAssertEqual(detected.count, 1)

        // -1 is a sentinel so that a missing span fails the comparisons below.
        let spanStart = detected.first?.start ?? -1
        let spanEnd = detected.first?.end ?? -1
        XCTAssertEqual(spanStart, 0.0, accuracy: 0.001)
        // Ten windows of 0.05 s end at ~0.5 s; allow one window of slack on the end.
        XCTAssertEqual(spanEnd, 0.5, accuracy: windowDuration + 0.001)
    }

    func testRemoteEchoIntoMicIsNotSelf() {
        // Remote party is loud in the system channel and only leaks faintly into
        // the mic (below the required margin), so nothing may be marked as self.
        let micLevels = flat(0.02, windows: 20)
        let systemLevels = flat(0.15, windows: 20)

        let detected = ChannelSelfVAD.selfSpans(
            micRMS: micLevels,
            systemRMS: systemLevels,
            windowSec: windowDuration
        )

        XCTAssertTrue(detected.isEmpty)
    }

    func testSilenceProducesNoSpans() {
        // Both channels carry the same near-silent level.
        let quiet = flat(0.003, windows: 20)

        let detected = ChannelSelfVAD.selfSpans(
            micRMS: quiet,
            systemRMS: quiet,
            windowSec: windowDuration
        )

        XCTAssertTrue(detected.isEmpty)
    }

    func testShortBlipDropped() {
        // Only 2 active windows, fewer than minWindows (3), so the blip is ignored.
        let micLevels: [Float] = [0.2, 0.2] + flat(0.0, windows: 18)
        let systemLevels = flat(0.0, windows: 20)

        let detected = ChannelSelfVAD.selfSpans(
            micRMS: micLevels,
            systemRMS: systemLevels,
            windowSec: windowDuration
        )

        XCTAssertTrue(detected.isEmpty)
    }

    func testHangoverBridgesShortGap() {
        // A brief 2-window dip inside one self turn must yield ONE span, not two.
        let dip: [Float] = [0.0, 0.0]
        let micLevels = flat(0.2, windows: 8) + dip + flat(0.2, windows: 8)
        let systemLevels = flat(0.0, windows: 18)

        let detected = ChannelSelfVAD.selfSpans(
            micRMS: micLevels,
            systemRMS: systemLevels,
            windowSec: windowDuration
        )

        XCTAssertEqual(detected.count, 1)
    }
}