2191486506
Grant's insight, proven on real session audio: we capture self (mic) and others (system) as separate tracks, then throw that separation away by mixing down to mono, so the backend has to re-guess who is who. Analysis of a real call showed the channels are cleanly separated (envelope correlation 0.015, NO echo); Caitlyn's 'Go Bitcoin' was 11.8x louder in the system channel than in the mic, yet the mono mix + noisy visual signal named it 'Grant'. ChannelSelfVAD marks self-speech as the windows where the mic is active AND louder than system (mic > 1.5 x system). Benefits: (1) self is identified by CHANNEL, not by the on-screen name — set one name in Settings, no per-platform matching; (2) a remote speaker (or room echo) can never be mislabeled as self. The spans are computed at finalize from the two finished WAVs; the live capture path is untouched. Falls back to mic-VAD if the tracks can't be read. SessionController feeds these spans to the backend timeline. Validated on the real session: 16 self spans; 'Go Bitcoin' (72-74s) correctly EXCLUDED, Grant's 49.9-53.3s / 62.6-64s correctly INCLUDED. 33/33 XCTest cases pass (5 new).
46 lines
2.2 KiB
Swift
46 lines
2.2 KiB
Swift
import XCTest
|
||
@testable import Ten31Transcripts
|
||
|
||
/// Channel-verified self detection: self = mic active AND louder than system, so a
/// remote speaker (in the system channel) is never mislabeled as the local user.
///
/// Every fixture is a pair of per-window RMS arrays (`micRMS`, `systemRMS`) handed to
/// `ChannelSelfVAD.selfSpans` together with a window length of `win` seconds.
final class ChannelSelfVADTests: XCTestCase {
    /// Window length, in seconds, passed as `windowSec` in every test.
    private let win = 0.05

    /// A run of mic-dominant windows yields exactly one self span covering that run.
    func testSelfSpanWhereMicDominates() throws {
        // Self talks windows 0–9 (mic loud, system silent), other talks 10–19.
        let mic = Array(repeating: Float(0.15), count: 10) + Array(repeating: Float(0.0), count: 10)
        let sys = Array(repeating: Float(0.0), count: 10) + Array(repeating: Float(0.15), count: 10)

        let spans = ChannelSelfVAD.selfSpans(micRMS: mic, systemRMS: sys, windowSec: win)

        XCTAssertEqual(spans.count, 1)
        // Unwrap rather than substituting a -1 sentinel: an empty result then fails
        // once, clearly, instead of as two confusing "-1 is not equal to ..." errors.
        let span = try XCTUnwrap(spans.first)
        XCTAssertEqual(span.start, 0.0, accuracy: 0.001)
        // The end is allowed to be off by up to one window (~windows 0–9).
        XCTAssertEqual(span.end, 0.5, accuracy: win + 0.001)
    }

    /// A remote speaker who only leaks faintly into the mic produces no self span.
    func testRemoteEchoIntoMicIsNotSelf() {
        // Remote person loud in system, only faintly echoed into mic (below margin).
        let mic = Array(repeating: Float(0.02), count: 20)
        let sys = Array(repeating: Float(0.15), count: 20)

        let spans = ChannelSelfVAD.selfSpans(micRMS: mic, systemRMS: sys, windowSec: win)

        XCTAssertTrue(spans.isEmpty)
    }

    /// Near-silent input on both channels produces no self span.
    func testSilenceProducesNoSpans() {
        let q = Array(repeating: Float(0.003), count: 20)

        let spans = ChannelSelfVAD.selfSpans(micRMS: q, systemRMS: q, windowSec: win)

        XCTAssertTrue(spans.isEmpty)
    }

    /// A mic burst shorter than the minimum span length is discarded.
    func testShortBlipDropped() {
        // 2 active windows < minWindows(3) → ignored.
        let mic: [Float] = [0.2, 0.2] + Array(repeating: Float(0.0), count: 18)
        let sys = Array(repeating: Float(0.0), count: 20)

        let spans = ChannelSelfVAD.selfSpans(micRMS: mic, systemRMS: sys, windowSec: win)

        XCTAssertTrue(spans.isEmpty)
    }

    /// A two-window dip inside a self turn does not split the turn.
    func testHangoverBridgesShortGap() {
        // Brief dip (2 windows) inside a self turn stays ONE span, not two.
        let mic = Array(repeating: Float(0.2), count: 8) + [0.0, 0.0] + Array(repeating: Float(0.2), count: 8)
        let sys = Array(repeating: Float(0.0), count: 18)

        let spans = ChannelSelfVAD.selfSpans(micRMS: mic, systemRMS: sys, windowSec: win)

        XCTAssertEqual(spans.count, 1)
    }
}
|