880b56e426
Visual capture now runs alongside audio: on call start the session picks the
app's adapter, captures the call window on the SAME monotonic clock as the audio
(AudioRecorder.sharedT0Host), and on stop writes visual_timeline.json and hands
the backend the visual segments with mic-VAD self-spans merged. Any visual
failure (no adapter, no window, Screen Recording denied) leaves the session
recording audio-only — the proven path is never blocked or broken.
- CallDetector now emits DetectedCall{app, bundleID, windowID}: windowID is the
exact CGWindowID of the matched Meet browser window (nil for native apps, which
fall back to the largest window).
- VisualCapture wraps VisualObserver + AdapterRegistry, writes visual_timeline.json.
- AudioRecorder.sharedT0Host() exposes the shared t0 for frame alignment.
Hardened per a 3-lens adversarial review (concurrency / failure-isolation /
data-flow), all 6 confirmed findings fixed:
- P0 (critical): startVisual could adopt a stale capture into a DIFFERENT session
(cross-session SCStream leak + visual_timeline.json written to the wrong
folder). Adoption is now gated on session identity (same generation, same
recorder instance via ===, and state still .recording) and fails closed: if any
check fails, the stream is cancelled.
- P1: observer captured the browser's largest window, not the detected Meet
window. Now targets the exact CGWindowID (pickWindowIndex, unit-tested),
largest-area only as fallback.
- P2: a startVisual orphaned by a concurrent stop could leak a stream on quit.
inFlightVisual is registered before the await and drained in prepareForTermination.
- P3: trailing visual gap/segment ends could exceed duration_sec. Clamped in
VisualCapture (clampSegments/clampGaps, unit-tested).
- P4: capture pixel size used NSScreen.main scale; now uses the scale of the
display actually hosting the window (OCR clarity on secondary displays).
- VisualObserver.stop() bounds stopCapture() with a 3s timeout (mirrors audio) so
a wedged stream can't hang finalization.
25/25 XCTest pass. Live validation on real calls still pending.
68 lines
2.9 KiB
Swift
68 lines
2.9 KiB
Swift
import XCTest
|
|
import CoreGraphics
|
|
@testable import Ten31Transcripts
|
|
|
|
/// Unit tests for the pure helpers behind visual capture.
///
/// Window selection (`VisualObserver.pickWindowIndex`): prefer the exact detected
/// window (e.g. the Meet browser window) by ID, else fall back to the largest owned
/// window. This is the fix for the "captures the wrong browser window" data-flow bug.
///
/// Duration clamping (`VisualCapture.clampSegments` / `VisualCapture.clampGaps`):
/// segment and gap ends must not run past the given duration
/// (visual_timeline.json internal consistency).
final class VisualObserverTests: XCTestCase {

    /// Builds a window candidate with the given ID and a `w` x `h` frame at the origin.
    private func c(_ id: CGWindowID, _ w: CGFloat, _ h: CGFloat) -> (id: CGWindowID, frame: CGRect) {
        (id, CGRect(x: 0, y: 0, width: w, height: h))
    }

    // MARK: - Window selection

    func testPrefersMatchingWindowIDOverLargest() {
        // The Meet window (id 42) is NOT the largest — must still be chosen by ID.
        let candidates = [c(7, 1600, 1000), c(42, 800, 600), c(9, 1200, 900)]
        let idx = VisualObserver.pickWindowIndex(candidates, preferredID: 42)
        XCTAssertEqual(idx, 1)
    }

    func testFallsBackToLargestWhenNoPreferredID() {
        let candidates = [c(7, 800, 600), c(9, 1600, 1000), c(11, 1200, 900)]
        let idx = VisualObserver.pickWindowIndex(candidates, preferredID: nil)
        XCTAssertEqual(idx, 1) // the 1600x1000 window
    }

    func testFallsBackToLargestWhenPreferredIDMissing() {
        let candidates = [c(7, 800, 600), c(9, 1600, 1000)]
        let idx = VisualObserver.pickWindowIndex(candidates, preferredID: 999) // gone
        XCTAssertEqual(idx, 1)
    }

    func testNilWhenNoCandidates() {
        XCTAssertNil(VisualObserver.pickWindowIndex([], preferredID: 42))
        XCTAssertNil(VisualObserver.pickWindowIndex([], preferredID: nil))
    }

    // MARK: - Duration clamping (visual_timeline.json internal consistency)

    func testClampSegmentsToDuration() {
        let segs = [
            VisualTimeline.Segment(start: 1, end: 5, name: "A", confidence: 0.9, source: "vision"),
            VisualTimeline.Segment(start: 8, end: 12, name: "B", confidence: 0.8, source: "vision"), // end past 10
            VisualTimeline.Segment(start: 10.5, end: 11, name: "C", confidence: 0.7, source: "vision"), // fully past → dropped
        ]
        let out = VisualCapture.clampSegments(segs, to: 10)
        XCTAssertEqual(out.count, 2)
        XCTAssertFalse(out.contains { $0.name == "C" }) // dropped
        // XCTest assertions do not abort the test by default; bail out here so the
        // subscripts below report a failure instead of trapping on a wrong count.
        guard out.count == 2 else { return }
        XCTAssertEqual(out[0].end, 5, accuracy: 0.001)
        XCTAssertEqual(out[1].end, 10, accuracy: 0.001) // clamped
    }

    func testClampGapsToDuration() {
        let gaps = [
            VisualTimeline.Gap(start: 2, end: 4, reason: "minimized"),
            VisualTimeline.Gap(start: 9, end: 13, reason: "minimized"), // clamped to 10
        ]
        let out = VisualCapture.clampGaps(gaps, to: 10)
        XCTAssertEqual(out.count, 2)
        // Same guard as above: never subscript past a count that already failed.
        guard out.count == 2 else { return }
        XCTAssertEqual(out[1].end, 10, accuracy: 0.001)
    }

    func testClampPassthroughWhenDurationUnknown() {
        let segs = [VisualTimeline.Segment(start: 1, end: 99, name: "A", confidence: 1, source: "vision")]
        XCTAssertEqual(VisualCapture.clampSegments(segs, to: 0), segs) // no duration → unchanged
    }
}
|