Wire visual capture into the recording lifecycle (failure-isolated)
Visual capture now runs alongside audio: on call start the session picks the
app's adapter, captures the call window on the SAME monotonic clock as the audio
(AudioRecorder.sharedT0Host), and on stop writes visual_timeline.json and hands
the backend the visual segments with mic-VAD self-spans merged. Any visual
failure (no adapter, no window, Screen Recording denied) leaves the session
recording audio-only — the proven path is never blocked or broken.
- CallDetector now emits DetectedCall{app, bundleID, windowID}, where windowID is
the exact CGWindowID of the matched Meet browser window (native apps report nil,
so capture falls back to the largest window).
- VisualCapture wraps VisualObserver + AdapterRegistry, writes visual_timeline.json.
- AudioRecorder.sharedT0Host() exposes the shared t0 for frame alignment.
Hardened per a 3-lens adversarial review (concurrency / failure-isolation /
data-flow), all 6 confirmed findings fixed:
- P0 (critical): startVisual could adopt a stale capture into a DIFFERENT session
(cross-session SCStream leak + visual_timeline.json written to the wrong
folder). Adoption is now gated on session identity (same generation, the same
recorder instance by ===, and a session still in .recording) and fails closed:
if any check fails, the stream is cancelled.
- P1: observer captured the browser's largest window, not the detected Meet
window. Now targets the exact CGWindowID (pickWindowIndex, unit-tested),
largest-area only as fallback.
- P2: a startVisual orphaned by a concurrent stop could leak a stream on quit.
inFlightVisual is registered before the await and drained in prepareForTermination.
- P3: trailing visual gap/segment ends could exceed duration_sec. Clamped in
VisualCapture (clampSegments/clampGaps, unit-tested).
- P4: capture pixel size used NSScreen.main scale; now uses the scale of the
display actually hosting the window (OCR clarity on secondary displays).
- VisualObserver.stop() bounds stopCapture() with a 3s timeout (mirrors audio) so
a wedged stream can't hang finalization.
25/25 XCTest cases pass. Live validation on real calls is still pending.
This commit is contained in:
@@ -0,0 +1,67 @@
|
||||
import XCTest
|
||||
import CoreGraphics
|
||||
@testable import Ten31Transcripts
|
||||
|
||||
/// Window-selection logic: prefer the exact detected window (e.g. the Meet browser
|
||||
/// window) by ID, else fall back to the largest owned window. This is the fix for
|
||||
/// the "captures the wrong browser window" data-flow bug.
|
||||
final class VisualObserverTests: XCTestCase {

    // MARK: - Fixtures

    /// Builds a window candidate with the given ID whose frame sits at the
    /// origin and has the given width and height.
    private func candidate(
        _ windowID: CGWindowID,
        _ width: CGFloat,
        _ height: CGFloat
    ) -> (id: CGWindowID, frame: CGRect) {
        let frame = CGRect(x: 0, y: 0, width: width, height: height)
        return (windowID, frame)
    }

    // MARK: - Window selection

    /// A candidate whose ID equals `preferredID` is expected to be picked even
    /// though larger candidates are present.
    func testPrefersMatchingWindowIDOverLargest() {
        // Window 42 stands in for the Meet window and is the smallest of the three.
        let windows = [
            candidate(7, 1600, 1000),
            candidate(42, 800, 600),
            candidate(9, 1200, 900),
        ]

        let picked = VisualObserver.pickWindowIndex(windows, preferredID: 42)

        XCTAssertEqual(picked, 1)
    }

    /// With no preferred ID, the index of the largest candidate is expected.
    func testFallsBackToLargestWhenNoPreferredID() {
        let windows = [
            candidate(7, 800, 600),
            candidate(9, 1600, 1000),
            candidate(11, 1200, 900),
        ]

        let picked = VisualObserver.pickWindowIndex(windows, preferredID: nil)

        // Index 1 is the 1600x1000 window.
        XCTAssertEqual(picked, 1)
    }

    /// A preferred ID that matches no candidate is expected to fall back to
    /// the largest candidate.
    func testFallsBackToLargestWhenPreferredIDMissing() {
        let windows = [
            candidate(7, 800, 600),
            candidate(9, 1600, 1000),
        ]

        // No candidate carries ID 999, i.e. the preferred window is gone.
        let picked = VisualObserver.pickWindowIndex(windows, preferredID: 999)

        XCTAssertEqual(picked, 1)
    }

    /// An empty candidate list is expected to yield `nil`, with or without a
    /// preferred ID.
    func testNilWhenNoCandidates() {
        XCTAssertNil(VisualObserver.pickWindowIndex([], preferredID: 42))
        XCTAssertNil(VisualObserver.pickWindowIndex([], preferredID: nil))
    }

    // MARK: - Duration clamping (visual_timeline.json internal consistency)

    /// Segments ending past the duration are expected to be clamped to it, and
    /// segments lying entirely past it are expected to be dropped.
    func testClampSegmentsToDuration() {
        let inside = VisualTimeline.Segment(
            start: 1, end: 5, name: "A", confidence: 0.9, source: "vision")
        // Ends past the 10 s duration.
        let straddling = VisualTimeline.Segment(
            start: 8, end: 12, name: "B", confidence: 0.8, source: "vision")
        // Lies entirely past the 10 s duration.
        let beyond = VisualTimeline.Segment(
            start: 10.5, end: 11, name: "C", confidence: 0.7, source: "vision")

        let clamped = VisualCapture.clampSegments([inside, straddling, beyond], to: 10)

        XCTAssertEqual(clamped.count, 2)
        XCTAssertEqual(clamped[0].end, 5, accuracy: 0.001)
        XCTAssertEqual(clamped[1].end, 10, accuracy: 0.001) // clamped
        XCTAssertFalse(clamped.contains(where: { $0.name == "C" })) // dropped
    }

    /// A gap ending past the duration is expected to be kept with its end
    /// clamped to the duration.
    func testClampGapsToDuration() {
        let inside = VisualTimeline.Gap(start: 2, end: 4, reason: "minimized")
        // Ends past the 10 s duration.
        let overrunning = VisualTimeline.Gap(start: 9, end: 13, reason: "minimized")

        let clamped = VisualCapture.clampGaps([inside, overrunning], to: 10)

        XCTAssertEqual(clamped.count, 2)
        XCTAssertEqual(clamped[1].end, 10, accuracy: 0.001)
    }

    /// A duration of 0 (no known duration) is expected to leave segments unchanged.
    func testClampPassthroughWhenDurationUnknown() {
        let original = [
            VisualTimeline.Segment(start: 1, end: 99, name: "A", confidence: 1, source: "vision")
        ]

        let result = VisualCapture.clampSegments(original, to: 0)

        XCTAssertEqual(result, original)
    }
}
|
||||
Reference in New Issue
Block a user