import Foundation
import CoreVideo

/// Adapter for the native Zoom app (bundle ID `us.zoom.xos`).
///
/// Zoom marks the active speaker with a **coloured border** around the speaking
/// tile — a green/yellow outline by default, configurable in Zoom's settings.
/// The participant **name is drawn in the tile's bottom-LEFT corner**, so the
/// tile is estimated by extending up and to the right of the name.
///
/// Gotchas to calibrate against real fixtures later:
/// - **Speaker view** shows a single large tile that the active speaker fills,
///   so there is no useful per-tile border — handle it by attributing speech to
///   the large tile.
/// - The self-view tile and screen sharing both change the layout.
///
/// The detection *logic* is validated on synthetic frames; the geometry
/// constants are a first pass pending real Zoom screenshots.
struct ZoomAdapter: AppAdapter {
    /// Bundle identifiers this adapter applies to.
    static let bundleIDs = ["us.zoom.xos"]

    /// Version tag for this adapter.
    let adapterVersion = "zoom-0.1.0"

    /// Frame rate this adapter asks for.
    let preferredFPS = 3

    /// Shared grid-call analyzer, configured once in `init()` with the Zoom settings.
    private let analyzer: GridCallAnalyzer

    /// Creates an adapter whose analyzer is tuned for Zoom's coloured speaking border.
    init() {
        analyzer = GridCallAnalyzer(config: ZoomAdapter.makeConfig())
    }

    /// Builds the analyzer configuration used for Zoom.
    ///
    /// - Returns: A default `GridCallAnalyzer.Config` with the Zoom-specific
    ///   overrides applied.
    private static func makeConfig() -> GridCallAnalyzer.Config {
        var zoomConfig = GridCallAnalyzer.Config()

        // Border detection: look for the vivid green/yellow speaking border only,
        // never a white one.
        zoomConfig.detectColoredBorder = true
        zoomConfig.detectWhiteBorder = false

        // Zoom's frame is vivid (green #2d8c3c ≈ 0.68 saturation, yellow ≈ 0.96),
        // and the green→yellow hue band spans ~45–140°. The threshold is kept
        // generous but not trivial; rejecting bright video relies on the
        // all-four-sides distribution requirement, which is handled upstream.
        zoomConfig.colorSaturation = 0.45
        zoomConfig.colorHueRange = 40...150

        // Geometry: the name sits bottom-left, and the tile estimate is expanded
        // from it. First-pass constants pending real Zoom screenshots.
        zoomConfig.nameAnchor = .bottomLeft
        zoomConfig.tileExpandX = 3.0
        zoomConfig.tileExpandY = 5.0

        return zoomConfig
    }

    /// Analyzes one captured frame by forwarding it to the configured analyzer.
    ///
    /// - Parameters:
    ///   - frame: The pixel buffer to inspect.
    ///   - t: The time associated with the frame.
    /// - Returns: The speaker observations reported by the analyzer.
    func analyze(frame: CVPixelBuffer, at t: TimeInterval) -> [SpeakerObservation] {
        return analyzer.analyze(pixelBuffer: frame, at: t)
    }

    /// `CGImage` entry point, exposed for fixture/synthetic tests.
    ///
    /// - Parameters:
    ///   - cgImage: The image to inspect.
    ///   - t: The time associated with the image.
    /// - Returns: The speaker observations reported by the analyzer.
    func analyze(cgImage: CGImage, at t: TimeInterval) -> [SpeakerObservation] {
        return analyzer.analyze(cgImage: cgImage, at: t)
    }
}