import Foundation
import CoreVideo

/// Adapter for Google Meet calls hosted in a browser tab (capture happens at the
/// browser-window level).
///
/// Meet marks the active speaker with a **coloured (Google-blue) ring/glow** around
/// that participant's tile, together with animated audio bars in the tile's mic chip.
/// Because the participant **name sits in the tile's bottom-LEFT corner**, the tile is
/// estimated by extending up and to the right of the name.
///
/// The detection *logic* has been validated on synthetic frames; the geometry
/// constants are a first pass and are still to be calibrated against real Meet
/// screenshots. Since Meet runs inside a browser, there is no Accessibility name
/// source to rely on — names come from OCR only.
struct MeetAdapter: AppAdapter {
    /// Bundle identifiers of browsers that can host a Meet tab.
    ///
    /// What gets captured is the window, not the app; CallDetector decides whether a
    /// browser window is a Meet call by looking at its title.
    static let bundleIDs = [
        "com.google.Chrome",
        "org.mozilla.firefox",
        "com.apple.Safari",
        "company.thebrowser.Browser",
        "com.brave.Browser",
        "com.microsoft.edgemac",
        "com.google.Chrome.canary",
        "org.chromium.Chromium",
    ]

    let adapterVersion = "meet-0.1.0"
    let preferredFPS = 3

    /// Grid analyzer configured for Meet; both `analyze` entry points forward to it.
    private let analyzer: GridCallAnalyzer

    /// Creates the adapter with an analyzer configured for Meet.
    init() {
        analyzer = GridCallAnalyzer(config: Self.makeAnalyzerConfig())
    }

    /// Analyzes one captured frame by forwarding it to the grid analyzer.
    ///
    /// - Parameters:
    ///   - frame: Pixel buffer of the captured frame.
    ///   - t: Time value passed through to the analyzer unchanged.
    /// - Returns: The observations produced by the analyzer for this frame.
    func analyze(frame: CVPixelBuffer, at t: TimeInterval) -> [SpeakerObservation] {
        return analyzer.analyze(pixelBuffer: frame, at: t)
    }

    /// `CGImage` variant of `analyze(frame:at:)`, exposed for fixture/synthetic tests.
    ///
    /// - Parameters:
    ///   - cgImage: Image to analyze.
    ///   - t: Time value passed through to the analyzer unchanged.
    /// - Returns: The observations produced by the analyzer for this image.
    func analyze(cgImage: CGImage, at t: TimeInterval) -> [SpeakerObservation] {
        return analyzer.analyze(cgImage: cgImage, at: t)
    }

    /// Builds the analyzer configuration used for Meet.
    private static func makeAnalyzerConfig() -> GridCallAnalyzer.Config {
        var meetConfig = GridCallAnalyzer.Config()
        // Participant name is in the bottom-left corner of the tile.
        meetConfig.nameAnchor = .bottomLeft
        // Look for the Google-blue speaking ring rather than a white border.
        meetConfig.detectColoredBorder = true
        meetConfig.detectWhiteBorder = false
        // First-pass geometry constants; not yet calibrated on real screenshots.
        meetConfig.tileExpandX = 3.0
        meetConfig.tileExpandY = 5.0
        return meetConfig
    }
}