import Foundation
import CoreVideo

/// Google Meet adapter (browser tab — capture is at the browser-window level).
///
/// Meet's active-speaker cue is a **coloured (Google-blue) ring/glow** around the
/// speaking participant's tile, plus animated audio bars in the tile's mic chip.
/// The participant **name sits in the tile's bottom-LEFT corner**, so the tile is
/// estimated extending up and to the right of the name.
///
/// Detection *logic* is validated on synthetic frames; the geometry constants are a
/// first pass and will be calibrated against real Meet screenshots. Meet runs in a
/// browser, so there's no Accessibility name source we rely on — OCR only.
struct MeetAdapter: AppAdapter {
    /// Bundle identifiers of the browsers that may host a Meet tab.
    ///
    /// Capture targets the browser *window*, not the app; CallDetector is what
    /// decides, from the window title, that a browser window is a Meet call.
    static let bundleIDs: [String] = [
        "com.google.Chrome",
        "org.mozilla.firefox",
        "com.apple.Safari",
        "company.thebrowser.Browser",
        "com.brave.Browser",
        "com.microsoft.edgemac",
        "com.google.Chrome.canary",
        "org.chromium.Chromium",
    ]

    /// Version tag for this adapter.
    let adapterVersion: String = "meet-0.1.0"

    /// Preferred frame rate for this adapter, in frames per second.
    let preferredFPS: Int = 3

    /// Grid analyzer, configured for Meet, that every `analyze` call forwards to.
    private let analyzer: GridCallAnalyzer

    /// Creates the adapter with a `GridCallAnalyzer` tuned for Meet's layout.
    init() {
        self.analyzer = GridCallAnalyzer(config: MeetAdapter.makeConfig())
    }

    /// Builds the Meet-specific analyzer configuration on top of the defaults.
    private static func makeConfig() -> GridCallAnalyzer.Config {
        var meetConfig = GridCallAnalyzer.Config()

        // The participant name sits in the tile's bottom-left corner.
        meetConfig.nameAnchor = .bottomLeft

        // Active speaker is marked by a Google-blue ring, so look for a coloured
        // border; white-border detection is switched off.
        meetConfig.detectColoredBorder = true
        meetConfig.detectWhiteBorder = false

        // First-pass tile geometry constants, still to be calibrated.
        // NOTE(review): presumably expansion factors relative to the detected name
        // box — confirm against GridCallAnalyzer.Config.
        meetConfig.tileExpandX = 3.0
        meetConfig.tileExpandY = 5.0

        return meetConfig
    }

    /// Analyzes one captured frame by forwarding it to the grid analyzer.
    ///
    /// - Parameters:
    ///   - frame: Pixel buffer to analyze.
    ///   - timestamp: Time associated with the frame; passed through unchanged.
    /// - Returns: The observations produced by the analyzer for this frame.
    func analyze(frame: CVPixelBuffer, at timestamp: TimeInterval) -> [SpeakerObservation] {
        return analyzer.analyze(pixelBuffer: frame, at: timestamp)
    }

    /// `CGImage` variant of `analyze`, exposed for fixture/synthetic tests.
    ///
    /// - Parameters:
    ///   - cgImage: Image to analyze.
    ///   - timestamp: Time associated with the image; passed through unchanged.
    /// - Returns: The observations produced by the analyzer for this image.
    func analyze(cgImage: CGImage, at timestamp: TimeInterval) -> [SpeakerObservation] {
        return analyzer.analyze(cgImage: cgImage, at: timestamp)
    }
}