f2856bc363
Signal 1:1 (and group) calls didn't auto-record. Root cause confirmed on-device: Signal is Electron and holds the mic in a HELPER process (org.whispersystems.signal-desktop.helper.Renderer, a child of the main app). detectViaMicAttribution only matched PIDs listed in NSWorkspace.runningApplications against the main bundle ID, so the helper's mic use was never attributed to Signal. (Zoom worked = single native process; Meet worked = browser resolved.) Fix: iterate the mic-using PIDs and resolve each to its owning app by walking the parent-process chain (sysctl KERN_PROC_PID → ppid) until an NSRunningApplication is found. Helper PIDs that return nil directly now resolve to the main app. Validated against the live Signal helpers: pids 2383/2372 → org.whispersystems.signal-desktop. Superset of the old behavior, so Zoom/Meet detection is preserved (browser case now also more robust); our own recording is still skipped (selfPID).
275 lines
11 KiB
Swift
275 lines
11 KiB
Swift
import AppKit
|
||
import CoreGraphics
|
||
import Combine
|
||
import Darwin // sysctl / kinfo_proc for parent-PID resolution
|
||
|
||
/// Detects when the user joins/leaves a call and reports it via callbacks.
///
/// Heuristic: the mic is live system-wide AND a known call app is present —
/// Zoom/Teams/Signal by bundle ID, or Google Meet by a browser window whose
/// title looks like a Meet call (read via `CGWindowList`, using the Screen
/// Recording permission). Debounced so a quick unrelated mic use doesn't trigger.
///
/// Main-actor: all evaluation runs on the main thread.
@MainActor
final class CallDetector: ObservableObject {

    /// The call apps this detector can recognise.
    enum DetectedApp: String, Equatable {
        case zoom, teams, signal, meet

        /// Short identifier for the app (the enum's raw value).
        var label: String { rawValue }

        /// Human-readable app name.
        var display: String {
            switch self {
            case .zoom: return "Zoom"
            case .teams: return "Microsoft Teams"
            case .signal: return "Signal"
            case .meet: return "Google Meet"
            }
        }
    }

    /// A detected call plus what to capture for visuals: the bundle ID of the owner
    /// (the native app for Zoom/Teams/Signal, or the *browser* hosting the Meet tab)
    /// and — for Meet — the exact `CGWindowID` of the matched call window, so the
    /// observer captures that window instead of guessing the browser's largest one.
    struct DetectedCall: Equatable {
        let app: DetectedApp
        let bundleID: String
        let windowID: CGWindowID?
    }

    /// Published detector state.
    enum Status: Equatable {
        /// The detector is not enabled.
        case disabled
        /// The detector is enabled and no call has been confirmed.
        case listening
        /// A call has been confirmed for the given app.
        case inCall(DetectedApp)
    }

    @Published private(set) var status: Status = .disabled

    /// Invoked once a candidate call has survived the open debounce.
    var onCallStart: ((DetectedCall) -> Void)?
    /// Invoked once no candidate call has been seen for the close debounce.
    var onCallEnd: (() -> Void)?

    private let mic = MicActivityMonitor()
    /// Repeating timer that re-runs `evaluate()` every `pollInterval` seconds.
    private var pollTimer: Timer?
    /// Pending debounce before a call start is reported (nil when none is pending).
    private var openTimer: Timer?
    /// Pending debounce before a call end is reported (nil when none is pending).
    private var closeTimer: Timer?
    private var inCall = false
    /// The call most recently detected while in a call; nil otherwise.
    private var currentCall: DetectedCall?
    private var enabled = false

    /// Seconds a candidate call must persist before `onCallStart` fires.
    private let openDelay: TimeInterval = 2.0
    /// Seconds without any candidate call before `onCallEnd` fires.
    private let closeDelay: TimeInterval = 4.0
    /// Seconds between periodic re-evaluations.
    private let pollInterval: TimeInterval = 3.0

    /// How many processes `owningApp(of:)` inspects: the PID itself plus its ancestors.
    private static let ownerLookupDepth = 6

    /// Bundle IDs of native call apps and the app each one maps to.
    private static let nativeApps: [(id: String, app: DetectedApp)] = [
        ("us.zoom.xos", .zoom),
        ("com.microsoft.teams2", .teams),
        ("com.microsoft.teams", .teams),
        ("org.whispersystems.signal-desktop", .signal),
    ]

    /// Bundle IDs of browsers that are checked for a Google Meet window.
    private static let browserIDs: Set<String> = [
        "org.mozilla.firefox", "com.google.Chrome", "com.apple.Safari",
        "company.thebrowser.Browser", "com.brave.Browser", "com.microsoft.edgemac",
    ]

    /// Starts mic monitoring and periodic polling, then evaluates immediately.
    /// Does nothing if the detector is already enabled.
    func enable() {
        guard !enabled else { return }
        enabled = true
        mic.onChange = { [weak self] _ in self?.evaluate() }
        mic.start()
        status = .listening
        pollTimer = Timer.scheduledTimer(withTimeInterval: pollInterval, repeats: true) { [weak self] _ in
            Task { @MainActor in self?.evaluate() }
        }
        evaluate()
    }

    /// Stops monitoring, cancels every pending timer and resets state to `.disabled`.
    /// Does nothing if the detector is not enabled. `onCallEnd` is NOT invoked here.
    func disable() {
        guard enabled else { return }
        enabled = false
        mic.stop()
        pollTimer?.invalidate(); pollTimer = nil
        cancelOpen(); cancelClose()
        inCall = false
        currentCall = nil
        status = .disabled
    }

    // MARK: - Evaluation

    /// Re-checks whether a call is present and (re)arms the debounce timers.
    ///
    /// A candidate cancels any pending close and, if no call is confirmed yet,
    /// arms the open debounce. No candidate cancels any pending open and, if a
    /// call is confirmed, arms the close debounce.
    private func evaluate() {
        guard enabled else { return }
        let candidate = mic.isRunning ? detectApp() : nil

        if let candidate {
            cancelClose()
            if inCall {
                updateActiveCall(candidate)
            } else if openTimer == nil {
                openTimer = Timer.scheduledTimer(withTimeInterval: openDelay, repeats: false) { [weak self] _ in
                    Task { @MainActor in self?.fireOpen() }
                }
            }
        } else {
            cancelOpen()
            if inCall && closeTimer == nil {
                closeTimer = Timer.scheduledTimer(withTimeInterval: closeDelay, repeats: false) { [weak self] _ in
                    Task { @MainActor in self?.fireClose() }
                }
            }
        }
    }

    /// Records `call` as the active call. `status` is only assigned when it
    /// actually changes: `@Published` emits on every assignment, so writing the
    /// same value on each poll would notify observers for nothing.
    private func updateActiveCall(_ call: DetectedCall) {
        currentCall = call
        let newStatus = Status.inCall(call.app)
        if status != newStatus { status = newStatus }
    }

    /// Open debounce elapsed: confirm the call and notify `onCallStart`.
    private func fireOpen() {
        openTimer = nil
        // Re-resolve the app at fire time (the debounce window may have changed it).
        // The cheap flags are checked before the detection query.
        guard enabled, !inCall, mic.isRunning, let call = detectApp() else { return }
        inCall = true
        updateActiveCall(call)
        onCallStart?(call)
    }

    /// Close debounce elapsed: end the call and notify `onCallEnd`.
    private func fireClose() {
        closeTimer = nil
        guard enabled, inCall else { return }
        // Re-resolve at fire time, mirroring `fireOpen()`. This runs from a Task
        // hop after the timer fired, so an `evaluate()` that re-detected the call
        // can slip in between; invalidating an already-fired timer cannot stop
        // this method, so the live call would otherwise be ended spuriously.
        if mic.isRunning, let call = detectApp() {
            updateActiveCall(call)
            return
        }
        inCall = false
        currentCall = nil
        status = .listening
        onCallEnd?()
    }

    private func cancelOpen() { openTimer?.invalidate(); openTimer = nil }
    private func cancelClose() { closeTimer?.invalidate(); closeTimer = nil }

    // MARK: - App detection

    /// A call is active when a known call app is actually using the mic.
    /// On macOS 14+ we attribute mic usage per-process (robust start AND stop,
    /// works for Signal/Zoom/Teams/Meet, ignores our own recording). On macOS 13
    /// we fall back to the per-app call-window heuristic.
    private func detectApp() -> DetectedCall? {
        if #available(macOS 14.0, *) {
            return detectViaMicAttribution()
        }
        return detectViaWindowTitle()
    }

    /// Returns the first mic-using process that resolves to a known call app,
    /// or nil when no such process is found.
    @available(macOS 14.0, *)
    private func detectViaMicAttribution() -> DetectedCall? {
        let micPIDs = AudioInputProcesses.micUsingPIDs()
        guard !micPIDs.isEmpty else { return nil }
        let selfPID = NSRunningApplication.current.processIdentifier

        // The process holding the mic is often a HELPER subprocess (Electron apps
        // like Signal, browser audio/renderer processes) that isn't itself a listed
        // application. Resolve each mic-using PID to its owning app by walking the
        // parent-process chain — that's what makes Signal/Teams (Electron) detect.
        for micPID in micPIDs {
            guard let app = Self.owningApp(of: micPID),
                  app.processIdentifier != selfPID,
                  let id = app.bundleIdentifier else { continue }
            if let native = Self.nativeApps.first(where: { $0.id == id }) {
                return DetectedCall(app: native.app, bundleID: id, windowID: nil) // native: capture largest owned window
            }
            // A browser using the mic + a Meet window = a Meet call. The mic state
            // gives reliable start/stop; the window check keeps non-Meet browser
            // mic use (other web apps) from being mislabeled as a Meet recording.
            // Capture that exact browser window (by ID), not just the browser.
            if Self.browserIDs.contains(id), let wid = meetWindowID(app.processIdentifier) {
                return DetectedCall(app: .meet, bundleID: id, windowID: wid)
            }
        }
        return nil
    }

    /// The application that owns `pid` — `pid` itself if it's an app, else the
    /// nearest ancestor that is (Electron/browser helpers run as children of their
    /// main app). Inspects at most `ownerLookupDepth` processes and gives up when
    /// the parent PID is 1 or lower (which includes the 0 that `parentPID(of:)`
    /// returns on failure). nil if no app was found.
    private static func owningApp(of pid: pid_t) -> NSRunningApplication? {
        var current = pid
        for _ in 0..<ownerLookupDepth {
            if let app = NSRunningApplication(processIdentifier: current) { return app }
            let parent = parentPID(of: current)
            guard parent > 1, parent != current else { return nil }
            current = parent
        }
        return nil
    }

    /// Parent PID of `pid` via sysctl, or 0 on failure.
    private static func parentPID(of pid: pid_t) -> pid_t {
        var info = kinfo_proc()
        var size = MemoryLayout<kinfo_proc>.stride
        var mib: [Int32] = [CTL_KERN, KERN_PROC, KERN_PROC_PID, pid]
        let rc = sysctl(&mib, u_int(mib.count), &info, &size, nil, 0)
        // A zero-length result is treated as failure as well as a non-zero return code.
        guard rc == 0, size > 0 else { return 0 }
        return info.kp_eproc.e_ppid
    }

    /// The `CGWindowID` of this PID's Google Meet call window (title "Meet - …"),
    /// or nil if none — also serves as the "is this a Meet call?" check.
    private func meetWindowID(_ pid: pid_t) -> CGWindowID? {
        guard let info = CGWindowListCopyWindowInfo([.excludeDesktopElements], kCGNullWindowID) as? [[String: Any]]
        else { return nil }
        for w in info {
            // The window number is part of the guard so that an entry without a
            // usable ID is skipped instead of ending the search with nil.
            guard let wpid = w[kCGWindowOwnerPID as String] as? pid_t, wpid == pid,
                  let title = w[kCGWindowName as String] as? String, Self.looksLikeMeet(title),
                  let wid = w[kCGWindowNumber as String] as? CGWindowID else { continue }
            return wid
        }
        return nil
    }

    /// macOS 13 fallback: detect by the presence of a call WINDOW per app.
    private func detectViaWindowTitle() -> DetectedCall? {
        var pidToApp: [pid_t: (app: DetectedApp, id: String)] = [:]
        var browserPIDs: [pid_t: String] = [:]
        for app in NSWorkspace.shared.runningApplications {
            guard let id = app.bundleIdentifier else { continue }
            if let native = Self.nativeApps.first(where: { $0.id == id }) {
                pidToApp[app.processIdentifier] = (native.app, id)
            } else if Self.browserIDs.contains(id) {
                browserPIDs[app.processIdentifier] = id
            }
        }
        // No known call app or browser is running: skip the window query.
        guard !pidToApp.isEmpty || !browserPIDs.isEmpty else { return nil }
        guard let infoList = CGWindowListCopyWindowInfo([.excludeDesktopElements], kCGNullWindowID) as? [[String: Any]] else {
            return nil
        }
        for info in infoList {
            guard let pid = info[kCGWindowOwnerPID as String] as? pid_t,
                  let title = info[kCGWindowName as String] as? String,
                  !title.isEmpty else { continue }
            if let id = browserPIDs[pid], Self.looksLikeMeet(title) {
                return DetectedCall(app: .meet, bundleID: id,
                                    windowID: info[kCGWindowNumber as String] as? CGWindowID)
            }
            if let native = pidToApp[pid], Self.isCallWindow(native.app, title) {
                return DetectedCall(app: native.app, bundleID: native.id, windowID: nil)
            }
        }
        return nil
    }

    /// Per-app in-call window-title signatures (macOS 13 fallback only).
    /// Matching is case-insensitive.
    private static func isCallWindow(_ app: DetectedApp, _ title: String) -> Bool {
        let t = title.lowercased()
        switch app {
        // Any title containing "meeting" matches; that already covers "zoom meeting".
        case .zoom, .teams: return t.contains("meeting")
        case .signal: return t.contains("signal call") || t.contains("group call")
        case .meet: return false // handled via the browser path above
        }
    }

    /// Match an ACTIVE Google Meet call. Verified against real Firefox titles:
    /// in a call the title is "Meet - <code>" (e.g. "Meet - tjh-pixe-ier"), while
    /// the home/lobby/"you left" pages are bare "Meet" or "Google Meet". Matching
    /// only the "Meet - …" form is what lets auto-STOP fire when you leave (and
    /// avoids false-starting on the home page). Also excludes "Zoom Meeting" etc.
    private static func looksLikeMeet(_ title: String) -> Bool {
        let t = title.lowercased()
        // Hyphen, en dash and em dash separators are all accepted.
        return t.hasPrefix("meet - ") || t.hasPrefix("meet – ") || t.hasPrefix("meet — ")
    }
}
|