Files
ten31-transcripts/Ten31Transcripts/Detection/CallDetector.swift
T
Grant Gilliam f2856bc363 Fix Signal (Electron) call detection: resolve mic-using helper to its app
Signal 1:1 (and group) calls didn't auto-record. Root cause confirmed on-device:
Signal is Electron and holds the mic in a HELPER process
(org.whispersystems.signal-desktop.helper.Renderer, a child of the main app).
detectViaMicAttribution only matched PIDs listed in NSWorkspace.runningApplications
against the main bundle ID, so the helper's mic use was never attributed to Signal.
(Zoom worked = single native process; Meet worked = browser resolved.)

Fix: iterate the mic-using PIDs and resolve each to its owning app by walking the
parent-process chain (sysctl KERN_PROC_PID → ppid) until an NSRunningApplication is
found. Helper PIDs that return nil directly now resolve to the main app. Validated
against the live Signal helpers: pids 2383/2372 → org.whispersystems.signal-desktop.
Superset of the old behavior, so Zoom/Meet detection is preserved (browser case now
also more robust); our own recording is still skipped (selfPID).
2026-06-06 11:50:58 -05:00

275 lines
11 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import AppKit
import CoreGraphics
import Combine
import Darwin // sysctl / kinfo_proc for parent-PID resolution
/// Detects when the user joins/leaves a call and reports it via callbacks.
///
/// Heuristic: the mic is live system-wide AND a known call app is present:
/// Zoom/Teams/Signal by bundle ID, or Google Meet by a browser window whose
/// title looks like a Meet call (read via `CGWindowList`, using the Screen
/// Recording permission). Debounced so a quick unrelated mic use doesn't trigger.
///
/// Main-actor: all evaluation runs on the main thread.
@MainActor
final class CallDetector: ObservableObject {
/// The call services this detector can recognise.
enum DetectedApp: String, Equatable {
    case zoom
    case teams
    case signal
    case meet

    /// The case name as a string (identical to the raw value).
    var label: String {
        return rawValue
    }

    /// Human-readable product name for this service.
    var display: String {
        switch self {
        case .zoom:
            return "Zoom"
        case .teams:
            return "Microsoft Teams"
        case .signal:
            return "Signal"
        case .meet:
            return "Google Meet"
        }
    }
}
/// A detected call plus what to capture for visuals: the bundle ID of the owner
/// (the native app for Zoom/Teams/Signal, or the *browser* hosting the Meet tab)
/// and, for Meet, the exact `CGWindowID` of the matched call window, so the
/// observer captures that window instead of guessing the browser's largest one.
struct DetectedCall: Equatable {
/// Which call service was detected.
let app: DetectedApp
/// Bundle identifier of the owning application (for Meet, the hosting browser).
let bundleID: String
/// Window number of the matched Meet call window when one was identified;
/// native-app detections pass `nil`.
let windowID: CGWindowID?
}
/// Detector state exposed through the published `status` property.
enum Status: Equatable {
/// Detection is off: the initial state, and the state set by `disable()`.
case disabled
/// Detection is on and no call is currently recognised as active.
case listening
/// A call with the given app is in progress.
case inCall(DetectedApp)
}
/// Current detector state; readable by observers, written only inside this class.
@Published private(set) var status: Status = .disabled
/// Invoked from `fireOpen()` when a call is declared started.
var onCallStart: ((DetectedCall) -> Void)?
/// Invoked from `fireClose()` when a call is declared over.
var onCallEnd: (() -> Void)?
/// Microphone activity source; its `isRunning` and `onChange` drive evaluation.
private let mic = MicActivityMonitor()
/// Repeating timer that re-runs `evaluate()` every `pollInterval` seconds.
private var pollTimer: Timer?
/// Pending one-shot start debounce; nil when none is scheduled.
private var openTimer: Timer?
/// Pending one-shot end debounce; nil when none is scheduled.
private var closeTimer: Timer?
/// True from `fireOpen()` until `fireClose()` or `disable()` resets it.
private var inCall = false
/// The most recently detected call while `inCall`; nil otherwise.
private var currentCall: DetectedCall?
/// True between `enable()` and `disable()`.
private var enabled = false
/// Debounce, in seconds, between first seeing a call candidate and `fireOpen()`.
private let openDelay: TimeInterval = 2.0
/// Debounce, in seconds, between losing the call candidate and `fireClose()`.
private let closeDelay: TimeInterval = 4.0
/// Seconds between periodic re-evaluations driven by `pollTimer`.
private let pollInterval: TimeInterval = 3.0
/// Bundle identifiers of native call apps and the `DetectedApp` each maps to.
/// Two distinct Teams identifiers both map to `.teams`.
private static let nativeApps: [(id: String, app: DetectedApp)] = [
("us.zoom.xos", .zoom),
("com.microsoft.teams2", .teams),
("com.microsoft.teams", .teams),
("org.whispersystems.signal-desktop", .signal),
]
/// Bundle identifiers treated as browsers; a process owned by one of these is
/// only reported as a call when it also has a window that looks like Meet.
private static let browserIDs: Set<String> = [
"org.mozilla.firefox", "com.google.Chrome", "com.apple.Safari",
"company.thebrowser.Browser", "com.brave.Browser", "com.microsoft.edgemac",
]
/// Turns detection on: hooks the mic monitor, starts the poll timer, sets
/// `status` to `.listening` and runs one evaluation immediately.
/// Does nothing if detection is already enabled.
func enable() {
    if enabled { return }
    enabled = true

    // Re-evaluate whenever the mic monitor reports a change.
    mic.onChange = { [weak self] _ in
        self?.evaluate()
    }
    mic.start()
    status = .listening

    // Also re-evaluate on a fixed interval, independent of mic change events.
    let poller = Timer.scheduledTimer(withTimeInterval: pollInterval, repeats: true) { [weak self] _ in
        Task { @MainActor in
            self?.evaluate()
        }
    }
    pollTimer = poller

    evaluate()
}
/// Turns detection off: stops the mic monitor, cancels every timer and resets
/// call state, leaving `status` at `.disabled`.
/// Does nothing if detection is not enabled. `onCallEnd` is not invoked here,
/// even when a call was in progress.
func disable() {
    if !enabled { return }
    enabled = false
    mic.stop()

    pollTimer?.invalidate()
    pollTimer = nil
    cancelOpen()
    cancelClose()

    inCall = false
    currentCall = nil
    status = .disabled
}
// MARK: - Evaluation
/// Re-checks whether a call is present and advances the debounce state machine.
///
/// A call candidate exists when the mic is running and `detectApp()` finds an
/// app. With a candidate, any pending close is cancelled and either the active
/// call is refreshed or a start debounce is scheduled. Without one, any pending
/// start is cancelled and, if a call is active, an end debounce is scheduled.
private func evaluate() {
    guard enabled else { return }

    let detected: DetectedCall? = mic.isRunning ? detectApp() : nil

    guard let detected else {
        // Nothing on the mic that we recognise: abandon a pending start and,
        // if a call is active, schedule at most one close debounce.
        cancelOpen()
        if inCall, closeTimer == nil {
            closeTimer = Timer.scheduledTimer(withTimeInterval: closeDelay, repeats: false) { [weak self] _ in
                Task { @MainActor in
                    self?.fireClose()
                }
            }
        }
        return
    }

    // A call candidate is present, so any pending close is moot.
    cancelClose()

    if inCall {
        // Already in a call: keep the stored call and published status current.
        currentCall = detected
        status = .inCall(detected.app)
        return
    }

    // Not yet in a call: schedule the start debounce unless one is pending.
    guard openTimer == nil else { return }
    openTimer = Timer.scheduledTimer(withTimeInterval: openDelay, repeats: false) { [weak self] _ in
        Task { @MainActor in
            self?.fireOpen()
        }
    }
}
/// Start-debounce handler: declares the call started if it is still present.
private func fireOpen() {
    openTimer = nil

    // The situation may have changed during the debounce window, so the call
    // is resolved again now rather than reusing what `evaluate()` saw.
    guard enabled, mic.isRunning else { return }
    guard let call = detectApp() else { return }
    guard !inCall else { return }

    inCall = true
    currentCall = call
    status = .inCall(call.app)
    onCallStart?(call)
}
/// End-debounce handler: declares the call over, unless it has come back.
///
/// Clears the pending close timer, then ends the call only when detection is
/// enabled, a call is active, and no call app is detected at fire time.
private func fireClose() {
    closeTimer = nil
    guard enabled, inCall else { return }

    // Re-check at fire time, as `fireOpen()` does. A call app can regain the
    // mic after the last `evaluate()` but before this timer fires; ending the
    // call here would be followed by a fresh start, splitting one call in two.
    // If the call is back, stay in the call: the next `evaluate()` refreshes
    // state, or schedules a new close if the call disappears again.
    if mic.isRunning, detectApp() != nil { return }

    inCall = false
    currentCall = nil
    status = .listening
    onCallEnd?()
}
/// Cancels and forgets the pending start debounce, if any.
private func cancelOpen() {
    openTimer?.invalidate()
    openTimer = nil
}
/// Cancels and forgets the pending end debounce, if any.
private func cancelClose() {
    closeTimer?.invalidate()
    closeTimer = nil
}
// MARK: - App detection
/// Finds the call that is currently active, if any.
///
/// On macOS 14 and later, mic usage is attributed per process, so a call is
/// reported only when a known call app is actually using the mic. On earlier
/// systems the per-app call-window heuristic is used instead.
private func detectApp() -> DetectedCall? {
    guard #available(macOS 14.0, *) else {
        return detectViaWindowTitle()
    }
    return detectViaMicAttribution()
}
/// Detects a call by attributing current mic usage to a known call app.
///
/// Returns the first mic-using process whose owning app is a known native
/// call app, or a known browser that also has a Meet call window. Returns nil
/// when no such process exists.
@available(macOS 14.0, *)
private func detectViaMicAttribution() -> DetectedCall? {
    let activePIDs = AudioInputProcesses.micUsingPIDs()
    if activePIDs.isEmpty { return nil }

    let ownPID = NSRunningApplication.current.processIdentifier

    // The process holding the mic is often a helper subprocess (Electron apps
    // such as Signal, browser audio/renderer processes) rather than a listed
    // application, so each PID is resolved to the app that owns it.
    for helperPID in activePIDs {
        guard let owner = Self.owningApp(of: helperPID) else { continue }
        // Our own mic use must never count as a call.
        guard owner.processIdentifier != ownPID else { continue }
        guard let bundleID = owner.bundleIdentifier else { continue }

        if let match = Self.nativeApps.first(where: { $0.id == bundleID }) {
            // Native app: no specific window is recorded.
            return DetectedCall(app: match.app, bundleID: bundleID, windowID: nil)
        }

        // A browser on the mic counts only when it also shows a Meet call
        // window; other web apps using the mic are ignored. The matched
        // window's ID is recorded so that exact window can be captured.
        guard Self.browserIDs.contains(bundleID) else { continue }
        if let windowID = meetWindowID(owner.processIdentifier) {
            return DetectedCall(app: .meet, bundleID: bundleID, windowID: windowID)
        }
    }
    return nil
}
/// Resolves `pid` to the application that owns it.
///
/// Returns `pid`'s own `NSRunningApplication` when it has one; otherwise the
/// nearest ancestor process that does (helper processes run as children of
/// their main app). Gives up and returns nil after six processes have been
/// examined, or when the parent chain ends (parent PID of 1 or below, or a
/// process that is its own parent).
private static func owningApp(of pid: pid_t) -> NSRunningApplication? {
    var candidate = pid
    var examined = 0
    while examined < 6 {
        if let app = NSRunningApplication(processIdentifier: candidate) {
            return app
        }
        let parent = parentPID(of: candidate)
        if parent <= 1 || parent == candidate {
            return nil
        }
        candidate = parent
        examined += 1
    }
    return nil
}
/// Looks up the parent process ID of `pid` with `sysctl`.
///
/// Returns 0 when the call fails or reports no data.
private static func parentPID(of pid: pid_t) -> pid_t {
    var name: [Int32] = [CTL_KERN, KERN_PROC, KERN_PROC_PID, pid]
    let nameLength = u_int(name.count)
    var record = kinfo_proc()
    var length = MemoryLayout<kinfo_proc>.stride

    let status = sysctl(&name, nameLength, &record, &length, nil, 0)
    if status != 0 || length <= 0 {
        return 0
    }
    return record.kp_eproc.e_ppid
}
/// Finds the Google Meet call window belonging to `pid`.
///
/// Returns the window number of the first listed window that is owned by
/// `pid` and whose title passes `looksLikeMeet`, or nil when there is none.
/// A nil result therefore doubles as the answer to "is this a Meet call?".
private func meetWindowID(_ pid: pid_t) -> CGWindowID? {
    let listing = CGWindowListCopyWindowInfo([.excludeDesktopElements], kCGNullWindowID)
    guard let windows = listing as? [[String: Any]] else { return nil }

    let match = windows.first { entry in
        guard let owner = entry[kCGWindowOwnerPID as String] as? pid_t, owner == pid,
              let title = entry[kCGWindowName as String] as? String else {
            return false
        }
        return Self.looksLikeMeet(title)
    }
    return match?[kCGWindowNumber as String] as? CGWindowID
}
/// Fallback for macOS 13: detects a call from the presence of a call window.
///
/// Indexes the running native call apps and browsers by PID, then scans the
/// window list in order. The first titled window that is either a browser
/// window that looks like Meet, or a native app window matching that app's
/// call-window signature, determines the result.
private func detectViaWindowTitle() -> DetectedCall? {
    var nativeByPID: [pid_t: (app: DetectedApp, id: String)] = [:]
    var browserByPID: [pid_t: String] = [:]

    for running in NSWorkspace.shared.runningApplications {
        guard let bundleID = running.bundleIdentifier else { continue }
        let pid = running.processIdentifier
        if let match = Self.nativeApps.first(where: { $0.id == bundleID }) {
            nativeByPID[pid] = (match.app, bundleID)
        } else if Self.browserIDs.contains(bundleID) {
            browserByPID[pid] = bundleID
        }
    }

    // No candidate app is running, so there is no point listing windows.
    if nativeByPID.isEmpty && browserByPID.isEmpty { return nil }

    let listing = CGWindowListCopyWindowInfo([.excludeDesktopElements], kCGNullWindowID)
    guard let windows = listing as? [[String: Any]] else { return nil }

    for window in windows {
        guard let ownerPID = window[kCGWindowOwnerPID as String] as? pid_t else { continue }
        guard let title = window[kCGWindowName as String] as? String, !title.isEmpty else { continue }

        // Browser windows are checked first, then native app windows.
        if let browserID = browserByPID[ownerPID], Self.looksLikeMeet(title) {
            let number = window[kCGWindowNumber as String] as? CGWindowID
            return DetectedCall(app: .meet, bundleID: browserID, windowID: number)
        }
        if let native = nativeByPID[ownerPID], Self.isCallWindow(native.app, title) {
            return DetectedCall(app: native.app, bundleID: native.id, windowID: nil)
        }
    }
    return nil
}
/// Whether `title` matches `app`'s in-call window signature (used only by the
/// macOS 13 window-title fallback).
///
/// Matching is a case-insensitive substring test against the app's list of
/// signatures. `.meet` never matches here; it is handled via the browser path.
private static func isCallWindow(_ app: DetectedApp, _ title: String) -> Bool {
    let lowered = title.lowercased()

    let signatures: [String]
    switch app {
    case .zoom:
        // NOTE(review): "meeting" already covers "zoom meeting".
        signatures = ["zoom meeting", "meeting"]
    case .teams:
        signatures = ["meeting"]
    case .signal:
        signatures = ["signal call", "group call"]
    case .meet:
        return false
    }
    return signatures.contains { needle in lowered.contains(needle) }
}
/// Whether `title` is the window title of an ACTIVE Google Meet call.
///
/// Verified against real Firefox titles: in a call the title is "Meet - <code>"
/// (e.g. "Meet - tjh-pixe-ier"), while the home/lobby/"you left" pages are bare
/// "Meet" or "Google Meet". Matching only the "Meet <dash> " form is what lets
/// auto-stop fire when you leave, and avoids false-starting on the home page
/// or on unrelated titles such as "Meet the team" or "Zoom Meeting".
///
/// - Parameter title: The window title to test; compared case-insensitively.
/// - Returns: `true` when the title starts with "meet", a space, a dash
///   (hyphen-minus, en dash or em dash) and another space.
private static func looksLikeMeet(_ title: String) -> Bool {
    let lowered = title.lowercased()
    // Dash variants are written as escapes so look-alike characters cannot be
    // confused or silently lost: hyphen-minus, en dash (U+2013), em dash (U+2014).
    let dashes = ["-", "\u{2013}", "\u{2014}"]
    return dashes.contains { dash in lowered.hasPrefix("meet \(dash) ") }
}
}