863136aeec
Phase 2 (call detection): CallDetector using CoreAudio per-process mic attribution (anarlog technique) — robust start+stop for Zoom/Teams/Signal/Meet, ignoring our own recording; auto-record toggle. Built; pending live multi-app confirmation by the user. Phase 3 (visual timeline foundation): AppAdapter protocol + SpeakerObservation, TimelineBuilder (hysteresis/overlap/self-merge/aliases), VisualTimeline (schema 1.1), TextRecognizer (Vision OCR), FrameSampler + GridCallAnalyzer (name OCR + saturated-highlight active-speaker attribution), SignalAdapter, VisualObserver (window capture; frames released, never saved; minimized->visual_gap, idle != gap). Synthetic-frame tested; adapter geometry pending real Signal fixtures + live VisualObserver validation. Phase 5 (backend hand-off): SparkControlClient (multipart label-merge, sequential, TLS-skip, 503 Retry-After/413), SessionPackager (chunk plan + WAV slice + timeline slice/rebase), TranscriptAssembler + SpeakersFile, TranscriptPipeline. Validated END-TO-END against the live backend (chunk -> label-merge -> speakers.json). Phase 6 (voiceprints): VoiceprintStore (known_voiceprints, persist named fingerprints, skip Unknown). Wired: 'Send to backend' button + transcript status, auto-send toggle (default off) + self-name setting. All adversarial-review findings fixed. App + XCTest suite build; tests pass.
62 lines
2.6 KiB
Swift
62 lines
2.6 KiB
Swift
import CoreAudio
|
|
import Foundation
|
|
|
|
/// Lists the PIDs of processes currently using an audio **input** (the mic), via
/// the CoreAudio process-object API (macOS 14+).
///
/// This is how we attribute mic usage to a *specific* app — e.g. "is Signal in a
/// call?" — which is far more robust than matching window titles, and it works
/// uniformly for Zoom/Teams/Signal and browser calls (Meet). It also lets us
/// ignore our own recording: we look at the *call app's* PID, not the global mic,
/// so a call's end is detected even while we keep the mic open.
///
/// Approach mirrors fastrepl/anarlog's `list_mic_using_apps`.
@available(macOS 14.0, *)
enum AudioInputProcesses {
    /// Returns the PIDs of every CoreAudio process object that reports it is
    /// currently running audio input.
    ///
    /// Best-effort: any CoreAudio failure while listing process objects yields an
    /// empty set, and a process object whose properties cannot be read is skipped.
    ///
    /// - Returns: The set of PIDs using an audio input; empty when none are found
    ///   or when the process-object list cannot be read.
    static func micUsingPIDs() -> Set<pid_t> {
        let systemObject = AudioObjectID(kAudioObjectSystemObject)
        var listAddr = AudioObjectPropertyAddress(
            mSelector: kAudioHardwarePropertyProcessObjectList,
            mScope: kAudioObjectPropertyScopeGlobal,
            mElement: kAudioObjectPropertyElementMain)

        // Step 1: ask how many bytes the process-object list currently needs.
        var dataSize: UInt32 = 0
        guard AudioObjectGetPropertyDataSize(systemObject, &listAddr, 0, nil, &dataSize) == noErr,
              dataSize > 0 else { return [] }

        let elementSize = MemoryLayout<AudioObjectID>.size
        let capacity = Int(dataSize) / elementSize
        guard capacity > 0 else { return [] }

        // Step 2: fetch the list. Pass the exact byte size of the buffer we
        // allocated so the declared size can never exceed the real storage.
        var processes = [AudioObjectID](repeating: 0, count: capacity)
        dataSize = UInt32(capacity * elementSize)
        guard AudioObjectGetPropertyData(systemObject, &listAddr, 0, nil, &dataSize, &processes) == noErr
        else { return [] }

        // The list can shrink between the two calls; `dataSize` now holds the
        // number of bytes actually written. Only look at those entries instead
        // of probing the zero-filled tail of the buffer.
        let returned = min(capacity, Int(dataSize) / elementSize)

        var pids = Set<pid_t>()
        for process in processes.prefix(returned) where isRunningInput(process) {
            if let owner = pid(of: process) { pids.insert(owner) }
        }
        return pids
    }

    /// Reports whether the given process object is currently running audio input.
    ///
    /// - Parameter process: A CoreAudio process object ID.
    /// - Returns: `true` when `kAudioProcessPropertyIsRunningInput` reads as
    ///   non-zero; `false` when it is zero or the property cannot be read.
    private static func isRunningInput(_ process: AudioObjectID) -> Bool {
        var addr = AudioObjectPropertyAddress(
            mSelector: kAudioProcessPropertyIsRunningInput,
            mScope: kAudioObjectPropertyScopeGlobal,
            mElement: kAudioObjectPropertyElementMain)
        var value: UInt32 = 0
        var size = UInt32(MemoryLayout<UInt32>.size)
        guard AudioObjectGetPropertyData(process, &addr, 0, nil, &size, &value) == noErr else { return false }
        return value != 0
    }

    /// Looks up the PID that owns the given process object.
    ///
    /// - Parameter process: A CoreAudio process object ID.
    /// - Returns: The value of `kAudioProcessPropertyPID`, or `nil` when the
    ///   property cannot be read.
    private static func pid(of process: AudioObjectID) -> pid_t? {
        var addr = AudioObjectPropertyAddress(
            mSelector: kAudioProcessPropertyPID,
            mScope: kAudioObjectPropertyScopeGlobal,
            mElement: kAudioObjectPropertyElementMain)
        var value: pid_t = 0
        var size = UInt32(MemoryLayout<pid_t>.size)
        guard AudioObjectGetPropertyData(process, &addr, 0, nil, &size, &value) == noErr else { return nil }
        return value
    }
}
|