Phase 1: dual-track audio capture → mixed-mono 16 kHz WAV + mic VAD
AudioRecorder captures system audio (ScreenCaptureKit) + mic (AVAudioEngine) on a single serial ioQueue, one shared monotonic t0, time-driven writers (pad gaps / trim overlaps) so tracks stay aligned, and an energy mic-VAD for 'self' spans. AudioMixer sums the aligned tracks into mixed_mono_16k.wav. SessionController drives a serialized start/stop state machine, writes the session folder + self_vad.json, exposes live level meters, and finalizes on quit. Hardening from review: ioQueue single-domain (no races), stop() never hangs (mic-first teardown + bounded stopCapture), layout-agnostic mic deep-copy, discard-only video output to keep SCStream alive, VAD lockstep on committed frames, stable signing team in project.yml, single-instance enforcement.
This commit is contained in:
@@ -0,0 +1,65 @@
|
||||
import AVFoundation
|
||||
|
||||
/// Turns PCM buffers of any input format into **16 kHz mono Float32** audio.
/// The underlying sample-rate converter is retained between calls, so dedicate
/// a single instance to each source stream to keep its output continuous from
/// one buffer to the next.
///
/// Not thread-safe: confine each instance to a single thread. The mic and
/// system instances are both driven exclusively from `AudioRecorder.ioQueue`
/// (one instance per source stream).
final class Resampler {
    /// The canonical Phase-1 audio format: 16 kHz, mono, Float32, deinterleaved.
    static let targetFormat = AVAudioFormat(
        commonFormat: .pcmFormatFloat32,
        sampleRate: 16_000,
        channels: 1,
        interleaved: false)!

    /// Converter for the current `sourceFormat`; created on first use and
    /// rebuilt whenever a buffer arrives in a different format.
    private var converter: AVAudioConverter?
    /// Format of the most recent buffer a converter was requested for.
    private var sourceFormat: AVAudioFormat?
    /// Latched by `drain()`; afterwards every call returns nil.
    private var ended = false

    /// Converts one input buffer to the target format.
    ///
    /// - Parameter input: PCM audio in any format the converter accepts.
    /// - Returns: A 16 kHz mono buffer, or nil when the stream has already been
    ///   drained, `input` is empty, no converter or output buffer could be
    ///   created, or the conversion failed or yielded no frames.
    func resample(_ input: AVAudioPCMBuffer) -> AVAudioPCMBuffer? {
        if ended || input.frameLength == 0 { return nil }

        let inputFormat = input.format
        let needsNewConverter = converter == nil || sourceFormat != inputFormat
        if needsNewConverter {
            converter = AVAudioConverter(from: inputFormat, to: Self.targetFormat)
            sourceFormat = inputFormat
        }
        guard let activeConverter = converter else { return nil }

        // Expected output length for this input, rounded up, plus a little
        // headroom for frames the converter carried over from earlier buffers.
        let headroom: AVAudioFrameCount = 64
        let scaledFrames = Double(input.frameLength)
            * (Self.targetFormat.sampleRate / inputFormat.sampleRate)
        let capacity = AVAudioFrameCount(scaledFrames.rounded(.up)) + headroom
        guard let output = makeOutputBuffer(capacity: capacity) else { return nil }

        // Hand `input` to the converter exactly once; any further request in
        // this call is answered with "no data now" so the stream stays open.
        var alreadySupplied = false
        var conversionError: NSError?
        let status = activeConverter.convert(to: output, error: &conversionError) { _, inputStatus in
            guard !alreadySupplied else {
                inputStatus.pointee = .noDataNow
                return nil
            }
            alreadySupplied = true
            inputStatus.pointee = .haveData
            return input
        }

        guard status != .error, output.frameLength != 0 else { return nil }
        return output
    }

    /// Flushes whatever the converter still holds at end of stream. Call once
    /// on stop; the instance produces no further audio afterwards.
    ///
    /// - Returns: The remaining 16 kHz mono frames, or nil when there is no
    ///   converter, the stream was already drained, or nothing was left.
    func drain() -> AVAudioPCMBuffer? {
        let wasEnded = ended
        ended = true
        guard !wasEnded, let activeConverter = converter else { return nil }
        guard let output = makeOutputBuffer(capacity: 8192) else { return nil }

        // Signalling end-of-stream makes the converter emit its buffered tail.
        var conversionError: NSError?
        let status = activeConverter.convert(to: output, error: &conversionError) { _, inputStatus in
            inputStatus.pointee = .endOfStream
            return nil
        }

        guard status != .error, output.frameLength != 0 else { return nil }
        return output
    }

    /// Allocates an empty buffer in `targetFormat` with room for `capacity` frames.
    private func makeOutputBuffer(capacity: AVAudioFrameCount) -> AVAudioPCMBuffer? {
        AVAudioPCMBuffer(pcmFormat: Self.targetFormat, frameCapacity: capacity)
    }
}
|
||||
Reference in New Issue
Block a user