fd7e1a5907
AudioRecorder captures system audio (ScreenCaptureKit) + mic (AVAudioEngine) on a single serial ioQueue, one shared monotonic t0, time-driven writers (pad gaps / trim overlaps) so tracks stay aligned, and an energy mic-VAD for 'self' spans. AudioMixer sums the aligned tracks into mixed_mono_16k.wav. SessionController drives a serialized start/stop state machine, writes the session folder + self_vad.json, exposes live level meters, and finalizes on quit. Hardening from review: ioQueue single-domain (no races), stop() never hangs (mic-first teardown + bounded stopCapture), layout-agnostic mic deep-copy, discard-only video output to keep SCStream alive, VAD lockstep on committed frames, stable signing team in project.yml, single-instance enforcement.
66 lines
2.6 KiB
Swift
66 lines
2.6 KiB
Swift
import AVFoundation
|
|
|
|
/// Resamples PCM buffers of any input format to **16 kHz mono Float32**.
///
/// The underlying `AVAudioConverter` is kept between calls, so feeding one
/// source stream through a single instance keeps the sample-rate conversion
/// continuous from buffer to buffer. The converter is rebuilt whenever the
/// incoming buffer's format differs from the previous one.
///
/// Not thread-safe: confine each instance to a single thread. The mic and
/// system instances (one per source stream) are both driven exclusively from
/// `AudioRecorder.ioQueue`.
final class Resampler {
    /// The canonical Phase-1 audio format: 16 kHz, mono, Float32, deinterleaved.
    static let targetFormat = AVAudioFormat(
        commonFormat: .pcmFormatFloat32, sampleRate: 16_000, channels: 1, interleaved: false)!

    /// Extra output frames allocated on top of the rate-scaled estimate in `resample(_:)`.
    private static let headroomFrames: AVAudioFrameCount = 64

    /// Output capacity used when flushing the converter in `drain()`.
    private static let drainCapacity: AVAudioFrameCount = 8192

    /// Converter for the most recently seen input format; created lazily.
    private var converter: AVAudioConverter?

    /// Input format `converter` was last (re)built for.
    private var sourceFormat: AVAudioFormat?

    /// Set by `drain()`; once true, `resample(_:)` and `drain()` both return nil.
    private var ended = false

    /// Converts `input` to the 16 kHz mono target format.
    ///
    /// - Parameter input: Source PCM buffer in any format.
    /// - Returns: The converted buffer, or nil when the stream has already been
    ///   drained, `input` is empty, no converter or output buffer could be
    ///   created, the conversion failed, or it produced no frames.
    func resample(_ input: AVAudioPCMBuffer) -> AVAudioPCMBuffer? {
        if ended || input.frameLength == 0 { return nil }

        let format = input.format
        guard let active = activeConverter(for: format) else { return nil }

        // Size the output from the rate ratio, rounded up, plus fixed headroom.
        let ratio = Self.targetFormat.sampleRate / format.sampleRate
        let estimatedFrames = (Double(input.frameLength) * ratio).rounded(.up)
        let capacity = AVAudioFrameCount(estimatedFrames) + Self.headroomFrames

        // Hand `input` to the converter exactly once; any further request in the
        // same call is answered with "no data now" so the stream stays open.
        var pending: AVAudioPCMBuffer? = input
        return runConversion(using: active, capacity: capacity) { _, inputStatus in
            guard let next = pending else {
                inputStatus.pointee = .noDataNow
                return nil
            }
            pending = nil
            inputStatus.pointee = .haveData
            return next
        }
    }

    /// Flushes the converter's internal tail at end of stream (call once on stop).
    ///
    /// Marks the resampler as ended, so every later `resample(_:)` or `drain()`
    /// call returns nil.
    ///
    /// - Returns: The flushed frames, or nil when already ended, no converter was
    ///   ever created, or the flush failed or produced no frames.
    func drain() -> AVAudioPCMBuffer? {
        let wasEnded = ended
        ended = true
        guard !wasEnded, let converter else { return nil }

        // Signalling end-of-stream with no data makes the converter emit what it still holds.
        return runConversion(using: converter, capacity: Self.drainCapacity) { _, inputStatus in
            inputStatus.pointee = .endOfStream
            return nil
        }
    }

    /// Returns the converter for `format`, creating or replacing it when none
    /// exists yet or the stored source format differs.
    private func activeConverter(for format: AVAudioFormat) -> AVAudioConverter? {
        if let converter, sourceFormat == format { return converter }
        converter = AVAudioConverter(from: format, to: Self.targetFormat)
        sourceFormat = format
        return converter
    }

    /// Allocates a target-format buffer of `capacity` frames, runs one conversion
    /// pass fed by `feed`, and returns the buffer unless the pass reported an
    /// error or wrote no frames.
    private func runConversion(
        using converter: AVAudioConverter,
        capacity: AVAudioFrameCount,
        feed: @escaping AVAudioConverterInputBlock
    ) -> AVAudioPCMBuffer? {
        guard let output = AVAudioPCMBuffer(pcmFormat: Self.targetFormat, frameCapacity: capacity) else {
            return nil
        }
        var conversionError: NSError?
        let status = converter.convert(to: output, error: &conversionError, withInputFrom: feed)
        guard status != .error, output.frameLength > 0 else { return nil }
        return output
    }
}
|