fd7e1a5907
AudioRecorder captures system audio (ScreenCaptureKit) + mic (AVAudioEngine) on a single serial ioQueue, one shared monotonic t0, time-driven writers (pad gaps / trim overlaps) so tracks stay aligned, and an energy mic-VAD for 'self' spans. AudioMixer sums the aligned tracks into mixed_mono_16k.wav. SessionController drives a serialized start/stop state machine, writes the session folder + self_vad.json, exposes live level meters, and finalizes on quit. Hardening from review: ioQueue single-domain (no races), stop() never hangs (mic-first teardown + bounded stopCapture), layout-agnostic mic deep-copy, discard-only video output to keep SCStream alive, VAD lockstep on committed frames, stable signing team in project.yml, single-instance enforcement.
68 lines
2.5 KiB
Swift
68 lines
2.5 KiB
Swift
import AVFoundation
|
|
|
|
/// Sequential **16 kHz mono PCM-16 WAV** writer. Deliberately "dumb": it only
/// appends buffers and silence and tracks `framesWritten`. Time alignment to the
/// shared `t0` is done by the caller (`AudioRecorder`), which pads/trims using
/// each buffer's true host time so the mic and system tracks stay anchored to
/// the same timeline even if buffers are dropped or the hardware clocks drift.
///
/// Single-threaded: all calls happen on `AudioRecorder.ioQueue`; this type does
/// no locking of its own.
final class MonoTrackWriter {
    /// Upper bound on frames written per silence chunk (one second at 16 kHz).
    /// It caps the size of the scratch buffer used by `padSilence(_:)`.
    private static let silenceChunkFrames: Int64 = 16_000

    /// Destination file: Int16 PCM on disk, Float32 non-interleaved in memory.
    private let file: AVAudioFile

    /// Running total of frames successfully committed through `write(_:)`,
    /// which includes silence appended by `padSilence(_:)`.
    private(set) var framesWritten: Int64 = 0

    /// Opens `url` for writing as 16 kHz, mono, 16-bit little-endian linear PCM.
    ///
    /// - Parameter url: Location of the audio file to write.
    /// - Throws: Any error raised by `AVAudioFile.init(forWriting:settings:commonFormat:interleaved:)`.
    init(url: URL) throws {
        let settings: [String: Any] = [
            AVFormatIDKey: kAudioFormatLinearPCM,
            AVSampleRateKey: 16_000,
            AVNumberOfChannelsKey: 1,
            AVLinearPCMBitDepthKey: 16,
            AVLinearPCMIsFloatKey: false,
            AVLinearPCMIsBigEndianKey: false,
        ]
        // On disk = Int16 PCM; processing/buffer format = Float32 (matches Resampler).
        self.file = try AVAudioFile(
            forWriting: url,
            settings: settings,
            commonFormat: .pcmFormatFloat32,
            interleaved: false)
    }

    /// Appends `buffer` to the file.
    ///
    /// - Parameter buffer: Audio to append; an empty buffer is ignored.
    /// - Returns: The number of frames actually committed (0 for an empty
    ///   buffer or when the underlying write throws). Callers feed the VAD
    ///   this committed count to stay in lockstep.
    @discardableResult
    func write(_ buffer: AVAudioPCMBuffer) -> Int64 {
        guard buffer.frameLength > 0 else { return 0 }
        do {
            try file.write(from: buffer)
        } catch {
            // Best-effort: drop a buffer rather than tear down the recording.
            return 0
        }
        let frames = Int64(buffer.frameLength)
        framesWritten += frames
        return frames
    }

    /// Appends `count` frames of silence (to fill timeline gaps).
    ///
    /// The silence is written in chunks of at most `silenceChunkFrames` frames
    /// from a single zero-filled scratch buffer that is allocated once per call.
    /// The buffer uses the file's own processing format, so it always matches
    /// what `write(_:)` hands to the file.
    ///
    /// - Parameter count: Number of silent frames to append; values `<= 0`
    ///   are a no-op.
    /// - Returns: The number of frames actually committed. This is less than
    ///   `count` if the scratch buffer cannot be allocated or a chunk fails
    ///   to write, in which case padding stops at the first failure.
    @discardableResult
    func padSilence(_ count: Int64) -> Int64 {
        guard count > 0 else { return 0 }

        let format = file.processingFormat
        let capacity = AVAudioFrameCount(min(Self.silenceChunkFrames, count))
        guard let silence = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: capacity),
              let channels = silence.floatChannelData else { return 0 }

        // Zero the whole scratch buffer once; each chunk then only adjusts
        // `frameLength`, so no per-chunk allocation or clearing is needed.
        for channel in 0..<Int(format.channelCount) {
            memset(channels[channel], 0, Int(capacity) * MemoryLayout<Float>.size)
        }

        var committed: Int64 = 0
        while committed < count {
            silence.frameLength = AVAudioFrameCount(min(Int64(capacity), count - committed))
            let written = write(silence)
            if written == 0 { break } // write failed: stop rather than spin
            committed += written
        }
        return committed
    }
}
|