Phase 1: dual-track audio capture → mixed-mono 16 kHz WAV + mic VAD
AudioRecorder captures system audio (ScreenCaptureKit) and microphone audio (AVAudioEngine) on a single serial ioQueue with one shared monotonic t0. Time-driven writers pad gaps and trim overlaps so the two tracks stay aligned, and an energy-based mic VAD marks 'self' spans. AudioMixer sums the aligned tracks into mixed_mono_16k.wav. SessionController drives a serialized start/stop state machine, writes the session folder and self_vad.json, exposes live level meters, and finalizes on quit. Hardening from review: ioQueue is the single isolation domain (no races); stop() never hangs (mic-first teardown plus a bounded stopCapture); the mic deep-copy is layout-agnostic; a discard-only video output keeps SCStream alive; the VAD advances in lockstep with committed frames; project.yml pins a stable signing team; and single-instance enforcement is added.
This commit is contained in:
@@ -0,0 +1,67 @@
|
||||
import AVFoundation
|
||||
|
||||
/// Sequential **16 kHz mono PCM-16 WAV** writer. Deliberately "dumb": it only
/// appends buffers and silence and tracks `framesWritten`. Time alignment to the
/// shared `t0` is done by the caller (`AudioRecorder`), which pads/trims using
/// each buffer's true host time so the mic and system tracks stay anchored to
/// the same timeline even if buffers are dropped or the hardware clocks drift.
///
/// Single-threaded: all calls happen on `AudioRecorder.ioQueue`.
final class MonoTrackWriter {
    /// Largest number of silence frames handed to the file in one write
    /// (one second at 16 kHz). Bounds the scratch buffer used by `padSilence`.
    private static let silenceChunkFrames: Int64 = 16_000

    private let file: AVAudioFile

    /// Total frames successfully appended so far (audio and silence).
    private(set) var framesWritten: Int64 = 0

    /// Creates (or overwrites) the WAV file at `url`.
    ///
    /// - Parameter url: Destination of the 16 kHz mono 16-bit little-endian PCM file.
    /// - Throws: Whatever `AVAudioFile(forWriting:settings:commonFormat:interleaved:)` throws.
    init(url: URL) throws {
        let settings: [String: Any] = [
            AVFormatIDKey: kAudioFormatLinearPCM,
            AVSampleRateKey: 16_000,
            AVNumberOfChannelsKey: 1,
            AVLinearPCMBitDepthKey: 16,
            AVLinearPCMIsFloatKey: false,
            AVLinearPCMIsBigEndianKey: false,
        ]
        // On disk = Int16 PCM; processing/buffer format = Float32 (matches Resampler).
        self.file = try AVAudioFile(
            forWriting: url,
            settings: settings,
            commonFormat: .pcmFormatFloat32,
            interleaved: false)
    }

    /// Writes the buffer; returns the number of frames actually committed (0 on
    /// failure). Callers feed the VAD this committed count to stay in lockstep.
    ///
    /// - Parameter buffer: Audio in the file's processing format (Float32, mono,
    ///   16 kHz, non-interleaved). An empty buffer is a no-op.
    /// - Returns: `buffer.frameLength` on success, 0 if nothing was written.
    @discardableResult
    func write(_ buffer: AVAudioPCMBuffer) -> Int64 {
        guard buffer.frameLength > 0 else { return 0 }
        do {
            try file.write(from: buffer)
            let n = Int64(buffer.frameLength)
            framesWritten += n
            return n
        } catch {
            return 0 // best-effort: drop a buffer rather than tear down
        }
    }

    /// Append `count` frames of silence (to fill timeline gaps); returns frames
    /// actually committed.
    ///
    /// The silence is written in chunks of at most `silenceChunkFrames` from a
    /// single zeroed scratch buffer. Stops early (returning the partial total)
    /// as soon as a chunk fails to write.
    ///
    /// - Parameter count: Number of silent frames requested; values <= 0 write nothing.
    /// - Returns: Frames of silence committed, in `0...count`.
    @discardableResult
    func padSilence(_ count: Int64) -> Int64 {
        guard count > 0 else { return 0 }

        // Build the scratch buffer in the file's own processing format: that is
        // the only format `AVAudioFile.write(from:)` accepts, so the pad cannot
        // silently fail because of a mismatch with an external format constant.
        let capacity = AVAudioFrameCount(min(Self.silenceChunkFrames, count))
        guard
            let silence = AVAudioPCMBuffer(pcmFormat: file.processingFormat, frameCapacity: capacity),
            let channels = silence.floatChannelData
        else {
            // Never write an un-zeroed buffer as "silence".
            return 0
        }
        // Zero once; every chunk reuses the same samples.
        channels[0].initialize(repeating: 0, count: Int(capacity))

        var committed: Int64 = 0
        while committed < count {
            // Always <= capacity, because capacity == min(chunk, count).
            silence.frameLength = AVAudioFrameCount(min(Self.silenceChunkFrames, count - committed))
            let written = write(silence)
            if written == 0 { break }
            committed += written
        }
        return committed
    }
}
|
||||
Reference in New Issue
Block a user