Files
ten31-transcripts/Ten31Transcripts/Audio/MonoTrackWriter.swift
T
Grant Gilliam fd7e1a5907 Phase 1: dual-track audio capture → mixed-mono 16 kHz WAV + mic VAD
AudioRecorder captures system audio (ScreenCaptureKit) + mic (AVAudioEngine) on a
single serial ioQueue, one shared monotonic t0, time-driven writers (pad gaps /
trim overlaps) so tracks stay aligned, and an energy mic-VAD for 'self' spans.
AudioMixer sums the aligned tracks into mixed_mono_16k.wav. SessionController
drives a serialized start/stop state machine, writes the session folder +
self_vad.json, exposes live level meters, and finalizes on quit.

Hardening from review: ioQueue single-domain (no races), stop() never hangs
(mic-first teardown + bounded stopCapture), layout-agnostic mic deep-copy,
discard-only video output to keep SCStream alive, VAD lockstep on committed
frames, stable signing team in project.yml, single-instance enforcement.
2026-06-05 21:30:11 -05:00

68 lines
2.5 KiB
Swift

import AVFoundation
/// Sequential **16 kHz mono PCM-16 WAV** writer. Deliberately "dumb": it only
/// appends buffers and silence and tracks `framesWritten`. Time alignment to the
/// shared `t0` is done by the caller (`AudioRecorder`), which pads/trims using
/// each buffer's true host time so the mic and system tracks stay anchored to
/// the same timeline even if buffers are dropped or the hardware clocks drift.
///
/// Single-threaded: all calls happen on `AudioRecorder.ioQueue`.
final class MonoTrackWriter {
    /// Maximum number of silent frames appended per write while padding
    /// (one second at 16 kHz). Also bounds the size of the reusable zero buffer.
    private static let silenceChunkFrames: Int64 = 16_000

    /// Destination file. Buffers are handed over in the file's processing
    /// format (Float32, deinterleaved) and stored on disk as Int16 PCM.
    private let file: AVAudioFile

    /// Total number of frames successfully appended so far (audio and silence).
    private(set) var framesWritten: Int64 = 0

    /// Opens `url` for writing as 16 kHz, mono, 16-bit little-endian linear PCM.
    ///
    /// - Parameter url: Location of the audio file to write.
    /// - Throws: Any error raised by `AVAudioFile` while opening the file.
    init(url: URL) throws {
        let settings: [String: Any] = [
            AVFormatIDKey: kAudioFormatLinearPCM,
            AVSampleRateKey: 16_000,
            AVNumberOfChannelsKey: 1,
            AVLinearPCMBitDepthKey: 16,
            AVLinearPCMIsFloatKey: false,
            AVLinearPCMIsBigEndianKey: false,
        ]
        // On disk = Int16 PCM; processing/buffer format = Float32 (matches Resampler).
        self.file = try AVAudioFile(
            forWriting: url,
            settings: settings,
            commonFormat: .pcmFormatFloat32,
            interleaved: false)
    }

    /// Writes the buffer; returns the number of frames actually committed (0 on
    /// failure). Callers feed the VAD this committed count to stay in lockstep.
    ///
    /// - Parameter buffer: Audio to append; an empty buffer is a no-op.
    /// - Returns: `buffer.frameLength` on success, `0` if the buffer is empty or
    ///   the write threw.
    @discardableResult
    func write(_ buffer: AVAudioPCMBuffer) -> Int64 {
        guard buffer.frameLength > 0 else { return 0 }
        do {
            try file.write(from: buffer)
            let n = Int64(buffer.frameLength)
            framesWritten += n
            return n
        } catch {
            return 0 // best-effort: drop a buffer rather than tear down
        }
    }

    /// Append `count` frames of silence (to fill timeline gaps); returns frames
    /// actually committed.
    ///
    /// Silence is written in chunks of at most `silenceChunkFrames` frames from a
    /// single zero-filled buffer that is reused for every chunk. Padding stops at
    /// the first chunk that fails to write, so the result may be less than
    /// `count`.
    ///
    /// - Parameter count: Number of silent frames requested; values `<= 0` are a
    ///   no-op.
    /// - Returns: Number of silent frames committed to the file.
    @discardableResult
    func padSilence(_ count: Int64) -> Int64 {
        guard count > 0 else { return 0 }

        // Allocate and zero the scratch buffer once instead of once per chunk.
        // It uses the file's own processing format so it is always a format the
        // file accepts in `write(from:)`.
        let capacity = AVAudioFrameCount(min(Self.silenceChunkFrames, count))
        guard let silence = AVAudioPCMBuffer(pcmFormat: file.processingFormat,
                                             frameCapacity: capacity) else {
            return 0
        }
        if let channels = silence.floatChannelData {
            memset(channels[0], 0, Int(capacity) * MemoryLayout<Float>.size)
        }

        var committed: Int64 = 0
        while committed < count {
            // Only the valid length changes between chunks; the samples stay zero.
            silence.frameLength = AVAudioFrameCount(min(Self.silenceChunkFrames, count - committed))
            let written = write(silence)
            if written == 0 { break }
            committed += written
        }
        return committed
    }
}