Files
ten31-transcripts/Ten31Transcripts/Audio/Resampler.swift
T
Grant Gilliam fd7e1a5907 Phase 1: dual-track audio capture → mixed-mono 16 kHz WAV + mic VAD
AudioRecorder captures system audio (ScreenCaptureKit) + mic (AVAudioEngine) on a
single serial ioQueue, one shared monotonic t0, time-driven writers (pad gaps /
trim overlaps) so tracks stay aligned, and an energy mic-VAD for 'self' spans.
AudioMixer sums the aligned tracks into mixed_mono_16k.wav. SessionController
drives a serialized start/stop state machine, writes the session folder +
self_vad.json, exposes live level meters, and finalizes on quit.

Hardening from review: ioQueue single-domain (no races), stop() never hangs
(mic-first teardown + bounded stopCapture), layout-agnostic mic deep-copy,
discard-only video output to keep SCStream alive, VAD lockstep on committed
frames, stable signing team in project.yml, single-instance enforcement.
2026-06-05 21:30:11 -05:00

66 lines
2.6 KiB
Swift

import AVFoundation
/// Converts arbitrary input PCM buffers to **16 kHz mono Float32**, maintaining
/// resampler state across calls. Reuse one instance per source stream so the
/// internal sample-rate converter stays continuous across buffers.
///
/// Not thread-safe: use each instance from a single thread. The mic and system
/// streams each get their own instance, and both instances are driven
/// exclusively from `AudioRecorder.ioQueue`.
final class Resampler {
    /// The canonical Phase-1 audio format: 16 kHz, mono, Float32, deinterleaved.
    static let targetFormat = AVAudioFormat(
        commonFormat: .pcmFormatFloat32,
        sampleRate: 16_000,
        channels: 1,
        interleaved: false)!

    /// Converter for the current source format; built lazily on the first
    /// `resample(_:)` call and rebuilt whenever the input format changes.
    private var converter: AVAudioConverter?
    /// Input format that `converter` was most recently built for.
    private var sourceFormat: AVAudioFormat?
    /// Set by `drain()`; once true, `resample(_:)` and `drain()` return nil.
    private var ended = false

    /// Converts one input buffer to `targetFormat`.
    ///
    /// - Parameter input: PCM buffer in any format `AVAudioConverter` accepts.
    /// - Returns: The converted buffer, or nil when the stream has already been
    ///   drained, `input` is empty, the converter or output buffer could not be
    ///   created, the conversion reported an error, or no output frames were
    ///   produced.
    ///
    /// NOTE(review): the converter is used with its default settings; for
    /// multi-channel input, confirm whether channels are mixed or dropped when
    /// reducing to mono (see `AVAudioConverter.downmix`).
    func resample(_ input: AVAudioPCMBuffer) -> AVAudioPCMBuffer? {
        if ended || input.frameLength == 0 { return nil }

        // (Re)build the converter on first use or when the source format changes.
        let needsNewConverter = converter == nil || sourceFormat != input.format
        if needsNewConverter {
            converter = AVAudioConverter(from: input.format, to: Self.targetFormat)
            sourceFormat = input.format
        }
        guard let activeConverter = converter else { return nil }

        // Size the output from the rate ratio, plus 64 frames of headroom
        // (presumably slack for frames the converter held back earlier).
        let rateRatio = Self.targetFormat.sampleRate / input.format.sampleRate
        let scaledFrames = (Double(input.frameLength) * rateRatio).rounded(.up)
        let frameCapacity = AVAudioFrameCount(scaledFrames) + 64
        guard let converted = AVAudioPCMBuffer(pcmFormat: Self.targetFormat,
                                               frameCapacity: frameCapacity) else {
            return nil
        }

        // Hand `input` to the converter exactly once; any further pull is told
        // that no more data is available right now (not end-of-stream).
        var pending: AVAudioPCMBuffer? = input
        var conversionError: NSError?
        let outcome = activeConverter.convert(to: converted, error: &conversionError) { _, inputStatus in
            guard let next = pending else {
                inputStatus.pointee = .noDataNow
                return nil
            }
            pending = nil
            inputStatus.pointee = .haveData
            return next
        }
        return Self.nonEmpty(converted, after: outcome)
    }

    /// Flush the converter's internal tail at end of stream (call once on stop).
    ///
    /// Marks the resampler as ended on every call, so later `resample(_:)` and
    /// `drain()` calls return nil.
    ///
    /// - Returns: The flushed frames, or nil if the stream was already ended,
    ///   no converter was ever created, the output buffer could not be
    ///   allocated, the conversion failed, or nothing was left to flush.
    func drain() -> AVAudioPCMBuffer? {
        let wasEnded = ended
        ended = true
        guard !wasEnded, let tailConverter = converter else { return nil }

        guard let tail = AVAudioPCMBuffer(pcmFormat: Self.targetFormat,
                                          frameCapacity: 8192) else {
            return nil
        }

        // Supply no input and signal end-of-stream so buffered frames are emitted.
        var conversionError: NSError?
        let outcome = tailConverter.convert(to: tail, error: &conversionError) { _, inputStatus in
            inputStatus.pointee = .endOfStream
            return nil
        }
        return Self.nonEmpty(tail, after: outcome)
    }

    /// Returns `buffer` unless the conversion errored or produced zero frames.
    private static func nonEmpty(_ buffer: AVAudioPCMBuffer,
                                 after status: AVAudioConverterOutputStatus) -> AVAudioPCMBuffer? {
        guard status != .error, buffer.frameLength > 0 else { return nil }
        return buffer
    }
}