Phase 1: dual-track audio capture → mixed-mono 16 kHz WAV + mic VAD

AudioRecorder captures system audio (ScreenCaptureKit) and mic audio (AVAudioEngine) on a
single serial ioQueue, using one shared monotonic t0 and time-driven writers (which pad gaps
and trim overlaps) so the tracks stay aligned, plus an energy-based mic VAD for 'self' spans.
AudioMixer sums the aligned tracks into mixed_mono_16k.wav. SessionController
drives a serialized start/stop state machine, writes the session folder +
self_vad.json, exposes live level meters, and finalizes on quit.

Hardening from review: ioQueue single-domain (no races), stop() never hangs
(mic-first teardown + bounded stopCapture), layout-agnostic mic deep-copy,
discard-only video output to keep SCStream alive, VAD lockstep on committed
frames, stable signing team in project.yml, single-instance enforcement.
This commit is contained in:
Grant Gilliam
2026-06-05 21:30:11 -05:00
parent b2ae3a62b9
commit fd7e1a5907
12 changed files with 1018 additions and 10 deletions
+38
View File
@@ -0,0 +1,38 @@
import SwiftUI
/// A small horizontal audio level meter. `level` is a peak amplitude (0...1);
/// it's mapped to a dBFS scale (-60 dB to 0 dB) so normal speech is clearly visible.
struct LevelBar: View {
    /// Short caption shown to the left of the bar.
    let label: String
    /// Linear peak amplitude driving the bar; values at or below zero draw an empty bar.
    let level: Float

    var body: some View {
        HStack(spacing: 8) {
            caption
            meter.frame(height: 6)
        }
    }

    /// Fixed-width caption so several stacked bars line up.
    private var caption: some View {
        Text(label)
            .font(.caption2)
            .foregroundStyle(.secondary)
            .frame(width: 48, alignment: .leading)
    }

    /// Faint full-width track with the coloured fill laid over its leading edge.
    private var meter: some View {
        GeometryReader { proxy in
            ZStack(alignment: .leading) {
                RoundedRectangle(cornerRadius: 2)
                    .fill(Color.secondary.opacity(0.2))
                RoundedRectangle(cornerRadius: 2)
                    .fill(fillColor)
                    .frame(width: proxy.size.width * fillFraction)
            }
        }
    }

    /// Portion of the track to fill, in 0...1.
    ///
    /// The amplitude is converted to decibels and mapped linearly so that
    /// -60 dB gives 0 and 0 dB gives 1; anything outside that span is clamped.
    private var fillFraction: CGFloat {
        if level > 0 {
            // Amplitudes up to 1 give decibels <= 0; louder input clamps to a full bar.
            let decibels = 20 * log10(Double(level))
            let normalized = (decibels + 60) / 60
            return CGFloat(min(1, max(0, normalized)))
        }
        // Zero, negative and NaN levels all land here (log10 is undefined for them).
        return 0
    }

    /// Fill colour: red near the top of the scale, gray when almost empty, green otherwise.
    private var fillColor: Color {
        let filled = fillFraction
        if filled > 0.9 {
            return .red
        }
        return filled < 0.02 ? .gray : .green
    }
}
+88
View File
@@ -7,12 +7,15 @@ struct MenuBarView: View {
/// App settings object; not referenced by the code visible in this section.
@EnvironmentObject private var settings: AppSettings
/// Permission status; `microphone` and `screenRecording` are read to decide whether recording can start.
@EnvironmentObject private var permissions: PermissionsManager
/// Backend health object — presumably consumed by `refreshHealth()` / `backendSection`; confirm against the rest of the file.
@EnvironmentObject private var health: SparkControlHealth
/// Recording session; supplies `state`, `elapsed`, `micLevel`, `systemLevel`, `isBusy`, `warning` and
/// `lastSession` to the recording section, which calls `toggle()` from the record button.
@EnvironmentObject private var session: SessionController
var body: some View {
NavigationStack {
VStack(alignment: .leading, spacing: 12) {
header
Divider()
recordingSection
Divider()
permissionsSection
Divider()
backendSection
@@ -26,6 +29,91 @@ struct MenuBarView: View {
.task { await refreshHealth() }
}
// MARK: Recording
/// `true` only when both the microphone and screen-recording permissions are granted.
private var canRecord: Bool {
    guard permissions.microphone == .granted else { return false }
    return permissions.screenRecording == .granted
}
/// The "Recording" block of the menu.
///
/// Top to bottom: a title row (with the elapsed time while recording), the
/// start/stop button, live level meters while recording, a permissions hint,
/// any error or warning text, and a link that reveals the last session's
/// mixed file in Finder.
private var recordingSection: some View {
    // Read once so the header, button tint and meters agree within a render.
    let isRecording = session.state == .recording

    return VStack(alignment: .leading, spacing: 8) {
        // Title row; the timer only appears while a recording is running.
        HStack {
            Text("Recording").font(.subheadline).bold()
            Spacer()
            if isRecording {
                Text(timeString(session.elapsed))
                    .font(.system(.caption, design: .monospaced))
                    .foregroundStyle(.secondary)
            }
        }

        // Start/stop button; title, icon and enabled state follow the session state.
        Button(
            action: { session.toggle() },
            label: {
                Label(recordButtonTitle, systemImage: recordButtonIcon)
                    .frame(maxWidth: .infinity)
            }
        )
        .controlSize(.large)
        .tint(isRecording ? .red : .accentColor)
        .disabled(recordButtonDisabled)

        // Live meters for both captured tracks.
        if isRecording {
            LevelBar(label: "Mic", level: session.micLevel)
            LevelBar(label: "System", level: session.systemLevel)
        }

        // Hint shown when a permission is missing and the session is not busy.
        if !(canRecord || session.isBusy) {
            Text("Grant Microphone + Screen Recording above to record.")
                .font(.caption)
                .foregroundStyle(.secondary)
        }

        if case let .error(message) = session.state {
            Text(message).font(.caption).foregroundStyle(.red)
        }

        if let warning = session.warning {
            Text(warning).font(.caption).foregroundStyle(.orange)
        }

        // Summary of the previous session; clicking selects its mixed file in Finder.
        if let last = session.lastSession {
            Button(
                action: {
                    NSWorkspace.shared.activateFileViewerSelecting([last.mixedURL])
                },
                label: {
                    Text("Last: \(Int(last.duration.rounded()))s · \(last.selfSpanCount) self-spans — reveal in Finder")
                        .font(.caption)
                }
            )
            .buttonStyle(.link)
        }
    }
}
/// Title for the record button, chosen from the current session state.
private var recordButtonTitle: String {
    switch session.state {
    case .idle, .error:
        // An errored session offers the same "start" action as an idle one.
        return "Start Recording"
    case .starting:
        return "Starting…"
    case .recording:
        return "Stop Recording"
    case .finishing:
        return "Finishing…"
    }
}
/// SF Symbol name for the record button: a stop glyph while recording, a record glyph otherwise.
private var recordButtonIcon: String {
    if session.state == .recording {
        return "stop.circle.fill"
    }
    return "record.circle"
}
/// Whether the record button should be disabled.
///
/// Stopping is always allowed while recording; the button is locked during the
/// starting/finishing transitions; otherwise it depends on `canRecord`.
private var recordButtonDisabled: Bool {
    switch session.state {
    case .recording:
        return false
    case .starting, .finishing:
        return true
    case .idle, .error:
        return canRecord == false
    }
}
/// Formats an elapsed duration as zero-padded `MM:SS`.
///
/// Minutes are not wrapped at 60, so an hour or more renders as e.g. `62:03`.
///
/// - Parameter t: Elapsed time in seconds; fractional seconds are truncated.
/// - Returns: The formatted string. Negative, NaN and infinite inputs yield `"00:00"`.
private func timeString(_ t: TimeInterval) -> String {
    // `Int(_:)` traps on NaN/infinity, and a negative value would print as "00:-5".
    guard t.isFinite, t > 0 else { return "00:00" }
    // Cap at Int32.max so the conversion cannot trap and the value fits what `%d` reads.
    let total = Int(min(t, Double(Int32.max)))
    let (minutes, seconds) = total.quotientAndRemainder(dividingBy: 60)
    return String(format: "%02d:%02d", minutes, seconds)
}
private var header: some View {
VStack(alignment: .leading, spacing: 2) {
Text("Ten31 Transcripts").font(.headline)