Phase 1: dual-track audio capture → mixed-mono 16 kHz WAV + mic VAD
AudioRecorder captures system audio (ScreenCaptureKit) + mic (AVAudioEngine) on a single serial ioQueue, one shared monotonic t0, time-driven writers (pad gaps / trim overlaps) so tracks stay aligned, and an energy mic-VAD for 'self' spans. AudioMixer sums the aligned tracks into mixed_mono_16k.wav. SessionController drives a serialized start/stop state machine, writes the session folder + self_vad.json, exposes live level meters, and finalizes on quit. Hardening from review: ioQueue single-domain (no races), stop() never hangs (mic-first teardown + bounded stopCapture), layout-agnostic mic deep-copy, discard-only video output to keep SCStream alive, VAD lockstep on committed frames, stable signing team in project.yml, single-instance enforcement.
This commit is contained in:
@@ -23,13 +23,17 @@ This repo is at **Phase 0** (scaffold, permissions, backend health check).
|
|||||||
```sh
|
```sh
|
||||||
open Ten31Transcripts.xcodeproj
|
open Ten31Transcripts.xcodeproj
|
||||||
```
|
```
|
||||||
5. In Xcode, select the **Ten31Transcripts** target → **Signing & Capabilities**:
|
5. Signing is preconfigured: `project.yml` sets `DEVELOPMENT_TEAM` to the free
|
||||||
- Check **Automatically manage signing**.
|
personal team `BK4Y6CXN35` with automatic signing, so **Signing & Capabilities
|
||||||
- For **Team**, pick your personal team (sign in with your Apple ID — free; no
|
should already show the team** — no manual selection needed. (If you ever sign
|
||||||
paid developer account needed). A stable team keeps macOS from re-asking for
|
with a different Apple ID, update `DEVELOPMENT_TEAM` in `project.yml`, not in
|
||||||
permissions on every rebuild.
|
Xcode — `xcodegen generate` overwrites Xcode-side changes.)
|
||||||
6. Press **Run** (⌘R).
|
6. Press **Run** (⌘R).
|
||||||
|
|
||||||
|
> **Note:** after adding files in a new phase, re-run `xcodegen generate` and let
|
||||||
|
> Xcode reload the project. The signing team persists because it lives in
|
||||||
|
> `project.yml`, so macOS permissions stay granted across rebuilds.
|
||||||
|
|
||||||
## What Phase 0 does
|
## What Phase 0 does
|
||||||
|
|
||||||
- Launches as a menu-bar-only app (no Dock icon).
|
- Launches as a menu-bar-only app (no Dock icon).
|
||||||
|
|||||||
@@ -6,5 +6,30 @@ final class AppDelegate: NSObject, NSApplicationDelegate {
|
|||||||
// LSUIElement in Info.plist already enforces this; set it explicitly too
|
// LSUIElement in Info.plist already enforces this; set it explicitly too
|
||||||
// so behavior is unambiguous regardless of how the app is launched.
|
// so behavior is unambiguous regardless of how the app is launched.
|
||||||
NSApp.setActivationPolicy(.accessory)
|
NSApp.setActivationPolicy(.accessory)
|
||||||
|
terminateOtherInstances()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Enforces a single running copy of the app.
///
/// Looks up every running application that shares this bundle identifier and
/// asks each one other than the current process to terminate, so a fresh
/// launch (e.g. each Xcode ⌘R) never leaves two menu-bar icons behind.
/// Does nothing when the bundle has no identifier.
private func terminateOtherInstances() {
    guard let identifier = Bundle.main.bundleIdentifier else { return }
    let ownPID = NSRunningApplication.current.processIdentifier
    let staleCopies = NSRunningApplication
        .runningApplications(withBundleIdentifier: identifier)
        .filter { $0.processIdentifier != ownPID }
    for stale in staleCopies {
        stale.terminate()
    }
}
|
||||||
|
|
||||||
|
/// Gives an in-progress recording the chance to finalize (flush WAV headers)
/// before the process exits, so the session isn't corrupted.
///
/// - Returns: `.terminateNow` when there is no shared `SessionController` or it
///   is not busy. Otherwise `.terminateLater`; a main-actor task then awaits
///   `prepareForTermination()` and confirms the quit to AppKit.
func applicationShouldTerminate(_ sender: NSApplication) -> NSApplication.TerminateReply {
    if let active = SessionController.shared, active.isBusy {
        Task { @MainActor in
            await active.prepareForTermination()
            // Tell AppKit the deferred termination may now proceed.
            NSApp.reply(toApplicationShouldTerminate: true)
        }
        return .terminateLater
    }
    return .terminateNow
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,9 +10,16 @@ import SwiftUI
|
|||||||
struct Ten31TranscriptsApp: App {
|
struct Ten31TranscriptsApp: App {
|
||||||
@NSApplicationDelegateAdaptor(AppDelegate.self) private var appDelegate
|
@NSApplicationDelegateAdaptor(AppDelegate.self) private var appDelegate
|
||||||
|
|
||||||
@StateObject private var settings = AppSettings()
|
@StateObject private var settings: AppSettings
|
||||||
@StateObject private var permissions = PermissionsManager()
|
@StateObject private var permissions = PermissionsManager()
|
||||||
@StateObject private var health = SparkControlHealth()
|
@StateObject private var health = SparkControlHealth()
|
||||||
|
@StateObject private var session: SessionController
|
||||||
|
|
||||||
|
/// Creates the settings object once and shares that same instance between
/// the `settings` state object and the `SessionController` built from it.
init() {
    let sharedSettings = AppSettings()
    let controller = SessionController(settings: sharedSettings)
    _settings = StateObject(wrappedValue: sharedSettings)
    _session = StateObject(wrappedValue: controller)
}
|
||||||
|
|
||||||
var body: some Scene {
|
var body: some Scene {
|
||||||
MenuBarExtra {
|
MenuBarExtra {
|
||||||
@@ -20,8 +27,9 @@ struct Ten31TranscriptsApp: App {
|
|||||||
.environmentObject(settings)
|
.environmentObject(settings)
|
||||||
.environmentObject(permissions)
|
.environmentObject(permissions)
|
||||||
.environmentObject(health)
|
.environmentObject(health)
|
||||||
|
.environmentObject(session)
|
||||||
} label: {
|
} label: {
|
||||||
Image(systemName: "waveform.circle")
|
Image(systemName: session.state == .recording ? "waveform.circle.fill" : "waveform.circle")
|
||||||
}
|
}
|
||||||
.menuBarExtraStyle(.window)
|
.menuBarExtraStyle(.window)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,67 @@
|
|||||||
|
import AVFoundation
|
||||||
|
|
||||||
|
/// Produces the single **mixed-mono 16 kHz WAV** sent to the backend by summing
/// the mic and system tracks sample-by-sample.
///
/// Both inputs are already front-padded to the shared t0, so frame N of each
/// file is the same instant and the mix is a plain addition followed by a hard
/// clip to [-1, 1]. Work proceeds in 1-second chunks to keep memory flat for
/// long calls.
enum AudioMixer {
    /// Mixes `micURL` and `systemURL` into a new 16-bit PCM WAV at `outURL`.
    ///
    /// An input that cannot be opened is treated as silence; the output is as
    /// long as the longer readable input.
    ///
    /// - Throws: If the output file cannot be created or a chunk cannot be written.
    static func mix(mic micURL: URL, system systemURL: URL, into outURL: URL) throws {
        let micFile = try? AVAudioFile(forReading: micURL)
        let systemFile = try? AVAudioFile(forReading: systemURL)

        // On disk: little-endian Int16 PCM, mono, 16 kHz.
        let wavSettings: [String: Any] = [
            AVFormatIDKey: kAudioFormatLinearPCM,
            AVSampleRateKey: 16_000,
            AVNumberOfChannelsKey: 1,
            AVLinearPCMBitDepthKey: 16,
            AVLinearPCMIsFloatKey: false,
            AVLinearPCMIsBigEndianKey: false,
        ]
        let outFile = try AVAudioFile(
            forWriting: outURL,
            settings: wavSettings,
            commonFormat: .pcmFormatFloat32,
            interleaved: false)

        let mixFormat = Resampler.targetFormat
        let framesPerChunk: AVAudioFramePosition = 16_000
        let totalFrames = max(micFile?.length ?? 0, systemFile?.length ?? 0)
        var cursor: AVAudioFramePosition = 0

        while cursor < totalFrames {
            let chunkFrames = AVAudioFrameCount(min(framesPerChunk, totalFrames - cursor))
            guard let scratch = AVAudioPCMBuffer(pcmFormat: mixFormat, frameCapacity: chunkFrames),
                  let samples = scratch.floatChannelData?[0] else { break }
            scratch.frameLength = chunkFrames

            // Start from silence, then accumulate each source on top.
            let sampleCount = Int(chunkFrames)
            memset(samples, 0, sampleCount * MemoryLayout<Float>.size)
            add(file: micFile, at: cursor, maxFrames: chunkFrames, into: samples)
            add(file: systemFile, at: cursor, maxFrames: chunkFrames, into: samples)

            // Hard-clip the sum so it stays inside the representable range.
            for index in 0..<sampleCount {
                let value = samples[index]
                if value > 1 {
                    samples[index] = 1
                } else if value < -1 {
                    samples[index] = -1
                }
            }

            try outFile.write(from: scratch)
            cursor += AVAudioFramePosition(chunkFrames)
        }
    }

    /// Adds up to `maxFrames` frames of `file`, starting at frame `pos`, onto `dst`.
    ///
    /// Silently contributes nothing when `file` is nil, `pos` is at or past the
    /// end of the file, a buffer cannot be allocated, or the read fails.
    private static func add(file: AVAudioFile?, at pos: AVAudioFramePosition,
                            maxFrames: AVAudioFrameCount, into dst: UnsafeMutablePointer<Float>) {
        guard let file, pos < file.length else { return }
        file.framePosition = pos

        let framesLeft = file.length - pos
        let wanted = AVAudioFrameCount(min(AVAudioFramePosition(maxFrames), framesLeft))
        guard wanted > 0,
              let scratch = AVAudioPCMBuffer(pcmFormat: file.processingFormat, frameCapacity: wanted)
        else { return }

        guard (try? file.read(into: scratch, frameCount: wanted)) != nil,
              let source = scratch.floatChannelData?[0] else { return }

        // Only the frames actually read are summed.
        for index in 0..<Int(scratch.frameLength) {
            dst[index] += source[index]
        }
    }
}
|
||||||
@@ -0,0 +1,333 @@
|
|||||||
|
import AVFoundation
|
||||||
|
import ScreenCaptureKit
|
||||||
|
import CoreMedia
|
||||||
|
import QuartzCore
|
||||||
|
|
||||||
|
/// Everything `AudioRecorder.stop()` reports about a finished capture.
struct RecordingResult {
    /// Location of the mic track WAV.
    let micURL: URL
    /// Location of the system-audio track WAV.
    let systemURL: URL
    /// Location of the mixed-mono WAV produced from the two tracks.
    let mixedURL: URL
    /// Length of the longer track, in seconds (frames / 16 000).
    let duration: Double
    /// Mic voice-activity spans on the session timeline.
    let selfSpans: [VADSpan]
    /// Wall-clock time (seconds since 1970) sampled when capture started.
    let t0Unix: Double
    /// Non-nil if system-audio capture stopped early (e.g. SCStream error).
    let systemNote: String?
}
|
||||||
|
|
||||||
|
/// Dual-track local audio capture for Phase 1.
|
||||||
|
///
|
||||||
|
/// - System audio via `SCStream` (`capturesAudio`); its audio handler runs on
|
||||||
|
/// `ioQueue`. A discard-only video output runs on `screenQueue` purely to keep
|
||||||
|
/// SCStream's frame pipeline drained (an unconsumed video queue can stall the
|
||||||
|
/// whole stream) — frames are dropped instantly, never stored.
|
||||||
|
/// - Mic via `AVAudioEngine` input tap: the tap deep-copies the raw buffer and
|
||||||
|
/// hands it to `ioQueue`, where it is resampled and written.
|
||||||
|
/// - **`ioQueue` is the single isolation domain** for the writers, VAD, both
|
||||||
|
/// resamplers, and lifecycle flags.
|
||||||
|
/// - One shared monotonic `t0` (`CACurrentMediaTime`). Each buffer is placed at
|
||||||
|
/// its true `(startHost − t0)` frame (gaps padded, overlaps trimmed), so mic
|
||||||
|
/// and system stay aligned and the mix is a straight sum.
|
||||||
|
/// - Live peak levels are exposed via `currentLevels()` for the UI meter.
|
||||||
|
/// - `stop()` tears the mic down first and bounds `stopCapture()` with a timeout,
|
||||||
|
/// so a wedged stream can never block finalization. No video is written.
|
||||||
|
final class AudioRecorder: NSObject, SCStreamDelegate, SCStreamOutput {
|
||||||
|
private let micURL: URL
|
||||||
|
private let systemURL: URL
|
||||||
|
private let mixedURL: URL
|
||||||
|
|
||||||
|
private let ioQueue = DispatchQueue(label: "xyz.ten31.audio.io")
|
||||||
|
private let screenQueue = DispatchQueue(label: "xyz.ten31.audio.screen")
|
||||||
|
|
||||||
|
// ioQueue-only state:
|
||||||
|
private var t0Host: Double = 0
|
||||||
|
private var t0Unix: Double = 0
|
||||||
|
private var micWriter: MonoTrackWriter?
|
||||||
|
private var systemWriter: MonoTrackWriter?
|
||||||
|
private var vad: MicVAD?
|
||||||
|
private var tornDown = true
|
||||||
|
private let micResampler = Resampler()
|
||||||
|
private let systemResampler = Resampler()
|
||||||
|
|
||||||
|
// Cross-thread, guarded by levelLock:
|
||||||
|
private let levelLock = NSLock()
|
||||||
|
private var micPeak: Float = 0
|
||||||
|
private var sysPeak: Float = 0
|
||||||
|
private var streamStopped = false
|
||||||
|
private var systemErrorMessage: String?
|
||||||
|
|
||||||
|
private var engine: AVAudioEngine?
|
||||||
|
private var stream: SCStream?
|
||||||
|
|
||||||
|
/// Records the three output locations; nothing is opened until `start()`.
///
/// - Parameters:
///   - micURL: Destination of the mic track.
///   - systemURL: Destination of the system-audio track.
///   - mixedURL: Destination of the mixed-mono file written by `stop()`.
init(micURL: URL, systemURL: URL, mixedURL: URL) {
    self.mixedURL = mixedURL
    self.systemURL = systemURL
    self.micURL = micURL
}
|
||||||
|
|
||||||
|
// MARK: - Lifecycle
|
||||||
|
|
||||||
|
/// Opens both track writers, anchors the shared timeline, then starts the mic
/// and system captures.
///
/// - Throws: Any error from creating the writers or from starting either
///   capture. A capture-start failure is rolled back through `abortStart()`
///   before the error is rethrown.
func start() async throws {
    // Sample the monotonic and wall clocks back to back for the shared t0.
    let hostAnchor = CACurrentMediaTime()
    let unixAnchor = Date().timeIntervalSince1970

    try ioQueue.sync {
        // Both writers are created before any state is assigned, so a throw
        // here leaves the recorder untouched.
        let micTrack = try MonoTrackWriter(url: micURL)
        let systemTrack = try MonoTrackWriter(url: systemURL)
        t0Host = hostAnchor
        t0Unix = unixAnchor
        micWriter = micTrack
        systemWriter = systemTrack
        vad = MicVAD()
        tornDown = false
    }

    do {
        try startMic()
        // Throws if Screen Recording is denied.
        try await startSystem()
    } catch {
        await abortStart()
        throw error
    }
}
|
||||||
|
|
||||||
|
/// Stops both captures, finalizes the two tracks, writes the mixed file, and
/// reports the outcome.
///
/// Order matters: the mic is torn down first, the system stream is stopped
/// with a bounded wait, the resampler tails are flushed and the writers
/// released on `ioQueue`, and only then are the two files mixed.
func stop() async -> RecordingResult {
    // Mic first: removing the tap and stopping the engine halts mic capture.
    if let micEngine = engine {
        micEngine.inputNode.removeTap(onBus: 0)
        micEngine.stop()
    }
    engine = nil

    // An already-errored stream can make stopCapture() block, so skip the call
    // when the delegate has reported a stop, and bound it with a timeout otherwise.
    if let activeStream = stream, !flag({ streamStopped }) {
        await Self.stopCaptureWithTimeout(activeStream, seconds: 3)
    }
    stream = nil

    let summary: (micFrames: Int64, sysFrames: Int64, spans: [VADSpan], t0: Double) = ioQueue.sync {
        // Flush what the converters still hold; the VAD only sees mic samples
        // that were actually committed to the file.
        if let micTail = micResampler.drain(), (micWriter?.write(micTail) ?? 0) > 0 {
            vad?.feed(micTail)
        }
        if let systemTail = systemResampler.drain() {
            systemWriter?.write(systemTail)
        }
        vad?.finish()

        let micCount = micWriter?.framesWritten ?? 0
        let systemCount = systemWriter?.framesWritten ?? 0
        let detected = vad?.spans ?? []
        let anchor = t0Unix

        // Mark the session closed and drop the writers and the VAD.
        tornDown = true
        micWriter = nil
        systemWriter = nil
        vad = nil
        return (micCount, systemCount, detected, anchor)
    }

    // Best effort: a failed mix does not prevent returning the per-track result.
    try? AudioMixer.mix(mic: micURL, system: systemURL, into: mixedURL)

    let longestTrack = max(summary.micFrames, summary.sysFrames)
    let note = flag { systemErrorMessage } as String?
    return RecordingResult(
        micURL: micURL,
        systemURL: systemURL,
        mixedURL: mixedURL,
        duration: Double(longestTrack) / 16_000.0,
        selfSpans: summary.spans,
        t0Unix: summary.t0,
        systemNote: note)
}
|
||||||
|
|
||||||
|
/// Rolls back a partially completed `start()`: stops whichever capture came
/// up, then marks the session torn down and releases the writers and VAD.
private func abortStart() async {
    if let micEngine = engine {
        micEngine.inputNode.removeTap(onBus: 0)
        micEngine.stop()
    }
    engine = nil

    if let activeStream = stream {
        await Self.stopCaptureWithTimeout(activeStream, seconds: 3)
    }
    stream = nil

    ioQueue.sync {
        vad = nil
        systemWriter = nil
        micWriter = nil
        tornDown = true
    }
}
|
||||||
|
|
||||||
|
/// Returns the most recent peak level of each source and decays the stored
/// peaks, so a source that stops delivering audio fades out on the meter.
///
/// - Returns: The mic and system peaks as they were before this call's decay.
func currentLevels() -> (mic: Float, system: Float) {
    levelLock.lock()
    defer { levelLock.unlock() }

    let snapshot = (mic: micPeak, system: sysPeak)
    // Each poll scales the held peaks down to 55 %.
    micPeak *= 0.55
    sysPeak *= 0.55
    return snapshot
}
|
||||||
|
|
||||||
|
// MARK: - Ingest (ioQueue only)
|
||||||
|
|
||||||
|
/// Places a resampled mic buffer on the session timeline and keeps the VAD in
/// step with what the file actually received.
///
/// - Parameters:
///   - buffer: 16 kHz mono audio for the mic track.
///   - startHost: Host-clock time (seconds) at which `buffer` begins.
private func ingestMic(_ buffer: AVAudioPCMBuffer, startHost: Double) {
    guard !tornDown, let writer = micWriter, let vad else { return }

    // Frame index on the t0 timeline where this buffer should begin.
    let targetFrame = max(0, Int64(((startHost - t0Host) * 16_000).rounded()))

    // Behind schedule: fill the gap with silence and tell the VAD about it.
    let gap = targetFrame - writer.framesWritten
    if gap > 0 {
        let padded = writer.padSilence(gap)
        if padded > 0 { vad.feedSilence(padded) }
    }

    // Ahead of schedule: drop the leading frames that overlap written audio.
    let overlap = max(0, Int(writer.framesWritten - targetFrame))
    guard overlap < Int(buffer.frameLength),
          let chunk = Self.trimFront(buffer, by: overlap) else { return }

    updateLevel(chunk, isMic: true)
    // The VAD is fed only when the write committed.
    guard writer.write(chunk) > 0 else { return }
    vad.feed(chunk)
}
|
||||||
|
|
||||||
|
/// Places a resampled system-audio buffer on the session timeline.
///
/// - Parameters:
///   - buffer: 16 kHz mono audio for the system track.
///   - startHost: Host-clock time (seconds) at which `buffer` begins.
private func ingestSystem(_ buffer: AVAudioPCMBuffer, startHost: Double) {
    guard !tornDown, let writer = systemWriter else { return }

    // Frame index on the t0 timeline where this buffer should begin.
    let targetFrame = max(0, Int64(((startHost - t0Host) * 16_000).rounded()))

    // Behind schedule: fill the gap with silence.
    let gap = targetFrame - writer.framesWritten
    if gap > 0 {
        writer.padSilence(gap)
    }

    // Ahead of schedule: drop the leading frames that overlap written audio.
    let overlap = max(0, Int(writer.framesWritten - targetFrame))
    guard overlap < Int(buffer.frameLength),
          let chunk = Self.trimFront(buffer, by: overlap) else { return }

    updateLevel(chunk, isMic: false)
    writer.write(chunk)
}
|
||||||
|
|
||||||
|
// MARK: - Mic (AVAudioEngine)
|
||||||
|
|
||||||
|
/// Starts mic capture through an `AVAudioEngine` input tap.
///
/// The tap stamps each buffer with its host time, deep-copies it, and hands
/// the copy to `ioQueue`, where it is resampled and ingested.
///
/// - Throws: An `NSError` in the "Ten31" domain when the input node reports no
///   usable format (no input device), or any error from starting the engine.
private func startMic() throws {
    let engine = AVAudioEngine()
    let input = engine.inputNode
    let format = input.inputFormat(forBus: 0)

    // With no usable input device the hardware format is 0 Hz / 0 channels.
    // Installing a tap with such a format raises an Objective-C exception
    // (a crash, not a Swift error), so fail with a regular error instead.
    guard format.sampleRate > 0, format.channelCount > 0 else {
        throw NSError(domain: "Ten31", code: 2,
                      userInfo: [NSLocalizedDescriptionKey: "No microphone input available for capture."])
    }

    input.installTap(onBus: 0, bufferSize: 4096, format: format) { [weak self] buffer, when in
        guard let self else { return }
        let entry = CACurrentMediaTime()
        // Prefer the buffer's own host timestamp; fall back to arrival time when
        // it is missing or more than 5 s away from now.
        let stamped = when.isHostTimeValid ? AudioRecorder.hostSeconds(when.hostTime) : entry
        let startHost = abs(stamped - entry) < 5 ? stamped : entry
        // The engine reuses the tap buffer, so copy before leaving the callback.
        guard let raw = AudioRecorder.copy(buffer) else { return }
        self.ioQueue.async {
            guard !self.tornDown, let resampled = self.micResampler.resample(raw) else { return }
            self.ingestMic(resampled, startHost: startHost)
        }
    }
    engine.prepare()
    try engine.start()
    self.engine = engine
}
|
||||||
|
|
||||||
|
// MARK: - System (ScreenCaptureKit)
|
||||||
|
|
||||||
|
/// Starts system-audio capture on the first available display.
///
/// Audio samples are delivered on `ioQueue`. A 2×2, ~2 fps video output is also
/// registered on `screenQueue`; its frames are discarded and exist only to
/// keep the stream's frame queue drained.
///
/// - Throws: If shareable content cannot be queried, no display exists, an
///   output cannot be added, or capture fails to start.
private func startSystem() async throws {
    let shareable = try await SCShareableContent.excludingDesktopWindows(false, onScreenWindowsOnly: false)
    guard let display = shareable.displays.first else {
        throw NSError(domain: "Ten31", code: 1,
                      userInfo: [NSLocalizedDescriptionKey: "No display available for system-audio capture."])
    }

    let config = SCStreamConfiguration()
    // Audio: 48 kHz stereo, excluding this app's own output.
    config.capturesAudio = true
    config.excludesCurrentProcessAudio = true
    config.sampleRate = 48_000
    config.channelCount = 2
    // Video: as small and slow as configured here, since frames are thrown away.
    config.width = 2
    config.height = 2
    config.minimumFrameInterval = CMTime(value: 1, timescale: 2) // ~2 fps tiny video
    config.queueDepth = 6

    let newStream = SCStream(
        filter: SCContentFilter(display: display, excludingWindows: []),
        configuration: config,
        delegate: self)
    try newStream.addStreamOutput(self, type: .audio, sampleHandlerQueue: ioQueue)
    // Discard-only video consumer; frames are dropped immediately and never stored.
    try newStream.addStreamOutput(self, type: .screen, sampleHandlerQueue: screenQueue)
    try await newStream.startCapture()

    // Stored only once capture is actually running.
    stream = newStream
}
|
||||||
|
|
||||||
|
/// `SCStreamOutput` callback: converts each audio sample buffer to 16 kHz mono
/// and ingests it at its presentation time. Non-audio (`.screen`) samples are
/// discarded here.
func stream(_ stream: SCStream, didOutputSampleBuffer sampleBuffer: CMSampleBuffer,
            of type: SCStreamOutputType) {
    if type != .audio { return }
    guard CMSampleBufferDataIsReady(sampleBuffer) else { return }
    guard let pcm = Self.pcmBuffer(from: sampleBuffer) else { return }
    guard let converted = systemResampler.resample(pcm) else { return }

    // Use the presentation timestamp when it is valid and within 5 s of now;
    // otherwise fall back to the arrival time.
    let arrival = CACurrentMediaTime()
    let pts = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
    var startHost = arrival
    if pts.isValid, abs(pts.seconds - arrival) < 5 {
        startHost = pts.seconds
    }
    ingestSystem(converted, startHost: startHost)
}
|
||||||
|
|
||||||
|
/// `SCStreamDelegate` callback: records, under `levelLock`, that the stream has
/// stopped and why, so `stop()` can skip `stopCapture()` and report the message.
func stream(_ stream: SCStream, didStopWithError error: Error) {
    let message = error.localizedDescription
    levelLock.lock()
    defer { levelLock.unlock() }
    streamStopped = true
    systemErrorMessage = message
}
|
||||||
|
|
||||||
|
// MARK: - Helpers
|
||||||
|
|
||||||
|
/// Raises the stored peak for one source if `buffer` contains a louder sample.
///
/// - Parameters:
///   - buffer: Audio whose first channel is scanned for its largest magnitude.
///   - isMic: `true` to update the mic peak, `false` for the system peak.
private func updateLevel(_ buffer: AVAudioPCMBuffer, isMic: Bool) {
    guard let channel = buffer.floatChannelData?[0] else { return }

    let samples = UnsafeBufferPointer(start: channel, count: Int(buffer.frameLength))
    let loudest = samples.reduce(Float(0)) { running, sample in
        let magnitude = abs(sample)
        return magnitude > running ? magnitude : running
    }

    levelLock.lock()
    defer { levelLock.unlock() }
    // Peaks only ever rise here; `currentLevels()` is what decays them.
    if isMic {
        if loudest > micPeak { micPeak = loudest }
    } else if loudest > sysPeak {
        sysPeak = loudest
    }
}
|
||||||
|
|
||||||
|
/// Evaluates `body` while holding `levelLock` and returns its result; used to
/// read values that are guarded by that lock.
private func flag<T>(_ body: () -> T) -> T {
    levelLock.lock()
    let value = body()
    levelLock.unlock()
    return value
}
|
||||||
|
|
||||||
|
/// Asks `stream` to stop capturing, waiting at most `seconds` for it to finish.
///
/// A task group cannot provide this bound: `withTaskGroup` does not return
/// until every child task has completed, and cancellation is only cooperative,
/// so a `stopCapture()` that never returns would still block the caller.
/// Instead, two unstructured tasks race to resume one continuation: the stop
/// request and a timer. Whichever finishes first releases the caller; the
/// loser's resume is ignored. If `stopCapture()` is wedged, its task is simply
/// left behind.
///
/// - Parameters:
///   - stream: The stream to stop.
///   - seconds: Maximum time to wait; negative values are treated as 0.
private static func stopCaptureWithTimeout(_ stream: SCStream, seconds: Double) async {
    /// Resumes the wrapped continuation exactly once, no matter how many
    /// racers call `fire()`.
    final class ResumeOnce: @unchecked Sendable {
        private let lock = NSLock()
        private var pending: CheckedContinuation<Void, Never>?

        init(_ continuation: CheckedContinuation<Void, Never>) {
            pending = continuation
        }

        func fire() {
            lock.lock()
            let continuation = pending
            pending = nil
            lock.unlock()
            continuation?.resume()
        }
    }

    // Clamp so a negative timeout cannot trap in the UInt64 conversion.
    let timeoutNanos = UInt64(max(0, seconds) * 1_000_000_000)

    await withCheckedContinuation { (continuation: CheckedContinuation<Void, Never>) in
        let gate = ResumeOnce(continuation)

        let timer = Task {
            // Cancelled sleep throws; firing afterwards is a harmless no-op.
            try? await Task.sleep(nanoseconds: timeoutNanos)
            gate.fire()
        }
        Task {
            try? await stream.stopCapture()
            gate.fire()
            // The stop finished on its own, so the timer is no longer needed.
            timer.cancel()
        }
    }
}
|
||||||
|
|
||||||
|
/// Returns an independent copy of `buffer` (the engine reuses the tap buffer).
///
/// Works on the raw `AudioBufferList`, so it does not depend on the sample
/// type or on whether channels are interleaved.
///
/// - Returns: The copy, or nil when `buffer` is empty, allocation fails, the
///   two buffer lists differ in length, or any data pointer is missing.
private static func copy(_ buffer: AVAudioPCMBuffer) -> AVAudioPCMBuffer? {
    let frames = buffer.frameLength
    guard frames > 0,
          let clone = AVAudioPCMBuffer(pcmFormat: buffer.format, frameCapacity: frames)
    else { return nil }
    clone.frameLength = frames

    let sources = UnsafeMutableAudioBufferListPointer(UnsafeMutablePointer(mutating: buffer.audioBufferList))
    let targets = UnsafeMutableAudioBufferListPointer(clone.mutableAudioBufferList)
    guard sources.count == targets.count else { return nil }

    for (source, target) in zip(sources, targets) {
        guard let from = source.mData, let to = target.mData else { return nil }
        // Never copy more than either side declares it holds.
        let byteCount = min(Int(source.mDataByteSize), Int(target.mDataByteSize))
        memcpy(to, from, byteCount)
    }
    return clone
}
|
||||||
|
|
||||||
|
/// Drops the first `frames` frames of `buffer`.
///
/// Only the first float channel is copied.
///
/// - Returns: `buffer` itself when `frames` is zero or negative; a new, shorter
///   buffer otherwise; nil when nothing would remain, the buffer has no float
///   channel data, or allocation fails.
private static func trimFront(_ buffer: AVAudioPCMBuffer, by frames: Int) -> AVAudioPCMBuffer? {
    guard frames > 0 else { return buffer }

    let available = Int(buffer.frameLength)
    guard frames < available, let source = buffer.floatChannelData?[0] else { return nil }

    let kept = AVAudioFrameCount(available - frames)
    guard let trimmed = AVAudioPCMBuffer(pcmFormat: buffer.format, frameCapacity: kept),
          let target = trimmed.floatChannelData?[0] else { return nil }

    trimmed.frameLength = kept
    // Copy the surviving tail to the start of the new buffer.
    memcpy(target, source.advanced(by: frames), Int(kept) * MemoryLayout<Float>.size)
    return trimmed
}
|
||||||
|
|
||||||
|
/// Converts a Mach host-time tick count to seconds using the timebase
/// reported by `mach_timebase_info`.
private static func hostSeconds(_ hostTime: UInt64) -> Double {
    var timebase = mach_timebase_info_data_t()
    mach_timebase_info(&timebase)
    // ticks × numer / denom gives nanoseconds.
    let nanoseconds = Double(hostTime) * Double(timebase.numer) / Double(timebase.denom)
    return nanoseconds / 1_000_000_000.0
}
|
||||||
|
|
||||||
|
/// Copies the PCM audio in `sampleBuffer` into a new `AVAudioPCMBuffer` that
/// uses the sample buffer's own stream format.
///
/// - Returns: The buffer, or nil when the format cannot be read, the sample
///   buffer holds no samples, allocation fails, or the copy reports an error.
private static func pcmBuffer(from sampleBuffer: CMSampleBuffer) -> AVAudioPCMBuffer? {
    guard let description = CMSampleBufferGetFormatDescription(sampleBuffer),
          let streamDescription = CMAudioFormatDescriptionGetStreamBasicDescription(description)
    else { return nil }

    // AVAudioFormat needs a mutable copy of the description to read from.
    var asbd = streamDescription.pointee
    guard let format = AVAudioFormat(streamDescription: &asbd) else { return nil }

    let sampleCount = AVAudioFrameCount(CMSampleBufferGetNumSamples(sampleBuffer))
    guard sampleCount > 0,
          let pcm = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: sampleCount) else { return nil }
    pcm.frameLength = sampleCount

    let status = CMSampleBufferCopyPCMDataIntoAudioBufferList(
        sampleBuffer, at: 0, frameCount: Int32(sampleCount), into: pcm.mutableAudioBufferList)
    guard status == noErr else { return nil }
    return pcm
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,98 @@
|
|||||||
|
import AVFoundation
|
||||||
|
|
||||||
|
/// One stretch of detected speech, expressed in seconds on the session `t0`
/// timeline.
struct VADSpan: Equatable {
    /// Where the span begins, in seconds from `t0`.
    let start: Double
    /// Where the span ends, in seconds from `t0`.
    let end: Double
    /// Detector confidence attached to the span.
    let confidence: Double
}
|
||||||
|
|
||||||
|
/// Lightweight energy-based voice-activity detector for the **mic** track (the
/// user).
///
/// It is fed the *exact same* 16 kHz mono stream the mic WAV receives — real
/// samples via `feed` and timeline-gap silence via `feedSilence` — so its
/// sample cursor tracks the mic file position and span times land on the same
/// instants as `mixed_mono_16k.wav`.
///
/// Audio is evaluated in 20 ms frames. A span opens after `openFrames`
/// consecutive frames above the threshold and closes after `closeFrames`
/// consecutive frames below it; in both cases the reported boundary is the
/// start of the first frame of that run, so the closing hangover is not part
/// of the span.
///
/// Phase 3's `TimelineBuilder` will fold these in as high-confidence pre-seeded
/// "self" segments. Thresholds are intentionally simple and will be tuned later.
///
/// Single-threaded: all calls happen on `AudioRecorder.ioQueue`.
final class MicVAD {
    private let frameSize = 320          // 20 ms @ 16 kHz
    private let openFrames = 2           // ~40 ms above threshold to open
    private let closeFrames = 10         // ~200 ms hangover to close
    private let absoluteFloor: Float = 0.006
    private let floorMultiplier: Float = 2.5

    private var cursorSamples = 0        // samples evaluated so far, in whole frames
    private var noiseFloor: Float = 0.01
    private var voicedRun = 0            // consecutive voiced frames
    private var silentRun = 0            // consecutive silent frames
    private var inSpeech = false
    private var spanStartSample = 0
    private var acc: [Float] = []        // samples waiting to fill a whole frame
    private(set) var spans: [VADSpan] = []

    /// Feeds real mic samples (first channel of `buffer`). Empty buffers and
    /// buffers without float data are ignored.
    func feed(_ buffer: AVAudioPCMBuffer) {
        guard let ch = buffer.floatChannelData, buffer.frameLength > 0 else { return }
        acc.append(contentsOf: UnsafeBufferPointer(start: ch[0], count: Int(buffer.frameLength)))
        drainFrames()
    }

    /// Feeds `count` zero samples, mirroring silence padded into the mic file.
    func feedSilence(_ count: Int64) {
        guard count > 0 else { return }
        acc.append(contentsOf: repeatElement(0, count: Int(count)))
        drainFrames()
    }

    /// Closes any span still open at end of capture.
    ///
    /// The span ends where the trailing run of silent frames began, which is
    /// the same boundary `step` uses when a span closes normally. Without
    /// this, a span cut off by the end of capture would include up to
    /// `closeFrames - 1` frames of hangover silence.
    func finish() {
        guard inSpeech else { return }
        appendSpan(startSample: spanStartSample,
                   endSample: cursorSamples - silentRun * frameSize)
        inSpeech = false
    }

    /// Evaluates every whole frame currently buffered and keeps the remainder
    /// for the next call.
    private func drainFrames() {
        var i = 0
        while i + frameSize <= acc.count {
            var sum: Float = 0
            var j = i
            while j < i + frameSize { sum += acc[j] * acc[j]; j += 1 }
            // `step` sees the cursor at the START of this frame; advance afterwards.
            step(rms: (sum / Float(frameSize)).squareRoot())
            cursorSamples += frameSize
            i += frameSize
        }
        if i > 0 { acc.removeFirst(i) }
    }

    /// Advances the detector by one frame with the given RMS energy.
    /// `cursorSamples` is the start sample of the frame being evaluated.
    private func step(rms: Float) {
        // The floor follows quieter frames quickly and louder frames slowly.
        if rms < noiseFloor { noiseFloor = 0.9 * noiseFloor + 0.1 * rms }
        else { noiseFloor = 0.995 * noiseFloor + 0.005 * rms }

        let threshold = max(absoluteFloor, noiseFloor * floorMultiplier)
        let voiced = rms > threshold

        if voiced {
            voicedRun += 1; silentRun = 0
            if !inSpeech && voicedRun >= openFrames {
                inSpeech = true
                // Back-date the start to the first frame of the voiced run.
                spanStartSample = cursorSamples - (voicedRun - 1) * frameSize
            }
        } else {
            silentRun += 1; voicedRun = 0
            if inSpeech && silentRun >= closeFrames {
                inSpeech = false
                // End at the start of the first silent frame of the run.
                appendSpan(startSample: spanStartSample,
                           endSample: cursorSamples - (closeFrames - 1) * frameSize)
            }
        }
    }

    /// Records a span (sample indices converted to seconds at 16 kHz) if it has
    /// positive length; a negative start is clamped to 0.
    private func appendSpan(startSample: Int, endSample: Int) {
        let start = Double(max(0, startSample)) / 16_000.0
        let end = Double(endSample) / 16_000.0
        if end > start { spans.append(VADSpan(start: start, end: end, confidence: 0.9)) }
    }
}
|
||||||
@@ -0,0 +1,67 @@
|
|||||||
|
import AVFoundation
|
||||||
|
|
||||||
|
/// Append-only writer for one **16 kHz mono PCM-16 WAV** track.
///
/// It knows nothing about time: it appends audio or silence and counts the
/// frames committed in `framesWritten`. Alignment to the shared `t0` is the
/// caller's job (`AudioRecorder`), which pads/trims using each buffer's true
/// host time so the mic and system tracks stay anchored to the same timeline
/// even if buffers are dropped or the hardware clocks drift.
///
/// Single-threaded: all calls happen on `AudioRecorder.ioQueue`.
final class MonoTrackWriter {
    private let file: AVAudioFile
    private(set) var framesWritten: Int64 = 0

    /// Creates the WAV file at `url`.
    ///
    /// - Throws: If the file cannot be opened for writing.
    init(url: URL) throws {
        // On disk = Int16 PCM; processing/buffer format = Float32 (matches Resampler).
        let wavSettings: [String: Any] = [
            AVFormatIDKey: kAudioFormatLinearPCM,
            AVSampleRateKey: 16_000,
            AVNumberOfChannelsKey: 1,
            AVLinearPCMBitDepthKey: 16,
            AVLinearPCMIsFloatKey: false,
            AVLinearPCMIsBigEndianKey: false,
        ]
        file = try AVAudioFile(
            forWriting: url,
            settings: wavSettings,
            commonFormat: .pcmFormatFloat32,
            interleaved: false)
    }

    /// Appends `buffer` to the file.
    ///
    /// - Returns: The number of frames committed, or 0 when the buffer is empty
    ///   or the write fails (best-effort: a buffer is dropped rather than the
    ///   session torn down). Callers use this count to keep the VAD in lockstep.
    @discardableResult
    func write(_ buffer: AVAudioPCMBuffer) -> Int64 {
        let frames = Int64(buffer.frameLength)
        guard frames > 0, (try? file.write(from: buffer)) != nil else { return 0 }
        framesWritten += frames
        return frames
    }

    /// Appends `count` frames of silence (to fill timeline gaps), in chunks of
    /// at most one second.
    ///
    /// - Returns: The number of frames committed, which is less than `count` if
    ///   an allocation or write fails part-way.
    @discardableResult
    func padSilence(_ count: Int64) -> Int64 {
        guard count > 0 else { return 0 }

        let maxChunk: Int64 = 16_000
        var committed: Int64 = 0
        while committed < count {
            let frames = AVAudioFrameCount(min(maxChunk, count - committed))
            guard let silence = AVAudioPCMBuffer(pcmFormat: Resampler.targetFormat, frameCapacity: frames)
            else { break }
            silence.frameLength = frames
            if let channels = silence.floatChannelData {
                memset(channels[0], 0, Int(frames) * MemoryLayout<Float>.size)
            }
            guard write(silence) != 0 else { break }
            committed += Int64(frames)
        }
        return committed
    }
}
|
||||||
@@ -0,0 +1,65 @@
|
|||||||
|
import AVFoundation
|
||||||
|
|
||||||
|
/// Converts incoming PCM buffers of any format to **16 kHz mono Float32**.
///
/// The underlying `AVAudioConverter` is kept between calls so the sample-rate
/// conversion stays continuous from one buffer to the next; use one instance
/// per source stream.
///
/// Not thread-safe: drive each instance from a single thread. The mic and
/// system instances are both driven from `AudioRecorder.ioQueue`.
final class Resampler {
    /// The canonical Phase-1 audio format: 16 kHz, mono, Float32, deinterleaved.
    static let targetFormat = AVAudioFormat(
        commonFormat: .pcmFormatFloat32,
        sampleRate: 16_000,
        channels: 1,
        interleaved: false)!

    /// Converter for the most recently seen input format (built lazily).
    private var converter: AVAudioConverter?
    /// Input format `converter` was built for; a different format triggers a rebuild.
    private var sourceFormat: AVAudioFormat?
    /// Set once `drain()` has run; every later call returns nil.
    private var ended = false

    /// Converts `input` to the target format.
    ///
    /// - Returns: A 16 kHz mono buffer, or nil when the stream has ended, the
    ///   input is empty, no converter/buffer could be created, the conversion
    ///   failed, or it produced no frames.
    func resample(_ input: AVAudioPCMBuffer) -> AVAudioPCMBuffer? {
        if ended || input.frameLength == 0 { return nil }

        // (Re)build the converter on first use or when the input format changes.
        let inputFormat = input.format
        let needsNewConverter = converter == nil || sourceFormat != inputFormat
        if needsNewConverter {
            converter = AVAudioConverter(from: inputFormat, to: Self.targetFormat)
            sourceFormat = inputFormat
        }
        guard let active = converter else { return nil }

        // Size the output for the rate-scaled frame count plus 64 frames of headroom.
        let rateRatio = Self.targetFormat.sampleRate / inputFormat.sampleRate
        let scaledFrames = (Double(input.frameLength) * rateRatio).rounded(.up)
        let capacity = AVAudioFrameCount(scaledFrames) + 64
        guard let output = AVAudioPCMBuffer(pcmFormat: Self.targetFormat,
                                            frameCapacity: capacity) else { return nil }

        // Hand `input` to the converter exactly once; further requests get
        // `.noDataNow`, which keeps the converter open for the next call.
        var pendingInput: AVAudioPCMBuffer? = input
        var conversionError: NSError?
        let status = active.convert(to: output, error: &conversionError) { _, inputStatus in
            guard let next = pendingInput else {
                inputStatus.pointee = .noDataNow
                return nil
            }
            pendingInput = nil
            inputStatus.pointee = .haveData
            return next
        }

        guard status != .error, output.frameLength > 0 else { return nil }
        return output
    }

    /// Flushes whatever the converter still holds at end of stream.
    ///
    /// Call once on stop. The instance is marked ended regardless of outcome.
    ///
    /// - Returns: The remaining converted frames, or nil if there was no
    ///   converter, the stream had already ended, or nothing was produced.
    func drain() -> AVAudioPCMBuffer? {
        let alreadyEnded = ended
        ended = true
        guard !alreadyEnded, let active = converter else { return nil }

        guard let tail = AVAudioPCMBuffer(pcmFormat: Self.targetFormat,
                                          frameCapacity: 8192) else { return nil }

        // Signal end-of-stream so the converter emits its buffered tail.
        var conversionError: NSError?
        let status = active.convert(to: tail, error: &conversionError) { _, inputStatus in
            inputStatus.pointee = .endOfStream
            return nil
        }

        let producedNothing = status == .error || tail.frameLength == 0
        return producedNothing ? nil : tail
    }
}
|
||||||
@@ -0,0 +1,213 @@
|
|||||||
|
import Foundation
|
||||||
|
import Combine
|
||||||
|
import AppKit
|
||||||
|
|
||||||
|
/// Summary of a finished recording session, published for the UI.
struct SessionInfo: Equatable {
    /// Directory that holds the session's files.
    let folder: URL

    /// Location of the mixed-down recording inside the session.
    let mixedURL: URL

    /// Length of the recording, in seconds.
    let duration: Double

    /// Number of mic-VAD "self" spans detected during the session.
    let selfSpanCount: Int
}
|
||||||
|
|
||||||
|
/// Owns a single recording session: creates the session folder, drives
/// `AudioRecorder` start/stop, tracks elapsed time, and writes the Phase-1
/// preview of mic-VAD self spans. Detection/visual/backend wiring come later.
///
/// The lifecycle is serialized through an explicit state machine so start and
/// stop can never interleave (`.starting` → `.recording` → `.finishing`).
@MainActor
final class SessionController: ObservableObject {
    /// Lifecycle phase of the controller. `.error` carries a user-facing message.
    enum State: Equatable {
        case idle
        case starting
        case recording
        case finishing
        case error(String)
    }

    /// Raised when the self-span preview cannot be represented as JSON.
    private enum SelfSpanError: LocalizedError {
        case notSerializable

        var errorDescription: String? {
            "VAD data is not valid JSON (for example, a non-finite number)."
        }
    }

    /// Set in init so `AppDelegate.applicationShouldTerminate` can finalize a
    /// recording in progress before the app quits.
    static weak var shared: SessionController?

    @Published private(set) var state: State = .idle
    @Published private(set) var elapsed: TimeInterval = 0
    @Published private(set) var lastSession: SessionInfo?
    /// Live input peak levels (0…1) while recording, for the UI meters.
    @Published private(set) var micLevel: Float = 0
    @Published private(set) var systemLevel: Float = 0
    /// Surfaced after a session if system audio stopped early or the
    /// `self_vad.json` preview could not be saved.
    @Published private(set) var warning: String?

    private let settings: AppSettings
    private var recorder: AudioRecorder?
    private var currentFolder: URL?
    /// Wall-clock start of the current recording; nil whenever nothing is recording.
    private var startTime: Date?
    private var timer: Timer?
    /// The in-flight start or stop Task, so `prepareForTermination` can await it.
    private var lifecycleTask: Task<Void, Never>?
    /// Bumped each time a start/stop Task is spawned (Task is a value type, so this
    /// is how `prepareForTermination` detects a newly-spawned transition).
    private var lifecycleGeneration = 0

    init(settings: AppSettings) {
        self.settings = settings
        SessionController.shared = self
    }

    /// True while a session is starting, recording, or being finalized.
    var isBusy: Bool {
        state == .starting || state == .recording || state == .finishing
    }

    /// Starts a session when idle/errored, stops it when recording, and ignores
    /// the request while a transition is already in flight.
    func toggle() {
        switch state {
        case .idle, .error: start()
        case .recording: stop()
        case .starting, .finishing: break // ignore taps mid-transition
        }
    }

    // MARK: - Start / Stop

    /// Creates the session folder and recorder, then starts capture asynchronously.
    private func start() {
        let folder: URL
        do {
            folder = try makeSessionFolder()
        } catch {
            fail("Couldn't create session folder: \(error.localizedDescription)")
            return
        }
        currentFolder = folder
        let recorder = AudioRecorder(
            micURL: folder.appendingPathComponent("mic.wav"),
            systemURL: folder.appendingPathComponent("system.wav"),
            mixedURL: folder.appendingPathComponent("mixed_mono_16k.wav"))
        self.recorder = recorder
        warning = nil
        state = .starting

        lifecycleGeneration += 1
        lifecycleTask = Task {
            do {
                try await recorder.start() // self-tears-down if it throws
                self.state = .recording
                self.startTime = Date()
                self.startTimer()
            } catch {
                self.fail("Couldn't start recording: \(error.localizedDescription)")
            }
        }
    }

    /// Moves to `.finishing` and stops the recorder asynchronously.
    private func stop() {
        guard let recorder else { return }
        state = .finishing
        stopTimer()
        lifecycleGeneration += 1
        lifecycleTask = Task {
            let result = await recorder.stop()
            self.finish(result)
        }
    }

    /// Publishes the outcome of a stopped recording and returns to `.idle`.
    private func finish(_ result: RecordingResult) {
        recorder = nil
        // Clear the start time so a timer tick that was queued before
        // invalidation cannot recompute `elapsed` from a finished session.
        startTime = nil
        micLevel = 0
        systemLevel = 0

        var notes: [String] = []
        if let note = result.systemNote {
            notes.append("System audio stopped early: \(note)")
        }
        if let folder = currentFolder {
            do {
                try writeSelfSpans(result, to: folder)
            } catch {
                // The audio itself is already on disk; only the VAD preview is
                // missing, so report it instead of failing the session.
                notes.append("Couldn't save self_vad.json: \(error.localizedDescription)")
            }
            lastSession = SessionInfo(
                folder: folder, mixedURL: result.mixedURL,
                duration: result.duration, selfSpanCount: result.selfSpans.count)
        }
        warning = notes.isEmpty ? nil : notes.joined(separator: "\n")

        currentFolder = nil
        elapsed = 0
        state = .idle
    }

    /// Drops all session state and publishes `message` as the error.
    private func fail(_ message: String) {
        recorder = nil
        currentFolder = nil
        startTime = nil
        stopTimer()
        micLevel = 0
        systemLevel = 0
        elapsed = 0
        state = .error(message)
    }

    /// Called from `applicationShouldTerminate`: flush any in-progress session so
    /// its WAV headers are finalized before the process exits. Handles quit while
    /// `.starting` and `.finishing`, not just `.recording`.
    func prepareForTermination() async {
        // Drain whatever lifecycle Task is in flight until nothing is busy. A Stop
        // click landing in an await window can spawn a new stop Task, so loop
        // rather than awaiting a single captured task.
        while isBusy {
            let gen = lifecycleGeneration
            await lifecycleTask?.value
            if state == .recording, let recorder {
                state = .finishing
                stopTimer()
                finish(await recorder.stop())
            } else if lifecycleGeneration == gen {
                break // settled: no new transition was spawned
            }
        }
    }

    // MARK: - Timer

    /// Starts the 0.1 s UI tick that refreshes `elapsed` and the level meters.
    private func startTimer() {
        timer = Timer.scheduledTimer(withTimeInterval: 0.1, repeats: true) { [weak self] _ in
            Task { @MainActor in
                guard let self else { return }
                if let start = self.startTime { self.elapsed = Date().timeIntervalSince(start) }
                if let recorder = self.recorder {
                    let levels = recorder.currentLevels()
                    self.micLevel = levels.mic
                    self.systemLevel = levels.system
                }
            }
        }
    }

    private func stopTimer() {
        timer?.invalidate()
        timer = nil
    }

    // MARK: - Files

    /// Creates `<output>/sessions/<timestamp>_manual` and returns its URL.
    private func makeSessionFolder() throws -> URL {
        let base = settings.outputFolderURL.appendingPathComponent("sessions", isDirectory: true)
        let folder = base.appendingPathComponent("\(Self.timestamp())_manual", isDirectory: true)
        try FileManager.default.createDirectory(at: folder, withIntermediateDirectories: true)
        return folder
    }

    /// Current time as `yyyy-MM-dd'T'HH-mm-ss`, formatted with a fixed POSIX locale.
    private static func timestamp() -> String {
        let f = DateFormatter()
        f.locale = Locale(identifier: "en_US_POSIX")
        f.dateFormat = "yyyy-MM-dd'T'HH-mm-ss"
        return f.string(from: Date())
    }

    /// Phase-1 preview of the mic-VAD "self" spans (the eventual
    /// `visual_timeline.json` `mic_vad` segments). Lets us eyeball VAD quality.
    ///
    /// - Throws: `SelfSpanError.notSerializable` when the payload is not valid
    ///   JSON, or the underlying encoding/file-write error.
    private func writeSelfSpans(_ result: RecordingResult, to folder: URL) throws {
        let segments = result.selfSpans.map { span -> [String: Any] in
            ["start": span.start, "end": span.end, "name": "self",
             "confidence": span.confidence, "source": "mic_vad"]
        }
        let object: [String: Any] = [
            "note": "Phase 1 mic-VAD self spans (preview of visual_timeline segments)",
            "t0_unix": result.t0Unix,
            "duration_sec": result.duration,
            "self_spans": segments,
        ]
        // `data(withJSONObject:)` raises an Objective-C exception (not a Swift
        // error) for invalid objects such as NaN/infinite numbers, so validate first.
        guard JSONSerialization.isValidJSONObject(object) else {
            throw SelfSpanError.notSerializable
        }
        let data = try JSONSerialization.data(withJSONObject: object,
                                              options: [.prettyPrinted, .sortedKeys])
        try data.write(to: folder.appendingPathComponent("self_vad.json"))
    }
}
|
||||||
@@ -0,0 +1,38 @@
|
|||||||
|
import SwiftUI
|
||||||
|
|
||||||
|
/// Compact horizontal level meter with a caption.
///
/// `level` is a linear peak amplitude (0…1). It is drawn on a dBFS scale from
/// −60 dB (empty) to 0 dB (full) so ordinary speech levels are clearly visible.
struct LevelBar: View {
    let label: String
    let level: Float

    var body: some View {
        HStack(spacing: 8) {
            caption
            meter
        }
    }

    /// Fixed-width caption shown to the left of the bar.
    private var caption: some View {
        Text(label)
            .font(.caption2)
            .foregroundStyle(.secondary)
            .frame(width: 48, alignment: .leading)
    }

    /// Track plus a fill whose width is `fraction` of the available space.
    private var meter: some View {
        GeometryReader { proxy in
            ZStack(alignment: .leading) {
                RoundedRectangle(cornerRadius: 2).fill(Color.secondary.opacity(0.2))
                RoundedRectangle(cornerRadius: 2)
                    .fill(color)
                    .frame(width: proxy.size.width * fraction)
            }
        }
        .frame(height: 6)
    }

    /// Filled portion of the bar (0…1): the level in dB mapped linearly from
    /// −60 dB → 0 to 0 dB → 1. Non-positive levels draw as empty.
    private var fraction: CGFloat {
        guard level > 0 else { return 0 }
        let decibels = 20 * log10(Double(level)) // −∞ … 0
        let normalized = (decibels + 60) / 60
        return CGFloat(min(1, max(0, normalized)))
    }

    /// Gray when essentially silent, red near full scale, green otherwise.
    private var color: Color {
        switch fraction {
        case ..<0.02:
            return .gray
        case let filled where filled > 0.9:
            return .red
        default:
            return .green
        }
    }
}
|
||||||
@@ -7,12 +7,15 @@ struct MenuBarView: View {
|
|||||||
@EnvironmentObject private var settings: AppSettings
|
@EnvironmentObject private var settings: AppSettings
|
||||||
@EnvironmentObject private var permissions: PermissionsManager
|
@EnvironmentObject private var permissions: PermissionsManager
|
||||||
@EnvironmentObject private var health: SparkControlHealth
|
@EnvironmentObject private var health: SparkControlHealth
|
||||||
|
@EnvironmentObject private var session: SessionController
|
||||||
|
|
||||||
var body: some View {
|
var body: some View {
|
||||||
NavigationStack {
|
NavigationStack {
|
||||||
VStack(alignment: .leading, spacing: 12) {
|
VStack(alignment: .leading, spacing: 12) {
|
||||||
header
|
header
|
||||||
Divider()
|
Divider()
|
||||||
|
recordingSection
|
||||||
|
Divider()
|
||||||
permissionsSection
|
permissionsSection
|
||||||
Divider()
|
Divider()
|
||||||
backendSection
|
backendSection
|
||||||
@@ -26,6 +29,91 @@ struct MenuBarView: View {
|
|||||||
.task { await refreshHealth() }
|
.task { await refreshHealth() }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MARK: Recording
|
||||||
|
|
||||||
|
/// True only when both the microphone and screen-recording permissions are granted.
private var canRecord: Bool {
    guard permissions.microphone == .granted else { return false }
    return permissions.screenRecording == .granted
}
|
||||||
|
|
||||||
|
/// Recording controls: title with elapsed time, the start/stop button, live
/// level meters, and any permission hint, error, warning, or last-session link.
private var recordingSection: some View {
    let isRecording = session.state == .recording

    return VStack(alignment: .leading, spacing: 8) {
        // Title row; the elapsed clock only appears while recording.
        HStack {
            Text("Recording").font(.subheadline).bold()
            Spacer()
            if isRecording {
                Text(timeString(session.elapsed))
                    .font(.system(.caption, design: .monospaced))
                    .foregroundStyle(.secondary)
            }
        }

        Button(action: { session.toggle() }, label: {
            Label(recordButtonTitle, systemImage: recordButtonIcon)
                .frame(maxWidth: .infinity)
        })
        .controlSize(.large)
        .tint(isRecording ? .red : .accentColor)
        .disabled(recordButtonDisabled)

        if isRecording {
            LevelBar(label: "Mic", level: session.micLevel)
            LevelBar(label: "System", level: session.systemLevel)
        }

        // Hint is shown only when permissions are missing and nothing is in flight.
        if !(canRecord || session.isBusy) {
            Text("Grant Microphone + Screen Recording above to record.")
                .font(.caption)
                .foregroundStyle(.secondary)
        }

        if case let .error(message) = session.state {
            Text(message).font(.caption).foregroundStyle(.red)
        }

        if let warning = session.warning {
            Text(warning).font(.caption).foregroundStyle(.orange)
        }

        // Summary of the previous session; clicking reveals the mixed file in Finder.
        if let last = session.lastSession {
            Button(action: {
                NSWorkspace.shared.activateFileViewerSelecting([last.mixedURL])
            }, label: {
                Text("Last: \(Int(last.duration.rounded()))s · \(last.selfSpanCount) self-spans — reveal in Finder")
                    .font(.caption)
            })
            .buttonStyle(.link)
        }
    }
}
|
||||||
|
|
||||||
|
/// Button caption for the current session state.
private var recordButtonTitle: String {
    let title: String
    switch session.state {
    case .idle, .error:
        title = "Start Recording"
    case .starting:
        title = "Starting…"
    case .recording:
        title = "Stop Recording"
    case .finishing:
        title = "Finishing…"
    }
    return title
}
|
||||||
|
|
||||||
|
/// SF Symbol name for the button: a stop glyph while recording, a record glyph otherwise.
private var recordButtonIcon: String {
    if session.state == .recording {
        return "stop.circle.fill"
    }
    return "record.circle"
}
|
||||||
|
|
||||||
|
/// Whether the record button should be disabled for the current state.
private var recordButtonDisabled: Bool {
    switch session.state {
    case .idle, .error:
        // Starting requires the permissions checked by `canRecord`.
        return !canRecord
    case .recording:
        // Stopping is always allowed.
        return false
    case .starting, .finishing:
        // A transition is already in flight.
        return true
    }
}
|
||||||
|
|
||||||
|
/// Formats an elapsed interval as `MM:SS` (minutes are not capped at 59).
///
/// - Parameter t: Elapsed time in seconds.
/// - Returns: Zero-padded minutes and seconds. Negative, non-finite, or
///   unrepresentably large values render as `"00:00"`.
private func timeString(_ t: TimeInterval) -> String {
    // `Int(t)` traps on NaN/infinity/out-of-range values, and a negative
    // interval (e.g. after a wall-clock adjustment) would render as "00:-5".
    guard t.isFinite, t > 0, let total = Int(exactly: t.rounded(.down)) else {
        return "00:00"
    }
    return String(format: "%02d:%02d", total / 60, total % 60)
}
|
||||||
|
|
||||||
private var header: some View {
|
private var header: some View {
|
||||||
VStack(alignment: .leading, spacing: 2) {
|
VStack(alignment: .leading, spacing: 2) {
|
||||||
Text("Ten31 Transcripts").font(.headline)
|
Text("Ten31 Transcripts").font(.headline)
|
||||||
|
|||||||
+5
-3
@@ -13,9 +13,11 @@ settings:
|
|||||||
CURRENT_PROJECT_VERSION: "1"
|
CURRENT_PROJECT_VERSION: "1"
|
||||||
SWIFT_VERSION: "5.0"
|
SWIFT_VERSION: "5.0"
|
||||||
CODE_SIGN_STYLE: Automatic
|
CODE_SIGN_STYLE: Automatic
|
||||||
# Leave the team empty; pick your free personal team in Xcode's
|
# Grant's free personal team (cert OU). Baked in so `xcodegen generate` keeps
|
||||||
# Signing & Capabilities tab on first open (see README).
|
# a STABLE signing identity across regenerations — macOS ties TCC permission
|
||||||
DEVELOPMENT_TEAM: ""
|
# grants (Mic / Screen Recording / Accessibility) to this identity, so a
|
||||||
|
# stable team is what makes those permissions persist across rebuilds.
|
||||||
|
DEVELOPMENT_TEAM: "BK4Y6CXN35"
|
||||||
|
|
||||||
targets:
|
targets:
|
||||||
Ten31Transcripts:
|
Ten31Transcripts:
|
||||||
|
|||||||
Reference in New Issue
Block a user