diff --git a/README.md b/README.md
index 6273494..172efc7 100644
--- a/README.md
+++ b/README.md
@@ -23,13 +23,17 @@ This repo is at **Phase 0** (scaffold, permissions, backend health check).
    ```sh
    open Ten31Transcripts.xcodeproj
    ```
-5. In Xcode, select the **Ten31Transcripts** target → **Signing & Capabilities**:
-   - Check **Automatically manage signing**.
-   - For **Team**, pick your personal team (sign in with your Apple ID — free; no
-     paid developer account needed). A stable team keeps macOS from re-asking for
-     permissions on every rebuild.
+5. Signing is preconfigured: `project.yml` sets `DEVELOPMENT_TEAM` to the free
+   personal team `BK4Y6CXN35` with automatic signing, so **Signing & Capabilities
+   should already show the team** — no manual selection needed. (If you ever sign
+   with a different Apple ID, update `DEVELOPMENT_TEAM` in `project.yml`, not in
+   Xcode — `xcodegen generate` overwrites Xcode-side changes.)
 6. Press **Run** (⌘R).
 
+> **Note:** after adding files in a new phase, re-run `xcodegen generate` and let
+> Xcode reload the project. The signing team persists because it lives in
+> `project.yml`, so macOS permissions stay granted across rebuilds.
+
 ## What Phase 0 does
 
 - Launches as a menu-bar-only app (no Dock icon).
diff --git a/Ten31Transcripts/App/AppDelegate.swift b/Ten31Transcripts/App/AppDelegate.swift
index 96b4c6b..cd294c7 100644
--- a/Ten31Transcripts/App/AppDelegate.swift
+++ b/Ten31Transcripts/App/AppDelegate.swift
@@ -6,5 +6,30 @@ final class AppDelegate: NSObject, NSApplicationDelegate {
         // LSUIElement in Info.plist already enforces this; set it explicitly too
         // so behavior is unambiguous regardless of how the app is launched.
         NSApp.setActivationPolicy(.accessory)
+        terminateOtherInstances()
+    }
+
+    /// Enforces a single running instance: every other process sharing this bundle
+    /// identifier is asked to quit, so a relaunch never leaves two menu-bar icons.
+    private func terminateOtherInstances() {
+        guard let identifier = Bundle.main.bundleIdentifier else { return }
+        let ownPID = NSRunningApplication.current.processIdentifier
+        let siblings = NSRunningApplication.runningApplications(withBundleIdentifier: identifier)
+            .filter { $0.processIdentifier != ownPID }
+        // terminate() only requests a quit; the older copy still runs its own shutdown path.
+        for sibling in siblings { sibling.terminate() }
+    }
+
+    /// If a recording is in progress when the user quits, defer termination until
+    /// the session has been finalized, so the session isn't corrupted.
+    func applicationShouldTerminate(_ sender: NSApplication) -> NSApplication.TerminateReply {
+        guard let controller = SessionController.shared, controller.isBusy else {
+            return .terminateNow   // no controller alive, or it is not busy: quit immediately
+        }
+        Task { @MainActor in
+            await controller.prepareForTermination()   // presumably flushes WAV headers — defined outside this view
+            NSApp.reply(toApplicationShouldTerminate: true)   // answers the deferred `.terminateLater`
+        }
+        return .terminateLater
     }
 }
diff --git a/Ten31Transcripts/App/Ten31TranscriptsApp.swift b/Ten31Transcripts/App/Ten31TranscriptsApp.swift
index 94e3e58..1db4a83 100644
--- a/Ten31Transcripts/App/Ten31TranscriptsApp.swift
+++ b/Ten31Transcripts/App/Ten31TranscriptsApp.swift
@@ -10,9 +10,16 @@ import SwiftUI
 struct Ten31TranscriptsApp: App {
     @NSApplicationDelegateAdaptor(AppDelegate.self) private var appDelegate
 
-    @StateObject private var settings = AppSettings()
+    @StateObject private var settings: AppSettings
     @StateObject private var permissions = PermissionsManager()
     @StateObject private var health = SparkControlHealth()
+    @StateObject private var session: SessionController
+
+    init() {
+        let settings = AppSettings()
+        _settings = StateObject(wrappedValue: settings)
+        _session = StateObject(wrappedValue: SessionController(settings: settings))
+    }
 
     var body: some Scene {
         MenuBarExtra {
@@ -20,8 +27,9 @@ struct Ten31TranscriptsApp: App {
                 .environmentObject(settings)
                 .environmentObject(permissions)
                 .environmentObject(health)
+                .environmentObject(session)
         } label: {
-            Image(systemName: "waveform.circle")
+            Image(systemName: session.state == .recording ? "waveform.circle.fill" : "waveform.circle")
         }
         .menuBarExtraStyle(.window)
     }
diff --git a/Ten31Transcripts/Audio/AudioMixer.swift b/Ten31Transcripts/Audio/AudioMixer.swift
new file mode 100644
index 0000000..c97fb2c
--- /dev/null
+++ b/Ten31Transcripts/Audio/AudioMixer.swift
@@ -0,0 +1,67 @@
+import AVFoundation
+
+/// Builds the single **mixed-mono 16 kHz WAV** handed to the backend by summing the
+/// mic and system tracks frame by frame. Both inputs are expected to be front-padded
+/// to the shared t0 already, so equal frame indices refer to the same instant.
+/// Work is streamed in 1-second chunks so memory stays flat on long calls.
+enum AudioMixer {
+    /// Writes the frame-wise sum of the two tracks to `outURL`. The output is as long
+    /// as the longer input; a track that is shorter or cannot be opened adds silence.
+    /// - Throws: if the output file cannot be created or a chunk cannot be written.
+    static func mix(mic micURL: URL, system systemURL: URL, into outURL: URL) throws {
+        // A track that fails to open is kept as nil and simply contributes nothing.
+        let tracks = [micURL, systemURL].map { try? AVAudioFile(forReading: $0) }
+
+        // On-disk format: 16 kHz mono 16-bit little-endian integer PCM.
+        let wavSettings: [String: Any] = [
+            AVFormatIDKey: kAudioFormatLinearPCM,
+            AVSampleRateKey: 16_000,
+            AVNumberOfChannelsKey: 1,
+            AVLinearPCMBitDepthKey: 16,
+            AVLinearPCMIsFloatKey: false,
+            AVLinearPCMIsBigEndianKey: false,
+        ]
+        let output = try AVAudioFile(
+            forWriting: outURL,
+            settings: wavSettings,
+            commonFormat: .pcmFormatFloat32,
+            interleaved: false)
+
+        // 16 000 frames = one second at 16 kHz.
+        let framesPerChunk: AVAudioFramePosition = 16_000
+        let totalFrames = tracks.map { $0?.length ?? 0 }.max() ?? 0
+        var cursor: AVAudioFramePosition = 0
+        while cursor < totalFrames {
+            let count = AVAudioFrameCount(min(framesPerChunk, totalFrames - cursor))
+            guard let scratch = AVAudioPCMBuffer(pcmFormat: Resampler.targetFormat, frameCapacity: count),
+                  let sum = scratch.floatChannelData?[0] else { break }
+            scratch.frameLength = count
+            memset(sum, 0, Int(count) * MemoryLayout<Float>.size)
+
+            // Accumulate mic first, then system (the order of `tracks`).
+            for track in tracks { add(file: track, at: cursor, maxFrames: count, into: sum) }
+
+            // Hard-clip the summed samples to [-1, 1].
+            for index in 0..<Int(count) {
+                if sum[index] > 1 { sum[index] = 1 } else if sum[index] < -1 { sum[index] = -1 }
+            }
+            try output.write(from: scratch)
+            cursor += AVAudioFramePosition(count)
+        }
+    }
+
+    /// Adds up to `maxFrames` frames of `file`, starting at frame `pos`, onto `dst`.
+    /// A nil file, a position at or past its end, or a failed read leaves `dst` untouched.
+    private static func add(file: AVAudioFile?, at pos: AVAudioFramePosition,
+                            maxFrames: AVAudioFrameCount, into dst: UnsafeMutablePointer<Float>) {
+        guard let file, pos < file.length else { return }
+        file.framePosition = pos
+        let wanted = AVAudioFrameCount(min(AVAudioFramePosition(maxFrames), file.length - pos))
+        guard wanted > 0,
+              let scratch = AVAudioPCMBuffer(pcmFormat: file.processingFormat, frameCapacity: wanted),
+              (try? file.read(into: scratch, frameCount: wanted)) != nil,
+              let source = scratch.floatChannelData?[0]
+        else { return }
+        for index in 0..<Int(scratch.frameLength) { dst[index] += source[index] }
+    }
+}
diff --git a/Ten31Transcripts/Audio/AudioRecorder.swift b/Ten31Transcripts/Audio/AudioRecorder.swift
new file mode 100644
index 0000000..9af33a3
--- /dev/null
+++ b/Ten31Transcripts/Audio/AudioRecorder.swift
@@ -0,0 +1,333 @@
+import AVFoundation
+import ScreenCaptureKit
+import CoreMedia
+import QuartzCore
+
+struct RecordingResult {
+    let micURL: URL            // mic track file (written by a MonoTrackWriter)
+    let systemURL: URL         // system-audio track file (written by a MonoTrackWriter)
+    let mixedURL: URL          // AudioMixer output; the mix is attempted with `try?`, so it can be missing
+    let duration: Double       // seconds: the longer track's frame count / 16 000
+    let selfSpans: [VADSpan]   // mic VAD speaking spans
+    let t0Unix: Double         // `Date().timeIntervalSince1970` captured when start() began
+    /// Non-nil if system-audio capture stopped early (e.g. SCStream error).
+    let systemNote: String?
+}
+
+/// Dual-track local audio capture for Phase 1.
+///
+/// - System audio via `SCStream` (`capturesAudio`); its audio handler runs on
+///   `ioQueue`. A discard-only video output runs on `screenQueue` purely to keep
+///   SCStream's frame pipeline drained (an unconsumed video queue can stall the
+///   whole stream) — frames are dropped instantly, never stored.
+/// - Mic via `AVAudioEngine` input tap: the tap deep-copies the raw buffer and
+///   hands it to `ioQueue`, where it is resampled and written.
+/// - **`ioQueue` is the single isolation domain** for the writers, VAD, both
+///   resamplers, and lifecycle flags.
+/// - One shared monotonic `t0` (`CACurrentMediaTime`). Each buffer is placed at
+///   its true `(startHost − t0)` frame (gaps padded, overlaps trimmed), so mic
+///   and system stay aligned and the mix is a straight sum.
+/// - Live peak levels are exposed via `currentLevels()` for the UI meter.
+/// - `stop()` tears the mic down first and bounds `stopCapture()` with a timeout,
+///   so a wedged stream can never block finalization. No video is written.
+final class AudioRecorder: NSObject, SCStreamDelegate, SCStreamOutput {
+    private let micURL: URL
+    private let systemURL: URL
+    private let mixedURL: URL
+
+    private let ioQueue = DispatchQueue(label: "xyz.ten31.audio.io")
+    private let screenQueue = DispatchQueue(label: "xyz.ten31.audio.screen")
+
+    // ioQueue-only state:
+    private var t0Host: Double = 0
+    private var t0Unix: Double = 0
+    private var micWriter: MonoTrackWriter?
+    private var systemWriter: MonoTrackWriter?
+    private var vad: MicVAD?
+    private var tornDown = true
+    private let micResampler = Resampler()
+    private let systemResampler = Resampler()
+
+    // Cross-thread, guarded by levelLock:
+    private let levelLock = NSLock()
+    private var micPeak: Float = 0
+    private var sysPeak: Float = 0
+    private var streamStopped = false
+    private var systemErrorMessage: String?
+
+    private var engine: AVAudioEngine?
+    private var stream: SCStream?
+
+    init(micURL: URL, systemURL: URL, mixedURL: URL) {
+        self.micURL = micURL
+        self.systemURL = systemURL
+        self.mixedURL = mixedURL
+    }
+
+    // MARK: - Lifecycle
+
+    func start() async throws {
+        let t0 = CACurrentMediaTime()
+        let t0u = Date().timeIntervalSince1970
+        try ioQueue.sync {
+            let mic = try MonoTrackWriter(url: self.micURL)
+            let sys = try MonoTrackWriter(url: self.systemURL)
+            self.t0Host = t0
+            self.t0Unix = t0u
+            self.micWriter = mic
+            self.systemWriter = sys
+            self.vad = MicVAD()
+            self.tornDown = false
+        }
+        do {
+            try startMic()
+            try await startSystem()   // throws if Screen Recording is denied
+        } catch {
+            await abortStart()
+            throw error
+        }
+    }
+
+    func stop() async -> RecordingResult {
+        // Stop the mic FIRST — always succeeds and halts mic capture immediately.
+        engine?.inputNode.removeTap(onBus: 0)
+        engine?.stop()
+        engine = nil
+
+        // Stop system capture WITHOUT hanging: an already-errored stream can make
+        // stopCapture() block forever, so skip it if it already stopped and bound
+        // it with a timeout otherwise.
+        if let stream, !flag({ self.streamStopped }) {
+            await Self.stopCaptureWithTimeout(stream, seconds: 3)
+        }
+        stream = nil
+
+        var micFrames: Int64 = 0
+        var sysFrames: Int64 = 0
+        var spans: [VADSpan] = []
+        var t0u: Double = 0
+
+        ioQueue.sync {
+            if let tail = micResampler.drain() {
+                if (micWriter?.write(tail) ?? 0) > 0 { vad?.feed(tail) }
+            }
+            if let tail = systemResampler.drain() { systemWriter?.write(tail) }
+            vad?.finish()
+            micFrames = micWriter?.framesWritten ?? 0
+            sysFrames = systemWriter?.framesWritten ?? 0
+            spans = vad?.spans ?? []
+            t0u = t0Unix
+            tornDown = true
+            micWriter = nil
+            systemWriter = nil
+            vad = nil
+        }
+
+        try? AudioMixer.mix(mic: micURL, system: systemURL, into: mixedURL)
+
+        let duration = Double(max(micFrames, sysFrames)) / 16_000.0
+        let note = flag { self.systemErrorMessage } as String?
+        return RecordingResult(
+            micURL: micURL, systemURL: systemURL, mixedURL: mixedURL,
+            duration: duration, selfSpans: spans, t0Unix: t0u, systemNote: note)
+    }
+
+    private func abortStart() async {
+        engine?.inputNode.removeTap(onBus: 0)
+        engine?.stop()
+        engine = nil
+        if let stream { await Self.stopCaptureWithTimeout(stream, seconds: 3) }
+        stream = nil
+        ioQueue.sync {
+            tornDown = true
+            micWriter = nil
+            systemWriter = nil
+            vad = nil
+        }
+    }
+
+    /// Latest peak levels (0…1) for each source; decays so a stalled source fades.
+    func currentLevels() -> (mic: Float, system: Float) {
+        levelLock.lock(); defer { levelLock.unlock() }
+        let m = micPeak, s = sysPeak
+        micPeak *= 0.55; sysPeak *= 0.55
+        return (m, s)
+    }
+
+    // MARK: - Ingest (ioQueue only)
+
+    private func ingestMic(_ buffer: AVAudioPCMBuffer, startHost: Double) {
+        guard !tornDown, let writer = micWriter, let vad else { return }
+        let expected = max(0, Int64(((startHost - t0Host) * 16_000).rounded()))   // frame index on the t0 timeline
+        if expected > writer.framesWritten {   // gap: the file is behind this buffer's start
+            let padded = writer.padSilence(expected - writer.framesWritten)
+            if padded > 0 { vad.feedSilence(padded) }   // VAD gets exactly the silence the file got
+        }
+        let startIdx = max(0, Int(writer.framesWritten - expected))   // overlap: frames already covered
+        if startIdx >= Int(buffer.frameLength) { return }   // buffer lies entirely in already-written time
+        guard let chunk = Self.trimFront(buffer, by: startIdx) else { return }
+        updateLevel(chunk, isMic: true)
+        if writer.write(chunk) > 0 { vad.feed(chunk) }   // feed the VAD only what was committed
+    }
+
+    private func ingestSystem(_ buffer: AVAudioPCMBuffer, startHost: Double) {
+        guard !tornDown, let writer = systemWriter else { return }
+        let expected = max(0, Int64(((startHost - t0Host) * 16_000).rounded()))
+        if expected > writer.framesWritten {
+            writer.padSilence(expected - writer.framesWritten)
+        }
+        let startIdx = max(0, Int(writer.framesWritten - expected))
+        if startIdx >= Int(buffer.frameLength) { return }
+        guard let chunk = Self.trimFront(buffer, by: startIdx) else { return }
+        updateLevel(chunk, isMic: false)
+        writer.write(chunk)
+    }
+
+    // MARK: - Mic (AVAudioEngine)
+
+    private func startMic() throws {
+        let engine = AVAudioEngine()
+        let input = engine.inputNode, format = input.inputFormat(forBus: 0)
+        guard format.sampleRate > 0, format.channelCount > 0 else {   // no input device: installTap would raise an NSException
+            throw NSError(domain: "Ten31", code: 2, userInfo: [NSLocalizedDescriptionKey: "No microphone input available."])
+        }
+        input.installTap(onBus: 0, bufferSize: 4096, format: format) { [weak self] buffer, when in
+            let entry = CACurrentMediaTime()
+            guard let self, let raw = AudioRecorder.copy(buffer) else { return }   // deep copy: the engine reuses `buffer`
+            let stamped = when.isHostTimeValid ? AudioRecorder.hostSeconds(when.hostTime) : entry
+            let startHost = abs(stamped - entry) < 5 ? stamped : entry   // distrust stamps more than 5 s off "now"
+            self.ioQueue.async {
+                guard !self.tornDown, let resampled = self.micResampler.resample(raw) else { return }
+                self.ingestMic(resampled, startHost: startHost)
+            }
+        }
+        engine.prepare()
+        try engine.start()
+        self.engine = engine
+    }
+
+    // MARK: - System (ScreenCaptureKit)
+
+    private func startSystem() async throws {
+        let content = try await SCShareableContent.excludingDesktopWindows(false, onScreenWindowsOnly: false)
+        guard let display = content.displays.first else {
+            throw NSError(domain: "Ten31", code: 1,
+                          userInfo: [NSLocalizedDescriptionKey: "No display available for system-audio capture."])
+        }
+        let filter = SCContentFilter(display: display, excludingWindows: [])
+        let config = SCStreamConfiguration()
+        config.capturesAudio = true
+        config.excludesCurrentProcessAudio = true
+        config.sampleRate = 48_000
+        config.channelCount = 2
+        config.width = 2
+        config.height = 2
+        config.minimumFrameInterval = CMTime(value: 1, timescale: 2)   // ~2 fps tiny video
+        config.queueDepth = 6
+
+        let stream = SCStream(filter: filter, configuration: config, delegate: self)
+        try stream.addStreamOutput(self, type: .audio, sampleHandlerQueue: ioQueue)
+        // Discard-only video consumer keeps SCStream's frame queue drained so the
+        // stream stays alive; frames are dropped immediately and never stored.
+        try stream.addStreamOutput(self, type: .screen, sampleHandlerQueue: screenQueue)
+        try await stream.startCapture()
+        self.stream = stream
+    }
+
+    func stream(_ stream: SCStream, didOutputSampleBuffer sampleBuffer: CMSampleBuffer,
+                of type: SCStreamOutputType) {
+        guard type == .audio else { return }   // .screen frames discarded here
+        guard CMSampleBufferDataIsReady(sampleBuffer),
+              let pcm = Self.pcmBuffer(from: sampleBuffer),
+              let resampled = systemResampler.resample(pcm) else { return }
+        let entry = CACurrentMediaTime()
+        let pts = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
+        let stamped = pts.isValid ? pts.seconds : entry
+        let startHost = abs(stamped - entry) < 5 ? stamped : entry
+        ingestSystem(resampled, startHost: startHost)
+    }
+
+    func stream(_ stream: SCStream, didStopWithError error: Error) {
+        levelLock.lock()
+        streamStopped = true
+        systemErrorMessage = error.localizedDescription
+        levelLock.unlock()
+    }
+
+    // MARK: - Helpers
+
+    private func updateLevel(_ buffer: AVAudioPCMBuffer, isMic: Bool) {
+        guard let samples = buffer.floatChannelData?[0] else { return }   // channel 0 only
+        var loudest: Float = 0
+        for index in 0..<Int(buffer.frameLength) {
+            let magnitude = abs(samples[index])
+            if magnitude > loudest { loudest = magnitude }
+        }
+        levelLock.lock(); defer { levelLock.unlock() }   // peaks only rise here; currentLevels() decays them
+        if isMic { if loudest > micPeak { micPeak = loudest } }
+        else if loudest > sysPeak { sysPeak = loudest }
+    }
+
+    /// Read a levelLock-guarded value.
+    private func flag<T>(_ body: () -> T) -> T {
+        levelLock.lock(); defer { levelLock.unlock() }
+        return body()
+    }
+
+    private static func stopCaptureWithTimeout(_ stream: SCStream, seconds: Double) async {
+        // Not a task group: a group awaits ALL children before returning, so a wedged stopCapture() would still hang.
+        let firstFinished = AsyncStream<Void> { continuation in
+            Task { try? await stream.stopCapture(); continuation.yield() }
+            Task { try? await Task.sleep(nanoseconds: UInt64(seconds * 1_000_000_000)); continuation.yield() }
+        }
+        for await _ in firstFinished { break }   // resume as soon as either one yields
+    }
+
+    /// Deep-copy a PCM buffer (the engine reuses the tap buffer). Layout-agnostic.
+    private static func copy(_ buffer: AVAudioPCMBuffer) -> AVAudioPCMBuffer? {
+        guard buffer.frameLength > 0,
+              let out = AVAudioPCMBuffer(pcmFormat: buffer.format, frameCapacity: buffer.frameLength)
+        else { return nil }
+        out.frameLength = buffer.frameLength
+        let src = UnsafeMutableAudioBufferListPointer(UnsafeMutablePointer(mutating: buffer.audioBufferList))
+        let dst = UnsafeMutableAudioBufferListPointer(out.mutableAudioBufferList)
+        guard src.count == dst.count else { return nil }
+        for i in 0..<src.count {
+            guard let s = src[i].mData, let d = dst[i].mData else { return nil }
+            memcpy(d, s, min(Int(src[i].mDataByteSize), Int(dst[i].mDataByteSize)))
+        }
+        return out
+    }
+
+    private static func trimFront(_ buffer: AVAudioPCMBuffer, by frames: Int) -> AVAudioPCMBuffer? {
+        guard frames > 0 else { return buffer }   // nothing to drop: hand back the same buffer
+        let kept = Int(buffer.frameLength) - frames
+        guard kept > 0, let source = buffer.floatChannelData?[0],
+              let trimmed = AVAudioPCMBuffer(pcmFormat: buffer.format, frameCapacity: AVAudioFrameCount(kept)),
+              let target = trimmed.floatChannelData?[0] else { return nil }
+        trimmed.frameLength = AVAudioFrameCount(kept)
+        // Copies channel 0 only, skipping the first `frames` samples.
+        memcpy(target, source + frames, kept * MemoryLayout<Float>.size)
+        return trimmed
+    }
+
+    private static func hostSeconds(_ hostTime: UInt64) -> Double {
+        var info = mach_timebase_info_data_t()
+        mach_timebase_info(&info)   // numer/denom scales host-clock ticks to nanoseconds
+        return Double(hostTime) * Double(info.numer) / Double(info.denom) / 1_000_000_000.0   // ns → s
+    }
+
+    private static func pcmBuffer(from sampleBuffer: CMSampleBuffer) -> AVAudioPCMBuffer? {
+        guard let fmtDesc = CMSampleBufferGetFormatDescription(sampleBuffer),
+              let asbdPtr = CMAudioFormatDescriptionGetStreamBasicDescription(fmtDesc) else { return nil }
+        var asbd = asbdPtr.pointee
+        guard let format = AVAudioFormat(streamDescription: &asbd) else { return nil }
+        let frames = AVAudioFrameCount(CMSampleBufferGetNumSamples(sampleBuffer))
+        guard frames > 0,
+              let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frames) else { return nil }
+        buffer.frameLength = frames
+        let status = CMSampleBufferCopyPCMDataIntoAudioBufferList(
+            sampleBuffer, at: 0, frameCount: Int32(frames), into: buffer.mutableAudioBufferList)
+        return status == noErr ? buffer : nil
+    }
+}
diff --git a/Ten31Transcripts/Audio/MicVAD.swift b/Ten31Transcripts/Audio/MicVAD.swift
new file mode 100644
index 0000000..922221a
--- /dev/null
+++ b/Ten31Transcripts/Audio/MicVAD.swift
@@ -0,0 +1,98 @@
+import AVFoundation
+
+/// A speaking span on the session `t0` timeline (seconds).
+struct VADSpan: Equatable {
+    let start: Double
+    let end: Double
+    let confidence: Double
+}
+
+/// Lightweight energy-based voice-activity detector for the **mic** track (the
+/// user). It is fed the *exact same* 16 kHz mono stream the mic WAV receives —
+/// real samples via `feed` and timeline-gap silence via `feedSilence` — so its
+/// internal sample cursor always equals the mic file position, and span times
+/// land on the same instants as `mixed_mono_16k.wav`.
+///
+/// Phase 3's `TimelineBuilder` will fold these in as high-confidence pre-seeded
+/// "self" segments. Thresholds are intentionally simple and will be tuned later.
+///
+/// Single-threaded: all calls happen on `AudioRecorder.ioQueue`.
+final class MicVAD {
+    private let frameSize = 320            // 20 ms @ 16 kHz
+    private let openFrames = 2             // ~40 ms above threshold to open
+    private let closeFrames = 10           // ~200 ms hangover to close
+    private let absoluteFloor: Float = 0.006
+    private let floorMultiplier: Float = 2.5
+
+    private var cursorSamples = 0           // total samples fed (== mic file position)
+    private var noiseFloor: Float = 0.01
+    private var voicedRun = 0
+    private var silentRun = 0
+    private var inSpeech = false
+    private var spanStartSample = 0
+    private var acc: [Float] = []
+    private(set) var spans: [VADSpan] = []
+
+    func feed(_ buffer: AVAudioPCMBuffer) {
+        guard let ch = buffer.floatChannelData, buffer.frameLength > 0 else { return }
+        acc.append(contentsOf: UnsafeBufferPointer(start: ch[0], count: Int(buffer.frameLength)))
+        drainFrames()
+    }
+
+    func feedSilence(_ count: Int64) {
+        guard count > 0 else { return }
+        acc.append(contentsOf: repeatElement(0, count: Int(count)))
+        drainFrames()
+    }
+
+    /// Close any span still open at end of capture.
+    func finish() {
+        if inSpeech {
+            appendSpan(startSample: spanStartSample, endSample: cursorSamples)
+            inSpeech = false
+        }
+    }
+
+    private func drainFrames() {
+        // Consume every complete 320-sample frame in `acc`; a partial tail waits for more input.
+        let wholeFrames = acc.count / frameSize
+        for frame in 0..<wholeFrames {
+            let base = frame * frameSize
+            var energy: Float = 0
+            for sample in acc[base..<base + frameSize] { energy += sample * sample }
+            step(rms: (energy / Float(frameSize)).squareRoot())   // cursor is still at this frame's start
+            cursorSamples += frameSize
+        }
+        if wholeFrames > 0 { acc.removeFirst(wholeFrames * frameSize) }
+    }
+
+    /// Advances the detector by one frame; `cursorSamples` is that frame's start sample.
+    private func step(rms: Float) {
+        if rms < noiseFloor { noiseFloor = 0.9 * noiseFloor + 0.1 * rms }   // quieter frame: floor follows down quickly
+        else { noiseFloor = 0.995 * noiseFloor + 0.005 * rms }              // louder frame: floor creeps up slowly
+        // A frame is voiced when its RMS exceeds the larger of the fixed and adaptive thresholds.
+        let threshold = max(absoluteFloor, noiseFloor * floorMultiplier)
+        let voiced = rms > threshold
+
+        if voiced {
+            voicedRun += 1; silentRun = 0
+            if !inSpeech && voicedRun >= openFrames {
+                inSpeech = true
+                spanStartSample = cursorSamples - (voicedRun - 1) * frameSize   // back-date to the run's first voiced frame
+            }
+        } else {
+            silentRun += 1; voicedRun = 0
+            if inSpeech && silentRun >= closeFrames {
+                inSpeech = false
+                appendSpan(startSample: spanStartSample,
+                           endSample: cursorSamples - (closeFrames - 1) * frameSize)   // end where the silent run began
+            }
+        }
+    }
+
+    private func appendSpan(startSample: Int, endSample: Int) {
+        let start = Double(max(0, startSample)) / 16_000.0
+        let end = Double(endSample) / 16_000.0
+        if end > start { spans.append(VADSpan(start: start, end: end, confidence: 0.9)) }
+    }
+}
diff --git a/Ten31Transcripts/Audio/MonoTrackWriter.swift b/Ten31Transcripts/Audio/MonoTrackWriter.swift
new file mode 100644
index 0000000..261b942
--- /dev/null
+++ b/Ten31Transcripts/Audio/MonoTrackWriter.swift
@@ -0,0 +1,67 @@
+import AVFoundation
+
+/// Sequential **16 kHz mono PCM-16 WAV** writer. Deliberately "dumb": it only
+/// appends buffers and silence and tracks `framesWritten`. Time alignment to the
+/// shared `t0` is done by the caller (`AudioRecorder`), which pads/trims using
+/// each buffer's true host time so the mic and system tracks stay anchored to
+/// the same timeline even if buffers are dropped or the hardware clocks drift.
+///
+/// Single-threaded: all calls happen on `AudioRecorder.ioQueue`.
+final class MonoTrackWriter {
+    private let file: AVAudioFile
+    private(set) var framesWritten: Int64 = 0
+
+    init(url: URL) throws {
+        let settings: [String: Any] = [
+            AVFormatIDKey: kAudioFormatLinearPCM,
+            AVSampleRateKey: 16_000,
+            AVNumberOfChannelsKey: 1,
+            AVLinearPCMBitDepthKey: 16,
+            AVLinearPCMIsFloatKey: false,
+            AVLinearPCMIsBigEndianKey: false,
+        ]
+        // On disk = Int16 PCM; processing/buffer format = Float32 (matches Resampler).
+        self.file = try AVAudioFile(
+            forWriting: url,
+            settings: settings,
+            commonFormat: .pcmFormatFloat32,
+            interleaved: false)
+    }
+
+    /// Appends `buffer` to the file and returns the number of frames committed, which
+    /// is 0 for an empty buffer or a failed write. Callers use that count to keep the
+    /// VAD in lockstep with the file.
+    @discardableResult
+    func write(_ buffer: AVAudioPCMBuffer) -> Int64 {
+        let frames = Int64(buffer.frameLength)
+        guard frames > 0 else { return 0 }
+
+        // Best-effort: a failed write drops this one buffer rather than tearing the
+        // session down, and `framesWritten` is left unchanged in that case.
+        guard (try? file.write(from: buffer)) != nil else { return 0 }
+        framesWritten += frames
+        return frames
+    }
+
+    /// Appends `count` frames of silence (to fill timeline gaps), at most one second
+    /// (16 000 frames) per write. Returns the number of frames actually committed,
+    /// which is less than `count` if a buffer cannot be allocated or a write fails.
+    @discardableResult
+    func padSilence(_ count: Int64) -> Int64 {
+        var written: Int64 = 0
+        let maxChunk: Int64 = 16_000
+        // A `count` of zero or less skips the loop and returns 0.
+        while written < count {
+            let size = AVAudioFrameCount(min(maxChunk, count - written))
+            guard let silence = AVAudioPCMBuffer(pcmFormat: Resampler.targetFormat, frameCapacity: size)
+            else { break }
+            silence.frameLength = size
+            if let channels = silence.floatChannelData {
+                memset(channels[0], 0, Int(size) * MemoryLayout<Float>.size)
+            }
+            guard write(silence) > 0 else { break }   // stop at the first failed write
+            written += Int64(size)
+        }
+        return written
+    }
+}
diff --git a/Ten31Transcripts/Audio/Resampler.swift b/Ten31Transcripts/Audio/Resampler.swift
new file mode 100644
index 0000000..87b53da
--- /dev/null
+++ b/Ten31Transcripts/Audio/Resampler.swift
@@ -0,0 +1,65 @@
+import AVFoundation
+
+/// Converts arbitrary input PCM buffers to **16 kHz mono Float32**, maintaining
+/// resampler state across calls. Reuse one instance per source stream so the
+/// internal sample-rate converter stays continuous across buffers.
+///
+/// Not thread-safe: use one instance from a single thread. Both the mic and
+/// system instances are driven exclusively from `AudioRecorder.ioQueue` (one per
+/// source stream), kept continuous across buffers.
+final class Resampler {
+    /// The canonical Phase-1 audio format: 16 kHz, mono, Float32, deinterleaved.
+    static let targetFormat = AVAudioFormat(
+        commonFormat: .pcmFormatFloat32,
+        sampleRate: 16_000,
+        channels: 1,
+        interleaved: false)!
+
+    private var converter: AVAudioConverter?
+    private var sourceFormat: AVAudioFormat?
+    private var ended = false
+
+    /// 16 kHz mono buffer for `input`, or nil if conversion produced nothing.
+    func resample(_ input: AVAudioPCMBuffer) -> AVAudioPCMBuffer? {
+        guard !ended, input.frameLength > 0 else { return nil }
+
+        if converter == nil || sourceFormat != input.format {
+            converter = AVAudioConverter(from: input.format, to: Self.targetFormat)
+            sourceFormat = input.format
+        }
+        guard let converter else { return nil }
+
+        let ratio = Self.targetFormat.sampleRate / input.format.sampleRate
+        let capacity = AVAudioFrameCount((Double(input.frameLength) * ratio).rounded(.up)) + 64
+        guard let output = AVAudioPCMBuffer(pcmFormat: Self.targetFormat, frameCapacity: capacity) else {
+            return nil
+        }
+
+        var consumed = false
+        var error: NSError?
+        let status = converter.convert(to: output, error: &error) { _, inputStatus in
+            if consumed { inputStatus.pointee = .noDataNow; return nil }
+            consumed = true
+            inputStatus.pointee = .haveData
+            return input
+        }
+        if status == .error || output.frameLength == 0 { return nil }
+        return output
+    }
+
+    /// Flushes the converter's internal tail at end of stream; marks the resampler ended.
+    func drain() -> AVAudioPCMBuffer? {
+        let alreadyEnded = ended
+        ended = true   // from here on resample() and drain() both return nil
+        guard !alreadyEnded, let converter,
+              let tail = AVAudioPCMBuffer(pcmFormat: Self.targetFormat, frameCapacity: 8192)
+        else { return nil }
+        var conversionError: NSError?
+        let outcome = converter.convert(to: tail, error: &conversionError) { _, inputStatus in
+            inputStatus.pointee = .endOfStream   // no further input: ask for the remainder
+            return nil
+        }
+        guard outcome != .error, tail.frameLength > 0 else { return nil }
+        return tail
+    }
+}
diff --git a/Ten31Transcripts/Session/SessionController.swift b/Ten31Transcripts/Session/SessionController.swift
new file mode 100644
index 0000000..2637aa1
--- /dev/null
+++ b/Ten31Transcripts/Session/SessionController.swift
@@ -0,0 +1,213 @@
+import Foundation
+import Combine
+import AppKit
+
+/// Summary of a finished recording session, published by `SessionController` as
+/// `lastSession`.
+struct SessionInfo: Equatable {
+    /// Session folder the recording was written into.
+    let folder: URL
+    /// Mixed-down audio file, as reported by the recorder's result.
+    let mixedURL: URL
+    /// Recording length reported by the recorder (the menu shows it in seconds).
+    let duration: Double
+    /// Number of mic-VAD self spans found in the recording.
+    let selfSpanCount: Int
+}
+
+/// Owns a single recording session: creates the session folder, drives
+/// `AudioRecorder` start/stop, tracks elapsed time, and writes the Phase-1
+/// preview of mic-VAD self spans. Detection/visual/backend wiring come later.
+///
+/// The lifecycle is serialized through an explicit state machine so start and
+/// stop can never interleave (`.starting` → `.recording` → `.finishing`).
+@MainActor
+final class SessionController: ObservableObject {
+    /// Lifecycle phase of the controller.
+    enum State: Equatable {
+        /// No session in progress; a new one may be started.
+        case idle
+        /// The recorder's `start()` has been requested but has not returned yet.
+        case starting
+        /// Capture is running and the ticker is updating `elapsed` and the levels.
+        case recording
+        /// The recorder's `stop()` has been requested; the session is being finalized.
+        case finishing
+        /// Creating the folder or starting the recorder failed; the payload is the
+        /// user-facing message.
+        case error(String)
+    }
+
+    /// Set in init so `AppDelegate.applicationShouldTerminate` can finalize a
+    /// recording in progress before the app quits.
+    static weak var shared: SessionController?
+
+    @Published private(set) var state: State = .idle
+    @Published private(set) var elapsed: TimeInterval = 0
+    @Published private(set) var lastSession: SessionInfo?
+    /// Live input peak levels (0…1) while recording, for the UI meters.
+    @Published private(set) var micLevel: Float = 0
+    @Published private(set) var systemLevel: Float = 0
+    /// Surfaced after a session if system audio stopped early.
+    @Published private(set) var warning: String?
+
+    private let settings: AppSettings
+    private var recorder: AudioRecorder?
+    private var currentFolder: URL?
+    private var startTime: Date?
+    private var timer: Timer?
+    /// The in-flight start or stop Task, so `prepareForTermination` can await it.
+    private var lifecycleTask: Task<Void, Never>?
+    /// Bumped each time a start/stop Task is spawned (Task is a value type, so this
+    /// is how `prepareForTermination` detects a newly-spawned transition).
+    private var lifecycleGeneration = 0
+
+    init(settings: AppSettings) {
+        self.settings = settings
+        SessionController.shared = self
+    }
+
+    /// True while a session is starting, recording or finishing.
+    var isBusy: Bool {
+        state == .starting || state == .recording || state == .finishing
+    }
+
+    /// Record-button action: starts a session from `.idle`/`.error` and stops it
+    /// from `.recording`.
+    func toggle() {
+        switch state {
+        case .idle, .error: start()
+        case .recording: stop()
+        case .starting, .finishing: break   // ignore taps mid-transition
+        }
+    }
+
+    // MARK: - Start / Stop
+
+    /// Creates the session folder and the recorder, enters `.starting`, and starts
+    /// capture in a lifecycle Task that ends in `.recording` or `.error`.
+    private func start() {
+        let folder: URL
+        do {
+            folder = try makeSessionFolder()
+        } catch {
+            fail("Couldn't create session folder: \(error.localizedDescription)")
+            return
+        }
+        currentFolder = folder
+        let recorder = AudioRecorder(
+            micURL: folder.appendingPathComponent("mic.wav"),
+            systemURL: folder.appendingPathComponent("system.wav"),
+            mixedURL: folder.appendingPathComponent("mixed_mono_16k.wav"))
+        self.recorder = recorder
+        warning = nil
+        state = .starting
+
+        lifecycleGeneration += 1
+        lifecycleTask = Task {
+            do {
+                try await recorder.start()        // self-tears-down if it throws
+                self.state = .recording
+                self.startTime = Date()
+                self.startTimer()
+            } catch {
+                self.fail("Couldn't start recording: \(error.localizedDescription)")
+            }
+        }
+    }
+
+    /// Enters `.finishing`, stops the ticker, and stops the recorder in a lifecycle
+    /// Task that hands the result to `finish(_:)`.
+    private func stop() {
+        guard let recorder else { return }
+        state = .finishing
+        stopTimer()
+        lifecycleGeneration += 1
+        lifecycleTask = Task {
+            let result = await recorder.stop()
+            self.finish(result)
+        }
+    }
+
+    /// Publishes the outcome of a stopped session (warning, self-span preview file,
+    /// `lastSession`) and returns to `.idle`.
+    private func finish(_ result: RecordingResult) {
+        recorder = nil
+        resetLiveReadouts()
+        warning = result.systemNote.map { "System audio stopped early: \($0)" }
+        if let folder = currentFolder {
+            writeSelfSpans(result, to: folder)
+            lastSession = SessionInfo(
+                folder: folder, mixedURL: result.mixedURL,
+                duration: result.duration, selfSpanCount: result.selfSpans.count)
+        }
+        currentFolder = nil
+        state = .idle
+    }
+
+    /// Abandons the current attempt and surfaces `message` through `.error`.
+    private func fail(_ message: String) {
+        recorder = nil
+        currentFolder = nil
+        resetLiveReadouts()
+        state = .error(message)
+    }
+
+    /// Stops the ticker and zeroes everything it feeds. `startTime` is cleared too,
+    /// so a tick Task that was already queued cannot restore a stale `elapsed`.
+    private func resetLiveReadouts() {
+        stopTimer()
+        startTime = nil
+        micLevel = 0
+        systemLevel = 0
+        elapsed = 0
+    }
+
+    /// Called from `applicationShouldTerminate`: flush any in-progress session so
+    /// its WAV headers are finalized before the process exits. Handles quit while
+    /// `.starting` and `.finishing`, not just `.recording`.
+    func prepareForTermination() async {
+        // Drain whatever lifecycle Task is in flight until nothing is busy. A Stop
+        // click landing in an await window can spawn a new stop Task, so loop
+        // rather than awaiting a single captured task.
+        while isBusy {
+            let gen = lifecycleGeneration
+            await lifecycleTask?.value
+            if state == .recording, let recorder {
+                state = .finishing
+                stopTimer()
+                finish(await recorder.stop())
+            } else if lifecycleGeneration == gen {
+                break   // settled: no new transition was spawned
+            }
+        }
+    }
+
+    // MARK: - Timer
+
+    /// Starts the 0.1 s ticker that refreshes `elapsed` and the level meters while
+    /// the state is `.recording`.
+    private func startTimer() {
+        stopTimer()   // never leave an earlier ticker running
+        let ticker = Timer(timeInterval: 0.1, repeats: true) { [weak self] _ in
+            Task { @MainActor in
+                // A tick queued just before a stop must not touch the published values.
+                guard let self, self.state == .recording else { return }
+                if let start = self.startTime { self.elapsed = Date().timeIntervalSince(start) }
+                if let recorder = self.recorder {
+                    let levels = recorder.currentLevels()
+                    self.micLevel = levels.mic
+                    self.systemLevel = levels.system
+                }
+            }
+        }
+        // `.common` keeps the ticker firing while the run loop is in an event-tracking
+        // mode; a default-mode-only timer pauses there and the readouts would freeze.
+        RunLoop.main.add(ticker, forMode: .common)
+        timer = ticker
+    }
+
+    /// Invalidates and forgets the ticker; safe to call when none is running.
+    private func stopTimer() {
+        timer?.invalidate()
+        timer = nil
+    }
+
+    // MARK: - Files
+
+    /// Creates `<output folder>/sessions/<timestamp>_manual` (with intermediate
+    /// directories) and returns its URL.
+    /// - Throws: whatever `FileManager.createDirectory` throws.
+    private func makeSessionFolder() throws -> URL {
+        let base = settings.outputFolderURL.appendingPathComponent("sessions", isDirectory: true)
+        let folder = base.appendingPathComponent("\(Self.timestamp())_manual", isDirectory: true)
+        try FileManager.default.createDirectory(at: folder, withIntermediateDirectories: true)
+        return folder
+    }
+
+    /// Current date as `yyyy-MM-dd'T'HH-mm-ss`, using a fixed POSIX locale so the
+    /// folder name does not depend on the user's locale settings.
+    private static func timestamp() -> String {
+        let f = DateFormatter()
+        f.locale = Locale(identifier: "en_US_POSIX")
+        f.dateFormat = "yyyy-MM-dd'T'HH-mm-ss"
+        return f.string(from: Date())
+    }
+
+    /// Phase-1 preview of the mic-VAD "self" spans (the eventual
+    /// `visual_timeline.json` `mic_vad` segments). Lets us eyeball VAD quality.
+    /// Best effort: a failure is logged and never blocks finishing the session.
+    private func writeSelfSpans(_ result: RecordingResult, to folder: URL) {
+        let segments = result.selfSpans.map { span -> [String: Any] in
+            ["start": span.start, "end": span.end, "name": "self",
+             "confidence": span.confidence, "source": "mic_vad"]
+        }
+        let object: [String: Any] = [
+            "note": "Phase 1 mic-VAD self spans (preview of visual_timeline segments)",
+            "t0_unix": result.t0Unix,
+            "duration_sec": result.duration,
+            "self_spans": segments,
+        ]
+        // `data(withJSONObject:)` raises an Objective-C exception (not a Swift error)
+        // for values JSON cannot represent, such as NaN or infinity, so validate first.
+        guard JSONSerialization.isValidJSONObject(object) else {
+            NSLog("SessionController: self spans are not JSON-representable; skipping self_vad.json")
+            return
+        }
+        do {
+            let data = try JSONSerialization.data(withJSONObject: object,
+                                                  options: [.prettyPrinted, .sortedKeys])
+            try data.write(to: folder.appendingPathComponent("self_vad.json"))
+        } catch {
+            NSLog("SessionController: couldn't write self_vad.json: %@", error.localizedDescription)
+        }
+    }
+}
diff --git a/Ten31Transcripts/UI/LevelBar.swift b/Ten31Transcripts/UI/LevelBar.swift
new file mode 100644
index 0000000..6080666
--- /dev/null
+++ b/Ten31Transcripts/UI/LevelBar.swift
@@ -0,0 +1,38 @@
+import SwiftUI
+
+/// Compact horizontal level meter: a caption followed by a bar. `level` is a peak
+/// amplitude (0…1) that is converted to dBFS and displayed over a −60 dB … 0 dB
+/// range, so normal speech is clearly visible.
+struct LevelBar: View {
+    let label: String
+    let level: Float
+
+    var body: some View {
+        HStack(spacing: 8) {
+            Text(label)
+                .font(.caption2)
+                .foregroundStyle(.secondary)
+                .frame(width: 48, alignment: .leading)
+            GeometryReader { proxy in
+                ZStack(alignment: .leading) {
+                    // Track first, then the filled portion drawn on top of it.
+                    RoundedRectangle(cornerRadius: 2)
+                        .fill(Color.secondary.opacity(0.2))
+                    RoundedRectangle(cornerRadius: 2)
+                        .fill(color)
+                        .frame(width: fraction * proxy.size.width)
+                }
+            }
+            .frame(height: 6)
+        }
+    }
+
+    /// Filled share of the bar (0…1): 0 for a non-positive level or anything at or
+    /// below −60 dB, 1 at 0 dB and above.
+    private var fraction: CGFloat {
+        guard level > 0 else { return 0 }
+        let decibels = 20 * log10(Double(level))    // −∞ … 0 for levels in (0, 1]
+        let normalized = (decibels + 60) / 60
+        return CGFloat(max(0, min(1, normalized)))
+    }
+
+    /// Gray when the bar is practically empty, red near full scale, green otherwise.
+    private var color: Color {
+        switch fraction {
+        case ..<0.02:
+            return .gray
+        case let filled where filled > 0.9:
+            return .red
+        default:
+            return .green
+        }
+    }
+}
diff --git a/Ten31Transcripts/UI/MenuBarView.swift b/Ten31Transcripts/UI/MenuBarView.swift
index 63b387c..4ee27ef 100644
--- a/Ten31Transcripts/UI/MenuBarView.swift
+++ b/Ten31Transcripts/UI/MenuBarView.swift
@@ -7,12 +7,15 @@ struct MenuBarView: View {
     @EnvironmentObject private var settings: AppSettings
     @EnvironmentObject private var permissions: PermissionsManager
     @EnvironmentObject private var health: SparkControlHealth
+    @EnvironmentObject private var session: SessionController
 
     var body: some View {
         NavigationStack {
             VStack(alignment: .leading, spacing: 12) {
                 header
                 Divider()
+                recordingSection
+                Divider()
                 permissionsSection
                 Divider()
                 backendSection
@@ -26,6 +29,91 @@ struct MenuBarView: View {
         .task { await refreshHealth() }
     }
 
+    // MARK: Recording
+
+    /// Recording needs both the Microphone and the Screen Recording permission.
+    private var canRecord: Bool {
+        guard permissions.microphone == .granted else { return false }
+        return permissions.screenRecording == .granted
+    }
+
+    /// Recording controls: a title row with the elapsed time, the start/stop button,
+    /// live level meters, and status lines (permission hint, error, warning, and a
+    /// link that reveals the last session in Finder).
+    private var recordingSection: some View {
+        VStack(alignment: .leading, spacing: 8) {
+            HStack {
+                Text("Recording").font(.subheadline).bold()
+                Spacer()
+                // The elapsed time is only shown while capture is running.
+                if session.state == .recording {
+                    Text(timeString(session.elapsed))
+                        .font(.system(.caption, design: .monospaced))
+                        .foregroundStyle(.secondary)
+                }
+            }
+
+            Button {
+                session.toggle()
+            } label: {
+                Label(recordButtonTitle, systemImage: recordButtonIcon)
+                    .frame(maxWidth: .infinity)
+            }
+            .controlSize(.large)
+            .tint(session.state == .recording ? .red : .accentColor)
+            .disabled(recordButtonDisabled)
+
+            if session.state == .recording {
+                LevelBar(label: "Mic", level: session.micLevel)
+                LevelBar(label: "System", level: session.systemLevel)
+            }
+
+            // `body` lays the permissions section out after this one, so the hint has
+            // to point the user downwards.
+            if !canRecord && !session.isBusy {
+                Text("Grant Microphone + Screen Recording below to record.")
+                    .font(.caption)
+                    .foregroundStyle(.secondary)
+            }
+
+            if case .error(let message) = session.state {
+                Text(message).font(.caption).foregroundStyle(.red)
+            }
+
+            if let warning = session.warning {
+                Text(warning).font(.caption).foregroundStyle(.orange)
+            }
+
+            if let last = session.lastSession {
+                Button {
+                    NSWorkspace.shared.activateFileViewerSelecting([last.mixedURL])
+                } label: {
+                    Text("Last: \(Int(last.duration.rounded()))s · \(last.selfSpanCount) self-spans — reveal in Finder")
+                        .font(.caption)
+                }
+                .buttonStyle(.link)
+            }
+        }
+    }
+
+    /// Title of the record button for the current session state.
+    private var recordButtonTitle: String {
+        switch session.state {
+        case .idle, .error:
+            return "Start Recording"
+        case .starting:
+            return "Starting…"
+        case .recording:
+            return "Stop Recording"
+        case .finishing:
+            return "Finishing…"
+        }
+    }
+
+    /// SF Symbol name for the record button: a stop glyph while recording, a record
+    /// glyph in every other state.
+    private var recordButtonIcon: String {
+        if session.state == .recording {
+            return "stop.circle.fill"
+        }
+        return "record.circle"
+    }
+
+    /// Disabled during `.starting`/`.finishing`, always enabled while recording, and
+    /// otherwise enabled only when both permissions are granted.
+    private var recordButtonDisabled: Bool {
+        // A running session must always be stoppable.
+        if session.state == .recording { return false }
+        // Past this point `isBusy` can only mean `.starting` or `.finishing`.
+        return session.isBusy || !canRecord
+    }
+
+    /// Formats an elapsed time as `MM:SS`, truncating fractional seconds. Minutes are
+    /// not wrapped at 60, so one hour reads `60:00`.
+    /// - Parameter t: Elapsed seconds. Negative, non-finite, or unrepresentably large
+    ///   values are shown as `00:00`.
+    private func timeString(_ t: TimeInterval) -> String {
+        // `Int(t)` traps on NaN/±∞ and on values beyond `Int.max`, and a negative
+        // interval (e.g. after a wall-clock step) would print stray minus signs.
+        guard t.isFinite, t > 0, let total = Int(exactly: t.rounded(.down)) else {
+            return "00:00"
+        }
+        return String(format: "%02d:%02d", total / 60, total % 60)
+    }
+
     private var header: some View {
         VStack(alignment: .leading, spacing: 2) {
             Text("Ten31 Transcripts").font(.headline)
diff --git a/project.yml b/project.yml
index 7fe9914..06de0f4 100644
--- a/project.yml
+++ b/project.yml
@@ -13,9 +13,11 @@ settings:
     CURRENT_PROJECT_VERSION: "1"
     SWIFT_VERSION: "5.0"
     CODE_SIGN_STYLE: Automatic
-    # Leave the team empty; pick your free personal team in Xcode's
-    # Signing & Capabilities tab on first open (see README).
-    DEVELOPMENT_TEAM: ""
+    # Grant's free personal team (cert OU). Baked in so `xcodegen generate` keeps
+    # a STABLE signing identity across regenerations — macOS ties TCC permission
+    # grants (Mic / Screen Recording / Accessibility) to this identity, so a
+    # stable team is what makes those permissions persist across rebuilds.
+    DEVELOPMENT_TEAM: "BK4Y6CXN35"
 
 targets:
   Ten31Transcripts: