ten31-transcripts/Ten31Transcripts/Session/SessionController.swift

import Foundation
import Combine
import AppKit
import CoreGraphics

/// Summary of a finished recording session, published by `SessionController`
/// as `lastSession` once a recording has been finalized.
struct SessionInfo: Equatable {
    /// The session's folder on disk.
    let folder: URL
    /// Location of the mixed-down audio track reported by the recorder.
    let mixedURL: URL
    /// Recording length as reported by the recorder (forwarded elsewhere as `durationSec`).
    let duration: Double
    /// Number of "self" (local user) spans found for the session.
    let selfSpanCount: Int
    /// Count of vision-detected speaker segments if visual capture attached, or nil
    /// if the session was audio-only (no adapter / no window / capture failed). Lets
    /// the user see at a glance whether the visual pipeline ran on a real call.
    let visualSegmentCount: Int?
}

/// Owns a single recording session: creates the session folder, drives
/// `AudioRecorder` start/stop, tracks elapsed time, and writes the
/// `self_vad.json` debug artifact of channel-verified self spans. It also reacts
/// to `CallDetector` events (auto start/stop), attaches best-effort visual
/// capture, and hands finished sessions to the backend transcription pipeline.
///
/// The lifecycle is serialized through an explicit state machine so start and
/// stop can never interleave (`.starting` → `.recording` → `.finishing`).
@MainActor
final class SessionController: ObservableObject {
    /// Recording lifecycle state. `.starting`, `.recording` and `.finishing`
    /// together count as "busy" (see `isBusy`).
    enum State: Equatable {
        /// No session in progress.
        case idle
        /// Set by `start()` while the recorder's `start()` is still in flight.
        case starting
        /// The recorder started successfully and the session is live.
        case recording
        /// Set by `stop()` until `finish()` returns the controller to `.idle`.
        case finishing
        /// A start attempt failed; the payload is the message passed to `fail(_:)`.
        case error(String)
    }

    /// Backend transcription status for the most recent session.
    enum TranscriptStatus: Equatable {
        /// Nothing sent yet, or the last run was cancelled.
        case idle
        /// A backend run is in flight. Also used with `(0, 0)` while speaker
        /// reconciliation runs after transcription.
        case processing(Int, Int)              // chunk done, total
        /// The run finished; counts are taken from the resulting speakers file.
        case done(speakers: Int, segments: Int)
        /// The run threw; the payload is the error's localized description.
        case failed(String)
    }

    /// Set in init so `AppDelegate.applicationShouldTerminate` can finalize a
    /// recording in progress before the app quits.
    static weak var shared: SessionController?

    /// Current lifecycle state; only this class mutates it.
    @Published private(set) var state: State = .idle
    /// Time since `startTime`, refreshed by the timer while recording and reset
    /// to 0 by `finish` / `fail`.
    @Published private(set) var elapsed: TimeInterval = 0
    /// Summary of the most recently finished session (written in `finish`).
    @Published private(set) var lastSession: SessionInfo?
    /// Live input peak levels (0…1) while recording, for the UI meters.
    @Published private(set) var micLevel: Float = 0
    @Published private(set) var systemLevel: Float = 0
    /// Surfaced after a session if system audio stopped early.
    @Published private(set) var warning: String?
    /// Mirrored from `CallDetector` for the UI.
    @Published private(set) var detectionStatus: CallDetector.Status = .disabled
    /// Backend transcription status for the last session.
    @Published private(set) var transcriptStatus: TranscriptStatus = .idle
    /// Set when a readable recap (`recap.html`) has been written for the last session.
    @Published private(set) var recapURL: URL?

    /// App settings injected at init (output folder, backend URL, feature toggles).
    private let settings: AppSettings
    /// Voiceprint store backed by `voiceprints.json` in the output folder;
    /// replaced when the output-folder setting changes.
    private var voiceprints: VoiceprintStore
    /// Call detector whose start/end callbacks drive auto-recording.
    private let detector = CallDetector()
    /// Combine subscriptions created in init.
    private var cancellables = Set<AnyCancellable>()
    /// Label of the current session (folder-name suffix, later sent as `app`).
    private var currentLabel = "manual"
    /// Inputs needed to (re)process the last finished session through the backend.
    /// Built in `finish` for a live session and in `openSavedSession` for a saved one.
    private struct ProcessInputs {
        /// Session folder; outputs such as `speakers.json` and the recap are written here.
        let folder: URL
        /// Session identifier — callers pass the folder's last path component.
        let sessionId: String
        /// App label for the session (e.g. the detected app's label, or "manual").
        let app: String
        let micURL: URL
        let systemURL: URL
        let mixedURL: URL
        let timeline: [VisualTimeline.Segment]   // remote visual names; self handled via the mic channel
        /// Spans attributed to the local user.
        let selfSpans: [VADSpan]
        /// Display name for the local user, taken from settings.
        let selfName: String
        /// False when the recorder reported a system-audio note for the session.
        let systemHealthy: Bool
    }
    /// Backend inputs of the last finished session (set in `finish`), so it can be (re)sent.
    private var lastProcess: ProcessInputs?
    /// The most recently spawned backend Task; cancelled on termination.
    private var processTask: Task<Void, Never>?
    /// Recorder of the current session; nil when idle or after a failure.
    private var recorder: AudioRecorder?
    /// Visual capture for the current session (nil for manual recordings, apps with
    /// no adapter, or when the window can't be captured — those record audio-only).
    private var visualCapture: VisualCapture?
    /// A visual capture whose `start()` is in flight (registered before the await),
    /// so `prepareForTermination` can tear it down if its start-Task is orphaned.
    private var inFlightVisual: VisualCapture?
    /// App + capture target to start visual capture for, set at `start()`. `windowID`
    /// pins the exact detected window (e.g. the Meet browser window); nil → largest.
    private var pendingCapture: (app: CallDetector.DetectedApp, bundleID: String, windowID: CGWindowID?)?
    /// Folder of the session in progress; cleared by `finish` / `fail`.
    private var currentFolder: URL?
    /// Wall-clock time at which the session entered `.recording`; basis for `elapsed`.
    private var startTime: Date?
    /// Repeating UI-refresh timer that runs while recording.
    private var timer: Timer?
    /// True when the current session was started by call detection (not the user).
    private var autoStarted = false
    /// Set if a detected call ends while we're still in `.starting`.
    private var pendingAutoStop = false
    /// The in-flight start or stop Task, so `prepareForTermination` can await it.
    private var lifecycleTask: Task<Void, Never>?
    /// Bumped each time a start/stop Task is spawned (Task is a value type, so this
    /// is how `prepareForTermination` detects a newly-spawned transition).
    private var lifecycleGeneration = 0

    /// Creates the controller, registers it as `shared`, and subscribes to the
    /// call detector and to the settings that affect a session.
    ///
    /// - Parameter settings: App settings; provides the output folder and the
    ///   auto-record toggle observed here.
    init(settings: AppSettings) {
        self.settings = settings
        let voiceprintFile = settings.outputFolderURL.appendingPathComponent("voiceprints.json")
        self.voiceprints = VoiceprintStore(fileURL: voiceprintFile)
        SessionController.shared = self

        // Detector callbacks drive auto start/stop; its status is mirrored for the UI.
        detector.onCallStart = { [weak self] call in
            self?.handleCallStart(call)
        }
        detector.onCallEnd = { [weak self] in
            self?.handleCallEnd()
        }
        detector.$status
            .sink { [weak self] newStatus in
                self?.detectionStatus = newStatus
            }
            .store(in: &cancellables)

        // Follow output-folder changes with a fresh voiceprint store. A pipeline
        // already running holds its own reference, so swapping here is safe.
        // The path comes from the emitted value rather than from `settings`.
        settings.$outputFolderPath
            .dropFirst()
            .sink { [weak self] newPath in
                guard let self else { return }
                let expanded = (newPath as NSString).expandingTildeInPath
                let directory = URL(fileURLWithPath: expanded, isDirectory: true)
                self.voiceprints = VoiceprintStore(
                    fileURL: directory.appendingPathComponent("voiceprints.json"))
            }
            .store(in: &cancellables)

        // Keep the detector in step with the auto-record toggle.
        settings.$autoRecordOnDetection
            .sink { [weak self] enabled in
                guard let self else { return }
                guard !enabled else {
                    self.detector.enable()
                    return
                }
                self.detector.disable()
                // With the detector off nothing would ever end an auto-started
                // session, so end it now — or as soon as it finishes starting.
                guard self.autoStarted else { return }
                switch self.state {
                case .recording:
                    self.stop()
                case .starting:
                    self.pendingAutoStop = true
                case .idle, .finishing, .error:
                    break
                }
            }
            .store(in: &cancellables)
    }

    // MARK: - Auto-detection

    /// Detector callback: a call was detected. Starts an auto-recording for it
    /// unless auto-record is off or a session is already in progress.
    private func handleCallStart(_ call: CallDetector.DetectedCall) {
        guard settings.autoRecordOnDetection else { return }
        // Never disturb an active session (starting, recording or finishing).
        if isBusy { return }
        start(label: call.app.label, auto: true, capture: (call.app, call.bundleID, call.windowID))
    }

    /// Detector callback: the detected call ended. Only sessions that were
    /// auto-started are affected; a manual recording is never stopped from here.
    private func handleCallEnd() {
        guard autoStarted else { return }
        if state == .recording {
            stop()
        } else if state == .starting {
            // Too early to stop; the start Task honours this flag once it is recording.
            pendingAutoStop = true
        }
    }

    /// True while a session is starting, recording, or finishing.
    var isBusy: Bool {
        switch state {
        case .starting, .recording, .finishing:
            return true
        case .idle, .error:
            return false
        }
    }

    /// Record-button action: stops a live recording, starts a manual one when
    /// idle (or after an error), and ignores taps while a transition is in flight.
    func toggle() {
        if state == .recording {
            stop()
            return
        }
        // `.starting` / `.finishing`: mid-transition, ignore the tap.
        guard !isBusy else { return }
        start()
    }

    // MARK: - Start / Stop

    /// Begin a new session: create the session folder, build the `AudioRecorder`,
    /// move to `.starting`, and spawn the lifecycle Task that starts the recorder
    /// and then (best-effort) visual capture.
    ///
    /// - Parameters:
    ///   - label: Suffix of the session folder name; also remembered as `currentLabel`.
    ///   - auto: True when call detection (not the user) started the session.
    ///   - capture: App/window to attach visual capture to, or nil for audio-only.
    private func start(label: String = "manual", auto: Bool = false,
                       capture: (app: CallDetector.DetectedApp, bundleID: String, windowID: CGWindowID?)? = nil) {
        let folder: URL
        do {
            folder = try makeSessionFolder(label: label)
        } catch {
            fail("Couldn't create session folder: \(error.localizedDescription)")
            return
        }
        // Record per-session bookkeeping before anything asynchronous happens.
        currentFolder = folder
        currentLabel = label
        autoStarted = auto
        pendingAutoStop = false
        pendingCapture = capture
        let recorder = AudioRecorder(
            micURL: folder.appendingPathComponent("mic.wav"),
            systemURL: folder.appendingPathComponent("system.wav"),
            mixedURL: folder.appendingPathComponent("mixed_mono_16k.wav"))
        self.recorder = recorder
        // Clear leftovers from the previous session.
        warning = nil
        recapURL = nil
        state = .starting

        // Tag this transition so later awaits can tell whether they still belong to it.
        lifecycleGeneration += 1
        let myGen = lifecycleGeneration
        lifecycleTask = Task {
            do {
                try await recorder.start()        // self-tears-down if it throws
                self.state = .recording
                self.startTime = Date()
                self.startTimer()
                // A detected call may have ended while we were still starting.
                if self.pendingAutoStop {
                    self.pendingAutoStop = false
                    self.stop()
                    return
                }
                // Attach visual capture on the SAME clock (best-effort, audio-only on failure).
                // Pass this session's generation + recorder so a slow start can't
                // adopt itself into a different session that began meanwhile.
                await self.startVisual(t0Host: recorder.sharedT0Host(), generation: myGen, recorder: recorder)
            } catch {
                self.handleStartFailure(error)
            }
        }
    }

    /// Turn a recorder start failure into an actionable error state.
    ///
    /// The usual culprit is Screen Recording being re-checked after a rebuild
    /// (the SCStream auth check fails even though CGPreflight reports granted).
    /// When the error text hints at that, re-request access and open the
    /// matching Settings pane instead of surfacing a cryptic TCC error; any
    /// other failure is reported with its own description.
    private func handleStartFailure(_ error: Error) {
        let description = error.localizedDescription
        let lowered = description.lowercased()
        let screenHints = ["declined", "tcc", "screen", "permission"]
        let looksLikeScreenIssue = screenHints.contains { lowered.contains($0) }

        guard looksLikeScreenIssue else {
            fail("Couldn't start recording: \(description)")
            return
        }

        _ = CGRequestScreenCaptureAccess()
        let pane = URL(string: "x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture")
        if let pane {
            NSWorkspace.shared.open(pane)
        }
        fail("Screen Recording needs re-approval for this build. Toggle Ten31Transcripts off then on in System Settings ▸ Screen Recording, then restart the app.")
    }

    // MARK: - Visual capture

    /// Best-effort: start window capture for the detected app on the audio clock.
    /// Any failure (no adapter, no window, Screen Recording denied) leaves
    /// `visualCapture` nil and the session records audio-only.
    ///
    /// `generation`/`recorder` identify the session that launched this; because
    /// `vc.start()` is a slow async call, a stop + a fresh start can complete during
    /// it. We adopt the stream ONLY back into the same session — otherwise we cancel
    /// it, so a stale capture can never attach to (or leak into) a different session.
    ///
    /// - Parameters:
    ///   - t0Host: Time origin obtained from the recorder (`sharedT0Host()`), handed to the capture.
    ///   - generation: `lifecycleGeneration` value of the start that launched this call.
    ///   - recorder: The recorder of the session that launched this call.
    private func startVisual(t0Host: Double, generation: Int, recorder: AudioRecorder) async {
        guard let capture = pendingCapture else { return }   // manual recording → audio-only
        // Consume the request so it can't be picked up a second time.
        pendingCapture = nil
        guard let vc = VisualCapture(app: capture.app, bundleID: capture.bundleID,
                                     windowID: capture.windowID, t0Host: t0Host) else { return }
        // Register the live capture before the await so a quit (prepareForTermination)
        // can drain it even if this start-Task gets orphaned by a concurrent stop.
        inFlightVisual = vc
        // Unregister on every exit path, but only if the slot still holds this capture.
        defer { if inFlightVisual === vc { inFlightVisual = nil } }
        do {
            try await vc.start()
            // Adopt only if THIS session still owns the slot (same generation, same
            // recorder, still recording); otherwise discard rather than leak/misattach.
            guard generation == lifecycleGeneration, self.recorder === recorder,
                  case .recording = state else {
                await vc.cancel()
                return
            }
            if let existing = visualCapture { await existing.cancel() }   // fail-closed
            visualCapture = vc
        } catch {
            await vc.cancel()     // tear down any partial stream; never break recording
        }
    }

    /// Stop visual capture (if any) and gather what the backend needs.
    ///
    /// The self spans are computed first via `channelSelfSpans`. If a visual
    /// capture is attached and a folder is known, the capture is finished with
    /// those spans and the session folder (presumably where `visual_timeline.json`
    /// gets written — confirm in `VisualCapture.finish`), and its result is
    /// returned as the timeline. Otherwise any attached capture is cancelled and
    /// the timeline is empty.
    ///
    /// - Returns: `timeline` — the remote (vision) segments, empty when visual
    ///   did not run; `selfSpans` — the self spans, returned separately rather
    ///   than merged into the timeline; `visualRan` — whether the visual pipeline
    ///   actually attached (for the after-session indicator).
    private func stopVisualAndTimeline(_ result: RecordingResult, folder: URL?)
        async -> (timeline: [VisualTimeline.Segment], selfSpans: [VADSpan], visualRan: Bool) {
        let selfName = settings.selfName
        let selfSpans = await channelSelfSpans(result: result, folder: folder)
        if let vc = visualCapture, let folder {
            // Release the slot before awaiting so nothing else can pick up this capture.
            visualCapture = nil
            // Remote (vision) segments only; self travels separately as the mic channel.
            let remote = await vc.finish(
                selfSpans: selfSpans, selfName: selfName,
                sessionId: folder.lastPathComponent, t0Unix: result.t0Unix,
                durationSec: result.duration, folder: folder)
            return (remote, selfSpans, true)
        }
        // A capture with no folder to write into: tear it down rather than leak it.
        if let vc = visualCapture { await vc.cancel(); visualCapture = nil }
        return ([], selfSpans, false)
    }

    /// Self spans for the backend, decided by CHANNEL rather than by name: the
    /// mic track belongs to the local user, so "self" means the mic is active AND
    /// louder than system audio. That keeps self detection platform-independent
    /// (one name, no display-name matching) and prevents a remote speaker from
    /// being labelled as self.
    ///
    /// The analysis runs in a detached Task so the file reads stay off the main
    /// actor. Returns the recorder's own mic-VAD spans when there is no folder or
    /// the tracks can't be analysed.
    private func channelSelfSpans(result: RecordingResult, folder: URL?) async -> [VADSpan] {
        guard let folder else { return result.selfSpans }
        let micTrack = folder.appendingPathComponent("mic.wav")
        let systemTrack = folder.appendingPathComponent("system.wav")
        let analysis = Task.detached {
            ChannelSelfVAD.selfSpans(micURL: micTrack, systemURL: systemTrack)
        }
        let channelSpans = await analysis.value
        return channelSpans ?? result.selfSpans
    }

    /// Stop the current session: switch to `.finishing`, halt the UI timer, and
    /// spawn the lifecycle Task that stops the recorder, winds down visual
    /// capture, and finalizes via `finish`. Does nothing when there is no recorder.
    private func stop() {
        guard let recorder else { return }
        state = .finishing
        stopTimer()
        // Capture the folder now; it is cleared again in `finish`.
        let folder = currentFolder
        // New transition: invalidates any visual start still tagged with the old generation.
        lifecycleGeneration += 1
        lifecycleTask = Task {
            let result = await recorder.stop()
            let visual = await self.stopVisualAndTimeline(result, folder: folder)
            self.finish(result, timeline: visual.timeline, selfSpans: visual.selfSpans, visualRan: visual.visualRan)
        }
    }

    /// Finalize a stopped session: publish its summary, remember the inputs for
    /// the backend, reset per-session state to `.idle`, and auto-send when the
    /// `autoSendOnStop` setting is on.
    ///
    /// - Parameters:
    ///   - result: What the recorder reported on stop (track URLs, duration, notes).
    ///   - timeline: Remote (vision) segments; empty when visual did not run.
    ///   - selfSpans: Spans attributed to the local user.
    ///   - visualRan: Whether visual capture actually attached for this session.
    private func finish(_ result: RecordingResult, timeline: [VisualTimeline.Segment],
                        selfSpans: [VADSpan], visualRan: Bool) {
        recorder = nil
        micLevel = 0
        systemLevel = 0
        warning = result.systemNote.map { "System audio stopped early: \($0)" }
        // NOTE(review): this resets the status without cancelling `processTask`. If a
        // previous session's backend run is still in flight it looks like it would keep
        // running (and publishing status) alongside a new auto-send — confirm.
        transcriptStatus = .idle
        if let folder = currentFolder {
            writeSelfSpans(spans: selfSpans, result: result, to: folder)
            let visualCount = visualRan ? timeline.count : nil   // `timeline` is the remote vision segments
            lastSession = SessionInfo(
                folder: folder, mixedURL: result.mixedURL,
                duration: result.duration, selfSpanCount: selfSpans.count,
                visualSegmentCount: visualCount)
            lastProcess = ProcessInputs(
                folder: folder, sessionId: folder.lastPathComponent, app: currentLabel,
                micURL: result.micURL, systemURL: result.systemURL, mixedURL: result.mixedURL,
                timeline: timeline, selfSpans: selfSpans, selfName: settings.selfName,
                systemHealthy: result.systemNote == nil)
        }
        // Read the setting before the state flips to `.idle`.
        let autoSend = settings.autoSendOnStop
        currentFolder = nil
        autoStarted = false
        pendingAutoStop = false
        elapsed = 0
        state = .idle
        if autoSend { processLastSession() }
    }

    // MARK: - Backend transcription

    /// Send the last finished session to the backend → `speakers.json`.
    ///
    /// Uses the inputs remembered by `finish`: the remote visual segments as the
    /// timeline (empty when visual capture didn't run) plus the self spans, which
    /// travel separately. Safe to call manually ("Send to backend") or
    /// automatically on stop; a call made while a run is already in progress, or
    /// before any session has finished, is ignored.
    func processLastSession() {
        if isProcessing { return }
        guard let inputs = lastProcess else { return }
        transcriptStatus = .processing(0, 1)
        recapURL = nil
        processTask = Task {
            await self.runBackend(inputs, openEditorWhenDone: false)
        }
    }

    /// True while `transcriptStatus` is any `.processing` value.
    private var isProcessing: Bool {
        switch transcriptStatus {
        case .processing:
            return true
        case .idle, .done, .failed:
            return false
        }
    }

    /// Transcribe + reconcile + recap one session's inputs. Shared by "Send to
    /// backend" and "Open saved session".
    ///
    /// Publishes progress through `transcriptStatus`: `.processing` while the
    /// pipeline runs, `.done` on success, `.idle` if the Task was cancelled, and
    /// `.failed` with the error's description otherwise.
    ///
    /// - Parameters:
    ///   - inputs: The session's tracks, timeline and self spans.
    ///   - openEditorWhenDone: Open the speaker editor for the folder once everything finished.
    private func runBackend(_ inputs: ProcessInputs, openEditorWhenDone: Bool) async {
        let settings = self.settings
        // The pipeline receives the voiceprint store that is current right now.
        let pipeline = TranscriptPipeline(baseURL: settings.backendBaseURL,
                                          skipTLS: settings.skipTLSVerification, voiceprints: voiceprints)
        do {
            let speakers = try await pipeline.process(
                sessionFolder: inputs.folder, sessionId: inputs.sessionId, app: inputs.app,
                micURL: inputs.micURL, systemURL: inputs.systemURL, mixedURL: inputs.mixedURL,
                timeline: inputs.timeline, selfSpans: inputs.selfSpans, selfName: inputs.selfName,
                systemHealthy: inputs.systemHealthy,
                progress: { done, total in await MainActor.run { self.transcriptStatus = .processing(done, total) } })
            self.transcriptStatus = .done(speakers: speakers.speakers.count, segments: speakers.segments.count)
            // Skip the LLM passes if we were cancelled while transcribing.
            try Task.checkCancellation()
            await self.finishBackend(speakers: speakers, inputs: inputs, settings: settings)
            if openEditorWhenDone { self.openEditor(folder: inputs.folder) }
        } catch is CancellationError {
            self.transcriptStatus = .idle
        } catch {
            self.transcriptStatus = .failed(error.localizedDescription)
        }
    }

    /// Post-transcription LLM passes (best-effort, share one gateway model lookup):
    /// reconcile speaker labels (merge split clusters + name from content), then build
    /// the readable recap. A missing LLM or any failure leaves speakers.json intact.
    ///
    /// - Parameters:
    ///   - speakers: The transcription result to reconcile and summarize.
    ///   - inputs: The session being processed (folder, app label, self name).
    ///   - settings: Settings captured by the caller for this run.
    private func finishBackend(speakers: SpeakersFile, inputs: ProcessInputs, settings: AppSettings) async {
        let llm = GatewayLLMClient(baseURL: settings.backendBaseURL, skipTLS: settings.skipTLSVerification)
        guard let model = await llm.chatModelId() else { return }   // no LLM on the gateway → skip both

        // Pass 1 (optional): reconcile speaker labels and overwrite speakers.json.
        var resolved = speakers
        if settings.reconcileSpeakers, !speakers.segments.isEmpty {
            // (0, 0) marks the run as busy again while reconciliation is in progress.
            self.transcriptStatus = .processing(0, 0)
            let fps = RecapEditModel.loadFingerprints(inputs.folder.appendingPathComponent("cluster_fingerprints.json"))
            resolved = await SpeakerReconciler.reconcile(file: speakers, fingerprints: fps,
                                                         selfName: inputs.selfName, llm: llm, model: model)
            // Best-effort write: a failure here is deliberately ignored.
            try? resolved.write(to: inputs.folder.appendingPathComponent("speakers.json"))
            self.transcriptStatus = .done(speakers: resolved.speakers.count, segments: resolved.segments.count)
        }

        // Pass 2 (optional): build the recap from the (possibly reconciled) speakers.
        guard settings.recapEnabled, !resolved.segments.isEmpty else { return }
        let analyzer = RecapAnalyzer(llm: llm, model: model)
        guard let result = try? await analyzer.recap(file: resolved, template: settings.defaultTemplate) else { return }
        let title = Self.recapTitle(app: inputs.app, sessionId: inputs.sessionId)
        try? RecapRenderer.write(file: resolved, result: result, title: title, to: inputs.folder)
        try? RecapFile(title: title, result: result).write(to: inputs.folder.appendingPathComponent("recap.json"))
        // Only publish the recap URL if the HTML file actually exists on disk.
        let url = inputs.folder.appendingPathComponent("recap.html")
        if FileManager.default.fileExists(atPath: url.path) { self.recapURL = url }
    }

    /// Builds a human-friendly recap title such as
    /// "Google Meet call — 2026-06-06 11:43".
    ///
    /// - Parameters:
    ///   - app: App label; a known `CallDetector.DetectedApp` raw value is shown
    ///     with its display name, anything else is capitalized.
    ///   - sessionId: Session folder name. The part before the first `_` is read
    ///     as `<date>T<HH>-<mm>-…`; pieces that are missing are left out.
    private static func recapTitle(app: String, sessionId: String) -> String {
        let appName = CallDetector.DetectedApp(rawValue: app)?.display ?? app.capitalized

        let stamp = sessionId.split(separator: "_").first.map(String.init) ?? sessionId
        let halves = stamp.split(separator: "T")
        let date = halves.first.map(String.init) ?? ""

        var time = ""
        if halves.count > 1 {
            let clock = halves[1].split(separator: "-")
            if clock.count >= 2 {
                time = "\(clock[0]):\(clock[1])"
            }
        }

        let title = "\(appName) call — \(date) \(time)"
        return title.trimmingCharacters(in: .whitespaces)
    }

    // MARK: - Speaker corrections

    /// Whether the last session already has a `speakers.json` on disk that the
    /// speaker editor can open.
    var canEditSpeakers: Bool {
        guard let session = lastSession else { return false }
        let speakersFile = session.folder.appendingPathComponent("speakers.json")
        return FileManager.default.fileExists(atPath: speakersFile.path)
    }

    /// Opens the speaker-correction editor on the last session's folder; does
    /// nothing when no session has finished yet.
    func editLastSession() {
        guard let folder = lastSession?.folder else { return }
        openEditor(folder: folder)
    }

    /// Shows the editor window for `folder`. Nothing happens when the edit
    /// model can't be created for that folder (its initializer is failable).
    private func openEditor(folder: URL) {
        let editModel = RecapEditModel(
            folder: folder,
            voiceprints: voiceprints,
            baseURL: settings.backendBaseURL,
            skipTLS: settings.skipTLSVerification,
            templates: settings.recapTemplates,
            defaultTemplateId: settings.defaultTemplateId)
        if let editModel {
            EditorWindow.shared.show(model: editModel)
        }
    }

    /// Lets the user pick any past session folder and opens it.
    ///
    /// A folder that already contains `speakers.json` goes straight to the
    /// editor. Otherwise the raw `mic.wav` / `system.wav` tracks are required:
    /// the session is transcribed, reconciled and recapped first, and the editor
    /// opens once that run finishes. The request is ignored if the tracks are
    /// missing or another backend run is in progress.
    func openSavedSession() {
        let picker = NSOpenPanel()
        picker.message = "Choose a session folder"
        picker.prompt = "Open"
        picker.canChooseFiles = false
        picker.canChooseDirectories = true
        picker.allowsMultipleSelection = false
        picker.directoryURL = settings.outputFolderURL.appendingPathComponent("sessions", isDirectory: true)
        NSApp.activate(ignoringOtherApps: true)
        guard picker.runModal() == .OK, let folder = picker.url else { return }

        let files = FileManager.default
        let speakersFile = folder.appendingPathComponent("speakers.json")
        guard !files.fileExists(atPath: speakersFile.path) else {
            // Already transcribed: edit it directly.
            openEditor(folder: folder)
            return
        }

        // Not transcribed yet: the raw tracks are needed to (re)process it.
        let micTrack = folder.appendingPathComponent("mic.wav")
        let systemTrack = folder.appendingPathComponent("system.wav")
        let hasRawTracks = files.fileExists(atPath: micTrack.path)
            && files.fileExists(atPath: systemTrack.path)
        guard hasRawTracks, !isProcessing else { return }

        transcriptStatus = .processing(0, 1)
        recapURL = nil
        let selfName = settings.selfName
        processTask = Task {
            // Recompute the self spans from the saved tracks, off the main actor.
            let detected = await Task.detached {
                ChannelSelfVAD.selfSpans(micURL: micTrack, systemURL: systemTrack)
            }.value
            let inputs = ProcessInputs(
                folder: folder,
                sessionId: folder.lastPathComponent,
                app: Self.appLabel(from: folder),
                micURL: micTrack,
                systemURL: systemTrack,
                mixedURL: folder.appendingPathComponent("mixed_mono_16k.wav"),
                timeline: Self.remoteTimeline(in: folder),
                selfSpans: detected ?? [],
                selfName: selfName,
                systemHealthy: true)
            await self.runBackend(inputs, openEditorWhenDone: true)
        }
    }

    /// Loads the saved `visual_timeline.json` of a session and keeps only the
    /// segments whose `source` is "vision". Returns an empty array when the file
    /// is missing or can't be decoded.
    private static func remoteTimeline(in folder: URL) -> [VisualTimeline.Segment] {
        let timelineFile = folder.appendingPathComponent("visual_timeline.json")
        do {
            let raw = try Data(contentsOf: timelineFile)
            let saved = try JSONDecoder().decode(VisualTimeline.self, from: raw)
            return saved.segments.filter { $0.source == "vision" }
        } catch {
            return []
        }
    }

    /// App label from a session folder name like "…_signal": the text after the
    /// last underscore.
    ///
    /// Falls back to "manual" when the name carries no label at all — no
    /// underscore, or nothing after it — so an arbitrary folder picked by the
    /// user is never reported with its whole name (or its timestamp) as the app.
    ///
    /// - Parameter folder: The session folder.
    /// - Returns: The label suffix, or "manual" if none is present.
    private static func appLabel(from folder: URL) -> String {
        let name = folder.lastPathComponent
        guard let separator = name.lastIndex(of: "_") else { return "manual" }
        let label = name[name.index(after: separator)...]
        return label.isEmpty ? "manual" : String(label)
    }

    /// Abandon the session being started and surface `message` to the UI as
    /// `.error(message)`, clearing all per-session state on the way.
    private func fail(_ message: String) {
        stopTimer()

        // Drop per-session references. `recorder.start()` failed before visual
        // capture started, so nothing is running that would need a teardown.
        recorder = nil
        visualCapture = nil
        inFlightVisual = nil
        pendingCapture = nil
        currentFolder = nil

        // Reset the auto-detection flags.
        autoStarted = false
        pendingAutoStop = false

        // Reset the published readouts, then publish the error state last.
        micLevel = 0
        systemLevel = 0
        elapsed = 0
        state = .error(message)
    }

    /// Called from `applicationShouldTerminate`: flush any in-progress session so
    /// its WAV headers are finalized before the process exits. Handles quit while
    /// `.starting` and `.finishing`, not just `.recording`.
    ///
    /// Backend transcription is cancelled both before and after the drain:
    /// finalizing a session calls `finish`, which auto-sends when
    /// `autoSendOnStop` is on and thereby spawns a new backend Task that the
    /// first cancel cannot have covered.
    func prepareForTermination() async {
        // Cancel any in-flight backend transcription (audio is already saved; the
        // user can resend). The pipeline's checkCancellation + defer clean up chunks.
        processTask?.cancel()
        // Drain whatever lifecycle Task is in flight until nothing is busy. A Stop
        // click landing in an await window can spawn a new stop Task, so loop
        // rather than awaiting a single captured task.
        while isBusy {
            let gen = lifecycleGeneration
            await lifecycleTask?.value
            if state == .recording, let recorder {
                // Still recording once the start Task has settled: stop inline.
                state = .finishing
                stopTimer()
                let folder = currentFolder
                let result = await recorder.stop()
                let visual = await stopVisualAndTimeline(result, folder: folder)
                finish(result, timeline: visual.timeline, selfSpans: visual.selfSpans, visualRan: visual.visualRan)
            } else if lifecycleGeneration == gen {
                break   // settled: no new transition was spawned
            }
        }
        // `finish` (run inline above or by the awaited stop Task) may have
        // auto-sent the session, replacing `processTask` with a Task started
        // after the first cancel. Cancel again so no upload is left running
        // into process exit. Cancelling a Task that already ended is a no-op.
        processTask?.cancel()
        // A visual start-Task orphaned by a concurrent stop may still hold a live
        // stream that nothing else will tear down before exit — drain it here.
        if let vc = inFlightVisual {
            inFlightVisual = nil
            await vc.cancel()
        }
    }

    // MARK: - Timer

    /// Starts the repeating 0.1 s timer that keeps the elapsed time and the
    /// level meters current while recording.
    private func startTimer() {
        timer = Timer.scheduledTimer(withTimeInterval: 0.1, repeats: true) { [weak self] _ in
            // Hop onto the main actor before touching published state.
            Task { @MainActor in
                guard let self else { return }
                self.refreshLiveReadouts()
            }
        }
    }

    /// One timer tick: recompute `elapsed` from `startTime` and pull the current
    /// peak levels from the recorder, each only if available.
    private func refreshLiveReadouts() {
        if let startedAt = startTime {
            elapsed = Date().timeIntervalSince(startedAt)
        }
        guard let activeRecorder = recorder else { return }
        let peaks = activeRecorder.currentLevels()
        micLevel = peaks.mic
        systemLevel = peaks.system
    }

    /// Invalidates the UI-refresh timer, if one is running, and forgets it.
    private func stopTimer() {
        if let running = timer {
            running.invalidate()
        }
        timer = nil
    }

    // MARK: - Files

    /// Creates `<output folder>/sessions/<timestamp>_<label>` (including any
    /// missing parent directories) and returns its URL.
    ///
    /// - Parameter label: Suffix appended to the timestamp in the folder name.
    /// - Throws: Whatever `FileManager.createDirectory` throws.
    private func makeSessionFolder(label: String) throws -> URL {
        let folderName = "\(Self.timestamp())_\(label)"
        let sessionFolder = settings.outputFolderURL
            .appendingPathComponent("sessions", isDirectory: true)
            .appendingPathComponent(folderName, isDirectory: true)
        try FileManager.default.createDirectory(at: sessionFolder, withIntermediateDirectories: true)
        return sessionFolder
    }

    /// The current date and time formatted for folder names as
    /// `yyyy-MM-dd'T'HH-mm-ss`, using the fixed `en_US_POSIX` locale.
    private static func timestamp() -> String {
        let formatter = DateFormatter()
        formatter.dateFormat = "yyyy-MM-dd'T'HH-mm-ss"
        formatter.locale = Locale(identifier: "en_US_POSIX")
        let now = Date()
        return formatter.string(from: now)
    }

    /// Writes `self_vad.json` into the session folder: a debug artifact listing
    /// the channel-verified "self" spans that are sent to the backend as
    /// `self_vad` (mic active AND louder than system), so self detection can be
    /// eyeballed. Best-effort — serialization or write failures are ignored.
    ///
    /// - Parameters:
    ///   - spans: The self spans to record.
    ///   - result: Recorder result supplying `t0Unix` and `duration`.
    ///   - folder: Session folder that receives the file.
    private func writeSelfSpans(spans: [VADSpan], result: RecordingResult, to folder: URL) {
        var entries: [[String: Any]] = []
        entries.reserveCapacity(spans.count)
        for span in spans {
            entries.append([
                "start": span.start,
                "end": span.end,
                "name": "self",
                "confidence": span.confidence,
                "source": "mic_channel",
            ])
        }

        let payload: [String: Any] = [
            "note": "channel-verified self spans (mic active and louder than system) — the self_vad sent to label-merge",
            "t0_unix": result.t0Unix,
            "duration_sec": result.duration,
            "self_spans": entries,
        ]

        let options: JSONSerialization.WritingOptions = [.prettyPrinted, .sortedKeys]
        guard let encoded = try? JSONSerialization.data(withJSONObject: payload, options: options) else {
            return
        }
        try? encoded.write(to: folder.appendingPathComponent("self_vad.json"))
    }
}