85bfdf2b56
New 'Recap' phase — turns speakers.json into a human-readable recap, leveraging recap-relay's proven logic/prompts but calling the Spark gateway's OpenAI-compatible /v1/chat/completions directly (same host/TLS as label-merge; Qwen3-35B). We start from already-named speakers (label-merge), so recap-relay's speaker clustering + name-inference are skipped entirely. - GatewayLLMClient: /v1/chat/completions (JSON mode), model discovery via /api/endpoints, TLS-skip reuse, 503 retry, sequential. - RecapAnalyzer: speakers.json → numbered [N] (MM:SS) Name: text transcript → time-windowed analyze (single window for short calls, 18min/2min overlap for long) → stitch/dedup topic sections → meeting extras (TLDR/decisions/action_items/ open_questions/key_quotes). Defensive JSON parsing of LLM output. - RecapRenderer: writes transcript.md + a self-contained dark-theme recap.html (topic sections w/ collapsible transcripts, extras panels, speaker color chips, full timestamped speaker-attributed transcript, print styles). - SessionController.buildRecap: best-effort after speakers.json (gated by settings.recapEnabled); surfaces recapURL → menu 'Open recap'. Skips silently if the gateway has no LLM. Settings toggle added. Validated END-TO-END on the real Meet session against the live gateway: dual-channel transcription → 3 topic sections + accurate TLDR + key quotes; 'Go Bitcoin' correctly attributed to the remote speaker. 46/46 XCTest (10 new).
527 lines
24 KiB
Swift
527 lines
24 KiB
Swift
import Foundation
|
|
import Combine
|
|
import AppKit
|
|
import CoreGraphics
|
|
|
|
/// Snapshot of a finished recording session, published as `lastSession` for the UI.
struct SessionInfo: Equatable {
    /// The session folder, and the mixed-down recording reported by the recorder.
    let folder, mixedURL: URL
    /// Recording length as reported by the recorder.
    let duration: Double
    /// Number of "self" spans recorded for the session.
    let selfSpanCount: Int
    /// Number of vision-detected speaker segments when visual capture attached;
    /// `nil` for an audio-only session (no adapter / no window / capture failed).
    /// Shows at a glance whether the visual pipeline ran on a real call.
    let visualSegmentCount: Int?
}
|
|
|
|
/// Owns a single recording session: creates the session folder, drives
/// `AudioRecorder` start/stop, tracks elapsed time, attaches best-effort visual
/// capture for auto-detected calls, and writes the `self_vad.json` debug
/// artifact. After a session it can send the recording to the backend for
/// transcription and build a readable recap.
///
/// The lifecycle is serialized through an explicit state machine so start and
/// stop can never interleave (`.starting` → `.recording` → `.finishing`).
|
|
@MainActor
|
|
final class SessionController: ObservableObject {
|
|
/// Lifecycle phase of the controller. `.starting` and `.finishing` are the
/// transitional phases; `.error` carries the message shown to the user.
enum State: Equatable {
    case idle, starting, recording, finishing
    case error(String)
}
|
|
|
|
/// Where the backend transcription of the most recent session stands.
enum TranscriptStatus: Equatable {
    /// Nothing in flight and no result to report.
    case idle
    /// In progress: chunks done, chunks total.
    case processing(Int, Int)
    /// Finished, with the speaker and segment counts of the result.
    case done(speakers: Int, segments: Int)
    /// Failed, with the error's description.
    case failed(String)
}
|
|
|
|
/// Set in init so `AppDelegate.applicationShouldTerminate` can finalize a
|
|
/// recording in progress before the app quits.
|
|
static weak var shared: SessionController?
|
|
|
|
@Published private(set) var state: State = .idle
|
|
@Published private(set) var elapsed: TimeInterval = 0
|
|
@Published private(set) var lastSession: SessionInfo?
|
|
/// Live input peak levels (0…1) while recording, for the UI meters.
|
|
@Published private(set) var micLevel: Float = 0
|
|
@Published private(set) var systemLevel: Float = 0
|
|
/// Surfaced after a session if system audio stopped early.
|
|
@Published private(set) var warning: String?
|
|
/// Mirrored from `CallDetector` for the UI.
|
|
@Published private(set) var detectionStatus: CallDetector.Status = .disabled
|
|
/// Backend transcription status for the last session.
|
|
@Published private(set) var transcriptStatus: TranscriptStatus = .idle
|
|
/// Set when a readable recap (`recap.html`) has been written for the last session.
|
|
@Published private(set) var recapURL: URL?
|
|
|
|
private let settings: AppSettings
|
|
private var voiceprints: VoiceprintStore
|
|
private let detector = CallDetector()
|
|
private var cancellables = Set<AnyCancellable>()
|
|
private var currentLabel = "manual"
|
|
/// Everything required to (re)run the last finished session through the backend.
private struct ProcessInputs {
    /// Session folder on disk.
    let folder: URL
    /// `sessionId` is the folder's last path component; `app` is the session label.
    let sessionId, app: String
    /// The three audio files reported by the recorder.
    let micURL, systemURL, mixedURL: URL
    /// Remote visual names only; self is handled via the mic channel.
    let timeline: [VisualTimeline.Segment]
    /// Spans attributed to the local user.
    let selfSpans: [VADSpan]
    /// The user's own name, read from settings when the session finished.
    let selfName: String
    /// True when the recorder reported no system-audio note.
    let systemHealthy: Bool
}
|
|
private var lastProcess: ProcessInputs?
|
|
private var processTask: Task<Void, Never>?
|
|
private var recorder: AudioRecorder?
|
|
/// Visual capture for the current session (nil for manual recordings, apps with
|
|
/// no adapter, or when the window can't be captured — those record audio-only).
|
|
private var visualCapture: VisualCapture?
|
|
/// A visual capture whose `start()` is in flight (registered before the await),
|
|
/// so `prepareForTermination` can tear it down if its start-Task is orphaned.
|
|
private var inFlightVisual: VisualCapture?
|
|
/// App + capture target to start visual capture for, set at `start()`. `windowID`
|
|
/// pins the exact detected window (e.g. the Meet browser window); nil → largest.
|
|
private var pendingCapture: (app: CallDetector.DetectedApp, bundleID: String, windowID: CGWindowID?)?
|
|
private var currentFolder: URL?
|
|
private var startTime: Date?
|
|
private var timer: Timer?
|
|
/// True when the current session was started by call detection (not the user).
|
|
private var autoStarted = false
|
|
/// Set if a detected call ends while we're still in `.starting`.
|
|
private var pendingAutoStop = false
|
|
/// The in-flight start or stop Task, so `prepareForTermination` can await it.
|
|
private var lifecycleTask: Task<Void, Never>?
|
|
/// Bumped each time a start/stop Task is spawned (Task is a value type, so this
|
|
/// is how `prepareForTermination` detects a newly-spawned transition).
|
|
private var lifecycleGeneration = 0
|
|
|
|
/// Wires the controller to its settings and to call detection.
///
/// - Parameter settings: App settings; observed for output-folder and
///   auto-record changes for the lifetime of the controller.
init(settings: AppSettings) {
    self.settings = settings
    let voiceprintsURL = settings.outputFolderURL.appendingPathComponent("voiceprints.json")
    self.voiceprints = VoiceprintStore(fileURL: voiceprintsURL)
    SessionController.shared = self

    // Call detection → auto start/stop, plus a mirrored status for the UI.
    detector.onCallStart = { [weak self] call in self?.handleCallStart(call) }
    detector.onCallEnd = { [weak self] in self?.handleCallEnd() }
    detector.$status
        .sink { [weak self] in self?.detectionStatus = $0 }
        .store(in: &cancellables)

    // Follow the output folder with the voiceprint DB. A pipeline already in
    // flight keeps the reference it captured, so a change can't disrupt a run.
    settings.$outputFolderPath
        .dropFirst()
        .sink { [weak self] path in
            guard let self else { return }
            let expanded = (path as NSString).expandingTildeInPath
            let directory = URL(fileURLWithPath: expanded, isDirectory: true)
            self.voiceprints = VoiceprintStore(
                fileURL: directory.appendingPathComponent("voiceprints.json"))
        }
        .store(in: &cancellables)

    settings.$autoRecordOnDetection
        .sink { [weak self] isOn in
            guard let self else { return }
            guard !isOn else {
                self.detector.enable()
                return
            }
            self.detector.disable()
            // An auto-started session must not keep running without a detector;
            // cover both .recording and the in-flight .starting case.
            guard self.autoStarted else { return }
            switch self.state {
            case .recording:
                self.stop()
            case .starting:
                self.pendingAutoStop = true
            case .idle, .finishing, .error:
                break
            }
        }
        .store(in: &cancellables)
}
|
|
|
|
// MARK: - Auto-detection
|
|
|
|
/// Detector callback: auto-starts a session for a detected call when the
/// setting allows it and no session is already active.
private func handleCallStart(_ call: CallDetector.DetectedCall) {
    guard settings.autoRecordOnDetection else { return }
    switch state {
    case .starting, .recording, .finishing:
        return // leave the active session alone
    case .idle, .error:
        start(label: call.app.label, auto: true,
              capture: (call.app, call.bundleID, call.windowID))
    }
}
|
|
|
|
/// Detector callback: ends the session only if call detection started it.
/// A manual recording is never auto-stopped.
private func handleCallEnd() {
    guard autoStarted else { return }
    if state == .recording {
        stop()
    } else if state == .starting {
        // Still starting: the start Task resolves this once it completes.
        pendingAutoStop = true
    }
}
|
|
|
|
/// True while a session is starting, recording, or finishing.
var isBusy: Bool {
    switch state {
    case .starting, .recording, .finishing:
        return true
    case .idle, .error:
        return false
    }
}
|
|
|
|
/// Record-button action: stops an active recording, starts one from rest, and
/// ignores taps while a start or stop is still in flight.
func toggle() {
    switch state {
    case .recording:
        stop()
    case .starting, .finishing:
        return // mid-transition
    case .idle, .error:
        start()
    }
}
|
|
|
|
// MARK: - Start / Stop
|
|
|
|
/// Begins a new session: creates its folder, resets per-session state, and
/// starts the recorder on a lifecycle Task.
///
/// - Parameters:
///   - label: Suffix of the session folder name, kept as the session's app label.
///   - auto: True when call detection (not the user) started the session.
///   - capture: Window to attach visual capture to; `nil` records audio-only.
private func start(label: String = "manual", auto: Bool = false,
                   capture: (app: CallDetector.DetectedApp, bundleID: String, windowID: CGWindowID?)? = nil) {
    let sessionFolder: URL
    do {
        sessionFolder = try makeSessionFolder(label: label)
    } catch {
        fail("Couldn't create session folder: \(error.localizedDescription)")
        return
    }

    // Per-session bookkeeping.
    currentFolder = sessionFolder
    currentLabel = label
    autoStarted = auto
    pendingAutoStop = false
    pendingCapture = capture

    let newRecorder = AudioRecorder(
        micURL: sessionFolder.appendingPathComponent("mic.wav"),
        systemURL: sessionFolder.appendingPathComponent("system.wav"),
        mixedURL: sessionFolder.appendingPathComponent("mixed_mono_16k.wav"))
    recorder = newRecorder

    // Published state for the UI.
    warning = nil
    recapURL = nil
    state = .starting

    lifecycleGeneration += 1
    let generation = lifecycleGeneration
    lifecycleTask = Task {
        do {
            try await newRecorder.start() // tears itself down if it throws
        } catch {
            self.handleStartFailure(error)
            return
        }
        self.state = .recording
        self.startTime = Date()
        self.startTimer()

        // The detected call may already have ended while we were starting.
        guard !self.pendingAutoStop else {
            self.pendingAutoStop = false
            self.stop()
            return
        }

        // Best-effort visual capture on the SAME clock (audio-only on failure).
        // The generation and recorder identify this session, so a slow start
        // can't adopt itself into a different session that began meanwhile.
        await self.startVisual(t0Host: newRecorder.sharedT0Host(),
                               generation: generation, recorder: newRecorder)
    }
}
|
|
|
|
/// Turns a recorder start failure into an actionable error state.
///
/// The usual culprit is Screen Recording being re-checked after a rebuild (the
/// SCStream auth check fails even though CGPreflight reports granted). In that
/// case re-prompt and open the matching Settings pane instead of surfacing a
/// cryptic TCC error; anything else is reported verbatim.
private func handleStartFailure(_ error: Error) {
    let description = error.localizedDescription
    let lowered = description.lowercased()
    let screenKeywords = ["declined", "tcc", "screen", "permission"]
    let looksLikeScreenIssue = screenKeywords.contains { lowered.contains($0) }

    guard looksLikeScreenIssue else {
        fail("Couldn't start recording: \(description)")
        return
    }

    _ = CGRequestScreenCaptureAccess()
    if let settingsPane = URL(string: "x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture") {
        NSWorkspace.shared.open(settingsPane)
    }
    fail("Screen Recording needs re-approval for this build. Toggle Ten31Transcripts off then on in System Settings ▸ Screen Recording, then restart the app.")
}
|
|
|
|
// MARK: - Visual capture
|
|
|
|
/// Best-effort window capture for the detected app, on the audio clock.
///
/// Any failure (no adapter, no window, Screen Recording denied) leaves
/// `visualCapture` nil and the session records audio-only.
///
/// `generation`/`recorder` identify the session that launched this call.
/// Starting the capture is a slow async step, so a stop plus a fresh start can
/// complete while it is pending. The stream is adopted ONLY back into the same
/// session; otherwise it is cancelled so a stale capture can never attach to
/// (or leak into) a different session.
private func startVisual(t0Host: Double, generation: Int, recorder: AudioRecorder) async {
    // No pending target means a manual recording → audio-only.
    guard let target = pendingCapture else { return }
    pendingCapture = nil
    guard let candidate = VisualCapture(app: target.app, bundleID: target.bundleID,
                                        windowID: target.windowID, t0Host: t0Host) else { return }

    // Register before the await so prepareForTermination can drain the capture
    // even if this start-Task is orphaned by a concurrent stop.
    inFlightVisual = candidate
    defer { if inFlightVisual === candidate { inFlightVisual = nil } }

    do {
        try await candidate.start()
    } catch {
        await candidate.cancel() // tear down any partial stream; never break recording
        return
    }

    // Adopt only if THIS session still owns the slot (same generation, same
    // recorder, still recording); otherwise discard rather than leak/misattach.
    let stillOwnsSlot = generation == lifecycleGeneration
        && self.recorder === recorder
        && state == .recording
    guard stillOwnsSlot else {
        await candidate.cancel()
        return
    }

    if let previous = visualCapture { await previous.cancel() } // fail-closed
    visualCapture = candidate
}
|
|
|
|
/// Stops visual capture (if any) and gathers what the backend needs.
///
/// - Returns: `timeline` — the remote segments returned by the capture's
///   `finish`, or empty when no capture ran; `selfSpans` — the channel-derived
///   self spans, which travel separately; `visualRan` — whether the visual
///   pipeline actually attached (for the after-session indicator).
private func stopVisualAndTimeline(_ result: RecordingResult, folder: URL?)
    async -> (timeline: [VisualTimeline.Segment], selfSpans: [VADSpan], visualRan: Bool) {
    let localName = settings.selfName
    let spans = await channelSelfSpans(result: result, folder: folder)

    guard let capture = visualCapture else {
        return ([], spans, false)
    }
    guard let folder else {
        // Nowhere to write the timeline: discard the capture.
        await capture.cancel()
        visualCapture = nil
        return ([], spans, false)
    }

    visualCapture = nil
    // Remote (vision) segments only; self travels separately as the mic channel.
    let remoteSegments = await capture.finish(
        selfSpans: spans, selfName: localName,
        sessionId: folder.lastPathComponent, t0Unix: result.t0Unix,
        durationSec: result.duration, folder: folder)
    return (remoteSegments, spans, true)
}
|
|
|
|
/// Self spans for the backend, identified by CHANNEL: the mic track is the local
/// user, so self = mic active AND louder than system. That keeps self
/// platform-independent (one name, no display-name matching) and stops a remote
/// speaker from being mislabeled as self.
///
/// Falls back to the recorder's own spans when there is no folder or the
/// channel analysis yields nothing. The analysis runs in a detached task, off
/// the main actor (file I/O).
private func channelSelfSpans(result: RecordingResult, folder: URL?) async -> [VADSpan] {
    guard let folder else { return result.selfSpans }
    let micURL = folder.appendingPathComponent("mic.wav")
    let systemURL = folder.appendingPathComponent("system.wav")
    let analysis = Task.detached {
        ChannelSelfVAD.selfSpans(micURL: micURL, systemURL: systemURL)
    }
    if let channelSpans = await analysis.value {
        return channelSpans
    }
    return result.selfSpans
}
|
|
|
|
/// Moves to `.finishing` and finalizes the session on a new lifecycle Task:
/// stop the recorder, stop visual capture, then publish the results.
/// Does nothing when there is no recorder.
private func stop() {
    guard let activeRecorder = recorder else { return }
    state = .finishing
    stopTimer()
    let sessionFolder = currentFolder
    lifecycleGeneration += 1
    lifecycleTask = Task {
        let result = await activeRecorder.stop()
        let visual = await self.stopVisualAndTimeline(result, folder: sessionFolder)
        self.finish(result,
                    timeline: visual.timeline,
                    selfSpans: visual.selfSpans,
                    visualRan: visual.visualRan)
    }
}
|
|
|
|
/// Publishes the finished session and returns the controller to `.idle`.
///
/// Writes the `self_vad.json` debug artifact, records `lastSession` and the
/// inputs needed to (re)send the session to the backend, resets per-session
/// state, and — when `autoSendOnStop` is set — starts backend processing.
///
/// - Parameters:
///   - result: The recorder's result for the session.
///   - timeline: Remote vision segments (empty when visual capture didn't run).
///   - selfSpans: Spans attributed to the local user.
///   - visualRan: Whether visual capture attached for this session.
private func finish(_ result: RecordingResult, timeline: [VisualTimeline.Segment],
                    selfSpans: [VADSpan], visualRan: Bool) {
    recorder = nil
    micLevel = 0
    systemLevel = 0
    warning = result.systemNote.map { "System audio stopped early: \($0)" }
    transcriptStatus = .idle
    if let folder = currentFolder {
        writeSelfSpans(spans: selfSpans, result: result, to: folder)
        let visualCount = visualRan ? timeline.count : nil // `timeline` is the remote vision segments
        // A recap built for the previous session while this one was recording
        // must not stay published once `lastSession` changes.
        recapURL = nil
        lastSession = SessionInfo(
            folder: folder, mixedURL: result.mixedURL,
            duration: result.duration, selfSpanCount: selfSpans.count,
            visualSegmentCount: visualCount)
        lastProcess = ProcessInputs(
            folder: folder, sessionId: folder.lastPathComponent, app: currentLabel,
            micURL: result.micURL, systemURL: result.systemURL, mixedURL: result.mixedURL,
            timeline: timeline, selfSpans: selfSpans, selfName: settings.selfName,
            systemHealthy: result.systemNote == nil)
    }
    // Read the setting before resetting state; processing starts only after
    // the controller is back at `.idle`.
    let autoSend = settings.autoSendOnStop
    currentFolder = nil
    autoStarted = false
    pendingAutoStop = false
    elapsed = 0
    state = .idle
    if autoSend { processLastSession() }
}
|
|
|
|
// MARK: - Backend transcription
|
|
|
|
/// Send the last finished session to the backend → `speakers.json`.
///
/// The timeline passed along holds the session's remote visual segments (empty
/// when visual capture didn't run); the self spans travel separately. Safe to
/// call manually ("Send to backend") or automatically on stop. Does nothing
/// when there is no finished session or a run is already in progress.
///
/// Status updates are published only while this run's session is still the
/// last finished one, so a slow run for an older session can neither overwrite
/// a newer session's status nor block it from being sent.
func processLastSession() {
    guard let inputs = lastProcess else { return }
    if case .processing = transcriptStatus { return }
    transcriptStatus = .processing(0, 1)
    recapURL = nil

    // Captured up front so settings or voiceprint changes can't affect this run.
    let settings = self.settings
    let voiceprints = self.voiceprints
    let sessionId = inputs.sessionId
    processTask = Task {
        let pipeline = TranscriptPipeline(
            baseURL: settings.backendBaseURL,
            skipTLS: settings.skipTLSVerification,
            voiceprints: voiceprints)
        do {
            let speakers = try await pipeline.process(
                sessionFolder: inputs.folder, sessionId: inputs.sessionId, app: inputs.app,
                micURL: inputs.micURL, systemURL: inputs.systemURL, mixedURL: inputs.mixedURL,
                timeline: inputs.timeline, selfSpans: inputs.selfSpans, selfName: inputs.selfName,
                systemHealthy: inputs.systemHealthy,
                progress: { done, total in
                    await MainActor.run {
                        if self.lastProcess?.sessionId == sessionId {
                            self.transcriptStatus = .processing(done, total)
                        }
                    }
                })
            if self.lastProcess?.sessionId == sessionId {
                self.transcriptStatus = .done(speakers: speakers.speakers.count,
                                              segments: speakers.segments.count)
            }
            // Best-effort readable recap (topic sections + extras) via the gateway LLM.
            if settings.recapEnabled, !speakers.segments.isEmpty {
                try Task.checkCancellation()
                await self.buildRecap(speakers: speakers, inputs: inputs, settings: settings)
            }
        } catch is CancellationError {
            if self.lastProcess?.sessionId == sessionId {
                self.transcriptStatus = .idle
            }
        } catch {
            if self.lastProcess?.sessionId == sessionId {
                self.transcriptStatus = .failed(error.localizedDescription)
            }
        }
    }
}
|
|
|
|
/// Build `transcript.md` + `recap.html` from the finished `speakers.json` using
/// the gateway LLM. Best-effort: a missing LLM or any failure leaves the
/// transcript intact and just skips the recap.
///
/// `recapURL` is published only if `recap.html` exists and `inputs` still
/// describes the last finished session; a recap for an older session is still
/// written to that session's folder but is not surfaced.
///
/// - Parameters:
///   - speakers: The transcription result to recap.
///   - inputs: The session the result belongs to (folder, app, session id).
///   - settings: Supplies the gateway base URL and TLS-skip flag.
private func buildRecap(speakers: SpeakersFile, inputs: ProcessInputs, settings: AppSettings) async {
    let llm = GatewayLLMClient(baseURL: settings.backendBaseURL, skipTLS: settings.skipTLSVerification)
    guard let model = await llm.chatModelId() else { return } // no LLM on the gateway → skip
    let analyzer = RecapAnalyzer(llm: llm, model: model)
    guard let result = try? await analyzer.recap(file: speakers) else { return }
    let title = Self.recapTitle(app: inputs.app, sessionId: inputs.sessionId)
    // Deliberately best-effort: a failed write is detected by the existence check below.
    try? RecapRenderer.write(file: speakers, result: result, title: title, to: inputs.folder)
    let url = inputs.folder.appendingPathComponent("recap.html")
    guard FileManager.default.fileExists(atPath: url.path) else { return }
    // The awaits above can take long enough for a newer session to finish;
    // don't publish this recap against a different "last session".
    guard lastProcess?.sessionId == inputs.sessionId else { return }
    recapURL = url
}
|
|
|
|
/// Human-friendly recap title, e.g. "Google Meet call — 2026-06-06 11:43".
///
/// The date and time come from the part of `sessionId` before its first `_`,
/// read as `<date>T<HH>-<mm>-…`. Missing pieces are left out and surrounding
/// whitespace is trimmed.
private static func recapTitle(app: String, sessionId: String) -> String {
    let appName = CallDetector.DetectedApp(rawValue: app)?.display ?? app.capitalized
    let stamp = sessionId.split(separator: "_").first.map(String.init) ?? sessionId
    let halves = stamp.split(separator: "T")
    let date = halves.first.map(String.init) ?? ""

    var time = ""
    if halves.count > 1 {
        let fields = halves[1].split(separator: "-")
        if fields.count >= 2 {
            time = "\(fields[0]):\(fields[1])"
        }
    }
    let title = "\(appName) call — \(date) \(time)"
    return title.trimmingCharacters(in: .whitespaces)
}
|
|
|
|
/// Abandons the session being set up and moves to `.error(message)`,
/// clearing every per-session reference and resetting the UI meters.
private func fail(_ message: String) {
    stopTimer()

    // Per-session references. The recorder failed before visual capture
    // started, so there is nothing running to tear down.
    recorder = nil
    visualCapture = nil
    inFlightVisual = nil
    pendingCapture = nil
    currentFolder = nil
    (autoStarted, pendingAutoStop) = (false, false)

    // Published UI state; the error itself goes last.
    micLevel = 0
    systemLevel = 0
    elapsed = 0
    state = .error(message)
}
|
|
|
|
/// Called from `applicationShouldTerminate`: flushes any in-progress session so
/// its WAV headers are finalized before the process exits. Covers a quit during
/// `.starting` and `.finishing`, not just `.recording`.
func prepareForTermination() async {
    // Cancel in-flight backend transcription: the audio is already saved and
    // the user can resend. The pipeline's checkCancellation + defer clean up chunks.
    processTask?.cancel()

    // Drain lifecycle Tasks until nothing is busy. A Stop click landing in an
    // await window can spawn a new stop Task, so loop instead of awaiting a
    // single captured task.
    while isBusy {
        let observedGeneration = lifecycleGeneration
        await lifecycleTask?.value

        guard state == .recording, let liveRecorder = recorder else {
            // Settled when no new transition was spawned while we waited.
            if lifecycleGeneration == observedGeneration { break }
            continue
        }

        // Still recording: finalize the session inline.
        state = .finishing
        stopTimer()
        let sessionFolder = currentFolder
        let result = await liveRecorder.stop()
        let visual = await stopVisualAndTimeline(result, folder: sessionFolder)
        finish(result,
               timeline: visual.timeline,
               selfSpans: visual.selfSpans,
               visualRan: visual.visualRan)
    }

    // A visual start-Task orphaned by a concurrent stop may still hold a live
    // stream that nothing else tears down before exit — drain it here.
    if let orphan = inFlightVisual {
        inFlightVisual = nil
        await orphan.cancel()
    }
}
|
|
|
|
// MARK: - Timer
|
|
|
|
/// Starts the 0.1 s UI tick that refreshes `elapsed` and the input level meters.
///
/// Any previous timer is invalidated first, so a repeated call can't leave an
/// unreferenced timer firing. The timer is registered in the common run-loop
/// modes so it keeps ticking while the run loop is in an event-tracking mode
/// (e.g. an open menu), where a default-mode timer would pause.
private func startTimer() {
    stopTimer()
    let ticker = Timer(timeInterval: 0.1, repeats: true) { [weak self] _ in
        // Timer callbacks aren't actor-isolated; hop to the main actor.
        Task { @MainActor in
            guard let self else { return }
            if let start = self.startTime { self.elapsed = Date().timeIntervalSince(start) }
            if let recorder = self.recorder {
                let levels = recorder.currentLevels()
                self.micLevel = levels.mic
                self.systemLevel = levels.system
            }
        }
    }
    RunLoop.main.add(ticker, forMode: .common)
    timer = ticker
}
|
|
|
|
/// Invalidates the UI tick timer, if one is running, and drops the reference.
private func stopTimer() {
    if let activeTimer = timer {
        activeTimer.invalidate()
    }
    timer = nil
}
|
|
|
|
// MARK: - Files
|
|
|
|
/// Creates `<output folder>/sessions/<timestamp>_<label>` (with intermediate
/// directories) and returns its URL.
///
/// - Throws: Whatever `FileManager.createDirectory` throws.
private func makeSessionFolder(label: String) throws -> URL {
    let folderName = "\(Self.timestamp())_\(label)"
    let sessionFolder = settings.outputFolderURL
        .appendingPathComponent("sessions", isDirectory: true)
        .appendingPathComponent(folderName, isDirectory: true)
    try FileManager.default.createDirectory(at: sessionFolder, withIntermediateDirectories: true)
    return sessionFolder
}
|
|
|
|
/// Current date and time formatted as `yyyy-MM-dd'T'HH-mm-ss` with the fixed
/// `en_US_POSIX` locale; used as the prefix of session folder names.
private static func timestamp() -> String {
    let formatter = DateFormatter()
    formatter.dateFormat = "yyyy-MM-dd'T'HH-mm-ss"
    formatter.locale = Locale(identifier: "en_US_POSIX")
    let now = Date()
    return formatter.string(from: now)
}
|
|
|
|
/// Debug artifact: writes the channel-verified "self" spans actually sent to the
/// backend as `self_vad` (mic active AND louder than system) to `self_vad.json`
/// in the session folder, so self detection can be eyeballed.
///
/// Best-effort: nothing is written if the payload can't be encoded or the file
/// can't be saved.
///
/// - Parameters:
///   - spans: The self spans to record.
///   - result: Supplies `t0_unix` and `duration_sec`.
///   - folder: Session folder that receives `self_vad.json`.
private func writeSelfSpans(spans: [VADSpan], result: RecordingResult, to folder: URL) {
    let segments = spans.map { span -> [String: Any] in
        ["start": span.start, "end": span.end, "name": "self",
         "confidence": span.confidence, "source": "mic_channel"]
    }
    let object: [String: Any] = [
        "note": "channel-verified self spans (mic active and louder than system) — the self_vad sent to label-merge",
        "t0_unix": result.t0Unix,
        "duration_sec": result.duration,
        "self_spans": segments,
    ]
    // `data(withJSONObject:)` raises an Objective-C exception — which `try?`
    // cannot catch — for payloads JSON can't represent (e.g. NaN or infinite
    // numbers), so validate first and skip the artifact instead of crashing.
    guard JSONSerialization.isValidJSONObject(object),
          let data = try? JSONSerialization.data(withJSONObject: object,
                                                 options: [.prettyPrinted, .sortedKeys])
    else { return }
    try? data.write(to: folder.appendingPathComponent("self_vad.json"))
}
|
|
}
|