// ten31-transcripts/Ten31Transcripts/Session/TranscriptPipeline.swift

import Foundation

/// Drives a finished session through the backend: chunk → sequential
/// `label-merge` (accumulating voiceprints) → assemble `speakers.json` → persist
/// fingerprints. Requests are sequential by construction (one chunk at a time).
final class TranscriptPipeline {
    /// Backend client used for the per-chunk `labelMerge` requests.
    private let client: SparkControlClient
    /// Source of the initial known voiceprints; updated after every chunk response.
    private let voiceprints: VoiceprintStore

    /// Creates a pipeline talking to the backend at `baseURL`.
    ///
    /// - Parameters:
    ///   - baseURL: Forwarded to `SparkControlClient`.
    ///   - skipTLS: Forwarded to `SparkControlClient`.
    ///   - voiceprints: Store read at the start of `process` and updated per chunk.
    init(baseURL: String, skipTLS: Bool, voiceprints: VoiceprintStore) {
        self.client = SparkControlClient(baseURL: baseURL, skipTLS: skipTLS)
        self.voiceprints = voiceprints
    }

    /// Process `mixedURL` against `timeline` (visual + self spans). Writes
    /// `speakers.json` into `sessionFolder` and returns it.
    ///
    /// Chunk audio is sliced into a temporary `chunks` directory inside
    /// `sessionFolder`; that directory is removed when this method exits, whether
    /// it returns or throws.
    ///
    /// - Parameters:
    ///   - sessionFolder: Folder that receives `speakers.json` and the temporary `chunks` directory.
    ///   - sessionId: Passed through to `TranscriptAssembler.assemble`.
    ///   - app: Passed through to `TranscriptAssembler.assemble`.
    ///   - mixedURL: Audio file that is measured, planned into chunks and sliced.
    ///   - timeline: Segments rebased onto each chunk's time window before upload.
    ///   - progress: Called as `progress(done, total)` before each chunk is processed
    ///     (with the chunk's index as `done`) and once more with `(total, total)`
    ///     after the last chunk; called with `(0, 0)` for an empty session.
    /// - Returns: The assembled speakers file that was written to disk.
    /// - Throws: Any error from creating the chunk directory, slicing audio,
    ///   encoding the timeline, the backend request or writing `speakers.json`;
    ///   also whatever `Task.checkCancellation()` throws when the task is cancelled.
    func process(sessionFolder: URL,
                 sessionId: String,
                 app: String,
                 mixedURL: URL,
                 timeline: [VisualTimeline.Segment],
                 progress: ((Int, Int) async -> Void)? = nil) async throws -> SpeakersFile {
        let duration = SessionPackager.duration(of: mixedURL)
        let plan = SessionPackager.planChunks(durationSec: duration)
        let speakersURL = sessionFolder.appendingPathComponent("speakers.json")

        // Zero-duration / empty session → a valid empty speakers.json, no backend call.
        if plan.isEmpty || duration <= 0 {
            let empty = TranscriptAssembler.assemble(sessionId: sessionId, app: app, chunks: [])
            try empty.speakersFile.write(to: speakersURL)
            await progress?(0, 0)
            return empty.speakersFile
        }

        let chunksDir = sessionFolder.appendingPathComponent("chunks", isDirectory: true)
        // Drop leftovers from a run that was killed before its cleanup ran; otherwise
        // the "file exists" check below could pick up a stale chunk. Best-effort: the
        // directory normally does not exist, and it is removed on exit anyway.
        try? FileManager.default.removeItem(at: chunksDir)
        // Propagate creation failures. Swallowing them would leave every slice without
        // a destination, and a missing chunk file is treated below as "empty → skip",
        // which could silently yield an empty transcript.
        try FileManager.default.createDirectory(at: chunksDir, withIntermediateDirectories: true)
        defer { try? FileManager.default.removeItem(at: chunksDir) }   // cleanup on success OR throw

        // Start from stored voiceprints; accumulate this call's prints across chunks
        // for within-call unification (the store only persists high-confidence ones).
        var known = voiceprints.knownVoiceprints()
        var results: [TranscriptAssembler.ChunkResult] = []

        for chunk in plan {
            try Task.checkCancellation()
            await progress?(chunk.index, plan.count)

            let chunkName = "chunk_\(String(format: "%03d", chunk.index)).wav"
            let chunkURL = chunksDir.appendingPathComponent(chunkName)
            try SessionPackager.sliceAudio(from: mixedURL, startSec: chunk.start, endSec: chunk.end, to: chunkURL)
            guard FileManager.default.fileExists(atPath: chunkURL.path) else { continue }  // empty slice → skip

            let timelineData = try SessionPackager.rebasedTimelineData(timeline, start: chunk.start, end: chunk.end)
            let response = try await client.labelMerge(
                audioURL: chunkURL, timeline: timelineData,
                knownVoiceprints: known.isEmpty ? nil : known, transcribe: true)

            // Feed every named fingerprint from this chunk into the next request;
            // names the response type classifies as unknown are not carried forward.
            for (name, fp) in response.fingerprints where !LabelMergeResponse.isUnknownName(name) {
                known[name] = fp
            }
            voiceprints.update(with: response)
            results.append(.init(chunkStart: chunk.start, response: response))

            // Free disk space early; the deferred directory removal is the safety net.
            try? FileManager.default.removeItem(at: chunkURL)
        }
        await progress?(plan.count, plan.count)

        let assembled = TranscriptAssembler.assemble(sessionId: sessionId, app: app, chunks: results)
        try assembled.speakersFile.write(to: speakersURL)
        return assembled.speakersFile
    }

    /// Build the `label-merge` timeline from mic-VAD self spans (Phase 1/2). Once
    /// the visual adapters land (Phase 3–4), their segments are merged in too.
    ///
    /// - Parameters:
    ///   - spans: Spans whose `start`, `end` and `confidence` are copied one-to-one.
    ///   - selfName: Name assigned to every produced segment.
    /// - Returns: One segment per span, in the same order, with source `"mic_vad"`.
    static func timeline(fromSelfSpans spans: [VADSpan], selfName: String) -> [VisualTimeline.Segment] {
        spans.map { .init(start: $0.start, end: $0.end, name: selfName, confidence: $0.confidence, source: "mic_vad") }
    }
}