Collapse adjacent same-speaker segments after reconciliation

Fragments reabsorbed by smoothFragments (e.g. "I" then "need to switch it back") were left as separate transcript lines. Add SpeakerReconciler.mergeAdjacent to join consecutive same-speaker segments within 2s, concatenating their text. Wire it into SessionController.finishBackend AFTER reconcile/LLM naming. The collapse needs no LLM, so finishBackend no longer early-returns when the gateway has no chat model — it runs the collapse and re-persists speakers.json unconditionally, gating only the reconcile and recap passes on the model.
2026-06-08 13:19:05 -05:00
parent ab910cf742
commit a95f27ecd1
3 changed files with 70 additions and 8 deletions
@@ -393,24 +393,32 @@ final class SessionController: ObservableObject {
        }
    }

-    /// Post-transcription LLM passes (best-effort, share one gateway model lookup):
-    /// reconcile speaker labels (merge split clusters + name from content), then build
-    /// the readable recap. A missing LLM or any failure leaves speakers.json intact.
+    /// Post-transcription cleanup + LLM passes. Speaker reconciliation (merge split
+    /// clusters + content-naming) and the readable recap need the gateway LLM; the
+    /// adjacent-segment collapse does not. So the collapse runs unconditionally and
+    /// always re-persists `speakers.json`, while the LLM passes are skipped when no
+    /// model is available. Any failure leaves the last good `speakers.json` intact.
    private func finishBackend(speakers: SpeakersFile, inputs: ProcessInputs, settings: AppSettings) async {
        let llm = GatewayLLMClient(baseURL: settings.backendBaseURL, skipTLS: settings.skipTLSVerification)
-        guard let model = await llm.chatModelId() else { return }   // no LLM on the gateway → skip both
+        let model = await llm.chatModelId()   // nil → no LLM on the gateway; LLM passes skipped

        var resolved = speakers
-        if settings.reconcileSpeakers, !speakers.segments.isEmpty {
+        // Reconcile labels (needs the LLM): merge split clusters, dissolve fragments,
+        // and name placeholders from transcript content.
+        if let model, settings.reconcileSpeakers, !speakers.segments.isEmpty {
            self.transcriptStatus = .processing(0, 0)
            let fps = RecapEditModel.loadFingerprints(inputs.folder.appendingPathComponent("cluster_fingerprints.json"))
            resolved = await SpeakerReconciler.reconcile(file: speakers, fingerprints: fps,
                                                         selfName: inputs.selfName, llm: llm, model: model)
-            try? resolved.write(to: inputs.folder.appendingPathComponent("speakers.json"))
-            self.transcriptStatus = .done(speakers: resolved.speakers.count, segments: resolved.segments.count)
        }
+        // Collapse adjacent same-speaker segments (no LLM needed) so fragments
+        // reabsorbed by smoothing read as one clean line, then persist. Always runs
+        // — even when the LLM is unavailable — so the saved transcript is cleaned up.
+        resolved = SpeakerReconciler.mergeAdjacent(resolved)
+        try? resolved.write(to: inputs.folder.appendingPathComponent("speakers.json"))
+        self.transcriptStatus = .done(speakers: resolved.speakers.count, segments: resolved.segments.count)

-        guard settings.recapEnabled, !resolved.segments.isEmpty else { return }
+        guard let model, settings.recapEnabled, !resolved.segments.isEmpty else { return }
        let analyzer = RecapAnalyzer(llm: llm, model: model)
        guard let result = try? await analyzer.recap(file: resolved, template: settings.defaultTemplate) else { return }
        let title = Self.recapTitle(app: inputs.app, sessionId: inputs.sessionId)