Recap: readable transcript + topic sections + meeting extras (gateway LLM)

New 'Recap' phase — turns speakers.json into a human-readable recap, leveraging recap-relay's proven logic/prompts but calling the Spark gateway's OpenAI-compatible /v1/chat/completions directly (same host/TLS as label-merge; Qwen3-35B). We start from already-named speakers (label-merge), so recap-relay's speaker clustering + name-inference are skipped entirely. - GatewayLLMClient: /v1/chat/completions (JSON mode), model discovery via /api/endpoints, TLS-skip reuse, 503 retry, sequential. - RecapAnalyzer: speakers.json → numbered [N] (MM:SS) Name: text transcript → time-windowed analyze (single window for short calls, 18-min windows with 2-min overlap for long) → stitch/dedup topic sections → meeting extras (TLDR/decisions/action_items/open_questions/key_quotes). Defensive JSON parsing of LLM output. - RecapRenderer: writes transcript.md + a self-contained dark-theme recap.html (topic sections w/ collapsible transcripts, extras panels, speaker color chips, full timestamped speaker-attributed transcript, print styles). - SessionController.buildRecap: best-effort after speakers.json (gated by settings.recapEnabled); surfaces recapURL → menu 'Open recap'. Skips silently if the gateway has no LLM. Settings toggle added. Validated END-TO-END on the real Meet session against the live gateway: dual-channel transcription → 3 topic sections + accurate TLDR + key quotes; 'Go Bitcoin' correctly attributed to the remote speaker. 46/46 XCTest tests pass (10 new).
2026-06-06 14:36:18 -05:00
parent 53d7fcdac0
commit 85bfdf2b56
9 changed files with 941 additions and 1 deletions
@@ -0,0 +1,135 @@
+import Foundation
+
+/// Failures surfaced by `GatewayLLMClient`, each with a user-facing description.
+enum GatewayLLMError: Error, LocalizedError {
+    /// The configured base URL could not be turned into a request URL.
+    case invalidHost
+    /// vLLM is not loaded on the gateway.
+    case notReady
+    /// An HTTP status outside 2xx/503, with the server's detail text.
+    case server(Int, String)
+    /// A response arrived but could not be decoded; carries the reason.
+    case decode(String)
+    /// The gateway kept answering 503 until the retry budget ran out.
+    case retriesExhausted
+
+    /// Human-readable message for each failure.
+    var errorDescription: String? {
+        let message: String
+        switch self {
+        case .invalidHost:
+            message = "Invalid backend host URL."
+        case .notReady:
+            message = "The gateway's language model isn't ready."
+        case let .server(code, detail):
+            message = "LLM error \(code): \(detail)"
+        case let .decode(msg):
+            message = "Couldn't decode the LLM response: \(msg)"
+        case .retriesExhausted:
+            message = "Gateway stayed busy (503) after retries."
+        }
+        return message
+    }
+}
+
+/// Talks to the Spark Control gateway's OpenAI-compatible `/v1/chat/completions`
+/// (the same host + TLS as `label-merge`). Used for the recap analysis (topic
+/// sections, summary polish, meeting extras). **Call sequentially** — like audio,
+/// the gateway serializes GPU work; the recap pipeline issues one request at a time.
+final class GatewayLLMClient {
+    private let baseURL: String
+    private let urlSession: URLSession
+
+    /// Creates a client for the gateway at `baseURL`.
+    ///
+    /// Surrounding whitespace and a single trailing `/` are removed from the URL so
+    /// endpoint paths can be appended directly. When `skipTLS` is true the session
+    /// gets an `InsecureTrustDelegate`; otherwise it has no delegate.
+    init(baseURL: String, skipTLS: Bool) {
+        var host = baseURL.trimmingCharacters(in: .whitespacesAndNewlines)
+        if host.hasSuffix("/") { host.removeLast() }
+        self.baseURL = host
+
+        // Ephemeral session with long timeouts and no waiting for connectivity.
+        let configuration = URLSessionConfiguration.ephemeral
+        configuration.timeoutIntervalForRequest = 600
+        configuration.timeoutIntervalForResource = 900
+        configuration.waitsForConnectivity = false
+
+        var trustDelegate: URLSessionDelegate?
+        if skipTLS { trustDelegate = InsecureTrustDelegate() }
+        self.urlSession = URLSession(configuration: configuration, delegate: trustDelegate, delegateQueue: nil)
+    }
+
+    /// Lets outstanding tasks finish, then invalidates the session.
+    deinit {
+        urlSession.finishTasksAndInvalidate()
+    }
+
+    /// Looks up the chat model id in `/api/endpoints` (`vllm.model`).
+    ///
+    /// Returns nil when the request fails, the body is not the expected JSON, or
+    /// `vllm.ready` is not `true` — i.e. the gateway has no language model loaded.
+    func chatModelId() async -> String? {
+        guard let endpoint = URL(string: baseURL + "/api/endpoints"),
+              let (payload, _) = try? await urlSession.data(from: endpoint),
+              let root = (try? JSONSerialization.jsonObject(with: payload)) as? [String: Any],
+              let vllm = root["vllm"] as? [String: Any]
+        else { return nil }
+
+        guard let isReady = vllm["ready"] as? Bool, isReady else { return nil }
+        guard let name = vllm["model"] as? String, !name.isEmpty else { return nil }
+        return name
+    }
+
+    /// Performs one JSON-mode chat completion and returns the first choice's raw
+    /// `content` string (the caller parses it as JSON).
+    ///
+    /// A `503` is retried up to `maxRetries` times. Between attempts the client
+    /// sleeps for the numeric `Retry-After` header (seconds), clamped to 1…120 s;
+    /// a missing or unparsable header means 5 s.
+    ///
+    /// - Parameters:
+    ///   - model: Model id sent in the request body.
+    ///   - system: Optional system message; omitted from the request when nil.
+    ///   - user: The user message.
+    ///   - maxTokens: Value sent as `max_tokens`.
+    ///   - maxRetries: How many 503 responses are tolerated before giving up.
+    /// - Throws: `GatewayLLMError.invalidHost` for an unusable base URL,
+    ///   `.retriesExhausted` after too many 503s, `.server` for any other non-2xx
+    ///   status, `.decode` for an undecodable body, plus transport and cancellation
+    ///   errors from `URLSession` / `Task.sleep`.
+    func completeJSON(model: String, system: String?, user: String,
+                      maxTokens: Int = 4096, maxRetries: Int = 3) async throws -> String {
+        guard let url = URL(string: baseURL + "/v1/chat/completions") else {
+            throw GatewayLLMError.invalidHost
+        }
+        var messages: [[String: String]] = []
+        if let system { messages.append(["role": "system", "content": system]) }
+        messages.append(["role": "user", "content": user])
+        let body: [String: Any] = [
+            "model": model,
+            "messages": messages,
+            "max_tokens": maxTokens,
+            "stream": false,
+            "response_format": ["type": "json_object"],
+            "chat_template_kwargs": ["enable_thinking": false],
+        ]
+        let bodyData = try JSONSerialization.data(withJSONObject: body)
+
+        var attempt = 0
+        while true {
+            var request = URLRequest(url: url)
+            request.httpMethod = "POST"
+            request.setValue("application/json", forHTTPHeaderField: "Content-Type")
+            request.httpBody = bodyData
+
+            let (data, response) = try await urlSession.data(for: request)
+            guard let http = response as? HTTPURLResponse else {
+                throw GatewayLLMError.decode("no HTTP response")
+            }
+            switch http.statusCode {
+            case 200..<300:
+                return try Self.content(from: data)
+            case 503:
+                attempt += 1
+                if attempt > maxRetries { throw GatewayLLMError.retriesExhausted }
+                let advertised = http.value(forHTTPHeaderField: "Retry-After").flatMap(Double.init)
+                try await Task.sleep(nanoseconds: Self.retryDelayNanoseconds(advertised))
+            default:
+                throw GatewayLLMError.server(http.statusCode, Self.detail(from: data))
+            }
+        }
+    }
+
+    /// Converts a parsed `Retry-After` value (seconds) into a sleep duration.
+    ///
+    /// The header is server-controlled: `inf` or a huge number would trap in the
+    /// `UInt64` conversion, so the delay is clamped to 1…120 s. Nil or NaN means 5 s.
+    private static func retryDelayNanoseconds(_ advertisedSeconds: Double?) -> UInt64 {
+        let seconds = advertisedSeconds.flatMap { $0.isNaN ? nil : $0 } ?? 5
+        let clamped = min(max(1, seconds), 120)
+        return UInt64(clamped * 1_000_000_000)
+    }
+
+    // MARK: - Parsing
+
+    /// Extracts `choices[0].message.content` from a chat-completions response body.
+    ///
+    /// - Throws: `GatewayLLMError.decode` when the body does not match the expected
+    ///   shape or contains no choices.
+    private static func content(from data: Data) throws -> String {
+        struct ChatResponse: Decodable {
+            struct Choice: Decodable { struct Message: Decodable { let content: String }; let message: Message }
+            let choices: [Choice]
+        }
+        let decoded: ChatResponse
+        do {
+            decoded = try JSONDecoder().decode(ChatResponse.self, from: data)
+        } catch {
+            throw GatewayLLMError.decode(error.localizedDescription)
+        }
+        // Checked outside the `do` so this error is not caught and wrapped a second
+        // time, which would repeat the "Couldn't decode…" prefix in the message.
+        guard let text = decoded.choices.first?.message.content else {
+            throw GatewayLLMError.decode("no choices in response")
+        }
+        return text
+    }
+
+    /// Pulls a readable message out of an error response body.
+    ///
+    /// Prefers a top-level `detail` string, then `error.message`; otherwise returns
+    /// the body as UTF-8 text, or "unknown error" if it is not valid UTF-8.
+    private static func detail(from data: Data) -> String {
+        func rawBody() -> String { String(data: data, encoding: .utf8) ?? "unknown error" }
+
+        guard let payload = (try? JSONSerialization.jsonObject(with: data)) as? [String: Any] else {
+            return rawBody()
+        }
+        if let text = payload["detail"] as? String { return text }
+        if let nested = payload["error"] as? [String: Any], let text = nested["message"] as? String {
+            return text
+        }
+        return rawBody()
+    }
+
+    /// Removes a ```json … ``` code fence if the model wrapped its JSON (defensive;
+    /// JSON mode usually prevents this) and returns the trimmed payload.
+    ///
+    /// Unfenced input is returned trimmed. For fenced input the opening fence line
+    /// is dropped and everything from the last "```" onward is cut. A fence written
+    /// on a single line (no newline after the opening backticks) is handled too.
+    static func stripCodeFence(_ s: String) -> String {
+        var t = s.trimmingCharacters(in: .whitespacesAndNewlines)
+        guard t.hasPrefix("```") else { return t }
+
+        if let firstNewline = t.firstIndex(of: "\n") {
+            // Usual shape: drop the whole "```json" line.
+            t = String(t[t.index(after: firstNewline)...])
+        } else {
+            // Single-line fence: strip the opening backticks (and a `json` tag) first,
+            // otherwise the backwards search below leaves the opening fence in place.
+            t = String(t.drop(while: { $0 == "`" }))
+            if t.lowercased().hasPrefix("json") { t = String(t.dropFirst(4)) }
+        }
+        if let fenceRange = t.range(of: "```", options: .backwards) {
+            t = String(t[..<fenceRange.lowerBound])
+        }
+        return t.trimmingCharacters(in: .whitespacesAndNewlines)
+    }
+}
@@ -0,0 +1,339 @@
+import Foundation
+
+/// Turns a finished `speakers.json` into topic sections + meeting extras by driving
+/// the gateway LLM — a Swift port of recap-relay's chunked-window analysis, but
+/// starting from already-named speakers (label-merge), so we skip its speaker
+/// clustering and name-inference entirely. Pure helpers are static + testable; the
+/// LLM passes are sequential (one gateway request at a time).
+final class RecapAnalyzer {
+    private let llm: GatewayLLMClient
+    private let model: String
+
+    init(llm: GatewayLLMClient, model: String) {   // stores the two dependencies; performs no work
+        self.llm = llm       // client used for every completion request
+        self.model = model   // model id passed to each `completeJSON` call
+    }
+
+    struct Entry: Equatable {   // one non-empty transcript segment, as built by `entries(from:)`
+        let offset: Double      // segment start, in seconds
+        let end: Double         // segment end (same time base as `offset`)
+        let speaker: String     // speaker label copied from the segment
+        let text: String        // segment text, whitespace-trimmed
+    }
+
+    struct Window: Equatable {  // one analysis window from `planWindows`; all fields index into the entries array
+        let startIdx: Int       // first entry index this window analyzes (incl. leading overlap)
+        let endIdx: Int         // last entry index this window analyzes, inclusive (incl. trailing overlap)
+        let bodyStartIdx: Int   // first entry this window "owns", i.e. where its non-overlap body begins
+    }
+
+    // MARK: - Orchestration
+
+    /// Runs the full recap: topic analysis first, then the meeting extras.
+    ///
+    /// A file with no non-empty segments yields an empty result without touching
+    /// the LLM. Topic analysis failures propagate; extras are best-effort and come
+    /// back nil on any failure. `progress` receives a short status before each pass.
+    func recap(file: SpeakersFile, progress: ((String) async -> Void)? = nil) async throws -> RecapResult {
+        let transcript = Self.entries(from: file)
+        if transcript.isEmpty {
+            return RecapResult(sections: [], extras: nil)
+        }
+
+        await progress?("Finding topics…")
+        let topics = try await analyze(entries: transcript)
+
+        await progress?("Extracting highlights…")
+        let highlights = try? await extras(file: file, entries: transcript, sections: topics)
+
+        return RecapResult(sections: topics, extras: highlights)
+    }
+
+    // MARK: - Analyze (chunked windows → stitched sections)
+
+    /// Splits `entries` into topic sections by prompting the LLM once per planned
+    /// window and stitching the per-window answers together.
+    ///
+    /// Section indices returned by the model are window-local; they are clamped to
+    /// the window and shifted to global entry indices. If nothing usable comes back,
+    /// a single "Conversation" section covering every entry is returned.
+    ///
+    /// - Returns: Chronological, non-overlapping sections; empty for empty input.
+    /// - Throws: Whatever `GatewayLLMClient.completeJSON` throws.
+    func analyze(entries: [Entry]) async throws -> [TopicSection] {
+        // With no entries the fallback below would describe the invalid range 0...-1.
+        guard !entries.isEmpty else { return [] }
+
+        let windows = Self.planWindows(entries)
+        var all: [TopicSection] = []
+        for w in windows {
+            let local = Array(entries[w.startIdx...w.endIdx])
+            let prompt = Self.analyzePrompt(local, totalSec: entries.last?.end ?? 0, windowCount: windows.count)
+            let content = try await llm.completeJSON(model: model, system: nil, user: prompt)
+            for s in Self.parseSections(content) {
+                // Clamp the model's indices into this window, then make them global.
+                let gs = w.startIdx + max(0, min(s.startIndex, local.count - 1))
+                let ge = w.startIdx + max(0, min(s.endIndex, local.count - 1))
+                guard ge >= gs else { continue }
+                all.append(TopicSection(title: s.title, summary: s.summary, startIndex: gs, endIndex: ge))
+            }
+        }
+        let stitched = Self.stitch(all)
+        // If the model returned nothing usable, fall back to one section for the whole call.
+        if stitched.isEmpty {
+            return [TopicSection(title: "Conversation", summary: "", startIndex: 0, endIndex: entries.count - 1)]
+        }
+        return stitched
+    }
+
+    /// Plans time-based analysis windows over `entries`.
+    ///
+    /// A call whose last entry ends at or before `cutoffSec` gets one window over
+    /// everything. Longer calls get bodies of about `bodySec`, each widened by
+    /// `overlapSec` on both sides so a topic straddling a boundary is seen by both
+    /// neighbouring windows (the stitcher dedupes). Every pass advances by at least
+    /// one entry, so the loop always terminates.
+    static func planWindows(_ entries: [Entry],
+                            bodySec: Double = 18 * 60, overlapSec: Double = 2 * 60,
+                            cutoffSec: Double = 25 * 60) -> [Window] {
+        guard let final = entries.last else { return [] }
+        if final.end <= cutoffSec {
+            return [Window(startIdx: 0, endIdx: entries.count - 1, bodyStartIdx: 0)]
+        }
+
+        var plan: [Window] = []
+        var cursor = 0
+        while cursor < entries.count {
+            let bodyFrom = entries[cursor].offset
+            let bodyUntil = bodyFrom + bodySec
+            let leadIn = bodyFrom - overlapSec
+            let leadOut = bodyUntil + overlapSec
+
+            let first = entries.firstIndex(where: { $0.offset >= leadIn }) ?? cursor
+            // Extend past the cursor while the following entries start inside the window.
+            let extra = entries[(cursor + 1)...].prefix(while: { $0.offset <= leadOut }).count
+            plan.append(Window(startIdx: first, endIdx: cursor + extra, bodyStartIdx: cursor))
+
+            let nextBody = entries.firstIndex(where: { $0.offset >= bodyUntil }) ?? entries.count
+            cursor = max(nextBody, cursor + 1)
+        }
+        return plan
+    }
+
+    /// Combines per-window sections into one chronological, non-overlapping list.
+    ///
+    /// Sections are ordered by start index (the wider one first on ties). A section
+    /// that ends inside already-covered territory is dropped; one that merely
+    /// starts inside it has its front trimmed to the first uncovered index.
+    static func stitch(_ sections: [TopicSection]) -> [TopicSection] {
+        let ordered = sections.sorted { lhs, rhs in
+            if lhs.startIndex == rhs.startIndex { return lhs.endIndex > rhs.endIndex }
+            return lhs.startIndex < rhs.startIndex
+        }
+
+        var covered = -1   // highest entry index already claimed by an emitted section
+        return ordered.compactMap { section -> TopicSection? in
+            guard section.endIndex > covered else { return nil }
+            let first = max(section.startIndex, covered + 1)
+            guard first <= section.endIndex else { return nil }
+            covered = section.endIndex
+            return TopicSection(title: section.title, summary: section.summary,
+                                startIndex: first, endIndex: section.endIndex)
+        }
+    }
+
+    // MARK: - Extras
+
+    /// Asks the LLM for the meeting extras (TLDR, decisions, action items, open
+    /// questions, key quotes) and parses the reply.
+    ///
+    /// - Returns: The parsed extras, or nil when the reply holds nothing usable.
+    /// - Throws: Whatever `GatewayLLMClient.completeJSON` throws.
+    func extras(file: SpeakersFile, entries: [Entry], sections: [TopicSection]) async throws -> MeetingExtras? {
+        let raw = try await llm.completeJSON(
+            model: model,
+            system: nil,
+            user: Self.extrasPrompt(file: file, entries: entries, sections: sections),
+            maxTokens: 4096)
+        return Self.parseExtras(raw)
+    }
+
+    // MARK: - Entries
+
+    /// Builds the canonical entry list from a speakers file: segments whose text is
+    /// empty after trimming are dropped, the rest are ordered by start time.
+    static func entries(from file: SpeakersFile) -> [Entry] {
+        let spoken = file.segments.compactMap { segment -> Entry? in
+            let text = (segment.text ?? "").trimmingCharacters(in: .whitespacesAndNewlines)
+            guard !text.isEmpty else { return nil }
+            return Entry(offset: segment.start, end: segment.end, speaker: segment.speaker, text: text)
+        }
+        return spoken.sorted { $0.offset < $1.offset }
+    }
+
+    /// Formats a duration in seconds as `M:SS`, or `H:MM:SS` from one hour up.
+    ///
+    /// The value is rounded to the nearest second. Negative, NaN, infinite or
+    /// otherwise unrepresentable input is rendered as `0:00` rather than trapping
+    /// in the `Double` → `Int` conversion.
+    static func mmss(_ sec: Double) -> String {
+        // `Int(exactly:)` yields nil for NaN/±inf/out-of-range where `Int(_:)` traps.
+        let t = max(0, Int(exactly: sec.rounded()) ?? 0)
+        let h = t / 3600, m = (t % 3600) / 60, s = t % 60
+        return h > 0 ? String(format: "%d:%02d:%02d", h, m, s) : String(format: "%d:%02d", m, s)
+    }
+
+    // MARK: - Prompts
+
+    /// Builds the topic-analysis prompt for one window.
+    ///
+    /// Each entry becomes a `[N] (MM:SS) Name: text` line where `N` is the
+    /// window-local index the model must use in `startIndex` / `endIndex`.
+    private static func analyzePrompt(_ window: [Entry], totalSec: Double, windowCount: Int) -> String {
+        let numbered = window.enumerated()
+            .map { index, entry in "[\(index)] (\(mmss(entry.offset))) \(entry.speaker): \(entry.text)" }
+            .joined(separator: "\n")
+        let spanSec = (window.last?.end ?? 0) - (window.first?.offset ?? 0)
+        let minutes = max(1, Int((spanSec / 60).rounded()))
+        let lastIndex = window.count - 1
+        let goal = targetSectionsPhrase(totalSec: totalSec, windowCount: windowCount)
+        return """
+        You are analyzing a ~\(minutes)-minute section of a longer transcript. Your job is to identify natural topic boundaries and group the transcript into discussion-based sections — aim for \(goal).
+
+        TRANSCRIPT (each line is numbered with a timestamp):
+        \(numbered)
+
+        INSTRUCTIONS:
+        1. Read the entire transcript carefully.
+        2. Identify where the discussion naturally shifts from one topic to another.
+        3. Group consecutive transcript segments by topic. Some sections may be short (a quick aside) and some may be long (an extended deep-dive). Let the content dictate the length.
+        4. For each section, write:
+           - A short, specific topic title (3-8 words)
+           - A 1-3 sentence summary of what's discussed. Attribute points to speakers by name where it improves clarity.
+           - The start and end segment indices (inclusive), counted as the bracketed [N] number at the start of each transcript line above.
+
+        IMPORTANT:
+        - Sections must be chronological and non-overlapping.
+        - Every segment index from 0 to \(lastIndex) must belong to exactly one section.
+        - startIndex of section N+1 must equal endIndex of section N plus 1.
+        - Create as many or as few sections as the content naturally requires — but lean toward broad, substantive topics rather than minute-by-minute breakdowns. A natural topic that spans several minutes of dialogue should be one section, not several.
+        - Titles should be descriptive and specific, not generic like "Introduction" unless it truly is one.
+
+        Respond with ONLY valid JSON in this exact format, no other text:
+        {
+          "sections": [
+            {
+              "title": "Brief Topic Title",
+              "summary": "1-3 sentence summary of this discussion section.",
+              "startIndex": 0,
+              "endIndex": 15
+            }
+          ]
+        }
+        """
+    }
+
+    /// Phrase telling the model roughly how many sections to produce per window.
+    ///
+    /// A whole-call target is picked from the call length, divided across the
+    /// windows, rounded, and never allowed below 2.
+    private static func targetSectionsPhrase(totalSec: Double, windowCount: Int) -> String {
+        let minutes = totalSec / 60
+        let overall: Int
+        switch minutes {
+        case ..<5: overall = 3
+        case ..<15: overall = 4
+        case ..<30: overall = 6
+        case ..<60: overall = 8
+        case ..<120: overall = 12
+        default: overall = 16
+        }
+        let windows = Double(max(1, windowCount))
+        let perWindow = max(2, Int((Double(overall) / windows).rounded()))
+        return "around \(perWindow) sections"
+    }
+
+    /// Builds the meeting-extras prompt: metadata, speaker roster, the topic titles
+    /// already produced, and the transcript capped at 24,000 characters.
+    private static func extrasPrompt(file: SpeakersFile, entries: [Entry], sections: [TopicSection]) -> String {
+        let speakerNames = orderedSpeakerNames(entries)
+        let roster = speakerNames.isEmpty ? "(unknown)" : speakerNames.joined(separator: ", ")
+
+        let topicList: String
+        if sections.isEmpty {
+            topicList = "(none)"
+        } else {
+            topicList = sections.enumerated()
+                .map { index, section in "\(index + 1). \(section.title)" }
+                .joined(separator: "\n")
+        }
+
+        let body = cappedTranscript(entries, maxChars: 24_000)
+        let duration = mmss(file.durationSec)
+        return """
+        You are extracting structured information from an internal team meeting transcript. The transcript below is labeled with the speakers' real names where known.
+
+        MEETING METADATA:
+        - App: \(file.app)
+        - Duration: \(duration)
+
+        SPEAKERS: \(roster)
+
+        TOPIC SUMMARIES (already produced — for context only, do not duplicate):
+        \(topicList)
+
+        TRANSCRIPT (each line is "[<name> <MM:SS>] text"):
+        \(body)
+
+        INSTRUCTIONS:
+        Extract FIVE categories of information. Return EMPTY ARRAYS for categories that don't apply — do NOT invent items. Use the speakers' names exactly as shown above; use null/empty when a person is unclear.
+
+        1. TLDR — A 2-4 sentence executive summary of the entire meeting: what it was about, the key discussion arc, and the bottom-line outcome. Past tense, third person, dense. Skip pleasantries. If the meeting was genuinely substanceless, write one factual sentence. This is the only required category.
+           - summary: the 2-4 sentence executive summary
+           - primary_speakers: array of names who drove the conversation (1-3, in rough order of contribution). Empty array if unclear.
+
+        2. DECISIONS — Things explicitly decided/agreed. Only clear commitments, not casual mentions. For each:
+           - statement: the decision in one sentence
+           - agreed_by: array of names who explicitly agreed (empty if unclear)
+           - supporting_offset: integer SECONDS where it was decided (convert the [<name> <MM:SS>] timestamp to total seconds)
+
+        3. ACTION_ITEMS — Explicit ownership ("I'll send the doc", "Matt will follow up"), not vague "someone should". For each:
+           - description: the action in imperative form
+           - owner: the person's name, or null if unclear
+           - due_hint: deadline string if mentioned ("by Friday"), or null
+           - supporting_offset: integer seconds where the commitment was made
+
+        4. OPEN_QUESTIONS — Questions raised that were NOT clearly answered. Skip rhetorical/answered ones. For each:
+           - question: rephrased to be self-contained
+           - raised_by: the person's name, or null
+           - answered: false (always)
+
+        5. KEY_QUOTES — 3-6 max. Pivotal/insightful/strong-opinion statements worth surfacing verbatim. For each:
+           - speaker: the person's name (or null)
+           - offset: integer seconds where the quote occurs
+           - quote: the verbatim quote (4-30 words)
+           - why_notable: one short clause
+
+        Be conservative — better an empty array than a fabrication. Respond with ONLY valid JSON in this exact shape, no other text:
+        {
+          "tldr": {"summary": "...", "primary_speakers": []},
+          "decisions": [{"statement": "...", "agreed_by": [], "supporting_offset": 0}],
+          "action_items": [{"description": "...", "owner": null, "due_hint": null, "supporting_offset": 0}],
+          "open_questions": [{"question": "...", "raised_by": null, "answered": false}],
+          "key_quotes": [{"speaker": null, "offset": 0, "quote": "...", "why_notable": "..."}]
+        }
+        """
+    }
+
+    /// Unique, non-empty speaker names in the order each first speaks.
+    static func orderedSpeakerNames(_ entries: [Entry]) -> [String] {
+        var alreadyListed = Set<String>()
+        // `insert` reports whether the name was new, which doubles as the filter test.
+        return entries
+            .map { $0.speaker }
+            .filter { name in !name.isEmpty && alreadyListed.insert(name).inserted }
+    }
+
+    /// Renders the whole transcript as `[name MM:SS] text` lines. When the result is
+    /// longer than `maxChars` the middle is cut out: the first and last
+    /// `maxChars / 2` characters are kept, joined by a truncation marker, so a long
+    /// call still fits the model context.
+    static func cappedTranscript(_ entries: [Entry], maxChars: Int) -> String {
+        let rendered = entries
+            .map { entry in "[\(entry.speaker) \(mmss(entry.offset))] \(entry.text)" }
+            .joined(separator: "\n")
+        if rendered.count <= maxChars { return rendered }
+
+        let keep = maxChars / 2
+        return "\(rendered.prefix(keep))\n…[transcript truncated]…\n\(rendered.suffix(keep))"
+    }
+
+    // MARK: - Parsing (defensive — LLM output)
+
+    /// Parses LLM output into a JSON dictionary after removing any code fence.
+    /// Returns nil when the text is not valid JSON or its top level is not an object.
+    private static func jsonObject(_ content: String) -> [String: Any]? {
+        let payload = Data(GatewayLLMClient.stripCodeFence(content).utf8)
+        guard let parsed = try? JSONSerialization.jsonObject(with: payload) else { return nil }
+        return parsed as? [String: Any]
+    }
+
+    /// Reads the `sections` array from the analyze reply.
+    ///
+    /// An item is kept only when it has a non-empty title and both indices parse as
+    /// integers; a missing summary becomes "". Unparsable replies yield an empty
+    /// array. Indices are returned exactly as the model gave them.
+    static func parseSections(_ content: String) -> [(title: String, summary: String, startIndex: Int, endIndex: Int)] {
+        guard let root = jsonObject(content),
+              let rawSections = root["sections"] as? [[String: Any]]
+        else { return [] }
+
+        var parsed: [(title: String, summary: String, startIndex: Int, endIndex: Int)] = []
+        for raw in rawSections {
+            let title = (raw["title"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
+            guard !title.isEmpty,
+                  let first = intVal(raw["startIndex"]),
+                  let last = intVal(raw["endIndex"])
+            else { continue }
+            let summary = (raw["summary"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
+            parsed.append((title: title, summary: summary, startIndex: first, endIndex: last))
+        }
+        return parsed
+    }
+
+    /// Reads the meeting extras from the LLM reply.
+    ///
+    /// Every category is optional in the input: a missing or wrongly-typed one
+    /// becomes empty, and items without their main text field are skipped. String
+    /// fields are trimmed and a blank or literal "null" value counts as absent.
+    ///
+    /// - Returns: The extras, or nil when the reply is not a JSON object or every
+    ///   category (including the TLDR summary) came back empty.
+    static func parseExtras(_ content: String) -> MeetingExtras? {
+        guard let o = jsonObject(content) else { return nil }
+        let tldrObj = o["tldr"] as? [String: Any]
+        // Normalized like every other string field, so a whitespace-only summary
+        // does not count as content.
+        let tldr = MeetingExtras.TLDR(
+            summary: nonEmpty(tldrObj?["summary"]) ?? "",
+            primarySpeakers: stringArray(tldrObj?["primary_speakers"]))
+        let decisions = (o["decisions"] as? [[String: Any]] ?? []).compactMap { d -> MeetingExtras.Decision? in
+            guard let st = nonEmpty(d["statement"]) else { return nil }
+            return .init(statement: st, agreedBy: stringArray(d["agreed_by"]), supportingOffset: intVal(d["supporting_offset"]))
+        }
+        let actions = (o["action_items"] as? [[String: Any]] ?? []).compactMap { d -> MeetingExtras.ActionItem? in
+            guard let desc = nonEmpty(d["description"]) else { return nil }
+            return .init(description: desc, owner: nonEmpty(d["owner"]), dueHint: nonEmpty(d["due_hint"]),
+                         supportingOffset: intVal(d["supporting_offset"]))
+        }
+        let questions = (o["open_questions"] as? [[String: Any]] ?? []).compactMap { d -> MeetingExtras.OpenQuestion? in
+            guard let q = nonEmpty(d["question"]) else { return nil }
+            return .init(question: q, raisedBy: nonEmpty(d["raised_by"]))
+        }
+        let quotes = (o["key_quotes"] as? [[String: Any]] ?? []).compactMap { d -> MeetingExtras.KeyQuote? in
+            guard let q = nonEmpty(d["quote"]) else { return nil }
+            return .init(speaker: nonEmpty(d["speaker"]), offset: intVal(d["offset"]), quote: q,
+                         whyNotable: nonEmpty(d["why_notable"]) ?? "")
+        }
+        // Extras count as present when ANY category has content; only an entirely
+        // empty reply is reported as nil.
+        guard !tldr.summary.isEmpty || !decisions.isEmpty || !actions.isEmpty || !questions.isEmpty || !quotes.isEmpty
+        else { return nil }
+        return MeetingExtras(tldr: tldr, decisions: decisions, actionItems: actions,
+                             openQuestions: questions, keyQuotes: quotes)
+    }
+
+    /// Leniently reads an integer from a JSON value: an integer number, a
+    /// floating-point number (truncated toward zero), or a numeric string.
+    ///
+    /// Returns nil for anything else, including numbers that do not fit in `Int`
+    /// (NaN, infinities, values such as `1e30`), which `Int(_:)` would trap on.
+    private static func intVal(_ v: Any?) -> Int? {
+        if let i = v as? Int { return i }
+        if let d = v as? Double { return Int(exactly: d.rounded(.towardZero)) }
+        if let s = v as? String { return Int(s.trimmingCharacters(in: .whitespacesAndNewlines)) }
+        return nil
+    }
+
+    /// Reads a JSON array of strings: non-string items are skipped, each string is
+    /// trimmed, and blank ones are dropped. Anything that is not an array yields [].
+    private static func stringArray(_ v: Any?) -> [String] {
+        guard let items = v as? [Any] else { return [] }
+        var result: [String] = []
+        for case let text as String in items {
+            let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
+            if !trimmed.isEmpty { result.append(trimmed) }
+        }
+        return result
+    }
+
+    /// Reads an optional string field: returns the trimmed text, or nil when the
+    /// value is not a string, is blank, or is the word "null" in any letter case.
+    private static func nonEmpty(_ v: Any?) -> String? {
+        guard let raw = v as? String else { return nil }
+        let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines)
+        if trimmed.isEmpty || trimmed.lowercased() == "null" { return nil }
+        return trimmed
+    }
+}
@@ -0,0 +1,38 @@
+import Foundation
+
+/// One topic section: a contiguous run of transcript entries `[startIndex...endIndex]`
+/// (inclusive, indices into the canonical entries array) with an LLM title + summary.
+struct TopicSection: Equatable {
+    var title: String       // short topic title
+    var summary: String     // summary text; may be empty
+    var startIndex: Int     // first entry of the section (inclusive)
+    var endIndex: Int       // last entry of the section (inclusive)
+}
+
+/// Structured "meeting extras" pulled from the named transcript. The shape mirrors
+/// recap-relay's schema, except that speakers are real names (already known from
+/// label-merge) rather than anonymous cluster ids.
+struct MeetingExtras: Equatable {
+    /// Executive summary plus the people who drove the conversation.
+    struct TLDR: Equatable {
+        var summary: String
+        var primarySpeakers: [String]
+    }
+
+    /// Something explicitly decided, who agreed, and where (seconds) if known.
+    struct Decision: Equatable {
+        var statement: String
+        var agreedBy: [String]
+        var supportingOffset: Int?
+    }
+
+    /// A committed action with optional owner, deadline hint and offset (seconds).
+    struct ActionItem: Equatable {
+        var description: String
+        var owner: String?
+        var dueHint: String?
+        var supportingOffset: Int?
+    }
+
+    /// A question that was raised but not clearly answered.
+    struct OpenQuestion: Equatable {
+        var question: String
+        var raisedBy: String?
+    }
+
+    /// A verbatim quote worth surfacing, with the reason it stands out.
+    struct KeyQuote: Equatable {
+        var speaker: String?
+        var offset: Int?
+        var quote: String
+        var whyNotable: String
+    }
+
+    var tldr: TLDR
+    var decisions: [Decision]
+    var actionItems: [ActionItem]
+    var openQuestions: [OpenQuestion]
+    var keyQuotes: [KeyQuote]
+
+    /// True when every category other than the TLDR is empty.
+    var isEmptyBeyondTLDR: Bool {
+        [decisions.isEmpty, actionItems.isEmpty, openQuestions.isEmpty, keyQuotes.isEmpty]
+            .allSatisfy { $0 }
+    }
+}
+
+/// The assembled recap for one session: the topic sections + (optional) extras,
+/// over the session's transcript. Rendered to `transcript.md` / `recap.html`.
+struct RecapResult: Equatable {
+    var sections: [TopicSection]    // topic sections; empty when the transcript had no usable entries
+    var extras: MeetingExtras?      // nil when extraction failed or produced nothing usable
+}
@@ -0,0 +1,253 @@
+import Foundation
+
+/// Renders a session (`speakers.json` + recap) into human-readable artifacts:
+/// `transcript.md` (portable/editable) and `recap.html` (self-contained, dark
+/// theme, printable, shareable). Port of recap-relay's meetingToMarkdown/Html,
+/// driven by our already-named transcript.
+enum RecapRenderer {
+    /// Renders the session and writes both artifacts into `folder`:
+    /// `transcript.md` first, then `recap.html`.
+    ///
+    /// Both documents are rendered before anything touches the disk, and each file
+    /// is written atomically, so a failed write never leaves a truncated artifact
+    /// behind for a caller to pick up.
+    ///
+    /// - Parameters:
+    ///   - file: The session's speakers file; the transcript entries are derived from it.
+    ///   - result: Topic sections and optional extras to render.
+    ///   - title: Document title (plain text; escaped where needed by the renderers).
+    ///   - folder: Destination directory.
+    /// - Throws: Any error raised while writing either file.
+    static func write(file: SpeakersFile, result: RecapResult, title: String, to folder: URL) throws {
+        let entries = RecapAnalyzer.entries(from: file)
+        let artifacts: [(name: String, contents: String)] = [
+            ("transcript.md", markdown(file: file, result: result, title: title, entries: entries)),
+            ("recap.html", html(file: file, result: result, title: title, entries: entries)),
+        ]
+        for artifact in artifacts {
+            // `Data(_:)` over the UTF-8 view cannot fail, unlike the optional
+            // `data(using:)`, whose `nil` would silently skip the write.
+            try Data(artifact.contents.utf8)
+                .write(to: folder.appendingPathComponent(artifact.name), options: .atomic)
+        }
+    }
+
+    // MARK: - Markdown
+
+    /// Builds the portable Markdown rendering of a session.
+    ///
+    /// Layout, top to bottom: `# title`, an italic metadata line (app · duration ·
+    /// speakers), the extras groups that have content (Summary, Decisions, Action
+    /// Items, Open Questions, Key Quotes), a numbered "Topics" list with a
+    /// collapsible transcript per section, and finally the full transcript.
+    ///
+    /// - Parameters:
+    ///   - file: Supplies the app name and total duration for the metadata line.
+    ///   - result: Topic sections and optional extras.
+    ///   - title: Top-level heading text.
+    ///   - entries: Transcript entries that section indices refer to.
+    /// - Returns: The complete Markdown document.
+    static func markdown(file: SpeakersFile, result: RecapResult, title: String,
+                         entries: [RecapAnalyzer.Entry]) -> String {
+        // Italic, parenthesised timestamp suffix for a whole-number offset.
+        func stamp(_ seconds: Int) -> String { " *(\(RecapAnalyzer.mmss(Double(seconds))))*" }
+
+        let names = RecapAnalyzer.orderedSpeakerNames(entries)
+        var md = "# \(title)\n\n"
+
+        // Metadata line.
+        var meta = "*\(file.app) · \(RecapAnalyzer.mmss(file.durationSec))"
+        if !names.isEmpty {
+            let noun = names.count == 1 ? "speaker" : "speakers"
+            meta += " · \(names.count) \(noun): \(names.joined(separator: ", "))"
+        }
+        md += meta + "*\n\n"
+
+        if let extras = result.extras {
+            let tldr = extras.tldr
+            if !tldr.summary.isEmpty {
+                md += "## Summary\n\n\(tldr.summary)\n"
+                if !tldr.primarySpeakers.isEmpty {
+                    md += "\n*Primary speakers: \(tldr.primarySpeakers.joined(separator: ", "))*\n"
+                }
+                md += "\n"
+            }
+
+            if !extras.decisions.isEmpty {
+                let bullets = extras.decisions.map { decision -> String in
+                    let agreed = decision.agreedBy.isEmpty
+                        ? "" : " — agreed by \(decision.agreedBy.joined(separator: ", "))"
+                    let when = decision.supportingOffset.map(stamp) ?? ""
+                    return "- \(decision.statement)\(agreed)\(when)\n"
+                }
+                md += "## Decisions\n\n" + bullets.joined() + "\n"
+            }
+
+            if !extras.actionItems.isEmpty {
+                let bullets = extras.actionItems.map { item -> String in
+                    let owner = item.owner.map { " — **\($0)**" } ?? ""
+                    let due = item.dueHint.map { " (\($0))" } ?? ""
+                    let when = item.supportingOffset.map(stamp) ?? ""
+                    return "- [ ] \(item.description)\(owner)\(due)\(when)\n"
+                }
+                md += "## Action Items\n\n" + bullets.joined() + "\n"
+            }
+
+            if !extras.openQuestions.isEmpty {
+                let bullets = extras.openQuestions.map { question -> String in
+                    let asker = question.raisedBy.map { " — *\($0)*" } ?? ""
+                    return "- \(question.question)\(asker)\n"
+                }
+                md += "## Open Questions\n\n" + bullets.joined() + "\n"
+            }
+
+            if !extras.keyQuotes.isEmpty {
+                md += "## Key Quotes\n\n"
+                for item in extras.keyQuotes {
+                    let parts: [String?] = [item.speaker, item.offset.map { RecapAnalyzer.mmss(Double($0)) }]
+                    let credit = parts.compactMap { $0 }
+                    md += "> \"\(item.quote)\""
+                    if !credit.isEmpty { md += " — \(credit.joined(separator: ", "))" }
+                    md += "\n"
+                    if !item.whyNotable.isEmpty { md += ">\n> \(item.whyNotable)\n" }
+                    md += "\n"
+                }
+            }
+        }
+
+        if !result.sections.isEmpty {
+            md += "## Topics\n\n"
+            var number = 0
+            for section in result.sections {
+                number += 1
+                md += "### \(number). \(section.title)\(timeRange(section, entries: entries))\n\n"
+                if !section.summary.isEmpty { md += "\(section.summary)\n\n" }
+                md += "<details>\n<summary>Transcript</summary>\n\n"
+                md += transcriptLines(section, entries: entries)
+                md += "\n</details>\n\n"
+            }
+        }
+
+        md += "## Full Transcript\n\n"
+        md += entries
+            .map { "**[\(RecapAnalyzer.mmss($0.offset))] \($0.speaker):** \($0.text)\n\n" }
+            .joined()
+        return md
+    }
+
+    /// Markdown suffix giving a section's time span, e.g. `  *(00:10–04:32)*`:
+    /// the first entry's `offset` through the last entry's `end`.
+    ///
+    /// - Returns: The formatted suffix, or an empty string when either section
+    ///   index falls outside `entries`.
+    private static func timeRange(_ sec: TopicSection, entries: [RecapAnalyzer.Entry]) -> String {
+        let valid = entries.indices
+        if !valid.contains(sec.startIndex) || !valid.contains(sec.endIndex) { return "" }
+        let from = RecapAnalyzer.mmss(entries[sec.startIndex].offset)
+        let to = RecapAnalyzer.mmss(entries[sec.endIndex].end)
+        return "  *(\(from)–\(to))*"
+    }
+
+    /// Markdown transcript for one section: one `**[mm:ss] Speaker:** text`
+    /// paragraph per entry, separated by blank lines.
+    ///
+    /// - Returns: The joined paragraphs, or an empty string when the section's
+    ///   indices are out of bounds or reversed.
+    private static func transcriptLines(_ sec: TopicSection, entries: [RecapAnalyzer.Entry]) -> String {
+        let valid = entries.indices
+        guard valid.contains(sec.startIndex), valid.contains(sec.endIndex),
+              sec.startIndex <= sec.endIndex else { return "" }
+        var paragraphs: [String] = []
+        for turn in entries[sec.startIndex...sec.endIndex] {
+            paragraphs.append("**[\(RecapAnalyzer.mmss(turn.offset))] \(turn.speaker):** \(turn.text)")
+        }
+        return paragraphs.joined(separator: "\n\n")
+    }
+
+    // MARK: - HTML
+
+    /// Builds the complete, self-contained `recap.html` document.
+    ///
+    /// Layout, top to bottom: header (title, app · duration · speaker count, colour
+    /// legend), one card per extras group that has content, a "Topics" card of
+    /// collapsible sections, then the full transcript (expanded by default). Every
+    /// string taken from the inputs goes through `esc` before it is embedded.
+    ///
+    /// - Parameters:
+    ///   - file: Supplies the app name and total duration for the header.
+    ///   - result: Topic sections and optional extras.
+    ///   - title: Page title and main heading (plain text).
+    ///   - entries: Transcript entries that section indices refer to.
+    /// - Returns: The page markup produced by `htmlShell`.
+    static func html(file: SpeakersFile, result: RecapResult, title: String,
+                     entries: [RecapAnalyzer.Entry]) -> String {
+        let names = RecapAnalyzer.orderedSpeakerNames(entries)
+        let colors = speakerColors(names)
+
+        // Coloured name badge; a speaker missing from the colour map gets a neutral grey.
+        func chip(_ name: String) -> String {
+            let fill = colors[name] ?? "#8a8f98"
+            return "<span class=\"chip\" style=\"background:\(fill)\">\(esc(name))</span>"
+        }
+        // Timestamp badge (including its leading space) for a whole-number offset.
+        func offsetBadge(_ seconds: Int) -> String {
+            " <span class=\"ts\">\(RecapAnalyzer.mmss(Double(seconds)))</span>"
+        }
+        // One transcript row: timestamp, speaker chip, escaped text.
+        func row(_ entry: RecapAnalyzer.Entry) -> String {
+            "<div class=\"turn\"><span class=\"ts\">\(RecapAnalyzer.mmss(entry.offset))</span> \(chip(entry.speaker)) <span class=\"txt\">\(esc(entry.text))</span></div>"
+        }
+
+        // Header.
+        var subtitle = "\(esc(file.app)) · \(RecapAnalyzer.mmss(file.durationSec))"
+        if !names.isEmpty {
+            subtitle += " · \(names.count) " + (names.count == 1 ? "speaker" : "speakers")
+        }
+        var page = "<header><h1>\(esc(title))</h1><div class=\"sub\">\(subtitle)</div>"
+        if !names.isEmpty {
+            page += "<div class=\"legend\">\(names.map(chip).joined())</div>"
+        }
+        page += "</header>"
+
+        // Extras panels.
+        if let extras = result.extras {
+            if !extras.tldr.summary.isEmpty {
+                var inner = "<p>\(esc(extras.tldr.summary))</p>"
+                if !extras.tldr.primarySpeakers.isEmpty {
+                    let primary = extras.tldr.primarySpeakers.map { esc($0) }.joined(separator: ", ")
+                    inner += "<p class=\"muted\">Primary: \(primary)</p>"
+                }
+                page += card("Summary", inner)
+            }
+
+            if !extras.decisions.isEmpty {
+                var list = ""
+                for decision in extras.decisions {
+                    list += "<li>\(esc(decision.statement))"
+                    if !decision.agreedBy.isEmpty {
+                        let who = decision.agreedBy.map { esc($0) }.joined(separator: ", ")
+                        list += " <span class=\"muted\">— agreed by \(who)</span>"
+                    }
+                    if let seconds = decision.supportingOffset { list += offsetBadge(seconds) }
+                    list += "</li>"
+                }
+                page += card("Decisions", "<ul>\(list)</ul>")
+            }
+
+            if !extras.actionItems.isEmpty {
+                var list = ""
+                for item in extras.actionItems {
+                    list += "<li>☐ \(esc(item.description))"
+                    if let owner = item.owner { list += " <strong>\(esc(owner))</strong>" }
+                    if let due = item.dueHint { list += " <span class=\"muted\">(\(esc(due)))</span>" }
+                    if let seconds = item.supportingOffset { list += offsetBadge(seconds) }
+                    list += "</li>"
+                }
+                page += card("Action Items", "<ul class=\"actions\">\(list)</ul>")
+            }
+
+            if !extras.openQuestions.isEmpty {
+                var list = ""
+                for question in extras.openQuestions {
+                    list += "<li>\(esc(question.question))"
+                    if let asker = question.raisedBy { list += " <span class=\"muted\">— \(esc(asker))</span>" }
+                    list += "</li>"
+                }
+                page += card("Open Questions", "<ul>\(list)</ul>")
+            }
+
+            if !extras.keyQuotes.isEmpty {
+                var quotes = ""
+                for item in extras.keyQuotes {
+                    let parts: [String?] = [item.speaker.map { esc($0) },
+                                            item.offset.map { RecapAnalyzer.mmss(Double($0)) }]
+                    let credit = parts.compactMap { $0 }
+                    quotes += "<blockquote>“\(esc(item.quote))”"
+                    if !credit.isEmpty { quotes += "<cite>— \(credit.joined(separator: ", "))</cite>" }
+                    if !item.whyNotable.isEmpty { quotes += "<div class=\"muted\">\(esc(item.whyNotable))</div>" }
+                    quotes += "</blockquote>"
+                }
+                page += card("Key Quotes", quotes)
+            }
+        }
+
+        // Topic sections, each collapsed by default.
+        if !result.sections.isEmpty {
+            let valid = entries.indices
+            var topics = ""
+            var number = 0
+            for section in result.sections {
+                number += 1
+                var span = ""
+                if valid.contains(section.startIndex), valid.contains(section.endIndex) {
+                    let from = RecapAnalyzer.mmss(entries[section.startIndex].offset)
+                    let to = RecapAnalyzer.mmss(entries[section.endIndex].end)
+                    span = "<span class=\"ts\">\(from)–\(to)</span>"
+                }
+                topics += "<details class=\"topic\"><summary><span class=\"tnum\">\(number)</span> \(esc(section.title)) \(span)</summary>"
+                if !section.summary.isEmpty { topics += "<p>\(esc(section.summary))</p>" }
+                topics += "<div class=\"turns\">\(turnsHtml(section, entries: entries, chip: chip))</div></details>"
+            }
+            page += card("Topics", topics)
+        }
+
+        // Full transcript.
+        let transcript = entries.map(row).joined()
+        page += "<details class=\"topic\" open><summary>Full Transcript</summary><div class=\"turns\">\(transcript)</div></details>"
+
+        return htmlShell(title: esc(title), body: page)
+    }
+
+    /// HTML rows for the entries of one section: timestamp, speaker chip and
+    /// escaped text per turn, concatenated with no separator.
+    ///
+    /// - Parameter chip: Renders a speaker name as its badge markup.
+    /// - Returns: The concatenated rows, or an empty string when the section's
+    ///   indices are out of bounds or reversed.
+    private static func turnsHtml(_ sec: TopicSection, entries: [RecapAnalyzer.Entry],
+                                  chip: (String) -> String) -> String {
+        let valid = entries.indices
+        guard valid.contains(sec.startIndex), valid.contains(sec.endIndex),
+              sec.startIndex <= sec.endIndex else { return "" }
+        var markup = ""
+        for turn in entries[sec.startIndex...sec.endIndex] {
+            let stamp = RecapAnalyzer.mmss(turn.offset)
+            markup += "<div class=\"turn\"><span class=\"ts\">\(stamp)</span> \(chip(turn.speaker)) <span class=\"txt\">\(esc(turn.text))</span></div>"
+        }
+        return markup
+    }
+
+    /// Wraps `inner` in a titled card section.
+    ///
+    /// - Parameters:
+    ///   - title: Plain-text heading; escaped here.
+    ///   - inner: Markup inserted verbatim, so it must already be safe HTML.
+    private static func card(_ title: String, _ inner: String) -> String {
+        let heading = "<h2>" + esc(title) + "</h2>"
+        return "<section class=\"card\">" + heading + inner + "</section>"
+    }
+
+    /// Chip background colours, handed out by position in the speaker list.
+    private static let palette = ["#5b8def", "#e0719c", "#43b581", "#e8a33d", "#9b6dde",
+                                  "#3fb6c9", "#d96f6f", "#7aa55c"]
+    /// Maps each name to a palette colour by its position in `names`, wrapping
+    /// around once the palette is exhausted. A repeated name keeps the colour of
+    /// its last occurrence.
+    private static func speakerColors(_ names: [String]) -> [String: String] {
+        names.enumerated().reduce(into: [String: String]()) { assigned, item in
+            assigned[item.element] = palette[item.offset % palette.count]
+        }
+    }
+
+    /// Escapes `&`, `<`, `>` and `"` so `s` can be embedded as HTML text.
+    private static func esc(_ s: String) -> String {
+        // `&` must stay first so entities produced by the later passes are not re-escaped.
+        let entities: [(raw: String, entity: String)] = [
+            ("&", "&amp;"), ("<", "&lt;"), (">", "&gt;"), ("\"", "&quot;"),
+        ]
+        return entities.reduce(s) { text, pair in
+            text.replacingOccurrences(of: pair.raw, with: pair.entity)
+        }
+    }
+
+    /// Wraps rendered content in the full HTML document: doctype, head with the
+    /// inline stylesheet (dark theme plus `@media print` overrides), and a `<main>`
+    /// container followed by the footer.
+    ///
+    /// - Parameters:
+    ///   - title: Text for the `<title>` element. Inserted verbatim, so the caller
+    ///     must pass it already HTML-escaped.
+    ///   - body: Markup placed inside `<main>`. Inserted verbatim.
+    /// - Returns: The complete page as a single string.
+    private static func htmlShell(title: String, body: String) -> String {
+        """
+        <!DOCTYPE html><html lang="en"><head><meta charset="utf-8">
+        <meta name="viewport" content="width=device-width, initial-scale=1">
+        <title>\(title)</title>
+        <style>
+        :root{--bg:#15171c;--card:#1d2026;--fg:#e6e8ec;--muted:#9aa0aa;--line:#2a2e36;--accent:#5b8def;}
+        *{box-sizing:border-box}
+        body{margin:0;background:var(--bg);color:var(--fg);font:15px/1.55 -apple-system,BlinkMacSystemFont,"Segoe UI",sans-serif;}
+        main{max-width:820px;margin:0 auto;padding:32px 20px 80px;}
+        header h1{margin:0 0 4px;font-size:24px}
+        .sub{color:var(--muted);font-size:13px}
+        .legend{margin-top:12px;display:flex;flex-wrap:wrap;gap:6px}
+        .chip{display:inline-block;padding:1px 8px;border-radius:10px;color:#fff;font-size:12px;font-weight:600}
+        .card{background:var(--card);border:1px solid var(--line);border-radius:12px;padding:16px 18px;margin-top:18px}
+        .card h2{margin:0 0 10px;font-size:16px;color:var(--accent)}
+        .muted{color:var(--muted)}
+        ul{margin:0;padding-left:18px} li{margin:4px 0}
+        ul.actions{list-style:none;padding-left:0}
+        .ts{color:var(--muted);font-variant-numeric:tabular-nums;font-size:12px;margin-right:4px}
+        blockquote{margin:0 0 12px;padding:8px 12px;border-left:3px solid var(--accent);background:#0e0f13;border-radius:0 8px 8px 0}
+        blockquote cite{display:block;color:var(--muted);font-size:12px;margin-top:4px;font-style:normal}
+        details.topic{border-top:1px solid var(--line);padding:10px 0}
+        details.topic > summary{cursor:pointer;font-weight:600;list-style:none}
+        details.topic > summary::-webkit-details-marker{display:none}
+        .tnum{display:inline-block;min-width:20px;color:var(--accent);font-weight:700}
+        .turns{margin-top:10px}
+        .turn{margin:6px 0;display:flex;gap:8px;align-items:baseline;flex-wrap:wrap}
+        .turn .txt{flex:1;min-width:60%}
+        @media print{body{background:#fff;color:#000}.card,blockquote{background:#fff;border-color:#ccc}details.topic{}.chip{border:1px solid #999}}
+        </style></head>
+        <body><main>\(body)
+        <footer class="sub" style="margin-top:40px">Ten31 Transcripts · generated on-device</footer>
+        </main></body></html>
+        """
+    }
+}
@@ -54,6 +54,8 @@ final class SessionController: ObservableObject {
    @Published private(set) var detectionStatus: CallDetector.Status = .disabled
    /// Backend transcription status for the last session.
    @Published private(set) var transcriptStatus: TranscriptStatus = .idle
+    /// Set when a readable recap (`recap.html`) has been written for the last session.
+    @Published private(set) var recapURL: URL?

    private let settings: AppSettings
    private var voiceprints: VoiceprintStore
@@ -195,6 +197,7 @@ final class SessionController: ObservableObject {
            mixedURL: folder.appendingPathComponent("mixed_mono_16k.wav"))
        self.recorder = recorder
        warning = nil
+        recapURL = nil
        state = .starting

        lifecycleGeneration += 1
@@ -361,6 +364,7 @@ final class SessionController: ObservableObject {
        guard let inputs = lastProcess else { return }
        if case .processing = transcriptStatus { return }
        transcriptStatus = .processing(0, 1)
+        recapURL = nil

        let settings = self.settings
        let voiceprints = self.voiceprints
@@ -379,6 +383,11 @@ final class SessionController: ObservableObject {
                        await MainActor.run { self.transcriptStatus = .processing(done, total) }
                    })
                self.transcriptStatus = .done(speakers: speakers.speakers.count, segments: speakers.segments.count)
+                // Best-effort readable recap (topic sections + extras) via the gateway LLM.
+                if settings.recapEnabled, !speakers.segments.isEmpty {
+                    try Task.checkCancellation()
+                    await self.buildRecap(speakers: speakers, inputs: inputs, settings: settings)
+                }
            } catch is CancellationError {
                self.transcriptStatus = .idle
            } catch {
@@ -387,6 +396,31 @@ final class SessionController: ObservableObject {
        }
    }

+    /// Build `transcript.md` + `recap.html` from the finished `speakers.json` using
+    /// the gateway LLM. Best-effort: a missing LLM or any failure leaves the
+    /// transcript intact and just skips the recap.
+    ///
+    /// `recapURL` is published only when this run actually wrote the files, so a
+    /// `recap.html` left in the folder by an earlier pass is never presented as the
+    /// outcome of a run that failed.
+    ///
+    /// - Parameters:
+    ///   - speakers: The finished, named transcript to summarise.
+    ///   - inputs: Supplies the session folder plus the app and session id used for the title.
+    ///   - settings: Supplies the gateway base URL and TLS-verification preference.
+    private func buildRecap(speakers: SpeakersFile, inputs: ProcessInputs, settings: AppSettings) async {
+        let llm = GatewayLLMClient(baseURL: settings.backendBaseURL, skipTLS: settings.skipTLSVerification)
+        guard let model = await llm.chatModelId() else { return }   // no LLM on the gateway → skip
+        let analyzer = RecapAnalyzer(llm: llm, model: model)
+        guard let result = try? await analyzer.recap(file: speakers) else { return }
+        // Cancelled while the LLM was working: don't write or surface a recap for this run.
+        guard !Task.isCancelled else { return }
+        let title = Self.recapTitle(app: inputs.app, sessionId: inputs.sessionId)
+        do {
+            try RecapRenderer.write(file: speakers, result: result, title: title, to: inputs.folder)
+        } catch {
+            // Best-effort: the transcript itself is unaffected, so just skip the recap.
+            return
+        }
+        recapURL = inputs.folder.appendingPathComponent("recap.html")
+    }
+
+    /// Friendly recap title, e.g. "Google Meet call — 2026-06-06 11:43".
+    ///
+    /// The date and time are read from the part of `sessionId` before the first
+    /// `_`: the date is whatever precedes `T`, and the time is the first two
+    /// `-`-separated fields after it.
+    /// NOTE(review): this presumes ids shaped like `2026-06-06T11-43-…_suffix` — confirm
+    /// against wherever session ids are generated.
+    ///
+    /// - Parameters:
+    ///   - app: Raw app identifier; shown via `CallDetector.DetectedApp.display` when
+    ///     recognised, otherwise capitalised as-is.
+    ///   - sessionId: Session identifier to pull the timestamp from.
+    /// - Returns: "<App> call — <date> <time>", dropping whichever of date/time is
+    ///   missing, and dropping the dash too when neither can be extracted.
+    private static func recapTitle(app: String, sessionId: String) -> String {
+        let appName = CallDetector.DetectedApp(rawValue: app)?.display ?? app.capitalized
+        let stamp = sessionId.split(separator: "_").first.map(String.init) ?? sessionId
+        let parts = stamp.split(separator: "T")
+        let date = parts.first.map(String.init) ?? ""
+        let timeBits = parts.count > 1 ? parts[1].split(separator: "-") : []
+        let time = timeBits.count >= 2 ? "\(timeBits[0]):\(timeBits[1])" : ""
+        // Only add the separator when there is something to put after it, so an
+        // unparsable id doesn't leave a dangling "—" at the end of the title.
+        let when = [date, time].filter { !$0.isEmpty }.joined(separator: " ")
+        let heading = "\(appName) call"
+        let title = when.isEmpty ? heading : "\(heading) — \(when)"
+        return title.trimmingCharacters(in: .whitespaces)
+    }
+
    private func fail(_ message: String) {
        recorder = nil
        visualCapture = nil   // recorder.start() failed before visual started; nothing running
@@ -47,6 +47,12 @@ final class AppSettings: ObservableObject {
        didSet { defaults.set(autoSendOnStop, forKey: Keys.autoSend) }
    }

+    /// After transcription, build the readable recap (topic sections + meeting
+    /// extras) via the gateway LLM and write transcript.md / recap.html. Best-effort.
+    /// Every change is persisted to `defaults` under `Keys.recapEnabled`; the
+    /// initializer falls back to `true` when no value has been stored.
+    @Published var recapEnabled: Bool {
+        didSet { defaults.set(recapEnabled, forKey: Keys.recapEnabled) }
+    }
+
    /// Output folder as a resolved file URL (expands a leading `~`).
    var outputFolderURL: URL {
        URL(fileURLWithPath: (outputFolderPath as NSString).expandingTildeInPath,
@@ -74,6 +80,7 @@ final class AppSettings: ObservableObject {
        self.autoRecordOnDetection = defaults.object(forKey: Keys.autoRecord) as? Bool ?? true
        self.selfName = defaults.string(forKey: Keys.selfName) ?? "Me"
        self.autoSendOnStop = defaults.object(forKey: Keys.autoSend) as? Bool ?? false
+        self.recapEnabled = defaults.object(forKey: Keys.recapEnabled) as? Bool ?? true
    }

    private enum Keys {
@@ -84,5 +91,6 @@ final class AppSettings: ObservableObject {
        static let autoRecord = "autoRecordOnDetection"
        static let selfName = "selfName"
        static let autoSend = "autoSendOnStop"
+        static let recapEnabled = "recapEnabled"
    }
 }
@@ -91,6 +91,9 @@ struct MenuBarView: View {
                HStack {
                    Button("Send to backend") { session.processLastSession() }
                        .disabled(transcriptProcessing)
+                    if let recap = session.recapURL {
+                        Button("Open recap") { NSWorkspace.shared.open(recap) }
+                    }
                    Spacer()
                }
                if !transcriptText.isEmpty {
@@ -25,7 +25,8 @@ struct SettingsView: View {
                TextField("Your name", text: $settings.selfName)
                    .textFieldStyle(.roundedBorder)
                Toggle("Auto-send recordings to backend", isOn: $settings.autoSendOnStop)
-                Text("Your name labels the mic-VAD \"self\" spans. Auto-send transcribes each recording on stop.")
+                Toggle("Build readable recap (topics + highlights)", isOn: $settings.recapEnabled)
+                Text("Your name labels your mic channel. Auto-send transcribes on stop; the recap writes transcript.md + recap.html.")
                    .font(.caption)
                    .foregroundStyle(.secondary)
            }