import Foundation

/// Turns a finished `speakers.json` into topic sections + meeting extras by driving
/// the gateway LLM — a Swift port of recap-relay's chunked-window analysis, but
/// starting from already-named speakers (label-merge), so we skip its speaker
/// clustering and name-inference entirely. Pure helpers are static + testable; the
/// LLM passes are sequential (one gateway request at a time).
final class RecapAnalyzer {
    private let llm: GatewayLLMClient
    private let model: String

    init(llm: GatewayLLMClient, model: String) {
        self.llm = llm
        self.model = model
    }

    /// One non-empty transcript segment, as fed to the prompts.
    struct Entry: Equatable {
        let offset: Double   // seconds
        let end: Double
        let speaker: String
        let text: String
    }

    /// A contiguous, inclusive range of entry indices sent to the model in one request.
    struct Window: Equatable {
        let startIdx: Int       // first entry index this window analyzes (incl. overlap)
        let endIdx: Int         // last entry index (incl. overlap)
        let bodyStartIdx: Int   // first entry this window "owns"
    }

    // MARK: - Orchestration

    /// Analyze (topics) → extras. Extras are best-effort (nil on failure).
    ///
    /// - Parameters:
    ///   - file: The speakers file whose segments are recapped.
    ///   - progress: Optional callback awaited with a short status string before each pass.
    /// - Returns: The topic sections plus optional extras; an empty result when the
    ///   file has no non-blank segments.
    /// - Throws: Whatever the topic pass throws. Errors from the extras pass are
    ///   deliberately swallowed (`try?`).
    func recap(file: SpeakersFile, progress: ((String) async -> Void)? = nil) async throws -> RecapResult {
        let entries = Self.entries(from: file)
        guard !entries.isEmpty else { return RecapResult(sections: [], extras: nil) }

        await progress?("Finding topics…")
        let sections = try await analyze(entries: entries)

        await progress?("Extracting highlights…")
        // Best-effort: a failed extras pass must not discard the sections we already have.
        let extras = try? await self.extras(file: file, entries: entries, sections: sections)

        return RecapResult(sections: sections, extras: extras)
    }

    // MARK: - Analyze (chunked windows → stitched sections)

    /// Runs the topic pass: one LLM request per planned window, then stitches the
    /// per-window sections into a single list indexed against `entries`.
    ///
    /// - Parameter entries: Transcript entries (ordered by `offset` when produced by `entries(from:)`).
    /// - Returns: Stitched sections; a single "Conversation" section spanning every
    ///   entry when the model returned nothing usable; `[]` for empty input.
    /// - Throws: Errors from `llm.completeJSON`.
    func analyze(entries: [Entry]) async throws -> [TopicSection] {
        // Without this guard the fallback below would emit a section ending at index -1.
        guard !entries.isEmpty else { return [] }

        let windows = Self.planWindows(entries)
        let totalSec = entries.last?.end ?? 0
        var all: [TopicSection] = []

        for w in windows {
            let local = Array(entries[w.startIdx...w.endIdx])
            let prompt = Self.analyzePrompt(local, totalSec: totalSec, windowCount: windows.count)
            let content = try await llm.completeJSON(model: model, system: nil, user: prompt)

            let lastLocal = local.count - 1
            for s in Self.parseSections(content) {
                // Model indices are window-local; clamp them into range, then shift to global.
                let gs = w.startIdx + max(0, min(s.startIndex, lastLocal))
                let ge = w.startIdx + max(0, min(s.endIndex, lastLocal))
                guard ge >= gs else { continue }
                all.append(TopicSection(title: s.title, summary: s.summary, startIndex: gs, endIndex: ge))
            }
        }

        let stitched = Self.stitch(all)
        // If the model returned nothing usable, fall back to one section for the whole call.
        if stitched.isEmpty {
            return [TopicSection(title: "Conversation", summary: "", startIndex: 0, endIndex: entries.count - 1)]
        }
        return stitched
    }

    /// Plan time-based windows over the entries. Single window for short calls;
    /// otherwise ~`bodySec` bodies with `overlapSec` of overlap each side so a topic
    /// straddling a boundary is seen by both windows (the stitcher dedupes).
    ///
    /// - Parameters:
    ///   - entries: Transcript entries; the index math assumes ascending `offset`.
    ///   - bodySec: Length in seconds of the part each window "owns".
    ///   - overlapSec: Extra seconds included before and after each body.
    ///   - cutoffSec: Calls whose last entry ends at or before this get one window.
    /// - Returns: Windows in order; empty for empty input.
    static func planWindows(_ entries: [Entry],
                            bodySec: Double = 18 * 60,
                            overlapSec: Double = 2 * 60,
                            cutoffSec: Double = 25 * 60) -> [Window] {
        guard let last = entries.last else { return [] }
        if last.end <= cutoffSec {
            return [Window(startIdx: 0, endIdx: entries.count - 1, bodyStartIdx: 0)]
        }

        var windows: [Window] = []
        var bodyStartIdx = 0
        while bodyStartIdx < entries.count {
            let bodyStartSec = entries[bodyStartIdx].offset
            let winStartSec = bodyStartSec - overlapSec
            let bodyEndSec = bodyStartSec + bodySec
            let winEndSec = bodyEndSec + overlapSec

            // Never start after the body's first entry, so startIdx <= endIdx always holds.
            let firstInWindow = entries.firstIndex { $0.offset >= winStartSec } ?? bodyStartIdx
            let startIdx = min(firstInWindow, bodyStartIdx)

            var endIdx = bodyStartIdx
            while endIdx + 1 < entries.count && entries[endIdx + 1].offset <= winEndSec {
                endIdx += 1
            }
            windows.append(Window(startIdx: startIdx, endIdx: endIdx, bodyStartIdx: bodyStartIdx))

            let next = entries.firstIndex { $0.offset >= bodyEndSec } ?? entries.count
            // Always advance by at least one entry so the loop terminates.
            bodyStartIdx = max(next, bodyStartIdx + 1)
        }
        return windows
    }

    /// Merge per-window sections into one chronological, non-overlapping list.
    /// Sort by start (wider first on ties), drop fully-contained, trim front overlaps.
    ///
    /// - Parameter sections: Sections with global entry indices, in any order.
    /// - Returns: Sections in ascending order whose index ranges do not overlap.
    static func stitch(_ sections: [TopicSection]) -> [TopicSection] {
        let sorted = sections.sorted { lhs, rhs in
            lhs.startIndex != rhs.startIndex
                ? lhs.startIndex < rhs.startIndex
                : lhs.endIndex > rhs.endIndex
        }

        var out: [TopicSection] = []
        var maxEnd = -1   // highest entry index already covered by `out`
        for s in sorted {
            var start = s.startIndex
            let end = s.endIndex
            if end <= maxEnd { continue }                   // fully contained → drop
            if start <= maxEnd { start = maxEnd + 1 }       // overlap → trim front
            guard start <= end else { continue }
            out.append(TopicSection(title: s.title, summary: s.summary, startIndex: start, endIndex: end))
            maxEnd = end
        }
        return out
    }

    // MARK: - Extras

    /// Runs the extras pass: a single LLM request over the (capped) transcript.
    ///
    /// - Returns: Parsed extras, or nil when the response is not usable JSON or
    ///   contains no non-empty category.
    /// - Throws: Errors from `llm.completeJSON`.
    func extras(file: SpeakersFile, entries: [Entry], sections: [TopicSection]) async throws -> MeetingExtras? {
        let prompt = Self.extrasPrompt(file: file, entries: entries, sections: sections)
        let content = try await llm.completeJSON(model: model, system: nil, user: prompt, maxTokens: 4096)
        return Self.parseExtras(content)
    }

    // MARK: - Entries

    /// Converts the file's segments into entries: drops segments whose text is nil
    /// or blank, trims the text, and sorts by start time.
    static func entries(from file: SpeakersFile) -> [Entry] {
        file.segments
            .compactMap { segment -> Entry? in
                // Trim once and reuse the result for both the filter and the entry.
                let text = (segment.text ?? "").trimmingCharacters(in: .whitespacesAndNewlines)
                guard !text.isEmpty else { return nil }
                return Entry(offset: segment.start, end: segment.end, speaker: segment.speaker, text: text)
            }
            .sorted { $0.offset < $1.offset }
    }

    /// Formats seconds as `M:SS`, or `H:MM:SS` from one hour up. Negative and
    /// non-finite inputs render as `0:00`.
    static func mmss(_ sec: Double) -> String {
        // Int(Double) traps on NaN/±infinity and on out-of-range values, so guard and clamp first.
        guard sec.isFinite else { return "0:00" }
        let t = Int(max(0, min(sec.rounded(), Double(Int32.max))))
        let h = t / 3600, m = (t % 3600) / 60, s = t % 60
        return h > 0
            ? String(format: "%d:%02d:%02d", h, m, s)
            : String(format: "%d:%02d", m, s)
    }

    // MARK: - Prompts

    /// Builds the topic-segmentation prompt for one window. Lines are numbered with
    /// window-local indices, which is what the model is asked to return.
    private static func analyzePrompt(_ window: [Entry], totalSec: Double, windowCount: Int) -> String {
        let lines = window.enumerated()
            .map { index, entry in
                "[\(index)] (\(mmss(entry.offset))) \(entry.speaker): \(entry.text)"
            }
            .joined(separator: "\n")
        let windowSpan = (window.last?.end ?? 0) - (window.first?.offset ?? 0)
        let windowMin = max(1, Int((windowSpan / 60).rounded()))
        let maxIndex = window.count - 1
        let targetSections = targetSectionsPhrase(totalSec: totalSec, windowCount: windowCount)

        return """
        You are analyzing a ~\(windowMin)-minute section of a longer transcript. Your job is to identify natural topic boundaries and group the transcript into discussion-based sections — aim for \(targetSections).

        TRANSCRIPT (each line is numbered with a timestamp):
        \(lines)

        INSTRUCTIONS:
        1. Read the entire transcript carefully.
        2. Identify where the discussion naturally shifts from one topic to another.
        3. Group consecutive transcript segments by topic. Some sections may be short (a quick aside) and some may be long (an extended deep-dive). Let the content dictate the length.
        4. For each section, write:
           - A short, specific topic title (3-8 words)
           - A 1-3 sentence summary of what's discussed. Attribute points to speakers by name where it improves clarity.
           - The start and end segment indices (inclusive), counted as the bracketed [N] number at the start of each transcript line above.

        IMPORTANT:
        - Sections must be chronological and non-overlapping.
        - Every segment index from 0 to \(maxIndex) must belong to exactly one section.
        - startIndex of section N+1 must equal endIndex of section N plus 1.
        - Create as many or as few sections as the content naturally requires — but lean toward broad, substantive topics rather than minute-by-minute breakdowns. A natural topic that spans several minutes of dialogue should be one section, not several.
        - Titles should be descriptive and specific, not generic like "Introduction" unless it truly is one.

        Respond with ONLY valid JSON in this exact format, no other text:
        {
          "sections": [
            {
              "title": "Brief Topic Title",
              "summary": "1-3 sentence summary of this discussion section.",
              "startIndex": 0,
              "endIndex": 15
            }
          ]
        }
        """
    }

    /// Section-count hint for one window: a whole-call target chosen from the call
    /// length, divided across the windows, never below 2.
    private static func targetSectionsPhrase(totalSec: Double, windowCount: Int) -> String {
        let minutes = totalSec / 60
        let total: Int
        switch minutes {
        case ..<5: total = 3
        case ..<15: total = 4
        case ..<30: total = 6
        case ..<60: total = 8
        case ..<120: total = 12
        default: total = 16
        }
        let per = max(2, Int((Double(total) / Double(max(1, windowCount))).rounded()))
        return "around \(per) sections"
    }

    /// Builds the extras prompt: metadata, speaker roster, topic titles, and the
    /// transcript capped at 24,000 characters.
    private static func extrasPrompt(file: SpeakersFile, entries: [Entry], sections: [TopicSection]) -> String {
        let names = orderedSpeakerNames(entries)
        let roster = names.isEmpty ? "(unknown)" : names.joined(separator: ", ")
        let topics = sections.isEmpty
            ? "(none)"
            : sections.enumerated()
                .map { index, section in "\(index + 1). \(section.title)" }
                .joined(separator: "\n")
        let transcript = cappedTranscript(entries, maxChars: 24_000)
        let durationStr = mmss(file.durationSec)

        // The "[name MM:SS]" placeholders below mirror the line format cappedTranscript emits.
        return """
        You are extracting structured information from an internal team meeting transcript. The transcript below is labeled with the speakers' real names where known.

        MEETING METADATA:
        - App: \(file.app)
        - Duration: \(durationStr)

        SPEAKERS: \(roster)

        TOPIC SUMMARIES (already produced — for context only, do not duplicate):
        \(topics)

        TRANSCRIPT (each line is "[name MM:SS] text"):
        \(transcript)

        INSTRUCTIONS:
        Extract FIVE categories of information. Return EMPTY ARRAYS for categories that don't apply — do NOT invent items. Use the speakers' names exactly as shown above; use null/empty when a person is unclear.

        1. TLDR — A 2-4 sentence executive summary of the entire meeting: what it was about, the key discussion arc, and the bottom-line outcome. Past tense, third person, dense. Skip pleasantries. If the meeting was genuinely substanceless, write one factual sentence. This is the only required category.
           - summary: the 2-4 sentence executive summary
           - primary_speakers: array of names who drove the conversation (1-3, in rough order of contribution). Empty array if unclear.

        2. DECISIONS — Things explicitly decided/agreed. Only clear commitments, not casual mentions. For each:
           - statement: the decision in one sentence
           - agreed_by: array of names who explicitly agreed (empty if unclear)
           - supporting_offset: integer SECONDS where it was decided (convert the [name MM:SS] timestamp to total seconds)

        3. ACTION_ITEMS — Explicit ownership ("I'll send the doc", "Matt will follow up"), not vague "someone should". For each:
           - description: the action in imperative form
           - owner: the person's name, or null if unclear
           - due_hint: deadline string if mentioned ("by Friday"), or null
           - supporting_offset: integer seconds where the commitment was made

        4. OPEN_QUESTIONS — Questions raised that were NOT clearly answered. Skip rhetorical/answered ones. For each:
           - question: rephrased to be self-contained
           - raised_by: the person's name, or null
           - answered: false (always)

        5. KEY_QUOTES — 3-6 max. Pivotal/insightful/strong-opinion statements worth surfacing verbatim. For each:
           - speaker: the person's name (or null)
           - offset: integer seconds where the quote occurs
           - quote: the verbatim quote (4-30 words)
           - why_notable: one short clause

        Be conservative — better an empty array than a fabrication.

        Respond with ONLY valid JSON in this exact shape, no other text:
        {
          "tldr": {"summary": "...", "primary_speakers": []},
          "decisions": [{"statement": "...", "agreed_by": [], "supporting_offset": 0}],
          "action_items": [{"description": "...", "owner": null, "due_hint": null, "supporting_offset": 0}],
          "open_questions": [{"question": "...", "raised_by": null, "answered": false}],
          "key_quotes": [{"speaker": null, "offset": 0, "quote": "...", "why_notable": "..."}]
        }
        """
    }

    /// Distinct speaker names in first-appearance order.
    static func orderedSpeakerNames(_ entries: [Entry]) -> [String] {
        var seen = Set<String>()
        // `insert` reports whether the name was new, which gives order-preserving de-duplication.
        return entries
            .map(\.speaker)
            .filter { !$0.isEmpty && seen.insert($0).inserted }
    }

    /// Full `[name MM:SS] text` transcript, middle-truncated to `maxChars` so a long
    /// call still fits the model context (keeps the start and end, drops the middle).
    ///
    /// When truncated, the result is `maxChars / 2` characters from each end joined by
    /// a marker line, so it can exceed `maxChars` by the length of that marker.
    static func cappedTranscript(_ entries: [Entry], maxChars: Int) -> String {
        let full = entries
            .map { "[\($0.speaker) \(mmss($0.offset))] \($0.text)" }
            .joined(separator: "\n")
        guard full.count > maxChars else { return full }

        // prefix/suffix trap on a negative length; clamp so a bad cap cannot crash.
        let half = max(0, maxChars / 2)
        let head = String(full.prefix(half))
        let tail = String(full.suffix(half))
        return head + "\n…[transcript truncated]…\n" + tail
    }

    // MARK: - Parsing (defensive — LLM output)

    /// Strips any code fence via `GatewayLLMClient.stripCodeFence`, then parses the
    /// remainder as a JSON object. Nil when it is not valid JSON or not an object.
    private static func jsonObject(_ content: String) -> [String: Any]? {
        let cleaned = GatewayLLMClient.stripCodeFence(content)
        return (try? JSONSerialization.jsonObject(with: Data(cleaned.utf8))) as? [String: Any]
    }

    /// Parses the `sections` array of a topic-pass response.
    ///
    /// Items without a non-blank `title` or without integer-convertible `startIndex`
    /// and `endIndex` are skipped; a missing `summary` becomes `""`. Indices are
    /// returned as given (no range clamping here).
    static func parseSections(_ content: String) -> [(title: String, summary: String, startIndex: Int, endIndex: Int)] {
        guard let o = jsonObject(content), let arr = o["sections"] as? [[String: Any]] else { return [] }
        return arr.compactMap { d -> (title: String, summary: String, startIndex: Int, endIndex: Int)? in
            guard let t = (d["title"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines), !t.isEmpty,
                  let si = intVal(d["startIndex"]),
                  let ei = intVal(d["endIndex"]) else { return nil }
            let s = (d["summary"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
            return (title: t, summary: s, startIndex: si, endIndex: ei)
        }
    }

    /// Parses an extras-pass response. Items missing their main text field are
    /// skipped; optional fields fall back to nil/empty.
    ///
    /// - Returns: nil when the content is not a JSON object or every category is empty.
    static func parseExtras(_ content: String) -> MeetingExtras? {
        guard let o = jsonObject(content) else { return nil }

        let tldrObj = o["tldr"] as? [String: Any]
        // Trim like every other field so a whitespace-only summary does not count as content.
        let tldrSummary = (tldrObj?["summary"] as? String)?
            .trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
        let tldr = MeetingExtras.TLDR(
            summary: tldrSummary,
            primarySpeakers: stringArray(tldrObj?["primary_speakers"]))

        let decisions = (o["decisions"] as? [[String: Any]] ?? []).compactMap { d -> MeetingExtras.Decision? in
            guard let st = nonEmpty(d["statement"]) else { return nil }
            return .init(statement: st,
                         agreedBy: stringArray(d["agreed_by"]),
                         supportingOffset: intVal(d["supporting_offset"]))
        }

        let actions = (o["action_items"] as? [[String: Any]] ?? []).compactMap { d -> MeetingExtras.ActionItem? in
            guard let desc = nonEmpty(d["description"]) else { return nil }
            return .init(description: desc,
                         owner: nonEmpty(d["owner"]),
                         dueHint: nonEmpty(d["due_hint"]),
                         supportingOffset: intVal(d["supporting_offset"]))
        }

        let questions = (o["open_questions"] as? [[String: Any]] ?? []).compactMap { d -> MeetingExtras.OpenQuestion? in
            guard let q = nonEmpty(d["question"]) else { return nil }
            return .init(question: q, raisedBy: nonEmpty(d["raised_by"]))
        }

        let quotes = (o["key_quotes"] as? [[String: Any]] ?? []).compactMap { d -> MeetingExtras.KeyQuote? in
            guard let q = nonEmpty(d["quote"]) else { return nil }
            return .init(speaker: nonEmpty(d["speaker"]),
                         offset: intVal(d["offset"]),
                         quote: q,
                         whyNotable: nonEmpty(d["why_notable"]) ?? "")
        }

        // Require at least one non-empty category to consider extras present.
        guard !tldr.summary.isEmpty || !decisions.isEmpty || !actions.isEmpty
                || !questions.isEmpty || !quotes.isEmpty else { return nil }

        return MeetingExtras(tldr: tldr,
                             decisions: decisions,
                             actionItems: actions,
                             openQuestions: questions,
                             keyQuotes: quotes)
    }

    /// Lenient integer coercion: accepts an integer, a floating-point number
    /// (truncated toward zero), or a numeric string. Nil for anything else,
    /// including non-finite or out-of-range numbers.
    private static func intVal(_ v: Any?) -> Int? {
        if let i = v as? Int { return i }
        if let d = v as? Double {
            // Int(d) would trap on NaN/±infinity/overflow; the failable initializer returns nil instead.
            return Int(exactly: d.rounded(.towardZero))
        }
        if let s = v as? String { return Int(s.trimmingCharacters(in: .whitespacesAndNewlines)) }
        return nil
    }

    /// The trimmed, non-blank strings of a JSON array; non-string elements are
    /// dropped. Returns `[]` when `v` is not an array.
    private static func stringArray(_ v: Any?) -> [String] {
        guard let items = v as? [Any] else { return [] }
        return items
            .compactMap { ($0 as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) }
            .filter { !$0.isEmpty }
    }

    /// The trimmed string, or nil when `v` is not a string, is blank, or is the
    /// literal text "null" (any casing).
    private static func nonEmpty(_ v: Any?) -> String? {
        guard let s = (v as? String)?.trimmingCharacters(in: .whitespacesAndNewlines),
              !s.isEmpty,
              s.lowercased() != "null" else { return nil }
        return s
    }
}