import Foundation

/// Turns a finished `speakers.json` into topic sections + meeting extras by driving
/// the gateway LLM — a Swift port of recap-relay's chunked-window analysis, but
/// starting from already-named speakers (label-merge), so we skip its speaker
/// clustering and name-inference entirely. Pure helpers are static + testable; the
/// LLM passes are sequential (one gateway request at a time).
final class RecapAnalyzer {
    private let llm: GatewayLLMClient
    private let model: String

    init(llm: GatewayLLMClient, model: String) {
        self.llm = llm
        self.model = model
    }

    /// One non-empty transcript segment, in the form the prompts consume.
    struct Entry: Equatable {
        let offset: Double  // seconds
        let end: Double
        let speaker: String
        let text: String
    }

    /// A contiguous, inclusive range of entry indices sent to the model in one request.
    struct Window: Equatable {
        let startIdx: Int      // first entry index this window analyzes (incl. overlap)
        let endIdx: Int        // last entry index (incl. overlap)
        let bodyStartIdx: Int  // first entry this window "owns"
    }

    // MARK: - Orchestration

    /// Analyze (topics) → template-driven extras. Extras are best-effort (nil on failure).
    ///
    /// - Parameters:
    ///   - file: The speakers file whose segments are analyzed.
    ///   - template: Controls topic granularity and which extras are requested.
    ///   - progress: Optional callback invoked with a short status string before each pass.
    /// - Returns: The stitched topic sections plus extras; empty sections and nil extras
    ///   when the file has no non-empty segments.
    /// - Throws: Whatever the topic-analysis pass throws. Errors from the extras pass are
    ///   deliberately swallowed and surface as `extras == nil`.
    func recap(file: SpeakersFile, template: RecapTemplate,
               progress: ((String) async -> Void)? = nil) async throws -> RecapResult {
        let entries = Self.entries(from: file)
        guard !entries.isEmpty else { return RecapResult(sections: [], extras: nil) }

        await progress?("Finding topics…")
        let sections = try await analyze(entries: entries, granularity: template.topicGranularity)

        await progress?("Extracting takeaways…")
        // Best-effort on purpose: a failed extras pass must not discard the topic sections.
        let extras = try? await self.extras(file: file, entries: entries, sections: sections, template: template)
        return RecapResult(sections: sections, extras: extras)
    }

    // MARK: - Analyze (chunked windows → stitched sections)

    /// Runs one LLM request per planned window and stitches the per-window sections into a
    /// single chronological list expressed in global entry indices.
    ///
    /// - Returns: The stitched sections; a single "Conversation" section spanning every entry
    ///   when the model returned nothing usable; `[]` when `entries` is empty.
    /// - Throws: Errors from the LLM client, or `CancellationError` if the task is cancelled
    ///   between windows.
    func analyze(entries: [Entry], granularity: TopicGranularity = .auto) async throws -> [TopicSection] {
        // Without this guard the fallback below would produce a section with endIndex == -1.
        guard !entries.isEmpty else { return [] }

        let windows = Self.planWindows(entries)
        let totalSec = entries.last?.end ?? 0
        var all: [TopicSection] = []

        for window in windows {
            // Requests are sequential; stop before issuing another one if we were cancelled.
            try Task.checkCancellation()

            let local = Array(entries[window.startIdx...window.endIdx])
            let prompt = Self.analyzePrompt(local, totalSec: totalSec, windowCount: windows.count,
                                            granularity: granularity)
            let content = try await llm.completeJSON(model: model, system: nil, user: prompt)

            for section in Self.parseSections(content) {
                // The model's indices are window-local and untrusted: clamp into the window,
                // then shift to global entry indices.
                let globalStart = window.startIdx + max(0, min(section.startIndex, local.count - 1))
                let globalEnd = window.startIdx + max(0, min(section.endIndex, local.count - 1))
                guard globalEnd >= globalStart else { continue }
                all.append(TopicSection(title: section.title, summary: section.summary,
                                        startIndex: globalStart, endIndex: globalEnd))
            }
        }

        let stitched = Self.stitch(all)
        // If the model returned nothing usable, fall back to one section for the whole call.
        if stitched.isEmpty {
            return [TopicSection(title: "Conversation", summary: "", startIndex: 0, endIndex: entries.count - 1)]
        }
        return stitched
    }

    /// Plan time-based windows over the entries. Single window for short calls;
    /// otherwise ~`bodySec` bodies with `overlapSec` of overlap each side so a topic
    /// straddling a boundary is seen by both windows (the stitcher dedupes).
    ///
    /// - Parameters:
    ///   - entries: Entries ordered by `offset`.
    ///   - bodySec: Length in seconds of the body each window owns.
    ///   - overlapSec: Extra seconds included before and after the body.
    ///   - cutoffSec: If the last entry ends at or before this, a single window is returned.
    /// - Returns: Windows in chronological order; empty when `entries` is empty.
    static func planWindows(_ entries: [Entry], bodySec: Double = 18 * 60, overlapSec: Double = 2 * 60,
                            cutoffSec: Double = 25 * 60) -> [Window] {
        guard let total = entries.last?.end else { return [] }
        if total <= cutoffSec {
            return [Window(startIdx: 0, endIdx: entries.count - 1, bodyStartIdx: 0)]
        }

        var windows: [Window] = []
        var bodyStartIdx = 0
        while bodyStartIdx < entries.count {
            let bodyStartSec = entries[bodyStartIdx].offset
            let winStartSec = bodyStartSec - overlapSec
            let bodyEndSec = bodyStartSec + bodySec
            let winEndSec = bodyEndSec + overlapSec

            let startIdx = entries.firstIndex { $0.offset >= winStartSec } ?? bodyStartIdx
            var endIdx = bodyStartIdx
            while endIdx + 1 < entries.count && entries[endIdx + 1].offset <= winEndSec {
                endIdx += 1
            }
            windows.append(Window(startIdx: startIdx, endIdx: endIdx, bodyStartIdx: bodyStartIdx))

            let next = entries.firstIndex { $0.offset >= bodyEndSec } ?? entries.count
            // Always advance by at least one entry so the loop terminates.
            bodyStartIdx = max(next, bodyStartIdx + 1)
        }
        return windows
    }

    /// Merge per-window sections into one chronological, non-overlapping list.
    /// Sort by start (wider first on ties), drop fully-contained, trim front overlaps.
    static func stitch(_ sections: [TopicSection]) -> [TopicSection] {
        let sorted = sections.sorted {
            $0.startIndex != $1.startIndex ? $0.startIndex < $1.startIndex : $0.endIndex > $1.endIndex
        }
        var out: [TopicSection] = []
        var maxEnd = -1  // highest entry index already covered by `out`
        for section in sorted {
            var start = section.startIndex
            let end = section.endIndex
            if end <= maxEnd { continue }               // fully contained → drop
            if start <= maxEnd { start = maxEnd + 1 }   // overlap → trim front
            guard start <= end else { continue }
            out.append(TopicSection(title: section.title, summary: section.summary,
                                    startIndex: start, endIndex: end))
            maxEnd = end
        }
        return out
    }

    // MARK: - Extras

    /// Builds the template-driven extras prompt, sends it to the LLM and parses the reply.
    ///
    /// - Returns: The parsed extras, or nil when the reply is not a JSON object or is empty.
    /// - Throws: Errors from the LLM client.
    func extras(file: SpeakersFile, entries: [Entry], sections: [TopicSection],
                template: RecapTemplate) async throws -> RecapExtras? {
        let prompt = Self.extrasPrompt(file: file, entries: entries, sections: sections, template: template)
        let content = try await llm.completeJSON(model: model, system: nil, user: prompt, maxTokens: 4096)
        return Self.parseExtras(content, template: template)
    }

    // MARK: - Entries

    /// Converts the file's segments into entries: text is trimmed, segments whose text is
    /// missing or blank are dropped, and the result is ordered by start time.
    static func entries(from file: SpeakersFile) -> [Entry] {
        file.segments
            .compactMap { segment -> Entry? in
                let text = (segment.text ?? "").trimmingCharacters(in: .whitespacesAndNewlines)
                guard !text.isEmpty else { return nil }
                return Entry(offset: segment.start, end: segment.end, speaker: segment.speaker, text: text)
            }
            .sorted { $0.offset < $1.offset }
    }

    /// Formats seconds as `M:SS`, or `H:MM:SS` from one hour up. Negative and non-finite
    /// inputs render as `0:00`.
    static func mmss(_ sec: Double) -> String {
        // Converting NaN/±inf or an out-of-range Double to Int traps, so sanitize first.
        guard sec.isFinite else { return "0:00" }
        let t = Int(min(max(0, sec.rounded()), Double(Int32.max)))
        let h = t / 3600, m = (t % 3600) / 60, s = t % 60
        return h > 0 ? String(format: "%d:%02d:%02d", h, m, s) : String(format: "%d:%02d", m, s)
    }

    // MARK: - Prompts

    /// Builds the topic-segmentation prompt for one window. Lines are numbered with
    /// window-local indices, which is what the model is asked to return.
    private static func analyzePrompt(_ window: [Entry], totalSec: Double, windowCount: Int,
                                      granularity: TopicGranularity) -> String {
        let lines = window.enumerated()
            .map { index, entry in "[\(index)] (\(mmss(entry.offset))) \(entry.speaker): \(entry.text)" }
            .joined(separator: "\n")
        let windowSpan = (window.last?.end ?? 0) - (window.first?.offset ?? 0)
        let windowMin = max(1, Int((windowSpan / 60).rounded()))
        let maxIndex = window.count - 1
        let targetSections = targetSectionsPhrase(totalSec: totalSec, windowCount: windowCount,
                                                  granularity: granularity)
        return """
        You are analyzing a ~\(windowMin)-minute section of a longer transcript. Your job is to identify natural topic boundaries and group the transcript into discussion-based sections — aim for \(targetSections).

        TRANSCRIPT (each line is numbered with a timestamp):
        \(lines)

        INSTRUCTIONS:
        1. Read the entire transcript carefully.
        2. Identify where the discussion naturally shifts from one topic to another.
        3. Group consecutive transcript segments by topic. Some sections may be short (a quick aside) and some may be long (an extended deep-dive). Let the content dictate the length.
        4. For each section, write:
           - A short, specific topic title (3-8 words)
           - A 1-3 sentence summary of what's discussed. Attribute points to speakers by name where it improves clarity.
           - The start and end segment indices (inclusive), counted as the bracketed [N] number at the start of each transcript line above.

        IMPORTANT:
        - Sections must be chronological and non-overlapping.
        - Every segment index from 0 to \(maxIndex) must belong to exactly one section.
        - startIndex of section N+1 must equal endIndex of section N plus 1.
        - Create as many or as few sections as the content naturally requires — but lean toward broad, substantive topics rather than minute-by-minute breakdowns. A natural topic that spans several minutes of dialogue should be one section, not several.
        - Titles should be descriptive and specific, not generic like "Introduction" unless it truly is one.

        Respond with ONLY valid JSON in this exact format, no other text:
        {
          "sections": [
            {
              "title": "Brief Topic Title",
              "summary": "1-3 sentence summary of this discussion section.",
              "startIndex": 0,
              "endIndex": 15
            }
          ]
        }
        """
    }

    /// Phrase telling the model how many sections to aim for in one window: a base count
    /// picked from the total call length, scaled by the granularity multiplier, divided
    /// across the windows, and never below 2.
    private static func targetSectionsPhrase(totalSec: Double, windowCount: Int,
                                             granularity: TopicGranularity) -> String {
        let m = totalSec / 60
        let base = m < 5 ? 3.0 : m < 15 ? 4 : m < 30 ? 6 : m < 60 ? 8 : m < 120 ? 12 : 16
        let total = base * granularity.multiplier
        let per = max(2, Int((total / Double(max(1, windowCount))).rounded()))
        return "around \(per) sections"
    }

    /// Assemble the extras prompt FROM the template, so prompt + parsing + rendering
    /// always agree. Each section becomes a `sec_<i>` field shaped by its kind.
    /// Exposed so Settings can show the user the exact prompt that will be sent.
    static func extrasPrompt(file: SpeakersFile, entries: [Entry], sections: [TopicSection],
                             template: RecapTemplate) -> String {
        let names = orderedSpeakerNames(entries)
        let roster = names.isEmpty ? "(unknown)" : names.joined(separator: ", ")
        let topics = sections.isEmpty
            ? "(none)"
            : sections.enumerated().map { "\($0.offset + 1). \($0.element.title)" }.joined(separator: "\n")
        let transcript = cappedTranscript(entries, maxChars: 24_000)

        // `fields` describes each key to the model; `shape` is the literal JSON skeleton.
        var fields: [String] = []
        var shape: [String] = []
        if template.includeTLDR {
            fields.append("- \"tldr\": a 2-4 sentence executive summary string (what it was about, the arc, the outcome; past tense, dense; skip pleasantries; one factual sentence if there's no substance).")
            fields.append("- \"primary_speakers\": array of the 1-3 names who drove the conversation, in rough order of contribution (empty if unclear).")
            shape.append("\"tldr\": \"...\"")
            shape.append("\"primary_speakers\": []")
        }
        for (i, sec) in template.sections.enumerated() {
            let key = "sec_\(i)"
            switch sec.kind {
            case .bullets:
                fields.append("- \"\(key)\" (\(sec.title)): array of short strings. \(sec.instruction)")
                shape.append("\"\(key)\": [\"...\"]")
            case .paragraph:
                fields.append("- \"\(key)\" (\(sec.title)): a string. \(sec.instruction)")
                shape.append("\"\(key)\": \"...\"")
            case .items:
                fields.append("- \"\(key)\" (\(sec.title)): array of {\"text\": string, \"who\": name or null, \"when\": integer seconds or null, \"note\": string or null}. \(sec.instruction)")
                shape.append("\"\(key)\": [{\"text\": \"...\", \"who\": null, \"when\": null, \"note\": null}]")
            }
        }

        return """
        You are extracting structured takeaways from a call transcript. The transcript is labeled with the speakers' real names where known.

        MEETING METADATA:
        - App: \(file.app)
        - Duration: \(mmss(file.durationSec))

        SPEAKERS: \(roster)

        TOPIC SUMMARIES (already produced — for context only, do not duplicate):
        \(topics)

        TRANSCRIPT (each line is "[<name> <MM:SS>] text"):
        \(transcript)

        INSTRUCTIONS:
        Produce ONLY the JSON object below. Use the speakers' names exactly as shown above; use null/empty when a person or value is unclear. For any \"when\"/offset, convert the [<name> <MM:SS>] timestamp to total seconds. Be conservative — return EMPTY arrays (or null) rather than inventing anything.

        Fields:
        \(fields.joined(separator: "\n"))

        Respond with ONLY valid JSON in this exact shape, no other text:
        {
         \(shape.joined(separator: ",\n "))
        }
        """
    }

    /// Distinct speaker names in first-appearance order.
    static func orderedSpeakerNames(_ entries: [Entry]) -> [String] {
        var seen = Set<String>()
        // `insert` reports whether the name was new, which keeps first-appearance order.
        return entries.map(\.speaker).filter { !$0.isEmpty && seen.insert($0).inserted }
    }

    /// Full `[name MM:SS] text` transcript, middle-truncated to `maxChars` so a long
    /// call still fits the model context (keeps the start and end, drops the middle).
    /// The truncation marker is added on top of the `maxChars` budget, and the cut is
    /// character-based, so the lines on either side of the marker may be partial.
    static func cappedTranscript(_ entries: [Entry], maxChars: Int) -> String {
        let full = entries.map { "[\($0.speaker) \(mmss($0.offset))] \($0.text)" }.joined(separator: "\n")
        guard full.count > maxChars else { return full }
        // `prefix`/`suffix` trap on a negative length, so clamp a nonsensical budget to 0.
        let half = max(0, maxChars) / 2
        let head = String(full.prefix(half))
        let tail = String(full.suffix(half))
        return head + "\n…[transcript truncated]…\n" + tail
    }

    // MARK: - Parsing (defensive — LLM output)

    /// Strips any code fence and decodes the content as a top-level JSON object;
    /// nil if it is not valid JSON or not an object.
    private static func jsonObject(_ content: String) -> [String: Any]? {
        let cleaned = GatewayLLMClient.stripCodeFence(content)
        return (try? JSONSerialization.jsonObject(with: Data(cleaned.utf8))) as? [String: Any]
    }

    /// Parses the `sections` array of an analyze reply. Elements lacking a non-blank title
    /// or integer-like start/end indices are skipped; a missing summary becomes "".
    /// Indices are returned as the model gave them (window-local, not range-checked).
    static func parseSections(_ content: String) -> [(title: String, summary: String, startIndex: Int, endIndex: Int)] {
        guard let object = jsonObject(content), let array = object["sections"] as? [[String: Any]] else {
            return []
        }
        return array.compactMap { dict in
            guard let title = (dict["title"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines),
                  !title.isEmpty,
                  let start = intVal(dict["startIndex"]),
                  let end = intVal(dict["endIndex"])
            else { return nil }
            let summary = (dict["summary"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
            return (title, summary, start, end)
        }
    }

    /// Parses an extras reply against the template that produced the prompt: section `i`
    /// of the template is read from key `sec_<i>` and decoded according to its kind.
    ///
    /// - Returns: nil when the reply is not a JSON object, or when the TL;DR is empty and
    ///   every rendered section is empty.
    static func parseExtras(_ content: String, template: RecapTemplate) -> RecapExtras? {
        guard let object = jsonObject(content) else { return nil }
        let tldr = template.includeTLDR
            ? ((object["tldr"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "")
            : ""
        let primary = stringArray(object["primary_speakers"])

        var rendered: [RenderedSection] = []
        for (i, sec) in template.sections.enumerated() {
            let value = object["sec_\(i)"]
            switch sec.kind {
            case .bullets:
                rendered.append(RenderedSection(title: sec.title, kind: .bullets, bullets: stringArray(value)))
            case .paragraph:
                let paragraph = (value as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
                rendered.append(RenderedSection(title: sec.title, kind: .paragraph, paragraph: paragraph))
            case .items:
                let items = (value as? [[String: Any]] ?? []).compactMap { dict -> RecapItem? in
                    // An item without text carries nothing worth rendering.
                    guard let text = nonEmpty(dict["text"]) else { return nil }
                    return RecapItem(text: text, who: nonEmpty(dict["who"]), when: intVal(dict["when"]),
                                     note: nonEmpty(dict["note"]))
                }
                rendered.append(RenderedSection(title: sec.title, kind: .items, items: items))
            }
        }

        guard !tldr.isEmpty || rendered.contains(where: { !$0.isEmpty }) else { return nil }
        return RecapExtras(tldr: tldr, primarySpeakers: primary, sections: rendered)
    }

    /// Lenient integer extraction: accepts an Int, a Double (truncated toward zero), or a
    /// numeric string. Returns nil for anything else, including non-finite or out-of-range
    /// doubles.
    private static func intVal(_ v: Any?) -> Int? {
        if let i = v as? Int { return i }
        if let d = v as? Double {
            // `Int(d)` traps on NaN/±inf and on magnitudes beyond Int's range; model output
            // is untrusted, so convert with the failable initializer instead.
            guard d.isFinite else { return nil }
            return Int(exactly: d.rounded(.towardZero))
        }
        if let s = v as? String { return Int(s.trimmingCharacters(in: .whitespacesAndNewlines)) }
        return nil
    }

    /// Trimmed, non-empty strings from a JSON array; non-string elements are skipped and a
    /// non-array value yields `[]`.
    private static func stringArray(_ v: Any?) -> [String] {
        guard let raw = v as? [Any] else { return [] }
        return raw
            .compactMap { ($0 as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) }
            .filter { !$0.isEmpty }
    }

    /// Trimmed string, or nil when the value is not a string, is blank, or is the literal
    /// text "null" in any letter case.
    private static func nonEmpty(_ v: Any?) -> String? {
        guard let s = (v as? String)?.trimmingCharacters(in: .whitespacesAndNewlines),
              !s.isEmpty, s.lowercased() != "null"
        else { return nil }
        return s
    }
}