diff --git a/Ten31Transcripts/Backend/GatewayLLMClient.swift b/Ten31Transcripts/Backend/GatewayLLMClient.swift new file mode 100644 index 0000000..9917d8f --- /dev/null +++ b/Ten31Transcripts/Backend/GatewayLLMClient.swift @@ -0,0 +1,135 @@ +import Foundation + +enum GatewayLLMError: Error, LocalizedError { + case invalidHost + case notReady // vLLM not loaded on the gateway + case server(Int, String) + case decode(String) + case retriesExhausted + + var errorDescription: String? { + switch self { + case .invalidHost: return "Invalid backend host URL." + case .notReady: return "The gateway's language model isn't ready." + case .server(let code, let detail): return "LLM error \(code): \(detail)" + case .decode(let msg): return "Couldn't decode the LLM response: \(msg)" + case .retriesExhausted: return "Gateway stayed busy (503) after retries." + } + } +} + +/// Talks to the Spark Control gateway's OpenAI-compatible `/v1/chat/completions` +/// (the same host + TLS as `label-merge`). Used for the recap analysis (topic +/// sections, summary polish, meeting extras). **Call sequentially** — like audio, +/// the gateway serializes GPU work; the recap pipeline issues one request at a time. +final class GatewayLLMClient { + private let baseURL: String + private let urlSession: URLSession + + init(baseURL: String, skipTLS: Bool) { + let trimmed = baseURL.trimmingCharacters(in: .whitespacesAndNewlines) + self.baseURL = trimmed.hasSuffix("/") ? String(trimmed.dropLast()) : trimmed + let config = URLSessionConfiguration.ephemeral + config.timeoutIntervalForRequest = 600 + config.timeoutIntervalForResource = 900 + config.waitsForConnectivity = false + let delegate: URLSessionDelegate? = skipTLS ? InsecureTrustDelegate() : nil + self.urlSession = URLSession(configuration: config, delegate: delegate, delegateQueue: nil) + } + + deinit { urlSession.finishTasksAndInvalidate() } + + /// The ready chat model id from `/api/endpoints` (`vllm.model`), or nil if the + /// gateway has no language model loaded. + func chatModelId() async -> String? { + guard let url = URL(string: baseURL + "/api/endpoints") else { return nil } + guard let (data, _) = try? await urlSession.data(from: url), + let obj = try? JSONSerialization.jsonObject(with: data) as? [String: Any], + let vllm = obj["vllm"] as? [String: Any], + (vllm["ready"] as? Bool) == true, + let model = vllm["model"] as? String, !model.isEmpty else { return nil } + return model + } + + /// One JSON-mode chat completion. Returns the raw `content` string (the caller + /// parses it as JSON). Retries on `503 + Retry-After`. + func completeJSON(model: String, system: String?, user: String, + maxTokens: Int = 4096, maxRetries: Int = 3) async throws -> String { + guard let url = URL(string: baseURL + "/v1/chat/completions") else { + throw GatewayLLMError.invalidHost + } + var messages: [[String: String]] = [] + if let system { messages.append(["role": "system", "content": system]) } + messages.append(["role": "user", "content": user]) + let body: [String: Any] = [ + "model": model, + "messages": messages, + "max_tokens": maxTokens, + "stream": false, + "response_format": ["type": "json_object"], + "chat_template_kwargs": ["enable_thinking": false], + ] + let bodyData = try JSONSerialization.data(withJSONObject: body) + + var attempt = 0 + while true { + var request = URLRequest(url: url) + request.httpMethod = "POST" + request.setValue("application/json", forHTTPHeaderField: "Content-Type") + request.httpBody = bodyData + + let (data, response) = try await urlSession.data(for: request) + guard let http = response as? HTTPURLResponse else { + throw GatewayLLMError.decode("no HTTP response") + } + switch http.statusCode { + case 200..<300: + return try Self.content(from: data) + case 503: + attempt += 1 + if attempt > maxRetries { throw GatewayLLMError.retriesExhausted } + let retryAfter = http.value(forHTTPHeaderField: "Retry-After").flatMap(Double.init) ?? 5 + try await Task.sleep(nanoseconds: UInt64(max(1, retryAfter) * 1_000_000_000)) + default: + throw GatewayLLMError.server(http.statusCode, Self.detail(from: data)) + } + } + } + + // MARK: - Parsing + + private static func content(from data: Data) throws -> String { + struct ChatResponse: Decodable { + struct Choice: Decodable { struct Message: Decodable { let content: String }; let message: Message } + let choices: [Choice] + } + do { + let decoded = try JSONDecoder().decode(ChatResponse.self, from: data) + guard let text = decoded.choices.first?.message.content else { + throw GatewayLLMError.decode("no choices in response") + } + return text + } catch { + throw GatewayLLMError.decode(error.localizedDescription) + } + } + + private static func detail(from data: Data) -> String { + if let obj = try? JSONSerialization.jsonObject(with: data) as? [String: Any] { + if let detail = obj["detail"] as? String { return detail } + if let err = obj["error"] as? [String: Any], let msg = err["message"] as? String { return msg } + } + return String(data: data, encoding: .utf8) ?? "unknown error" + } + + /// Strip a ```json … ``` code fence if the model wrapped its JSON (defensive; + /// JSON mode usually prevents this). + static func stripCodeFence(_ s: String) -> String { + var t = s.trimmingCharacters(in: .whitespacesAndNewlines) + if t.hasPrefix("```") { + if let firstNewline = t.firstIndex(of: "\n") { t = String(t[t.index(after: firstNewline)...]) } + if let fenceRange = t.range(of: "```", options: .backwards) { t = String(t[.. Void)? = nil) async throws -> RecapResult { + let entries = Self.entries(from: file) + guard !entries.isEmpty else { return RecapResult(sections: [], extras: nil) } + await progress?("Finding topics…") + let sections = try await analyze(entries: entries) + await progress?("Extracting highlights…") + let extras = try? await self.extras(file: file, entries: entries, sections: sections) + return RecapResult(sections: sections, extras: extras) + } + + // MARK: - Analyze (chunked windows → stitched sections) + + func analyze(entries: [Entry]) async throws -> [TopicSection] { + let windows = Self.planWindows(entries) + var all: [TopicSection] = [] + for w in windows { + let local = Array(entries[w.startIdx...w.endIdx]) + let prompt = Self.analyzePrompt(local, totalSec: entries.last?.end ?? 0, windowCount: windows.count) + let content = try await llm.completeJSON(model: model, system: nil, user: prompt) + for s in Self.parseSections(content) { + let gs = w.startIdx + max(0, min(s.startIndex, local.count - 1)) + let ge = w.startIdx + max(0, min(s.endIndex, local.count - 1)) + guard ge >= gs else { continue } + all.append(TopicSection(title: s.title, summary: s.summary, startIndex: gs, endIndex: ge)) + } + } + let stitched = Self.stitch(all) + // If the model returned nothing usable, fall back to one section for the whole call. + if stitched.isEmpty { + return [TopicSection(title: "Conversation", summary: "", startIndex: 0, endIndex: entries.count - 1)] + } + return stitched + } + + /// Plan time-based windows over the entries. Single window for short calls; + /// otherwise ~`bodySec` bodies with `overlapSec` of overlap each side so a topic + /// straddling a boundary is seen by both windows (the stitcher dedupes). + static func planWindows(_ entries: [Entry], + bodySec: Double = 18 * 60, overlapSec: Double = 2 * 60, + cutoffSec: Double = 25 * 60) -> [Window] { + guard !entries.isEmpty else { return [] } + let total = entries.last!.end + if total <= cutoffSec { + return [Window(startIdx: 0, endIdx: entries.count - 1, bodyStartIdx: 0)] + } + var windows: [Window] = [] + var bodyStartIdx = 0 + while bodyStartIdx < entries.count { + let bodyStartSec = entries[bodyStartIdx].offset + let winStartSec = bodyStartSec - overlapSec + let bodyEndSec = bodyStartSec + bodySec + let winEndSec = bodyEndSec + overlapSec + let startIdx = entries.firstIndex { $0.offset >= winStartSec } ?? bodyStartIdx + var endIdx = bodyStartIdx + while endIdx + 1 < entries.count && entries[endIdx + 1].offset <= winEndSec { endIdx += 1 } + windows.append(Window(startIdx: startIdx, endIdx: endIdx, bodyStartIdx: bodyStartIdx)) + let next = entries.firstIndex { $0.offset >= bodyEndSec } ?? entries.count + bodyStartIdx = max(next, bodyStartIdx + 1) + } + return windows + } + + /// Merge per-window sections into one chronological, non-overlapping list. + /// Sort by start (wider first on ties), drop fully-contained, trim front overlaps. + static func stitch(_ sections: [TopicSection]) -> [TopicSection] { + let sorted = sections.sorted { + $0.startIndex != $1.startIndex ? $0.startIndex < $1.startIndex : $0.endIndex > $1.endIndex + } + var out: [TopicSection] = [] + var maxEnd = -1 + for s in sorted { + var start = s.startIndex + let end = s.endIndex + if end <= maxEnd { continue } // fully contained → drop + if start <= maxEnd { start = maxEnd + 1 } // overlap → trim front + guard start <= end else { continue } + out.append(TopicSection(title: s.title, summary: s.summary, startIndex: start, endIndex: end)) + maxEnd = end + } + return out + } + + // MARK: - Extras + + func extras(file: SpeakersFile, entries: [Entry], sections: [TopicSection]) async throws -> MeetingExtras? { + let prompt = Self.extrasPrompt(file: file, entries: entries, sections: sections) + let content = try await llm.completeJSON(model: model, system: nil, user: prompt, maxTokens: 4096) + return Self.parseExtras(content) + } + + // MARK: - Entries + + static func entries(from file: SpeakersFile) -> [Entry] { + file.segments + .filter { !($0.text ?? "").trimmingCharacters(in: .whitespacesAndNewlines).isEmpty } + .sorted { $0.start < $1.start } + .map { Entry(offset: $0.start, end: $0.end, speaker: $0.speaker, + text: ($0.text ?? "").trimmingCharacters(in: .whitespacesAndNewlines)) } + } + + static func mmss(_ sec: Double) -> String { + let t = max(0, Int(sec.rounded())) + let h = t / 3600, m = (t % 3600) / 60, s = t % 60 + return h > 0 ? String(format: "%d:%02d:%02d", h, m, s) : String(format: "%d:%02d", m, s) + } + + // MARK: - Prompts + + private static func analyzePrompt(_ window: [Entry], totalSec: Double, windowCount: Int) -> String { + let lines = window.enumerated() + .map { "[\($0.offset)] (\(mmss($0.element.offset))) \($0.element.speaker): \($0.element.text)" } + .joined(separator: "\n") + let windowSpan = (window.last?.end ?? 0) - (window.first?.offset ?? 0) + let windowMin = max(1, Int((windowSpan / 60).rounded())) + let maxIndex = window.count - 1 + let targetSections = targetSectionsPhrase(totalSec: totalSec, windowCount: windowCount) + return """ + You are analyzing a ~\(windowMin)-minute section of a longer transcript. Your job is to identify natural topic boundaries and group the transcript into discussion-based sections — aim for \(targetSections). + + TRANSCRIPT (each line is numbered with a timestamp): + \(lines) + + INSTRUCTIONS: + 1. Read the entire transcript carefully. + 2. Identify where the discussion naturally shifts from one topic to another. + 3. Group consecutive transcript segments by topic. Some sections may be short (a quick aside) and some may be long (an extended deep-dive). Let the content dictate the length. + 4. For each section, write: + - A short, specific topic title (3-8 words) + - A 1-3 sentence summary of what's discussed. Attribute points to speakers by name where it improves clarity. + - The start and end segment indices (inclusive), counted as the bracketed [N] number at the start of each transcript line above. + + IMPORTANT: + - Sections must be chronological and non-overlapping. + - Every segment index from 0 to \(maxIndex) must belong to exactly one section. + - startIndex of section N+1 must equal endIndex of section N plus 1. + - Create as many or as few sections as the content naturally requires — but lean toward broad, substantive topics rather than minute-by-minute breakdowns. A natural topic that spans several minutes of dialogue should be one section, not several. + - Titles should be descriptive and specific, not generic like "Introduction" unless it truly is one. + + Respond with ONLY valid JSON in this exact format, no other text: + { + "sections": [ + { + "title": "Brief Topic Title", + "summary": "1-3 sentence summary of this discussion section.", + "startIndex": 0, + "endIndex": 15 + } + ] + } + """ + } + + private static func targetSectionsPhrase(totalSec: Double, windowCount: Int) -> String { + let m = totalSec / 60 + let total = m < 5 ? 3 : m < 15 ? 4 : m < 30 ? 6 : m < 60 ? 8 : m < 120 ? 12 : 16 + let per = max(2, Int((Double(total) / Double(max(1, windowCount))).rounded())) + return "around \(per) sections" + } + + private static func extrasPrompt(file: SpeakersFile, entries: [Entry], sections: [TopicSection]) -> String { + let names = orderedSpeakerNames(entries) + let roster = names.isEmpty ? "(unknown)" : names.joined(separator: ", ") + let topics = sections.isEmpty ? "(none)" : + sections.enumerated().map { "\($0.offset + 1). \($0.element.title)" }.joined(separator: "\n") + let transcript = cappedTranscript(entries, maxChars: 24_000) + let durationStr = mmss(file.durationSec) + return """ + You are extracting structured information from an internal team meeting transcript. The transcript below is labeled with the speakers' real names where known. + + MEETING METADATA: + - App: \(file.app) + - Duration: \(durationStr) + + SPEAKERS: \(roster) + + TOPIC SUMMARIES (already produced — for context only, do not duplicate): + \(topics) + + TRANSCRIPT (each line is "[ ] text"): + \(transcript) + + INSTRUCTIONS: + Extract FIVE categories of information. Return EMPTY ARRAYS for categories that don't apply — do NOT invent items. Use the speakers' names exactly as shown above; use null/empty when a person is unclear. + + 1. TLDR — A 2-4 sentence executive summary of the entire meeting: what it was about, the key discussion arc, and the bottom-line outcome. Past tense, third person, dense. Skip pleasantries. If the meeting was genuinely substanceless, write one factual sentence. This is the only required category. + - summary: the 2-4 sentence executive summary + - primary_speakers: array of names who drove the conversation (1-3, in rough order of contribution). Empty array if unclear. + + 2. DECISIONS — Things explicitly decided/agreed. Only clear commitments, not casual mentions. For each: + - statement: the decision in one sentence + - agreed_by: array of names who explicitly agreed (empty if unclear) + - supporting_offset: integer SECONDS where it was decided (convert the [ ] timestamp to total seconds) + + 3. ACTION_ITEMS — Explicit ownership ("I'll send the doc", "Matt will follow up"), not vague "someone should". For each: + - description: the action in imperative form + - owner: the person's name, or null if unclear + - due_hint: deadline string if mentioned ("by Friday"), or null + - supporting_offset: integer seconds where the commitment was made + + 4. OPEN_QUESTIONS — Questions raised that were NOT clearly answered. Skip rhetorical/answered ones. For each: + - question: rephrased to be self-contained + - raised_by: the person's name, or null + - answered: false (always) + + 5. KEY_QUOTES — 3-6 max. Pivotal/insightful/strong-opinion statements worth surfacing verbatim. For each: + - speaker: the person's name (or null) + - offset: integer seconds where the quote occurs + - quote: the verbatim quote (4-30 words) + - why_notable: one short clause + + Be conservative — better an empty array than a fabrication. Respond with ONLY valid JSON in this exact shape, no other text: + { + "tldr": {"summary": "...", "primary_speakers": []}, + "decisions": [{"statement": "...", "agreed_by": [], "supporting_offset": 0}], + "action_items": [{"description": "...", "owner": null, "due_hint": null, "supporting_offset": 0}], + "open_questions": [{"question": "...", "raised_by": null, "answered": false}], + "key_quotes": [{"speaker": null, "offset": 0, "quote": "...", "why_notable": "..."}] + } + """ + } + + /// Distinct speaker names in first-appearance order. + static func orderedSpeakerNames(_ entries: [Entry]) -> [String] { + var seen = Set(), order: [String] = [] + for e in entries where !e.speaker.isEmpty && !seen.contains(e.speaker) { + seen.insert(e.speaker); order.append(e.speaker) + } + return order + } + + /// Full `[name MM:SS] text` transcript, middle-truncated to `maxChars` so a long + /// call still fits the model context (keeps the start and end, drops the middle). + static func cappedTranscript(_ entries: [Entry], maxChars: Int) -> String { + let full = entries.map { "[\($0.speaker) \(mmss($0.offset))] \($0.text)" }.joined(separator: "\n") + guard full.count > maxChars else { return full } + let half = maxChars / 2 + let head = String(full.prefix(half)) + let tail = String(full.suffix(half)) + return head + "\n…[transcript truncated]…\n" + tail + } + + // MARK: - Parsing (defensive — LLM output) + + private static func jsonObject(_ content: String) -> [String: Any]? { + let cleaned = GatewayLLMClient.stripCodeFence(content) + return (try? JSONSerialization.jsonObject(with: Data(cleaned.utf8))) as? [String: Any] + } + + static func parseSections(_ content: String) -> [(title: String, summary: String, startIndex: Int, endIndex: Int)] { + guard let o = jsonObject(content), let arr = o["sections"] as? [[String: Any]] else { return [] } + return arr.compactMap { d in + guard let t = (d["title"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines), !t.isEmpty, + let si = intVal(d["startIndex"]), let ei = intVal(d["endIndex"]) else { return nil } + let s = (d["summary"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + return (t, s, si, ei) + } + } + + static func parseExtras(_ content: String) -> MeetingExtras? { + guard let o = jsonObject(content) else { return nil } + let tldrObj = o["tldr"] as? [String: Any] + let tldr = MeetingExtras.TLDR( + summary: (tldrObj?["summary"] as? String) ?? "", + primarySpeakers: stringArray(tldrObj?["primary_speakers"])) + let decisions = (o["decisions"] as? [[String: Any]] ?? []).compactMap { d -> MeetingExtras.Decision? in + guard let st = nonEmpty(d["statement"]) else { return nil } + return .init(statement: st, agreedBy: stringArray(d["agreed_by"]), supportingOffset: intVal(d["supporting_offset"])) + } + let actions = (o["action_items"] as? [[String: Any]] ?? []).compactMap { d -> MeetingExtras.ActionItem? in + guard let desc = nonEmpty(d["description"]) else { return nil } + return .init(description: desc, owner: nonEmpty(d["owner"]), dueHint: nonEmpty(d["due_hint"]), + supportingOffset: intVal(d["supporting_offset"])) + } + let questions = (o["open_questions"] as? [[String: Any]] ?? []).compactMap { d -> MeetingExtras.OpenQuestion? in + guard let q = nonEmpty(d["question"]) else { return nil } + return .init(question: q, raisedBy: nonEmpty(d["raised_by"])) + } + let quotes = (o["key_quotes"] as? [[String: Any]] ?? []).compactMap { d -> MeetingExtras.KeyQuote? in + guard let q = nonEmpty(d["quote"]) else { return nil } + return .init(speaker: nonEmpty(d["speaker"]), offset: intVal(d["offset"]), quote: q, + whyNotable: nonEmpty(d["why_notable"]) ?? "") + } + // Require at least a TLDR to consider extras present. + guard !tldr.summary.isEmpty || !decisions.isEmpty || !actions.isEmpty || !questions.isEmpty || !quotes.isEmpty + else { return nil } + return MeetingExtras(tldr: tldr, decisions: decisions, actionItems: actions, + openQuestions: questions, keyQuotes: quotes) + } + + private static func intVal(_ v: Any?) -> Int? { + if let i = v as? Int { return i } + if let d = v as? Double { return Int(d) } + if let s = v as? String { return Int(s.trimmingCharacters(in: .whitespacesAndNewlines)) } + return nil + } + + private static func stringArray(_ v: Any?) -> [String] { + (v as? [Any])?.compactMap { ($0 as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) } + .filter { !$0.isEmpty } ?? [] + } + + private static func nonEmpty(_ v: Any?) -> String? { + guard let s = (v as? String)?.trimmingCharacters(in: .whitespacesAndNewlines), !s.isEmpty, + s.lowercased() != "null" else { return nil } + return s + } +} diff --git a/Ten31Transcripts/Recap/RecapModels.swift b/Ten31Transcripts/Recap/RecapModels.swift new file mode 100644 index 0000000..7c3b44f --- /dev/null +++ b/Ten31Transcripts/Recap/RecapModels.swift @@ -0,0 +1,38 @@ +import Foundation + +/// One topic section: a contiguous run of transcript entries `[startIndex...endIndex]` +/// (inclusive, indices into the canonical entries array) with an LLM title + summary. +struct TopicSection: Equatable { + var title: String + var summary: String + var startIndex: Int + var endIndex: Int +} + +/// Structured "meeting extras" extracted from the named transcript. Mirrors +/// recap-relay's schema; speakers are real names (we already have them from +/// label-merge), not anonymous cluster ids. +struct MeetingExtras: Equatable { + struct TLDR: Equatable { var summary: String; var primarySpeakers: [String] } + struct Decision: Equatable { var statement: String; var agreedBy: [String]; var supportingOffset: Int? } + struct ActionItem: Equatable { var description: String; var owner: String?; var dueHint: String?; var supportingOffset: Int? } + struct OpenQuestion: Equatable { var question: String; var raisedBy: String? } + struct KeyQuote: Equatable { var speaker: String?; var offset: Int?; var quote: String; var whyNotable: String } + + var tldr: TLDR + var decisions: [Decision] + var actionItems: [ActionItem] + var openQuestions: [OpenQuestion] + var keyQuotes: [KeyQuote] + + var isEmptyBeyondTLDR: Bool { + decisions.isEmpty && actionItems.isEmpty && openQuestions.isEmpty && keyQuotes.isEmpty + } +} + +/// The assembled recap for one session: the topic sections + (optional) extras, +/// over the session's transcript. Rendered to `transcript.md` / `recap.html`. +struct RecapResult: Equatable { + var sections: [TopicSection] + var extras: MeetingExtras? +} diff --git a/Ten31Transcripts/Recap/RecapRenderer.swift b/Ten31Transcripts/Recap/RecapRenderer.swift new file mode 100644 index 0000000..8a3337d --- /dev/null +++ b/Ten31Transcripts/Recap/RecapRenderer.swift @@ -0,0 +1,253 @@ +import Foundation + +/// Renders a session (`speakers.json` + recap) into human-readable artifacts: +/// `transcript.md` (portable/editable) and `recap.html` (self-contained, dark +/// theme, printable, shareable). Port of recap-relay's meetingToMarkdown/Html, +/// driven by our already-named transcript. +enum RecapRenderer { + static func write(file: SpeakersFile, result: RecapResult, title: String, to folder: URL) throws { + let entries = RecapAnalyzer.entries(from: file) + try markdown(file: file, result: result, title: title, entries: entries) + .data(using: .utf8)?.write(to: folder.appendingPathComponent("transcript.md")) + try html(file: file, result: result, title: title, entries: entries) + .data(using: .utf8)?.write(to: folder.appendingPathComponent("recap.html")) + } + + // MARK: - Markdown + + static func markdown(file: SpeakersFile, result: RecapResult, title: String, + entries: [RecapAnalyzer.Entry]) -> String { + var out = "# \(title)\n\n" + let speakers = RecapAnalyzer.orderedSpeakerNames(entries) + out += "*\(file.app) · \(RecapAnalyzer.mmss(file.durationSec))" + if !speakers.isEmpty { out += " · \(speakers.count) speaker\(speakers.count == 1 ? "" : "s"): \(speakers.joined(separator: ", "))" } + out += "*\n\n" + + if let x = result.extras { + if !x.tldr.summary.isEmpty { + out += "## Summary\n\n\(x.tldr.summary)\n" + if !x.tldr.primarySpeakers.isEmpty { out += "\n*Primary speakers: \(x.tldr.primarySpeakers.joined(separator: ", "))*\n" } + out += "\n" + } + if !x.decisions.isEmpty { + out += "## Decisions\n\n" + for d in x.decisions { + var line = "- \(d.statement)" + if !d.agreedBy.isEmpty { line += " — agreed by \(d.agreedBy.joined(separator: ", "))" } + if let o = d.supportingOffset { line += " *(\(RecapAnalyzer.mmss(Double(o))))*" } + out += line + "\n" + } + out += "\n" + } + if !x.actionItems.isEmpty { + out += "## Action Items\n\n" + for a in x.actionItems { + var line = "- [ ] \(a.description)" + if let o = a.owner { line += " — **\(o)**" } + if let due = a.dueHint { line += " (\(due))" } + if let off = a.supportingOffset { line += " *(\(RecapAnalyzer.mmss(Double(off))))*" } + out += line + "\n" + } + out += "\n" + } + if !x.openQuestions.isEmpty { + out += "## Open Questions\n\n" + for q in x.openQuestions { + var line = "- \(q.question)" + if let r = q.raisedBy { line += " — *\(r)*" } + out += line + "\n" + } + out += "\n" + } + if !x.keyQuotes.isEmpty { + out += "## Key Quotes\n\n" + for k in x.keyQuotes { + out += "> \"\(k.quote)\"" + var attr: [String] = [] + if let s = k.speaker { attr.append(s) } + if let o = k.offset { attr.append(RecapAnalyzer.mmss(Double(o))) } + if !attr.isEmpty { out += " — \(attr.joined(separator: ", "))" } + out += "\n" + if !k.whyNotable.isEmpty { out += ">\n> \(k.whyNotable)\n" } + out += "\n" + } + } + } + + if !result.sections.isEmpty { + out += "## Topics\n\n" + for (i, sec) in result.sections.enumerated() { + let range = timeRange(sec, entries: entries) + out += "### \(i + 1). \(sec.title)\(range)\n\n" + if !sec.summary.isEmpty { out += "\(sec.summary)\n\n" } + out += "
\nTranscript\n\n" + out += transcriptLines(sec, entries: entries) + out += "\n
\n\n" + } + } + + out += "## Full Transcript\n\n" + for e in entries { out += "**[\(RecapAnalyzer.mmss(e.offset))] \(e.speaker):** \(e.text)\n\n" } + return out + } + + private static func timeRange(_ sec: TopicSection, entries: [RecapAnalyzer.Entry]) -> String { + guard entries.indices.contains(sec.startIndex), entries.indices.contains(sec.endIndex) else { return "" } + return " *(\(RecapAnalyzer.mmss(entries[sec.startIndex].offset))–\(RecapAnalyzer.mmss(entries[sec.endIndex].end)))*" + } + + private static func transcriptLines(_ sec: TopicSection, entries: [RecapAnalyzer.Entry]) -> String { + guard sec.startIndex <= sec.endIndex, entries.indices.contains(sec.startIndex), entries.indices.contains(sec.endIndex) + else { return "" } + return entries[sec.startIndex...sec.endIndex] + .map { "**[\(RecapAnalyzer.mmss($0.offset))] \($0.speaker):** \($0.text)" } + .joined(separator: "\n\n") + } + + // MARK: - HTML + + static func html(file: SpeakersFile, result: RecapResult, title: String, + entries: [RecapAnalyzer.Entry]) -> String { + let speakers = RecapAnalyzer.orderedSpeakerNames(entries) + let colorFor = speakerColors(speakers) + func chip(_ name: String) -> String { + let c = colorFor[name] ?? "#8a8f98" + return "\(esc(name))" + } + + var body = "" + let sub = "\(esc(file.app)) · \(RecapAnalyzer.mmss(file.durationSec))" + + (speakers.isEmpty ? "" : " · \(speakers.count) speaker\(speakers.count == 1 ? "" : "s")") + body += "

\(esc(title))

\(sub)
" + if !speakers.isEmpty { + body += "
" + speakers.map { chip($0) }.joined() + "
" + } + body += "
" + + if let x = result.extras { + if !x.tldr.summary.isEmpty { + body += card("Summary", "

\(esc(x.tldr.summary))

" + + (x.tldr.primarySpeakers.isEmpty ? "" : "

Primary: \(x.tldr.primarySpeakers.map(esc).joined(separator: ", "))

")) + } + if !x.decisions.isEmpty { + let items = x.decisions.map { d -> String in + var s = "
  • \(esc(d.statement))" + if !d.agreedBy.isEmpty { s += " — agreed by \(d.agreedBy.map(esc).joined(separator: ", "))" } + if let o = d.supportingOffset { s += " \(RecapAnalyzer.mmss(Double(o)))" } + return s + "
  • " + }.joined() + body += card("Decisions", "
      \(items)
    ") + } + if !x.actionItems.isEmpty { + let items = x.actionItems.map { a -> String in + var s = "
  • ☐ \(esc(a.description))" + if let o = a.owner { s += " \(esc(o))" } + if let due = a.dueHint { s += " (\(esc(due)))" } + if let off = a.supportingOffset { s += " \(RecapAnalyzer.mmss(Double(off)))" } + return s + "
  • " + }.joined() + body += card("Action Items", "
      \(items)
    ") + } + if !x.openQuestions.isEmpty { + let items = x.openQuestions.map { q -> String in + "
  • \(esc(q.question))" + (q.raisedBy.map { " — \(esc($0))" } ?? "") + "
  • " + }.joined() + body += card("Open Questions", "
      \(items)
    ") + } + if !x.keyQuotes.isEmpty { + let items = x.keyQuotes.map { k -> String in + var attr: [String] = [] + if let s = k.speaker { attr.append(esc(s)) } + if let o = k.offset { attr.append(RecapAnalyzer.mmss(Double(o))) } + var s = "
    “\(esc(k.quote))”" + if !attr.isEmpty { s += "— \(attr.joined(separator: ", "))" } + if !k.whyNotable.isEmpty { s += "
    \(esc(k.whyNotable))
    " } + return s + "
    " + }.joined() + body += card("Key Quotes", items) + } + } + + if !result.sections.isEmpty { + var topics = "" + for (i, sec) in result.sections.enumerated() { + let range = entries.indices.contains(sec.startIndex) && entries.indices.contains(sec.endIndex) + ? "\(RecapAnalyzer.mmss(entries[sec.startIndex].offset))–\(RecapAnalyzer.mmss(entries[sec.endIndex].end))" : "" + topics += "
    \(i + 1) \(esc(sec.title)) \(range)" + if !sec.summary.isEmpty { topics += "

    \(esc(sec.summary))

    " } + topics += "
    " + turnsHtml(sec, entries: entries, chip: chip) + "
    " + } + body += card("Topics", topics) + } + + let full = entries.map { "
    \(RecapAnalyzer.mmss($0.offset)) \(chip($0.speaker)) \(esc($0.text))
    " }.joined() + body += "
    Full Transcript
    \(full)
    " + + return htmlShell(title: esc(title), body: body) + } + + private static func turnsHtml(_ sec: TopicSection, entries: [RecapAnalyzer.Entry], + chip: (String) -> String) -> String { + guard sec.startIndex <= sec.endIndex, entries.indices.contains(sec.startIndex), entries.indices.contains(sec.endIndex) + else { return "" } + return entries[sec.startIndex...sec.endIndex].map { + "
    \(RecapAnalyzer.mmss($0.offset)) \(chip($0.speaker)) \(esc($0.text))
    " + }.joined() + } + + private static func card(_ title: String, _ inner: String) -> String { + "

    \(esc(title))

    \(inner)
    " + } + + private static let palette = ["#5b8def", "#e0719c", "#43b581", "#e8a33d", "#9b6dde", + "#3fb6c9", "#d96f6f", "#7aa55c"] + private static func speakerColors(_ names: [String]) -> [String: String] { + var map: [String: String] = [:] + for (i, n) in names.enumerated() { map[n] = palette[i % palette.count] } + return map + } + + private static func esc(_ s: String) -> String { + s.replacingOccurrences(of: "&", with: "&") + .replacingOccurrences(of: "<", with: "<") + .replacingOccurrences(of: ">", with: ">") + .replacingOccurrences(of: "\"", with: """) + } + + private static func htmlShell(title: String, body: String) -> String { + """ + + + \(title) + +
    \(body) +
    Ten31 Transcripts · generated on-device
    +
    + """ + } +} diff --git a/Ten31Transcripts/Session/SessionController.swift b/Ten31Transcripts/Session/SessionController.swift index dbcf677..ea6b1b5 100644 --- a/Ten31Transcripts/Session/SessionController.swift +++ b/Ten31Transcripts/Session/SessionController.swift @@ -54,6 +54,8 @@ final class SessionController: ObservableObject { @Published private(set) var detectionStatus: CallDetector.Status = .disabled /// Backend transcription status for the last session. @Published private(set) var transcriptStatus: TranscriptStatus = .idle + /// Set when a readable recap (`recap.html`) has been written for the last session. + @Published private(set) var recapURL: URL? private let settings: AppSettings private var voiceprints: VoiceprintStore @@ -195,6 +197,7 @@ final class SessionController: ObservableObject { mixedURL: folder.appendingPathComponent("mixed_mono_16k.wav")) self.recorder = recorder warning = nil + recapURL = nil state = .starting lifecycleGeneration += 1 @@ -361,6 +364,7 @@ final class SessionController: ObservableObject { guard let inputs = lastProcess else { return } if case .processing = transcriptStatus { return } transcriptStatus = .processing(0, 1) + recapURL = nil let settings = self.settings let voiceprints = self.voiceprints @@ -379,6 +383,11 @@ final class SessionController: ObservableObject { await MainActor.run { self.transcriptStatus = .processing(done, total) } }) self.transcriptStatus = .done(speakers: speakers.speakers.count, segments: speakers.segments.count) + // Best-effort readable recap (topic sections + extras) via the gateway LLM. + if settings.recapEnabled, !speakers.segments.isEmpty { + try Task.checkCancellation() + await self.buildRecap(speakers: speakers, inputs: inputs, settings: settings) + } } catch is CancellationError { self.transcriptStatus = .idle } catch { @@ -387,6 +396,31 @@ final class SessionController: ObservableObject { } } + /// Build `transcript.md` + `recap.html` from the finished `speakers.json` using + /// the gateway LLM. Best-effort: a missing LLM or any failure leaves the + /// transcript intact and just skips the recap. + private func buildRecap(speakers: SpeakersFile, inputs: ProcessInputs, settings: AppSettings) async { + let llm = GatewayLLMClient(baseURL: settings.backendBaseURL, skipTLS: settings.skipTLSVerification) + guard let model = await llm.chatModelId() else { return } // no LLM on the gateway → skip + let analyzer = RecapAnalyzer(llm: llm, model: model) + guard let result = try? await analyzer.recap(file: speakers) else { return } + let title = Self.recapTitle(app: inputs.app, sessionId: inputs.sessionId) + try? RecapRenderer.write(file: speakers, result: result, title: title, to: inputs.folder) + let url = inputs.folder.appendingPathComponent("recap.html") + if FileManager.default.fileExists(atPath: url.path) { self.recapURL = url } + } + + /// Friendly recap title, e.g. "Google Meet call — 2026-06-06 11:43". + private static func recapTitle(app: String, sessionId: String) -> String { + let appName = CallDetector.DetectedApp(rawValue: app)?.display ?? app.capitalized + let stamp = sessionId.split(separator: "_").first.map(String.init) ?? sessionId + let parts = stamp.split(separator: "T") + let date = parts.first.map(String.init) ?? "" + let timeBits = parts.count > 1 ? parts[1].split(separator: "-") : [] + let time = timeBits.count >= 2 ? "\(timeBits[0]):\(timeBits[1])" : "" + return "\(appName) call — \(date) \(time)".trimmingCharacters(in: .whitespaces) + } + private func fail(_ message: String) { recorder = nil visualCapture = nil // recorder.start() failed before visual started; nothing running diff --git a/Ten31Transcripts/Settings/AppSettings.swift b/Ten31Transcripts/Settings/AppSettings.swift index a2c153b..74376f9 100644 --- a/Ten31Transcripts/Settings/AppSettings.swift +++ b/Ten31Transcripts/Settings/AppSettings.swift @@ -47,6 +47,12 @@ final class AppSettings: ObservableObject { didSet { defaults.set(autoSendOnStop, forKey: Keys.autoSend) } } + /// After transcription, build the readable recap (topic sections + meeting + /// extras) via the gateway LLM and write transcript.md / recap.html. Best-effort. + @Published var recapEnabled: Bool { + didSet { defaults.set(recapEnabled, forKey: Keys.recapEnabled) } + } + /// Output folder as a resolved file URL (expands a leading `~`). var outputFolderURL: URL { URL(fileURLWithPath: (outputFolderPath as NSString).expandingTildeInPath, @@ -74,6 +80,7 @@ final class AppSettings: ObservableObject { self.autoRecordOnDetection = defaults.object(forKey: Keys.autoRecord) as? Bool ?? true self.selfName = defaults.string(forKey: Keys.selfName) ?? "Me" self.autoSendOnStop = defaults.object(forKey: Keys.autoSend) as? Bool ?? false + self.recapEnabled = defaults.object(forKey: Keys.recapEnabled) as? Bool ?? true } private enum Keys { @@ -84,5 +91,6 @@ final class AppSettings: ObservableObject { static let autoRecord = "autoRecordOnDetection" static let selfName = "selfName" static let autoSend = "autoSendOnStop" + static let recapEnabled = "recapEnabled" } } diff --git a/Ten31Transcripts/UI/MenuBarView.swift b/Ten31Transcripts/UI/MenuBarView.swift index 86fe4a6..2a455fe 100644 --- a/Ten31Transcripts/UI/MenuBarView.swift +++ b/Ten31Transcripts/UI/MenuBarView.swift @@ -91,6 +91,9 @@ struct MenuBarView: View { HStack { Button("Send to backend") { session.processLastSession() } .disabled(transcriptProcessing) + if let recap = session.recapURL { + Button("Open recap") { NSWorkspace.shared.open(recap) } + } Spacer() } if !transcriptText.isEmpty { diff --git a/Ten31Transcripts/UI/SettingsView.swift b/Ten31Transcripts/UI/SettingsView.swift index 0f2d97f..2396ded 100644 --- a/Ten31Transcripts/UI/SettingsView.swift +++ b/Ten31Transcripts/UI/SettingsView.swift @@ -25,7 +25,8 @@ struct SettingsView: View { TextField("Your name", text: $settings.selfName) .textFieldStyle(.roundedBorder) Toggle("Auto-send recordings to backend", isOn: $settings.autoSendOnStop) - Text("Your name labels the mic-VAD \"self\" spans. Auto-send transcribes each recording on stop.") + Toggle("Build readable recap (topics + highlights)", isOn: $settings.recapEnabled) + Text("Your name labels your mic channel. Auto-send transcribes on stop; the recap writes transcript.md + recap.html.") .font(.caption) .foregroundStyle(.secondary) } diff --git a/Ten31TranscriptsTests/RecapTests.swift b/Ten31TranscriptsTests/RecapTests.swift new file mode 100644 index 0000000..e90fe9d --- /dev/null +++ b/Ten31TranscriptsTests/RecapTests.swift @@ -0,0 +1,129 @@ +import XCTest +@testable import Ten31Transcripts + +final class RecapTests: XCTestCase { + + private func entry(_ off: Double, _ end: Double, _ who: String, _ text: String) -> RecapAnalyzer.Entry { + .init(offset: off, end: end, speaker: who, text: text) + } + + // MARK: - Parsing + + func testParseSectionsHandlesStringIndices() { + let json = #"{"sections":[{"title":"Intro","summary":"hi","startIndex":"0","endIndex":3},{"title":"Topic","summary":"x","startIndex":4,"endIndex":9}]}"# + let secs = RecapAnalyzer.parseSections(json) + XCTAssertEqual(secs.count, 2) + XCTAssertEqual(secs[0].title, "Intro") + XCTAssertEqual(secs[0].startIndex, 0) + XCTAssertEqual(secs[1].endIndex, 9) + } + + func testParseSectionsStripsCodeFence() { + let json = "```json\n{\"sections\":[{\"title\":\"A\",\"summary\":\"\",\"startIndex\":0,\"endIndex\":1}]}\n```" + XCTAssertEqual(RecapAnalyzer.parseSections(json).count, 1) + } + + func testParseExtras() { + let json = #""" + {"tldr":{"summary":"They discussed the roadmap.","primary_speakers":["Grant","Caitlyn"]}, + "decisions":[{"statement":"Ship dual-channel","agreed_by":["Grant"],"supporting_offset":72}], + "action_items":[{"description":"Send the doc","owner":"Caitlyn","due_hint":"by Friday","supporting_offset":120}], + "open_questions":[{"question":"What about Teams?","raised_by":"Grant","answered":false}], + "key_quotes":[{"speaker":"Caitlyn","offset":73,"quote":"Go Bitcoin","why_notable":"sets the tone"}]} + """# + let x = RecapAnalyzer.parseExtras(json) + XCTAssertNotNil(x) + XCTAssertEqual(x?.tldr.primarySpeakers, ["Grant", "Caitlyn"]) + XCTAssertEqual(x?.decisions.first?.supportingOffset, 72) + XCTAssertEqual(x?.actionItems.first?.owner, "Caitlyn") + XCTAssertEqual(x?.actionItems.first?.dueHint, "by Friday") + XCTAssertEqual(x?.openQuestions.first?.question, "What about Teams?") + XCTAssertEqual(x?.keyQuotes.first?.quote, "Go Bitcoin") + } + + func testParseExtrasDropsNullStrings() { + // owner/raised_by "null" or empty must become nil, not a literal "null". + let json = #"{"tldr":{"summary":"s","primary_speakers":[]},"action_items":[{"description":"do it","owner":"null","due_hint":""}],"decisions":[],"open_questions":[],"key_quotes":[]}"# + let x = RecapAnalyzer.parseExtras(json) + XCTAssertNil(x?.actionItems.first?.owner) + XCTAssertNil(x?.actionItems.first?.dueHint) + } + + // MARK: - Stitch / windows + + func testStitchDropsContainedAndTrimsOverlap() { + let secs = [ + TopicSection(title: "A", summary: "", startIndex: 0, endIndex: 5), + TopicSection(title: "B-contained", summary: "", startIndex: 2, endIndex: 4), + TopicSection(title: "C-overlap", summary: "", startIndex: 4, endIndex: 9), + ] + let out = RecapAnalyzer.stitch(secs) + XCTAssertEqual(out.map { $0.title }, ["A", "C-overlap"]) + XCTAssertEqual(out[0].startIndex, 0); XCTAssertEqual(out[0].endIndex, 5) + XCTAssertEqual(out[1].startIndex, 6); XCTAssertEqual(out[1].endIndex, 9) // trimmed front + } + + func testPlanWindowsSingleForShortCall() { + let entries = (0..<10).map { entry(Double($0 * 10), Double($0 * 10 + 5), "A", "x") } // ~100s + let w = RecapAnalyzer.planWindows(entries) + XCTAssertEqual(w.count, 1) + XCTAssertEqual(w[0].startIdx, 0); XCTAssertEqual(w[0].endIdx, 9) + } + + func testPlanWindowsMultipleForLongCall() { + // 40 entries at 60s spacing → ~39 min, over the 25-min cutoff. + let entries = (0..<40).map { entry(Double($0 * 60), Double($0 * 60 + 30), "A", "x") } + let w = RecapAnalyzer.planWindows(entries) + XCTAssertGreaterThan(w.count, 1) + XCTAssertEqual(w.first?.startIdx, 0) + XCTAssertEqual(w.last?.endIdx, 39) // last window reaches the end + for i in 1..")) + XCTAssertTrue(html.contains("Go Bitcoin")) + } +}