Recap: readable transcript + topic sections + meeting extras (gateway LLM)

New 'Recap' phase — turns speakers.json into a human-readable recap, leveraging
recap-relay's proven logic/prompts but calling the Spark gateway's OpenAI-compatible
/v1/chat/completions directly (same host/TLS as label-merge; Qwen3-35B). We start
from already-named speakers (label-merge), so recap-relay's speaker clustering +
name-inference are skipped entirely.

- GatewayLLMClient: /v1/chat/completions (JSON mode), model discovery via
  /api/endpoints, TLS-skip reuse, 503 retry, sequential.
- RecapAnalyzer: speakers.json → numbered [N] (MM:SS) Name: text transcript →
  time-windowed analyze (single window for short calls, 18min/2min overlap for long)
  → stitch/dedup topic sections → meeting extras (TLDR/decisions/action_items/
  open_questions/key_quotes). Defensive JSON parsing of LLM output.
- RecapRenderer: writes transcript.md + a self-contained dark-theme recap.html
  (topic sections w/ collapsible transcripts, extras panels, speaker color chips,
  full timestamped speaker-attributed transcript, print styles).
- SessionController.buildRecap: best-effort after speakers.json (gated by
  settings.recapEnabled); surfaces recapURL → menu 'Open recap'. Skips silently if
  the gateway has no LLM. Settings toggle added.

Validated END-TO-END on the real Meet session against the live gateway: dual-channel
transcription → 3 topic sections + accurate TLDR + key quotes; 'Go Bitcoin'
correctly attributed to the remote speaker. 46/46 XCTest (10 new).
This commit is contained in:
Grant Gilliam
2026-06-06 14:36:18 -05:00
parent 53d7fcdac0
commit 85bfdf2b56
9 changed files with 941 additions and 1 deletions
+339
View File
@@ -0,0 +1,339 @@
import Foundation
/// Turns a finished `speakers.json` into topic sections + meeting extras by driving
/// the gateway LLM — a Swift port of recap-relay's chunked-window analysis, but
/// starting from already-named speakers (label-merge), so we skip its speaker
/// clustering and name-inference entirely. Pure helpers are static + testable; the
/// LLM passes are sequential (one gateway request at a time).
final class RecapAnalyzer {
/// Client used for every `completeJSON` request this analyzer makes.
private let llm: GatewayLLMClient
/// Model identifier forwarded unchanged to each `completeJSON` call.
private let model: String
/// Creates an analyzer bound to one LLM client and one model name.
/// - Parameters:
///   - llm: Client that performs the chat-completion requests.
///   - model: Model identifier passed along with every request.
init(llm: GatewayLLMClient, model: String) {
self.llm = llm
self.model = model
}
/// One transcript line: who spoke, what they said, and when.
/// Built by `entries(from:)` from a segment's `start`, `end`, `speaker` and trimmed `text`.
struct Entry: Equatable {
/// Start of the line, in seconds (taken from the segment's `start`).
let offset: Double // seconds
/// End of the line (taken from the segment's `end`; presumably seconds like `offset` — confirm).
let end: Double
/// Speaker label copied from the segment.
let speaker: String
/// Spoken text with leading/trailing whitespace and newlines removed.
let text: String
}
/// A contiguous slice of the entry list that is sent to the model in one request.
/// All three values are indices into the full entries array, as produced by `planWindows`.
struct Window: Equatable {
let startIdx: Int // first entry index this window analyzes (incl. overlap)
let endIdx: Int // last entry index (incl. overlap)
let bodyStartIdx: Int // first entry this window "owns"
}
// MARK: - Orchestration
/// Runs the full recap: the topic pass first, then the extras pass.
///
/// The extras pass is best-effort: if it throws, the result carries `nil` extras
/// rather than failing the whole recap. A failure in the topic pass is rethrown.
///
/// - Parameters:
///   - file: The speakers file to summarize.
///   - progress: Optional callback invoked with a short status string before each pass.
/// - Returns: Topic sections plus optional extras; empty sections and `nil` extras
///   when the file yields no usable entries.
func recap(file: SpeakersFile, progress: ((String) async -> Void)? = nil) async throws -> RecapResult {
    let transcriptEntries = Self.entries(from: file)
    if transcriptEntries.isEmpty {
        return RecapResult(sections: [], extras: nil)
    }

    await progress?("Finding topics…")
    let topicSections = try await analyze(entries: transcriptEntries)

    await progress?("Extracting highlights…")
    let highlights: MeetingExtras?
    do {
        highlights = try await self.extras(file: file, entries: transcriptEntries, sections: topicSections)
    } catch {
        // Extras are optional; a failure here must not discard the topic sections.
        highlights = nil
    }

    return RecapResult(sections: topicSections, extras: highlights)
}
// MARK: - Analyze (chunked windows → stitched sections)
/// Runs the topic pass: one LLM request per planned window, strictly in sequence,
/// then stitches the per-window sections into one chronological list.
///
/// - Parameter entries: Transcript entries, expected to be ordered by `offset`
///   (the order `entries(from:)` produces).
/// - Returns: Sections whose `startIndex`/`endIndex` are positions in `entries`.
///   Empty when `entries` is empty; a single catch-all "Conversation" section
///   covering everything when the model returned nothing usable.
/// - Throws: Any error thrown by `llm.completeJSON` for any window.
func analyze(entries: [Entry]) async throws -> [TopicSection] {
    // Without this guard the fallback below would build a section spanning 0...(-1).
    guard !entries.isEmpty else { return [] }

    let windows = Self.planWindows(entries)
    var all: [TopicSection] = []
    for w in windows {
        let local = Array(entries[w.startIdx...w.endIdx])
        let prompt = Self.analyzePrompt(local, totalSec: entries.last?.end ?? 0, windowCount: windows.count)
        let content = try await llm.completeJSON(model: model, system: nil, user: prompt)
        let lastLocal = local.count - 1
        for s in Self.parseSections(content) {
            // The model answers in window-local indices: clamp them into the window,
            // then shift by the window start to get indices into `entries`.
            let gs = w.startIdx + max(0, min(s.startIndex, lastLocal))
            let ge = w.startIdx + max(0, min(s.endIndex, lastLocal))
            guard ge >= gs else { continue }
            all.append(TopicSection(title: s.title, summary: s.summary, startIndex: gs, endIndex: ge))
        }
    }

    let stitched = Self.stitch(all)
    // If the model returned nothing usable, fall back to one section for the whole call.
    if stitched.isEmpty {
        return [TopicSection(title: "Conversation", summary: "", startIndex: 0, endIndex: entries.count - 1)]
    }
    return stitched
}
/// Splits the entry list into time-based analysis windows.
///
/// A call whose last entry ends at or before `cutoffSec` gets one window covering
/// everything. Longer calls get consecutive bodies of roughly `bodySec`, each padded
/// by `overlapSec` on both sides so a topic that straddles a boundary is visible to
/// both neighbouring windows (the stitcher removes the duplicates afterwards).
///
/// - Parameters:
///   - entries: Transcript entries; the scan assumes ascending `offset`.
///   - bodySec: Target length of the part each window "owns", in seconds.
///   - overlapSec: Extra context added before and after each body, in seconds.
///   - cutoffSec: Calls no longer than this are analyzed as a single window.
/// - Returns: Windows in order; empty when `entries` is empty.
static func planWindows(_ entries: [Entry],
                        bodySec: Double = 18 * 60, overlapSec: Double = 2 * 60,
                        cutoffSec: Double = 25 * 60) -> [Window] {
    guard let finalEntry = entries.last else { return [] }
    let lastIdx = entries.count - 1
    if finalEntry.end <= cutoffSec {
        return [Window(startIdx: 0, endIdx: lastIdx, bodyStartIdx: 0)]
    }

    var planned: [Window] = []
    var bodyStart = 0
    while bodyStart < entries.count {
        let bodyOpens = entries[bodyStart].offset
        let bodyCloses = bodyOpens + bodySec
        let earliest = bodyOpens - overlapSec
        let latest = bodyCloses + overlapSec

        // Leading edge: first entry at or after the padded start.
        let first = entries.firstIndex(where: { $0.offset >= earliest }) ?? bodyStart

        // Trailing edge: extend while the next entry still starts inside the padded end.
        var last = bodyStart
        while last < lastIdx, entries[last + 1].offset <= latest {
            last += 1
        }
        planned.append(Window(startIdx: first, endIdx: last, bodyStartIdx: bodyStart))

        // The next body begins at the first entry past this body; always advance by
        // at least one entry so the loop terminates.
        let following = entries.firstIndex(where: { $0.offset >= bodyCloses }) ?? entries.count
        bodyStart = max(following, bodyStart + 1)
    }
    return planned
}
/// Collapses sections gathered from several windows into one chronological,
/// non-overlapping list.
///
/// Sections are ordered by start index (the wider one first when starts tie).
/// A section that ends inside the range already covered is dropped; one that only
/// begins inside it has its start moved to just past the covered range.
///
/// - Parameter sections: Sections with indices into the full entry list, in any order.
/// - Returns: The surviving sections, sorted and mutually disjoint.
static func stitch(_ sections: [TopicSection]) -> [TopicSection] {
    let ordered = sections.sorted { lhs, rhs in
        if lhs.startIndex == rhs.startIndex {
            return lhs.endIndex > rhs.endIndex
        }
        return lhs.startIndex < rhs.startIndex
    }

    var merged: [TopicSection] = []
    var coveredThrough = -1
    for candidate in ordered {
        // Entirely inside what has already been emitted: nothing new to add.
        guard candidate.endIndex > coveredThrough else { continue }
        // Push the start past the covered range when the front overlaps.
        let trimmedStart = max(candidate.startIndex, coveredThrough + 1)
        guard trimmedStart <= candidate.endIndex else { continue }
        merged.append(TopicSection(title: candidate.title, summary: candidate.summary,
                                   startIndex: trimmedStart, endIndex: candidate.endIndex))
        coveredThrough = candidate.endIndex
    }
    return merged
}
// MARK: - Extras
/// Runs the extras pass: builds the extras prompt, sends it in a single request
/// (with a 4096-token cap), and parses the reply.
///
/// - Parameters:
///   - file: The speakers file the prompt takes its metadata from.
///   - entries: Transcript entries to include in the prompt.
///   - sections: Topic sections already produced, listed in the prompt for context.
/// - Returns: Parsed extras, or `nil` when the reply could not be parsed into any.
/// - Throws: Any error thrown by `llm.completeJSON`.
func extras(file: SpeakersFile, entries: [Entry], sections: [TopicSection]) async throws -> MeetingExtras? {
    let reply = try await llm.completeJSON(
        model: model,
        system: nil,
        user: Self.extrasPrompt(file: file, entries: entries, sections: sections),
        maxTokens: 4096)
    return Self.parseExtras(reply)
}
// MARK: - Entries
/// Converts the file's segments into transcript entries.
///
/// Segments whose text is missing or blank after trimming are skipped; the rest
/// are returned ordered by start time with their text trimmed.
///
/// - Parameter file: The speakers file to read segments from.
/// - Returns: Entries sorted by `offset`.
static func entries(from file: SpeakersFile) -> [Entry] {
    let spoken: [Entry] = file.segments.compactMap { segment -> Entry? in
        let body = (segment.text ?? "").trimmingCharacters(in: .whitespacesAndNewlines)
        guard !body.isEmpty else { return nil }
        return Entry(offset: segment.start, end: segment.end, speaker: segment.speaker, text: body)
    }
    return spoken.sorted { $0.offset < $1.offset }
}
/// Formats a duration in seconds as `M:SS`, or `H:MM:SS` once it reaches an hour.
///
/// The value is rounded to the nearest whole second. Negative values clamp to
/// zero. Values that cannot be represented as an `Int` (NaN, ±infinity, or a
/// magnitude beyond `Int` range) are also treated as zero instead of trapping.
///
/// - Parameter sec: Duration in seconds.
/// - Returns: The formatted timestamp, e.g. `"1:05"` or `"1:01:01"`.
static func mmss(_ sec: Double) -> String {
    // `Int(_:)` traps on NaN/infinite/out-of-range doubles; `Int(exactly:)` returns nil.
    let t = max(0, Int(exactly: sec.rounded()) ?? 0)
    let h = t / 3600, m = (t % 3600) / 60, s = t % 60
    return h > 0 ? String(format: "%d:%02d:%02d", h, m, s) : String(format: "%d:%02d", m, s)
}
// MARK: - Prompts
/// Builds the topic-segmentation prompt for one analysis window.
///
/// Each entry becomes a line of the form `[N] (M:SS) Speaker: text`, where `N` is
/// the entry's position inside `window` (starting at 0), not its position in the
/// full transcript. The prompt asks for JSON with a `sections` array.
///
/// - Parameters:
///   - window: The entries this request should segment.
///   - totalSec: Length of the whole call in seconds, used to pick a target section count.
///   - windowCount: Number of windows the call was split into.
/// - Returns: The complete user prompt.
private static func analyzePrompt(_ window: [Entry], totalSec: Double, windowCount: Int) -> String {
// Note: inside this closure `$0.offset` is the enumeration index from `enumerated()`,
// while `$0.element.offset` is the entry's timestamp.
let lines = window.enumerated()
.map { "[\($0.offset)] (\(mmss($0.element.offset))) \($0.element.speaker): \($0.element.text)" }
.joined(separator: "\n")
// Window length from first start to last end, reported in whole minutes (at least 1).
let windowSpan = (window.last?.end ?? 0) - (window.first?.offset ?? 0)
let windowMin = max(1, Int((windowSpan / 60).rounded()))
// Highest window-local index the model is allowed to reference.
let maxIndex = window.count - 1
let targetSections = targetSectionsPhrase(totalSec: totalSec, windowCount: windowCount)
return """
You are analyzing a ~\(windowMin)-minute section of a longer transcript. Your job is to identify natural topic boundaries and group the transcript into discussion-based sections — aim for \(targetSections).
TRANSCRIPT (each line is numbered with a timestamp):
\(lines)
INSTRUCTIONS:
1. Read the entire transcript carefully.
2. Identify where the discussion naturally shifts from one topic to another.
3. Group consecutive transcript segments by topic. Some sections may be short (a quick aside) and some may be long (an extended deep-dive). Let the content dictate the length.
4. For each section, write:
- A short, specific topic title (3-8 words)
- A 1-3 sentence summary of what's discussed. Attribute points to speakers by name where it improves clarity.
- The start and end segment indices (inclusive), counted as the bracketed [N] number at the start of each transcript line above.
IMPORTANT:
- Sections must be chronological and non-overlapping.
- Every segment index from 0 to \(maxIndex) must belong to exactly one section.
- startIndex of section N+1 must equal endIndex of section N plus 1.
- Create as many or as few sections as the content naturally requires — but lean toward broad, substantive topics rather than minute-by-minute breakdowns. A natural topic that spans several minutes of dialogue should be one section, not several.
- Titles should be descriptive and specific, not generic like "Introduction" unless it truly is one.
Respond with ONLY valid JSON in this exact format, no other text:
{
"sections": [
{
"title": "Brief Topic Title",
"summary": "1-3 sentence summary of this discussion section.",
"startIndex": 0,
"endIndex": 15
}
]
}
"""
}
/// Produces the "around N sections" hint embedded in the analyze prompt.
///
/// A whole-call target is chosen from the call length (3 for under 5 minutes, up
/// to 16 for two hours or more), divided across the windows, rounded, and floored
/// at 2 per window.
///
/// - Parameters:
///   - totalSec: Length of the whole call in seconds.
///   - windowCount: Number of analysis windows; values below 1 are treated as 1.
/// - Returns: A phrase such as `"around 4 sections"`.
private static func targetSectionsPhrase(totalSec: Double, windowCount: Int) -> String {
    let minutes = totalSec / 60

    let wholeCallTarget: Int
    if minutes < 5 {
        wholeCallTarget = 3
    } else if minutes < 15 {
        wholeCallTarget = 4
    } else if minutes < 30 {
        wholeCallTarget = 6
    } else if minutes < 60 {
        wholeCallTarget = 8
    } else if minutes < 120 {
        wholeCallTarget = 12
    } else {
        wholeCallTarget = 16
    }

    let divisor = Double(max(1, windowCount))
    let share = (Double(wholeCallTarget) / divisor).rounded()
    let perWindow = max(2, Int(share))
    return "around \(perWindow) sections"
}
/// Builds the prompt for the extras pass (TLDR, decisions, action items, open
/// questions, key quotes).
///
/// The prompt contains the app name and formatted duration from `file`, the
/// speaker roster in first-appearance order, the numbered titles of the topic
/// sections, and the transcript capped at 24,000 characters.
///
/// - Parameters:
///   - file: Source of the `app` and `durationSec` metadata.
///   - entries: Transcript entries used for the roster and the transcript body.
///   - sections: Topic sections whose titles are listed for context.
/// - Returns: The complete user prompt.
private static func extrasPrompt(file: SpeakersFile, entries: [Entry], sections: [TopicSection]) -> String {
let names = orderedSpeakerNames(entries)
// Placeholders keep the prompt well-formed when there are no names or no sections.
let roster = names.isEmpty ? "(unknown)" : names.joined(separator: ", ")
let topics = sections.isEmpty ? "(none)" :
sections.enumerated().map { "\($0.offset + 1). \($0.element.title)" }.joined(separator: "\n")
let transcript = cappedTranscript(entries, maxChars: 24_000)
let durationStr = mmss(file.durationSec)
return """
You are extracting structured information from an internal team meeting transcript. The transcript below is labeled with the speakers' real names where known.
MEETING METADATA:
- App: \(file.app)
- Duration: \(durationStr)
SPEAKERS: \(roster)
TOPIC SUMMARIES (already produced — for context only, do not duplicate):
\(topics)
TRANSCRIPT (each line is "[<name> <MM:SS>] text"):
\(transcript)
INSTRUCTIONS:
Extract FIVE categories of information. Return EMPTY ARRAYS for categories that don't apply — do NOT invent items. Use the speakers' names exactly as shown above; use null/empty when a person is unclear.
1. TLDR — A 2-4 sentence executive summary of the entire meeting: what it was about, the key discussion arc, and the bottom-line outcome. Past tense, third person, dense. Skip pleasantries. If the meeting was genuinely substanceless, write one factual sentence. This is the only required category.
- summary: the 2-4 sentence executive summary
- primary_speakers: array of names who drove the conversation (1-3, in rough order of contribution). Empty array if unclear.
2. DECISIONS — Things explicitly decided/agreed. Only clear commitments, not casual mentions. For each:
- statement: the decision in one sentence
- agreed_by: array of names who explicitly agreed (empty if unclear)
- supporting_offset: integer SECONDS where it was decided (convert the [<name> <MM:SS>] timestamp to total seconds)
3. ACTION_ITEMS — Explicit ownership ("I'll send the doc", "Matt will follow up"), not vague "someone should". For each:
- description: the action in imperative form
- owner: the person's name, or null if unclear
- due_hint: deadline string if mentioned ("by Friday"), or null
- supporting_offset: integer seconds where the commitment was made
4. OPEN_QUESTIONS — Questions raised that were NOT clearly answered. Skip rhetorical/answered ones. For each:
- question: rephrased to be self-contained
- raised_by: the person's name, or null
- answered: false (always)
5. KEY_QUOTES — 3-6 max. Pivotal/insightful/strong-opinion statements worth surfacing verbatim. For each:
- speaker: the person's name (or null)
- offset: integer seconds where the quote occurs
- quote: the verbatim quote (4-30 words)
- why_notable: one short clause
Be conservative — better an empty array than a fabrication. Respond with ONLY valid JSON in this exact shape, no other text:
{
"tldr": {"summary": "...", "primary_speakers": []},
"decisions": [{"statement": "...", "agreed_by": [], "supporting_offset": 0}],
"action_items": [{"description": "...", "owner": null, "due_hint": null, "supporting_offset": 0}],
"open_questions": [{"question": "...", "raised_by": null, "answered": false}],
"key_quotes": [{"speaker": null, "offset": 0, "quote": "...", "why_notable": "..."}]
}
"""
}
/// Lists each non-empty speaker name once, in the order the speakers first appear.
///
/// - Parameter entries: Transcript entries to scan.
/// - Returns: Unique speaker names, first appearance first.
static func orderedSpeakerNames(_ entries: [Entry]) -> [String] {
    var encountered = Set<String>()
    return entries
        .map { $0.speaker }
        // `insert` reports whether the name was new; the emptiness check runs first
        // so blank names never enter the set.
        .filter { !$0.isEmpty && encountered.insert($0).inserted }
}
/// Renders the transcript as `[name MM:SS] text` lines, middle-truncated so a long
/// call still fits the model context.
///
/// When the rendering exceeds `maxChars`, roughly the first and last `maxChars / 2`
/// characters are kept and the middle is replaced by a marker line. Both cuts are
/// moved to line breaks so that no kept line is left as a fragment without its
/// `[name MM:SS]` prefix; the result is therefore never longer than a plain
/// character cut would be. (If an entry's text itself contains a newline, the cut
/// may land on that inner break.)
///
/// - Parameters:
///   - entries: Transcript entries to render.
///   - maxChars: Approximate character budget; values below zero behave like zero.
/// - Returns: The full transcript when it fits, otherwise head + marker + tail.
static func cappedTranscript(_ entries: [Entry], maxChars: Int) -> String {
    let full = entries.map { "[\($0.speaker) \(mmss($0.offset))] \($0.text)" }.joined(separator: "\n")
    guard full.count > maxChars else { return full }

    // `prefix`/`suffix` trap on a negative length, so clamp the budget at zero.
    let half = max(0, maxChars / 2)
    var head = full.prefix(half)
    var tail = full.suffix(half)

    // Head stops mid-line: back up to the previous line break, dropping the partial line.
    if head.endIndex < full.endIndex, full[head.endIndex] != "\n",
       let lineBreak = head.lastIndex(of: "\n") {
        head = head[..<lineBreak]
    }
    // Tail starts mid-line: skip ahead to the next complete line.
    if tail.startIndex > full.startIndex, full[full.index(before: tail.startIndex)] != "\n",
       let lineBreak = tail.firstIndex(of: "\n") {
        tail = tail[tail.index(after: lineBreak)...]
    }

    return String(head) + "\n…[transcript truncated]…\n" + String(tail)
}
// MARK: - Parsing (defensive LLM output)
/// Parses model output as a JSON object after passing it through
/// `GatewayLLMClient.stripCodeFence`.
///
/// - Parameter content: Raw text returned by the model.
/// - Returns: The top-level dictionary, or `nil` when the text is not valid JSON
///   or its top level is not an object.
private static func jsonObject(_ content: String) -> [String: Any]? {
    let payload = Data(GatewayLLMClient.stripCodeFence(content).utf8)
    guard let parsed = try? JSONSerialization.jsonObject(with: payload) else { return nil }
    return parsed as? [String: Any]
}
/// Extracts the `sections` array from the analyze reply.
///
/// An item is kept only when it has a non-blank `title` and both `startIndex` and
/// `endIndex` can be read as integers. A missing `summary` becomes an empty string.
/// Indices are returned as given; range checks are left to the caller.
///
/// - Parameter content: Raw text returned by the model.
/// - Returns: The usable sections in reply order; empty when the reply is not a
///   JSON object with a `sections` array of objects.
static func parseSections(_ content: String) -> [(title: String, summary: String, startIndex: Int, endIndex: Int)] {
    guard let root = jsonObject(content),
          let rawSections = root["sections"] as? [[String: Any]] else { return [] }

    var parsed: [(title: String, summary: String, startIndex: Int, endIndex: Int)] = []
    for raw in rawSections {
        let title = (raw["title"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
        guard !title.isEmpty,
              let first = intVal(raw["startIndex"]),
              let last = intVal(raw["endIndex"]) else { continue }
        let summary = (raw["summary"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
        parsed.append((title: title, summary: summary, startIndex: first, endIndex: last))
    }
    return parsed
}
/// Parses the extras reply into `MeetingExtras`, tolerating missing or malformed parts.
///
/// Every category is optional in the input: a missing or wrongly-typed category
/// becomes empty, and an item missing its main text field (`statement`,
/// `description`, `question`, `quote`) is skipped. String fields are trimmed, and
/// blank or literal `"null"` strings count as absent.
///
/// - Parameter content: Raw text returned by the model.
/// - Returns: The parsed extras, or `nil` when the reply is not a JSON object or
///   every category (including the TLDR summary) came out empty.
static func parseExtras(_ content: String) -> MeetingExtras? {
    guard let o = jsonObject(content) else { return nil }

    let tldrObj = o["tldr"] as? [String: Any]
    // Normalize the summary like every other string field, so a whitespace-only or
    // "null" summary is not mistaken for real content.
    let tldr = MeetingExtras.TLDR(
        summary: nonEmpty(tldrObj?["summary"]) ?? "",
        primarySpeakers: stringArray(tldrObj?["primary_speakers"]))

    let decisions = (o["decisions"] as? [[String: Any]] ?? []).compactMap { d -> MeetingExtras.Decision? in
        guard let st = nonEmpty(d["statement"]) else { return nil }
        return .init(statement: st, agreedBy: stringArray(d["agreed_by"]), supportingOffset: intVal(d["supporting_offset"]))
    }
    let actions = (o["action_items"] as? [[String: Any]] ?? []).compactMap { d -> MeetingExtras.ActionItem? in
        guard let desc = nonEmpty(d["description"]) else { return nil }
        return .init(description: desc, owner: nonEmpty(d["owner"]), dueHint: nonEmpty(d["due_hint"]),
                     supportingOffset: intVal(d["supporting_offset"]))
    }
    let questions = (o["open_questions"] as? [[String: Any]] ?? []).compactMap { d -> MeetingExtras.OpenQuestion? in
        guard let q = nonEmpty(d["question"]) else { return nil }
        return .init(question: q, raisedBy: nonEmpty(d["raised_by"]))
    }
    let quotes = (o["key_quotes"] as? [[String: Any]] ?? []).compactMap { d -> MeetingExtras.KeyQuote? in
        guard let q = nonEmpty(d["quote"]) else { return nil }
        return .init(speaker: nonEmpty(d["speaker"]), offset: intVal(d["offset"]), quote: q,
                     whyNotable: nonEmpty(d["why_notable"]) ?? "")
    }

    // Extras count as present when at least one category has content; a TLDR alone
    // is enough, but it is not required.
    guard !tldr.summary.isEmpty || !decisions.isEmpty || !actions.isEmpty || !questions.isEmpty || !quotes.isEmpty
    else { return nil }
    return MeetingExtras(tldr: tldr, decisions: decisions, actionItems: actions,
                         openQuestions: questions, keyQuotes: quotes)
}
/// Reads an integer out of a loosely-typed JSON value.
///
/// Accepts an `Int`, a `Double` (truncated toward zero), or a string holding an
/// integer (surrounding whitespace ignored). Doubles that cannot be represented
/// as an `Int` — NaN, ±infinity, or out of range — yield `nil` rather than
/// trapping, since the value comes from model output.
///
/// - Parameter v: The value to interpret; may be `nil`.
/// - Returns: The integer, or `nil` when the value is absent or not convertible.
private static func intVal(_ v: Any?) -> Int? {
    if let i = v as? Int { return i }
    // `Int(_:)` would crash on non-finite or oversized doubles; `Int(exactly:)` returns nil.
    if let d = v as? Double { return Int(exactly: d.rounded(.towardZero)) }
    if let s = v as? String { return Int(s.trimmingCharacters(in: .whitespacesAndNewlines)) }
    return nil
}
/// Reads an array of strings out of a loosely-typed JSON value.
///
/// Non-string elements are ignored; strings are trimmed and blank ones dropped.
///
/// - Parameter v: The value to interpret; may be `nil`.
/// - Returns: The cleaned strings in their original order, or an empty array when
///   the value is not an array.
private static func stringArray(_ v: Any?) -> [String] {
    guard let items = v as? [Any] else { return [] }

    var cleaned: [String] = []
    for case let raw as String in items {
        let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines)
        if !trimmed.isEmpty {
            cleaned.append(trimmed)
        }
    }
    return cleaned
}
/// Reads a meaningful string out of a loosely-typed JSON value.
///
/// - Parameter v: The value to interpret; may be `nil`.
/// - Returns: The trimmed string, or `nil` when the value is not a string, is
///   blank, or is the word "null" in any letter case.
private static func nonEmpty(_ v: Any?) -> String? {
    guard let raw = v as? String else { return nil }
    let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines)
    if trimmed.isEmpty || trimmed.lowercased() == "null" {
        return nil
    }
    return trimmed
}
}