Recap: readable transcript + topic sections + meeting extras (gateway LLM)
New 'Recap' phase — turns speakers.json into a human-readable recap, leveraging recap-relay's proven logic/prompts but calling the Spark gateway's OpenAI-compatible /v1/chat/completions directly (same host/TLS as label-merge; Qwen3-35B). We start from already-named speakers (label-merge), so recap-relay's speaker clustering + name-inference are skipped entirely.

- GatewayLLMClient: /v1/chat/completions (JSON mode), model discovery via /api/endpoints, TLS-skip reuse, 503 retry, sequential.
- RecapAnalyzer: speakers.json → numbered [N] (MM:SS) Name: text transcript → time-windowed analyze (single window for short calls, 18min/2min overlap for long) → stitch/dedup topic sections → meeting extras (TLDR/decisions/action_items/open_questions/key_quotes). Defensive JSON parsing of LLM output.
- RecapRenderer: writes transcript.md + a self-contained dark-theme recap.html (topic sections w/ collapsible transcripts, extras panels, speaker color chips, full timestamped speaker-attributed transcript, print styles).
- SessionController.buildRecap: best-effort after speakers.json (gated by settings.recapEnabled); surfaces recapURL → menu 'Open recap'. Skips silently if the gateway has no LLM. Settings toggle added.

Validated END-TO-END on the real Meet session against the live gateway: dual-channel transcription → 3 topic sections + accurate TLDR + key quotes; 'Go Bitcoin' correctly attributed to the remote speaker. 46/46 XCTest (10 new).
This commit is contained in:
@@ -0,0 +1,339 @@
|
||||
import Foundation
|
||||
|
||||
/// Turns a finished `speakers.json` into topic sections + meeting extras by driving
|
||||
/// the gateway LLM — a Swift port of recap-relay's chunked-window analysis, but
|
||||
/// starting from already-named speakers (label-merge), so we skip its speaker
|
||||
/// clustering and name-inference entirely. Pure helpers are static + testable; the
|
||||
/// LLM passes are sequential (one gateway request at a time).
|
||||
final class RecapAnalyzer {
|
||||
private let llm: GatewayLLMClient
|
||||
private let model: String
|
||||
|
||||
/// Creates an analyzer bound to one gateway client and one model.
///
/// - Parameters:
///   - llm: Client used for every JSON completion request this analyzer makes.
///   - model: Model identifier passed along with each completion request.
init(llm: GatewayLLMClient, model: String) {
    self.model = model
    self.llm = llm
}
|
||||
|
||||
/// One transcript segment as consumed by the analysis passes.
struct Entry: Equatable {
    /// Segment start (`offset`, in seconds) and segment end.
    let offset, end: Double
    /// Speaker label and the segment's text.
    let speaker, text: String
}
|
||||
|
||||
/// A slice of the entry list handed to the LLM in a single request.
///
/// - `startIdx`: first entry index this window analyzes (including overlap).
/// - `endIdx`: last entry index this window analyzes (including overlap).
/// - `bodyStartIdx`: first entry this window "owns".
struct Window: Equatable {
    let startIdx, endIdx, bodyStartIdx: Int
}
|
||||
|
||||
// MARK: - Orchestration
|
||||
|
||||
/// Runs the full recap pipeline: topic sections first, then meeting extras.
///
/// When the file yields no non-blank segments the method returns an empty result
/// (no sections, `nil` extras) before any LLM request is made. Errors from the
/// topic pass propagate to the caller; the extras pass is best-effort and
/// degrades to `nil` on any error.
///
/// - Parameters:
///   - file: The finished speakers file to summarize.
///   - progress: Optional callback invoked with a short status string before each pass.
/// - Returns: The stitched topic sections plus the extras (or `nil` extras).
/// - Throws: Whatever `analyze(entries:)` throws.
func recap(file: SpeakersFile, progress: ((String) async -> Void)? = nil) async throws -> RecapResult {
    let transcript = Self.entries(from: file)
    if transcript.isEmpty {
        return RecapResult(sections: [], extras: nil)
    }

    await progress?("Finding topics…")
    let topics = try await analyze(entries: transcript)

    await progress?("Extracting highlights…")
    // Best-effort: an extras failure must not discard the topic sections.
    let highlights: MeetingExtras?
    do {
        highlights = try await extras(file: file, entries: transcript, sections: topics)
    } catch {
        highlights = nil
    }
    return RecapResult(sections: topics, extras: highlights)
}
|
||||
|
||||
// MARK: - Analyze (chunked windows → stitched sections)
|
||||
|
||||
/// Splits `entries` into time windows, asks the LLM for topic sections per window,
/// and stitches the answers into one chronological, non-overlapping list.
///
/// Windows are processed one after another. The indices the model returns are
/// window-local: they are clamped to the window's bounds, shifted by the window's
/// first entry index to become global, and inverted ranges are discarded.
///
/// - Parameter entries: The transcript entries to segment.
/// - Returns: The stitched sections. Empty when `entries` is empty; a single
///   "Conversation" section spanning every entry when the model returned nothing usable.
/// - Throws: Any error raised by the LLM client for any window.
func analyze(entries: [Entry]) async throws -> [TopicSection] {
    // Nothing to segment. Without this guard the fallback below would emit a
    // section spanning 0...-1, which is not a valid entry range.
    guard !entries.isEmpty else { return [] }

    let windows = Self.planWindows(entries)
    let totalSec = entries.last?.end ?? 0
    var collected: [TopicSection] = []
    for window in windows {
        let local = Array(entries[window.startIdx...window.endIdx])
        let lastLocal = local.count - 1
        let prompt = Self.analyzePrompt(local, totalSec: totalSec, windowCount: windows.count)
        let content = try await llm.completeJSON(model: model, system: nil, user: prompt)
        for section in Self.parseSections(content) {
            // Clamp the model's window-local indices, then shift them to global indices.
            let globalStart = window.startIdx + max(0, min(section.startIndex, lastLocal))
            let globalEnd = window.startIdx + max(0, min(section.endIndex, lastLocal))
            guard globalEnd >= globalStart else { continue }
            collected.append(TopicSection(title: section.title, summary: section.summary,
                                          startIndex: globalStart, endIndex: globalEnd))
        }
    }

    let stitched = Self.stitch(collected)
    // If the model returned nothing usable, fall back to one section for the whole call.
    if stitched.isEmpty {
        return [TopicSection(title: "Conversation", summary: "", startIndex: 0, endIndex: entries.count - 1)]
    }
    return stitched
}
|
||||
|
||||
/// Plans the time windows the topic pass will analyze.
///
/// A call whose last entry ends at or before `cutoffSec` gets a single window over
/// everything. Longer calls are cut into bodies of roughly `bodySec`, each widened by
/// `overlapSec` on both sides so a topic straddling a boundary is seen by both
/// neighbours (the stitcher dedupes the duplicates).
///
/// - Parameters:
///   - entries: Transcript entries to window.
///   - bodySec: Length of the span each window owns, in seconds.
///   - overlapSec: Extra context added before and after each body, in seconds.
///   - cutoffSec: Calls no longer than this are analyzed as one window.
/// - Returns: The planned windows in order; empty when `entries` is empty.
static func planWindows(_ entries: [Entry],
                        bodySec: Double = 18 * 60, overlapSec: Double = 2 * 60,
                        cutoffSec: Double = 25 * 60) -> [Window] {
    guard let lastEntry = entries.last else { return [] }
    if lastEntry.end <= cutoffSec {
        return [Window(startIdx: 0, endIdx: entries.count - 1, bodyStartIdx: 0)]
    }

    var planned: [Window] = []
    var cursor = 0
    while cursor < entries.count {
        let bodyStart = entries[cursor].offset
        let lowerBound = bodyStart - overlapSec
        let bodyEnd = bodyStart + bodySec
        let upperBound = bodyEnd + overlapSec

        let first = entries.firstIndex(where: { $0.offset >= lowerBound }) ?? cursor
        // Extend forward while the next entry still starts inside the widened window.
        var last = cursor
        while last + 1 < entries.count, entries[last + 1].offset <= upperBound {
            last += 1
        }
        planned.append(Window(startIdx: first, endIdx: last, bodyStartIdx: cursor))

        // The next body starts at the first entry at or past this body's end; always
        // advance by at least one entry so the loop terminates.
        let nextBody = entries.firstIndex(where: { $0.offset >= bodyEnd }) ?? entries.count
        cursor = max(nextBody, cursor + 1)
    }
    return planned
}
|
||||
|
||||
/// Merges per-window sections into one chronological, non-overlapping list.
///
/// Sections are ordered by start index, with the wider one first when starts tie.
/// A section that ends inside the range already covered is dropped; one that only
/// begins inside it has its start moved to just past the covered range.
///
/// - Parameter sections: Sections with global entry indices, in any order.
/// - Returns: The surviving sections, in ascending order and without overlap.
static func stitch(_ sections: [TopicSection]) -> [TopicSection] {
    let ordered = sections.sorted { lhs, rhs in
        if lhs.startIndex == rhs.startIndex {
            return lhs.endIndex > rhs.endIndex
        }
        return lhs.startIndex < rhs.startIndex
    }

    var covered = -1 // highest entry index already claimed by an emitted section
    var merged: [TopicSection] = []
    for candidate in ordered {
        // Fully contained in what is already covered → drop.
        guard candidate.endIndex > covered else { continue }
        // Partial overlap → trim the front.
        let trimmedStart = max(candidate.startIndex, covered + 1)
        guard trimmedStart <= candidate.endIndex else { continue }
        merged.append(TopicSection(title: candidate.title, summary: candidate.summary,
                                   startIndex: trimmedStart, endIndex: candidate.endIndex))
        covered = candidate.endIndex
    }
    return merged
}
|
||||
|
||||
// MARK: - Extras
|
||||
|
||||
/// Requests the meeting extras from the LLM in a single completion call.
///
/// - Parameters:
///   - file: Speakers file supplying the metadata embedded in the prompt.
///   - entries: Transcript entries rendered into the prompt.
///   - sections: Topic sections already produced, listed in the prompt as context.
/// - Returns: The parsed extras, or `nil` when the reply could not be parsed into any.
/// - Throws: Any error raised by the LLM client.
func extras(file: SpeakersFile, entries: [Entry], sections: [TopicSection]) async throws -> MeetingExtras? {
    let reply = try await llm.completeJSON(
        model: model,
        system: nil,
        user: Self.extrasPrompt(file: file, entries: entries, sections: sections),
        maxTokens: 4096)
    return Self.parseExtras(reply)
}
|
||||
|
||||
// MARK: - Entries
|
||||
|
||||
/// Converts the file's segments into analysis entries.
///
/// Segments whose text is missing or blank after trimming are skipped; the rest keep
/// their trimmed text and are returned in ascending start-time order.
///
/// - Parameter file: The speakers file to read segments from.
/// - Returns: The non-blank entries, sorted by start time.
static func entries(from file: SpeakersFile) -> [Entry] {
    var kept: [Entry] = []
    for segment in file.segments {
        let text = (segment.text ?? "").trimmingCharacters(in: .whitespacesAndNewlines)
        if text.isEmpty { continue }
        kept.append(Entry(offset: segment.start, end: segment.end, speaker: segment.speaker, text: text))
    }
    kept.sort { $0.offset < $1.offset }
    return kept
}
|
||||
|
||||
/// Formats a duration in seconds as `M:SS`, or `H:MM:SS` from one hour upward.
///
/// The value is rounded to the nearest second. Negative and non-finite inputs
/// (NaN, ±infinity) format as `0:00`; absurdly large values are clamped to
/// `Int32.max` seconds.
///
/// - Parameter sec: Duration or offset in seconds.
/// - Returns: The formatted timestamp.
static func mmss(_ sec: Double) -> String {
    // `Int(_:)` traps on NaN, ±infinity and values outside Int's range, so sanitize
    // and clamp before converting.
    let wholeSeconds = sec.isFinite ? min(max(0, sec.rounded()), Double(Int32.max)) : 0
    let total = Int(wholeSeconds)
    let hours = total / 3600
    let minutes = (total % 3600) / 60
    let seconds = total % 60
    if hours > 0 {
        return String(format: "%d:%02d:%02d", hours, minutes, seconds)
    }
    return String(format: "%d:%02d", minutes, seconds)
}
|
||||
|
||||
// MARK: - Prompts
|
||||
|
||||
/// Builds the topic-segmentation prompt for a single analysis window.
///
/// Each entry is rendered as `[N] (timestamp) Speaker: text`, where `N` is the entry's
/// position inside `window` (starting at 0), so the section indices the model is asked
/// to return are window-local. The prompt also states the window's approximate length
/// in minutes (never less than 1) and a target section count derived from `totalSec`
/// and `windowCount`.
///
/// - Parameters:
///   - window: The entries belonging to this window, in order.
///   - totalSec: Length of the whole call in seconds (drives the target section count).
///   - windowCount: Number of windows the call was split into.
/// - Returns: The complete user prompt, ending with the JSON shape the model must reply in.
private static func analyzePrompt(_ window: [Entry], totalSec: Double, windowCount: Int) -> String {
|
||||
// Inside the closure, `$0.offset` is the enumeration index (the bracketed [N]),
// while `$0.element.offset` is the entry's start time in seconds.
let lines = window.enumerated()
|
||||
.map { "[\($0.offset)] (\(mmss($0.element.offset))) \($0.element.speaker): \($0.element.text)" }
|
||||
.joined(separator: "\n")
|
||||
let windowSpan = (window.last?.end ?? 0) - (window.first?.offset ?? 0)
|
||||
let windowMin = max(1, Int((windowSpan / 60).rounded()))
|
||||
let maxIndex = window.count - 1
|
||||
let targetSections = targetSectionsPhrase(totalSec: totalSec, windowCount: windowCount)
|
||||
return """
|
||||
You are analyzing a ~\(windowMin)-minute section of a longer transcript. Your job is to identify natural topic boundaries and group the transcript into discussion-based sections — aim for \(targetSections).
|
||||
|
||||
TRANSCRIPT (each line is numbered with a timestamp):
|
||||
\(lines)
|
||||
|
||||
INSTRUCTIONS:
|
||||
1. Read the entire transcript carefully.
|
||||
2. Identify where the discussion naturally shifts from one topic to another.
|
||||
3. Group consecutive transcript segments by topic. Some sections may be short (a quick aside) and some may be long (an extended deep-dive). Let the content dictate the length.
|
||||
4. For each section, write:
|
||||
- A short, specific topic title (3-8 words)
|
||||
- A 1-3 sentence summary of what's discussed. Attribute points to speakers by name where it improves clarity.
|
||||
- The start and end segment indices (inclusive), counted as the bracketed [N] number at the start of each transcript line above.
|
||||
|
||||
IMPORTANT:
|
||||
- Sections must be chronological and non-overlapping.
|
||||
- Every segment index from 0 to \(maxIndex) must belong to exactly one section.
|
||||
- startIndex of section N+1 must equal endIndex of section N plus 1.
|
||||
- Create as many or as few sections as the content naturally requires — but lean toward broad, substantive topics rather than minute-by-minute breakdowns. A natural topic that spans several minutes of dialogue should be one section, not several.
|
||||
- Titles should be descriptive and specific, not generic like "Introduction" unless it truly is one.
|
||||
|
||||
Respond with ONLY valid JSON in this exact format, no other text:
|
||||
{
|
||||
"sections": [
|
||||
{
|
||||
"title": "Brief Topic Title",
|
||||
"summary": "1-3 sentence summary of this discussion section.",
|
||||
"startIndex": 0,
|
||||
"endIndex": 15
|
||||
}
|
||||
]
|
||||
}
|
||||
"""
|
||||
}
|
||||
|
||||
/// Produces the "around N sections" hint embedded in the analysis prompt.
///
/// A whole-call target is chosen from the call length in minutes (3, 4, 6, 8, 12 or
/// 16 sections) and divided evenly across the windows, never dropping below 2 per
/// window.
///
/// - Parameters:
///   - totalSec: Length of the whole call in seconds.
///   - windowCount: Number of windows; values below 1 are treated as 1.
/// - Returns: The phrase to interpolate into the prompt.
private static func targetSectionsPhrase(totalSec: Double, windowCount: Int) -> String {
    let minutes = totalSec / 60
    let wholeCall: Int
    switch minutes {
    case ..<5: wholeCall = 3
    case ..<15: wholeCall = 4
    case ..<30: wholeCall = 6
    case ..<60: wholeCall = 8
    case ..<120: wholeCall = 12
    default: wholeCall = 16
    }
    let divisor = Double(max(1, windowCount))
    let perWindow = max(2, Int((Double(wholeCall) / divisor).rounded()))
    return "around \(perWindow) sections"
}
|
||||
|
||||
/// Builds the prompt for the meeting-extras pass (TLDR, decisions, action items,
/// open questions, key quotes).
///
/// The prompt embeds the app name and formatted duration from `file`, the distinct
/// speaker names from `entries`, the numbered titles of `sections`, and the
/// transcript rendered by `cappedTranscript` with a 24,000-character budget.
///
/// - Parameters:
///   - file: Speakers file supplying `app` and `durationSec`.
///   - entries: Transcript entries used for the roster and the transcript text.
///   - sections: Topic sections already produced; only their titles are included.
/// - Returns: The complete user prompt, ending with the JSON shape the model must reply in.
private static func extrasPrompt(file: SpeakersFile, entries: [Entry], sections: [TopicSection]) -> String {
|
||||
let names = orderedSpeakerNames(entries)
|
||||
// Placeholders keep the prompt well-formed when there are no speakers or no sections.
let roster = names.isEmpty ? "(unknown)" : names.joined(separator: ", ")
|
||||
let topics = sections.isEmpty ? "(none)" :
|
||||
sections.enumerated().map { "\($0.offset + 1). \($0.element.title)" }.joined(separator: "\n")
|
||||
let transcript = cappedTranscript(entries, maxChars: 24_000)
|
||||
let durationStr = mmss(file.durationSec)
|
||||
return """
|
||||
You are extracting structured information from an internal team meeting transcript. The transcript below is labeled with the speakers' real names where known.
|
||||
|
||||
MEETING METADATA:
|
||||
- App: \(file.app)
|
||||
- Duration: \(durationStr)
|
||||
|
||||
SPEAKERS: \(roster)
|
||||
|
||||
TOPIC SUMMARIES (already produced — for context only, do not duplicate):
|
||||
\(topics)
|
||||
|
||||
TRANSCRIPT (each line is "[<name> <MM:SS>] text"):
|
||||
\(transcript)
|
||||
|
||||
INSTRUCTIONS:
|
||||
Extract FIVE categories of information. Return EMPTY ARRAYS for categories that don't apply — do NOT invent items. Use the speakers' names exactly as shown above; use null/empty when a person is unclear.
|
||||
|
||||
1. TLDR — A 2-4 sentence executive summary of the entire meeting: what it was about, the key discussion arc, and the bottom-line outcome. Past tense, third person, dense. Skip pleasantries. If the meeting was genuinely substanceless, write one factual sentence. This is the only required category.
|
||||
- summary: the 2-4 sentence executive summary
|
||||
- primary_speakers: array of names who drove the conversation (1-3, in rough order of contribution). Empty array if unclear.
|
||||
|
||||
2. DECISIONS — Things explicitly decided/agreed. Only clear commitments, not casual mentions. For each:
|
||||
- statement: the decision in one sentence
|
||||
- agreed_by: array of names who explicitly agreed (empty if unclear)
|
||||
- supporting_offset: integer SECONDS where it was decided (convert the [<name> <MM:SS>] timestamp to total seconds)
|
||||
|
||||
3. ACTION_ITEMS — Explicit ownership ("I'll send the doc", "Matt will follow up"), not vague "someone should". For each:
|
||||
- description: the action in imperative form
|
||||
- owner: the person's name, or null if unclear
|
||||
- due_hint: deadline string if mentioned ("by Friday"), or null
|
||||
- supporting_offset: integer seconds where the commitment was made
|
||||
|
||||
4. OPEN_QUESTIONS — Questions raised that were NOT clearly answered. Skip rhetorical/answered ones. For each:
|
||||
- question: rephrased to be self-contained
|
||||
- raised_by: the person's name, or null
|
||||
- answered: false (always)
|
||||
|
||||
5. KEY_QUOTES — 3-6 max. Pivotal/insightful/strong-opinion statements worth surfacing verbatim. For each:
|
||||
- speaker: the person's name (or null)
|
||||
- offset: integer seconds where the quote occurs
|
||||
- quote: the verbatim quote (4-30 words)
|
||||
- why_notable: one short clause
|
||||
|
||||
Be conservative — better an empty array than a fabrication. Respond with ONLY valid JSON in this exact shape, no other text:
|
||||
{
|
||||
"tldr": {"summary": "...", "primary_speakers": []},
|
||||
"decisions": [{"statement": "...", "agreed_by": [], "supporting_offset": 0}],
|
||||
"action_items": [{"description": "...", "owner": null, "due_hint": null, "supporting_offset": 0}],
|
||||
"open_questions": [{"question": "...", "raised_by": null, "answered": false}],
|
||||
"key_quotes": [{"speaker": null, "offset": 0, "quote": "...", "why_notable": "..."}]
|
||||
}
|
||||
"""
|
||||
}
|
||||
|
||||
/// Lists each distinct, non-empty speaker name once, in order of first appearance.
///
/// - Parameter entries: Transcript entries to scan.
/// - Returns: The unique speaker names, earliest speaker first.
static func orderedSpeakerNames(_ entries: [Entry]) -> [String] {
    var known = Set<String>()
    return entries.compactMap { entry -> String? in
        let name = entry.speaker
        // `insert` reports whether the name was new, so one call both tests and records it.
        guard !name.isEmpty, known.insert(name).inserted else { return nil }
        return name
    }
}
|
||||
|
||||
/// Renders the full `[name MM:SS] text` transcript, middle-truncating it when it
/// exceeds `maxChars` so a long call still fits the model context.
///
/// A transcript within budget is returned unchanged. Otherwise the first and last
/// `maxChars / 2` characters are kept and joined by a truncation marker; the marker
/// is added on top of the budget, so the result is slightly longer than `maxChars`.
/// Cuts are made at character positions, not at line boundaries.
///
/// - Parameters:
///   - entries: Transcript entries to render, one per line.
///   - maxChars: Character budget for the kept text; negative values are treated as 0.
/// - Returns: The full or middle-truncated transcript.
static func cappedTranscript(_ entries: [Entry], maxChars: Int) -> String {
    let full = entries.map { "[\($0.speaker) \(mmss($0.offset))] \($0.text)" }.joined(separator: "\n")
    guard full.count > maxChars else { return full }
    // `prefix(_:)` / `suffix(_:)` trap on a negative length, so clamp the budget first.
    let half = max(0, maxChars) / 2
    let head = String(full.prefix(half))
    let tail = String(full.suffix(half))
    return head + "\n…[transcript truncated]…\n" + tail
}
|
||||
|
||||
// MARK: - Parsing (defensive — LLM output)
|
||||
|
||||
/// Parses LLM output into a JSON dictionary.
///
/// The text is first passed through `GatewayLLMClient.stripCodeFence`, then decoded
/// with `JSONSerialization`.
///
/// - Parameter content: Raw completion text.
/// - Returns: The top-level object, or `nil` when the text is not valid JSON or its
///   top level is not an object.
private static func jsonObject(_ content: String) -> [String: Any]? {
    let payload = Data(GatewayLLMClient.stripCodeFence(content).utf8)
    guard let parsed = try? JSONSerialization.jsonObject(with: payload) else { return nil }
    return parsed as? [String: Any]
}
|
||||
|
||||
/// Extracts the topic sections from the model's JSON reply.
///
/// Items are read from the top-level `"sections"` array. An item is kept only when it
/// has a non-blank `title` and both `startIndex` and `endIndex` can be read as
/// integers; a missing `summary` becomes the empty string. Titles and summaries are
/// trimmed.
///
/// - Parameter content: Raw completion text.
/// - Returns: The usable sections in reply order; empty when nothing could be parsed.
static func parseSections(_ content: String) -> [(title: String, summary: String, startIndex: Int, endIndex: Int)] {
    guard let root = jsonObject(content),
          let rawSections = root["sections"] as? [[String: Any]] else { return [] }

    var parsed: [(title: String, summary: String, startIndex: Int, endIndex: Int)] = []
    for raw in rawSections {
        let title = (raw["title"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
        guard !title.isEmpty,
              let first = intVal(raw["startIndex"]),
              let last = intVal(raw["endIndex"]) else { continue }
        let summary = (raw["summary"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
        parsed.append((title: title, summary: summary, startIndex: first, endIndex: last))
    }
    return parsed
}
|
||||
|
||||
/// Extracts the meeting extras from the model's JSON reply.
///
/// Every category is parsed leniently: an absent or mistyped category becomes empty,
/// and an item lacking its required text field (`statement`, `description`,
/// `question`, `quote`) is skipped. String fields are trimmed, and blank or literal
/// `"null"` strings count as missing — including the TLDR summary.
///
/// - Parameter content: Raw completion text.
/// - Returns: The extras, or `nil` when the reply is not a JSON object or every
///   category (TLDR summary included) came back empty.
static func parseExtras(_ content: String) -> MeetingExtras? {
    guard let o = jsonObject(content) else { return nil }

    // Reads a category as an array of objects; anything else counts as empty.
    func objects(_ key: String) -> [[String: Any]] {
        o[key] as? [[String: Any]] ?? []
    }

    let tldrObj = o["tldr"] as? [String: Any]
    let tldr = MeetingExtras.TLDR(
        // Normalized like every other string field, so a whitespace-only or "null"
        // summary is not mistaken for real content.
        summary: nonEmpty(tldrObj?["summary"]) ?? "",
        primarySpeakers: stringArray(tldrObj?["primary_speakers"]))

    let decisions = objects("decisions").compactMap { d -> MeetingExtras.Decision? in
        guard let st = nonEmpty(d["statement"]) else { return nil }
        return .init(statement: st, agreedBy: stringArray(d["agreed_by"]),
                     supportingOffset: intVal(d["supporting_offset"]))
    }
    let actions = objects("action_items").compactMap { d -> MeetingExtras.ActionItem? in
        guard let desc = nonEmpty(d["description"]) else { return nil }
        return .init(description: desc, owner: nonEmpty(d["owner"]), dueHint: nonEmpty(d["due_hint"]),
                     supportingOffset: intVal(d["supporting_offset"]))
    }
    let questions = objects("open_questions").compactMap { d -> MeetingExtras.OpenQuestion? in
        guard let q = nonEmpty(d["question"]) else { return nil }
        return .init(question: q, raisedBy: nonEmpty(d["raised_by"]))
    }
    let quotes = objects("key_quotes").compactMap { d -> MeetingExtras.KeyQuote? in
        guard let q = nonEmpty(d["quote"]) else { return nil }
        return .init(speaker: nonEmpty(d["speaker"]), offset: intVal(d["offset"]), quote: q,
                     whyNotable: nonEmpty(d["why_notable"]) ?? "")
    }

    // Extras count as present when at least one category has content — the TLDR
    // alone is enough, but it is not mandatory.
    guard !tldr.summary.isEmpty || !decisions.isEmpty || !actions.isEmpty || !questions.isEmpty || !quotes.isEmpty
    else { return nil }
    return MeetingExtras(tldr: tldr, decisions: decisions, actionItems: actions,
                         openQuestions: questions, keyQuotes: quotes)
}
|
||||
|
||||
/// Reads an integer out of a loosely-typed JSON value.
///
/// Accepts an `Int`, a `Double` (truncated toward zero), or a `String` holding an
/// integer with optional surrounding whitespace.
///
/// - Parameter v: The decoded JSON value, if any.
/// - Returns: The integer, or `nil` when the value is absent, of another type, not
///   parseable, or a double that is NaN, infinite, or outside `Int`'s range.
private static func intVal(_ v: Any?) -> Int? {
    if let i = v as? Int { return i }
    if let d = v as? Double {
        // `Int(_:)` traps on NaN, ±infinity and out-of-range values; `Int(exactly:)`
        // returns nil for those, so truncate first and convert safely.
        return Int(exactly: d.rounded(.towardZero))
    }
    if let s = v as? String { return Int(s.trimmingCharacters(in: .whitespacesAndNewlines)) }
    return nil
}
|
||||
|
||||
/// Reads an array of strings out of a loosely-typed JSON value.
///
/// Non-string elements are skipped; strings are trimmed and blank ones discarded.
///
/// - Parameter v: The decoded JSON value, if any.
/// - Returns: The cleaned strings in their original order; empty when `v` is not an array.
private static func stringArray(_ v: Any?) -> [String] {
    guard let items = v as? [Any] else { return [] }
    var strings: [String] = []
    for case let text as String in items {
        let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
        if !trimmed.isEmpty {
            strings.append(trimmed)
        }
    }
    return strings
}
|
||||
|
||||
/// Reads a meaningful string out of a loosely-typed JSON value.
///
/// - Parameter v: The decoded JSON value, if any.
/// - Returns: The trimmed string, or `nil` when the value is not a string, is blank
///   after trimming, or spells "null" in any letter case.
private static func nonEmpty(_ v: Any?) -> String? {
    guard let raw = v as? String else { return nil }
    let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines)
    if trimmed.isEmpty || trimmed.lowercased() == "null" {
        return nil
    }
    return trimmed
}
|
||||
}
|
||||
Reference in New Issue
Block a user