Files
Grant Gilliam c539b78a58 Configurable recap templates (categories per meeting type, in Settings)
Takeaways categories are no longer hardcoded — they're editable templates. A
template = the always-on TLDR + an ordered list of sections, each with a title, a
type (attributed items / bulleted list / paragraph), and an instruction (the prompt
text for that category). The analyzer assembles the LLM prompt FROM the template
and parses generically, so adding/removing/renaming a category needs zero code and
the output always renders.

- RecapTemplate / TemplateSection / SectionKind + TopicGranularity; built-in
  defaults (Internal Meeting, 1:1, Company/Sales Call), all editable.
- Generic extras: RecapExtras{tldr, primarySpeakers, sections:[RenderedSection]} +
  RecapItem{text,who,when,note} replaces the fixed MeetingExtras. Analyzer builds
  per-section sec_N fields + parses by kind; renderer + remap are generic.
- Topic granularity (coarse/auto/fine) answers 'should chunking be configurable' —
  it scales the target topic count; raw window sizes stay as tuned defaults.
- AppSettings persists templates + defaultTemplateId (seeded once). Settings gets a
  default-template picker + 'Manage…' → TemplatesView (CRUD, edit sections/
  instructions, set default, **Preview prompt** for full transparency).
- Recap editor gains a template picker; Regenerate uses the chosen template. Auto
  recap uses the default template.

54/54 XCTest (template prompt build, generic parse/remap/render updated).
2026-06-06 19:26:03 -05:00

341 lines
17 KiB
Swift

import Foundation
/// Turns a finished `speakers.json` into topic sections + meeting extras by driving
/// the gateway LLM — a Swift port of recap-relay's chunked-window analysis, but
/// starting from already-named speakers (label-merge), so we skip its speaker
/// clustering and name-inference entirely. Pure helpers are static + testable; the
/// LLM passes are sequential (one gateway request at a time).
final class RecapAnalyzer {
/// Gateway LLM client that every analysis request in this class is sent through.
private let llm: GatewayLLMClient
/// Model identifier passed along with each `completeJSON` call.
private let model: String
/// Creates an analyzer that issues its requests through `llm` using `model`.
init(llm: GatewayLLMClient, model: String) {
self.llm = llm
self.model = model
}
/// One transcript segment in the flattened form the analyzer works on.
struct Entry: Equatable {
    /// Segment start (`offset`) and end, in seconds.
    let offset, end: Double
    /// Speaker label and the segment's text.
    let speaker, text: String
}
/// A contiguous run of entry indices handed to the model in one request.
struct Window: Equatable {
    /// Inclusive first/last entry index this window analyzes (overlap included).
    let startIdx, endIdx: Int
    /// First entry index this window "owns".
    let bodyStartIdx: Int
}
// MARK: - Orchestration
/// Runs the full recap: topic analysis first, then the template-driven extras.
///
/// Extras are best-effort: if that pass throws, the result carries `nil` extras
/// instead of propagating the error. A failure in the topic pass is rethrown.
/// - Parameters:
///   - file: The speakers file whose segments are analyzed.
///   - template: Supplies the topic granularity and the extras sections.
///   - progress: Optional callback invoked with a status string before each pass.
/// - Returns: The stitched topic sections plus optional extras; an empty result
///   when the file has no non-blank segments.
func recap(file: SpeakersFile, template: RecapTemplate,
           progress: ((String) async -> Void)? = nil) async throws -> RecapResult {
    let transcript = Self.entries(from: file)
    if transcript.isEmpty {
        return RecapResult(sections: [], extras: nil)
    }

    await progress?("Finding topics…")
    let topics = try await analyze(entries: transcript, granularity: template.topicGranularity)

    await progress?("Extracting takeaways…")
    // `try?` is deliberate: a failed extras pass must not discard the topics.
    let takeaways = try? await extras(file: file, entries: transcript, sections: topics, template: template)
    return RecapResult(sections: topics, extras: takeaways)
}
// MARK: - Analyze (chunked windows → stitched sections)
/// Splits `entries` into windows, asks the model for topic sections per window
/// (one request at a time), maps the window-local indices back to global entry
/// indices, and stitches the results into one non-overlapping list.
///
/// - Parameters:
///   - entries: Transcript entries to analyze.
///   - granularity: Scales the section count suggested to the model.
/// - Returns: The stitched sections; a single "Conversation" section spanning
///   every entry when the model returned nothing usable; `[]` when `entries`
///   is empty.
/// - Throws: Whatever the LLM client throws for a window request.
func analyze(entries: [Entry], granularity: TopicGranularity = .auto) async throws -> [TopicSection] {
    // With no entries the whole-call fallback below would describe the invalid
    // range 0...-1, so there is nothing meaningful to return.
    guard !entries.isEmpty else { return [] }

    let windows = Self.planWindows(entries)
    let totalSec = entries.last?.end ?? 0
    var collected: [TopicSection] = []
    for window in windows {
        let local = Array(entries[window.startIdx...window.endIdx])
        let prompt = Self.analyzePrompt(local, totalSec: totalSec,
                                        windowCount: windows.count, granularity: granularity)
        let content = try await llm.completeJSON(model: model, system: nil, user: prompt)
        let lastLocal = local.count - 1
        for parsed in Self.parseSections(content) {
            // Clamp the model's window-local indices into the window, then shift to global.
            let globalStart = window.startIdx + max(0, min(parsed.startIndex, lastLocal))
            let globalEnd = window.startIdx + max(0, min(parsed.endIndex, lastLocal))
            guard globalEnd >= globalStart else { continue }
            collected.append(TopicSection(title: parsed.title, summary: parsed.summary,
                                          startIndex: globalStart, endIndex: globalEnd))
        }
    }
    let stitched = Self.stitch(collected)
    // If the model returned nothing usable, fall back to one section for the whole call.
    if stitched.isEmpty {
        return [TopicSection(title: "Conversation", summary: "", startIndex: 0, endIndex: entries.count - 1)]
    }
    return stitched
}
/// Plans time-based windows over `entries`.
///
/// When the last entry ends at or before `cutoffSec` the whole call is one window.
/// Otherwise each window owns a body of roughly `bodySec` seconds and additionally
/// reads `overlapSec` seconds on each side, so a topic straddling a boundary is seen
/// by both neighbouring windows (the stitcher dedupes).
/// - Returns: Windows in ascending `bodyStartIdx` order; empty for empty input.
static func planWindows(_ entries: [Entry],
                        bodySec: Double = 18 * 60, overlapSec: Double = 2 * 60,
                        cutoffSec: Double = 25 * 60) -> [Window] {
    guard let finalEntry = entries.last else { return [] }
    let lastIdx = entries.count - 1
    if finalEntry.end <= cutoffSec {
        return [Window(startIdx: 0, endIdx: lastIdx, bodyStartIdx: 0)]
    }

    var plan: [Window] = []
    var body = 0
    while body < entries.count {
        let bodyStart = entries[body].offset
        let bodyEnd = bodyStart + bodySec
        let lowerBound = bodyStart - overlapSec
        let upperBound = bodyEnd + overlapSec

        // First entry inside the leading overlap; fall back to the body start.
        let first = entries.firstIndex(where: { $0.offset >= lowerBound }) ?? body
        // Extend forward while the next entry still starts within the trailing overlap.
        var last = body
        while last < lastIdx, entries[last + 1].offset <= upperBound {
            last += 1
        }
        plan.append(Window(startIdx: first, endIdx: last, bodyStartIdx: body))

        // The next body begins at the first entry past this body; always advance by at least one.
        let nextBody = entries.firstIndex(where: { $0.offset >= bodyEnd }) ?? entries.count
        body = max(nextBody, body + 1)
    }
    return plan
}
/// Merges per-window sections into one chronological, non-overlapping list.
///
/// Sections are ordered by start index (the wider one first on ties). A section
/// that ends inside the range already covered is dropped; one that merely begins
/// inside it has its start moved just past the covered range.
static func stitch(_ sections: [TopicSection]) -> [TopicSection] {
    let ordered = sections.sorted { lhs, rhs in
        if lhs.startIndex == rhs.startIndex { return lhs.endIndex > rhs.endIndex }
        return lhs.startIndex < rhs.startIndex
    }

    var merged: [TopicSection] = []
    var covered = -1 // highest entry index already claimed by an emitted section
    for candidate in ordered {
        let last = candidate.endIndex
        // Fully contained in what is already covered → drop.
        guard last > covered else { continue }
        // Overlaps the covered range → trim the front.
        let first = max(candidate.startIndex, covered + 1)
        guard first <= last else { continue }
        merged.append(TopicSection(title: candidate.title, summary: candidate.summary,
                                   startIndex: first, endIndex: last))
        covered = last
    }
    return merged
}
// MARK: - Extras
/// Requests the template-driven extras from the model and parses the reply.
/// - Returns: The parsed extras, or `nil` when `parseExtras` rejects the reply.
/// - Throws: Whatever the LLM client throws for the request.
func extras(file: SpeakersFile, entries: [Entry], sections: [TopicSection],
            template: RecapTemplate) async throws -> RecapExtras? {
    let request = Self.extrasPrompt(file: file, entries: entries, sections: sections, template: template)
    let reply = try await llm.completeJSON(model: model, system: nil, user: request, maxTokens: 4096)
    return Self.parseExtras(reply, template: template)
}
// MARK: - Entries
/// Flattens the file's segments into entries: blank-text segments are skipped,
/// text is whitespace-trimmed, and the result is ordered by start time.
static func entries(from file: SpeakersFile) -> [Entry] {
    let spoken: [Entry] = file.segments.compactMap { segment -> Entry? in
        let text = (segment.text ?? "").trimmingCharacters(in: .whitespacesAndNewlines)
        guard !text.isEmpty else { return nil }
        return Entry(offset: segment.start, end: segment.end, speaker: segment.speaker, text: text)
    }
    return spoken.sorted { $0.offset < $1.offset }
}
/// Formats a duration in seconds as `M:SS`, or `H:MM:SS` once it reaches an hour.
///
/// The value is rounded to the nearest second. Negative, NaN, infinite, or
/// otherwise unrepresentable inputs render as `0:00` instead of trapping.
static func mmss(_ sec: Double) -> String {
    // `Int(_:)` traps on NaN/±infinity and on values outside Int's range;
    // `Int(exactly:)` returns nil for those, which we treat as zero.
    let total = max(0, Int(exactly: sec.rounded()) ?? 0)
    let hours = total / 3600
    let minutes = (total % 3600) / 60
    let seconds = total % 60
    if hours > 0 {
        return String(format: "%d:%02d:%02d", hours, minutes, seconds)
    }
    return String(format: "%d:%02d", minutes, seconds)
}
// MARK: - Prompts
/// Builds the topic-analysis prompt for one window.
///
/// Each entry becomes a line `[i] (time) speaker: text`, where `i` is the entry's
/// position inside `window` — the index space the model is asked to answer in.
private static func analyzePrompt(_ window: [Entry], totalSec: Double, windowCount: Int,
                                  granularity: TopicGranularity) -> String {
    var numbered: [String] = []
    numbered.reserveCapacity(window.count)
    for (index, entry) in window.enumerated() {
        numbered.append("[\(index)] (\(mmss(entry.offset))) \(entry.speaker): \(entry.text)")
    }
    let lines = numbered.joined(separator: "\n")

    // Length of this window in whole minutes, never reported as less than one.
    let spanSec: Double
    if let first = window.first, let last = window.last {
        spanSec = last.end - first.offset
    } else {
        spanSec = 0
    }
    let windowMin = max(1, Int((spanSec / 60).rounded()))
    let maxIndex = window.count - 1
    let targetSections = targetSectionsPhrase(totalSec: totalSec, windowCount: windowCount,
                                              granularity: granularity)
    return """
    You are analyzing a ~\(windowMin)-minute section of a longer transcript. Your job is to identify natural topic boundaries and group the transcript into discussion-based sections — aim for \(targetSections).
    TRANSCRIPT (each line is numbered with a timestamp):
    \(lines)
    INSTRUCTIONS:
    1. Read the entire transcript carefully.
    2. Identify where the discussion naturally shifts from one topic to another.
    3. Group consecutive transcript segments by topic. Some sections may be short (a quick aside) and some may be long (an extended deep-dive). Let the content dictate the length.
    4. For each section, write:
    - A short, specific topic title (3-8 words)
    - A 1-3 sentence summary of what's discussed. Attribute points to speakers by name where it improves clarity.
    - The start and end segment indices (inclusive), counted as the bracketed [N] number at the start of each transcript line above.
    IMPORTANT:
    - Sections must be chronological and non-overlapping.
    - Every segment index from 0 to \(maxIndex) must belong to exactly one section.
    - startIndex of section N+1 must equal endIndex of section N plus 1.
    - Create as many or as few sections as the content naturally requires — but lean toward broad, substantive topics rather than minute-by-minute breakdowns. A natural topic that spans several minutes of dialogue should be one section, not several.
    - Titles should be descriptive and specific, not generic like "Introduction" unless it truly is one.
    Respond with ONLY valid JSON in this exact format, no other text:
    {
    "sections": [
    {
    "title": "Brief Topic Title",
    "summary": "1-3 sentence summary of this discussion section.",
    "startIndex": 0,
    "endIndex": 15
    }
    ]
    }
    """
}
/// Phrase telling the model roughly how many sections to produce per window.
///
/// A baseline count is picked from the total call length, scaled by the
/// granularity multiplier, split across the windows, and floored at two.
private static func targetSectionsPhrase(totalSec: Double, windowCount: Int,
                                         granularity: TopicGranularity) -> String {
    let minutes = totalSec / 60
    // (exclusive upper bound in minutes, baseline section count); anything longer uses 16.
    let tiers: [(Double, Double)] = [(5, 3), (15, 4), (30, 6), (60, 8), (120, 12)]
    let baseline = tiers.first(where: { minutes < $0.0 })?.1 ?? 16
    let windows = Double(max(1, windowCount))
    let perWindow = (baseline * granularity.multiplier / windows).rounded()
    return "around \(max(2, Int(perWindow))) sections"
}
/// Assembles the extras prompt FROM the template, so prompt, parsing and rendering
/// always agree. Template section `i` becomes a `sec_<i>` field whose requested
/// JSON shape depends on the section's kind; `tldr` and `primary_speakers` are
/// requested only when the template includes the TLDR.
/// Exposed so Settings can show the user the exact prompt that will be sent.
static func extrasPrompt(file: SpeakersFile, entries: [Entry], sections: [TopicSection],
                         template: RecapTemplate) -> String {
    let speakerNames = orderedSpeakerNames(entries)
    let roster: String
    if speakerNames.isEmpty {
        roster = "(unknown)"
    } else {
        roster = speakerNames.joined(separator: ", ")
    }

    var topicLines: [String] = []
    for (index, section) in sections.enumerated() {
        topicLines.append("\(index + 1). \(section.title)")
    }
    let topics = topicLines.isEmpty ? "(none)" : topicLines.joined(separator: "\n")
    let transcript = cappedTranscript(entries, maxChars: 24_000)

    // `fields` describes each key in prose; `shape` is the literal JSON skeleton.
    var fields: [String] = []
    var shape: [String] = []
    if template.includeTLDR {
        fields += [
            "- \"tldr\": a 2-4 sentence executive summary string (what it was about, the arc, the outcome; past tense, dense; skip pleasantries; one factual sentence if there's no substance).",
            "- \"primary_speakers\": array of the 1-3 names who drove the conversation, in rough order of contribution (empty if unclear).",
        ]
        shape += [
            "\"tldr\": \"...\"",
            "\"primary_speakers\": []",
        ]
    }
    for (index, section) in template.sections.enumerated() {
        let key = "sec_\(index)"
        let fieldLine: String
        let shapeLine: String
        switch section.kind {
        case .bullets:
            fieldLine = "- \"\(key)\" (\(section.title)): array of short strings. \(section.instruction)"
            shapeLine = "\"\(key)\": [\"...\"]"
        case .paragraph:
            fieldLine = "- \"\(key)\" (\(section.title)): a string. \(section.instruction)"
            shapeLine = "\"\(key)\": \"...\""
        case .items:
            fieldLine = "- \"\(key)\" (\(section.title)): array of {\"text\": string, \"who\": name or null, \"when\": integer seconds or null, \"note\": string or null}. \(section.instruction)"
            shapeLine = "\"\(key)\": [{\"text\": \"...\", \"who\": null, \"when\": null, \"note\": null}]"
        }
        fields.append(fieldLine)
        shape.append(shapeLine)
    }

    return """
    You are extracting structured takeaways from a call transcript. The transcript is labeled with the speakers' real names where known.
    MEETING METADATA:
    - App: \(file.app)
    - Duration: \(mmss(file.durationSec))
    SPEAKERS: \(roster)
    TOPIC SUMMARIES (already produced — for context only, do not duplicate):
    \(topics)
    TRANSCRIPT (each line is "[<name> <MM:SS>] text"):
    \(transcript)
    INSTRUCTIONS:
    Produce ONLY the JSON object below. Use the speakers' names exactly as shown above; use null/empty when a person or value is unclear. For any \"when\"/offset, convert the [<name> <MM:SS>] timestamp to total seconds. Be conservative — return EMPTY arrays (or null) rather than inventing anything.
    Fields:
    \(fields.joined(separator: "\n"))
    Respond with ONLY valid JSON in this exact shape, no other text:
    {
    \(shape.joined(separator: ",\n "))
    }
    """
}
/// Distinct, non-empty speaker names in order of first appearance.
static func orderedSpeakerNames(_ entries: [Entry]) -> [String] {
    var known = Set<String>()
    return entries.compactMap { entry -> String? in
        let name = entry.speaker
        // `insert` reports whether the name was new, which doubles as the dedupe check.
        guard !name.isEmpty, known.insert(name).inserted else { return nil }
        return name
    }
}
/// Full `[name MM:SS] text` transcript, middle-truncated so a long call still
/// fits the model context: keeps the first and last `maxChars / 2` characters
/// and replaces the middle with a marker line.
///
/// The cut is character-based, so it can fall mid-line, and the result may
/// exceed `maxChars` by the length of the marker. A negative `maxChars` is
/// treated as zero.
static func cappedTranscript(_ entries: [Entry], maxChars: Int) -> String {
    let full = entries.map { "[\($0.speaker) \(mmss($0.offset))] \($0.text)" }.joined(separator: "\n")
    // `prefix(_:)`/`suffix(_:)` trap on a negative length, so clamp the budget first.
    let budget = max(0, maxChars)
    guard full.count > budget else { return full }
    let half = budget / 2
    let head = String(full.prefix(half))
    let tail = String(full.suffix(half))
    return head + "\n…[transcript truncated]…\n" + tail
}
// MARK: - Parsing (defensive LLM output)
/// Parses model output as a JSON object after stripping any code fence.
/// - Returns: The top-level dictionary, or `nil` when the text is not valid JSON
///   or its top level is not an object.
private static func jsonObject(_ content: String) -> [String: Any]? {
    let payload = Data(GatewayLLMClient.stripCodeFence(content).utf8)
    guard let parsed = try? JSONSerialization.jsonObject(with: payload) else { return nil }
    return parsed as? [String: Any]
}
/// Extracts the `sections` array from a topic-analysis reply.
///
/// An element is kept only when it has a non-blank `title` and both indices parse
/// as integers; a missing `summary` becomes the empty string. Any other shape of
/// reply yields an empty array.
static func parseSections(_ content: String) -> [(title: String, summary: String, startIndex: Int, endIndex: Int)] {
    guard let root = jsonObject(content), let raw = root["sections"] as? [[String: Any]] else { return [] }

    var parsed: [(title: String, summary: String, startIndex: Int, endIndex: Int)] = []
    for item in raw {
        guard let title = (item["title"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines),
              !title.isEmpty,
              let first = intVal(item["startIndex"]),
              let last = intVal(item["endIndex"]) else { continue }
        let summary = (item["summary"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
        parsed.append((title: title, summary: summary, startIndex: first, endIndex: last))
    }
    return parsed
}
/// Parses an extras reply against `template`.
///
/// Template section `i` is read from key `sec_<i>` and decoded according to the
/// section's kind; a missing or mistyped value yields an empty section rather than
/// a failure.
/// - Returns: `nil` when the reply is not a JSON object, or when the TLDR is empty
///   and every rendered section is empty.
static func parseExtras(_ content: String, template: RecapTemplate) -> RecapExtras? {
    guard let root = jsonObject(content) else { return nil }

    var tldr = ""
    if template.includeTLDR, let raw = root["tldr"] as? String {
        tldr = raw.trimmingCharacters(in: .whitespacesAndNewlines)
    }
    let primary = stringArray(root["primary_speakers"])

    var rendered: [RenderedSection] = []
    for (index, section) in template.sections.enumerated() {
        let value = root["sec_\(index)"]
        let output: RenderedSection
        switch section.kind {
        case .bullets:
            output = RenderedSection(title: section.title, kind: .bullets, bullets: stringArray(value))
        case .paragraph:
            let text = (value as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
            output = RenderedSection(title: section.title, kind: .paragraph, paragraph: text)
        case .items:
            let rawItems = value as? [[String: Any]] ?? []
            var items: [RecapItem] = []
            for raw in rawItems {
                // An item without usable text is dropped entirely.
                guard let text = nonEmpty(raw["text"]) else { continue }
                items.append(RecapItem(text: text, who: nonEmpty(raw["who"]),
                                       when: intVal(raw["when"]), note: nonEmpty(raw["note"])))
            }
            output = RenderedSection(title: section.title, kind: .items, items: items)
        }
        rendered.append(output)
    }

    // Nothing worth showing: no TLDR and no section with content.
    if tldr.isEmpty && rendered.allSatisfy({ $0.isEmpty }) { return nil }
    return RecapExtras(tldr: tldr, primarySpeakers: primary, sections: rendered)
}
/// Leniently reads an integer from a decoded JSON value.
///
/// Accepts an `Int`, a `Double` (truncated toward zero), or a string holding an
/// integer (surrounding whitespace ignored).
/// - Returns: `nil` for any other value, and for doubles that are NaN, infinite,
///   or too large to fit in `Int`.
private static func intVal(_ v: Any?) -> Int? {
    if let i = v as? Int { return i }
    if let d = v as? Double {
        // `Int(_:)` traps on NaN/±infinity/out-of-range values, and this input is
        // untrusted model output. `Int(exactly:)` on the truncated value keeps the
        // same result for ordinary numbers and yields nil for the rest.
        return Int(exactly: d.rounded(.towardZero))
    }
    if let s = v as? String { return Int(s.trimmingCharacters(in: .whitespacesAndNewlines)) }
    return nil
}
/// Reads an array of strings from a decoded JSON value: non-string elements are
/// skipped, each string is whitespace-trimmed, and blank results are dropped.
/// Anything that is not an array yields `[]`.
private static func stringArray(_ v: Any?) -> [String] {
    guard let raw = v as? [Any] else { return [] }
    var cleaned: [String] = []
    for case let text as String in raw {
        let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
        if !trimmed.isEmpty { cleaned.append(trimmed) }
    }
    return cleaned
}
/// Returns the whitespace-trimmed string, or `nil` when the value is not a string,
/// is blank, or is the literal word "null" in any letter case.
private static func nonEmpty(_ v: Any?) -> String? {
    guard let raw = v as? String else { return nil }
    let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines)
    if trimmed.isEmpty || trimmed.lowercased() == "null" { return nil }
    return trimmed
}
}