Recap: readable transcript + topic sections + meeting extras (gateway LLM)

New 'Recap' phase — turns speakers.json into a human-readable recap, leveraging
recap-relay's proven logic/prompts but calling the Spark gateway's OpenAI-compatible
/v1/chat/completions directly (same host/TLS as label-merge; Qwen3-35B). We start
from already-named speakers (label-merge), so recap-relay's speaker clustering +
name-inference are skipped entirely.

- GatewayLLMClient: /v1/chat/completions (JSON mode), model discovery via
  /api/endpoints, TLS-skip reuse, 503 retry, sequential.
- RecapAnalyzer: speakers.json → numbered [N] (MM:SS) Name: text transcript →
  time-windowed analyze (single window for short calls, 18min/2min overlap for long)
  → stitch/dedup topic sections → meeting extras (TLDR/decisions/action_items/
  open_questions/key_quotes). Defensive JSON parsing of LLM output.
- RecapRenderer: writes transcript.md + a self-contained dark-theme recap.html
  (topic sections w/ collapsible transcripts, extras panels, speaker color chips,
  full timestamped speaker-attributed transcript, print styles).
- SessionController.buildRecap: best-effort after speakers.json (gated by
  settings.recapEnabled); surfaces recapURL → menu 'Open recap'. Skips silently if
  the gateway has no LLM. Settings toggle added.

Validated END-TO-END on the real Meet session against the live gateway: dual-channel
transcription → 3 topic sections + accurate TLDR + key quotes; 'Go Bitcoin'
correctly attributed to the remote speaker. 46/46 XCTest (10 new).
This commit is contained in:
Grant Gilliam
2026-06-06 14:36:18 -05:00
parent 53d7fcdac0
commit 85bfdf2b56
9 changed files with 941 additions and 1 deletions
@@ -0,0 +1,135 @@
import Foundation
/// Failures reported by `GatewayLLMClient`; each case carries a user-presentable
/// message through `LocalizedError`.
enum GatewayLLMError: Error, LocalizedError {
    /// The configured base URL could not be turned into a request URL.
    case invalidHost
    /// vLLM not loaded on the gateway.
    case notReady
    /// An HTTP status outside 2xx/503, with the detail text pulled from the body.
    case server(Int, String)
    /// The response body did not have the expected chat-completion shape.
    case decode(String)
    /// Every allowed 503 retry was used.
    case retriesExhausted

    var errorDescription: String? {
        let message: String
        switch self {
        case .retriesExhausted:
            message = "Gateway stayed busy (503) after retries."
        case let .decode(reason):
            message = "Couldn't decode the LLM response: \(reason)"
        case let .server(status, body):
            message = "LLM error \(status): \(body)"
        case .notReady:
            message = "The gateway's language model isn't ready."
        case .invalidHost:
            message = "Invalid backend host URL."
        }
        return message
    }
}
/// Talks to the Spark Control gateway's OpenAI-compatible `/v1/chat/completions`
/// (the same host + TLS as `label-merge`). Used for the recap analysis (topic
/// sections, summary polish, meeting extras). **Call sequentially** — like audio,
/// the gateway serializes GPU work; the recap pipeline issues one request at a time.
final class GatewayLLMClient {
// Gateway root URL as a string; `init` strips surrounding whitespace and one trailing "/".
private let baseURL: String
// Session used for every request; built in `init`, invalidated in `deinit`.
private let urlSession: URLSession
/// Creates a client for the gateway rooted at `baseURL`.
/// - Parameters:
///   - baseURL: Gateway root. Surrounding whitespace and a single trailing "/" are dropped
///     so endpoint paths can be appended directly.
///   - skipTLS: When true the session is created with an `InsecureTrustDelegate`
///     (presumably disabling certificate validation — confirm against that type).
init(baseURL: String, skipTLS: Bool) {
    var host = baseURL.trimmingCharacters(in: .whitespacesAndNewlines)
    if host.hasSuffix("/") { host.removeLast() }
    self.baseURL = host

    // Ephemeral session with long timeouts: 600 s per request, 900 s per resource.
    let configuration = URLSessionConfiguration.ephemeral
    configuration.waitsForConnectivity = false
    configuration.timeoutIntervalForRequest = 600
    configuration.timeoutIntervalForResource = 900

    let trustDelegate: URLSessionDelegate?
    if skipTLS {
        trustDelegate = InsecureTrustDelegate()
    } else {
        trustDelegate = nil
    }
    self.urlSession = URLSession(configuration: configuration, delegate: trustDelegate, delegateQueue: nil)
}
// On deallocation, let outstanding tasks finish and then invalidate the session.
deinit { urlSession.finishTasksAndInvalidate() }
/// Looks up the chat model the gateway currently serves.
///
/// Reads `/api/endpoints` and returns `vllm.model` when `vllm.ready` is `true`
/// and the model id is non-empty.
/// - Returns: The model id, or nil when the request fails, the body is not the
///   expected JSON object, or no language model is ready.
func chatModelId() async -> String? {
    guard let endpoints = URL(string: baseURL + "/api/endpoints") else { return nil }
    guard let (payload, _) = try? await urlSession.data(from: endpoints) else { return nil }

    let root = (try? JSONSerialization.jsonObject(with: payload)) as? [String: Any]
    guard let vllm = root?["vllm"] as? [String: Any] else { return nil }

    let isReady = (vllm["ready"] as? Bool) ?? false
    guard isReady, let name = vllm["model"] as? String, !name.isEmpty else { return nil }
    return name
}
/// One JSON-mode chat completion. Returns the raw `content` string (the caller
/// parses it as JSON). Retries on `503`, honouring a numeric `Retry-After`.
///
/// - Parameters:
///   - model: Model id sent in the request body.
///   - system: Optional system message; omitted from `messages` when nil.
///   - user: The user message.
///   - maxTokens: Value sent as `max_tokens`.
///   - maxRetries: How many 503 responses are retried before giving up.
/// - Returns: `choices[0].message.content` from the response.
/// - Throws: `GatewayLLMError.invalidHost` for an unusable base URL,
///   `.retriesExhausted` after too many 503s, `.server` for any other non-2xx
///   status, `.decode` for an unexpected body, plus any transport or
///   cancellation error from `URLSession` / `Task.sleep`.
func completeJSON(model: String, system: String?, user: String,
                  maxTokens: Int = 4096, maxRetries: Int = 3) async throws -> String {
    guard let url = URL(string: baseURL + "/v1/chat/completions") else {
        throw GatewayLLMError.invalidHost
    }
    var messages: [[String: String]] = []
    if let system { messages.append(["role": "system", "content": system]) }
    messages.append(["role": "user", "content": user])
    let body: [String: Any] = [
        "model": model,
        "messages": messages,
        "max_tokens": maxTokens,
        "stream": false,
        "response_format": ["type": "json_object"],
        "chat_template_kwargs": ["enable_thinking": false],
    ]
    let bodyData = try JSONSerialization.data(withJSONObject: body)

    // The request is identical on every attempt, so build it once.
    var request = URLRequest(url: url)
    request.httpMethod = "POST"
    request.setValue("application/json", forHTTPHeaderField: "Content-Type")
    request.httpBody = bodyData

    var attempt = 0
    while true {
        let (data, response) = try await urlSession.data(for: request)
        guard let http = response as? HTTPURLResponse else {
            throw GatewayLLMError.decode("no HTTP response")
        }
        switch http.statusCode {
        case 200..<300:
            return try Self.content(from: data)
        case 503:
            attempt += 1
            if attempt > maxRetries { throw GatewayLLMError.retriesExhausted }
            let delay = Self.retryDelaySeconds(http.value(forHTTPHeaderField: "Retry-After"))
            try await Task.sleep(nanoseconds: UInt64(delay * 1_000_000_000))
        default:
            throw GatewayLLMError.server(http.statusCode, Self.detail(from: data))
        }
    }
}

/// Seconds to wait before retrying a 503.
///
/// The header is server-controlled, and converting a NaN, infinite, or huge
/// `Double` to `UInt64` traps. Missing, non-numeric, or non-finite values
/// fall back to 5 s; everything else is clamped to 1...600 s.
private static func retryDelaySeconds(_ header: String?) -> Double {
    guard let header,
          let seconds = Double(header.trimmingCharacters(in: .whitespaces)),
          seconds.isFinite else { return 5 }
    return min(max(1, seconds), 600)
}
// MARK: - Parsing
/// Extracts `choices[0].message.content` from a chat-completion response body.
///
/// - Parameter data: Raw response body.
/// - Returns: The first choice's message content.
/// - Throws: `GatewayLLMError.decode` when the body does not decode, has no
///   choices, or the first choice has no content.
private static func content(from data: Data) throws -> String {
    struct ChatResponse: Decodable {
        struct Choice: Decodable {
            // Optional so a `null` content yields a specific message below
            // instead of a generic decoding failure.
            struct Message: Decodable { let content: String? }
            let message: Message
        }
        let choices: [Choice]
    }

    let decoded: ChatResponse
    do {
        decoded = try JSONDecoder().decode(ChatResponse.self, from: data)
    } catch {
        throw GatewayLLMError.decode(error.localizedDescription)
    }
    // These checks sit outside the do/catch so their errors are not caught
    // and wrapped a second time (which doubled the message prefix).
    guard let first = decoded.choices.first else {
        throw GatewayLLMError.decode("no choices in response")
    }
    guard let text = first.message.content else {
        throw GatewayLLMError.decode("no content in first choice")
    }
    return text
}
/// Pulls a human-readable error detail out of an error response body.
///
/// Checks a top-level `"detail"` string first, then `"error"."message"`.
/// If neither is present, returns the body decoded as UTF-8, or
/// `"unknown error"` when it is not valid UTF-8.
private static func detail(from data: Data) -> String {
    let json = (try? JSONSerialization.jsonObject(with: data)) as? [String: Any]
    if let topLevel = json?["detail"] as? String {
        return topLevel
    }
    if let nested = json?["error"] as? [String: Any], let message = nested["message"] as? String {
        return message
    }
    guard let rawText = String(data: data, encoding: .utf8) else { return "unknown error" }
    return rawText
}
/// Strip a ```json ``` code fence if the model wrapped its JSON (defensive;
/// JSON mode usually prevents this).
///
/// When the trimmed input starts with a fence, everything up to and including
/// the first line break is dropped, then everything from the last fence onward.
/// - Parameter s: Raw model output.
/// - Returns: The unfenced, whitespace-trimmed text; unfenced input is only trimmed.
static func stripCodeFence(_ s: String) -> String {
    var text = s.trimmingCharacters(in: .whitespacesAndNewlines)
    guard text.hasPrefix("```") else { return text }
    // Match any line break, not just "\n": Swift treats "\r\n" as one Character
    // that is not equal to "\n", so CRLF output would otherwise keep its fence line.
    if let lineBreak = text.firstIndex(where: \.isNewline) {
        text = String(text[text.index(after: lineBreak)...])
    }
    if let closingFence = text.range(of: "```", options: .backwards) {
        text = String(text[..<closingFence.lowerBound])
    }
    return text.trimmingCharacters(in: .whitespacesAndNewlines)
}
}
+339
View File
@@ -0,0 +1,339 @@
import Foundation
/// Turns a finished `speakers.json` into topic sections + meeting extras by driving
/// the gateway LLM — a Swift port of recap-relay's chunked-window analysis, but
/// starting from already-named speakers (label-merge), so we skip its speaker
/// clustering and name-inference entirely. Pure helpers are static + testable; the
/// LLM passes are sequential (one gateway request at a time).
final class RecapAnalyzer {
// Client used for every completion request.
private let llm: GatewayLLMClient
// Model id passed to each `completeJSON` call.
private let model: String
/// Creates an analyzer that sends every request through `llm` using `model`.
/// - Parameters:
///   - llm: Gateway client used for all completions.
///   - model: Model id forwarded with each request.
init(llm: GatewayLLMClient, model: String) {
    self.model = model
    self.llm = llm
}
/// One transcript line as used by the prompts and renderers: start and end
/// time, speaker name, and text.
struct Entry: Equatable {
let offset: Double // seconds
let end: Double // same time base as `offset`
let speaker: String
let text: String
}
/// One analysis window, expressed as inclusive indices into the entries array.
struct Window: Equatable {
let startIdx: Int // first entry index this window analyzes (incl. overlap)
let endIdx: Int // last entry index (incl. overlap)
let bodyStartIdx: Int // first entry this window "owns"
}
// MARK: - Orchestration
/// Runs the topic analysis, then the extras pass.
///
/// - Parameters:
///   - file: The session's speakers file.
///   - progress: Optional callback given a short status string before each pass.
/// - Returns: Topic sections plus extras. Extras are best-effort (nil on failure);
///   a file with no usable entries yields an empty result without any request.
/// - Throws: Whatever the topic analysis throws.
func recap(file: SpeakersFile, progress: ((String) async -> Void)? = nil) async throws -> RecapResult {
    let transcript = Self.entries(from: file)
    if transcript.isEmpty {
        return RecapResult(sections: [], extras: nil)
    }

    await progress?("Finding topics…")
    let topics = try await analyze(entries: transcript)

    await progress?("Extracting highlights…")
    let highlights: MeetingExtras?
    do {
        highlights = try await extras(file: file, entries: transcript, sections: topics)
    } catch {
        // Extras are optional; a failure here must not discard the topics.
        highlights = nil
    }
    return RecapResult(sections: topics, extras: highlights)
}
// MARK: - Analyze (chunked windows → stitched sections)
/// Splits the transcript into windows, asks the LLM for topic sections in each
/// window (one request at a time), maps the window-local indices back to global
/// entry indices, and stitches the results into one non-overlapping list.
///
/// - Parameter entries: Transcript entries in chronological order.
/// - Returns: Stitched sections. A single "Conversation" section covering every
///   entry when the model returned nothing usable; an empty array for empty input.
/// - Throws: Any error from `GatewayLLMClient.completeJSON`.
func analyze(entries: [Entry]) async throws -> [TopicSection] {
    // Without this guard the fallback below would produce the invalid range 0...-1.
    guard !entries.isEmpty else { return [] }

    let windows = Self.planWindows(entries)
    let totalSec = entries.last?.end ?? 0
    var all: [TopicSection] = []
    for w in windows {
        let local = Array(entries[w.startIdx...w.endIdx])
        let prompt = Self.analyzePrompt(local, totalSec: totalSec, windowCount: windows.count)
        let content = try await llm.completeJSON(model: model, system: nil, user: prompt)
        for s in Self.parseSections(content) {
            // Clamp the model's window-local indices into range, then shift to global indices.
            let gs = w.startIdx + max(0, min(s.startIndex, local.count - 1))
            let ge = w.startIdx + max(0, min(s.endIndex, local.count - 1))
            guard ge >= gs else { continue }
            all.append(TopicSection(title: s.title, summary: s.summary, startIndex: gs, endIndex: ge))
        }
    }
    let stitched = Self.stitch(all)
    // If the model returned nothing usable, fall back to one section for the whole call.
    if stitched.isEmpty {
        return [TopicSection(title: "Conversation", summary: "", startIndex: 0, endIndex: entries.count - 1)]
    }
    return stitched
}
/// Plans the time-based analysis windows for a transcript.
///
/// A call whose last entry ends at or before `cutoffSec` gets one window covering
/// everything. Longer calls get bodies of about `bodySec`, each widened by
/// `overlapSec` on both sides so a topic straddling a boundary is seen by both
/// neighbouring windows (the stitcher dedupes).
/// - Parameters:
///   - entries: Transcript entries, expected in ascending `offset` order.
///   - bodySec: Length of each window's body, in seconds.
///   - overlapSec: Extra context added before and after each body, in seconds.
///   - cutoffSec: Calls up to this length are analyzed in a single window.
/// - Returns: Windows in chronological order; empty for empty input.
static func planWindows(_ entries: [Entry],
                        bodySec: Double = 18 * 60, overlapSec: Double = 2 * 60,
                        cutoffSec: Double = 25 * 60) -> [Window] {
    guard let finalEntry = entries.last else { return [] }
    let lastIndex = entries.count - 1
    if finalEntry.end <= cutoffSec {
        return [Window(startIdx: 0, endIdx: lastIndex, bodyStartIdx: 0)]
    }

    var plan: [Window] = []
    var cursor = 0
    while cursor < entries.count {
        let bodyFrom = entries[cursor].offset
        let bodyUntil = bodyFrom + bodySec
        let windowFrom = bodyFrom - overlapSec
        let windowUntil = bodyUntil + overlapSec

        // The leading overlap reaches back to the first entry at or after windowFrom.
        let first = entries.firstIndex(where: { $0.offset >= windowFrom }) ?? cursor
        // The trailing edge extends while the next entry still starts inside the window.
        var last = cursor
        while last < lastIndex, entries[last + 1].offset <= windowUntil {
            last += 1
        }
        plan.append(Window(startIdx: first, endIdx: last, bodyStartIdx: cursor))

        // The next body starts at the first entry past this body; always advance by at least one.
        let following = entries.firstIndex(where: { $0.offset >= bodyUntil }) ?? entries.count
        cursor = max(following, cursor + 1)
    }
    return plan
}
/// Merges per-window sections into one chronological, non-overlapping list.
///
/// Sections are ordered by start index (the wider one first on ties). A section
/// that ends inside the range already covered is dropped; one that merely starts
/// inside it has its front trimmed to begin just after the covered range.
/// - Parameter sections: Sections with global entry indices, in any order.
/// - Returns: The deduplicated, trimmed sections.
static func stitch(_ sections: [TopicSection]) -> [TopicSection] {
    let ordered = sections.sorted { lhs, rhs in
        if lhs.startIndex == rhs.startIndex { return lhs.endIndex > rhs.endIndex }
        return lhs.startIndex < rhs.startIndex
    }
    var covered = -1 // highest entry index already claimed by an emitted section
    return ordered.compactMap { section -> TopicSection? in
        // Fully contained in what is already covered: drop.
        guard section.endIndex > covered else { return nil }
        // Overlapping front: begin just past the covered range.
        let trimmedStart = max(section.startIndex, covered + 1)
        guard trimmedStart <= section.endIndex else { return nil }
        covered = section.endIndex
        return TopicSection(title: section.title, summary: section.summary,
                            startIndex: trimmedStart, endIndex: section.endIndex)
    }
}
// MARK: - Extras
/// Runs the single "meeting extras" request and parses its reply.
/// - Returns: The parsed extras, or nil when the reply contains nothing usable.
/// - Throws: Any error from `GatewayLLMClient.completeJSON`.
func extras(file: SpeakersFile, entries: [Entry], sections: [TopicSection]) async throws -> MeetingExtras? {
    let reply = try await llm.completeJSON(
        model: model,
        system: nil,
        user: Self.extrasPrompt(file: file, entries: entries, sections: sections),
        maxTokens: 4096)
    return Self.parseExtras(reply)
}
// MARK: - Entries
/// Converts the file's segments into transcript entries: text is trimmed,
/// segments with no text after trimming are dropped, and the result is ordered
/// by start time.
static func entries(from file: SpeakersFile) -> [Entry] {
    let spoken = file.segments.compactMap { segment -> Entry? in
        let text = (segment.text ?? "").trimmingCharacters(in: .whitespacesAndNewlines)
        guard !text.isEmpty else { return nil }
        return Entry(offset: segment.start, end: segment.end, speaker: segment.speaker, text: text)
    }
    return spoken.sorted { $0.offset < $1.offset }
}
/// Formats a time in seconds as `M:SS`, or `H:MM:SS` from one hour up.
///
/// The value is rounded to the nearest second and negatives clamp to zero.
/// NaN, infinite, and out-of-range inputs render as `"0:00"` instead of
/// trapping in the `Double` to `Int` conversion.
static func mmss(_ sec: Double) -> String {
    guard sec.isFinite, let t = Int(exactly: max(0, sec.rounded())) else { return "0:00" }
    let h = t / 3600, m = (t % 3600) / 60, s = t % 60
    return h > 0 ? String(format: "%d:%02d:%02d", h, m, s) : String(format: "%d:%02d", m, s)
}
// MARK: - Prompts
/// Builds the topic-segmentation prompt for one analysis window.
///
/// - Parameters:
///   - window: The entries of this window only.
///   - totalSec: Length of the whole call in seconds; used only to choose the section target.
///   - windowCount: Number of windows the call was split into.
/// - Returns: The full prompt text, ending with the JSON shape the model must return.
private static func analyzePrompt(_ window: [Entry], totalSec: Double, windowCount: Int) -> String {
// Here `$0.offset` is the enumeration index (0-based position inside this window),
// while `$0.element.offset` is the entry's time. The bracketed [N] is window-local.
let lines = window.enumerated()
.map { "[\($0.offset)] (\(mmss($0.element.offset))) \($0.element.speaker): \($0.element.text)" }
.joined(separator: "\n")
let windowSpan = (window.last?.end ?? 0) - (window.first?.offset ?? 0)
// Window length in whole minutes, never reported as less than 1.
let windowMin = max(1, Int((windowSpan / 60).rounded()))
// Highest window-local index the model may reference.
let maxIndex = window.count - 1
let targetSections = targetSectionsPhrase(totalSec: totalSec, windowCount: windowCount)
return """
You are analyzing a ~\(windowMin)-minute section of a longer transcript. Your job is to identify natural topic boundaries and group the transcript into discussion-based sections — aim for \(targetSections).
TRANSCRIPT (each line is numbered with a timestamp):
\(lines)
INSTRUCTIONS:
1. Read the entire transcript carefully.
2. Identify where the discussion naturally shifts from one topic to another.
3. Group consecutive transcript segments by topic. Some sections may be short (a quick aside) and some may be long (an extended deep-dive). Let the content dictate the length.
4. For each section, write:
- A short, specific topic title (3-8 words)
- A 1-3 sentence summary of what's discussed. Attribute points to speakers by name where it improves clarity.
- The start and end segment indices (inclusive), counted as the bracketed [N] number at the start of each transcript line above.
IMPORTANT:
- Sections must be chronological and non-overlapping.
- Every segment index from 0 to \(maxIndex) must belong to exactly one section.
- startIndex of section N+1 must equal endIndex of section N plus 1.
- Create as many or as few sections as the content naturally requires — but lean toward broad, substantive topics rather than minute-by-minute breakdowns. A natural topic that spans several minutes of dialogue should be one section, not several.
- Titles should be descriptive and specific, not generic like "Introduction" unless it truly is one.
Respond with ONLY valid JSON in this exact format, no other text:
{
"sections": [
{
"title": "Brief Topic Title",
"summary": "1-3 sentence summary of this discussion section.",
"startIndex": 0,
"endIndex": 15
}
]
}
"""
}
/// Phrase telling the model roughly how many sections to produce per window.
///
/// A whole-call target is chosen from the call length (3 sections under
/// 5 minutes, rising to 16 for two hours or more), divided across the windows,
/// rounded, and never allowed below 2.
private static func targetSectionsPhrase(totalSec: Double, windowCount: Int) -> String {
    let minutes = totalSec / 60
    let wholeCallTarget: Int
    switch minutes {
    case ..<5: wholeCallTarget = 3
    case ..<15: wholeCallTarget = 4
    case ..<30: wholeCallTarget = 6
    case ..<60: wholeCallTarget = 8
    case ..<120: wholeCallTarget = 12
    default: wholeCallTarget = 16
    }
    let windows = Double(max(1, windowCount))
    let perWindow = max(2, Int((Double(wholeCallTarget) / windows).rounded()))
    return "around \(perWindow) sections"
}
/// Builds the "meeting extras" prompt (TLDR, decisions, action items, open
/// questions, key quotes) for the whole call.
///
/// - Parameters:
///   - file: Supplies the app name and duration shown under MEETING METADATA.
///   - entries: Transcript entries; source of the speaker roster and transcript text.
///   - sections: Topic sections already produced; only their titles are listed.
/// - Returns: The full prompt text, ending with the JSON shape the model must return.
private static func extrasPrompt(file: SpeakersFile, entries: [Entry], sections: [TopicSection]) -> String {
// Speaker roster in first-appearance order, or a placeholder when there is none.
let names = orderedSpeakerNames(entries)
let roster = names.isEmpty ? "(unknown)" : names.joined(separator: ", ")
// Topic titles as a 1-based numbered list.
let topics = sections.isEmpty ? "(none)" :
sections.enumerated().map { "\($0.offset + 1). \($0.element.title)" }.joined(separator: "\n")
// The transcript is capped at 24,000 characters (see `cappedTranscript`).
let transcript = cappedTranscript(entries, maxChars: 24_000)
let durationStr = mmss(file.durationSec)
return """
You are extracting structured information from an internal team meeting transcript. The transcript below is labeled with the speakers' real names where known.
MEETING METADATA:
- App: \(file.app)
- Duration: \(durationStr)
SPEAKERS: \(roster)
TOPIC SUMMARIES (already produced — for context only, do not duplicate):
\(topics)
TRANSCRIPT (each line is "[<name> <MM:SS>] text"):
\(transcript)
INSTRUCTIONS:
Extract FIVE categories of information. Return EMPTY ARRAYS for categories that don't apply — do NOT invent items. Use the speakers' names exactly as shown above; use null/empty when a person is unclear.
1. TLDR — A 2-4 sentence executive summary of the entire meeting: what it was about, the key discussion arc, and the bottom-line outcome. Past tense, third person, dense. Skip pleasantries. If the meeting was genuinely substanceless, write one factual sentence. This is the only required category.
- summary: the 2-4 sentence executive summary
- primary_speakers: array of names who drove the conversation (1-3, in rough order of contribution). Empty array if unclear.
2. DECISIONS — Things explicitly decided/agreed. Only clear commitments, not casual mentions. For each:
- statement: the decision in one sentence
- agreed_by: array of names who explicitly agreed (empty if unclear)
- supporting_offset: integer SECONDS where it was decided (convert the [<name> <MM:SS>] timestamp to total seconds)
3. ACTION_ITEMS — Explicit ownership ("I'll send the doc", "Matt will follow up"), not vague "someone should". For each:
- description: the action in imperative form
- owner: the person's name, or null if unclear
- due_hint: deadline string if mentioned ("by Friday"), or null
- supporting_offset: integer seconds where the commitment was made
4. OPEN_QUESTIONS — Questions raised that were NOT clearly answered. Skip rhetorical/answered ones. For each:
- question: rephrased to be self-contained
- raised_by: the person's name, or null
- answered: false (always)
5. KEY_QUOTES — 3-6 max. Pivotal/insightful/strong-opinion statements worth surfacing verbatim. For each:
- speaker: the person's name (or null)
- offset: integer seconds where the quote occurs
- quote: the verbatim quote (4-30 words)
- why_notable: one short clause
Be conservative — better an empty array than a fabrication. Respond with ONLY valid JSON in this exact shape, no other text:
{
"tldr": {"summary": "...", "primary_speakers": []},
"decisions": [{"statement": "...", "agreed_by": [], "supporting_offset": 0}],
"action_items": [{"description": "...", "owner": null, "due_hint": null, "supporting_offset": 0}],
"open_questions": [{"question": "...", "raised_by": null, "answered": false}],
"key_quotes": [{"speaker": null, "offset": 0, "quote": "...", "why_notable": "..."}]
}
"""
}
/// Returns each distinct, non-empty speaker name once, in order of first appearance.
static func orderedSpeakerNames(_ entries: [Entry]) -> [String] {
    var known: Set<String> = []
    return entries.compactMap { entry -> String? in
        let name = entry.speaker
        // `insert` reports whether the name was new, so repeats are filtered out.
        guard !name.isEmpty, known.insert(name).inserted else { return nil }
        return name
    }
}
/// Renders the whole transcript as `[name MM:SS] text` lines. If that exceeds
/// `maxChars` characters, keeps the first and last `maxChars / 2` characters
/// with a truncation marker between them, so a long call still fits the model
/// context (the middle is what gets dropped).
static func cappedTranscript(_ entries: [Entry], maxChars: Int) -> String {
    var lines: [String] = []
    lines.reserveCapacity(entries.count)
    for entry in entries {
        lines.append("[\(entry.speaker) \(mmss(entry.offset))] \(entry.text)")
    }
    let whole = lines.joined(separator: "\n")
    if whole.count <= maxChars { return whole }

    let keep = maxChars / 2
    return "\(whole.prefix(keep))\n…[transcript truncated]…\n\(whole.suffix(keep))"
}
// MARK: - Parsing (defensive LLM output)
/// Parses model output as a JSON object after removing any code fence.
/// - Returns: The top-level dictionary, or nil when the text is not valid JSON
///   or its top level is not an object.
private static func jsonObject(_ content: String) -> [String: Any]? {
    let payload = Data(GatewayLLMClient.stripCodeFence(content).utf8)
    guard let parsed = try? JSONSerialization.jsonObject(with: payload) else { return nil }
    return parsed as? [String: Any]
}
/// Reads the `sections` array from the model's reply.
///
/// An item is kept only when it has a non-blank title and both indices parse as
/// integers; a missing summary becomes an empty string. Indices are returned as
/// the model gave them, with no range checking.
/// - Returns: The usable sections in reply order; empty when the reply is not
///   the expected JSON.
static func parseSections(_ content: String) -> [(title: String, summary: String, startIndex: Int, endIndex: Int)] {
    guard let rawSections = jsonObject(content)?["sections"] as? [[String: Any]] else { return [] }

    var parsed: [(title: String, summary: String, startIndex: Int, endIndex: Int)] = []
    for item in rawSections {
        let title = (item["title"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
        guard !title.isEmpty,
              let first = intVal(item["startIndex"]),
              let last = intVal(item["endIndex"]) else { continue }
        let summary = (item["summary"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
        parsed.append((title: title, summary: summary, startIndex: first, endIndex: last))
    }
    return parsed
}
/// Reads the meeting extras from the model's reply.
///
/// Each category is parsed independently. Items missing their main text
/// (statement, description, question, quote) are skipped, and string fields are
/// trimmed with blank or literal "null" values treated as absent.
/// - Returns: The extras, or nil when the reply is not a JSON object or every
///   category, including the TLDR summary, came back empty.
static func parseExtras(_ content: String) -> MeetingExtras? {
    guard let o = jsonObject(content) else { return nil }
    let tldrObj = o["tldr"] as? [String: Any]
    // The summary goes through `nonEmpty` like every other string field, so a
    // whitespace-only or "null" summary no longer counts as content.
    let tldr = MeetingExtras.TLDR(
        summary: nonEmpty(tldrObj?["summary"]) ?? "",
        primarySpeakers: stringArray(tldrObj?["primary_speakers"]))
    let decisions = (o["decisions"] as? [[String: Any]] ?? []).compactMap { d -> MeetingExtras.Decision? in
        guard let st = nonEmpty(d["statement"]) else { return nil }
        return .init(statement: st, agreedBy: stringArray(d["agreed_by"]), supportingOffset: intVal(d["supporting_offset"]))
    }
    let actions = (o["action_items"] as? [[String: Any]] ?? []).compactMap { d -> MeetingExtras.ActionItem? in
        guard let desc = nonEmpty(d["description"]) else { return nil }
        return .init(description: desc, owner: nonEmpty(d["owner"]), dueHint: nonEmpty(d["due_hint"]),
                     supportingOffset: intVal(d["supporting_offset"]))
    }
    let questions = (o["open_questions"] as? [[String: Any]] ?? []).compactMap { d -> MeetingExtras.OpenQuestion? in
        guard let q = nonEmpty(d["question"]) else { return nil }
        return .init(question: q, raisedBy: nonEmpty(d["raised_by"]))
    }
    let quotes = (o["key_quotes"] as? [[String: Any]] ?? []).compactMap { d -> MeetingExtras.KeyQuote? in
        guard let q = nonEmpty(d["quote"]) else { return nil }
        return .init(speaker: nonEmpty(d["speaker"]), offset: intVal(d["offset"]), quote: q,
                     whyNotable: nonEmpty(d["why_notable"]) ?? "")
    }
    // Extras count as present when ANY category has content; a TLDR alone is
    // enough, but it is not required.
    guard !tldr.summary.isEmpty || !decisions.isEmpty || !actions.isEmpty || !questions.isEmpty || !quotes.isEmpty
    else { return nil }
    return MeetingExtras(tldr: tldr, decisions: decisions, actionItems: actions,
                         openQuestions: questions, keyQuotes: quotes)
}
/// Leniently reads an integer from a decoded JSON value.
///
/// Accepts an `Int`, a `Double` (truncated toward zero), or a string holding an
/// integer (surrounding whitespace ignored).
/// - Returns: The integer, or nil for any other value, including a `Double`
///   that is NaN, infinite, or outside `Int`'s range. Those would trap in a
///   plain `Int(_:)` conversion, and this value comes from LLM output.
private static func intVal(_ v: Any?) -> Int? {
    if let i = v as? Int { return i }
    if let d = v as? Double { return Int(exactly: d.rounded(.towardZero)) }
    if let s = v as? String { return Int(s.trimmingCharacters(in: .whitespacesAndNewlines)) }
    return nil
}
/// Reads an array of strings from a decoded JSON value: non-string elements
/// are skipped, each string is trimmed, and blank results are dropped.
/// - Returns: The cleaned strings in order; empty when `v` is not an array.
private static func stringArray(_ v: Any?) -> [String] {
    guard let items = v as? [Any] else { return [] }
    var cleaned: [String] = []
    for case let text as String in items {
        let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
        if !trimmed.isEmpty { cleaned.append(trimmed) }
    }
    return cleaned
}
/// Reads an optional string from a decoded JSON value.
/// - Returns: The trimmed string, or nil when `v` is not a string, is blank,
///   or is the word "null" in any letter case.
private static func nonEmpty(_ v: Any?) -> String? {
    guard let raw = v as? String else { return nil }
    let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines)
    if trimmed.isEmpty || trimmed.lowercased() == "null" {
        return nil
    }
    return trimmed
}
}
+38
View File
@@ -0,0 +1,38 @@
import Foundation
/// One topic section: a contiguous run of transcript entries `[startIndex...endIndex]`
/// (inclusive, indices into the canonical entries array) with an LLM title + summary.
struct TopicSection: Equatable {
// Short topic title.
var title: String
// Summary text; may be empty.
var summary: String
// Index of the first entry in the section (inclusive).
var startIndex: Int
// Index of the last entry in the section (inclusive).
var endIndex: Int
}
/// Structured "meeting extras" extracted from the named transcript. Mirrors
/// recap-relay's schema; speakers are real names (we already have them from
/// label-merge), not anonymous cluster ids.
struct MeetingExtras: Equatable {
    /// Executive summary plus the people who drove the conversation.
    struct TLDR: Equatable {
        var summary: String
        var primarySpeakers: [String]
    }

    /// Something the meeting decided; `supportingOffset` is in seconds.
    struct Decision: Equatable {
        var statement: String
        var agreedBy: [String]
        var supportingOffset: Int?
    }

    /// A committed task with optional owner, deadline hint, and offset in seconds.
    struct ActionItem: Equatable {
        var description: String
        var owner: String?
        var dueHint: String?
        var supportingOffset: Int?
    }

    /// A question that was raised and left unanswered.
    struct OpenQuestion: Equatable {
        var question: String
        var raisedBy: String?
    }

    /// A verbatim quote with the reason it stands out; `offset` is in seconds.
    struct KeyQuote: Equatable {
        var speaker: String?
        var offset: Int?
        var quote: String
        var whyNotable: String
    }

    var tldr: TLDR
    var decisions: [Decision]
    var actionItems: [ActionItem]
    var openQuestions: [OpenQuestion]
    var keyQuotes: [KeyQuote]

    /// True when every category other than the TLDR is empty.
    var isEmptyBeyondTLDR: Bool {
        let categoriesEmpty = [decisions.isEmpty, actionItems.isEmpty, openQuestions.isEmpty, keyQuotes.isEmpty]
        return categoriesEmpty.allSatisfy { $0 }
    }
}
/// The assembled recap for one session: the topic sections + (optional) extras,
/// over the session's transcript. Rendered to `transcript.md` / `recap.html`.
struct RecapResult: Equatable {
// Topic sections in chronological order.
var sections: [TopicSection]
// Meeting extras; nil when none were produced.
var extras: MeetingExtras?
}
+253
View File
@@ -0,0 +1,253 @@
import Foundation
/// Renders a session (`speakers.json` + recap) into human-readable artifacts:
/// `transcript.md` (portable/editable) and `recap.html` (self-contained, dark
/// theme, printable, shareable). Port of recap-relay's meetingToMarkdown/Html,
/// driven by our already-named transcript.
enum RecapRenderer {
/// Renders the recap and writes `transcript.md` and `recap.html` into `folder`.
/// - Throws: Any error from writing either file.
static func write(file: SpeakersFile, result: RecapResult, title: String, to folder: URL) throws {
    let entries = RecapAnalyzer.entries(from: file)

    let markdownText = markdown(file: file, result: result, title: title, entries: entries)
    let markdownURL = folder.appendingPathComponent("transcript.md")
    try markdownText.data(using: .utf8)?.write(to: markdownURL)

    let htmlText = html(file: file, result: result, title: title, entries: entries)
    let htmlURL = folder.appendingPathComponent("recap.html")
    try htmlText.data(using: .utf8)?.write(to: htmlURL)
}
// MARK: - Markdown
/// Renders the session as Markdown.
///
/// Layout: title; a metadata line (app, duration, speakers); any extras that are
/// present (Summary, Decisions, Action Items, Open Questions, Key Quotes); the
/// topic sections, each with a collapsible transcript; then the full transcript.
/// - Parameters:
///   - file: Supplies the app name and duration.
///   - result: Topic sections and optional extras.
///   - title: Document heading.
///   - entries: The transcript that the sections index into.
/// - Returns: The Markdown document.
static func markdown(file: SpeakersFile, result: RecapResult, title: String,
                     entries: [RecapAnalyzer.Entry]) -> String {
    var out = "# \(title)\n\n"
    let speakers = RecapAnalyzer.orderedSpeakerNames(entries)
    out += "*\(file.app) · \(RecapAnalyzer.mmss(file.durationSec))"
    if !speakers.isEmpty { out += " · \(speakers.count) speaker\(speakers.count == 1 ? "" : "s"): \(speakers.joined(separator: ", "))" }
    out += "*\n\n"
    if let x = result.extras {
        if !x.tldr.summary.isEmpty {
            out += "## Summary\n\n\(x.tldr.summary)\n"
            if !x.tldr.primarySpeakers.isEmpty { out += "\n*Primary speakers: \(x.tldr.primarySpeakers.joined(separator: ", "))*\n" }
            out += "\n"
        }
        if !x.decisions.isEmpty {
            out += "## Decisions\n\n"
            for d in x.decisions {
                var line = "- \(d.statement)"
                if !d.agreedBy.isEmpty { line += " — agreed by \(d.agreedBy.joined(separator: ", "))" }
                if let o = d.supportingOffset { line += " *(\(RecapAnalyzer.mmss(Double(o))))*" }
                out += line + "\n"
            }
            out += "\n"
        }
        if !x.actionItems.isEmpty {
            out += "## Action Items\n\n"
            for a in x.actionItems {
                var line = "- [ ] \(a.description)"
                if let o = a.owner { line += " — **\(o)**" }
                if let due = a.dueHint { line += " (\(due))" }
                if let off = a.supportingOffset { line += " *(\(RecapAnalyzer.mmss(Double(off))))*" }
                out += line + "\n"
            }
            out += "\n"
        }
        if !x.openQuestions.isEmpty {
            out += "## Open Questions\n\n"
            for q in x.openQuestions {
                var line = "- \(q.question)"
                if let r = q.raisedBy { line += " — *\(r)*" }
                out += line + "\n"
            }
            out += "\n"
        }
        if !x.keyQuotes.isEmpty {
            out += "## Key Quotes\n\n"
            for k in x.keyQuotes {
                out += "> \"\(k.quote)\""
                var attr: [String] = []
                if let s = k.speaker { attr.append(s) }
                if let o = k.offset { attr.append(RecapAnalyzer.mmss(Double(o))) }
                // Separate the attribution from the closing quote mark; without
                // the dash it ran straight on ("…end"Alice, 1:23).
                if !attr.isEmpty { out += " — \(attr.joined(separator: ", "))" }
                out += "\n"
                if !k.whyNotable.isEmpty { out += ">\n> \(k.whyNotable)\n" }
                out += "\n"
            }
        }
    }
    if !result.sections.isEmpty {
        out += "## Topics\n\n"
        for (i, sec) in result.sections.enumerated() {
            let range = timeRange(sec, entries: entries)
            out += "### \(i + 1). \(sec.title)\(range)\n\n"
            if !sec.summary.isEmpty { out += "\(sec.summary)\n\n" }
            out += "<details>\n<summary>Transcript</summary>\n\n"
            out += transcriptLines(sec, entries: entries)
            out += "\n</details>\n\n"
        }
    }
    out += "## Full Transcript\n\n"
    for e in entries { out += "**[\(RecapAnalyzer.mmss(e.offset))] \(e.speaker):** \(e.text)\n\n" }
    return out
}
/// Markdown suffix showing a section's time span, e.g. ` *(0:00–1:30)*`.
///
/// Runs from the start of the section's first entry to the end of its last.
/// The two times are joined with an en dash; without a separator they read as
/// one run of digits ("0:001:30").
/// - Returns: The suffix with a leading space, or an empty string when either
///   index is outside `entries`.
private static func timeRange(_ sec: TopicSection, entries: [RecapAnalyzer.Entry]) -> String {
    guard entries.indices.contains(sec.startIndex), entries.indices.contains(sec.endIndex) else { return "" }
    let from = RecapAnalyzer.mmss(entries[sec.startIndex].offset)
    let to = RecapAnalyzer.mmss(entries[sec.endIndex].end)
    return " *(\(from)–\(to))*"
}
/// Markdown for a section's transcript: one bold `[time] speaker:` line per
/// entry, separated by blank lines.
/// - Returns: The lines, or an empty string when the section's range is
///   inverted or falls outside `entries`.
private static func transcriptLines(_ sec: TopicSection, entries: [RecapAnalyzer.Entry]) -> String {
    let valid = entries.indices
    guard sec.startIndex <= sec.endIndex,
          valid.contains(sec.startIndex),
          valid.contains(sec.endIndex) else { return "" }

    var rendered: [String] = []
    for entry in entries[sec.startIndex...sec.endIndex] {
        rendered.append("**[\(RecapAnalyzer.mmss(entry.offset))] \(entry.speaker):** \(entry.text)")
    }
    return rendered.joined(separator: "\n\n")
}
// MARK: - HTML
/// Renders the session as the body of `recap.html`, wrapped by `htmlShell`.
///
/// Layout: header (title, metadata, speaker legend); a card for each extras
/// category that is present; a Topics card with one collapsible block per
/// section; then the full transcript. All transcript and LLM text goes through
/// `esc` before being inserted.
/// - Parameters:
///   - file: Supplies the app name and duration.
///   - result: Topic sections and optional extras.
///   - title: Page title and heading.
///   - entries: The transcript that the sections index into.
/// - Returns: The complete HTML document.
static func html(file: SpeakersFile, result: RecapResult, title: String,
                 entries: [RecapAnalyzer.Entry]) -> String {
    let speakers = RecapAnalyzer.orderedSpeakerNames(entries)
    let colorFor = speakerColors(speakers)
    // Coloured name chip; speakers not in the legend get a neutral grey.
    func chip(_ name: String) -> String {
        let c = colorFor[name] ?? "#8a8f98"
        return "<span class=\"chip\" style=\"background:\(c)\">\(esc(name))</span>"
    }
    var body = ""
    let sub = "\(esc(file.app)) · \(RecapAnalyzer.mmss(file.durationSec))"
        + (speakers.isEmpty ? "" : " · \(speakers.count) speaker\(speakers.count == 1 ? "" : "s")")
    body += "<header><h1>\(esc(title))</h1><div class=\"sub\">\(sub)</div>"
    if !speakers.isEmpty {
        body += "<div class=\"legend\">" + speakers.map { chip($0) }.joined() + "</div>"
    }
    body += "</header>"
    if let x = result.extras {
        if !x.tldr.summary.isEmpty {
            body += card("Summary", "<p>\(esc(x.tldr.summary))</p>"
                + (x.tldr.primarySpeakers.isEmpty ? "" : "<p class=\"muted\">Primary: \(x.tldr.primarySpeakers.map(esc).joined(separator: ", "))</p>"))
        }
        if !x.decisions.isEmpty {
            let items = x.decisions.map { d -> String in
                var s = "<li>\(esc(d.statement))"
                if !d.agreedBy.isEmpty { s += " <span class=\"muted\">— agreed by \(d.agreedBy.map(esc).joined(separator: ", "))</span>" }
                if let o = d.supportingOffset { s += " <span class=\"ts\">\(RecapAnalyzer.mmss(Double(o)))</span>" }
                return s + "</li>"
            }.joined()
            body += card("Decisions", "<ul>\(items)</ul>")
        }
        if !x.actionItems.isEmpty {
            let items = x.actionItems.map { a -> String in
                var s = "<li>☐ \(esc(a.description))"
                if let o = a.owner { s += " <strong>\(esc(o))</strong>" }
                if let due = a.dueHint { s += " <span class=\"muted\">(\(esc(due)))</span>" }
                if let off = a.supportingOffset { s += " <span class=\"ts\">\(RecapAnalyzer.mmss(Double(off)))</span>" }
                return s + "</li>"
            }.joined()
            body += card("Action Items", "<ul class=\"actions\">\(items)</ul>")
        }
        if !x.openQuestions.isEmpty {
            let items = x.openQuestions.map { q -> String in
                "<li>\(esc(q.question))" + (q.raisedBy.map { " <span class=\"muted\">— \(esc($0))</span>" } ?? "") + "</li>"
            }.joined()
            body += card("Open Questions", "<ul>\(items)</ul>")
        }
        if !x.keyQuotes.isEmpty {
            let items = x.keyQuotes.map { k -> String in
                var attr: [String] = []
                if let s = k.speaker { attr.append(esc(s)) }
                if let o = k.offset { attr.append(RecapAnalyzer.mmss(Double(o))) }
                // The opening curly quote previously had no closing partner.
                var s = "<blockquote>“\(esc(k.quote))”"
                if !attr.isEmpty { s += "<cite>— \(attr.joined(separator: ", "))</cite>" }
                if !k.whyNotable.isEmpty { s += "<div class=\"muted\">\(esc(k.whyNotable))</div>" }
                return s + "</blockquote>"
            }.joined()
            body += card("Key Quotes", items)
        }
    }
    if !result.sections.isEmpty {
        var topics = ""
        for (i, sec) in result.sections.enumerated() {
            // Start and end are joined with an en dash so the span reads as a
            // range rather than one run of digits.
            let range = entries.indices.contains(sec.startIndex) && entries.indices.contains(sec.endIndex)
                ? "<span class=\"ts\">\(RecapAnalyzer.mmss(entries[sec.startIndex].offset))–\(RecapAnalyzer.mmss(entries[sec.endIndex].end))</span>" : ""
            topics += "<details class=\"topic\"><summary><span class=\"tnum\">\(i + 1)</span> \(esc(sec.title)) \(range)</summary>"
            if !sec.summary.isEmpty { topics += "<p>\(esc(sec.summary))</p>" }
            topics += "<div class=\"turns\">" + turnsHtml(sec, entries: entries, chip: chip) + "</div></details>"
        }
        body += card("Topics", topics)
    }
    let full = entries.map { "<div class=\"turn\"><span class=\"ts\">\(RecapAnalyzer.mmss($0.offset))</span> \(chip($0.speaker)) <span class=\"txt\">\(esc($0.text))</span></div>" }.joined()
    body += "<details class=\"topic\" open><summary>Full Transcript</summary><div class=\"turns\">\(full)</div></details>"
    return htmlShell(title: esc(title), body: body)
}
/// HTML for a section's transcript: one `.turn` row per entry with timestamp,
/// speaker chip, and escaped text.
/// - Parameter chip: Renders a speaker name as its chip markup.
/// - Returns: The concatenated rows, or an empty string when the section's
///   range is inverted or falls outside `entries`.
private static func turnsHtml(_ sec: TopicSection, entries: [RecapAnalyzer.Entry],
                              chip: (String) -> String) -> String {
    let valid = entries.indices
    guard sec.startIndex <= sec.endIndex,
          valid.contains(sec.startIndex),
          valid.contains(sec.endIndex) else { return "" }

    var rows = ""
    for entry in entries[sec.startIndex...sec.endIndex] {
        rows += "<div class=\"turn\"><span class=\"ts\">\(RecapAnalyzer.mmss(entry.offset))</span> \(chip(entry.speaker)) <span class=\"txt\">\(esc(entry.text))</span></div>"
    }
    return rows
}
/// Wraps `inner` in a card section with an escaped `<h2>` heading.
/// `inner` is inserted as-is, so the caller must have escaped it already.
private static func card(_ title: String, _ inner: String) -> String {
    let heading = esc(title)
    return "<section class=\"card\"><h2>\(heading)</h2>\(inner)</section>"
}
/// Chip background colours, assigned to speakers in legend order.
private static let palette = [
    "#5b8def", "#e0719c", "#43b581", "#e8a33d",
    "#9b6dde", "#3fb6c9", "#d96f6f", "#7aa55c",
]

/// Maps each name to a palette colour by its position, cycling through the
/// palette when there are more names than colours. If a name occurs twice,
/// its later position wins.
private static func speakerColors(_ names: [String]) -> [String: String] {
    names.enumerated().reduce(into: [String: String]()) { colors, pair in
        colors[pair.element] = palette[pair.offset % palette.count]
    }
}
/// HTML-escapes `s` for use in element content and double-quoted attributes.
///
/// Replaces `&`, `<`, `>` and `"` with their entities. The ampersand is
/// handled first so the entities produced by the later replacements are not
/// escaped a second time.
private static func esc(_ s: String) -> String {
    let replacements: [(target: String, entity: String)] = [
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
        ("\"", "&quot;"),
    ]
    return replacements.reduce(s) { escaped, rule in
        escaped.replacingOccurrences(of: rule.target, with: rule.entity)
    }
}
/// Wraps `body` in a complete, self-contained HTML document.
///
/// The stylesheet is inlined in `<head>` (dark theme via CSS variables plus an
/// `@media print` override), `body` is placed inside `<main>`, and a fixed
/// footer line is appended after it.
///
/// - Parameters:
///   - title: Text for the `<title>` element. It is interpolated verbatim and
///     is NOT escaped here, so callers must pass an already-escaped string.
///   - body: Markup inserted verbatim inside `<main>`.
/// - Returns: The full HTML page as a string.
private static func htmlShell(title: String, body: String) -> String {
"""
<!DOCTYPE html><html lang="en"><head><meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>\(title)</title>
<style>
:root{--bg:#15171c;--card:#1d2026;--fg:#e6e8ec;--muted:#9aa0aa;--line:#2a2e36;--accent:#5b8def;}
*{box-sizing:border-box}
body{margin:0;background:var(--bg);color:var(--fg);font:15px/1.55 -apple-system,BlinkMacSystemFont,"Segoe UI",sans-serif;}
main{max-width:820px;margin:0 auto;padding:32px 20px 80px;}
header h1{margin:0 0 4px;font-size:24px}
.sub{color:var(--muted);font-size:13px}
.legend{margin-top:12px;display:flex;flex-wrap:wrap;gap:6px}
.chip{display:inline-block;padding:1px 8px;border-radius:10px;color:#fff;font-size:12px;font-weight:600}
.card{background:var(--card);border:1px solid var(--line);border-radius:12px;padding:16px 18px;margin-top:18px}
.card h2{margin:0 0 10px;font-size:16px;color:var(--accent)}
.muted{color:var(--muted)}
ul{margin:0;padding-left:18px} li{margin:4px 0}
ul.actions{list-style:none;padding-left:0}
.ts{color:var(--muted);font-variant-numeric:tabular-nums;font-size:12px;margin-right:4px}
blockquote{margin:0 0 12px;padding:8px 12px;border-left:3px solid var(--accent);background:#0e0f13;border-radius:0 8px 8px 0}
blockquote cite{display:block;color:var(--muted);font-size:12px;margin-top:4px;font-style:normal}
details.topic{border-top:1px solid var(--line);padding:10px 0}
details.topic > summary{cursor:pointer;font-weight:600;list-style:none}
details.topic > summary::-webkit-details-marker{display:none}
.tnum{display:inline-block;min-width:20px;color:var(--accent);font-weight:700}
.turns{margin-top:10px}
.turn{margin:6px 0;display:flex;gap:8px;align-items:baseline;flex-wrap:wrap}
.turn .txt{flex:1;min-width:60%}
@media print{body{background:#fff;color:#000}.card,blockquote{background:#fff;border-color:#ccc}details.topic{}.chip{border:1px solid #999}}
</style></head>
<body><main>\(body)
<footer class="sub" style="margin-top:40px">Ten31 Transcripts · generated on-device</footer>
</main></body></html>
"""
}
}
@@ -54,6 +54,8 @@ final class SessionController: ObservableObject {
@Published private(set) var detectionStatus: CallDetector.Status = .disabled
/// Backend transcription status for the last session.
@Published private(set) var transcriptStatus: TranscriptStatus = .idle
/// Set when a readable recap (`recap.html`) has been written for the last session.
@Published private(set) var recapURL: URL?
private let settings: AppSettings
private var voiceprints: VoiceprintStore
@@ -195,6 +197,7 @@ final class SessionController: ObservableObject {
mixedURL: folder.appendingPathComponent("mixed_mono_16k.wav"))
self.recorder = recorder
warning = nil
recapURL = nil
state = .starting
lifecycleGeneration += 1
@@ -361,6 +364,7 @@ final class SessionController: ObservableObject {
guard let inputs = lastProcess else { return }
if case .processing = transcriptStatus { return }
transcriptStatus = .processing(0, 1)
recapURL = nil
let settings = self.settings
let voiceprints = self.voiceprints
@@ -379,6 +383,11 @@ final class SessionController: ObservableObject {
await MainActor.run { self.transcriptStatus = .processing(done, total) }
})
self.transcriptStatus = .done(speakers: speakers.speakers.count, segments: speakers.segments.count)
// Best-effort readable recap (topic sections + extras) via the gateway LLM.
if settings.recapEnabled, !speakers.segments.isEmpty {
try Task.checkCancellation()
await self.buildRecap(speakers: speakers, inputs: inputs, settings: settings)
}
} catch is CancellationError {
self.transcriptStatus = .idle
} catch {
@@ -387,6 +396,31 @@ final class SessionController: ObservableObject {
}
}
/// Build `transcript.md` + `recap.html` from the finished `speakers.json` using
/// the gateway LLM. Best-effort: a missing LLM or any failure leaves the
/// transcript intact and just skips the recap.
///
/// `recapURL` is published only when this run's write succeeded, so a
/// `recap.html` left in the folder by an earlier run is never surfaced as the
/// current recap after a failed re-render.
///
/// - Parameters:
///   - speakers: The finished speakers file to summarise and render.
///   - inputs: Supplies the app identifier, session id and output folder.
///   - settings: Supplies the gateway base URL and TLS-skip flag.
private func buildRecap(speakers: SpeakersFile, inputs: ProcessInputs, settings: AppSettings) async {
    let llm = GatewayLLMClient(baseURL: settings.backendBaseURL, skipTLS: settings.skipTLSVerification)
    guard let model = await llm.chatModelId() else { return } // no LLM on the gateway → skip
    let analyzer = RecapAnalyzer(llm: llm, model: model)
    guard let result = try? await analyzer.recap(file: speakers) else { return }
    let title = Self.recapTitle(app: inputs.app, sessionId: inputs.sessionId)
    do {
        try RecapRenderer.write(file: speakers, result: result, title: title, to: inputs.folder)
    } catch {
        // Best-effort: a failed write skips the recap. Returning here (instead of
        // `try?` + existence check) avoids pointing at a stale recap.html.
        return
    }
    let url = inputs.folder.appendingPathComponent("recap.html")
    if FileManager.default.fileExists(atPath: url.path) { self.recapURL = url }
}
/// Friendly recap title, e.g. "Google Meet call — 2026-06-06 11:43".
///
/// The app name is the detected app's display name when `app` is a known raw
/// value, otherwise `app` capitalized. The text of `sessionId` before the first
/// `_` is treated as the stamp: the part before `T` becomes the date, and the
/// first two `-`-separated fields after `T` become `HH:MM`. Missing pieces are
/// left blank and surrounding whitespace is trimmed.
private static func recapTitle(app: String, sessionId: String) -> String {
    let label = CallDetector.DetectedApp(rawValue: app)?.display ?? app.capitalized
    let stamp = sessionId.split(separator: "_").first.map { String($0) } ?? sessionId
    let pieces = stamp.split(separator: "T")
    let day = pieces.isEmpty ? "" : String(pieces[0])
    var clock = ""
    if pieces.count > 1 {
        let fields = pieces[1].split(separator: "-")
        if fields.count >= 2 {
            clock = "\(fields[0]):\(fields[1])"
        }
    }
    let title = "\(label) call — \(day) \(clock)"
    return title.trimmingCharacters(in: .whitespaces)
}
private func fail(_ message: String) {
recorder = nil
visualCapture = nil // recorder.start() failed before visual started; nothing running
@@ -47,6 +47,12 @@ final class AppSettings: ObservableObject {
didSet { defaults.set(autoSendOnStop, forKey: Keys.autoSend) }
}
/// After transcription, build the readable recap (topic sections + meeting
/// extras) via the gateway LLM and write transcript.md / recap.html. Best-effort.
/// Changes are persisted to `defaults` under `Keys.recapEnabled` via `didSet`.
@Published var recapEnabled: Bool {
didSet { defaults.set(recapEnabled, forKey: Keys.recapEnabled) }
}
/// Output folder as a resolved file URL (expands a leading `~`).
var outputFolderURL: URL {
URL(fileURLWithPath: (outputFolderPath as NSString).expandingTildeInPath,
@@ -74,6 +80,7 @@ final class AppSettings: ObservableObject {
self.autoRecordOnDetection = defaults.object(forKey: Keys.autoRecord) as? Bool ?? true
self.selfName = defaults.string(forKey: Keys.selfName) ?? "Me"
self.autoSendOnStop = defaults.object(forKey: Keys.autoSend) as? Bool ?? false
self.recapEnabled = defaults.object(forKey: Keys.recapEnabled) as? Bool ?? true
}
private enum Keys {
@@ -84,5 +91,6 @@ final class AppSettings: ObservableObject {
static let autoRecord = "autoRecordOnDetection"
static let selfName = "selfName"
static let autoSend = "autoSendOnStop"
static let recapEnabled = "recapEnabled"
}
}
+3
View File
@@ -91,6 +91,9 @@ struct MenuBarView: View {
HStack {
Button("Send to backend") { session.processLastSession() }
.disabled(transcriptProcessing)
if let recap = session.recapURL {
Button("Open recap") { NSWorkspace.shared.open(recap) }
}
Spacer()
}
if !transcriptText.isEmpty {
+2 -1
View File
@@ -25,7 +25,8 @@ struct SettingsView: View {
TextField("Your name", text: $settings.selfName)
.textFieldStyle(.roundedBorder)
Toggle("Auto-send recordings to backend", isOn: $settings.autoSendOnStop)
Text("Your name labels the mic-VAD \"self\" spans. Auto-send transcribes each recording on stop.")
Toggle("Build readable recap (topics + highlights)", isOn: $settings.recapEnabled)
Text("Your name labels your mic channel. Auto-send transcribes on stop; the recap writes transcript.md + recap.html.")
.font(.caption)
.foregroundStyle(.secondary)
}