Recap: readable transcript + topic sections + meeting extras (gateway LLM)
New 'Recap' phase — turns speakers.json into a human-readable recap, leveraging recap-relay's proven logic/prompts but calling the Spark gateway's OpenAI-compatible /v1/chat/completions directly (same host/TLS as label-merge; Qwen3-35B). We start from already-named speakers (label-merge), so recap-relay's speaker clustering + name-inference are skipped entirely. - GatewayLLMClient: /v1/chat/completions (JSON mode), model discovery via /api/endpoints, TLS-skip reuse, 503 retry, sequential. - RecapAnalyzer: speakers.json → numbered [N] (MM:SS) Name: text transcript → time-windowed analysis (single window for short calls, 18min/2min overlap for long) → stitch/dedup topic sections → meeting extras (TLDR/decisions/action_items/open_questions/key_quotes). Defensive JSON parsing of LLM output. - RecapRenderer: writes transcript.md + a self-contained dark-theme recap.html (topic sections w/ collapsible transcripts, extras panels, speaker color chips, full timestamped speaker-attributed transcript, print styles). - SessionController.buildRecap: best-effort after speakers.json (gated by settings.recapEnabled); surfaces recapURL → menu 'Open recap'. Skips silently if the gateway has no LLM. Settings toggle added. Validated END-TO-END on the real Meet session against the live gateway: dual-channel transcription → 3 topic sections + accurate TLDR + key quotes; 'Go Bitcoin' correctly attributed to the remote speaker. 46/46 XCTest (10 new).
This commit is contained in:
@@ -0,0 +1,135 @@
|
||||
import Foundation
|
||||
|
||||
/// Failures reported by the gateway LLM client. Each case has a user-facing
/// message exposed through `LocalizedError.errorDescription`.
enum GatewayLLMError: Error {
    /// The backend host string could not be turned into a request URL.
    case invalidHost
    /// vLLM not loaded on the gateway.
    case notReady
    /// The gateway answered with an error status: (HTTP status code, detail text).
    case server(Int, String)
    /// The response could not be interpreted; the payload says why.
    case decode(String)
    /// Every allowed retry of a 503 response was used up.
    case retriesExhausted
}

// MARK: - LocalizedError

extension GatewayLLMError: LocalizedError {
    /// Human-readable description for each failure case.
    var errorDescription: String? {
        switch self {
        case .invalidHost:
            return "Invalid backend host URL."
        case .notReady:
            return "The gateway's language model isn't ready."
        case let .server(status, message):
            return "LLM error \(status): \(message)"
        case let .decode(reason):
            return "Couldn't decode the LLM response: \(reason)"
        case .retriesExhausted:
            return "Gateway stayed busy (503) after retries."
        }
    }
}
|
||||
|
||||
/// Talks to the Spark Control gateway's OpenAI-compatible `/v1/chat/completions`
/// (the same host + TLS as `label-merge`). Used for the recap analysis (topic
/// sections, summary polish, meeting extras). **Call sequentially** — like audio,
/// the gateway serializes GPU work; the recap pipeline issues one request at a time.
final class GatewayLLMClient {
    /// Backend root, trimmed of surrounding whitespace and of one trailing `/`.
    private let baseURL: String
    /// Session used for every request made by this client.
    private let urlSession: URLSession

    /// Wait used when a 503 carries no usable `Retry-After` value (seconds).
    private static let defaultRetryDelay: TimeInterval = 5
    /// Lower bound applied to a server-supplied retry delay (seconds).
    private static let minRetryDelay: TimeInterval = 1
    /// Upper bound applied to a server-supplied retry delay (seconds). Keeps the
    /// nanosecond conversion far inside `UInt64` range and stops a bogus header
    /// from parking the pipeline indefinitely.
    private static let maxRetryDelay: TimeInterval = 300

    /// Creates a client for the gateway at `baseURL`.
    ///
    /// - Parameters:
    ///   - baseURL: Gateway root URL; surrounding whitespace and a single
    ///     trailing slash are removed so paths can be appended directly.
    ///   - skipTLS: When `true`, the session is created with an
    ///     `InsecureTrustDelegate` (declared elsewhere; presumably it accepts
    ///     the gateway's certificate without validation — confirm there).
    init(baseURL: String, skipTLS: Bool) {
        let trimmed = baseURL.trimmingCharacters(in: .whitespacesAndNewlines)
        self.baseURL = trimmed.hasSuffix("/") ? String(trimmed.dropLast()) : trimmed

        let config = URLSessionConfiguration.ephemeral
        // Generous limits: 600 s request timeout, 900 s for the whole resource.
        config.timeoutIntervalForRequest = 600
        config.timeoutIntervalForResource = 900
        // Fail immediately when offline instead of waiting for connectivity.
        config.waitsForConnectivity = false

        let delegate: URLSessionDelegate? = skipTLS ? InsecureTrustDelegate() : nil
        self.urlSession = URLSession(configuration: config, delegate: delegate, delegateQueue: nil)
    }

    // Let in-flight tasks finish, then invalidate the session.
    deinit { urlSession.finishTasksAndInvalidate() }

    /// The ready chat model id from `/api/endpoints` (`vllm.model`), or nil if the
    /// gateway has no language model loaded.
    ///
    /// Any failure (bad URL, transport error, unexpected JSON shape, `ready` not
    /// `true`, empty model name) is reported as `nil` rather than thrown.
    func chatModelId() async -> String? {
        guard let url = URL(string: baseURL + "/api/endpoints") else { return nil }
        guard let (data, _) = try? await urlSession.data(from: url),
              let obj = try? JSONSerialization.jsonObject(with: data) as? [String: Any],
              let vllm = obj["vllm"] as? [String: Any],
              (vllm["ready"] as? Bool) == true,
              let model = vllm["model"] as? String, !model.isEmpty else { return nil }
        return model
    }

    /// One JSON-mode chat completion. Returns the raw `content` string (the caller
    /// parses it as JSON). Retries on `503 + Retry-After`.
    ///
    /// - Parameters:
    ///   - model: Model id sent in the request body.
    ///   - system: Optional system message; omitted from the request when `nil`.
    ///   - user: The user message.
    ///   - maxTokens: Value sent as `max_tokens`.
    ///   - maxRetries: How many times a 503 response is retried before giving up.
    /// - Returns: `choices[0].message.content` of the response.
    /// - Throws: `GatewayLLMError.invalidHost` if the URL cannot be built,
    ///   `.retriesExhausted` after too many 503s, `.server` for any other
    ///   non-2xx status, `.decode` for an unreadable response; transport errors
    ///   and `CancellationError` (from the retry sleep) propagate unchanged.
    func completeJSON(model: String, system: String?, user: String,
                      maxTokens: Int = 4096, maxRetries: Int = 3) async throws -> String {
        guard let url = URL(string: baseURL + "/v1/chat/completions") else {
            throw GatewayLLMError.invalidHost
        }

        var messages: [[String: String]] = []
        if let system { messages.append(["role": "system", "content": system]) }
        messages.append(["role": "user", "content": user])

        let body: [String: Any] = [
            "model": model,
            "messages": messages,
            "max_tokens": maxTokens,
            "stream": false,
            "response_format": ["type": "json_object"],
            // Presumably disables the model's "thinking" output via the chat template.
            "chat_template_kwargs": ["enable_thinking": false],
        ]
        let bodyData = try JSONSerialization.data(withJSONObject: body)

        var attempt = 0
        while true {
            var request = URLRequest(url: url)
            request.httpMethod = "POST"
            request.setValue("application/json", forHTTPHeaderField: "Content-Type")
            request.httpBody = bodyData

            let (data, response) = try await urlSession.data(for: request)
            guard let http = response as? HTTPURLResponse else {
                throw GatewayLLMError.decode("no HTTP response")
            }

            switch http.statusCode {
            case 200..<300:
                return try Self.content(from: data)
            case 503:
                attempt += 1
                if attempt > maxRetries { throw GatewayLLMError.retriesExhausted }
                // The header is server-controlled: sanitise it before converting to
                // nanoseconds, because `UInt64(_:)` traps on out-of-range doubles.
                let delay = Self.retryDelay(fromRetryAfter: http.value(forHTTPHeaderField: "Retry-After"))
                try await Task.sleep(nanoseconds: UInt64(delay * 1_000_000_000))
            default:
                throw GatewayLLMError.server(http.statusCode, Self.detail(from: data))
            }
        }
    }

    /// Converts a `Retry-After` header value into a safe wait in seconds.
    ///
    /// Only the delta-seconds form is understood. A missing, non-numeric
    /// (e.g. HTTP-date) or non-finite value yields the 5 s default; any other
    /// value is clamped to 1...300 s.
    static func retryDelay(fromRetryAfter header: String?) -> TimeInterval {
        let parsed = header
            .map { $0.trimmingCharacters(in: .whitespaces) }
            .flatMap { Double($0) }
        guard let seconds = parsed, seconds.isFinite else { return defaultRetryDelay }
        return min(max(minRetryDelay, seconds), maxRetryDelay)
    }

    // MARK: - Parsing

    /// Extracts `choices[0].message.content` from a chat-completions response.
    ///
    /// - Throws: `GatewayLLMError.decode` when the payload is not the expected
    ///   JSON shape, has no choices, or the first choice has no content.
    private static func content(from data: Data) throws -> String {
        struct ChatResponse: Decodable {
            struct Choice: Decodable {
                // Optional so a `null` or missing content produces a clear error below.
                struct Message: Decodable { let content: String? }
                let message: Message
            }
            let choices: [Choice]
        }

        let decoded: ChatResponse
        do {
            decoded = try JSONDecoder().decode(ChatResponse.self, from: data)
        } catch {
            throw GatewayLLMError.decode(error.localizedDescription)
        }
        // Checked outside the do/catch so these errors are not wrapped a second
        // time (which would duplicate the "Couldn't decode…" prefix).
        guard let first = decoded.choices.first else {
            throw GatewayLLMError.decode("no choices in response")
        }
        guard let text = first.message.content else {
            throw GatewayLLMError.decode("no content in response")
        }
        return text
    }

    /// Best-effort error text from a failed response body: the top-level
    /// `detail` string, else `error.message`, else the body decoded as UTF-8,
    /// else `"unknown error"`.
    private static func detail(from data: Data) -> String {
        if let obj = try? JSONSerialization.jsonObject(with: data) as? [String: Any] {
            if let detail = obj["detail"] as? String { return detail }
            if let err = obj["error"] as? [String: Any], let msg = err["message"] as? String { return msg }
        }
        return String(data: data, encoding: .utf8) ?? "unknown error"
    }

    /// Strip a ```json … ``` code fence if the model wrapped its JSON (defensive;
    /// JSON mode usually prevents this).
    ///
    /// When the trimmed text starts with a fence, everything up to and including
    /// the first newline is dropped, then everything from the last fence onward;
    /// the result is trimmed again. Text without a leading fence is only trimmed.
    static func stripCodeFence(_ s: String) -> String {
        var t = s.trimmingCharacters(in: .whitespacesAndNewlines)
        if t.hasPrefix("```") {
            if let firstNewline = t.firstIndex(of: "\n") { t = String(t[t.index(after: firstNewline)...]) }
            if let fenceRange = t.range(of: "```", options: .backwards) { t = String(t[..<fenceRange.lowerBound]) }
        }
        return t.trimmingCharacters(in: .whitespacesAndNewlines)
    }
}
|
||||
Reference in New Issue
Block a user