Files
ten31-transcripts/Ten31Transcripts/Backend/GatewayLLMClient.swift
T
Grant Gilliam 3629dbdaaa Default TLS validation on; scope skip-TLS bypass to the configured host
The app shipped with certificate validation bypassed globally and on by
default — InsecureTrustDelegate trusted any cert from any host. That was
the evaluation's P1: anyone on the LAN could MITM call audio, transcripts,
and voiceprints.

The backend's Start9 cert already validates under normal system trust when
the StartOS Root CA is installed in the keychain (confirmed: URLSession
default validation returns 200 against the backend and its fallback), so the
bypass is unnecessary:
- skip-TLS now defaults to off
- when explicitly enabled, the bypass is scoped to the configured host via
  InsecureTrustDelegate.allowsTrustOverride, never "trust any server"
- the host gate is pure and unit-tested (InsecureTrustDelegateTests)

Docs reconciled: AGENTS.md backend/TLS line and Current state.
2026-06-13 16:02:57 -05:00

138 lines
6.1 KiB
Swift

import Foundation
/// Failures reported by `GatewayLLMClient`, each with a user-presentable
/// description exposed through `LocalizedError`.
enum GatewayLLMError: Error, LocalizedError {
    /// The configured base URL could not be turned into a request URL.
    case invalidHost
    /// vLLM not loaded on the gateway.
    case notReady
    /// The gateway answered with an unexpected HTTP status (code, detail text).
    case server(Int, String)
    /// The response was missing or could not be decoded; carries the reason.
    case decode(String)
    /// Every allowed retry of a `503` response was used up.
    case retriesExhausted

    /// Human-readable message for each failure.
    var errorDescription: String? {
        let message: String
        switch self {
        case .invalidHost:
            message = "Invalid backend host URL."
        case .notReady:
            message = "The gateway's language model isn't ready."
        case let .server(code, detail):
            message = "LLM error \(code): \(detail)"
        case let .decode(msg):
            message = "Couldn't decode the LLM response: \(msg)"
        case .retriesExhausted:
            message = "Gateway stayed busy (503) after retries."
        }
        return message
    }
}
/// Talks to the Spark Control gateway's OpenAI-compatible `/v1/chat/completions`
/// (the same host + TLS as `label-merge`). Used for the recap analysis (topic
/// sections, summary polish, meeting extras). **Call sequentially**, like audio:
/// the gateway serializes GPU work, and the recap pipeline issues one request at a time.
final class GatewayLLMClient {
    /// Wait used when a `503` carries no usable numeric `Retry-After` value.
    private static let defaultRetryDelay: TimeInterval = 5
    /// Shortest wait between retries, whatever the server asks for.
    private static let minRetryDelay: TimeInterval = 1
    /// Longest wait between retries; also keeps the nanosecond conversion in range.
    private static let maxRetryDelay: TimeInterval = 600

    private let baseURL: String
    private let urlSession: URLSession

    /// Creates a client for the gateway at `baseURL`.
    ///
    /// - Parameters:
    ///   - baseURL: Gateway base URL. Surrounding whitespace and a single
    ///     trailing `/` are removed before paths are appended.
    ///   - skipTLS: When `true`, the session gets an `InsecureTrustDelegate`
    ///     built from the host of `baseURL`; when `false`, no delegate is
    ///     installed and the session uses its default behaviour.
    init(baseURL: String, skipTLS: Bool) {
        let trimmed = baseURL.trimmingCharacters(in: .whitespacesAndNewlines)
        self.baseURL = trimmed.hasSuffix("/") ? String(trimmed.dropLast()) : trimmed

        let config = URLSessionConfiguration.ephemeral
        config.timeoutIntervalForRequest = 600
        config.timeoutIntervalForResource = 900
        config.waitsForConnectivity = false

        let delegate: URLSessionDelegate? = skipTLS
            ? InsecureTrustDelegate(allowedHost: URL(string: self.baseURL)?.host)
            : nil
        self.urlSession = URLSession(configuration: config, delegate: delegate, delegateQueue: nil)
    }

    // Let in-flight tasks finish, then invalidate the session.
    deinit { urlSession.finishTasksAndInvalidate() }

    /// The ready chat model id from `/api/endpoints` (`vllm.model`), or nil if the
    /// gateway has no language model loaded.
    ///
    /// Any failure (bad URL, transport error, unexpected JSON shape, `ready`
    /// not `true`, empty model name) also yields nil. The HTTP status code is
    /// not inspected.
    func chatModelId() async -> String? {
        guard let url = URL(string: baseURL + "/api/endpoints") else { return nil }
        guard let (data, _) = try? await urlSession.data(from: url),
              let obj = try? JSONSerialization.jsonObject(with: data) as? [String: Any],
              let vllm = obj["vllm"] as? [String: Any],
              (vllm["ready"] as? Bool) == true,
              let model = vllm["model"] as? String, !model.isEmpty else { return nil }
        return model
    }

    /// One JSON-mode chat completion. Returns the raw `content` string (the caller
    /// parses it as JSON). Retries on `503 + Retry-After`.
    ///
    /// - Parameters:
    ///   - model: Model id sent as `model`.
    ///   - system: Optional system prompt; omitted from the messages when nil.
    ///   - user: User prompt.
    ///   - maxTokens: Value sent as `max_tokens`.
    ///   - maxRetries: How many `503` responses are retried before giving up.
    /// - Returns: The `content` of the first choice in the response.
    /// - Throws: `GatewayLLMError.invalidHost` if the URL cannot be built,
    ///   `.retriesExhausted` after more than `maxRetries` `503` responses,
    ///   `.server` for any other non-2xx status, `.decode` for an unusable
    ///   response, plus any error thrown by the URL session, JSON encoding,
    ///   or `Task.sleep` (e.g. on cancellation).
    func completeJSON(model: String, system: String?, user: String,
                      maxTokens: Int = 4096, maxRetries: Int = 3) async throws -> String {
        guard let url = URL(string: baseURL + "/v1/chat/completions") else {
            throw GatewayLLMError.invalidHost
        }

        var messages: [[String: String]] = []
        if let system { messages.append(["role": "system", "content": system]) }
        messages.append(["role": "user", "content": user])

        let body: [String: Any] = [
            "model": model,
            "messages": messages,
            "max_tokens": maxTokens,
            "stream": false,
            "response_format": ["type": "json_object"],
            // NOTE(review): presumably turns off the model's "thinking" output via
            // the chat template; confirm against the gateway's vLLM setup.
            "chat_template_kwargs": ["enable_thinking": false],
        ]
        let bodyData = try JSONSerialization.data(withJSONObject: body)

        var attempt = 0
        while true {
            var request = URLRequest(url: url)
            request.httpMethod = "POST"
            request.setValue("application/json", forHTTPHeaderField: "Content-Type")
            request.httpBody = bodyData

            let (data, response) = try await urlSession.data(for: request)
            guard let http = response as? HTTPURLResponse else {
                throw GatewayLLMError.decode("no HTTP response")
            }

            switch http.statusCode {
            case 200..<300:
                return try Self.content(from: data)
            case 503:
                attempt += 1
                if attempt > maxRetries { throw GatewayLLMError.retriesExhausted }
                // The header is server-controlled, so the delay is validated and
                // clamped before conversion rather than trusted as-is.
                let delay = Self.retryDelayNanoseconds(
                    fromRetryAfter: http.value(forHTTPHeaderField: "Retry-After"))
                try await Task.sleep(nanoseconds: delay)
            default:
                throw GatewayLLMError.server(http.statusCode, Self.detail(from: data))
            }
        }
    }

    /// Turns a `Retry-After` header value into a sleep duration in nanoseconds.
    ///
    /// A missing, non-numeric (e.g. HTTP-date), or non-finite value falls back
    /// to `defaultRetryDelay`. Finite values are clamped to
    /// `minRetryDelay...maxRetryDelay`, so the `UInt64` conversion can never
    /// trap on an infinite or out-of-range number.
    static func retryDelayNanoseconds(fromRetryAfter header: String?) -> UInt64 {
        let parsed = header.flatMap { Double($0.trimmingCharacters(in: .whitespaces)) }
        let seconds: TimeInterval
        if let parsed, parsed.isFinite {
            seconds = min(max(minRetryDelay, parsed), maxRetryDelay)
        } else {
            seconds = defaultRetryDelay
        }
        return UInt64(seconds * 1_000_000_000)
    }

    // MARK: - Parsing

    /// Extracts `choices[0].message.content` from a chat-completion response.
    ///
    /// - Throws: `GatewayLLMError.decode` with the decoder's message when the
    ///   payload does not match the expected shape, or with
    ///   `"no choices in response"` when `choices` is empty.
    private static func content(from data: Data) throws -> String {
        struct ChatResponse: Decodable {
            struct Choice: Decodable {
                struct Message: Decodable { let content: String }
                let message: Message
            }
            let choices: [Choice]
        }

        let decoded: ChatResponse
        do {
            decoded = try JSONDecoder().decode(ChatResponse.self, from: data)
        } catch {
            throw GatewayLLMError.decode(error.localizedDescription)
        }
        // Checked outside the do/catch so this error is not caught and wrapped
        // a second time (which would duplicate the "Couldn't decode…" prefix).
        guard let text = decoded.choices.first?.message.content else {
            throw GatewayLLMError.decode("no choices in response")
        }
        return text
    }

    /// Best-effort error text from a failure body: a top-level `detail` string,
    /// else `error.message`, else the body as UTF-8, else `"unknown error"`.
    private static func detail(from data: Data) -> String {
        if let obj = try? JSONSerialization.jsonObject(with: data) as? [String: Any] {
            if let detail = obj["detail"] as? String { return detail }
            if let err = obj["error"] as? [String: Any], let msg = err["message"] as? String { return msg }
        }
        return String(data: data, encoding: .utf8) ?? "unknown error"
    }

    /// Strip a ```json ``` code fence if the model wrapped its JSON (defensive;
    /// JSON mode usually prevents this).
    ///
    /// When the trimmed text starts with a fence, the first line (fence plus
    /// any language tag) is dropped, then everything from the last fence
    /// onward. The result is trimmed of surrounding whitespace.
    static func stripCodeFence(_ s: String) -> String {
        var t = s.trimmingCharacters(in: .whitespacesAndNewlines)
        if t.hasPrefix("```") {
            if let firstNewline = t.firstIndex(of: "\n") { t = String(t[t.index(after: firstNewline)...]) }
            if let fenceRange = t.range(of: "```", options: .backwards) { t = String(t[..<fenceRange.lowerBound]) }
        }
        return t.trimmingCharacters(in: .whitespacesAndNewlines)
    }
}