3629dbdaaa
The app shipped with certificate validation bypassed globally and on by default — InsecureTrustDelegate trusted any cert from any host. That was the evaluation's P1: anyone on the LAN could MITM call audio, transcripts, and voiceprints. The backend's Start9 cert already validates under normal system trust when the StartOS Root CA is installed in the keychain (confirmed: URLSession default validation returns 200 against the backend and its fallback), so the bypass is unnecessary. Changes: (1) skip-TLS now defaults to off; (2) when explicitly enabled, the bypass is scoped to the configured host via InsecureTrustDelegate.allowsTrustOverride, never "trust any server"; (3) the host gate is pure and unit-tested (InsecureTrustDelegateTests). Docs reconciled: AGENTS.md backend/TLS line and Current state.
138 lines
6.1 KiB
Swift
138 lines
6.1 KiB
Swift
import Foundation
|
|
|
|
/// Failures surfaced by the gateway LLM client.
enum GatewayLLMError: Error {
    /// The configured backend host could not be turned into a request URL.
    case invalidHost
    /// vLLM is not loaded on the gateway.
    case notReady
    /// The gateway answered with a non-success status; carries the status
    /// code and a human-readable detail string.
    case server(Int, String)
    /// The response body could not be decoded; carries the reason.
    case decode(String)
    /// The gateway kept answering 503 until the retry budget ran out.
    case retriesExhausted
}

// MARK: - LocalizedError

extension GatewayLLMError: LocalizedError {
    /// User-facing description for each failure.
    var errorDescription: String? {
        switch self {
        case .invalidHost:
            return "Invalid backend host URL."
        case .notReady:
            return "The gateway's language model isn't ready."
        case let .server(status, message):
            return "LLM error \(status): \(message)"
        case let .decode(reason):
            return "Couldn't decode the LLM response: \(reason)"
        case .retriesExhausted:
            return "Gateway stayed busy (503) after retries."
        }
    }
}
|
|
|
|
/// Talks to the Spark Control gateway's OpenAI-compatible `/v1/chat/completions`
/// (the same host + TLS as `label-merge`). Used for the recap analysis (topic
/// sections, summary polish, meeting extras).
///
/// **Call sequentially** — like audio, the gateway serializes GPU work; the
/// recap pipeline issues one request at a time.
final class GatewayLLMClient {
    /// Delay used when a 503 carries no usable `Retry-After` value.
    private static let defaultRetryDelaySeconds: Double = 5
    /// Never hammer the gateway faster than this, whatever the header says.
    private static let minRetryDelaySeconds: Double = 1
    /// Upper bound on a single back-off, matching the per-request timeout set
    /// in `init`. It also keeps the nanosecond conversion inside `UInt64`.
    private static let maxRetryDelaySeconds: Double = 600

    /// Backend base URL, whitespace-trimmed and without one trailing `/`.
    private let baseURL: String
    private let urlSession: URLSession

    /// Creates a client for the gateway at `baseURL`.
    ///
    /// - Parameters:
    ///   - baseURL: Backend root, e.g. `https://host`. Surrounding whitespace
    ///     and a single trailing slash are removed.
    ///   - skipTLS: When `true`, the session gets an `InsecureTrustDelegate`
    ///     constructed with the host parsed from `baseURL`; when `false`, no
    ///     delegate is installed and the system's default validation applies.
    init(baseURL: String, skipTLS: Bool) {
        let trimmed = baseURL.trimmingCharacters(in: .whitespacesAndNewlines)
        self.baseURL = trimmed.hasSuffix("/") ? String(trimmed.dropLast()) : trimmed

        let config = URLSessionConfiguration.ephemeral
        config.timeoutIntervalForRequest = 600
        config.timeoutIntervalForResource = 900
        config.waitsForConnectivity = false

        let delegate: URLSessionDelegate? = skipTLS
            ? InsecureTrustDelegate(allowedHost: URL(string: self.baseURL)?.host)
            : nil
        self.urlSession = URLSession(configuration: config, delegate: delegate, delegateQueue: nil)
    }

    // Let in-flight tasks finish, then invalidate the session.
    deinit { urlSession.finishTasksAndInvalidate() }

    // MARK: - Requests

    /// The ready chat model id from `/api/endpoints` (`vllm.model`), or nil if the
    /// gateway has no language model loaded.
    ///
    /// Best-effort: any transport failure, non-JSON body, missing `vllm`
    /// object, `ready != true`, or empty model name yields `nil`.
    func chatModelId() async -> String? {
        guard let url = URL(string: baseURL + "/api/endpoints") else { return nil }
        guard let (data, _) = try? await urlSession.data(from: url),
              let obj = try? JSONSerialization.jsonObject(with: data) as? [String: Any],
              let vllm = obj["vllm"] as? [String: Any],
              (vllm["ready"] as? Bool) == true,
              let model = vllm["model"] as? String, !model.isEmpty else { return nil }
        return model
    }

    /// One JSON-mode chat completion. Returns the raw `content` string (the caller
    /// parses it as JSON). Retries on `503 + Retry-After`.
    ///
    /// - Parameters:
    ///   - model: Model id to send in the request.
    ///   - system: Optional system prompt; omitted from `messages` when nil.
    ///   - user: User prompt.
    ///   - maxTokens: Value sent as `max_tokens`.
    ///   - maxRetries: How many times a 503 is retried before giving up.
    /// - Returns: `choices[0].message.content` from the response.
    /// - Throws: `GatewayLLMError.invalidHost` if the URL cannot be built,
    ///   `.retriesExhausted` after too many 503s, `.server` for any other
    ///   non-2xx status, `.decode` for an unusable body; transport errors and
    ///   cancellation (including during the back-off sleep) propagate as-is.
    func completeJSON(model: String, system: String?, user: String,
                      maxTokens: Int = 4096, maxRetries: Int = 3) async throws -> String {
        guard let url = URL(string: baseURL + "/v1/chat/completions") else {
            throw GatewayLLMError.invalidHost
        }

        var messages: [[String: String]] = []
        if let system { messages.append(["role": "system", "content": system]) }
        messages.append(["role": "user", "content": user])

        let body: [String: Any] = [
            "model": model,
            "messages": messages,
            "max_tokens": maxTokens,
            "stream": false,
            "response_format": ["type": "json_object"],
            "chat_template_kwargs": ["enable_thinking": false],
        ]

        // The request is identical on every attempt, so build it once.
        var request = URLRequest(url: url)
        request.httpMethod = "POST"
        request.setValue("application/json", forHTTPHeaderField: "Content-Type")
        request.httpBody = try JSONSerialization.data(withJSONObject: body)

        var attempt = 0
        while true {
            let (data, response) = try await urlSession.data(for: request)
            guard let http = response as? HTTPURLResponse else {
                throw GatewayLLMError.decode("no HTTP response")
            }
            switch http.statusCode {
            case 200..<300:
                return try Self.content(from: data)
            case 503:
                attempt += 1
                if attempt > maxRetries { throw GatewayLLMError.retriesExhausted }
                let delay = Self.retryDelayNanoseconds(
                    fromRetryAfter: http.value(forHTTPHeaderField: "Retry-After"))
                try await Task.sleep(nanoseconds: delay)
            default:
                throw GatewayLLMError.server(http.statusCode, Self.detail(from: data))
            }
        }
    }

    // MARK: - Retry back-off

    /// Converts a `Retry-After` header value into a sleep duration in nanoseconds.
    ///
    /// Only the delay-seconds form is understood. A missing header, an
    /// HTTP-date, or a non-finite number (`inf`, `nan`) falls back to the
    /// default delay. The result is clamped to the min/max bounds, because the
    /// header is server-controlled and converting an infinite or oversized
    /// `Double` to `UInt64` would trap.
    static func retryDelayNanoseconds(fromRetryAfter header: String?) -> UInt64 {
        let parsed = header.flatMap { Double($0) }
        let seconds: Double
        if let parsed, parsed.isFinite {
            seconds = parsed
        } else {
            seconds = defaultRetryDelaySeconds
        }
        let clamped = min(max(minRetryDelaySeconds, seconds), maxRetryDelaySeconds)
        return UInt64(clamped * 1_000_000_000)
    }

    // MARK: - Parsing

    /// Extracts `choices[0].message.content` from a chat-completion body.
    ///
    /// - Throws: `GatewayLLMError.decode` with the decoder's message when the
    ///   body does not match the expected shape, or with
    ///   `"no choices in response"` when `choices` is empty.
    private static func content(from data: Data) throws -> String {
        struct ChatResponse: Decodable {
            struct Choice: Decodable {
                struct Message: Decodable { let content: String }
                let message: Message
            }
            let choices: [Choice]
        }

        let decoded: ChatResponse
        do {
            decoded = try JSONDecoder().decode(ChatResponse.self, from: data)
        } catch {
            throw GatewayLLMError.decode(error.localizedDescription)
        }
        // Checked outside the do/catch so this error is not caught and
        // re-wrapped, which would duplicate the "Couldn't decode…" prefix.
        guard let text = decoded.choices.first?.message.content else {
            throw GatewayLLMError.decode("no choices in response")
        }
        return text
    }

    /// Best human-readable explanation for a failed response: a string
    /// `detail` field, else `error.message`, else the raw body as UTF-8, else
    /// `"unknown error"` (also used when the body is empty or whitespace-only).
    private static func detail(from data: Data) -> String {
        if let obj = try? JSONSerialization.jsonObject(with: data) as? [String: Any] {
            if let detail = obj["detail"] as? String { return detail }
            if let err = obj["error"] as? [String: Any], let msg = err["message"] as? String { return msg }
        }
        if let raw = String(data: data, encoding: .utf8),
           !raw.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
            return raw
        }
        return "unknown error"
    }

    /// Strip a ```json … ``` code fence if the model wrapped its JSON (defensive;
    /// JSON mode usually prevents this).
    ///
    /// When the trimmed input starts with a fence, everything up to and
    /// including the first newline is dropped, then everything from the last
    /// remaining fence onward. The result is whitespace-trimmed.
    static func stripCodeFence(_ s: String) -> String {
        var t = s.trimmingCharacters(in: .whitespacesAndNewlines)
        if t.hasPrefix("```") {
            if let firstNewline = t.firstIndex(of: "\n") { t = String(t[t.index(after: firstNewline)...]) }
            if let fenceRange = t.range(of: "```", options: .backwards) { t = String(t[..<fenceRange.lowerBound]) }
        }
        return t.trimmingCharacters(in: .whitespacesAndNewlines)
    }
}
|