import Foundation
import Vision
import CoreVideo
import CoreGraphics

/// Thin wrapper over Vision's text recognition, used by adapters to read names /
/// initials off participant tiles. Runs on the Neural Engine; no permission
/// needed. Works on any frame, so adapters can be developed against still images.
struct TextRecognizer {
    /// One recognized piece of text: the top candidate of a single Vision observation.
    struct Result {
        /// The top candidate's string.
        let text: String
        /// The top candidate's confidence as reported by Vision.
        let confidence: Float
        /// Normalized Vision bounding box (origin bottom-left, 0…1).
        ///
        /// NOTE(review): when a region of interest is supplied, Vision appears to
        /// report this box relative to that region rather than the full image —
        /// confirm before mapping it back to full-frame coordinates.
        let boundingBox: CGRect
    }

    var recognitionLevel: VNRequestTextRecognitionLevel = .accurate
    var minimumTextHeight: Float = 0 // 0 = Vision default
    var usesLanguageCorrection = false // names/initials aren't dictionary words

    /// The full normalized image area; the only range Vision accepts for a region of interest.
    private static let unitRect = CGRect(x: 0, y: 0, width: 1, height: 1)

    /// Recognize text in `pixelBuffer`, optionally limited to a normalized region
    /// of interest (origin bottom-left, matching Vision's coordinate space).
    ///
    /// - Parameters:
    ///   - pixelBuffer: The frame to scan.
    ///   - regionOfInterest: Normalized sub-rectangle to scan, or `nil` for the whole
    ///     frame. Parts lying outside the unit square are clipped away.
    /// - Returns: One `Result` per observation that has at least one candidate. Empty
    ///   when recognition fails, nothing is found, or the region does not overlap the frame.
    func recognize(in pixelBuffer: CVPixelBuffer, regionOfInterest: CGRect? = nil) -> [Result] {
        guard let request = makeRequest(regionOfInterest: regionOfInterest) else { return [] }
        let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, options: [:])
        return results(of: request, performedBy: handler)
    }

    /// Convenience for fixtures/tests: recognize text in a CGImage.
    ///
    /// - Parameters:
    ///   - cgImage: The still image to scan.
    ///   - regionOfInterest: Normalized sub-rectangle to scan, or `nil` for the whole
    ///     image. Parts lying outside the unit square are clipped away.
    /// - Returns: One `Result` per observation that has at least one candidate. Empty
    ///   when recognition fails, nothing is found, or the region does not overlap the image.
    func recognize(in cgImage: CGImage, regionOfInterest: CGRect? = nil) -> [Result] {
        guard let request = makeRequest(regionOfInterest: regionOfInterest) else { return [] }
        let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
        return results(of: request, performedBy: handler)
    }

    /// Builds a text-recognition request configured from this recognizer's settings.
    /// Returns `nil` when a region of interest was given but none of it lies inside the image.
    private func makeRequest(regionOfInterest: CGRect?) -> VNRecognizeTextRequest? {
        let request = VNRecognizeTextRequest()
        request.recognitionLevel = recognitionLevel
        request.usesLanguageCorrection = usesLanguageCorrection
        if minimumTextHeight > 0 {
            request.minimumTextHeight = minimumTextHeight
        }
        if let roi = regionOfInterest {
            // Vision rejects a region outside the normalized unit square with an
            // Objective-C exception that Swift cannot catch, so clip it first. This
            // also absorbs tiny overshoots from floating-point rounding.
            let clipped = roi.standardized.intersection(Self.unitRect)
            // `isEmpty` is also true for the null rect produced by a non-overlapping intersection.
            guard !clipped.isEmpty else { return nil }
            request.regionOfInterest = clipped
        }
        return request
    }

    /// Runs `request` on `handler` and maps each observation's top candidate to a `Result`.
    private func results(of request: VNRecognizeTextRequest,
                         performedBy handler: VNImageRequestHandler) -> [Result] {
        do {
            try handler.perform([request])
        } catch {
            // Deliberate best-effort: a frame that cannot be processed yields no text.
            return []
        }
        guard let observations = request.results else { return [] }
        return observations.compactMap { observation in
            guard let best = observation.topCandidates(1).first else { return nil }
            return Result(text: best.string,
                          confidence: best.confidence,
                          boundingBox: observation.boundingBox)
        }
    }
}