Files
ten31-transcripts/Ten31Transcripts/Visual/FrameSampler.swift
T
Grant Gilliam 39beccf7f4 Fix Meet visual: reject solid avatar tiles + screen-share OCR
Root cause of the "4 people → 2 speakers" Meet call: the colored-border detector
read solid camera-off avatar tiles (orange "J", magenta "G") as active speakers
for the ENTIRE call. Those whole-call phantom spans dominated backend name
attribution, collapsing every remote voice onto one name — and the giant filled
bbox also swallowed screen-share text (WERUNBTC.COM ×49) as a speaker.

Validated against 9 real fixtures (harness over the real MeetAdapter):

Detection:
- FrameSampler.thinColoredPoints: coloured counterpart of thinWhitePoints — keeps
  thin border/ring/pill edges, drops solid colour fills.
- GridCallAnalyzer.isHollow: reject a highlight component whose interior is filled
  (a solid tile) vs a hollow ring (a real border). Config.maxInteriorFill (0.2 default).
- MeetAdapter: detect thin BLUE edges only (hue 180–240°, measured from the
  fixtures), maxInteriorFill 0.3 (real Meet rings ≈0.2–0.3, solid tiles ≈0.36).
- Result on fixtures: John Arnold/Grant Gilliam (solid tiles) now NEVER detected;
  Matt Odell/Mark detected when their blue cue is present. Sparse but never wrong —
  correct for a naming hint over audio diarization.

OCR name hygiene:
- isLikelyName rejects domain-like screen-share text ("WERUNBTC.COM", OCR'd ".GOM").
- cleaned() strips trailing punctuation ("Mark." → "Mark").
- TimelineBuilder.canonicalizeByFrequency folds rare OCR misspellings into a
  dominant near-twin name ("Matt Odel"/"MattOdell" → "Matt Odell", "Mare" → "Mark").

Tests: hollow-ring, extended OCR filter, fuzzy-merge. 65 pass.
2026-06-08 16:18:52 -05:00

172 lines
8.0 KiB
Swift

import Foundation
import CoreGraphics
/// Renders a CGImage to an RGBA8 buffer once, then answers cheap colour queries
/// over pixel regions. Used to score the active-speaker highlight (a saturated
/// coloured border/ring) around participant tiles.
///
/// All coordinates are pixel coordinates with a top-left origin. "Saturation"
/// is HSV saturation `(max - min) / max` over the R, G, B channels (0 for black),
/// and "brightness" is the largest of the three channels (0–255).
struct FrameSampler {
    /// Width of the rendered frame, in pixels.
    let width: Int
    /// Height of the rendered frame, in pixels.
    let height: Int
    private let pixels: [UInt8] // RGBA8, row-major, top-left origin

    /// Renders `cgImage` into an RGBA8 buffer owned by the sampler.
    ///
    /// Returns `nil` when the image has a zero dimension or the bitmap context
    /// cannot be created.
    init?(cgImage: CGImage) {
        let w = cgImage.width, h = cgImage.height
        guard w > 0, h > 0 else { return nil }
        var buffer = [UInt8](repeating: 0, count: w * h * 4)
        let colorSpace = CGColorSpaceCreateDeviceRGB()
        let info = CGImageAlphaInfo.premultipliedLast.rawValue
        // The buffer pointer is only valid inside this closure, so the context
        // must be created, drawn into, and released before the closure returns.
        let rendered = buffer.withUnsafeMutableBytes { raw -> Bool in
            guard let ctx = CGContext(data: raw.baseAddress, width: w, height: h, bitsPerComponent: 8,
                                      bytesPerRow: w * 4, space: colorSpace, bitmapInfo: info)
            else { return false }
            ctx.draw(cgImage, in: CGRect(x: 0, y: 0, width: w, height: h))
            return true
        }
        guard rendered else { return nil }
        self.width = w
        self.height = h
        self.pixels = buffer
    }

    /// Mean HSV saturation (0–1) over a pixel rect (top-left origin), sampled on a grid.
    ///
    /// - Parameters:
    ///   - rect: Region to sample; it is clipped to the frame. Null, infinite or
    ///     NaN rects are tolerated.
    ///   - samples: Approximate number of samples per axis. Non-positive values
    ///     sample every pixel of the clipped region.
    /// - Returns: The mean saturation, or 0 when the clipped region is empty.
    func meanSaturation(inPixelRect rect: CGRect, samples: Int = 24) -> Double {
        let x0 = Self.clampedIndex(rect.minX, upperBound: width)
        let x1 = Self.clampedIndex(rect.maxX, upperBound: width)
        let y0 = Self.clampedIndex(rect.minY, upperBound: height)
        let y1 = Self.clampedIndex(rect.maxY, upperBound: height)
        guard x1 > x0, y1 > y0 else { return 0 }
        // Guard the division: `samples == 0` would otherwise trap.
        let stepX = samples > 0 ? max(1, (x1 - x0) / samples) : 1
        let stepY = samples > 0 ? max(1, (y1 - y0) / samples) : 1
        var sum = 0.0, count = 0
        for y in stride(from: y0, to: y1, by: stepY) {
            for x in stride(from: x0, to: x1, by: stepX) {
                let i = (y * width + x) * 4
                let r = Double(pixels[i]), g = Double(pixels[i + 1]), b = Double(pixels[i + 2])
                let mx = max(r, g, b), mn = min(r, g, b)
                sum += mx > 0 ? (mx - mn) / mx : 0
                count += 1
            }
        }
        return count > 0 ? sum / Double(count) : 0
    }

    /// Saturation of the tile border: the ring just inside `rect`'s edges, which
    /// is where the speaking highlight lives. The interior is excluded.
    ///
    /// The ring is split into four strips (top, bottom, left, right) and the
    /// result is the HIGHEST of the four strips' mean saturations, not the mean
    /// over the whole ring.
    ///
    /// - Parameters:
    ///   - rect: Tile rectangle in pixels (top-left origin).
    ///   - thicknessFraction: Strip thickness as a fraction of the rect's shorter
    ///     side; the thickness is never less than 2 pixels.
    func borderSaturation(inPixelRect rect: CGRect, thicknessFraction: Double = 0.12) -> Double {
        let t = max(2.0, min(rect.width, rect.height) * thicknessFraction)
        let top = CGRect(x: rect.minX, y: rect.minY, width: rect.width, height: t)
        let bottom = CGRect(x: rect.minX, y: rect.maxY - t, width: rect.width, height: t)
        let left = CGRect(x: rect.minX, y: rect.minY, width: t, height: rect.height)
        let right = CGRect(x: rect.maxX - t, y: rect.minY, width: t, height: rect.height)
        return [top, bottom, left, right].map { meanSaturation(inPixelRect: $0) }.max() ?? 0
    }

    /// Grid-sampled near-white pixels that lie on a THIN structure (a non-white
    /// pixel within `edgeGap` on some axis), i.e. a border/ring/audio-bar, not a
    /// solid white blob (face, bright video). This is Signal's white speaking
    /// border (saturation 0, so `saturatedPoints` can't see it).
    ///
    /// - Parameters:
    ///   - minChannel: Minimum value (0–255) each of R, G and B must reach.
    ///   - edgeGap: Distance in pixels at which the four axis neighbours are
    ///     tested; also the margin skipped at every frame edge.
    ///   - gridStep: Sampling stride in pixels; values below 1 are treated as 1.
    func thinWhitePoints(minChannel: Double = 200, edgeGap: Int = 6, gridStep: Int = 4) -> [CGPoint] {
        thinPoints(edgeGap: edgeGap, gridStep: gridStep) { x, y in
            isNearWhite(x, y, minChannel: minChannel)
        }
    }

    /// Grid-sampled pixel positions (top-left origin) that are saturated AND bright
    /// enough to be a UI highlight, i.e. a coloured speaking ring/border.
    ///
    /// - Parameters:
    ///   - threshold: Minimum saturation (exclusive). Lower it for muted accent
    ///     rings (Teams violet, Meet's light-blue glow sit below the 0.5 default).
    ///   - minBrightness: Minimum value (exclusive, 0–255) of the brightest channel.
    ///   - hueRange: When set, additionally restricts to a hue band (degrees,
    ///     0–360) so a low threshold doesn't pick up warm video. This is the
    ///     per-platform calibration lever.
    ///   - gridStep: Sampling stride in pixels; values below 1 are treated as 1.
    func saturatedPoints(threshold: Double = 0.5, minBrightness: Double = 60,
                         hueRange: ClosedRange<Double>? = nil, gridStep: Int = 6) -> [CGPoint] {
        // A zero step would never advance and a negative one would index
        // before the start of the buffer.
        let step = max(1, gridStep)
        var points: [CGPoint] = []
        for y in stride(from: 0, to: height, by: step) {
            for x in stride(from: 0, to: width, by: step)
            where isSaturatedCue(x, y, threshold: threshold, minBrightness: minBrightness, hueRange: hueRange) {
                points.append(CGPoint(x: x, y: y))
            }
        }
        return points
    }

    /// Grid-sampled saturated pixels that lie on a THIN structure (a non-saturated
    /// pixel within `edgeGap` on some axis): the coloured counterpart of
    /// `thinWhitePoints`. This keeps a thin speaking BORDER/ring/pill but drops the
    /// solid interior of a colour FILL (e.g. Meet's orange/magenta camera-off avatar
    /// tiles), whose pixels are surrounded by the same colour. Pair with `hueRange`
    /// to keep only the cue's colour (Meet's blue ring) and reject the thin edges a
    /// solid tile still has against the background (orange/magenta boundaries).
    ///
    /// - Parameters:
    ///   - threshold: Minimum saturation (exclusive) for a pixel to count as a cue.
    ///   - minBrightness: Minimum value (exclusive, 0–255) of the brightest channel.
    ///   - hueRange: Optional hue band in degrees (0–360) the cue must fall in.
    ///   - edgeGap: Distance in pixels at which the four axis neighbours are
    ///     tested; also the margin skipped at every frame edge.
    ///   - gridStep: Sampling stride in pixels; values below 1 are treated as 1.
    func thinColoredPoints(threshold: Double = 0.35, minBrightness: Double = 60,
                           hueRange: ClosedRange<Double>? = nil,
                           edgeGap: Int = 6, gridStep: Int = 4) -> [CGPoint] {
        thinPoints(edgeGap: edgeGap, gridStep: gridStep) { x, y in
            isSaturatedCue(x, y, threshold: threshold, minBrightness: minBrightness, hueRange: hueRange)
        }
    }

    // MARK: - Private helpers

    /// Converts a rect coordinate to a pixel index clamped to `0...upperBound`.
    /// The clamp happens in floating point so that NaN and infinite coordinates
    /// (e.g. from `CGRect.null`) cannot trap in the `Int` conversion.
    private static func clampedIndex(_ value: CGFloat, upperBound: Int) -> Int {
        let v = Double(value)
        guard v > 0 else { return 0 }                           // negatives and NaN
        guard v < Double(upperBound) else { return upperBound } // includes +infinity
        return Int(v)
    }

    /// Whether every colour channel of pixel (x, y) is at least `minChannel`.
    /// Out-of-frame coordinates are never near-white.
    private func isNearWhite(_ x: Int, _ y: Int, minChannel: Double) -> Bool {
        guard x >= 0, x < width, y >= 0, y < height else { return false }
        let i = (y * width + x) * 4
        return Double(pixels[i]) >= minChannel
            && Double(pixels[i + 1]) >= minChannel
            && Double(pixels[i + 2]) >= minChannel
    }

    /// Whether pixel (x, y) is saturated above `threshold`, brighter than
    /// `minBrightness`, and (when `hueRange` is set) inside that hue band.
    /// Out-of-frame coordinates are never a cue.
    private func isSaturatedCue(_ x: Int, _ y: Int, threshold: Double, minBrightness: Double,
                                hueRange: ClosedRange<Double>?) -> Bool {
        guard x >= 0, x < width, y >= 0, y < height else { return false }
        let i = (y * width + x) * 4
        let r = Double(pixels[i]), g = Double(pixels[i + 1]), b = Double(pixels[i + 2])
        let mx = max(r, g, b), mn = min(r, g, b)
        let sat = mx > 0 ? (mx - mn) / mx : 0
        guard sat > threshold, mx > minBrightness else { return false }
        guard let band = hueRange else { return true }
        return band.contains(Self.hueDegrees(r, g, b, mx, mn))
    }

    /// Shared scan behind `thinWhitePoints` and `thinColoredPoints`: walks the
    /// frame on a grid (skipping an `edgeGap` margin) and keeps each matching
    /// pixel that has at least one non-matching neighbour `edgeGap` pixels away
    /// along the x or y axis.
    private func thinPoints(edgeGap: Int, gridStep: Int, matches: (Int, Int) -> Bool) -> [CGPoint] {
        // A non-positive step would never reach the end of the frame.
        let step = max(1, gridStep)
        var points: [CGPoint] = []
        for y in stride(from: edgeGap, to: height - edgeGap, by: step) {
            for x in stride(from: edgeGap, to: width - edgeGap, by: step) where matches(x, y) {
                let surrounded = matches(x - edgeGap, y) && matches(x + edgeGap, y)
                    && matches(x, y - edgeGap) && matches(x, y + edgeGap)
                if !surrounded { points.append(CGPoint(x: x, y: y)) }
            }
        }
        return points
    }

    /// HSV hue in degrees (0–360) from RGB and its precomputed max/min channels.
    /// Returns 0 for greys (max == min), where hue is undefined.
    private static func hueDegrees(_ r: Double, _ g: Double, _ b: Double, _ mx: Double, _ mn: Double) -> Double {
        let d = mx - mn
        guard d > 0 else { return 0 }
        let h: Double
        if mx == r { h = (g - b) / d }
        else if mx == g { h = 2 + (b - r) / d }
        else { h = 4 + (r - g) / d }
        let deg = h * 60
        // The red sector yields -60..<0 when blue exceeds green; wrap it upward.
        return deg < 0 ? deg + 360 : deg
    }
}