Make diarization chunk length configurable (Auto + presets)
Chunk size was hardcoded at 2.5-min bodies. Add a Settings control: Auto / Standard 2.5min / Large group 60s / Fine 90s. Shorter chunks keep fewer simultaneous speakers per window (Sortformer resolves ~4/chunk), useful for large calls, at some cost to speed and cross-chunk voice matching.
- ChunkMode (new, pure/testable): mode → body seconds; Auto picks 60s when >4 participants were detected, else 150s; overlap + single-chunk threshold scale with the body length.
- AppSettings.chunkMode (+ typed `chunk`); SettingsView picker with explanation.
- TranscriptPipeline.process gains chunkSeconds; derives overlap/threshold from it.
- SessionController resolves the body from the setting + the session's detected participant count (visual_timeline participants) for both send + re-process.
- Participant roster now counts EVERY tile OCR'd, not just who spoke (TimelineBuilder.observedNames → VisualObserver → VisualCapture), so the Auto call-size signal is meaningful even though speaking-detection is sparse.
Tests: ChunkMode resolution, overlap scaling, short-body re-chunking. 69 pass.
This commit is contained in:
@@ -37,6 +37,33 @@ final class Phase5Tests: XCTestCase {
|
||||
XCTAssertEqual(asm.speakersFile.segments[0].start, 152, accuracy: 0.01)
|
||||
}
|
||||
|
||||
/// Checks the body length (in seconds) that each `ChunkMode` case resolves to.
///
/// The fixed presets are queried with unrelated participant counts (99, 2, nil)
/// and must still return their own constant: standard 150, largeGroup 60, fine 90.
/// `auto` is expected to return 60 for 6 participants and 150 for 4 or for an
/// unknown (nil) count.
func testChunkModeResolvesBodyLength() {
|
||||
// Fixed presets ignore participant count.
|
||||
XCTAssertEqual(ChunkMode.standard.bodySeconds(participantCount: 99), 150)
|
||||
XCTAssertEqual(ChunkMode.largeGroup.bodySeconds(participantCount: 2), 60)
|
||||
XCTAssertEqual(ChunkMode.fine.bodySeconds(participantCount: nil), 90)
|
||||
// Auto: >4 detected → 60s, ≤4 → 150s, unknown → 150s.
|
||||
XCTAssertEqual(ChunkMode.auto.bodySeconds(participantCount: 6), 60)
|
||||
XCTAssertEqual(ChunkMode.auto.bodySeconds(participantCount: 4), 150) // boundary: exactly 4 still resolves to 150
|
||||
XCTAssertEqual(ChunkMode.auto.bodySeconds(participantCount: nil), 150)
|
||||
}
|
||||
|
||||
/// Checks the overlap that `ChunkMode.overlapSeconds(forBody:)` returns for the
/// three body lengths used by the presets: 150 → 15, 60 → 8, 90 → 11.
///
/// NOTE(review): the inline arithmetic below suggests the overlap is 12% of the
/// body, bounded to the range 8...15 — confirm against the implementation of
/// `overlapSeconds(forBody:)`, which is not visible here.
func testChunkOverlapScalesWithBody() {
|
||||
XCTAssertEqual(ChunkMode.overlapSeconds(forBody: 150), 15) // capped
|
||||
XCTAssertEqual(ChunkMode.overlapSeconds(forBody: 60), 8) // floored (60*0.12=7.2→8)
|
||||
XCTAssertEqual(ChunkMode.overlapSeconds(forBody: 90), 11) // 90*0.12=10.8→11
|
||||
}
|
||||
|
||||
/// Checks that `SessionPackager.planChunks` splits a 100-second call into two
/// chunks when given a 60-second body, an 8-second overlap and a 72-second
/// single-chunk threshold.
///
/// Expected bodies: the first chunk covers 0–60 and the second covers 60–100.
func testPlanChunksShortBodyChunksAShortCall() {
|
||||
// A 100s call would be ONE chunk at the 2.5-min default, but at a 60s body it
|
||||
// splits — so "Large group" actually re-chunks medium calls.
|
||||
let c = SessionPackager.planChunks(durationSec: 100, chunkSeconds: 60,
|
||||
overlapSeconds: 8, thresholdSec: 72)
|
||||
XCTAssertEqual(c.count, 2)
|
||||
XCTAssertEqual(c[0].bodyStart, 0); XCTAssertEqual(c[0].bodyEnd, 60)
|
||||
XCTAssertEqual(c[1].bodyStart, 60); XCTAssertEqual(c[1].bodyEnd, 100)
|
||||
}
|
||||
|
||||
func testDropStuckSpansRemovesWholeCallCue() {
|
||||
let segs = [
|
||||
VisualTimeline.Segment(start: 0, end: 1900, name: "Grant Gilliam", confidence: 1, source: "vision"), // stuck whole-call tile
|
||||
|
||||
Reference in New Issue
Block a user