Chunk overlap + overlap-aware stitching

Chunks were contiguous (start = prev end) with a naïve offset-concat stitch — no
overlap. That cut sentences at boundaries, denied the diarizer context at edges, and
let one voice split across chunks (the MH/Unknown_0 problem). Now each ~150s body is
sliced with a 15s margin on both sides ([bodyStart-15, bodyEnd+15]); the stitcher
keeps a segment only in the chunk that owns its MIDPOINT (body region) and drops it
from the neighbour's margin — so boundary-spanning speech is seen whole by the
backend and kept exactly once.

- SessionPackager.PlannedChunk gains bodyStart/bodyEnd; planChunks adds overlapSeconds.
- TranscriptAssembler.ChunkResult carries body bounds (defaults keep-all → no-overlap
  behaviour preserved for existing callers); assemble dedups by midpoint-in-body.
- TranscriptPipeline passes body bounds through.

Complements (doesn't replace) the fragment-smoothing + reconciliation safety nets;
this is the upstream fix. ~+20% backend audio per interior chunk. 63/63 XCTest
(new: overlap window layout + boundary-segment dedup).
This commit is contained in:
Grant Gilliam
2026-06-08 13:03:56 -05:00
parent 1c133c8970
commit ab910cf742
4 changed files with 60 additions and 18 deletions
+26 -4
View File
@@ -8,11 +8,33 @@ final class Phase5Tests: XCTestCase {
XCTAssertEqual(c[0].end, 70, accuracy: 0.001)
}
/// Plans a 400 s call with 150 s chunks and a 15 s overlap margin, then checks
/// two things: the owned bodies stay contiguous, and the sliced windows extend
/// past their bodies by the margin while clamping at the ends of the call.
///
/// The pre-overlap header (`testPlanChunksLong`) and its assertions
/// (`c[0].end == 150`, `c[1].start == 150`) are removed: they contradict the
/// overlapping-window expectations asserted below.
func testPlanChunksLongOverlapsWindowsWithContiguousBodies() {
    let c = SessionPackager.planChunks(durationSec: 400, chunkSeconds: 150, overlapSeconds: 15)
    XCTAssertEqual(c.count, 3)
    // Owned bodies tile the call with no gaps/overlap.
    XCTAssertEqual(c[0].bodyStart, 0); XCTAssertEqual(c[0].bodyEnd, 150)
    XCTAssertEqual(c[1].bodyStart, 150); XCTAssertEqual(c[1].bodyEnd, 300)
    XCTAssertEqual(c[2].bodyEnd, 400)
    // Sliced windows overlap by the margin (and clamp at the ends).
    XCTAssertEqual(c[0].start, 0); XCTAssertEqual(c[0].end, 165) // +15 trailing
    XCTAssertEqual(c[1].start, 135) // -15 leading
    XCTAssertLessThan(c[1].start, c[0].end) // windows overlap
    XCTAssertEqual(c[2].end, 400) // clamped
}
/// Verifies that a segment reported by two overlapping chunks is emitted once.
///
/// The same utterance appears in both chunk responses: at 152 000–156 000 ms in
/// chunk 0 (chunkStart 0) and at 17 000–21 000 ms in chunk 1 (chunkStart 135,
/// i.e. 135 + 17 = 152 s globally). Only chunk 1's body [150, 300] contains it.
///
/// - Throws: If either JSON fixture fails to decode or no segment survives
///   assembly; XCTest reports that as a test failure rather than a crash.
func testAssembleDedupsOverlapByBody() throws {
    // A segment at global 152–156 s sits in chunk1's body but also in chunk0's
    // trailing margin (overlap). It must be kept exactly once (by chunk1).
    let r0 = #"{"duration":165,"speakers":[{"cluster":"S0","name":"A","source":"visual","overlap_confidence":0.9}],"segments":[{"start_ms":152000,"end_ms":156000,"speaker":"A","text":"boundary"}],"fingerprints":{},"models":{}}"#
    let r1 = #"{"duration":180,"speakers":[{"cluster":"S0","name":"A","source":"visual","overlap_confidence":0.9}],"segments":[{"start_ms":17000,"end_ms":21000,"speaker":"A","text":"boundary"}],"fingerprints":{},"models":{}}"#
    // `try` (not `try!`): a malformed fixture fails this test instead of
    // aborting the whole test process.
    let c0 = try JSONDecoder().decode(LabelMergeResponse.self, from: Data(r0.utf8))
    let c1 = try JSONDecoder().decode(LabelMergeResponse.self, from: Data(r1.utf8))
    let asm = TranscriptAssembler.assemble(sessionId: "s", app: "meet", chunks: [
        .init(chunkStart: 0, response: c0, bodyStart: 0, bodyEnd: 150),
        .init(chunkStart: 135, response: c1, bodyStart: 150, bodyEnd: 300),
    ])
    XCTAssertEqual(asm.speakersFile.segments.count, 1) // deduped
    // Unwrap instead of subscripting so an empty result fails cleanly rather
    // than trapping on an out-of-range index.
    let kept = try XCTUnwrap(asm.speakersFile.segments.first)
    XCTAssertEqual(kept.start, 152, accuracy: 0.01)
}
func testRebaseClipsAndRebases() throws {