Chunk overlap + overlap-aware stitching
Previously, chunks were contiguous (each chunk's start = the previous chunk's end) and were stitched by naïve offset concatenation, with no overlap. That cut sentences at chunk boundaries, denied the diarizer context at the edges, and let one voice be split across chunks (the MH/Unknown_0 problem).

Now each ~150s body is sliced with a 15s margin on both sides ([bodyStart-15, bodyEnd+15], clamped to the recording bounds). The stitcher keeps a segment only in the chunk whose body region contains the segment's MIDPOINT, and drops it from the neighbour's margin — so boundary-spanning speech is seen whole by the backend and kept exactly once.

- SessionPackager.PlannedChunk gains bodyStart/bodyEnd; planChunks adds overlapSeconds.
- TranscriptAssembler.ChunkResult carries body bounds (the defaults keep every segment, so the no-overlap behaviour is preserved for existing callers); assemble deduplicates by midpoint-in-body.
- TranscriptPipeline passes body bounds through.

This complements (doesn't replace) the fragment-smoothing + reconciliation safety nets; this is the upstream fix. Cost: roughly +20% backend audio per interior chunk (2 × 15s of margin on a 150s body). Tests: 63/63 XCTest cases pass (new: overlap window layout + boundary-segment dedup).
This commit is contained in:
@@ -10,24 +10,35 @@ import AVFoundation
|
||||
enum SessionPackager {
|
||||
struct PlannedChunk: Equatable {
|
||||
let index: Int
|
||||
let start: Double // global seconds
|
||||
let end: Double
|
||||
let start: Double // sliced window start (global seconds, incl. overlap margin)
|
||||
let end: Double // sliced window end (incl. overlap margin)
|
||||
let bodyStart: Double // the region this chunk OWNS (no overlap) — for stitch dedup
|
||||
let bodyEnd: Double
|
||||
}
|
||||
|
||||
/// One chunk if short; otherwise even ~`chunkSeconds` windows.
|
||||
/// One chunk if short; otherwise ~`chunkSeconds` bodies, each sliced with an
|
||||
/// `overlapSeconds` margin on both sides. The margin gives the backend context at
|
||||
/// boundaries (so a sentence isn't cut and the diarizer attributes edge speech
|
||||
/// correctly and keeps a voice consistent across chunks); the stitcher keeps only
|
||||
/// each chunk's owned `body` region, deduping the overlap.
|
||||
static func planChunks(durationSec: Double,
|
||||
chunkSeconds: Double = 150,
|
||||
overlapSeconds: Double = 15,
|
||||
thresholdSec: Double = 180) -> [PlannedChunk] {
|
||||
guard durationSec > thresholdSec else {
|
||||
return [PlannedChunk(index: 0, start: 0, end: durationSec)]
|
||||
return [PlannedChunk(index: 0, start: 0, end: durationSec, bodyStart: 0, bodyEnd: durationSec)]
|
||||
}
|
||||
var chunks: [PlannedChunk] = []
|
||||
var start = 0.0
|
||||
var bodyStart = 0.0
|
||||
var index = 0
|
||||
while start < durationSec - 0.001 {
|
||||
let end = min(start + chunkSeconds, durationSec)
|
||||
chunks.append(PlannedChunk(index: index, start: start, end: end))
|
||||
start = end
|
||||
while bodyStart < durationSec - 0.001 {
|
||||
let bodyEnd = min(bodyStart + chunkSeconds, durationSec)
|
||||
chunks.append(PlannedChunk(
|
||||
index: index,
|
||||
start: max(0, bodyStart - overlapSeconds),
|
||||
end: min(durationSec, bodyEnd + overlapSeconds),
|
||||
bodyStart: bodyStart, bodyEnd: bodyEnd))
|
||||
bodyStart = bodyEnd
|
||||
index += 1
|
||||
}
|
||||
return chunks
|
||||
|
||||
Reference in New Issue
Block a user