Package v0.2.12→v0.2.124: manifest, actions, version graph
This commit is contained in:
@@ -0,0 +1,13 @@
|
||||
import { VersionInfo } from '@start9labs/start-sdk'
|
||||
|
||||
export const v_0_2_88 = VersionInfo.of({
|
||||
version: '0.2.88:0',
|
||||
releaseNotes: {
|
||||
en_US:
|
||||
"Phase 1D of the diarization migration: cross-chunk speaker clustering. The Phase 1C output (per-chunk segments + per-speaker 192-dim TitaNet fingerprints) is now reconciled into global speaker IDs (Speaker_A, Speaker_B, ...) via average-linkage agglomerative clustering on cosine similarity. New module server/speaker-clustering.js. Threshold is the operator-tunable 'Voice clustering threshold' slider (default 0.70 cosine similarity, NeMo's recommended TitaNet default; range 0.50–0.95). The merged transcript segments each gain `speaker` + `speaker_confidence` fields based on which diarization segment overlaps the transcript line's midpoint (5s nearest-fallback when no segment covers the midpoint). The relay's summarize-url response envelope gains two new top-level fields: `speakers` (per-speaker summary with turns, total speaking seconds, mean confidence, fingerprint count, chunks_appeared_in) and `transcript_segments` (per-segment array with start, end, text, speaker, speaker_confidence). Both are null when diarization is off or produced no fingerprints. The Recap frontend ignores these for now — Phase 1E will hook up color-coded speaker rendering. New log line: '[hardware] diarization: 21/21 chunks succeeded, 42 fingerprints → 2 distinct speaker(s) at 70% cosine-sim threshold (clustering took 3ms)'. 13 unit tests cover the algorithm (cosine sim, cluster merge, threshold clamping, label-flip recovery, summary aggregation, midpoint vs nearest assignment).",
|
||||
},
|
||||
migrations: {
|
||||
up: async ({ effects }) => {},
|
||||
down: async ({ effects }) => {},
|
||||
},
|
||||
})
|
||||
Reference in New Issue
Block a user