// recap/startos/versions/v0.2.67.ts

import { VersionInfo } from '@start9labs/start-sdk'

/**
 * Version descriptor for release 0.2.67:0.
 *
 * Carries the English release notes (two paragraphs separated by a newline)
 * and up/down migrations that are both intentionally empty.
 */
export const v_0_2_67 = VersionInfo.of({
  version: '0.2.67:0',
  releaseNotes: {
    // A quoted string literal cannot contain a raw line break, so the
    // paragraph break is written as an explicit `\n` escape and the two
    // paragraphs are concatenated.
    en_US:
      'Chunked-transcribe robustness fixes triggered by a real-world failure on a 94-min YouTube podcast: gemini-3.1-flash-lite (the fallback model the chain walked to after gemini-2.5-flash kept 400ing on "Thinking level not supported") emitted absurd timestamps like [10:12:44] on a 45-min chunk. Those bogus offsets then poisoned the chunk-merge step (which uses a running-max comparison to dedupe boundary overlaps) — every subsequent chunk\'s entries were silently dropped because their valid offsets were "earlier" than chunk 1\'s 10-hour-claimed last entry. UI ended up showing the 94-min video as a 10:12:44 transcript with section timestamps wildly mismatched. Fixes: (1) Each audio chunk now carries its true durationSec (different from the configured chunkSeconds when it\'s the trailing chunk) so the merge step has a real time-window upper bound to validate against. (2) After parsing a chunk\'s transcribe output, drop any entry whose offset exceeds the chunk\'s end + 10s tolerance. A warning logs the count of dropped segments and the worst offset so the operator can see which model misbehaved. (3) Sort each chunk\'s entries by offset before merging — defends against models that emit segments out of chronological order (which broke the merge\'s monotonic-greater-than dedupe rule). (4) Only send thinkingConfig: {thinkingLevel: "minimal"} to Gemini 3.x flash models. Gemini 2.5 flash uses a different param shape (thinkingBudget integer) and 400s on thinkingLevel — was causing the noisy fallback spam every chunk on 2.5-flash. (5) Clearer log message for the single-shot analyze path: distinguishes "content fits in single shot (≤25 min)" from "only one analyze window planned (sparse entries — usually a sign of bad upstream transcribe data)". \n' +
      'Net effect on the failing 94-min episode going forward: the bogus [10:12:44] entries get dropped before they poison the merge, chunk 2 + chunk 3 entries land in the final transcript, the UI shows the correct ~94-min duration, and section timestamps line up with where things actually happened in the audio.',
  },
  migrations: {
    // Both migrations are deliberate no-ops.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
})