recap-relay/startos/versions/v0.2.32.ts

import { VersionInfo } from '@start9labs/start-sdk'

/**
 * Version descriptor registered under the version string `0.2.32:0`.
 *
 * Carries the English (`en_US`) release notes and a pair of migrations.
 * Both `up` and `down` are empty async functions, so this descriptor itself
 * performs no migration work.
 */
export const v_0_2_32 = VersionInfo.of({
  version: '0.2.32:0',
  releaseNotes: {
    // The notes form one long single-line string. It is assembled from one
    // literal per numbered section, joined by a single space, because a raw
    // line break inside a quoted string literal does not parse.
    en_US: [
      'Configurable chunking + chunked analyze + parallel hardware transcribe.',
      '(1) New Settings tab in the dashboard (PUT /admin/settings) exposes 11 chunking / concurrency knobs that drive both real-user traffic AND test-run benchmarks — no separate test-run config. Knobs: Gemini transcribe chunk size (default 30 min), Gemini transcribe concurrency (default 12, was 6), Gemini analyze window body (default 18 min, new), Gemini analyze window overlap (default 2 min, new), Gemini analyze concurrency (default 12, new); Hardware transcribe chunk size (default 5 min), Hardware transcribe concurrency (default 4, was sequential), Hardware analyze window body (default 18 min, new), Hardware analyze window overlap (default 2 min, new), Hardware analyze concurrency (default 8, new); plus a shared analyze single-shot cutoff (default 25 min, below which the planner emits a single window covering the whole transcript). Edits apply to new jobs only — in-flight benchmarks keep their snapshot.',
      '(2) Ported the chunked-analyze logic from the Recap app: server/chunked-analyze.js parses the relay\'s bracketed [MM:SS] transcript into entries, plans time-windowed overlapping slices (body + overlap stride algorithm), fires each window at the chosen analyze backend up to the configured concurrency, emits one audit row per window with window_idx / window_count / window_body_seconds fields, then stitches sections back together using the same ownership-by-body-start rule the Recap app uses. The 200K-char truncation in the old single-shot analyze path is gone — long transcripts now analyze in parallel windows instead of getting truncated.',
      '(3) Parakeet now runs up to 4 transcribe chunks concurrently (was sequential) — per the spark-control LLM dev, a single GPU comfortably handles 4 parallel POSTs. Failed chunks are tolerated (their segments leave gaps in the stitched transcript) rather than aborting the whole job.',
      '(4) Removed every hardcoded chunking / concurrency constant scattered across the codebase: GEMINI_CHUNK_SECONDS / GEMINI_CHUNK_CONCURRENCY in server/backends/gemini.js, HARDWARE_CHUNK_SECONDS in server/audio-meta.js, and the 200K char cap in server/routes/admin-test-run.js. All chunking decisions now flow from exactly ONE source: server/config.js defaultConfig() (canonical default per knob) → relay-config.json (operator overrides via Settings tab) → backend factories at request time. splitAudioFile() now throws on missing chunkSeconds rather than silently substituting an old default.',
      '(5) Wall-time accuracy: per-window audio_seconds = window body length (not total audio_seconds), so the Jobs table\'s per-row analyze rate columns (s/audio-min) divide by the right denominator. Job-level analyze_ms remains sum-of-windows (= total backend work, drives cost). Job-level wall_time_ms remains completedAt − startedAt across all audit rows (= user-POV elapsed time, captures the parallel-window speedup). Both metrics live side-by-side in the Jobs table.',
    ].join(' '),
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
})