Package v0.2.12→v0.2.124: manifest, actions, version graph

2026-06-13 13:36:30 -05:00
parent 318c6c4b81
commit 1243f4414c
126 changed files with 2052 additions and 441 deletions
@@ -60,6 +60,11 @@ COPY server/*.js ./server/
 COPY server/backends/ ./server/backends/
 COPY server/routes/ ./server/routes/
 COPY public/ ./public/
 # Issuer public key for offline license verification. Without this
 # assets/ COPY, keysat-client.js can't load the PEM at runtime and
 # falls back to "all licenses are anonymous" — which silently degrades
 # every paid tier to Core. Same file Recap-app ships at assets/issuer.pub.
 COPY assets/ ./assets/
 COPY docker_entrypoint.sh /usr/local/bin/docker_entrypoint.sh
 RUN chmod +x /usr/local/bin/docker_entrypoint.sh
@@ -1,17 +1,31 @@
 import { sdk } from '../sdk'
 import { setGeminiKey } from './setGeminiKey'
 import { setKeysatBaseUrl } from './setKeysatBaseUrl'
 import { setParakeetUrl } from './setParakeetUrl'
 import { setGemmaUrl } from './setGemmaUrl'
 import { setAdminPassword } from './setAdminPassword'
 import { adjustTierQuotas } from './adjustTierQuotas'
-import { setBackendRouting } from './setBackendRouting'
+import { setTierPrices } from './setTierPrices'
 // setBackendRouting removed in v0.2.50 — backend routing + Gemini
 // model selection now live in the relay dashboard's Settings tab.
 //
 // Five more actions removed in v0.2.82 for the same reason:
 //   setGeminiKey, setKeysatBaseUrl, setSparkControlUrl,
 //   setParakeetUrl, setGemmaUrl
 // All five fields are now inline-editable in the dashboard's Settings
 // tab under "Endpoints & credentials". Same backing store
 // (relay-config.json); the actions added a parallel surface for the
 // same writes which created confusion about which path won. Single
 // source of truth: the dashboard. The config keys themselves stay in
 // relay-config.json — no migration needed.
 //
 // setAdminPassword stays here because it's the bootstrap mechanism:
 // you can't sign into the dashboard until the password is set, so
 // that one HAS to live in StartOS Actions.
 import { setBtcpayConnection } from './setBtcpayConnection'
 import { setZapriteConnection } from './setZapriteConnection'
 import { setCreditPackages } from './setCreditPackages'
 // Actions surfaced in the StartOS UI for this package.
 //
 // Deliberately NOT registered here:
 //   - setBackendRouting: its import no longer exists in this module, so
 //     referencing it would be an unresolved name.
 //   - setGeminiKey, setKeysatBaseUrl, setParakeetUrl, setGemmaUrl: these
 //     fields are edited in the relay dashboard's Settings tab; keeping a
 //     second write path here caused confusion about which one won.
 //
 // setAdminPassword stays because it bootstraps dashboard access.
 export const actions = sdk.Actions.of()
  .addAction(setAdminPassword)
  .addAction(adjustTierQuotas)
  .addAction(setTierPrices)
  .addAction(setBtcpayConnection)
  .addAction(setZapriteConnection)
  .addAction(setCreditPackages)
@@ -1,154 +0,0 @@
 import { sdk } from '../sdk'
 import { configFile } from '../file-models/config.json'
 const { InputSpec, Value } = sdk
 // Lets the operator tune which backend gets tried first per pipeline
 // step (transcribe vs analyze) AND which Gemini SKU is used when
 // Gemini is the backend. All four knobs live-reload — change them
 // via this action and the next relay request honors the new values
 // without a daemon restart.
 // Form definition for the "set-backend-routing" action: four
 // single-choice (select) fields. The field keys are the same keys the
 // action's submit handler merges into the config file.
 const inputSpec = InputSpec.of({
  // ── Gemini model selection ──
  // Both fields are radio-select with curated options. The relay's
  // Gemini backend automatically falls back to lower-tier models in
  // this same list when the chosen one returns a 503 / capacity /
  // rate-limit error — see server/backends/gemini.js for the
  // fallback-chain logic.
  // Model used for the transcription step; defaults to
  // 'gemini-3-flash-preview'.
  relay_gemini_transcription_model: Value.select({
    name: 'Gemini Transcription Model',
    description:
      "Primary Gemini SKU used when a transcription request is routed to Gemini. On 503/capacity/rate-limit failure, the relay falls back to lower-tier models in order (e.g. 3-flash → 2.5-flash → 2.0-flash).",
    default: 'gemini-3-flash-preview',
    values: {
      'gemini-3-flash-preview':
        'Gemini 3 Flash — latest, recommended (~$0.30/M in, $2.50/M out)',
      'gemini-2.5-flash':
        'Gemini 2.5 Flash — prior gen (same pricing as 3-flash)',
      'gemini-2.0-flash':
        'Gemini 2.0 Flash — older + cheapest (~$0.10/M in, $0.40/M out)',
    },
  }),
  // Model used for the analysis step; defaults to
  // 'gemini-3.1-pro-preview'.
  relay_gemini_analysis_model: Value.select({
    name: 'Gemini Analysis Model',
    description:
      "Primary Gemini SKU used when an analysis request is routed to Gemini. On 503/capacity/rate-limit failure, the relay falls back to lower-tier models in order (e.g. 3.1-pro → 3-pro → 3-flash → 2.5-flash).",
    default: 'gemini-3.1-pro-preview',
    values: {
      'gemini-3.1-pro-preview':
        'Gemini 3.1 Pro — best quality on structured-JSON output ($5/M in, $25/M out)',
      'gemini-3-pro-preview':
        'Gemini 3 Pro — prior Pro gen (same pricing as 3.1)',
      'gemini-3-flash-preview':
        'Gemini 3 Flash — faster + ~20× cheaper than Pro; some loss of section-boundary precision on long transcripts',
      'gemini-2.5-flash':
        'Gemini 2.5 Flash — prior Flash gen',
    },
  }),
  // ── Backend routing preference per pipeline ──
  // The transcribe and analyze preferences share the same four option
  // keys and both default to 'gemini_first'; they are stored
  // independently.
  relay_transcribe_backend_preference: Value.select({
    name: 'Transcribe Backend Preference',
    description:
      'Routing strategy for transcription requests. The selected option controls the ORDER in which the relay tries each backend. The Gemini per-tier cap still applies regardless of this setting.',
    default: 'gemini_first',
    values: {
      gemini_first:
        'Gemini first → operator hardware (Parakeet) when cap exceeded',
      hardware_first: 'Operator hardware first → Gemini as fallback',
      gemini_only: 'Gemini only — fail when cap is exceeded',
      hardware_only:
        'Hardware only — fail when no Parakeet endpoint is configured',
    },
  }),
  relay_analyze_backend_preference: Value.select({
    name: 'Analyze Backend Preference',
    description:
      'Routing strategy for analysis requests. Same options as transcription but applies to the analyze step independently — you can route transcribe to hardware and analyze to Gemini, or vice versa.',
    default: 'gemini_first',
    values: {
      gemini_first:
        'Gemini first → operator hardware (Gemma) when cap exceeded',
      hardware_first: 'Operator hardware first → Gemini as fallback',
      gemini_only: 'Gemini only — fail when cap is exceeded',
      hardware_only:
        'Hardware only — fail when no Gemma endpoint is configured',
    },
  }),
 })
 // "Set Backend Routing & Models" action.
 //
 // Arguments passed to sdk.Action.withInput, in order:
 //   1. the action id
 //   2. an async function returning the action's display metadata
 //   3. the form definition (inputSpec)
 //   4. an async function that reads the saved config and returns the
 //      values the form starts with
 //   5. an async function that merges the submitted values into the
 //      config file and returns null
 //
 // The initial-values step clamps ALL FOUR selects to an option that is
 // currently offered. Previously only the two model fields were clamped,
 // so a stale or hand-edited backend preference in the config file was
 // handed to the form even though it matched no option.
 export const setBackendRouting = sdk.Action.withInput(
  'set-backend-routing',
  async ({ effects }) => ({
    name: 'Set Backend Routing & Models',
    description:
      "Tune which Gemini SKUs the relay uses and the per-pipeline backend pecking order. Live-reloaded — changes take effect on the next request, no restart.",
    warning: null,
    allowedStatuses: 'any',
    group: 'AI Backends',
    visibility: 'enabled',
  }),
  inputSpec,
  async ({ effects }) => {
    const config = await configFile.read().once()
    // Option keys accepted by each select. Older 0.2.7-era saved
    // configs could hold a free-text value that's no longer in the
    // dropdown — clamp to a sensible default rather than presenting
    // an invalid radio.
    const TX_OPTIONS = [
      'gemini-3-flash-preview',
      'gemini-2.5-flash',
      'gemini-2.0-flash',
    ] as const
    const AN_OPTIONS = [
      'gemini-3.1-pro-preview',
      'gemini-3-pro-preview',
      'gemini-3-flash-preview',
      'gemini-2.5-flash',
    ] as const
    const PREFERENCE_OPTIONS = [
      'gemini_first',
      'hardware_first',
      'gemini_only',
      'hardware_only',
    ] as const
    // Returns `saved` when it is one of `options`; otherwise `fallback`.
    // Missing, empty and unknown values all resolve to the fallback.
    const clamp = <T extends string>(
      options: readonly T[],
      saved: unknown,
      fallback: T,
    ): T => (options.includes(saved as T) ? (saved as T) : fallback)
    return {
      relay_gemini_transcription_model: clamp(
        TX_OPTIONS,
        config?.relay_gemini_transcription_model,
        'gemini-3-flash-preview',
      ),
      relay_gemini_analysis_model: clamp(
        AN_OPTIONS,
        config?.relay_gemini_analysis_model,
        'gemini-3.1-pro-preview',
      ),
      relay_transcribe_backend_preference: clamp(
        PREFERENCE_OPTIONS,
        config?.relay_transcribe_backend_preference,
        'gemini_first',
      ),
      relay_analyze_backend_preference: clamp(
        PREFERENCE_OPTIONS,
        config?.relay_analyze_backend_preference,
        'gemini_first',
      ),
    }
  },
  async ({ effects, input }) => {
    // Persist all four knobs in one merge. Model names fall back to
    // their defaults if the submitted value is empty.
    await configFile.merge(effects, {
      relay_gemini_transcription_model: (
        input.relay_gemini_transcription_model || 'gemini-3-flash-preview'
      ).trim(),
      relay_gemini_analysis_model: (
        input.relay_gemini_analysis_model || 'gemini-3.1-pro-preview'
      ).trim(),
      relay_transcribe_backend_preference: input.relay_transcribe_backend_preference,
      relay_analyze_backend_preference: input.relay_analyze_backend_preference,
    })
    return null
  },
 )
@@ -1,54 +0,0 @@
 import { sdk } from '../sdk'
 import { configFile } from '../file-models/config.json'
 const { InputSpec, Value } = sdk
 // The operator's Gemini API key. This is the relay's primary backend
 // — Recap requests for both transcribe and analyze go to Gemini first,
 // and only spill to the optional Parakeet/Gemma backends once a user
 // exceeds their tier's monthly Gemini cap.
 //
 // Free key from https://aistudio.google.com/apikey. Track usage in
 // the Google AI Studio dashboard to know what tier pricing should be.
 // Form definition for the "set-gemini-key" action: one required text
 // field, 1–256 characters, with no default value.
 const inputSpec = InputSpec.of({
  relay_gemini_api_key: Value.text({
    name: 'Gemini API Key',
    description:
      'The relay\'s Google Gemini API key. Used for transcribe + analyze forwarding. Get one at https://aistudio.google.com/apikey',
    required: true,
    default: null,
    // NOTE(review): presumably hides the typed value in the UI because
    // this is a secret — confirm against the SDK's Value.text docs.
    masked: true,
    minLength: 1,
    maxLength: 256,
  }),
 })
 // "Set Gemini API Key" action.
 //
 // The handlers, in order: display metadata, initial form values read
 // from the saved config, and the submit step that merges the key into
 // the config file.
 export const setGeminiKey = sdk.Action.withInput(
  'set-gemini-key',
  async ({ effects }) => ({
    name: 'Set Gemini API Key',
    description:
      "The operator's Gemini key. Required — the relay will refuse to serve traffic until this is set.",
    warning: null,
    allowedStatuses: 'any',
    group: 'AI Backends',
    visibility: 'enabled',
  }),
  inputSpec,
  async ({ effects }) => {
    const config = await configFile.read().once()
    return {
      // An unset or empty saved key leaves the field blank.
      relay_gemini_api_key: config?.relay_gemini_api_key || undefined,
    }
  },
  async ({ effects, input }) => {
    await configFile.merge(effects, {
      // Trim before persisting: a key pasted with a stray space or
      // newline would otherwise be stored verbatim. This matches the
      // other text-input actions, which all trim on save.
      relay_gemini_api_key: (input.relay_gemini_api_key || '').trim(),
    })
    return null
  },
 )
@@ -1,67 +0,0 @@
 import { sdk } from '../sdk'
 import { configFile } from '../file-models/config.json'
 const { InputSpec, Value } = sdk
 // Operator's Gemma (or any OpenAI-compatible chat-completions) endpoint
 // + which model to request. Both fields live-reload so the operator
 // can pull a different Gemma SKU on Ollama and update the model name
 // here without restarting the relay.
 // Form definition for the "set-gemma-url" action: an optional endpoint
 // URL plus a required model name.
 const inputSpec = InputSpec.of({
  // Optional. The pattern accepts either the empty string or a value
  // starting with http:// or https://.
  relay_gemma_base_url: Value.text({
    name: 'Gemma Base URL',
    description:
      "URL of the operator's Gemma / Ollama / OpenAI-compatible analysis endpoint. Used as the overflow path once a user exceeds their monthly Gemini cap. Leave empty to hard-cap at the Gemini limit. Example: http://192.168.1.87:11434",
    required: false,
    default: '',
    minLength: 0,
    maxLength: 256,
    patterns: [
      {
        regex: '^(https?://.+)?$',
        description: 'Must be empty or start with http:// or https://',
      },
    ],
  }),
  // Required, 1–128 characters; defaults to 'gemma3:27b'.
  relay_gemma_model: Value.text({
    name: 'Gemma Model Name',
    description:
      'The model identifier sent in upstream chat-completions requests. Match whatever name your Ollama / vLLM / llama.cpp deployment exposes (run `ollama list` to see what you have pulled). Example: gemma3:27b, gemma2:9b, llama3.1:70b',
    required: true,
    default: 'gemma3:27b',
    minLength: 1,
    maxLength: 128,
  }),
 })
 // "Set Gemma URL" action: display metadata, initial form values from
 // the saved config, then a submit step that writes the trimmed values
 // back to the config file.
 export const setGemmaUrl = sdk.Action.withInput(
  'set-gemma-url',
  async ({ effects }) => {
    return {
      name: 'Set Gemma URL',
      description:
        'Optional. Where the relay forwards analysis requests once a user exceeds their monthly Gemini cap. Leave URL empty to disable the fallback.',
      warning: null,
      allowedStatuses: 'any',
      group: 'AI Backends',
      visibility: 'enabled',
    }
  },
  inputSpec,
  async ({ effects }) => {
    // Missing or empty saved values fall back to the form defaults.
    const saved = await configFile.read().once()
    const relay_gemma_base_url = saved?.relay_gemma_base_url || ''
    const relay_gemma_model = saved?.relay_gemma_model || 'gemma3:27b'
    return { relay_gemma_base_url, relay_gemma_model }
  },
  async ({ effects, input }) => {
    // Normalise both fields before persisting them in a single merge.
    const baseUrl = (input.relay_gemma_base_url || '').trim()
    const model = (input.relay_gemma_model || 'gemma3:27b').trim()
    await configFile.merge(effects, {
      relay_gemma_base_url: baseUrl,
      relay_gemma_model: model,
    })
    return null
  },
 )
@@ -1,57 +0,0 @@
 import { sdk } from '../sdk'
 import { configFile } from '../file-models/config.json'
 const { InputSpec, Value } = sdk
 // Where the relay calls to validate licenses. Defaults to the public
 // Keysat endpoint. Operators running Keysat on the same Start9 server
 // can override to the internal hostname (e.g. http://keysat.startos:3000)
 // for a lower-latency hot path — every relay request hits this for the
 // cached online check.
 // Form definition for the "set-keysat-base-url" action: one required
 // URL field, 8–256 characters, defaulting to 'https://keysat.xyz'.
 const inputSpec = InputSpec.of({
  relay_keysat_base_url: Value.text({
    name: 'Keysat Base URL',
    description:
      "URL of the Keysat license server. Defaults to https://keysat.xyz. If you're running Keysat as a co-located StartOS package, override to the internal hostname (http://keysat.startos:<port>) to skip the public-internet roundtrip.",
    required: true,
    default: 'https://keysat.xyz',
    minLength: 8,
    maxLength: 256,
    // Unlike the optional endpoint fields, the empty string does not
    // match: the value must start with http:// or https://.
    patterns: [
      {
        regex: '^https?://.+$',
        description: 'Must start with http:// or https://',
      },
    ],
  }),
 })
 // "Set Keysat URL" action: display metadata, initial form value from
 // the saved config, then a submit step that writes the trimmed URL
 // back to the config file.
 export const setKeysatBaseUrl = sdk.Action.withInput(
  'set-keysat-base-url',
  async ({ effects }) => {
    return {
      name: 'Set Keysat URL',
      description:
        "Where the relay validates Recap user licenses. Defaults to https://keysat.xyz — override to a co-located internal hostname if Keysat is on the same Start9 server.",
      warning: null,
      allowedStatuses: 'any',
      group: 'Setup',
      visibility: 'enabled',
    }
  },
  inputSpec,
  async ({ effects }) => {
    // A missing or empty saved URL shows the public default instead.
    const saved = await configFile.read().once()
    const relay_keysat_base_url =
      saved?.relay_keysat_base_url || 'https://keysat.xyz'
    return { relay_keysat_base_url }
  },
  async ({ effects, input }) => {
    const trimmedUrl = (input.relay_keysat_base_url || '').trim()
    await configFile.merge(effects, { relay_keysat_base_url: trimmedUrl })
    return null
  },
 )
@@ -1,68 +0,0 @@
 import { sdk } from '../sdk'
 import { configFile } from '../file-models/config.json'
 const { InputSpec, Value } = sdk
 // Operator's Parakeet endpoint + which model to request. Both fields
 // live-reload — change them via this action and the next relay request
 // picks up the new values without a daemon restart.
 // Form definition for the "set-parakeet-url" action: an optional
 // endpoint URL plus a required model name.
 const inputSpec = InputSpec.of({
  // Optional. The pattern accepts either the empty string or a value
  // starting with http:// or https://.
  relay_parakeet_base_url: Value.text({
    name: 'Parakeet Base URL',
    description:
      'URL of the operator\'s Parakeet (or any Whisper-API-compatible) transcription endpoint. Used as the overflow path once a user exceeds their monthly Gemini cap. Leave empty to hard-cap at the Gemini limit. Example: http://192.168.1.87:8000',
    required: false,
    default: '',
    minLength: 0,
    maxLength: 256,
    patterns: [
      {
        regex: '^(https?://.+)?$',
        description: 'Must be empty or start with http:// or https://',
      },
    ],
  }),
  // Required, 1–128 characters; defaults to 'parakeet-tdt-0.6b-v3'.
  relay_parakeet_model: Value.text({
    name: 'Parakeet Model Name',
    description:
      'The model identifier sent in upstream requests (the "model" field in the OpenAI Whisper API body). Match whatever name your Parakeet wrapper expects. Default: parakeet-tdt-0.6b-v3',
    required: true,
    default: 'parakeet-tdt-0.6b-v3',
    minLength: 1,
    maxLength: 128,
  }),
 })
 // "Set Parakeet URL" action: display metadata, initial form values
 // from the saved config, then a submit step that writes the trimmed
 // values back to the config file.
 export const setParakeetUrl = sdk.Action.withInput(
  'set-parakeet-url',
  async ({ effects }) => {
    return {
      name: 'Set Parakeet URL',
      description:
        "Optional. Where the relay forwards transcription requests once a user exceeds their monthly Gemini cap. Leave URL empty to disable the operator-hardware fallback.",
      warning: null,
      allowedStatuses: 'any',
      group: 'AI Backends',
      visibility: 'enabled',
    }
  },
  inputSpec,
  async ({ effects }) => {
    // Missing or empty saved values fall back to the form defaults.
    const saved = await configFile.read().once()
    const relay_parakeet_base_url = saved?.relay_parakeet_base_url || ''
    const relay_parakeet_model =
      saved?.relay_parakeet_model || 'parakeet-tdt-0.6b-v3'
    return { relay_parakeet_base_url, relay_parakeet_model }
  },
  async ({ effects, input }) => {
    // Normalise both fields before persisting them in a single merge.
    const baseUrl = (input.relay_parakeet_base_url || '').trim()
    const model = (input.relay_parakeet_model || 'parakeet-tdt-0.6b-v3').trim()
    await configFile.merge(effects, {
      relay_parakeet_base_url: baseUrl,
      relay_parakeet_model: model,
    })
    return null
  },
 )
@@ -1,12 +1,24 @@
 import { sdk } from './sdk'
-// Recap declares Ollama as an OPTIONAL dependency in the manifest.
+// Required running dependency on BTCPay Server. Mirrors Keysat's
-// We do not return it here because we don't want to enforce a runtime
+// pattern. The dep declaration is what gives us:
-// requirement on it — Recap runs fine using cloud providers
+//   - guaranteed `btcpayserver.startos` DNS resolution inside our
-// (Gemini/Anthropic/OpenAI) when Ollama is not installed. The optional
+//     container (used for daemon-to-daemon BTCPay API calls — see
-// declaration in the manifest is what surfaces it as a suggested
+//     server/btcpay-client.js)
-// install on the Marketplace; this empty result keeps it from blocking
+//   - StartOS-level guarantee that BTCPay is running before the
-// startup.
+//     relay starts, so the credit-purchase flow's webhook
-export const setDependencies = sdk.setupDependencies(async ({ effects }) => {
+//     auto-registration always works
-  return {}
+//
 // Without this declaration the docker network link isn't wired up
 // and `btcpayserver.startos` doesn't resolve — which previously
 // surfaced as "fetch failed" when the relay tried to list stores or
 // create the webhook during the one-click setup.
 // Declares the package's dependencies: BTCPay Server, required to be
 // running, at version range '>=1.11.0:0', with no health checks
 // requested. The `effects` argument is intentionally unused.
 export const setDependencies = sdk.setupDependencies(
  async ({ effects: _effects }) => ({
    btcpayserver: {
      kind: 'running',
      versionRange: '>=1.11.0:0',
      healthChecks: [],
    },
  }),
 )
@@ -22,19 +22,19 @@ export const configFile = FileHelper.json(
    // hardware (if configured) or 503 every request.
    relay_gemini_api_key: z.string().default(''),
-    // ── Operator hardware (optional fallback) ──
+    // ── Service discovery (operator-hardware wiring) ──
-    // When a Pro/Max user exceeds their monthly Gemini cap, the relay
+    // URL of a Spark-Control-style /api/endpoints JSON document that
-    // routes overflow here. Leave empty to hard-cap at the Gemini limit
+    // lists local LLM / STT services on the operator's LAN. The relay
-    // and return 402 once exceeded (no fallback).
+    // reads this (60s TTL) to find the transcribe + analyze backends
-    relay_parakeet_base_url: z.string().default(''),
+    // for the hardware path, and POSTs /api/audio/diarize-chunk to
-    relay_gemma_base_url: z.string().default(''),
+    // the same host. This is the SINGLE source of truth for which
-    // Model identifiers to send in the upstream request bodies. The
+    // hardware URLs and models the relay uses — there is no longer a
-    // operator's Ollama or Parakeet wrapper may serve different models
+    // per-endpoint manual-override fallback (removed in v0.2.84;
-    // depending on what's been pulled; making these config-driven
+    // Spark Control became the canonical wiring).
-    // means the operator can swap models without rebuilding the relay.
+    //
-    // Live-reloaded — change applies to the next request.
+    // Empty string disables the operator-hardware path entirely —
-    relay_parakeet_model: z.string().default('parakeet-tdt-0.6b-v3'),
+    // the relay will return 503 from any hardware-routed request.
-    relay_gemma_model: z.string().default('gemma3:27b'),
+    relay_spark_control_url: z.string().default(''),
    // ── Gemini model selection ──
    // Operator can pick which Gemini SKU is used per pipeline step
@@ -82,6 +82,17 @@ export const configFile = FileHelper.json(
    relay_admin_password_salt: z.string().default(''),
    relay_admin_session_secret: z.string().default(''),
    // ── Output storage policy ──
    // When true, the relay persists per-job transcript + analysis
    // JSON for REAL USER traffic (jobs from Recap clients) to
    // /data/relay-outputs/. Test-run jobs (source=admin-test) are
    // ALWAYS persisted regardless of this flag — they're the
    // operator's own benchmarking content with no privacy concern.
    // Default off: real-user transcripts pass through and aren't
    // written to disk unless the operator explicitly opts in for
    // their own debugging.
    relay_save_user_outputs: z.boolean().default(false),
    // ── Tier quotas (operator-adjustable without redeploy) ──
    // JSON blob driving credits.js. Defaults match the v1 product
    // spec: Core lifetime-5, Pro 50/mo with 25 Gemini cap, Max
@@ -103,5 +114,124 @@ export const configFile = FileHelper.json(
        max: { lifetime: null, monthly: null, geminiCapMonthly: 50 },
      }),
    ),
    // ── BTCPay (credit-purchase flow) ──
    // When configured, the relay exposes /relay/credits/buy so users
    // can top up their credit balance directly via Lightning/onchain.
    // Empty fields → the purchase route returns 503 ("not configured")
    // — the rest of the relay continues to work.
    relay_btcpay_base_url: z.string().default(''),
    // Internal Start9 hostname for server-to-server BTCPay calls
    // (e.g. http://btcpayserver.startos:23000). Auto-populated by
    // the one-click setup wizard when BTCPay is co-installed; falls
    // back to relay_btcpay_base_url when empty. mDNS / clearnet
    // hostnames don't resolve from inside the docker container, so
    // this internal alias is required when BTCPay sits behind one.
    relay_btcpay_internal_url: z.string().default(''),
    // Public BUYER-facing URL — used to rewrite the checkout link
    // BTCPay returns from invoice-create (which contains the internal
    // hostname we called it on). Buyers can be anywhere on the
    // internet so this must be a clearnet domain when possible.
    // Falls back to relay_btcpay_base_url (LAN-only) when empty,
    // which limits credit purchases to operator's local network.
    relay_btcpay_public_url: z.string().default(''),
    relay_btcpay_store_id: z.string().default(''),
    relay_btcpay_api_key: z.string().default(''),
    relay_btcpay_webhook_secret: z.string().default(''),
    // ── Zaprite (card rail) ──
    // Hosted-checkout API for the "Pay by card" rail. Set via the "Set
    // Zaprite Connection" action. api_key empty = card rail disabled (the
    // UI hides "Pay by card"). No webhook secret: the webhook is verified
    // by re-fetching the order from Zaprite's authenticated API.
    relay_zaprite_base_url: z.string().default('https://api.zaprite.com'),
    relay_zaprite_api_key: z.string().default(''),
    relay_zaprite_currency: z.string().default('USD'),
    // ── Credit packages (operator-editable bundle pricing) ──
    // JSON array, ordered as it should appear in the buyer UI. Each
    // entry is { credits: number, sats: number }. Defaults match
    // the initial Recap pricing — operator can adjust via the
    // "Set Credit Packages" action.
    relay_credit_packages_json: z
      .string()
      .default(
        JSON.stringify([
          { credits: 5, sats: 4000 },
          { credits: 10, sats: 6000 },
          { credits: 20, sats: 10000 },
        ])
      ),
    // ── Tier prices (USD per active subscription) ──
    // Operator-set monthly price per tier. Used by the dashboard to
    // estimate revenue and operating margin against Gemini cost. Pure
    // accounting — the relay itself does no billing; Keysat handles
    // license sales. Defaults match the current keysat.xyz price list
    // ($0/$5/$15) but the operator can override to whatever they're
    // actually charging customers for this relay's tier comping.
    relay_tier_prices_usd_json: z.string().default(
      JSON.stringify({ core: 0, pro: 5, max: 15 }),
    ),
    // ── Card-rail (Zaprite) subscription prices ──
    // The amount actually CHARGED to a card buyer, in the smallest unit
    // of relay_zaprite_currency (cents for USD). Distinct from
    // relay_tier_prices_usd_json (dashboard accounting only). Set via the
    // "Set Zaprite Connection" action. Default ≈ parity with the sat
    // prices ($21 / $42); raise to add a card premium for processing fees.
    relay_tier_prices_fiat_cents_json: z.string().default(
      JSON.stringify({ pro: 2100, max: 4200 }),
    ),
    // ── Chunking / concurrency knobs ──
    // Edited via the dashboard's Settings tab (PUT /admin/settings).
    // ONE canonical default per knob, defined here exactly once. All
    // backend code reads from the live-reloaded snapshot at request
    // time — no hardcoded fallbacks anywhere else in the codebase.
    //
    // Gemini backend (relay → Google Gemini API):
    relay_gemini_tx_chunk_minutes: z.number().int().min(1).max(120).default(30),
    relay_gemini_tx_concurrency: z.number().int().min(1).max(32).default(12),
    relay_gemini_analyze_window_minutes: z.number().int().min(1).max(60).default(18),
    relay_gemini_analyze_overlap_minutes: z.number().int().min(0).max(10).default(2),
    relay_gemini_analyze_concurrency: z.number().int().min(1).max(32).default(12),
    // Hardware backend (relay → operator's Parakeet + Gemma/vLLM):
    relay_hardware_tx_chunk_minutes: z.number().int().min(1).max(120).default(5),
    relay_hardware_tx_concurrency: z.number().int().min(1).max(32).default(4),
    relay_hardware_analyze_window_minutes: z.number().int().min(1).max(60).default(18),
    relay_hardware_analyze_overlap_minutes: z.number().int().min(0).max(10).default(2),
    relay_hardware_analyze_concurrency: z.number().int().min(1).max(32).default(8),
    // Diarization suppression thresholds (Phase 2 cluster cleanup).
    // See server/speaker-clustering.js for how these shape the
    // post-cluster pass that demotes small spurious clusters.
    relay_hardware_anchor_min_speaking_sec: z.number().int().min(5).max(120).default(30),
    relay_hardware_small_cluster_max_speaking_sec: z.number().int().min(1).max(60).default(15),
    relay_hardware_uncertain_margin_pct: z.number().int().min(0).max(30).default(10),
    // Cutoff (minutes) below which the analyze planner emits a single
    // window covering the whole transcript (single-shot fast path) AND
    // TX chunking is bypassed. Tunable for benchmarking.
    relay_analyze_cutoff_minutes: z.number().int().min(1).max(60).default(25),
    // ── Editable LLM prompts ──
    // Empty string = use the hardcoded default at request time. The
    // dashboard's Settings tab edits these; the textareas can also
    // be reset to default, which clears the field to "" so future
    // default-prompt changes in code flow through automatically.
    relay_transcribe_prompt: z.string().default(''),
    relay_analyze_prompt: z.string().default(''),
    // ── Post-cluster polish pass (Phase 2) ──
    // Default ON; operator can disable via Settings tab if they
    // want raw fast output or are debugging analyze quality
    // independently from polish quality.
    relay_post_cluster_polish_enabled: z.boolean().default(true),
    // Operator-editable polish prompts. Empty = fall through to
    // the hardcoded defaults in server/post-cluster-polish.js.
    // Same three-layer override pattern (per-session override →
    // operator-promoted default → code default) as the analyze
    // and transcribe prompts.
    relay_polish_name_inference_prompt: z.string().default(''),
    relay_polish_summary_rewrite_prompt: z.string().default(''),
  }),
 )
@@ -1,8 +1,262 @@
 import { sdk } from '../sdk'
 import { FileHelper } from '@start9labs/start-sdk'
 import { Volume } from '@start9labs/start-sdk/package/lib/util/Volume'
 import { z } from 'zod'
-// Recap needs no special initialization.
+const mainVolume = new Volume('main')
-// Directories are created by docker_entrypoint.sh and
+
-// config is loaded from the persistent volume at runtime.
+// File the relay container reads to learn the URLs of co-installed
-export const setup = sdk.setupOnInit(async (effects, kind) => {
+// services. Written here at install/update time via the StartOS SDK's
-  // Nothing to do on install, update, restore, or rebuild.
+// service-interface lookup; the container exposes it via
 // /admin/btcpay/discover so the dashboard can offer one-click setup.
 // Schema of the `btcpay` entry in discovered-services.json. The entry
 // as a whole may be null.
 const btcpayDiscoverySchema = z
  .object({
    // LAN URL (with port) that the operator's browser opens; base for
    // the /api-keys/authorize redirect page.
    browser_url: z.string().nullable(),
    // Public clearnet URL buyers pay through. The relay's
    // rewriteCheckoutUrl swaps this host into BTCPay's returned
    // checkout link before a buyer sees it.
    public_url: z.string().nullable().optional(),
    // Internal Start9 hostname for container-to-container calls; stays
    // inside docker, so it is faster than the LAN URL.
    internal_url: z.string().nullable(),
    // ISO timestamp of the last discovery run — helps spot stale
    // entries after BTCPay has been uninstalled.
    discovered_at: z.string().nullable(),
  })
  .nullable()
 // Schema of the `self` entry: the relay's own clearnet URL, which is
 // registered with BTCPay as the webhook URL. BTCPay's container must be
 // able to resolve it to post InvoiceSettled events; mDNS .local names
 // don't resolve inside docker, clearnet names do.
 const selfDiscoverySchema = z
  .object({
    public_url: z.string().nullable().optional(),
  })
  .nullable()
  .optional()
 // JSON file on the main volume holding the discovered service URLs.
 const discoveryFile = FileHelper.json(
  {
    base: mainVolume,
    subpath: 'discovered-services.json',
  },
  z.object({
    btcpay: btcpayDiscoverySchema,
    self: selfDiscoverySchema,
  }),
 )
 // Look up BTCPay's service interfaces. Returns three URLs (one may
 // be null) used for different audiences:
 //   - browser_url:  what the OPERATOR opens in their browser for the
 //                   authorize flow (mDNS / LAN preferred — same-LAN
 //                   flow keeps everything local)
 //   - public_url:   what BUYERS open to pay (clearnet preferred —
 //                   buyers may be anywhere on the internet, not on
 //                   the operator's LAN). Used by rewriteCheckoutUrl
 //                   on the BTCPay-returned checkout link.
 //   - internal_url: what the RELAY container uses for daemon-to-
 //                   daemon API calls (Start9 docker internal hostname).
 //
 // Returns null when BTCPay isn't installed or has no 'ui' interface.
 //
 // Picking the right URL is the whole trick. addressInfo.format()
 // returns EVERY hostname mapped to this binding — clearnet domain,
 // mDNS `.local`, onion, private IPs, link-local v6, and the LXC
 // bridge gateway (`10.0.3.1` shape) the relay container sees from
 // inside docker. Almost all of these are wrong for a browser link
 // the operator clicks in StartOS dashboard:
 //
 //   - bridge IP (10.0.3.x)        — only resolvable from inside the host
 //   - link-local v6 (fe80::/10)   — same problem
 //   - localhost (127.0.0.1, ::1)  — useless
 //   - public domain (clearnet)    — best, if configured
 //   - mDNS .local                 — what operators actually use day-to-day
 //   - private DNS                 — also fine
 //
 // Order of preference for the operator-facing browser URL:
 //   1. mDNS .local — same-LAN browser hop, keeps the auth + callback
 //                    flow entirely local. Most operators run BTCPay
 //                    and the relay on the same Start9, so mDNS Just
 //                    Works without involving any clearnet hop.
 //   2. private domain (LAN DNS, if the operator runs one)
 //   3. public clearnet domain — fallback for remote operators
 //   4. anything non-local that isn't bridge/link-local/localhost
 async function discoverBtcpay(effects: any): Promise<{
  browser_url: string | null
  public_url: string | null
  internal_url: string | null
 } | null> {
  // Reduce a format() result to its first entry as a string. Anything
  // that is not a non-empty array with a truthy head becomes null.
  const headOf = (formatted: any): string | null =>
    Array.isArray(formatted) && formatted.length > 0 && formatted[0]
      ? String(formatted[0])
      : null
  // Run the candidates lazily, in order, and return the first truthy
  // URL. A candidate that throws counts as "no URL for this tier".
  const pickFirst = (candidates: Array<() => string | null>): string | null => {
    for (const candidate of candidates) {
      try {
        const url = candidate()
        if (url) return url
      } catch {
        // try next tier
      }
    }
    return null
  }
  try {
    const interfaces = await sdk.serviceInterface
      .getAll(effects, { packageId: 'btcpayserver' })
      .const()
    if (!interfaces || interfaces.length === 0) return null
    const uiInterface = interfaces.find((entry: any) => entry?.type === 'ui')
    if (!uiInterface?.addressInfo) return null
    const addresses = uiInterface.addressInfo
    // One lazy lookup per address tier. Nothing is evaluated until
    // pickFirst() reaches it, so later tiers are never queried once an
    // earlier tier has produced a URL.
    const viaMdns = () => headOf(addresses.filter({ kind: 'mdns' }).format?.())
    const viaPrivateDomain = () =>
      headOf(addresses.filter({ visibility: 'private', kind: 'domain' }).format?.())
    const viaPublicDomain = () =>
      headOf(addresses.public?.filter({ kind: 'domain' }).format?.())
    const viaAnyNonLocal = () =>
      headOf(
        addresses.nonLocal
          ?.filter({
            exclude: { kind: ['bridge', 'link-local', 'localhost'] },
          })
          .format?.(),
      )
    return {
      // Operator-facing link: mDNS, then private domain, then public
      // clearnet domain, then any non-local address that is not a
      // bridge / link-local / localhost one.
      browser_url: pickFirst([
        viaMdns,
        viaPrivateDomain,
        viaPublicDomain,
        viaAnyNonLocal,
      ]),
      // Buyer-facing link used to rewrite BTCPay's checkout URL:
      // clearnet domain first, falling back to private domain and then
      // mDNS when BTCPay has no clearnet address.
      public_url: pickFirst([viaPublicDomain, viaPrivateDomain, viaMdns]),
      // Server-to-server URL: the Start9-internal docker hostname for
      // BTCPay on port 23000. Hardcoded rather than discovered through
      // the SDK; it relies on btcpayserver being declared as a
      // dependency so the hostname resolves inside this container.
      internal_url: 'http://btcpayserver.startos:23000',
    }
  } catch (err: any) {
    console.warn(
      `[setup] BTCPay discovery skipped: ${err?.message || err}`,
    )
    return null
  }
 }
 // Discover the relay's OWN clearnet URL by walking our own service
 // interfaces. Used as the webhook URL we register with BTCPay —
 // BTCPay's container needs a hostname it can resolve internally
 // (clearnet via standard DNS works, mDNS doesn't).
 async function discoverSelfPublicUrl(effects: any): Promise<string | null> {
  try {
    const ownInterfaces = await sdk.serviceInterface.getAllOwn(effects).const()
    if (!ownInterfaces || ownInterfaces.length === 0) return null
    // Prefer the 'ui' interface; otherwise use whichever is listed first.
    const chosen =
      ownInterfaces.find((entry: any) => entry?.type === 'ui') || ownInterfaces[0]
    if (!chosen?.addressInfo) return null
    const addresses = chosen.addressInfo
    // Evaluate one lookup and reduce its format() result to the first
    // entry as a string. A lookup that throws, or that yields anything
    // other than a non-empty array with a truthy head, gives null.
    const probe = (lookup: () => any): string | null => {
      try {
        const formatted = lookup()
        if (Array.isArray(formatted) && formatted.length > 0 && formatted[0]) {
          return String(formatted[0])
        }
      } catch {
        // try next
      }
      return null
    }
    // Public clearnet domain first, then private domain. mDNS is left
    // out on purpose: it won't resolve from BTCPay's container.
    return (
      probe(() => addresses.public?.filter({ kind: 'domain' }).format?.()) ||
      probe(() =>
        addresses.filter({ visibility: 'private', kind: 'domain' }).format?.(),
      ) ||
      null
    )
  } catch (err: any) {
    console.warn(`[setup] self-discovery skipped: ${err?.message || err}`)
    return null
  }
 }
 export const setup = sdk.setupOnInit(async (effects, kind) => {
  // Re-probe on every install / update and always persist the outcome,
  // "nothing found" included, so the container never reads a leftover
  // hit from a BTCPay that has since been uninstalled.
  const btcpay = await discoverBtcpay(effects)
  const selfPublicUrl = await discoverSelfPublicUrl(effects)
  // Stamp a successful BTCPay discovery with the time it was taken.
  const btcpayRecord = btcpay
    ? { ...btcpay, discovered_at: new Date().toISOString() }
    : null
  const selfRecord = selfPublicUrl ? { public_url: selfPublicUrl } : null
  await discoveryFile.write(effects, {
    btcpay: btcpayRecord,
    self: selfRecord,
  })
  const btcpayLogLine = btcpay
    ? `[setup] discovered BTCPay at ${btcpay.browser_url || '(no browser URL)'}`
    : `[setup] BTCPay not detected on this Start9 — credit-purchase flow will require a manual URL`
  console.info(btcpayLogLine)
  console.info(
    `[setup] self clearnet URL: ${selfPublicUrl || '(none — webhooks will use the operator-facing host header at finalize time, may fail if mDNS)'}`,
  )
 })
@@ -3,7 +3,9 @@ export const short =
 export const long =
  'Recap Relay is the operator-side service that fronts Gemini (and ' +
-  'optionally a local Parakeet+Gemma setup) for Recap installs. It ' +
+  'optionally an operator-hosted transcribe + analyze setup over a ' +
  'Whisper-API-compatible STT endpoint and an OpenAI-compatible ' +
  'chat-completions endpoint) for Recap installs. It ' +
  "tracks per-install credit balances, enforces tier-based monthly " +
  'quotas, and proxies transcribe/analyze calls so Core users can ' +
  'summarize a handful of videos without paying and paid tiers get ' +
@@ -31,8 +31,23 @@ export const manifest = setupManifest({
    start: null,
    stop: null,
  },
-  // Relay has no required dependencies — Gemini is internet-fronted
+  // Gemini is internet-fronted and needs no Start9 dependency,
  // and the optional Parakeet/Gemma backends are at user-configured
  // URLs (typically a separate machine on the operator's LAN).
-  dependencies: {},
+  //
  // BTCPay Server is the relay's one REQUIRED dependency
  // (optional: false). Declaring it lets the dashboard's "Connect
  // BTCPay" flow auto-discover its URL via
  // sdk.serviceInterface.getAll(), and the relay-to-BTCPay API calls
  // rely on the internal btcpayserver.startos:23000 hostname that is
  // only wired up for a required running dependency. If discovery
  // still finds no BTCPay, only the credit-purchase flow is affected.
  dependencies: {
    btcpayserver: {
      description: {
        en_US:
          'Required for relay credit top-ups via Lightning. The relay-to-BTCPay API calls use the internal Start9 docker hostname (btcpayserver.startos:23000), which is only wired up when BTCPay is declared as a required running dependency. Without this dependency the one-click BTCPay setup flow fails with "fetch failed".',
      },
      optional: false,
      s9pk: null,
    },
  },
 })
@@ -12,8 +12,121 @@ import { v_0_2_8 } from './v0.2.8'
 import { v_0_2_9 } from './v0.2.9'
 import { v_0_2_10 } from './v0.2.10'
 import { v_0_2_11 } from './v0.2.11'
 import { v_0_2_12 } from './v0.2.12'
 import { v_0_2_13 } from './v0.2.13'
 import { v_0_2_14 } from './v0.2.14'
 import { v_0_2_15 } from './v0.2.15'
 import { v_0_2_16 } from './v0.2.16'
 import { v_0_2_17 } from './v0.2.17'
 import { v_0_2_18 } from './v0.2.18'
 import { v_0_2_19 } from './v0.2.19'
 import { v_0_2_20 } from './v0.2.20'
 import { v_0_2_21 } from './v0.2.21'
 import { v_0_2_22 } from './v0.2.22'
 import { v_0_2_23 } from './v0.2.23'
 import { v_0_2_24 } from './v0.2.24'
 import { v_0_2_25 } from './v0.2.25'
 import { v_0_2_26 } from './v0.2.26'
 import { v_0_2_27 } from './v0.2.27'
 import { v_0_2_28 } from './v0.2.28'
 import { v_0_2_29 } from './v0.2.29'
 import { v_0_2_30 } from './v0.2.30'
 import { v_0_2_31 } from './v0.2.31'
 import { v_0_2_32 } from './v0.2.32'
 import { v_0_2_33 } from './v0.2.33'
 import { v_0_2_34 } from './v0.2.34'
 import { v_0_2_35 } from './v0.2.35'
 import { v_0_2_36 } from './v0.2.36'
 import { v_0_2_37 } from './v0.2.37'
 import { v_0_2_38 } from './v0.2.38'
 import { v_0_2_39 } from './v0.2.39'
 import { v_0_2_40 } from './v0.2.40'
 import { v_0_2_41 } from './v0.2.41'
 import { v_0_2_42 } from './v0.2.42'
 import { v_0_2_43 } from './v0.2.43'
 import { v_0_2_44 } from './v0.2.44'
 import { v_0_2_45 } from './v0.2.45'
 import { v_0_2_46 } from './v0.2.46'
 import { v_0_2_47 } from './v0.2.47'
 import { v_0_2_48 } from './v0.2.48'
 import { v_0_2_49 } from './v0.2.49'
 import { v_0_2_50 } from './v0.2.50'
 import { v_0_2_51 } from './v0.2.51'
 import { v_0_2_52 } from './v0.2.52'
 import { v_0_2_53 } from './v0.2.53'
 import { v_0_2_54 } from './v0.2.54'
 import { v_0_2_55 } from './v0.2.55'
 import { v_0_2_56 } from './v0.2.56'
 import { v_0_2_57 } from './v0.2.57'
 import { v_0_2_58 } from './v0.2.58'
 import { v_0_2_59 } from './v0.2.59'
 import { v_0_2_60 } from './v0.2.60'
 import { v_0_2_61 } from './v0.2.61'
 import { v_0_2_62 } from './v0.2.62'
 import { v_0_2_63 } from './v0.2.63'
 import { v_0_2_64 } from './v0.2.64'
 import { v_0_2_65 } from './v0.2.65'
 import { v_0_2_66 } from './v0.2.66'
 import { v_0_2_67 } from './v0.2.67'
 import { v_0_2_68 } from './v0.2.68'
 import { v_0_2_69 } from './v0.2.69'
 import { v_0_2_70 } from './v0.2.70'
 import { v_0_2_71 } from './v0.2.71'
 import { v_0_2_72 } from './v0.2.72'
 import { v_0_2_73 } from './v0.2.73'
 import { v_0_2_74 } from './v0.2.74'
 import { v_0_2_75 } from './v0.2.75'
 import { v_0_2_76 } from './v0.2.76'
 import { v_0_2_77 } from './v0.2.77'
 import { v_0_2_78 } from './v0.2.78'
 import { v_0_2_79 } from './v0.2.79'
 import { v_0_2_80 } from './v0.2.80'
 import { v_0_2_81 } from './v0.2.81'
 import { v_0_2_82 } from './v0.2.82'
 import { v_0_2_83 } from './v0.2.83'
 import { v_0_2_84 } from './v0.2.84'
 import { v_0_2_85 } from './v0.2.85'
 import { v_0_2_86 } from './v0.2.86'
 import { v_0_2_87 } from './v0.2.87'
 import { v_0_2_88 } from './v0.2.88'
 import { v_0_2_89 } from './v0.2.89'
 import { v_0_2_90 } from './v0.2.90'
 import { v_0_2_91 } from './v0.2.91'
 import { v_0_2_92 } from './v0.2.92'
 import { v_0_2_93 } from './v0.2.93'
 import { v_0_2_94 } from './v0.2.94'
 import { v_0_2_95 } from './v0.2.95'
 import { v_0_2_96 } from './v0.2.96'
 import { v_0_2_97 } from './v0.2.97'
 import { v_0_2_98 } from './v0.2.98'
 import { v_0_2_99 } from './v0.2.99'
 import { v_0_2_100 } from './v0.2.100'
 import { v_0_2_101 } from './v0.2.101'
 import { v_0_2_102 } from './v0.2.102'
 import { v_0_2_103 } from './v0.2.103'
 import { v_0_2_104 } from './v0.2.104'
 import { v_0_2_105 } from './v0.2.105'
 import { v_0_2_106 } from './v0.2.106'
 import { v_0_2_107 } from './v0.2.107'
 import { v_0_2_108 } from './v0.2.108'
 import { v_0_2_109 } from './v0.2.109'
 import { v_0_2_110 } from './v0.2.110'
 import { v_0_2_111 } from './v0.2.111'
 import { v_0_2_112 } from './v0.2.112'
 import { v_0_2_113 } from './v0.2.113'
 import { v_0_2_114 } from './v0.2.114'
 import { v_0_2_115 } from './v0.2.115'
 import { v_0_2_116 } from './v0.2.116'
 import { v_0_2_117 } from './v0.2.117'
 import { v_0_2_118 } from './v0.2.118'
 import { v_0_2_119 } from './v0.2.119'
 import { v_0_2_120 } from './v0.2.120'
 import { v_0_2_121 } from './v0.2.121'
 import { v_0_2_122 } from './v0.2.122'
 import { v_0_2_123 } from './v0.2.123'
 import { v_0_2_124 } from './v0.2.124'
 export const versionGraph = VersionGraph.of({
-  current: v_0_2_11,
+  current: v_0_2_124,
-  other: [v_0_2_10, v_0_2_9, v_0_2_8, v_0_2_7, v_0_2_6, v_0_2_5, v_0_2_4, v_0_2_3, v_0_2_2, v_0_2_1, v_0_2_0, v_0_1_0],
+  other: [v_0_2_123, v_0_2_122, v_0_2_121, v_0_2_120, v_0_2_119, v_0_2_118, v_0_2_117, v_0_2_116, v_0_2_115, v_0_2_114, v_0_2_113, v_0_2_112, v_0_2_111, v_0_2_110, v_0_2_109, v_0_2_108, v_0_2_107, v_0_2_106, v_0_2_105, v_0_2_104, v_0_2_103, v_0_2_102, v_0_2_101, v_0_2_100, v_0_2_99, v_0_2_98, v_0_2_97, v_0_2_96, v_0_2_95, v_0_2_94, v_0_2_93, v_0_2_92, v_0_2_91, v_0_2_90, v_0_2_89, v_0_2_88, v_0_2_87, v_0_2_86, v_0_2_85, v_0_2_84, v_0_2_83, v_0_2_82, v_0_2_81, v_0_2_80, v_0_2_79, v_0_2_78, v_0_2_77, v_0_2_76, v_0_2_75, v_0_2_74, v_0_2_73, v_0_2_72, v_0_2_71, v_0_2_70, v_0_2_69, v_0_2_68, v_0_2_67, v_0_2_66, v_0_2_65, v_0_2_64, v_0_2_63, v_0_2_62, v_0_2_61, v_0_2_60, v_0_2_59, v_0_2_58, v_0_2_57, v_0_2_56, v_0_2_55, v_0_2_54, v_0_2_53, v_0_2_52, v_0_2_51, v_0_2_50, v_0_2_49, v_0_2_48, v_0_2_47, v_0_2_46, v_0_2_45, v_0_2_44, v_0_2_43, v_0_2_42, v_0_2_41, v_0_2_40, v_0_2_39, v_0_2_38, v_0_2_37, v_0_2_36, v_0_2_35, v_0_2_34, v_0_2_33, v_0_2_32, v_0_2_31, v_0_2_30, v_0_2_29, v_0_2_28, v_0_2_27, v_0_2_26, v_0_2_25, v_0_2_24, v_0_2_23, v_0_2_22, v_0_2_21, v_0_2_20, v_0_2_19, v_0_2_18, v_0_2_17, v_0_2_16, v_0_2_15, v_0_2_14, v_0_2_13, v_0_2_12, v_0_2_11, v_0_2_10, v_0_2_9, v_0_2_8, v_0_2_7, v_0_2_6, v_0_2_5, v_0_2_4, v_0_2_3, v_0_2_2, v_0_2_1, v_0_2_0, v_0_1_0],
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version record for 0.2.100:0. It carries release notes only: both
 // the `up` and `down` migration hooks are empty functions.
 // NOTE(review): the en_US string literal continues onto a second line
 // here — presumably a wrap introduced by the diff rendering; confirm
 // the source file keeps it on one line.
 export const v_0_2_100 = VersionInfo.of({
  version: '0.2.100:0',
  releaseNotes: {
    en_US:
      "Three large changes shipped together. (1) Hardware FIFO job queue. Module-level semaphore in server/hardware-queue.js serializes hardware-backed pipelines so the operator's GPU box doesn't get hammered by N concurrent jobs. Gemini-only jobs bypass entirely. When a job waits in the queue it emits a 'queued' SSE event with its position so Recap can show 'Queued — N jobs ahead' instead of looking stalled. Within a job, the existing per-chunk concurrency (2 by default) is preserved — the queue serializes WHOLE jobs, not individual calls within a job. Slot release is wrapped in try/finally so every exit path (success, early-return failure, uncaught exception) cleanly frees the slot for the next queued job. (2) Small-cluster suppression. After agglomerative clustering, a second pass walks the clusters and re-categorizes the small ones to fix the '14 speakers detected when really only 2' case. Anchors (clusters with >= 30s of total speaking time) keep their own global ID. Non-anchor clusters within UNCERTAIN_SIMILARITY_MARGIN (0.10) of the main threshold of any anchor get REASSIGNED to that anchor — chip renders with a '?' suffix to mark best-guess attribution. Non-anchor clusters that are small (< 15s) AND far-from-anchor become a single 'Speaker_Unknown' pseudo-speaker — chip is grey '?', legend reads 'Unknown'. Large non-anchor clusters with low similarity stay as their own speaker (plausibly a real 3rd person). All thresholds are code-side constants for now. (3) Auto-save UX overhaul on the operator dashboard. Sliders, pills, toggles, text fields (Endpoints & credentials + Section-count targets) all auto-save on change with a 400ms debounce; per-field rapid changes coalesce into a single PUT. Prompts get their own per-prompt 'Save this prompt' button next to their existing Reset / Set as new default / Show current default controls — large textareas don't auto-save on every keystroke. 
Global 'Save changes' button removed entirely; the inline 'saving… / ✓ saved' status indicator stays in the bottom right.",
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version record for 0.2.101:0. It carries release notes only: both
 // the `up` and `down` migration hooks are empty functions.
 export const v_0_2_101 = VersionInfo.of({
  version: '0.2.101:0',
  releaseNotes: {
    en_US:
      "Two operator-experience improvements. (1) The 'saved' / 'save failed' status indicator that auto-save fires now appears as a fixed-position toast in the bottom-right corner of the viewport instead of an inline span at the bottom of the settings tab. Previously the indicator was below-the-fold for most operators — saves felt silent. New toast appears immediately on save start, turns green '✓ saved' on success (2s fade), red 'save failed: ...' on error (5s fade). The in-page status span at the bottom of settings stays as a fallback. (2) The three diarization suppression thresholds are now operator settings, exposed as sliders in Settings → Operator hardware: 'Anchor min speaking time' (5-120s, default 30), 'Small-cluster suppress under' (1-60s, default 15), 'Uncertain reassignment margin' (0-30%, default 10). The defaults match what shipped hardcoded in v0.2.100 — no behavior change unless the operator tunes. Operator can now experiment without a relay redeploy if a specific video produces over- or under-suppressed cluster counts.",
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version record for 0.2.102:0. It carries release notes only: both
 // the `up` and `down` migration hooks are empty functions.
 export const v_0_2_102 = VersionInfo.of({
  version: '0.2.102:0',
  releaseNotes: {
    en_US:
      "Hardware-queue status chip in the operator dashboard. A small fixed-position pill in the top-right corner appears when the hardware FIFO queue has activity: 'Hardware: <jobid> · N waiting' with a pulsing dot. Hidden when nothing is running on the hardware path. Hover tooltip shows the full active job ID, total queue depth, and a reminder that Gemini-backend jobs bypass the queue entirely. New GET /admin/hardware-queue endpoint returns the live status (pendingCount, currentJobId, recent log entries) — dashboard polls it every 3s. Operator can now glance at the chip to know whether a new submission will start immediately or wait, without tailing relay logs.",
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version record for 0.2.103:0. It carries release notes only: both
 // the `up` and `down` migration hooks are empty functions.
 export const v_0_2_103 = VersionInfo.of({
  version: '0.2.103:0',
  releaseNotes: {
    en_US:
      "Path 2A Phase 1 backend — internal team meeting upload + analysis. New /admin/internal-meetings/* endpoint family, all operator-only (gated by the existing admin session cookie). POST /upload accepts a multipart audio file (mp3/m4a/wav up to 500MB) + title + comma-separated participant hints, runs it through the EXACT SAME hardware pipeline that /relay/v1/summarize-url uses (transcribe → diarize → cluster → suppress small clusters → analyze → post-cluster polish for speaker-aware summaries). Goes through the existing hardware FIFO queue so the top-bar queue chip covers internal meetings too. SSE stream at /jobs/:id/stream pushes live progress events identical in shape to the YouTube/podcast path. Audio is NEVER retained — uploaded files land in os.tmpdir(), are deleted in a try/finally after the pipeline completes (success or failure), and never copied elsewhere. Same audio-retention policy as YouTube downloads. Saved record lives at /data/internal-meetings/<jobId>.json with full transcript + transcript_segments (speaker-attributed) + chunks (Recaps-style topic structure) + speakers map + speaker_names from polish + diarization metadata. Three download formats: GET /admin/internal-meetings/:id returns the JSON record, GET /:id/download forces a JSON file download with the meeting title as filename, GET /:id/markdown returns a human-readable markdown serialization (title + duration + speakers + topics-with-expandable-transcripts + full bracketed transcript at the bottom). Frontend (dashboard tab) ships in the next release. Phase 2 (Decisions / Action Items / Open Questions / Key Quotes structured extras analysis) and Phase 3 (multiple operator-editable prompt sets) are queued for follow-up.",
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version record for 0.2.104:0. It carries release notes only: both
 // the `up` and `down` migration hooks are empty functions.
 export const v_0_2_104 = VersionInfo.of({
  version: '0.2.104:0',
  releaseNotes: {
    en_US:
      "Path 2A Phase 1 frontend — Internal Meetings tab in the operator dashboard. New tab between 'Jobs' and 'Settings'. Three sub-views: list (upload card + past meetings), live (processing job with SSE-streamed events + status), detail (full meeting view). Upload card accepts an audio file + title + optional comma-separated participant hints; submit kicks off the relay backend's /upload endpoint and switches the view to a live progress display. SSE events stream in real-time and auto-flip the view to the detail page when the 'done' event fires. Detail view renders the meeting Recaps-style: speakers legend with chips matching Recaps' color palette (Speaker_Unknown = grey '?', uncertain reassignments = '?' suffix), topic cards using <details> elements that expand to reveal speaker-attributed transcript lines below each summary, with timestamp + colored chip + text per line. Three downloads + delete buttons in the title bar: 'Download .md' (markdown — same human-readable format the relay's markdown endpoint produces, suitable for LLM input), 'Download .json' (full saved record), 'Delete' (with confirm prompt). Past-meetings list shows title + duration + topic count + speaker count per row; click anywhere on a row to open. Tab state persists to localStorage like the others so a hard refresh keeps you where you were.",
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version record for 0.2.105:0. It carries release notes only: both
 // the `up` and `down` migration hooks are empty functions.
 export const v_0_2_105 = VersionInfo.of({
  version: '0.2.105:0',
  releaseNotes: {
    en_US:
      "Bug fix — hardware backend's transcribeAudio hung forever on short audio (anything fitting in a single transcribe chunk, i.e. ≤ 5 minutes at the default settings). Two symptoms: (1) the SSE transcript_complete event arrived with transcript:\"\" because the single-shot code path emitted segments but never stitched them into bracketed [mm:ss]-prefixed text; (2) when pipelined-analyze was in use (Internal Meetings, and YouTube videos shorter than the chunk threshold), the analyze worker waited forever on chunkBuffer.waitForTime(audioSec) because the single-shot path never fired the onChunkComplete callback that advances the buffer's covered-time cursor. Diarization was also silently skipped for short audio for the same reason — the short-circuit bypassed the diarize+cluster code paths entirely. Fix: short-audio runs now flow through the same worker loop as multi-chunk runs by synthesizing a single chunk descriptor that points at the original audio file (no ffmpeg cut needed). Diarization, onChunkComplete, cross-chunk clustering, and bracketed-text stitching all run uniformly regardless of chunk count. Benefits Internal Meetings (Path 2A) AND short YouTube videos that take the hardware backend.",
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version record for 0.2.106:0. It carries release notes only: both
 // the `up` and `down` migration hooks are empty functions.
 export const v_0_2_106 = VersionInfo.of({
  version: '0.2.106:0',
  releaseNotes: {
    en_US:
      "Internal Meetings — two improvements. (1) Speaker chips now render next to every transcript line in the topic-expand view. Earlier records showed only the speaker legend at the top (e.g. 'Jonathan 0:40 · Grant 0:21') but no chip next to each individual quote, because the speaker-attach loop missed entries due to a precision mismatch: bracketed timestamps in the transcript text are floored integers, but the underlying diarized segment starts are floats (e.g. 0.32s), so the strict containment check rejected all start-of-segment matches. Fixed by using Math.floor(seg.start) as the join key, plus tolerant fallback to the nearest preceding segment. Old records from earlier v0.2.10x ships get the chips back automatically on next view — the relay backfills speakers from transcript_segments on read when entries are missing them (non-destructive, no re-upload required). (2) New 'Download .html' button alongside Download .md / .json / Delete. Produces a single self-contained HTML page with inlined CSS that renders the meeting exactly the way the dashboard's detail view does — title, summary stats line, speaker legend with colored chips, topic cards with native <details>-driven expandable transcripts (each line gets its timestamp + colored speaker chip + text), and the full bracketed transcript at the bottom in a collapsed details block. Print-friendly stylesheet flattens the dark theme for paper. Email it, drag it into a browser, drop it in a shared folder — renders the same anywhere. Reserves space for the Phase 2 extras section (Decisions / Action Items / Open Questions / Key Quotes) to slot in above the topics once that pipeline ships.",
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version record for 0.2.107:0. It carries release notes only: both
 // the `up` and `down` migration hooks are empty functions.
 export const v_0_2_107 = VersionInfo.of({
  version: '0.2.107:0',
  releaseNotes: {
    en_US:
      "Internal Meetings — transcript-completeness fix. Earlier versions saved each topic's entries[] by strictly slicing canonicalEntries[startIndex..endIndex] using the LLM analyze pass's section boundaries. When the LLM left gaps between sections (which it does often on short meetings — orphaning filler chatter, 'yeah', 'right', brief transitions), those entries vanished from the rendered topic cards: topics showed single-statement bodies under summaries that clearly described more dialogue, and consecutive topic timestamps had 12s-46s gaps with nothing visible between them. Fix: each chunk's slice now extends to fill the gap to the next chunk's startIndex (and the first chunk backfills to entry 0, last chunk to end of transcript), so every canonical entry lives in exactly one chunk's entries[]. Result: contiguous topic timestamps, no transcript lines silently dropped, summaries match the visible content. The LLM's original section boundaries stay in rec.analysis.sections for forensics — only the rendered chunks[] becomes gap-absorbing. Also retroactive: old records saved by v0.2.103-v0.2.106 get re-sliced on read using rec.transcript as the source of truth, so the existing meeting in the dashboard becomes contiguous immediately without a re-upload (prior speaker chip attributions carry over to the re-sliced entries).",
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version record for 0.2.108:0. It carries release notes only: both
 // the `up` and `down` migration hooks are empty functions.
 // NOTE(review): the en_US string literal continues onto a second line
 // here — presumably a wrap introduced by the diff rendering; confirm
 // the source file keeps it on one line.
 export const v_0_2_108 = VersionInfo.of({
  version: '0.2.108:0',
  releaseNotes: {
    en_US:
      "Internal Meetings — three improvements. (1) Topic timestamp ranges now display contiguously like Recaps does for YouTube + podcasts. Earlier versions showed each topic's range as 'first entry's offset — LAST entry's offset', which left visible gaps even though v0.2.107 made the entries themselves contiguous. Now the displayed end equals the next topic's start (last topic extends to the full audio duration) so consecutive topic cards read as adjacent timeline ranges. Applies to the dashboard detail view + .html + .md downloads. (2) Click-to-rename for speakers in the legend. The polish-pass name inference is a best-guess from early-meeting context — on internal calls where participants rarely say each other's names, or two people share a first name, it often gets things wrong. Now you can click any speaker name in the legend, type the correction (Enter to save / Escape to cancel), and the new name flows everywhere — chips on every transcript line, the legend, and .md / .html / .json downloads. Persists to rec.speaker_names server-side via PATCH /admin/internal-meetings/:id/speakers. Speaker_Unknown stays read-only (it's a catch-all bucket, not a real person). Speaker MERGES (two cluster IDs are actually one person) are tracked as a separate follow-up — for now, two clusters renamed to the same display name show identical legend labels but stay as distinct chips on per-line attribution. (3) Speech-detection percentage indicator. The legend's header now shows '<X>% speech detected' next to the click-to-rename hint when speaker times sum to less than the call's audio duration — Sortformer skips silence, pauses, and low-volume audio, so the speaking totals naturally undershoot wall-clock duration. This makes the gap explicit so it's not mysterious why six speakers totalling 60 minutes appear on a 90-minute call. 
Diarization quality tuning (cluster threshold + suppression knobs) remains in Settings → Operator hardware for operator iteration; cross-call speaker fingerprint memory (so renamed speakers are auto-recognized in future calls) is on the roadmap.",
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version record for 0.2.109:0. It carries release notes only: both
 // the `up` and `down` migration hooks are empty functions.
 export const v_0_2_109 = VersionInfo.of({
  version: '0.2.109:0',
  releaseNotes: {
    en_US:
      "Internal Meetings — per-line speaker re-assignment. Diarization on 4+ voices gets noisy in real meetings: similar voices get merged, mid-sentence speaker swaps get assigned to the wrong person, and the LLM name-inference compounds the issue when it confidently picks the wrong person from limited early-meeting context. Sometimes the LLM even invents names that aren't on the participant list. Now every speaker chip on the transcript is a clickable button — click it to open a small popover listing every speaker in the meeting (each rendered as its colored chip + name + speaking time), plus a 'Clear override (revert to auto)' option for any line you've already corrected. Pick the right speaker → POST → the chip updates in place. Operator-corrected chips render with a DASHED border (vs. solid for diarization-attributed) so you can scan a long transcript and see at a glance which lines you've already fixed. Override lives on a separate field (entry.speaker_override) so the original Sortformer attribution is never destroyed — you can always click 'Clear override' to revert a line. Click outside or press Escape to dismiss the picker. Corrections flow into every output: dashboard chips, the .html download, the .md download, and the .json download. Works alongside the v0.2.108 global rename (click a speaker NAME in the legend) — typical workflow: rename Speaker_A to the right person globally, then sweep through the transcript and re-attribute the handful of lines diarization got wrong. Speaker MERGES (collapsing two cluster IDs that are actually the same person into one) and cross-call speaker fingerprint memory remain on the roadmap.",
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for '0.2.110:0'.
 // The release-note text must stay on ONE physical line: a double-quoted
 // string literal cannot contain a raw line break, so the previously
 // wrapped tail ("Phase 3 ...") is joined back onto the same line.
 export const v_0_2_110 = VersionInfo.of({
  version: '0.2.110:0',
  releaseNotes: {
    en_US:
      "Internal Meetings — Phase 2 of Path 2A: meeting extras. One additional LLM pass at the end of the pipeline (after transcribe → diarize → cluster → analyze → name-inference → summary-polish) pulls out four categories of structured information operators consistently want at the top of a meeting recap: Decisions (what was agreed, who agreed, the timestamp where it was settled), Action items (who owes what, by when if mentioned, supporting timestamp), Open questions (questions raised that didn't get resolved, who asked them), and Key quotes (3-6 pivotal statements worth surfacing verbatim, with the speaker + offset + why-notable note). Each item carries speaker cluster IDs so existing operator renames AND per-line overrides flow through — the extras chips reflect the same Speaker_A → 'Matt Hill' display throughout. Each item also carries an offset in seconds rendered as a clickable [m:ss] timestamp in the dashboard that jumps to the supporting transcript line (opens the containing topic's details panel and highlight-flashes the row); in the .html download, timestamps render as styled tags but are static (it's a shareable file). Extras render ABOVE the topics list with four collapsible sections (default expanded). Empty categories collapse; if all four are empty the whole block hides. Flows into all download formats — dashboard render, .html download (with matching inline CSS), .md download (top-level ## sections), and .json (rec.extras). Failure is non-fatal — if the LLM can't return valid JSON after 3 retries, rec.extras stays null and the UI just hides the section. Cost: ~5-15s wall time added to the end of the pipeline. Operator toggle: relay_meeting_extras_enabled (default true) in config; operator-editable prompt: relay_meeting_extras_prompt. Schema is intentionally conservative — the prompt explicitly instructs 'empty array beats fabricated items' and validates each item shape on parse, dropping malformed entries silently. Phase 3 (multiple operator-editable prompt sets — default / 1on1 / all-hands / customer-interview / standup, selectable per upload) is the next item on Path 2A.",
  },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // English release-note text for 0.2.111, hoisted out of the VersionInfo
 // literal so the structure of the entry is visible at a glance.
 const englishNotes =
   "Diarization speaker-time accounting fix. Two bugs in the cluster-summary builder were producing dramatically deflated speaker totals on real meetings — a 1.5-hour team call was showing as '34% speech detected' when in reality 70%+ of the audio was speech. (1) Unfingerprinted segments were silently dropped. Sortformer routinely emits diarization segments for speakers whose voice TitaNet didn't aggregate a fingerprint for (very brief utterances, soft speech, overlapped speech). The speaking-time accumulator only counted segments whose speaker_local had a fingerprint — every other segment fell through to `globalId === undefined → continue` and disappeared from the totals entirely. Now unmapped segments bucket into Speaker_Unknown so the time is still accounted for. The per-line chip on the transcript still shows '?' for those lines — they just aren't claimed by a wrong cluster, and the Unknown legend entry reflects the real total of un-clusterable speech. (2) Chunk-overlap double-counting. Transcribe segments were already deduped at the 30s overlap boundary between consecutive chunks (handled in hardware.js), but diarization segments weren't — the same speech in an overlap zone was counted toward BOTH chunks, inflating speaker totals. Now diar segments are deduped using the same overlapBoundarySec the transcribe path uses; the relay backend stamps the global boundary onto each chunk's diarization data so the clustering module can drop overlap-zone duplicates. Net effect: speaker totals on a real meeting should jump from ~30-40% of audio duration to 70-90%, with Speaker_Unknown absorbing legitimately un-clusterable speech. Note: existing saved records keep their pre-fix totals (the clustering pass runs once during pipeline execution, not on every read). To see the corrected totals on an old meeting, re-upload the audio."
 // Version entry for '0.2.111:0'.
 export const v_0_2_111 = VersionInfo.of({
  version: '0.2.111:0',
  releaseNotes: { en_US: englishNotes },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // English release-note text for 0.2.112, hoisted out of the VersionInfo
 // literal below.
 const englishNotes =
   "Settings UI — exposes the two internal-meetings extras controls that shipped in v0.2.110 but were missing from the dashboard. (1) Toggle 'Internal-meetings extras (decisions / action items / questions / quotes)' lands under Settings → Operator hardware, right below the Speaker-aware summary polish toggle. Same on/off behaviour as the underlying relay_meeting_extras_enabled field — flip off to skip the ~5-15s LLM call if it's unreliable on your hardware. (2) Prompt editor 'Internal-meetings extras prompt' lands under Settings → LLM prompts, after the two polish prompts. Same per-field promote-to-default / reset-to-default / show-current-default pattern as the existing editable prompts. Required template variable: {{transcript}}. Optional: {{title}}, {{duration}}, {{speakerRoster}}, {{topics}}. Save-time validation enforces that the prompt mentions JSON output (so the parser can't silently start failing on a hand-edited template that forgot to ask for JSON). Both controls only affect the internal-meetings flow — YouTube/podcast summarize ignores them."
 // Version entry for '0.2.112:0'.
 export const v_0_2_112 = VersionInfo.of({
  version: '0.2.112:0',
  releaseNotes: { en_US: englishNotes },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // English release-note text for 0.2.113, hoisted out of the VersionInfo
 // literal below.
 const englishNotes =
   "Internal Meetings — TL;DR added as the first meeting extra. The extras LLM pass now produces an additional 2-4 sentence executive summary of the whole meeting (what it was about, the discussion arc, the bottom-line outcome) alongside the existing decisions / action items / open questions / key quotes. Renders as a highlighted accent-bordered callout block ABOVE the four collapsible extras sections — it's the first thing the reader sees, not collapsed away. Also includes a 'primary speakers' row showing the 1-3 people most central to the discussion as chips (using your existing renames + per-line overrides for display). Flows into every output: dashboard detail view, .html download (with matching inline CSS + a print-friendly variant), .md download (as a '## TL;DR' section above the other extras headings), and .json (rec.extras.tldr = { summary, primary_speakers }). Schema-validated on parse — malformed TL;DR objects get dropped silently rather than failing the whole extras pass. Default prompt updated to make TL;DR the only REQUIRED category — even substanceless meetings (a 3-minute check-in, an audio test) get a one-sentence factual descriptor instead of fabricated content; the other four categories remain conservative ('empty array beats fabricated items'). Operators who already customized relay_meeting_extras_prompt in Settings → LLM prompts will keep their existing prompt (which won't produce a TL;DR field yet) — paste the TL;DR section + renumber 1-4 to 2-5 + add the tldr key to your JSON example to opt in."
 // Version entry for '0.2.113:0'.
 export const v_0_2_113 = VersionInfo.of({
  version: '0.2.113:0',
  releaseNotes: { en_US: englishNotes },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // English release-note text for 0.2.114, hoisted out of the VersionInfo
 // literal below.
 const englishNotes =
   "Spark Control 503 'CUDA wedge' retry — proper multi-attempt loop on both transcribe AND diarize paths. The Parakeet wrapper and the Sortformer/TitaNet wrapper both return HTTP 503 with a body of {detail: 'Parakeet returned a transient error (likely CUDA wedge). Auto-restart triggered; retry in ~60s.'} when the GPU container wedges and Spark Control kicks an automatic container restart. Before this ship: the diarize path retried ONCE on 503 (then failed), and the transcribe path didn't have a 503-aware retry at all — its 'retrying bare' fallback was a PARAMETER fix (drop verbose_json) that ran immediately and just hit the same wedge again, then permanently failed the chunk. Symptom: a multi-chunk job that starts two chunks in parallel can hit a single wedge at job start and lose both chunks, breaking the whole pipeline. Now: each path waits and retries up to 4 attempts total per chunk, honoring the Retry-After header when supplied (clamped to [5s, 120s], default 60s if absent), with ±5s jitter so parallel chunks don't synchronize their wake-ups and pile back onto the freshly-restarted container at the exact same instant (would just re-wedge). The transcribe path's existing rich → bare parameter fallback still runs for non-503 4xx/5xx (a different failure mode). Error messages now distinguish 503-after-N-retries ('operator container may need manual restart') from one-shot failures. Net effect for an operator on Spark Control: a fresh container restart that takes 60-120s now mostly gets absorbed by the retry loop instead of killing the pipeline."
 // Version entry for '0.2.114:0'.
 export const v_0_2_114 = VersionInfo.of({
  version: '0.2.114:0',
  releaseNotes: { en_US: englishNotes },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for '0.2.115:0'.
 // The release-note text must stay on ONE physical line: a double-quoted
 // string literal cannot contain a raw line break, so the previously
 // wrapped tail ("The shareable HTML file ...") is joined back onto it.
 export const v_0_2_115 = VersionInfo.of({
  version: '0.2.115:0',
  releaseNotes: {
    en_US:
      "Internal meeting hints + transient operator notes + self-contained shareable HTML. Three related changes: (1) Participants are now treated as HINTS, not authoritative truth, in both the name-inference and meeting-extras LLM passes. Before: the upload's comma-separated participants field was being stuffed into the name-inference prompt's 'Description:' metadata slot as 'Likely participants: A, B, C', which the LLM tended to read as a definitive speaker list — even when fewer or different people actually spoke. After: participants flow through a new explicit OPERATOR HINTS block in the prompt that's prefaced with 'treat as suggestions only — verify against the transcript before assigning names', plus an instruction telling the LLM that a name in the hints is only a candidate, the transcript can override hints, and returning null is better than mapping a hint to the wrong chip letter. (2) New Notes field on the upload form — TRANSIENT, NOT PERSISTED. Free-form prose (up to 4000 chars) where the operator can describe who-said-what to help the LLM disambiguate speakers. Example: 'Steve from XYZ Corp gave a business update. John followed up with questions about Q3. Hank chimed in toward the end.' Notes get passed to the OPERATOR HINTS block in both prompts (name inference + extras) as a soft signal the LLM weighs against the transcript, then are DROPPED. They never appear on the saved meeting record, the dashboard detail view, or any of the downloads (.md / .html / .json). The operator can write candid disambiguation context ('Steve is the new guy still figuring stuff out', 'John was angry about Q3') without worrying about it becoming part of a shareable artifact. The upload form makes this explicit in the field's helper text. (3) Downloaded .html is now truly portable. The shareable HTML file embeds the .md and .json serializations inline as base64 data URLs and exposes 'Download .md' and 'Download .json' buttons in the page header — so a recipient who got the HTML via email / shared drive / etc. can grab any of the three formats with no relay round-trip and no admin auth needed (the relay download endpoints are gated, external recipients couldn't reach them anyway). Buttons hide in print stylesheet. Size overhead is ~33% from base64 — still tens of KB for a typical meeting. Existing YouTube/podcast flow is untouched — runNameInference's new participantHints / operatorNotes args default to empty, the OPERATOR HINTS block in the rendered prompt collapses to nothing when both are empty, and the prompt looks exactly like before for those callers.",
  },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for '0.2.116:0'.
 // The release-note text must stay on ONE physical line: a double-quoted
 // string literal cannot contain a raw line break, so the previously
 // wrapped tail ("Net effect: ...") is joined back onto the same line.
 export const v_0_2_116 = VersionInfo.of({
  version: '0.2.116:0',
  releaseNotes: {
    en_US:
      "Two related fixes — backend selection bug + operator-internal detail leaking to public clients. (1) The pre-flight 'Spark Control reports STT (parakeet) is offline' 503 short-circuited backend selection. Four routes (summarize-url, transcribe, transcribe-url, analyze) checked hw.{transcribe|analyze}.blocked_reason and 503'd the request BEFORE running planBackend, so even with the operator's configured preference set to gemini_first / gemini_only / hardware_first (Gemini-fallback), a Spark Control 'delegate not ready' state torpedoed Gemini-routable requests too. Now the routes log the blocked state for the operator and let planBackend handle routing normally — when Spark Control marks a service not ready, hw.{transcribe|analyze}.url becomes null, hasHardware = false, and planBackend correctly picks Gemini under any preference except hardware_only (which keeps its existing generic 'hardware_only_not_configured' refusal). (2) Operator-internal wording (Spark Control, parakeet, vLLM, Gemma, Sortformer, TitaNet, LAN IPs / *.local URLs) no longer leaks to public clients. Added a sanitizer (server/sanitize-error.js) that swaps known operator-private terms for generic equivalents ('Parakeet' → 'the transcribe service', 'Spark Control' → 'the operator hardware', etc.) and redacts private-IP / *.local URLs to '(internal)'. Applied centrally in markFailed (so every pipeline-failure path is covered with no per-route edits) — job.error becomes client-safe + job.error_internal preserves the raw text. snapshotJobs (consumed by the admin dashboard) exposes both fields so the operator can still see the raw diagnostic locally. Admin config endpoint now exposes effective_transcribe_blocked_reason and effective_analyze_blocked_reason so the operator's own hardware health panel can display the actual SC-supplied reason without relying on log grep. Net effect: a Recaps cloud user with Gemini selected as preference no longer sees a hardware-down 503; the operator still gets full diagnostic detail in their own logs + admin dashboard.",
  },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,12 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // English release-note text for 0.2.117.
 const englishNotes =
   'Internal meetings: add post-hoc speaker-correction tools — merge two clusters that are actually one person, and re-run diarization at a higher strictness to split over-merged speakers. Both reflow per-line attributions, names, extras, and downloads.'
 // Version entry for '0.2.117:0'.
 export const v_0_2_117 = VersionInfo.of({
  version: '0.2.117:0',
  releaseNotes: { en_US: englishNotes },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // English release-note text for 0.2.118.
 const englishNotes =
   'Text-to-speech (audio-first Recaps). New POST /relay/tts endpoint synthesizes topic summaries into speech via Spark Control (Kokoro on operator hardware, with ElevenLabs as a swappable cloud alternative), so the Recaps app can play a recap back-to-back like a podcast. /relay/capabilities now advertises has_tts so the app knows when to offer the feature. Billed as one credit per recap (all topics share a job id). Requires Spark Control v0.14.0+ for the Kokoro backend; default voice bm_george, mp3 output. Additive — existing transcription/analysis are unchanged.'
 // Version entry for '0.2.118:0'.
 export const v_0_2_118 = VersionInfo.of({
  version: '0.2.118:0',
  releaseNotes: { en_US: englishNotes },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // English release-note text for 0.2.119.
 const englishNotes =
   'Cloud operator-key path (core-decoupling). The relay can now accept requests that identify a Recaps user by account-id (X-Recap-User-Id) authenticated by a shared operator key, instead of a per-user Keysat license — so the operator\'s cloud Recaps server owns Pro/Max subscriptions keyed to the user. Set the shared key under Settings → Endpoints & credentials → "Cloud operator key". New /relay/user-tier endpoint lets the operator set a user\'s subscription tier. Additive — the existing license/install credit paths are unchanged.'
 // Version entry for '0.2.119:0'.
 export const v_0_2_119 = VersionInfo.of({
  version: '0.2.119:0',
  releaseNotes: { en_US: englishNotes },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,20 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for '0.2.12:3'.
 export const v_0_2_12 = VersionInfo.of({
  version: '0.2.12:3',
  releaseNotes: {
    // The notes are a list of paragraph fragments. Every fragment but the
    // last already ends with its own '\n\n' separator, so they are glued
    // together with an empty joiner.
    en_US: [
      'Dashboard v2: operator can now see revenue, operating margin, and per-summary rollups in the same view as the existing call-level breakdowns.\n\n',
      '• New "Set Tier Prices (USD)" action under StartOS → Recap Relay → Actions → Tiers. Operator enters the monthly USD price per tier (defaults: $0/$5/$15). Dashboard multiplies that by the count of active installs per tier in the selected window to estimate revenue.\n\n',
      '• Operating margin tile = revenue − Gemini API cost. Surfaces immediately whether the relay is currently profitable.\n\n',
      '• Per-summary view collapses each transcribe + analyze pair into one row by job_id, with combined cost/duration and a transcribe/analyze backend column. Easier to answer "how many summaries did I serve?" without doing the math.\n\n',
      '• Recent-errors panel surfaces the most recent 50 failed calls with their truncated error strings — quick triage when something breaks.\n\n',
      '• 24-hour success-rate alert banner appears when the recent success rate drops below 95%, regardless of which time range is being viewed.\n\n',
      '• "Export CSV" link downloads the raw audit entries in the selected window for spreadsheet analysis.\n\n',
      '• Audit log auto-rotates at boot when it grows past 50MB (renames to relay-calls-YYYYMMDD.ndjson and starts a fresh file).',
    ].join(''),
  },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,12 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // English release-note text for 0.2.120.
 const englishNotes =
   'Self-serve Pro/Max purchase backend: BTCPay tier-invoice endpoint, settle webhook extends the prepaid period, and a tier-plans pricing endpoint.'
 // Version entry for '0.2.120:0'.
 export const v_0_2_120 = VersionInfo.of({
  version: '0.2.120:0',
  releaseNotes: { en_US: englishNotes },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,12 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // English release-note text for 0.2.121.
 const englishNotes =
   'Card payment rail (Zaprite): hosted-checkout order endpoint + re-fetch-verified webhook that extends the prepaid period, Set Zaprite Connection action with fiat card pricing, and tier-plans now reports card availability + fiat prices.'
 // Version entry for '0.2.121:0'.
 export const v_0_2_121 = VersionInfo.of({
  version: '0.2.121:0',
  releaseNotes: { en_US: englishNotes },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,12 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // English release-note text for 0.2.122.
 const englishNotes =
   'Fix: re-polishing internal-meeting summaries after renaming speakers now re-attributes to the corrected names — the polish transcript is labeled with the operator\'s names and the prompt treats the speaker roster as authoritative over any stale names in the original summaries.'
 // Version entry for '0.2.122:0'.
 export const v_0_2_122 = VersionInfo.of({
  version: '0.2.122:0',
  releaseNotes: { en_US: englishNotes },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,12 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // English release-note text for 0.2.123.
 const englishNotes =
   'Add operator-keyed GET /relay/expiring-subscriptions so the Recaps app can email users before their prepaid Pro/Max period expires.'
 // Version entry for '0.2.123:0'.
 export const v_0_2_123 = VersionInfo.of({
  version: '0.2.123:0',
  releaseNotes: { en_US: englishNotes },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,12 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // English release-note text for 0.2.124.
 const englishNotes =
   'Tier Bitcoin invoices now return the Lightning BOLT11 (for inline in-app payment) and each plan reports its configured per-period relay-credit allotment.'
 // Version entry for '0.2.124:0'.
 export const v_0_2_124 = VersionInfo.of({
  version: '0.2.124:0',
  releaseNotes: { en_US: englishNotes },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,18 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for '0.2.13:2'.
 export const v_0_2_13 = VersionInfo.of({
  version: '0.2.13:2',
  releaseNotes: {
    // The notes are a list of paragraph fragments. Every fragment but the
    // last already ends with its own '\n\n' separator, so they are glued
    // together with an empty joiner.
    en_US: [
      'Dashboard v3 — backend-agnostic performance benchmarks and per-model failure analysis.\n\n',
      '• New "Transcription speed by model (normalized)" table — reports wall-clock processing time per minute of audio (ms/min). Lets the operator compare e.g. Gemini Flash vs. Parakeet on a like-for-like benchmark: if Gemini takes 2.5 s/min and Parakeet takes 0.8 s/min on the same audio, the difference is now visible at a glance.\n\n',
      '• New "Analysis speed by model" table — reports wall-clock per 1000 input tokens, since analyze calls work on text not audio. Same like-for-like comparison story between Gemini Pro analysis vs. Gemma analysis.\n\n',
      '• Per-model success rate, computed against attempted calls (success + error). Refused calls are excluded — they never reached the backend, so they shouldn\'t count against the model\'s reliability.\n\n',
      '• New "Top failure modes by model" table — top 3 normalized error signatures per model with occurrence counts. Tells the operator at a glance whether a model is failing for one specific reason (Gemini quota, Parakeet timeout) or distributed noise. Error strings are normalized — timestamps, UUIDs, ports, hex IDs collapse to placeholders so near-identical errors bucket together.\n\n',
      '• Transcribe routes now record `audio_seconds` (via ffprobe) alongside every audit entry, which is the data the new normalized benchmarks consume. Audit log records grow by ~15 bytes per row.',
    ].join(''),
  },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,18 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for '0.2.14:0'.
 export const v_0_2_14 = VersionInfo.of({
  version: '0.2.14:0',
  releaseNotes: {
    // The notes are a list of paragraph fragments. Every fragment but the
    // last already ends with its own '\n\n' separator, so they are glued
    // together with an empty joiner.
    en_US: [
      'Relay credit top-ups via BTCPay (operator-side).\n\n',
      '• New "Set BTCPay Connection" action under StartOS → Recap Relay → Actions → Tiers. Operator drops in their BTCPay base URL, store ID, API key, and webhook secret. Empty fields disable the purchase flow; the rest of the relay keeps working.\n\n',
      '• New "Set Credit Bundle Prices" action — adjust sats prices for the 1/5/10/20-credit bundles without touching code.\n\n',
      '• New /relay/credits/* endpoints: GET /packages, POST /buy (mints a BTCPay invoice tied to the install), GET /invoice/:id (polls status), POST /relay/btcpay/webhook (HMAC-validated, dedupes by invoice id, grants credits to the install on InvoiceSettled).\n\n',
      '• Credit ledger gains purchased_balance + purchased_total_ever fields. Spend order: tier allotment first, purchased top-up second, so a Pro user always gets their 50 monthly comped credits before dipping into anything they paid for. Purchased credits don\'t expire and bypass the per-tier Gemini cap (paid calls don\'t count against the comped-Gemini ceiling).\n\n',
      '• To wire this up to BTCPay: in BTCPay → your store → Webhooks, add https://<your-relay-host>/relay/btcpay/webhook, subscribe to "An invoice has been settled", copy the auto-generated secret into the StartOS action.',
    ].join(''),
  },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,15 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for '0.2.15:23'.
 export const v_0_2_15 = VersionInfo.of({
  version: '0.2.15:23',
  releaseNotes: {
    // The notes are a list of paragraph fragments. Every fragment but the
    // last already ends with its own '\n\n' separator, so they are glued
    // together with an empty joiner.
    en_US: [
      'Two major reliability + UX fixes.\n\n',
      '1. Async transcribe-url jobs. POST /relay/transcribe-url no longer holds the connection open for the duration of the work — it returns immediately with { job_id, status: "queued" } and processes in the background. Clients poll GET /relay/jobs/:id every few seconds until status flips to "complete" or "failed". This fixes the "fetch failed (other side closed)" failures observed on long-running transcribes: a proxy or load balancer somewhere in the path was dropping the connection after a few minutes of activity, taking the whole job down with it. Poll requests are short and cheap, so they never trip any timeout. Job records persist in-memory for 24h after completion.\n\n',
      '2. One-click BTCPay setup. New dashboard widget: "Connect BTCPay →" button. Click → enter your BTCPay base URL → BTCPay opens authorize page in a new tab with the right scopes pre-selected → click Approve → pick your store → done. The relay auto-creates the webhook (with a freshly-generated secret) and stores all four credentials. No more 4-step manual config involving API-key generation, webhook creation, and secret-copy/paste. The state-token-validated callback is admin-auth-exempt to handle the cross-site POST redirect from BTCPay.',
    ].join(''),
  },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,12 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // English release-note text for 0.2.16.
 const englishNotes =
   'Analyze pipeline now talks to operator-hardware LLMs in a much faster mode. Two changes to the chat-completion request the relay sends to vLLM: (1) response_format is set to JSON-object mode, which constrains the model to emit valid JSON instead of wrapping the output in prose preamble like "Here are the topics I identified:" — saves real decode tokens and avoids parse failures. (2) chat_template_kwargs.enable_thinking is set to false, which disables Qwen3.6\'s reasoning mode for this task. Thinking mode is great for math but pure latency-noise for structured extraction. Together these typically cut analyze wall-time on operator hardware by 30-50%. Both fields are vLLM-specific; non-Qwen / non-vLLM backends ignore them, so this is safe across other operator-hardware setups.'
 // Version entry for '0.2.16:0'.
 export const v_0_2_16 = VersionInfo.of({
  version: '0.2.16:0',
  releaseNotes: { en_US: englishNotes },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,12 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // English release-note text for 0.2.17. The doubled backslashes in the
 // quoted regex are string escapes, so the text carries single backslashes.
 const englishNotes =
   'Two production bugs fixed. (1) Gemini Flash model IDs updated — gemini-3.1-flash-preview and gemini-3-flash-preview have been retired by Google in favor of gemini-3.1-flash and gemini-3-flash (without the -preview suffix). The relay\'s config defaults, dropdown options, fallback chain, and pricing table all now use the new IDs. Existing saved configs that point at the old -preview names automatically fall back to the new default on next request — no operator intervention required. The Pro variants (3.1-pro-preview, 3-pro-preview) are unchanged for now; if those start 404\'ing we\'ll do the same rename pass for Pro. (2) thinkingConfig parameter is now only sent for Gemini 3.x Flash models, not 2.x Flash or any Pro model. Previously the relay sent thinkingLevel: "minimal" for any model matching /flash/i, which caused 400 errors with "Thinking level is not supported for this model" when the fallback chain walked down to gemini-2.5-flash or gemini-2.0-flash. New gate: ^gemini-3(\\.\\d+)?[^-]*-flash regex — matches gemini-3-flash and gemini-3.1-flash only.'
 // Version entry for '0.2.17:0'.
 export const v_0_2_17 = VersionInfo.of({
  version: '0.2.17:0',
  releaseNotes: { en_US: englishNotes },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,12 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // English release-note text for 0.2.18.
 const englishNotes =
   'Gemini model + thinking-config corrections. (1) The supported Gemini model list is now exactly 5, verified valid against Google\'s official docs: gemini-3.1-pro-preview, gemini-3-flash-preview, gemini-3.1-flash-lite, gemini-2.5-pro, gemini-2.5-flash. The "Set Backend Routing" action shows the same 5 in both transcription and analysis dropdowns. Removed never-existed entries (gemini-3.1-flash, gemini-3.1-flash-preview), shut-down entries (gemini-3-pro-preview retired 2026-03-09), and deprecated entries (gemini-2.0-flash). Transcription default is now gemini-3-flash-preview; analysis default remains gemini-3.1-pro-preview. Existing operator configs pointing at the old IDs automatically clamp to the new default on the next routing-action read. (2) thinkingConfig parameter shape now branches correctly by model family: Gemini 3.x models get { thinkingLevel: "minimal" } (the 3.x style), Gemini 2.5 Flash gets { thinkingBudget: 0 } (disable thinking entirely for fastest/cheapest transcription), and Gemini 2.5 Pro gets { thinkingBudget: 128 } (low end of its accepted range). Previously the relay sent thinkingLevel to all Flash models, which 400\'d on 2.5-flash with "Thinking level is not supported for this model" because 2.5 uses thinkingBudget, not thinkingLevel — confirmed at ai.google.dev/gemini-api/docs/thinking. (3) The Gemini-backend fallback chain now treats 404 "model not found" and 400 "Thinking level is not supported" as fallback-eligible errors, so a single dead model no longer fails the whole call — the chain walks to the next model.'
 // Version entry for '0.2.18:0'.
 export const v_0_2_18 = VersionInfo.of({
  version: '0.2.18:0',
  releaseNotes: { en_US: englishNotes },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,12 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // English release-note text for 0.2.19.
 const englishNotes =
   'Gemini audio capability bumped 60 min / 30 MB → 240 min / 200 MB. The original conservative cap was sized for free-tier Gemini worries that no longer apply on paid Gemini — File API handles audio up to 9.5 hours per call and 2 GB files. The 60-min ceiling was forcing Recap clients with audio >60 min to skip the relay-URL fast-path (where the relay downloads the audio server-side) and instead chunk + upload from the buyer\'s home connection — burning their upload bandwidth and serializing the calls. With the bump, the fast-path now handles content up to 4 hours, which covers virtually every podcast and long-form YouTube content. preferred_chunk_seconds stays at 45 min so server-side chunking still kicks in for the longest files for stability, but doesn\'t gate client behavior.'
 // Version entry for '0.2.19:0'.
 export const v_0_2_19 = VersionInfo.of({
  version: '0.2.19:0',
  releaseNotes: { en_US: englishNotes },
  migrations: {
    // Both hooks have empty bodies: no migration work is done here.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,12 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version 0.2.20 — release notes cover parallel chunked Gemini transcription
 // and the lateral model-fallback chain. Both migration hooks are
 // intentionally empty.
 export const v_0_2_20 = VersionInfo.of({
  version: '0.2.20:0',
  releaseNotes: {
    en_US:
      'Big speed win on long-audio transcription via Gemini, plus a fallback fix. (1) The Gemini transcribe path was previously a single generateContent call against the entire audio file — for a 173-min podcast, that meant one giant call to Google that took 7-8 minutes. Now the relay splits any audio longer than 30 min into ~30-min chunks via ffmpeg, uploads each to Gemini File API in parallel (6 concurrent), and calls generateContent for each chunk independently. Each chunk\'s transcript has its [MM:SS] timestamps shifted by the chunk\'s start offset before stitching, so the final transcript is a single chronological stream — downstream parsers see the same shape as before. On a 173-min audio: ~7-8 min single-call → ~2-3 min parallel-chunked (limited by the slowest chunk, not the sum). Short audio (≤30 min) still takes the original single-call fast path. (2) Fallback chain now walks LATERALLY through other models when the primary fails, not just downward. Previously, picking gemini-3.1-flash-lite (which sits at the bottom of the analysis chain) left zero fallbacks: a single 503 from Google\'s capacity would fail the entire analyze call. Now the chain walks primary → everything-below → everything-above as a last-resort tier, so a flash-lite 503 falls through to flash → flash-preview → 2.5-pro → 3.1-pro instead of failing the user\'s job. Operators concerned about runaway cost from auto-falling-up can monitor in the dashboard.',
  },
  migrations: {
    async up({ effects }) {},
    async down({ effects }) {},
  },
 })
@@ -0,0 +1,12 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version 0.2.21 — hotfix release; the notes describe the ffmpeg
 // output-format failure introduced by 0.2.20's chunking. Both migration
 // hooks are intentionally empty.
 export const v_0_2_21 = VersionInfo.of({
  version: '0.2.21:0',
  releaseNotes: {
    en_US:
      'Hotfix for 0.2.20: ffmpeg chunking failed with "Unable to find a suitable output format for .../chunk_0.bin". The master audio file was being written as audio.bin (extension-less binary), so ffmpeg couldn\'t infer the output container/codec when splitting into chunks. The audio bytes themselves were always fine; only the file extension confused ffmpeg. Fixed by deriving an extension from the mimeType (audio/mpeg → mp3, audio/mp4 → m4a, etc.) so the master file and the chunk outputs have a format ffmpeg understands. Defaults to mp3 for unknown audio/* types since that\'s what yt-dlp + most podcast enclosures produce. All other chunking logic from 0.2.20 unchanged.',
  },
  migrations: {
    async up({ effects }) {},
    async down({ effects }) {},
  },
 })
@@ -0,0 +1,12 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version 0.2.22 — release notes introduce the operator dashboard's "Jobs"
 // tab. Both migration hooks are intentionally empty.
 export const v_0_2_22 = VersionInfo.of({
  version: '0.2.22:0',
  releaseNotes: {
    en_US:
      'New "Jobs" tab on the operator dashboard. Per-video / per-podcast history with summary stats at the top and a sortable + filterable table below. (1) Tab nav added — flip between the existing Overview tab and the new Jobs tab without losing range selection. (2) Summary tiles: total jobs in range, success rate (with partial / failed breakdown), median wall time, median transcribe ms / audio-minute, median analyze ms / audio-minute, total audio hours processed, total cost. (3) Filterable by status (success/partial/failed), transcribe backend (gemini/hardware), analyze backend, model substring, and free-text title/URL search. (4) Every column is sortable, click the header to toggle ascending/descending — date, duration, size, chunks, time-per-MB, time-per-min, model, cost, status, etc. (5) Pagination at 100 rows per page. (6) Audit log now captures media_url + title + chunk_count on transcribe rows so the Jobs table can attribute each row to a specific video. Backwards-compatible: old audit rows without these fields just show "—" in those columns. Lateral-fallback fix from 0.2.20 and Gemini server-side chunking from 0.2.21 remain in effect.',
  },
  migrations: {
    async up({ effects }) {},
    async down({ effects }) {},
  },
 })
@@ -0,0 +1,12 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version 0.2.23 — release notes cover column reorder / hide / context-menu
 // controls on the Jobs tab. Both migration hooks are intentionally empty.
 export const v_0_2_23 = VersionInfo.of({
  version: '0.2.23:0',
  releaseNotes: {
    en_US:
      'Airtable-style column controls on the Jobs tab. (1) Drag any column header left or right to reorder columns; the column drops where you release and the new order persists across page reloads (saved to localStorage). A blue vertical line on the target column shows where the drop will land. (2) Right-click on any column header to open a custom context menu with: "Sort ascending", "Sort descending", "Hide column", "Show all hidden" (count badge when hidden columns exist), and "Reset to default order". The browser\'s native right-click menu is suppressed. Click outside the menu or press Escape to close. (3) Column order + hidden columns are namespaced per-browser via localStorage with versioned keys (relay.dashboard.jobs.columnOrder.v1, ...hiddenColumns.v1) so a future schema change can clean-slate without manual reset. (4) Newly-added columns from future relay updates append automatically at the end of the user\'s saved order instead of breaking the layout — no manual reset required. Left-click sorting still works on the column itself; right-click is for the menu.',
  },
  migrations: {
    async up({ effects }) {},
    async down({ effects }) {},
  },
 })
@@ -0,0 +1,12 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version 0.2.24 — release notes describe a Jobs-tab polish pass (uniform
 // rate units, always-visible scrollbar, single-line rows, column resize).
 // Both migration hooks are intentionally empty.
 export const v_0_2_24 = VersionInfo.of({
  version: '0.2.24:0',
  releaseNotes: {
    en_US:
      'Jobs-tab polish pass. (1) All rate metrics (ms-per-min, ms-per-MB, download-per-MB) now render uniformly as seconds with one decimal (e.g. "7.3s") instead of the mixed-shape "412ms" / "7.3s" output — columns visually align across rows. (2) Horizontal scrollbar is now always visible at the bottom of the Jobs table (was previously only visible mid-scroll on macOS Safari). Custom-themed to match the dashboard\'s dark palette. (3) Default row height is now strictly single-line across the entire table. The errors column was the only one that broke this — long error strings used to push individual rows to wrap, making the rest of the table look uneven. Fixed by truncating errors to single-line with a small +/− chevron at the right edge of each errors cell. Click the chevron to expand JUST that row\'s errors text into a wrapped multi-line view; other rows in the table stay single-line. Hover the cell to see the full error text in a native tooltip even without expanding. (4) "Show hidden columns" in the right-click menu became a submenu: hover over it (or click) to reveal each hidden column individually, click one to unhide just that column. "Show all" is offered at the top of the submenu when more than one column is hidden. Back button to return to the main menu. (5) Drag the right edge of any column header to resize that column\'s width. Widths persist to localStorage so the layout you tune sticks across page reloads. Minimum width 40px. While dragging, the cursor changes to col-resize and other UI is temporarily disabled to prevent stray clicks. (6) Misc: better cursor hints (grab vs col-resize), context menu closes cleanly on Escape, overlay click, or right-click outside the menu.',
  },
  migrations: {
    async up({ effects }) {},
    async down({ effects }) {},
  },
 })
@@ -0,0 +1,12 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version 0.2.25 — release notes introduce the test-run panel and the
 // 6-permutation benchmark suite on the Jobs tab. Both migration hooks are
 // intentionally empty.
 export const v_0_2_25 = VersionInfo.of({
  version: '0.2.25:0',
  releaseNotes: {
    en_US:
      'Test-run panel + benchmark suite on the Jobs tab. (1) New panel above the Jobs table — paste a media URL, pick transcribe + analyze backends (Gemini or Hardware) plus Gemini models, click "Run single" to fire one test directly. The job lands as a new row in the table below within seconds of the relay completing the work. (2) "Run benchmark suite (6 runs)" fires six pre-defined permutations sequentially with a shared batch_id: (a) gemini-3.1-flash-lite → gemini-3.1-pro-preview, (b) gemini-3-flash-preview → gemini-3.1-flash-lite, (c) gemini-2.5-flash → gemini-2.5-pro, (d) hardware → hardware, (e) hardware → gemini-3.1-flash-lite, (f) gemini-3.1-flash-lite → hardware. The status line below the panel tracks "permutation N of 6: <progress>" while running. (3) "Rerun last" button replays the most recent run\'s inputs with a fresh batch_id so suites stay distinguishable in the table. (4) Jobs table gains a Batch column (click to filter to just that batch) and a Batch / Source filter in the filter row — set Source = "Test runs only" to hide real-user traffic, leave blank to see everything. (5) Backend: new admin-auth-gated POST /admin/test-run endpoint runs the same download + transcribe + analyze flow as the public /relay/transcribe-url, but with explicit backend/model overrides bypassing the operator\'s tier/preference defaults. Audit rows from these runs are stamped with batch_id + source="admin-test" so they\'re filterable. (6) Captions-fast-path permutations (7, 8), TX-sharing for paired permutations (4+5, 1+6, 7+8), and live Spark Control hardware-model pull are deferred to 0.2.26.',
  },
  migrations: {
    async up({ effects }) {},
    async down({ effects }) {},
  },
 })
@@ -0,0 +1,12 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version 0.2.26 — release notes cover the YouTube captions fast-path for
 // test runs and the benchmark suite. Both migration hooks are intentionally
 // empty.
 export const v_0_2_26 = VersionInfo.of({
  version: '0.2.26:0',
  releaseNotes: {
    en_US:
      'YouTube captions fast-path for the test-run + benchmark suite. (1) New server-side captions fetch in the relay: when the test-run captions toggle is set to "Use", the relay runs yt-dlp with --write-auto-subs --skip-download to grab the YouTube caption track (manual when available, auto otherwise), parses the .vtt subtitle file into the same [MM:SS] bracketed transcript shape Gemini\'s transcribe path produces, and feeds it straight into the analyze step. No audio download, no Gemini transcribe call — typical captions fetch is 2-5 seconds vs 60-300+ seconds for full audio transcribe. (2) Dashboard test-run panel gains a "Captions: Skip / Use (YT only)" toggle. When set to Use, the TX backend + TX model controls are disabled (captions don\'t need them). Analyze backend + model controls work normally. (3) Benchmark suite auto-appends two captions permutations when the URL is a YouTube link: (7) captions → gemini-3.1-pro-preview analyze, (8) captions → hardware analyze. Podcast URLs get the 6-permutation suite unchanged (no captions for raw audio enclosures). (4) Captions audit rows are stamped with backend="captions" and model="youtube-auto" or "youtube-manual" so the Jobs table distinguishes them from real transcribe runs. audio_seconds is populated from yt-dlp\'s reported duration; audio_bytes is null since no audio was downloaded. (5) TX-sharing for paired permutations (1+6, 4+5, 7+8) and Spark Control live hardware-model pull are deferred to 0.2.27 — captions was the highest-value piece of the 0.2.26 backlog.',
  },
  migrations: {
    async up({ effects }) {},
    async down({ effects }) {},
  },
 })
@@ -0,0 +1,12 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version 0.2.27 — release notes describe the sticky horizontal scrollbar
 // on the Jobs table. Both migration hooks are intentionally empty.
 export const v_0_2_27 = VersionInfo.of({
  version: '0.2.27:0',
  releaseNotes: {
    en_US:
      'Sticky horizontal scrollbar on the Jobs table. The table is wide (22+ columns) and its real horizontal scrollbar used to live at the bottom of the table container — meaning when you were looking at the top of a long list of jobs, the scrollbar was below the viewport and you had to scroll the whole page down to reach it. Now the scrollbar is a phantom element positioned with CSS position: sticky; bottom: 0, so it stays glued to the bottom of the browser viewport for the entire time the table is in view. Dragging either the phantom or the real scrollbar scrolls the table in both places (two-way JS sync). The phantom\'s width auto-updates via ResizeObserver when columns are resized or the window changes size. Hides itself entirely when the table fits in the viewport (no horizontal overflow). Same dark-theme styling as the rest of the dashboard.',
  },
  migrations: {
    async up({ effects }) {},
    async down({ effects }) {},
  },
 })
@@ -0,0 +1,12 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version 0.2.28 — release notes cover persisted job outputs, storage
 // management, and concurrently-run paired benchmark permutations.
 // The en_US note must stay on ONE physical line: a single-quoted string
 // literal cannot contain a raw line break, so item (6) is kept inline
 // rather than wrapped onto its own line.
 // Both migration hooks are intentionally empty.
 export const v_0_2_28 = VersionInfo.of({
  version: '0.2.28:0',
  releaseNotes: {
    en_US: 'Output storage + parallel benchmark pairs + storage management. (1) Every test-run job\'s transcript + analysis JSON is now persisted to /data/relay-outputs/<job_id>.json after the job completes. Real-user traffic is opt-in via a new relay_save_user_outputs config flag (default false, set via StartOS action). (2) The Jobs table gains a "View" column with a 👁 link on each row that has a stored output — opens a new tab showing a Recap-style two-pane render: topic list on the left, transcript with timestamps on the right, click any topic to scroll the transcript to that range and highlight the matching entries. The view page is public/job-output-view.html — admin-auth-gated like the rest of /admin/*. (3) Row-level checkboxes on the Jobs table let you select a subset of rows; new "Stored outputs" mini-panel above the table shows total count + size + Delete-selected and Delete-all buttons. Audit log rows are NEVER deleted alongside outputs — only the saved transcript+analysis JSONs. (4) New admin endpoints: GET /admin/job-output/:id, GET /admin/output-store-stats, GET /admin/output-store-ids, DELETE /admin/job-outputs (body {job_ids:[...]} or {all:true}). (5) Benchmark suite now runs paired permutations CONCURRENTLY instead of sequentially — perms 1+6 (shared TX = gemini-3.1-flash-lite), 4+5 (shared TX = hardware), 7+8 (shared captions) each fire as a single "phase" with two in-flight permutations. The relay\'s new TX-share cache ensures the underlying transcribe runs ONCE per phase; the paired permutation awaits the in-flight Promise and reuses the resulting transcript at zero additional TX cost or wall time. Audit rows from the cached side are stamped source="admin-test-shared-tx" so the dashboard can identify them. Net effect: a full 8-permutation YouTube suite drops from 8 sequential transcribes to 5 (pairs collapse), typically cutting total wall time by 30-40% on long content. (6) Spark Control live model pull was already wired in via hardware-config.js — the audit log already records the live model name returned by Spark Control\'s /api/endpoints (with a 60s cache), so model swaps on the Spark side are reflected within ~1 minute without any relay config change.',
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,12 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version 0.2.29 — release notes describe moving benchmark-suite
 // orchestration from dashboard JavaScript to the relay's background
 // workers. Both migration hooks are intentionally empty.
 export const v_0_2_29 = VersionInfo.of({
  version: '0.2.29:0',
  releaseNotes: {
    en_US:
      'Server-side benchmark suite orchestration. The benchmark runner has been moved out of dashboard JavaScript and into the relay\'s background workers. Previously, clicking "Run benchmark suite" started a JS for-loop that submitted one permutation at a time, awaited each, then moved to the next — if the operator\'s browser tab closed, mobile phone went to sleep, or anything else interrupted the JS, only the already-in-flight permutations completed and the rest were silently dropped. Net effect: phones, tab switches, refreshes all halted suites mid-run, sometimes after just one permutation. (1) New endpoint POST /admin/test-run-suite accepts the full permutation list + media_url, mints ALL jobs upfront so the dashboard can render them immediately, and orchestrates phase-based concurrent execution in a relay-side setImmediate loop. The suite keeps grinding whether or not any browser is watching. (2) Dashboard now POSTs once to the suite endpoint, then polls /admin/jobs-history?batch_id=X every 5s for progress. (3) Active batch ID persisted to localStorage — if the operator closes the tab and reopens the dashboard, the Jobs tab auto-detects the in-progress batch and resumes the progress poll without manual intervention. (4) Phases group permutations by TX fingerprint (same media + same TX backend + same model = same phase). Within a phase, permutations fire concurrently and share TX via the existing inflight-promise cache; phases themselves run sequentially so the underlying transcribe backends don\'t get overloaded. (5) Failed permutations don\'t abort the suite — each failure is recorded as a row in the Jobs table; the runner moves on to the next perm. (6) Bonus: the extracted executeTestRunWorker function is now reusable for any future workflows that want to programmatically queue test runs (e.g. CLI tooling).',
  },
  migrations: {
    async up({ effects }) {},
    async down({ effects }) {},
  },
 })
@@ -0,0 +1,12 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version 0.2.30 — release notes describe the reworked sticky horizontal
 // scrollbar (position:fixed phantom bar, overflow-y: clip on the wrap).
 // Both migration hooks are intentionally empty.
 export const v_0_2_30 = VersionInfo.of({
  version: '0.2.30:0',
  releaseNotes: {
    en_US:
      'Sticky horizontal scrollbar redo — properly hugs the viewport bottom while letting the page scroll vertically. Earlier attempts used position:sticky on a separate phantom scrollbar, but the CSS containing-block math meant the bar didn\'t always stay at the BROWSER WINDOW\'S bottom when the table grew tall — it sometimes ended up below the viewport instead, leaving the operator without a visible horizontal scroll. (1) Switched the phantom scrollbar to position:fixed; bottom:0 so it\'s anchored to the viewport bottom regardless of vertical scroll position. JS positions its left+width to match the table-wrap\'s visible rect, refreshing on window resize, page scroll, and ResizeObserver column-width changes. (2) Changed the wrap\'s overflow-y from "visible" to "clip" — the CSS spec implicitly flips overflow-y:visible to overflow-y:auto when paired with overflow-x:auto, which silently created a vertical scroll container that could trap rows inside a shorter-than-content box. "clip" is the explicit "don\'t establish a scroll context" value, so vertical content flows naturally onto the page; the page scrolls; the wrap grows tall. (3) Net effect: operator opens the Jobs tab, scrolls the page down to see more rows in a long table, and the horizontal scrollbar stays glued to the bottom of the browser window the whole time. Bar shows only when the table actually overflows horizontally; hides itself when it fits.',
  },
  migrations: {
    async up({ effects }) {},
    async down({ effects }) {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version 0.2.31 — release notes list six benchmark-suite bug fixes.
 // The en_US note must stay on ONE physical line: a single-quoted string
 // literal cannot contain a raw line break, so the sentence following
 // "triggers autodiscovery." is kept inline rather than wrapped.
 // Both migration hooks are intentionally empty.
 export const v_0_2_31 = VersionInfo.of({
  version: '0.2.31:0',
  releaseNotes: {
    en_US:
      'Six benchmark-suite bug fixes from the 8-permutation YouTube run. (1) View-output eyeball link in the Jobs table was pointing at /admin/job-output-view.html, but the static asset is served at /job-output-view.html — clicking the eyeball threw "Cannot GET /admin/job-output-view.html". Link now points at the correct path. (2) Jobs-table column headers said "ms/MB", "ms/audio-min", etc., but the cell values are formatted as seconds-with-one-decimal (e.g. "1.2s"). Header labels are now "s/MB" and "s/audio-min" so they match the displayed units. (3) Paired permutations that reused a sibling\'s transcribe via the TX-share cache wrote duration_ms=0 to their audit row, which made the dashboard\'s per-row TX rate columns collapse to "—" on the sibling rows even though both permutations actually consumed the same TX work. Both rows now show the wall-time of the underlying TX call (cost stays $0 on the sibling so aggregate totals still avoid double-counting). job-stats.js also switched from `||` to `??` so a legitimate 0 value isn\'t treated as missing data. (4) The hardware transcribe backend was returning {text, segments, duration_seconds, model} but NOT chunk_count, so the Jobs table\'s "TX chunks" column always showed "—" for Parakeet runs. It now returns chunk_count=1 for single-shot calls and chunk_count=N when the audio was split into N pieces for OOM mitigation (HARDWARE_CHUNK_SECONDS=300s). (5) Hardware analyze was hardcoding "gemma3:27b" as a fallback when the operator left "Gemma Model Name" blank in StartOS — defeating the autodiscovery logic that\'s supposed to query /v1/models at request time and use whatever model is actually loaded on the endpoint. The hardcoded default has been removed from server/backends/hardware.js, server/config.js, AND startos/file-models/config.json.ts so a blank model name now reliably triggers autodiscovery. Operators whose hardware hosts a model OTHER than gemma3:27b (which is most of them) will now have analyze succeed instead of erroring with "model does not exist". (6) yt-dlp captions fetch failed on a video that DOES have captions on YouTube. The single-pass --sub-langs en.* call produced no .vtt files. Replaced with a two-pass strategy: en.* first, then --sub-langs all as a fallback for videos whose captions are tagged with non-en language codes. When both passes fail, the error message now includes yt-dlp\'s --list-subs output so the operator can tell whether the video genuinely lacks captions or whether yt-dlp\'s extractor is being blocked.',
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version 0.2.32 — release notes cover the Settings-tab chunking /
 // concurrency knobs, chunked analyze, and parallel hardware transcribe.
 // The en_US note must stay on ONE physical line: a single-quoted string
 // literal cannot contain a raw line break, so item (4) is kept inline
 // rather than wrapped onto its own line.
 // Both migration hooks are intentionally empty.
 export const v_0_2_32 = VersionInfo.of({
  version: '0.2.32:0',
  releaseNotes: {
    en_US:
      'Configurable chunking + chunked analyze + parallel hardware transcribe. (1) New Settings tab in the dashboard (PUT /admin/settings) exposes 11 chunking / concurrency knobs that drive both real-user traffic AND test-run benchmarks — no separate test-run config. Knobs: Gemini transcribe chunk size (default 30 min), Gemini transcribe concurrency (default 12, was 6), Gemini analyze window body (default 18 min, new), Gemini analyze window overlap (default 2 min, new), Gemini analyze concurrency (default 12, new); Hardware transcribe chunk size (default 5 min), Hardware transcribe concurrency (default 4, was sequential), Hardware analyze window body (default 18 min, new), Hardware analyze window overlap (default 2 min, new), Hardware analyze concurrency (default 8, new); plus a shared analyze single-shot cutoff (default 25 min, below which the planner emits a single window covering the whole transcript). Edits apply to new jobs only — in-flight benchmarks keep their snapshot. (2) Ported the chunked-analyze logic from the Recap app: server/chunked-analyze.js parses the relay\'s bracketed [MM:SS] transcript into entries, plans time-windowed overlapping slices (body + overlap stride algorithm), fires each window at the chosen analyze backend up to the configured concurrency, emits one audit row per window with window_idx / window_count / window_body_seconds fields, then stitches sections back together using the same ownership-by-body-start rule the Recap app uses. The 200K-char truncation in the old single-shot analyze path is gone — long transcripts now analyze in parallel windows instead of getting truncated. (3) Parakeet now runs up to 4 transcribe chunks concurrently (was sequential) — per the spark-control LLM dev, a single GPU comfortably handles 4 parallel POSTs. Failed chunks are tolerated (their segments leave gaps in the stitched transcript) rather than aborting the whole job. (4) Removed every hardcoded chunking / concurrency constant scattered across the codebase: GEMINI_CHUNK_SECONDS / GEMINI_CHUNK_CONCURRENCY in server/backends/gemini.js, HARDWARE_CHUNK_SECONDS in server/audio-meta.js, and the 200K char cap in server/routes/admin-test-run.js. All chunking decisions now flow from exactly ONE source: server/config.js defaultConfig() (canonical default per knob) → relay-config.json (operator overrides via Settings tab) → backend factories at request time. splitAudioFile() now throws on missing chunkSeconds rather than silently substituting an old default. (5) Wall-time accuracy: per-window audio_seconds = window body length (not total audio_seconds), so the Jobs table\'s per-row analyze rate columns (s/audio-min) divide by the right denominator. Job-level analyze_ms remains sum-of-windows (= total backend work, drives cost). Job-level wall_time_ms remains completedAt − startedAt across all audit rows (= user-POV elapsed time, captures the parallel-window speedup). Both metrics live side-by-side in the Jobs table.',
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version 0.2.33 — release notes introduce the unified
 // POST /relay/summarize-url pipeline with SSE streaming and its credit
 // policy.
 // The en_US note must stay on ONE physical line: a single-quoted string
 // literal cannot contain a raw line break, so the trailing "Also: ..."
 // sentence is kept inline rather than wrapped onto its own line.
 // Both migration hooks are intentionally empty.
 export const v_0_2_33 = VersionInfo.of({
  version: '0.2.33:0',
  releaseNotes: {
    en_US:
      'Unified summarize-url pipeline + SSE streaming. New POST /relay/summarize-url endpoint replaces the two-call /relay/transcribe-url + /relay/analyze pattern Recap previously used in relay mode. The relay now does the WHOLE pipeline server-side — download, chunked transcribe (using the operator\'s Settings-tab knobs), chunked-analyze with parallel windows, and section stitching — and streams per-window section results back to Recap over SSE at GET /relay/summarize-url/:jobId/events. Wins: (1) Cuts 12+ round-trips per long video down to one POST + one SSE stream — the transcript never has to leave the relay just for Recap to slice it into 12 prompts and ship them back. (2) The operator\'s Settings-tab chunking knobs now actually drive production behavior. Before this version they only affected test-run benchmarks because Recap windowed client-side using its own hardcoded constants. (3) Credit policy: charged ONLY when the full pipeline completes cleanly — transcribe succeeded AND every analyze window succeeded. Download failure: no charge. Transcribe failure: no charge. Analyze backend init failure: no charge. ANY analyze window fails (even just 1 of 12): no charge — partial results are still returned to the user (saved to disk + delivered in the result envelope), the operator eats the cost of the compute they paid for on the windows that did succeed, but no credit moves. Charging full price for a degraded summary would erode user trust. If a previous endpoint (legacy two-call path) had pre-charged on the same X-Recap-Job-Id, the credit is refunded. SSE event types: progress, transcribe_complete, window_complete (per analyze window with that window\'s body-owned sections), done, error. Heartbeat every 25s keeps idle connections alive through reverse proxies. The existing /relay/transcribe-url + /relay/analyze endpoints are unchanged and continue to work for older Recap clients. Also: subscribeToJob + appendEvent helpers added to server/jobs.js so any future long-running endpoint can stream events the same way.',
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version 0.2.34 — release notes cover dashboard polish and correctness
 // fixes (wall_time_ms accounting, Settings-tab load flash, Jobs-table
 // scroll reset, captions permutations temporarily disabled).
 // Both migration hooks are intentionally empty.
 export const v_0_2_34 = VersionInfo.of({
  version: '0.2.34:0',
  releaseNotes: {
    en_US:
      'Dashboard polish + correctness fixes triggered by a 6-permutation benchmark run. (1) wall_time_ms now correctly includes the TX work duration. Previously the calc was `latest_audit_row.ts − earliest_audit_row.ts`, but audit rows are stamped at COMPLETION, so the time BEFORE the first row (i.e., the TX phase itself) was unaccounted for. With this fix, wall_time = TX duration + the parallel-window AN wall-time, which matches the user-POV elapsed time. Example from the benchmark: perm 1 (TX flash-lite → AN pro) was showing 2m 16s wall but the true elapsed was ~4 min; perm 6 (paired sibling with hardware AN) was showing 51s but the true elapsed was ~2m 32s. Both now reflect actual time-on-clock. (2) Settings tab no longer flashes "Failed to load settings" on first click. The render order was running the render function BEFORE the fetch fired, so the null settingsData triggered the error path in the same render tick the loading skeleton SHOULD have shown. Fixed the render to treat null-data as "still loading" and reserve the error state for explicit `{ error: ... }` payloads. (3) Jobs table no longer resets horizontal scroll to column 0 every 5 seconds while a suite is running. The trackBatchProgress poll calls loadJobs() every 5s which re-renders the whole table via innerHTML replacement, blowing away scroll state. Now captures scrollLeft (and window.scrollY) before the replacement and restores after the new DOM is in place via requestAnimationFrame. (4) Captions permutations (perms 7+8) are temporarily disabled from the benchmark suite. yt-dlp keeps reporting "no .vtt subtitle file" on videos that clearly have captions on YouTube — needs a separate dig into yt-dlp\'s extractor behavior on this network. Suite now runs 6 audio permutations regardless of YouTube vs podcast URL. Button label and suite blurb updated to reflect the count change.',
  },
  migrations: {
    async up({ effects }) {},
    async down({ effects }) {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version 0.2.35 — release notes describe porting the Recap-app
 // transcribe-truncation fixes (maxOutputTokens, gap handling in
 // planAnalysisWindows) into the relay. Both migration hooks are
 // intentionally empty.
 export const v_0_2_35 = VersionInfo.of({
  version: '0.2.35:0',
  releaseNotes: {
    en_US:
      'Mirrors the Recap-app v0.2.68 transcribe-truncation fixes into the relay\'s server-side pipeline. (1) Relay\'s Gemini transcribe backend now sets maxOutputTokens: 65536 on its generateContent calls. Same root cause as the Recap-app fix — Gemini\'s default cap (~8192) silently truncates long-chunk transcripts mid-stream, costing the user 14-43 minutes of speech per affected chunk with no visible error. The relay\'s test-run benchmark suite was experiencing the same bug but the symptom got masked by the bigger chunks-share-TX optimization. (2) Gap-handling added to the relay\'s chunked-analyze.js planAnalysisWindows. When the transcript has a hole (e.g., one chunk truncated by Gemini\'s output cap), the old planner stopped at the hole and silently dropped any entries past it from analysis. New behavior: jump the body cursor forward to the next entry\'s body-stride boundary and keep planning windows instead of stopping. The /relay/summarize-url unified pipeline (and the test-run worker) both inherit this fix.',
  },
  migrations: {
    async up({ effects }) {},
    async down({ effects }) {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for 0.2.36:0. Release notes describe a dashboard UX pass
 // (Settings tab fix, persisted active tab, sticky scrollbar, suite-blurb
 // tooltip, pizza-tracker breadcrumb, synthetic pending row).
 // Both migration hooks are empty async functions.
 export const v_0_2_36 = VersionInfo.of({
  version: '0.2.36:0',
  releaseNotes: {
    // Keep this literal on ONE physical line: a single-quoted string cannot
    // contain a raw line break, so splitting it leaves the string unterminated.
    en_US:
      'Dashboard UX pass. (1) Settings tab actually works now. The button looked clickable but did nothing because renderSettingsTab shadowed the module-scoped `root` variable with a local one pointing at id="dashboard-root" — an element that doesn\'t exist. root.innerHTML = … threw a silent TypeError on every click. Dropped the local shadow so it uses the same id="root" container every other tab uses. (2) Active tab persists across hard refresh. localStorage stores whichever of "overview"/"jobs"/"settings" you were on; boot reads it and lazy-loads the matching data so the tab is populated without a second click. Refreshing while on Jobs no longer bounces you back to Overview. (3) Sticky horizontal scrollbar raised 16px off the bottom of the viewport and bumped from 12px tall to 20px. Easier mouse target — the old position was flush against the OS dock / browser chrome with a thin band that needed precision aim. (4) Suite blurb moved from always-on screen text to the Run benchmark suite button\'s hover tooltip. Reclaims 2-3 lines of vertical space for the operator\'s real work. (5) Pizza-tracker breadcrumb for single test runs: shows ● Downloading → ◐ Transcribing → ○ Analyzing → ○ Done with the active stage pulsing, the relay\'s current progress text rendered below. Stage parsed from the progress message keywords ("download", "transcrib", "analyz"). Polling cadence dropped from 5s to 2s so the breadcrumb feels live. Suite runs still use the "N of M perms complete" indicator since multi-job parallelism doesn\'t fit the linear-stages model. (6) Single runs land in the Jobs table the moment you click Run. A synthetic placeholder row appears at the top with status="pending" and the current progress text in the title column, soft-highlighted so it visually reads as in-flight. Each 2s poll refreshes both the breadcrumb AND the table — as transcribe completes the real row replaces the placeholder with the TX columns populated, then analyze columns fill in as windows land. No more hard-refresh-to-see-the-result.',
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for 0.2.37:0. Release notes describe the Jobs-table
 // column-resize fix (TH made non-draggable during a resize, render() gated
 // while a resize is active). Both migration hooks are empty async functions.
 export const v_0_2_37 = VersionInfo.of({
  version: '0.2.37:0',
  releaseNotes: {
    en_US:
      'Column-resize fix on the Jobs table. The cursor showed col-resize on hover and mousedown registered, but the column never actually changed width. Two bugs combining: (1) The TH carries draggable="true" for column-reordering. When the user mousedowns on the (draggable=false) resize handle inside it, browsers detect the subsequent motion as the START OF AN HTML5 DRAG-AND-DROP on the parent TH and suppress mousemove events in favor of dragstart/dragover. The resize handler\'s mousemove listener never fires. ev.stopPropagation() on the handle doesn\'t help — the drag-detection looks at the closest draggable ancestor at mousedown time. Workaround: turn the TH non-draggable for the duration of the resize, restore on mouseup. (2) The 2s pizza-tracker poll calls render() periodically, which replaces the table\'s innerHTML and detaches the TH the drag was operating on. Even after fix (1), any pending writes to the detached node went nowhere. Added a `if (_colResize) return` guard at the top of render() so polling skips renders while a resize is active. onJobsColResizeEnd() now triggers a catch-up render itself once the drag finishes so polling-state changes that fired during the drag get reflected.',
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for 0.2.38:0. Release notes describe the fixed 220px default
 // width for the Jobs-table Errors column.
 // Both migration hooks are empty async functions.
 export const v_0_2_38 = VersionInfo.of({
  version: '0.2.38:0',
  releaseNotes: {
    en_US:
      'Errors column gets a fixed default width. Long error strings (e.g., a stack trace or a full yt-dlp --list-subs dump) were stretching the column to the full unwrapped width of the longest message, blowing out the table\'s horizontal scroll and forcing the operator to scroll way right just to see a normal row. The column now defaults to 220px wide; the existing expand chevron (+) on each row reveals the full text when needed, and hovering the cell shows the full text in the native browser tooltip. User-resize via the column-edge drag handle still works — if the resized width is set in state.jobsColumnWidths.errors it overrides the 220px default. Implementation note: the inline max-width is applied to the TD itself (not just the TH) because in auto-layout tables, the TH width is treated as a hint and the cell content can still force the column wider. Stamping max-width on the TD enforces it.',
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for 0.2.39:0. Release notes list seven Jobs-table UX fixes
 // (drag-reorder render gate, shrinkable columns, wrapping headers, "DL time"
 // column, persistent breadcrumb, "Delete EVERYTHING", "Delete selected"
 // also wiping audit rows). Both migration hooks are empty async functions.
 export const v_0_2_39 = VersionInfo.of({
  version: '0.2.39:0',
  releaseNotes: {
    // Keep this literal on ONE physical line: a single-quoted string cannot
    // contain a raw line break, so splitting it leaves the string unterminated.
    en_US:
      'Seven Jobs-table UX fixes triggered by a benchmarking session. (1) Column drag-reorder now actually reorders. The visual drop-indicator showed but releasing the mouse left columns unchanged because the 2s pizza-tracker poll called render() mid-drag, replacing the table\'s innerHTML — the browser\'s drag-tracking lost its target and fired dragend WITHOUT firing drop. Added `if (state.jobsDragCol) return` to render() so polling defers re-renders while a drag is in progress; the dragend handler fires a catch-up render itself once the drop completes. (2) Columns can now shrink below their natural content width. Auto-layout tables ignore min-width:0 on TDs unless there\'s an explicit upper bound — renderJobsBody now stamps inline max-width on each TD based on the column\'s effective width (user-resized → catalog defaultWidth → unconstrained). User can drag the YouTube URL column smaller and the content ellipsis-truncates instead of forcing the column wider. (3) Column headers wrap to a second line when narrower than the label. Changed TH white-space from nowrap to normal + word-break: break-word + vertical-align: middle so the header row grows in height to fit the tallest wrapped header while single-line headers stay centered. (4) New "DL time" column shows total media download wall-time. Sits next to the existing DL s/MB column so the operator can spot videos where the download itself was slow vs. where transcribe dominated. (5) Pizza-tracker breadcrumb + synthetic pending row PERSIST across hard refresh and multi-device viewing. On boot, queries /admin/jobs for in-flight admin-test-run / summarize-url / transcribe-url jobs and reattaches a poll loop to each one — the breadcrumb and pending row reappear without operator action. (6) The "audit log entries are kept regardless" hint is replaced with a "Delete EVERYTHING" button. Two-step confirmation; truncates the audit log AND deletes every stored output. For the going-live cleanup step after a string of test-run cycles produced bad data. Stored-output panel is also always visible now (was hidden when count was 0) so the going-live button is reachable on a freshly-wiped relay. (7) "Delete selected" now wipes BOTH the stored output AND every audit row for the selected job_ids — single button, single mental model ("this row plus everything about it is gone"). Server-side: new POST /admin/wipe-all endpoint plus DELETE /admin/job-outputs accepts include_audit:true. server/audit-log.js gains clearAllAuditEntries() and deleteAuditRowsByJobIds() helpers.',
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for 0.2.40:0. Release notes describe the new "AN wall time"
 // column and the wall_time_ms fix that adds the download phase.
 // Both migration hooks are empty async functions.
 export const v_0_2_40 = VersionInfo.of({
  version: '0.2.40:0',
  releaseNotes: {
    en_US:
      'New AN wall time column + fixes wall_time_ms to include download. (1) New column "AN wall time" — elapsed clock time across the analyze phase, computed as max(window_end_ts) − min(window_start_ts) across all analyze audit rows for a job. Different from the existing "AN time (sum)" column (renamed from "AN time" to make the distinction obvious): AN sum is total backend compute (drives cost), AN wall is the time a user actually waits (drives perceived speed). For single-batch parallel analyze the two diverge by ~N×: 10 windows at 100s each = 1000s AN sum but ≈ 100s AN wall. For multi-batch (windows > configured concurrency, e.g. 10 windows at concurrency 8 → 2 sequential batches), AN wall spans both batches including any gap. Server-side: server/job-stats.js aggregateOne now emits both analyze_ms (existing, sum) and analyze_wall_time_ms (new). (2) wall_time_ms now includes the download phase. Previous calc used `ts - duration_ms` to recover the TX phase but missed the download_ms field — which can be 30-60s on long YouTube fetches. Fixed: startedAt = ts - duration_ms - download_ms. Operator\'s intuitive formula WALL ≈ DL + TX + AN_wall now actually balances (modulo small worker-bookkeeping gaps). (3) Tile labels in the column-visibility submenu updated to match the new column names so the right-click menu stays in sync.',
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for 0.2.41:0. Release notes describe the TX wall-vs-sum
 // column split and the per-chunk chunk_durations_ms tracking behind it.
 // Both migration hooks are empty async functions.
 export const v_0_2_41 = VersionInfo.of({
  version: '0.2.41:0',
  releaseNotes: {
    en_US:
      'TX side gets the same wall vs sum split that AN got in 0.2.40. (1) Renamed "TX time" column → "TX wall time" — the outer wall-time of the whole transcribe phase, which is what the operator actually waits for. (2) New "TX time (sum)" column — total backend compute summed across all concurrent chunks. For Gemini transcribe at concurrency 12 over 3 chunks each taking ~60s of API time: wall ≈ 60s, sum = 180s. Mirrors the AN time (sum) vs AN wall time pair so the operator can read cost (sum-of-compute) and user-perceived speed (wall-time) at a glance on both transcribe and analyze sides. (3) Server-side: both Gemini and Hardware transcribe backends now track per-chunk wall-times in chunk_durations_ms (array, one entry per chunk including the single-chunk case for consistency). Threaded through admin-test-run / summarize-url / transcribe-url worker routes into the audit row. server/job-stats.js aggregator sums chunk_durations_ms into transcribe_ms_sum; falls back to transcribe_ms (wall) when the field is absent so old audit rows from before this version still render sensibly.',
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for 0.2.42:0. Release notes describe follow-up column-resize
 // and header-wrap fixes (2-line clamp on .th-label, live width during drag,
 // 60px minimum, wider resize handle, reduced TH padding).
 // Both migration hooks are empty async functions.
 export const v_0_2_42 = VersionInfo.of({
  version: '0.2.42:0',
  releaseNotes: {
    en_US:
      'Column-resize + header-wrap fixes after the v0.2.39 changes produced a 1-letter-per-line vertical stack on narrow headers ("T / X / C / H / U / N / K / S"). (1) Header text is now wrapped inside an inner span with display: -webkit-box + -webkit-line-clamp: 2 + word-break: normal + overflow-wrap: break-word. Concretely: headers can wrap at WORD boundaries (spaces, between "TX" and "CHUNKS") to a maximum of 2 lines; if the column is too narrow even for that, the second line ellipsis-truncates. NEVER breaks inside a word — that\'s what produced the letter-per-line stack before. The TH itself stays display: table-cell so the table layout works; the line-clamp lives on the inner .th-label span. Native browser tooltip shows the full label on hover when ellipsised. (2) Cells now update WIDTH LIVE during the drag, not just after mouseup. The previous behavior set inline width on the TH only, but auto-layout tables use the cell content as the column-width floor — so the user dragged the TH smaller and saw no visual change until mouseup triggered a full re-render. Now onJobsColResizeMove also stamps max-width on every TD in the dragged column for instant feedback. (3) Minimum column width raised from 40px to 60px. Narrower than that is unusable (one-digit numbers + resize handle alone need ~50px combined). (4) Resize handle widened from 6px → 8px with a 2px visual accent that appears on hover. Easier to grab on narrow columns. (5) TH right padding reduced from 28px → 14px so the resize handle is reachable on tight columns and headers have room to lay out the label.',
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for 0.2.43:0. Release notes describe live job discovery on
 // the Jobs tab, the new "partial" status for truncated transcripts, and
 // draggable="false" on the th-label span.
 // Both migration hooks are empty async functions.
 export const v_0_2_43 = VersionInfo.of({
  version: '0.2.43:0',
  releaseNotes: {
    en_US:
      'Three real fixes from a Recap-app submission test. (1) Live job-discovery: the Jobs tab now polls /admin/jobs every 5s while open, picking up any in-flight job (admin test, Recap summarize-url, Recap transcribe-url) without manual refresh. Recap-app submissions land in the Jobs table as a pending synthetic row the moment the relay accepts the POST; the row updates through download → transcribe → analyze and morphs into the real audit-backed row as phases complete. Single-job case also engages the pizza-tracker breadcrumb. Polling stops automatically when the operator leaves the Jobs tab. (2) Silent-transcript-truncation now flips the job status to PARTIAL with an explanatory error string. Previous behavior: when Gemini hit a chunk\'s output-token cap mid-transcript (the still-recurring failure mode from gemini-2.5-flash and flash-lite even with maxOutputTokens=65536), the chunk "succeeded" silently with a short transcript and the Jobs table showed SUCCESS for a job that lost real minutes of speech. New behavior: server/backends/gemini.js compares each chunk\'s last-emitted timestamp to the chunk\'s expected duration; when coverage drops below 80%, it returns truncated_chunks: [...] in the result envelope. The summarize-url / transcribe-url / admin-test-run worker routes propagate that into the audit row as status="partial" with a `transcribe: N chunk(s) truncated — missing ~Xs of speech (model: ...)` error message. server/job-stats.js handles the new "partial" tx status — overall_status of the whole job becomes "partial" regardless of analyze success, since analyzing a truncated transcript produces a misleading result. (3) Column drag-reorder: added draggable="false" on the inner th-label span (added in v0.2.42) so browsers don\'t initiate a text-drag from the label text and confuse the column-reorder drag-tracking. Combined with the v0.2.39 render-gate fix, drag-reorder should now actually drop columns into their target positions.',
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for 0.2.44:0. Release notes describe the TX wall-time fix
 // (download excluded via txPhaseStart), yt-dlp title backfill for Recap
 // submissions, and temporary [drag] diagnostic logging.
 // Both migration hooks are empty async functions.
 export const v_0_2_44 = VersionInfo.of({
  version: '0.2.44:0',
  releaseNotes: {
    en_US:
      'Three bug fixes from an end-to-end Recap-app submission test. (1) TX wall time was including download time. The audit row\'s duration_ms used `Date.now() - workerT0` which spans the whole worker run; download was supposed to be carried separately in download_ms. On a 94-min video this made the "TX wall time" column show 116s when the actual transcribe phase took ~63s and download took the other ~53s. Fixed: stamp a `txPhaseStart` marker after download completes and use it for duration_ms. Same fix applied to /relay/summarize-url AND /relay/transcribe-url worker routes. (2) Recap-app submissions to the relay always showed "Untitled" in the Jobs table because the relay-mode branch in Recap-app skips the YouTube-metadata fetch (it kicks off summarizeUrl with just the URL). Fixed at the relay side: server/routes/transcribe-url.js downloadYouTube() now adds `--print "before_dl:%(title)s"` to its yt-dlp invocation, captures the title from stdout, and returns it as `audio.title`. The summarize-url and transcribe-url workers backfill `title` with the yt-dlp value when the client didn\'t pass one. Recap submissions now show the actual video title in the Jobs table. (3) Diagnostic console.log for column drag-reorder. Multiple theoretical causes have been patched (v0.2.39 render-gate, v0.2.43 draggable=false on inner span) but the operator still reports the feature not working. Logs now print to DevTools console at each drag event ([drag] start / drop / splice / new order / end) so the next bug report has concrete data: did dragstart fire? did drop fire? was the new order computed correctly? was it saved? Cmd+Opt+J in Firefox/Chrome to open. Also tweaked the splice insert position to `to` (target slot) for both directions instead of the asymmetric "+0" arithmetic — clearer semantics.',
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for 0.2.45:0. Release notes describe two analysis-quality
 // fixes: sortAndDedupeTranscript() for out-of-order entries and a rewritten
 // buildWindowPrompt to reduce section over-segmentation.
 // Both migration hooks are empty async functions.
 export const v_0_2_45 = VersionInfo.of({
  version: '0.2.45:0',
  releaseNotes: {
    en_US:
      'Two analysis-quality fixes. (1) Out-of-order transcript entries: Gemini flash variants sometimes emit entries within a single chunk in non-chronological order (observed: a section showing 4:27 → 4:40 → 4:56 → 0:00 → 0:18 → 0:30, as if the model treated the audio as multiple "thoughts" and reset its mental clock between them). Without a post-stitch sort the downstream analyzer sees those entries out of order and the rendered transcript jumps backward mid-section. Added sortAndDedupeTranscript() to server/backends/gemini.js — parses the joined chunk text, sorts entries by absolute offset, drops near-duplicates (within 1s offset AND identical leading 40 chars), and re-emits canonical [H:MM:SS] / [MM:SS] form. Skips the rebuild when entries are already sorted (cheap fast-path for well-behaved chunks). Logs a warning when re-sort fires so the operator can spot which model misbehaves. (2) Section over-segmentation: the analyze prompt was getting 4-5 sections per 18-min window from the model, producing ~25 sections for a 94-min podcast which is way too granular. Rewrote server/chunked-analyze.js buildWindowPrompt to: (a) tell the model the window\'s actual minute-length, (b) suggest a section-count target scaled to that length (1 section for ≤10 min, 1-2 for ≤20 min, 2-3 for longer), (c) explicitly call out NOT to split a continuous discussion just because the speaker pauses or shifts subtly, (d) require ≥5 min of substantive discussion per section, (e) ask for concise noun-phrase titles (≤8 words) and 2-3 sentence summaries. For a 90-min podcast this should drop to ~5-10 broad sections instead of 25 narrow ones — closer to the "10 segments feels right regardless of length" target.',
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for 0.2.46:0. Release notes describe the drag-reorder fix for
 // newly-added columns (currentOrderedKeys() now reads the order from the DOM)
 // and removal of the v0.2.44 diagnostic logging.
 // Both migration hooks are empty async functions.
 export const v_0_2_46 = VersionInfo.of({
  version: '0.2.46:0',
  releaseNotes: {
    en_US:
      'Drag-reorder now works for newly-added columns. Root cause (finally found via the v0.2.44 diagnostic logging): currentOrderedKeys() was returning state.jobsColumnOrder directly — the saved order from localStorage. But applyColumnPrefs renders columns in saved-order-FIRST + new-catalog-keys-APPENDED order, so a relay update that added new columns (e.g. download_ms, transcribe_ms_sum, analyze_wall_time_ms) showed them in the table but they were NOT in the saved order. When the user dragged one of those new columns, indexOf(dragKey) returned -1, the splice short-circuited, and the column visibly snapped back. Fixed: currentOrderedKeys() now reads from the DOM (.jobs-table th[data-col]) — what applyColumnPrefs already computed and rendered, which always reflects the FULL current order including appended new catalog keys. Once the user successfully drags a new column, the now-complete order gets saved to localStorage and subsequent drags work naturally. Also removed the [drag] diagnostic console.log statements added in v0.2.44 since they\'ve served their purpose.',
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for 0.2.47:0. Release notes describe the always-rendered
 // select checkbox, the dimmed view-eyeball with tooltip, and the new
 // relay_save_user_outputs setting (default OFF).
 // Both migration hooks are empty async functions.
 export const v_0_2_47 = VersionInfo.of({
  version: '0.2.47:0',
  releaseNotes: {
    en_US:
      'Three fixes after the "two empty columns on the left" report. (1) Select checkbox now always renders. Previously the select cell hid itself when has_output was false, on the theory that there was nothing to delete. But v0.2.39 extended "Delete selected" to also wipe audit-log rows for the selected job_ids — so the checkbox IS useful even when the output is gone (or was never saved). Pending/in-flight rows render a disabled checkbox so the column visually lines up. (2) View-eyeball cell now renders a dimmed eyeball with a tooltip explaining WHY there\'s no output (either Delete EVERYTHING was run earlier, or this is Recap user-traffic with relay_save_user_outputs=false). Previously rendered a bare em-dash with no explanation. (3) New "Output storage" toggle in the Settings tab: relay_save_user_outputs. When enabled, transcripts + analysis JSON from Recap-app user submissions get persisted to /data/relay-outputs/. Default OFF (privacy default — don\'t save user transcripts unless operator explicitly opts in). Admin test-runs always save regardless of this flag. Backend changes: server/routes/admin.js GET/PUT /admin/settings now exposes a SETTINGS_BOOLS list alongside the existing chunking knobs; PUT handler validates boolean values separately. Dashboard saveSettings() reads checkbox state for boolean inputs.',
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for 0.2.48:0. Release notes describe three transcript-quality
 // fixes: sort/dedupe on the single-shot path, the mergeShortEntries() helper,
 // and a rewritten transcribe prompt.
 // Both migration hooks are empty async functions.
 export const v_0_2_48 = VersionInfo.of({
  version: '0.2.48:0',
  releaseNotes: {
    en_US:
      'Three transcript-quality fixes after the v0.2.45 sort fix didn\'t catch everything. (1) sortAndDedupeTranscript() now also runs on the SINGLE-SHOT transcribe path (previously only on the chunked path) — Gemini occasionally emits entries out of order even within a single API call, especially on flash variants treating long audio as multiple mental "thoughts." Same fix applied to the Hardware (Parakeet) backend single-shot path. (2) New mergeShortEntries() helper coalesces too-granular entries — when the model emits one entry per breath (`► 4:05 um,` / `► 4:07 that is` / `► 4:09 usually based`) producing unreadable transcripts. Merges adjacent entries that are <60 chars AND within 10s of the previous entry\'s timestamp, until each entry contains a complete thought (≥60 chars OR sits at a real speech-pause). Conservative thresholds preserve real short interjections like "Yeah." or "Right." when they\'re meaningful exchanges. (3) Transcribe prompt rewritten to ask for ~30-60s entries containing complete thoughts (2-5 sentences, ~50-200 words) rather than the previous "every 15-30 seconds or at natural pauses" which the model interpreted as "as granular as possible." Also added an explicit "TIMESTAMPS MUST INCREASE MONOTONICALLY" rule plus the [H:MM:SS] format guidance for audio over 60 min. Both gemini.js and hardware.js apply sort + merge to every output path now. Helpers exported from gemini.js, imported into hardware.js to avoid duplication.',
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for 0.2.49:0. Release notes describe the save-outputs toggle
 // label fix and the move of backend routing + Gemini model selection into
 // the dashboard Settings tab (SETTINGS_ENUMS registry).
 // Both migration hooks are empty async functions.
 export const v_0_2_49 = VersionInfo.of({
  version: '0.2.49:0',
  releaseNotes: {
    en_US:
      'Two Settings-tab improvements. (1) "Save user-submission outputs to disk" toggle now correctly updates its "enabled / disabled" label after Save. The numeric inputs reflected their new values automatically, but the checkbox companion span was rendered with the old state and stayed stale until a full re-render — which the save handler intentionally avoided to keep the green Saved pill visible. Fix patches the toggle\'s label sibling directly in the save callback. (2) Backend routing + Gemini model selection migrated from StartOS actions into the dashboard\'s Settings tab. The "AI backends & routing" section at the top of Settings now has four dropdowns: Gemini transcription model, Gemini analysis model, transcribe backend preference (Gemini-first / hardware-first / Gemini-only / hardware-only), and analyze backend preference. Live-reloaded — changes apply to the next request, no restart. The matching StartOS action still works for operators who prefer that surface; both paths write to the same relay-config.json. server/routes/admin.js: new SETTINGS_ENUMS registry alongside SETTINGS_KEYS / SETTINGS_BOOLS; GET /admin/settings returns enum option metadata; PUT validates submitted values against the allowed set.',
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for 0.2.50:0. Release notes describe the editable transcribe
 // and analyze prompts in the Settings tab and the removal of the
 // "Set Backend Routing & Models" StartOS action.
 // Both migration hooks are empty async functions.
 export const v_0_2_50 = VersionInfo.of({
  version: '0.2.50:0',
  releaseNotes: {
    en_US:
      'Editable LLM prompts + removal of duplicate "Set Backend Routing & Models" StartOS action. (1) Two new textareas in the dashboard Settings tab: "Transcribe prompt (Gemini only)" and "Analyze prompt (Gemini + operator-hardware)". Both default to the current hardcoded prompt body; operator can edit the instruction text while keeping the auto-prepended metadata block (transcribe) and template variables {{transcript}} / {{windowMin}} / {{targetSections}} (analyze) intact. Each textarea has a "Reset to default" button and a "Show current default" toggle that reveals the latest built-in prompt at any time — important because saving with the default text intentionally stores empty string so future default-prompt changes in code flow through to the operator without any action. Save-time validation enforces that the analyze prompt still contains {{transcript}} and a "JSON" output instruction; the relay refuses to save an override that would silently break the pipeline. The transcribe-prompt override applies to the Gemini path only — operator-hardware (Parakeet) is pure STT with no prompt input; the analyze-prompt override applies to both Gemini and operator-hardware (Gemma) analyze paths since both accept the same instruction text. (2) Removed the "Set Backend Routing & Models" StartOS action. Backend routing + Gemini model selection have been in the dashboard Settings tab since v0.2.49 and the duplicate surface was a source of "which one wins" confusion. The four config fields (relay_gemini_transcription_model, relay_gemini_analysis_model, relay_transcribe_backend_preference, relay_analyze_backend_preference) stay where they were in relay-config.json — only the StartOS action UI is gone. setBackendRouting.ts deleted; actions/index.ts no longer imports or registers it.',
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for 0.2.51:0. Release notes describe the Settings tab visual
 // overhaul (pill groups, slider + number pairs, two-column grid, switch-style
 // toggle, restyled section headers and prompt textareas).
 // Both migration hooks are empty async functions.
 export const v_0_2_51 = VersionInfo.of({
  version: '0.2.51:0',
  releaseNotes: {
    en_US:
      'Settings tab visual overhaul — matches the rest of the dashboard. (1) Dropdowns for model picker + backend routing replaced with selectable pill groups (same .tr-toggle language used on the Jobs tab\'s test-run panel). Five Gemini models render as short pills "3.1 Pro / 3 Flash / 3.1 Flash-Lite / 2.5 Pro / 2.5 Flash" with the full long description on hover. Four routing modes render as short pills "Gemini→HW / HW→Gemini / Gemini only / HW only". (2) Number spinners replaced with slider + number-input pairs — typing in the number snaps the slider, dragging the slider updates the number live, both clamped to the field\'s min/max. (3) Two-column grid layout for the Gemini and Operator-hardware tuning panels so 10 sliders fit side-by-side instead of stacked. (4) New "Shared & output" panel collapses the single shared knob plus the save-user-outputs toggle into one compact box. (5) Boolean toggle is now a visual switch (animated pill) instead of a checkbox — matches typical settings-panel UX. (6) Section headers now match the rest of the dashboard — small uppercase eyebrows with a colored dot (blue for Gemini, orange for hardware, purple for routing, etc.) and the same panel/border treatment used by the Overview tab\'s tiles. (7) LLM prompt textareas restyled to fit the new panel language with the same "Show current default" and "Reset to default" controls. (8) Default-value hint only appears when the operator value differs from the default — when current matches default, the row stays clean. Reset / Save buttons aligned with a thin separator above. The whole page now lives on one screen at 1000px max-width and feels like a control panel instead of a generic form.',
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for 0.2.52:0. Release notes describe carrying unused Core
 // lifetime credits into purchased_balance on the first Core → paid-tier
 // promotion, centralized in applyTierPromotion(row, newTier).
 // Both migration hooks are empty async functions.
 export const v_0_2_52 = VersionInfo.of({
  version: '0.2.52:0',
  releaseNotes: {
    en_US:
      'Tier upgrade now carries Core leftover credits forward. Previously, a Core user with 6 of 6 lifetime credits remaining who activated a Pro license would see their balance flip to "50 / 50 (Pro)" — the 6 unused Core credits silently vanished because Core uses a lifetime counter and Pro uses a separate monthly counter. Now: on the first Core → paid-tier promotion (detected when tier_snapshot is still "core" and the resolved license tier is "pro" or "max"), the unused Core lifetime allotment (coreLifetimeCap − lifetime_consumed) is transferred into purchased_balance — the same durable top-up bucket BTCPay credit purchases land in. The transferred credits never expire and stack on top of the new tier\'s monthly allotment, so total = monthly cap + carried-over Core credit + any prior top-up. Net effect for the typical upgrade path: a Core user who has used 0 of 6 lifetime credits and activates Pro now sees 56 total (50 Pro + 6 carried-over) instead of 50. The promotion handler is now centralized in a single `applyTierPromotion(row, newTier)` helper called from both `commitCredit` (the metered transcribe / analyze routes) AND `/relay/balance` (which Recap calls first after license activation to refresh the toolbar). Centralizing it fixes a pre-existing bug where a /relay/balance call between license activation and the first metered call could silently flip tier_snapshot to "pro", causing the subsequent commitCredit to skip the promotion logic entirely. The helper persists immediately when a promotion fires so the leftover transfer is durable across relay restarts.',
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version entry for 0.2.53:0. Release notes describe adding a `title` field
 // to the summarize-url completion envelope so Recap can show the resolved
 // media title. Both migration hooks are empty async functions.
 export const v_0_2_53 = VersionInfo.of({
  version: '0.2.53:0',
  releaseNotes: {
    en_US:
      'Echo resolved media title back to Recap in the summarize-url completion envelope. Previously, the relay\'s worker extracted the YouTube title via yt-dlp (line ~219 of summarize-url.js — "if (!title && audio.title) title = audio.title") and stored it on the audit log, but the SSE "done" event\'s result body did NOT include it. Recap-app\'s relay-mode branch had no way to learn the real title, so every fresh URL paste landed in the library list as "Untitled" — the user would have to double-click to rename each entry manually. Fixed by adding a single `title` field to the markComplete result envelope alongside transcript / analysis / model / audio_seconds. Paired with Recap 0.2.70 which reads this field and uses it for the saveToHistory call + the transcript_ready / result SSE events the browser renders. Older Recap clients (< 0.2.70) ignore the new field — fully backwards compatible.',
  },
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version-graph entry for relay release 0.2.54.
 // Summary of the notes below: license verifier construction fixed (it was
 // always null, degrading paid licenses to Core), Dockerfile now copies
 // assets/ so issuer.pub exists at runtime, plus stitcher containment dedup.
 export const v_0_2_54 = VersionInfo.of({
  version: '0.2.54:0',
  releaseNotes: {
    en_US:
      'Two real root-cause fixes (no entitlement-name shortcuts). (1) License verifier was always null, silently degrading every paid license to Core. keysat-client.js\'s loadVerifier() called `mod.createVerifier()` — but the licensing-client doesn\'t export a `createVerifier` factory; it exports a `Verifier` CLASS that must be constructed as `new Verifier(PublicKey.fromPem(pem))`. So `verifier` stayed null forever → offline verification was skipped → `entitlements` came back as an empty Set → tierFromEntitlements() always returned "core" no matter what the license actually contained. A Pro license with the documented `relay_pro` entitlement (correctly configured in Keysat) still resolved to Core. Fixed by using the documented constructor shape AND loading the issuer PEM from /app/assets/issuer.pub at first use. The same PEM file Recap-app ships, so any license that validates in Recap now validates here too. (2) Dockerfile wasn\'t copying the assets/ directory into the runtime image, so even with the verifier code fixed, issuer.pub wouldn\'t exist at runtime. Added `COPY assets/ ./assets/` alongside the public/ and server/ copies. The entitlement-name namespacing stays intact: relay still requires explicit "relay_pro" / "relay_max" to grant Pro/Max — bare "pro"/"max" entitlements remain a Recap-app-only signal as originally designed. Also includes the stitcher containment dedup from the earlier 0.2.54 build draft: sections that the analyze model emits as fully-contained inside another section (e.g., "Decentralizing 1:10–1:12" inside "Systemic Critique 1:10–1:23") are now dropped instead of both surviving into the final output.',
  },
  // Both handlers have empty bodies: nothing is migrated in either direction.
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version-graph entry for relay release 0.2.55.
 // Summary of the notes below: diagnostic-only release adding a per-job
 // audit-row drill-down (GET /admin/job/:id/details + "Inspect" column on
 // the Jobs tab).
 export const v_0_2_55 = VersionInfo.of({
  version: '0.2.55:0',
  releaseNotes: {
    en_US:
      'Phase 1 diagnostic for the "missing analysis windows" issue: per-job audit-row drill-down on the dashboard\'s Jobs tab. (1) New backend endpoint GET /admin/job/:id/details returns every audit row keyed to a single job_id (transcribe row + one row per analyze window), pre-sorted by timestamp ascending so the pipeline reads in execution order. Includes a per-job summary with transcribe status, truncated-chunk count, total analyze rows, failed-window count, and a "windows planned vs windows logged" delta that flags jobs where analyze code planned N windows but only M < N audit rows landed (signature of a worker crash or silent dropped window). (2) New "Inspect" 🔍 column on the Jobs table. Click the magnifier next to any row to drop in a sub-row beneath it showing the per-window detail: timestamp, pipeline, window index, status pill, duration, model, and the recorded error message. Sub-row is lazy-loaded — first click fetches /admin/job/:id/details, subsequent toggles use the cached result. (3) Operator can now answer "WHICH window failed and WHY" without shell access to /data/relay-calls.ndjson. No fix to the underlying gap problem ships in this version — this release is purely diagnostic so we can confirm hypothesis 1 (single window silently failed) vs hypothesis 2 (transcribe chunk truncation upstream) on real failed runs before writing the retry + empty-sections-detection code for v0.2.56.',
  },
  // Both handlers have empty bodies: nothing is migrated in either direction.
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version-graph entry for relay release 0.2.56.
 // Summary of the notes below: five dashboard UX fixes (continuous in-flight
 // discovery, Jobs-tab refresh on every entry, click-to-copy on errors,
 // "Active jobs" callout, global in-flight pill).
 // The release-notes literal is kept on ONE physical line: a single-quoted
 // string cannot contain a raw line break.
 export const v_0_2_56 = VersionInfo.of({
  version: '0.2.56:0',
  releaseNotes: {
    en_US:
      'Five dashboard UX fixes prompted by real-traffic testing of v0.2.55. (1) In-flight job discovery now runs continuously after admin login, not just while the operator is on the Jobs tab. Previous behavior gated the 5-second poll behind `if (state.activeTab !== "jobs") return`, so a Recap-submitted job that arrived while the operator was on Overview or Settings was silently missed by the pending-row pipeline — it only appeared in the table on the next manual page refresh AFTER it had already completed. New behavior: discovery starts the moment auth resolves and keeps polling regardless of which tab is visible. Cost is one small GET /admin/jobs every 3s (was 5s; tightened so a 90-second Recap submit can be caught mid-flight rather than missed entirely). Also fires one immediate poll on Jobs-tab entry so the operator doesn\'t wait up to 5s for the first tick. (2) Jobs-tab data now refreshes on EVERY tab entry rather than only on first visit. Previously `if (tab === "jobs" && !state.jobsData)` cached jobsData forever, so a job that completed while the operator was on Settings never appeared until a hard page refresh. Now: loadJobs() runs on every Jobs-tab click — same cost as a manual refresh, but automatic. (3) Click-to-copy button (📋) on the errors column. Hover over any errors cell to reveal a copy button next to the expand chevron; clicking it ships the raw (un-HTML-escaped) error text to the clipboard via the async clipboard API (with a hidden-textarea + execCommand fallback for older browsers). 1.1s "✓" confirmation animation on the button so the operator knows the click landed. (4) Dedicated "Active jobs" callout at the top of the Jobs tab. The pizza-tracker breadcrumb used to live inside the test-run panel mixed in with the model-picker controls; lifted out into its own bordered card above the panel so it\'s the first thing the operator sees on Jobs-tab entry — regardless of whether the in-flight job came from the test-run panel or a Recap submission. Each card shows a "Source: Test run | Recap" tag (color-coded), elapsed time, jobId short hash, and the 4-stage breadcrumb (Downloading → Transcribing → Analyzing → Done) with the active stage pulsing. Handles multiple concurrent jobs gracefully — one card per job, newest-first. (5) Global in-flight pill at the top-right of every page. Fixed-positioned outside the tab content so it persists across tab switches: a compact pulsing pill that says "1 job in flight • Transcribing" (or "N jobs in flight" when multiple) appears whenever state.activeJobs has at least one entry. Clicking it switches to the Jobs tab. Auto-hides when the operator IS on the Jobs tab (the dedicated callout takes over there). Combined effect: no matter where you are in the dashboard, you can see at a glance that a Recap-submitted summarize is running. NOTE: this release does NOT fix the underlying TX-chunk-truncation gap (when gemini-3.1-flash-lite hits maxOutputTokens on long dense chunks, the transcript silently loses 5-10 minutes of speech). That fix — auto-retry-with-bisection on truncated chunks — is targeted for v0.2.57.',
  },
  // Both handlers have empty bodies: nothing is migrated in either direction.
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version-graph entry for relay release 0.2.57.
 // Summary of the notes below: /relay/summarize-url treats an empty title
 // and the literal sentinel "Untitled" as "no title supplied" so the yt-dlp
 // title fallback fires.
 export const v_0_2_57 = VersionInfo.of({
  version: '0.2.57:0',
  releaseNotes: {
    en_US:
      'Defensive title normalization in /relay/summarize-url. The yt-dlp-extracted-title fallback at line ~219 (`if (!title && audio.title) title = audio.title`) was only firing when the client either omitted the title field entirely or passed an empty string — both of which yield a falsy `title`. Recap-app < 0.2.71 actually passed the literal string "Untitled" when the operator hadn\'t pre-fetched metadata (because Recap\'s relay-mode branch built its title hint as `titleSurrogate = itemTitle || "Untitled"`). `"Untitled"` is truthy, so the fallback gate never tripped and the relay echoed `"Untitled"` back as if it were a real title — library entries on Recap stayed "Untitled" forever even though yt-dlp had fetched the right one. Fix: normalize the incoming `bodyTitle` at the route entry — treat both empty string and the literal sentinel "Untitled" as "no title supplied" (set to null). The yt-dlp fallback then fires correctly for both new Recap clients (0.2.71+, which now pass empty string) and old Recap clients (still passing "Untitled"). No behavior change for any client that passes a real title.',
  },
  // Both handlers have empty bodies: nothing is migrated in either direction.
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version-graph entry for relay release 0.2.58.
 // Summary of the notes below: discovery polling also starts after a fresh
 // login, a "Discovery" diagnostic strip is added to the Jobs tab, and the
 // in-flight source label is derived from job.kind.
 export const v_0_2_58 = VersionInfo.of({
  version: '0.2.58:0',
  releaseNotes: {
    en_US:
      'Three dashboard diagnosability fixes after 0.2.57\'s pizza-tracker still didn\'t engage for Recap-submitted jobs on a real test. (1) Discovery polling now ALSO starts on a fresh login (form-submit handler), not just on initial boot. Previously, an admin who logged in after a session timeout would leave _discoverPollHandle null forever — no pending row ever showed up for Recap submissions until the operator hard-refreshed the page. The boot() path already started discovery; the login handler did not. Fixed by mirroring the boot()\'s post-auth initialization (tryResumeActiveSingleRuns + startInFlightDiscoveryPoll) in the login handler. (2) New "Discovery" diagnostic strip at the top of the Jobs tab. Always visible (not gated on having active jobs), it shows: polling state (ON / OFF / STALE / ERROR), seconds since the last poll, how many running jobs the last poll found, current active-jobs count, total polls since auth, and the last error message if any. Surface for debugging "I submitted a job but no pending row appeared" — operator can immediately see whether the discovery code is running, whether /admin/jobs is returning the expected jobs, or whether something is silently 401-ing. (3) Source classification on the in-flight callout now uses job.kind directly instead of inferring from metadata fields. Previously, a Recap summarize-url job was mislabeled as "Test run" because its metadata included transcribe_backend/analyze_backend (which the summarize-url route stamps for its own bookkeeping). The classification now reads "Recap (summarize)" / "Recap (transcribe)" / "Test run" based on the relay-side kind string, which is the authoritative source.',
  },
  // Both handlers have empty bodies: nothing is migrated in either direction.
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version-graph entry for relay release 0.2.59.
 // Summary of the notes below: periodic polls no longer trigger a full
 // render() that clobbered the Settings form mid-edit (renderLightweight(),
 // Settings-tab render guard, loadJobs() only on the Jobs tab).
 export const v_0_2_59 = VersionInfo.of({
  version: '0.2.59:0',
  releaseNotes: {
    en_US:
      'Critical regression fix from 0.2.58: the continuous discovery polling was firing render() every 3 seconds (and pollTestRunJob was firing it every 2s), which clobbered the Settings form mid-edit. Symptom: drag a slider, the value snaps back to the last-saved position before you can hit Save. Three coordinated fixes: (1) New renderLightweight() helper that updates ONLY the out-of-root global in-flight bar (and the Jobs-tab body when on Jobs). Every periodic poll path now calls renderLightweight() instead of render() — discovery loop, pollTestRunJob progress callback, tryResumeActiveSingleRuns. The global pill still refreshes in real time; the body of whichever tab the operator IS on stays untouched unless they navigate. (2) Settings-tab guard inside render() itself: if activeTab==="settings" AND the body already has the Settings UI rendered (detected via a DOM probe for the .settings-actions element), skip the root.innerHTML rewrite. Switching INTO Settings still works because at that point the body has Overview/Jobs content (no .settings-actions yet); the first render fires, settings paint, subsequent background polls are gated. Save / Reset already use surgical DOM patches (they never relied on a full re-render) so this guard is transparent to the existing Settings UX. (3) loadJobs() in the periodic-poll finally blocks now only fires when the operator is actually on the Jobs tab. No wasted fetch + render when the operator is mid-edit on Settings; the next Jobs-tab entry auto-refreshes via switchTab. Net effect: sliders / number inputs / pill clicks on the Settings tab now stick — operator can drag, tweak, and hit Save without the form snapping back to stale state.',
  },
  // Both handlers have empty bodies: nothing is migrated in either direction.
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version-graph entry for relay release 0.2.60.
 // Summary of the notes below: markComplete() no longer double-nests the SSE
 // "done" payload (so `data.result.title` is readable), and the Discovery
 // strip gains a "view raw response" button.
 export const v_0_2_60 = VersionInfo.of({
  version: '0.2.60:0',
  releaseNotes: {
    en_US:
      'Two fixes after a third "still doesn\'t work" iteration on the title bug. (1) ACTUAL root cause of the "Untitled" library entry persistence: jobs.js#markComplete() was double-nesting the SSE done event. Callers pass `{result: {title, transcript, ...inner}, credit_charged, tier}` as the envelope; markComplete then did `appendEvent(jobId, "done", { result: envelope })` which produced `data.result = envelope` on the SSE side — so the actual fields lived at `data.result.result.title`, not `data.result.title`. Recap-app\'s SSE handler reads `finalResult = data.result` then accesses `finalResult.title`, which was undefined every time. That\'s why the audit log showed the correct title (recorded from the relay\'s local `title` variable, not from this SSE path) while Recap\'s library showed "Untitled". Fixed by unwrapping inside markComplete: SSE done event now emits `data: {result: <inner>, credit_charged, tier}` so subscribers can read `data.result.title` directly. Internal consumers (job.result on the in-memory job record) still see the full envelope shape. Backwards-compatible with older Recap clients that read the same nested location — they\'ll just get undefined where they already got undefined. (2) New "view raw response" button on the Discovery diagnostic strip. Shows what /admin/jobs actually returned on the last poll: total entries count, breakdown by kind, breakdown by status, and the 5 most recent jobs (any status) with id/kind/status/age/progress. Critical for diagnosing "Discovery ON, found: 0 running" when the operator KNOWS a job is in flight — operator can immediately see whether the relay\'s jobs map is empty (routing / cookie issue), whether the job has a kind the filter doesn\'t recognize, or whether the job status flipped to something unexpected.',
  },
  // Both handlers have empty bodies: nothing is migrated in either direction.
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version-graph entry for relay release 0.2.61.
 // Summary of the notes below: the stitcher clamps each section's endIndex
 // to its source window, and the Discovery strip stores the raw /admin/jobs
 // response sample with a Copy JSON button.
 export const v_0_2_61 = VersionInfo.of({
  version: '0.2.61:0',
  releaseNotes: {
    en_US:
      'Two fixes. (1) Stitcher now clamps each section\'s endIndex to its source window\'s actual range. Symptom that prompted this: an analyze window of 18 minutes produced a section spanning 52 minutes in the final UI (e.g., "Developing Sovereign Network Infrastructure" 36:00 — 1:28:00). Root cause: the LLM occasionally emits a section with an inflated endIndex (e.g., 9999) that goes far past the window\'s transcript range. The stitcher computed globalEnd = window.startIdx + section.endIndex without bounds-checking, so the inflated value propagated all the way to the final stitched output. The Recap-app side\'s `Math.min(relayEntries.length - 1, s.endIndex)` clamp in index.js prevented total breakage but allowed the bogus extended range. Now: stitchAnalysisResults() applies Math.min(rawGlobalEnd, window.endIdx) when accepting each section, so a window\'s sections can never extend beyond the entries that window was given. Same clamp also applied to the per-window streaming callback so SSE clients rendering partials don\'t briefly show inflated bounds before the final stitch re-trims. (2) Discovery diagnostic strip now stashes the FULL raw /admin/jobs response sample (first 5 entries + envelope top-level keys) and exposes a 📋 Copy JSON button. Operators chasing "Discovery ON, found: 0 running but I just submitted a job" can copy the actual response payload and paste it into a bug report — definitively answers whether the relay\'s jobs map is empty, returning entries with field names we don\'t expect, etc.',
  },
  // Both handlers have empty bodies: nothing is migrated in either direction.
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version-graph entry for relay release 0.2.62.
 // Summary of the notes below: output-token caps (Gemini TX/AN, hardware AN)
 // and section-count target strings become operator-tunable Settings fields.
 export const v_0_2_62 = VersionInfo.of({
  version: '0.2.62:0',
  releaseNotes: {
    en_US:
      'Operator-tunable output token caps + section-count targets. Until now four of the most performance- and quality-impacting knobs were hardcoded: Gemini transcribe maxOutputTokens (65536), Gemini analyze maxOutputTokens (implicit Google default ~8192), hardware analyze max_tokens (16000), and the section-count target string used to populate {{targetSections}} in the analyze prompt (1/1-2/2-3 based on window length). All four are now first-class Settings tab fields with live-reload. (1) Gemini Backend panel: TWO new sliders "TX max output tokens" (1024-65536, default 65536) and "AN max output tokens" (1024-65536, default 8192). Lower TX = cheaper + faster but more truncation risk on dense audio. AN was previously implicit Google default; making it explicit lets operators with reasoning-heavy models cap unbounded thinking preambles. (2) Operator Hardware panel: new "AN max output tokens" slider (1024-32768, default 16000). Pass-through to the vLLM/Ollama chat-completion `max_tokens` parameter. (3) New "Section-count targets" panel below the LLM prompts: three free-form text inputs (short ≤10min / medium ≤20min / long >20min). Each is spliced into the analyze prompt as {{targetSections}} based on each window\'s actual transcript duration. Defaults match the historical hardcoded values ("1 section" / "1-2 sections" / "2-3 sections") so behavior is unchanged unless the operator overrides. Empty string = use default (future code-side default changes flow through). Net effect: an operator who wants denser segmentation can set medium to "3-4 sections" without rewriting the whole analyze prompt template; one who wants to constrain Gemini AN output token spend can cap maxOutputTokens at 4096 in the new slider. All knobs validated server-side (1024 floor on token caps to prevent unusable values).',
  },
  // Both handlers have empty bodies: nothing is migrated in either direction.
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version-graph entry for relay release 0.2.63.
 // Summary of the notes below: downloadYouTube() extracts channel,
 // description and chapters via yt-dlp as transcribe-prompt fallbacks, and
 // the default speaker-ID rule is strengthened.
 export const v_0_2_63 = VersionInfo.of({
  version: '0.2.63:0',
  releaseNotes: {
    en_US:
      'Two coordinated fixes that restore the speaker-name labels the recap-app direct path gets but the relay path was losing. (1) The relay\'s downloadYouTube() now extracts the FULL YouTube metadata block during the existing yt-dlp invocation — channel, description (capped at 2000 chars), and chapters — not just the title. Uses yt-dlp\'s `--print before_dl:%(.{title,channel,description,chapters})j` template to get a single JSON line containing all four fields. The metadata is returned alongside the audio file path and flows into the transcribe prompt\'s context block as fallbacks: when /relay/summarize-url\'s request body has empty channel/description/chapters (older Recap clients, or any non-Recap caller), the yt-dlp-extracted values fill in. With this metadata present, the model can correctly assign speaker labels (host name from channel, guest names from description, chapter titles often name both) — same behavior the recap-app direct-to-Gemini path has had since launch. Without it, every transcript came back with unlabeled dialogue regardless of how detailed the prompt\'s speaker-identification rule was. (2) Strengthened the speaker-ID rule in DEFAULT_TRANSCRIBE_PROMPT_BODY to match recap-app\'s longer text. Adds three previously-missing hints: "the channel name is often the host\'s name", explicit in-audio cue examples ("I\'m Dax", "this is Will"), and a stricter fallback condition ("Only fall back to Host/Guest if no names appear in the metadata AND nobody is introduced by name in the audio"). The shorter prior rule was hand-trimmed from recap-app at fork-time; the longer version produces meaningfully better speaker attribution in practice. Net effect: paired with recap-app 0.2.74 (which also fetches metadata client-side and passes it through), a Recap → Relay → Gemini summarize now produces transcripts with "Brandon Karpeles:" and "Matt Hill:" labels rather than unlabeled paragraphs.',
  },
  // Both handlers have empty bodies: nothing is migrated in either direction.
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version-graph entry for relay release 0.2.64.
 // Summary of the notes below: the analyze transcript is now numbered +
 // timestamped (`[N] (MM:SS) text`) and the default analyze prompt gains a
 // completeness contract with a new {{maxIndex}} variable.
 // The release-notes literal is kept on ONE physical line: a single-quoted
 // string cannot contain a raw line break.
 export const v_0_2_64 = VersionInfo.of({
  version: '0.2.64:0',
  releaseNotes: {
    en_US:
      'Analyze prompt overhaul to match the Recap-app\'s direct-to-Gemini behavior. Two coupled changes: (1) Transcript format passed to the analyze model is now numbered + timestamped — each line is `[N] (MM:SS) text` instead of `[MM:SS] text`. The bracketed [N] index at the start of each line is exactly what the model is asked to reference when emitting startIndex / endIndex in the JSON response, so the contract is now explicit in the input rather than requiring the model to count bracketed-timestamp lines internally. This is the root cause of the runaway-endIndex hallucinations that motivated the v0.2.61 stitcher clamp — without visible indices, models occasionally emit values like 9999 for a 200-entry window. The v0.2.61 clamp stays as a safety net but should fire dramatically less. (2) DEFAULT_ANALYZE_PROMPT_TEMPLATE rewritten to adopt Recap-app\'s completeness contract while preserving the relay\'s broad-sections design intent and operator-tunable variables. Adds explicit "every segment index from 0 to {{maxIndex}} must belong to exactly one section" + "startIndex of section N+1 must equal endIndex of section N plus 1" requirements — closes the gap (literally) where the old prompt allowed sections to skip portions of the window. Drops the "single section spanning the whole window is often the correct answer" invitation that pushed the model toward oversized single sections. Keeps {{windowMin}}, {{targetSections}}, {{transcript}} variables AND adds {{maxIndex}} = last valid index in the window for the completeness clause. Net effect: stitched output should cover every minute of every window with no gaps, AND broad-section preference is preserved via {{targetSections}} guidance + "lean toward broad, substantive topics" wording. Operator overrides saved in relay-config.json keep working unchanged — they receive the new {{transcript}} format (still timestamped, just with [N] prefix added) and existing {{windowMin}}/{{targetSections}} keep working. Add {{maxIndex}} to your override if you have one and want the completeness clause to mention a concrete index.',
  },
  // Both handlers have empty bodies: nothing is migrated in either direction.
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version-graph entry for relay release 0.2.65.
 // Summary of the notes below: /admin/jobs imported `snapshotJobs` from the
 // credit-ledger module instead of the job tracker; the import now points
 // at `../jobs.js`.
 export const v_0_2_65 = VersionInfo.of({
  version: '0.2.65:0',
  releaseNotes: {
    en_US:
      'The actual root cause of "Discovery ON · found: 0 running" surfaced — a one-line import bug. Two modules both export a function named `snapshotJobs`: `server/jobs.js` returns the in-memory job tracker ({id, kind, status, progress, started_at, ...}) which is what the dashboard\'s discovery poll expects; `server/job-credits.js` returns the credit ledger ({key, backend, tier, charged_at, refunded}) — totally different shape used by the billing path. `server/routes/admin.js`\'s /admin/jobs route was importing snapshotJobs from `../job-credits.js`, so every discovery poll got credit-ledger records back. The records DID have entries (e.g. "total entries: 1") but the discovery filter requires `status === "running"` and `kind` in a known set — credit records have neither field, so they all matched `kind: "unknown" / status: "unknown"` and `found: 0 running`. This was identifiable only after the v0.2.61 raw-JSON-viewer landed, which let the operator paste the actual response payload and see {key, backend, tier, charged_at, refunded} instead of {id, kind, status}. Fix: import from `../jobs.js` instead. Added a long inline comment so future changes don\'t accidentally swap it back. Net effect: in-flight Recap submissions now appear as pending rows in the Jobs table within ~3 seconds of relay-side createJob() (the discovery poll interval), the pizza tracker engages, the global in-flight pill shows up on non-Jobs tabs, and loadJobs() auto-refreshes the table when each job completes. No relay-side state changes — purely an import correction.',
  },
  // Both handlers have empty bodies: nothing is migrated in either direction.
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version-graph entry for relay release 0.2.66.
 // Summary of the notes below: dashboard polish pass (headline/banner/BTCPay
 // row moves, speed tables shown in s/min per audio minute, Overview
 // auto-refresh, failure tables match any row with an `error` message).
 // The release-notes literal is kept on ONE physical line: a single-quoted
 // string cannot contain a raw line break.
 export const v_0_2_66 = VersionInfo.of({
  version: '0.2.66:0',
  releaseNotes: {
    en_US:
      'Dashboard polish pass driven by operator feedback. (1) Settings tab now has the "Recap Relay — Operator Dashboard" headline so all three tabs match. The explanatory subheading on the Jobs tab ("Per-video / per-podcast processing history. Sortable + filterable; pages of 100.") was removed — operators know what the tab is. (2) The Overview\'s red "24-hour success rate" alert banner was removed. It duplicated the "Success rate" tile already in the metric grid below it; visual noise without new information. (3) The BTCPay-connected status (previously a full-width green row at the top of Overview) moved to the Settings tab. Operators care about BTCPay config when they\'re configuring; Overview is for at-a-glance metrics. (4) Transcription speed table: units changed from "ms/min" to "s/min" (still computed as sum_duration_ms / total_audio_minutes, divided by 1000 for display). The underlying value was already total compute time / audio duration — matching what the operator wanted — just displayed in milliseconds. (5) Analysis speed table: same unit change AND switched the rate basis from "per 1k input tokens" to "per audio minute" so it matches the transcribe table. Audit data records audio_seconds for analyze rows (= window body duration), so summing across all analyze calls gives total analyzed audio minutes — sum_duration_ms / that gives the same compute-per-audio-minute metric. (6) Overview tab now auto-refreshes every 10 seconds while the operator is viewing it. Calls /admin/dashboard silently in the background, preserves scroll position across the re-render (no view-yank back to top mid-scroll), and falls back to the previous data on a transient fetch failure. Scroll preservation uses requestAnimationFrame to wait one paint tick before restoring the previous scrollY value. Poll auto-stops on tab leave. (7) "Top failure modes by model" and "Recent errors" tables previously filtered strictly on status === "error", which excluded the vast majority of operator-facing failures — transcribe-truncated runs are recorded as status="partial" with the missing-speech message in the `error` field, but they wouldn\'t surface. The filters now match on any row with a truthy `error` message (covers error, partial, refused). Also exposed a new `failures` count (errors + partials) and a breakdown ("N err, M partial") in the by-model table so operators can distinguish kinds.',
  },
  // Both handlers have empty bodies: nothing is migrated in either direction.
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version-graph entry for relay release 0.2.67.
 // Summary of the notes below: section-count targets are now keyed by total
 // VIDEO duration (7 buckets) and divided across analyze windows, replacing
 // the per-window short/medium/long strings.
 // The release-notes literal is kept on ONE physical line: a single-quoted
 // string cannot contain a raw line break.
 export const v_0_2_67 = VersionInfo.of({
  version: '0.2.67:0',
  releaseNotes: {
    en_US:
      'Section-count target tuning methodology rebuilt around VIDEO duration instead of window duration. Old model (v0.2.62-v0.2.66): operator set three strings — short / medium / long — keyed by individual analyze window length. The relay picked one per window based on that window\'s transcript duration. Problem: a 30-min single-window podcast and a 3-hour 6-window podcast bucketed identically as "medium" but need totally different segmentation density. The model was producing too-many or too-few sections depending on which the operator hadn\'t calibrated for. New model: operator sets the target TOTAL sections per video for each of 7 duration buckets (under 30 / 30-60 / 60-90 / 90-120 / 120-150 / 150-180 / over 180 min). The relay reads the actual audio duration at request time, picks the matching bucket\'s target, then divides by the effective number of analyze windows (clamped >= 1 for single-shot) to get the average sections-per-window. Splices a formatted label ("around N sections" / "N–M sections" / "1 section") into {{targetSections}} in the analyze prompt. Settings tab redesigned: a 7-row table with the duration label, an integer input for target-total, a live "Sections per window" preview column showing the computed result + the prompt label string, and the default value. The preview recomputes as the operator tweaks either the AN window body slider above (event-delegated input listener) or any target-total input directly (per-row oninput handler). Backend changes: chunked-analyze.js exports computePerWindowTarget(); buildWindowPrompt() takes targetTotalsByBucket + totalAudioSec parameters; runChunkedAnalysis threads them through. Both /relay/summarize-url and the admin test-run path supply the new config keys. The deprecated string settings (relay_analyze_target_short / medium / long) are no longer used and have been removed from the schema; old values saved in relay-config.json become orphans, harmless. Defaults: 6/8/9/10/11/12/12 — calibrated so a 90-min podcast at the default 18-min AN window body produces ~1-2 sections per window (about 9 total), matching what felt right in operator testing.',
  },
  // Both handlers have empty bodies: nothing is migrated in either direction.
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version-graph entry for relay release 0.2.68.
 // Summary of the notes below: per-window analyze retries once on invalid
 // JSON or a thrown backend exception, and the section-count preview now
 // reacts to the AN window body slider.
 export const v_0_2_68 = VersionInfo.of({
  version: '0.2.68:0',
  releaseNotes: {
    en_US:
      'Two fixes. (1) Per-window analyze now retries ONCE on invalid-JSON responses (or on a thrown exception from the backend). The model occasionally returns a JSON-shaped string that fails safeParseSections — extra prose preamble, truncated closing brace, code fence the parser misses — and pre-0.2.68 those windows were dropped on the first failure. With a 10-window job, two such failures lost ~36-48 minutes of analyzed coverage. The retry runs against the same prompt; the upstream LLMs treat each call independently and almost always succeed on attempt 2. Audit row records the per-attempt error message and tags the final error with "(after 2 attempts)" so the operator can tell whether it\'s the model fundamentally struggling with that window or a one-shot blip. (2) Section-count targets preview now updates when you drag the AN window body slider, not just when you type in the number input next to it. Earlier the listener only matched `data-setting-key` — but the slider in a slider/number-input pair uses `data-setting-slave`, so drag-events were ignored. Symptom: changing AN window body from 18 → 24 left the "Sections / window" preview column (and the "current AN window body of N min" description line) stuck at 18. Fixed by matching both attributes in the input-event delegator.',
  },
  // Both handlers have empty bodies: nothing is migrated in either direction.
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version-graph entry for relay release 0.2.69.
 // Summary of the notes below: Gemini analyze calls pass
 // `responseMimeType: "application/json"`, and the per-window analyze retry
 // goes from 2 attempts to 3.
 export const v_0_2_69 = VersionInfo.of({
  version: '0.2.69:0',
  releaseNotes: {
    en_US:
      'Two coupled changes to eliminate the lingering "analyze: invalid JSON in window response" failures that the v0.2.68 single-retry mechanism couldn\'t catch. (1) Gemini analyze calls now pass `responseMimeType: "application/json"` in the generateContent config. This is a documented Gemini feature that tells the decoder to emit ONLY valid JSON for the response body — eliminates the entire class of failures that came from the model wrapping its sections array in a prose preamble, a ```json``` markdown fence, or truncating the closing brace. The prompt already asked for JSON; this turns that request into a hard server-enforced constraint that the model can\'t violate. Lighter than declaring a full responseSchema (which would also enforce field types) — kept the lighter form because the post-parse stitcher already clamps and dedupes any out-of-range indices, so structural deviation in the fields is recoverable. (2) Per-window analyze retry bumped from 2 attempts to 3. Analyze is by far the cheapest pipeline phase (~5 seconds per window vs ~30+ seconds per transcribe chunk), so a third attempt is essentially free — adds at most ~10s of wall time to a worst-case failing window, trivial compared to the 30+ minute transcribe phase for long content. The retry loop remains the primary defense for the hardware (Gemma) analyze path which has no equivalent decoder-side JSON guarantee; on Gemini, the responseMimeType change should eliminate the need to retry-for-JSON-reasons entirely. Combined: a 10-window job that pre-v0.2.68 lost ~2 windows per long-podcast run should now almost never lose a single one.',
  },
  // Both handlers have empty bodies: nothing is migrated in either direction.
  migrations: {
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })
@@ -0,0 +1,12 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 /**
  * Version entry for release 0.2.70:0.
  *
  * Supplies the en_US release notes for this version together with `up` and
  * `down` migration hooks. Both hooks have empty bodies: apart from
  * destructuring `effects` from their argument they do nothing.
  */
 export const v_0_2_70 = VersionInfo.of({
  version: '0.2.70:0',
  releaseNotes: {
    en_US:
      'License-keyed credit accounting (Path 3): Pro and Max tier rows in the credits ledger are now keyed by license fingerprint (sha256 of license UUID) instead of install_id. Multiple installs activating the same Pro license share one monthly pool — eliminates the 2x credits leak when a cloud Pro user pastes their key on a self-hosted install. Core (free) tier rows stay install-keyed. Existing rows are untouched; legacy Pro installs migrate lazily to a fresh license-keyed row on their next request (effectively a free transition month, acceptable at current user count). BTCPay webhook stashes license_fingerprint in invoice metadata for restart-safe credit routing. Operator dashboard label tweaks: "Active users" instead of "Active installs" for paid tiers. New test suite covers the credit-key helper (11 tests, all passing).',
  },
  migrations: {
    // Nothing to migrate in either direction for this release.
    async up({ effects }) {},
    async down({ effects }) {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 /**
  * Version entry for release 0.2.71:0.
  *
  * Supplies the en_US release notes for this version together with `up` and
  * `down` migration hooks. Both hooks have empty bodies: apart from
  * destructuring `effects` from their argument they do nothing.
  */
 export const v_0_2_71 = VersionInfo.of({
  version: '0.2.71:0',
  releaseNotes: {
    en_US:
      "Surface the BOLT11 Lightning invoice in the /relay/credits/buy response so Recap can render an inline QR + 'Open in wallet' deep link instead of redirecting buyers to BTCPay's hosted checkout page (Phase 1 of the inline-payment migration). After creating the BTCPay invoice we now fetch /payment-methods, pick out the Lightning entry, and return its destination as `bolt11` plus the corresponding lightning: deep link as `lightning_payment_link`. Best-effort: if the LN invoice isn't ready on the first call (some BTCPay configs generate it asynchronously), we retry once with a short backoff. If LN still isn't available — store doesn't have it configured, or LND is unreachable — the field is null and Recap falls back to the legacy checkout-URL flow. The buy itself never fails because of this; the LN fields are additive. Polling endpoint (/relay/credits/invoice/:id) and webhook handling are unchanged.",
  },
  migrations: {
    // Nothing to migrate in either direction for this release.
    async up({ effects }) {},
    async down({ effects }) {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 /**
  * Version entry for release 0.2.72:0.
  *
  * Supplies the en_US release notes for this version together with `up` and
  * `down` migration hooks. Both hooks have empty bodies: apart from
  * destructuring `effects` from their argument they do nothing.
  */
 export const v_0_2_72 = VersionInfo.of({
  version: '0.2.72:0',
  releaseNotes: {
    en_US:
      "Phase 1 diagnostic: when the new bolt11 path falls back to null, the /relay/credits/buy response now includes a `_ln_debug` field with the reason (no_lightning_method vs fetch_failed) plus a small sample of the BTCPay /payment-methods response. Lets Recap surface a concrete error message in the UI so we can fix the pick-function heuristic for whatever BTCPay version / shape the operator's store happens to use, without operators having to tail relay logs. Will be removed once Phase 1 is verified.",
  },
  migrations: {
    // Nothing to migrate in either direction for this release.
    async up({ effects }) {},
    async down({ effects }) {},
  },
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 /**
  * Version entry for release 0.2.73:0.
  *
  * Supplies the en_US release notes for this version together with `up` and
  * `down` migration hooks. Both hooks have empty bodies: apart from
  * destructuring `effects` from their argument they do nothing.
  */
 export const v_0_2_73 = VersionInfo.of({
  version: '0.2.73:0',
  releaseNotes: {
    en_US:
      "Fix the BTCPay 2.x payment-method detection. The /payment-methods response field was renamed from `paymentMethod` to `paymentMethodId` in BTCPay 2.x, and the heuristic shipped in v0.2.71 was checking the legacy 1.x name only — so it never matched on stores running 2.x and the inline-Lightning path silently fell back to the BTCPay redirect. The picker now checks both field names. Also: when a store has both BTC-LN (BOLT11-direct) and BTC-LNURL methods, we prefer BTC-LN — every Lightning wallet supports BOLT11 but LNURL-pay needs a capable wallet (and BTCPay's LNURL entry often has a null destination until the buyer scans, which we can't render to a QR). Skips entries with null/empty destinations.",
  },
  migrations: {
    // Nothing to migrate in either direction for this release.
    async up({ effects }) {},
    async down({ effects }) {},
  },
 })
--- a/Show More
+++ b/Show More