// Live-reloading config layer. Mirrors Recap's config.js pattern: read
// /data/config/relay-config.json on every access (so StartOS-action
// changes are picked up without a daemon restart), parse, and expose
// typed accessors.
//
// All defaults match the schema in startos/file-models/config.json.ts.

import fs from "fs/promises";
import path from "path";

// ── Canonical fallbacks for the JSON-encoded settings ──
// Defined exactly once and shared by defaultConfig() and the parsed
// accessors below so the two can never drift apart. They are declared
// before the module state because defaultConfig() runs at import time.
const DEFAULT_PACKAGES = [
  { credits: 5, sats: 4000 },
  { credits: 10, sats: 6000 },
  { credits: 20, sats: 10000 },
];

const DEFAULT_TIER_QUOTAS = {
  core: {
    lifetime: 10,
    geminiCapLifetime: 5,
    monthly: null,
    geminiCapMonthly: null,
  },
  pro: { lifetime: null, monthly: 50, geminiCapMonthly: 25 },
  max: { lifetime: null, monthly: null, geminiCapMonthly: 50 },
};

const DEFAULT_TIER_PRICES_USD = { core: 0, pro: 5, max: 15 };
const DEFAULT_TIER_PRICES_SATS = { pro: 21000, max: 42000 };
const DEFAULT_TIER_PRICES_FIAT_CENTS = { pro: 2100, max: 4200 };
const DEFAULT_SUBSCRIPTION_PERIOD_DAYS = 30;
const DEFAULT_ZAPRITE_BASE_URL = "https://api.zaprite.com";
const DEFAULT_ZAPRITE_CURRENCY = "USD";

// ── Module state ──
let dataDir = "/data";
let cached = emptyCache();
// "<mtimeMs>:<size>" of the last file version we warned about, so a
// broken config file logs once instead of on every access.
let lastWarnedStamp = null;

// A cache entry that matches no real file. mtimeMs/size are null (not 0)
// so a config file whose mtime is the epoch is still loaded.
function emptyCache() {
  return { mtimeMs: null, size: null, snapshot: defaultConfig() };
}

// Builds a fresh object holding every setting at its default value.
// A new object is returned on each call, so callers may spread/mutate
// the result without affecting later snapshots.
function defaultConfig() {
  return {
    relay_gemini_api_key: "",
    relay_spark_control_url: "",
    // Phase 2 — post-cluster polish pass. After diarization +
    // clustering produce global speaker IDs and after analyze
    // produces section objects, run a two-stage LLM pass that:
    //   1. infers speaker names from the labeled transcript +
    //      episode metadata
    //   2. rewrites section summaries to attribute statements to
    //      specific speakers ("Matt Hill explains..." instead of
    //      "the discussion centers around...")
    // Cost: ~15-25s wall time at the end of the pipeline. Skipped
    // automatically when fewer than 2 speakers detected (nothing
    // to attribute). Default ON; operator can disable via the
    // Settings tab.
    relay_post_cluster_polish_enabled: true,
    // Operator-editable polish prompts (same three-layer override
    // pattern as relay_transcribe_prompt / relay_analyze_prompt).
    // Empty string at this layer = fall through to the hardcoded
    // defaults in post-cluster-polish.js. The dashboard "Set as new
    // default" button promotes a current override into the
    // *_default companion fields.
    relay_polish_name_inference_prompt: "",
    relay_polish_summary_rewrite_prompt: "",
    // Phase 2 of Path 2A — internal meeting extras analysis. Runs
    // ONE additional LLM call after the polish pass to extract
    // structured items (decisions / action items / open questions /
    // key quotes) from the speaker-attributed transcript. Failure
    // is non-fatal — rec.extras stays null and the UI hides the
    // section. Cost: ~5-15s wall time. Default ON; operator can
    // disable here if their hardware is slow or the LLM is unreliable
    // for structured extraction. Only affects internal meetings —
    // YouTube/podcast flows ignore this setting.
    relay_meeting_extras_enabled: true,
    // Operator-editable prompt override for the extras pass. Empty =
    // fall through to DEFAULT_MEETING_EXTRAS_PROMPT_TEMPLATE in
    // meeting-extras.js. Same three-layer convention as the other
    // editable prompts.
    relay_meeting_extras_prompt: "",
    relay_gemini_transcription_model: "gemini-3-flash-preview",
    relay_gemini_analysis_model: "gemini-3.1-pro-preview",
    relay_transcribe_backend_preference: "gemini_first",
    relay_analyze_backend_preference: "gemini_first",

    // ── Text-to-speech (audio-first Recaps "walking mode") ──
    // TTS turns each topic-summary into a spoken clip so the Recap app
    // can play a recap back-to-back like a podcast. Two backends, same
    // swap model as transcribe/analyze: operator-hardware Kokoro (via
    // Spark Control's /v1/audio/speech) is the default; ElevenLabs is
    // the cloud alternative. Gated to Max users on the Recap side.
    //
    // Preference values mirror the transcribe/analyze knobs:
    //   hardware_first → Kokoro if ready, else ElevenLabs (if keyed)
    //   hardware_only  → Kokoro only (error if not ready)
    //   cloud_first    → ElevenLabs if keyed, else Kokoro
    //   cloud_only     → ElevenLabs only
    // ("gemini_*" naming is intentionally NOT reused — TTS's cloud
    // backend is ElevenLabs, not Gemini.)
    relay_tts_backend_preference: "hardware_first",
    // Default voice when the client doesn't specify one. Kokoro voice ids
    // follow a <prefix>_<name> pattern (e.g. bm_george, bf_emma,
    // am_michael, af_heart — the four curated for narration). Enumerate
    // live voices for a picker via /v1/models (kind:tts; curated ones
    // first).
    relay_tts_default_voice: "bm_george",
    // Output format. Kokoro emits wav | mp3 | opus | flac directly, so we
    // default to mp3 (small + universally playable for the mobile/offline
    // player) and avoid any client-side transcode. The Recap client may
    // override per request.
    relay_tts_format: "mp3",
    // ElevenLabs cloud TTS (untested until a key is supplied). Empty
    // api key = ElevenLabs unavailable; relay falls back to Kokoro under
    // hardware_first / cloud_first, or errors under cloud_only.
    relay_elevenlabs_api_key: "",
    relay_elevenlabs_voice_id: "",
    relay_elevenlabs_model: "eleven_turbo_v2_5",
    relay_keysat_base_url: "https://keysat.xyz",

    // ── Cloud operator key (core-decoupling) ──
    // Shared secret that authenticates the operator's cloud Recaps server
    // (recaps.cc) to this relay. When a request carries X-Recap-User-Id,
    // the relay trusts it (and keys the credit pool by user:<id>, with the
    // tier the relay stores for that user) ONLY if X-Recap-Operator-Key
    // matches this value. Empty = cloud user-id requests are rejected
    // (relay still serves the existing license/install path). Set the same
    // value here and in the Recaps server's recap_relay_operator_key.
    relay_cloud_operator_key: "",
    relay_admin_username: "",
    relay_admin_password_hash: "",
    relay_admin_password_salt: "",
    relay_admin_session_secret: "",
    relay_save_user_outputs: false,
    relay_tier_quotas_json: JSON.stringify(DEFAULT_TIER_QUOTAS),
    relay_tier_prices_usd_json: JSON.stringify(DEFAULT_TIER_PRICES_USD),
    // Self-serve subscription prices in SATS per 30-day period. Bitcoin-
    // native (the BTCPay rail invoices these directly). The Zaprite card
    // rail converts to fiat at purchase time.
    relay_tier_prices_sats_json: JSON.stringify(DEFAULT_TIER_PRICES_SATS),
    // Self-serve subscription prices for the CARD (Zaprite) rail, in the
    // smallest unit of relay_zaprite_currency (cents for USD). This is the
    // amount actually charged to a card buyer — kept explicit (cents) and
    // separate from relay_tier_prices_usd_json (which is a whole-dollar
    // figure used only by the dashboard's revenue/margin tile). Default is
    // parity with the sat prices (≈$21 / $42); the operator can set a card
    // premium here to cover processing fees.
    relay_tier_prices_fiat_cents_json: JSON.stringify(
      DEFAULT_TIER_PRICES_FIAT_CENTS
    ),
    // Prepaid subscription period length in days.
    relay_subscription_period_days: DEFAULT_SUBSCRIPTION_PERIOD_DAYS,
    relay_btcpay_base_url: "",
    relay_btcpay_internal_url: "",
    relay_btcpay_public_url: "",
    relay_btcpay_store_id: "",
    relay_btcpay_api_key: "",
    relay_btcpay_webhook_secret: "",

    // ── Zaprite (card rail) ──
    // Hosted-checkout API for card payments. The operator sets the API key
    // via the StartOS "Set Zaprite Connection" action. base_url defaults to
    // Zaprite's public API; currency is the fiat the card is charged in
    // (amount comes from relay_tier_prices_fiat_cents_json, in that
    // currency's smallest unit). No webhook secret: the webhook is verified
    // by re-fetching the order from Zaprite's authenticated API.
    relay_zaprite_base_url: DEFAULT_ZAPRITE_BASE_URL,
    relay_zaprite_api_key: "",
    relay_zaprite_currency: DEFAULT_ZAPRITE_CURRENCY,
    relay_credit_packages_json: JSON.stringify(DEFAULT_PACKAGES),

    // ── Chunking / concurrency knobs ──
    // ONE canonical default per knob, defined here exactly once. All
    // backend code reads from the live-reloaded snapshot at request
    // time — no hardcoded fallbacks anywhere else, no per-test-run
    // overrides. The Settings tab in the dashboard edits these values
    // via PUT /admin/settings.
    //
    // Gemini backend (relay → Google Gemini API):
    //   30-min TX chunks at 12-way concurrency is well under Gemini's
    //   paid Tier 1 RPM cap (1000 for flash, 150 for pro). 18-min
    //   analyze windows match the recap-app value. 12 analyze windows
    //   in flight saturates most operator workloads.
    relay_gemini_tx_chunk_minutes: 30,
    relay_gemini_tx_concurrency: 12,
    relay_gemini_analyze_window_minutes: 18,
    relay_gemini_analyze_overlap_minutes: 2,
    relay_gemini_analyze_concurrency: 12,
    // Hardware backend (relay → operator's Parakeet + Gemma/vLLM):
    //   5-min TX chunks at 4-way concurrency for Parakeet OOM headroom
    //   (operator hardware is typically 1 GPU; spark-control dev
    //   confirmed 4 concurrent is safe). 18-min analyze windows match
    //   gemini's value for cross-backend benchmark parity. 8 analyze
    //   windows in flight is the vLLM-on-single-Spark sweet spot for
    //   our prompt size (12 starts to queue on the GPU).

    // Speaker diarization on the operator-hardware transcribe path.
    // When enabled, each audio chunk gets sent to TWO Spark Control
    // endpoints in parallel: /v1/audio/transcriptions (Parakeet) AND
    // /api/audio/diarize-chunk (Sortformer + TitaNet). The relay
    // collects per-chunk voice fingerprints, clusters them across
    // chunks using cosine similarity, and re-labels diarization
    // segments with globally-consistent speaker IDs (Speaker_0,
    // Speaker_1, …). Each transcript entry then carries an optional
    // speaker label. Diarization is hardware-path-only — the Gemini
    // path uses prompt-based speaker labeling instead. Default OFF;
    // operator opts in via the dashboard toggle.
    relay_hardware_diarization_enabled: false,
    // Cosine-similarity threshold for the cross-chunk speaker
    // clustering step. Stored as integer percentage (70 = 0.70
    // cosine similarity) because the slider system is integer-only.
    // Two fingerprints with similarity >= threshold are merged
    // into the same global speaker; below threshold = different
    // speakers. NeMo's recommended default for TitaNet embeddings
    // is 0.70 — clean audio with distinct voices tolerates lower
    // (more aggressive merging); panel audio with similar voices
    // benefits from higher (more conservative).
    relay_hardware_voice_clustering_threshold: 70,
    relay_hardware_anchor_min_speaking_sec: 30,
    relay_hardware_small_cluster_max_speaking_sec: 15,
    relay_hardware_uncertain_margin_pct: 10,
    relay_hardware_tx_chunk_minutes: 5,
    // Overlap (in seconds) between consecutive audio chunks on the
    // hardware transcribe path. Each chunk N+1 starts `overlap`
    // seconds before chunk N ends, so the same audio is covered
    // twice at chunk boundaries. The relay dedupes by dropping
    // words/segments in chunk N+1 whose timestamps fall within the
    // shared region. Two reasons this matters:
    //   1. Diarization: TitaNet needs ~3+ seconds of clean speech
    //      to produce a reliable voice fingerprint. A speaker who
    //      only talks at the very end of chunk N may get a thin
    //      fingerprint; the overlap means chunk N+1 also captures
    //      that audio, giving a better fingerprint for clustering.
    //   2. Word boundaries: words straddling a chunk boundary get
    //      cleanly transcribed in one chunk or the other rather
    //      than clipped. Applies even when diarization is OFF.
    // Default 30s is conservative — enough for fingerprint quality
    // without making the overlap region a meaningful fraction of
    // a 5-minute chunk.
    relay_hardware_tx_chunk_overlap_seconds: 30,
    relay_hardware_tx_concurrency: 4,
    relay_hardware_analyze_window_minutes: 18,
    relay_hardware_analyze_overlap_minutes: 2,
    relay_hardware_analyze_concurrency: 8,
    // Below this audio duration the chunked-analyze planner emits a
    // single window covering the whole transcript (single-shot fast
    // path). Below this duration TX chunking is also skipped — both
    // backends' transcribeAudio short-circuit when audio is shorter
    // than their chunk size, but this is the floor.
    relay_analyze_cutoff_minutes: 25,

    // ── Output-token caps per backend / pipeline ──
    // Gemini's transcribe path needs a high cap because dense long
    // chunks emit a lot of timestamped lines. Lower values trade
    // robustness against truncation for cost / speed. The model's
    // internal per-call cap may be lower than our request — see the
    // truncation-detector in gemini.js for the post-hoc check that
    // surfaces this as a "partial" status on the Jobs table.
    relay_gemini_tx_max_output_tokens: 65536,
    // Gemini analyze emits a small JSON sections array — defaulting
    // explicit to 8192 (was implicit / Google default before). Bump
    // higher if you observe truncated section JSON in audit rows.
    relay_gemini_an_max_output_tokens: 8192,
    // Hardware analyze max_tokens passed to the operator's vLLM /
    // Ollama endpoint via OpenAI-compatible chat-completion shape.
    // Smaller models (3B-7B) may produce better JSON with a lower
    // cap that forces concision; larger models can use more.
    relay_hardware_an_max_tokens: 16000,

    // ── Analyze prompt section-count targets ──
    // Per-VIDEO-DURATION targets that the relay uses to compute a
    // per-window section count at request time. The operator sets how
    // many TOTAL sections they want for a video of each duration; the
    // relay divides by (total_audio / window_body) to get the average
    // sections-per-window, splices the result into {{targetSections}}
    // in the analyze prompt.
    //
    // Replaces the prior 3-bucket per-window model (short/medium/long
    // by window duration). Reason: section count should scale with
    // video length, not window length — a 30-min single-window
    // podcast and a 3-hour 6-window podcast have very different
    // segmentation needs even when their window duration is identical.
    // Defaults are calibrated so a typical 90-min podcast at the
    // default 18-min AN window body produces ~1-2 sections per window
    // (about 9 total), matching what felt right in operator testing.
    //
    // Buckets are video total duration in minutes:
    //   under_30  → audio_sec < 30 * 60
    //   30_60     → 30 * 60 <= audio_sec < 60 * 60
    //   60_90     → 60 * 60 <= audio_sec < 90 * 60
    //   90_120    → 90 * 60 <= audio_sec < 120 * 60
    //   120_150   → 120 * 60 <= audio_sec < 150 * 60
    //   150_180   → 150 * 60 <= audio_sec < 180 * 60
    //   over_180  → audio_sec >= 180 * 60
    //
    // Each value is target TOTAL sections for that video. The
    // per-window emit value = round(total * window_body_sec /
    // total_audio_sec).
    relay_analyze_total_sections_under_30: 6,
    relay_analyze_total_sections_30_60: 8,
    relay_analyze_total_sections_60_90: 9,
    relay_analyze_total_sections_90_120: 10,
    relay_analyze_total_sections_120_150: 11,
    relay_analyze_total_sections_150_180: 12,
    relay_analyze_total_sections_over_180: 12,

    // ── Editable LLM prompts (Settings tab) ──
    // Empty string = use the hardcoded default body in
    // server/backends/gemini.js (transcribe) or
    // server/chunked-analyze.js (analyze). Operator can override via
    // the dashboard's Settings tab textareas; PUT /admin/settings
    // writes the override here. Cleared (empty) to revert to default
    // without operators having to copy the default text exactly.
    //
    // Transcribe prompt: applies to the Gemini path only. The
    // operator-hardware (Parakeet) path is a pure STT model with no
    // prompt input, so the override is ignored there.
    relay_transcribe_prompt: "",
    // Analyze prompt: applies to BOTH Gemini and operator-hardware
    // (Gemma) analyze paths. Template variables {{transcript}},
    // {{windowMin}}, {{targetSections}} are interpolated at request
    // time. PUT /admin/settings validates that the override still
    // contains the JSON-output instruction and the {{transcript}}
    // variable so an accidental edit can't silently break the
    // pipeline.
    relay_analyze_prompt: "",
    // Operator-promoted defaults. Three-layer resolution at request
    // time: override (relay_transcribe_prompt) → operator default
    // (relay_transcribe_prompt_default) → hardcoded code default
    // (DEFAULT_TRANSCRIBE_PROMPT_BODY in gemini.js). The "Set as new
    // default" button in the dashboard moves the override content
    // into this field + clears the override, letting operators
    // evolve their defaults over time without code redeploys. Empty
    // = use the code-side default.
    relay_transcribe_prompt_default: "",
    relay_analyze_prompt_default: "",
  };
}

// ── Small parsing helpers (module-private) ──

// JSON.parse that yields undefined instead of throwing. The accessors
// below deliberately treat an unparseable setting as "use the defaults",
// so the error itself carries no extra information for them.
function parseJsonSetting(text) {
  try {
    return JSON.parse(text);
  } catch {
    return undefined;
  }
}

// Converts a JSON-ish value to a finite number, or null when it is not
// one. Only real numbers and non-blank numeric strings qualify: plain
// Number() would turn null, "", [] and false into 0, which for a price
// means "free".
function toFiniteNumber(value) {
  if (typeof value === "string") {
    if (value.trim() === "") return null;
  } else if (typeof value !== "number") {
    return null;
  }
  const n = Number(value);
  return Number.isFinite(n) ? n : null;
}

// Returns `v` as a non-negative finite number, else `fallback`. (The
// name is historical — the fallback is whatever the caller passes, not
// necessarily zero.)
function numOrZero(v, fallback) {
  const n = toFiniteNumber(v);
  return n !== null && n >= 0 ? n : fallback;
}

// Returns the trimmed string, or `fallback` when the value is not a
// string or is blank.
function trimmedOr(value, fallback) {
  if (typeof value !== "string") return fallback;
  const text = value.trim();
  return text === "" ? fallback : text;
}

// Parses a JSON price map and resolves one entry per key of `defaults`,
// falling back per-tier when an entry is missing, non-numeric or
// negative. Always returns a fresh object with exactly the default keys.
function resolvePrices(jsonText, defaults) {
  const parsed = parseJsonSetting(jsonText);
  const prices = {};
  for (const [tier, fallback] of Object.entries(defaults)) {
    prices[tier] = numOrZero(parsed?.[tier], fallback);
  }
  return prices;
}

// Parsed view of the credit-package menu. Returns an array of
// { credits, sats } pairs in display order. Used by the
// purchase-modal endpoint to render package choices AND by the
// purchase route to validate that the requested package matches a
// configured option (so the buyer can't ask for arbitrary
// credits-for-cheap pricing).
//
// Entries whose credits/sats are not positive finite numbers are
// dropped. If the setting is not valid JSON or not an array, a COPY of
// the default menu is returned so callers can't mutate the shared
// defaults.
export async function getCreditPackages() {
  const cfg = await getConfigSnapshot();
  const parsed = parseJsonSetting(cfg.relay_credit_packages_json);
  if (!Array.isArray(parsed)) {
    return DEFAULT_PACKAGES.map((pkg) => ({ ...pkg }));
  }
  return parsed
    .map((p) => ({
      credits: Number(p?.credits),
      sats: Number(p?.sats),
    }))
    .filter(
      (p) =>
        Number.isFinite(p.credits) &&
        p.credits > 0 &&
        Number.isFinite(p.sats) &&
        p.sats > 0
    );
}

// Parsed view of relay_tier_prices_usd_json. Returns a {core,pro,max}
// object with USD-per-month numbers. Used by the dashboard's
// revenue/margin tile.
export async function getTierPrices() {
  const cfg = await getConfigSnapshot();
  return resolvePrices(cfg.relay_tier_prices_usd_json, DEFAULT_TIER_PRICES_USD);
}

// Self-serve subscription pricing in SATS per period. { pro, max }. Used by
// the BTCPay tier-invoice flow. Always returns both tiers: a missing or
// invalid entry falls back to that tier's default price.
export async function getTierPricesSats() {
  const cfg = await getConfigSnapshot();
  return resolvePrices(
    cfg.relay_tier_prices_sats_json,
    DEFAULT_TIER_PRICES_SATS
  );
}

// The configured prepaid period length in whole days (default 30).
// Fractions are floored; anything that floors below 1 (including 0.5,
// which used to yield a zero-day period) falls back to the default.
export async function getSubscriptionPeriodDays() {
  const cfg = await getConfigSnapshot();
  const n = toFiniteNumber(cfg.relay_subscription_period_days);
  if (n === null) return DEFAULT_SUBSCRIPTION_PERIOD_DAYS;
  const days = Math.floor(n);
  return days >= 1 ? days : DEFAULT_SUBSCRIPTION_PERIOD_DAYS;
}

// Card-rail (Zaprite) subscription prices in the smallest unit of the
// configured currency (cents for USD). { pro, max }. Used to set the
// Zaprite order `amount`. Always returns both tiers: a missing or
// invalid entry falls back to that tier's default price.
export async function getTierPricesFiatCents() {
  const cfg = await getConfigSnapshot();
  return resolvePrices(
    cfg.relay_tier_prices_fiat_cents_json,
    DEFAULT_TIER_PRICES_FIAT_CENTS
  );
}

// Zaprite (card rail) connection config. { baseUrl, apiKey, currency }.
// apiKey empty = card rail not configured (callers should 503).
// baseUrl has surrounding whitespace and ALL trailing slashes removed;
// currency is upper-cased. Blank or non-string values use the defaults.
export async function getZapriteConfig() {
  const cfg = await getConfigSnapshot();
  const baseUrl = trimmedOr(
    cfg.relay_zaprite_base_url,
    DEFAULT_ZAPRITE_BASE_URL
  ).replace(/\/+$/, "");
  const currency = trimmedOr(
    cfg.relay_zaprite_currency,
    DEFAULT_ZAPRITE_CURRENCY
  ).toUpperCase();
  return {
    baseUrl,
    apiKey: trimmedOr(cfg.relay_zaprite_api_key, ""),
    currency,
  };
}

// Absolute path of the on-disk config file under the current data dir.
function configPath() {
  return path.join(dataDir, "config", "relay-config.json");
}

// One-time startup hook. Optionally points the module at a different
// data directory, makes sure the config directory exists, and primes
// the cache. Safe to call with no argument.
export async function initConfig(options) {
  const dd = options?.dataDir;
  if (dd && dd !== dataDir) {
    dataDir = dd;
    // Drop the previous directory's snapshot; otherwise a new directory
    // with no config file yet would keep serving the old values.
    cached = emptyCache();
    lastWarnedStamp = null;
  }
  // Best effort: if the directory can't be created, reads below simply
  // keep returning defaults.
  await fs
    .mkdir(path.dirname(configPath()), { recursive: true })
    .catch(() => {});
  // Prime the cache so the first request doesn't pay for a file-read.
  await getConfigSnapshot();
}

// Reads the on-disk config and merges with defaults. Cheap — a single
// stat per call; the file is only re-read when its mtime or size
// changes, so repeat callers within one request don't re-read.
//
// Failure handling (never throws):
//   - file missing / stat fails → last good snapshot (defaults on a
//     fresh install)
//   - unreadable, invalid JSON, or top-level value not an object →
//     last good snapshot, with one warning per file version. The cache
//     key is NOT advanced, so the file is retried on the next call.
export async function getConfigSnapshot() {
  const p = configPath();
  let stat;
  try {
    stat = await fs.stat(p);
  } catch {
    return cached.snapshot;
  }
  if (stat.mtimeMs === cached.mtimeMs && stat.size === cached.size) {
    return cached.snapshot;
  }
  try {
    const raw = await fs.readFile(p, "utf8");
    const parsed = JSON.parse(raw);
    // Spreading an array or string would inject junk keys ("0", "1", …)
    // into the snapshot, so only a plain JSON object is accepted.
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      throw new TypeError("top-level JSON value must be an object");
    }
    cached = {
      mtimeMs: stat.mtimeMs,
      size: stat.size,
      snapshot: { ...defaultConfig(), ...parsed },
    };
    lastWarnedStamp = null;
  } catch (err) {
    const stamp = `${stat.mtimeMs}:${stat.size}`;
    if (stamp !== lastWarnedStamp) {
      lastWarnedStamp = stamp;
      console.warn(`[config] failed to parse ${p}: ${err?.message}`);
    }
  }
  return cached.snapshot;
}

// Resolves one quota field: a finite number (or numeric string) is used
// as-is; anything else — including null/undefined — yields the default.
function quotaOr(value, fallback) {
  const n = toFiniteNumber(value);
  return n === null ? fallback : n;
}

// Parsed view of relay_tier_quotas_json, with safe fallbacks if the
// blob is missing or malformed. geminiCapLifetime is the new field
// added in relay 0.2.3 — splits a Core install's lifetime budget into
// Gemini-served vs hardware-served credits.
//
// Returns a fresh { core, pro, max } object with the same fields as
// the defaults; each field independently falls back to its default
// when absent or not numeric.
export async function getTierQuotas() {
  const cfg = await getConfigSnapshot();
  const parsed = parseJsonSetting(cfg.relay_tier_quotas_json);
  const quotas = {};
  for (const [tier, defaults] of Object.entries(DEFAULT_TIER_QUOTAS)) {
    const configured = parsed?.[tier];
    const resolved = {};
    for (const [field, fallback] of Object.entries(defaults)) {
      resolved[field] = quotaOr(configured?.[field], fallback);
    }
    quotas[tier] = resolved;
  }
  return quotas;
}