Wire new routes; identity, summarize-url, dashboard, admin

2026-06-13 13:36:30 -05:00
parent 04dcf86fa4
commit 318c6c4b81
20 changed files with 12407 additions and 499 deletions
@@ -14,19 +14,94 @@ let cached = { mtimeMs: 0, snapshot: defaultConfig() };
 function defaultConfig() {
  return {
    relay_gemini_api_key: "",
-    relay_parakeet_base_url: "",
-    relay_gemma_base_url: "",
-    relay_parakeet_model: "parakeet-tdt-0.6b-v3",
-    relay_gemma_model: "gemma3:27b",
+    relay_spark_control_url: "",
+    // Phase 2 — post-cluster polish pass. After diarization +
+    // clustering produce global speaker IDs and after analyze
+    // produces section objects, run a two-stage LLM pass that:
+    //   1. infers speaker names from the labeled transcript +
+    //      episode metadata
+    //   2. rewrites section summaries to attribute statements to
+    //      specific speakers ("Matt Hill explains..." instead of
+    //      "the discussion centers around...")
+    // Cost: ~15-25s wall time at the end of the pipeline. Skipped
+    // automatically when fewer than 2 speakers detected (nothing
+    // to attribute). Default ON; operator can disable via the
+    // Settings tab.
+    relay_post_cluster_polish_enabled: true,
+    // Operator-editable polish prompts (same three-layer override
+    // pattern as relay_transcribe_prompt / relay_analyze_prompt).
+    // Empty string at this layer = fall through to the hardcoded
+    // defaults in post-cluster-polish.js. The dashboard "Set as new
+    // default" button promotes a current override into the
+    // *_default companion fields.
+    relay_polish_name_inference_prompt: "",
+    relay_polish_summary_rewrite_prompt: "",
+    // Phase 2 of Path 2A — internal meeting extras analysis. Runs
+    // ONE additional LLM call after the polish pass to extract
+    // structured items (decisions / action items / open questions /
+    // key quotes) from the speaker-attributed transcript. Failure
+    // is non-fatal — rec.extras stays null and the UI hides the
+    // section. Cost: ~5-15s wall time. Default ON; operator can
+    // disable here if their hardware is slow or the LLM is unreliable
+    // for structured extraction. Only affects internal meetings —
+    // YouTube/podcast flows ignore this setting.
+    relay_meeting_extras_enabled: true,
+    // Operator-editable prompt override for the extras pass. Empty =
+    // fall through to DEFAULT_MEETING_EXTRAS_PROMPT_TEMPLATE in
+    // meeting-extras.js. Same three-layer convention as the other
+    // editable prompts.
+    relay_meeting_extras_prompt: "",
    relay_gemini_transcription_model: "gemini-3-flash-preview",
    relay_gemini_analysis_model: "gemini-3.1-pro-preview",
    relay_transcribe_backend_preference: "gemini_first",
    relay_analyze_backend_preference: "gemini_first",
+
+    // ── Text-to-speech (audio-first Recaps "walking mode") ──
+    // TTS turns each topic-summary into a spoken clip so the Recap app
+    // can play a recap back-to-back like a podcast. Two backends, same
+    // swap model as transcribe/analyze: operator-hardware Kokoro (via
+    // Spark Control's /v1/audio/speech) is the default; ElevenLabs is
+    // the cloud alternative. Gated to Max users on the Recap side.
+    //
+    // Preference values mirror the transcribe/analyze knobs:
+    //   hardware_first → Kokoro if ready, else ElevenLabs (if keyed)
+    //   hardware_only  → Kokoro only (error if not ready)
+    //   cloud_first    → ElevenLabs if keyed, else Kokoro
+    //   cloud_only     → ElevenLabs only
+    // ("gemini_*" naming is intentionally NOT reused — TTS's cloud
+    //  backend is ElevenLabs, not Gemini.)
+    relay_tts_backend_preference: "hardware_first",
+    // Default voice when the client doesn't specify one. Kokoro voice ids
+    // follow <lang_gender>_<name> (e.g. bm_george, bf_emma, am_michael,
+    // af_heart — the four curated for narration). Enumerate live voices
+    // for a picker via /v1/models (kind:tts; curated ones first).
+    relay_tts_default_voice: "bm_george",
+    // Output format. Kokoro emits wav | mp3 | opus | flac directly, so we
+    // default to mp3 (small + universally playable for the mobile/offline
+    // player) and avoid any client-side transcode. The Recap client may
+    // override per request.
+    relay_tts_format: "mp3",
+    // ElevenLabs cloud TTS (untested until a key is supplied). Empty
+    // api key = ElevenLabs unavailable; relay falls back to Kokoro under
+    // hardware_first / cloud_first, or errors under cloud_only.
+    relay_elevenlabs_api_key: "",
+    relay_elevenlabs_voice_id: "",
+    relay_elevenlabs_model: "eleven_turbo_v2_5",
    relay_keysat_base_url: "https://keysat.xyz",
+    // ── Cloud operator key (core-decoupling) ──
+    // Shared secret that authenticates the operator's cloud Recaps server
+    // (recaps.cc) to this relay. When a request carries X-Recap-User-Id,
+    // the relay trusts it (and keys the credit pool by user:<id>, with the
+    // tier the relay stores for that user) ONLY if X-Recap-Operator-Key
+    // matches this value. Empty = cloud user-id requests are rejected
+    // (relay still serves the existing license/install path). Set the same
+    // value here and in the Recaps server's recap_relay_operator_key.
+    relay_cloud_operator_key: "",
    relay_admin_username: "",
    relay_admin_password_hash: "",
    relay_admin_password_salt: "",
    relay_admin_session_secret: "",
+    relay_save_user_outputs: false,
    relay_tier_quotas_json: JSON.stringify({
      core: {
        lifetime: 10,
@@ -37,9 +112,319 @@ function defaultConfig() {
      pro: { lifetime: null, monthly: 50, geminiCapMonthly: 25 },
      max: { lifetime: null, monthly: null, geminiCapMonthly: 50 },
    }),
+    relay_tier_prices_usd_json: JSON.stringify({ core: 0, pro: 5, max: 15 }),
+    // Self-serve subscription prices in SATS per 30-day period. Bitcoin-
+    // native (the BTCPay rail invoices these directly). The Zaprite card
+    // rail converts to fiat at purchase time.
+    relay_tier_prices_sats_json: JSON.stringify({ pro: 21000, max: 42000 }),
+    // Self-serve subscription prices for the CARD (Zaprite) rail, in the
+    // smallest unit of relay_zaprite_currency (cents for USD). This is the
+    // amount actually charged to a card buyer — kept explicit (cents) and
+    // separate from relay_tier_prices_usd_json (which is a whole-dollar
+    // figure used only by the dashboard's revenue/margin tile). Default is
+    // parity with the sat prices (≈$21 / $42); the operator can set a card
+    // premium here to cover processing fees.
+    relay_tier_prices_fiat_cents_json: JSON.stringify({ pro: 2100, max: 4200 }),
+    // Prepaid subscription period length in days.
+    relay_subscription_period_days: 30,
+    relay_btcpay_base_url: "",
+    relay_btcpay_internal_url: "",
+    relay_btcpay_public_url: "",
+    relay_btcpay_store_id: "",
+    relay_btcpay_api_key: "",
+    relay_btcpay_webhook_secret: "",
+    // ── Zaprite (card rail) ──
+    // Hosted-checkout API for card payments. The operator sets the API key
+    // via the StartOS "Set Zaprite Connection" action. base_url defaults to
+    // Zaprite's public API; currency is the fiat the card is charged in
+    // (amount comes from relay_tier_prices_fiat_cents_json, in that
+    // currency's smallest unit). No webhook secret: the webhook is verified
+    // by re-fetching the order from Zaprite's authenticated API.
+    relay_zaprite_base_url: "https://api.zaprite.com",
+    relay_zaprite_api_key: "",
+    relay_zaprite_currency: "USD",
+    relay_credit_packages_json: JSON.stringify([
+      { credits: 5, sats: 4000 },
+      { credits: 10, sats: 6000 },
+      { credits: 20, sats: 10000 },
+    ]),
+
+    // ── Chunking / concurrency knobs ──
+    // ONE canonical default per knob, defined here exactly once. All
+    // backend code reads from the live-reloaded snapshot at request
+    // time — no hardcoded fallbacks anywhere else, no per-test-run
+    // overrides. The Settings tab in the dashboard edits these values
+    // via PUT /admin/settings.
+    //
+    // Gemini backend (relay → Google Gemini API):
+    //   30-min TX chunks at 12-way concurrency is well under Gemini's
+    //   paid Tier 1 RPM cap (1000 for flash, 150 for pro). 18-min
+    //   analyze windows match the recap-app value. 12 analyze windows
+    //   in flight saturates most operator workloads.
+    relay_gemini_tx_chunk_minutes: 30,
+    relay_gemini_tx_concurrency: 12,
+    relay_gemini_analyze_window_minutes: 18,
+    relay_gemini_analyze_overlap_minutes: 2,
+    relay_gemini_analyze_concurrency: 12,
+    // Hardware backend (relay → operator's Parakeet + Gemma/vLLM):
+    //   5-min TX chunks at 4-way concurrency for Parakeet OOM headroom
+    //   (operator hardware is typically 1 GPU; spark-control dev
+    //   confirmed 4 concurrent is safe). 18-min analyze windows match
+    //   gemini's value for cross-backend benchmark parity. 8 analyze
+    //   windows in flight is the vLLM-on-single-Spark sweet spot for
+    //   our prompt size (12 starts to queue on the GPU).
+    // Speaker diarization on the operator-hardware transcribe path.
+    // When enabled, each audio chunk gets sent to TWO Spark Control
+    // endpoints in parallel: /v1/audio/transcriptions (Parakeet) AND
+    // /api/audio/diarize-chunk (Sortformer + TitaNet). The relay
+    // collects per-chunk voice fingerprints, clusters them across
+    // chunks using cosine similarity, and re-labels diarization
+    // segments with globally-consistent speaker IDs (Speaker_0,
+    // Speaker_1, …). Each transcript entry then carries an optional
+    // speaker label. Diarization is hardware-path-only — the Gemini
+    // path uses prompt-based speaker labeling instead. Default OFF;
+    // operator opts in via the dashboard toggle.
+    relay_hardware_diarization_enabled: false,
+    // Cosine-similarity threshold for the cross-chunk speaker
+    // clustering step. Stored as integer percentage (70 = 0.70
+    // cosine similarity) because the slider system is integer-only.
+    // Two fingerprints with similarity >= threshold are merged
+    // into the same global speaker; below threshold = different
+    // speakers. NeMo's recommended default for TitaNet embeddings
+    // is 0.70 — clean audio with distinct voices tolerates lower
+    // (more aggressive merging); panel audio with similar voices
+    // benefits from higher (more conservative).
+    relay_hardware_voice_clustering_threshold: 70,
+    relay_hardware_anchor_min_speaking_sec: 30,
+    relay_hardware_small_cluster_max_speaking_sec: 15,
+    relay_hardware_uncertain_margin_pct: 10,
+    relay_hardware_tx_chunk_minutes: 5,
+    // Overlap (in seconds) between consecutive audio chunks on the
+    // hardware transcribe path. Each chunk N+1 starts `overlap`
+    // seconds before chunk N ends, so the same audio is covered
+    // twice at chunk boundaries. The relay dedupes by dropping
+    // words/segments in chunk N+1 whose timestamps fall within the
+    // shared region. Two reasons this matters:
+    //   1. Diarization: TitaNet needs ~3+ seconds of clean speech
+    //      to produce a reliable voice fingerprint. A speaker who
+    //      only talks at the very end of chunk N may get a thin
+    //      fingerprint; the overlap means chunk N+1 also captures
+    //      that audio, giving a better fingerprint for clustering.
+    //   2. Word boundaries: words straddling a chunk boundary get
+    //      cleanly transcribed in one chunk or the other rather
+    //      than clipped. Applies even when diarization is OFF.
+    // Default 30s is conservative — enough for fingerprint quality
+    // without making the overlap region a meaningful fraction of
+    // a 5-minute chunk.
+    relay_hardware_tx_chunk_overlap_seconds: 30,
+    relay_hardware_tx_concurrency: 4,
+    relay_hardware_analyze_window_minutes: 18,
+    relay_hardware_analyze_overlap_minutes: 2,
+    relay_hardware_analyze_concurrency: 8,
+    // Below this audio duration the chunked-analyze planner emits a
+    // single window covering the whole transcript (single-shot fast
+    // path). Below this duration TX chunking is also skipped — both
+    // backends' transcribeAudio short-circuit when audio is shorter
+    // than their chunk size, but this is the floor.
+    relay_analyze_cutoff_minutes: 25,
+
+    // ── Output-token caps per backend / pipeline ──
+    // Gemini's transcribe path needs a high cap because dense long
+    // chunks emit a lot of timestamped lines. Lower values trade
+    // robustness against truncation for cost / speed. The model's
+    // internal per-call cap may be lower than our request — see the
+    // truncation-detector in gemini.js for the post-hoc check that
+    // surfaces this as a "partial" status on the Jobs table.
+    relay_gemini_tx_max_output_tokens: 65536,
+    // Gemini analyze emits a small JSON sections array, so the cap is
+    // set explicitly to 8192 (previously implicit / Google's default).
+    // Raise it if you observe truncated section JSON in audit rows.
+    relay_gemini_an_max_output_tokens: 8192,
+    // Hardware analyze max_tokens passed to the operator's vLLM /
+    // Ollama endpoint via OpenAI-compatible chat-completion shape.
+    // Smaller models (3B-7B) may produce better JSON with a lower
+    // cap that forces concision; larger models can use more.
+    relay_hardware_an_max_tokens: 16000,
+
+    // ── Analyze prompt section-count targets ──
+    // Per-VIDEO-DURATION targets that the relay uses to compute a
+    // per-window section count at request time. The operator sets how
+    // many TOTAL sections they want for a video of each duration; the
+    // relay divides by (total_audio / window_body) to get the average
+    // sections-per-window, splices the result into {{targetSections}}
+    // in the analyze prompt.
+    //
+    // Replaces the prior 3-bucket per-window model (short/medium/long
+    // by window duration). Reason: section count should scale with
+    // video length, not window length — a 30-min single-window
+    // podcast and a 3-hour 6-window podcast have very different
+    // segmentation needs even when their window duration is identical.
+    // Defaults are calibrated so a typical 90-min podcast at the
+    // default 18-min AN window body produces ~1-2 sections per window
+    // (about 9 total), matching what felt right in operator testing.
+    //
+    // Buckets are video total duration in minutes:
+    //   under_30      → audio_sec < 30 * 60
+    //   30_60         → 30 * 60 <= audio_sec < 60 * 60
+    //   60_90         → 60 * 60 <= audio_sec < 90 * 60
+    //   90_120        → 90 * 60 <= audio_sec < 120 * 60
+    //   120_150       → 120 * 60 <= audio_sec < 150 * 60
+    //   150_180       → 150 * 60 <= audio_sec < 180 * 60
+    //   over_180      → audio_sec >= 180 * 60
+    //
+    // Each value is target TOTAL sections for that video. The
+    // per-window emit value = round(total * window_body_sec /
+    // total_audio_sec).
+    relay_analyze_total_sections_under_30: 6,
+    relay_analyze_total_sections_30_60: 8,
+    relay_analyze_total_sections_60_90: 9,
+    relay_analyze_total_sections_90_120: 10,
+    relay_analyze_total_sections_120_150: 11,
+    relay_analyze_total_sections_150_180: 12,
+    relay_analyze_total_sections_over_180: 12,
+
+    // ── Editable LLM prompts (Settings tab) ──
+    // Empty string = use the hardcoded default body in
+    // server/backends/gemini.js (transcribe) or
+    // server/chunked-analyze.js (analyze). Operator can override via
+    // the dashboard's Settings tab textareas; PUT /admin/settings
+    // writes the override here. Clear the field (empty string) to
+    // revert to the default, so operators never have to copy the
+    // default text back in exactly.
+    //
+    // Transcribe prompt: applies to the Gemini path only. The
+    // operator-hardware (Parakeet) path is a pure STT model with no
+    // prompt input, so the override is ignored there.
+    relay_transcribe_prompt: "",
+    // Analyze prompt: applies to BOTH Gemini and operator-hardware
+    // (Gemma) analyze paths. Template variables {{transcript}},
+    // {{windowMin}}, {{targetSections}} are interpolated at request
+    // time. PUT /admin/settings validates that the override still
+    // contains the JSON-output instruction and the {{transcript}}
+    // variable so an accidental edit can't silently break the
+    // pipeline.
+    relay_analyze_prompt: "",
+    // Operator-promoted defaults. Three-layer resolution at request
+    // time: override (relay_transcribe_prompt) → operator default
+    // (relay_transcribe_prompt_default) → hardcoded code default
+    // (DEFAULT_TRANSCRIBE_PROMPT_BODY in gemini.js). The "Set as new
+    // default" button in the dashboard moves the override content
+    // into this field + clears the override, letting operators
+    // evolve their defaults over time without code redeploys. Empty
+    // = use the code-side default.
+    relay_transcribe_prompt_default: "",
+    relay_analyze_prompt_default: "",
  };
 }

+// Resolves the credit-package menu from relay_credit_packages_json.
+// Yields { credits, sats } entries in their configured order. The
+// purchase-modal endpoint renders these as the buyer's choices, and the
+// purchase route checks a requested package against them so a buyer
+// cannot ask for an arbitrary credits-for-cheap combination.
+// Falls back to DEFAULT_PACKAGES when the setting cannot be parsed or is
+// not an array; entries whose credits or sats do not coerce to a finite
+// positive number are dropped.
+export async function getCreditPackages() {
+  const cfg = await getConfigSnapshot();
+  let menu;
+  try {
+    menu = JSON.parse(cfg.relay_credit_packages_json);
+  } catch {
+    return DEFAULT_PACKAGES;
+  }
+  if (!Array.isArray(menu)) return DEFAULT_PACKAGES;
+  const isPositive = (value) => Number.isFinite(value) && value > 0;
+  const packages = [];
+  for (const entry of menu) {
+    const credits = Number(entry?.credits);
+    const sats = Number(entry?.sats);
+    if (isPositive(credits) && isPositive(sats)) {
+      packages.push({ credits, sats });
+    }
+  }
+  return packages;
+}
+
+// Fallback credit-package menu as [credits, sats] rows, expanded into
+// { credits, sats } objects. Returned by getCreditPackages when the
+// configured menu cannot be parsed or is not an array.
+const DEFAULT_PACKAGES = [
+  [5, 4000],
+  [10, 6000],
+  [20, 10000],
+].map(([credits, sats]) => ({ credits, sats }));
+
+// USD-per-month price for each tier, parsed from
+// relay_tier_prices_usd_json and consumed by the dashboard's
+// revenue/margin tile. Always returns { core, pro, max }. A tier value
+// that numOrZero rejects takes its built-in default (core 0, pro 5,
+// max 15); the same defaults are returned whole when the setting cannot
+// be parsed.
+export async function getTierPrices() {
+  const fallbackUsd = { core: 0, pro: 5, max: 15 };
+  const cfg = await getConfigSnapshot();
+  let stored;
+  try {
+    stored = JSON.parse(cfg.relay_tier_prices_usd_json);
+  } catch {
+    return fallbackUsd;
+  }
+  const prices = {};
+  for (const tier of ["core", "pro", "max"]) {
+    prices[tier] = numOrZero(stored?.[tier], fallbackUsd[tier]);
+  }
+  return prices;
+}
+
+// Self-serve subscription pricing in SATS per period, parsed from
+// relay_tier_prices_sats_json and used by the BTCPay tier-invoice flow.
+// Always returns an object with exactly the keys { pro, max }; it never
+// returns null. A value that numOrZero rejects takes its built-in
+// default (pro 21000, max 42000), and those defaults are returned whole
+// when the setting cannot be parsed. Looking up any other tier on the
+// result yields undefined, so callers must treat that as "unpriced".
+export async function getTierPricesSats() {
+  const fallbackSats = { pro: 21000, max: 42000 };
+  const cfg = await getConfigSnapshot();
+  let stored;
+  try {
+    stored = JSON.parse(cfg.relay_tier_prices_sats_json);
+  } catch {
+    return fallbackSats;
+  }
+  const prices = {};
+  for (const tier of ["pro", "max"]) {
+    prices[tier] = numOrZero(stored?.[tier], fallbackSats[tier]);
+  }
+  return prices;
+}
+
+// Length of one prepaid subscription period in whole days, read from
+// relay_subscription_period_days (default 30).
+// The value is floored BEFORE it is validated, so a fractional setting
+// below one day (e.g. 0.5) falls back to 30 instead of producing a
+// 0-day period. Non-numeric, non-finite, zero and negative settings
+// also fall back to 30.
+export async function getSubscriptionPeriodDays() {
+  const cfg = await getConfigSnapshot();
+  const days = Math.floor(Number(cfg.relay_subscription_period_days));
+  return Number.isFinite(days) && days >= 1 ? days : 30;
+}
+
+// Card-rail (Zaprite) subscription prices in the smallest unit of the
+// configured currency (cents for USD), parsed from
+// relay_tier_prices_fiat_cents_json and used to set the Zaprite order
+// `amount`. Always returns an object with exactly the keys { pro, max };
+// it never returns null. A value that numOrZero rejects takes its
+// built-in default (pro 2100, max 4200), and those defaults are returned
+// whole when the setting cannot be parsed. Looking up any other tier on
+// the result yields undefined, so callers must treat that as "unpriced".
+export async function getTierPricesFiatCents() {
+  const fallbackCents = { pro: 2100, max: 4200 };
+  const cfg = await getConfigSnapshot();
+  let stored;
+  try {
+    stored = JSON.parse(cfg.relay_tier_prices_fiat_cents_json);
+  } catch {
+    return fallbackCents;
+  }
+  const prices = {};
+  for (const tier of ["pro", "max"]) {
+    prices[tier] = numOrZero(stored?.[tier], fallbackCents[tier]);
+  }
+  return prices;
+}
+
+// Connection settings for the Zaprite card rail, returned as
+// { baseUrl, apiKey, currency }.
+//   baseUrl  - relay_zaprite_base_url (or https://api.zaprite.com when
+//              unset) with a single trailing slash removed
+//   apiKey   - relay_zaprite_api_key, or "" when unset; an empty key
+//              means the card rail is not configured (callers should 503)
+//   currency - relay_zaprite_currency (or "USD" when unset), upper-cased
+export async function getZapriteConfig() {
+  const cfg = await getConfigSnapshot();
+  const rawBaseUrl = cfg.relay_zaprite_base_url || "https://api.zaprite.com";
+  const rawCurrency = cfg.relay_zaprite_currency || "USD";
+  return {
+    baseUrl: rawBaseUrl.replace(/\/$/, ""),
+    apiKey: cfg.relay_zaprite_api_key || "",
+    currency: rawCurrency.toUpperCase(),
+  };
+}
+
+// Coerces a parsed-JSON value to a finite, non-negative number, or
+// returns `fallback` when it is not one.
+//   v        - candidate value (typically a field of a parsed JSON blob)
+//   fallback - value returned when `v` is missing or invalid
+// Only numbers and non-blank strings are considered. Number() would
+// otherwise turn null, "", [] and false into 0 (and true into 1), which
+// would silently price a tier at 0 instead of using the default.
+function numOrZero(v, fallback) {
+  const isCandidate =
+    typeof v === "number" || (typeof v === "string" && v.trim() !== "");
+  if (!isCandidate) return fallback;
+  const n = Number(v);
+  return Number.isFinite(n) && n >= 0 ? n : fallback;
+}
+
 // Builds the path to relay-config.json inside the config/ folder
 // under dataDir.
 function configPath() {
  const segments = ["config", "relay-config.json"];
  return path.join(dataDir, ...segments);
 }