// Live-reloading config layer. Mirrors Recap's config.js pattern: stat
// /data/config/relay-config.json on every access and re-read + re-parse
// it whenever its mtime has changed (so StartOS-action changes are
// picked up without a daemon restart), then expose typed accessors.
//
// All defaults match the schema in startos/file-models/config.json.ts.

import fs from "fs/promises";
import path from "path";

// Root data directory. The config file lives at
// <dataDir>/config/relay-config.json (see configPath()); initConfig()
// may replace this value at startup.
let dataDir = "/data";
// Most recent snapshot returned by getConfigSnapshot(), together with the
// mtime of the file it was parsed from. Starts as pure defaults with
// mtimeMs 0. defaultConfig is a function declaration, so it is hoisted
// and already callable on this line.
let cached = { mtimeMs: 0, snapshot: defaultConfig() };

/**
 * Builds the built-in default value for every relay config key.
 *
 * A fresh object is returned on each call. getConfigSnapshot() spreads the
 * parsed on-disk config over it, so any key missing from the file keeps
 * the value defined here. Structured settings (the *_json keys) are stored
 * as JSON strings and decoded by the accessor functions in this file.
 *
 * @returns {object} Flat map of config key → default value.
 */
function defaultConfig() {
  return {
    relay_gemini_api_key: "",
    relay_spark_control_url: "",
    // Phase 2 — post-cluster polish pass. After diarization +
    // clustering produce global speaker IDs and after analyze
    // produces section objects, run a two-stage LLM pass that:
    //   1. infers speaker names from the labeled transcript +
    //      episode metadata
    //   2. rewrites section summaries to attribute statements to
    //      specific speakers ("Matt Hill explains..." instead of
    //      "the discussion centers around...")
    // Cost: ~15-25s wall time at the end of the pipeline. Skipped
    // automatically when fewer than 2 speakers detected (nothing
    // to attribute). Default ON; operator can disable via the
    // Settings tab.
    relay_post_cluster_polish_enabled: true,
    // Operator-editable polish prompts (same three-layer override
    // pattern as relay_transcribe_prompt / relay_analyze_prompt).
    // Empty string at this layer = fall through to the hardcoded
    // defaults in post-cluster-polish.js. The dashboard "Set as new
    // default" button promotes a current override into the
    // *_default companion fields.
    relay_polish_name_inference_prompt: "",
    relay_polish_summary_rewrite_prompt: "",
    // Phase 2 of Path 2A — internal meeting extras analysis. Runs
    // ONE additional LLM call after the polish pass to extract
    // structured items (decisions / action items / open questions /
    // key quotes) from the speaker-attributed transcript. Failure
    // is non-fatal — rec.extras stays null and the UI hides the
    // section. Cost: ~5-15s wall time. Default ON; operator can
    // disable here if their hardware is slow or the LLM is unreliable
    // for structured extraction. Only affects internal meetings —
    // YouTube/podcast flows ignore this setting.
    relay_meeting_extras_enabled: true,
    // Operator-editable prompt override for the extras pass. Empty =
    // fall through to DEFAULT_MEETING_EXTRAS_PROMPT_TEMPLATE in
    // meeting-extras.js. Same three-layer convention as the other
    // editable prompts.
    relay_meeting_extras_prompt: "",
    relay_gemini_transcription_model: "gemini-3-flash-preview",
    relay_gemini_analysis_model: "gemini-3.1-pro-preview",
    relay_transcribe_backend_preference: "gemini_first",
    relay_analyze_backend_preference: "gemini_first",

    // ── Text-to-speech (audio-first Recaps "walking mode") ──
    // TTS turns each topic-summary into a spoken clip so the Recap app
    // can play a recap back-to-back like a podcast. Two backends, same
    // swap model as transcribe/analyze: operator-hardware Kokoro (via
    // Spark Control's /v1/audio/speech) is the default; ElevenLabs is
    // the cloud alternative. Gated to Max users on the Recap side.
    //
    // Preference values mirror the transcribe/analyze knobs:
    //   hardware_first → Kokoro if ready, else ElevenLabs (if keyed)
    //   hardware_only  → Kokoro only (error if not ready)
    //   cloud_first    → ElevenLabs if keyed, else Kokoro
    //   cloud_only     → ElevenLabs only
    // ("gemini_*" naming is intentionally NOT reused — TTS's cloud
    //  backend is ElevenLabs, not Gemini.)
    relay_tts_backend_preference: "hardware_first",
    // Default voice when the client doesn't specify one. Kokoro voice ids
    // follow <lang_gender>_<name> (e.g. bm_george, bf_emma, am_michael,
    // af_heart — the four curated for narration). Enumerate live voices
    // for a picker via /v1/models (kind:tts; curated ones first).
    relay_tts_default_voice: "bm_george",
    // Output format. Kokoro emits wav | mp3 | opus | flac directly, so we
    // default to mp3 (small + universally playable for the mobile/offline
    // player) and avoid any client-side transcode. The Recap client may
    // override per request.
    relay_tts_format: "mp3",
    // ElevenLabs cloud TTS (untested until a key is supplied). Empty
    // api key = ElevenLabs unavailable; relay falls back to Kokoro under
    // hardware_first / cloud_first, or errors under cloud_only.
    relay_elevenlabs_api_key: "",
    relay_elevenlabs_voice_id: "",
    relay_elevenlabs_model: "eleven_turbo_v2_5",
    relay_keysat_base_url: "https://keysat.xyz",
    // ── Cloud operator key (core-decoupling) ──
    // Shared secret that authenticates the operator's cloud Recaps server
    // (recaps.cc) to this relay. When a request carries X-Recap-User-Id,
    // the relay trusts it (and keys the credit pool by user:<id>, with the
    // tier the relay stores for that user) ONLY if X-Recap-Operator-Key
    // matches this value. Empty = cloud user-id requests are rejected
    // (relay still serves the existing license/install path). Set the same
    // value here and in the Recaps server's recap_relay_operator_key.
    relay_cloud_operator_key: "",
    relay_admin_username: "",
    relay_admin_password_hash: "",
    relay_admin_password_salt: "",
    relay_admin_session_secret: "",
    relay_save_user_outputs: false,
    // Per-tier quota blob, stored as a JSON string. Decoded by
    // getTierQuotas(), which carries the same per-field defaults.
    relay_tier_quotas_json: JSON.stringify({
      core: {
        lifetime: 10,
        geminiCapLifetime: 5,
        monthly: null,
        geminiCapMonthly: null,
      },
      pro: { lifetime: null, monthly: 50, geminiCapMonthly: 25 },
      max: { lifetime: null, monthly: null, geminiCapMonthly: 50 },
    }),
    // Whole-dollar tier prices, stored as a JSON string. Decoded by
    // getTierPrices().
    relay_tier_prices_usd_json: JSON.stringify({ core: 0, pro: 5, max: 15 }),
    // Self-serve subscription prices in SATS per 30-day period. Bitcoin-
    // native (the BTCPay rail invoices these directly). The Zaprite card
    // rail converts to fiat at purchase time.
    relay_tier_prices_sats_json: JSON.stringify({ pro: 21000, max: 42000 }),
    // Self-serve subscription prices for the CARD (Zaprite) rail, in the
    // smallest unit of relay_zaprite_currency (cents for USD). This is the
    // amount actually charged to a card buyer — kept explicit (cents) and
    // separate from relay_tier_prices_usd_json (which is a whole-dollar
    // figure used only by the dashboard's revenue/margin tile). Default is
    // parity with the sat prices (≈$21 / $42); the operator can set a card
    // premium here to cover processing fees.
    relay_tier_prices_fiat_cents_json: JSON.stringify({ pro: 2100, max: 4200 }),
    // Prepaid subscription period length in days.
    relay_subscription_period_days: 30,
    relay_btcpay_base_url: "",
    relay_btcpay_internal_url: "",
    relay_btcpay_public_url: "",
    relay_btcpay_store_id: "",
    relay_btcpay_api_key: "",
    relay_btcpay_webhook_secret: "",
    // ── Zaprite (card rail) ──
    // Hosted-checkout API for card payments. The operator sets the API key
    // via the StartOS "Set Zaprite Connection" action. base_url defaults to
    // Zaprite's public API; currency is the fiat the card is charged in
    // (amount comes from relay_tier_prices_fiat_cents_json, in that
    // currency's smallest unit). No webhook secret: the webhook is verified
    // by re-fetching the order from Zaprite's authenticated API.
    relay_zaprite_base_url: "https://api.zaprite.com",
    relay_zaprite_api_key: "",
    relay_zaprite_currency: "USD",
    // Credit-package menu, stored as a JSON string. Decoded by
    // getCreditPackages(); the values here match DEFAULT_PACKAGES, which
    // that accessor falls back to when this blob is malformed.
    relay_credit_packages_json: JSON.stringify([
      { credits: 5, sats: 4000 },
      { credits: 10, sats: 6000 },
      { credits: 20, sats: 10000 },
    ]),

    // ── Chunking / concurrency knobs ──
    // ONE canonical default per knob, defined here exactly once. All
    // backend code reads from the live-reloaded snapshot at request
    // time — no hardcoded fallbacks anywhere else, no per-test-run
    // overrides. The Settings tab in the dashboard edits these values
    // via PUT /admin/settings.
    //
    // Gemini backend (relay → Google Gemini API):
    //   30-min TX chunks at 12-way concurrency is well under Gemini's
    //   paid Tier 1 RPM cap (1000 for flash, 150 for pro). 18-min
    //   analyze windows match the recap-app value. 12 analyze windows
    //   in flight saturates most operator workloads.
    relay_gemini_tx_chunk_minutes: 30,
    relay_gemini_tx_concurrency: 12,
    relay_gemini_analyze_window_minutes: 18,
    relay_gemini_analyze_overlap_minutes: 2,
    relay_gemini_analyze_concurrency: 12,
    // Hardware backend (relay → operator's Parakeet + Gemma/vLLM):
    //   5-min TX chunks at 4-way concurrency for Parakeet OOM headroom
    //   (operator hardware is typically 1 GPU; spark-control dev
    //   confirmed 4 concurrent is safe). 18-min analyze windows match
    //   gemini's value for cross-backend benchmark parity. 8 analyze
    //   windows in flight is the vLLM-on-single-Spark sweet spot for
    //   our prompt size (12 starts to queue on the GPU).
    //   (These values are the relay_hardware_tx_* and
    //   relay_hardware_analyze_* keys defined further down, after the
    //   diarization settings.)
    //
    // Speaker diarization on the operator-hardware transcribe path.
    // When enabled, each audio chunk gets sent to TWO Spark Control
    // endpoints in parallel: /v1/audio/transcriptions (Parakeet) AND
    // /api/audio/diarize-chunk (Sortformer + TitaNet). The relay
    // collects per-chunk voice fingerprints, clusters them across
    // chunks using cosine similarity, and re-labels diarization
    // segments with globally-consistent speaker IDs (Speaker_0,
    // Speaker_1, …). Each transcript entry then carries an optional
    // speaker label. Diarization is hardware-path-only — the Gemini
    // path uses prompt-based speaker labeling instead. Default OFF;
    // operator opts in via the dashboard toggle.
    relay_hardware_diarization_enabled: false,
    // Cosine-similarity threshold for the cross-chunk speaker
    // clustering step. Stored as integer percentage (70 = 0.70
    // cosine similarity) because the slider system is integer-only.
    // Two fingerprints with similarity >= threshold are merged
    // into the same global speaker; below threshold = different
    // speakers. NeMo's recommended default for TitaNet embeddings
    // is 0.70 — clean audio with distinct voices tolerates lower
    // (more aggressive merging); panel audio with similar voices
    // benefits from higher (more conservative).
    relay_hardware_voice_clustering_threshold: 70,
    relay_hardware_anchor_min_speaking_sec: 30,
    relay_hardware_small_cluster_max_speaking_sec: 15,
    relay_hardware_uncertain_margin_pct: 10,
    relay_hardware_tx_chunk_minutes: 5,
    // Overlap (in seconds) between consecutive audio chunks on the
    // hardware transcribe path. Each chunk N+1 starts `overlap`
    // seconds before chunk N ends, so the same audio is covered
    // twice at chunk boundaries. The relay dedupes by dropping
    // words/segments in chunk N+1 whose timestamps fall within the
    // shared region. Two reasons this matters:
    //   1. Diarization: TitaNet needs ~3+ seconds of clean speech
    //      to produce a reliable voice fingerprint. A speaker who
    //      only talks at the very end of chunk N may get a thin
    //      fingerprint; the overlap means chunk N+1 also captures
    //      that audio, giving a better fingerprint for clustering.
    //   2. Word boundaries: words straddling a chunk boundary get
    //      cleanly transcribed in one chunk or the other rather
    //      than clipped. Applies even when diarization is OFF.
    // Default 30s is conservative — enough for fingerprint quality
    // without making the overlap region a meaningful fraction of
    // a 5-minute chunk.
    relay_hardware_tx_chunk_overlap_seconds: 30,
    relay_hardware_tx_concurrency: 4,
    relay_hardware_analyze_window_minutes: 18,
    relay_hardware_analyze_overlap_minutes: 2,
    relay_hardware_analyze_concurrency: 8,
    // Below this audio duration the chunked-analyze planner emits a
    // single window covering the whole transcript (single-shot fast
    // path). Below this duration TX chunking is also skipped — both
    // backends' transcribeAudio short-circuit when audio is shorter
    // than their chunk size, but this is the floor.
    relay_analyze_cutoff_minutes: 25,

    // ── Output-token caps per backend / pipeline ──
    // Gemini's transcribe path needs a high cap because dense long
    // chunks emit a lot of timestamped lines. Lower values trade
    // robustness against truncation for cost / speed. The model's
    // internal per-call cap may be lower than our request — see the
    // truncation-detector in gemini.js for the post-hoc check that
    // surfaces this as a "partial" status on the Jobs table.
    relay_gemini_tx_max_output_tokens: 65536,
    // Gemini analyze emits a small JSON sections array — defaulting
    // explicit to 8192 (was implicit / Google default before). Bump
    // higher if you observe truncated section JSON in audit rows.
    relay_gemini_an_max_output_tokens: 8192,
    // Hardware analyze max_tokens passed to the operator's vLLM /
    // Ollama endpoint via OpenAI-compatible chat-completion shape.
    // Smaller models (3B-7B) may produce better JSON with a lower
    // cap that forces concision; larger models can use more.
    relay_hardware_an_max_tokens: 16000,

    // ── Analyze prompt section-count targets ──
    // Per-VIDEO-DURATION targets that the relay uses to compute a
    // per-window section count at request time. The operator sets how
    // many TOTAL sections they want for a video of each duration; the
    // relay divides by (total_audio / window_body) to get the average
    // sections-per-window, splices the result into {{targetSections}}
    // in the analyze prompt.
    //
    // Replaces the prior 3-bucket per-window model (short/medium/long
    // by window duration). Reason: section count should scale with
    // video length, not window length — a 30-min single-window
    // podcast and a 3-hour 6-window podcast have very different
    // segmentation needs even when their window duration is identical.
    // Defaults are calibrated so a typical 90-min podcast at the
    // default 18-min AN window body produces ~1-2 sections per window
    // (about 9 total), matching what felt right in operator testing.
    //
    // Buckets are video total duration in minutes:
    //   under_30      → audio_sec < 30 * 60
    //   30_60         → 30 * 60 <= audio_sec < 60 * 60
    //   60_90         → 60 * 60 <= audio_sec < 90 * 60
    //   90_120        → 90 * 60 <= audio_sec < 120 * 60
    //   120_150       → 120 * 60 <= audio_sec < 150 * 60
    //   150_180       → 150 * 60 <= audio_sec < 180 * 60
    //   over_180      → audio_sec >= 180 * 60
    //
    // Each value is target TOTAL sections for that video. The
    // per-window emit value = round(total * window_body_sec /
    // total_audio_sec).
    relay_analyze_total_sections_under_30: 6,
    relay_analyze_total_sections_30_60: 8,
    relay_analyze_total_sections_60_90: 9,
    relay_analyze_total_sections_90_120: 10,
    relay_analyze_total_sections_120_150: 11,
    relay_analyze_total_sections_150_180: 12,
    relay_analyze_total_sections_over_180: 12,

    // ── Editable LLM prompts (Settings tab) ──
    // Empty string = use the hardcoded default body in
    // server/backends/gemini.js (transcribe) or
    // server/chunked-analyze.js (analyze). Operator can override via
    // the dashboard's Settings tab textareas; PUT /admin/settings
    // writes the override here. Cleared (empty) to revert to default
    // without operators having to copy the default text exactly.
    //
    // Transcribe prompt: applies to the Gemini path only. The
    // operator-hardware (Parakeet) path is a pure STT model with no
    // prompt input, so the override is ignored there.
    relay_transcribe_prompt: "",
    // Analyze prompt: applies to BOTH Gemini and operator-hardware
    // (Gemma) analyze paths. Template variables {{transcript}},
    // {{windowMin}}, {{targetSections}} are interpolated at request
    // time. PUT /admin/settings validates that the override still
    // contains the JSON-output instruction and the {{transcript}}
    // variable so an accidental edit can't silently break the
    // pipeline.
    relay_analyze_prompt: "",
    // Operator-promoted defaults. Three-layer resolution at request
    // time: override (relay_transcribe_prompt) → operator default
    // (relay_transcribe_prompt_default) → hardcoded code default
    // (DEFAULT_TRANSCRIBE_PROMPT_BODY in gemini.js). The "Set as new
    // default" button in the dashboard moves the override content
    // into this field + clears the override, letting operators
    // evolve their defaults over time without code redeploys. Empty
    // = use the code-side default.
    relay_transcribe_prompt_default: "",
    relay_analyze_prompt_default: "",
  };
}

/**
 * Parsed view of the credit-package menu (relay_credit_packages_json).
 *
 * Used by the purchase-modal endpoint to render package choices AND by
 * the purchase route to validate that the requested package matches a
 * configured option (so the buyer can't ask for arbitrary
 * credits-for-cheap pricing).
 *
 * Entries whose `credits` or `sats` is not a finite number greater than
 * zero are dropped. If the blob is not valid JSON, or is valid JSON but
 * not an array, the built-in DEFAULT_PACKAGES menu is returned instead.
 *
 * @returns {Promise<Array<{credits: number, sats: number}>>} Packages in
 *   display order. Always a fresh array of fresh objects, so callers may
 *   sort or mutate the result freely.
 */
export async function getCreditPackages() {
  // Hand out copies of the defaults: returning the module-level array
  // itself would let one caller's mutation leak into every later call.
  const defaultMenu = () => DEFAULT_PACKAGES.map((pkg) => ({ ...pkg }));

  const cfg = await getConfigSnapshot();
  try {
    const parsed = JSON.parse(cfg.relay_credit_packages_json);
    if (!Array.isArray(parsed)) return defaultMenu();
    return parsed
      .map((p) => ({
        credits: Number(p?.credits),
        sats: Number(p?.sats),
      }))
      .filter(
        (p) =>
          Number.isFinite(p.credits) &&
          p.credits > 0 &&
          Number.isFinite(p.sats) &&
          p.sats > 0
      );
  } catch {
    return defaultMenu();
  }
}

// Built-in credit-package menu, written as [credits, sats] pairs and
// expanded into { credits, sats } objects. getCreditPackages() falls back
// to this when relay_credit_packages_json is not an array or fails to
// parse.
const DEFAULT_PACKAGES = [
  [5, 4000],
  [10, 6000],
  [20, 10000],
].map(([credits, sats]) => ({ credits, sats }));

/**
 * Parsed view of relay_tier_prices_usd_json, used by the dashboard's
 * revenue/margin tile.
 *
 * Each tier is passed through numOrZero() with its built-in default
 * (core 0, pro 5, max 15) as the fallback. If the blob cannot be parsed
 * at all, the built-in defaults are returned.
 *
 * @returns {Promise<{core: number, pro: number, max: number}>} USD per
 *   month for each tier.
 */
export async function getTierPrices() {
  const fallback = { core: 0, pro: 5, max: 15 };
  const snapshot = await getConfigSnapshot();
  try {
    const blob = JSON.parse(snapshot.relay_tier_prices_usd_json);
    const prices = {};
    for (const [tier, defaultUsd] of Object.entries(fallback)) {
      prices[tier] = numOrZero(blob?.[tier], defaultUsd);
    }
    return prices;
  } catch {
    return fallback;
  }
}

/**
 * Self-serve subscription pricing in SATS per period, read from
 * relay_tier_prices_sats_json. Used by the BTCPay tier-invoice flow.
 *
 * The result always carries both `pro` and `max`: each entry goes through
 * numOrZero() with its built-in default (21000 / 42000) as the fallback,
 * and an unparseable blob yields the built-in defaults. No other tier key
 * is ever present on the result.
 *
 * @returns {Promise<{pro: number, max: number}>}
 */
export async function getTierPricesSats() {
  const DEFAULT_PRO_SATS = 21000;
  const DEFAULT_MAX_SATS = 42000;

  const snapshot = await getConfigSnapshot();
  try {
    const blob = JSON.parse(snapshot.relay_tier_prices_sats_json);
    const pro = numOrZero(blob?.pro, DEFAULT_PRO_SATS);
    const max = numOrZero(blob?.max, DEFAULT_MAX_SATS);
    return { pro, max };
  } catch {
    return { pro: DEFAULT_PRO_SATS, max: DEFAULT_MAX_SATS };
  }
}

/**
 * The configured prepaid period length in whole days
 * (relay_subscription_period_days), rounded down.
 *
 * Falls back to 30 when the configured value is not a finite number or
 * rounds down to less than one day. The check runs AFTER flooring so a
 * fractional value such as 0.5 cannot produce a zero-day period.
 *
 * @returns {Promise<number>} Integer number of days, always >= 1.
 */
export async function getSubscriptionPeriodDays() {
  const cfg = await getConfigSnapshot();
  const days = Math.floor(Number(cfg.relay_subscription_period_days));
  return Number.isFinite(days) && days >= 1 ? days : 30;
}

/**
 * Card-rail (Zaprite) subscription prices in the smallest unit of the
 * configured currency (cents for USD), read from
 * relay_tier_prices_fiat_cents_json. Used to set the Zaprite order
 * `amount`.
 *
 * The result always carries both `pro` and `max`: each entry goes through
 * numOrZero() with its built-in default (2100 / 4200) as the fallback,
 * and an unparseable blob yields the built-in defaults. No other tier key
 * is ever present on the result.
 *
 * @returns {Promise<{pro: number, max: number}>}
 */
export async function getTierPricesFiatCents() {
  const fallback = { pro: 2100, max: 4200 };
  const snapshot = await getConfigSnapshot();
  try {
    const blob = JSON.parse(snapshot.relay_tier_prices_fiat_cents_json);
    return Object.fromEntries(
      Object.entries(fallback).map(([tier, defaultCents]) => [
        tier,
        numOrZero(blob?.[tier], defaultCents),
      ])
    );
  } catch {
    return fallback;
  }
}

/**
 * Zaprite (card rail) connection config.
 *
 * - baseUrl: relay_zaprite_base_url (default "https://api.zaprite.com")
 *   with a single trailing slash removed.
 * - apiKey: relay_zaprite_api_key, or "" when unset. Empty = card rail
 *   not configured (callers should 503).
 * - currency: relay_zaprite_currency (default "USD"), upper-cased.
 *
 * @returns {Promise<{baseUrl: string, apiKey: string, currency: string}>}
 */
export async function getZapriteConfig() {
  const {
    relay_zaprite_base_url: configuredUrl,
    relay_zaprite_api_key: configuredKey,
    relay_zaprite_currency: configuredCurrency,
  } = await getConfigSnapshot();

  const rawUrl = configuredUrl || "https://api.zaprite.com";
  const baseUrl = rawUrl.replace(/\/$/, "");
  const rawCurrency = configuredCurrency || "USD";
  const currency = rawCurrency.toUpperCase();
  const apiKey = configuredKey || "";

  return { baseUrl, apiKey, currency };
}

/**
 * Coerces a config value to a finite, non-negative number.
 *
 * Only real numbers and non-blank numeric strings are accepted. Anything
 * else yields `fallback`: null, undefined, "", whitespace, booleans,
 * arrays, objects, NaN, ±Infinity and negative values. The type check
 * comes first because Number() would otherwise turn null / "" / [] into
 * 0 and true into 1, so a blob like {"pro": null} would be read as a
 * price of 0 rather than "not set".
 *
 * @param {*} v - Raw value taken from parsed JSON.
 * @param {number} fallback - Returned when `v` is not a usable number.
 * @returns {number} The coerced value, or `fallback`.
 */
function numOrZero(v, fallback) {
  const looksNumeric =
    typeof v === "number" || (typeof v === "string" && v.trim() !== "");
  if (!looksNumeric) return fallback;
  const n = Number(v);
  return Number.isFinite(n) && n >= 0 ? n : fallback;
}

/**
 * Absolute location of the relay config file under the current data
 * directory: <dataDir>/config/relay-config.json.
 *
 * @returns {string}
 */
function configPath() {
  const segments = [dataDir, "config", "relay-config.json"];
  return path.join(...segments);
}

/**
 * One-time startup hook for the config layer.
 *
 * Optionally overrides the data directory, makes sure the config
 * directory exists, then primes the snapshot cache so the first request
 * doesn't pay for a file-read.
 *
 * @param {object} [options] - May be omitted entirely.
 * @param {string} [options.dataDir] - Data directory to use instead of
 *   the current one. Ignored when falsy.
 * @returns {Promise<void>}
 */
export async function initConfig({ dataDir: dd } = {}) {
  if (dd) dataDir = dd;
  const configDir = path.dirname(configPath());
  try {
    await fs.mkdir(configDir, { recursive: true });
  } catch (err) {
    // Still best-effort: getConfigSnapshot() copes with a missing file.
    // Log the failure so a bad mount or permission problem is visible.
    console.warn(`[config] could not create ${configDir}: ${err?.message}`);
  }
  // Prime the cache so the first request doesn't pay for a file-read.
  await getConfigSnapshot();
}

/**
 * Returns the current config: built-in defaults overlaid with the keys
 * found in the on-disk config file.
 *
 * Every call stats the file. The file is re-read and re-parsed only when
 * its mtime differs from the cached one, so repeat callers within one
 * request don't re-read. The last good snapshot is returned, unchanged,
 * when:
 *   - the file cannot be stat'ed (e.g. it does not exist yet),
 *   - it cannot be read or is not valid JSON, or
 *   - its top-level JSON value is not an object. Spreading an array or a
 *     string would inject index keys ("0", "1", …), and null or a number
 *     would silently reset everything to defaults.
 * In the last two cases a warning is logged and the cached mtime is left
 * alone, so the file is retried on the next call.
 *
 * @returns {Promise<object>} The cached snapshot object. It is shared
 *   between callers, so treat it as read-only.
 */
export async function getConfigSnapshot() {
  const p = configPath();
  let stat;
  try {
    stat = await fs.stat(p);
  } catch {
    return cached.snapshot;
  }
  if (stat.mtimeMs === cached.mtimeMs) return cached.snapshot;
  try {
    const raw = await fs.readFile(p, "utf8");
    const parsed = JSON.parse(raw);
    const isPlainObject =
      parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    if (!isPlainObject) {
      console.warn(
        `[config] failed to parse ${p}: top-level JSON value must be an object`
      );
      return cached.snapshot;
    }
    cached = {
      mtimeMs: stat.mtimeMs,
      snapshot: { ...defaultConfig(), ...parsed },
    };
  } catch (err) {
    console.warn(`[config] failed to parse ${p}: ${err?.message}`);
  }
  return cached.snapshot;
}

/**
 * Parsed view of relay_tier_quotas_json, with safe fallbacks if the blob
 * is missing or malformed.
 *
 * Every field is resolved on its own: a value that is missing, undefined
 * or null in the blob takes the built-in default from the table below.
 * NOTE: because the fallback uses `??`, an explicit null in the blob
 * behaves exactly like a missing key. If the blob cannot be parsed at
 * all, the full default table is returned.
 *
 * geminiCapLifetime is the new field added in relay 0.2.3 — splits a
 * Core install's lifetime budget into Gemini-served vs hardware-served
 * credits.
 *
 * @returns {Promise<{core: object, pro: object, max: object}>} Quota
 *   fields per tier, in the same shape as the default table.
 */
export async function getTierQuotas() {
  // Built fresh on every call so the returned object is never shared.
  const defaults = {
    core: {
      lifetime: 10,
      geminiCapLifetime: 5,
      monthly: null,
      geminiCapMonthly: null,
    },
    pro: { lifetime: null, monthly: 50, geminiCapMonthly: 25 },
    max: { lifetime: null, monthly: null, geminiCapMonthly: 50 },
  };

  const snapshot = await getConfigSnapshot();
  try {
    const blob = JSON.parse(snapshot.relay_tier_quotas_json);
    const quotas = {};
    for (const [tier, tierDefaults] of Object.entries(defaults)) {
      const configured = blob?.[tier];
      const resolved = {};
      for (const [field, defaultValue] of Object.entries(tierDefaults)) {
        resolved[field] = configured?.[field] ?? defaultValue;
      }
      quotas[tier] = resolved;
    }
    return quotas;
  } catch {
    return defaults;
  }
}