Wire new routes; identity, summarize-url, dashboard, admin

This commit is contained in:
Keysat
2026-06-13 13:36:30 -05:00
parent 04dcf86fa4
commit 318c6c4b81
20 changed files with 12407 additions and 499 deletions
+389 -4
View File
@@ -14,19 +14,94 @@ let cached = { mtimeMs: 0, snapshot: defaultConfig() };
function defaultConfig() {
return {
relay_gemini_api_key: "",
relay_parakeet_base_url: "",
relay_gemma_base_url: "",
relay_parakeet_model: "parakeet-tdt-0.6b-v3",
relay_gemma_model: "gemma3:27b",
relay_spark_control_url: "",
// Phase 2 — post-cluster polish pass. After diarization +
// clustering produce global speaker IDs and after analyze
// produces section objects, run a two-stage LLM pass that:
// 1. infers speaker names from the labeled transcript +
// episode metadata
// 2. rewrites section summaries to attribute statements to
// specific speakers ("Matt Hill explains..." instead of
// "the discussion centers around...")
// Cost: ~15-25s wall time at the end of the pipeline. Skipped
// automatically when fewer than 2 speakers detected (nothing
// to attribute). Default ON; operator can disable via the
// Settings tab.
relay_post_cluster_polish_enabled: true,
// Operator-editable polish prompts (same three-layer override
// pattern as relay_transcribe_prompt / relay_analyze_prompt).
// Empty string at this layer = fall through to the hardcoded
// defaults in post-cluster-polish.js. The dashboard "Set as new
// default" button promotes a current override into the
// *_default companion fields.
relay_polish_name_inference_prompt: "",
relay_polish_summary_rewrite_prompt: "",
// Phase 2 of Path 2A — internal meeting extras analysis. Runs
// ONE additional LLM call after the polish pass to extract
// structured items (decisions / action items / open questions /
// key quotes) from the speaker-attributed transcript. Failure
// is non-fatal — rec.extras stays null and the UI hides the
// section. Cost: ~5-15s wall time. Default ON; operator can
// disable here if their hardware is slow or the LLM is unreliable
// for structured extraction. Only affects internal meetings —
// YouTube/podcast flows ignore this setting.
relay_meeting_extras_enabled: true,
// Operator-editable prompt override for the extras pass. Empty =
// fall through to DEFAULT_MEETING_EXTRAS_PROMPT_TEMPLATE in
// meeting-extras.js. Same three-layer convention as the other
// editable prompts.
relay_meeting_extras_prompt: "",
relay_gemini_transcription_model: "gemini-3-flash-preview",
relay_gemini_analysis_model: "gemini-3.1-pro-preview",
relay_transcribe_backend_preference: "gemini_first",
relay_analyze_backend_preference: "gemini_first",
// ── Text-to-speech (audio-first Recaps "walking mode") ──
// TTS turns each topic-summary into a spoken clip so the Recap app
// can play a recap back-to-back like a podcast. Two backends, same
// swap model as transcribe/analyze: operator-hardware Kokoro (via
// Spark Control's /v1/audio/speech) is the default; ElevenLabs is
// the cloud alternative. Gated to Max users on the Recap side.
//
// Preference values mirror the transcribe/analyze knobs:
// hardware_first → Kokoro if ready, else ElevenLabs (if keyed)
// hardware_only → Kokoro only (error if not ready)
// cloud_first → ElevenLabs if keyed, else Kokoro
// cloud_only → ElevenLabs only
// ("gemini_*" naming is intentionally NOT reused — TTS's cloud
// backend is ElevenLabs, not Gemini.)
relay_tts_backend_preference: "hardware_first",
// Default voice when the client doesn't specify one. Kokoro voice ids
// follow <lang_gender>_<name> (e.g. bm_george, bf_emma, am_michael,
// af_heart — the four curated for narration). Enumerate live voices
// for a picker via /v1/models (kind:tts; curated ones first).
relay_tts_default_voice: "bm_george",
// Output format. Kokoro emits wav | mp3 | opus | flac directly, so we
// default to mp3 (small + universally playable for the mobile/offline
// player) and avoid any client-side transcode. The Recap client may
// override per request.
relay_tts_format: "mp3",
// ElevenLabs cloud TTS (untested until a key is supplied). Empty
// api key = ElevenLabs unavailable; relay falls back to Kokoro under
// hardware_first / cloud_first, or errors under cloud_only.
relay_elevenlabs_api_key: "",
relay_elevenlabs_voice_id: "",
relay_elevenlabs_model: "eleven_turbo_v2_5",
relay_keysat_base_url: "https://keysat.xyz",
// ── Cloud operator key (core-decoupling) ──
// Shared secret that authenticates the operator's cloud Recaps server
// (recaps.cc) to this relay. When a request carries X-Recap-User-Id,
// the relay trusts it (and keys the credit pool by user:<id>, with the
// tier the relay stores for that user) ONLY if X-Recap-Operator-Key
// matches this value. Empty = cloud user-id requests are rejected
// (relay still serves the existing license/install path). Set the same
// value here and in the Recaps server's recap_relay_operator_key.
relay_cloud_operator_key: "",
relay_admin_username: "",
relay_admin_password_hash: "",
relay_admin_password_salt: "",
relay_admin_session_secret: "",
relay_save_user_outputs: false,
relay_tier_quotas_json: JSON.stringify({
core: {
lifetime: 10,
@@ -37,9 +112,319 @@ function defaultConfig() {
pro: { lifetime: null, monthly: 50, geminiCapMonthly: 25 },
max: { lifetime: null, monthly: null, geminiCapMonthly: 50 },
}),
relay_tier_prices_usd_json: JSON.stringify({ core: 0, pro: 5, max: 15 }),
// Self-serve subscription prices in SATS per 30-day period. Bitcoin-
// native (the BTCPay rail invoices these directly). The Zaprite card
// rail converts to fiat at purchase time.
relay_tier_prices_sats_json: JSON.stringify({ pro: 21000, max: 42000 }),
// Self-serve subscription prices for the CARD (Zaprite) rail, in the
// smallest unit of relay_zaprite_currency (cents for USD). This is the
// amount actually charged to a card buyer — kept explicit (cents) and
// separate from relay_tier_prices_usd_json (which is a whole-dollar
// figure used only by the dashboard's revenue/margin tile). Default is
// parity with the sat prices (≈$21 / $42); the operator can set a card
// premium here to cover processing fees.
relay_tier_prices_fiat_cents_json: JSON.stringify({ pro: 2100, max: 4200 }),
// Prepaid subscription period length in days.
relay_subscription_period_days: 30,
relay_btcpay_base_url: "",
relay_btcpay_internal_url: "",
relay_btcpay_public_url: "",
relay_btcpay_store_id: "",
relay_btcpay_api_key: "",
relay_btcpay_webhook_secret: "",
// ── Zaprite (card rail) ──
// Hosted-checkout API for card payments. The operator sets the API key
// via the StartOS "Set Zaprite Connection" action. base_url defaults to
// Zaprite's public API; currency is the fiat the card is charged in
// (amount comes from relay_tier_prices_fiat_cents_json, in that
// currency's smallest unit). No webhook secret: the webhook is verified
// by re-fetching the order from Zaprite's authenticated API.
relay_zaprite_base_url: "https://api.zaprite.com",
relay_zaprite_api_key: "",
relay_zaprite_currency: "USD",
relay_credit_packages_json: JSON.stringify([
{ credits: 5, sats: 4000 },
{ credits: 10, sats: 6000 },
{ credits: 20, sats: 10000 },
]),
// ── Chunking / concurrency knobs ──
// ONE canonical default per knob, defined here exactly once. All
// backend code reads from the live-reloaded snapshot at request
// time — no hardcoded fallbacks anywhere else, no per-test-run
// overrides. The Settings tab in the dashboard edits these values
// via PUT /admin/settings.
//
// Gemini backend (relay → Google Gemini API):
// 30-min TX chunks at 12-way concurrency is well under Gemini's
// paid Tier 1 RPM cap (1000 for flash, 150 for pro). 18-min
// analyze windows match the recap-app value. 12 analyze windows
// in flight saturates most operator workloads.
relay_gemini_tx_chunk_minutes: 30,
relay_gemini_tx_concurrency: 12,
relay_gemini_analyze_window_minutes: 18,
relay_gemini_analyze_overlap_minutes: 2,
relay_gemini_analyze_concurrency: 12,
// Hardware backend (relay → operator's Parakeet + Gemma/vLLM):
// 5-min TX chunks at 4-way concurrency for Parakeet OOM headroom
// (operator hardware is typically 1 GPU; spark-control dev
// confirmed 4 concurrent is safe). 18-min analyze windows match
// gemini's value for cross-backend benchmark parity. 8 analyze
// windows in flight is the vLLM-on-single-Spark sweet spot for
// our prompt size (12 starts to queue on the GPU).
// Speaker diarization on the operator-hardware transcribe path.
// When enabled, each audio chunk gets sent to TWO Spark Control
// endpoints in parallel: /v1/audio/transcriptions (Parakeet) AND
// /api/audio/diarize-chunk (Sortformer + TitaNet). The relay
// collects per-chunk voice fingerprints, clusters them across
// chunks using cosine similarity, and re-labels diarization
// segments with globally-consistent speaker IDs (Speaker_0,
// Speaker_1, …). Each transcript entry then carries an optional
// speaker label. Diarization is hardware-path-only — the Gemini
// path uses prompt-based speaker labeling instead. Default OFF;
// operator opts in via the dashboard toggle.
relay_hardware_diarization_enabled: false,
// Cosine-similarity threshold for the cross-chunk speaker
// clustering step. Stored as integer percentage (70 = 0.70
// cosine similarity) because the slider system is integer-only.
// Two fingerprints with similarity >= threshold are merged
// into the same global speaker; below threshold = different
// speakers. NeMo's recommended default for TitaNet embeddings
// is 0.70 — clean audio with distinct voices tolerates lower
// (more aggressive merging); panel audio with similar voices
// benefits from higher (more conservative).
relay_hardware_voice_clustering_threshold: 70,
relay_hardware_anchor_min_speaking_sec: 30,
relay_hardware_small_cluster_max_speaking_sec: 15,
relay_hardware_uncertain_margin_pct: 10,
relay_hardware_tx_chunk_minutes: 5,
// Overlap (in seconds) between consecutive audio chunks on the
// hardware transcribe path. Each chunk N+1 starts `overlap`
// seconds before chunk N ends, so the same audio is covered
// twice at chunk boundaries. The relay dedupes by dropping
// words/segments in chunk N+1 whose timestamps fall within the
// shared region. Two reasons this matters:
// 1. Diarization: TitaNet needs ~3+ seconds of clean speech
// to produce a reliable voice fingerprint. A speaker who
// only talks at the very end of chunk N may get a thin
// fingerprint; the overlap means chunk N+1 also captures
// that audio, giving a better fingerprint for clustering.
// 2. Word boundaries: words straddling a chunk boundary get
// cleanly transcribed in one chunk or the other rather
// than clipped. Applies even when diarization is OFF.
// Default 30s is conservative — enough for fingerprint quality
// without making the overlap region a meaningful fraction of
// a 5-minute chunk.
relay_hardware_tx_chunk_overlap_seconds: 30,
relay_hardware_tx_concurrency: 4,
relay_hardware_analyze_window_minutes: 18,
relay_hardware_analyze_overlap_minutes: 2,
relay_hardware_analyze_concurrency: 8,
// Below this audio duration the chunked-analyze planner emits a
// single window covering the whole transcript (single-shot fast
// path). Below this duration TX chunking is also skipped — both
// backends' transcribeAudio short-circuit when audio is shorter
// than their chunk size, but this is the floor.
relay_analyze_cutoff_minutes: 25,
// ── Output-token caps per backend / pipeline ──
// Gemini's transcribe path needs a high cap because dense long
// chunks emit a lot of timestamped lines. Lower values trade
// robustness against truncation for cost / speed. The model's
// internal per-call cap may be lower than our request — see the
// truncation-detector in gemini.js for the post-hoc check that
// surfaces this as a "partial" status on the Jobs table.
relay_gemini_tx_max_output_tokens: 65536,
// Gemini analyze emits a small JSON sections array, so the cap is
// set explicitly to 8192 (it was previously implicit, i.e. Google's
// default). Raise it if you observe truncated section JSON in audit rows.
relay_gemini_an_max_output_tokens: 8192,
// Hardware analyze max_tokens passed to the operator's vLLM /
// Ollama endpoint via OpenAI-compatible chat-completion shape.
// Smaller models (3B-7B) may produce better JSON with a lower
// cap that forces concision; larger models can use more.
relay_hardware_an_max_tokens: 16000,
// ── Analyze prompt section-count targets ──
// Per-VIDEO-DURATION targets that the relay uses to compute a
// per-window section count at request time. The operator sets how
// many TOTAL sections they want for a video of each duration; the
// relay divides by (total_audio / window_body) to get the average
// sections-per-window, splices the result into {{targetSections}}
// in the analyze prompt.
//
// Replaces the prior 3-bucket per-window model (short/medium/long
// by window duration). Reason: section count should scale with
// video length, not window length — a 30-min single-window
// podcast and a 3-hour 6-window podcast have very different
// segmentation needs even when their window duration is identical.
// Defaults are calibrated so a typical 90-min podcast at the
// default 18-min AN window body produces ~1-2 sections per window
// (about 9 total), matching what felt right in operator testing.
//
// Buckets are video total duration in minutes:
// under_30 → audio_sec < 30 * 60
// 30_60 → 30 * 60 <= audio_sec < 60 * 60
// 60_90 → 60 * 60 <= audio_sec < 90 * 60
// 90_120 → 90 * 60 <= audio_sec < 120 * 60
// 120_150 → 120 * 60 <= audio_sec < 150 * 60
// 150_180 → 150 * 60 <= audio_sec < 180 * 60
// over_180 → audio_sec >= 180 * 60
//
// Each value is target TOTAL sections for that video. The
// per-window emit value = round(total * window_body_sec /
// total_audio_sec).
relay_analyze_total_sections_under_30: 6,
relay_analyze_total_sections_30_60: 8,
relay_analyze_total_sections_60_90: 9,
relay_analyze_total_sections_90_120: 10,
relay_analyze_total_sections_120_150: 11,
relay_analyze_total_sections_150_180: 12,
relay_analyze_total_sections_over_180: 12,
// ── Editable LLM prompts (Settings tab) ──
// Empty string = use the hardcoded default body in
// server/backends/gemini.js (transcribe) or
// server/chunked-analyze.js (analyze). Operator can override via
// the dashboard's Settings tab textareas; PUT /admin/settings
// writes the override here. Cleared (empty) to revert to default
// without operators having to copy the default text exactly.
//
// Transcribe prompt: applies to the Gemini path only. The
// operator-hardware (Parakeet) path is a pure STT model with no
// prompt input, so the override is ignored there.
relay_transcribe_prompt: "",
// Analyze prompt: applies to BOTH Gemini and operator-hardware
// (Gemma) analyze paths. Template variables {{transcript}},
// {{windowMin}}, {{targetSections}} are interpolated at request
// time. PUT /admin/settings validates that the override still
// contains the JSON-output instruction and the {{transcript}}
// variable so an accidental edit can't silently break the
// pipeline.
relay_analyze_prompt: "",
// Operator-promoted defaults. Three-layer resolution at request
// time: override (relay_transcribe_prompt) → operator default
// (relay_transcribe_prompt_default) → hardcoded code default
// (DEFAULT_TRANSCRIBE_PROMPT_BODY in gemini.js). The "Set as new
// default" button in the dashboard moves the override content
// into this field + clears the override, letting operators
// evolve their defaults over time without code redeploys. Empty
// = use the code-side default.
relay_transcribe_prompt_default: "",
relay_analyze_prompt_default: "",
};
}
// Parsed view of the credit-package menu (relay_credit_packages_json).
// Resolves to an array of { credits, sats } pairs in display order. Used
// by the purchase-modal endpoint to render package choices AND by the
// purchase route to validate that the requested package matches a
// configured option (so the buyer can't ask for arbitrary
// credits-for-cheap pricing).
//
// Entries whose credits or sats do not coerce to a finite number > 0 are
// dropped. If the stored value is not valid JSON, or is not an array, the
// built-in DEFAULT_PACKAGES menu is used instead.
//
// The result is always a fresh array of fresh objects: callers may sort or
// mutate it without corrupting the shared module-level defaults.
export async function getCreditPackages() {
  const cfg = await getConfigSnapshot();
  // Copy instead of handing out DEFAULT_PACKAGES itself — a caller that
  // pushed to / edited the returned array would otherwise change the
  // fallback menu for every later request.
  const defaultMenu = () => DEFAULT_PACKAGES.map((pkg) => ({ ...pkg }));
  try {
    const parsed = JSON.parse(cfg.relay_credit_packages_json);
    if (!Array.isArray(parsed)) return defaultMenu();
    return parsed
      .map((p) => ({
        credits: Number(p?.credits),
        sats: Number(p?.sats),
      }))
      .filter(
        (p) =>
          Number.isFinite(p.credits) &&
          p.credits > 0 &&
          Number.isFinite(p.sats) &&
          p.sats > 0
      );
  } catch {
    // Deliberate best-effort: an unreadable config value must not take the
    // purchase flow down, so fall back to the built-in menu.
    return defaultMenu();
  }
}
// Built-in credit-package menu, as { credits, sats } objects in display
// order. getCreditPackages falls back to this when the stored
// relay_credit_packages_json cannot be parsed or is not an array. The
// values match the relay_credit_packages_json default in defaultConfig().
const DEFAULT_PACKAGES = [
  [5, 4000],
  [10, 6000],
  [20, 10000],
].map(([credits, sats]) => ({ credits, sats }));
// Parsed view of relay_tier_prices_usd_json: resolves to a { core, pro, max }
// object of USD-per-month numbers. Used by the dashboard's revenue/margin
// tile.
//
// Each tier value is passed through numOrZero with that tier's built-in
// price (core 0, pro 5, max 15) as the fallback. If the stored JSON cannot
// be read at all, the built-in prices are returned as-is.
export async function getTierPrices() {
  const snapshot = await getConfigSnapshot();
  const builtIn = { core: 0, pro: 5, max: 15 };
  try {
    const stored = JSON.parse(snapshot.relay_tier_prices_usd_json);
    const prices = {};
    for (const [tier, fallbackPrice] of Object.entries(builtIn)) {
      prices[tier] = numOrZero(stored?.[tier], fallbackPrice);
    }
    return prices;
  } catch {
    // Best-effort: unreadable config falls back to the built-in prices.
    return builtIn;
  }
}
// Self-serve subscription pricing in SATS per period, parsed from
// relay_tier_prices_sats_json. Used by the BTCPay tier-invoice flow.
//
// Always resolves to an object with exactly the keys { pro, max }; looking
// up any other tier on the result yields undefined. Each value is passed
// through numOrZero with the built-in price (pro 21000, max 42000) as the
// fallback, and the built-in prices are returned if the stored JSON cannot
// be read.
export async function getTierPricesSats() {
  const snapshot = await getConfigSnapshot();
  const builtIn = { pro: 21000, max: 42000 };
  try {
    const stored = JSON.parse(snapshot.relay_tier_prices_sats_json);
    const prices = {};
    for (const [tier, fallbackPrice] of Object.entries(builtIn)) {
      prices[tier] = numOrZero(stored?.[tier], fallbackPrice);
    }
    return prices;
  } catch {
    // Best-effort: unreadable config falls back to the built-in prices.
    return builtIn;
  }
}
// The configured prepaid period length in whole days, read from
// relay_subscription_period_days (default 30).
//
// The stored value is coerced with Number() and rounded DOWN to a whole
// day. Anything that does not yield at least one full day — missing,
// non-numeric, infinite, zero, negative, or a fraction below 1 — falls back
// to 30, so the result is always an integer >= 1.
export async function getSubscriptionPeriodDays() {
  const cfg = await getConfigSnapshot();
  // Floor BEFORE validating: checking `n > 0` first would let 0.5 through
  // and then floor it to a zero-day period.
  const days = Math.floor(Number(cfg.relay_subscription_period_days));
  return Number.isFinite(days) && days >= 1 ? days : 30;
}
// Card-rail (Zaprite) subscription prices in the smallest unit of the
// configured currency (cents for USD), parsed from
// relay_tier_prices_fiat_cents_json. Used to set the Zaprite order `amount`.
//
// Always resolves to an object with exactly the keys { pro, max }; looking
// up any other tier on the result yields undefined. Each value is passed
// through numOrZero with the built-in price (pro 2100, max 4200) as the
// fallback, and the built-in prices are returned if the stored JSON cannot
// be read.
export async function getTierPricesFiatCents() {
  const snapshot = await getConfigSnapshot();
  const builtIn = { pro: 2100, max: 4200 };
  try {
    const stored = JSON.parse(snapshot.relay_tier_prices_fiat_cents_json);
    const prices = {};
    for (const [tier, fallbackPrice] of Object.entries(builtIn)) {
      prices[tier] = numOrZero(stored?.[tier], fallbackPrice);
    }
    return prices;
  } catch {
    // Best-effort: unreadable config falls back to the built-in prices.
    return builtIn;
  }
}
// Zaprite (card rail) connection config: resolves to
// { baseUrl, apiKey, currency }.
//
//   baseUrl  — relay_zaprite_base_url with surrounding whitespace and ALL
//              trailing slashes removed; "https://api.zaprite.com" when
//              unset or empty after cleanup.
//   apiKey   — relay_zaprite_api_key, trimmed. Empty = card rail not
//              configured (callers should 503).
//   currency — relay_zaprite_currency, trimmed and upper-cased; "USD" when
//              unset.
//
// Non-string values are treated as unset rather than throwing, and pasted
// values with stray whitespace/newlines are cleaned so they cannot end up
// inside request URLs or auth headers.
export async function getZapriteConfig() {
  const cfg = await getConfigSnapshot();
  // Anything that is not a string (null, number, object…) counts as unset.
  const clean = (value) => (typeof value === "string" ? value.trim() : "");
  // Strip slashes before applying the default so a value like "/" also
  // falls back instead of producing an empty base URL.
  const baseUrl =
    clean(cfg.relay_zaprite_base_url).replace(/\/+$/, "") ||
    "https://api.zaprite.com";
  const currency = (clean(cfg.relay_zaprite_currency) || "USD").toUpperCase();
  return {
    baseUrl,
    apiKey: clean(cfg.relay_zaprite_api_key),
    currency,
  };
}
// Normalizes a config value to a finite, non-negative number, or returns
// `fallback` when the value is not usable.
//
//   v        — raw value (typically a field of a JSON.parse'd config blob).
//   fallback — returned unchanged when `v` is rejected.
//
// Accepted: numbers, and non-blank strings that parse as numbers, provided
// the result is finite and >= 0 (so an explicit 0 is a valid value).
// Rejected: everything else. In particular null, "", whitespace-only
// strings, booleans and arrays are treated as "not set" — a bare Number()
// coercion would turn all of those into 0 and silently price a tier at 0.
function numOrZero(v, fallback) {
  let candidate;
  if (typeof v === "number") {
    candidate = v;
  } else if (typeof v === "string" && v.trim() !== "") {
    candidate = Number(v);
  } else {
    return fallback;
  }
  return Number.isFinite(candidate) && candidate >= 0 ? candidate : fallback;
}
// Location of the relay config file: <dataDir>/config/relay-config.json,
// joined with path.join so separators follow the host platform.
function configPath() {
  const segments = ["config", "relay-config.json"];
  return path.join(dataDir, ...segments);
}