// recap-relay/startos/file-models/config.json.ts

import { FileHelper } from '@start9labs/start-sdk'
import { Volume } from '@start9labs/start-sdk/package/lib/util/Volume'
import { z } from 'zod'
// Volume named 'main'; the relay config file below is stored relative to it.
const mainVolume = new Volume('main')

// ── Private schema builders ──
// Every config key is optional on disk: each builder wraps its zod type in
// `.default(...)`, so an absent key parses to the fallback given here.

// Free-form string field (fallback is the empty string unless stated).
const stringField = (fallback = '') => z.string().default(fallback)

// On/off toggle.
const booleanField = (fallback: boolean) => z.boolean().default(fallback)

// Integer knob constrained to the inclusive range [min, max].
const boundedIntField = (knob: { min: number; max: number; fallback: number }) =>
  z.number().int().min(knob.min).max(knob.max).default(knob.fallback)

// String field holding a JSON document; the fallback value is serialised once
// with JSON.stringify when this module is evaluated.
const jsonStringField = (fallback: unknown) =>
  z.string().default(JSON.stringify(fallback))

// Routing order between the Gemini backend and the operator-hardware backend:
//   - "gemini_first"    try Gemini until the per-tier cap, then hardware
//   - "hardware_first"  try hardware first, fall back to Gemini
//   - "gemini_only"     Gemini only, fail when the cap is exceeded
//   - "hardware_only"   hardware only, fail when not configured
const backendPreferenceField = () =>
  z
    .enum(['gemini_first', 'hardware_first', 'gemini_only', 'hardware_only'])
    .default('gemini_first')

// Operator-side configuration for the Recap Relay package, persisted as JSON
// at config/relay-config.json on the main volume. Every field is optional and
// has a default, so the relay boots even from an empty config; it will refuse
// to serve traffic until at least relay_gemini_api_key and
// relay_admin_password_hash are set.
// NOTE(review): the per-field notes below say an empty Gemini key only
// disables the Gemini backend and an empty admin hash only disables /admin —
// confirm which description matches the server's actual behaviour.
export const configFile = FileHelper.json(
  {
    base: mainVolume,
    subpath: 'config/relay-config.json',
  },
  z.object({
    // ── Backend credentials ──
    // Gemini API key owned by the relay. All transcribe + analyze forwarding
    // uses it until a user exceeds their tier's Gemini cap, at which point
    // requests overflow to operator hardware (configured below). Leaving it
    // empty turns the Gemini backend off entirely; the relay then routes to
    // hardware if configured, or answers every request with 503.
    relay_gemini_api_key: stringField(),

    // ── Service discovery (operator-hardware wiring) ──
    // URL of a Spark-Control-style /api/endpoints JSON document listing the
    // local LLM / STT services on the operator's LAN. The relay reads it
    // (60s TTL) to locate the transcribe + analyze backends for the hardware
    // path, and POSTs /api/audio/diarize-chunk to the same host. It is the
    // SINGLE source of truth for which hardware URLs and models are used:
    // the per-endpoint manual-override fallback was removed in v0.2.84 when
    // Spark Control became the canonical wiring.
    //
    // An empty string disables the operator-hardware path entirely; any
    // hardware-routed request then gets a 503.
    relay_spark_control_url: stringField(),

    // ── Gemini model selection ──
    // Lets the operator choose the Gemini SKU per pipeline step without
    // rebuilding the relay. Defaults follow Google's typical
    // recommendations: Flash for transcription (cheap, fast,
    // multimodal-capable) and Pro for analysis (higher quality on
    // structured-JSON outputs). Switching analysis to Flash trades some
    // section-boundary precision for speed and cost.
    relay_gemini_transcription_model: stringField('gemini-3-flash-preview'),
    relay_gemini_analysis_model: stringField('gemini-3.1-pro-preview'),

    // ── Backend routing preference per pipeline ──
    // Whether each pipeline tries Gemini first (the default: best quality,
    // spends the operator's Gemini API budget) or the operator-hardware
    // backend first (saves Gemini budget, possibly slower depending on the
    // hardware). Allowed values are listed on backendPreferenceField.
    relay_transcribe_backend_preference: backendPreferenceField(),
    relay_analyze_backend_preference: backendPreferenceField(),

    // ── License server ──
    // Base URL of the Keysat license server used by the cached online
    // license-validation check. Defaults to the public endpoint; operators
    // running Keysat on the same Start9 server can point this at the
    // internal `http://keysat.startos:<port>` hostname for a lower-latency
    // hot path.
    relay_keysat_base_url: stringField('https://keysat.xyz'),

    // ── Admin dashboard auth ──
    // Username, scrypt-hashed password (plus salt) and session secret for
    // the /admin/* dashboard. Same shape Recap uses (see Recap's
    // server/admin-auth.js for the hash + verify code). An empty hash
    // disables /admin entirely, which is handy while testing the public
    // /relay/* endpoints.
    relay_admin_username: stringField(),
    relay_admin_password_hash: stringField(),
    relay_admin_password_salt: stringField(),
    relay_admin_session_secret: stringField(),

    // ── Output storage policy ──
    // When true, per-job transcript + analysis JSON for REAL USER traffic
    // (jobs from Recap clients) is persisted to /data/relay-outputs/.
    // Test-run jobs (source=admin-test) are ALWAYS persisted regardless of
    // this flag, since they are the operator's own benchmarking content and
    // carry no privacy concern. Off by default: real-user transcripts pass
    // through without touching disk unless the operator opts in for their
    // own debugging.
    relay_save_user_outputs: booleanField(false),

    // ── Tier quotas (operator-adjustable without redeploy) ──
    // JSON blob that drives credits.js. Defaults: Core gets 10 lifetime
    // credits with the first 5 on Gemini, Pro gets 50/month with a Gemini
    // cap of 25, Max is unlimited with a Gemini cap of 50. Operators adjust
    // it through the "Adjust Tier Quotas" action, with no code change or
    // restart.
    relay_tier_quotas_json: jsonStringField({
      // Core: 10 lifetime credits in total. The first 5 are served via
      // Gemini (operator's cloud spend); the remaining 5 fall through to
      // operator hardware so free-tier users can keep going without adding
      // to the operator's cloud bill.
      core: {
        lifetime: 10,
        geminiCapLifetime: 5,
        monthly: null,
        geminiCapMonthly: null,
      },
      pro: { lifetime: null, monthly: 50, geminiCapMonthly: 25 },
      max: { lifetime: null, monthly: null, geminiCapMonthly: 50 },
    }),

    // ── BTCPay (credit-purchase flow) ──
    // Once configured, the relay exposes /relay/credits/buy so users can
    // top up their credit balance over Lightning/onchain. With these fields
    // empty the purchase route returns 503 ("not configured") and the rest
    // of the relay keeps working.
    relay_btcpay_base_url: stringField(),
    // Internal Start9 hostname for server-to-server BTCPay calls
    // (e.g. http://btcpayserver.startos:23000). Auto-populated by the
    // one-click setup wizard when BTCPay is co-installed; when empty the
    // relay falls back to relay_btcpay_base_url. mDNS / clearnet hostnames
    // do not resolve from inside the docker container, so this internal
    // alias is required whenever BTCPay sits behind one.
    relay_btcpay_internal_url: stringField(),
    // Public, BUYER-facing URL. Used to rewrite the checkout link BTCPay
    // returns from invoice-create, which otherwise contains the internal
    // hostname the relay called. Buyers may be anywhere on the internet, so
    // this should be a clearnet domain when possible. When empty it falls
    // back to relay_btcpay_base_url (LAN-only), which restricts credit
    // purchases to the operator's local network.
    relay_btcpay_public_url: stringField(),
    relay_btcpay_store_id: stringField(),
    relay_btcpay_api_key: stringField(),
    relay_btcpay_webhook_secret: stringField(),

    // ── Zaprite (card rail) ──
    // Hosted-checkout API behind the "Pay by card" rail, set through the
    // "Set Zaprite Connection" action. An empty api_key disables the card
    // rail (the UI hides "Pay by card"). There is no webhook secret: the
    // webhook is verified by re-fetching the order from Zaprite's
    // authenticated API.
    relay_zaprite_base_url: stringField('https://api.zaprite.com'),
    relay_zaprite_api_key: stringField(),
    relay_zaprite_currency: stringField('USD'),

    // ── Credit packages (operator-editable bundle pricing) ──
    // JSON array in the order it should appear in the buyer UI; each entry
    // is { credits: number, sats: number }. Defaults are the initial Recap
    // pricing; operators change them via the "Set Credit Packages" action.
    relay_credit_packages_json: jsonStringField([
      { credits: 5, sats: 4000 },
      { credits: 10, sats: 6000 },
      { credits: 20, sats: 10000 },
    ]),

    // ── Tier prices (USD per active subscription) ──
    // Operator-set monthly price per tier, used by the dashboard to
    // estimate revenue and operating margin against Gemini cost. Purely
    // accounting: the relay does no billing itself, Keysat handles license
    // sales. Defaults mirror the current keysat.xyz price list ($0/$5/$15);
    // operators can override them with whatever they actually charge
    // customers for this relay's tier comping.
    relay_tier_prices_usd_json: jsonStringField({ core: 0, pro: 5, max: 15 }),

    // ── Card-rail (Zaprite) subscription prices ──
    // The amount actually CHARGED to a card buyer, expressed in the
    // smallest unit of relay_zaprite_currency (cents for USD). Distinct
    // from relay_tier_prices_usd_json, which is dashboard accounting only.
    // Set via the "Set Zaprite Connection" action. Default ≈ parity with
    // the sat prices ($21 / $42); raise it to add a card premium covering
    // processing fees.
    relay_tier_prices_fiat_cents_json: jsonStringField({ pro: 2100, max: 4200 }),

    // ── Chunking / concurrency knobs ──
    // Edited from the dashboard's Settings tab (PUT /admin/settings).
    // There is ONE canonical default per knob and it lives here, exactly
    // once. All backend code reads the live-reloaded snapshot at request
    // time; no hardcoded fallbacks exist anywhere else in the codebase.
    //
    // Gemini backend (relay → Google Gemini API):
    relay_gemini_tx_chunk_minutes: boundedIntField({ min: 1, max: 120, fallback: 30 }),
    relay_gemini_tx_concurrency: boundedIntField({ min: 1, max: 32, fallback: 12 }),
    relay_gemini_analyze_window_minutes: boundedIntField({ min: 1, max: 60, fallback: 18 }),
    relay_gemini_analyze_overlap_minutes: boundedIntField({ min: 0, max: 10, fallback: 2 }),
    relay_gemini_analyze_concurrency: boundedIntField({ min: 1, max: 32, fallback: 12 }),
    // Hardware backend (relay → operator's Parakeet + Gemma/vLLM):
    relay_hardware_tx_chunk_minutes: boundedIntField({ min: 1, max: 120, fallback: 5 }),
    relay_hardware_tx_concurrency: boundedIntField({ min: 1, max: 32, fallback: 4 }),
    relay_hardware_analyze_window_minutes: boundedIntField({ min: 1, max: 60, fallback: 18 }),
    relay_hardware_analyze_overlap_minutes: boundedIntField({ min: 0, max: 10, fallback: 2 }),
    relay_hardware_analyze_concurrency: boundedIntField({ min: 1, max: 32, fallback: 8 }),
    // Diarization suppression thresholds (Phase 2 cluster cleanup). See
    // server/speaker-clustering.js for how they shape the post-cluster pass
    // that demotes small spurious clusters.
    relay_hardware_anchor_min_speaking_sec: boundedIntField({ min: 5, max: 120, fallback: 30 }),
    relay_hardware_small_cluster_max_speaking_sec: boundedIntField({ min: 1, max: 60, fallback: 15 }),
    relay_hardware_uncertain_margin_pct: boundedIntField({ min: 0, max: 30, fallback: 10 }),
    // Duration floor, in minutes. Below it the analyze planner emits a
    // single window covering the whole transcript (single-shot fast path)
    // and TX chunking is bypassed. Tunable for benchmarking.
    relay_analyze_cutoff_minutes: boundedIntField({ min: 1, max: 60, fallback: 25 }),

    // ── Editable LLM prompts ──
    // Empty string means "use the hardcoded default at request time". The
    // dashboard's Settings tab edits these; resetting a textarea to default
    // clears the field back to "" so future default-prompt changes in code
    // flow through automatically.
    relay_transcribe_prompt: stringField(),
    relay_analyze_prompt: stringField(),

    // ── Post-cluster polish pass (Phase 2) ──
    // ON by default. Operators can switch it off in the Settings tab when
    // they want raw fast output, or when debugging analyze quality
    // independently of polish quality.
    relay_post_cluster_polish_enabled: booleanField(true),
    // Operator-editable polish prompts. Empty falls through to the
    // hardcoded defaults in server/post-cluster-polish.js. They follow the
    // same three-layer override pattern (per-session override →
    // operator-promoted default → code default) as the analyze and
    // transcribe prompts.
    relay_polish_name_inference_prompt: stringField(),
    relay_polish_summary_rewrite_prompt: stringField(),
  }),
)