// /admin/* — operator dashboard endpoints. All require the admin // session cookie (enforced by admin-auth middleware). // // v0.1 endpoints (JSON only; v0.2 will add an HTML dashboard): // GET /admin/usage — all install rows + last-month aggregates // GET /admin/config — current operator config (sans password hash) // POST /admin/quotas — adjust tier quotas live (mirror of StartOS // action but reachable from the dashboard) import express from "express"; import { getConfigSnapshot, getTierPrices } from "../config.js"; import { snapshotAll } from "../credits.js"; import { snapshotCache } from "../keysat-client.js"; // snapshotJobs is exported by BOTH ../jobs.js (the in-memory job // tracker) and ../job-credits.js (the credit-ledger). They return // different shapes — ../jobs.js gives {id, kind, status, progress, // started_at, ...}, ../job-credits.js gives {key, backend, tier, // charged_at, refunded}. The /admin/jobs route exposes the in-memory // tracker for the dashboard's discovery poll, so the import MUST be // from ../jobs.js. Pre-v0.2.65, this import accidentally targeted // job-credits.js, which made discovery silently report "found: 0 // running" for every Recap submission (the response had entries but // none with the expected status/kind fields). Symptom that surfaced // the bug: the v0.2.61 raw-JSON-viewer showed entries with `key`, // `backend`, `tier`, `charged_at`, `refunded` instead of the expected // `id`, `kind`, `status`, `progress`. Don't change this back without // also redesigning /admin/jobs. 
import { snapshotJobs } from "../jobs.js"; import { readEntries, aggregate, computeRevenue, clearAllAuditEntries, deleteAuditRowsByJobIds } from "../audit-log.js"; import { DEFAULT_TRANSCRIBE_PROMPT_BODY } from "../backends/gemini.js"; import { DEFAULT_ANALYZE_PROMPT_TEMPLATE } from "../chunked-analyze.js"; import { DEFAULT_NAME_INFERENCE_PROMPT_TEMPLATE, DEFAULT_SUMMARY_POLISH_PROMPT_TEMPLATE, } from "../post-cluster-polish.js"; import { DEFAULT_MEETING_EXTRAS_PROMPT_TEMPLATE } from "../meeting-extras.js"; import { aggregateJobs, summarizeJobs } from "../job-stats.js"; import { getJobOutput, listJobOutputIds, bulkDeleteOutputs, getStoredOutputsSummary, } from "../output-store.js"; import { GEMINI_PRICING } from "../pricing.js"; import { getSparkDiscoveryStatus } from "../spark-control.js"; import { getHardwareQueueStatus } from "../hardware-queue.js"; import fs from "fs/promises"; import path from "path"; export function adminRouter({ dataDir }) { const router = express.Router(); router.get("/usage", async (_req, res) => { const rows = snapshotAll(); res.json({ installs: rows.length, rows, }); }); router.get("/config", async (_req, res) => { const cfg = await getConfigSnapshot(); const hw = await (await import("../hardware-config.js")).resolveHardwareConfig(cfg); // Strip secrets before exposing to the dashboard. const safe = { keysat_base_url: cfg.relay_keysat_base_url, spark_control_url: cfg.relay_spark_control_url || null, // Effective values from Spark Control discovery (single source // of truth as of v0.2.84 — operator-typed overrides removed). 
effective_transcribe_url: hw.transcribe.url, effective_transcribe_model: hw.transcribe.model, effective_analyze_url: hw.analyze.url, effective_analyze_model: hw.analyze.model, effective_spark_base: hw.sparkBase || null, // Operator-facing diagnostic — when Spark Control reports a // delegate (parakeet / vllm) as not ready, the resolver leaves // the effective_* URL null and stamps blocked_reason with the // SC-supplied detail. Surfaced here so the dashboard's // hardware health panel can display "parakeet currently // offline — model swap in progress" without the operator // having to grep relay logs. Client-facing routes never // surface these strings — they let planBackend route around // the unavailable path. effective_transcribe_blocked_reason: hw.transcribe.blocked_reason || null, effective_analyze_blocked_reason: hw.analyze.blocked_reason || null, gemini_configured: !!cfg.relay_gemini_api_key, admin_username: cfg.relay_admin_username, tier_quotas: tryParse(cfg.relay_tier_quotas_json), }; res.json(safe); }); router.get("/license-cache", async (_req, res) => { res.json({ entries: snapshotCache() }); }); // Hardware FIFO queue status — polled by the operator dashboard // for the top-bar queue chip. Cheap (single in-memory snapshot). router.get("/hardware-queue", async (_req, res) => { res.json(getHardwareQueueStatus()); }); router.get("/jobs", async (_req, res) => { res.json({ entries: snapshotJobs() }); }); // ── Per-job history (the "Jobs" tab) ────────────────────────────────── // Returns audit entries aggregated by X-Recap-Job-Id, with derived // per-video stats the dashboard's Jobs tab renders as a sortable + // filterable table. Pagination is server-side (?page=1&page_size=100); // filters and sort are also server-side so the client doesn't need // to ship every row to do them. 
//
// Query params:
//   days=N | since=<epoch ms>  time window (default 30 days; `since` wins
//                              when both are given)
//   page, page_size     pagination (default page=1, size=100; page_size is
//                       clamped to 1..500)
//   status              "success" | "partial" | "failed"
//   transcribe_backend  "gemini" | "hardware"
//   analyze_backend     "gemini" | "hardware"
//   model               substring match on transcribe_model OR analyze_model
//   q                   substring match on title or media_url
//   batch_id            substring match on batch_id
//   source              "admin-test" (only test runs); omitted = all rows
//   sort                column name (see SORTABLE below)
//   dir                 "asc" | "desc" (default "desc")
router.get("/jobs-history", async (req, res) => {
  const days = numQ(req.query.days);
  const explicitSince = numQ(req.query.since);
  const sinceMs =
    explicitSince ?? Date.now() - (days ?? 30) * 24 * 3600 * 1000;
  try {
    const entries = await readEntries({ sinceMs });
    // Read the stored-output ID set once so the aggregator can flag
    // each row's has_output without per-row filesystem stats.
    const outputIds = await listJobOutputIds();
    const outputIdSet = new Set(outputIds);
    let jobs = aggregateJobs(entries, { outputIdSet });
    // The summary is computed BEFORE filtering, so it describes the whole
    // time window rather than the filtered page.
    const summary = summarizeJobs(jobs);

    // ── Filters ──
    const status = oneOf(req.query.status, ["success", "partial", "failed"]);
    if (status) jobs = jobs.filter((j) => j.overall_status === status);
    const txBackend = oneOf(req.query.transcribe_backend, ["gemini", "hardware"]);
    if (txBackend) jobs = jobs.filter((j) => j.transcribe_backend === txBackend);
    const anBackend = oneOf(req.query.analyze_backend, ["gemini", "hardware"]);
    if (anBackend) jobs = jobs.filter((j) => j.analyze_backend === anBackend);
    const model = strQ(req.query.model);
    if (model) {
      const m = model.toLowerCase();
      jobs = jobs.filter(
        (j) =>
          (j.transcribe_model && j.transcribe_model.toLowerCase().includes(m)) ||
          (j.analyze_model && j.analyze_model.toLowerCase().includes(m))
      );
    }
    const q = strQ(req.query.q);
    if (q) {
      const needle = q.toLowerCase();
      jobs = jobs.filter(
        (j) =>
          (j.title && j.title.toLowerCase().includes(needle)) ||
          (j.media_url && j.media_url.toLowerCase().includes(needle))
      );
    }
    // Batch filter: substring match on batch_id. The dashboard's
    // benchmark suite stamps a shared batch_id across all 6-8 test
    // runs from one click; filtering by that ID surfaces just that
    // suite's rows for side-by-side comparison.
    const batchId = strQ(req.query.batch_id);
    if (batchId) {
      const b = batchId.toLowerCase();
      jobs = jobs.filter((j) => j.batch_id && j.batch_id.toLowerCase().includes(b));
    }
    // Source filter: "admin-test" to see only test runs, or empty
    // to see all (including real user traffic).
    const source = oneOf(req.query.source, ["admin-test"]);
    if (source) {
      jobs = jobs.filter((j) => j.source === source);
    }

    // ── Sort ──
    const SORTABLE = new Set([
      "started_at",
      "completed_at",
      "audio_seconds",
      "audio_bytes",
      "wall_time_ms",
      "transcribe_ms",
      "transcribe_ms_per_min",
      "transcribe_ms_per_mb",
      "download_ms",
      "download_ms_per_mb",
      "analyze_ms",
      "analyze_ms_per_min",
      "analyze_ms_per_mb",
      "analyze_windows_total",
      "analyze_windows_success",
      "analyze_windows_failed",
      "chunk_count",
      "cost_usd",
      "tier",
      "transcribe_backend",
      "analyze_backend",
      "transcribe_model",
      "analyze_model",
      "overall_status",
      "title",
      "batch_id",
      "source",
    ]);
    // Unknown / missing sort columns fall back to started_at.
    const sort = SORTABLE.has(req.query.sort) ? req.query.sort : "started_at";
    const dir = req.query.dir === "asc" ? 1 : -1;
    jobs.sort((a, b) => {
      const av = a[sort];
      const bv = b[sort];
      if (av == null && bv == null) return 0;
      if (av == null) return 1; // nulls last regardless of direction
      if (bv == null) return -1;
      if (typeof av === "number" && typeof bv === "number") {
        return (av - bv) * dir;
      }
      return String(av).localeCompare(String(bv)) * dir;
    });

    // ── Paginate ──
    const pageSize = Math.min(Math.max(numQ(req.query.page_size) || 100, 1), 500);
    const page = Math.max(numQ(req.query.page) || 1, 1);
    const totalFiltered = jobs.length;
    const start = (page - 1) * pageSize;
    const slice = jobs.slice(start, start + pageSize);
    res.json({
      range: {
        since_ms: sinceMs,
        until_ms: Date.now(),
        days: days ?? null,
        total_entries: entries.length,
      },
      summary,
      page,
      page_size: pageSize,
      total_filtered: totalFiltered,
      total_pages: Math.max(1, Math.ceil(totalFiltered / pageSize)),
      sort,
      dir: dir === 1 ? "asc" : "desc",
      jobs: slice,
    });
  } catch (err) {
    console.error(`[admin/jobs-history] failed: ${err?.message || err}`);
    res
      .status(500)
      .json({ error: "jobs_history_failed", message: err?.message || String(err) });
  }
});

// ── Stored job outputs (transcript + analysis JSON) ───────────────────
// GET /admin/job-output/:id — fetch a single output payload
// GET /admin/output-store-stats — count + bytes for the dashboard panel
// GET /admin/output-store-ids — set of job_ids that have stored outputs
//                               (consumed by the Jobs table to set the
//                               has_output flag without reading each file)
// DELETE /admin/job-outputs — bulk delete; body { job_ids: [...] } or { all: true }
router.get("/job-output/:id", async (req, res) => {
  try {
    const out = await getJobOutput(req.params.id);
    if (!out) return res.status(404).json({ error: "output_not_found" });
    res.json(out);
  } catch (err) {
    // A store read failure is answered with a 500 (same shape as the
    // other admin handlers) instead of leaving the rejection unhandled.
    console.error(`[admin/job-output] failed: ${err?.message || err}`);
    res
      .status(500)
      .json({ error: "job_output_failed", message: err?.message || String(err) });
  }
});

// ── Per-job audit-row drill-down ──────────────────────────────────────
// GET /admin/job/:id/details — every audit row keyed to a single job_id,
// sorted by timestamp ascending so the operator reads the pipeline in
// execution order: download (no row), transcribe (1 row), analyze
// (N rows, one per window).
//
// Powers the Jobs-tab row-expand diagnostic. The existing
// /admin/jobs-history endpoint aggregates this data into per-job
// summaries (which the table already renders), but you can't see
// WHICH window failed or WHY without these raw rows. Use case: a job
// shows up as "partial" with "5 (1 failed)" in the AN-windows column;
// expanding the row hits this endpoint to surface "window_idx=3
// status=error error='fetch timeout after 60000ms' model=gemini-2.5-flash"
// so the operator can decide whether it's a flaky-network issue, a
// model-output issue, or systematic.
router.get("/job/:id/details", async (req, res) => {
  const jobId = req.params.id;
  if (!jobId) return res.status(400).json({ error: "missing job_id" });
  try {
    // Look back 60 days by default (override with ?since=<epoch ms>).
    // That is wider than /admin/jobs-history's 30-day default, so a job
    // listed in the default history view is always inside this window.
    // Audit rows older than the on-disk retention window won't exist
    // even if they're requested.
    const sinceMs = numQ(req.query.since) ?? Date.now() - 60 * 24 * 3600 * 1000;
    const all = await readEntries({ sinceMs });
    const rows = all
      .filter((r) => r.job_id === jobId)
      .sort((a, b) => (a.ts || 0) - (b.ts || 0));
    if (rows.length === 0) {
      return res
        .status(404)
        .json({ error: "no_audit_rows", job_id: jobId, since_ms: sinceMs });
    }
    // Quick per-pipeline tallies so the UI doesn't have to recompute.
    const tx = rows.find((r) => r.pipeline === "transcribe");
    const analyzeRows = rows.filter((r) => r.pipeline === "analyze");
    const analyzeFailed = analyzeRows.filter((r) => r.status !== "success");
    const summary = {
      transcribe_status: tx?.status || "missing",
      transcribe_truncated_chunks: tx?.truncated_chunks || null,
      analyze_rows: analyzeRows.length,
      analyze_failed: analyzeFailed.length,
      analyze_window_count_planned:
        // Each analyze row carries window_count = the total planned
        // windows. If any are missing entirely (process crashed
        // mid-window before recordCall ran), planned > rows.length.
        analyzeRows.reduce(
          (max, r) => Math.max(max, r.window_count || 0),
          0
        ),
    };
    res.json({ job_id: jobId, summary, rows });
  } catch (err) {
    // Logged like the other admin handlers so a failing drill-down is
    // visible in the relay logs, not only in the HTTP response.
    console.error(`[admin/job-details] failed: ${err?.message || err}`);
    res
      .status(500)
      .json({ error: "job_details_failed", message: err?.message || String(err) });
  }
});

// GET /admin/output-store-stats — summary of the stored outputs.
router.get("/output-store-stats", async (_req, res) => {
  try {
    res.json(await getStoredOutputsSummary());
  } catch (err) {
    console.error(`[admin/output-store-stats] failed: ${err?.message || err}`);
    res
      .status(500)
      .json({ error: "output_store_stats_failed", message: err?.message || String(err) });
  }
});

// GET /admin/output-store-ids — job_ids that currently have a stored output.
router.get("/output-store-ids", async (_req, res) => {
  try {
    res.json({ ids: await listJobOutputIds() });
  } catch (err) {
    console.error(`[admin/output-store-ids] failed: ${err?.message || err}`);
    res
      .status(500)
      .json({ error: "output_store_ids_failed", message: err?.message || String(err) });
  }
});

// DELETE /admin/job-outputs — bulk delete stored outputs.
// Body: { job_ids: [...] } or { all: true }, optionally include_audit: true.
// Responds with the bulkDeleteOutputs result merged with { audit: ... }.
router.delete("/job-outputs", express.json(), async (req, res) => {
  const body = req.body || {};
  if (!body.all && !Array.isArray(body.job_ids)) {
    return res.status(400).json({
      error: "request body must include { all: true } or { job_ids: [...] }",
    });
  }
  try {
    const outputResult = await bulkDeleteOutputs({ jobIds: body.job_ids, all: !!body.all });
    // Optional escalation: also delete audit-log rows for the matched
    // jobs. The dashboard's "Delete selected" button sets
    // include_audit=true so a single click clears BOTH the stored
    // output and every audit row keyed to that job_id. The "Delete
    // all stored outputs" button does NOT include audit by default —
    // the operator has to confirm a separate scarier "Delete
    // everything" action to nuke the whole log.
    let auditResult = { deleted: 0 };
    if (body.include_audit) {
      if (body.all) {
        const cleared = await clearAllAuditEntries();
        auditResult = cleared.ok
          ? { deleted: "all" }
          : { deleted: 0, error: cleared.error };
      } else if (Array.isArray(body.job_ids) && body.job_ids.length > 0) {
        auditResult = await deleteAuditRowsByJobIds(body.job_ids);
      }
    }
    res.json({ ...outputResult, audit: auditResult });
  } catch (err) {
    // A failure part-way through (e.g. outputs deleted, audit delete
    // threw) is reported as a 500 rather than an unhandled rejection.
    console.error(`[admin/job-outputs] delete failed: ${err?.message || err}`);
    res
      .status(500)
      .json({ error: "job_outputs_delete_failed", message: err?.message || String(err) });
  }
});

// ── Nuclear: delete all stored outputs AND truncate the audit log ──
// Used by the dashboard's "Delete EVERYTHING" button when the
// operator wants a totally clean slate before going live (or after
// a string of test-run cycles producing bad data). Confirmation
// happens client-side; this endpoint just executes.
router.post("/wipe-all", express.json(), async (_req, res) => {
  try {
    const outputResult = await bulkDeleteOutputs({ all: true });
    const auditResult = await clearAllAuditEntries();
    res.json({
      outputs_deleted: outputResult.deleted ?? 0,
      audit_cleared: auditResult.ok,
      audit_error: auditResult.error || null,
    });
  } catch (err) {
    console.error(`[admin/wipe-all] failed: ${err?.message || err}`);
    res
      .status(500)
      .json({ error: "wipe_all_failed", message: err?.message || String(err) });
  }
});

/**
 * Resolves the audit-log time window shared by /dashboard and
 * /dashboard.csv from the request query.
 *
 * @param {object} query - Express `req.query`.
 * @returns {{ sinceMs: number, days: number | null }} `sinceMs` is the
 *   window start in epoch ms: `?since=` when it parses to a non-zero finite
 *   integer, otherwise now minus `?days=` days (30 when `days` is missing
 *   or not a positive integer). `days` is the validated value or null.
 */
function resolveDashboardWindow(query) {
  const DAY_MS = 24 * 3600 * 1000;
  const rawDays =
    typeof query.days === "string" ? Number.parseInt(query.days, 10) : null;
  const days = Number.isFinite(rawDays) && rawDays > 0 ? rawDays : null;
  const rawSince =
    typeof query.since === "string" ? Number.parseInt(query.since, 10) : null;
  const sinceMs =
    rawSince && Number.isFinite(rawSince)
      ? rawSince
      : Date.now() - (days ?? 30) * DAY_MS;
  return { sinceMs, days };
}

// ── Dashboard ─────────────────────────────────────────────────────────
// Time-range aggregations over the per-call audit log. Default range
// is the last 30 days; override with ?days=N or ?since=<epoch ms>.
// Returns { range, summary, by_tier, by_model, by_pipeline,
// by_backend, by_install, by_hour_utc, cost_vs_speed, pricing }.
router.get("/dashboard", async (req, res) => {
  const { sinceMs, days } = resolveDashboardWindow(req.query);
  try {
    const entries = await readEntries({ sinceMs });
    const agg = aggregate(entries);
    const prices = await getTierPrices();
    const revenue = computeRevenue({
      activeInstallsByTier: agg.summary.active_installs_by_tier,
      prices,
      geminiCostInRange: agg.summary.total_cost_usd,
    });
    // 24-hour success-rate window for the dashboard alert banner —
    // independent of the larger range the operator is viewing, so a
    // recent regression surfaces even when looking at the 30d view.
    const recentCutoff = Date.now() - 24 * 3600 * 1000;
    const recentEntries = entries.filter((e) => e.ts >= recentCutoff);
    const recentCalls = recentEntries.length;
    const recentSuccess = recentEntries.filter(
      (e) => e.status === "success"
    ).length;
    // No calls in the last 24h counts as a 100% success rate.
    const recentSuccessRate =
      recentCalls > 0 ? recentSuccess / recentCalls : 1;
    res.json({
      range: {
        since_ms: sinceMs,
        until_ms: Date.now(),
        days,
        total_entries: entries.length,
      },
      ...agg,
      revenue,
      tier_prices_usd: prices,
      recent_24h: {
        calls: recentCalls,
        success: recentSuccess,
        success_rate: recentSuccessRate,
      },
      pricing: GEMINI_PRICING,
    });
  } catch (err) {
    console.error(`[admin/dashboard] failed: ${err?.message || err}`);
    res
      .status(500)
      .json({ error: "dashboard_failed", message: err?.message || String(err) });
  }
});

// ── CSV export ────────────────────────────────────────────────────────
// Streams the raw audit entries in the requested range as CSV so the
// operator can pivot in a spreadsheet. Same window-selection as
// /dashboard (?days=N or ?since=<epoch ms>). Columns mirror the
// shape recordCall() writes — easier to keep aligned with future
// audit-log field additions than a curated subset.
router.get("/dashboard.csv", async (req, res) => {
  const { sinceMs } = resolveDashboardWindow(req.query);
  try {
    const entries = await readEntries({ sinceMs });
    const cols = [
      "ts",
      "iso_time",
      "install_id",
      "license_fingerprint",
      "tier",
      "pipeline",
      "backend",
      "model",
      "status",
      "credit_charged",
      "duration_ms",
      "download_ms",
      "audio_bytes",
      "input_tokens",
      "output_tokens",
      "thinking_tokens",
      "cost_usd",
      "job_id",
      "error",
    ];
    const lines = [cols.join(",")];
    for (const e of entries) {
      const row = cols.map((c) => {
        if (c === "iso_time") {
          // iso_time is derived from ts, not stored. toISOString() throws
          // a RangeError on an invalid date, so a single malformed ts
          // yields an empty cell instead of failing the whole export.
          const when = new Date(e.ts || 0);
          return csvCell(Number.isNaN(when.getTime()) ? "" : when.toISOString());
        }
        return csvCell(e[c]);
      });
      lines.push(row.join(","));
    }
    const ymd = new Date().toISOString().slice(0, 10);
    res.setHeader("Content-Type", "text/csv; charset=utf-8");
    res.setHeader(
      "Content-Disposition",
      `attachment; filename="recap-relay-${ymd}.csv"`
    );
    res.send(lines.join("\n") + "\n");
  } catch (err) {
    console.error(`[admin/dashboard.csv] failed: ${err?.message || err}`);
    res
      .status(500)
      .json({ error: "csv_failed", message: err?.message || String(err) });
  }
});

// Adjust the live quotas blob. Same shape the StartOS action writes
// to relay_tier_quotas_json — kept here so the dashboard can tune
// quotas without round-tripping the StartOS UI.
router.post("/quotas", express.json(), async (req, res) => {
  const incoming = req.body || {};
  const normalized = {
    core: {
      lifetime: numOrNull(incoming?.core?.lifetime, 5),
      monthly: numOrNull(incoming?.core?.monthly, null),
      geminiCapMonthly: numOrNull(incoming?.core?.geminiCapMonthly, null),
    },
    pro: {
      lifetime: numOrNull(incoming?.pro?.lifetime, null),
      monthly: numOrNull(incoming?.pro?.monthly, 50),
      geminiCapMonthly: numOrNull(incoming?.pro?.geminiCapMonthly, 25),
    },
    max: {
      lifetime: numOrNull(incoming?.max?.lifetime, null),
      monthly: numOrNull(incoming?.max?.monthly, null),
      geminiCapMonthly: numOrNull(incoming?.max?.geminiCapMonthly, 50),
    },
  };
  // Write directly into relay-config.json — the live-reloader picks
  // it up on the next read.
  const configPath = path.join(dataDir, "config", "relay-config.json");
  try {
    let existing = {};
    try {
      const raw = await fs.readFile(configPath, "utf8");
      // An empty file holds nothing to preserve; anything else must parse.
      if (raw.trim() !== "") existing = JSON.parse(raw);
    } catch (err) {
      // Only a missing file means "start from an empty config". Any other
      // failure (unreadable file, corrupt JSON) must abort: carrying on
      // with {} would rewrite the file with ONLY the quotas key and wipe
      // every other operator setting stored in it.
      if (err?.code !== "ENOENT") throw err;
    }
    if (existing === null || typeof existing !== "object" || Array.isArray(existing)) {
      throw new Error("relay-config.json does not contain a JSON object");
    }
    existing.relay_tier_quotas_json = JSON.stringify(normalized);
    await fs.mkdir(path.dirname(configPath), { recursive: true });
    await fs.writeFile(configPath, JSON.stringify(existing), { mode: 0o600 });
    res.json({ ok: true, quotas: normalized });
  } catch (err) {
    console.error(`[admin/quotas] failed: ${err?.message || err}`);
    res
      .status(500)
      .json({ error: "quotas_failed", message: err?.message || String(err) });
  }
});

// ── Settings (chunking / concurrency knobs) ──
// GET returns every numeric field listed in SETTINGS_KEYS from the live
// config snapshot. PUT validates each value against its allowed range
// (mirrors the Zod schema in startos/file-models/config.json.ts)
// and writes them into /data/config/relay-config.json. The
// live-reloader picks the new values up on the next request — no
// restart, no in-flight benchmark interruption.
//
// Validation rules (kept in sync with the Zod schema). This list is
// abbreviated; SETTINGS_RANGES below is the authoritative per-key table:
//   tx_chunk_minutes         integer, 1..120
//   tx_concurrency           integer, 1..32
//   analyze_window_minutes   integer, 1..60
//   analyze_overlap_minutes  integer, 0..10
//   analyze_concurrency      integer, 1..32
//   analyze_cutoff_minutes   integer, 1..60
const SETTINGS_KEYS = [
  "relay_gemini_tx_chunk_minutes",
  "relay_gemini_tx_concurrency",
  "relay_gemini_analyze_window_minutes",
  "relay_gemini_analyze_overlap_minutes",
  "relay_gemini_analyze_concurrency",
  "relay_hardware_tx_chunk_minutes",
  "relay_hardware_tx_chunk_overlap_seconds",
  "relay_hardware_tx_concurrency",
  "relay_hardware_voice_clustering_threshold",
  "relay_hardware_anchor_min_speaking_sec",
  "relay_hardware_small_cluster_max_speaking_sec",
  "relay_hardware_uncertain_margin_pct",
  "relay_hardware_analyze_window_minutes",
  "relay_hardware_analyze_overlap_minutes",
  "relay_hardware_analyze_concurrency",
  "relay_analyze_cutoff_minutes",
  // Output-token caps, added v0.2.62. Numeric inputs; ranges below.
  "relay_gemini_tx_max_output_tokens",
  "relay_gemini_an_max_output_tokens",
  "relay_hardware_an_max_tokens",
  // Per-video-duration TOTAL section targets, added v0.2.67.
  // Replaces the old per-window-bucket target strings. Each is a
  // small positive integer = target total sections for a video in
  // the matching duration bucket.
  "relay_analyze_total_sections_under_30",
  "relay_analyze_total_sections_30_60",
  "relay_analyze_total_sections_60_90",
  "relay_analyze_total_sections_90_120",
  "relay_analyze_total_sections_120_150",
  "relay_analyze_total_sections_150_180",
  "relay_analyze_total_sections_over_180",
];

// Inclusive [min, max] bounds for every key in SETTINGS_KEYS.
const SETTINGS_RANGES = {
  relay_gemini_tx_chunk_minutes: [1, 120],
  relay_gemini_tx_concurrency: [1, 32],
  relay_gemini_analyze_window_minutes: [1, 60],
  relay_gemini_analyze_overlap_minutes: [0, 10],
  relay_gemini_analyze_concurrency: [1, 32],
  relay_hardware_tx_chunk_minutes: [1, 120],
  relay_hardware_tx_chunk_overlap_seconds: [0, 120],
  relay_hardware_tx_concurrency: [1, 32],
  // Cosine-similarity threshold for cross-chunk speaker
  // clustering, stored as integer percentage. Range allows from
  // 50% (loose — easy to merge similar-sounding voices) to 95%
  // (strict — only near-identical fingerprints get merged).
  relay_hardware_voice_clustering_threshold: [50, 95],
  // Diarization suppression thresholds (Phase 2 cluster cleanup).
  // anchor_min_speaking_sec: a cluster needs at least this much
  //   total speaking time to be considered a "real" speaker
  //   (anchor). Below this and the cluster is a suppression
  //   candidate. Range 5-120s.
  // small_cluster_max_speaking_sec: clusters with LESS than this
  //   that aren't close to any anchor get merged into
  //   "Speaker_Unknown". Range 1-60s.
  // uncertain_margin_pct: a small cluster whose best similarity
  //   to any anchor is within this many percentage points of the
  //   main clustering threshold gets reassigned to that anchor
  //   with a "?" suffix marker. Range 0-30 (0 = no fuzzy
  //   reassignment; 30 = very lenient).
  relay_hardware_anchor_min_speaking_sec: [5, 120],
  relay_hardware_small_cluster_max_speaking_sec: [1, 60],
  relay_hardware_uncertain_margin_pct: [0, 30],
  relay_hardware_analyze_window_minutes: [1, 60],
  relay_hardware_analyze_overlap_minutes: [0, 10],
  relay_hardware_analyze_concurrency: [1, 32],
  relay_analyze_cutoff_minutes: [1, 60],
  // 1024 lower bound is reasonable for any audio/JSON output; below
  // that and even a tiny chunk's output would clip. Upper bound 65536
  // is Gemini's documented per-call max; vLLM/Ollama operators
  // with larger context windows can still hit this cap.
  relay_gemini_tx_max_output_tokens: [1024, 65536],
  relay_gemini_an_max_output_tokens: [1024, 65536],
  relay_hardware_an_max_tokens: [1024, 32768],
  // Total-sections targets — practical bounds. 1 = single section
  // for the whole video (probably too coarse to be useful); 40 is
  // a generous upper bound for marathon 4+ hour content. Operators
  // who want exotic values can hand-edit relay-config.json directly.
  relay_analyze_total_sections_under_30: [1, 40],
  relay_analyze_total_sections_30_60: [1, 40],
  relay_analyze_total_sections_60_90: [1, 40],
  relay_analyze_total_sections_90_120: [1, 40],
  relay_analyze_total_sections_120_150: [1, 40],
  relay_analyze_total_sections_150_180: [1, 40],
  relay_analyze_total_sections_over_180: [1, 40],
};

// Canonical defaults — mirrors server/config.js defaultConfig().
// Used when a config field is missing entirely (e.g. older operator
// config from before v0.2.32). One source of truth.
const SETTINGS_DEFAULTS = { relay_gemini_tx_chunk_minutes: 30, relay_gemini_tx_concurrency: 12, relay_gemini_analyze_window_minutes: 18, relay_gemini_analyze_overlap_minutes: 2, relay_gemini_analyze_concurrency: 12, relay_hardware_tx_chunk_minutes: 5, relay_hardware_tx_chunk_overlap_seconds: 30, relay_hardware_tx_concurrency: 4, relay_hardware_voice_clustering_threshold: 70, relay_hardware_anchor_min_speaking_sec: 30, relay_hardware_small_cluster_max_speaking_sec: 15, relay_hardware_uncertain_margin_pct: 10, relay_hardware_analyze_window_minutes: 18, relay_hardware_analyze_overlap_minutes: 2, relay_hardware_analyze_concurrency: 8, relay_analyze_cutoff_minutes: 25, relay_gemini_tx_max_output_tokens: 65536, relay_gemini_an_max_output_tokens: 8192, relay_hardware_an_max_tokens: 16000, relay_analyze_total_sections_under_30: 6, relay_analyze_total_sections_30_60: 8, relay_analyze_total_sections_60_90: 9, relay_analyze_total_sections_90_120: 10, relay_analyze_total_sections_120_150: 11, relay_analyze_total_sections_150_180: 12, relay_analyze_total_sections_over_180: 12, }; // Boolean settings (presented as toggles in the dashboard's // Settings tab). Same get/put endpoint as the chunking knobs; // validated separately because they're booleans not bounded ints. const SETTINGS_BOOLS = [ "relay_save_user_outputs", "relay_hardware_diarization_enabled", "relay_post_cluster_polish_enabled", "relay_meeting_extras_enabled", ]; const SETTINGS_BOOL_DEFAULTS = { relay_save_user_outputs: false, relay_hardware_diarization_enabled: false, // Phase 2 polish pass — default ON. Operator can disable to // skip the ~15-25s polish step at the end of the pipeline. relay_post_cluster_polish_enabled: true, // Internal-meetings extras (Path 2A Phase 2) — default ON. // Adds ~5-15s LLM call to extract decisions / action items / // open questions / key quotes after polish. Affects internal // meetings only; YouTube/podcast flow ignores it. 
relay_meeting_extras_enabled: true, }; // Enum / select settings (Gemini SKU + backend routing preference) // — historically only editable via StartOS actions. Migrated to the // dashboard Settings tab so the operator doesn't have to leave // their tuning workflow. The same fields stay in the StartOS // schema; both paths converge on relay-config.json. // // Each entry: { default, options: { value: human-label } } const SETTINGS_ENUMS = { relay_gemini_transcription_model: { default: "gemini-3-flash-preview", options: { "gemini-3.1-pro-preview": "Gemini 3.1 Pro — top quality (Preview)", "gemini-3-flash-preview": "Gemini 3 Flash — Pro-class quality at Flash price (Preview)", "gemini-3.1-flash-lite": "Gemini 3.1 Flash-Lite — cost-optimized (Stable GA)", "gemini-2.5-pro": "Gemini 2.5 Pro — Stable", "gemini-2.5-flash": "Gemini 2.5 Flash — Stable", }, }, relay_gemini_analysis_model: { default: "gemini-3.1-pro-preview", options: { "gemini-3.1-pro-preview": "Gemini 3.1 Pro — top quality (Preview)", "gemini-3-flash-preview": "Gemini 3 Flash — Pro-class at Flash price (Preview)", "gemini-3.1-flash-lite": "Gemini 3.1 Flash-Lite — cost-optimized (Stable GA)", "gemini-2.5-pro": "Gemini 2.5 Pro — Stable", "gemini-2.5-flash": "Gemini 2.5 Flash — Stable", }, }, relay_transcribe_backend_preference: { default: "gemini_first", options: { gemini_first: "Gemini first → operator hardware when cap exceeded", hardware_first: "Operator hardware first → Gemini as fallback", gemini_only: "Gemini only — fail when cap is exceeded", hardware_only: "Hardware only — fail when no operator-hardware transcribe endpoint configured", }, }, relay_analyze_backend_preference: { default: "gemini_first", options: { gemini_first: "Gemini first → operator hardware when cap exceeded", hardware_first: "Operator hardware first → Gemini as fallback", gemini_only: "Gemini only — fail when cap is exceeded", hardware_only: "Hardware only — fail when no operator-hardware analyze endpoint configured", }, }, }; const 
SETTINGS_ENUM_KEYS = Object.keys(SETTINGS_ENUMS); // Free-form string settings (textareas in the dashboard). Each // entry carries its default value AND optional validation rules // checked on PUT — e.g., the analyze prompt MUST contain the JSON // output instruction and the {{transcript}} variable, so an // accidental edit can't silently break the pipeline. const SETTINGS_STRINGS = { relay_transcribe_prompt: { default: DEFAULT_TRANSCRIBE_PROMPT_BODY, validate: () => null, // no required substrings // The auto-prepended metadata block (title/channel/description/ // chapters) is rendered separately and is NOT part of the // editable prompt. The operator only edits the instruction body. help: "Edits the INSTRUCTION body sent to Gemini for transcription. The metadata block (title / channel / description / chapters) is auto-prepended by the relay at request time. The operator-hardware transcribe path is typically a pure STT model with no prompt input — this override is ignored when routed to hardware. Empty = use the latest hardcoded default (the value shown below as 'Current default').", }, relay_analyze_prompt: { default: DEFAULT_ANALYZE_PROMPT_TEMPLATE, validate: (s) => { if (!s) return null; // empty = use default, no validation const errors = []; if (!/\{\{\s*transcript\s*\}\}/i.test(s)) { errors.push("must contain {{transcript}} — the relay needs this to inject the transcript text into your prompt"); } if (!/json/i.test(s)) { errors.push("must reference JSON output — the analyzer parses the model response as JSON and will fail without an explicit instruction"); } return errors.length > 0 ? errors.join("; ") : null; }, help: "Applied to BOTH the Gemini and operator-hardware analyze paths. Template variables {{transcript}}, {{windowMin}}, {{targetSections}}, {{maxIndex}} are interpolated at request time — keep {{transcript}} (the pipeline will break without it) and prefer keeping the others where the prompt references them. 
The transcript is rendered as numbered + timestamped lines ([N] (MM:SS) text) so the model can read indices directly off the input rather than counting bracketed lines. {{maxIndex}} = position of the last line in this window's transcript — use it in completeness constraints ('every index from 0 to {{maxIndex}}'). The prompt MUST instruct the model to output JSON (validation enforced on save).", }, relay_polish_name_inference_prompt: { default: DEFAULT_NAME_INFERENCE_PROMPT_TEMPLATE, validate: (s) => { if (!s) return null; const errors = []; if (!/\{\{\s*transcript\s*\}\}/i.test(s)) { errors.push("must contain {{transcript}} — the relay needs this to inject the speaker-labeled transcript"); } if (!/\{\{\s*speakerKeys\s*\}\}/i.test(s)) { errors.push("must contain {{speakerKeys}} — needed in the JSON schema block so the model knows which Speaker_X keys to populate"); } if (!/json/i.test(s)) { errors.push("must reference JSON output — the polish parser expects a JSON {speakers: {...}} object and will fail without an explicit instruction"); } return errors.length > 0 ? errors.join("; ") : null; }, help: "Stage 1 of the post-cluster polish pass. ONE LLM call sees the full speaker-labeled transcript + episode metadata and infers real names. Template variables {{channel}}, {{title}}, {{description}}, {{speakerStats}}, {{transcript}}, {{speakerKeys}} are interpolated at request time — {{transcript}} and {{speakerKeys}} are required (validation enforced on save), the others are recommended for accuracy. Output must be JSON shaped { \"speakers\": { \"Speaker_A\": \"Name or null\", ... } }. 
Skipped automatically when <2 speakers detected OR the polish toggle is off.", }, relay_polish_summary_rewrite_prompt: { default: DEFAULT_SUMMARY_POLISH_PROMPT_TEMPLATE, validate: (s) => { if (!s) return null; const errors = []; if (!/\{\{\s*sections\s*\}\}/i.test(s)) { errors.push("must contain {{sections}} — the relay needs this to inject the per-window section list to be polished"); } if (!/\{\{\s*transcript\s*\}\}/i.test(s)) { errors.push("must contain {{transcript}} — the speaker-labeled window transcript is needed for speaker attribution"); } if (!/json/i.test(s)) { errors.push("must reference JSON output — the polish parser expects { sections: [...] } and will fail without an explicit instruction"); } return errors.length > 0 ? errors.join("; ") : null; }, help: "Stage 2 of the post-cluster polish pass. N parallel LLM calls (one per analyze window) rewrite each section's summary to attribute statements to specific speakers. Template variables {{speakerRoster}}, {{transcript}}, {{sections}} are interpolated at request time — {{sections}} and {{transcript}} are required (validation enforced on save). Output must be JSON shaped { \"sections\": [{ \"index\": N, \"summary\": \"...\" }, ...] }. Titles and section indices are NEVER modified by polish; only summary text gets rewritten.", }, relay_meeting_extras_prompt: { default: DEFAULT_MEETING_EXTRAS_PROMPT_TEMPLATE, validate: (s) => { if (!s) return null; const errors = []; if (!/\{\{\s*transcript\s*\}\}/i.test(s)) { errors.push("must contain {{transcript}} — the relay needs this to inject the speaker-labeled transcript"); } if (!/json/i.test(s)) { errors.push("must reference JSON output — the extras parser expects a JSON object with decisions / action_items / open_questions / key_quotes arrays and will fail without an explicit instruction"); } return errors.length > 0 ? errors.join("; ") : null; }, help: "Path 2A Phase 2 — internal-meetings extras extraction. 
ONE LLM call at the end of the pipeline (after analyze + polish) pulls out structured items: decisions, action items, open questions, key quotes. Template variables {{title}}, {{duration}}, {{speakerRoster}}, {{topics}}, {{transcript}} are interpolated at request time — {{transcript}} is required (validation enforced on save). Output must be JSON shaped { decisions: [...], action_items: [...], open_questions: [...], key_quotes: [...] } — each item has speaker IDs (Speaker_X) + integer second offsets so the dashboard can render speaker chips + clickable timestamp jumps. Skipped automatically when the extras toggle is off OR transcript_segments is empty. Affects internal meetings only.", }, // (The old per-window-bucket section-count targets — short / medium // / long — were removed in v0.2.67. Section-count targets are now // operator-set as INTEGER total-sections-per-video-duration // numbers, exposed as 7 numeric settings above (see SETTINGS_KEYS / // SETTINGS_RANGES / SETTINGS_DEFAULTS for the new // relay_analyze_total_sections_* fields). The relay computes the // per-window section count at request time using the actual video // duration + AN window body. See computePerWindowTarget() in // server/chunked-analyze.js.) }; const SETTINGS_STRING_KEYS = Object.keys(SETTINGS_STRINGS); // ── Short text settings (single-line inputs in the dashboard) ── // Endpoint URLs, model names, and the Gemini API key — previously // only editable via StartOS Actions, now also editable inline in // the dashboard Settings tab. Same backing store (relay-config.json) // — the StartOS actions still work for operators who prefer that // workflow. // // Each entry: // { default, masked?, urlPattern?, help, placeholder?, group } // // masked: true if the value is a secret — GET never returns the // actual value (only a `set: bool` flag); PUT treats an // empty string as "leave unchanged" rather than "clear". 
//           Use the StartOS action to explicitly clear a masked
  //           secret if you really need to.
  //   urlPattern: if set, value must match this regex on PUT. URL fields
  //           use '^(https?://.+)?$' to permit empty (= use discovery
  //           or disable that path).
  //   group: UI section the dashboard renders this row under.
  const SETTINGS_TEXT = {
    relay_gemini_api_key: {
      default: "",
      masked: true,
      help: "The relay's Google Gemini API key. Required — the relay will refuse to serve Gemini traffic until this is set. Get one at https://aistudio.google.com/apikey.",
      placeholder: "AIza... (paste your key here)",
      group: "credentials",
    },
    relay_spark_control_url: {
      default: "",
      urlPattern: "^(https?://.+)?$",
      help: "Spark Control's /api/endpoints discovery URL on your LAN. Required for the operator-hardware path — the relay reads it (60s TTL) to find your transcribe + analyze backends, and POSTs diarize-chunk to the same host. Spark Control's StartOS Local CA cert is auto-trusted by the relay's LAN fetcher. Example: https://192.168.1.72:62419/api/endpoints",
      placeholder: "https://192.168.1.72:62419/api/endpoints",
      group: "endpoints",
    },
    relay_keysat_base_url: {
      default: "https://keysat.xyz",
      urlPattern: "^https?://.+$",
      help: "Where the relay validates Recap user licenses. Defaults to the public Keysat endpoint. If you're running Keysat as a co-located StartOS package, override to the internal hostname (http://keysat.startos:) to skip the public-internet roundtrip.",
      placeholder: "https://keysat.xyz",
      group: "credentials",
    },
    relay_cloud_operator_key: {
      default: "",
      masked: true,
      help: "Shared secret that lets the operator's cloud Recaps server vouch for its Pro/Max users by account-id (core-decoupling), instead of each user carrying a Keysat license. Must EXACTLY match the Recaps server's \"Relay Operator Key\" action value. Generate with `openssl rand -hex 32`. Empty = cloud user-id requests are rejected (the relay still serves the license/install path).",
      placeholder: "paste the same key set on the Recaps server",
      group: "credentials",
    },
  };
  const SETTINGS_TEXT_KEYS = Object.keys(SETTINGS_TEXT);

  // Builds the flat { key: value } map both GET and PUT /settings
  // return. Key order: numeric, boolean, enum, long-string, short-text.
  //   - numeric / boolean / enum: saved value, else the code default.
  //   - long strings: stored verbatim. Empty string means "use the
  //     hardcoded default" — the UI displays the default in that case
  //     but keeps the saved value empty so future default changes
  //     flow through.
  //   - short text: masked entries (secrets) are ALWAYS emitted as ""
  //     so the value never goes over the wire; everything else is the
  //     saved value coerced to a string.
  const currentSettingsView = (cfg) => {
    const view = {};
    for (const k of SETTINGS_KEYS) {
      view[k] = cfg[k] ?? SETTINGS_DEFAULTS[k];
    }
    for (const k of SETTINGS_BOOLS) {
      view[k] = cfg[k] ?? SETTINGS_BOOL_DEFAULTS[k];
    }
    for (const k of SETTINGS_ENUM_KEYS) {
      view[k] = cfg[k] ?? SETTINGS_ENUMS[k].default;
    }
    for (const k of SETTINGS_STRING_KEYS) {
      view[k] = cfg[k] || "";
    }
    for (const k of SETTINGS_TEXT_KEYS) {
      const hidden = SETTINGS_TEXT[k].masked || cfg[k] == null;
      view[k] = hidden ? "" : String(cfg[k]);
    }
    return view;
  };

  // Reads relay-config.json for a read-modify-write cycle.
  // A missing or blank file is treated as "no config yet" ({}). Any
  // other failure (unreadable file, corrupt JSON, non-object root) is
  // thrown instead of being swallowed: the caller writes the result
  // straight back, so pretending the config is empty would silently
  // wipe every other saved key (API keys included).
  const readStoredSettingsFile = async (configPath) => {
    let raw;
    try {
      raw = await fs.readFile(configPath, "utf8");
    } catch (err) {
      if (err?.code === "ENOENT") return {};
      throw err;
    }
    if (raw.trim() === "") return {};
    const parsed = JSON.parse(raw);
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      throw new Error(`${configPath}: expected a JSON object at the top level`);
    }
    return parsed;
  };

  // GET /admin/settings — current values plus all the metadata the
  // dashboard Settings tab needs to render and validate its inputs.
  router.get("/settings", async (_req, res, next) => {
    try {
      const cfg = await getConfigSnapshot();
      const out = currentSettingsView(cfg);

      // Short text settings (endpoint URLs, model names, Gemini key).
      // Masked entries (secrets) NEVER expose their value over the wire
      // — the dashboard sees a `set: true|false` flag and renders a
      // "(saved — leave blank to keep)" placeholder.
      const text = {};
      for (const k of SETTINGS_TEXT_KEYS) {
        const meta = SETTINGS_TEXT[k];
        const stored = cfg[k] != null ? String(cfg[k]) : "";
        const entry = {
          masked: Boolean(meta.masked),
          set: stored.length > 0,
          help: meta.help,
          placeholder: meta.placeholder || "",
        };
        if (!meta.masked) {
          entry.urlPattern = meta.urlPattern || null;
        }
        entry.group = meta.group;
        text[k] = entry;
      }

      // Build a defaults map covering all four types.
      const allDefaults = { ...SETTINGS_DEFAULTS, ...SETTINGS_BOOL_DEFAULTS };
      for (const k of SETTINGS_ENUM_KEYS) {
        allDefaults[k] = SETTINGS_ENUMS[k].default;
      }
      for (const k of SETTINGS_STRING_KEYS) {
        // Effective default = operator-promoted default (if set) →
        // hardcoded code-side default. The dashboard renders this as
        // the "Current default" preview block + uses it as the
        // "Reset to default" target.
        const operatorDefault = cfg[`${k}_default`];
        const hasOperatorDefault =
          typeof operatorDefault === "string" && operatorDefault.trim();
        allDefaults[k] = hasOperatorDefault
          ? operatorDefault
          : SETTINGS_STRINGS[k].default;
      }

      res.json({
        settings: out,
        ranges: SETTINGS_RANGES,
        defaults: allDefaults,
        booleans: SETTINGS_BOOLS,
        // Enum metadata for the UI: { key: { options: { value: label } } }
        enums: Object.fromEntries(
          SETTINGS_ENUM_KEYS.map((k) => [k, { options: SETTINGS_ENUMS[k].options }]),
        ),
        // String metadata for the UI: { key: { help: "..." } }. The
        // current default value already lives in `defaults[key]`.
        strings: Object.fromEntries(
          SETTINGS_STRING_KEYS.map((k) => [k, { help: SETTINGS_STRINGS[k].help }]),
        ),
        // Text metadata for short single-line inputs (endpoint URLs,
        // model names, masked secrets). The dashboard reads
        // `text[key].masked` to decide whether to render a password-style
        // placeholder, `text[key].set` to indicate whether a saved value
        // exists, `text[key].urlPattern` to validate before submit, and
        // `text[key].help` + `text[key].placeholder` for the UI surface.
        text,
        // Spark Control discovery health — read-only snapshot of the
        // last fetch attempt. The dashboard surfaces this under the
        // Service Discovery URL row so the operator can spot a silently-
        // failing discovery without grepping container logs. Reflects
        // current operator config (so it stays in sync if the URL was
        // just changed and not yet refetched).
        discoveryStatus: getSparkDiscoveryStatus(cfg.relay_spark_control_url),
      });
    } catch (err) {
      // Hand async failures to Express's error pipeline instead of
      // leaving a rejected promise (and a hung request) behind.
      next(err);
    }
  });

  // PUT /admin/settings — partial update. Keys absent from the body
  // are left untouched. Every supplied key is validated; if ANY fails,
  // nothing is written and the response is 400 { ok: false, errors }.
  // On success the validated keys are merged into relay-config.json
  // and the refreshed settings view is returned.
  router.put("/settings", express.json(), async (req, res, next) => {
    try {
      const incoming = req.body || {};
      const errors = [];
      const validated = {};

      for (const k of SETTINGS_KEYS) {
        const v = incoming[k];
        if (v === undefined) continue; // partial update — leave existing value
        // Only real numbers and non-blank numeric strings are parsed.
        // A bare Number(v) would turn null, "", [] and false into 0
        // (and true into 1) and let them through as valid integers.
        let n = NaN;
        if (typeof v === "number") {
          n = v;
        } else if (typeof v === "string" && v.trim() !== "") {
          n = Number(v);
        }
        if (!Number.isInteger(n)) {
          errors.push(`${k}: must be an integer (got ${JSON.stringify(v)})`);
          continue;
        }
        const [lo, hi] = SETTINGS_RANGES[k];
        if (n < lo || n > hi) {
          errors.push(`${k}: out of range ${lo}..${hi} (got ${n})`);
          continue;
        }
        validated[k] = n;
      }

      for (const k of SETTINGS_BOOLS) {
        if (incoming[k] === undefined) continue;
        validated[k] = Boolean(incoming[k]);
      }

      for (const k of SETTINGS_ENUM_KEYS) {
        const v = incoming[k];
        if (v === undefined) continue;
        if (typeof v !== "string") {
          errors.push(`${k}: must be a string (got ${typeof v})`);
          continue;
        }
        if (!Object.prototype.hasOwnProperty.call(SETTINGS_ENUMS[k].options, v)) {
          errors.push(`${k}: ${JSON.stringify(v)} is not one of the allowed values`);
          continue;
        }
        validated[k] = v;
      }

      for (const k of SETTINGS_STRING_KEYS) {
        const v = incoming[k];
        if (v === undefined) continue;
        if (typeof v !== "string") {
          errors.push(`${k}: must be a string (got ${typeof v})`);
          continue;
        }
        // Empty string is always valid — means "use the hardcoded default".
        if (v.trim() === "") {
          validated[k] = "";
          continue;
        }
        // Field-specific validation (e.g. analyze prompt must contain
        // JSON + {{transcript}}).
        const validateErr = SETTINGS_STRINGS[k].validate(v);
        if (validateErr) {
          errors.push(`${k}: ${validateErr}`);
          continue;
        }
        validated[k] = v;
      }

      // Short text settings (endpoint URLs / model names / Gemini key).
      // Masked entries: empty string = leave unchanged (no-op), so the
      // operator never has to re-type their Gemini key just to tweak
      // an adjacent URL. Non-masked entries: empty string clears the
      // saved value. URL-pattern fields are validated against their
      // declared regex.
      for (const k of SETTINGS_TEXT_KEYS) {
        const v = incoming[k];
        if (v === undefined) continue;
        if (typeof v !== "string") {
          errors.push(`${k}: must be a string (got ${typeof v})`);
          continue;
        }
        const trimmed = v.trim();
        const meta = SETTINGS_TEXT[k];
        if (meta.masked && trimmed === "") {
          // Skip — keep whatever's currently saved.
          continue;
        }
        let patternOk = true;
        if (meta.urlPattern && trimmed !== "") {
          try {
            patternOk = new RegExp(meta.urlPattern).test(trimmed);
          } catch {
            // Pattern compile error shouldn't ever happen for our
            // hardcoded regexes; if it does, skip pattern validation
            // rather than fail the request.
            patternOk = true;
          }
        }
        if (!patternOk) {
          errors.push(`${k}: must match ${meta.urlPattern}`);
          continue;
        }
        // Soft length cap to keep noisy paste-accidents out of config.
        if (trimmed.length > 512) {
          errors.push(`${k}: value too long (max 512 chars)`);
          continue;
        }
        validated[k] = trimmed;
      }

      if (errors.length > 0) {
        return res.status(400).json({ ok: false, errors });
      }

      // Merge into existing relay-config.json and write back.
      const configPath = path.join(dataDir, "config", "relay-config.json");
      const existing = await readStoredSettingsFile(configPath);
      Object.assign(existing, validated);
      await fs.mkdir(path.dirname(configPath), { recursive: true });
      await fs.writeFile(configPath, JSON.stringify(existing, null, 2), { mode: 0o600 });

      // Return the merged final state so the dashboard can refresh
      // without a second GET. Masked secrets come back as "" so the
      // dashboard doesn't accidentally display them.
      const cfg = await getConfigSnapshot();
      res.json({ ok: true, settings: currentSettingsView(cfg) });
    } catch (err) {
      next(err);
    }
  });

  // POST /admin/settings/promote-prompt
  //   Body: { key } — must be one of SETTINGS_STRING_KEYS (e.g.
  //   "relay_transcribe_prompt", "relay_analyze_prompt",
  //   "relay_meeting_extras_prompt").
  //
  // Promotes the operator's CURRENT override into a persistent
  // operator-default. Used by the dashboard's "Set as new default"
  // button so the operator can evolve their prompt baselines over
  // time without code redeploys. Effect:
  //
  //   1. Validate the override against the same rules PUT uses
  //      (analyze prompt must contain {{transcript}} + JSON output)
  //   2. Move cfg[key] (the override) → cfg[key + "_default"]
  //      (the operator-promoted default)
  //   3. Clear cfg[key] (no override anymore — the override and
  //      the default are now the same string, so the override is
  //      redundant; clearing it lets future "Reset to default"
  //      operations restore to the new operator default)
  //
  // Request-time resolution becomes: override (empty after promote)
  // → operator-promoted default (just written) → code-side default.
  // The active prompt sent to Gemini doesn't change, but future
  // edits start from the new baseline.
router.post( "/settings/promote-prompt", express.json(), async (req, res) => { const key = String(req.body?.key || "").trim(); if (!SETTINGS_STRING_KEYS.includes(key)) { return res.status(400).json({ ok: false, error: "unknown_prompt_key", message: `Expected one of: ${SETTINGS_STRING_KEYS.join(", ")}`, }); } const cfg = await getConfigSnapshot(); const override = (cfg[key] || "").trim(); if (!override) { return res.status(400).json({ ok: false, error: "nothing_to_promote", message: "No override is currently saved — promote nothing means promote nothing. Edit the textarea and Save first, then click Set as new default.", }); } const validateErr = SETTINGS_STRINGS[key].validate(override); if (validateErr) { return res.status(400).json({ ok: false, error: "invalid_override", message: `Can't promote — current override fails validation: ${validateErr}`, }); } const configPath = path.join(dataDir, "config", "relay-config.json"); let existing = {}; try { existing = JSON.parse(await fs.readFile(configPath, "utf8")); } catch {} // Atomically: write the new operator default, clear the override. existing[key + "_default"] = override; existing[key] = ""; await fs.mkdir(path.dirname(configPath), { recursive: true }); await fs.writeFile( configPath, JSON.stringify(existing, null, 2), { mode: 0o600 }, ); res.json({ ok: true, promoted: { key, length: override.length }, }); }, ); return router; } function numOrNull(v, fallback) { if (v === null) return null; const n = Number(v); if (Number.isFinite(n)) return n; return fallback; } function tryParse(s) { try { return JSON.parse(s); } catch { return null; } } // Query-string helpers for /jobs-history. function numQ(v) { if (typeof v !== "string") return null; const n = parseInt(v, 10); return Number.isFinite(n) ? n : null; } function strQ(v) { return typeof v === "string" && v.trim() ? v.trim() : null; } function oneOf(v, allowed) { if (typeof v !== "string") return null; return allowed.includes(v) ? 
v : null; } // RFC4180-ish CSV cell escaping: wrap in quotes if the value contains // a comma, quote, or newline; double up any embedded quotes. function csvCell(v) { if (v === null || v === undefined) return ""; const s = String(v); if (s.includes(",") || s.includes('"') || s.includes("\n") || s.includes("\r")) { return '"' + s.replace(/"/g, '""') + '"'; } return s; }