Wire new routes; identity, summarize-url, dashboard, admin

This commit is contained in:
Keysat
2026-06-13 13:36:30 -05:00
parent 04dcf86fa4
commit 318c6c4b81
20 changed files with 12407 additions and 499 deletions
+403 -1
View File
@@ -10,6 +10,13 @@
// {
// ts: ms-epoch when the request landed
// install_id: X-Recap-Install-Id (truncated for log readability)
// license_fingerprint: stable 16-hex hash of the licenseUuid for
// paid-tier calls; null for anonymous/Core. Added
// in the license-keyed-credits refactor so spend can
// be aggregated by license-pool (since one license
// may span multiple installs). install_id is STILL
// logged on every entry — license_fingerprint is
// additive forensic visibility, not a replacement.
// tier: "core" | "pro" | "max"
// pipeline: "transcribe" | "analyze"
// backend: "gemini" | "hardware"
@@ -36,9 +43,16 @@ import path from "path";
let dataDir = "/data";
let logPath = "/data/relay-calls.ndjson";
// Size at which we rotate the live ndjson to a dated archive. Picked
// to roughly match a year of high-volume relay traffic — a typical
// entry is ~400 bytes, so 50MB ≈ 130k entries. Rotation runs once at
// boot; the operator can also rotate manually any time.
const ROTATION_THRESHOLD_BYTES = 50 * 1024 * 1024;
export async function initAuditLog({ dataDir: dd }) {
if (dd) dataDir = dd;
logPath = path.join(dataDir, "relay-calls.ndjson");
await maybeRotateLog();
// Ensure the file exists so the streaming read path doesn't trip.
try {
await fs.access(logPath);
@@ -48,6 +62,46 @@ export async function initAuditLog({ dataDir: dd }) {
console.log(`[audit-log] writing to ${logPath}`);
}
// Rotate the live ndjson to a dated archive when it grows past the
// threshold. The dashboard's `readEntries` always reads the live file
// only — archived entries fall out of the rolling 30-day window
// naturally and are kept around as raw files for ad-hoc analysis or
// long-term storage / CSV export. If a same-day archive already exists
// (e.g. operator restarts the relay mid-rotation), append a counter.
async function maybeRotateLog() {
let stat;
try {
stat = await fs.stat(logPath);
} catch {
return; // No file yet — nothing to rotate.
}
if (stat.size < ROTATION_THRESHOLD_BYTES) return;
const ymd = new Date().toISOString().slice(0, 10);
let archive = path.join(dataDir, `relay-calls-${ymd}.ndjson`);
let counter = 1;
while (true) {
try {
await fs.access(archive);
// Exists; pick a new name with a counter suffix.
archive = path.join(dataDir, `relay-calls-${ymd}.${counter}.ndjson`);
counter += 1;
if (counter > 99) return; // pathological — give up rotating
} catch {
break; // Free name found.
}
}
try {
await fs.rename(logPath, archive);
await fs.writeFile(logPath, "", { mode: 0o600 });
console.log(
`[audit-log] rotated ${(stat.size / 1024 / 1024).toFixed(1)}MB → ${archive}`
);
} catch (err) {
console.warn(`[audit-log] rotation failed: ${err?.message || err}`);
}
}
// Best-effort append. Errors are logged but never rethrown — losing
// an audit line shouldn't fail the relay call that caused it.
export async function recordCall(entry) {
@@ -59,6 +113,55 @@ export async function recordCall(entry) {
}
}
// Truncate the entire audit log. Used by the dashboard's "Delete all"
// button for cleanup before going-live or after a string of bad-data
// test runs (relay re-installed mid-run, config tweaks producing
// inconsistent measurements, etc.). Destructive — no undo.
export async function clearAllAuditEntries() {
try {
await fs.writeFile(logPath, "", { mode: 0o600 });
return { ok: true };
} catch (err) {
return { ok: false, error: err?.message || String(err) };
}
}
// Delete audit rows matching specific job_ids. Reads the whole log,
// filters out lines belonging to the target jobs, writes the remainder
// back. O(N) on the file size; fine for any plausible audit log (we
// rotate at 64MB anyway). Returns the count of rows removed.
export async function deleteAuditRowsByJobIds(jobIds) {
if (!Array.isArray(jobIds) || jobIds.length === 0) return { deleted: 0 };
const idSet = new Set(jobIds);
const lines = [];
let deleted = 0;
try {
const stream = createReadStream(logPath, { encoding: "utf8" });
const rl = readline.createInterface({ input: stream, crlfDelay: Infinity });
for await (const line of rl) {
if (!line.trim()) continue;
try {
const r = JSON.parse(line);
if (r.job_id && idSet.has(r.job_id)) {
deleted += 1;
continue;
}
lines.push(line);
} catch {
// Bad line — preserve it rather than dropping; matches the
// skip-and-continue behavior of readEntries.
lines.push(line);
}
}
} catch (err) {
if (err.code !== "ENOENT") throw err;
}
await fs.writeFile(logPath, lines.join("\n") + (lines.length ? "\n" : ""), {
mode: 0o600,
});
return { deleted };
}
// Read all entries since `sinceMs` (default: 30 days). Streamed
// line-by-line so the whole file doesn't sit in memory at once.
// Returned array is newest-first.
@@ -114,13 +217,21 @@ export function aggregate(entries) {
}
// ── By tier ──
// unique_users is install-count for Core (no license to dedup against)
// and distinct-license-count for paid tiers (so a Pro license active
// on two installs counts ONCE here, matching the post-refactor credit
// model where they share one monthly pool). Falls back to install_id
// for paid entries that predate the license_fingerprint field.
const byTier = groupBy(entries, (e) => e.tier || "unknown");
const tierRows = Object.entries(byTier).map(([tier, list]) => ({
tier,
calls: list.length,
cost_usd: sumBy(list, "cost_usd"),
avg_duration_ms: avgBy(list, "duration_ms"),
unique_installs: new Set(list.map((e) => e.install_id)).size,
unique_installs:
tier === "core" || tier === "unknown"
? new Set(list.map((e) => e.install_id)).size
: new Set(list.map((e) => e.license_fingerprint || e.install_id)).size,
}));
// ── By model ──
@@ -170,6 +281,30 @@ export function aggregate(entries) {
.sort((a, b) => b.cost_usd - a.cost_usd)
.slice(0, 20);
// ── By license fingerprint (top 20 by spend, paid tiers only) ──
// One license may span multiple installs (cloud account + self-host),
// and the post-refactor credit ledger aggregates their spend onto a
// single pool. This view mirrors that — operators get a "by paid
// user" rollup that doesn't double-count multi-install Pros, plus an
// install-count column to see distribution per license.
const byLicense = groupBy(
entries.filter((e) => e.license_fingerprint),
(e) => e.license_fingerprint
);
const licenseRows = Object.entries(byLicense)
.map(([fp, list]) => ({
license_fingerprint: fp,
tier_snapshot: list[0]?.tier || "core",
calls: list.length,
cost_usd: sumBy(list, "cost_usd"),
summaries: new Set(list.map((e) => e.job_id).filter(Boolean)).size,
unique_installs: new Set(list.map((e) => e.install_id).filter(Boolean)).size,
avg_duration_ms: avgBy(list, "duration_ms"),
last_active_at: Math.max(...list.map((e) => e.ts || 0)),
}))
.sort((a, b) => b.cost_usd - a.cost_usd)
.slice(0, 20);
// ── By hour-of-day (for traffic-pattern view) ──
const byHour = groupBy(entries, (e) => new Date(e.ts).getUTCHours());
const hourRows = Array.from({ length: 24 }, (_, h) => {
@@ -193,6 +328,198 @@ export function aggregate(entries) {
}))
.sort((a, b) => a.avg_duration_ms - b.avg_duration_ms);
// ── Per-summary rollup (collapse transcribe + analyze pairs) ──
// Every "summarize" produces 2 audit entries — one transcribe, one
// analyze — sharing a job_id. The dashboard's call-level views show
// them separately, which is useful for backend-vs-pipeline tuning but
// confusing as "how many summaries did I serve". Group by job_id so
// operators see one row per summary with combined cost/duration.
// Entries without a job_id (older relay versions, or balance pings)
// are bucketed into their own "no-jobid" row at the bottom.
const byJob = groupBy(entries, (e) => e.job_id || "__no_jobid__");
const summaryRows = Object.entries(byJob)
.filter(([k]) => k !== "__no_jobid__")
.map(([jobId, list]) => {
const transcribe = list.find((e) => e.pipeline === "transcribe");
const analyze = list.find((e) => e.pipeline === "analyze");
return {
job_id: jobId,
install_id: list[0]?.install_id || null,
tier: list[0]?.tier || null,
started_at: Math.min(...list.map((e) => e.ts || Infinity)),
completed_at: Math.max(...list.map((e) => e.ts || 0)),
transcribe_backend: transcribe?.backend || null,
transcribe_model: transcribe?.model || null,
analyze_backend: analyze?.backend || null,
analyze_model: analyze?.model || null,
total_cost_usd: sumBy(list, "cost_usd"),
total_duration_ms: sumBy(list, "duration_ms"),
status:
list.every((e) => e.status === "success")
? "success"
: list.some((e) => e.status === "error")
? "error"
: "partial",
had_transcribe: !!transcribe,
had_analyze: !!analyze,
};
})
.sort((a, b) => b.completed_at - a.completed_at);
// ── Recent errors (newest 50) ──
// Quick triage view — when something is failing, the operator needs
// to see the offending error strings without scrolling the full
// call log.
// Surface any audit row carrying an error message — that catches
// status="error" (true backend failures) AND status="partial"
// (e.g. transcribe-with-truncated-chunks, which records the
// missing-speech message in the error field). Operators rely on
// this view to triage all degraded behavior, not just outright
// 5xx-class failures, so the broader filter is the right default.
const errorRows = entries
.filter((e) => e.error)
.slice(0, 50)
.map((e) => ({
ts: e.ts,
install_id: e.install_id || null,
tier: e.tier || null,
pipeline: e.pipeline || null,
backend: e.backend || null,
model: e.model || null,
duration_ms: e.duration_ms || 0,
error: (e.error || "").slice(0, 280),
attempts: Array.isArray(e.attempts) ? e.attempts : null,
}));
// ── Per-(pipeline, model) performance + failure tables ──
// Normalizes raw duration_ms by audio_seconds so different models
// can be compared on a backend-agnostic benchmark: how many ms of
// wall-clock time does this model take per minute of audio? Analyze
// calls don't have audio (they consume the transcript text), so we
// report ms-per-1k-input-tokens for those instead.
//
// Failure rate is computed against `attempted` (success + error)
// and excludes `refused` calls — refused requests never reached the
// backend, so they shouldn't count against the model's reliability.
const byPipelineModel = {};
for (const e of entries) {
const pipeline = e.pipeline || "unknown";
const model = e.model || "unknown";
if (model === "unknown" && e.status === "refused") continue; // refused entries often have no model
const key = `${pipeline}::${model}`;
if (!byPipelineModel[key]) {
byPipelineModel[key] = {
pipeline,
model,
calls: 0,
success: 0,
errors: 0,
refused: 0,
partials: 0,
sum_duration_ms: 0,
sum_audio_seconds: 0,
sum_input_tokens: 0,
sum_output_tokens: 0,
error_counts: {}, // { error_signature: count }
};
}
const row = byPipelineModel[key];
row.calls += 1;
if (e.status === "success") row.success += 1;
if (e.status === "error") row.errors += 1;
if (e.status === "refused") row.refused += 1;
if (e.status === "partial") row.partials += 1;
row.sum_duration_ms += e.duration_ms || 0;
if (typeof e.audio_seconds === "number" && e.audio_seconds > 0) {
row.sum_audio_seconds += e.audio_seconds;
}
row.sum_input_tokens += e.input_tokens || 0;
row.sum_output_tokens += e.output_tokens || 0;
// Aggregate the top-error counts off ANY row that has a populated
// error message — not just status="error" rows. Partial (truncated
// transcribe) and refused (out-of-credits, capacity-gated) rows
// also carry useful error strings the operator wants to see in
// the "Top failure modes" table. The old gate `status === "error"`
// hid all truncations because they're recorded as status="partial".
if (e.error) {
const sig = errorSignature(e.error);
row.error_counts[sig] = (row.error_counts[sig] || 0) + 1;
}
}
const perfByModel = Object.values(byPipelineModel).map((r) => {
const attempted = r.success + r.errors;
const successRate = attempted > 0 ? r.success / attempted : null;
const audioMin = r.sum_audio_seconds / 60;
const msPerAudioMin = audioMin > 0 ? r.sum_duration_ms / audioMin : null;
const msPer1kInputTokens =
r.sum_input_tokens > 0
? r.sum_duration_ms / (r.sum_input_tokens / 1000)
: null;
// Top 3 error signatures by frequency for this model.
const topErrors = Object.entries(r.error_counts)
.map(([signature, count]) => ({ signature, count }))
.sort((a, b) => b.count - a.count)
.slice(0, 3);
return {
pipeline: r.pipeline,
model: r.model,
calls: r.calls,
success: r.success,
errors: r.errors,
refused: r.refused,
partials: r.partials,
// "failures" = total signal worth surfacing in failure tables.
// Includes partials so a TX that lost minutes of speech via a
// truncated chunk is counted as a failure mode, not silently
// tucked away under a "success" pipe. The errors-by-model
// dashboard table reads this; the per-call "errors" field stays
// available for stricter computations.
failures: r.errors + r.partials,
success_rate: successRate,
// Speed benchmark fields. Either or both may be null when there
// wasn't enough successful-with-metadata data to compute them.
ms_per_audio_minute: msPerAudioMin,
ms_per_1k_input_tokens: msPer1kInputTokens,
total_audio_minutes: audioMin > 0 ? audioMin : null,
top_errors: topErrors,
};
});
// ── Revenue / margin (requires tier prices supplied by caller) ──
// Distinct paying USERS in the window × the operator's per-tier
// monthly price. For Core (free) we count distinct installs — that's
// still the right grain for free-tier "active users", since Core has
// no license to dedup against. For Pro/Max we count distinct license
// fingerprints so a single Pro license activated on two installs
// (cloud + self-host) counts ONCE toward monthly revenue, matching
// the post-refactor credit model where they share one monthly pool.
// Falls back to install_id for paid entries missing a fingerprint
// (legacy pre-refactor audit rows) so historical ranges stay
// approximately correct rather than dropping to zero.
//
// Strictly an *estimate* — the relay doesn't know if a Pro user
// actually paid this month, just that they touched a request.
// Underestimates churned customers (who paid but didn't call) and
// overestimates trial users (who haven't paid yet). Hooked in by
// the dashboard route, not here, so tests can pass an empty prices
// map and get zero.
const tierActiveInstalls = {
core: new Set(),
pro: new Set(),
max: new Set(),
};
for (const e of entries) {
const t = e.tier || "core";
if (!tierActiveInstalls[t]) continue;
if (t === "core") {
if (e.install_id) tierActiveInstalls.core.add(e.install_id);
} else {
// Paid: prefer fingerprint, fall back to install_id for legacy rows.
const id = e.license_fingerprint || e.install_id;
if (id) tierActiveInstalls[t].add(id);
}
}
return {
summary: {
calls,
@@ -206,14 +533,89 @@ export function aggregate(entries) {
total_input_tokens: totalInputTokens,
total_output_tokens: totalOutputTokens,
total_thinking_tokens: totalThinkingTokens,
total_summaries: summaryRows.length,
// active_installs_by_tier name retained for dashboard compatibility,
// but the paid-tier counts here are actually DISTINCT LICENSES,
// not distinct installs (see the comment on tierActiveInstalls
// above). Core remains install-based. The dashboard label is
// "Active users by tier" which fits either grain.
active_installs_by_tier: {
core: tierActiveInstalls.core.size,
pro: tierActiveInstalls.pro.size,
max: tierActiveInstalls.max.size,
},
},
by_tier: tierRows,
by_model: modelRows,
by_pipeline: pipelineRows,
by_backend: backendRows,
by_install: installRows,
by_license: licenseRows,
by_hour_utc: hourRows,
cost_vs_speed: costSpeedRows,
by_summary: summaryRows,
errors: errorRows,
perf_by_model: perfByModel,
};
}
// Normalize a raw error string into a stable signature so two
// near-identical messages bucket together. The audit log stores
// truncated raw messages — we want the bucket key to be coarse enough
// that small variations (a different request-id, file name, port
// number, etc.) collapse into a single error class.
//
// Heuristics:
// - Strip ISO timestamps and timestamps with offsets
// - Strip UUIDs / hex blob hashes / long alphanumeric IDs
// - Strip numeric file sizes and ports
// - Strip URLs to their host + path-pattern
// - Trim to first 120 chars after normalization
function errorSignature(raw) {
if (!raw) return "(unknown)";
let s = String(raw);
s = s.replace(/\b\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:?\d{2})?\b/g, "<ts>");
s = s.replace(/\b[0-9a-f]{32,}\b/gi, "<hex>");
s = s.replace(/\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b/gi, "<uuid>");
s = s.replace(/https?:\/\/[^\s)"']+/g, "<url>");
s = s.replace(/:\d{2,5}\b/g, ":<port>");
s = s.replace(/\b\d{4,}\b/g, "<n>");
return s.trim().slice(0, 120);
}
// Derived revenue/margin numbers. Pulled out of aggregate() because it
// needs prices the operator sets in config — keeping the core
// aggregator config-agnostic. Returns:
// {
// monthly_revenue_usd: pro_count * pro_price + max_count * max_price
// + core_count * core_price,
// gemini_cost_usd_in_range: summary.total_cost_usd (passed through),
// margin_usd: revenue - cost, // approximate
// by_tier_revenue: [{ tier, active_installs, price_usd, revenue_usd }],
// }
//
// `active_installs_by_tier` should come from the aggregate summary
// (Set sizes already computed there). `prices` is the {core,pro,max}
// USD-per-month map. `geminiCostInRange` is total_cost_usd from the
// summary.
export function computeRevenue({ activeInstallsByTier, prices, geminiCostInRange }) {
const tiers = ["core", "pro", "max"];
const byTier = tiers.map((tier) => {
const installs = activeInstallsByTier?.[tier] || 0;
const price = Math.max(0, Number(prices?.[tier] ?? 0));
return {
tier,
active_installs: installs,
price_usd: price,
revenue_usd: installs * price,
};
});
const revenue = byTier.reduce((s, r) => s + r.revenue_usd, 0);
return {
monthly_revenue_usd: revenue,
gemini_cost_usd_in_range: geminiCostInRange,
margin_usd: revenue - geminiCostInRange,
by_tier_revenue: byTier,
};
}