Wire new routes; identity, summarize-url, dashboard, admin
This commit is contained in:
+403
-1
@@ -10,6 +10,13 @@
|
||||
// {
|
||||
// ts: ms-epoch when the request landed
|
||||
// install_id: X-Recap-Install-Id (truncated for log readability)
|
||||
// license_fingerprint: stable 16-hex hash of the licenseUuid for
|
||||
// paid-tier calls; null for anonymous/Core. Added
|
||||
// in the license-keyed-credits refactor so spend can
|
||||
// be aggregated by license-pool (since one license
|
||||
// may span multiple installs). install_id is STILL
|
||||
// logged on every entry — license_fingerprint is
|
||||
// additive forensic visibility, not a replacement.
|
||||
// tier: "core" | "pro" | "max"
|
||||
// pipeline: "transcribe" | "analyze"
|
||||
// backend: "gemini" | "hardware"
|
||||
@@ -36,9 +43,16 @@ import path from "path";
|
||||
let dataDir = "/data";
|
||||
let logPath = "/data/relay-calls.ndjson";
|
||||
|
||||
// Size at which we rotate the live ndjson to a dated archive. Picked
|
||||
// to roughly match a year of high-volume relay traffic — a typical
|
||||
// entry is ~400 bytes, so 50MB ≈ 130k entries. Rotation runs once at
|
||||
// boot; the operator can also rotate manually any time.
|
||||
const ROTATION_THRESHOLD_BYTES = 50 * 1024 * 1024;
|
||||
|
||||
export async function initAuditLog({ dataDir: dd }) {
|
||||
if (dd) dataDir = dd;
|
||||
logPath = path.join(dataDir, "relay-calls.ndjson");
|
||||
await maybeRotateLog();
|
||||
// Ensure the file exists so the streaming read path doesn't trip.
|
||||
try {
|
||||
await fs.access(logPath);
|
||||
@@ -48,6 +62,46 @@ export async function initAuditLog({ dataDir: dd }) {
|
||||
console.log(`[audit-log] writing to ${logPath}`);
|
||||
}
|
||||
|
||||
// Rotate the live ndjson to a dated archive when it grows past the
|
||||
// threshold. The dashboard's `readEntries` always reads the live file
|
||||
// only — archived entries fall out of the rolling 30-day window
|
||||
// naturally and are kept around as raw files for ad-hoc analysis or
|
||||
// long-term storage / CSV export. If a same-day archive already exists
|
||||
// (e.g. operator restarts the relay mid-rotation), append a counter.
|
||||
/**
 * Move the live ndjson aside to a dated archive once it has reached
 * ROTATION_THRESHOLD_BYTES, then recreate an empty live file.
 *
 * Does nothing when the live file is missing or still under the
 * threshold. Archive names are `relay-calls-YYYY-MM-DD.ndjson`, with a
 * numeric `.N` infix when that name is already taken. Failures during
 * the rename/recreate step are logged as a warning and never thrown.
 */
async function maybeRotateLog() {
  let liveInfo;
  try {
    liveInfo = await fs.stat(logPath);
  } catch {
    // Live file does not exist yet, so there is nothing to archive.
    return;
  }
  if (liveInfo.size < ROTATION_THRESHOLD_BYTES) return;

  // Resolves to true when the path is accessible, false on any error.
  const isTaken = async (candidate) => {
    try {
      await fs.access(candidate);
      return true;
    } catch {
      return false;
    }
  };

  const stamp = new Date().toISOString().slice(0, 10);
  let target = path.join(dataDir, `relay-calls-${stamp}.ndjson`);
  let suffix = 1;
  while (await isTaken(target)) {
    // Name collision: move on to the next counter-suffixed candidate.
    target = path.join(dataDir, `relay-calls-${stamp}.${suffix}.ndjson`);
    suffix += 1;
    // Pathological number of same-day archives; skip rotation entirely.
    if (suffix > 99) return;
  }

  try {
    await fs.rename(logPath, target);
    await fs.writeFile(logPath, "", { mode: 0o600 });
    const megabytes = (liveInfo.size / 1024 / 1024).toFixed(1);
    console.log(`[audit-log] rotated ${megabytes}MB → ${target}`);
  } catch (err) {
    console.warn(`[audit-log] rotation failed: ${err?.message || err}`);
  }
}
|
||||
|
||||
// Best-effort append. Errors are logged but never rethrown — losing
|
||||
// an audit line shouldn't fail the relay call that caused it.
|
||||
export async function recordCall(entry) {
|
||||
@@ -59,6 +113,55 @@ export async function recordCall(entry) {
|
||||
}
|
||||
}
|
||||
|
||||
// Truncate the entire audit log. Used by the dashboard's "Delete all"
|
||||
// button for cleanup before going-live or after a string of bad-data
|
||||
// test runs (relay re-installed mid-run, config tweaks producing
|
||||
// inconsistent measurements, etc.). Destructive — no undo.
|
||||
/**
 * Empty the live audit log by overwriting it with a zero-length file.
 *
 * Never throws: a write failure is reported through the result object.
 *
 * @returns {Promise<{ok: true} | {ok: false, error: string}>}
 */
export async function clearAllAuditEntries() {
  let failure = null;
  try {
    await fs.writeFile(logPath, "", { mode: 0o600 });
  } catch (err) {
    failure = err;
  }
  if (failure === null) {
    return { ok: true };
  }
  return { ok: false, error: failure?.message || String(failure) };
}
|
||||
|
||||
// Delete audit rows matching specific job_ids. Reads the whole log,
|
||||
// filters out lines belonging to the target jobs, writes the remainder
|
||||
// back. O(N) on the file size; fine for any plausible audit log (we
|
||||
// rotate past 50MB at boot anyway). Returns the count of rows removed.
|
||||
/**
 * Remove every audit row whose `job_id` is in `jobIds` and rewrite the
 * live log with the surviving lines.
 *
 * Blank lines are dropped on rewrite. Lines that fail to parse (or whose
 * parsed value cannot be inspected) are kept verbatim. A missing log
 * file is treated as empty; any other read error is rethrown before
 * anything is written.
 *
 * @param {Array} jobIds - job ids to purge; a non-array or empty array is a no-op.
 * @returns {Promise<{deleted: number}>} number of rows removed.
 */
export async function deleteAuditRowsByJobIds(jobIds) {
  const hasTargets = Array.isArray(jobIds) && jobIds.length > 0;
  if (!hasTargets) return { deleted: 0 };

  const targets = new Set(jobIds);
  const survivors = [];
  let removed = 0;

  try {
    const reader = readline.createInterface({
      input: createReadStream(logPath, { encoding: "utf8" }),
      crlfDelay: Infinity,
    });
    for await (const rawLine of reader) {
      if (rawLine.trim() === "") continue;

      // Decide inside the try so that both a parse failure and a parsed
      // value that cannot be property-accessed (e.g. `null`) fall
      // through to "keep the line".
      let isTarget = false;
      try {
        const row = JSON.parse(rawLine);
        isTarget = Boolean(row.job_id && targets.has(row.job_id));
      } catch {
        isTarget = false;
      }

      if (isTarget) {
        removed += 1;
      } else {
        survivors.push(rawLine);
      }
    }
  } catch (err) {
    if (err.code !== "ENOENT") throw err;
  }

  const payload = survivors.length > 0 ? `${survivors.join("\n")}\n` : "";
  await fs.writeFile(logPath, payload, { mode: 0o600 });
  return { deleted: removed };
}
|
||||
|
||||
// Read all entries since `sinceMs` (default: 30 days). Streamed
|
||||
// line-by-line so the whole file doesn't sit in memory at once.
|
||||
// Returned array is newest-first.
|
||||
@@ -114,13 +217,21 @@ export function aggregate(entries) {
|
||||
}
|
||||
|
||||
// ── By tier ──
|
||||
// unique_installs is the install count for Core (no license to dedup against)
|
||||
// and distinct-license-count for paid tiers (so a Pro license active
|
||||
// on two installs counts ONCE here, matching the post-refactor credit
|
||||
// model where they share one monthly pool). Falls back to install_id
|
||||
// for paid entries that predate the license_fingerprint field.
|
||||
const byTier = groupBy(entries, (e) => e.tier || "unknown");
|
||||
const tierRows = Object.entries(byTier).map(([tier, list]) => ({
|
||||
tier,
|
||||
calls: list.length,
|
||||
cost_usd: sumBy(list, "cost_usd"),
|
||||
avg_duration_ms: avgBy(list, "duration_ms"),
|
||||
unique_installs: new Set(list.map((e) => e.install_id)).size,
|
||||
unique_installs:
|
||||
tier === "core" || tier === "unknown"
|
||||
? new Set(list.map((e) => e.install_id)).size
|
||||
: new Set(list.map((e) => e.license_fingerprint || e.install_id)).size,
|
||||
}));
|
||||
|
||||
// ── By model ──
|
||||
@@ -170,6 +281,30 @@ export function aggregate(entries) {
|
||||
.sort((a, b) => b.cost_usd - a.cost_usd)
|
||||
.slice(0, 20);
|
||||
|
||||
// ── By license fingerprint (top 20 by spend, paid tiers only) ──
|
||||
// One license may span multiple installs (cloud account + self-host),
|
||||
// and the post-refactor credit ledger aggregates their spend onto a
|
||||
// single pool. This view mirrors that — operators get a "by paid
|
||||
// user" rollup that doesn't double-count multi-install Pros, plus an
|
||||
// install-count column to see distribution per license.
|
||||
const byLicense = groupBy(
|
||||
entries.filter((e) => e.license_fingerprint),
|
||||
(e) => e.license_fingerprint
|
||||
);
|
||||
const licenseRows = Object.entries(byLicense)
|
||||
.map(([fp, list]) => ({
|
||||
license_fingerprint: fp,
|
||||
tier_snapshot: list[0]?.tier || "core",
|
||||
calls: list.length,
|
||||
cost_usd: sumBy(list, "cost_usd"),
|
||||
summaries: new Set(list.map((e) => e.job_id).filter(Boolean)).size,
|
||||
unique_installs: new Set(list.map((e) => e.install_id).filter(Boolean)).size,
|
||||
avg_duration_ms: avgBy(list, "duration_ms"),
|
||||
last_active_at: Math.max(...list.map((e) => e.ts || 0)),
|
||||
}))
|
||||
.sort((a, b) => b.cost_usd - a.cost_usd)
|
||||
.slice(0, 20);
|
||||
|
||||
// ── By hour-of-day (for traffic-pattern view) ──
|
||||
const byHour = groupBy(entries, (e) => new Date(e.ts).getUTCHours());
|
||||
const hourRows = Array.from({ length: 24 }, (_, h) => {
|
||||
@@ -193,6 +328,198 @@ export function aggregate(entries) {
|
||||
}))
|
||||
.sort((a, b) => a.avg_duration_ms - b.avg_duration_ms);
|
||||
|
||||
// ── Per-summary rollup (collapse transcribe + analyze pairs) ──
|
||||
// Every "summarize" produces 2 audit entries — one transcribe, one
|
||||
// analyze — sharing a job_id. The dashboard's call-level views show
|
||||
// them separately, which is useful for backend-vs-pipeline tuning but
|
||||
// confusing as "how many summaries did I serve". Group by job_id so
|
||||
// operators see one row per summary with combined cost/duration.
|
||||
// Entries without a job_id (older relay versions, or balance pings)
|
||||
// are bucketed into their own "no-jobid" row at the bottom.
|
||||
const byJob = groupBy(entries, (e) => e.job_id || "__no_jobid__");
|
||||
const summaryRows = Object.entries(byJob)
|
||||
.filter(([k]) => k !== "__no_jobid__")
|
||||
.map(([jobId, list]) => {
|
||||
const transcribe = list.find((e) => e.pipeline === "transcribe");
|
||||
const analyze = list.find((e) => e.pipeline === "analyze");
|
||||
return {
|
||||
job_id: jobId,
|
||||
install_id: list[0]?.install_id || null,
|
||||
tier: list[0]?.tier || null,
|
||||
started_at: Math.min(...list.map((e) => e.ts || Infinity)),
|
||||
completed_at: Math.max(...list.map((e) => e.ts || 0)),
|
||||
transcribe_backend: transcribe?.backend || null,
|
||||
transcribe_model: transcribe?.model || null,
|
||||
analyze_backend: analyze?.backend || null,
|
||||
analyze_model: analyze?.model || null,
|
||||
total_cost_usd: sumBy(list, "cost_usd"),
|
||||
total_duration_ms: sumBy(list, "duration_ms"),
|
||||
status:
|
||||
list.every((e) => e.status === "success")
|
||||
? "success"
|
||||
: list.some((e) => e.status === "error")
|
||||
? "error"
|
||||
: "partial",
|
||||
had_transcribe: !!transcribe,
|
||||
had_analyze: !!analyze,
|
||||
};
|
||||
})
|
||||
.sort((a, b) => b.completed_at - a.completed_at);
|
||||
|
||||
// ── Recent errors (newest 50) ──
|
||||
// Quick triage view — when something is failing, the operator needs
|
||||
// to see the offending error strings without scrolling the full
|
||||
// call log.
|
||||
// Surface any audit row carrying an error message — that catches
|
||||
// status="error" (true backend failures) AND status="partial"
|
||||
// (e.g. transcribe-with-truncated-chunks, which records the
|
||||
// missing-speech message in the error field). Operators rely on
|
||||
// this view to triage all degraded behavior, not just outright
|
||||
// 5xx-class failures, so the broader filter is the right default.
|
||||
const errorRows = entries
|
||||
.filter((e) => e.error)
|
||||
.slice(0, 50)
|
||||
.map((e) => ({
|
||||
ts: e.ts,
|
||||
install_id: e.install_id || null,
|
||||
tier: e.tier || null,
|
||||
pipeline: e.pipeline || null,
|
||||
backend: e.backend || null,
|
||||
model: e.model || null,
|
||||
duration_ms: e.duration_ms || 0,
|
||||
error: (e.error || "").slice(0, 280),
|
||||
attempts: Array.isArray(e.attempts) ? e.attempts : null,
|
||||
}));
|
||||
|
||||
// ── Per-(pipeline, model) performance + failure tables ──
|
||||
// Normalizes raw duration_ms by audio_seconds so different models
|
||||
// can be compared on a backend-agnostic benchmark: how many ms of
|
||||
// wall-clock time does this model take per minute of audio? Analyze
|
||||
// calls don't have audio (they consume the transcript text), so we
|
||||
// report ms-per-1k-input-tokens for those instead.
|
||||
//
|
||||
// Failure rate is computed against `attempted` (success + error)
|
||||
// and excludes `refused` calls — refused requests never reached the
|
||||
// backend, so they shouldn't count against the model's reliability.
|
||||
const byPipelineModel = {};
|
||||
for (const e of entries) {
|
||||
const pipeline = e.pipeline || "unknown";
|
||||
const model = e.model || "unknown";
|
||||
if (model === "unknown" && e.status === "refused") continue; // refused entries often have no model
|
||||
const key = `${pipeline}::${model}`;
|
||||
if (!byPipelineModel[key]) {
|
||||
byPipelineModel[key] = {
|
||||
pipeline,
|
||||
model,
|
||||
calls: 0,
|
||||
success: 0,
|
||||
errors: 0,
|
||||
refused: 0,
|
||||
partials: 0,
|
||||
sum_duration_ms: 0,
|
||||
sum_audio_seconds: 0,
|
||||
sum_input_tokens: 0,
|
||||
sum_output_tokens: 0,
|
||||
error_counts: {}, // { error_signature: count }
|
||||
};
|
||||
}
|
||||
const row = byPipelineModel[key];
|
||||
row.calls += 1;
|
||||
if (e.status === "success") row.success += 1;
|
||||
if (e.status === "error") row.errors += 1;
|
||||
if (e.status === "refused") row.refused += 1;
|
||||
if (e.status === "partial") row.partials += 1;
|
||||
row.sum_duration_ms += e.duration_ms || 0;
|
||||
if (typeof e.audio_seconds === "number" && e.audio_seconds > 0) {
|
||||
row.sum_audio_seconds += e.audio_seconds;
|
||||
}
|
||||
row.sum_input_tokens += e.input_tokens || 0;
|
||||
row.sum_output_tokens += e.output_tokens || 0;
|
||||
// Aggregate the top-error counts off ANY row that has a populated
|
||||
// error message — not just status="error" rows. Partial (truncated
|
||||
// transcribe) and refused (out-of-credits, capacity-gated) rows
|
||||
// also carry useful error strings the operator wants to see in
|
||||
// the "Top failure modes" table. The old gate `status === "error"`
|
||||
// hid all truncations because they're recorded as status="partial".
|
||||
if (e.error) {
|
||||
const sig = errorSignature(e.error);
|
||||
row.error_counts[sig] = (row.error_counts[sig] || 0) + 1;
|
||||
}
|
||||
}
|
||||
const perfByModel = Object.values(byPipelineModel).map((r) => {
|
||||
const attempted = r.success + r.errors;
|
||||
const successRate = attempted > 0 ? r.success / attempted : null;
|
||||
const audioMin = r.sum_audio_seconds / 60;
|
||||
const msPerAudioMin = audioMin > 0 ? r.sum_duration_ms / audioMin : null;
|
||||
const msPer1kInputTokens =
|
||||
r.sum_input_tokens > 0
|
||||
? r.sum_duration_ms / (r.sum_input_tokens / 1000)
|
||||
: null;
|
||||
// Top 3 error signatures by frequency for this model.
|
||||
const topErrors = Object.entries(r.error_counts)
|
||||
.map(([signature, count]) => ({ signature, count }))
|
||||
.sort((a, b) => b.count - a.count)
|
||||
.slice(0, 3);
|
||||
return {
|
||||
pipeline: r.pipeline,
|
||||
model: r.model,
|
||||
calls: r.calls,
|
||||
success: r.success,
|
||||
errors: r.errors,
|
||||
refused: r.refused,
|
||||
partials: r.partials,
|
||||
// "failures" = total signal worth surfacing in failure tables.
|
||||
// Includes partials so a TX that lost minutes of speech via a
|
||||
// truncated chunk is counted as a failure mode, not silently
|
||||
// tucked away under a "success" pipe. The errors-by-model
|
||||
// dashboard table reads this; the per-call "errors" field stays
|
||||
// available for stricter computations.
|
||||
failures: r.errors + r.partials,
|
||||
success_rate: successRate,
|
||||
// Speed benchmark fields. Either or both may be null when there
|
||||
// wasn't enough successful-with-metadata data to compute them.
|
||||
ms_per_audio_minute: msPerAudioMin,
|
||||
ms_per_1k_input_tokens: msPer1kInputTokens,
|
||||
total_audio_minutes: audioMin > 0 ? audioMin : null,
|
||||
top_errors: topErrors,
|
||||
};
|
||||
});
|
||||
|
||||
// ── Revenue / margin (requires tier prices supplied by caller) ──
|
||||
// Distinct paying USERS in the window × the operator's per-tier
|
||||
// monthly price. For Core (free) we count distinct installs — that's
|
||||
// still the right grain for free-tier "active users", since Core has
|
||||
// no license to dedup against. For Pro/Max we count distinct license
|
||||
// fingerprints so a single Pro license activated on two installs
|
||||
// (cloud + self-host) counts ONCE toward monthly revenue, matching
|
||||
// the post-refactor credit model where they share one monthly pool.
|
||||
// Falls back to install_id for paid entries missing a fingerprint
|
||||
// (legacy pre-refactor audit rows) so historical ranges stay
|
||||
// approximately correct rather than dropping to zero.
|
||||
//
|
||||
// Strictly an *estimate* — the relay doesn't know if a Pro user
|
||||
// actually paid this month, just that they touched a request.
|
||||
// Underestimates churned customers (who paid but didn't call) and
|
||||
// overestimates trial users (who haven't paid yet). Hooked in by
|
||||
// the dashboard route, not here, so tests can pass an empty prices
|
||||
// map and get zero.
|
||||
const tierActiveInstalls = {
|
||||
core: new Set(),
|
||||
pro: new Set(),
|
||||
max: new Set(),
|
||||
};
|
||||
for (const e of entries) {
|
||||
const t = e.tier || "core";
|
||||
if (!tierActiveInstalls[t]) continue;
|
||||
if (t === "core") {
|
||||
if (e.install_id) tierActiveInstalls.core.add(e.install_id);
|
||||
} else {
|
||||
// Paid: prefer fingerprint, fall back to install_id for legacy rows.
|
||||
const id = e.license_fingerprint || e.install_id;
|
||||
if (id) tierActiveInstalls[t].add(id);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
summary: {
|
||||
calls,
|
||||
@@ -206,14 +533,89 @@ export function aggregate(entries) {
|
||||
total_input_tokens: totalInputTokens,
|
||||
total_output_tokens: totalOutputTokens,
|
||||
total_thinking_tokens: totalThinkingTokens,
|
||||
total_summaries: summaryRows.length,
|
||||
// active_installs_by_tier name retained for dashboard compatibility,
|
||||
// but the paid-tier counts here are actually DISTINCT LICENSES,
|
||||
// not distinct installs (see the comment on tierActiveInstalls
|
||||
// above). Core remains install-based. The dashboard label is
|
||||
// "Active users by tier" which fits either grain.
|
||||
active_installs_by_tier: {
|
||||
core: tierActiveInstalls.core.size,
|
||||
pro: tierActiveInstalls.pro.size,
|
||||
max: tierActiveInstalls.max.size,
|
||||
},
|
||||
},
|
||||
by_tier: tierRows,
|
||||
by_model: modelRows,
|
||||
by_pipeline: pipelineRows,
|
||||
by_backend: backendRows,
|
||||
by_install: installRows,
|
||||
by_license: licenseRows,
|
||||
by_hour_utc: hourRows,
|
||||
cost_vs_speed: costSpeedRows,
|
||||
by_summary: summaryRows,
|
||||
errors: errorRows,
|
||||
perf_by_model: perfByModel,
|
||||
};
|
||||
}
|
||||
|
||||
// Normalize a raw error string into a stable signature so two
|
||||
// near-identical messages bucket together. The audit log stores
|
||||
// truncated raw messages — we want the bucket key to be coarse enough
|
||||
// that small variations (a different request-id, file name, port
|
||||
// number, etc.) collapse into a single error class.
|
||||
//
|
||||
// Heuristics:
|
||||
// - Strip ISO timestamps and timestamps with offsets
|
||||
// - Strip UUIDs / hex blob hashes / long alphanumeric IDs
|
||||
// - Strip numeric file sizes and ports
|
||||
// - Replace each URL wholesale with a <url> placeholder
|
||||
// - Trim to first 120 chars after normalization
|
||||
/**
 * Collapse a raw error message into a coarse bucket key so messages that
 * differ only in volatile details share one signature.
 *
 * @param {*} raw - error text; any falsy value yields "(unknown)".
 * @returns {string} normalized signature, at most 120 characters.
 */
function errorSignature(raw) {
  if (!raw) return "(unknown)";

  // Applied strictly in this order: timestamps and URLs are consumed
  // before the port rule so their ":NN" fragments are not rewritten
  // separately, and the bare long-number rule runs last.
  const rewrites = [
    [/\b\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:?\d{2})?\b/g, "<ts>"],
    [/\b[0-9a-f]{32,}\b/gi, "<hex>"],
    [/\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b/gi, "<uuid>"],
    [/https?:\/\/[^\s)"']+/g, "<url>"],
    [/:\d{2,5}\b/g, ":<port>"],
    [/\b\d{4,}\b/g, "<n>"],
  ];

  const normalized = rewrites.reduce(
    (text, [pattern, placeholder]) => text.replace(pattern, placeholder),
    String(raw)
  );
  return normalized.trim().slice(0, 120);
}
|
||||
|
||||
// Derived revenue/margin numbers. Pulled out of aggregate() because it
|
||||
// needs prices the operator sets in config — keeping the core
|
||||
// aggregator config-agnostic. Returns:
|
||||
// {
|
||||
// monthly_revenue_usd: pro_count * pro_price + max_count * max_price
|
||||
// + core_count * core_price,
|
||||
// gemini_cost_usd_in_range: summary.total_cost_usd (passed through),
|
||||
// margin_usd: revenue - cost, // approximate
|
||||
// by_tier_revenue: [{ tier, active_installs, price_usd, revenue_usd }],
|
||||
// }
|
||||
//
|
||||
// `active_installs_by_tier` should come from the aggregate summary
|
||||
// (Set sizes already computed there). `prices` is the {core,pro,max}
|
||||
// USD-per-month map. `geminiCostInRange` is total_cost_usd from the
|
||||
// summary.
|
||||
/**
 * Estimate monthly revenue and margin from per-tier active counts and
 * operator-configured prices.
 *
 * Prices are coerced with Number(); anything missing, non-numeric or
 * non-finite is treated as 0 and negatives are clamped to 0, so one bad
 * config value cannot turn the totals into NaN. The cost is passed
 * through untouched in `gemini_cost_usd_in_range`, but a missing or
 * non-finite cost counts as 0 when computing `margin_usd`.
 *
 * @param {object} [args]
 * @param {object} [args.activeInstallsByTier] - {core, pro, max} active counts.
 * @param {object} [args.prices] - {core, pro, max} USD-per-month prices.
 * @param {number} [args.geminiCostInRange] - backend cost for the same window.
 * @returns {{monthly_revenue_usd: number, gemini_cost_usd_in_range: *,
 *   margin_usd: number, by_tier_revenue: Array<{tier: string,
 *   active_installs: number, price_usd: number, revenue_usd: number}>}}
 */
export function computeRevenue({ activeInstallsByTier, prices, geminiCostInRange } = {}) {
  // Number("abc") is NaN and Math.max(0, NaN) stays NaN, so filter on
  // finiteness before clamping.
  const toFiniteOrZero = (value) => {
    const n = Number(value ?? 0);
    return Number.isFinite(n) ? n : 0;
  };

  const tiers = ["core", "pro", "max"];
  const byTier = tiers.map((tier) => {
    const installs = activeInstallsByTier?.[tier] || 0;
    const price = Math.max(0, toFiniteOrZero(prices?.[tier]));
    return {
      tier,
      active_installs: installs,
      price_usd: price,
      revenue_usd: installs * price,
    };
  });

  const revenue = byTier.reduce((sum, row) => sum + row.revenue_usd, 0);
  return {
    monthly_revenue_usd: revenue,
    gemini_cost_usd_in_range: geminiCostInRange,
    margin_usd: revenue - toFiniteOrZero(geminiCostInRange),
    by_tier_revenue: byTier,
  };
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user