v0.2.8 operator dashboard with per-call audit log + cost tracking

2026-05-12 00:26:59 -05:00
parent 9af70302b1
commit 05ebeb5d51
12 changed files with 924 additions and 13 deletions
@@ -0,0 +1,239 @@
+// Per-call audit log for profitability + observability. Each relay
+// request (success or failure) appends one line of newline-delimited
+// JSON to /data/relay-calls.ndjson. Append-only — read paths stream
+// the file line-by-line and keep only the entries inside the requested
+// time window in memory for aggregation, which is cheap up to 100k+
+// entries at typical relay scale (low-tens-of-thousands of calls per
+// month).
+//
+// Record shape (no field is required; missing fields just don't
+// appear in aggregations):
+//   {
+//     ts:           ms-epoch when the request landed
+//     install_id:   X-Recap-Install-Id (truncated for log readability)
+//     tier:         "core" | "pro" | "max"
+//     pipeline:     "transcribe" | "analyze"
+//     backend:      "gemini" | "hardware"
+//     model:        e.g. "gemini-3-flash-preview", "parakeet-tdt-0.6b-v3"
+//     status:       "success" | "error" | "refused"  (refused = quota)
+//     credit_charged: 0 | 1
+//     duration_ms:  end-to-end wall time
+//     input_tokens, output_tokens, thinking_tokens (Gemini only)
+//     cost_usd:     computed from token counts × per-1M-token rates
+//     job_id:       X-Recap-Job-Id (so we can collapse pairs into one)
+//     error:        short error string if status="error"
+//   }
+//
+// Rotation isn't built in — for the prototype, operator can rotate
+// manually (mv relay-calls.ndjson relay-calls.ndjson.0; restart). Once
+// volume warrants, replace this with a daily-rotated logger or move to
+// SQLite for indexed time-range queries.
+
+import fs from "fs/promises";
+import { createReadStream } from "fs";
+import readline from "readline";
+import path from "path";
+
+// Directory that holds the audit log; replaced by initAuditLog() when a
+// dataDir is passed in.
+let dataDir = "/data";
+// Full path of the NDJSON log file; recomputed by initAuditLog().
+let logPath = "/data/relay-calls.ndjson";
+
+// Point the audit log at `dataDir` (when given; otherwise the current
+// directory setting is kept) and make sure the log file exists so the
+// streaming read path doesn't trip.
+//
+//   dataDir: optional directory that holds relay-calls.ndjson
+//
+// Rejects if the file is missing and cannot be created.
+export async function initAuditLog({ dataDir: dd } = {}) {
+  if (dd) dataDir = dd;
+  logPath = path.join(dataDir, "relay-calls.ndjson");
+  // Create-if-missing as one exclusive open ("wx"). A separate access()
+  // check followed by a truncating write could wipe lines appended by a
+  // concurrent recordCall() between the two steps.
+  try {
+    await fs.writeFile(logPath, "", { mode: 0o600, flag: "wx" });
+  } catch (err) {
+    // EEXIST just means the log is already there; anything else is real.
+    if (err?.code !== "EEXIST") throw err;
+  }
+  console.log(`[audit-log] writing to ${logPath}`);
+}
+
+// Append one audit record to the log as a single NDJSON line. The
+// record is stamped with the current time as `ts` unless the caller's
+// entry carries its own. Failures (serialization or I/O) are reported
+// on stderr and then dropped — a broken audit log must never fail the
+// relay call that produced the entry.
+export async function recordCall(entry) {
+  const stamped = { ts: Date.now(), ...entry };
+  try {
+    const line = `${JSON.stringify(stamped)}\n`;
+    await fs.appendFile(logPath, line, { mode: 0o600 });
+  } catch (failure) {
+    const reason = failure?.message || failure;
+    console.error(`[audit-log] append failed: ${reason}`);
+  }
+}
+
+// Read all entries whose `ts` lies in [sinceMs, untilMs] (defaults: the
+// last 30 days, no upper bound). The file is streamed line-by-line, so
+// only the matching entries are held in memory. Blank lines, lines that
+// are not valid JSON and records without a numeric `ts` are skipped.
+// A missing log file yields an empty array; any other read failure is
+// logged and whatever was collected so far is returned.
+// Returned array is newest-first.
+export async function readEntries({
+  sinceMs = Date.now() - 30 * 24 * 3600 * 1000,
+  untilMs = Number.POSITIVE_INFINITY,
+} = {}) {
+  const out = [];
+  let handle;
+  try {
+    // Open the file up front so a missing or unreadable file rejects
+    // right here, inside the try block, rather than arriving later as an
+    // 'error' event on a lazily-opened stream that readline would have
+    // to forward to the loop below.
+    handle = await fs.open(logPath, "r");
+    const stream = createReadStream(logPath, { fd: handle, encoding: "utf8" });
+    const rl = readline.createInterface({ input: stream, crlfDelay: Infinity });
+    for await (const line of rl) {
+      if (!line.trim()) continue;
+      try {
+        const r = JSON.parse(line);
+        if (typeof r.ts === "number" && r.ts >= sinceMs && r.ts <= untilMs) {
+          out.push(r);
+        }
+      } catch {
+        // Bad line — skip silently. Doesn't disrupt the rest of the read.
+      }
+    }
+  } catch (err) {
+    if (err?.code !== "ENOENT") {
+      console.error(`[audit-log] read failed: ${err?.message || err}`);
+    }
+  } finally {
+    // Release the descriptor on every exit path. A failed close is not
+    // worth failing the read for, so it is deliberately ignored.
+    await handle?.close().catch(() => {});
+  }
+  // Newest first by ts.
+  out.sort((a, b) => b.ts - a.ts);
+  return out;
+}
+
+// Compute multi-dimensional aggregates over a set of entries. The
+// dashboard renders all of these — each is a small object array
+// suitable for direct tabulation.
+//
+// Returns an object with:
+//   summary        overall counts, success rate, cost/duration/token totals
+//   by_tier        one row per tier
+//   by_model       one row per model, with token sums and averages
+//   by_pipeline    one row per pipeline
+//   by_backend     one row per backend
+//   by_install     top 20 installs by cost_usd, highest spend first
+//   by_hour_utc    always 24 rows, one per UTC hour of `ts`
+//   cost_vs_speed  per-model averages, fastest model first
+//
+// A missing grouping field is bucketed as "unknown". Missing numeric
+// fields count as 0, so averages are taken over every call in a group,
+// not only the calls that reported the field.
+export function aggregate(entries) {
+  const calls = entries.length;
+
+  // One pass for both the status counts and the grand totals.
+  let success = 0;
+  let errors = 0;
+  let refused = 0;
+  let totalCost = 0;
+  let totalDuration = 0;
+  let totalInputTokens = 0;
+  let totalOutputTokens = 0;
+  let totalThinkingTokens = 0;
+  for (const e of entries) {
+    if (e.status === "success") success += 1;
+    else if (e.status === "error") errors += 1;
+    else if (e.status === "refused") refused += 1;
+    totalCost += e.cost_usd || 0;
+    totalDuration += e.duration_ms || 0;
+    totalInputTokens += e.input_tokens || 0;
+    totalOutputTokens += e.output_tokens || 0;
+    totalThinkingTokens += e.thinking_tokens || 0;
+  }
+
+  // ── By tier ──
+  const byTier = groupBy(entries, (e) => e.tier || "unknown");
+  const tierRows = Object.entries(byTier).map(([tier, list]) => ({
+    tier,
+    calls: list.length,
+    cost_usd: sumBy(list, "cost_usd"),
+    avg_duration_ms: avgBy(list, "duration_ms"),
+    unique_installs: new Set(list.map((e) => e.install_id)).size,
+  }));
+
+  // ── By model ──
+  const byModel = groupBy(entries, (e) => e.model || "unknown");
+  const modelRows = Object.entries(byModel).map(([model, list]) => ({
+    model,
+    calls: list.length,
+    cost_usd: sumBy(list, "cost_usd"),
+    input_tokens: sumBy(list, "input_tokens"),
+    output_tokens: sumBy(list, "output_tokens"),
+    thinking_tokens: sumBy(list, "thinking_tokens"),
+    avg_duration_ms: avgBy(list, "duration_ms"),
+    avg_cost_usd: list.length > 0 ? sumBy(list, "cost_usd") / list.length : 0,
+  }));
+
+  // ── By pipeline ──
+  const byPipeline = groupBy(entries, (e) => e.pipeline || "unknown");
+  const pipelineRows = Object.entries(byPipeline).map(([pipeline, list]) => ({
+    pipeline,
+    calls: list.length,
+    cost_usd: sumBy(list, "cost_usd"),
+    avg_duration_ms: avgBy(list, "duration_ms"),
+  }));
+
+  // ── By backend ──
+  const byBackend = groupBy(entries, (e) => e.backend || "unknown");
+  const backendRows = Object.entries(byBackend).map(([backend, list]) => ({
+    backend,
+    calls: list.length,
+    cost_usd: sumBy(list, "cost_usd"),
+    avg_duration_ms: avgBy(list, "duration_ms"),
+  }));
+
+  // ── By install (top 20 by spend) ──
+  const byInstall = groupBy(entries, (e) => e.install_id || "unknown");
+  const installRows = Object.entries(byInstall)
+    .map(([install, list]) => ({
+      install_id: install,
+      // Tier of the first entry seen for this install in `entries`.
+      tier_snapshot: list[0]?.tier || "core",
+      calls: list.length,
+      cost_usd: sumBy(list, "cost_usd"),
+      // Distinct summarize jobs (collapse transcribe+analyze pairs).
+      summaries: new Set(list.map((e) => e.job_id).filter(Boolean)).size,
+      avg_duration_ms: avgBy(list, "duration_ms"),
+      // Folded rather than Math.max(...spread): spreading one argument
+      // per call can exceed the engine's argument limit and throw a
+      // RangeError for a very busy install.
+      last_active_at: list.reduce(
+        (latest, e) => Math.max(latest, e.ts || 0),
+        Number.NEGATIVE_INFINITY,
+      ),
+    }))
+    .sort((a, b) => b.cost_usd - a.cost_usd)
+    .slice(0, 20);
+
+  // ── By hour-of-day (for traffic-pattern view) ──
+  // Entries without a usable ts produce a NaN hour; that bucket is never
+  // looked up below, so they simply don't appear in this view.
+  const byHour = groupBy(entries, (e) => new Date(e.ts).getUTCHours());
+  const hourRows = Array.from({ length: 24 }, (_, h) => {
+    const list = byHour[h] || [];
+    return {
+      hour_utc: h,
+      calls: list.length,
+      cost_usd: sumBy(list, "cost_usd"),
+    };
+  });
+
+  // ── Cost vs speed (per-model averages) ──
+  // Same source as modelRows but kept separate so the dashboard can
+  // render it as a scatter / table without extra transformation.
+  const costSpeedRows = modelRows
+    .map((r) => ({
+      model: r.model,
+      avg_cost_usd: r.avg_cost_usd,
+      avg_duration_ms: r.avg_duration_ms,
+      calls: r.calls,
+    }))
+    .sort((a, b) => a.avg_duration_ms - b.avg_duration_ms);
+
+  return {
+    summary: {
+      calls,
+      success,
+      errors,
+      refused,
+      success_rate: calls > 0 ? success / calls : 0,
+      total_cost_usd: totalCost,
+      total_duration_ms: totalDuration,
+      avg_duration_ms: calls > 0 ? totalDuration / calls : 0,
+      total_input_tokens: totalInputTokens,
+      total_output_tokens: totalOutputTokens,
+      total_thinking_tokens: totalThinkingTokens,
+    },
+    by_tier: tierRows,
+    by_model: modelRows,
+    by_pipeline: pipelineRows,
+    by_backend: backendRows,
+    by_install: installRows,
+    by_hour_utc: hourRows,
+    cost_vs_speed: costSpeedRows,
+  };
+}
+
+// Bucket the items of `list` by the key `keyFn` returns for each one.
+// Keys are used as property names, so non-string keys are stringified.
+// Items keep their `list` order inside each bucket.
+//
+// The result object has no prototype: a key that comes from outside
+// (e.g. an install id of "constructor" or "__proto__") becomes an
+// ordinary bucket instead of colliding with an inherited
+// Object.prototype member and making `.push` throw.
+function groupBy(list, keyFn) {
+  const out = Object.create(null);
+  for (const item of list) {
+    const k = keyFn(item);
+    if (!out[k]) out[k] = [];
+    out[k].push(item);
+  }
+  return out;
+}
+
+// Total of `item[key]` over `list`; a missing or falsy value adds 0.
+function sumBy(list, key) {
+  return list.reduce((total, item) => total + (item[key] || 0), 0);
+}
+
+// Mean of `item[key]` across `list`, with missing values counted as 0.
+// An empty list averages to 0 instead of dividing by zero.
+function avgBy(list, key) {
+  const count = list.length;
+  return count === 0 ? 0 : sumBy(list, key) / count;
+}