v0.2.8 operator dashboard with per-call audit log + cost tracking

2026-05-12 00:26:59 -05:00
parent 9af70302b1
commit 05ebeb5d51
12 changed files with 924 additions and 13 deletions
@@ -0,0 +1,53 @@
+// Gemini per-1M-token pricing in USD. Used by the relay to compute
+// per-call API costs as audit-log entries are written. Operator can
+// update this table by editing the file and redeploying when Google
+// changes published rates — preferable to a config field because
+// (a) prices are stable for months at a time, (b) hardcoding keeps
+// the audit log self-contained without a config-snapshot copy at
+// write time.
+//
+// Rates as of mid-2026. ALWAYS verify against the current Google AI
+// Studio pricing page before relying on these for billing-grade
+// margin math — Google has been known to adjust prices ~quarterly.
+
+// Price table keyed by Gemini model id. Each entry gives USD per 1M
+// tokens for the three token classes calcGeminiCost bills: input,
+// output, and thinking. Key order here is the order in which
+// listKnownModels() reports the ids.
+export const GEMINI_PRICING = {
+  // Pro family — best for analysis.
+  "gemini-3.1-pro-preview": { input: 5.0, output: 25.0, thinking: 25.0 },
+  "gemini-3-pro-preview":   { input: 5.0, output: 25.0, thinking: 25.0 },
+
+  // Flash family — best speed/cost for transcription, common for
+  // analysis when sub-Pro quality is acceptable.
+  "gemini-3-flash-preview": { input: 0.3, output: 2.5, thinking: 2.5 },
+  "gemini-2.5-flash":       { input: 0.3, output: 2.5, thinking: 2.5 },
+  "gemini-2.0-flash":       { input: 0.1, output: 0.4, thinking: 0.4 },
+
+  // Fallback used when an unknown model id appears (e.g. operator
+  // typed a custom model name in setBackendRouting). Priced like
+  // Flash so cost estimates for unknown models are non-zero; they
+  // will understate the cost if the model is actually Pro-tier.
+  default: { input: 0.3, output: 2.5, thinking: 2.5 },
+};
+
+// Compute the USD cost of one Gemini API call from its model id and
+// usageMetadata (the shape @google/genai returns: promptTokenCount,
+// candidatesTokenCount, thoughtsTokenCount).
+//
+// model — model id used to look up per-1M-token rates in
+//         GEMINI_PRICING; any id without its own entry is priced at
+//         GEMINI_PRICING.default.
+// usage — usageMetadata object; may be null/undefined, and missing
+//         (or otherwise falsy) counts are treated as 0.
+//
+// Returns:
+//   { input_tokens, output_tokens, thinking_tokens, cost_usd }
+export function calcGeminiCost(model, usage) {
+  // Own-property check: a model id such as "constructor", "toString"
+  // or "__proto__" would otherwise resolve to an inherited
+  // Object.prototype member, which is truthy, skips the fallback and
+  // produces a NaN cost in the audit log.
+  const hasOwnEntry = Object.prototype.hasOwnProperty.call(
+    GEMINI_PRICING,
+    model,
+  );
+  const rates =
+    (hasOwnEntry && GEMINI_PRICING[model]) || GEMINI_PRICING.default;
+  const inputTokens = usage?.promptTokenCount || 0;
+  const outputTokens = usage?.candidatesTokenCount || 0;
+  const thinkingTokens = usage?.thoughtsTokenCount || 0;
+  // Rates are per 1M tokens. An entry without a thinking rate bills
+  // thinking tokens at its output rate.
+  const costUSD =
+    (inputTokens / 1_000_000) * rates.input +
+    (outputTokens / 1_000_000) * rates.output +
+    (thinkingTokens / 1_000_000) * (rates.thinking ?? rates.output);
+  return {
+    input_tokens: inputTokens,
+    output_tokens: outputTokens,
+    thinking_tokens: thinkingTokens,
+    cost_usd: costUSD,
+  };
+}
+
+// Model ids that have an explicit row in GEMINI_PRICING, in table
+// order. The "default" fallback row is not a model and is left out.
+export function listKnownModels() {
+  const modelIds = [];
+  for (const key of Object.keys(GEMINI_PRICING)) {
+    if (key === "default") continue;
+    modelIds.push(key);
+  }
+  return modelIds;
+}