// recap-relay/server/pricing.js

// Gemini per-1M-token pricing in USD. Used by the relay to compute
// per-call API costs as audit-log entries are written. Operator can
// update this table by editing the file and redeploying when Google
// changes published rates — preferable to a config field because
// (a) prices are stable for months at a time, (b) hardcoding keeps
// the audit log self-contained without a config-snapshot copy at
// write time.
//
// Rates as of mid-2026. ALWAYS verify against the current Google AI
// Studio pricing page before relying on these for billing-grade
// margin math — Google has been known to adjust prices ~quarterly.

// Builds one pricing row, all figures in USD per 1M tokens. When no
// explicit thinking rate is passed, thinking tokens are billed at the
// output rate — which is how every row in the table below is priced.
function usdPerMillion(input, output, thinking = output) {
  return { input, output, thinking };
}

export const GEMINI_PRICING = {
  // Five supported models; rates checked against Google's official
  // docs (ai.google.dev/gemini-api/docs/models) on 2026-05-12.
  // Retired IDs are deliberately left out (gemini-3-pro-preview was
  // shut down 2026-03-09, gemini-2.0-flash is deprecated) — current
  // calls should never need a cost calculation for them.

  // Pro tier — best for analysis.
  "gemini-3.1-pro-preview": usdPerMillion(5.0, 25.0),
  "gemini-2.5-pro": usdPerMillion(1.25, 10.0),

  // Flash tier — best speed/cost for transcription + cheap analysis.
  "gemini-3-flash-preview": usdPerMillion(0.3, 2.5),
  "gemini-2.5-flash": usdPerMillion(0.3, 2.5),
  "gemini-3.1-flash-lite": usdPerMillion(0.1, 0.4),

  // Catch-all row for model ids that are not listed above (e.g. the
  // operator typed a custom model name in setBackendRouting). Uses the
  // Flash rates, so the estimate for an unknown model may skew low but
  // is never 0.
  default: usdPerMillion(0.3, 2.5),
};

// Compute cost for a Gemini API call given its model + usageMetadata
// (the shape @google/genai returns: promptTokenCount,
// candidatesTokenCount, thoughtsTokenCount). Returns:
//   { input_tokens, output_tokens, thinking_tokens, cost_usd }
//
// - model: model id string. Ids that are not an own key of
//   GEMINI_PRICING are priced with GEMINI_PRICING.default.
// - usage: usage metadata object; may be null/undefined, in which case
//   every count is 0 and cost_usd is 0.
//
// Token counts that are missing, non-numeric, non-finite or negative
// are recorded as 0 so cost_usd is always a finite, non-negative number.
export function calcGeminiCost(model, usage) {
  // Own-property check: a plain `GEMINI_PRICING[model]` lookup would
  // also resolve inherited Object.prototype members ("constructor",
  // "toString", "__proto__", ...), which are truthy but carry no rates
  // and would turn the cost into NaN instead of using the default row.
  const isListed = Object.prototype.hasOwnProperty.call(GEMINI_PRICING, model);
  const rates = (isListed && GEMINI_PRICING[model]) || GEMINI_PRICING.default;

  // Coerce to a number and reject anything that cannot be a real count.
  const toTokenCount = (value) => {
    const count = Number(value);
    return Number.isFinite(count) && count > 0 ? count : 0;
  };

  const inputTokens = toTokenCount(usage?.promptTokenCount);
  const outputTokens = toTokenCount(usage?.candidatesTokenCount);
  const thinkingTokens = toTokenCount(usage?.thoughtsTokenCount);

  // Rates are per 1M tokens. A row without a thinking rate bills
  // thinking tokens at its output rate.
  const costUSD =
    (inputTokens / 1_000_000) * rates.input +
    (outputTokens / 1_000_000) * rates.output +
    (thinkingTokens / 1_000_000) * (rates.thinking ?? rates.output);

  return {
    input_tokens: inputTokens,
    output_tokens: outputTokens,
    thinking_tokens: thinkingTokens,
    cost_usd: costUSD,
  };
}

// Returns the model ids that have an explicit row in GEMINI_PRICING,
// in table order. The "default" fallback row is not a model and is
// left out.
export function listKnownModels() {
  const modelIds = [];
  for (const id of Object.keys(GEMINI_PRICING)) {
    if (id === "default") {
      continue;
    }
    modelIds.push(id);
  }
  return modelIds;
}