// Gemini per-1M-token pricing in USD. Used by the relay to compute
// per-call API costs as audit-log entries are written. Operator can
// update this table by editing the file and redeploying when Google
// changes published rates — preferable to a config field because
// (a) prices are stable for months at a time, (b) hardcoding keeps
// the audit log self-contained without a config-snapshot copy at
// write time.
//
// Rates as of mid-2026. ALWAYS verify against the current Google AI
// Studio pricing page before relying on these for billing-grade
// margin math — Google has been known to adjust prices ~quarterly.
export const GEMINI_PRICING = {
  // Pro family — best for analysis.
  "gemini-3.1-pro-preview": { input: 5.0, output: 25.0, thinking: 25.0 },
  "gemini-3-pro-preview": { input: 5.0, output: 25.0, thinking: 25.0 },
  // Flash family — best speed/cost for transcription, common for
  // analysis when sub-Pro quality is acceptable.
  "gemini-3-flash-preview": { input: 0.3, output: 2.5, thinking: 2.5 },
  "gemini-2.5-flash": { input: 0.3, output: 2.5, thinking: 2.5 },
  "gemini-2.0-flash": { input: 0.1, output: 0.4, thinking: 0.4 },
  // Fallback used when an unknown model id appears (e.g. operator
  // typed a custom model name in setBackendRouting). Conservative —
  // priced like Flash so cost estimates skew low rather than 0.
  default: { input: 0.3, output: 2.5, thinking: 2.5 },
};

/**
 * Compute cost for a Gemini API call given its model + usageMetadata
 * (the shape @google/genai returns: promptTokenCount,
 * candidatesTokenCount, thoughtsTokenCount).
 *
 * @param {string} model - Model id to look up in GEMINI_PRICING. Ids
 *   that are not own keys of the table are billed at the `default` rates.
 * @param {object} [usage] - usageMetadata from the API response. May be
 *   null/undefined; missing counts are treated as 0.
 * @returns {{input_tokens: number, output_tokens: number,
 *   thinking_tokens: number, cost_usd: number}} Token counts and the
 *   total cost in USD.
 */
export function calcGeminiCost(model, usage) {
  // Own-property check: a bare GEMINI_PRICING[model] lookup also finds
  // inherited Object.prototype members ("constructor", "toString",
  // "__proto__", ...), which are truthy but carry no rates and would
  // turn the cost into NaN instead of falling back to `default`.
  const isKnownModel = Object.prototype.hasOwnProperty.call(
    GEMINI_PRICING,
    model,
  );
  const rates = isKnownModel ? GEMINI_PRICING[model] : GEMINI_PRICING.default;

  // `||` rather than `??` on purpose: it also maps NaN to 0.
  const inputTokens = usage?.promptTokenCount || 0;
  const outputTokens = usage?.candidatesTokenCount || 0;
  const thinkingTokens = usage?.thoughtsTokenCount || 0;

  const costUSD =
    (inputTokens / 1_000_000) * rates.input +
    (outputTokens / 1_000_000) * rates.output +
    // Entries without an explicit thinking rate bill thinking tokens
    // at the output rate.
    (thinkingTokens / 1_000_000) * (rates.thinking ?? rates.output);

  return {
    input_tokens: inputTokens,
    output_tokens: outputTokens,
    thinking_tokens: thinkingTokens,
    cost_usd: costUSD,
  };
}

/**
 * List the model ids that have an explicit entry in GEMINI_PRICING.
 *
 * @returns {string[]} Table keys in declaration order, excluding the
 *   "default" fallback entry.
 */
export function listKnownModels() {
  return Object.keys(GEMINI_PRICING).filter((k) => k !== "default");
}