v0.2.8 operator dashboard with per-call audit log + cost tracking
This commit is contained in:
@@ -0,0 +1,53 @@
|
||||
// Gemini per-1M-token pricing in USD. Used by the relay to compute
// per-call API costs as audit-log entries are written. The operator can
// update this table by editing the file and redeploying when Google
// changes published rates — preferable to a config field because
// (a) prices are stable for months at a time, and (b) hardcoding keeps
// the audit log self-contained without a config-snapshot copy at
// write time.
//
// Rates as of mid-2026. ALWAYS verify against the current Google AI
// Studio pricing page before relying on these for billing-grade
// margin math — Google has been known to adjust prices ~quarterly.
|
||||
|
||||
// Maps a Gemini model id to its USD rates per 1M tokens, split into
// `input`, `output` and `thinking` token classes. The `default` key is
// not a model id; it holds the fallback rates for ids missing here.
export const GEMINI_PRICING = {
  // Pro family — best for analysis.
  "gemini-3.1-pro-preview": { input: 5.0, output: 25.0, thinking: 25.0 },
  "gemini-3-pro-preview": { input: 5.0, output: 25.0, thinking: 25.0 },

  // Flash family — best speed/cost for transcription, common for
  // analysis when sub-Pro quality is acceptable.
  "gemini-3-flash-preview": { input: 0.3, output: 2.5, thinking: 2.5 },
  "gemini-2.5-flash": { input: 0.3, output: 2.5, thinking: 2.5 },
  "gemini-2.0-flash": { input: 0.1, output: 0.4, thinking: 0.4 },

  // Fallback used when an unknown model id appears (e.g. the operator
  // typed a custom model name in setBackendRouting). Priced like Flash,
  // so estimates for unknown models skew low rather than reading as 0.
  default: { input: 0.3, output: 2.5, thinking: 2.5 },
};
|
||||
|
||||
/**
 * Compute the USD cost of a Gemini API call from its model id and
 * usageMetadata.
 *
 * @param {string} model - Model id to look up in GEMINI_PRICING. Ids that
 *   are not own keys of the table are priced with GEMINI_PRICING.default.
 * @param {object} [usage] - usageMetadata in the shape @google/genai
 *   returns (promptTokenCount, candidatesTokenCount, thoughtsTokenCount).
 *   Missing, non-numeric or negative counts are treated as 0.
 * @returns {{input_tokens: number, output_tokens: number,
 *   thinking_tokens: number, cost_usd: number}} Token counts as used for
 *   pricing, plus the total cost in USD.
 */
export function calcGeminiCost(model, usage) {
  // Own-property lookup only: a bare `GEMINI_PRICING[model]` resolves ids
  // such as "toString" or "constructor" through Object.prototype, which
  // yields a truthy non-rate value and therefore a NaN cost.
  const isListed = Object.prototype.hasOwnProperty.call(GEMINI_PRICING, model);
  const rates = (isListed && GEMINI_PRICING[model]) || GEMINI_PRICING.default;

  // Coerce each count to a finite, non-negative number so malformed usage
  // metadata cannot write NaN or negative costs into the audit log.
  const toCount = (value) => {
    const count = Number(value);
    return Number.isFinite(count) && count > 0 ? count : 0;
  };

  const inputTokens = toCount(usage?.promptTokenCount);
  const outputTokens = toCount(usage?.candidatesTokenCount);
  const thinkingTokens = toCount(usage?.thoughtsTokenCount);

  // Rates are per 1M tokens. Entries without a dedicated thinking rate
  // bill thinking tokens at the output rate.
  const costUSD =
    (inputTokens / 1_000_000) * rates.input +
    (outputTokens / 1_000_000) * rates.output +
    (thinkingTokens / 1_000_000) * (rates.thinking ?? rates.output);

  return {
    input_tokens: inputTokens,
    output_tokens: outputTokens,
    thinking_tokens: thinkingTokens,
    cost_usd: costUSD,
  };
}
|
||||
|
||||
/**
 * List the model ids that have an explicit entry in GEMINI_PRICING.
 *
 * @returns {string[]} The table's own enumerable keys in table order,
 *   with the "default" fallback entry left out.
 */
export function listKnownModels() {
  return Object.keys(GEMINI_PRICING).filter((modelId) => modelId !== "default");
}
|
||||
Reference in New Issue
Block a user