v0.2.8 operator dashboard with per-call audit log + cost tracking
This commit is contained in:
@@ -0,0 +1,239 @@
|
||||
// Per-call audit log for profitability + observability. Each relay
// request (success or failure) appends one line of newline-delimited
// JSON to <dataDir>/relay-calls.ndjson (default /data). Append-only —
// the read path streams the file line by line and keeps only the
// entries inside the requested time range in memory for aggregation,
// which is cheap up to 100k+ entries at typical relay scale
// (low-tens-of-thousands of calls per month).
//
// Record shape (no field is required; missing fields just don't
// appear in aggregations):
// {
//   ts:             ms-epoch when the record was written
//   install_id:     X-Recap-Install-Id header value
//   tier:           "core" | "pro" | "max"
//   pipeline:       "transcribe" | "analyze"
//   backend:        "gemini" | "hardware"
//   model:          e.g. "gemini-3-flash-preview", "parakeet-tdt-0.6b-v3"
//   status:         "success" | "error" | "refused" (refused = quota)
//   credit_charged: 0 | 1
//   duration_ms:    end-to-end wall time
//   input_tokens, output_tokens, thinking_tokens (Gemini only)
//   cost_usd:       computed from token counts × per-1M-token rates
//   job_id:         X-Recap-Job-Id (so we can collapse pairs into one)
//   error:          short error string if status is "error" or "refused"
// }
//
// Rotation isn't built in — for the prototype, the operator can rotate
// manually (mv relay-calls.ndjson relay-calls.ndjson.0; restart). Once
// volume warrants, replace this with a daily-rotated logger or move to
// SQLite for indexed time-range queries.
|
||||
|
||||
import fs from "fs/promises";
|
||||
import { createReadStream } from "fs";
|
||||
import readline from "readline";
|
||||
import path from "path";
|
||||
|
||||
// Module-level state. Both hold defaults until initAuditLog() runs;
// every read and write in this module goes through `logPath`.
let dataDir = "/data";
let logPath = "/data/relay-calls.ndjson";

/**
 * Point the audit log at `<dataDir>/relay-calls.ndjson` and make sure the
 * file exists so the streaming read path doesn't trip on a missing file.
 *
 * @param {{ dataDir?: string }} [options] - `dataDir` overrides the
 *   directory holding the log; when omitted (or when the function is
 *   called with no argument) the current directory is kept.
 * @returns {Promise<void>}
 * @throws {Error} if the directory or the log file cannot be created.
 */
export async function initAuditLog({ dataDir: dd } = {}) {
  if (dd) dataDir = dd;
  logPath = path.join(dataDir, "relay-calls.ndjson");

  // A fresh volume may not contain the directory yet; `recursive` makes
  // this a no-op when it already exists.
  await fs.mkdir(dataDir, { recursive: true });

  // Appending an empty string creates the file (mode 0600) when it is
  // missing and leaves existing content untouched. Unlike an
  // access-then-writeFile pair it can never truncate a log that another
  // writer created in between.
  await fs.appendFile(logPath, "", { mode: 0o600 });

  console.log(`[audit-log] writing to ${logPath}`);
}
|
||||
|
||||
/**
 * Append one call record to the audit log as a single NDJSON line.
 *
 * Best-effort: a failed append is logged and swallowed — losing an audit
 * line shouldn't fail the relay call that caused it.
 *
 * @param {object} entry - Record fields. `ts` defaults to the current
 *   time; a numeric `ts` supplied by the caller is kept.
 * @returns {Promise<void>} Resolves once the append was attempted.
 */
export async function recordCall(entry) {
  const record = { ts: Date.now(), ...entry };
  // The read path only accepts rows whose `ts` is a number. A caller
  // passing `ts: undefined`, `null` or a string would otherwise write a
  // row that every later read silently drops.
  if (!Number.isFinite(record.ts)) record.ts = Date.now();

  try {
    await fs.appendFile(logPath, `${JSON.stringify(record)}\n`, { mode: 0o600 });
  } catch (err) {
    console.error(`[audit-log] append failed: ${err?.message || err}`);
  }
}
|
||||
|
||||
// Read all entries since `sinceMs` (default: 30 days). Streamed
|
||||
// line-by-line so the whole file doesn't sit in memory at once.
|
||||
// Returned array is newest-first.
|
||||
export async function readEntries({
|
||||
sinceMs = Date.now() - 30 * 24 * 3600 * 1000,
|
||||
untilMs = Number.POSITIVE_INFINITY,
|
||||
} = {}) {
|
||||
const out = [];
|
||||
try {
|
||||
const stream = createReadStream(logPath, { encoding: "utf8" });
|
||||
const rl = readline.createInterface({ input: stream, crlfDelay: Infinity });
|
||||
for await (const line of rl) {
|
||||
if (!line.trim()) continue;
|
||||
try {
|
||||
const r = JSON.parse(line);
|
||||
if (typeof r.ts === "number" && r.ts >= sinceMs && r.ts <= untilMs) {
|
||||
out.push(r);
|
||||
}
|
||||
} catch {
|
||||
// Bad line — skip silently. Doesn't disrupt the rest of the read.
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
if (err.code !== "ENOENT") {
|
||||
console.error(`[audit-log] read failed: ${err?.message || err}`);
|
||||
}
|
||||
}
|
||||
// Newest first by ts.
|
||||
out.sort((a, b) => b.ts - a.ts);
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
 * Compute multi-dimensional aggregates over a set of audit-log entries.
 * The dashboard renders all of these — each `by_*` value is a small array
 * of flat row objects suitable for direct tabulation.
 *
 * Missing numeric fields count as 0 and missing dimension values are
 * bucketed under "unknown".
 *
 * @param {object[]} entries - Parsed audit-log records.
 * @returns {{
 *   summary: object,
 *   by_tier: object[],
 *   by_model: object[],
 *   by_pipeline: object[],
 *   by_backend: object[],
 *   by_install: object[],
 *   by_hour_utc: object[],
 *   cost_vs_speed: object[],
 * }} `by_install` holds at most the 20 highest-spend installs;
 *   `by_hour_utc` always has 24 rows; `cost_vs_speed` is sorted by
 *   ascending average duration.
 */
export function aggregate(entries) {
  const calls = entries.length;
  const countStatus = (status) =>
    entries.filter((e) => e.status === status).length;
  const success = countStatus("success");
  const totalDuration = sumBy(entries, "duration_ms");

  // Group by one dimension and turn every bucket into a table row.
  const rowsBy = (keyFn, toRow) =>
    Object.entries(groupBy(entries, keyFn)).map(([key, list]) =>
      toRow(key, list),
    );

  // Columns shared by the tier / pipeline / backend tables.
  const baseColumns = (list) => ({
    calls: list.length,
    cost_usd: sumBy(list, "cost_usd"),
    avg_duration_ms: avgBy(list, "duration_ms"),
  });

  // ── By tier ──
  const tierRows = rowsBy(
    (e) => e.tier || "unknown",
    (tier, list) => ({
      tier,
      ...baseColumns(list),
      unique_installs: new Set(list.map((e) => e.install_id)).size,
    }),
  );

  // ── By model ──
  const modelRows = rowsBy(
    (e) => e.model || "unknown",
    (model, list) => ({
      model,
      calls: list.length,
      cost_usd: sumBy(list, "cost_usd"),
      input_tokens: sumBy(list, "input_tokens"),
      output_tokens: sumBy(list, "output_tokens"),
      thinking_tokens: sumBy(list, "thinking_tokens"),
      avg_duration_ms: avgBy(list, "duration_ms"),
      avg_cost_usd: avgBy(list, "cost_usd"),
    }),
  );

  // ── By pipeline ──
  const pipelineRows = rowsBy(
    (e) => e.pipeline || "unknown",
    (pipeline, list) => ({ pipeline, ...baseColumns(list) }),
  );

  // ── By backend ──
  const backendRows = rowsBy(
    (e) => e.backend || "unknown",
    (backend, list) => ({ backend, ...baseColumns(list) }),
  );

  // ── By install (top 20 by spend) ──
  const installRows = rowsBy(
    (e) => e.install_id || "unknown",
    (install, list) => ({
      install_id: install,
      // Tier of the first entry in the bucket, i.e. the most recent one
      // when `entries` is ordered newest-first.
      tier_snapshot: list[0]?.tier || "core",
      calls: list.length,
      cost_usd: sumBy(list, "cost_usd"),
      // Distinct summarize jobs (collapse transcribe+analyze pairs).
      summaries: new Set(list.map((e) => e.job_id).filter(Boolean)).size,
      avg_duration_ms: avgBy(list, "duration_ms"),
      // Folded rather than Math.max(...spread): spreading a very large
      // bucket into call arguments throws RangeError.
      last_active_at: list.reduce(
        (latest, e) => Math.max(latest, e.ts || 0),
        -Infinity,
      ),
    }),
  )
    .sort((a, b) => b.cost_usd - a.cost_usd)
    .slice(0, 20);

  // ── By hour-of-day (for traffic-pattern view) ──
  const byHour = groupBy(entries, (e) => new Date(e.ts).getUTCHours());
  const hourRows = Array.from({ length: 24 }, (_, h) => {
    const list = byHour[h] || [];
    return {
      hour_utc: h,
      calls: list.length,
      cost_usd: sumBy(list, "cost_usd"),
    };
  });

  // ── Cost vs speed (per-model averages) ──
  // Same source as modelRows but kept separate so the dashboard can
  // render it as a scatter / table without extra transformation.
  const costSpeedRows = modelRows
    .map((r) => ({
      model: r.model,
      avg_cost_usd: r.avg_cost_usd,
      avg_duration_ms: r.avg_duration_ms,
      calls: r.calls,
    }))
    .sort((a, b) => a.avg_duration_ms - b.avg_duration_ms);

  return {
    summary: {
      calls,
      success,
      errors: countStatus("error"),
      refused: countStatus("refused"),
      success_rate: calls > 0 ? success / calls : 0,
      total_cost_usd: sumBy(entries, "cost_usd"),
      total_duration_ms: totalDuration,
      avg_duration_ms: calls > 0 ? totalDuration / calls : 0,
      total_input_tokens: sumBy(entries, "input_tokens"),
      total_output_tokens: sumBy(entries, "output_tokens"),
      total_thinking_tokens: sumBy(entries, "thinking_tokens"),
    },
    by_tier: tierRows,
    by_model: modelRows,
    by_pipeline: pipelineRows,
    by_backend: backendRows,
    by_install: installRows,
    by_hour_utc: hourRows,
    cost_vs_speed: costSpeedRows,
  };
}
|
||||
|
||||
/**
 * Bucket `list` by the key `keyFn` returns for each item.
 *
 * The result has no prototype, so keys that come from untrusted data
 * (e.g. an install id of "constructor", "toString" or "__proto__") are
 * stored as ordinary buckets instead of colliding with inherited members
 * of Object.prototype.
 *
 * @param {Array} list - Items to group.
 * @param {(item: any) => any} keyFn - Returns the bucket key; it is
 *   coerced to a property key (numbers become strings).
 * @returns {Object<string, Array>} Prototype-less map of key → items, in
 *   first-seen key order for string keys.
 */
function groupBy(list, keyFn) {
  const out = Object.create(null);
  for (const item of list) {
    const k = keyFn(item);
    if (!out[k]) out[k] = [];
    out[k].push(item);
  }
  return out;
}
|
||||
|
||||
/**
 * Sum the numeric field `key` across `list`.
 *
 * Only finite numbers contribute. Missing fields, null, and non-numeric
 * values (for example a string from a hand-edited log line) count as 0,
 * so the result is always a number and never a concatenated string.
 *
 * @param {object[]} list - Records to total.
 * @param {string} key - Field name to read from each record.
 * @returns {number} The total; 0 for an empty list.
 */
function sumBy(list, key) {
  let s = 0;
  for (const item of list) {
    const value = item[key];
    if (Number.isFinite(value)) s += value;
  }
  return s;
}
|
||||
|
||||
/**
 * Mean of the field `key` over every item in `list`.
 *
 * The divisor is the full list length, so items lacking the field pull
 * the average down rather than being excluded.
 *
 * @param {object[]} list - Records to average.
 * @param {string} key - Field name handed to sumBy.
 * @returns {number} The mean, or 0 for an empty list.
 */
function avgBy(list, key) {
  const count = list.length;
  return count === 0 ? 0 : sumBy(list, key) / count;
}
|
||||
@@ -120,6 +120,10 @@ export function createGeminiBackend({
|
||||
// that handles this exact shape.
|
||||
segments: [],
|
||||
duration_seconds: 0,
|
||||
// Pass usage + the model id back to the route so audit-log
|
||||
// entries can include token counts + computed cost.
|
||||
usage: result?.usageMetadata || null,
|
||||
model: transcriptionModel,
|
||||
};
|
||||
} finally {
|
||||
try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch {}
|
||||
@@ -138,6 +142,8 @@ export function createGeminiBackend({
|
||||
});
|
||||
return {
|
||||
text: safeText(result) || "",
|
||||
usage: result?.usageMetadata || null,
|
||||
model: analysisModel,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -147,6 +147,8 @@ export function createHardwareBackend({
|
||||
text: lines.join("\n"),
|
||||
segments: shifted,
|
||||
duration_seconds: data.duration || 0,
|
||||
usage: null, // hardware backend doesn't expose token counts
|
||||
model: transcribeModel,
|
||||
};
|
||||
},
|
||||
|
||||
@@ -194,7 +196,11 @@ export function createHardwareBackend({
|
||||
|
||||
const data = await res.json();
|
||||
const text = data?.choices?.[0]?.message?.content || "";
|
||||
return { text };
|
||||
return {
|
||||
text,
|
||||
usage: null,
|
||||
model: analyzeModel,
|
||||
};
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@ import { fileURLToPath } from "url";
|
||||
|
||||
import { initConfig } from "./config.js";
|
||||
import { initCredits } from "./credits.js";
|
||||
import { initAuditLog } from "./audit-log.js";
|
||||
import {
|
||||
setupAdminAuthMiddleware,
|
||||
setupAdminAuthRoutes,
|
||||
@@ -33,6 +34,7 @@ const PORT = parseInt(process.env.PORT || "3002", 10);
|
||||
|
||||
await initConfig({ dataDir: DATA_DIR });
|
||||
await initCredits({ dataDir: DATA_DIR });
|
||||
await initAuditLog({ dataDir: DATA_DIR });
|
||||
|
||||
const app = express();
|
||||
app.use(cors());
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "recap-relay-server",
|
||||
"version": "0.2.7",
|
||||
"version": "0.2.8",
|
||||
"type": "module",
|
||||
"private": true,
|
||||
"dependencies": {
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
// Gemini per-1M-token pricing in USD. Used by the relay to compute
|
||||
// per-call API costs as audit-log entries are written. Operator can
|
||||
// update this table by editing the file and redeploying when Google
|
||||
// changes published rates — preferable to a config field because
|
||||
// (a) prices are stable for months at a time, (b) hardcoding keeps
|
||||
// the audit log self-contained without a config-snapshot copy at
|
||||
// write time.
|
||||
//
|
||||
// Rates as of mid-2026. ALWAYS verify against the current Google AI
|
||||
// Studio pricing page before relying on these for billing-grade
|
||||
// margin math — Google has been known to adjust prices ~quarterly.
|
||||
|
||||
/**
 * Per-model Gemini rates in USD per 1M tokens, keyed by model id. Every
 * value has the shape { input, output, thinking }. The "default" key is
 * the fallback rate for model ids that are not listed.
 */
export const GEMINI_PRICING = Object.fromEntries(
  [
    // [model id, input, output, thinking]
    // Pro family — best for analysis.
    ["gemini-3.1-pro-preview", 5.0, 25.0, 25.0],
    ["gemini-3-pro-preview", 5.0, 25.0, 25.0],

    // Flash family — best speed/cost for transcription, common for
    // analysis when sub-Pro quality is acceptable.
    ["gemini-3-flash-preview", 0.3, 2.5, 2.5],
    ["gemini-2.5-flash", 0.3, 2.5, 2.5],
    ["gemini-2.0-flash", 0.1, 0.4, 0.4],

    // Fallback used when an unknown model id appears (e.g. operator
    // typed a custom model name in setBackendRouting). Priced like
    // Flash so cost estimates skew low rather than 0.
    ["default", 0.3, 2.5, 2.5],
  ].map(([model, input, output, thinking]) => [
    model,
    { input, output, thinking },
  ]),
);
|
||||
|
||||
/**
 * Compute the cost of one Gemini API call from its model id and
 * usageMetadata (reads promptTokenCount, candidatesTokenCount and
 * thoughtsTokenCount).
 *
 * Rates are looked up in GEMINI_PRICING by own key only, so a model id
 * that happens to match an inherited property name ("constructor",
 * "toString", …) falls back to the default rate instead of producing NaN.
 * Token counts that are missing or not finite numbers count as 0.
 *
 * @param {string} model - Model id; unknown ids use GEMINI_PRICING.default.
 * @param {object|null|undefined} usage - usageMetadata of the response.
 * @returns {{ input_tokens: number, output_tokens: number,
 *   thinking_tokens: number, cost_usd: number }}
 */
export function calcGeminiCost(model, usage) {
  const isKnown =
    typeof model === "string" &&
    Object.prototype.hasOwnProperty.call(GEMINI_PRICING, model);
  const rates = isKnown ? GEMINI_PRICING[model] : GEMINI_PRICING.default;

  const toCount = (value) => (Number.isFinite(value) ? value : 0);
  const inputTokens = toCount(usage?.promptTokenCount);
  const outputTokens = toCount(usage?.candidatesTokenCount);
  const thinkingTokens = toCount(usage?.thoughtsTokenCount);

  // Rates are per 1M tokens. Thinking tokens are billed at the output
  // rate when a model has no dedicated thinking rate.
  const costUSD =
    (inputTokens / 1_000_000) * rates.input +
    (outputTokens / 1_000_000) * rates.output +
    (thinkingTokens / 1_000_000) * (rates.thinking ?? rates.output);

  return {
    input_tokens: inputTokens,
    output_tokens: outputTokens,
    thinking_tokens: thinkingTokens,
    cost_usd: costUSD,
  };
}
|
||||
|
||||
/**
 * List the model ids that have an explicit entry in GEMINI_PRICING.
 *
 * @returns {string[]} Keys of GEMINI_PRICING in table order, without the
 *   "default" fallback entry.
 */
export function listKnownModels() {
  const models = [];
  for (const id of Object.keys(GEMINI_PRICING)) {
    if (id === "default") continue;
    models.push(id);
  }
  return models;
}
|
||||
@@ -12,6 +12,8 @@ import { getConfigSnapshot } from "../config.js";
|
||||
import { snapshotAll } from "../credits.js";
|
||||
import { snapshotCache } from "../keysat-client.js";
|
||||
import { snapshotJobs } from "../job-credits.js";
|
||||
import { readEntries, aggregate } from "../audit-log.js";
|
||||
import { GEMINI_PRICING } from "../pricing.js";
|
||||
import fs from "fs/promises";
|
||||
import path from "path";
|
||||
|
||||
@@ -48,6 +50,49 @@ export function adminRouter({ dataDir }) {
|
||||
res.json({ entries: snapshotJobs() });
|
||||
});
|
||||
|
||||
// ── Dashboard ─────────────────────────────────────────────────────────
|
||||
// Time-range aggregations over the per-call audit log. Default range
|
||||
// is the last 30 days; override with ?days=N or ?since=<ms-epoch>.
|
||||
// Returns { range, summary, by_tier, by_model, by_pipeline,
|
||||
// by_backend, by_install, by_hour_utc, cost_vs_speed, pricing }.
|
||||
router.get("/dashboard", async (req, res) => {
  const DAY_MS = 24 * 3600 * 1000;
  const DEFAULT_DAYS = 30;

  // Only plain string query values are parsed; anything else is treated
  // as "not supplied".
  const intParam = (value) =>
    typeof value === "string" ? parseInt(value, 10) : null;

  const days = intParam(req.query.days);
  const validDays = Number.isFinite(days) && days > 0 ? days : null;

  // `since` wins over `days` whenever it parses to a number. The check is
  // on finiteness rather than truthiness so that `?since=0` (everything
  // since the epoch) is honoured instead of falling back to 30 days.
  const explicitSince = intParam(req.query.since);
  const sinceMs = Number.isFinite(explicitSince)
    ? explicitSince
    : Date.now() - (validDays ?? DEFAULT_DAYS) * DAY_MS;

  try {
    const entries = await readEntries({ sinceMs });
    const agg = aggregate(entries);
    res.json({
      range: {
        since_ms: sinceMs,
        until_ms: Date.now(),
        days: validDays,
        total_entries: entries.length,
      },
      ...agg,
      // Rate table is included alongside the aggregates in the response.
      pricing: GEMINI_PRICING,
    });
  } catch (err) {
    console.error(`[admin/dashboard] failed: ${err?.message || err}`);
    res
      .status(500)
      .json({ error: "dashboard_failed", message: err?.message || String(err) });
  }
});
|
||||
|
||||
// Adjust the live quotas blob. Same shape the StartOS action writes
|
||||
// to relay_tier_quotas_json — kept here so the dashboard can tune
|
||||
// quotas without round-tripping the StartOS UI.
|
||||
|
||||
@@ -10,6 +10,10 @@
|
||||
// Same charge-once-per-job semantics: a Recap summarize job pairs
|
||||
// transcribe + analyze with the same X-Recap-Job-Id. The first call
|
||||
// (whichever endpoint) charges 1 credit; the second is free.
|
||||
//
|
||||
// Every outcome (success / quota-refused / backend-error) writes one
|
||||
// row to the audit log so the admin dashboard can compute cost,
|
||||
// margin, and speed metrics.
|
||||
|
||||
import express from "express";
|
||||
import { resolveLicense } from "../keysat-client.js";
|
||||
@@ -19,11 +23,14 @@ import { getConfigSnapshot, getTierQuotas } from "../config.js";
|
||||
import { createGeminiBackend } from "../backends/gemini.js";
|
||||
import { createHardwareBackend } from "../backends/hardware.js";
|
||||
import { envelope, errorEnvelope } from "./envelope.js";
|
||||
import { recordCall } from "../audit-log.js";
|
||||
import { calcGeminiCost } from "../pricing.js";
|
||||
|
||||
export function analyzeRouter() {
|
||||
const router = express.Router();
|
||||
|
||||
router.post("/analyze", express.json({ limit: "10mb" }), async (req, res) => {
|
||||
const t0 = Date.now();
|
||||
const installId = req.header("X-Recap-Install-Id");
|
||||
const jobId = req.header("X-Recap-Job-Id") || null;
|
||||
const auth = req.header("Authorization");
|
||||
@@ -65,6 +72,19 @@ export function analyzeRouter() {
|
||||
cfg.relay_analyze_backend_preference || "gemini_first";
|
||||
const plan = planBackend(row, quota, { hasHardware, preference });
|
||||
if (!plan.allowed) {
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
tier,
|
||||
pipeline: "analyze",
|
||||
backend: null,
|
||||
model: null,
|
||||
status: "refused",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - t0,
|
||||
cost_usd: 0,
|
||||
job_id: jobId,
|
||||
error: plan.reason,
|
||||
});
|
||||
const e = await errorEnvelope({
|
||||
error: plan.reason,
|
||||
installId,
|
||||
@@ -98,6 +118,21 @@ export function analyzeRouter() {
|
||||
} catch (err) {
|
||||
if (reusedJob) refundJob(installId, jobId);
|
||||
console.error(`[relay/analyze] backend error: ${err?.message}`);
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
tier,
|
||||
pipeline: "analyze",
|
||||
backend: chosenBackend,
|
||||
model: chosenBackend === "gemini"
|
||||
? cfg.relay_gemini_analysis_model
|
||||
: cfg.relay_gemma_model,
|
||||
status: "error",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - t0,
|
||||
cost_usd: 0,
|
||||
job_id: jobId,
|
||||
error: (err?.message || String(err)).slice(0, 200),
|
||||
});
|
||||
const e = await errorEnvelope({
|
||||
error: err?.message || "backend_error",
|
||||
installId,
|
||||
@@ -114,6 +149,28 @@ export function analyzeRouter() {
|
||||
creditCharged = 1;
|
||||
}
|
||||
|
||||
const costDetails =
|
||||
chosenBackend === "gemini" && result.usage
|
||||
? calcGeminiCost(result.model, result.usage)
|
||||
: {
|
||||
input_tokens: 0,
|
||||
output_tokens: 0,
|
||||
thinking_tokens: 0,
|
||||
cost_usd: 0,
|
||||
};
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
tier,
|
||||
pipeline: "analyze",
|
||||
backend: chosenBackend,
|
||||
model: result?.model || null,
|
||||
status: "success",
|
||||
credit_charged: creditCharged,
|
||||
duration_ms: Date.now() - t0,
|
||||
job_id: jobId,
|
||||
...costDetails,
|
||||
});
|
||||
|
||||
const body = await envelope({ result, installId, tier, creditCharged });
|
||||
res.json(body);
|
||||
});
|
||||
|
||||
@@ -21,6 +21,10 @@
|
||||
// result: { text: "[MM:SS] ...", segments: [], duration_seconds: 0 },
|
||||
// credits_remaining, tier, credit_charged
|
||||
// }
|
||||
//
|
||||
// Every outcome (success / quota-refused / backend-error) writes one
|
||||
// row to the audit log so the admin dashboard can compute cost,
|
||||
// margin, and speed metrics.
|
||||
|
||||
import express from "express";
|
||||
import multer from "multer";
|
||||
@@ -31,6 +35,8 @@ import { getConfigSnapshot, getTierQuotas } from "../config.js";
|
||||
import { createGeminiBackend } from "../backends/gemini.js";
|
||||
import { createHardwareBackend } from "../backends/hardware.js";
|
||||
import { envelope, errorEnvelope } from "./envelope.js";
|
||||
import { recordCall } from "../audit-log.js";
|
||||
import { calcGeminiCost } from "../pricing.js";
|
||||
|
||||
const upload = multer({
|
||||
storage: multer.memoryStorage(),
|
||||
@@ -41,6 +47,7 @@ export function transcribeRouter() {
|
||||
const router = express.Router();
|
||||
|
||||
router.post("/transcribe", upload.single("audio"), async (req, res) => {
|
||||
const t0 = Date.now();
|
||||
const installId = req.header("X-Recap-Install-Id");
|
||||
const jobId = req.header("X-Recap-Job-Id") || null;
|
||||
const auth = req.header("Authorization");
|
||||
@@ -60,14 +67,9 @@ export function transcribeRouter() {
|
||||
const license = await resolveLicense(auth);
|
||||
const tier = license.tier;
|
||||
|
||||
// Persist tier on the row so the admin dashboard reflects the
|
||||
// most recently seen tier for this install.
|
||||
const row = await getOrCreateRow(installId);
|
||||
row.tier_snapshot = tier;
|
||||
|
||||
// Job-id dedup. If we've already charged this job, skip the
|
||||
// credit check entirely — the user is paying once for the whole
|
||||
// summarize job.
|
||||
let reusedJob = false;
|
||||
let chosenBackend = null;
|
||||
const existingJob = lookupJob(installId, jobId);
|
||||
@@ -82,6 +84,19 @@ export function transcribeRouter() {
|
||||
cfg.relay_transcribe_backend_preference || "gemini_first";
|
||||
const plan = planBackend(row, quota, { hasHardware, preference });
|
||||
if (!plan.allowed) {
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
tier,
|
||||
pipeline: "transcribe",
|
||||
backend: null,
|
||||
model: null,
|
||||
status: "refused",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - t0,
|
||||
cost_usd: 0,
|
||||
job_id: jobId,
|
||||
error: plan.reason,
|
||||
});
|
||||
const e = await errorEnvelope({
|
||||
error: plan.reason,
|
||||
installId,
|
||||
@@ -93,7 +108,6 @@ export function transcribeRouter() {
|
||||
chosenBackend = plan.backend;
|
||||
}
|
||||
|
||||
// Build the backend client based on chosenBackend.
|
||||
const cfg = await getConfigSnapshot();
|
||||
let result;
|
||||
try {
|
||||
@@ -126,10 +140,23 @@ export function transcribeRouter() {
|
||||
});
|
||||
}
|
||||
} catch (err) {
|
||||
// If we'd charged this job already (rare — most refundable
|
||||
// failures happen on the FIRST call), refund.
|
||||
if (reusedJob) refundJob(installId, jobId);
|
||||
console.error(`[relay/transcribe] backend error: ${err?.message}`);
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
tier,
|
||||
pipeline: "transcribe",
|
||||
backend: chosenBackend,
|
||||
model: chosenBackend === "gemini"
|
||||
? cfg.relay_gemini_transcription_model
|
||||
: cfg.relay_parakeet_model,
|
||||
status: "error",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - t0,
|
||||
cost_usd: 0,
|
||||
job_id: jobId,
|
||||
error: (err?.message || String(err)).slice(0, 200),
|
||||
});
|
||||
const e = await errorEnvelope({
|
||||
error: err?.message || "backend_error",
|
||||
installId,
|
||||
@@ -139,7 +166,6 @@ export function transcribeRouter() {
|
||||
return res.status(e.statusHint).json(e.body);
|
||||
}
|
||||
|
||||
// Commit the credit on success (unless this was a job-id reuse).
|
||||
let creditCharged = 0;
|
||||
if (!reusedJob) {
|
||||
await commitCredit(installId, { backend: chosenBackend, tier });
|
||||
@@ -147,6 +173,32 @@ export function transcribeRouter() {
|
||||
creditCharged = 1;
|
||||
}
|
||||
|
||||
// Success — write the audit row with cost details. Gemini's usage
|
||||
// metadata gives us token counts; calcGeminiCost translates that
|
||||
// into USD. Hardware-served calls have no token data and we
|
||||
// report cost_usd: 0 (operator's hardware is fixed-cost).
|
||||
const costDetails =
|
||||
chosenBackend === "gemini" && result.usage
|
||||
? calcGeminiCost(result.model, result.usage)
|
||||
: {
|
||||
input_tokens: 0,
|
||||
output_tokens: 0,
|
||||
thinking_tokens: 0,
|
||||
cost_usd: 0,
|
||||
};
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
tier,
|
||||
pipeline: "transcribe",
|
||||
backend: chosenBackend,
|
||||
model: result?.model || null,
|
||||
status: "success",
|
||||
credit_charged: creditCharged,
|
||||
duration_ms: Date.now() - t0,
|
||||
job_id: jobId,
|
||||
...costDetails,
|
||||
});
|
||||
|
||||
const body = await envelope({ result, installId, tier, creditCharged });
|
||||
res.json(body);
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user