// POST /relay/transcribe — forwards an audio payload to the chosen
// backend (Gemini first, operator hardware as overflow) and returns
// the standard envelope.
//
// Request shape: multipart/form-data
//   audio:          binary audio file (required)
//   mime_type:      string (default application/octet-stream)
//   title:          string (optional, used by Gemini prompt)
//   channel:        string (optional)
//   description:    string (optional)
//   chapters:       JSON-stringified array (optional)
//   offset_seconds: number string (optional, for chunked audio)
//
// Headers:
//   X-Recap-Install-Id  (required)
//   X-Recap-Job-Id      (optional but expected — pairs with /analyze)
//   Authorization       (optional Bearer LIC1-... for licensed tiers)
//
// Response (standard envelope):
//   {
//     result: { text: "[MM:SS] ...", segments: [], duration_seconds: 0 },
//     credits_remaining, tier, credit_charged
//   }
//
// Every outcome (success / quota-refused / backend-error) writes one
// row to the audit log so the admin dashboard can compute cost,
// margin, and speed metrics.
|
|
import express from "express";
|
|
import multer from "multer";
|
|
import { resolveIdentity, identityTier } from "../identity.js";
|
|
import {
|
|
getOrCreateRow,
|
|
planBackend,
|
|
commitCredit,
|
|
licenseFingerprint,
|
|
} from "../credits.js";
|
|
import { lookupJob, markJobCharged, refundJob } from "../job-credits.js";
|
|
import { getConfigSnapshot, getTierQuotas } from "../config.js";
|
|
import { createGeminiBackend } from "../backends/gemini.js";
|
|
import { createHardwareBackend } from "../backends/hardware.js";
|
|
import { envelope, errorEnvelope } from "./envelope.js";
|
|
import { recordCall } from "../audit-log.js";
|
|
import { calcGeminiCost } from "../pricing.js";
|
|
import { getAudioDurationSecondsFromBuffer } from "../audio-meta.js";
|
|
import { resolveHardwareConfig } from "../hardware-config.js";
|
|
import { reportHealthEvent } from "../spark-control-events.js";
|
|
|
|
// Multipart parser for the route below. Uploads are held in memory
// (multer.memoryStorage) so the handler can pass `req.file.buffer`
// directly to the transcription backend without touching disk.
const upload = multer({
  storage: multer.memoryStorage(),
  limits: { fileSize: 200 * 1024 * 1024 }, // 200 MB per uploaded file
});
|
|
|
|
/**
 * Build the Express router that serves `POST /transcribe`.
 *
 * Request flow, in order:
 *   1. Resolve the caller's identity (error envelope, default 401, on failure).
 *   2. Reject license identities without an install id (400) and requests
 *      without an `audio` file (400).
 *   3. Load the credit row, tier, config and hardware config, then ask
 *      `planBackend` which backend to use. A disallowed plan is audited as
 *      "refused" and answered with 402.
 *   4. Run the transcription on the chosen backend. A thrown error is audited
 *      as "error" and answered with `err.status` (default 502).
 *   5. On success, charge one credit unless the job id was already known to
 *      `lookupJob`, audit the call as "success", and return the envelope.
 *
 * @returns {import("express").Router} Router with the `/transcribe` route mounted.
 */
export function transcribeRouter() {
  const router = express.Router();

  router.post("/transcribe", upload.single("audio"), async (req, res) => {
    const t0 = Date.now();
    const jobId = req.header("X-Recap-Job-Id") || null;

    let identity;
    try {
      identity = await resolveIdentity(req);
    } catch (err) {
      const e = await errorEnvelope({
        error: err?.message || "auth_error",
        statusHint: err?.status || 401,
      });
      return res.status(e.statusHint || 401).json(e.body);
    }
    if (identity.kind === "license" && !identity.installId) {
      const e = await errorEnvelope({
        error: "missing X-Recap-Install-Id header",
        statusHint: 400,
      });
      return res.status(400).json(e.body);
    }
    const { creditKey, installId, license } = identity;
    if (!req.file) {
      const e = await errorEnvelope({
        error: "missing audio file",
        creditKey,
        installId,
        statusHint: 400,
      });
      return res.status(400).json(e.body);
    }

    const row = await getOrCreateRow({ creditKey, installId, license });
    const tier = identityTier(identity, row);
    row.tier_snapshot = tier;
    const licenseFp = identity.kind === "cloud" ? null : licenseFingerprint(license);
    // Identities without an install id are audited under their user id.
    const auditInstall = installId || identity.userId || null;

    // Probe audio duration BEFORE the backend call so we can record
    // it on every audit row (success and error alike). Used by the
    // dashboard to normalize wall-clock time to "ms per minute of
    // audio" — a backend-agnostic speed benchmark.
    const audioSeconds = await getAudioDurationSecondsFromBuffer(req.file.buffer);

    // Single writer for all three audit outcomes (refused / error /
    // success) so they always share the same identity and timing
    // fields. Outcome-specific fields are spread last and therefore
    // win on any key collision.
    const writeAudit = (fields) =>
      recordCall({
        install_id: auditInstall,
        license_fingerprint: licenseFp,
        tier,
        pipeline: "transcribe",
        duration_ms: Date.now() - t0,
        audio_seconds: audioSeconds,
        job_id: jobId,
        ...fields,
      });

    // Billing vs. routing are decoupled — see analyze.js for the
    // full reasoning. Look up job to decide whether to charge a
    // credit, but always run planBackend fresh so transcribe's
    // routing decision respects relay_transcribe_backend_preference.
    const reusedJob = !!lookupJob({ creditKey, installId, license, jobId });
    const cfg = await getConfigSnapshot();
    const hw = await resolveHardwareConfig(cfg);
    // Operator-only diagnostic — see the matching comment in
    // summarize-url.js for the full reasoning. We don't 503 here on
    // blocked_reason because doing so pre-empts planBackend and
    // surfaces operator-internal wording to clients even when
    // Gemini was the configured preference.
    if (hw.transcribe.blocked_reason) {
      console.warn(
        `[transcribe] hardware transcribe currently blocked (planBackend will route to Gemini if available): ${hw.transcribe.blocked_reason}`,
      );
    }
    const hasHardware = !!hw.transcribe.url;
    const quota = await getTierQuotas();
    const preference = cfg.relay_transcribe_backend_preference || "gemini_first";
    const plan = planBackend(row, quota, { hasHardware, preference });
    if (!plan.allowed) {
      await writeAudit({
        backend: null,
        model: null,
        status: "refused",
        credit_charged: 0,
        cost_usd: 0,
        error: plan.reason,
      });
      const e = await errorEnvelope({
        error: plan.reason,
        creditKey,
        installId,
        tier,
        statusHint: 402,
      });
      return res.status(402).json(e.body);
    }

    const chosenBackend = plan.backend;
    let result;
    try {
      // Fields common to both backends' transcribeAudio calls.
      const mimeType =
        req.body?.mime_type || req.file.mimetype || "application/octet-stream";
      const offsetSeconds = Number(req.body?.offset_seconds) || 0;

      if (chosenBackend === "gemini") {
        const backend = createGeminiBackend({
          apiKey: cfg.relay_gemini_api_key,
          transcriptionModel: cfg.relay_gemini_transcription_model,
          analysisModel: cfg.relay_gemini_analysis_model,
          txChunkSeconds: (cfg.relay_gemini_tx_chunk_minutes || 30) * 60,
          txConcurrency: cfg.relay_gemini_tx_concurrency || 12,
        });
        result = await backend.transcribeAudio({
          audio: req.file.buffer,
          mimeType,
          title: req.body?.title || "",
          channel: req.body?.channel || "",
          description: req.body?.description || "",
          chapters: parseChaptersField(req.body?.chapters),
          offsetSeconds,
        });
      } else {
        const backend = createHardwareBackend({
          parakeetBaseURL: hw.transcribe.url || "",
          gemmaBaseURL: hw.analyze.url || "",
          sparkControlBaseURL: hw.sparkBase || "",
          parakeetModel: hw.transcribe.model || "",
          gemmaModel: hw.analyze.model || "",
          txChunkSeconds: (cfg.relay_hardware_tx_chunk_minutes || 5) * 60,
          txChunkOverlapSeconds: cfg.relay_hardware_tx_chunk_overlap_seconds ?? 30,
          diarizationEnabled: !!cfg.relay_hardware_diarization_enabled,
          clusterThresholdPct: cfg.relay_hardware_voice_clustering_threshold ?? 70,
          anchorMinSpeakingSec: cfg.relay_hardware_anchor_min_speaking_sec ?? 30,
          smallClusterMaxSpeakingSec: cfg.relay_hardware_small_cluster_max_speaking_sec ?? 15,
          uncertainMarginPct: cfg.relay_hardware_uncertain_margin_pct ?? 10,
          txConcurrency: cfg.relay_hardware_tx_concurrency || 4,
        });
        result = await backend.transcribeAudio({
          audio: req.file.buffer,
          mimeType,
          offsetSeconds,
        });
      }
    } catch (err) {
      if (reusedJob) await refundJob({ creditKey, installId, license, jobId });
      console.error(`[relay/transcribe] backend error: ${err?.message}`);
      // Fire-and-forget health report for hardware-served calls;
      // Gemini failures are a separate observability surface.
      if (chosenBackend === "hardware") {
        reportHealthEvent({
          service: "parakeet",
          ok: false,
          error: (err?.message || String(err)).slice(0, 280),
          ms: Date.now() - t0,
        });
      }
      await writeAudit({
        backend: chosenBackend,
        model:
          chosenBackend === "gemini"
            ? cfg.relay_gemini_transcription_model
            : hw.transcribe.model || "(auto)",
        status: "error",
        credit_charged: 0,
        cost_usd: 0,
        error: (err?.message || String(err)).slice(0, 200),
      });
      const e = await errorEnvelope({
        error: err?.message || "backend_error",
        creditKey,
        installId,
        tier,
        statusHint: err?.status || 502,
      });
      // Same defensive fallback as the auth path: never hand
      // res.status() an undefined code.
      return res.status(e.statusHint || 502).json(e.body);
    }

    // A job id already known to lookupJob is not charged again.
    let creditCharged = 0;
    if (!reusedJob) {
      await commitCredit({ creditKey, installId, license, backend: chosenBackend, tier });
      await markJobCharged({ creditKey, installId, license, jobId, backend: chosenBackend, tier });
      creditCharged = 1;
    }

    // Success — write the audit row with cost details. Gemini's usage
    // metadata gives us token counts; calcGeminiCost translates that
    // into USD. Hardware-served calls have no token data and we
    // report cost_usd: 0 (operator's hardware is fixed-cost).
    const costDetails =
      chosenBackend === "gemini" && result.usage
        ? calcGeminiCost(result.model, result.usage)
        : {
            input_tokens: 0,
            output_tokens: 0,
            thinking_tokens: 0,
            cost_usd: 0,
          };
    await writeAudit({
      backend: chosenBackend,
      model: result?.model || null,
      status: "success",
      credit_charged: creditCharged,
      attempts: result?.attempts || null,
      ...costDetails,
    });

    const body = await envelope({ result, creditKey, installId, license, tier, creditCharged });
    res.json(body);
  });

  return router;
}
|
|
|
|
/**
 * Decode the optional `chapters` form field.
 *
 * @param {string | undefined | null} raw - JSON text expected to encode an array.
 * @returns {Array} The parsed array, or `[]` when the field is missing/empty,
 *   is not valid JSON, or decodes to anything other than an array.
 */
function parseChaptersField(raw) {
  if (!raw) return [];
  try {
    const parsed = JSON.parse(raw);
    return Array.isArray(parsed) ? parsed : [];
  } catch {
    // Malformed JSON is deliberately treated as "no chapters" rather
    // than failing the whole request over an optional field.
    return [];
  }
}
|