Wire new routes: identity, summarize-url, dashboard, admin
This commit is contained in:
+114
-58
@@ -16,14 +16,21 @@
|
||||
// margin, and speed metrics.
|
||||
|
||||
import express from "express";
|
||||
import { resolveLicense } from "../keysat-client.js";
|
||||
import { getOrCreateRow, planBackend, commitCredit } from "../credits.js";
|
||||
import { resolveIdentity, identityTier } from "../identity.js";
|
||||
import {
|
||||
getOrCreateRow,
|
||||
planBackend,
|
||||
commitCredit,
|
||||
licenseFingerprint,
|
||||
} from "../credits.js";
|
||||
import { lookupJob, markJobCharged, refundJob } from "../job-credits.js";
|
||||
import { getConfigSnapshot, getTierQuotas } from "../config.js";
|
||||
import { createGeminiBackend } from "../backends/gemini.js";
|
||||
import { createHardwareBackend } from "../backends/hardware.js";
|
||||
import { envelope, errorEnvelope } from "./envelope.js";
|
||||
import { recordCall } from "../audit-log.js";
|
||||
import { resolveHardwareConfig } from "../hardware-config.js";
|
||||
import { reportHealthEvent } from "../spark-control-events.js";
|
||||
import { calcGeminiCost } from "../pricing.js";
|
||||
|
||||
export function analyzeRouter() {
|
||||
@@ -31,72 +38,100 @@ export function analyzeRouter() {
|
||||
|
||||
router.post("/analyze", express.json({ limit: "10mb" }), async (req, res) => {
|
||||
const t0 = Date.now();
|
||||
const installId = req.header("X-Recap-Install-Id");
|
||||
const jobId = req.header("X-Recap-Job-Id") || null;
|
||||
const auth = req.header("Authorization");
|
||||
|
||||
if (!installId) {
|
||||
let identity;
|
||||
try {
|
||||
identity = await resolveIdentity(req);
|
||||
} catch (err) {
|
||||
const e = await errorEnvelope({
|
||||
error: err?.message || "auth_error",
|
||||
statusHint: err?.status || 401,
|
||||
});
|
||||
return res.status(e.statusHint || 401).json(e.body);
|
||||
}
|
||||
if (identity.kind === "license" && !identity.installId) {
|
||||
const e = await errorEnvelope({
|
||||
error: "missing X-Recap-Install-Id header",
|
||||
statusHint: 400,
|
||||
});
|
||||
return res.status(400).json(e.body);
|
||||
}
|
||||
const { creditKey, installId, license } = identity;
|
||||
const prompt = req.body?.prompt;
|
||||
if (!prompt || typeof prompt !== "string") {
|
||||
const e = await errorEnvelope({
|
||||
error: "missing or non-string body.prompt",
|
||||
creditKey,
|
||||
installId,
|
||||
statusHint: 400,
|
||||
});
|
||||
return res.status(400).json(e.body);
|
||||
}
|
||||
|
||||
const license = await resolveLicense(auth);
|
||||
const tier = license.tier;
|
||||
|
||||
const row = await getOrCreateRow(installId);
|
||||
const row = await getOrCreateRow({ creditKey, installId, license });
|
||||
const tier = identityTier(identity, row);
|
||||
row.tier_snapshot = tier;
|
||||
const licenseFp = identity.kind === "cloud" ? null : licenseFingerprint(license);
|
||||
const auditInstall = installId || identity.userId || null;
|
||||
|
||||
let reusedJob = false;
|
||||
let chosenBackend = null;
|
||||
const existingJob = lookupJob(installId, jobId);
|
||||
if (existingJob) {
|
||||
reusedJob = true;
|
||||
chosenBackend = existingJob.backend;
|
||||
} else {
|
||||
const cfg = await getConfigSnapshot();
|
||||
const hasHardware = !!cfg.relay_gemma_base_url;
|
||||
const quota = await getTierQuotas();
|
||||
const preference =
|
||||
cfg.relay_analyze_backend_preference || "gemini_first";
|
||||
const plan = planBackend(row, quota, { hasHardware, preference });
|
||||
if (!plan.allowed) {
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
tier,
|
||||
pipeline: "analyze",
|
||||
backend: null,
|
||||
model: null,
|
||||
status: "refused",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - t0,
|
||||
cost_usd: 0,
|
||||
job_id: jobId,
|
||||
error: plan.reason,
|
||||
});
|
||||
const e = await errorEnvelope({
|
||||
error: plan.reason,
|
||||
installId,
|
||||
tier,
|
||||
statusHint: 402,
|
||||
});
|
||||
return res.status(402).json(e.body);
|
||||
}
|
||||
chosenBackend = plan.backend;
|
||||
}
|
||||
|
||||
// Two separate decisions on every call:
|
||||
// 1. Billing: did we already charge a credit for this job? (look
|
||||
// up by job_id; reused → don't charge again.)
|
||||
// 2. Routing: which backend serves THIS pipeline step's request?
|
||||
// (always per-pipeline preference + planBackend, even when
|
||||
// the job has a prior transcribe call that routed elsewhere.)
|
||||
//
|
||||
// The old code conflated the two — it copied `backend` from the
|
||||
// existing job, which meant analyze would silently inherit
|
||||
// transcribe's backend choice even when the operator's analyze
|
||||
// preference said something different. Fixed: routing is decided
|
||||
// fresh per pipeline step, regardless of job history.
|
||||
const reusedJob = !!lookupJob({ creditKey, installId, license, jobId });
|
||||
const cfg = await getConfigSnapshot();
|
||||
const hw = await resolveHardwareConfig(cfg);
|
||||
// Operator-only diagnostic — see summarize-url.js for the full
|
||||
// reasoning. We don't 503 here on blocked_reason because doing
|
||||
// so pre-empts planBackend and would surface operator-internal
|
||||
// Spark Control / vLLM wording to clients even when Gemini was
|
||||
// the configured preference. planBackend correctly routes around
|
||||
// an unavailable hardware path via hasHardware = false.
|
||||
if (hw.analyze.blocked_reason) {
|
||||
console.warn(
|
||||
`[analyze] hardware analyze currently blocked (planBackend will route to Gemini if available): ${hw.analyze.blocked_reason}`,
|
||||
);
|
||||
}
|
||||
const hasHardware = !!hw.analyze.url;
|
||||
const quota = await getTierQuotas();
|
||||
const preference =
|
||||
cfg.relay_analyze_backend_preference || "gemini_first";
|
||||
const plan = planBackend(row, quota, { hasHardware, preference });
|
||||
if (!plan.allowed) {
|
||||
await recordCall({
|
||||
install_id: auditInstall,
|
||||
license_fingerprint: licenseFp,
|
||||
tier,
|
||||
pipeline: "analyze",
|
||||
backend: null,
|
||||
model: null,
|
||||
status: "refused",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - t0,
|
||||
cost_usd: 0,
|
||||
job_id: jobId,
|
||||
error: plan.reason,
|
||||
});
|
||||
const e = await errorEnvelope({
|
||||
error: plan.reason,
|
||||
creditKey,
|
||||
installId,
|
||||
tier,
|
||||
statusHint: 402,
|
||||
});
|
||||
return res.status(402).json(e.body);
|
||||
}
|
||||
const chosenBackend = plan.backend;
|
||||
|
||||
let result;
|
||||
try {
|
||||
if (chosenBackend === "gemini") {
|
||||
@@ -108,24 +143,39 @@ export function analyzeRouter() {
|
||||
result = await backend.analyzeText({ prompt });
|
||||
} else {
|
||||
const backend = createHardwareBackend({
|
||||
parakeetBaseURL: cfg.relay_parakeet_base_url,
|
||||
gemmaBaseURL: cfg.relay_gemma_base_url,
|
||||
parakeetModel: cfg.relay_parakeet_model,
|
||||
gemmaModel: cfg.relay_gemma_model,
|
||||
parakeetBaseURL: hw.transcribe.url || "",
|
||||
gemmaBaseURL: hw.analyze.url || "",
|
||||
sparkControlBaseURL: hw.sparkBase || "",
|
||||
parakeetModel: hw.transcribe.model || "",
|
||||
gemmaModel: hw.analyze.model || "",
|
||||
});
|
||||
result = await backend.analyzeText({ prompt });
|
||||
}
|
||||
} catch (err) {
|
||||
if (reusedJob) refundJob(installId, jobId);
|
||||
if (reusedJob) await refundJob({ creditKey, installId, license, jobId });
|
||||
console.error(`[relay/analyze] backend error: ${err?.message}`);
|
||||
// Passive health-event report to Spark Control so the
|
||||
// operator's dashboard surfaces the failure immediately
|
||||
// (without waiting for its own polling cycle to catch it).
|
||||
// Only fired for hardware-side calls — Gemini failures are a
|
||||
// separate observability surface (Google's API health).
|
||||
if (chosenBackend === "hardware") {
|
||||
reportHealthEvent({
|
||||
service: "vllm",
|
||||
ok: false,
|
||||
error: (err?.message || String(err)).slice(0, 280),
|
||||
ms: Date.now() - t0,
|
||||
});
|
||||
}
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
install_id: auditInstall,
|
||||
license_fingerprint: licenseFp,
|
||||
tier,
|
||||
pipeline: "analyze",
|
||||
backend: chosenBackend,
|
||||
model: chosenBackend === "gemini"
|
||||
? cfg.relay_gemini_analysis_model
|
||||
: cfg.relay_gemma_model,
|
||||
: hw.analyze.model || "(auto)",
|
||||
status: "error",
|
||||
credit_charged: 0,
|
||||
duration_ms: Date.now() - t0,
|
||||
@@ -135,6 +185,7 @@ export function analyzeRouter() {
|
||||
});
|
||||
const e = await errorEnvelope({
|
||||
error: err?.message || "backend_error",
|
||||
creditKey,
|
||||
installId,
|
||||
tier,
|
||||
statusHint: err?.status || 502,
|
||||
@@ -144,8 +195,8 @@ export function analyzeRouter() {
|
||||
|
||||
let creditCharged = 0;
|
||||
if (!reusedJob) {
|
||||
await commitCredit(installId, { backend: chosenBackend, tier });
|
||||
markJobCharged(installId, jobId, { backend: chosenBackend, tier });
|
||||
await commitCredit({ creditKey, installId, license, backend: chosenBackend, tier });
|
||||
await markJobCharged({ creditKey, installId, license, jobId, backend: chosenBackend, tier });
|
||||
creditCharged = 1;
|
||||
}
|
||||
|
||||
@@ -159,7 +210,8 @@ export function analyzeRouter() {
|
||||
cost_usd: 0,
|
||||
};
|
||||
await recordCall({
|
||||
install_id: installId,
|
||||
install_id: auditInstall,
|
||||
license_fingerprint: licenseFp,
|
||||
tier,
|
||||
pipeline: "analyze",
|
||||
backend: chosenBackend,
|
||||
@@ -168,10 +220,14 @@ export function analyzeRouter() {
|
||||
credit_charged: creditCharged,
|
||||
duration_ms: Date.now() - t0,
|
||||
job_id: jobId,
|
||||
// Surface the cascade so the dashboard can show "served by
|
||||
// 2.5-flash after 3-flash 503'd" — Gemini backend returns this;
|
||||
// hardware backend doesn't (no per-model fallback there).
|
||||
attempts: result?.attempts || null,
|
||||
...costDetails,
|
||||
});
|
||||
|
||||
const body = await envelope({ result, installId, tier, creditCharged });
|
||||
const body = await envelope({ result, creditKey, installId, license, tier, creditCharged });
|
||||
res.json(body);
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user