v0.2.7 configurable Gemini models + per-pipeline backend preference
This commit is contained in:
+68
-9
@@ -250,7 +250,24 @@ export function computeRemaining(row, quota) {
|
||||
// served at all. Returns { allowed, backend: "gemini"|"hardware"|null,
|
||||
// reason }. Does NOT debit — that's a separate commit step after the
|
||||
// backend call succeeds.
|
||||
export function planBackend(row, quota, { hasHardware }) {
|
||||
//
|
||||
// `preference` is the operator-configured routing strategy for the
|
||||
// current pipeline step (transcribe or analyze), one of:
|
||||
// - "gemini_first" try Gemini until cap is exceeded, then hardware
|
||||
// (default — best quality routing on operator's
|
||||
// Gemini budget, hardware as overflow)
|
||||
// - "hardware_first" try hardware first, fall back to Gemini when
|
||||
// hardware isn't configured (lets the operator
|
||||
// conserve Gemini budget for premium use cases)
|
||||
// - "gemini_only" Gemini only, fail when cap exceeded (caps the
|
||||
// operator's spend at the per-tier limit)
|
||||
// - "hardware_only" Hardware only, fail when not configured (good
|
||||
// for fully local / offline deployments)
|
||||
//
|
||||
// The Gemini cap (geminiCapMonthly / geminiCapLifetime on the tier
|
||||
// quota) still applies regardless of preference — preference just
|
||||
// controls which backends are eligible and the order in which they are tried.
|
||||
export function planBackend(row, quota, { hasHardware, preference = "gemini_first" }) {
|
||||
const balance = computeRemaining(row, quota);
|
||||
|
||||
// Out of credits entirely?
|
||||
@@ -258,15 +275,57 @@ export function planBackend(row, quota, { hasHardware }) {
|
||||
return { allowed: false, backend: null, reason: "out_of_credits" };
|
||||
}
|
||||
|
||||
// Pick backend: Gemini if there's room under the Gemini cap; else
|
||||
// fall back to hardware if configured; else 402.
|
||||
if (balance.gemini_remaining === null || balance.gemini_remaining > 0) {
|
||||
return { allowed: true, backend: "gemini", reason: null };
|
||||
const geminiAvailable =
|
||||
balance.gemini_remaining === null || balance.gemini_remaining > 0;
|
||||
|
||||
switch (preference) {
|
||||
case "hardware_only":
|
||||
if (hasHardware) {
|
||||
return { allowed: true, backend: "hardware", reason: null };
|
||||
}
|
||||
return {
|
||||
allowed: false,
|
||||
backend: null,
|
||||
reason: "hardware_only_not_configured",
|
||||
};
|
||||
|
||||
case "gemini_only":
|
||||
if (geminiAvailable) {
|
||||
return { allowed: true, backend: "gemini", reason: null };
|
||||
}
|
||||
return {
|
||||
allowed: false,
|
||||
backend: null,
|
||||
reason: "gemini_cap_exceeded_no_fallback",
|
||||
};
|
||||
|
||||
case "hardware_first":
|
||||
if (hasHardware) {
|
||||
return { allowed: true, backend: "hardware", reason: null };
|
||||
}
|
||||
if (geminiAvailable) {
|
||||
return { allowed: true, backend: "gemini", reason: null };
|
||||
}
|
||||
return {
|
||||
allowed: false,
|
||||
backend: null,
|
||||
reason: "no_backend_available",
|
||||
};
|
||||
|
||||
case "gemini_first":
|
||||
default:
|
||||
if (geminiAvailable) {
|
||||
return { allowed: true, backend: "gemini", reason: null };
|
||||
}
|
||||
if (hasHardware) {
|
||||
return { allowed: true, backend: "hardware", reason: null };
|
||||
}
|
||||
return {
|
||||
allowed: false,
|
||||
backend: null,
|
||||
reason: "gemini_cap_exceeded_no_hardware",
|
||||
};
|
||||
}
|
||||
if (hasHardware) {
|
||||
return { allowed: true, backend: "hardware", reason: null };
|
||||
}
|
||||
return { allowed: false, backend: null, reason: "gemini_cap_exceeded_no_hardware" };
|
||||
}
|
||||
|
||||
// Debit one credit on a successful call. Persists immediately.
|
||||
|
||||
Reference in New Issue
Block a user