v0.2.7 configurable Gemini models + per-pipeline backend preference
This commit is contained in:
@@ -18,8 +18,13 @@ import fs from "fs/promises";
|
|||||||
import os from "os";
|
import os from "os";
|
||||||
import path from "path";
|
import path from "path";
|
||||||
|
|
||||||
const TRANSCRIPTION_MODEL = "gemini-3-flash-preview";
|
// Defaults used only when the caller doesn't supply explicit model
|
||||||
const ANALYSIS_MODEL = "gemini-3.1-pro-preview";
|
// names. Production callers should pass models pulled from
|
||||||
|
// relay_gemini_transcription_model / relay_gemini_analysis_model in
|
||||||
|
// the relay config so the operator can swap SKUs (e.g. flash for
|
||||||
|
// analysis) without rebuilding the relay.
|
||||||
|
const DEFAULT_TRANSCRIPTION_MODEL = "gemini-3-flash-preview";
|
||||||
|
const DEFAULT_ANALYSIS_MODEL = "gemini-3.1-pro-preview";
|
||||||
const EMPTY_RETRIES = 3;
|
const EMPTY_RETRIES = 3;
|
||||||
|
|
||||||
const TRANSCRIPTION_SAFETY = [
|
const TRANSCRIPTION_SAFETY = [
|
||||||
@@ -29,7 +34,12 @@ const TRANSCRIPTION_SAFETY = [
|
|||||||
{ category: "HARM_CATEGORY_DANGEROUS_CONTENT", threshold: "BLOCK_NONE" },
|
{ category: "HARM_CATEGORY_DANGEROUS_CONTENT", threshold: "BLOCK_NONE" },
|
||||||
];
|
];
|
||||||
|
|
||||||
export function createGeminiBackend({ apiKey, timeoutMs = 900_000 } = {}) {
|
export function createGeminiBackend({
|
||||||
|
apiKey,
|
||||||
|
transcriptionModel = DEFAULT_TRANSCRIPTION_MODEL,
|
||||||
|
analysisModel = DEFAULT_ANALYSIS_MODEL,
|
||||||
|
timeoutMs = 900_000,
|
||||||
|
} = {}) {
|
||||||
if (!apiKey) {
|
if (!apiKey) {
|
||||||
throw new Error("createGeminiBackend: apiKey is required");
|
throw new Error("createGeminiBackend: apiKey is required");
|
||||||
}
|
}
|
||||||
@@ -37,6 +47,10 @@ export function createGeminiBackend({ apiKey, timeoutMs = 900_000 } = {}) {
|
|||||||
apiKey,
|
apiKey,
|
||||||
httpOptions: { timeout: timeoutMs, headersTimeout: timeoutMs },
|
httpOptions: { timeout: timeoutMs, headersTimeout: timeoutMs },
|
||||||
});
|
});
|
||||||
|
// Flash models accept `thinkingLevel: "minimal"`; Pro models reject
|
||||||
|
// it. Detect from the model id so the operator can flip flash <-> pro
|
||||||
|
// via the StartOS action without breaking the request.
|
||||||
|
const txIsFlash = /flash/i.test(transcriptionModel);
|
||||||
|
|
||||||
async function transcribeAudio({
|
async function transcribeAudio({
|
||||||
audio,
|
audio,
|
||||||
@@ -73,9 +87,12 @@ export function createGeminiBackend({ apiKey, timeoutMs = 900_000 } = {}) {
|
|||||||
let result;
|
let result;
|
||||||
for (let attempt = 0; attempt < EMPTY_RETRIES; attempt++) {
|
for (let attempt = 0; attempt < EMPTY_RETRIES; attempt++) {
|
||||||
result = await ai.models.generateContent({
|
result = await ai.models.generateContent({
|
||||||
model: TRANSCRIPTION_MODEL,
|
model: transcriptionModel,
|
||||||
config: {
|
config: {
|
||||||
thinkingConfig: { thinkingLevel: "minimal" },
|
// thinkingLevel: "minimal" is only valid for Flash. Pro
|
||||||
|
// models reject it. Skip when the operator picks a Pro
|
||||||
|
// model for transcription (slower but valid).
|
||||||
|
...(txIsFlash ? { thinkingConfig: { thinkingLevel: "minimal" } } : {}),
|
||||||
safetySettings: TRANSCRIPTION_SAFETY,
|
safetySettings: TRANSCRIPTION_SAFETY,
|
||||||
},
|
},
|
||||||
contents: [
|
contents: [
|
||||||
@@ -111,7 +128,7 @@ export function createGeminiBackend({ apiKey, timeoutMs = 900_000 } = {}) {
|
|||||||
|
|
||||||
async function analyzeText({ prompt }) {
|
async function analyzeText({ prompt }) {
|
||||||
const result = await ai.models.generateContent({
|
const result = await ai.models.generateContent({
|
||||||
model: ANALYSIS_MODEL,
|
model: analysisModel,
|
||||||
contents: [
|
contents: [
|
||||||
{
|
{
|
||||||
role: "user",
|
role: "user",
|
||||||
|
|||||||
@@ -18,6 +18,10 @@ function defaultConfig() {
|
|||||||
relay_gemma_base_url: "",
|
relay_gemma_base_url: "",
|
||||||
relay_parakeet_model: "parakeet-tdt-0.6b-v3",
|
relay_parakeet_model: "parakeet-tdt-0.6b-v3",
|
||||||
relay_gemma_model: "gemma3:27b",
|
relay_gemma_model: "gemma3:27b",
|
||||||
|
relay_gemini_transcription_model: "gemini-3-flash-preview",
|
||||||
|
relay_gemini_analysis_model: "gemini-3.1-pro-preview",
|
||||||
|
relay_transcribe_backend_preference: "gemini_first",
|
||||||
|
relay_analyze_backend_preference: "gemini_first",
|
||||||
relay_keysat_base_url: "https://keysat.xyz",
|
relay_keysat_base_url: "https://keysat.xyz",
|
||||||
relay_admin_username: "",
|
relay_admin_username: "",
|
||||||
relay_admin_password_hash: "",
|
relay_admin_password_hash: "",
|
||||||
|
|||||||
+64
-5
@@ -250,7 +250,24 @@ export function computeRemaining(row, quota) {
|
|||||||
// served at all. Returns { allowed, backend: "gemini"|"hardware",
|
// served at all. Returns { allowed, backend: "gemini"|"hardware",
|
||||||
// reason }. Does NOT debit — that's a separate commit step after the
|
// reason }. Does NOT debit — that's a separate commit step after the
|
||||||
// backend call succeeds.
|
// backend call succeeds.
|
||||||
export function planBackend(row, quota, { hasHardware }) {
|
//
|
||||||
|
// `preference` is the operator-configured routing strategy for the
|
||||||
|
// current pipeline step (transcribe or analyze), one of:
|
||||||
|
// - "gemini_first" try Gemini until cap is exceeded, then hardware
|
||||||
|
// (default — best quality routing on operator's
|
||||||
|
// Gemini budget, hardware as overflow)
|
||||||
|
// - "hardware_first" try hardware first, fall back to Gemini when
|
||||||
|
// hardware isn't configured (lets the operator
|
||||||
|
// conserve Gemini budget for premium use cases)
|
||||||
|
// - "gemini_only" Gemini only, fail when cap exceeded (caps the
|
||||||
|
// operator's spend at the per-tier limit)
|
||||||
|
// - "hardware_only" Hardware only, fail when not configured (good
|
||||||
|
// for fully local / offline deployments)
|
||||||
|
//
|
||||||
|
// The Gemini cap (geminiCapMonthly / geminiCapLifetime on the tier
|
||||||
|
// quota) still applies regardless of preference — preference just
|
||||||
|
// controls the order in which backends are tried.
|
||||||
|
export function planBackend(row, quota, { hasHardware, preference = "gemini_first" }) {
|
||||||
const balance = computeRemaining(row, quota);
|
const balance = computeRemaining(row, quota);
|
||||||
|
|
||||||
// Out of credits entirely?
|
// Out of credits entirely?
|
||||||
@@ -258,15 +275,57 @@ export function planBackend(row, quota, { hasHardware }) {
|
|||||||
return { allowed: false, backend: null, reason: "out_of_credits" };
|
return { allowed: false, backend: null, reason: "out_of_credits" };
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pick backend: Gemini if there's room under the Gemini cap; else
|
const geminiAvailable =
|
||||||
// fall back to hardware if configured; else 402.
|
balance.gemini_remaining === null || balance.gemini_remaining > 0;
|
||||||
if (balance.gemini_remaining === null || balance.gemini_remaining > 0) {
|
|
||||||
|
switch (preference) {
|
||||||
|
case "hardware_only":
|
||||||
|
if (hasHardware) {
|
||||||
|
return { allowed: true, backend: "hardware", reason: null };
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
allowed: false,
|
||||||
|
backend: null,
|
||||||
|
reason: "hardware_only_not_configured",
|
||||||
|
};
|
||||||
|
|
||||||
|
case "gemini_only":
|
||||||
|
if (geminiAvailable) {
|
||||||
|
return { allowed: true, backend: "gemini", reason: null };
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
allowed: false,
|
||||||
|
backend: null,
|
||||||
|
reason: "gemini_cap_exceeded_no_fallback",
|
||||||
|
};
|
||||||
|
|
||||||
|
case "hardware_first":
|
||||||
|
if (hasHardware) {
|
||||||
|
return { allowed: true, backend: "hardware", reason: null };
|
||||||
|
}
|
||||||
|
if (geminiAvailable) {
|
||||||
|
return { allowed: true, backend: "gemini", reason: null };
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
allowed: false,
|
||||||
|
backend: null,
|
||||||
|
reason: "no_backend_available",
|
||||||
|
};
|
||||||
|
|
||||||
|
case "gemini_first":
|
||||||
|
default:
|
||||||
|
if (geminiAvailable) {
|
||||||
return { allowed: true, backend: "gemini", reason: null };
|
return { allowed: true, backend: "gemini", reason: null };
|
||||||
}
|
}
|
||||||
if (hasHardware) {
|
if (hasHardware) {
|
||||||
return { allowed: true, backend: "hardware", reason: null };
|
return { allowed: true, backend: "hardware", reason: null };
|
||||||
}
|
}
|
||||||
return { allowed: false, backend: null, reason: "gemini_cap_exceeded_no_hardware" };
|
return {
|
||||||
|
allowed: false,
|
||||||
|
backend: null,
|
||||||
|
reason: "gemini_cap_exceeded_no_hardware",
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Debit one credit on a successful call. Persists immediately.
|
// Debit one credit on a successful call. Persists immediately.
|
||||||
|
|||||||
+1
-1
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "recap-relay-server",
|
"name": "recap-relay-server",
|
||||||
"version": "0.2.6",
|
"version": "0.2.7",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"private": true,
|
"private": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
|||||||
@@ -61,7 +61,9 @@ export function analyzeRouter() {
|
|||||||
const cfg = await getConfigSnapshot();
|
const cfg = await getConfigSnapshot();
|
||||||
const hasHardware = !!cfg.relay_gemma_base_url;
|
const hasHardware = !!cfg.relay_gemma_base_url;
|
||||||
const quota = await getTierQuotas();
|
const quota = await getTierQuotas();
|
||||||
const plan = planBackend(row, quota, { hasHardware });
|
const preference =
|
||||||
|
cfg.relay_analyze_backend_preference || "gemini_first";
|
||||||
|
const plan = planBackend(row, quota, { hasHardware, preference });
|
||||||
if (!plan.allowed) {
|
if (!plan.allowed) {
|
||||||
const e = await errorEnvelope({
|
const e = await errorEnvelope({
|
||||||
error: plan.reason,
|
error: plan.reason,
|
||||||
@@ -78,7 +80,11 @@ export function analyzeRouter() {
|
|||||||
let result;
|
let result;
|
||||||
try {
|
try {
|
||||||
if (chosenBackend === "gemini") {
|
if (chosenBackend === "gemini") {
|
||||||
const backend = createGeminiBackend({ apiKey: cfg.relay_gemini_api_key });
|
const backend = createGeminiBackend({
|
||||||
|
apiKey: cfg.relay_gemini_api_key,
|
||||||
|
transcriptionModel: cfg.relay_gemini_transcription_model,
|
||||||
|
analysisModel: cfg.relay_gemini_analysis_model,
|
||||||
|
});
|
||||||
result = await backend.analyzeText({ prompt });
|
result = await backend.analyzeText({ prompt });
|
||||||
} else {
|
} else {
|
||||||
const backend = createHardwareBackend({
|
const backend = createHardwareBackend({
|
||||||
|
|||||||
@@ -78,7 +78,9 @@ export function transcribeRouter() {
|
|||||||
const cfg = await getConfigSnapshot();
|
const cfg = await getConfigSnapshot();
|
||||||
const hasHardware = !!cfg.relay_parakeet_base_url;
|
const hasHardware = !!cfg.relay_parakeet_base_url;
|
||||||
const quota = await getTierQuotas();
|
const quota = await getTierQuotas();
|
||||||
const plan = planBackend(row, quota, { hasHardware });
|
const preference =
|
||||||
|
cfg.relay_transcribe_backend_preference || "gemini_first";
|
||||||
|
const plan = planBackend(row, quota, { hasHardware, preference });
|
||||||
if (!plan.allowed) {
|
if (!plan.allowed) {
|
||||||
const e = await errorEnvelope({
|
const e = await errorEnvelope({
|
||||||
error: plan.reason,
|
error: plan.reason,
|
||||||
@@ -96,7 +98,11 @@ export function transcribeRouter() {
|
|||||||
let result;
|
let result;
|
||||||
try {
|
try {
|
||||||
if (chosenBackend === "gemini") {
|
if (chosenBackend === "gemini") {
|
||||||
const backend = createGeminiBackend({ apiKey: cfg.relay_gemini_api_key });
|
const backend = createGeminiBackend({
|
||||||
|
apiKey: cfg.relay_gemini_api_key,
|
||||||
|
transcriptionModel: cfg.relay_gemini_transcription_model,
|
||||||
|
analysisModel: cfg.relay_gemini_analysis_model,
|
||||||
|
});
|
||||||
result = await backend.transcribeAudio({
|
result = await backend.transcribeAudio({
|
||||||
audio: req.file.buffer,
|
audio: req.file.buffer,
|
||||||
mimeType: req.body?.mime_type || req.file.mimetype || "application/octet-stream",
|
mimeType: req.body?.mime_type || req.file.mimetype || "application/octet-stream",
|
||||||
|
|||||||
@@ -5,11 +5,13 @@ import { setParakeetUrl } from './setParakeetUrl'
|
|||||||
import { setGemmaUrl } from './setGemmaUrl'
|
import { setGemmaUrl } from './setGemmaUrl'
|
||||||
import { setAdminPassword } from './setAdminPassword'
|
import { setAdminPassword } from './setAdminPassword'
|
||||||
import { adjustTierQuotas } from './adjustTierQuotas'
|
import { adjustTierQuotas } from './adjustTierQuotas'
|
||||||
|
import { setBackendRouting } from './setBackendRouting'
|
||||||
|
|
||||||
export const actions = sdk.Actions.of()
|
export const actions = sdk.Actions.of()
|
||||||
.addAction(setGeminiKey)
|
.addAction(setGeminiKey)
|
||||||
.addAction(setKeysatBaseUrl)
|
.addAction(setKeysatBaseUrl)
|
||||||
.addAction(setParakeetUrl)
|
.addAction(setParakeetUrl)
|
||||||
.addAction(setGemmaUrl)
|
.addAction(setGemmaUrl)
|
||||||
|
.addAction(setBackendRouting)
|
||||||
.addAction(setAdminPassword)
|
.addAction(setAdminPassword)
|
||||||
.addAction(adjustTierQuotas)
|
.addAction(adjustTierQuotas)
|
||||||
|
|||||||
@@ -0,0 +1,116 @@
|
|||||||
|
import { sdk } from '../sdk'
|
||||||
|
import { configFile } from '../file-models/config.json'
|
||||||
|
|
||||||
|
const { InputSpec, Value } = sdk
|
||||||
|
|
||||||
|
// Lets the operator tune which backend gets tried first per pipeline
|
||||||
|
// step (transcribe vs analyze) AND which Gemini SKU is used when
|
||||||
|
// Gemini is the backend. All four knobs live-reload — change them
|
||||||
|
// via this action and the next relay request honors the new values
|
||||||
|
// without a daemon restart.
|
||||||
|
|
||||||
|
/**
 * Form definition for the "Set Backend Routing & Models" action.
 *
 * Two free-text fields choose the Gemini SKU per pipeline step, and two
 * selects choose the backend routing strategy per pipeline step. The
 * field keys are the same keys the submit handler merges into the
 * relay config file.
 */
const inputSpec = InputSpec.of({
  // ── Gemini model selection ──
  relay_gemini_transcription_model: Value.text({
    name: 'Gemini Transcription Model',
    description:
      "The Gemini SKU used when a transcription request is routed to Gemini. Flash is recommended (cheap, fast, multimodal). Examples: gemini-3-flash-preview (default), gemini-2.5-flash, gemini-2.0-flash, gemini-3-pro-preview (slower + pricier but higher quality on edge cases).",
    required: true,
    default: 'gemini-3-flash-preview',
    minLength: 1,
    maxLength: 128,
  }),
  relay_gemini_analysis_model: Value.text({
    name: 'Gemini Analysis Model',
    description:
      "The Gemini SKU used when an analysis request is routed to Gemini. Pro is the default for higher-quality structured output. Swap to a flash SKU (e.g. gemini-3-flash-preview) for faster + cheaper analysis at some loss of section-boundary precision.",
    required: true,
    default: 'gemini-3.1-pro-preview',
    minLength: 1,
    maxLength: 128,
  }),

  // ── Backend routing preference per pipeline ──
  relay_transcribe_backend_preference: Value.select({
    name: 'Transcribe Backend Preference',
    description:
      'Routing strategy for transcription requests. The selected option controls the ORDER in which the relay tries each backend. The Gemini per-tier cap still applies regardless of this setting.',
    default: 'gemini_first',
    values: routingChoices('Parakeet'),
  }),
  relay_analyze_backend_preference: Value.select({
    name: 'Analyze Backend Preference',
    description:
      'Routing strategy for analysis requests. Same options as transcription but applies to the analyze step independently — you can route transcribe to hardware and analyze to Gemini, or vice versa.',
    default: 'gemini_first',
    values: routingChoices('Gemma'),
  }),
})

/**
 * Builds the option-key → label table shared by both routing selects.
 * Only the name of the operator-hardware service differs between the
 * two pipelines. Declared as a hoisted function so it can sit below the
 * spec that calls it.
 *
 * @param hardwareName Display name of the hardware backend for this
 *   pipeline step ("Parakeet" for transcribe, "Gemma" for analyze).
 */
function routingChoices(hardwareName: string) {
  return {
    gemini_first: `Gemini first → operator hardware (${hardwareName}) when cap exceeded`,
    hardware_first: 'Operator hardware first → Gemini as fallback',
    gemini_only: 'Gemini only — fail when cap is exceeded',
    hardware_only: `Hardware only — fail when no ${hardwareName} endpoint is configured`,
  }
}
|
||||||
|
|
||||||
|
// Fallback SKUs used when the stored config or the submitted form does
// not contain a usable model id.
const FALLBACK_TRANSCRIPTION_MODEL = 'gemini-3-flash-preview'
const FALLBACK_ANALYSIS_MODEL = 'gemini-3.1-pro-preview'

// The four routing strategies offered by both preference selects.
type BackendPreference =
  | 'gemini_first'
  | 'hardware_first'
  | 'gemini_only'
  | 'hardware_only'

/**
 * Trims a submitted model id and substitutes `fallback` when nothing is
 * left. Trimming happens BEFORE the fallback so a whitespace-only
 * submission cannot be persisted as an empty model id.
 */
const normalizeModelId = (
  value: string | null | undefined,
  fallback: string,
): string => value?.trim() || fallback

/**
 * "Set Backend Routing & Models" action.
 *
 * Pre-fills the form from the current config file (falling back to the
 * default SKUs and to "gemini_first" when a key is missing or empty),
 * and on submit merges the four values back into the config file.
 */
export const setBackendRouting = sdk.Action.withInput(
  'set-backend-routing',

  // Action metadata.
  async ({ effects }) => ({
    name: 'Set Backend Routing & Models',
    description:
      "Tune which Gemini SKUs the relay uses and the per-pipeline backend pecking order. Live-reloaded — changes take effect on the next request, no restart.",
    warning: null,
    allowedStatuses: 'any',
    group: 'AI Backends',
    visibility: 'enabled',
  }),

  inputSpec,

  // Pre-fill: show what is currently stored, or the defaults.
  async ({ effects }) => {
    const config = await configFile.read().once()
    return {
      relay_gemini_transcription_model:
        config?.relay_gemini_transcription_model || FALLBACK_TRANSCRIPTION_MODEL,
      relay_gemini_analysis_model:
        config?.relay_gemini_analysis_model || FALLBACK_ANALYSIS_MODEL,
      relay_transcribe_backend_preference:
        (config?.relay_transcribe_backend_preference as
          | BackendPreference
          | undefined) || 'gemini_first',
      relay_analyze_backend_preference:
        (config?.relay_analyze_backend_preference as
          | BackendPreference
          | undefined) || 'gemini_first',
    }
  },

  // Submit: persist the four knobs into the relay config file.
  async ({ effects, input }) => {
    await configFile.merge(effects, {
      relay_gemini_transcription_model: normalizeModelId(
        input.relay_gemini_transcription_model,
        FALLBACK_TRANSCRIPTION_MODEL,
      ),
      relay_gemini_analysis_model: normalizeModelId(
        input.relay_gemini_analysis_model,
        FALLBACK_ANALYSIS_MODEL,
      ),
      relay_transcribe_backend_preference: input.relay_transcribe_backend_preference,
      relay_analyze_backend_preference: input.relay_analyze_backend_preference,
    })
    return null
  },
)
|
||||||
@@ -36,6 +36,33 @@ export const configFile = FileHelper.json(
|
|||||||
relay_parakeet_model: z.string().default('parakeet-tdt-0.6b-v3'),
|
relay_parakeet_model: z.string().default('parakeet-tdt-0.6b-v3'),
|
||||||
relay_gemma_model: z.string().default('gemma3:27b'),
|
relay_gemma_model: z.string().default('gemma3:27b'),
|
||||||
|
|
||||||
|
// ── Gemini model selection ──
|
||||||
|
// Operator can pick which Gemini SKU is used per pipeline step
|
||||||
|
// without rebuilding the relay. Defaults match Google's typical
|
||||||
|
// recommendations: Flash for transcription (cheap, fast,
|
||||||
|
// multimodal-capable), Pro for analysis (higher quality on
|
||||||
|
// structured-JSON outputs). Operators can swap to flash for
|
||||||
|
// analysis when they want faster + cheaper at the cost of some
|
||||||
|
// section-boundary precision.
|
||||||
|
relay_gemini_transcription_model: z.string().default('gemini-3-flash-preview'),
|
||||||
|
relay_gemini_analysis_model: z.string().default('gemini-3.1-pro-preview'),
|
||||||
|
|
||||||
|
// ── Backend routing preference per pipeline ──
|
||||||
|
// Controls whether the relay tries Gemini first (current default —
|
||||||
|
// best quality, costs operator's Gemini API budget) or the
|
||||||
|
// operator-hardware backend first (saves Gemini budget, may be
|
||||||
|
// slower depending on the operator's hardware). One of:
|
||||||
|
// - "gemini_first" try Gemini until per-tier cap, then hardware
|
||||||
|
// - "hardware_first" try hardware first, fall back to Gemini
|
||||||
|
// - "gemini_only" Gemini only, fail when cap is exceeded
|
||||||
|
// - "hardware_only" Hardware only, fail when not configured
|
||||||
|
relay_transcribe_backend_preference: z
|
||||||
|
.enum(['gemini_first', 'hardware_first', 'gemini_only', 'hardware_only'])
|
||||||
|
.default('gemini_first'),
|
||||||
|
relay_analyze_backend_preference: z
|
||||||
|
.enum(['gemini_first', 'hardware_first', 'gemini_only', 'hardware_only'])
|
||||||
|
.default('gemini_first'),
|
||||||
|
|
||||||
// ── License server ──
|
// ── License server ──
|
||||||
// URL of the Keysat license server used for the cached online
|
// URL of the Keysat license server used for the cached online
|
||||||
// license-validation check. Defaults to the public endpoint;
|
// license-validation check. Defaults to the public endpoint;
|
||||||
|
|||||||
@@ -7,8 +7,9 @@ import { v_0_2_3 } from './v0.2.3'
|
|||||||
import { v_0_2_4 } from './v0.2.4'
|
import { v_0_2_4 } from './v0.2.4'
|
||||||
import { v_0_2_5 } from './v0.2.5'
|
import { v_0_2_5 } from './v0.2.5'
|
||||||
import { v_0_2_6 } from './v0.2.6'
|
import { v_0_2_6 } from './v0.2.6'
|
||||||
|
import { v_0_2_7 } from './v0.2.7'
|
||||||
|
|
||||||
export const versionGraph = VersionGraph.of({
|
export const versionGraph = VersionGraph.of({
|
||||||
current: v_0_2_6,
|
current: v_0_2_7,
|
||||||
other: [v_0_2_5, v_0_2_4, v_0_2_3, v_0_2_2, v_0_2_1, v_0_2_0, v_0_1_0],
|
other: [v_0_2_6, v_0_2_5, v_0_2_4, v_0_2_3, v_0_2_2, v_0_2_1, v_0_2_0, v_0_1_0],
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -0,0 +1,13 @@
|
|||||||
|
import { VersionInfo } from '@start9labs/start-sdk'
|
||||||
|
|
||||||
|
// English release notes shown for the 0.2.7 package version.
const releaseNotes_0_2_7 =
  'New "Set Backend Routing & Models" action exposes four operator knobs: Gemini transcription model, Gemini analysis model, transcribe backend preference (gemini_first / hardware_first / gemini_only / hardware_only), and analyze backend preference. Routing strategies are honored by planBackend per-pipeline, so the operator can route transcribe to Parakeet for speed AND analyze through Gemini Flash for faster + cheaper structured output, or any other combination.'

// 0.2.7 performs no work in either migration direction.
const noopMigration_0_2_7 = async () => {}

/**
 * Version descriptor for 0.2.7: release notes plus empty up/down
 * migrations.
 */
export const v_0_2_7 = VersionInfo.of({
  version: '0.2.7:0',
  releaseNotes: { en_US: releaseNotes_0_2_7 },
  migrations: {
    up: noopMigration_0_2_7,
    down: noopMigration_0_2_7,
  },
})
|
||||||
Reference in New Issue
Block a user