v0.2.7 configurable Gemini models + per-pipeline backend preference
This commit is contained in:
@@ -18,8 +18,13 @@ import fs from "fs/promises";
|
||||
import os from "os";
|
||||
import path from "path";
|
||||
|
||||
const TRANSCRIPTION_MODEL = "gemini-3-flash-preview";
|
||||
const ANALYSIS_MODEL = "gemini-3.1-pro-preview";
|
||||
// Defaults used only when the caller doesn't supply explicit model
|
||||
// names. Production callers should pass models pulled from
|
||||
// relay_gemini_transcription_model / relay_gemini_analysis_model in
|
||||
// the relay config so the operator can swap SKUs (e.g. flash for
|
||||
// analysis) without rebuilding the relay.
|
||||
const DEFAULT_TRANSCRIPTION_MODEL = "gemini-3-flash-preview";
|
||||
const DEFAULT_ANALYSIS_MODEL = "gemini-3.1-pro-preview";
|
||||
// Upper bound on generateContent attempts made by the transcription loop.
// NOTE(review): the name suggests a retry happens only when the model
// returns an empty result — confirm against the loop body.
const EMPTY_RETRIES = 3;
|
||||
|
||||
const TRANSCRIPTION_SAFETY = [
|
||||
@@ -29,7 +34,12 @@ const TRANSCRIPTION_SAFETY = [
|
||||
{ category: "HARM_CATEGORY_DANGEROUS_CONTENT", threshold: "BLOCK_NONE" },
|
||||
];
|
||||
|
||||
export function createGeminiBackend({ apiKey, timeoutMs = 900_000 } = {}) {
|
||||
export function createGeminiBackend({
|
||||
apiKey,
|
||||
transcriptionModel = DEFAULT_TRANSCRIPTION_MODEL,
|
||||
analysisModel = DEFAULT_ANALYSIS_MODEL,
|
||||
timeoutMs = 900_000,
|
||||
} = {}) {
|
||||
if (!apiKey) {
|
||||
throw new Error("createGeminiBackend: apiKey is required");
|
||||
}
|
||||
@@ -37,6 +47,10 @@ export function createGeminiBackend({ apiKey, timeoutMs = 900_000 } = {}) {
|
||||
apiKey,
|
||||
httpOptions: { timeout: timeoutMs, headersTimeout: timeoutMs },
|
||||
});
|
||||
// Flash models accept `thinkingLevel: "minimal"`; Pro models reject
|
||||
// it. Detect Flash via a case-insensitive "flash" match on the model id so the operator can flip flash <-> pro
|
||||
// via the StartOS action without breaking the request.
|
||||
const txIsFlash = /flash/i.test(transcriptionModel);
|
||||
|
||||
async function transcribeAudio({
|
||||
audio,
|
||||
@@ -73,9 +87,12 @@ export function createGeminiBackend({ apiKey, timeoutMs = 900_000 } = {}) {
|
||||
let result;
|
||||
for (let attempt = 0; attempt < EMPTY_RETRIES; attempt++) {
|
||||
result = await ai.models.generateContent({
|
||||
model: TRANSCRIPTION_MODEL,
|
||||
model: transcriptionModel,
|
||||
config: {
|
||||
thinkingConfig: { thinkingLevel: "minimal" },
|
||||
// thinkingLevel: "minimal" is only valid for Flash. Pro
|
||||
// models reject it. Skip when the operator picks a Pro
|
||||
// model for transcription (slower but valid).
|
||||
...(txIsFlash ? { thinkingConfig: { thinkingLevel: "minimal" } } : {}),
|
||||
safetySettings: TRANSCRIPTION_SAFETY,
|
||||
},
|
||||
contents: [
|
||||
@@ -111,7 +128,7 @@ export function createGeminiBackend({ apiKey, timeoutMs = 900_000 } = {}) {
|
||||
|
||||
async function analyzeText({ prompt }) {
|
||||
const result = await ai.models.generateContent({
|
||||
model: ANALYSIS_MODEL,
|
||||
model: analysisModel,
|
||||
contents: [
|
||||
{
|
||||
role: "user",
|
||||
|
||||
Reference in New Issue
Block a user