v0.2.7 configurable Gemini models + per-pipeline backend preference

2026-05-12 00:15:07 -05:00
parent cd377683fb
commit 9af70302b1
11 changed files with 273 additions and 22 deletions
@@ -18,8 +18,13 @@ import fs from "fs/promises";
 import os from "os";
 import path from "path";

-const TRANSCRIPTION_MODEL = "gemini-3-flash-preview";
-const ANALYSIS_MODEL = "gemini-3.1-pro-preview";
+// Defaults used only when the caller doesn't supply explicit model
+// names. Production callers should pass models pulled from
+// relay_gemini_transcription_model / relay_gemini_analysis_model in
+// the relay config so the operator can swap SKUs (e.g. flash for
+// analysis) without rebuilding the relay.
+const DEFAULT_TRANSCRIPTION_MODEL = "gemini-3-flash-preview";
+const DEFAULT_ANALYSIS_MODEL = "gemini-3.1-pro-preview";
 const EMPTY_RETRIES = 3;

 const TRANSCRIPTION_SAFETY = [
@@ -29,7 +34,12 @@ const TRANSCRIPTION_SAFETY = [
  { category: "HARM_CATEGORY_DANGEROUS_CONTENT", threshold: "BLOCK_NONE" },
 ];

-export function createGeminiBackend({ apiKey, timeoutMs = 900_000 } = {}) {
+export function createGeminiBackend({
+  apiKey,
+  transcriptionModel = DEFAULT_TRANSCRIPTION_MODEL,
+  analysisModel = DEFAULT_ANALYSIS_MODEL,
+  timeoutMs = 900_000,
+} = {}) {
  if (!apiKey) {
    throw new Error("createGeminiBackend: apiKey is required");
  }
@@ -37,6 +47,10 @@ export function createGeminiBackend({ apiKey, timeoutMs = 900_000 } = {}) {
    apiKey,
    httpOptions: { timeout: timeoutMs, headersTimeout: timeoutMs },
  });
+  // Flash models accept `thinkingLevel: "minimal"`; Pro models reject it.
+  // Match "flash" (case-insensitive) in the model id so the operator can
+  // flip flash <-> pro via the StartOS action without breaking the request.
+  const txIsFlash = /flash/i.test(transcriptionModel);

  async function transcribeAudio({
    audio,
@@ -73,9 +87,12 @@ export function createGeminiBackend({ apiKey, timeoutMs = 900_000 } = {}) {
      let result;
      for (let attempt = 0; attempt < EMPTY_RETRIES; attempt++) {
        result = await ai.models.generateContent({
-          model: TRANSCRIPTION_MODEL,
+          model: transcriptionModel,
          config: {
-            thinkingConfig: { thinkingLevel: "minimal" },
+            // thinkingLevel: "minimal" is only valid for Flash; Pro
+            // models reject it. Omit thinkingConfig when the operator
+            // picks a non-Flash transcription model (slower but valid).
+            ...(txIsFlash ? { thinkingConfig: { thinkingLevel: "minimal" } } : {}),
            safetySettings: TRANSCRIPTION_SAFETY,
          },
          contents: [
@@ -111,7 +128,7 @@ export function createGeminiBackend({ apiKey, timeoutMs = 900_000 } = {}) {

  async function analyzeText({ prompt }) {
    const result = await ai.models.generateContent({
-      model: ANALYSIS_MODEL,
+      model: analysisModel,
      contents: [
        {
          role: "user",