v0.2.1 model names config-driven

2026-05-11 20:27:19 -05:00
parent cccbee27e4
commit c9f051cd07
9 changed files with 75 additions and 28 deletions
@@ -18,26 +18,25 @@ const ANALYZE_MAX_TOKENS = 16000;
 // internet, so generous timeouts. Same scale as Recap's defaults.
 const DEFAULT_TIMEOUT_MS = 900_000;
-// Pull the model identifier out of the prompt if the operator wants a
+// Defaults used only when the route handler doesn't supply explicit
-// specific Gemma SKU. We default to "gemma3:27b" which is the typical
+// model names (e.g. a unit test instantiating the backend directly).
-// Ollama tag for the analysis-capable Gemma model. Operators with a
+// In production the model names come from relay-config.json via
-// different deployment can update this via a future StartOS action;
+// setParakeetUrl / setGemmaUrl, so the operator can swap models on
-// for v0.2 it's hardcoded.
+// their Ollama deployment without rebuilding the relay.
-const HARDWARE_ANALYZE_MODEL = process.env.RELAY_GEMMA_MODEL || "gemma3:27b";
+const DEFAULT_TRANSCRIBE_MODEL = "parakeet-tdt-0.6b-v3";
-
+const DEFAULT_ANALYZE_MODEL = "gemma3:27b";
 // Parakeet's typical model identifier. Mirrors what Recap's whisper.js
 // sends when the operator points the relay at a NeMo Parakeet HTTP
 // wrapper. Configurable via env var for non-default deployments.
 const HARDWARE_TRANSCRIBE_MODEL =
  process.env.RELAY_PARAKEET_MODEL || "parakeet-tdt-0.6b-v3";
 export function createHardwareBackend({
  parakeetBaseURL = "",
  gemmaBaseURL = "",
  parakeetModel = DEFAULT_TRANSCRIBE_MODEL,
  gemmaModel = DEFAULT_ANALYZE_MODEL,
  timeoutMs = DEFAULT_TIMEOUT_MS,
 } = {}) {
  const parakeet = parakeetBaseURL ? parakeetBaseURL.replace(/\/$/, "") : "";
  const gemma = gemmaBaseURL ? gemmaBaseURL.replace(/\/$/, "") : "";
  const transcribeModel = parakeetModel || DEFAULT_TRANSCRIBE_MODEL;
  const analyzeModel = gemmaModel || DEFAULT_ANALYZE_MODEL;
  return {
    hasTranscribe: !!parakeet,
@@ -69,7 +68,7 @@ export function createHardwareBackend({
        const form = new FormData();
        const blob = new Blob([audio], { type: mimeType });
        form.append("file", blob, "audio.bin");
-        form.append("model", HARDWARE_TRANSCRIBE_MODEL);
+        form.append("model", transcribeModel);
        if (richMode) {
          form.append("response_format", "verbose_json");
          form.append("timestamp_granularities[]", "segment");
@@ -171,7 +170,7 @@ export function createHardwareBackend({
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify({
-            model: HARDWARE_ANALYZE_MODEL,
+            model: analyzeModel,
            max_tokens: ANALYZE_MAX_TOKENS,
            messages: [{ role: "user", content: prompt }],
            stream: false,
@@ -16,6 +16,8 @@ function defaultConfig() {
    relay_gemini_api_key: "",
    relay_parakeet_base_url: "",
    relay_gemma_base_url: "",
    relay_parakeet_model: "parakeet-tdt-0.6b-v3",
    relay_gemma_model: "gemma3:27b",
    relay_keysat_base_url: "https://keysat.xyz",
    relay_admin_username: "",
    relay_admin_password_hash: "",
@@ -84,6 +84,8 @@ export function analyzeRouter() {
        const backend = createHardwareBackend({
          parakeetBaseURL: cfg.relay_parakeet_base_url,
          gemmaBaseURL: cfg.relay_gemma_base_url,
          parakeetModel: cfg.relay_parakeet_model,
          gemmaModel: cfg.relay_gemma_model,
        });
        result = await backend.analyzeText({ prompt });
      }
@@ -110,6 +110,8 @@ export function transcribeRouter() {
        const backend = createHardwareBackend({
          parakeetBaseURL: cfg.relay_parakeet_base_url,
          gemmaBaseURL: cfg.relay_gemma_base_url,
          parakeetModel: cfg.relay_parakeet_model,
          gemmaModel: cfg.relay_gemma_model,
        });
        result = await backend.transcribeAudio({
          audio: req.file.buffer,
@@ -3,9 +3,10 @@ import { configFile } from '../file-models/config.json'
 const { InputSpec, Value } = sdk
-// Optional Gemma/Ollama endpoint for the operator-hardware analysis
+// Operator's Gemma (or any OpenAI-compatible chat-completions) endpoint
-// fallback. Counterpart to setParakeetUrl — Parakeet handles transcribe
+// + which model to request. Both fields live-reload so the operator
-// overflow, this handles analyze overflow.
+// can pull a different Gemma SKU on Ollama and update the model name
 // here without restarting the relay.
 const inputSpec = InputSpec.of({
  relay_gemma_base_url: Value.text({
    name: 'Gemma Base URL',
@@ -22,6 +23,15 @@ const inputSpec = InputSpec.of({
      },
    ],
  }),
  relay_gemma_model: Value.text({
    name: 'Gemma Model Name',
    description:
      'The model identifier sent in upstream chat-completions requests. Match whatever name your Ollama / vLLM / llama.cpp deployment exposes (run `ollama list` to see what you have pulled). Example: gemma3:27b, gemma2:9b, llama3.1:70b',
    required: true,
    default: 'gemma3:27b',
    minLength: 1,
    maxLength: 128,
  }),
 })
 export const setGemmaUrl = sdk.Action.withInput(
@@ -30,7 +40,7 @@ export const setGemmaUrl = sdk.Action.withInput(
  async ({ effects }) => ({
    name: 'Set Gemma URL',
    description:
-      'Optional. Where the relay forwards analysis requests once a user exceeds their monthly Gemini cap. Leave empty to disable the fallback.',
+      'Optional. Where the relay forwards analysis requests once a user exceeds their monthly Gemini cap. Leave URL empty to disable the fallback.',
    warning: null,
    allowedStatuses: 'any',
    group: null,
@@ -43,12 +53,14 @@ export const setGemmaUrl = sdk.Action.withInput(
    const config = await configFile.read().once()
    return {
      relay_gemma_base_url: config?.relay_gemma_base_url || '',
      relay_gemma_model: config?.relay_gemma_model || 'gemma3:27b',
    }
  },
  // Submit handler: persists the operator's Gemma endpoint and model name
  // into the relay config file via configFile.merge. Always returns null.
  async ({ effects, input }) => {
    const baseUrl = (input.relay_gemma_base_url || '').trim()
    // Trim BEFORE applying the fallback. A whitespace-only submission is
    // truthy (and satisfies minLength: 1), so falling back first and
    // trimming afterwards would persist an empty model name.
    const modelName = (input.relay_gemma_model || '').trim() || 'gemma3:27b'
    await configFile.merge(effects, {
      relay_gemma_base_url: baseUrl,
      relay_gemma_model: modelName,
    })
    return null
  },
@@ -3,13 +3,9 @@ import { configFile } from '../file-models/config.json'
 const { InputSpec, Value } = sdk
-// Optional Parakeet endpoint for the operator-hardware fallback path.
+// Operator's Parakeet endpoint + which model to request. Both fields
-// When a Pro/Max user exceeds their Gemini monthly cap, the relay
+// live-reload — change them via this action and the next relay request
-// routes transcribe requests here instead. Empty disables the fallback
+// picks up the new values without a daemon restart.
 // Leaving the URL empty disables the fallback — over-cap users get 402.
 //
 // In a typical setup this points at the operator's NVIDIA Spark or
 // similar local GPU box running the NeMo / Parakeet HTTP wrapper.
 const inputSpec = InputSpec.of({
  relay_parakeet_base_url: Value.text({
    name: 'Parakeet Base URL',
@@ -26,6 +22,15 @@ const inputSpec = InputSpec.of({
      },
    ],
  }),
  relay_parakeet_model: Value.text({
    name: 'Parakeet Model Name',
    description:
      'The model identifier sent in upstream requests (the "model" field in the OpenAI Whisper API body). Match whatever name your Parakeet wrapper expects. Default: parakeet-tdt-0.6b-v3',
    required: true,
    default: 'parakeet-tdt-0.6b-v3',
    minLength: 1,
    maxLength: 128,
  }),
 })
 export const setParakeetUrl = sdk.Action.withInput(
@@ -34,7 +39,7 @@ export const setParakeetUrl = sdk.Action.withInput(
  async ({ effects }) => ({
    name: 'Set Parakeet URL',
    description:
-      "Optional. Where the relay forwards transcription requests once a user exceeds their monthly Gemini cap. Leave empty to disable the operator-hardware fallback.",
+      "Optional. Where the relay forwards transcription requests once a user exceeds their monthly Gemini cap. Leave URL empty to disable the operator-hardware fallback.",
    warning: null,
    allowedStatuses: 'any',
    group: null,
@@ -47,12 +52,16 @@ export const setParakeetUrl = sdk.Action.withInput(
    const config = await configFile.read().once()
    return {
      relay_parakeet_base_url: config?.relay_parakeet_base_url || '',
      relay_parakeet_model:
        config?.relay_parakeet_model || 'parakeet-tdt-0.6b-v3',
    }
  },
  // Submit handler: persists the operator's Parakeet endpoint and model
  // name into the relay config file via configFile.merge. Always returns
  // null.
  async ({ effects, input }) => {
    const baseUrl = (input.relay_parakeet_base_url || '').trim()
    // Trim BEFORE applying the fallback. A whitespace-only submission is
    // truthy (and satisfies minLength: 1), so falling back first and
    // trimming afterwards would persist an empty model name.
    const modelName =
      (input.relay_parakeet_model || '').trim() || 'parakeet-tdt-0.6b-v3'
    await configFile.merge(effects, {
      relay_parakeet_base_url: baseUrl,
      relay_parakeet_model: modelName,
    })
    return null
  },
@@ -28,6 +28,13 @@ export const configFile = FileHelper.json(
    // and return 402 once exceeded (no fallback).
    relay_parakeet_base_url: z.string().default(''),
    relay_gemma_base_url: z.string().default(''),
    // Model identifiers to send in the upstream request bodies. The
    // operator's Ollama or Parakeet wrapper may serve different models
    // depending on what's been pulled; making these config-driven
    // means the operator can swap models without rebuilding the relay.
    // Live-reloaded — change applies to the next request.
    relay_parakeet_model: z.string().default('parakeet-tdt-0.6b-v3'),
    relay_gemma_model: z.string().default('gemma3:27b'),
    // ── License server ──
    // URL of the Keysat license server used for the cached online
@@ -1,8 +1,9 @@
 import { VersionGraph } from '@start9labs/start-sdk'
 import { v_0_1_0 } from './v0.1.0'
 import { v_0_2_0 } from './v0.2.0'
 import { v_0_2_1 } from './v0.2.1'
 export const versionGraph = VersionGraph.of({
-  current: v_0_2_0,
+  current: v_0_2_1,
-  other: [v_0_1_0],
+  other: [v_0_2_0, v_0_1_0],
 })
@@ -0,0 +1,13 @@
 import { VersionInfo } from '@start9labs/start-sdk'
 // Version descriptor for release 0.2.1, built with VersionInfo.of.
 // Carries the version string, the English release notes, and the
 // up/down migration hooks for this version.
 export const v_0_2_1 = VersionInfo.of({
  version: '0.2.1:0',
  releaseNotes: {
    en_US:
      'Parakeet + Gemma model names are now config-driven (live-reloadable via the Set Parakeet URL / Set Gemma URL actions). Operators can swap which model their Ollama / Parakeet wrapper serves without rebuilding the relay.',
  },
  migrations: {
    // Both hooks are intentionally empty: this version performs no data
    // migration in either direction.
    // NOTE(review): presumably safe because the new relay_*_model config
    // keys carry schema defaults — confirm existing installs pick them up.
    up: async ({ effects }) => {},
    down: async ({ effects }) => {},
  },
 })