v0.2.1 model names config-driven

2026-05-11 20:27:19 -05:00
parent cccbee27e4
commit c9f051cd07
9 changed files with 75 additions and 28 deletions
@@ -3,9 +3,10 @@ import { configFile } from '../file-models/config.json'

 const { InputSpec, Value } = sdk

-// Optional Gemma/Ollama endpoint for the operator-hardware analysis
-// fallback. Counterpart to setParakeetUrl — Parakeet handles transcribe
-// overflow, this handles analyze overflow.
+// Operator's Gemma (or any OpenAI-compatible chat-completions) endpoint
+// and the model to request from it. Both fields live-reload, so the
+// operator can pull a different Gemma SKU on Ollama and update the
+// model name here without restarting the relay.
 const inputSpec = InputSpec.of({
  relay_gemma_base_url: Value.text({
    name: 'Gemma Base URL',
@@ -22,6 +23,15 @@ const inputSpec = InputSpec.of({
      },
    ],
  }),
+  relay_gemma_model: Value.text({
+    name: 'Gemma Model Name',
+    description:
+      'The model identifier sent in upstream chat-completions requests. Match whatever name your Ollama / vLLM / llama.cpp deployment exposes (run `ollama list` to see what you have pulled). Example: gemma3:27b, gemma2:9b, llama3.1:70b',
+    required: true,
+    default: 'gemma3:27b',
+    minLength: 1,
+    maxLength: 128,
+  }),
 })

 export const setGemmaUrl = sdk.Action.withInput(
@@ -30,7 +40,7 @@ export const setGemmaUrl = sdk.Action.withInput(
  async ({ effects }) => ({
    name: 'Set Gemma URL',
    description:
-      'Optional. Where the relay forwards analysis requests once a user exceeds their monthly Gemini cap. Leave empty to disable the fallback.',
+      'Optional. Where the relay forwards analysis requests once a user exceeds their monthly Gemini cap. Leave URL empty to disable the fallback.',
    warning: null,
    allowedStatuses: 'any',
    group: null,
@@ -43,12 +53,14 @@ export const setGemmaUrl = sdk.Action.withInput(
    const config = await configFile.read().once()
    return {
      relay_gemma_base_url: config?.relay_gemma_base_url || '',
+      relay_gemma_model: config?.relay_gemma_model || 'gemma3:27b',
    }
  },

  // Submit handler: writes the operator-supplied Gemma endpoint and model
  // name into the config file via configFile.merge. Returns null.
  async ({ effects, input }) => {
    await configFile.merge(effects, {
      relay_gemma_base_url: (input.relay_gemma_base_url || '').trim(),
+      // Trim BEFORE applying the default: a whitespace-only entry is truthy
+      // and satisfies minLength: 1, so defaulting first would persist ''
+      // as the model name instead of falling back to 'gemma3:27b'.
+      relay_gemma_model:
+        (input.relay_gemma_model || '').trim() || 'gemma3:27b',
    })
    return null
  },
@@ -3,13 +3,9 @@ import { configFile } from '../file-models/config.json'

 const { InputSpec, Value } = sdk

-// Optional Parakeet endpoint for the operator-hardware fallback path.
-// When a Pro/Max user exceeds their Gemini monthly cap, the relay
-// routes transcribe requests here instead. Empty disables the fallback
-// — over-cap users get 402.
-//
-// In a typical setup this points at the operator's NVIDIA Spark or
-// similar local GPU box running the NeMo / Parakeet HTTP wrapper.
+// Operator's Parakeet endpoint and the model to request from it. Both
+// fields live-reload: change them via this action and the next relay
+// request picks up the new values without a daemon restart.
 const inputSpec = InputSpec.of({
  relay_parakeet_base_url: Value.text({
    name: 'Parakeet Base URL',
@@ -26,6 +22,15 @@ const inputSpec = InputSpec.of({
      },
    ],
  }),
+  relay_parakeet_model: Value.text({
+    name: 'Parakeet Model Name',
+    description:
+      'The model identifier sent in upstream requests (the "model" field in the OpenAI Whisper API body). Match whatever name your Parakeet wrapper expects. Default: parakeet-tdt-0.6b-v3',
+    required: true,
+    default: 'parakeet-tdt-0.6b-v3',
+    minLength: 1,
+    maxLength: 128,
+  }),
 })

 export const setParakeetUrl = sdk.Action.withInput(
@@ -34,7 +39,7 @@ export const setParakeetUrl = sdk.Action.withInput(
  async ({ effects }) => ({
    name: 'Set Parakeet URL',
    description:
-      "Optional. Where the relay forwards transcription requests once a user exceeds their monthly Gemini cap. Leave empty to disable the operator-hardware fallback.",
+      "Optional. Where the relay forwards transcription requests once a user exceeds their monthly Gemini cap. Leave URL empty to disable the operator-hardware fallback.",
    warning: null,
    allowedStatuses: 'any',
    group: null,
@@ -47,12 +52,16 @@ export const setParakeetUrl = sdk.Action.withInput(
    const config = await configFile.read().once()
    return {
      relay_parakeet_base_url: config?.relay_parakeet_base_url || '',
+      relay_parakeet_model:
+        config?.relay_parakeet_model || 'parakeet-tdt-0.6b-v3',
    }
  },

  // Submit handler: writes the operator-supplied Parakeet endpoint and
  // model name into the config file via configFile.merge. Returns null.
  async ({ effects, input }) => {
    await configFile.merge(effects, {
      relay_parakeet_base_url: (input.relay_parakeet_base_url || '').trim(),
+      // Trim BEFORE applying the default: a whitespace-only entry is truthy
+      // and satisfies minLength: 1, so defaulting first would persist ''
+      // as the model name instead of falling back to the default model.
+      relay_parakeet_model:
+        (input.relay_parakeet_model || '').trim() || 'parakeet-tdt-0.6b-v3',
    })
    return null
  },
@@ -28,6 +28,13 @@ export const configFile = FileHelper.json(
    // and return 402 once exceeded (no fallback).
    relay_parakeet_base_url: z.string().default(''),
    relay_gemma_base_url: z.string().default(''),
+    // Model identifiers to send in the upstream request bodies. The
+    // operator's Ollama or Parakeet wrapper may serve different models
+    // depending on what's been pulled; making these config-driven
+    // means the operator can swap models without rebuilding the relay.
+    // Live-reloaded — change applies to the next request.
+    relay_parakeet_model: z.string().default('parakeet-tdt-0.6b-v3'),
+    relay_gemma_model: z.string().default('gemma3:27b'),

    // ── License server ──
    // URL of the Keysat license server used for the cached online
@@ -1,8 +1,9 @@
 import { VersionGraph } from '@start9labs/start-sdk'
 import { v_0_1_0 } from './v0.1.0'
 import { v_0_2_0 } from './v0.2.0'
+import { v_0_2_1 } from './v0.2.1'

 // Version graph for this package: v0.2.1 becomes the `current` version,
 // and v0.2.0 joins v0.1.0 in the `other` list.
 export const versionGraph = VersionGraph.of({
-  current: v_0_2_0,
-  other: [v_0_1_0],
+  current: v_0_2_1,
+  other: [v_0_2_0, v_0_1_0],
 })
@@ -0,0 +1,13 @@
+import { VersionInfo } from '@start9labs/start-sdk'
+
+// Version entry for 0.2.1:0. Carries the release notes for the
+// config-driven model names; both migrations are no-ops.
+export const v_0_2_1 = VersionInfo.of({
+  version: '0.2.1:0',
+  releaseNotes: {
+    en_US:
+      'Parakeet + Gemma model names are now config-driven (live-reloadable via the Set Parakeet URL / Set Gemma URL actions). Operators can swap which model their Ollama / Parakeet wrapper serves without rebuilding the relay.',
+  },
+  migrations: {
+    // Intentionally empty. NOTE(review): presumably the new
+    // relay_*_model keys are filled in by the config schema defaults on
+    // read, so no data rewrite is needed — confirm.
+    up: async ({ effects }) => {},
+    down: async ({ effects }) => {},
+  },
+})