// recap/server/providers/openai-compatible.js

// OpenAI-compatible provider — analysis only.
//
// Same wire format as OpenAI's chat.completions endpoint, but pointed
// at a user-supplied baseURL: DeepSeek, Together, Groq, Fireworks, your
// own self-hosted vLLM, etc. The user provides baseURL + apiKey + model
// name; we don't ship a hardcoded model list (each backend's catalog
// differs), and we don't have pricing (varies wildly per backend).
//
// Structurally this is a thin wrapper around the OpenAI SDK client with
// cost reporting forced to zero — costs are reported as $0.0000 since we
// can't know the backend's rates without per-deploy configuration.

import OpenAI from "openai";
import { retryAPI } from "../util.js";
import { zeroCost } from "./cost.js";

// No built-in model catalog ships with this provider — the picker UI
// surfaces a free-text model field for OpenAI-compatible.
// listAnalysisModels() returns the caller-provided `defaultModels`, if
// any were passed at construction time.
//
// Value sent as `max_tokens` on every analysis chat-completion request.
const ANALYSIS_MAX_TOKENS = 16000;

/**
 * Build an analysis-only provider that sends OpenAI-style chat.completions
 * requests to a caller-supplied endpoint.
 *
 * @param {object} [options]
 * @param {string} [options.apiKey] - API key for the backend. When empty,
 *   the placeholder "no-auth" is handed to the SDK instead.
 * @param {string} options.baseURL - Root URL of the backend. Required.
 * @param {Iterable} [options.defaultModels=[]] - Model entries returned by
 *   listAnalysisModels(). Copied once at construction; `null` is treated
 *   as "none".
 * @param {number} [options.timeoutMs=900000] - Forwarded to the SDK client
 *   as its `timeout` option.
 * @returns {object} Provider exposing `name`, `capabilities`,
 *   `listAnalysisModels`, `listTranscriptionModels`, `transcribeAudio`
 *   and `analyzeText`.
 * @throws {Error} When `baseURL` is missing.
 */
export function createOpenAICompatibleProvider({
  apiKey,
  baseURL,
  defaultModels = [],
  timeoutMs = 900_000,
} = {}) {
  if (!baseURL) {
    throw new Error(
      "createOpenAICompatibleProvider: baseURL is required (e.g. https://api.deepseek.com/v1)"
    );
  }

  // Snapshot the caller's list once. Reading the live array on every call
  // let later caller-side mutations leak into listAnalysisModels() while
  // capabilities.listModels (computed once, below) went stale. `?? []`
  // also keeps an explicit `null` from crashing construction.
  const analysisModels = Array.from(defaultModels ?? []);

  // Some self-hosted backends accept any non-empty key. Default to a
  // sentinel so the SDK's auth header stays well-formed.
  const client = new OpenAI({
    apiKey: apiKey || "no-auth",
    baseURL,
    timeout: timeoutMs,
  });

  return {
    name: "openai-compatible",

    capabilities: {
      transcribe: false,
      analyze: true,
      // Only advertise model listing when there is something to list.
      listModels: analysisModels.length > 0,
    },

    /** @returns {Array} A fresh copy of the models captured at construction. */
    listAnalysisModels() {
      return [...analysisModels];
    },

    /** @returns {Array} Always empty — this provider does not transcribe. */
    listTranscriptionModels() {
      return [];
    },

    /**
     * Transcription is unsupported here; the returned promise always rejects.
     * @throws {Error}
     */
    async transcribeAudio() {
      throw new Error(
        "openai-compatible providers are wired for analysis only. Use Gemini or OpenAI Whisper for transcription."
      );
    },

    /**
     * Send `prompt` as a single user message and return the first choice.
     *
     * @param {object} params
     * @param {string} params.prompt - Content of the one user message.
     * @param {string} params.model - Model name forwarded to the backend.
     * @param {function(string): void} [params.onProgress] - Receives the
     *   log messages emitted by retryAPI.
     * @param {number} [params.retries=2] - Forwarded to retryAPI.
     * @param {AbortSignal} [params.signal] - Passed to the SDK as a
     *   per-request option when provided.
     * @returns {Promise<{text: string, usage: object, cost: *, finishReason: (string|null), raw: object}>}
     */
    async analyzeText({
      prompt,
      model,
      onProgress = () => {},
      retries = 2,
      signal,
    }) {
      const result = await retryAPI(
        () =>
          client.chat.completions.create(
            {
              model,
              max_tokens: ANALYSIS_MAX_TOKENS,
              messages: [{ role: "user", content: prompt }],
            },
            signal ? { signal } : undefined
          ),
        {
          retries,
          delayMs: 5000,
          label: "openai-compatible analysis",
          log: (msg) => onProgress(msg),
        }
      );

      // A response with no choices or no content yields empty text rather
      // than an error.
      const choice = result.choices?.[0];
      const text = choice?.message?.content || "";

      const usage = {
        inputTokens: result.usage?.prompt_tokens || 0,
        outputTokens: result.usage?.completion_tokens || 0,
        thinkingTokens: 0,
      };
      // Per-backend pricing varies — report zero. UI can warn that cost
      // tracking is not available for this provider.
      const cost = zeroCost(usage);

      return {
        text,
        usage,
        cost,
        finishReason: choice?.finish_reason || null,
        raw: result,
      };
    },
  };
}