// recap/server/providers/openai.js

// OpenAI provider — analysis (chat.completions) + transcription (Whisper).
//
// Whisper (whisper-1) has a 25 MB per-request file size cap. The
// orchestration layer's audio chunking is currently sized for Gemini's
// much larger cap; long podcasts at high bitrate can push individual
// chunks over Whisper's cap. We surface that as a clear error rather
// than silently truncating — users can mix providers (Whisper for
// short audio, Gemini for long) per-request via the picker.
//
// Pricing values are placeholders — verify against current OpenAI
// pricing before billing-sensitive use.

import { createReadStream, statSync } from "fs";
import OpenAI from "openai";
import { retryAPI, formatTime } from "../util.js";
import { formatCost, ratesFor } from "./cost.js";

// USD price per one million tokens, keyed by chat.completions model id.
// These figures are placeholders: VERIFY them against current OpenAI
// pricing before relying on them for anything billing-sensitive.
export const OPENAI_PRICING = {
  "gpt-4o": { input: 2.5, output: 10 },
  "gpt-4o-mini": { input: 0.15, output: 0.6 },
  "gpt-4-turbo": { input: 10, output: 30 },
  "o3-mini": { input: 1.1, output: 4.4 },
  // Catch-all entry intended for unknown / future model ids.
  default: { input: 2.5, output: 10 },
};

// Largest upload Whisper accepts in a single request (OpenAI hard limit): 25 MiB.
const WHISPER_MAX_BYTES = 25 * 2 ** 20;

// Whisper is priced by audio duration rather than by token. Transcription
// cost records keep the token-cost shape and carry the per-minute charge
// in their `inputCost` field.
const WHISPER_USD_PER_MINUTE = 0.006;

// Chat-completions model ids offered for transcript analysis.
export const OPENAI_ANALYSIS_MODELS = ["gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "o3-mini"];

// Speech-to-text model ids offered for transcription.
export const OPENAI_TRANSCRIPTION_MODELS = ["whisper-1"];

// Upper bound on generated tokens requested for each analysis call.
const ANALYSIS_MAX_TOKENS = 16_000;

/**
 * Build the OpenAI provider object: Whisper transcription plus
 * chat.completions analysis, both issued through one shared SDK client.
 *
 * @param {object} [options]
 * @param {string} options.apiKey - OpenAI API key (required).
 * @param {string} [options.baseURL] - Alternate API base URL; any falsy value
 *   is passed to the SDK as `undefined`.
 * @param {number} [options.timeoutMs=900000] - Value passed as the SDK
 *   client's `timeout` option.
 * @returns {object} Provider exposing `name`, `capabilities`,
 *   `listAnalysisModels()`, `listTranscriptionModels()`,
 *   `transcribeAudio()` and `analyzeText()`.
 * @throws {Error} When `apiKey` is missing.
 */
export function createOpenAIProvider({
  apiKey,
  baseURL,
  timeoutMs = 900_000,
} = {}) {
  if (!apiKey) {
    throw new Error("createOpenAIProvider: apiKey is required");
  }
  const client = new OpenAI({
    apiKey,
    baseURL: baseURL || undefined,
    timeout: timeoutMs,
  });

  // o-series reasoning models (o1, o3-mini, ...) reject `max_tokens` and
  // require `max_completion_tokens`. Every other model keeps `max_tokens`
  // so OpenAI-compatible backends reached through `baseURL` keep working.
  // NOTE(review): some listed models (e.g. gpt-4-turbo) may cap output
  // below ANALYSIS_MAX_TOKENS — confirm against the model documentation.
  const outputLimitFor = (model) =>
    /^o\d/.test(String(model))
      ? { max_completion_tokens: ANALYSIS_MAX_TOKENS }
      : { max_tokens: ANALYSIS_MAX_TOKENS };

  return {
    name: "openai",

    capabilities: {
      transcribe: true,
      analyze: true,
      listModels: true,
    },

    // Both list methods return a fresh copy so callers cannot mutate the
    // module-level arrays.
    listAnalysisModels() {
      return [...OPENAI_ANALYSIS_MODELS];
    },

    listTranscriptionModels() {
      return [...OPENAI_TRANSCRIPTION_MODELS];
    },

    /**
     * Transcribe one audio file with Whisper and return the transcript as
     * one `[timestamp] text` line per segment.
     *
     * NOTE(review): timestamps come straight from the segments in the
     * response; `offsetSeconds` is only used in progress and retry labels
     * here. Presumably the caller applies the chunk offset — confirm.
     *
     * @param {object} args
     * @param {string} args.filePath - Path of the audio file to upload.
     * @param {string} [args.model="whisper-1"] - Transcription model id.
     * @param {number} [args.offsetSeconds=0] - Chunk offset, shown in messages.
     * @param {(msg: string) => void} [args.onProgress] - Status callback.
     * @param {AbortSignal} [args.signal] - Forwarded to the SDK request.
     * @returns {Promise<object>} `{ text, usage, cost, finishReason,
     *   blockReason, raw }`; token counts are always 0 and the cost is
     *   derived from the reported audio duration.
     * @throws {Error} When the file is larger than WHISPER_MAX_BYTES.
     */
    async transcribeAudio({
      filePath,
      model = "whisper-1",
      offsetSeconds = 0,
      onProgress = () => {},
      signal,
    }) {
      // Best-effort size probe. If the file cannot be stat'ed the check is
      // skipped (bytes stays 0) and the upload below is left to report the
      // real problem.
      let bytes = 0;
      try {
        bytes = statSync(filePath).size;
      } catch {
        // Intentionally ignored — see the comment above.
      }
      if (bytes > WHISPER_MAX_BYTES) {
        const sizeMB = (bytes / (1024 * 1024)).toFixed(1);
        throw new Error(
          `OpenAI Whisper file size limit is 25 MB. This chunk is ${sizeMB} MB. Try Gemini for transcription, or split the audio more aggressively.`
        );
      }

      onProgress(
        `Uploading audio${offsetSeconds > 0 ? ` (offset ${formatTime(offsetSeconds)})` : ""} to OpenAI Whisper (${model})...`
      );
      const start = Date.now();
      const result = await retryAPI(
        () =>
          client.audio.transcriptions.create(
            {
              // A new stream is opened on every invocation of this callback;
              // a consumed stream cannot be replayed.
              file: createReadStream(filePath),
              model,
              response_format: "verbose_json",
              timestamp_granularities: ["segment"],
            },
            signal ? { signal } : undefined
          ),
        {
          retries: 3,
          delayMs: 5000,
          label: `Whisper transcription${offsetSeconds > 0 ? ` (chunk@${formatTime(offsetSeconds)})` : ""}`,
          log: (msg) => onProgress(msg),
        }
      );
      const elapsed = ((Date.now() - start) / 1000).toFixed(1);
      onProgress(`Whisper transcription complete in ${elapsed}s`);

      // Without segments, fall back to the whole text as one line at 0:00.
      const segments = Array.isArray(result.segments) ? result.segments : [];
      const lines = segments.length
        ? segments.map((s) => `[${formatTime(s.start || 0)}] ${(s.text || "").trim()}`)
        : [`[0:00] ${(result.text || "").trim()}`];
      const text = lines.join("\n");

      // Whisper bills by audio duration in minutes, not tokens. Coerce the
      // duration so a missing or non-numeric value yields a zero cost
      // instead of "NaN" strings.
      const durationSeconds = Number(result.duration) || 0;
      const minutes = durationSeconds / 60;
      const usdCost = minutes * WHISPER_USD_PER_MINUTE;
      const cost = {
        inputTokens: 0,
        outputTokens: 0,
        thinkingTokens: 0,
        totalTokens: 0,
        inputCost: usdCost.toFixed(6),
        outputCost: "0.000000",
        thinkingCost: "0.000000",
        totalCost: usdCost.toFixed(6),
        // Below one cent the amount is shown in cents, so it carries the
        // cent sign only — never a dollar sign as well.
        totalCostDisplay: usdCost < 0.01
          ? `${(usdCost * 100).toFixed(3)}¢`
          : `$${usdCost.toFixed(4)}`,
      };

      return {
        text,
        usage: { inputTokens: 0, outputTokens: 0, thinkingTokens: 0, totalTokens: 0 },
        cost,
        finishReason: null,
        blockReason: "none",
        raw: result,
      };
    },

    /**
     * Send `prompt` as a single user message to chat.completions and return
     * the first choice's text with token usage and computed cost.
     *
     * @param {object} args
     * @param {string} args.prompt - Full prompt text.
     * @param {string} args.model - Chat model id.
     * @param {(msg: string) => void} [args.onProgress] - Receives retry log lines.
     * @param {number} [args.retries=2] - Passed through to retryAPI.
     * @param {AbortSignal} [args.signal] - Forwarded to the SDK request.
     * @returns {Promise<object>} `{ text, usage, cost, finishReason, raw }`.
     */
    async analyzeText({
      prompt,
      model,
      onProgress = () => {},
      retries = 2,
      signal,
    }) {
      const result = await retryAPI(
        () =>
          client.chat.completions.create(
            {
              model,
              ...outputLimitFor(model),
              messages: [{ role: "user", content: prompt }],
            },
            signal ? { signal } : undefined
          ),
        {
          retries,
          delayMs: 5000,
          label: "OpenAI analysis",
          log: (msg) => onProgress(msg),
        }
      );

      const choice = result.choices?.[0];
      const text = choice?.message?.content || "";

      const usage = {
        inputTokens: result.usage?.prompt_tokens || 0,
        outputTokens: result.usage?.completion_tokens || 0,
        thinkingTokens: 0,
      };
      const cost = formatCost(ratesFor(OPENAI_PRICING, model), usage);

      return {
        text,
        usage,
        cost,
        finishReason: choice?.finish_reason || null,
        raw: result,
      };
    },
  };
}