0ae59f3550
Introduces RECAP_MODE=multi alongside single-mode self-host: - Tenant auth + accounts (magic-link via System SMTP), per-tenant credit pool, anonymous trial minting with per-IP/-64 caps - Self-serve Pro/Max purchase: inline Lightning (BTCPay) + card (Zaprite), prepaid 30-day periods, expiry-reminder emails - Core-decoupling: relay owns cloud tier/expiry keyed by Recaps user-id - SQLite (better-sqlite3) schema for multi-mode; filesystem unchanged for single - StartOS actions/versions through 0.2.155
145 lines
7.1 KiB
JavaScript
145 lines
7.1 KiB
JavaScript
// Gemini-specific helpers: pricing table, cost calculation, prompt
|
||
// builder. Pure module — no state, no I/O. When we add other providers,
|
||
// each provider gets its own equivalent of this file.
|
||
|
||
import { formatTime } from "./util.js";
|
||
|
||
// ── Pricing (per 1M tokens) ─────────────────────────────────────────────────
|
||
// Only the models we actually use as analysis fallbacks. Keep flat — the
|
||
// numbers are operational data, not configuration. Update when Google
|
||
// changes published rates.
|
||
// Rate card keyed by Gemini model ID. Every number is dollars per one
// million tokens, split by token class (prompt input, candidate output,
// thinking). In all current entries the thinking rate equals the output rate.
export const PRICING = {
  // The five Gemini models we support. Verified against Google's
  // official docs on 2026-05-12. Retired/never-existed IDs omitted.
  "gemini-3.1-pro-preview": {
    input: 2.0,
    output: 12.0,
    thinking: 12.0,
  },
  "gemini-2.5-pro": {
    input: 1.25,
    output: 10.0,
    thinking: 10.0,
  },
  "gemini-3-flash-preview": {
    input: 0.5,
    output: 3.0,
    thinking: 3.0,
  },
  "gemini-2.5-flash": {
    input: 0.15,
    output: 0.6,
    thinking: 0.6,
  },
  "gemini-3.1-flash-lite": {
    input: 0.1,
    output: 0.4,
    thinking: 0.4,
  },

  // Catch-all for unknown / future model IDs — an estimate beats nothing.
  "default": {
    input: 1.0,
    output: 5.0,
    thinking: 5.0,
  },
};
|
||
|
||
// ── Cost calculation ────────────────────────────────────────────────────────
|
||
/**
 * Convert a Gemini SDK usage record (`response.usageMetadata`) into a
 * structured cost record.
 *
 * Rates are looked up in PRICING by model name. A model without its own
 * entry is priced at the "default" rates (an estimate, not zero). Cost
 * fields are pre-formatted strings so callers don't have to reformat them.
 *
 * @param {string} modelName - Model ID used as the PRICING key.
 * @param {object} [usage] - Usage metadata. Reads `promptTokenCount`,
 *   `candidatesTokenCount`, `thoughtsTokenCount` and `totalTokenCount`;
 *   missing fields, or a missing object, count as zero tokens.
 * @returns {{
 *   inputTokens: number, outputTokens: number, thinkingTokens: number,
 *   totalTokens: number, inputCost: string, outputCost: string,
 *   thinkingCost: string, totalCost: string, totalCostDisplay: string
 * }} Token counts, dollar costs fixed to 6 decimals, and a display string:
 *   cents with 3 decimals (e.g. "0.200¢") below one cent, otherwise dollars
 *   with 4 decimals (e.g. "$2.7500").
 */
export function calcCost(modelName, usage) {
  // Own-property lookup: a model name such as "constructor" must not resolve
  // to an inherited Object.prototype member (which would yield NaN costs).
  const hasOwnRates = Object.prototype.hasOwnProperty.call(PRICING, modelName);
  const rates = (hasOwnRates && PRICING[modelName]) || PRICING["default"];

  // A response without usage metadata is treated as zero usage, not a crash.
  const counts = usage ?? {};
  const inputTokens = counts.promptTokenCount || 0;
  const outputTokens = counts.candidatesTokenCount || 0;
  const thinkingTokens = counts.thoughtsTokenCount || 0;

  const inputCost = (inputTokens / 1_000_000) * rates.input;
  const outputCost = (outputTokens / 1_000_000) * rates.output;
  const thinkingCost = (thinkingTokens / 1_000_000) * rates.thinking;
  const totalCost = inputCost + outputCost + thinkingCost;

  // Sub-cent totals are shown in cents. Use the cent sign alone — pairing it
  // with "$" (as in "$0.200¢") states two units at once.
  const totalCostDisplay = totalCost < 0.01
    ? `${(totalCost * 100).toFixed(3)}¢`
    : `$${totalCost.toFixed(4)}`;

  return {
    inputTokens,
    outputTokens,
    thinkingTokens,
    // Prefer the SDK-reported total; otherwise sum the parts we priced.
    totalTokens: counts.totalTokenCount || (inputTokens + outputTokens + thinkingTokens),
    inputCost: inputCost.toFixed(6),
    outputCost: outputCost.toFixed(6),
    thinkingCost: thinkingCost.toFixed(6),
    totalCost: totalCost.toFixed(6),
    totalCostDisplay,
  };
}
|
||
|
||
// ── Section-count target by VIDEO duration ─────────────────────────────────
|
||
// Mirrors recap-relay's computePerWindowTarget() (server/chunked-analyze.js).
|
||
// Operator-tunable on the relay; baked into code defaults here on the
|
||
// Recap-app direct path. The defaults match the relay's defaults so
|
||
// segmentation density is consistent across both pipelines.
|
||
//
|
||
// Buckets are TOTAL video duration in minutes:
|
||
// <30 → 6 sections / 30-60 → 8 / 60-90 → 9 / 90-120 → 10
|
||
// 120-150 → 11 / 150-180 → 12 / >=180 → 12
|
||
// Per-window target = total_target × window_sec / total_audio_sec.
// The window count is clamped to ≥1, so a single-shot run (the window is
// the whole audio) gets the full total target.
|
||
/**
 * Pick the whole-video section-count target from the total audio length.
 *
 * @param {number} totalAudioSec - Total audio duration in seconds; a falsy
 *   value is treated as 0.
 * @returns {number} Target number of sections for the entire video.
 */
function pickTotalSectionsTarget(totalAudioSec) {
  const minutes = (totalAudioSec || 0) / 60;

  // [exclusive upper bound in minutes, target] pairs, ascending.
  const buckets = [
    [30, 6],
    [60, 8],
    [90, 9],
    [120, 10],
    [150, 11],
    [180, 12],
  ];

  for (const [upperBoundMin, target] of buckets) {
    if (minutes < upperBoundMin) {
      return target;
    }
  }

  // Three hours and beyond (or a non-comparable duration) caps at 12.
  return 12;
}
|
||
/**
 * Turn an average sections-per-window figure into the phrase used in the
 * prompt ("1 section", "around N sections" or "A–B sections").
 *
 * @param {number} avg - Average number of sections expected in one window.
 * @returns {string} Human-readable target label.
 */
function formatTargetSectionsLabel(avg) {
  // Anything at or below 1.2 is close enough to one to ask for exactly one.
  if (avg <= 1.2) {
    return "1 section";
  }

  const lower = Math.max(1, Math.floor(avg));
  const upper = Math.max(lower, Math.ceil(avg));

  // Whole-number averages give a single figure; fractional ones give a range.
  return lower === upper
    ? `around ${lower} sections`
    : `${lower}–${upper} sections`;
}
|
||
|
||
// ── Topic-analysis prompt builder ───────────────────────────────────────────
|
||
/**
 * Build the JSON-output topic-analysis prompt for one WINDOW of transcript
 * entries.
 *
 * Each entry is rendered as a line numbered by its position in `entries`;
 * the prompt asks the model to answer with those same indices, and the
 * caller relies on that positional contract.
 *
 * @param {Array<{offset: number, text: string}>} entries - Transcript entries
 *   for this window, in order. `offset` is passed to formatTime and used to
 *   measure the window's span (treated as seconds here).
 * @param {{totalAudioSec?: number}} [opts] - `totalAudioSec` is the FULL audio
 *   duration (not just this window) and scales the section-count target.
 *   When omitted, the window's own span is used (single-shot path), with a
 *   60-second floor.
 * @returns {string} The complete prompt text.
 */
export function buildAnalysisPrompt(entries, opts = {}) {
  const renderLine = (entry, position) =>
    `[${position}] (${formatTime(entry.offset)}) ${entry.text}`;
  const numbered = entries.map(renderLine).join("\n");

  // Span covered by this window's own entries: last offset minus first.
  let spanSec = 0;
  if (entries.length > 1) {
    const lastOffset = entries[entries.length - 1].offset || 0;
    const firstOffset = entries[0].offset || 0;
    spanSec = lastOffset - firstOffset;
  }
  const spanMin = Math.max(1, Math.round(spanSec / 60));
  const lastIndex = Math.max(0, entries.length - 1);

  // Without a caller-supplied total, assume a single-shot run where this
  // window IS the whole audio.
  const fullAudioSec = opts.totalAudioSec || spanSec || 60;
  const wholeVideoTarget = pickTotalSectionsTarget(fullAudioSec);
  // Windows shorter than a minute count as a minute; never fewer than one window.
  const effectiveWindowSec = Math.max(60, spanSec || 60);
  const windowCount = Math.max(1, fullAudioSec / effectiveWindowSec);
  const targetSections = formatTargetSectionsLabel(wholeVideoTarget / windowCount);

  // NOTE(review): the prompt lines below are flush-left as seen in the
  // reviewed copy of this file; confirm any leading whitespace (sub-bullets,
  // JSON example) against the repository before merging.
  const intro = `You are analyzing a ~${spanMin}-minute section of a longer transcript. Your job is to identify natural topic boundaries and group the transcript into discussion-based sections — aim for ${targetSections}.`;

  const transcript = `TRANSCRIPT (each line is numbered with a timestamp):
${numbered}`;

  const instructions = `INSTRUCTIONS:
1. Read the entire transcript carefully.
2. Identify where the discussion naturally shifts from one topic to another.
3. Group consecutive transcript segments by topic. Some sections may be short (a quick aside) and some may be long (an extended deep-dive). Let the content dictate the length.
4. For each section, write:
- A short, specific topic title (3-8 words)
- A 1-3 sentence summary of what's discussed
- The start and end segment indices (inclusive), counted as the bracketed [N] number at the start of each transcript line above.`;

  const constraints = `IMPORTANT:
- Sections must be chronological and non-overlapping.
- Every segment index from 0 to ${lastIndex} must belong to exactly one section.
- startIndex of section N+1 must equal endIndex of section N plus 1.
- Create as many or as few sections as the content naturally requires — but lean toward broad, substantive topics rather than minute-by-minute breakdowns. A natural topic that spans several minutes of dialogue should be one section, not several.
- Titles should be descriptive and specific, not generic like "Introduction" unless it truly is one.`;

  const responseFormat = `Respond with ONLY valid JSON in this exact format, no other text:
{
"sections": [
{
"title": "Brief Topic Title",
"summary": "1-3 sentence summary of this discussion section.",
"startIndex": 0,
"endIndex": 15
}
]
}`;

  // Sections are separated by exactly one blank line.
  return [intro, transcript, instructions, constraints, responseFormat].join("\n\n");
}
|