v0.2.11 /relay/capabilities + /relay/transcribe-url (yt-dlp in container)
This commit is contained in:
+15
-4
@@ -1,9 +1,10 @@
|
|||||||
# ─────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────
|
||||||
# Recap Relay — StartOS 0.4 Docker image
|
# Recap Relay — StartOS 0.4 Docker image
|
||||||
#
|
#
|
||||||
# Includes: Node.js 20 only. No yt-dlp / ffmpeg / Python — the relay
|
# Includes: Node.js 20 + yt-dlp + ffmpeg + Python. yt-dlp + ffmpeg are
|
||||||
# receives audio buffers from Recap clients and forwards to Gemini's
|
# needed by the /relay/transcribe-url endpoint, which downloads
|
||||||
# File API; no local audio processing.
|
# YouTube / direct-audio URLs server-side so Recap clients don't have
|
||||||
|
# to ship the audio over their own (often slow) upstream.
|
||||||
#
|
#
|
||||||
# Uses Debian slim for the same reason Recap does — pip-free, but
|
# Uses Debian slim for the same reason Recap does — pip-free, but
|
||||||
# pulled-in C deps from npm packages prefer glibc over musl.
|
# pulled-in C deps from npm packages prefer glibc over musl.
|
||||||
@@ -32,10 +33,20 @@ WORKDIR /app
|
|||||||
# Runtime deps:
|
# Runtime deps:
|
||||||
# - dumb-init: PID 1 signal handling
|
# - dumb-init: PID 1 signal handling
|
||||||
# - ca-certificates: HTTPS for Gemini + Keysat
|
# - ca-certificates: HTTPS for Gemini + Keysat
|
||||||
|
# - python3 + pip: yt-dlp installation
|
||||||
|
# - ffmpeg: audio extraction (yt-dlp invokes it for -x)
|
||||||
|
# - curl: yt-dlp self-update fallback
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
dumb-init \
|
dumb-init \
|
||||||
ca-certificates \
|
ca-certificates \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
curl \
|
||||||
|
python3 \
|
||||||
|
python3-pip \
|
||||||
|
python3-venv \
|
||||||
|
ffmpeg \
|
||||||
|
&& rm -rf /var/lib/apt/lists/* \
|
||||||
|
&& pip3 install --break-system-packages yt-dlp \
|
||||||
|
&& yt-dlp --version
|
||||||
|
|
||||||
# Copy installed deps + app code from builder
|
# Copy installed deps + app code from builder
|
||||||
COPY --from=builder /app/vendor ./vendor/
|
COPY --from=builder /app/vendor ./vendor/
|
||||||
|
|||||||
@@ -20,10 +20,12 @@ import {
|
|||||||
setupAdminAuthRoutes,
|
setupAdminAuthRoutes,
|
||||||
} from "./admin-auth.js";
|
} from "./admin-auth.js";
|
||||||
import { transcribeRouter } from "./routes/transcribe.js";
|
import { transcribeRouter } from "./routes/transcribe.js";
|
||||||
|
import { transcribeUrlRouter } from "./routes/transcribe-url.js";
|
||||||
import { analyzeRouter } from "./routes/analyze.js";
|
import { analyzeRouter } from "./routes/analyze.js";
|
||||||
import { healthRouter } from "./routes/health.js";
|
import { healthRouter } from "./routes/health.js";
|
||||||
import { balanceRouter } from "./routes/balance.js";
|
import { balanceRouter } from "./routes/balance.js";
|
||||||
import { policyRouter } from "./routes/policy.js";
|
import { policyRouter } from "./routes/policy.js";
|
||||||
|
import { capabilitiesRouter } from "./routes/capabilities.js";
|
||||||
import { adminRouter } from "./routes/admin.js";
|
import { adminRouter } from "./routes/admin.js";
|
||||||
|
|
||||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||||
@@ -51,8 +53,10 @@ setupAdminAuthRoutes(app);
|
|||||||
// Authorization optional).
|
// Authorization optional).
|
||||||
app.use("/relay", healthRouter());
|
app.use("/relay", healthRouter());
|
||||||
app.use("/relay", policyRouter());
|
app.use("/relay", policyRouter());
|
||||||
|
app.use("/relay", capabilitiesRouter());
|
||||||
app.use("/relay", balanceRouter());
|
app.use("/relay", balanceRouter());
|
||||||
app.use("/relay", transcribeRouter());
|
app.use("/relay", transcribeRouter());
|
||||||
|
app.use("/relay", transcribeUrlRouter());
|
||||||
app.use("/relay", analyzeRouter());
|
app.use("/relay", analyzeRouter());
|
||||||
|
|
||||||
// Admin dashboard endpoints (cookie-gated).
|
// Admin dashboard endpoints (cookie-gated).
|
||||||
|
|||||||
+1
-1
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "recap-relay-server",
|
"name": "recap-relay-server",
|
||||||
"version": "0.2.10",
|
"version": "0.2.11",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"private": true,
|
"private": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
|||||||
@@ -0,0 +1,70 @@
|
|||||||
|
// GET /relay/capabilities — operator-aware metadata for Recap clients
|
||||||
|
// to plan their audio handling. Returns the upper bounds the relay's
|
||||||
|
// CURRENT routing config can comfortably accept, so Recap can decide
|
||||||
|
// whether to chunk a long video before sending it.
|
||||||
|
//
|
||||||
|
// Today's logic:
|
||||||
|
// - When the operator's transcribe_backend_preference routes through
|
||||||
|
// Gemini at all (gemini_first / gemini_only), we report Gemini-safe
|
||||||
|
// limits (60 min / 30 MB / 2700 s chunks). Even with hardware as
|
||||||
|
// overflow, the FIRST attempt is Gemini, which needs the chunk
|
||||||
|
// budget.
|
||||||
|
//   - When the operator's preference is hardware_only or hardware_first
//     AND a Parakeet endpoint is configured, we report high-but-finite
//     ceilings (500 MB / 240 min, no preferred chunk size) — the
//     operator's Parakeet wrapper can typically ingest 2+ hour podcasts
//     in a single shot, so chunking just adds extra inference passes
//     and timestamp-stitching overhead.
|
||||||
|
//
|
||||||
|
// Recap reads this once on boot + on policy refresh; when its
|
||||||
|
// transcriptionProvider is "relay", it honors these limits instead of
|
||||||
|
// its own hardcoded thresholds. For non-relay providers, Recap's
|
||||||
|
// internal per-provider thresholds apply.
|
||||||
|
|
||||||
|
import express from "express";
|
||||||
|
import { getConfigSnapshot } from "../config.js";
|
||||||
|
|
||||||
|
/**
 * Build the router that serves GET /capabilities.
 *
 * The response advertises the audio limits that fit the relay's current
 * transcription routing:
 *   - hardware limits when a Parakeet base URL is configured and the
 *     backend preference is "hardware_only" or "hardware_first";
 *   - Gemini-safe limits in every other case.
 *
 * @returns {import("express").Router} router exposing GET /capabilities
 */
export function capabilitiesRouter() {
  const router = express.Router();

  router.get("/capabilities", async (_req, res) => {
    const snapshot = await getConfigSnapshot();
    const preference =
      snapshot.relay_transcribe_backend_preference || "gemini_first";
    const parakeetConfigured = Boolean(snapshot.relay_parakeet_base_url);
    const prefersHardware = ["hardware_only", "hardware_first"].includes(
      preference
    );

    // Stay conservative: without a Parakeet endpoint Gemini is the only
    // path, so advertising larger inputs would mislead the client.
    if (!(parakeetConfigured && prefersHardware)) {
      // Gemini File-API + practical reliability limits. These match
      // Recap's pre-relay defaults, so existing chunking behavior holds.
      res.json({
        max_audio_mb: 30,
        max_audio_minutes: 60,
        preferred_chunk_seconds: 2700, // 45-minute chunks
        reason: `Gemini-backed preference (${preference})`,
      });
      return;
    }

    // Effectively unbounded for normal content, but the ceilings stay
    // finite so a 24-hour file doesn't silently OOM the operator's GPU box.
    res.json({
      max_audio_mb: 500,
      max_audio_minutes: 240,
      preferred_chunk_seconds: null,
      // Diagnostic only: tells a dashboard or curious operator which
      // limit shape was returned and why.
      reason: `hardware-capable backend preference (${preference})`,
    });
  });

  return router;
}
|
||||||
@@ -0,0 +1,415 @@
|
|||||||
|
// POST /relay/transcribe-url — like /relay/transcribe but the relay
|
||||||
|
// fetches the audio itself instead of accepting it in the request
|
||||||
|
// body. Avoids the buyer's upstream-bandwidth bottleneck: a 100-MB
|
||||||
|
// podcast that takes 60s to upload from a home connection takes <5s
|
||||||
|
// for the operator's relay (typically on a Start9 server with much
|
||||||
|
// fatter pipe) to download from the original source.
|
||||||
|
//
|
||||||
|
// Request body (application/json):
|
||||||
|
// {
|
||||||
|
// media_url: string, // YouTube URL OR direct audio (.mp3 / .m4a / etc.)
|
||||||
|
// type?: "youtube" | "podcast", // hint; we sniff URL shape if absent
|
||||||
|
// mime_type?: string, // hint for the transcribe backend; we sniff from
|
||||||
|
// // the downloaded file if absent
|
||||||
|
// title?: string, // metadata for Gemini's transcription prompt
|
||||||
|
// channel?: string,
|
||||||
|
// description?: string,
|
||||||
|
// chapters?: any[]
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// Same auth as /relay/transcribe (X-Recap-Install-Id required,
|
||||||
|
// X-Recap-Job-Id optional, Authorization optional Bearer license).
|
||||||
|
// Same standard envelope on response. Same job-id dedup + credit
|
||||||
|
// accounting + audit log. Adds `download_ms` to the audit row so
|
||||||
|
// dashboard can show how long the relay's own download took
|
||||||
|
// separately from the backend's inference time.
|
||||||
|
|
||||||
|
import express from "express";
|
||||||
|
import fs from "fs/promises";
|
||||||
|
import { createWriteStream } from "fs";
|
||||||
|
import os from "os";
|
||||||
|
import path from "path";
|
||||||
|
import { execFile } from "child_process";
|
||||||
|
import { promisify } from "util";
|
||||||
|
import { Readable } from "stream";
|
||||||
|
import { pipeline } from "stream/promises";
|
||||||
|
import { resolveLicense } from "../keysat-client.js";
|
||||||
|
import { getOrCreateRow, planBackend, commitCredit } from "../credits.js";
|
||||||
|
import { lookupJob, markJobCharged, refundJob } from "../job-credits.js";
|
||||||
|
import { getConfigSnapshot, getTierQuotas } from "../config.js";
|
||||||
|
import { createGeminiBackend } from "../backends/gemini.js";
|
||||||
|
import { createHardwareBackend } from "../backends/hardware.js";
|
||||||
|
import { envelope, errorEnvelope } from "./envelope.js";
|
||||||
|
import { recordCall } from "../audit-log.js";
|
||||||
|
import { calcGeminiCost } from "../pricing.js";
|
||||||
|
|
||||||
|
const execFileAsync = promisify(execFile);
|
||||||
|
|
||||||
|
// Max file size the relay is willing to download. Generous enough for
|
||||||
|
// 4-hour podcasts at ~256 kbps but caps DOS exposure.
|
||||||
|
const MAX_DOWNLOAD_BYTES = 500 * 1024 * 1024;
|
||||||
|
|
||||||
|
// Per-request safety timeout on the download leg alone (separate from
|
||||||
|
// the transcribe call's own timeout). yt-dlp can be slow when YouTube
|
||||||
|
// rate-limits; a hard ceiling avoids holding the request open forever.
|
||||||
|
const DOWNLOAD_TIMEOUT_MS = 10 * 60 * 1000;
|
||||||
|
|
||||||
|
/**
 * Report whether a URL points at YouTube.
 *
 * The decision is made on the parsed hostname, not on a substring match,
 * so `youtu.be` short links and bare `youtube.com` hosts are recognised,
 * while a look-alike such as `www.youtube.com.evil.example` or a
 * `youtube.com` that only appears in the path/query is not.
 *
 * @param {string} url - Candidate media URL; a missing scheme is tolerated.
 * @returns {boolean} true when the host is youtube.com, youtu.be, or a
 *   subdomain of either.
 */
function looksLikeYouTube(url) {
  if (!url || typeof url !== "string") return false;

  let hostname;
  try {
    // Tolerate scheme-less input ("youtube.com/watch?v=…") by assuming https.
    const candidate = /^[a-z][a-z0-9+.-]*:\/\//i.test(url)
      ? url
      : `https://${url}`;
    // Drop a trailing root dot ("youtube.com.") before comparing.
    hostname = new URL(candidate).hostname.toLowerCase().replace(/\.$/, "");
  } catch {
    // Not parseable as a URL at all, so it cannot be a YouTube link.
    return false;
  }

  return ["youtube.com", "youtu.be"].some(
    (domain) => hostname === domain || hostname.endsWith(`.${domain}`)
  );
}
|
||||||
|
|
||||||
|
// Extension → MIME table. A Map is used instead of an object literal so a
// file extension can never resolve to an inherited Object.prototype member
// (e.g. ".constructor" or ".__proto__").
const MIME_BY_EXTENSION = new Map([
  ["mp3", "audio/mpeg"],
  ["m4a", "audio/mp4"],
  ["mp4", "audio/mp4"],
  ["aac", "audio/aac"],
  ["ogg", "audio/ogg"],
  ["opus", "audio/opus"],
  ["wav", "audio/wav"],
  ["webm", "audio/webm"],
  ["flac", "audio/flac"],
]);

/**
 * Guess an audio MIME type from a file path's extension.
 *
 * @param {string} filePath - Path or file name; only the extension is used.
 * @returns {string} The mapped MIME type, or "audio/mpeg" when the
 *   extension is missing or unknown. The match is case-insensitive.
 */
function guessMimeFromExt(filePath) {
  const ext = path.extname(filePath).slice(1).toLowerCase();
  return MIME_BY_EXTENSION.get(ext) ?? "audio/mpeg";
}
|
||||||
|
|
||||||
|
/**
 * Download an HTTP(S) audio URL to a temp file inside `tmpDir`.
 *
 * The body is streamed to disk through `pipeline`, so backpressure is
 * honoured and the upstream connection is torn down as soon as the size
 * cap is exceeded or any stage fails. A partially written file may remain
 * in `tmpDir` on failure; the caller owns `tmpDir` and removes it.
 *
 * @param {string} url - Absolute http(s) URL of the audio file.
 * @param {string} tmpDir - Existing directory to write the download into.
 * @returns {Promise<{ filePath: string, bytes: number, mimeType: string }>}
 * @throws {Error} When the URL is not http(s), the server answers with a
 *   non-2xx status, the response has no body, the download times out, or
 *   the payload exceeds MAX_DOWNLOAD_BYTES.
 */
async function downloadDirect(url, tmpDir) {
  // The URL comes from the request body, so accept plain web URLs only.
  let target;
  try {
    target = new URL(url);
  } catch (err) {
    throw new Error(`Invalid download URL: ${url}`, { cause: err });
  }
  if (target.protocol !== "http:" && target.protocol !== "https:") {
    throw new Error(
      `Unsupported URL scheme "${target.protocol}" — only http(s) downloads are allowed`
    );
  }
  // NOTE(review): loopback / private-network hosts are not blocked here, so
  // a client can still point the relay at internal services (SSRF). Confirm
  // whether that matters for this deployment.

  const res = await fetch(target, {
    redirect: "follow",
    signal: AbortSignal.timeout(DOWNLOAD_TIMEOUT_MS),
  });
  if (!res.ok) {
    // Best-effort: release the connection instead of leaving the body unread.
    await res.body?.cancel().catch(() => {});
    throw new Error(`Download ${url} returned HTTP ${res.status}`);
  }

  // Fail fast when the server already declares an oversized payload.
  const declaredBytes = Number(res.headers.get("content-length"));
  if (Number.isFinite(declaredBytes) && declaredBytes > MAX_DOWNLOAD_BYTES) {
    await res.body?.cancel().catch(() => {});
    throw new Error(
      `Download exceeded ${MAX_DOWNLOAD_BYTES} bytes — refusing to continue`
    );
  }

  // Strip parameters ("; charset=…") and normalise case before matching.
  const contentType = (res.headers.get("content-type") || "")
    .split(";")[0]
    .trim()
    .toLowerCase();
  const isAudioy =
    contentType.startsWith("audio/") ||
    contentType === "application/octet-stream" ||
    contentType.includes("mpeg") ||
    contentType.includes("mp4");
  if (!isAudioy) {
    // Don't enforce strictly — some podcast CDNs serve audio with
    // generic content-types. Log + continue; the transcription backend
    // will reject if it's truly not audio.
    console.warn(
      `[transcribe-url] non-audio content-type "${contentType}" for ${url} — proceeding anyway`
    );
  }

  // Pick the temp-file extension: a specific content-type wins, then a
  // recognised extension in the URL path, then "mp3" as the last resort.
  const typeHints = [
    ["mp4", "m4a"],
    ["ogg", "ogg"],
    ["opus", "opus"],
    ["wav", "wav"],
    ["flac", "flac"],
    ["webm", "webm"],
    ["aac", "aac"],
    ["mpeg", "mp3"],
  ];
  const knownExts = ["mp3", "m4a", "mp4", "aac", "ogg", "opus", "wav", "webm", "flac"];
  const extFromType = typeHints.find(([needle]) => contentType.includes(needle))?.[1];
  const extFromUrl = path.extname(target.pathname).slice(1).toLowerCase();
  const ext = extFromType ?? (knownExts.includes(extFromUrl) ? extFromUrl : "mp3");
  const filePath = path.join(tmpDir, `audio.${ext}`);

  if (!res.body) throw new Error("response has no body");

  // Stream to disk while counting bytes. Throwing inside the generator
  // makes pipeline() destroy both the source and the file stream.
  let bytes = 0;
  await pipeline(
    Readable.fromWeb(res.body),
    async function* countBytes(source) {
      for await (const chunk of source) {
        bytes += chunk.byteLength;
        if (bytes > MAX_DOWNLOAD_BYTES) {
          throw new Error(
            `Download exceeded ${MAX_DOWNLOAD_BYTES} bytes — refusing to continue`
          );
        }
        yield chunk;
      }
    },
    createWriteStream(filePath)
  );

  // Trust the server's type only when it names a media type; otherwise
  // (octet-stream, text/*, missing) fall back to the extension-based guess.
  const mimeType = /^(audio|video)\//.test(contentType)
    ? contentType
    : guessMimeFromExt(filePath);
  return { filePath, bytes, mimeType };
}
|
||||||
|
|
||||||
|
/**
 * Download a URL via yt-dlp and extract its audio as MP3 into `tmpDir`.
 *
 * The URL originates from the request body, so it is validated as http(s)
 * and passed after a `--` terminator; without that, a value starting with
 * `-` would be parsed by yt-dlp as a command-line option. The caller owns
 * `tmpDir` and removes it.
 *
 * @param {string} url - Absolute http(s) URL that yt-dlp can extract.
 * @param {string} tmpDir - Existing directory to write the audio file into.
 * @returns {Promise<{ filePath: string, bytes: number, mimeType: string }>}
 * @throws {Error} When the URL is not http(s), yt-dlp fails or times out,
 *   no audio file is produced, or the file exceeds MAX_DOWNLOAD_BYTES.
 */
async function downloadYouTube(url, tmpDir) {
  let target;
  try {
    target = new URL(url);
  } catch (err) {
    throw new Error(`Invalid YouTube URL: ${url}`, { cause: err });
  }
  if (target.protocol !== "http:" && target.protocol !== "https:") {
    throw new Error(
      `Unsupported URL scheme "${target.protocol}" — only http(s) URLs are allowed`
    );
  }

  const outTemplate = path.join(tmpDir, "audio.%(ext)s");
  // Give yt-dlp the cap as well so an oversized source is refused up front
  // when its size is known. The post-download stat check below stays
  // authoritative, because the transcoded MP3 can differ in size.
  const maxFilesize = `${Math.ceil(MAX_DOWNLOAD_BYTES / (1024 * 1024))}M`;
  const args = [
    "-x", // extract audio
    "--audio-format",
    "mp3",
    "--audio-quality",
    "5",
    "--max-filesize",
    maxFilesize,
    "-o",
    outTemplate,
    "--no-playlist",
    "--no-simulate",
    "--no-warnings",
    "--", // end of options: everything after this is treated as a URL
    target.href,
  ];
  try {
    await execFileAsync("yt-dlp", args, {
      timeout: DOWNLOAD_TIMEOUT_MS,
      maxBuffer: 10 * 1024 * 1024,
    });
  } catch (err) {
    const stderr = (err?.stderr || "").toString();
    const stdout = (err?.stdout || "").toString();
    throw new Error(
      `yt-dlp failed: ${stderr.trim() || stdout.trim() || err?.message}`,
      { cause: err }
    );
  }

  // --audio-format mp3 should leave audio.mp3 behind, but be defensive in
  // case the file landed with a different extension.
  const files = await fs.readdir(tmpDir);
  const audioFile = files.find((f) => /^audio\.(mp3|m4a|opus|webm|aac|ogg)$/i.test(f));
  if (!audioFile) {
    throw new Error(`yt-dlp ran but no audio file found in ${tmpDir}`);
  }

  const filePath = path.join(tmpDir, audioFile);
  const stat = await fs.stat(filePath);
  if (stat.size > MAX_DOWNLOAD_BYTES) {
    throw new Error(
      `YouTube download exceeded ${MAX_DOWNLOAD_BYTES} bytes — refusing to continue`
    );
  }
  return {
    filePath,
    bytes: stat.size,
    mimeType: guessMimeFromExt(filePath),
  };
}
|
||||||
|
|
||||||
|
/**
 * Report whether `value` parses as an absolute http(s) URL.
 *
 * @param {string} value - Candidate URL string.
 * @returns {boolean}
 */
function isHttpUrl(value) {
  try {
    const { protocol } = new URL(value);
    return protocol === "http:" || protocol === "https:";
  } catch {
    return false;
  }
}

/**
 * Best-effort removal of a per-request temp directory. A failure is logged
 * and never thrown, so cleanup cannot mask the request's real outcome.
 *
 * @param {string} dir - Directory to remove recursively.
 * @returns {Promise<void>}
 */
async function removeTmpDir(dir) {
  try {
    await fs.rm(dir, { recursive: true, force: true });
  } catch (err) {
    console.warn(
      `[transcribe-url] failed to remove temp dir ${dir}: ${err?.message || err}`
    );
  }
}

/**
 * Build the router that serves POST /transcribe-url.
 *
 * Request flow:
 *   1. Validate headers and body (install id, absolute http(s) media_url).
 *   2. Resolve the license tier and pick a backend, reusing the backend of
 *      an already-known job when lookupJob() returns one.
 *   3. Download the audio into a temp dir (yt-dlp for YouTube, fetch
 *      otherwise).
 *   4. Transcribe with the chosen backend.
 *   5. Commit the credit (skipped for reused jobs), write the audit row,
 *      and return the standard envelope.
 *
 * Every exit path writes an audit row via recordCall(), except the two 400
 * validation failures.
 *
 * @returns {import("express").Router} router exposing POST /transcribe-url
 */
export function transcribeUrlRouter() {
  const router = express.Router();

  router.post("/transcribe-url", express.json({ limit: "1mb" }), async (req, res) => {
    const t0 = Date.now();
    const installId = req.header("X-Recap-Install-Id");
    const jobId = req.header("X-Recap-Job-Id") || null;
    const auth = req.header("Authorization");

    if (!installId) {
      const e = await errorEnvelope({
        error: "missing X-Recap-Install-Id header",
        statusHint: 400,
      });
      return res.status(400).json(e.body);
    }
    const {
      media_url: mediaUrl,
      type,
      mime_type: bodyMime,
      title,
      channel,
      description,
      chapters,
    } = req.body || {};
    if (!mediaUrl || typeof mediaUrl !== "string") {
      const e = await errorEnvelope({
        error: "missing or non-string body.media_url",
        installId,
        statusHint: 400,
      });
      return res.status(400).json(e.body);
    }
    // Reject anything that is not an absolute http(s) URL before spending
    // quota planning or a download attempt on it. This also keeps
    // option-looking strings away from the yt-dlp command line.
    if (!isHttpUrl(mediaUrl)) {
      const e = await errorEnvelope({
        error: "body.media_url must be an absolute http(s) URL",
        installId,
        statusHint: 400,
      });
      return res.status(400).json(e.body);
    }

    const license = await resolveLicense(auth);
    const tier = license.tier;
    const row = await getOrCreateRow(installId);
    row.tier_snapshot = tier;

    // Quota check + backend choice. Same as /relay/transcribe.
    let reusedJob = false;
    let chosenBackend = null;
    const existingJob = lookupJob(installId, jobId);
    if (existingJob) {
      reusedJob = true;
      chosenBackend = existingJob.backend;
    } else {
      const cfg = await getConfigSnapshot();
      const hasHardware = !!cfg.relay_parakeet_base_url;
      const quota = await getTierQuotas();
      const preference =
        cfg.relay_transcribe_backend_preference || "gemini_first";
      const plan = planBackend(row, quota, { hasHardware, preference });
      if (!plan.allowed) {
        await recordCall({
          install_id: installId,
          tier,
          pipeline: "transcribe",
          backend: null,
          model: null,
          status: "refused",
          credit_charged: 0,
          duration_ms: Date.now() - t0,
          cost_usd: 0,
          job_id: jobId,
          error: plan.reason,
        });
        const e = await errorEnvelope({
          error: plan.reason,
          installId,
          tier,
          statusHint: 402,
        });
        return res.status(402).json(e.body);
      }
      chosenBackend = plan.backend;
    }

    // ── Download phase ─────────────────────────────────────────────
    const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "relay-dl-"));
    // An explicit type wins; otherwise sniff the URL shape.
    const isYT = type === "youtube" || (!type && looksLikeYouTube(mediaUrl));
    const dlStart = Date.now();
    let audio;
    let downloadMs = 0;
    try {
      audio = isYT
        ? await downloadYouTube(mediaUrl, tmpDir)
        : await downloadDirect(mediaUrl, tmpDir);
      downloadMs = Date.now() - dlStart;
      console.log(
        `[transcribe-url] downloaded ${audio.bytes} bytes from ${isYT ? "youtube" : "direct"} in ${downloadMs}ms (${mediaUrl.slice(0, 80)})`
      );
    } catch (err) {
      await removeTmpDir(tmpDir);
      console.error(`[transcribe-url] download failed: ${err?.message || err}`);
      // One truncated message shared by the audit row and the client envelope.
      const downloadError = (
        "download_failed: " + (err?.message || String(err))
      ).slice(0, 200);
      await recordCall({
        install_id: installId,
        tier,
        pipeline: "transcribe",
        backend: chosenBackend,
        model: null,
        status: "error",
        credit_charged: 0,
        duration_ms: Date.now() - t0,
        download_ms: Date.now() - dlStart,
        cost_usd: 0,
        job_id: jobId,
        error: downloadError,
      });
      const e = await errorEnvelope({
        error: downloadError,
        installId,
        tier,
        statusHint: 502,
      });
      return res.status(502).json(e.body);
    }

    // ── Transcription phase ────────────────────────────────────────
    const cfg = await getConfigSnapshot();
    let result;
    try {
      const audioBuf = await fs.readFile(audio.filePath);
      // A client-supplied mime_type hint overrides what the download sniffed.
      const mimeType = bodyMime || audio.mimeType;
      if (chosenBackend === "gemini") {
        const backend = createGeminiBackend({
          apiKey: cfg.relay_gemini_api_key,
          transcriptionModel: cfg.relay_gemini_transcription_model,
          analysisModel: cfg.relay_gemini_analysis_model,
        });
        result = await backend.transcribeAudio({
          audio: audioBuf,
          mimeType,
          title: title || "",
          channel: channel || "",
          description: description || "",
          chapters: Array.isArray(chapters) ? chapters : [],
          offsetSeconds: 0,
        });
      } else {
        const backend = createHardwareBackend({
          parakeetBaseURL: cfg.relay_parakeet_base_url,
          gemmaBaseURL: cfg.relay_gemma_base_url,
          parakeetModel: cfg.relay_parakeet_model,
          gemmaModel: cfg.relay_gemma_model,
        });
        result = await backend.transcribeAudio({
          audio: audioBuf,
          mimeType,
          offsetSeconds: 0,
        });
      }
    } catch (err) {
      // Temp-dir cleanup is handled once, by the finally block below.
      if (reusedJob) refundJob(installId, jobId);
      console.error(`[transcribe-url] transcribe failed: ${err?.message}`);
      await recordCall({
        install_id: installId,
        tier,
        pipeline: "transcribe",
        backend: chosenBackend,
        model:
          chosenBackend === "gemini"
            ? cfg.relay_gemini_transcription_model
            : cfg.relay_parakeet_model,
        status: "error",
        credit_charged: 0,
        duration_ms: Date.now() - t0,
        download_ms: downloadMs,
        cost_usd: 0,
        job_id: jobId,
        error: (err?.message || String(err)).slice(0, 200),
      });
      const e = await errorEnvelope({
        error: err?.message || "backend_error",
        installId,
        tier,
        statusHint: err?.status || 502,
      });
      return res.status(e.statusHint).json(e.body);
    } finally {
      // Runs on success and failure alike; the audio is already in memory
      // (or no longer needed) by this point.
      await removeTmpDir(tmpDir);
    }

    // ── Commit + audit ─────────────────────────────────────────────
    let creditCharged = 0;
    if (!reusedJob) {
      await commitCredit(installId, { backend: chosenBackend, tier });
      markJobCharged(installId, jobId, { backend: chosenBackend, tier });
      creditCharged = 1;
    }
    // Only Gemini results that report usage carry token-based cost.
    const costDetails =
      chosenBackend === "gemini" && result?.usage
        ? calcGeminiCost(result.model, result.usage)
        : {
            input_tokens: 0,
            output_tokens: 0,
            thinking_tokens: 0,
            cost_usd: 0,
          };
    await recordCall({
      install_id: installId,
      tier,
      pipeline: "transcribe",
      backend: chosenBackend,
      model: result?.model || null,
      status: "success",
      credit_charged: creditCharged,
      duration_ms: Date.now() - t0,
      download_ms: downloadMs,
      audio_bytes: audio.bytes,
      job_id: jobId,
      ...costDetails,
    });

    const body = await envelope({ result, installId, tier, creditCharged });
    res.json(body);
  });

  return router;
}
|
||||||
@@ -11,8 +11,9 @@ import { v_0_2_7 } from './v0.2.7'
|
|||||||
import { v_0_2_8 } from './v0.2.8'
|
import { v_0_2_8 } from './v0.2.8'
|
||||||
import { v_0_2_9 } from './v0.2.9'
|
import { v_0_2_9 } from './v0.2.9'
|
||||||
import { v_0_2_10 } from './v0.2.10'
|
import { v_0_2_10 } from './v0.2.10'
|
||||||
|
import { v_0_2_11 } from './v0.2.11'
|
||||||
|
|
||||||
export const versionGraph = VersionGraph.of({
|
export const versionGraph = VersionGraph.of({
|
||||||
current: v_0_2_10,
|
current: v_0_2_11,
|
||||||
other: [v_0_2_9, v_0_2_8, v_0_2_7, v_0_2_6, v_0_2_5, v_0_2_4, v_0_2_3, v_0_2_2, v_0_2_1, v_0_2_0, v_0_1_0],
|
other: [v_0_2_10, v_0_2_9, v_0_2_8, v_0_2_7, v_0_2_6, v_0_2_5, v_0_2_4, v_0_2_3, v_0_2_2, v_0_2_1, v_0_2_0, v_0_1_0],
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -0,0 +1,13 @@
|
|||||||
|
import { VersionInfo } from '@start9labs/start-sdk'
|
||||||
|
|
||||||
|
// English release notes for 0.2.11, kept separate from the version record
// so the record itself stays short.
const RELEASE_NOTES_EN_US =
  'New GET /relay/capabilities endpoint tells Recap clients the max audio size/duration the relay can comfortably handle given its current backend-routing preference. New POST /relay/transcribe-url endpoint downloads YouTube or direct audio URLs server-side so Recap clients no longer have to upload large audio files from their home connection. Docker image now ships yt-dlp + ffmpeg + python (~150MB larger).'

// Version record for 0.2.11. Both migrations are intentional no-ops.
export const v_0_2_11 = VersionInfo.of({
  version: '0.2.11:0',
  releaseNotes: { en_US: RELEASE_NOTES_EN_US },
  migrations: {
    up: async () => {},
    down: async () => {},
  },
})
|
||||||
Reference in New Issue
Block a user