0ae59f3550
Introduces RECAP_MODE=multi alongside single-mode self-host: - Tenant auth + accounts (magic-link via System SMTP), per-tenant credit pool, anonymous trial minting with per-IP/-64 caps - Self-serve Pro/Max purchase: inline Lightning (BTCPay) + card (Zaprite), prepaid 30-day periods, expiry-reminder emails - Core-decoupling: relay owns cloud tier/expiry keyed by Recaps user-id - SQLite (better-sqlite3) schema for multi-mode; filesystem unchanged for single - StartOS actions/versions through 0.2.155
476 lines
18 KiB
JavaScript
476 lines
18 KiB
JavaScript
// Resolves "share URLs" from Apple Podcasts, Spotify, and Fountain into
// something Recap's existing podcast pipeline can swallow. Most users
// share these links rather than the underlying RSS feed (which they
// rarely know exists), so transparent resolution turns the most common
// podcast share path into "paste link, hit summarize, done".
//
// Apple Podcasts URLs resolve directly via the public iTunes Lookup API:
// the episode result includes `episodeUrl` (the audio enclosure) and
// the show's `feedUrl`. No API key required, no auth.
//
// Spotify URLs are harder: Spotify-hosted audio is DRM-wrapped and not
// served via a public stream URL. We use the unauthenticated oEmbed
// endpoint to get the episode + show titles, then ask PodcastIndex to
// find the same episode in its RSS-indexed catalog. Spotify Originals
// (Joe Rogan, Anchor exclusives, …) have no RSS counterpart and fail
// the lookup — we surface a clear error in that case so the user
// understands and can paste the RSS link manually.
//
// Returns a normalized shape that maps cleanly onto Recap's existing
// podcast pipeline:
//   {
//     source: "apple" | "spotify" | "fountain",
//     audioUrl: string,      // direct audio URL (.mp3/.m4a) — feeds the existing podcast path
//     episodeId: string,     // stable GUID used by history dedup
//     title: string,
//     podcastTitle: string,
//     uploadDate: string,    // "YYYYMMDD"
//     durationSec: number?,  // null when unknown
//     feedUrl: string?,      // for context; not required downstream
//   }
//
// Throws `URLResolveError` with a `.code` field for things the UI may
// want to format specifically:
//   - "spotify_no_rss"               → episode is Spotify-exclusive
//   - "episode_not_found"            → looked up but couldn't match
//   - "apple_lookup_failed"
//   - "fountain_lookup_failed"
//   - "podcastindex_unconfigured"    → caller didn't pass keys
//   - "podcastindex_not_implemented" (listed historically; not thrown anywhere in this file)
|
|
|
|
import crypto from "crypto";
|
|
import { fetchUrl } from "./util.js";
|
|
|
|
/**
 * Error type thrown by every resolver in this module.
 *
 * `code` is a short machine-readable identifier (for example
 * "episode_not_found") that callers can branch on; `message` is the
 * human-readable explanation.
 */
export class URLResolveError extends Error {
  /**
   * @param {string} code - Machine-readable failure identifier.
   * @param {string} message - Human-readable description of the failure.
   * @param {{ cause?: unknown }} [options] - Forwarded to `Error`, so the
   *   underlying error can be attached as `cause`. Optional.
   */
  constructor(code, message, options) {
    super(message, options);
    this.name = "URLResolveError";
    this.code = code;
  }
}
|
|
|
|
// Matches an Apple Podcasts *episode* link and captures:
//   [1] the show's numeric id (the digits after "/id")
//   [2] the episode track id (the `i=` query parameter)
// The country segment ("/us/") and the show slug are both optional, so
// short links such as /us/podcast/id123?i=456 are accepted as well as
// the full /us/podcast/<slug>/id123?i=456 form.
const APPLE_EPISODE_URL_RE =
  /^https?:\/\/(?:www\.)?podcasts\.apple\.com\/(?:[^/]+\/)?podcast\/(?:[^/]+\/)?id(\d+)(?:\?|.*[?&])i=(\d+)/i;
|
|
|
|
// Detection only — no I/O.

/**
 * Reports whether `url` is an http(s) link on podcasts.apple.com
 * (optionally prefixed with "www."). Only the host is checked here; the
 * episode-specific parts are validated later by the resolver.
 *
 * @param {string} url - Candidate URL; any falsy value yields false.
 * @returns {boolean}
 */
export function isApplePodcastUrl(url) {
  if (!url) return false;
  return /^https?:\/\/(?:www\.)?podcasts\.apple\.com\//i.test(url);
}
|
|
|
|
/**
 * Reports whether `url` is an http(s) link to a Spotify episode or show
 * page on open.spotify.com or play.spotify.com. No I/O.
 *
 * @param {string} url - Candidate URL; any falsy value yields false.
 * @returns {boolean}
 */
export function isSpotifyUrl(url) {
  if (!url) return false;
  return /^https?:\/\/(?:open|play)\.spotify\.com\/(?:episode|show)\//i.test(url);
}
|
|
|
|
// Fountain (https://fountain.fm) is a Bitcoin-Lightning podcast app
// that hosts a Podcasting 2.0-native catalog. Episode pages are at
// /episode/<short-id>; the underlying media is served from
// feeds.fountain.fm and exposed via standard Open Graph tags
// (og:audio, og:image, og:title) on the public episode HTML — no API
// key required to resolve. Show pages (/show/<id>) aren't supported
// for now; users should paste a specific episode link.

/**
 * Reports whether `url` is an http(s) Fountain *episode* link
 * (fountain.fm/episode/…, optionally with "www."). No I/O.
 *
 * @param {string} url - Candidate URL; any falsy value yields false.
 * @returns {boolean}
 */
export function isFountainUrl(url) {
  if (!url) return false;
  return /^https?:\/\/(?:www\.)?fountain\.fm\/episode\//i.test(url);
}
|
|
|
|
// True if the URL is one of the "share link" forms we know how to turn
// into a podcast audio URL. Callers should only invoke the network-
// touching resolvers when this returns true.

/**
 * @param {string} url - Candidate URL.
 * @returns {boolean} true when any of the Apple / Spotify / Fountain
 *   detectors accepts the URL.
 */
export function isResolvableShareUrl(url) {
  return isApplePodcastUrl(url) || isSpotifyUrl(url) || isFountainUrl(url);
}
|
|
|
|
// ── Apple Podcasts ─────────────────────────────────────────────────────
// Strategy: parse the podcast ID + episode track ID out of the URL,
// hit iTunes Lookup, find the matching episode by trackId. Apple
// returns the episode's actual audio enclosure URL — same URL the
// Apple Podcasts app streams from — so the existing podcast download
// pipeline (audio.downloadPodcastAudio) can swallow it unchanged.

/**
 * Resolves an Apple Podcasts episode link to the normalized episode shape.
 *
 * @param {string} url - Apple Podcasts episode URL (must carry the `?i=` param).
 * @returns {Promise<{source: "apple", audioUrl: string, episodeId: string,
 *   title: string, podcastTitle: string, uploadDate: string,
 *   durationSec: (number|null), feedUrl: (string|null)}>}
 * @throws {URLResolveError} code "apple_lookup_failed" when the URL cannot be
 *   parsed or the lookup request/JSON parse fails; code "episode_not_found"
 *   when the episode is absent from the response or has no audio URL.
 */
export async function resolveApplePodcastUrl(url) {
  // Non-string input is reported as a resolver error rather than a TypeError.
  const m = typeof url === "string" ? url.match(APPLE_EPISODE_URL_RE) : null;
  if (!m) {
    throw new URLResolveError(
      "apple_lookup_failed",
      "Apple Podcasts URL is missing podcast ID or episode ID (?i= param)"
    );
  }
  const [, podcastId, episodeTrackId] = m;

  // The lookup endpoint returns the show metadata as result[0] and the
  // most-recent N episodes as result[1..]. Apple silently caps at 200
  // even if you ask for more.
  const lookupUrl = `https://itunes.apple.com/lookup?id=${encodeURIComponent(
    podcastId
  )}&entity=podcastEpisode&limit=200`;
  let parsed;
  try {
    const raw = await fetchUrl(lookupUrl);
    parsed = JSON.parse(raw);
  } catch (err) {
    throw new URLResolveError(
      "apple_lookup_failed",
      `iTunes lookup failed: ${err?.message || err}`
    );
  }

  const results = Array.isArray(parsed?.results) ? parsed.results : [];
  // `?.` keeps a stray null entry in the response from crashing the search.
  const show =
    results.find((r) => r?.wrapperType === "track" || r?.kind === "podcast") || {};
  const episode = results.find(
    (r) =>
      r?.wrapperType === "podcastEpisode" &&
      String(r.trackId) === String(episodeTrackId)
  );
  if (!episode) {
    throw new URLResolveError(
      "episode_not_found",
      `Apple returned ${results.length} results for podcast ${podcastId} but episode ${episodeTrackId} was not among them. The episode may be older than Apple's 200-episode lookup cap.`
    );
  }
  if (!episode.episodeUrl) {
    // The episode WAS found, so the "older than the cap" explanation would
    // be misleading here; report the missing enclosure instead.
    throw new URLResolveError(
      "episode_not_found",
      `Apple lists episode ${episodeTrackId} of podcast ${podcastId} but did not return an audio URL for it.`
    );
  }

  // releaseDate is ISO 8601; collapse to YYYYMMDD to match the rest of
  // the pipeline's date convention. Unparseable dates become "".
  let uploadDate = "";
  if (episode.releaseDate) {
    const released = new Date(episode.releaseDate);
    if (!Number.isNaN(released.getTime())) {
      uploadDate = released.toISOString().slice(0, 10).replaceAll("-", "");
    }
  }

  const durationSec =
    typeof episode.trackTimeMillis === "number"
      ? Math.round(episode.trackTimeMillis / 1000)
      : null;

  return {
    source: "apple",
    audioUrl: episode.episodeUrl,
    episodeId: episode.episodeGuid || `apple-${episodeTrackId}`,
    title: episode.trackName || show.collectionName || "Untitled episode",
    podcastTitle: show.collectionName || episode.collectionName || "Unknown podcast",
    uploadDate,
    durationSec,
    feedUrl: show.feedUrl || null,
  };
}
|
|
|
|
// ── Spotify ────────────────────────────────────────────────────────────
// Strategy: oEmbed for title/show, then PodcastIndex search to map
// title+show → RSS feed → episode → audio enclosure. Spotify-exclusive
// content has no RSS counterpart and fails the lookup with a clear
// `spotify_no_rss` error.
//
// PodcastIndex auth (https://podcastindex.org/api/dev) requires:
//   - `User-Agent` header (PodcastIndex blocks anonymous UAs)
//   - `X-Auth-Key`: API key (free, signup at api.podcastindex.org)
//   - `X-Auth-Date`: unix timestamp (current time)
//   - `Authorization`: sha1(apiKey + apiSecret + apiDate)

// Ceiling for each PodcastIndex request so a hung server cannot stall
// the summarize pipeline indefinitely.
const PODCASTINDEX_TIMEOUT_MS = 15000;

// Reduces a title to a lowercase key made only of letters and digits so
// punctuation, spacing and accents don't defeat comparison. Letters and
// digits from any script are kept (an ASCII-only filter would collapse
// non-Latin titles to "" and make every comparison "match").
function normalizeTitleKey(s) {
  return String(s ?? "")
    .normalize("NFKD")
    .replace(/\p{M}/gu, "")
    .toLowerCase()
    .replace(/[^\p{L}\p{N}]/gu, "");
}

// Picks the episode whose title best matches `wantedTitle`: exact key
// match first, then either key containing the other. Empty keys never
// match, because "abc".includes("") is always true.
function pickEpisodeByTitle(episodes, wantedTitle) {
  const targetKey = normalizeTitleKey(wantedTitle);
  if (!targetKey) return null;
  const keyed = episodes
    .map((episode) => ({ episode, key: normalizeTitleKey(episode?.title) }))
    .filter(({ key }) => key !== "");
  const hit =
    keyed.find(({ key }) => key === targetKey) ||
    keyed.find(({ key }) => key.includes(targetKey)) ||
    keyed.find(({ key }) => targetKey.includes(key));
  return hit ? hit.episode : null;
}

// GETs a PodcastIndex endpoint with signed headers and returns the parsed
// JSON body. Throws on non-2xx so auth/rate-limit failures are not
// mistaken for "no results".
async function podcastIndexGetJson(endpointUrl, credentials) {
  const response = await fetch(endpointUrl, {
    headers: buildPodcastIndexHeaders(credentials),
    signal: AbortSignal.timeout(PODCASTINDEX_TIMEOUT_MS),
  });
  if (!response.ok) {
    const hint =
      response.status === 401 || response.status === 403
        ? " (check your PodcastIndex API key and secret)"
        : "";
    throw new Error(`HTTP ${response.status}${hint}`);
  }
  return await response.json();
}

/**
 * Resolves a Spotify episode link to the normalized episode shape by
 * looking the episode up in PodcastIndex.
 *
 * @param {string} url - Spotify share URL.
 * @param {{podcastIndexKey?: string, podcastIndexSecret?: string}} [credentials]
 *   PodcastIndex API key and secret; both are required.
 * @returns {Promise<{source: "spotify", audioUrl: string, episodeId: string,
 *   title: string, podcastTitle: string, uploadDate: string,
 *   durationSec: (number|null), feedUrl: (string|null)}>}
 * @throws {URLResolveError} code "podcastindex_unconfigured" when a credential
 *   is missing; "spotify_no_rss" when no feed is found; "episode_not_found"
 *   for oEmbed/PodcastIndex request failures and unmatched episodes.
 */
export async function resolveSpotifyUrl(url, { podcastIndexKey, podcastIndexSecret } = {}) {
  if (!podcastIndexKey || !podcastIndexSecret) {
    throw new URLResolveError(
      "podcastindex_unconfigured",
      "Spotify needs both a free PodcastIndex API Key AND API Secret. Sign up at api.podcastindex.org — your account page shows both credentials side-by-side. Paste them in Recaps → Settings → API Keys → PodcastIndex. (Apple Podcasts and Fountain links work without any API key — try those for the same episode if it's also distributed there.)"
    );
  }
  const credentials = { podcastIndexKey, podcastIndexSecret };

  // oEmbed gives us episode title + show name with no auth.
  let episodeTitle = "";
  let showName = "";
  try {
    const oemRaw = await fetchUrl(
      `https://open.spotify.com/oembed?url=${encodeURIComponent(url)}`
    );
    const oem = JSON.parse(oemRaw);
    episodeTitle = (oem?.title || "").trim();
    // oEmbed's "title" includes the show name in some Spotify variants
    // (e.g. "<episode> · <show>"). Split if we see the delimiter.
    const sep = episodeTitle.lastIndexOf(" · ");
    if (sep > 0) {
      showName = episodeTitle.slice(sep + 3).trim();
      episodeTitle = episodeTitle.slice(0, sep).trim();
    }
  } catch (err) {
    throw new URLResolveError(
      "episode_not_found",
      `Could not fetch Spotify episode metadata: ${err?.message || err}`
    );
  }
  if (!episodeTitle) {
    throw new URLResolveError(
      "episode_not_found",
      "Spotify oEmbed returned no episode title"
    );
  }

  // search/byterm takes a free-text query; the candidate feeds are read
  // from the `feeds` array of its response.
  const q = encodeURIComponent(
    showName ? `${episodeTitle} ${showName}` : episodeTitle
  );
  const searchUrl = `https://api.podcastindex.org/api/1.0/search/byterm?q=${q}&max=5`;

  let candidate = null;
  try {
    const data = await podcastIndexGetJson(searchUrl, credentials);
    const feeds = Array.isArray(data?.feeds) ? data.feeds : [];
    // Best-match heuristic: prefer a feed whose title fuzzy-matches the
    // show name, fall back to the first result.
    const showKey = normalizeTitleKey(showName);
    candidate =
      (showKey && feeds.find((f) => normalizeTitleKey(f?.title).includes(showKey))) ||
      feeds[0] ||
      null;
  } catch (err) {
    throw new URLResolveError(
      "episode_not_found",
      `PodcastIndex feed search failed: ${err?.message || err}`
    );
  }
  if (!candidate || !candidate.id) {
    throw new URLResolveError(
      "spotify_no_rss",
      `This Spotify episode "${episodeTitle}" doesn't appear in PodcastIndex. It may be a Spotify exclusive (Spotify Originals, Anchor-only shows). Paste the show's RSS feed URL instead, or use the YouTube version if available.`
    );
  }

  // Pull the episode list for the matched feed and find the closest
  // title match.
  let episodes = [];
  try {
    const data = await podcastIndexGetJson(
      `https://api.podcastindex.org/api/1.0/episodes/byfeedid?id=${encodeURIComponent(
        candidate.id
      )}&max=200`,
      credentials
    );
    episodes = Array.isArray(data?.items) ? data.items : [];
  } catch (err) {
    throw new URLResolveError(
      "episode_not_found",
      `PodcastIndex episode lookup failed: ${err?.message || err}`
    );
  }

  const episode = pickEpisodeByTitle(episodes, episodeTitle);
  if (!episode || !episode.enclosureUrl) {
    throw new URLResolveError(
      "episode_not_found",
      `Matched "${candidate.title}" in PodcastIndex but couldn't find an episode titled "${episodeTitle}". Episode may be too new for PodcastIndex's snapshot, or only available on Spotify.`
    );
  }

  // datePublished is multiplied by 1000 to build a Date (i.e. treated as
  // unix seconds); out-of-range or non-numeric values leave the date "".
  let uploadDate = "";
  if (episode.datePublished) {
    const published = new Date(episode.datePublished * 1000);
    if (!Number.isNaN(published.getTime())) {
      uploadDate = published.toISOString().slice(0, 10).replaceAll("-", "");
    }
  }

  return {
    source: "spotify",
    audioUrl: episode.enclosureUrl,
    episodeId: episode.guid || `spotify-${episode.id}`,
    title: episode.title || episodeTitle,
    podcastTitle: candidate.title || showName || "Unknown podcast",
    uploadDate,
    durationSec:
      typeof episode.duration === "number" ? episode.duration : null,
    feedUrl: candidate.url || null,
  };
}
|
|
|
|
// ── Fountain ───────────────────────────────────────────────────────────
// Strategy: fetch the episode HTML, parse Open Graph + JSON-LD tags
// for the audio URL, title, podcast name, and upload date. Fountain
// serves the actual MP3 enclosure URL on og:audio so we don't need
// any PodcastIndex lookup or API key. Same shape as the Apple
// resolver returns so the downstream podcast pipeline doesn't have
// to branch.
//
// The og:title format is "Show • Episode title • Watch on Fountain".
// We split on " • " to separate show + episode; the trailing "Watch
// on Fountain" branding gets dropped.
//
// Fountain URLs encode the episode in a short opaque id at the URL
// path; we use that as our episodeId for history dedup.

// Returns the entity-decoded, non-empty content="" value of the first
// <meta> tag whose property= or name= equals `key` (case-insensitive),
// or null. Attribute order and quote style (" or ') don't matter.
function extractMetaContent(html, key) {
  const wanted = String(key).toLowerCase();
  // Quoted values are consumed as units so a ">" inside one doesn't end the tag.
  for (const [tag] of html.matchAll(/<meta\b(?:[^>"']|"[^"]*"|'[^']*')*>/gi)) {
    let id = null;
    let content = null;
    for (const [, attrName, dq, sq] of tag.matchAll(
      /([a-z][\w:.-]*)\s*=\s*(?:"([^"]*)"|'([^']*)')/gi
    )) {
      const attr = attrName.toLowerCase();
      const value = dq ?? sq;
      if (attr === "property" || attr === "name") {
        id ??= value;
      } else if (attr === "content") {
        content ??= value;
      }
    }
    if (id !== null && id.toLowerCase() === wanted && content) {
      return decodeHtmlEntities(content);
    }
  }
  return null;
}

/**
 * Resolves a Fountain episode link to the normalized episode shape by
 * scraping the public episode page.
 *
 * @param {string} url - Fountain URL containing an /episode/<id> path.
 * @returns {Promise<{source: "fountain", audioUrl: string, episodeId: string,
 *   title: string, podcastTitle: string, uploadDate: string,
 *   durationSec: (number|null), feedUrl: null}>}
 * @throws {URLResolveError} code "fountain_lookup_failed" for a malformed URL,
 *   network/HTTP failure, empty body, or a page without an og:audio tag.
 */
export async function resolveFountainUrl(url) {
  const m =
    typeof url === "string" ? url.match(/\/episode\/([A-Za-z0-9_-]+)/) : null;
  if (!m) {
    throw new URLResolveError(
      "fountain_lookup_failed",
      "Fountain URL is missing the /episode/<id> path",
    );
  }
  const shortId = m[1];

  // Use global fetch directly (Node 18+) so we can send a UA header.
  // fetchUrl() in util.js doesn't take options; we don't want to
  // expand its signature just for this one caller. Fountain's SSR
  // response includes the og:audio tag we need for ANY UA, but mimic
  // a modern Safari to stay on the well-tested response path.
  let html;
  try {
    const res = await fetch(url, {
      headers: {
        "User-Agent":
          "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_0) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15",
      },
      // Aggressive timeout — Fountain's page is small (~120KB) and
      // we shouldn't hold up the summarize pipeline if their server
      // hangs. AbortSignal.timeout is Node 18+, same baseline.
      signal: AbortSignal.timeout(8000),
    });
    if (!res.ok) {
      throw new URLResolveError(
        "fountain_lookup_failed",
        `Fountain returned HTTP ${res.status}`,
      );
    }
    html = await res.text();
  } catch (err) {
    // Let our own HTTP-status error through; wrap everything else.
    if (err instanceof URLResolveError) throw err;
    throw new URLResolveError(
      "fountain_lookup_failed",
      `Couldn't reach Fountain: ${err?.message || err}`,
    );
  }
  if (!html || typeof html !== "string") {
    throw new URLResolveError(
      "fountain_lookup_failed",
      "Empty response from Fountain",
    );
  }

  // Fountain uses property= for OG and name= for Twitter — the
  // extractor matches either.
  const audioUrl = extractMetaContent(html, "og:audio");
  if (!audioUrl) {
    throw new URLResolveError(
      "fountain_lookup_failed",
      "Fountain episode has no og:audio tag — the page may have changed format or the episode is video-only.",
    );
  }

  const ogTitleRaw = extractMetaContent(html, "og:title") || "";
  // og:title format: "Show • Episode • Watch on Fountain". Strip the
  // trailing brand and split.
  const titlePieces = ogTitleRaw
    .replace(/\s*•\s*Watch on Fountain\s*$/i, "")
    .split(/\s*•\s*/);
  const podcastTitle = titlePieces[0] || "Podcast";
  // An episode title may itself contain " • ", so re-join the remainder.
  const episodeTitle = titlePieces.slice(1).join(" • ") || podcastTitle;

  // JSON-LD on the page carries an ISO uploadDate. We don't parse
  // the full JSON; a targeted regex is enough. Whitespace after the
  // colon is tolerated in case the JSON is pretty-printed.
  const uploadDateMatch = html.match(/"uploadDate"\s*:\s*"([^"]+)"/);
  const uploadDateRaw = uploadDateMatch ? uploadDateMatch[1] : "";
  const uploadDate = isoToYYYYMMDD(uploadDateRaw);

  // ISO-8601 duration (e.g. "PT2H7M27S") → seconds. Optional —
  // null if absent.
  const durationMatch = html.match(/"duration"\s*:\s*"(PT[0-9HMS]+)"/);
  const durationSec = durationMatch
    ? iso8601DurationToSeconds(durationMatch[1])
    : null;

  return {
    source: "fountain",
    audioUrl,
    episodeId: `fountain:${shortId}`,
    title: episodeTitle,
    podcastTitle,
    uploadDate,
    durationSec,
    feedUrl: null, // Fountain doesn't always expose the source RSS URL
  };
}
|
|
|
|
// "2026-05-07T20:53:13.003Z" → "20260507". Returns empty string on
// unparseable input so the downstream pipeline treats it as unknown.

/**
 * @param {string} iso - Date string accepted by the `Date` constructor.
 * @returns {string} The UTC calendar date as "YYYYMMDD", or "" when `iso`
 *   is falsy or does not parse.
 */
function isoToYYYYMMDD(iso) {
  if (!iso) return "";
  const d = new Date(iso);
  if (Number.isNaN(d.getTime())) return "";
  // Pad the year too so the result is always eight characters.
  const y = String(d.getUTCFullYear()).padStart(4, "0");
  const m = String(d.getUTCMonth() + 1).padStart(2, "0");
  const day = String(d.getUTCDate()).padStart(2, "0");
  return `${y}${m}${day}`;
}
|
|
|
|
// "PT2H7M27S" → 7647 seconds. Handles the subset of ISO-8601 that
// podcasts actually use (no fractional, no days).

/**
 * @param {string} s - Duration of the form PT[nH][nM][nS].
 * @returns {number|null} Total seconds, or null when `s` is not a string,
 *   doesn't match the supported form, or has no components at all ("PT").
 */
function iso8601DurationToSeconds(s) {
  if (typeof s !== "string") return null;
  const m = s.match(/^PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?$/);
  if (!m) return null;
  const [, hours, minutes, seconds] = m;
  // A bare "PT" carries no duration; report it as unknown rather than 0.
  if (hours === undefined && minutes === undefined && seconds === undefined) {
    return null;
  }
  const h = Number.parseInt(hours ?? "0", 10);
  const min = Number.parseInt(minutes ?? "0", 10);
  const sec = Number.parseInt(seconds ?? "0", 10);
  return h * 3600 + min * 60 + sec;
}
|
|
|
|
/**
 * Decodes the HTML entities that can appear inside an attribute value:
 * &amp; &lt; &gt; &quot; &apos; and numeric references (&#39; / &#x27;).
 *
 * Decoding happens in a single pass, so already-escaped text such as
 * "&amp;lt;" becomes "&lt;" and is not decoded a second time.
 *
 * @param {*} s - Value to decode; coerced with String().
 * @returns {string} The decoded text. Unknown named entities and
 *   out-of-range numeric references are left unchanged.
 */
function decodeHtmlEntities(s) {
  const named = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" };
  return String(s).replace(
    /&(?:#(\d+)|#x([0-9a-f]+)|(amp|lt|gt|quot|apos));/gi,
    (whole, dec, hex, name) => {
      if (name !== undefined) return named[name.toLowerCase()];
      const codePoint =
        dec !== undefined ? Number.parseInt(dec, 10) : Number.parseInt(hex, 16);
      // String.fromCodePoint throws outside the Unicode range; keep the
      // original text instead of failing the whole resolve.
      return codePoint > 0 && codePoint <= 0x10ffff
        ? String.fromCodePoint(codePoint)
        : whole;
    }
  );
}
|
|
|
|
/**
 * Builds the request headers for an authenticated PodcastIndex call.
 *
 * The `Authorization` value is the hex SHA-1 of key + secret + timestamp,
 * where the timestamp is the current time in whole unix seconds and is
 * also sent as `X-Auth-Date`.
 *
 * @param {{podcastIndexKey: string, podcastIndexSecret: string}} credentials
 * @returns {{"User-Agent": string, "X-Auth-Key": string,
 *   "X-Auth-Date": string, Authorization: string}}
 */
function buildPodcastIndexHeaders({ podcastIndexKey, podcastIndexSecret }) {
  const date = Math.floor(Date.now() / 1000).toString();
  const sig = crypto
    .createHash("sha1")
    .update(`${podcastIndexKey}${podcastIndexSecret}${date}`)
    .digest("hex");
  return {
    "User-Agent": "Recap/1.0 (+https://github.com/keysat-xyz/recap)",
    "X-Auth-Key": podcastIndexKey,
    "X-Auth-Date": date,
    Authorization: sig,
  };
}
|
|
|
|
// Single entry point: takes any URL and a config object, returns
// either the normalized resolved shape (for apple/spotify/fountain) or
// null (for URLs we don't recognize as share links — caller passes
// those through to the existing youtube / rss path unchanged).

/**
 * @param {string} url - Any URL the user pasted.
 * @param {object} [opts] - Passed through to the Spotify resolver only
 *   (it reads `podcastIndexKey` / `podcastIndexSecret`).
 * @returns {Promise<object|null>} The resolver's result, or null when no
 *   detector recognizes the URL. Resolver errors propagate to the caller.
 */
export async function resolveShareUrl(url, opts = {}) {
  if (isApplePodcastUrl(url)) {
    return resolveApplePodcastUrl(url);
  }
  if (isSpotifyUrl(url)) {
    return resolveSpotifyUrl(url, opts);
  }
  if (isFountainUrl(url)) {
    return resolveFountainUrl(url);
  }
  return null;
}
|