Files
recap/server/url-resolver.js
Keysat 0ae59f3550 Add multi-tenant cloud mode: self-serve purchase, credit metering, core-decoupling
Introduces RECAP_MODE=multi alongside single-mode self-host:
- Tenant auth + accounts (magic-link via System SMTP), per-tenant credit pool,
  anonymous trial minting with per-IP/-64 caps
- Self-serve Pro/Max purchase: inline Lightning (BTCPay) + card (Zaprite),
  prepaid 30-day periods, expiry-reminder emails
- Core-decoupling: relay owns cloud tier/expiry keyed by Recaps user-id
- SQLite (better-sqlite3) schema for multi-mode; filesystem unchanged for single
- StartOS actions/versions through 0.2.155
2026-06-13 14:25:05 -05:00

476 lines
18 KiB
JavaScript

// Resolves "share URLs" from Apple Podcasts, Spotify, and Fountain into something
// Recap's existing podcast pipeline can swallow. Most users share these
// links rather than the underlying RSS feed (which they rarely know
// exists), so transparent resolution turns the most common podcast
// share path into "paste link, hit summarize, done".
//
// Apple Podcasts URLs resolve directly via the public iTunes Lookup API:
// the episode result includes `episodeUrl` (the audio enclosure) and
// the show's `feedUrl`. No API key required, no auth.
//
// Spotify URLs are harder: Spotify-hosted audio is DRM-wrapped and not
// served via a public stream URL. We use the unauthenticated oEmbed
// endpoint to get the episode + show titles, then ask PodcastIndex to
// find the same episode in its RSS-indexed catalog. Spotify Originals
// (Joe Rogan, Anchor exclusives, …) have no RSS counterpart and fail
// the lookup — we surface a clear error in that case so the user
// understands and can paste the RSS link manually.
//
// Returns a normalized shape that maps cleanly onto Recap's existing
// podcast pipeline:
// {
// source: "apple" | "spotify" | "fountain",
// audioUrl: string, // direct audio URL (.mp3/.m4a) — feeds the existing podcast path
// episodeId: string, // stable GUID used by history dedup
// title: string,
// podcastTitle: string,
// uploadDate: string, // "YYYYMMDD"
// durationSec: number?, // null when unknown
// feedUrl: string?, // for context; not required downstream
// }
//
// Throws `URLResolveError` with a `.code` field for things the UI may
// want to format specifically:
// - "spotify_no_rss" → episode is Spotify-exclusive
// - "episode_not_found" → looked up but couldn't match
// - "apple_lookup_failed"
// - "fountain_lookup_failed"
// - "podcastindex_unconfigured" → caller didn't pass keys
// - "podcastindex_not_implemented" → listed historically; no resolver in this file throws it
import crypto from "crypto";
import { fetchUrl } from "./util.js";
// Error type thrown by the resolvers in this module.
//
// code:    short machine-readable identifier exposed as `.code`
//          (e.g. "episode_not_found") for the UI to branch on.
// message: human-readable explanation, exposed as `.message`.
// options: optional bag; `options.cause` is forwarded to the native
//          Error `cause` so a wrapped failure keeps the underlying
//          error. Omitting it behaves exactly like the two-argument
//          form (no `cause` property is created).
export class URLResolveError extends Error {
  constructor(code, message, options) {
    // Only hand Error an options bag when a cause was actually given,
    // so existing two-argument callers get an identical object.
    super(
      message,
      options?.cause === undefined ? undefined : { cause: options.cause }
    );
    this.name = "URLResolveError";
    this.code = code;
  }
}
// Matches an Apple Podcasts *episode* share URL and captures:
//   [1] the show's numeric id  (the `id<digits>` path segment)
//   [2] the episode's track id (the `i=` query parameter)
// The storefront segment (e.g. `/us/`) and the show-slug segment are
// both optional so shortened links such as
// `https://podcasts.apple.com/podcast/id123?i=456` resolve too.
// The `i=` parameter may appear anywhere in the query string.
const APPLE_EPISODE_URL_RE =
  /^https?:\/\/(?:www\.)?podcasts\.apple\.com\/(?:[^/]+\/)?podcast\/(?:[^/]+\/)?id(\d+)(?:\?|.*[?&])i=(\d+)/i;
// Detection only — no I/O.
// Returns true when `url` points anywhere on podcasts.apple.com (http or
// https, optional `www.`). Falsy input yields false. Show pages match
// too — only the host prefix is checked here.
export function isApplePodcastUrl(url) {
  const appleHostPrefix = /^https?:\/\/(?:www\.)?podcasts\.apple\.com\//i;
  return url ? appleHostPrefix.test(url) : false;
}
// Returns true for Spotify episode/show links on open.spotify.com or
// play.spotify.com. Detection only — no I/O. Falsy input yields false.
//
// An optional locale segment (`/intl-de/`, `/intl-fr/`, …) directly
// after the host is accepted, so links copied from a localized web
// player are recognized as well.
export function isSpotifyUrl(url) {
  if (!url) return false;
  return /^https?:\/\/(?:open|play)\.spotify\.com\/(?:intl-[a-z-]+\/)?(?:episode|show)\//i.test(
    url
  );
}
// Fountain (https://fountain.fm) is a Bitcoin-Lightning podcast app
// that hosts a Podcasting 2.0-native catalog. Episode pages are at
// /episode/<short-id>; the underlying media is served from
// feeds.fountain.fm and exposed via standard Open Graph tags
// (og:audio, og:image, og:title) on the public episode HTML — no API
// key required to resolve. Show pages (/show/<id>) aren't supported
// for now; users should paste a specific episode link.
// Returns true only for Fountain episode pages
// (`fountain.fm/episode/...`, optional `www.`, http or https).
// Detection only — no I/O. Falsy input yields false.
export function isFountainUrl(url) {
  const fountainEpisodePrefix = /^https?:\/\/(?:www\.)?fountain\.fm\/episode\//i;
  return Boolean(url) && fountainEpisodePrefix.test(url);
}
// True if the URL is one of the "share link" forms we know how to turn
// into a podcast audio URL. Callers should only invoke the network-
// touching resolvers when this returns true.
// Runs the three share-link detectors (Apple, Spotify, Fountain) in
// order and reports whether any of them recognizes `url`. No I/O.
export function isResolvableShareUrl(url) {
  const detectors = [isApplePodcastUrl, isSpotifyUrl, isFountainUrl];
  return detectors.some((recognizes) => recognizes(url));
}
// ── Apple Podcasts ─────────────────────────────────────────────────────
// Strategy: parse the podcast ID + episode track ID out of the URL,
// hit iTunes Lookup, find the matching episode by trackId. Apple
// returns the episode's actual audio enclosure URL — same URL the
// Apple Podcasts app streams from — so the existing podcast download
// pipeline (audio.downloadPodcastAudio) can swallow it unchanged.
// Resolves an Apple Podcasts episode share URL via the iTunes Lookup API.
//
// url: Apple Podcasts link carrying both the show id (`id<digits>` path
//      segment) and the episode id (`i=` query parameter).
// Returns the normalized episode shape with `source: "apple"`.
// Throws URLResolveError:
//   - "apple_lookup_failed" → `url` isn't a string / lacks the ids, or
//                             the lookup request or its JSON parse failed
//   - "episode_not_found"   → the episode is absent from the lookup
//                             response, or present without an audio URL
export async function resolveApplePodcastUrl(url) {
  // Guard the type first: calling `.match` on a non-string would surface
  // as a bare TypeError instead of the URLResolveError callers format.
  const ids = typeof url === "string" ? url.match(APPLE_EPISODE_URL_RE) : null;
  if (!ids) {
    throw new URLResolveError(
      "apple_lookup_failed",
      "Apple Podcasts URL is missing podcast ID or episode ID (?i= param)"
    );
  }
  const [, podcastId, episodeTrackId] = ids;

  // The lookup endpoint returns the show metadata as result[0] and the
  // most-recent N episodes as result[1..]. Apple silently caps at 200
  // even if you ask for more.
  const lookupUrl = `https://itunes.apple.com/lookup?id=${encodeURIComponent(
    podcastId
  )}&entity=podcastEpisode&limit=200`;

  let parsed;
  try {
    const raw = await fetchUrl(lookupUrl);
    parsed = JSON.parse(raw);
  } catch (err) {
    throw new URLResolveError(
      "apple_lookup_failed",
      `iTunes lookup failed: ${err?.message || err}`
    );
  }

  // Drop non-object entries so a stray null in the payload can't crash
  // the property reads in the `.find` callbacks below.
  const results = (Array.isArray(parsed?.results) ? parsed.results : []).filter(
    (r) => r !== null && typeof r === "object"
  );
  const show =
    results.find((r) => r.wrapperType === "track" || r.kind === "podcast") || {};
  const episode = results.find(
    (r) =>
      r.wrapperType === "podcastEpisode" &&
      String(r.trackId) === String(episodeTrackId)
  );

  if (!episode) {
    throw new URLResolveError(
      "episode_not_found",
      `Apple returned ${results.length} results for podcast ${podcastId} but episode ${episodeTrackId} was not among them. The episode may be older than Apple's 200-episode lookup cap.`
    );
  }
  // The episode was found but carries no enclosure: say so, rather than
  // claiming it was missing from the response.
  if (!episode.episodeUrl) {
    throw new URLResolveError(
      "episode_not_found",
      `Apple lists episode ${episodeTrackId} of podcast ${podcastId} but returned no audio URL (episodeUrl) for it, so there is nothing to download.`
    );
  }

  // releaseDate is ISO 8601; collapse to YYYYMMDD (UTC) to match the rest
  // of the pipeline's date convention. Unparseable/absent → "".
  const uploadDate = isoToYYYYMMDD(episode.releaseDate);

  const durationSec =
    typeof episode.trackTimeMillis === "number"
      ? Math.round(episode.trackTimeMillis / 1000)
      : null;

  return {
    source: "apple",
    audioUrl: episode.episodeUrl,
    episodeId: episode.episodeGuid || `apple-${episodeTrackId}`,
    title: episode.trackName || show.collectionName || "Untitled episode",
    podcastTitle: show.collectionName || episode.collectionName || "Unknown podcast",
    uploadDate,
    durationSec,
    feedUrl: show.feedUrl || null,
  };
}
// ── Spotify ────────────────────────────────────────────────────────────
// Strategy: oEmbed for title/show, then PodcastIndex search to map
// title+show → RSS feed → episode → audio enclosure. Spotify-exclusive
// content has no RSS counterpart and fails the lookup with a clear
// `spotify_no_rss` error.
//
// PodcastIndex auth (https://podcastindex.org/api/dev) requires:
// - `User-Agent` header (PodcastIndex blocks anonymous UAs)
// - `X-Auth-Key`: API key (free, signup at api.podcastindex.org)
// - `X-Auth-Date`: unix timestamp (current time)
// - `Authorization`: sha1(apiKey + apiSecret + apiDate)
// Resolves a Spotify episode share URL to the matching RSS enclosure by
// way of Spotify oEmbed (titles) and PodcastIndex (feed + episodes).
//
// url:  Spotify share link. A leading `/intl-xx/` locale segment is
//       stripped before the oEmbed call.
// opts: `{ podcastIndexKey, podcastIndexSecret }` — both required.
// Returns the normalized episode shape with `source: "spotify"`.
// Throws URLResolveError:
//   - "podcastindex_unconfigured" → key or secret missing
//   - "spotify_no_rss"            → PodcastIndex returned no usable feed
//   - "episode_not_found"         → oEmbed failed / gave no usable title,
//                                   a PodcastIndex request failed (including
//                                   non-2xx responses), or no episode of the
//                                   matched feed has a matching title + audio
//
// NOTE(review): isSpotifyUrl also lets `/show/` links through; this
// function treats every link as an episode — confirm that is intended.
export async function resolveSpotifyUrl(url, { podcastIndexKey, podcastIndexSecret } = {}) {
  if (!podcastIndexKey || !podcastIndexSecret) {
    throw new URLResolveError(
      "podcastindex_unconfigured",
      'Spotify needs both a free PodcastIndex API Key AND API Secret. Sign up at api.podcastindex.org — your account page shows both credentials side-by-side. Paste them in Recaps → Settings → API Keys → PodcastIndex. (Apple Podcasts and Fountain links work without any API key — try those for the same episode if it\'s also distributed there.)'
    );
  }

  // Title key for fuzzy comparison: compatibility-decompose, lowercase,
  // keep only letters and digits of ANY script. (An ASCII-only filter
  // collapses non-Latin titles to "", which makes unrelated titles
  // compare equal.) Combining accents are dropped by the filter, so
  // "Café" and "Cafe" still produce the same key.
  const norm = (s) =>
    String(s ?? "")
      .normalize("NFKD")
      .toLowerCase()
      .replace(/[^\p{L}\p{N}]/gu, "");

  // Signed GET against PodcastIndex. Rejects on non-2xx so an auth or
  // server error is reported as a failure instead of being read as an
  // empty result set.
  const fetchPodcastIndexJson = async (endpointUrl) => {
    const headers = buildPodcastIndexHeaders({ podcastIndexKey, podcastIndexSecret });
    const res = await fetch(endpointUrl, { headers });
    if (!res.ok) {
      const hint =
        res.status === 401 || res.status === 403
          ? " — check the PodcastIndex API key and secret"
          : "";
      throw new Error(`HTTP ${res.status}${hint}`);
    }
    return res.json();
  };

  // Drop a locale prefix (`/intl-de/`) so oEmbed gets the plain share URL.
  const canonicalUrl = String(url).replace(
    /^(https?:\/\/(?:open|play)\.spotify\.com\/)intl-[a-z-]+\//i,
    "$1"
  );

  // oEmbed gives us episode title + show name with no auth.
  let episodeTitle = "";
  let showName = "";
  try {
    const oemRaw = await fetchUrl(
      `https://open.spotify.com/oembed?url=${encodeURIComponent(canonicalUrl)}`
    );
    const oem = JSON.parse(oemRaw);
    episodeTitle = (oem?.title || "").trim();
    // oEmbed's "title" includes the show name in some Spotify variants
    // (e.g. "<episode> · <show>"). Split if we see the delimiter.
    const sep = episodeTitle.lastIndexOf(" · ");
    if (sep > 0) {
      showName = episodeTitle.slice(sep + 3).trim();
      episodeTitle = episodeTitle.slice(0, sep).trim();
    }
  } catch (err) {
    throw new URLResolveError(
      "episode_not_found",
      `Could not fetch Spotify episode metadata: ${err?.message || err}`
    );
  }
  if (!episodeTitle) {
    throw new URLResolveError(
      "episode_not_found",
      "Spotify oEmbed returned no episode title"
    );
  }
  const targetKey = norm(episodeTitle);
  if (!targetKey) {
    // Nothing to compare on (punctuation/emoji-only title); an empty key
    // would "match" every episode, so stop before guessing.
    throw new URLResolveError(
      "episode_not_found",
      `Spotify episode title "${episodeTitle}" contains no letters or digits to match against PodcastIndex`
    );
  }

  // The `search/byterm` response is read from `data.feeds`: it yields
  // candidate *feeds* (shows), not episodes. The per-episode match
  // happens further down against the chosen feed's episode list.
  // NOTE(review): the query still includes the episode title — confirm
  // against the PodcastIndex docs that this doesn't hurt feed recall.
  const q = encodeURIComponent(
    showName ? `${episodeTitle} ${showName}` : episodeTitle
  );
  const searchUrl = `https://api.podcastindex.org/api/1.0/search/byterm?q=${q}&max=5`;
  let candidate = null;
  try {
    const data = await fetchPodcastIndexJson(searchUrl);
    const feeds = (Array.isArray(data?.feeds) ? data.feeds : []).filter(Boolean);
    // Best-match heuristic: prefer a feed whose title fuzzy-matches the
    // show name, fall back to the first result.
    const showKey = norm(showName);
    candidate =
      (showKey && feeds.find((f) => norm(f.title).includes(showKey))) ||
      feeds[0] ||
      null;
  } catch (err) {
    throw new URLResolveError(
      "episode_not_found",
      `PodcastIndex feed search failed: ${err?.message || err}`
    );
  }
  if (!candidate || !candidate.id) {
    throw new URLResolveError(
      "spotify_no_rss",
      `This Spotify episode "${episodeTitle}" doesn't appear in PodcastIndex. It may be a Spotify exclusive (Spotify Originals, Anchor-only shows). Paste the show's RSS feed URL instead, or use the YouTube version if available.`
    );
  }

  // Pull the episode list for the matched feed and find the closest
  // title match.
  let episodes = [];
  try {
    const data = await fetchPodcastIndexJson(
      `https://api.podcastindex.org/api/1.0/episodes/byfeedid?id=${encodeURIComponent(
        candidate.id
      )}&max=200`
    );
    episodes = (Array.isArray(data?.items) ? data.items : []).filter(Boolean);
  } catch (err) {
    throw new URLResolveError(
      "episode_not_found",
      `PodcastIndex episode lookup failed: ${err?.message || err}`
    );
  }

  // Compute each key once and skip untitled items: an empty key is a
  // substring of everything and would win the containment fallbacks.
  const keyed = episodes
    .map((item) => ({ item, key: norm(item.title) }))
    .filter(({ key }) => key !== "");
  const matched =
    keyed.find(({ key }) => key === targetKey) ||
    keyed.find(({ key }) => key.includes(targetKey)) ||
    keyed.find(({ key }) => targetKey.includes(key)) ||
    null;
  const episode = matched ? matched.item : null;
  if (!episode || !episode.enclosureUrl) {
    throw new URLResolveError(
      "episode_not_found",
      `Matched "${candidate.title}" in PodcastIndex but couldn't find an episode titled "${episodeTitle}". Episode may be too new for PodcastIndex's snapshot, or only available on Spotify.`
    );
  }

  // datePublished is multiplied by 1000 before being handed to Date,
  // i.e. it is treated as unix seconds; collapse to YYYYMMDD (UTC).
  let uploadDate = "";
  if (episode.datePublished) {
    const published = new Date(episode.datePublished * 1000);
    if (!Number.isNaN(published.getTime())) {
      uploadDate = published.toISOString().slice(0, 10).replace(/-/g, "");
    }
  }

  return {
    source: "spotify",
    audioUrl: episode.enclosureUrl,
    episodeId: episode.guid || `spotify-${episode.id}`,
    title: episode.title || episodeTitle,
    podcastTitle: candidate.title || showName || "Unknown podcast",
    uploadDate,
    durationSec:
      typeof episode.duration === "number" ? episode.duration : null,
    feedUrl: candidate.url || null,
  };
}
// ── Fountain ───────────────────────────────────────────────────────────
// Strategy: fetch the episode HTML, parse Open Graph + JSON-LD tags
// for the audio URL, title, podcast name, and upload date. Fountain
// serves the actual MP3 enclosure URL on og:audio so we don't need
// any PodcastIndex lookup or API key. Same shape as the Apple
// resolver returns so the downstream podcast pipeline doesn't have
// to branch.
//
// The og:title format is "Show • Episode title • Watch on Fountain".
// We split on " • " to separate show + episode; the trailing "Watch
// on Fountain" branding gets dropped.
//
// Fountain URLs encode the episode in a short opaque id at the URL
// path; we use that as our episodeId for history dedup.
// Resolves a Fountain episode page to its audio enclosure by reading
// the page's meta tags and JSON-LD fragments.
//
// url: `http(s)://[www.]fountain.fm/episode/<id>` link.
// Returns the normalized episode shape with `source: "fountain"` and
// `episodeId: "fountain:<id>"`.
// Throws URLResolveError with code "fountain_lookup_failed" when the URL
// isn't a Fountain episode link, the page can't be fetched (network
// error, timeout, non-2xx), the body is empty, or no og:audio is found.
export async function resolveFountainUrl(url) {
  // This function fetches `url` server-side, so verify the host itself
  // instead of trusting that callers ran isFountainUrl first. Parsing
  // also turns a non-string / malformed input into a URLResolveError
  // instead of a TypeError.
  let target = null;
  try {
    target = new URL(url);
  } catch {
    target = null;
  }
  const onFountain =
    target !== null &&
    (target.protocol === "https:" || target.protocol === "http:") &&
    /^(?:www\.)?fountain\.fm$/i.test(target.host);
  if (!onFountain) {
    throw new URLResolveError(
      "fountain_lookup_failed",
      "Not a Fountain link — expected https://fountain.fm/episode/<id>",
    );
  }
  const idMatch = target.pathname.match(/^\/episode\/([A-Za-z0-9_-]+)/i);
  if (!idMatch) {
    throw new URLResolveError(
      "fountain_lookup_failed",
      "Fountain URL is missing the /episode/<id> path",
    );
  }
  const shortId = idMatch[1];

  // Use global fetch directly (Node 18+) so we can send a UA header.
  // fetchUrl() in util.js doesn't take options; we don't want to
  // expand its signature just for this one caller. Fountain's SSR
  // response includes the og:audio tag we need for ANY UA, but mimic
  // a modern Safari to stay on the well-tested response path.
  let html;
  try {
    const res = await fetch(url, {
      headers: {
        "User-Agent":
          "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_0) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15",
      },
      // Aggressive timeout — Fountain's page is small (~120KB) and
      // we shouldn't hold up the summarize pipeline if their server
      // hangs. AbortSignal.timeout is Node 18+, same baseline.
      signal: AbortSignal.timeout(8000),
    });
    if (!res.ok) {
      throw new URLResolveError(
        "fountain_lookup_failed",
        `Fountain returned HTTP ${res.status}`,
      );
    }
    html = await res.text();
  } catch (err) {
    if (err instanceof URLResolveError) throw err;
    throw new URLResolveError(
      "fountain_lookup_failed",
      `Couldn't reach Fountain: ${err?.message || err}`,
    );
  }
  if (!html || typeof html !== "string") {
    throw new URLResolveError(
      "fountain_lookup_failed",
      "Empty response from Fountain",
    );
  }

  // Returns the entity-decoded `content` of the first <meta> tag whose
  // `property` or `name` equals `key` (case-insensitive), or null.
  // Attributes are parsed individually so their order, extra attributes
  // (e.g. data-*), and single vs. double quotes don't matter. Unquoted
  // attribute values are not supported.
  const metaContent = (key) => {
    const wanted = key.toLowerCase();
    // Quoted runs are consumed whole so a ">" inside a value doesn't
    // end the tag early.
    const metaTags = html.matchAll(/<meta\b(?:[^>"']|"[^"]*"|'[^']*')*>/gi);
    for (const [tag] of metaTags) {
      const attrs = new Map();
      const pairs = tag.matchAll(/([^\s"'<>\/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g);
      for (const [, attrName, doubleQuoted, singleQuoted] of pairs) {
        const name = attrName.toLowerCase();
        // First occurrence wins if an attribute is repeated.
        if (!attrs.has(name)) attrs.set(name, doubleQuoted ?? singleQuoted ?? "");
      }
      const identifiesKey = [attrs.get("property"), attrs.get("name")].some(
        (value) => typeof value === "string" && value.toLowerCase() === wanted,
      );
      const content = attrs.get("content");
      if (identifiesKey && content) return decodeHtmlEntities(content);
    }
    return null;
  };

  const audioUrl = metaContent("og:audio");
  if (!audioUrl) {
    throw new URLResolveError(
      "fountain_lookup_failed",
      "Fountain episode has no og:audio tag — the page may have changed format or the episode is video-only.",
    );
  }

  const ogTitleRaw = metaContent("og:title") || "";
  // og:title format: "Show • Episode • Watch on Fountain". Strip the
  // trailing brand and split; any extra " • " stays in the episode title.
  const titlePieces = ogTitleRaw
    .replace(/\s*•\s*Watch on Fountain\s*$/i, "")
    .split(/\s*•\s*/);
  const podcastTitle = titlePieces[0] || "Podcast";
  const episodeTitle = titlePieces.slice(1).join(" • ") || podcastTitle;

  // JSON-LD on the page carries an ISO uploadDate. We don't parse
  // the full JSON; a targeted regex is enough. Whitespace around the
  // colon is tolerated so pretty-printed JSON matches too.
  const uploadDateMatch = html.match(/"uploadDate"\s*:\s*"([^"]+)"/);
  const uploadDate = isoToYYYYMMDD(uploadDateMatch ? uploadDateMatch[1] : "");

  // ISO-8601 duration (e.g. "PT2H7M27S") → seconds. Optional —
  // null if absent.
  const durationMatch = html.match(/"duration"\s*:\s*"(PT[0-9HMS]+)"/);
  const durationSec = durationMatch
    ? iso8601DurationToSeconds(durationMatch[1])
    : null;

  return {
    source: "fountain",
    audioUrl,
    episodeId: `fountain:${shortId}`,
    title: episodeTitle,
    podcastTitle,
    uploadDate,
    durationSec,
    feedUrl: null, // Fountain doesn't always expose the source RSS URL
  };
}
// "2026-05-07T20:53:13.003Z" → "20260507". Returns empty string on
// unparseable input so the downstream pipeline treats it as unknown.
// Converts a date string to "YYYYMMDD" using its UTC calendar date.
// Falsy or unparseable input yields "" (treated downstream as unknown).
function isoToYYYYMMDD(iso) {
  const parsed = iso ? new Date(iso) : null;
  if (parsed === null || Number.isNaN(parsed.getTime())) {
    return "";
  }
  const twoDigits = (value) => String(value).padStart(2, "0");
  return [
    parsed.getUTCFullYear(),
    twoDigits(parsed.getUTCMonth() + 1), // getUTCMonth() is 0-based
    twoDigits(parsed.getUTCDate()),
  ].join("");
}
// "PT2H7M27S" → 7647 seconds. Handles the subset of ISO-8601 that
// podcasts actually use (no fractional, no days).
// Parses a "PT…H…M…S" duration into whole seconds. Each component is
// optional and integer-only; anything else (non-string input, days,
// fractions) returns null.
function iso8601DurationToSeconds(s) {
  if (typeof s !== "string") {
    return null;
  }
  const parts = /^PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?$/.exec(s);
  if (parts === null) {
    return null;
  }
  // Absent components come back as undefined; count them as zero.
  const [hours, minutes, seconds] = parts
    .slice(1)
    .map((digits) => parseInt(digits || "0", 10));
  return hours * 3600 + minutes * 60 + seconds;
}
// Decodes the HTML character references that show up in meta-tag
// attribute values: the named entities amp/lt/gt/quot/apos/nbsp and
// any decimal (&#39;) or hexadecimal (&#x27;) numeric reference.
//
// Decoding happens in ONE pass, so output is never re-scanned:
// "&amp;lt;" becomes the literal text "&lt;", not "<". Unknown names
// and out-of-range code points are left exactly as written.
// Non-string input is stringified first.
function decodeHtmlEntities(s) {
  const named = new Map([
    ["amp", "&"],
    ["lt", "<"],
    ["gt", ">"],
    ["quot", '"'],
    ["apos", "'"],
    ["nbsp", "\u00A0"],
  ]);
  return String(s).replace(
    /&(#[xX][0-9a-fA-F]+|#[0-9]+|[A-Za-z][A-Za-z0-9]*);/g,
    (whole, body) => {
      if (body[0] !== "#") {
        // Named references are case-sensitive.
        return named.has(body) ? named.get(body) : whole;
      }
      const isHex = body[1] === "x" || body[1] === "X";
      const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      const isScalarValue =
        Number.isInteger(codePoint) &&
        codePoint > 0 &&
        codePoint <= 0x10ffff &&
        !(codePoint >= 0xd800 && codePoint <= 0xdfff); // lone surrogates are not characters
      return isScalarValue ? String.fromCodePoint(codePoint) : whole;
    }
  );
}
// Builds the request headers for a PodcastIndex API call: a User-Agent,
// the API key, the current unix time in seconds, and an Authorization
// value equal to the hex SHA-1 of key + secret + that timestamp.
function buildPodcastIndexHeaders({ podcastIndexKey, podcastIndexSecret }) {
  const unixSeconds = String(Math.floor(Date.now() / 1000));

  // The same timestamp string must feed both the hash and X-Auth-Date.
  const hasher = crypto.createHash("sha1");
  hasher.update(podcastIndexKey + podcastIndexSecret + unixSeconds);
  const signature = hasher.digest("hex");

  const headers = {
    "User-Agent": "Recap/1.0 (+https://github.com/keysat-xyz/recap)",
    "X-Auth-Key": podcastIndexKey,
    "X-Auth-Date": unixSeconds,
    Authorization: signature,
  };
  return headers;
}
// Single entry point: takes any URL and a config object, returns
// either the normalized resolved shape (for apple/spotify/fountain)
// or null (for URLs we don't recognize as share links — caller passes
// those through to the existing youtube / rss path unchanged).
// Dispatches `url` to the first resolver whose detector recognizes it
// (Apple, then Spotify, then Fountain). `opts` is forwarded only to the
// Spotify resolver. Resolves to null when no detector matches.
export async function resolveShareUrl(url, opts = {}) {
  const routes = [
    [isApplePodcastUrl, () => resolveApplePodcastUrl(url)],
    [isSpotifyUrl, () => resolveSpotifyUrl(url, opts)],
    [isFountainUrl, () => resolveFountainUrl(url)],
  ];
  for (const [recognizes, resolve] of routes) {
    if (recognizes(url)) {
      return resolve();
    }
  }
  return null;
}