// Resolves "share URLs" from Apple Podcasts and Spotify into something // Recap's existing podcast pipeline can swallow. Most users share these // links rather than the underlying RSS feed (which they rarely know // exists), so transparent resolution turns the most common podcast // share path into "paste link, hit summarize, done". // // Apple Podcasts URLs resolve directly via the public iTunes Lookup API: // the episode result includes `episodeUrl` (the audio enclosure) and // the show's `feedUrl`. No API key required, no auth. // // Spotify URLs are harder: Spotify-hosted audio is DRM-wrapped and not // served via a public stream URL. We use the unauthenticated oEmbed // endpoint to get the episode + show titles, then ask PodcastIndex to // find the same episode in its RSS-indexed catalog. Spotify Originals // (Joe Rogan, Anchor exclusives, …) have no RSS counterpart and fail // the lookup — we surface a clear error in that case so the user // understands and can paste the RSS link manually. 
//
// Returns a normalized shape that maps cleanly onto Recap's existing
// podcast pipeline:
//   {
//     source: "apple" | "spotify",
//     audioUrl: string,      // direct audio URL (.mp3/.m4a) — feeds the existing podcast path
//     episodeId: string,     // stable GUID used by history dedup
//     title: string,
//     podcastTitle: string,
//     uploadDate: string,    // "YYYYMMDD" ("" when the source gives no usable date)
//     durationSec: number?,  // null when unknown
//     feedUrl: string?,      // for context; not required downstream
//   }
//
// Throws `URLResolveError` with a `.code` field for things the UI may
// want to format specifically:
//   - "spotify_no_rss"             → episode is Spotify-exclusive
//   - "episode_not_found"          → looked up but couldn't match
//   - "apple_lookup_failed"
//   - "fountain_lookup_failed"
//   - "podcastindex_unconfigured"  → caller didn't pass keys
//   - "podcastindex_not_implemented"

import crypto from "crypto";
import { fetchUrl } from "./util.js";

/**
 * Error type thrown by every share-URL resolver in this module.
 *
 * @param {string} code - machine-readable reason (see the list above);
 *   exposed as `.code` so the UI can format specific failures.
 * @param {string} message - human-readable explanation.
 * @param {object} [options] - forwarded to `Error`; pass `{ cause }` to
 *   keep the underlying network/parse error attached. Optional, so
 *   existing two-argument call sites keep working unchanged.
 */
export class URLResolveError extends Error {
  constructor(code, message, options) {
    super(message, options);
    this.name = "URLResolveError";
    this.code = code;
  }
}

// Matches an Apple Podcasts *episode* link and captures:
//   [1] the podcast (collection) id from the `id…` path segment
//   [2] the episode track id from the `i=` query parameter
// The storefront segment ("/us/") and the show slug ("/the-daily/") are
// both optional: Apple links work without either, and isApplePodcastUrl
// accepts them, so the resolver must be able to parse them too.
const APPLE_EPISODE_URL_RE =
  /^https?:\/\/(?:www\.)?podcasts\.apple\.com\/(?:[^/]+\/)?podcast\/(?:[^/]+\/)?id(\d+)(?:\?|.*[?&])i=(\d+)/i;

// Detection only — no I/O.

/**
 * True when `url` points anywhere on podcasts.apple.com.
 * Falsy input (null, undefined, "") yields false.
 */
export function isApplePodcastUrl(url) {
  if (!url) {
    return false;
  }
  return /^https?:\/\/(?:www\.)?podcasts\.apple\.com\//i.test(url);
}

/**
 * True when `url` is an open.spotify.com / play.spotify.com link to an
 * episode or a show. Falsy input yields false.
 */
export function isSpotifyUrl(url) {
  if (!url) {
    return false;
  }
  return /^https?:\/\/(?:open|play)\.spotify\.com\/(?:episode|show)\//i.test(url);
}

// Fountain (https://fountain.fm) is a Bitcoin-Lightning podcast app
// that hosts a Podcasting 2.0-native catalog. Episode pages are at
// /episode/{id}; the underlying media is served from
// feeds.fountain.fm and exposed via standard Open Graph tags
// (og:audio, og:image, og:title) on the public episode HTML — no API
// key required to resolve. Show pages (/show/{id}) aren't supported
// for now; users should paste a specific episode link.
/**
 * True when `url` is a fountain.fm episode link (show pages are not
 * matched). Falsy input yields false.
 */
export function isFountainUrl(url) {
  if (!url) return false;
  return /^https?:\/\/(?:www\.)?fountain\.fm\/episode\//i.test(url);
}

// True if the URL is one of the "share link" forms we know how to turn
// into a podcast audio URL. Callers should only invoke the network-
// touching resolvers when this returns true.
export function isResolvableShareUrl(url) {
  return isApplePodcastUrl(url) || isSpotifyUrl(url) || isFountainUrl(url);
}

// ── Apple Podcasts ─────────────────────────────────────────────────────
// Strategy: parse the podcast ID + episode track ID out of the URL,
// hit iTunes Lookup, find the matching episode by trackId. Apple
// returns the episode's actual audio enclosure URL — same URL the
// Apple Podcasts app streams from — so the existing podcast download
// pipeline (audio.downloadPodcastAudio) can swallow it unchanged.

/**
 * Resolves an Apple Podcasts episode link to the normalized episode
 * shape described in the file header.
 *
 * @param {string} url - Apple Podcasts episode URL; must carry both the
 *   `id…` path segment and the `?i=` episode parameter.
 * @returns {Promise} resolves to
 *   `{ source: "apple", audioUrl, episodeId, title, podcastTitle,
 *      uploadDate, durationSec, feedUrl }`.
 * @throws {URLResolveError} code "apple_lookup_failed" when the URL
 *   cannot be parsed or the iTunes lookup/JSON parse fails; code
 *   "episode_not_found" when the episode is absent from the lookup
 *   results or is listed without an audio URL.
 */
export async function resolveApplePodcastUrl(url) {
  // String() so null/undefined/URL objects end in a URLResolveError
  // (or a match) instead of a raw TypeError from `.match`.
  const match = String(url ?? "").match(APPLE_EPISODE_URL_RE);
  if (!match) {
    throw new URLResolveError(
      "apple_lookup_failed",
      "Apple Podcasts URL is missing podcast ID or episode ID (?i= param)"
    );
  }
  const [, podcastId, episodeTrackId] = match;

  // The lookup endpoint returns the show metadata as result[0] and the
  // most-recent N episodes as result[1..]. Apple silently caps at 200
  // even if you ask for more.
  const lookupUrl = `https://itunes.apple.com/lookup?id=${encodeURIComponent(
    podcastId
  )}&entity=podcastEpisode&limit=200`;

  let parsed;
  try {
    parsed = JSON.parse(await fetchUrl(lookupUrl));
  } catch (err) {
    throw new URLResolveError(
      "apple_lookup_failed",
      `iTunes lookup failed: ${err?.message || err}`,
      { cause: err }
    );
  }

  // Drop non-object entries so a stray null cannot crash the lookups below.
  const results = Array.isArray(parsed?.results)
    ? parsed.results.filter((r) => r !== null && typeof r === "object")
    : [];
  const show =
    results.find((r) => r.wrapperType === "track" || r.kind === "podcast") || {};
  const episode = results.find(
    (r) =>
      r.wrapperType === "podcastEpisode" &&
      String(r.trackId) === String(episodeTrackId)
  );

  if (!episode) {
    throw new URLResolveError(
      "episode_not_found",
      `Apple returned ${results.length} results for podcast ${podcastId} but episode ${episodeTrackId} was not among them. The episode may be older than Apple's 200-episode lookup cap.`
    );
  }
  // Found, but unusable: say so rather than claiming it was missing.
  if (!episode.episodeUrl) {
    throw new URLResolveError(
      "episode_not_found",
      `Apple lists episode ${episodeTrackId} for podcast ${podcastId} but returned no audio URL (episodeUrl) for it.`
    );
  }

  // releaseDate is ISO 8601; collapse to YYYYMMDD to match the rest of
  // the pipeline's date convention. Unparseable dates leave it empty.
  let uploadDate = "";
  if (episode.releaseDate) {
    const released = new Date(episode.releaseDate);
    if (!Number.isNaN(released.getTime())) {
      uploadDate = released.toISOString().slice(0, 10).replace(/-/g, "");
    }
  }

  const durationSec =
    typeof episode.trackTimeMillis === "number"
      ? Math.round(episode.trackTimeMillis / 1000)
      : null;

  return {
    source: "apple",
    audioUrl: episode.episodeUrl,
    episodeId: episode.episodeGuid || `apple-${episodeTrackId}`,
    title: episode.trackName || show.collectionName || "Untitled episode",
    podcastTitle:
      show.collectionName || episode.collectionName || "Unknown podcast",
    uploadDate,
    durationSec,
    feedUrl: show.feedUrl || null,
  };
}

// ── Spotify ────────────────────────────────────────────────────────────
// Strategy: oEmbed for title/show, then PodcastIndex search to map
// title+show → RSS feed → episode → audio enclosure. Spotify-exclusive
// content has no RSS counterpart and fails the lookup with a clear
// `spotify_no_rss` error.
//
// PodcastIndex auth (https://podcastindex.org/api/dev) requires:
//   - `User-Agent` header (PodcastIndex blocks anonymous UAs)
//   - `X-Auth-Key`: API key (free, signup at api.podcastindex.org)
//   - `X-Auth-Date`: unix timestamp (current time)
//   - `Authorization`: sha1(apiKey + apiSecret + apiDate)

// Upper bound for one PodcastIndex request, so a hung server cannot
// stall the summarize pipeline indefinitely.
const PODCASTINDEX_TIMEOUT_MS = 15000;

// Comparison key for titles: lowercase, letters and digits only.
// Unicode-aware on purpose — an ASCII-only filter reduces non-Latin
// titles to "", and an empty key "matches" everything.
function normalizeTitleKey(text) {
  return String(text ?? "")
    .toLowerCase()
    .replace(/[^\p{L}\p{N}]/gu, "");
}

// Authenticated GET against PodcastIndex returning the parsed JSON body.
// Throws on non-2xx so an auth/rate-limit failure is never mistaken for
// "no results".
async function podcastIndexGetJson(endpointUrl, credentials) {
  const res = await fetch(endpointUrl, {
    headers: buildPodcastIndexHeaders(credentials),
    signal: AbortSignal.timeout(PODCASTINDEX_TIMEOUT_MS),
  });
  if (!res.ok) {
    const hint =
      res.status === 401 ? " (PodcastIndex rejected the API key/secret)" : "";
    throw new Error(`HTTP ${res.status}${hint}`);
  }
  return res.json();
}

// Picks the episode whose title best matches `wantedTitle`: exact key
// match first, then episode-title-contains-target, then
// target-contains-episode-title. Episodes whose key is empty are never
// candidates for the fuzzy steps. Returns null when nothing matches.
function findEpisodeByTitle(episodes, wantedTitle) {
  const targetKey = normalizeTitleKey(wantedTitle);
  if (!targetKey) {
    // Title has no letters/digits at all: only a literal match is safe.
    const literal = String(wantedTitle ?? "").trim();
    return (
      episodes.find((e) => String(e?.title ?? "").trim() === literal) || null
    );
  }
  const keyed = episodes
    .filter((e) => e !== null && typeof e === "object")
    .map((e) => ({ episode: e, key: normalizeTitleKey(e.title) }))
    .filter(({ key }) => key !== "");
  const hit =
    keyed.find(({ key }) => key === targetKey) ||
    keyed.find(({ key }) => key.includes(targetKey)) ||
    keyed.find(({ key }) => targetKey.includes(key));
  return hit ? hit.episode : null;
}

/**
 * Resolves a Spotify share link to the normalized episode shape
 * described in the file header, by way of PodcastIndex.
 *
 * @param {string} url - Spotify episode/show URL, passed to oEmbed.
 * @param {object} [opts]
 * @param {string} [opts.podcastIndexKey] - PodcastIndex API key.
 * @param {string} [opts.podcastIndexSecret] - PodcastIndex API secret.
 * @returns {Promise} resolves to
 *   `{ source: "spotify", audioUrl, episodeId, title, podcastTitle,
 *      uploadDate, durationSec, feedUrl }`.
 * @throws {URLResolveError}
 *   - "podcastindex_unconfigured": key or secret missing.
 *   - "episode_not_found": oEmbed or PodcastIndex request failed
 *     (including non-2xx responses), or no episode title matched.
 *   - "spotify_no_rss": PodcastIndex answered but returned no usable feed.
 */
export async function resolveSpotifyUrl(url, { podcastIndexKey, podcastIndexSecret } = {}) {
  if (!podcastIndexKey || !podcastIndexSecret) {
    throw new URLResolveError(
      "podcastindex_unconfigured",
      'Spotify needs both a free PodcastIndex API Key AND API Secret. Sign up at api.podcastindex.org — your account page shows both credentials side-by-side. Paste them in Recaps → Settings → API Keys → PodcastIndex. (Apple Podcasts and Fountain links work without any API key — try those for the same episode if it\'s also distributed there.)'
    );
  }
  const credentials = { podcastIndexKey, podcastIndexSecret };

  // oEmbed gives us episode title + show name with no auth.
  let episodeTitle = "";
  let showName = "";
  try {
    const oemRaw = await fetchUrl(
      `https://open.spotify.com/oembed?url=${encodeURIComponent(url)}`
    );
    const oem = JSON.parse(oemRaw);
    episodeTitle = (oem?.title || "").trim();
    // oEmbed's "title" includes the show name in some Spotify variants
    // (e.g. "{episode} · {show}"). Split on the last delimiter if present.
    const sep = episodeTitle.lastIndexOf(" · ");
    if (sep > 0) {
      showName = episodeTitle.slice(sep + 3).trim();
      episodeTitle = episodeTitle.slice(0, sep).trim();
    }
  } catch (err) {
    throw new URLResolveError(
      "episode_not_found",
      `Could not fetch Spotify episode metadata: ${err?.message || err}`,
      { cause: err }
    );
  }
  if (!episodeTitle) {
    throw new URLResolveError(
      "episode_not_found",
      "Spotify oEmbed returned no episode title"
    );
  }

  // Free-text feed search: we read `feeds` from the response and pick
  // the show, then look the episode up inside that feed below.
  const q = encodeURIComponent(
    showName ? `${episodeTitle} ${showName}` : episodeTitle
  );
  const searchUrl = `https://api.podcastindex.org/api/1.0/search/byterm?q=${q}&max=5`;

  let candidate = null;
  try {
    const data = await podcastIndexGetJson(searchUrl, credentials);
    const feeds = Array.isArray(data?.feeds) ? data.feeds : [];
    // Best-match heuristic: prefer a feed whose title fuzzy-matches the
    // show name, fall back to the first result.
    const showKey = normalizeTitleKey(showName);
    candidate =
      (showKey &&
        feeds.find((f) => normalizeTitleKey(f?.title).includes(showKey))) ||
      feeds[0] ||
      null;
  } catch (err) {
    throw new URLResolveError(
      "episode_not_found",
      `PodcastIndex feed search failed: ${err?.message || err}`,
      { cause: err }
    );
  }

  if (!candidate || !candidate.id) {
    throw new URLResolveError(
      "spotify_no_rss",
      `This Spotify episode "${episodeTitle}" doesn't appear in PodcastIndex. It may be a Spotify exclusive (Spotify Originals, Anchor-only shows). Paste the show's RSS feed URL instead, or use the YouTube version if available.`
    );
  }

  // Pull the episode list for the matched feed and find the closest
  // title match.
  let episodes = [];
  try {
    const data = await podcastIndexGetJson(
      `https://api.podcastindex.org/api/1.0/episodes/byfeedid?id=${encodeURIComponent(
        candidate.id
      )}&max=200`,
      credentials
    );
    episodes = Array.isArray(data?.items) ? data.items : [];
  } catch (err) {
    throw new URLResolveError(
      "episode_not_found",
      `PodcastIndex episode lookup failed: ${err?.message || err}`,
      { cause: err }
    );
  }

  const episode = findEpisodeByTitle(episodes, episodeTitle);
  if (!episode || !episode.enclosureUrl) {
    throw new URLResolveError(
      "episode_not_found",
      `Matched "${candidate.title}" in PodcastIndex but couldn't find an episode titled "${episodeTitle}". Episode may be too new for PodcastIndex's snapshot, or only available on Spotify.`
    );
  }

  // datePublished is multiplied by 1000 before being handed to Date,
  // i.e. it is treated as unix seconds; collapse to YYYYMMDD.
  let uploadDate = "";
  if (episode.datePublished) {
    const published = new Date(episode.datePublished * 1000);
    if (!Number.isNaN(published.getTime())) {
      uploadDate = published.toISOString().slice(0, 10).replace(/-/g, "");
    }
  }

  return {
    source: "spotify",
    audioUrl: episode.enclosureUrl,
    episodeId: episode.guid || `spotify-${episode.id}`,
    title: episode.title || episodeTitle,
    podcastTitle: candidate.title || showName || "Unknown podcast",
    uploadDate,
    durationSec: typeof episode.duration === "number" ? episode.duration : null,
    feedUrl: candidate.url || null,
  };
}

// ── Fountain ───────────────────────────────────────────────────────────
// Strategy: fetch the episode HTML, parse Open Graph + JSON-LD tags
// for the audio URL, title, podcast name, and upload date. Fountain
// serves the actual MP3 enclosure URL on og:audio so we don't need
// any PodcastIndex lookup or API key. Same shape as the Apple
// resolver returns so the downstream podcast pipeline doesn't have
// to branch.
//
// The og:title format is "Show • Episode title • Watch on Fountain".
// We split on " • " to separate show + episode; the trailing "Watch
// on Fountain" branding gets dropped.
// // Fountain URLs encode the episode in a short opaque id at the URL // path; we use that as our episodeId for history dedup. export async function resolveFountainUrl(url) { const m = url.match(/\/episode\/([A-Za-z0-9_-]+)/); if (!m) { throw new URLResolveError( "fountain_lookup_failed", "Fountain URL is missing the /episode/ path", ); } const shortId = m[1]; // Use global fetch directly (Node 18+) so we can send a UA header. // fetchUrl() in util.js doesn't take options; we don't want to // expand its signature just for this one caller. Fountain's SSR // response includes the og:audio tag we need for ANY UA, but mimic // a modern Safari to stay on the well-tested response path. let html; try { const res = await fetch(url, { headers: { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_0) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15", }, // Aggressive timeout — Fountain's page is small (~120KB) and // we shouldn't hold up the summarize pipeline if their server // hangs. AbortSignal.timeout is Node 18+, same baseline. signal: AbortSignal.timeout(8000), }); if (!res.ok) { throw new URLResolveError( "fountain_lookup_failed", `Fountain returned HTTP ${res.status}`, ); } html = await res.text(); } catch (err) { if (err instanceof URLResolveError) throw err; throw new URLResolveError( "fountain_lookup_failed", `Couldn't reach Fountain: ${err?.message || err}`, ); } if (!html || typeof html !== "string") { throw new URLResolveError( "fountain_lookup_failed", "Empty response from Fountain", ); } // Extract a meta tag's content by either property= or name=. // Fountain uses property= for OG and name= for Twitter — we match // both since users might paste links Twitter has re-fetched. 
function metaContent(key) { const re = new RegExp( `") .replace(/"/g, '"') .replace(/'/g, "'"); } function buildPodcastIndexHeaders({ podcastIndexKey, podcastIndexSecret }) { const date = Math.floor(Date.now() / 1000).toString(); const sig = crypto .createHash("sha1") .update(podcastIndexKey + podcastIndexSecret + date) .digest("hex"); return { "User-Agent": "Recap/1.0 (+https://github.com/keysat-xyz/recap)", "X-Auth-Key": podcastIndexKey, "X-Auth-Date": date, Authorization: sig, }; } // Single entry point: takes any URL and a config object, returns // either the normalized resolved shape (for apple/spotify) or null // (for URLs we don't recognize as share links — caller passes those // through to the existing youtube / rss path unchanged). export async function resolveShareUrl(url, opts = {}) { if (isApplePodcastUrl(url)) { return resolveApplePodcastUrl(url); } if (isSpotifyUrl(url)) { return resolveSpotifyUrl(url, opts); } if (isFountainUrl(url)) { return resolveFountainUrl(url); } return null; }