// History storage + routes. Per-user-scoped under /data/history/<scope>/.
//
// "Scope" is:
//   - single mode:                "owner" (always)
//   - multi mode signed-in user:  "<user_id>"
//   - multi mode anonymous trial: "anon/<cookie_id>"
//
// Each scope has its own folder with one *.json file per summary and a
// `_meta.json` for the folder/ordering UI. Scope isolation is enforced
// here at the path level — handlers in this file refuse to read across
// scopes, period. The auth middleware populates req.userId; we derive
// the scope via scopeForRequest(req) and never trust raw URL input.
//
// On a brand-new install nothing exists. Single-mode installs created
// before 0.2.77 wrote files flat to /data/history/*.json; the migration
// hook (see migrateLegacyLibrary below) moves those into the "owner"
// scope on first multi-mode boot.
//
// Module-private state: historyDir (the root path). All per-scope
// paths are derived per-call so adding a new user doesn't need a
// re-init.
import fs from "fs/promises";
import path from "path";

let historyDir = null;

// ── Initialization ──────────────────────────────────────────────────────────

/**
 * Call once at boot. Stores the history root (`<dataDir>/history`) in
 * module state and creates it. In single mode the "owner" scope directory
 * is created as well so owner-scope writes don't race on the first-summary
 * mkdir.
 *
 * Directory creation is best-effort: a failure is logged but does not
 * throw, so boot continues and the first real write surfaces the error.
 * (`recursive: true` already makes "directory exists" a non-error, so
 * anything caught here is a genuine problem such as EACCES or ENOTDIR.)
 *
 * @param {{ dataDir: string, mode?: string }} options
 * @returns {Promise<void>}
 */
export async function initHistory({ dataDir, mode = "single" }) {
  historyDir = path.join(dataDir, "history");

  const wanted = [historyDir];
  if (mode === "single") wanted.push(ownerScopeDir());

  for (const dir of wanted) {
    try {
      await fs.mkdir(dir, { recursive: true });
    } catch (err) {
      console.warn(`[history] could not create ${dir}:`, err?.message || err);
    }
  }
}

// ── Scope helpers ───────────────────────────────────────────────────────────

// Files that live at the root of /data/history/ (not inside a per-user
// scope) — subscription state, skip lists, etc. Filtered out when
// listing sessions so they don't appear as phantom library items.
// Names of files found inside a scope directory that are NOT session
// records: the folder/order meta plus the per-scope subscription state
// (moved there by the 0.2.147 migration). Every place that treats `.json`
// files as sessions must exclude these, otherwise they surface as phantom
// "Invalid Date · undefined topics" library entries.
export const ROOT_SIDECARS = new Set([
  "_meta.json",
  "subscriptions.json",
  "skip-list.json",
  "seen-list.json",
  "auto-queue.json",
]);

// Validate one user-supplied path segment of a scope so it cannot escape
// the history root via path traversal. Only the alphabet used by user_ids
// and trial cookie_ids is accepted (base64url / hex characters, which also
// covers the literal "anon" and "owner" names). Returns the segment
// unchanged; anything else throws Error("invalid_scope_component").
function safeComponent(s) {
  const acceptable =
    typeof s === "string" && s.length > 0 && /^[A-Za-z0-9_-]+$/.test(s);
  if (!acceptable) {
    throw new Error("invalid_scope_component");
  }
  return s;
}

// scopeForRequest(req) — a single string identifying the writer/reader
// of a library. Used as a subpath under /data/history/. Throws if the
// request has no usable identity (the caller should 401 in that case).
//
// Returned strings:
//   "owner"            — single mode, OR the multi-mode admin (so a
//                        multi→single mode flip preserves the operator's
//                        library at the same path)
//   "<user_id>"        — multi mode, non-admin signed-in user
//   "anon/<cookie_id>" — multi mode, anonymous-trial cookie
//
// Why admin → "owner": before v0.2.91 we renamed /data/history/owner/
// → /data/history/<admin_user_id>/ on first multi-mode signup, which
// made switching back to single mode hide the operator's library
// (single mode reads "owner"). Keeping the admin's scope at "owner"
// regardless of mode makes mode-switching lossless.
export function scopeForRequest(req) {
  if (req.recapMode !== "multi") return "owner";
  if (req.user?.is_admin) return "owner";
  if (req.user?.id) return safeComponent(req.user.id);
  if (typeof req.userId === "string" && req.userId.startsWith("anon:")) {
    // Strip the "anon:" prefix; the remainder becomes its own path segment.
    return `anon/${safeComponent(req.userId.slice(5))}`;
  }
  if (req.userId === "owner") return "owner"; // pre-multi-mode legacy shim
  throw new Error("no_scope");
}

// Resolve a scope string to its directory under the history root.
// `scope` may contain a slash for the "anon/<cookie_id>" case — split into
// segments so path.join doesn't treat it as one component (and so
// safeComponent enforcement covers each piece). Throws
// Error("invalid_scope_component") for anything that isn't a clean scope.
function scopeDir(scope) {
  if (typeof scope !== "string") throw new Error("invalid_scope_component");
  const parts = scope.split("/").map(safeComponent);
  return path.join(historyDir, ...parts);
}

// Directory of the fixed "owner" scope.
function ownerScopeDir() {
  return path.join(historyDir, "owner");
}

// Path of a scope's folder/ordering state file.
function metaPathFor(scope) {
  return path.join(scopeDir(scope), "_meta.json");
}

// ── Storage ─────────────────────────────────────────────────────────────────

// Character set accepted by safeFilename for session ids; generated ids
// must stay inside it or the routes can never address the saved file.
const SESSION_ID_SAFE = /^[A-Za-z0-9_-]+$/;

// Build the content-hint part of a session id. A non-podcast id that is
// already filename-safe (e.g. a YouTube video id) is used verbatim;
// podcasts — and any other id containing unsafe characters, such as a raw
// URL — are base64url-encoded and truncated to 16 characters.
function sessionIdSuffix(videoId, type) {
  const raw = String(videoId ?? "");
  if (type !== "podcast" && SESSION_ID_SAFE.test(raw)) return raw;
  const encoded = Buffer.from(raw).toString("base64url").slice(0, 16);
  return encoded || "unknown";
}

/**
 * Persist a completed summary and return the generated id.
 *
 * The caller (the /api/process handler) is responsible for passing the
 * right scope — derived via scopeForRequest(req) up the call stack.
 *
 * The id is `<Date.now()>-<content hint>` so files sort chronologically
 * by name. The hint is always restricted to [A-Za-z0-9_-] so the id can
 * be used with the :id routes.
 *
 * @returns {Promise<string>} the new session id
 * @throws if the scope is invalid or the record cannot be written
 */
export async function saveToHistory(
  scope,
  videoId,
  url,
  title,
  chunks,
  entries,
  logs,
  uploadDate,
  type,
  speakers = null,
  speakerNames = null,
) {
  const id = `${Date.now()}-${sessionIdSuffix(videoId, type)}`;
  const record = {
    id,
    videoId,
    url,
    title: title || "Untitled",
    type: type || "youtube",
    // Tolerate a missing list rather than losing a finished summary.
    topicCount: Array.isArray(chunks) ? chunks.length : 0,
    segmentCount: Array.isArray(entries) ? entries.length : 0,
    createdAt: new Date().toISOString(),
    uploadDate: uploadDate || "",
    chunks,
    entries,
    logs,
    // Phase 1E — speaker legend summary keyed by global speaker ID
    // (Speaker_A, Speaker_B, ...). Each chunk's entries also carry
    // `.speaker` and `.speaker_confidence` fields inline. Null when
    // diarization wasn't available (older relay, off, or no
    // fingerprints collected). Persisting at the record level lets
    // the library card show "2 speakers" without scanning entries.
    speakers: speakers || null,
    // Phase 2 — inferred speaker names from the relay's post-cluster
    // polish pass. Map { Speaker_A: "Matt Hill", ... } with null
    // values for unidentified speakers. Reopening a saved session
    // restores names alongside the cluster IDs.
    speakerNames: speakerNames || null,
  };

  const dir = scopeDir(scope);
  // A failed mkdir is surfaced by the writeFile that follows.
  await fs.mkdir(dir, { recursive: true }).catch(() => {});
  await fs.writeFile(path.join(dir, `${id}.json`), JSON.stringify(record));
  return id;
}

// ── Meta ────────────────────────────────────────────────────────────────────
// Each scope has its own `_meta.json` for folder/ordering UI state.
// New scope = empty meta on read (no file yet).

// Coerce whatever was parsed from `_meta.json` into the shape the routes
// rely on: `folders` is an array of objects that each have an `items`
// array, and `uncategorized` is an array. Unknown extra keys are kept.
function normalizeMeta(raw) {
  const base =
    raw !== null && typeof raw === "object" && !Array.isArray(raw) ? raw : {};
  const folders = Array.isArray(base.folders)
    ? base.folders
        .filter((folder) => folder !== null && typeof folder === "object")
        .map((folder) => ({
          ...folder,
          items: Array.isArray(folder.items) ? folder.items : [],
        }))
    : [];
  const uncategorized = Array.isArray(base.uncategorized)
    ? base.uncategorized
    : [];
  return { ...base, folders, uncategorized };
}

/**
 * Read a scope's folder/ordering state. Never throws: a missing,
 * unreadable or malformed file yields an empty (or repaired) structure.
 *
 * @returns {Promise<{ folders: object[], uncategorized: any[] }>}
 */
export async function loadMeta(scope) {
  try {
    const parsed = JSON.parse(await fs.readFile(metaPathFor(scope), "utf-8"));
    return normalizeMeta(parsed);
  } catch {
    return { folders: [], uncategorized: [] };
  }
}

/**
 * Write a scope's folder/ordering state (pretty-printed), creating the
 * scope directory if needed.
 */
export async function saveMeta(scope, meta) {
  const dir = scopeDir(scope);
  await fs.mkdir(dir, { recursive: true }).catch(() => {});
  await fs.writeFile(metaPathFor(scope), JSON.stringify(meta, null, 2));
}

// getHistoryDir() — root /data/history/. Some callers (subscriptions,
// skip-list, etc.) write sidecar files here that aren't scoped.
export function getHistoryDir() {
  return historyDir;
}

// getScopeHistoryDir(scope) — the per-scope directory. Used by handlers
// that need raw filesystem access (e.g. delete).
export function getScopeHistoryDir(scope) {
  return scopeDir(scope);
}

// ── Audio-first ("walking mode") TTS cache helpers ──────────────────────────
// Per-topic synthesized summary clips live alongside the session JSON in
// a sibling folder: /data/history/<scope>/<id>-audio/topic-<n>.mp3.
// Same scope-isolation guarantees as the session record (safeFilename
// guards the id; scopeDir guards the scope).

// Directory holding a session's cached summary-audio clips.
// Throws if the scope or id is malformed.
export function sessionAudioDir(scope, id) {
  return path.join(scopeDir(scope), `${safeFilename(id)}-audio`);
}

/**
 * Load a full session record by id within a scope.
 *
 * @returns {Promise<object|null>} the parsed record, or null if it doesn't
 *   exist, can't be parsed, or the scope/id is malformed — callers 404 on
 *   null.
 */
export async function loadSession(scope, id) {
  try {
    const file = path.join(scopeDir(scope), `${safeFilename(id)}.json`);
    return JSON.parse(await fs.readFile(file, "utf-8"));
  } catch {
    return null;
  }
}

// Milliseconds since epoch for a session's createdAt; 0 when the value is
// missing or unparseable so the comparator below never yields NaN.
function createdAtMillis(session) {
  const ms = new Date(session.createdAt).getTime();
  return Number.isNaN(ms) ? 0 : ms;
}

// Oldest-first ordering with the id as a deterministic tie-breaker.
function compareOldestFirst(a, b) {
  const delta = createdAtMillis(a) - createdAtMillis(b);
  if (delta !== 0) return delta;
  const left = String(a.id);
  const right = String(b.id);
  if (left < right) return -1;
  return left > right ? 1 : 0;
}

/**
 * List a scope's saved sessions as lightweight metadata (no entries /
 * chunks), oldest first. The daily-digest scan uses this to pick recaps
 * created after a watermark before loading each full record for
 * synthesis.
 *
 * Returns [] when the scope has no library yet or the scope is malformed
 * (safeComponent throws inside scopeDir, caught here). Files that cannot
 * be read/parsed, and JSON files without a string `id`, are skipped.
 *
 * @returns {Promise<Array<{id: string, title: any, type: string, url: any, createdAt: any}>>}
 */
export async function listScopeSessions(scope) {
  let dir;
  try {
    dir = scopeDir(scope);
  } catch {
    return [];
  }

  let files;
  try {
    files = await fs.readdir(dir);
  } catch {
    return [];
  }

  const sessionFiles = files.filter(
    (f) => f.endsWith(".json") && !f.startsWith("_") && !ROOT_SIDECARS.has(f),
  );

  const out = [];
  for (const file of sessionFiles) {
    try {
      const data = JSON.parse(await fs.readFile(path.join(dir, file), "utf-8"));
      // Anything without a string id is not a session record.
      if (typeof data?.id !== "string") continue;
      out.push({
        id: data.id,
        title: data.title,
        type: data.type || "youtube",
        url: data.url,
        createdAt: data.createdAt,
      });
    } catch {
      // Unreadable or corrupt file — deliberately skipped so one bad
      // record doesn't hide the rest of the library.
    }
  }

  out.sort(compareOldestFirst);
  return out;
}

// Shallow-merge `patch` into a session record on disk (e.g. to stamp
// `summaryAudio` availability). No-op-safe: returns null if the record
// is missing rather than throwing.
/**
 * Shallow-merge `patch` into the stored session record and write it back.
 *
 * @returns {Promise<object|null>} the merged record, or null when the
 *   record is missing, unparseable, or not a JSON object.
 * @throws if the scope/id is malformed or the write fails.
 */
export async function patchSession(scope, id, patch) {
  const file = path.join(scopeDir(scope), `${safeFilename(id)}.json`);

  let rec;
  try {
    rec = JSON.parse(await fs.readFile(file, "utf-8"));
  } catch {
    return null;
  }
  // A non-object payload is not a session record; merging into it would
  // write a record consisting of the patch alone.
  if (rec === null || typeof rec !== "object" || Array.isArray(rec)) {
    return null;
  }

  const merged = { ...rec, ...patch };
  await fs.writeFile(file, JSON.stringify(merged));
  return merged;
}

// ── Legacy library migration (single → multi) ───────────────────────────────
// Pre-0.2.77 single-mode installs wrote summaries flat to
// /data/history/*.json with a single _meta.json. On first boot in
// multi mode we move all of that into /data/history/owner/ so the
// operator's library is accessible under the "owner" scope.
//
// NOTE(review): older notes here said auth-routes.js then renames that
// folder to the first admin's user id. The scopeForRequest commentary in
// this file says the admin scope stays "owner" since v0.2.91, so that
// rename looks historical — confirm against auth-routes.js.
//
// Idempotent — writes a sentinel once the migration has fully completed.
// Safe to call on every boot; no-op if there's nothing flat to move.

/**
 * Move flat legacy session files (and the root `_meta.json`) into the
 * "owner" scope directory.
 *
 * The sentinel is only written when every file moved (or there was
 * nothing to move). If any rename fails the sentinel is withheld so the
 * next boot retries instead of stranding files at the root.
 *
 * @returns {Promise<{migrated: number, skipped?: string, failed?: number}>}
 */
export async function migrateLegacyLibrary() {
  const sentinel = path.join(historyDir, ".migrated_to_multi");

  const alreadyDone = await fs.access(sentinel).then(
    () => true,
    () => false,
  );
  if (alreadyDone) return { migrated: 0, skipped: "already_migrated" };

  let entries;
  try {
    entries = await fs.readdir(historyDir);
  } catch {
    return { migrated: 0, skipped: "no_history_dir" };
  }

  // ROOT_SIDECARS includes _meta.json, which is moved separately below.
  const flatFiles = entries.filter(
    (f) => f.endsWith(".json") && !ROOT_SIDECARS.has(f),
  );
  if (flatFiles.length === 0 && !entries.includes("_meta.json")) {
    // Truly empty. Write the sentinel so future boots don't keep
    // checking, but flag this as a non-migration.
    await fs.writeFile(sentinel, new Date().toISOString());
    return { migrated: 0, skipped: "empty_legacy_library" };
  }

  const target = ownerScopeDir();
  await fs.mkdir(target, { recursive: true }).catch(() => {});

  let moved = 0;
  let failed = 0;
  for (const f of flatFiles) {
    try {
      await fs.rename(path.join(historyDir, f), path.join(target, f));
      moved += 1;
    } catch (err) {
      failed += 1;
      console.warn(`[history] failed to migrate ${f}:`, err?.message || err);
    }
  }

  // Move _meta.json too if it exists at the root. Folder/ordering state
  // belongs to the same library.
  try {
    await fs.rename(
      path.join(historyDir, "_meta.json"),
      path.join(target, "_meta.json"),
    );
  } catch (err) {
    // No _meta.json is fine; anything else is worth a warning.
    if (err?.code !== "ENOENT") {
      failed += 1;
      console.warn(
        "[history] failed to migrate _meta.json:",
        err?.message || err,
      );
    }
  }

  if (failed > 0) {
    console.warn(
      `[history] ${failed} legacy file(s) could not be migrated; will retry on next boot`,
    );
    return { migrated: moved, failed };
  }

  await fs.writeFile(sentinel, new Date().toISOString());
  console.log(
    `[history] migrated ${moved} legacy session(s) to /data/history/owner/`,
  );
  return { migrated: moved };
}

// reclaimAdminLibraryToOwner({ db }) — one-time fixup for installs
// upgraded from <0.2.91 where the first admin's library got renamed
// from /data/history/owner/ → /data/history/<admin_user_id>/ on their
// signup. With the admin-scope-is-always-owner rule (see
// scopeForRequest), we need that library back under "owner" so the
// multi-mode admin reads it AND a future single-mode flip can still find
// it. Idempotent — runs the rename only if (a) an admin user exists in
// SQLite AND (b) /data/history/<admin_user_id>/ exists AND
// (c) /data/history/owner/ does NOT already exist. Otherwise no-op.
//
// Pass in the better-sqlite3 db handle from db.js — we don't import
// here to avoid a dep cycle (db.js is multi-mode only, history.js is
// loaded in single mode too).
/**
 * One-time fixup that moves the first admin's library directory back to
 * the "owner" scope. Never clobbers an existing owner directory.
 *
 * @param {{ db: object }} options — `db` must expose `prepare(sql).get()`.
 * @returns {Promise<{reclaimed: boolean, reason?: string, admin_id?: any, email?: any}>}
 */
export async function reclaimAdminLibraryToOwner({ db }) {
  if (!db) return { reclaimed: false, reason: "no_db" };

  let admin;
  try {
    admin = db
      .prepare(
        "SELECT id, email FROM users WHERE is_admin = 1 ORDER BY created_at ASC LIMIT 1",
      )
      .get();
  } catch {
    return { reclaimed: false, reason: "no_users_table" };
  }
  if (!admin) return { reclaimed: false, reason: "no_admin" };

  // An id that can't be a path segment can't have a library directory;
  // report it instead of throwing out of a boot-time fixup.
  let adminSegment;
  try {
    adminSegment = safeComponent(admin.id);
  } catch {
    return { reclaimed: false, reason: "invalid_admin_id" };
  }

  const ownerDir = path.join(historyDir, "owner");
  const adminDir = path.join(historyDir, adminSegment);

  try {
    await fs.access(ownerDir);
    // /data/history/owner/ already exists → either a fresh install or
    // the fixup already ran. Either way, do nothing.
    return { reclaimed: false, reason: "owner_already_exists" };
  } catch {}

  try {
    await fs.access(adminDir);
  } catch {
    return { reclaimed: false, reason: "admin_dir_missing" };
  }

  await fs.rename(adminDir, ownerDir);
  console.log(
    `[history] reclaimed admin library: /data/history/${admin.id}/ → /data/history/owner/`,
  );
  return { reclaimed: true, admin_id: admin.id, email: admin.email };
}

// renameScopeDir(fromScope, toScope) — rename of a per-scope folder.
// Per the original notes, used when:
//   - the first multi-mode signup claims the "owner" legacy library
//     (auth-routes.js calls this with fromScope="owner", toScope=user.id)
//   - an anonymous trial converts to a real user (auth-routes.js,
//     fromScope="anon/<cookie_id>", toScope=user.id)
//
// If `fromScope` doesn't exist, no-op (returns false). If `toScope`
// already exists, we don't clobber — the caller has to merge manually
// (which currently only matters in edge cases, since fresh user ids
// are uuids that won't collide). Returns true on actual rename.
// Throws if either scope is malformed or the rename itself fails.
export async function renameScopeDir(fromScope, toScope) {
  const from = scopeDir(fromScope);
  const to = scopeDir(toScope);

  try {
    await fs.access(from);
  } catch {
    return false;
  }

  try {
    await fs.access(to);
    console.warn(
      `[history] renameScopeDir: ${toScope} already exists; refusing to clobber. Leaving ${fromScope} in place for manual reconciliation.`,
    );
    return false;
  } catch {}

  // Ensure the parent of `to` exists (for the "anon/<cookie_id>" case the
  // parent is /data/history/anon/, which won't be there in fresh installs).
  await fs.mkdir(path.dirname(to), { recursive: true }).catch(() => {});
  await fs.rename(from, to);
  console.log(`[history] renamed scope ${fromScope} → ${toScope}`);
  return true;
}

// ── Routes ──────────────────────────────────────────────────────────────────

// True when `value` has the folder/ordering shape the handlers rely on:
// an object with a `folders` array (each entry an object carrying an
// `items` array) and an `uncategorized` array.
function isValidMeta(value) {
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return false;
  }
  if (!Array.isArray(value.folders) || !Array.isArray(value.uncategorized)) {
    return false;
  }
  return value.folders.every(
    (folder) =>
      folder !== null && typeof folder === "object" && Array.isArray(folder.items),
  );
}

/**
 * Register the /api/history routes on an Express-style `app`.
 *
 * All routes are scoped to req — they read scopeForRequest(req) and
 * refuse to operate outside that scope. No request body or URL param
 * can reference another user's library.
 *
 * `addToSkipList(scope, videoId)` is injected so the DELETE route can
 * suppress re-queueing of a subscription video the user explicitly
 * removed. It's scope-keyed (./subscriptions.js): the skip applies to the
 * same scope's subscription store.
 */
export function setupHistoryRoutes(app, { addToSkipList } = {}) {
  const notFound = (res) => res.status(404).json({ error: "Session not found" });

  // Resolve the caller's scope, or answer 401 and return null.
  function requireScope(req, res) {
    try {
      return scopeForRequest(req);
    } catch {
      res.status(401).json({ error: "auth_required" });
      return null;
    }
  }

  // Path of a session file inside the scope; throws on a malformed id.
  function sessionFile(scope, id) {
    return path.join(scopeDir(scope), `${safeFilename(id)}.json`);
  }

  // Stored meta, or an empty structure when what's on disk is unusable.
  async function loadUsableMeta(scope) {
    const stored = await loadMeta(scope);
    return isValidMeta(stored) ? stored : { folders: [], uncategorized: [] };
  }

  // Get all history: sessions + folder structure for THIS user.
  app.get("/api/history", async (req, res) => {
    const scope = requireScope(req, res);
    if (!scope) return;
    try {
      const dir = scopeDir(scope);
      let files = [];
      try {
        files = await fs.readdir(dir);
      } catch {
        files = []; // no library yet — render an empty state
      }

      const sessionFiles = files.filter(
        (f) =>
          f.endsWith(".json") && !f.startsWith("_") && !ROOT_SIDECARS.has(f),
      );

      const sessionsMap = {};
      for (const file of sessionFiles) {
        try {
          const raw = await fs.readFile(path.join(dir, file), "utf-8");
          const data = JSON.parse(raw);
          // Skip JSON that isn't a session record so it can't surface as a
          // phantom library entry.
          if (typeof data?.id !== "string") continue;
          sessionsMap[data.id] = {
            id: data.id,
            videoId: data.videoId,
            url: data.url,
            title: data.title,
            topicCount: data.topicCount,
            type: data.type || "youtube",
            segmentCount: data.segmentCount,
            createdAt: data.createdAt,
            uploadDate: data.uploadDate || "",
          };
        } catch {
          // Unreadable/corrupt file — skip it, keep the rest of the library.
        }
      }

      const meta = await loadUsableMeta(scope);
      // Own-key membership only; a plain `sessionsMap[id]` lookup would
      // also match inherited names such as "constructor".
      const known = new Set(Object.keys(sessionsMap));

      // Clean up: remove references to deleted sessions.
      for (const folder of meta.folders) {
        folder.items = folder.items.filter((id) => known.has(id));
      }
      meta.uncategorized = meta.uncategorized.filter((id) => known.has(id));

      // Add any sessions not in meta (newly created), newest first.
      const referenced = new Set([
        ...meta.uncategorized,
        ...meta.folders.flatMap((f) => f.items),
      ]);
      const orphans = [...known]
        .filter((id) => !referenced.has(id))
        .sort(
          (a, b) =>
            new Date(sessionsMap[b].createdAt) -
            new Date(sessionsMap[a].createdAt),
        );
      meta.uncategorized = [...orphans, ...meta.uncategorized];

      await saveMeta(scope, meta);
      res.json({ sessions: sessionsMap, meta });
    } catch (err) {
      // Keep the empty-library fallback response, but leave a trace.
      console.warn("[history] failed to list history:", err?.message || err);
      res.json({
        sessions: {},
        meta: { folders: [], uncategorized: [] },
      });
    }
  });

  // Get a single session (full data) — scoped to current user.
  app.get("/api/history/:id", async (req, res) => {
    const scope = requireScope(req, res);
    if (!scope) return;
    try {
      const raw = await fs.readFile(sessionFile(scope, req.params.id), "utf-8");
      res.json(JSON.parse(raw));
    } catch {
      notFound(res);
    }
  });

  // Rename a session title — scoped. A missing, empty or non-string title
  // leaves the stored title unchanged.
  app.put("/api/history/:id/title", async (req, res) => {
    const scope = requireScope(req, res);
    if (!scope) return;

    let filePath;
    let data;
    try {
      filePath = sessionFile(scope, req.params.id);
      data = JSON.parse(await fs.readFile(filePath, "utf-8"));
      if (data === null || typeof data !== "object") throw new Error("not_a_record");
    } catch {
      return notFound(res);
    }

    const requested = req.body?.title;
    if (typeof requested === "string" && requested) data.title = requested;

    try {
      await fs.writeFile(filePath, JSON.stringify(data));
      res.json({ ok: true, title: data.title });
    } catch (err) {
      res.status(500).json({ error: err.message });
    }
  });

  // Delete a session — scoped. Also adds the videoId to the scope's skip
  // list so subscriptions don't re-queue it, and drops the session's
  // cached summary audio.
  app.delete("/api/history/:id", async (req, res) => {
    const scope = requireScope(req, res);
    if (!scope) return;
    const { id } = req.params;

    let filePath;
    try {
      filePath = sessionFile(scope, id);
    } catch {
      return notFound(res);
    }

    let videoId = null;
    try {
      videoId = JSON.parse(await fs.readFile(filePath, "utf-8")).videoId;
    } catch {
      // Unreadable record: still attempt the delete, just skip the skip-list.
    }

    try {
      await fs.unlink(filePath);
    } catch {
      return notFound(res);
    }

    // The record is gone. Everything below is housekeeping; a failure here
    // must not be reported to the client as "Session not found".
    try {
      await fs.rm(sessionAudioDir(scope, id), { recursive: true, force: true });
    } catch (err) {
      console.warn(
        `[history] could not remove audio cache for ${id}:`,
        err?.message || err,
      );
    }

    if (videoId && typeof addToSkipList === "function") {
      try {
        await addToSkipList(scope, videoId);
      } catch (err) {
        console.warn(
          `[history] could not add ${videoId} to skip list:`,
          err?.message || err,
        );
      }
    }

    try {
      const meta = await loadUsableMeta(scope);
      meta.uncategorized = meta.uncategorized.filter((entry) => entry !== id);
      for (const folder of meta.folders) {
        folder.items = folder.items.filter((entry) => entry !== id);
      }
      await saveMeta(scope, meta);
    } catch (err) {
      // GET /api/history prunes dangling references, so this self-heals.
      console.warn(
        `[history] could not update meta after deleting ${id}:`,
        err?.message || err,
      );
    }

    res.json({ ok: true });
  });

  // Update meta (folders, ordering) — scoped. The body replaces the stored
  // meta wholesale, so reject anything that isn't meta-shaped.
  app.put("/api/history/meta", async (req, res) => {
    const scope = requireScope(req, res);
    if (!scope) return;
    const meta = req.body;
    if (!isValidMeta(meta)) {
      return res.status(400).json({ error: "invalid_meta" });
    }
    try {
      await saveMeta(scope, meta);
      res.json({ ok: true });
    } catch (err) {
      res.status(500).json({ error: err.message });
    }
  });

  // Create a folder.
  app.post("/api/history/folders", async (req, res) => {
    const scope = requireScope(req, res);
    if (!scope) return;
    try {
      const meta = await loadUsableMeta(scope);
      const requested = req.body?.name;
      const folder = {
        id: `folder-${Date.now()}`,
        name: typeof requested === "string" && requested ? requested : "New Folder",
        collapsed: false,
        items: [],
      };
      meta.folders.push(folder);
      await saveMeta(scope, meta);
      res.json(folder);
    } catch (err) {
      res.status(500).json({ error: err.message });
    }
  });

  // Rename a folder.
  app.put("/api/history/folders/:id", async (req, res) => {
    const scope = requireScope(req, res);
    if (!scope) return;
    try {
      const meta = await loadUsableMeta(scope);
      const folder = meta.folders.find((f) => f.id === req.params.id);
      if (!folder) return res.status(404).json({ error: "Folder not found" });
      const requested = req.body?.name;
      if (typeof requested === "string" && requested) folder.name = requested;
      await saveMeta(scope, meta);
      res.json(folder);
    } catch (err) {
      res.status(500).json({ error: err.message });
    }
  });

  // Persist a folder's collapsed/expanded state.
  app.put("/api/history/folders/:id/collapsed", async (req, res) => {
    const scope = requireScope(req, res);
    if (!scope) return;
    try {
      const meta = await loadUsableMeta(scope);
      const folder = meta.folders.find((f) => f.id === req.params.id);
      if (!folder) return res.status(404).json({ error: "Folder not found" });
      folder.collapsed = Boolean(req.body?.collapsed);
      await saveMeta(scope, meta);
      res.json({ ok: true, collapsed: folder.collapsed });
    } catch (err) {
      res.status(500).json({ error: err.message });
    }
  });

  // Delete a folder — items move back to uncategorized.
  app.delete("/api/history/folders/:id", async (req, res) => {
    const scope = requireScope(req, res);
    if (!scope) return;
    try {
      const meta = await loadUsableMeta(scope);
      const idx = meta.folders.findIndex((f) => f.id === req.params.id);
      if (idx === -1) {
        return res.status(404).json({ error: "Folder not found" });
      }
      const [folder] = meta.folders.splice(idx, 1);
      meta.uncategorized = [...folder.items, ...meta.uncategorized];
      await saveMeta(scope, meta);
      res.json({ ok: true });
    } catch (err) {
      res.status(500).json({ error: err.message });
    }
  });

  // Move a session to a folder (or uncategorized if folderId is null).
  // Without a numeric index the session goes to the end of a folder, or
  // to the front of uncategorized.
  app.put("/api/history/move", async (req, res) => {
    const scope = requireScope(req, res);
    if (!scope) return;
    try {
      const { sessionId, folderId, index } = req.body ?? {};
      if (typeof sessionId !== "string" || !sessionId) {
        return res.status(400).json({ error: "invalid_session_id" });
      }

      const meta = await loadUsableMeta(scope);

      // Resolve the destination BEFORE touching the meta: an unknown
      // folder must not strip the session from its current place.
      const target = folderId
        ? meta.folders.find((f) => f.id === folderId)
        : null;
      if (folderId && !target) {
        return res.status(404).json({ error: "Folder not found" });
      }

      meta.uncategorized = meta.uncategorized.filter((id) => id !== sessionId);
      for (const folder of meta.folders) {
        folder.items = folder.items.filter((id) => id !== sessionId);
      }

      // Read the destination list only after the removal above replaced
      // the arrays.
      const list = target ? target.items : meta.uncategorized;
      const fallback = target ? list.length : 0;
      const at = Number.isFinite(index) ? index : fallback;
      list.splice(at, 0, sessionId);

      await saveMeta(scope, meta);
      res.json({ ok: true });
    } catch (err) {
      res.status(500).json({ error: err.message });
    }
  });
}

// Allow the same character set as scope components for session ids.
// Belt-and-suspenders against ../../ in :id. Returns the id unchanged or
// throws Error("invalid_session_id").
export function safeFilename(s) {
  if (typeof s !== "string" || !/^[A-Za-z0-9_-]+$/.test(s)) {
    throw new Error("invalid_session_id");
  }
  return s;
}