Files
Keysat b4fa5d7be8 Add opt-in Daily Digest (daily email of last 24h of library recaps)
Multi-mode, off by default. Each new recap is synthesized into a 1-2
paragraph overview via the relay (operator-absorbed) and cached onto the
session JSON; a daily 08:00 scan emails opted-in users their fresh
recaps, deduped by a per-user watermark that never skips a failed or
over-cap recap. One-click tokenized unsubscribe; settings-modal toggle;
admin test trigger. Bumps to 0.2.158.
2026-06-15 19:50:48 -05:00

699 lines
25 KiB
JavaScript

// History storage + routes. Per-user-scoped under /data/history/<scope>/.
//
// "Scope" is:
// - single mode: "owner" (always)
// - multi mode signed-in user: "<user_id>"
// - multi mode anonymous trial: "anon/<trial_cookie_id>"
//
// Each scope has its own folder with one *.json file per summary and a
// `_meta.json` for the folder/ordering UI. Scope isolation is enforced
// here at the path level — handlers in this file refuse to read across
// scopes, period. The auth middleware populates req.userId; we derive
// the scope via scopeForRequest(req) and never trust raw URL input.
//
// On a brand-new install nothing exists. Single-mode installs created
// before 0.2.77 wrote files flat to /data/history/*.json; the migration
// hook (see migrateLegacyLibrary below) moves those into the "owner"
// scope on first multi-mode boot.
//
// Module-private state: historyDir (the root path). All per-scope
// paths are derived per-call so adding a new user doesn't need a
// re-init.
import fs from "fs/promises";
import path from "path";
let historyDir = null;
// ── Initialization ──────────────────────────────────────────────────────────
// Boot-time setup; invoke exactly once. Records the history root
// (<dataDir>/history) in module state and makes sure it exists on disk.
// In single mode the "owner" scope folder is pre-created as well, so the
// first summary write doesn't race its own mkdir.
// mkdir failures are intentionally ignored here (best-effort).
export async function initHistory({ dataDir, mode = "single" }) {
  const ignoreFailure = () => {};
  historyDir = path.join(dataDir, "history");
  const wanted = [historyDir];
  if (mode === "single") {
    wanted.push(ownerScopeDir());
  }
  for (const dir of wanted) {
    await fs.mkdir(dir, { recursive: true }).catch(ignoreFailure);
  }
}
// ── Scope helpers ───────────────────────────────────────────────────────────
// Names of bookkeeping JSON files that are NOT session records.
// Covers both kinds of sidecar:
//   - files kept at the root of /data/history/ (not inside a per-user
//     scope), and
//   - files kept inside a scope dir: the folder/order meta plus the
//     per-scope subscription state (moved there by the 0.2.147 migration).
// Every place that lists `.json` files as sessions must exclude these
// names, or they show up as phantom "Invalid Date · undefined topics"
// library entries.
export const ROOT_SIDECARS = new Set([
  "_meta.json", "subscriptions.json",
  "skip-list.json", "seen-list.json",
  "auto-queue.json",
]);
// Validates ONE path segment of a scope before it is joined under the
// history root, so user-supplied input can't traverse out of it.
// Accepted alphabet: letters, digits, "_" and "-" — enough for
// user_ids, trial cookie_ids (base64url / hex) and the literal "anon" /
// "owner" names. Returns the segment unchanged when valid; throws
// Error("invalid_scope_component") for non-strings, empty strings or
// any other character.
function safeComponent(s) {
  const acceptable = typeof s === "string" && /^[A-Za-z0-9_-]+$/.test(s);
  if (!acceptable) {
    throw new Error("invalid_scope_component");
  }
  return s;
}
// scopeForRequest(req) — maps a request to the library scope it may
// read/write; the result is used as a subpath of /data/history/.
//
// Resolution order:
//   1. not multi mode                → "owner"
//   2. multi mode, admin user        → "owner"
//   3. multi mode, signed-in user    → "<user_id>"
//   4. multi mode, anonymous trial   → "anon/<cookie_id>"
//      (req.userId of the form "anon:<cookie_id>")
//   5. req.userId === "owner"        → "owner" (pre-multi-mode legacy shim)
// Anything else throws Error("no_scope"); callers should answer 401.
// Ids are passed through safeComponent, which throws on unsafe input.
//
// Why the admin stays on "owner": before v0.2.91 the folder
// /data/history/owner/ was renamed to /data/history/<admin_user_id>/ on
// the first multi-mode signup, so flipping back to single mode (which
// reads "owner") hid the operator's library. Pinning the admin to
// "owner" in every mode makes mode switches lossless.
export function scopeForRequest(req) {
  const { recapMode, user, userId } = req;
  if (recapMode !== "multi" || user?.is_admin) {
    return "owner";
  }
  if (user?.id) {
    return safeComponent(user.id);
  }
  if (typeof userId === "string") {
    const anonPrefix = "anon:";
    if (userId.startsWith(anonPrefix)) {
      const cookieId = userId.slice(anonPrefix.length);
      return `anon/${safeComponent(cookieId)}`;
    }
    if (userId === "owner") {
      return "owner";
    }
  }
  throw new Error("no_scope");
}
// Absolute directory for a scope string. A scope can be multi-segment
// ("anon/<id>"), so it is split on "/" and every segment is validated
// with safeComponent individually before joining — a raw join would
// treat the whole string as one unchecked component. Throws when any
// segment is invalid.
function scopeDir(scope) {
  const segments = [];
  for (const piece of scope.split("/")) {
    segments.push(safeComponent(piece));
  }
  return path.join(historyDir, ...segments);
}
// Directory of the fixed "owner" scope under the history root.
function ownerScopeDir() {
  const ownerFolder = "owner";
  return path.join(historyDir, ownerFolder);
}
// Full path of a scope's `_meta.json` (folder/ordering UI state).
// Throws whatever scopeDir throws for an invalid scope.
function metaPathFor(scope) {
  const dir = scopeDir(scope);
  return path.join(dir, "_meta.json");
}
// ── Storage ─────────────────────────────────────────────────────────────────
// saveToHistory persists a completed summary as
// <scopeDir(scope)>/<id>.json and returns the generated id.
// The caller (the /api/process handler) is responsible for passing the
// right scope — derived via scopeForRequest(req) up the call stack.
//
// The id is "<epoch ms>-<content hint>", so files sort chronologically
// by name. The hint is the videoId itself for non-podcast types, or the
// first 16 chars of base64url(videoId) for podcasts (guid/url).
// If a hint would contain anything outside [A-Za-z0-9_-] it is
// base64url-encoded instead: the id doubles as a file name and must pass
// safeFilename() when the session is read back, and a raw "/" or ".."
// in it would otherwise redirect the write.
//
// Parameters:
//   scope        library scope string (see scopeForRequest)
//   videoId      source identifier; stored verbatim in the record
//   url, title   stored; title falls back to "Untitled"
//   chunks       array; its length is stored as topicCount
//   entries      array; its length is stored as segmentCount
//   logs         stored as-is
//   uploadDate   stored; falls back to ""
//   type         stored; falls back to "youtube"
//   speakers, speakerNames   optional speaker data (see inline notes)
// Rejects if the scope is invalid or the file can't be written.
export async function saveToHistory(
  scope,
  videoId,
  url,
  title,
  chunks,
  entries,
  logs,
  uploadDate,
  type,
  speakers = null,
  speakerNames = null,
) {
  const toHint = (text) => Buffer.from(text).toString("base64url").slice(0, 16);
  const rawSuffix = type === "podcast" ? toHint(videoId) : String(videoId);
  // An empty suffix is harmless ("<ts>-" is still a safe name); anything
  // with characters outside the safe alphabet gets encoded.
  const idSuffix =
    rawSuffix === "" || /^[A-Za-z0-9_-]+$/.test(rawSuffix)
      ? rawSuffix
      : toHint(rawSuffix);
  // One clock sample, so the id's timestamp and createdAt always agree.
  const now = new Date();
  const id = `${now.getTime()}-${idSuffix}`;
  const record = {
    id,
    videoId,
    url,
    title: title || "Untitled",
    type: type || "youtube",
    topicCount: chunks.length,
    segmentCount: entries.length,
    createdAt: now.toISOString(),
    uploadDate: uploadDate || "",
    chunks,
    entries,
    logs,
    // Phase 1E — speaker legend summary keyed by global speaker ID
    // (Speaker_A, Speaker_B, ...). Each chunk's entries also carry
    // `.speaker` and `.speaker_confidence` fields inline. Null when
    // diarization wasn't available (older relay, off, or no
    // fingerprints collected). Persisting at the record level lets
    // the library card show "2 speakers" without scanning entries.
    speakers: speakers || null,
    // Phase 2 — inferred speaker names from the relay's post-cluster
    // polish pass. Map { Speaker_A: "Matt Hill", ... } with null
    // values for unidentified speakers. Reopening a saved session
    // restores names alongside the cluster IDs.
    speakerNames: speakerNames || null,
  };
  const dir = scopeDir(scope);
  // Best-effort mkdir; a real problem surfaces in writeFile below.
  await fs.mkdir(dir, { recursive: true }).catch(() => {});
  await fs.writeFile(path.join(dir, `${id}.json`), JSON.stringify(record));
  return id;
}
// ── Meta ────────────────────────────────────────────────────────────────────
// Each scope has its own `_meta.json` for folder/ordering UI state.
// loadMeta(scope) reads it and always returns an object whose `folders`
// and `uncategorized` are arrays and whose folders each have an `items`
// array — the shape every route in this file iterates without checking.
//   - missing file, unreadable file, invalid scope, or invalid JSON
//     → fresh empty meta { folders: [], uncategorized: [] }
//   - valid JSON that isn't a plain object (null, array, number, …)
//     → fresh empty meta
//   - wrong-typed `folders` / `uncategorized` → replaced by []
//   - folder entries that aren't objects are dropped; a folder without
//     an `items` array gets `items: []`
// Any other keys on the meta object or on folders are preserved.
export async function loadMeta(scope) {
  const emptyMeta = () => ({ folders: [], uncategorized: [] });
  const isPlainRecord = (value) =>
    value !== null && typeof value === "object" && !Array.isArray(value);

  let parsed;
  try {
    parsed = JSON.parse(await fs.readFile(metaPathFor(scope), "utf-8"));
  } catch {
    // New scope = empty meta on read (no file yet).
    return emptyMeta();
  }
  if (!isPlainRecord(parsed)) return emptyMeta();

  const folders = Array.isArray(parsed.folders)
    ? parsed.folders
        .filter(isPlainRecord)
        .map((folder) =>
          Array.isArray(folder.items) ? folder : { ...folder, items: [] },
        )
    : [];
  const uncategorized = Array.isArray(parsed.uncategorized)
    ? parsed.uncategorized
    : [];
  return { ...parsed, folders, uncategorized };
}
// Writes a scope's `_meta.json` (pretty-printed, 2-space indent),
// creating the scope folder first when needed. The mkdir is best-effort;
// a write failure rejects.
export async function saveMeta(scope, meta) {
  const folder = scopeDir(scope);
  const ignoreFailure = () => {};
  await fs.mkdir(folder, { recursive: true }).catch(ignoreFailure);
  const target = metaPathFor(scope);
  const serialized = JSON.stringify(meta, null, 2);
  await fs.writeFile(target, serialized);
}
// getHistoryDir() — the history root (/data/history/), or null before
// initHistory has run. Some callers (subscriptions, skip-list, etc.)
// keep unscoped sidecar files directly in this folder.
export function getHistoryDir() {
  const root = historyDir;
  return root;
}
// getScopeHistoryDir(scope) — public wrapper around scopeDir for
// handlers that need raw filesystem access to a scope's folder (e.g.
// delete). Throws on an invalid scope, exactly like scopeDir.
export function getScopeHistoryDir(scope) {
  const dir = scopeDir(scope);
  return dir;
}
// ── Audio-first ("walking mode") TTS cache helpers ──────────────────────────
// Per-topic synthesized summary clips live alongside the session JSON in
// a sibling folder: /data/history/<scope>/<id>-audio/topic-<i>.mp3. Same
// scope-isolation guarantees as the session record (safeFilename guards
// the id; scopeDir guards the scope).
// Folder that holds one session's cached summary-audio clips:
// <scope dir>/<id>-audio. The scope is validated by scopeDir and the id
// by safeFilename; either one throws on unsafe input.
export function sessionAudioDir(scope, id) {
  const base = scopeDir(scope);
  const folderName = `${safeFilename(id)}-audio`;
  return path.join(base, folderName);
}
// Reads and parses the full record <scope dir>/<id>.json.
// Resolves to null for every failure — invalid scope or id, missing
// file, unparseable JSON — so callers can simply 404 on null.
export async function loadSession(scope, id) {
  try {
    const fileName = `${safeFilename(id)}.json`;
    const text = await fs.readFile(path.join(scopeDir(scope), fileName), "utf-8");
    return JSON.parse(text);
  } catch {
    return null;
  }
}
// Lightweight listing of one scope's sessions: { id, title, type, url,
// createdAt } per record — no entries / chunks — sorted oldest first by
// createdAt. The daily-digest scan uses it to pick recaps newer than a
// watermark before loading full records for synthesis.
// Resolves to [] when the scope string is invalid (scopeDir throws) or
// the folder can't be read (no library yet). Files that start with "_",
// are named in ROOT_SIDECARS, don't end in ".json", or fail to
// read/parse are skipped silently.
export async function listScopeSessions(scope) {
  let dir;
  let names;
  try {
    dir = scopeDir(scope);
    names = await fs.readdir(dir);
  } catch {
    return [];
  }

  const looksLikeSession = (name) =>
    name.endsWith(".json") && !(name.startsWith("_") || ROOT_SIDECARS.has(name));

  const summaries = [];
  for (const name of names) {
    if (!looksLikeSession(name)) continue;
    try {
      const text = await fs.readFile(path.join(dir, name), "utf-8");
      const { id, title, type, url, createdAt } = JSON.parse(text);
      summaries.push({ id, title, type: type || "youtube", url, createdAt });
    } catch {
      // Unreadable or malformed record — leave it out of the listing.
    }
  }

  const stamp = (item) => new Date(item.createdAt).getTime();
  summaries.sort((left, right) => stamp(left) - stamp(right));
  return summaries;
}
// Shallow-merges `patch` over the stored record <scope dir>/<id>.json
// (patch keys win), writes the result back and resolves to it — e.g. to
// stamp `summaryAudio` availability. Resolves to null, without writing,
// when the record can't be read or parsed. An invalid scope or id still
// throws, because the path is built before the guarded read.
export async function patchSession(scope, id, patch) {
  const target = path.join(scopeDir(scope), `${safeFilename(id)}.json`);
  let stored;
  try {
    const text = await fs.readFile(target, "utf-8");
    stored = JSON.parse(text);
  } catch {
    return null;
  }
  const updated = { ...stored, ...patch };
  await fs.writeFile(target, JSON.stringify(updated));
  return updated;
}
// ── Legacy library migration (single → multi) ───────────────────────────────
// Pre-0.2.77 single-mode installs wrote summaries flat to
// /data/history/*.json with a single _meta.json. migrateLegacyLibrary()
// moves every such flat `.json` file (except the names in ROOT_SIDECARS)
// plus the root `_meta.json` into the "owner" scope folder, so the
// operator's library is reachable under the "owner" scope.
// NOTE(review): older notes say auth-routes.js later renames that folder
// to the first admin's user id; scopeForRequest now keeps the admin on
// "owner" — confirm whether that rename still happens.
//
// Idempotent and safe to call on every boot. A sentinel file
// (.migrated_to_multi) is written once the migration is complete, and
// later calls return early. The sentinel is NOT written when any move
// failed, so the stranded files are retried on the next boot instead of
// staying invisible at the root forever.
//
// Resolves to one of:
//   { migrated: 0, skipped: "already_migrated" }
//   { migrated: 0, skipped: "no_history_dir" }
//   { migrated: 0, skipped: "empty_legacy_library" }
//   { migrated: <n> }                  — everything moved
//   { migrated: <n>, failed: <m> }     — m moves failed; will retry
export async function migrateLegacyLibrary() {
  const sentinel = path.join(historyDir, ".migrated_to_multi");
  try {
    await fs.access(sentinel);
    return { migrated: 0, skipped: "already_migrated" };
  } catch {
    // No sentinel yet — carry on with the migration.
  }

  let entries = [];
  try {
    entries = await fs.readdir(historyDir);
  } catch {
    return { migrated: 0, skipped: "no_history_dir" };
  }

  const flatFiles = entries.filter(
    (f) => f.endsWith(".json") && !ROOT_SIDECARS.has(f),
  );
  if (flatFiles.length === 0 && !entries.includes("_meta.json")) {
    // Truly empty. Write the sentinel so future boots don't keep
    // checking, but flag this as a non-migration.
    await fs.writeFile(sentinel, new Date().toISOString());
    return { migrated: 0, skipped: "empty_legacy_library" };
  }

  const target = ownerScopeDir();
  await fs.mkdir(target, { recursive: true }).catch(() => {});

  let moved = 0;
  let failed = 0;
  for (const f of flatFiles) {
    try {
      await fs.rename(path.join(historyDir, f), path.join(target, f));
      moved += 1;
    } catch (err) {
      failed += 1;
      console.warn(`[history] failed to migrate ${f}:`, err?.message || err);
    }
  }

  // Move _meta.json too if it exists at the root. Folder/ordering state
  // belongs to the same library. A missing file is fine; any other
  // error counts as a failed move.
  try {
    await fs.rename(
      path.join(historyDir, "_meta.json"),
      path.join(target, "_meta.json"),
    );
  } catch (err) {
    if (err?.code !== "ENOENT") {
      failed += 1;
      console.warn(
        "[history] failed to migrate _meta.json:",
        err?.message || err,
      );
    }
  }

  console.log(
    `[history] migrated ${moved} legacy session(s) to /data/history/owner/`,
  );
  if (failed > 0) {
    console.warn(
      `[history] ${failed} legacy file(s) could not be moved; migration will be retried on next boot`,
    );
    return { migrated: moved, failed };
  }
  await fs.writeFile(sentinel, new Date().toISOString());
  return { migrated: moved };
}
// reclaimAdminLibraryToOwner({ db }) — one-time repair for installs
// upgraded from <0.2.91, where the first admin's library had been
// renamed /data/history/owner/ → /data/history/<admin_user_id>/ at
// signup. Since the admin's scope is now always "owner", that folder
// has to move back so the multi-mode admin — and a later single-mode
// flip — can find it.
//
// The rename happens only when ALL of these hold:
//   (a) an admin row exists in the users table (the earliest by
//       created_at is used),
//   (b) /data/history/<admin_user_id>/ exists, and
//   (c) /data/history/owner/ does NOT exist yet.
// Every other situation is a no-op reported as
// { reclaimed: false, reason }, which makes repeated calls harmless.
// Success resolves to { reclaimed: true, admin_id, email }.
//
// `db` is the better-sqlite3 handle from db.js, passed in rather than
// imported to avoid a dependency cycle (db.js is multi-mode only, this
// module also loads in single mode).
export async function reclaimAdminLibraryToOwner({ db }) {
  const declined = (reason) => ({ reclaimed: false, reason });
  const exists = (target) =>
    fs.access(target).then(
      () => true,
      () => false,
    );

  if (!db) return declined("no_db");

  let admin;
  try {
    const query = db.prepare(
      "SELECT id, email FROM users WHERE is_admin = 1 ORDER BY created_at ASC LIMIT 1",
    );
    admin = query.get();
  } catch {
    return declined("no_users_table");
  }
  if (!admin) return declined("no_admin");

  const ownerDir = path.join(historyDir, "owner");
  const adminDir = path.join(historyDir, safeComponent(admin.id));

  // An existing owner folder means a fresh install or an earlier run of
  // this fixup — leave everything alone either way.
  if (await exists(ownerDir)) return declined("owner_already_exists");
  if (!(await exists(adminDir))) return declined("admin_dir_missing");

  await fs.rename(adminDir, ownerDir);
  console.log(
    `[history] reclaimed admin library: /data/history/${admin.id}/ → /data/history/owner/`,
  );
  return { reclaimed: true, admin_id: admin.id, email: admin.email };
}
// renameScopeDir(fromScope, toScope) — moves a whole per-scope folder
// with a single fs.rename. Documented callers live in auth-routes.js
// (not visible from this file — verify there):
//   - the first multi-mode signup claiming the "owner" legacy library
//     (fromScope="owner", toScope=user.id)
//   - an anonymous trial converting to a real user
//     (fromScope="anon/<cookie_id>", toScope=user.id)
//
// Resolves to:
//   false — `fromScope` has no folder (nothing to do), or `toScope`
//           already exists. We never clobber: the source is left in
//           place for manual reconciliation and a warning is logged.
//           Collisions should be rare since fresh user ids are uuids.
//   true  — the folder was renamed.
// Throws if either scope string is invalid (scopeDir) or if the rename
// itself fails.
export async function renameScopeDir(fromScope, toScope) {
  const from = scopeDir(fromScope);
  const to = scopeDir(toScope);
  try {
    await fs.access(from);
  } catch {
    return false;
  }
  try {
    await fs.access(to);
    console.warn(
      `[history] renameScopeDir: ${toScope} already exists; refusing to clobber. Leaving ${fromScope} in place for manual reconciliation.`,
    );
    return false;
  } catch {
    // Destination is free — proceed with the rename.
  }
  // Make sure the destination's parent exists: for a multi-segment
  // scope such as "anon/<id>" the parent (/data/history/anon/) won't be
  // there on a fresh install.
  await fs.mkdir(path.dirname(to), { recursive: true }).catch(() => {});
  await fs.rename(from, to);
  // Keep a visible separator between the two scopes; without one the
  // log line reads as a single fused identifier.
  console.log(`[history] renamed scope ${fromScope} → ${toScope}`);
  return true;
}
// ── Routes ──────────────────────────────────────────────────────────────────
// Registers the library HTTP routes on `app`.
//
// All routes are scoped to req — they read scopeForRequest(req) and
// refuse to operate outside that scope. No request body or URL param
// can reference another user's library. Session ids from the URL go
// through safeFilename before touching the filesystem.
//
// `addToSkipList(scope, videoId)` is injected so the DELETE route can
// suppress re-queueing of a subscription video the user explicitly removed.
// It's scope-keyed (./subscriptions.js): the skip applies to the same
// scope's subscription store. Optional — when absent, deletes skip it.
//
// Request bodies are treated as untrusted: handlers tolerate a missing
// body, and anything persisted into `_meta.json` is shape-checked first,
// because the other handlers iterate `meta.folders`,
// `meta.uncategorized` and `folder.items` without checking.
export function setupHistoryRoutes(app, { addToSkipList } = {}) {
  // Resolves the caller's scope. On failure answers 401 and returns
  // null, so handlers can bail with `if (!scope) return;`.
  function requireScope(req, res) {
    try {
      return scopeForRequest(req);
    } catch {
      res.status(401).json({ error: "auth_required" });
      return null;
    }
  }

  // True when `meta` has the shape the handlers below rely on:
  // { folders: [{ items: [...] , ...}], uncategorized: [...] }.
  function isValidMeta(meta) {
    return (
      meta !== null &&
      typeof meta === "object" &&
      Array.isArray(meta.folders) &&
      Array.isArray(meta.uncategorized) &&
      meta.folders.every(
        (folder) =>
          folder !== null &&
          typeof folder === "object" &&
          Array.isArray(folder.items),
      )
    );
  }

  // Get all history: sessions + folder structure for THIS user.
  app.get("/api/history", async (req, res) => {
    const scope = requireScope(req, res);
    if (!scope) return;
    try {
      const dir = scopeDir(scope);
      let files = [];
      try {
        files = await fs.readdir(dir);
      } catch {
        files = []; // no library yet — render an empty state
      }
      const sessionsMap = {};
      for (const file of files.filter(
        (f) =>
          f.endsWith(".json") && !f.startsWith("_") && !ROOT_SIDECARS.has(f),
      )) {
        try {
          const raw = await fs.readFile(path.join(dir, file), "utf-8");
          const data = JSON.parse(raw);
          sessionsMap[data.id] = {
            id: data.id,
            videoId: data.videoId,
            url: data.url,
            title: data.title,
            topicCount: data.topicCount,
            type: data.type || "youtube",
            segmentCount: data.segmentCount,
            createdAt: data.createdAt,
            uploadDate: data.uploadDate || "",
          };
        } catch {
          // Unreadable / malformed record — leave it out of the listing.
        }
      }
      const meta = await loadMeta(scope);
      // Clean up: remove references to deleted sessions
      for (const folder of meta.folders) {
        folder.items = folder.items.filter((id) => sessionsMap[id]);
      }
      meta.uncategorized = meta.uncategorized.filter((id) => sessionsMap[id]);
      // Add any sessions not in meta (newly created), newest first.
      const allReferenced = new Set([
        ...meta.uncategorized,
        ...meta.folders.flatMap((f) => f.items),
      ]);
      const allIds = Object.keys(sessionsMap);
      const orphans = allIds
        .filter((id) => !allReferenced.has(id))
        .sort(
          (a, b) =>
            new Date(sessionsMap[b].createdAt) -
            new Date(sessionsMap[a].createdAt),
        );
      meta.uncategorized = [...orphans, ...meta.uncategorized];
      // The reconciled meta is written back on every listing.
      await saveMeta(scope, meta);
      res.json({ sessions: sessionsMap, meta });
    } catch {
      // Any failure degrades to an empty library rather than an error.
      res.json({
        sessions: {},
        meta: { folders: [], uncategorized: [] },
      });
    }
  });

  // Get a single session (full data) — scoped to current user.
  app.get("/api/history/:id", async (req, res) => {
    const scope = requireScope(req, res);
    if (!scope) return;
    try {
      const raw = await fs.readFile(
        path.join(scopeDir(scope), `${safeFilename(req.params.id)}.json`),
        "utf-8",
      );
      res.json(JSON.parse(raw));
    } catch {
      res.status(404).json({ error: "Session not found" });
    }
  });

  // Rename a session title — scoped. Only a non-empty string replaces
  // the stored title; a missing body or any other value keeps it.
  app.put("/api/history/:id/title", async (req, res) => {
    const scope = requireScope(req, res);
    if (!scope) return;
    try {
      const filePath = path.join(
        scopeDir(scope),
        `${safeFilename(req.params.id)}.json`,
      );
      const raw = await fs.readFile(filePath, "utf-8");
      const data = JSON.parse(raw);
      const requested = req.body?.title;
      if (typeof requested === "string" && requested) {
        data.title = requested;
      }
      await fs.writeFile(filePath, JSON.stringify(data));
      res.json({ ok: true, title: data.title });
    } catch {
      res.status(404).json({ error: "Session not found" });
    }
  });

  // Delete a session — scoped. Also adds the videoId to this scope's
  // skip list (via the injected addToSkipList) so subscriptions don't
  // re-queue it.
  app.delete("/api/history/:id", async (req, res) => {
    const scope = requireScope(req, res);
    if (!scope) return;
    try {
      const filePath = path.join(
        scopeDir(scope),
        `${safeFilename(req.params.id)}.json`,
      );
      let videoId = null;
      try {
        const raw = await fs.readFile(filePath, "utf-8");
        videoId = JSON.parse(raw).videoId;
      } catch {
        // Unreadable record: still try to delete it, just without a
        // skip-list entry.
      }
      await fs.unlink(filePath);
      if (videoId && typeof addToSkipList === "function") {
        await addToSkipList(scope, videoId);
      }
      const meta = await loadMeta(scope);
      meta.uncategorized = meta.uncategorized.filter(
        (id) => id !== req.params.id,
      );
      for (const folder of meta.folders) {
        folder.items = folder.items.filter((id) => id !== req.params.id);
      }
      await saveMeta(scope, meta);
      res.json({ ok: true });
    } catch {
      res.status(404).json({ error: "Session not found" });
    }
  });

  // Update meta (folders, ordering) — scoped. The body replaces the
  // stored meta wholesale, so it is rejected with 400 unless it has the
  // expected shape; a malformed meta would make the listing route fall
  // back to an empty library.
  app.put("/api/history/meta", async (req, res) => {
    const scope = requireScope(req, res);
    if (!scope) return;
    try {
      const meta = req.body;
      if (!isValidMeta(meta)) {
        return res.status(400).json({ error: "invalid_meta" });
      }
      await saveMeta(scope, meta);
      res.json({ ok: true });
    } catch (err) {
      res.status(500).json({ error: err.message });
    }
  });

  // Create a folder (appended last). Responds with the new folder.
  app.post("/api/history/folders", async (req, res) => {
    const scope = requireScope(req, res);
    if (!scope) return;
    try {
      const meta = await loadMeta(scope);
      const folder = {
        id: `folder-${Date.now()}`,
        name: req.body?.name || "New Folder",
        collapsed: false,
        items: [],
      };
      meta.folders.push(folder);
      await saveMeta(scope, meta);
      res.json(folder);
    } catch (err) {
      res.status(500).json({ error: err.message });
    }
  });

  // Rename a folder; an empty/missing name keeps the current one.
  app.put("/api/history/folders/:id", async (req, res) => {
    const scope = requireScope(req, res);
    if (!scope) return;
    try {
      const meta = await loadMeta(scope);
      const folder = meta.folders.find((f) => f.id === req.params.id);
      if (!folder) return res.status(404).json({ error: "Folder not found" });
      folder.name = req.body?.name || folder.name;
      await saveMeta(scope, meta);
      res.json(folder);
    } catch (err) {
      res.status(500).json({ error: err.message });
    }
  });

  // Persist a folder's collapsed/expanded UI state (coerced to boolean).
  app.put("/api/history/folders/:id/collapsed", async (req, res) => {
    const scope = requireScope(req, res);
    if (!scope) return;
    try {
      const meta = await loadMeta(scope);
      const folder = meta.folders.find((f) => f.id === req.params.id);
      if (!folder) return res.status(404).json({ error: "Folder not found" });
      folder.collapsed = !!req.body?.collapsed;
      await saveMeta(scope, meta);
      res.json({ ok: true, collapsed: folder.collapsed });
    } catch (err) {
      res.status(500).json({ error: err.message });
    }
  });

  // Delete a folder — items move back to uncategorized.
  app.delete("/api/history/folders/:id", async (req, res) => {
    const scope = requireScope(req, res);
    if (!scope) return;
    try {
      const meta = await loadMeta(scope);
      const idx = meta.folders.findIndex((f) => f.id === req.params.id);
      if (idx === -1) return res.status(404).json({ error: "Folder not found" });
      const [folder] = meta.folders.splice(idx, 1);
      meta.uncategorized = [...folder.items, ...meta.uncategorized];
      await saveMeta(scope, meta);
      res.json({ ok: true });
    } catch (err) {
      res.status(500).json({ error: err.message });
    }
  });

  // Move a session to a folder (or uncategorized if folderId is null).
  // `index` is the insert position; default is end of a folder / top of
  // uncategorized. `sessionId` must be a non-empty string (400
  // otherwise). If folderId names no existing folder the session is
  // left unreferenced, and the listing route re-adds it to the top of
  // uncategorized on its next run.
  app.put("/api/history/move", async (req, res) => {
    const scope = requireScope(req, res);
    if (!scope) return;
    try {
      const { sessionId, folderId, index } = req.body ?? {};
      if (typeof sessionId !== "string" || !sessionId) {
        return res.status(400).json({ error: "invalid_session_id" });
      }
      const meta = await loadMeta(scope);
      // Remove from wherever it currently lives before re-inserting.
      meta.uncategorized = meta.uncategorized.filter((id) => id !== sessionId);
      for (const folder of meta.folders) {
        folder.items = folder.items.filter((id) => id !== sessionId);
      }
      if (folderId) {
        const folder = meta.folders.find((f) => f.id === folderId);
        if (folder) {
          const i = typeof index === "number" ? index : folder.items.length;
          folder.items.splice(i, 0, sessionId);
        }
      } else {
        const i = typeof index === "number" ? index : 0;
        meta.uncategorized.splice(i, 0, sessionId);
      }
      await saveMeta(scope, meta);
      res.json({ ok: true });
    } catch (err) {
      res.status(500).json({ error: err.message });
    }
  });
}
// Validates a session id before it is used as a file name. Accepts the
// same alphabet as scope components (letters, digits, "_" and "-") and
// returns the id unchanged; anything else — including non-strings and
// the empty string — throws Error("invalid_session_id").
// Belt-and-suspenders against ../../ arriving through a route's :id.
export function safeFilename(s) {
  const valid = typeof s === "string" && /^[A-Za-z0-9_-]+$/.test(s);
  if (valid) {
    return s;
  }
  throw new Error("invalid_session_id");
}