Files
recap-relay/server/routes/internal-meetings.js
Keysat cbd9748a79 Guard meeting :id against path traversal
saveMeeting/loadMeeting/deleteMeeting built path.join(meetingsDir, id +
'.json') straight from req.params.id, so an admin-authed :id like
'../../etc/passwd' could read/write/delete outside internal-meetings/.
Centralize a meetingPath() helper that strips anything outside
[A-Za-z0-9_-] (mirrors output-store.js) and throws on an empty result;
load/delete catch it as 404/no-op. Add a regression test.
2026-06-13 18:22:00 -05:00

2238 lines
83 KiB
JavaScript

// Internal team meeting processing — Path 2A Phase 1.
//
// Operator-only endpoint family. Accepts an uploaded audio file
// (mp3 / m4a / wav / etc), runs the SAME hardware pipeline that
// /relay/v1/summarize-url uses for YouTube/podcast content
// (transcribe → diarize → cluster → analyze → post-cluster polish),
// and saves the result to /data/internal-meetings/<id>.json.
//
// Differences from summarize-url:
// - No download step (caller already has the file)
// - No license / credit accounting (operator-owned compute)
// - No Recaps-app envelope (results live on the relay, not in a
// user library — Path 2B will migrate them into per-user
// libraries once multi-tenant Recaps lands)
// - Saved JSON is downloadable + markdown-renderable so the
// operator can feed transcripts to other tools without
// re-running the pipeline
//
// All routes mount under /admin/internal-meetings/* and inherit
// the admin-session-cookie auth gate from the parent admin router.
// No public exposure.
import express from "express";
import multer from "multer";
import fs from "fs/promises";
import path from "path";
import os from "os";
import { randomUUID } from "crypto";
import { getConfigSnapshot } from "../config.js";
import { resolveHardwareConfig } from "../hardware-config.js";
import { createHardwareBackend } from "../backends/hardware.js";
import {
runPipelinedAnalysis,
parseBracketedTranscript,
firstEntryAtOrAfter,
lastEntryBefore,
canonicalIndexForOffset,
stitchAnalysisResults,
planWindowsByDuration,
} from "../chunked-analyze.js";
import { createChunkBuffer } from "../chunk-buffer.js";
import {
runNameInference,
runSummaryPolish,
} from "../post-cluster-polish.js";
import { runMeetingExtras } from "../meeting-extras.js";
import {
backfillEntrySpeakers,
mergeSpeakersInRecord,
reclusterMeetingRecord,
applyPolishedSummaries,
} from "../meeting-speaker-edits.js";
import { acquireHardwareSlot } from "../hardware-queue.js";
import { getAudioDurationSeconds } from "../audio-meta.js";
import {
createJob,
appendEvent,
subscribeToJob,
getJob,
markRunning,
setProgress,
markComplete,
markFailed,
} from "../jobs.js";
// Upload size cap — generous enough for a 4-hour meeting at 192kbps
// mp3 (~340MB) but rejects bigger uploads to avoid disk-fill DoS.
// Expressed in bytes: 500 * 1024 * 1024 = 524,288,000.
const MAX_UPLOAD_BYTES = 500 * 1024 * 1024; // 500MB
// Storage dir for completed meetings: the "internal-meetings"
// folder directly under `dataDir`.
function meetingsDir(dataDir) {
  const subdir = "internal-meetings";
  return path.join(dataDir, subdir);
}
// Create the meetings directory (and any missing parents) if it does
// not exist yet. Best-effort: a failed mkdir is ignored here and
// surfaces later when the caller actually touches the directory.
async function ensureMeetingsDir(dataDir) {
  const dir = meetingsDir(dataDir);
  try {
    await fs.mkdir(dir, { recursive: true });
  } catch {
    // deliberately ignored — see note above
  }
}
// Map a meeting id to its record file inside internal-meetings/.
// The id can be caller-supplied (a route :id), so every character
// outside [A-Za-z0-9_-] is dropped before the filename is built —
// path separators and dots never survive, which keeps the result
// inside the meetings directory. Real ids are UUIDs and pass through
// unchanged. (Per the original note this mirrors output-store.js's
// pathFor.)
//
// Throws Error("invalid meeting id") when nothing is left after
// sanitizing. loadMeeting / deleteMeeting catch that and return
// null / false.
export function meetingPath(dataDir, id) {
  const raw = id ? String(id) : "";
  const cleaned = raw.replace(/[^A-Za-z0-9_-]/g, "");
  if (cleaned.length === 0) {
    throw new Error("invalid meeting id");
  }
  return path.join(meetingsDir(dataDir), cleaned + ".json");
}
// ─── Storage layer ──────────────────────────────────────────────────
// Persist `record` as pretty-printed (2-space) JSON at the path
// derived from `id`, making sure the meetings directory exists
// first. The file is written with mode 0o600.
// Rejects if the id sanitizes to empty, the record cannot be
// serialized, or the write fails.
async function saveMeeting(dataDir, id, record) {
  await ensureMeetingsDir(dataDir);
  const target = meetingPath(dataDir, id);
  const body = JSON.stringify(record, null, 2);
  const writeOptions = { mode: 0o600 };
  await fs.writeFile(target, body, writeOptions);
}
// Read and parse the saved record for `id`, applying the two
// read-time backfills before handing it back.
// Returns the record, or null when anything fails: invalid id,
// missing/unreadable file, malformed JSON, or a backfill throwing.
async function loadMeeting(dataDir, id) {
  let record;
  try {
    const text = await fs.readFile(meetingPath(dataDir, id), "utf8");
    record = JSON.parse(text);
    // Order matters: the contiguity backfill adds entries that the
    // speaker backfill then needs to label. Both are no-ops when
    // the saved record was already produced by the post-v0.2.107
    // pipeline (entries are contiguous + speaker fields populated).
    backfillChunkContiguity(record);
    backfillEntrySpeakers(record);
  } catch {
    return null;
  }
  return record;
}
// Reslice each chunk's entries to fill gaps the LLM analyze pass
// left between section boundaries. The original v0.2.103-v0.2.106
// pipeline saved chunks with strict LLM startIndex/endIndex slices,
// which dropped entries the model classified as "between topics"
// (filler, brief transitions). On a 3-min meeting that meant
// users saw topic 2 with one statement and 46-second timestamp
// gaps between consecutive topics.
//
// Fix on read: re-derive canonical entries from rec.transcript,
// match each saved chunk's first/last entry by (offset, text-prefix)
// to find its canonical index range, then extend ranges to be
// contiguous (chunk 0 → entry 0, chunk N's end → chunk N+1's start
// minus 1, last chunk → end of transcript) and rewrite entries
// from the extended slice. Section titles and summaries stay
// untouched — only the entries[] gets refilled.
// rec — parsed meeting record. Mutated in place: for every chunk that
//       can be matched back to the canonical transcript, chunk.entries
//       and chunk.startTime are rewritten. Returns nothing.
function backfillChunkContiguity(rec) {
// Nothing to do without chunks, or without a transcript to re-derive
// the canonical entry list from.
if (!rec || !Array.isArray(rec.chunks) || rec.chunks.length === 0) return;
if (typeof rec.transcript !== "string" || !rec.transcript) return;
const allEntries = parseBracketedTranscript(rec.transcript);
if (allEntries.length === 0) return;
// Quick exit: if the current saved chunks already cover every
// entry contiguously, do nothing. Heuristic — sum of entries
// across chunks equals total entries AND each chunk's last
// entry's offset equals the next chunk's first entry's offset
// minus the gap. Simpler check: total saved entries vs canonical.
const savedEntryCount = rec.chunks.reduce(
(n, c) => n + (Array.isArray(c.entries) ? c.entries.length : 0),
0
);
if (savedEntryCount >= allEntries.length) return;
// Locate a saved entry in allEntries. Only canonical entries whose
// offset equals the saved offset exactly are candidates; among those
// the one whose first 24 characters of text match wins, otherwise the
// first same-offset entry is used as a fallback. Returns -1 when no
// canonical entry shares the offset.
// NOTE(review): the early `break` once an offset exceeds t + 2
// presumably relies on allEntries being in ascending offset order —
// confirm against parseBracketedTranscript.
const matchEntry = (saved) => {
if (!saved) return -1;
const t = saved.offset || 0;
const txt = (saved.text || "").slice(0, 24);
let fallback = -1;
for (let i = 0; i < allEntries.length; i++) {
const off = allEntries[i].offset || 0;
if (off !== t) {
if (off > t + 2) break;
continue;
}
const a = (allEntries[i].text || "").slice(0, 24);
if (a === txt) return i;
if (fallback < 0) fallback = i;
}
return fallback;
};
// Map each chunk to the canonical index range [firstIdx, lastIdx]
// spanned by its saved first/last entries. Chunks with no entries, or
// whose endpoints cannot be matched (or match in reversed order), are
// left out of `ranges` and therefore stay untouched.
const ranges = [];
for (const chunk of rec.chunks) {
const entries = Array.isArray(chunk.entries) ? chunk.entries : [];
if (entries.length === 0) continue;
const firstIdx = matchEntry(entries[0]);
const lastIdx = matchEntry(entries[entries.length - 1]);
if (firstIdx < 0 || lastIdx < 0 || lastIdx < firstIdx) continue;
ranges.push({ chunk, firstIdx, lastIdx });
}
if (ranges.length === 0) return;
// Sorts only the local `ranges` list; the order of rec.chunks itself
// is not changed.
ranges.sort((a, b) => a.firstIdx - b.firstIdx);
// Extend ranges so they collectively cover every canonical entry.
// Math.max only ever grows a range's end, so a range that already
// overlaps its successor keeps the overlap.
ranges[0].firstIdx = 0;
for (let i = 0; i + 1 < ranges.length; i++) {
ranges[i].lastIdx = Math.max(
ranges[i].lastIdx,
ranges[i + 1].firstIdx - 1
);
}
ranges[ranges.length - 1].lastIdx = allEntries.length - 1;
// Rewrite each chunk's entries from its extended slice. We carry
// over the existing chunk's per-entry speaker/confidence/uncertain
// fields keyed by offset+text-prefix so any prior speaker
// attribution survives the rewrite.
// The carry-over key uses a 16-character text prefix (matchEntry
// above compares 24 characters).
// NOTE(review): only speaker, speaker_confidence and speaker_uncertain
// are copied, and only when the prior entry has a truthy `speaker`.
// Other per-entry fields on a prior entry (e.g. speaker_override,
// which the renderers read) are not carried over — confirm that such
// fields cannot exist on records that reach this rewrite.
for (const r of ranges) {
const priorByKey = new Map();
for (const e of r.chunk.entries || []) {
const k = `${e.offset || 0}|${(e.text || "").slice(0, 16)}`;
priorByKey.set(k, e);
}
const slice = allEntries.slice(r.firstIdx, r.lastIdx + 1).map((e) => {
const k = `${e.offset || 0}|${(e.text || "").slice(0, 16)}`;
const prior = priorByKey.get(k);
if (prior && prior.speaker) {
return {
...e,
speaker: prior.speaker,
speaker_confidence: prior.speaker_confidence ?? null,
speaker_uncertain: !!prior.speaker_uncertain,
};
}
return { ...e };
});
r.chunk.entries = slice;
// The chunk now starts at its first (possibly newly added) entry.
r.chunk.startTime = slice[0]?.offset || 0;
}
}
// Build a summary row for every saved meeting.
// Reads each *.json file in the meetings directory and returns
// { id, title, created_at, audio_seconds, topic_count, speaker_count }
// per record, most recent first (by created_at, missing → 0).
// Files that cannot be read or parsed are skipped; an unreadable
// directory yields [].
async function listMeetings(dataDir) {
  await ensureMeetingsDir(dataDir);
  const dir = meetingsDir(dataDir);

  let names;
  try {
    names = await fs.readdir(dir);
  } catch {
    return [];
  }

  const summarize = (rec) => ({
    id: rec.id,
    title: rec.title || "(untitled)",
    created_at: rec.created_at,
    audio_seconds: rec.audio_seconds || 0,
    topic_count: Array.isArray(rec.chunks) ? rec.chunks.length : 0,
    speaker_count: rec.speakers ? Object.keys(rec.speakers).length : 0,
  });

  const summaries = [];
  const jsonNames = names.filter((name) => name.endsWith(".json"));
  for (const name of jsonNames) {
    try {
      const text = await fs.readFile(path.join(dir, name), "utf8");
      summaries.push(summarize(JSON.parse(text)));
    } catch {
      // skip malformed files
      continue;
    }
  }

  // Most-recent first
  const byCreatedDesc = (a, b) => (b.created_at || 0) - (a.created_at || 0);
  summaries.sort(byCreatedDesc);
  return summaries;
}
// Remove the saved record for `id`.
// Returns true when the file was unlinked, false on any failure
// (invalid id, file missing, unlink error).
async function deleteMeeting(dataDir, id) {
  let removed = false;
  try {
    await fs.unlink(meetingPath(dataDir, id));
    removed = true;
  } catch {
    // reported to the caller as `false`
  }
  return removed;
}
// ─── Markdown formatter ─────────────────────────────────────────────
// Converts a saved meeting record into a human-readable markdown doc
// suitable for downloading + feeding to other LLMs / sharing.
// Format a duration in seconds as "m:ss", or "h:mm:ss" once it
// reaches an hour. Fractions are floored; negative, NaN and missing
// values all render as "0:00".
function formatTimestamp(secs) {
  const total = Math.max(0, Math.floor(secs || 0));
  const twoDigits = (n) => String(n).padStart(2, "0");
  const hours = Math.floor(total / 3600);
  const minutes = Math.floor((total % 3600) / 60);
  const seconds = total % 60;
  if (hours > 0) {
    return `${hours}:${twoDigits(minutes)}:${twoDigits(seconds)}`;
  }
  return `${minutes}:${twoDigits(seconds)}`;
}
// Human-readable name for a speaker id.
//   - missing id or "Speaker_Unknown"          → "Unknown"
//   - non-blank string in speakerNames[id]     → that name, trimmed
//   - "Speaker_<LETTERS>"                      → "Speaker <LETTERS>"
//   - anything else                            → the id itself
function speakerDisplayName(speakerId, speakerNames) {
  if (!speakerId || speakerId === "Speaker_Unknown") return "Unknown";
  const named = speakerNames ? speakerNames[speakerId] : undefined;
  if (typeof named === "string") {
    const trimmed = named.trim();
    if (trimmed) return trimmed;
  }
  const letters = /^Speaker_([A-Z]+)$/.exec(String(speakerId));
  if (letters) return `Speaker ${letters[1]}`;
  return speakerId;
}
// Render a saved meeting record as a human-readable Markdown
// document (for download / sharing / feeding to other tools).
//
// Layout, top to bottom — each section is skipped when its data is
// missing or empty:
//   1. Title, processed date, duration, participant hints
//   2. "Speakers" roster (Speaker_Unknown sorted last)
//   3. Extras: TL;DR, Decisions, Action items, Open questions,
//      Key quotes
//   4. "Topics": one heading per chunk with its time range, the
//      summary and a collapsible per-entry transcript
//   5. The full bracketed transcript in a fenced block
//
// rec     — meeting record object; a falsy value yields "".
// returns — the Markdown text, lines joined with "\n".
export function meetingToMarkdown(rec) {
  if (!rec) return "";
  const lines = [];
  const listOf = (value) => (Array.isArray(value) ? value : []);
  // " [m:ss]" suffix for an optional offset; empty when absent.
  const stampSuffix = (offset) =>
    offset != null ? ` [${formatTimestamp(offset)}]` : "";

  lines.push(`# ${rec.title || "Untitled meeting"}`);
  lines.push("");
  const dateStr = rec.created_at
    ? new Date(rec.created_at).toLocaleString("en-US", {
        dateStyle: "long",
        timeStyle: "short",
      })
    : "(unknown)";
  lines.push(`**Processed:** ${dateStr}`);
  if (rec.audio_seconds) {
    lines.push(`**Duration:** ${formatTimestamp(rec.audio_seconds)}`);
  }
  if (Array.isArray(rec.participants) && rec.participants.length) {
    lines.push(`**Participant hints:** ${rec.participants.join(", ")}`);
  }

  // Speaker roster
  if (rec.speakers && Object.keys(rec.speakers).length) {
    lines.push("", "## Speakers", "");
    const roster = Object.entries(rec.speakers).sort((a, b) => {
      if (a[0] === "Speaker_Unknown") return 1;
      if (b[0] === "Speaker_Unknown") return -1;
      return a[0].localeCompare(b[0]);
    });
    for (const [id, stats] of roster) {
      const display = speakerDisplayName(id, rec.speaker_names);
      const secs = Math.round(stats.total_speaking_seconds || 0);
      const turns = stats.turns || 0;
      lines.push(
        `- **${display}** — ${formatTimestamp(secs)} speaking, ${turns} turn${turns !== 1 ? "s" : ""}`
      );
    }
  }

  // Phase 2 extras (decisions / action items / open questions /
  // key quotes). Rendered ABOVE the topics so the reader sees the
  // most actionable content first. Empty categories collapse;
  // entire block hides when no extras exist.
  if (rec.extras) {
    const x = rec.extras;
    const tldr = x.tldr && typeof x.tldr === "object" ? x.tldr : null;
    const decs = listOf(x.decisions);
    const acts = listOf(x.action_items);
    const qs = listOf(x.open_questions);
    const quotes = listOf(x.key_quotes);
    const speakerNamesMd = rec.speaker_names || {};
    const renderSpeakerMd = (sid) =>
      sid ? speakerDisplayName(sid, speakerNamesMd) : "";

    if (tldr && typeof tldr.summary === "string" && tldr.summary.trim()) {
      lines.push("", "## TL;DR", "");
      lines.push(tldr.summary.trim());
      const names = listOf(tldr.primary_speakers)
        .map(renderSpeakerMd)
        .filter(Boolean);
      if (names.length) {
        lines.push("");
        lines.push(`_Primary speakers: ${names.join(", ")}_`);
      }
    }

    if (decs.length) {
      lines.push("", "## Decisions", "");
      for (const d of decs) {
        // agreed_by is tolerated as missing or malformed (non-array)
        // rather than throwing mid-render.
        const agreed = listOf(d.agreed_by).map(renderSpeakerMd).filter(Boolean);
        const agreedStr = agreed.length ? ` — agreed by ${agreed.join(", ")}` : "";
        lines.push(`- ${d.statement}${stampSuffix(d.supporting_offset)}${agreedStr}`);
      }
    }

    if (acts.length) {
      lines.push("", "## Action items", "");
      for (const a of acts) {
        // Leading " — " keeps the owner from running into the
        // description / timestamp that precedes it.
        const owner = a.owner ? ` — owner: ${renderSpeakerMd(a.owner)}` : "";
        const due = a.due_hint ? ` (due: ${a.due_hint})` : "";
        lines.push(`- ${a.description}${stampSuffix(a.supporting_offset)}${owner}${due}`);
      }
    }

    if (qs.length) {
      lines.push("", "## Open questions", "");
      for (const q of qs) {
        const by = q.raised_by ? ` — raised by ${renderSpeakerMd(q.raised_by)}` : "";
        lines.push(`- ${q.question}${by}`);
      }
    }

    if (quotes.length) {
      lines.push("", "## Key quotes", "");
      for (const q of quotes) {
        // Same separator as above so the speaker is not glued to the
        // closing quote / timestamp.
        const sp = q.speaker ? ` — ${renderSpeakerMd(q.speaker)}` : "";
        const why = q.why_notable ? ` _(${q.why_notable})_` : "";
        lines.push(`- "${q.quote}"${stampSuffix(q.offset)}${sp}${why}`);
      }
    }
  }

  // Topics + per-topic transcript chunks
  if (Array.isArray(rec.chunks) && rec.chunks.length) {
    lines.push("", "## Topics", "");
    rec.chunks.forEach((chunk, i) => {
      const start = formatTimestamp(chunk.startTime || 0);
      // Adjacent display: end = next chunk's start so consecutive
      // topics appear contiguous; last chunk extends to full audio,
      // falling back to its last entry and then its own start.
      let endSec;
      if (i + 1 < rec.chunks.length) {
        endSec = rec.chunks[i + 1].startTime || 0;
      } else if (rec.audio_seconds) {
        endSec = rec.audio_seconds;
      } else if (chunk.entries && chunk.entries.length > 0) {
        endSec = chunk.entries[chunk.entries.length - 1].offset || 0;
      } else {
        endSec = chunk.startTime || 0;
      }
      const end = formatTimestamp(endSec);
      // En dash between the two stamps — without a separator the
      // range reads as one run of digits ("0:001:00").
      lines.push(`### ${i + 1}. ${chunk.title || "(untitled topic)"} (${start}–${end})`);
      lines.push("");
      lines.push(chunk.summary || "");
      if (Array.isArray(chunk.entries) && chunk.entries.length) {
        lines.push("");
        lines.push("<details><summary>Transcript</summary>");
        lines.push("");
        for (const entry of chunk.entries) {
          const t = formatTimestamp(entry.offset || 0);
          // Operator override wins (same convention as the .html
          // and dashboard renderers).
          const effSpeaker = entry.speaker_override || entry.speaker;
          const who = effSpeaker
            ? speakerDisplayName(effSpeaker, rec.speaker_names)
            : null;
          if (who) {
            lines.push(`- **[${t}] ${who}:** ${entry.text || ""}`);
          } else {
            lines.push(`- **[${t}]** ${entry.text || ""}`);
          }
        }
        lines.push("");
        lines.push("</details>");
      }
      lines.push("");
    });
  }

  // Full unattributed transcript at the bottom — useful for grep'ing
  // and as a clean LLM-input form. Emitted whenever the record has a
  // transcript so downloads are self-contained.
  if (rec.transcript) {
    lines.push("", "## Full transcript (bracketed)", "");
    lines.push("```");
    lines.push(rec.transcript);
    lines.push("```");
  }
  return lines.join("\n");
}
// ─── HTML formatter ─────────────────────────────────────────────────
// Produces a fully self-contained HTML page that renders the meeting
// in the same Recaps-style layout as the dashboard's detail view —
// title, summary stats, speaker legend, topic cards with native
// <details>-driven expandable transcript lines, and the full bracketed
// transcript at the bottom. Includes inlined CSS so it's shareable as
// a single file (email attachment, drag-into-browser, link from a
// shared drive, etc.) and a print-friendly fallback.
//
// Phase 2 extras (`rec.extras`: TL;DR / Decisions / Action Items /
// Open Questions / Key Quotes) are rendered above the topics via
// renderExtrasHtml; when no extras exist that section is omitted.
// Escape a value for inclusion in HTML text or a quoted attribute.
// null / undefined become ""; everything else is stringified first.
// Replaces & < > " ' with their entity forms.
function htmlEsc(s) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  const text = s == null ? "" : String(s);
  return text.replace(/[&<>"']/g, (ch) => entities[ch]);
}
// Chip colors ({ bg, fg, bd }) for a speaker id. Per the original
// note this mirrors dashboard.html's meetingsSpeakerChipColor — same
// palette so a speaker has the same color across the dashboard, .md,
// and .html.
//
// "Speaker_Unknown" always gets the slate swatch. Otherwise the
// letters of "Speaker_<LETTERS>" are read as a base-26 number
// (A=1 … Z=26), shifted to zero-based and wrapped onto the 8-entry
// palette; ids that don't match the pattern are treated as "A".
function speakerChipColor(id) {
  const swatch = (rgb, fg) => ({
    bg: `rgba(${rgb},0.18)`,
    fg,
    bd: `rgba(${rgb},0.35)`,
  });
  if (id === "Speaker_Unknown") {
    return swatch("100,116,139", "#cbd5e1");
  }
  const palette = [
    swatch("239,68,68", "#fca5a5"),
    swatch("59,130,246", "#93c5fd"),
    swatch("34,197,94", "#86efac"),
    swatch("245,158,11", "#fcd34d"),
    swatch("168,85,247", "#d8b4fe"),
    swatch("14,165,233", "#7dd3fc"),
    swatch("236,72,153", "#f9a8d4"),
    swatch("100,116,139", "#cbd5e1"),
  ];
  const match = /^Speaker_([A-Z]+)$/.exec(String(id || ""));
  const letters = match ? match[1] : "A";
  const ordinal =
    [...letters].reduce((acc, ch) => acc * 26 + (ch.charCodeAt(0) - 64), 0) - 1;
  return palette[((ordinal % 8) + 8) % 8];
}
// Short label shown inside a speaker chip.
//   - "Speaker_Unknown"                      → "?"
//   - non-blank name in speakerNames[id]     → upper-cased initials:
//       one word → its first character; several words → first
//       character of the first and of the last word
//   - "Speaker_<LETTERS>"                    → "<LETTERS>"
//   - anything else                          → "?"
//
// Initials are taken by code point, not by UTF-16 unit, so a name
// that starts with a character outside the BMP (emoji, some scripts)
// yields that whole character instead of a lone surrogate half.
function speakerChipLabel(id, speakerNames) {
  if (id === "Speaker_Unknown") return "?";
  const raw = speakerNames ? speakerNames[id] : undefined;
  const inferred = typeof raw === "string" ? raw.trim() : "";
  if (inferred) {
    const parts = inferred.split(/\s+/).filter(Boolean);
    const initial = (word) => String.fromCodePoint(word.codePointAt(0));
    const first = initial(parts[0]);
    if (parts.length === 1) return first.toUpperCase();
    return (first + initial(parts[parts.length - 1])).toUpperCase();
  }
  const m = /^Speaker_([A-Z]+)$/.exec(String(id));
  return m ? m[1] : "?";
}
// Build the colored speaker chip (<span class="chip">) for one
// transcript line.
//   speakerId    — falsy → "" (no chip)
//   confidence   — a number below 0.5 marks the attribution as a guess
//   uncertain    — truthy also marks it as a guess
//   speakerNames — optional id → inferred-name map
// A guessed attribution gets a trailing "?" on the label and a
// "best-guess" tooltip; Speaker_Unknown always gets its own tooltip.
// Label and tooltip are HTML-escaped.
function renderChipHtml(speakerId, confidence, uncertain, speakerNames) {
  if (!speakerId) return "";
  const colors = speakerChipColor(speakerId);
  const label = speakerChipLabel(speakerId, speakerNames);
  const lowConfidence = typeof confidence === "number" && confidence < 0.5;
  const isGuess = uncertain || lowConfidence;
  const chipText = isGuess ? label + "?" : label;
  const displayName = speakerDisplayName(speakerId, speakerNames);

  let tooltip;
  if (speakerId === "Speaker_Unknown") {
    tooltip = "Unknown speaker (brief utterance, no anchor match)";
  } else if (isGuess) {
    tooltip = displayName + " — best-guess attribution";
  } else {
    tooltip = displayName;
  }

  const style = `background:${colors.bg};color:${colors.fg};border-color:${colors.bd};`;
  return `<span class="chip" style="${style}" title="${htmlEsc(tooltip)}">${htmlEsc(chipText)}</span>`;
}
// Renders the Phase 2 extras block for the self-contained .html
// download: an always-visible TL;DR callout followed by four
// collapsible sections (Decisions / Action items / Open questions /
// Key quotes), each item with inline speaker chips and timestamp
// tags. Timestamps in the downloaded file are NOT clickable (static
// HTML); they're shown as styled tags.
//
// extras       — rec.extras; may be null/undefined.
// speakerNames — id → inferred-name map used for chip labels/names.
// returns      — the <section class="extras"> markup, or "" when
//                there is nothing to show: no extras, no items in any
//                category, and no TL;DR with a non-blank summary.
function renderExtrasHtml(extras, speakerNames) {
  if (!extras) return "";
  const listOf = (value) => (Array.isArray(value) ? value : []);
  const tldr = extras.tldr && typeof extras.tldr === "object" ? extras.tldr : null;
  // A TL;DR object only produces markup when its summary is a
  // non-blank string, so that — not the mere presence of the object —
  // decides whether the block has TL;DR content. Otherwise an empty
  // <section> (with its bottom margin) would be emitted.
  const hasTldr =
    tldr !== null && typeof tldr.summary === "string" && tldr.summary.trim() !== "";
  const decs = listOf(extras.decisions);
  const acts = listOf(extras.action_items);
  const qs = listOf(extras.open_questions);
  const quotes = listOf(extras.key_quotes);
  if (!hasTldr && !decs.length && !acts.length && !qs.length && !quotes.length) {
    return "";
  }

  const chipSeparator = `<span class="sep"> · </span>`;

  // Styled (non-clickable) timestamp tag; "" for missing or
  // non-finite offsets.
  const tsTag = (sec) => {
    if (sec == null || !Number.isFinite(sec)) return "";
    return `<span class="ts-tag">${htmlEsc(formatTimestamp(sec))}</span>`;
  };

  // Chip + display name for one speaker id; "" for a falsy id.
  const inlineChip = (sid) => {
    if (!sid) return "";
    const c = speakerChipColor(sid);
    const label = speakerChipLabel(sid, speakerNames);
    const full = sid === "Speaker_Unknown" ? "Unknown" : speakerDisplayName(sid, speakerNames);
    return (
      `<span class="inline-chip">` +
      `<span class="chip" style="background:${c.bg};color:${c.fg};border-color:${c.bd};">${htmlEsc(label)}</span>` +
      `<span class="inline-name">${htmlEsc(full)}</span>` +
      `</span>`
    );
  };

  // One collapsible (initially open) category; "" when it has no items.
  const section = (label, items, render, emoji) => {
    if (!items.length) return "";
    return (
      `<details class="extras-section" open>` +
      `<summary>` +
      `<span class="extras-title">${emoji} ${htmlEsc(label)}</span>` +
      `<span class="extras-count">· ${items.length}</span>` +
      `</summary>` +
      `<div class="extras-body">` + items.map(render).join("") + `</div>` +
      `</details>`
    );
  };

  const renderDecision = (d) => {
    // agreed_by is tolerated as missing or malformed (non-array)
    // rather than throwing mid-render.
    const agreed = listOf(d.agreed_by).map(inlineChip).join(chipSeparator);
    return (
      `<div class="extras-item">` +
      `<div class="extras-text">${htmlEsc(d.statement || "")}</div>` +
      `<div class="extras-meta">` +
      (d.supporting_offset != null ? tsTag(d.supporting_offset) : "") +
      (agreed ? `<span class="meta-label">agreed by:</span>${agreed}` : "") +
      `</div>` +
      `</div>`
    );
  };

  const renderAction = (a) => (
    `<div class="extras-item">` +
    `<div class="extras-text">${htmlEsc(a.description || "")}</div>` +
    `<div class="extras-meta">` +
    (a.supporting_offset != null ? tsTag(a.supporting_offset) : "") +
    (a.owner ? `<span class="meta-label">owner:</span>${inlineChip(a.owner)}` : "") +
    (a.due_hint ? `<span class="meta-due">due: ${htmlEsc(a.due_hint)}</span>` : "") +
    `</div>` +
    `</div>`
  );

  const renderQuestion = (q) => (
    `<div class="extras-item">` +
    `<div class="extras-text">${htmlEsc(q.question || "")}</div>` +
    (q.raised_by
      ? `<div class="extras-meta"><span class="meta-label">raised by:</span>${inlineChip(q.raised_by)}</div>`
      : "") +
    `</div>`
  );

  const renderQuote = (q) => (
    `<div class="extras-item">` +
    `<div class="extras-quote">"${htmlEsc(q.quote || "")}"</div>` +
    `<div class="extras-meta">` +
    (q.offset != null ? tsTag(q.offset) : "") +
    (q.speaker ? inlineChip(q.speaker) : "") +
    (q.why_notable ? `<span class="meta-due">— ${htmlEsc(q.why_notable)}</span>` : "") +
    `</div>` +
    `</div>`
  );

  // TLDR — highlighted callout above the four collapsibles. Always
  // visible (not in a <details>) because it's meant as the first
  // read.
  let tldrHtml = "";
  if (hasTldr) {
    const primary = listOf(tldr.primary_speakers);
    const primaryHtml = primary.length
      ? `<div class="tldr-meta"><span>primary speakers:</span>` +
        primary.map(inlineChip).join(chipSeparator) +
        `</div>`
      : "";
    tldrHtml = (
      `<div class="tldr">` +
      `<div class="tldr-label">TL;DR</div>` +
      `<div class="tldr-summary">${htmlEsc(tldr.summary)}</div>` +
      primaryHtml +
      `</div>`
    );
  }

  return (
    `<section class="extras">` +
    tldrHtml +
    section("Decisions", decs, renderDecision, "✓") +
    section("Action items", acts, renderAction, "→") +
    section("Open questions", qs, renderQuestion, "?") +
    section("Key quotes", quotes, renderQuote, "❝") +
    `</section>`
  );
}
export function meetingToHtml(rec) {
if (!rec) return "<!doctype html><html><body><p>Meeting not found.</p></body></html>";
const title = rec.title || "Untitled meeting";
const speakerNames = rec.speaker_names || {};
const speakerEntries = rec.speakers
? Object.entries(rec.speakers).sort((a, b) => {
if (a[0] === "Speaker_Unknown") return 1;
if (b[0] === "Speaker_Unknown") return -1;
return a[0].localeCompare(b[0]);
})
: [];
const dateStr = rec.created_at
? new Date(rec.created_at).toLocaleString("en-US", {
dateStyle: "medium",
timeStyle: "short",
})
: null;
const chunks = Array.isArray(rec.chunks) ? rec.chunks : [];
const metaParts = [];
if (rec.audio_seconds) metaParts.push(formatTimestamp(rec.audio_seconds));
metaParts.push(`${chunks.length} topic${chunks.length === 1 ? "" : "s"}`);
metaParts.push(`${speakerEntries.length} speaker${speakerEntries.length === 1 ? "" : "s"}`);
if (dateStr) metaParts.push(dateStr);
// ── Embed .md + .json as inline data URLs ──
// The shareable .html artifact carries the markdown and JSON
// serializations inside itself as base64 data URLs, so a recipient
// who downloads the .html (email attachment, shared drive, etc.)
// can grab either format without going back to the relay — the
// relay endpoints are admin-auth-gated anyway, so external
// recipients couldn't reach them. Self-contained = truly portable.
//
// Size overhead: base64 inflates ~33%, but the resulting blobs
// are still in the tens-of-KB range even for hour-long meetings.
// Done at HTML-build time so we don't recompute on every download
// click in the browser.
const safeFilenameBase = (title || "meeting")
.replace(/[^a-zA-Z0-9-_.]+/g, "-")
.slice(0, 80) || "meeting";
const mdContent = meetingToMarkdown(rec);
const jsonContent = JSON.stringify(rec, null, 2);
const mdDataUrl =
"data:text/markdown;charset=utf-8;base64," +
Buffer.from(mdContent, "utf8").toString("base64");
const jsonDataUrl =
"data:application/json;charset=utf-8;base64," +
Buffer.from(jsonContent, "utf8").toString("base64");
const legendHtml = speakerEntries.length
? (
`<section class="legend"><div class="legend-label">Speakers</div><div class="legend-chips">` +
speakerEntries.map(([id, stats]) => {
const c = speakerChipColor(id);
const label = speakerChipLabel(id, speakerNames);
const full = speakerDisplayName(id, speakerNames);
const secs = Math.round(stats.total_speaking_seconds || 0);
return (
`<span class="legend-chip">` +
`<span class="chip" style="background:${c.bg};color:${c.fg};border-color:${c.bd};">${htmlEsc(label)}</span>` +
`<span class="legend-name">${htmlEsc(full)}</span>` +
`<span class="legend-secs">· ${formatTimestamp(secs)}</span>` +
`</span>`
);
}).join("") +
`</div></section>`
)
: "";
const topicsHtml = chunks.length
? chunks.map((chunk, i) => {
const start = formatTimestamp(chunk.startTime || 0);
// Adjacent display: end = next chunk's start so topic ranges
// appear visually contiguous. Last chunk extends to the full
// audio duration. Fallback to last entry's offset.
let endSec;
if (i + 1 < chunks.length) {
endSec = chunks[i + 1].startTime || 0;
} else if (rec.audio_seconds) {
endSec = rec.audio_seconds;
} else {
const lastEntry = chunk.entries && chunk.entries[chunk.entries.length - 1];
endSec = lastEntry ? (lastEntry.offset || 0) : (chunk.startTime || 0);
}
const end = formatTimestamp(endSec);
const lines = (chunk.entries || []).map((entry) => {
const t = formatTimestamp(entry.offset || 0);
// Effective speaker: operator override (set via the
// dashboard's click-to-reassign UI) wins over the original
// diarization attribution. Override-source not shown in
// the static HTML download.
const effectiveSpeaker = entry.speaker_override || entry.speaker;
const chip = renderChipHtml(
effectiveSpeaker,
entry.speaker_confidence,
entry.speaker_uncertain,
speakerNames
);
return (
`<div class="line">` +
`<span class="ts">${htmlEsc(t)}</span>` +
(chip || `<span class="chip chip-empty"></span>`) +
`<span class="text">${htmlEsc(entry.text || "")}</span>` +
`</div>`
);
}).join("");
return (
`<details class="topic">` +
`<summary>` +
`<div class="topic-head">` +
`<span class="topic-title">${i + 1}. ${htmlEsc(chunk.title || "(untitled)")}</span>` +
`<span class="topic-range">${htmlEsc(start)}${htmlEsc(end)}</span>` +
`</div>` +
`<div class="topic-summary">${htmlEsc(chunk.summary || "")}</div>` +
`</summary>` +
`<div class="topic-body">` +
(lines || `<div class="empty-lines">No transcript entries for this topic.</div>`) +
`</div>` +
`</details>`
);
}).join("")
: `<div class="empty">No topic data — analyze may have failed.</div>`;
const transcriptHtml = rec.transcript
? (
`<details class="full-transcript">` +
`<summary>Full transcript (bracketed)</summary>` +
`<pre>${htmlEsc(rec.transcript)}</pre>` +
`</details>`
)
: "";
// Phase 2 extras (decisions / action items / open questions /
// key quotes). Renders above the topics block when present.
// Empty categories collapse; if all four are empty the block hides.
const extrasHtml = renderExtrasHtml(rec.extras, speakerNames);
const css = `
:root {
--bg: #0b1220;
--panel: #111827;
--line: rgba(148,163,184,0.18);
--fg: #e2e8f0;
--fg-dim: #cbd5e1;
--fg-faint: #94a3b8;
--accent: #60a5fa;
}
* { box-sizing: border-box; }
html, body {
margin: 0;
padding: 0;
background: var(--bg);
color: var(--fg);
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
font-size: 14px;
line-height: 1.5;
}
main {
max-width: 920px;
margin: 0 auto;
padding: 32px 24px 64px;
}
header { margin-bottom: 18px; }
h1 {
font-size: 22px;
font-weight: 600;
margin: 0 0 6px;
color: var(--fg);
}
.meta {
font-size: 12px;
color: var(--fg-dim);
}
.header-actions {
margin-top: 12px;
display: flex;
flex-wrap: wrap;
gap: 8px;
}
.dl-btn {
display: inline-block;
padding: 6px 12px;
background: transparent;
border: 1px solid var(--line);
border-radius: 5px;
color: var(--fg);
font-size: 12px;
font-weight: 500;
text-decoration: none;
cursor: pointer;
transition: border-color 0.15s, background 0.15s;
}
.dl-btn:hover {
border-color: var(--accent);
background: rgba(96,165,250,0.08);
}
@media print {
.header-actions { display: none; }
}
.legend {
background: rgba(15,23,42,0.5);
border: 1px solid var(--line);
border-radius: 8px;
padding: 10px 14px;
margin-bottom: 16px;
}
.legend-label {
font-size: 10px;
font-weight: 600;
color: var(--fg-faint);
text-transform: uppercase;
letter-spacing: 0.06em;
margin-bottom: 8px;
}
.legend-chips { display: flex; flex-wrap: wrap; gap: 8px; }
.legend-chip {
display: inline-flex;
align-items: center;
gap: 6px;
padding: 3px 10px;
background: rgba(255,255,255,0.03);
border: 1px solid var(--line);
border-radius: 16px;
font-size: 11px;
}
.legend-name { color: var(--fg); }
.legend-secs { color: var(--fg-faint); }
.chip {
display: inline-flex;
align-items: center;
justify-content: center;
min-width: 26px;
height: 18px;
padding: 0 6px;
font-size: 10px;
font-weight: 700;
border-radius: 4px;
flex-shrink: 0;
letter-spacing: 0.02em;
line-height: 1;
font-family: ui-monospace, Menlo, Consolas, monospace;
border: 1px solid;
}
.chip-empty { background: transparent; border-color: transparent; }
/* Phase 2 extras block — TLDR + decisions / action items /
open questions / key quotes. Sits above the topics list. */
.extras { display: flex; flex-direction: column; gap: 8px; margin-bottom: 16px; }
.tldr {
background: linear-gradient(135deg, rgba(96,165,250,0.08), rgba(15,23,42,0.5));
border: 1px solid var(--line);
border-left: 3px solid var(--accent);
border-radius: 8px;
padding: 12px 16px;
}
.tldr-label {
font-size: 10px;
font-weight: 600;
color: var(--accent);
text-transform: uppercase;
letter-spacing: 0.08em;
margin-bottom: 6px;
}
.tldr-summary { font-size: 13px; line-height: 1.6; color: var(--fg); }
.tldr-meta {
margin-top: 8px;
display: flex;
flex-wrap: wrap;
gap: 8px;
align-items: center;
font-size: 11px;
color: var(--fg-faint);
}
@media print {
.tldr { background: #f5f9ff; border-color: #cbd5e1; border-left-color: #2563eb; }
.tldr-label { color: #2563eb; }
}
details.extras-section {
background: var(--panel);
border: 1px solid var(--line);
border-radius: 8px;
overflow: hidden;
}
details.extras-section > summary {
padding: 10px 14px;
cursor: pointer;
list-style: none;
display: flex;
align-items: baseline;
gap: 8px;
}
details.extras-section > summary::-webkit-details-marker { display: none; }
.extras-title { font-size: 13px; font-weight: 600; color: var(--fg); }
.extras-count { font-size: 11px; color: var(--fg-faint); }
.extras-body {
border-top: 1px solid var(--line);
padding: 8px 14px 12px;
background: rgba(15,23,42,0.3);
display: flex;
flex-direction: column;
gap: 10px;
}
.extras-item { font-size: 12px; line-height: 1.55; color: var(--fg); }
.extras-text {}
.extras-quote {
font-style: italic;
border-left: 3px solid var(--line);
padding-left: 10px;
}
.extras-meta {
margin-top: 4px;
display: flex;
flex-wrap: wrap;
gap: 8px;
align-items: center;
font-size: 11px;
color: var(--fg-faint);
}
.meta-label { color: var(--fg-faint); }
.meta-due { color: var(--fg-dim); }
.sep { color: var(--fg-faint); }
.ts-tag {
display: inline-block;
padding: 1px 6px;
border: 1px solid var(--line);
border-radius: 4px;
color: var(--accent);
font-family: ui-monospace, Menlo, Consolas, monospace;
font-size: 10.5px;
}
.inline-chip { display: inline-flex; align-items: center; gap: 4px; }
.inline-chip .chip {
min-width: 22px;
height: 16px;
padding: 0 5px;
font-size: 9px;
}
.inline-name { font-size: 11px; color: var(--fg-dim); }
.topics { display: flex; flex-direction: column; gap: 10px; }
details.topic {
background: var(--panel);
border: 1px solid var(--line);
border-radius: 8px;
padding: 0;
overflow: hidden;
}
details.topic > summary {
padding: 12px 16px;
cursor: pointer;
list-style: none;
}
details.topic > summary::-webkit-details-marker { display: none; }
.topic-head {
display: flex;
align-items: baseline;
gap: 10px;
}
.topic-title { font-size: 13px; font-weight: 600; color: var(--fg); }
.topic-range {
font-size: 10.5px;
color: var(--fg-faint);
font-family: ui-monospace, Menlo, Consolas, monospace;
}
.topic-summary {
font-size: 12px;
color: var(--fg-dim);
line-height: 1.55;
margin-top: 6px;
}
.topic-body {
border-top: 1px solid var(--line);
padding: 8px 8px 12px;
background: rgba(15,23,42,0.3);
}
.line {
display: flex;
gap: 10px;
align-items: flex-start;
padding: 4px 8px;
font-size: 12px;
line-height: 1.55;
color: var(--fg);
}
.line .ts {
font-size: 11px;
color: var(--accent);
min-width: 54px;
padding-top: 2px;
font-family: ui-monospace, Menlo, Consolas, monospace;
}
.line .text { flex: 1; }
.empty-lines, .empty {
padding: 10px 14px;
font-size: 11px;
color: var(--fg-faint);
}
details.full-transcript {
margin-top: 24px;
background: var(--panel);
border: 1px solid var(--line);
border-radius: 8px;
padding: 12px 16px;
}
details.full-transcript > summary {
cursor: pointer;
font-size: 12px;
font-weight: 600;
color: var(--fg-dim);
}
details.full-transcript pre {
margin: 12px 0 0;
padding: 12px;
background: rgba(15,23,42,0.5);
border: 1px solid var(--line);
border-radius: 6px;
font-family: ui-monospace, Menlo, Consolas, monospace;
font-size: 11px;
line-height: 1.55;
color: var(--fg-dim);
white-space: pre-wrap;
word-break: break-word;
max-height: 60vh;
overflow: auto;
}
footer {
margin-top: 32px;
padding-top: 16px;
border-top: 1px solid var(--line);
font-size: 11px;
color: var(--fg-faint);
text-align: center;
}
/* Print styles — flatten dark theme for paper. */
@media print {
html, body { background: #fff; color: #111; }
.topic, .legend, details.full-transcript { background: #fff; border-color: #ddd; }
.topic-summary, .legend-name, .meta { color: #444; }
.legend-secs, .topic-range, .empty-lines, .empty { color: #888; }
.line .ts { color: #2563eb; }
details { break-inside: avoid; }
details > summary { list-style: none; }
details, details > summary { display: block !important; }
details > summary + * { display: block !important; }
}
`;
return (
`<!doctype html>\n<html lang="en">\n<head>\n` +
`<meta charset="utf-8">\n` +
`<meta name="viewport" content="width=device-width, initial-scale=1">\n` +
`<title>${htmlEsc(title)}</title>\n` +
`<style>${css}</style>\n` +
`</head>\n<body>\n<main>\n` +
`<header>\n` +
`<h1>${htmlEsc(title)}</h1>\n` +
`<div class="meta">${htmlEsc(metaParts.join(" · "))}</div>\n` +
// Download buttons for the .md + .json siblings — embedded in
// the .html itself as base64 data URLs so they work entirely
// offline (no relay round-trip, no auth needed). Lets a
// recipient who got the .html grab any of the three formats
// without operator access.
`<div class="header-actions">\n` +
`<a class="dl-btn" href="${mdDataUrl}" download="${htmlEsc(safeFilenameBase)}.md">Download .md</a>\n` +
`<a class="dl-btn" href="${jsonDataUrl}" download="${htmlEsc(safeFilenameBase)}.json">Download .json</a>\n` +
`</div>\n` +
`</header>\n` +
legendHtml +
extrasHtml +
`<section class="topics">${topicsHtml}</section>\n` +
transcriptHtml +
`<footer>Generated by Recap Relay · Internal Meetings</footer>\n` +
`</main>\n</body>\n</html>\n`
);
}
// ─── Pipeline orchestrator ──────────────────────────────────────────
// Runs the full hardware pipeline on an uploaded audio file. Mirrors
// the relevant portion of summarize-url.js but without the URL
// download / credit accounting / Recaps-app envelope layers.
/**
 * Run the hardware pipeline for one uploaded recording and persist the
 * resulting meeting record via saveMeeting().
 *
 * Stages, in order: acquire the hardware FIFO slot → read the audio file →
 * transcribe while the pipelined analyze pass consumes completed chunks →
 * remap + stitch window sections → optional name inference + summary
 * polish → build rendered chunks and attach speakers → optional extras
 * extraction → save the record and mark the job complete.
 *
 * @param {object} opts
 * @param {string} opts.dataDir       Passed through to saveMeeting().
 * @param {string} opts.jobId         Job id used for events/progress; also the record id.
 * @param {string} opts.audioPath     Uploaded audio file. Unlinked in `finally`
 *                                    once the hardware slot has been acquired.
 * @param {string} opts.mimeType      Forwarded to transcribeAudio().
 * @param {number} opts.audioSec      Probed duration in seconds (0 when unknown).
 * @param {string} opts.title         Operator-supplied title (may be empty).
 * @param {string[]} opts.participants Attendee names, used only as LLM hints
 *                                    and stored on the record.
 * @param {string} [opts.notes=""]    Free-form LLM hint text; never persisted.
 * @returns {Promise<void>}
 * @throws {Error} When no transcribe endpoint is available, or when reading,
 *   transcription, analysis or saving fails. Polish and extras failures are
 *   logged and do not fail the job.
 */
async function runMeetingPipeline({
  dataDir,
  jobId,
  audioPath,
  mimeType,
  audioSec,
  title,
  participants,
  notes = "",
}) {
  const cfg = await getConfigSnapshot();
  const hw = await resolveHardwareConfig(cfg);
  if (!hw.transcribe.url) {
    throw new Error(
      "hardware transcribe not available — Spark Control discovery isn't reporting a ready endpoint"
    );
  }

  // ── Acquire hardware FIFO slot ──
  const release = await acquireHardwareSlot({
    jobId,
    onWait: ({ position, activeJobId }) => {
      appendEvent(jobId, "queued", {
        position,
        activeJobId: activeJobId || null,
      });
      setProgress(
        jobId,
        `queued — ${position} job(s) ahead on operator hardware`
      );
    },
  });

  try {
    markRunning(jobId);
    appendEvent(jobId, "progress", { message: "reading audio…" });
    const audioBuf = await fs.readFile(audioPath);

    // Same hint string feeds both name inference and extras extraction.
    const participantHints = formatParticipantHints(participants);

    // Build analyze backend up-front so the pipelined-analyze worker
    // can fire windows as soon as their chunks arrive.
    const analyzeBackend = createHardwareBackend({
      parakeetBaseURL: hw.transcribe.url || "",
      gemmaBaseURL: hw.analyze.url,
      sparkControlBaseURL: hw.sparkBase || "",
      parakeetModel: hw.transcribe.model || "",
      gemmaModel: hw.analyze.model || "",
      anMaxTokens: cfg.relay_hardware_an_max_tokens || 16000,
    });
    const bodyMin = cfg.relay_hardware_analyze_window_minutes || 18;
    const overlapMin = cfg.relay_hardware_analyze_overlap_minutes || 2;
    const anConcurrency = cfg.relay_hardware_analyze_concurrency || 8;
    const cutoffMin = cfg.relay_analyze_cutoff_minutes || 25;
    const targetTotalsByBucket = {
      under_30: cfg.relay_analyze_total_sections_under_30,
      "30_60": cfg.relay_analyze_total_sections_30_60,
      "60_90": cfg.relay_analyze_total_sections_60_90,
      "90_120": cfg.relay_analyze_total_sections_90_120,
      "120_150": cfg.relay_analyze_total_sections_120_150,
      "150_180": cfg.relay_analyze_total_sections_150_180,
      over_180: cfg.relay_analyze_total_sections_over_180,
    };
    const analyzePromptOverride =
      cfg.relay_analyze_prompt || cfg.relay_analyze_prompt_default || "";
    // Operator-owned compute: every stage reports zero cost.
    const computeCostDetails = () => ({
      input_tokens: 0,
      output_tokens: 0,
      thinking_tokens: 0,
      cost_usd: 0,
    });

    const chunkBuffer = createChunkBuffer();
    // Errors are captured as { __error } so a rejection that lands while
    // transcription is still running never becomes an unhandled rejection;
    // it is rethrown after transcription completes.
    const pipelinedAnalyzePromise = runPipelinedAnalysis({
      audioDurationSec: audioSec || 0,
      waitForTime: (sec) => chunkBuffer.waitForTime(sec),
      getReadySegments: (s, e) => chunkBuffer.getSegments(s, e),
      bodySeconds: bodyMin * 60,
      overlapSeconds: overlapMin * 60,
      cutoffSeconds: cutoffMin * 60,
      concurrency: anConcurrency,
      backend: analyzeBackend,
      pipelineBackend: "hardware",
      jobId,
      batchId: null,
      mediaUrl: null,
      title: title || null,
      installId: "internal-meetings",
      licenseFingerprint: null,
      source: "internal-meetings",
      computeCostDetails,
      analyzePromptOverride,
      targetTotalsByBucket,
      onWindowComplete: (cb) => {
        appendEvent(jobId, "window_complete", {
          windowIdx: cb.windowIdx,
          totalWindows: cb.totalWindows,
          ownedSections: cb.ownedSections,
          windowEntries: cb.windowEntries || undefined,
          windowBodySeconds: cb.windowBodySeconds,
          model: cb.model,
          durationMs: cb.durationMs,
        });
        setProgress(
          jobId,
          `analyze window ${cb.windowIdx + 1}/${cb.totalWindows} done`
        );
      },
    }).catch((err) => ({ __error: err }));

    const transcribeBackend = createHardwareBackend({
      parakeetBaseURL: hw.transcribe.url || "",
      gemmaBaseURL: hw.analyze.url || "",
      sparkControlBaseURL: hw.sparkBase || "",
      parakeetModel: hw.transcribe.model || "",
      gemmaModel: hw.analyze.model || "",
      txChunkSeconds: (cfg.relay_hardware_tx_chunk_minutes || 5) * 60,
      txChunkOverlapSeconds:
        cfg.relay_hardware_tx_chunk_overlap_seconds ?? 30,
      diarizationEnabled: !!cfg.relay_hardware_diarization_enabled,
      clusterThresholdPct:
        cfg.relay_hardware_voice_clustering_threshold ?? 70,
      anchorMinSpeakingSec:
        cfg.relay_hardware_anchor_min_speaking_sec ?? 30,
      smallClusterMaxSpeakingSec:
        cfg.relay_hardware_small_cluster_max_speaking_sec ?? 15,
      uncertainMarginPct: cfg.relay_hardware_uncertain_margin_pct ?? 10,
      txConcurrency: cfg.relay_hardware_tx_concurrency || 4,
      anMaxTokens: cfg.relay_hardware_an_max_tokens || 16000,
      onChunkComplete: (cd) => chunkBuffer.add(cd),
    });
    const txResult = await transcribeBackend.transcribeAudio({
      audio: audioBuf,
      mimeType,
      offsetSeconds: 0,
    });
    appendEvent(jobId, "transcribe_complete", {
      transcript: txResult.text || "",
      model: txResult.model || null,
      chunk_count: txResult.chunk_count ?? null,
      audio_seconds: audioSec || null,
    });

    // Await the pipelined analyze.
    const pipelinedRaw = await pipelinedAnalyzePromise;
    if (pipelinedRaw && pipelinedRaw.__error) {
      throw pipelinedRaw.__error;
    }

    // Remap window-local section indices to global canonical indices,
    // mirroring summarize-url.js's post-pipeline stitch.
    const canonicalEntries = parseBracketedTranscript(txResult.text || "");
    const syntheticResults = remapWindowResultsToCanonical(
      pipelinedRaw.windowResults || [],
      canonicalEntries
    );
    const stitched = stitchAnalysisResults(syntheticResults);
    let analyzeResult = {
      text: JSON.stringify({ sections: stitched }),
      model: pipelinedRaw.dominantModel,
      attempts: pipelinedRaw.attempts,
    };

    // ── Polish pass ──
    let speakerNames = null;
    const polishEnabled = cfg.relay_post_cluster_polish_enabled !== false;
    const detectedSpeakerCount = Object.keys(txResult?.speakers || {}).length;
    const parsedAnalysis = JSON.parse(analyzeResult.text);
    const polishableSections = Array.isArray(parsedAnalysis.sections)
      ? parsedAnalysis.sections
      : null;
    if (
      polishEnabled &&
      detectedSpeakerCount >= 2 &&
      Array.isArray(txResult?.segments) &&
      polishableSections &&
      polishableSections.length > 0
    ) {
      appendEvent(jobId, "progress", { message: "polishing summaries…" });
      try {
        speakerNames = await runNameInference({
          speakers: txResult.speakers,
          transcriptSegments: txResult.segments,
          channelHint: "",
          titleHint: title || "",
          // descriptionHint stays empty for internal meetings —
          // we don't want to stuff participants into the "Description"
          // metadata field where the LLM might read them as factual
          // metadata about the meeting. They go through the explicit
          // OPERATOR HINTS pathway instead, which the prompt warns
          // the model to treat as suggestion-not-truth.
          descriptionHint: "",
          participantHints,
          operatorNotes: notes || "",
          backend: analyzeBackend,
          pipelineBackend: "hardware",
          jobId,
          batchId: null,
          mediaUrl: null,
          installId: "internal-meetings",
          licenseFingerprint: null,
          source: "internal-meetings",
          computeCostDetails,
        });
        const canonicalForPolish = parseBracketedTranscript(
          txResult.text || ""
        );
        const { planWindowsByDuration } = await import("../chunked-analyze.js");
        const windowsForPolish = planWindowsByDuration({
          totalAudioSec: audioSec || 0,
          bodySeconds: bodyMin * 60,
          overlapSeconds: overlapMin * 60,
          cutoffSeconds: cutoffMin * 60,
        });
        const polishedSections = await runSummaryPolish({
          sections: polishableSections,
          canonicalEntries: canonicalForPolish,
          windows: windowsForPolish,
          transcriptSegments: txResult.segments,
          speakerNames,
          speakerStats: txResult.speakers,
          backend: analyzeBackend,
          concurrency: anConcurrency,
          pipelineBackend: "hardware",
          jobId,
          batchId: null,
          mediaUrl: null,
          installId: "internal-meetings",
          licenseFingerprint: null,
          source: "internal-meetings",
          computeCostDetails,
        });
        analyzeResult = {
          ...analyzeResult,
          text: JSON.stringify({ sections: polishedSections }),
        };
      } catch (err) {
        console.warn(
          `[internal-meetings ${jobId.slice(0, 8)}] polish failed (keeping unpolished output): ${err?.message || err}`
        );
        // A failed polish discards inferred names too, so the record's
        // polish_done flag (derived from speakerNames) stays false.
        speakerNames = null;
      }
    }

    // Build the chunks shape Recaps-style rendering expects:
    //   chunks[i] = { title, summary, startTime, entries: [...] }
    // The LLM's original (possibly gappy) ranges stay in
    // `analysis.sections` (saved with the record) for forensics —
    // only the rendered `chunks` are adjusted.
    const finalAnalysis = JSON.parse(analyzeResult.text);
    const chunks = buildGapAbsorbingChunks(
      finalAnalysis.sections || [],
      canonicalEntries
    );

    // Attach speakers to each entry by timestamp match.
    attachSpeakersToChunks(chunks, txResult.segments);

    const transcriptSegments = Array.isArray(txResult?.segments)
      ? txResult.segments.map((s) => ({
          start: s.start || 0,
          end: s.end || 0,
          text: s.text || "",
          speaker: s.speaker || null,
          speaker_confidence: s.speaker_confidence ?? null,
          speaker_uncertain: !!s.speaker_uncertain,
        }))
      : null;

    // ── Phase 2: Meeting extras ──
    // Single LLM pass to pull out structured information operators
    // want at the top of a meeting recap: decisions, action items,
    // open questions, key quotes. Each item carries speaker IDs +
    // offsets so the dashboard can render speaker chips and
    // clickable timestamps that jump to the supporting transcript
    // line. Failure is non-fatal — rec.extras stays null and the
    // UI just hides the section.
    const extrasEnabled = cfg.relay_meeting_extras_enabled !== false;
    let extras = null;
    if (extrasEnabled && Array.isArray(transcriptSegments) && transcriptSegments.length) {
      appendEvent(jobId, "progress", { message: "extracting decisions + action items…" });
      try {
        extras = await runMeetingExtras({
          title,
          audioSec: audioSec || 0,
          speakers: txResult.speakers || {},
          speakerNames: speakerNames || {},
          transcriptSegments,
          topics: chunks.map((c) => ({
            title: c.title,
            summary: c.summary,
            startTime: c.startTime,
          })),
          // Operator hints passed through to the extras prompt the
          // same way they're passed to name-inference — explicit
          // hints, framed in the prompt as suggestions, not truth.
          participantHints,
          operatorNotes: notes || "",
          promptOverride: cfg.relay_meeting_extras_prompt || "",
          backend: analyzeBackend,
          pipelineBackend: "hardware",
          jobId,
          installId: "internal-meetings",
          licenseFingerprint: null,
          source: "internal-meetings",
          computeCostDetails,
        });
      } catch (err) {
        console.warn(
          `[internal-meetings ${jobId.slice(0, 8)}] extras extraction failed (non-fatal): ${err?.message || err}`
        );
        extras = null;
      }
    }

    // ── Save the record ──
    // Note: `notes` from the upload form is intentionally NOT
    // persisted here. Notes are LLM hints only — used by name-
    // inference + extras at pipeline time, then dropped, so the
    // operator can write candid context without it becoming part
    // of a shareable meeting recap.
    const record = {
      id: jobId,
      type: "internal-meeting",
      title: title || "Untitled meeting",
      participants: Array.isArray(participants) ? participants : [],
      created_at: Date.now(),
      audio_seconds: audioSec || 0,
      transcript: txResult.text || "",
      transcript_segments: transcriptSegments,
      speakers: txResult.speakers || null,
      speaker_names: speakerNames || null,
      diarization: txResult.diarization || null,
      chunks,
      analysis: finalAnalysis,
      extras,
      meta: {
        transcribe_model: txResult.model || null,
        analyze_model: analyzeResult.model || null,
        polish_done: !!speakerNames,
        extras_done: !!extras,
      },
    };
    await saveMeeting(dataDir, jobId, record);

    const speakerCount = Object.keys(record.speakers || {}).length;
    markComplete(jobId, {
      result: {
        id: jobId,
        title: record.title,
        audio_seconds: record.audio_seconds,
        topic_count: chunks.length,
        speaker_count: speakerCount,
        named_speaker_count: speakerNames
          ? Object.values(speakerNames).filter(Boolean).length
          : 0,
      },
      credit_charged: 0,
      tier: "operator",
    });
    console.log(
      `[internal-meetings ${jobId.slice(0, 8)}] complete — ${chunks.length} topics, ${speakerCount} speakers`
    );
  } finally {
    release();
    // ALWAYS delete the uploaded audio file after processing (success
    // or failure). The relay never retains audio for internal
    // meetings — same policy as the YouTube/podcast download path.
    try {
      await fs.unlink(audioPath);
    } catch (err) {
      // An already-missing file is fine. Any other failure means audio
      // may still be on disk, which contradicts the retention policy
      // above — surface it instead of swallowing it silently.
      if (err?.code !== "ENOENT") {
        console.warn(
          `[internal-meetings ${String(jobId).slice(0, 8)}] failed to delete uploaded audio ${audioPath}: ${err?.message || err}`
        );
      }
    }
  }
}

// Join operator-supplied attendee names into the comma-separated hint
// string handed to the LLM prompts; "" when there are none.
function formatParticipantHints(participants) {
  return Array.isArray(participants) && participants.length
    ? participants.join(", ")
    : "";
}

// Convert pipelined window results (time-based windows, window-local
// section indices) into the index-based shape stitchAnalysisResults()
// consumes. Failed/missing results pass through untouched; sections whose
// boundary entries can't be located in the canonical transcript are dropped.
function remapWindowResultsToCanonical(windowResults, canonicalEntries) {
  return windowResults.map((wr) => {
    if (!wr || !wr.ok) return wr;
    const w = wr.window;
    const globalStartIdx = firstEntryAtOrAfter(canonicalEntries, w.startSec);
    const globalBodyStartIdx = firstEntryAtOrAfter(
      canonicalEntries,
      w.bodyStartSec
    );
    const globalEndIdx = lastEntryBefore(
      canonicalEntries,
      w.windowEndSec + 0.5
    );
    if (
      globalStartIdx >= canonicalEntries.length ||
      globalEndIdx < globalStartIdx
    ) {
      return {
        window: { startIdx: 0, endIdx: -1, bodyStartIdx: 0 },
        ok: false,
        error: new Error("pipelined window had no canonical entries"),
      };
    }
    const remapped = [];
    for (const s of wr.sections || []) {
      const localStartEntry = wr.windowEntries?.[s.startIndex];
      const localEndEntry = wr.windowEntries?.[s.endIndex];
      if (!localStartEntry || !localEndEntry) continue;
      const globalStart = canonicalIndexForOffset(
        canonicalEntries,
        localStartEntry.offset || 0
      );
      const globalEnd = canonicalIndexForOffset(
        canonicalEntries,
        localEndEntry.offset || 0
      );
      if (globalStart < 0 || globalEnd < 0) continue;
      // Section indices are emitted relative to the window's first
      // canonical entry.
      remapped.push({
        startIndex: globalStart - globalStartIdx,
        endIndex: globalEnd - globalStartIdx,
        title: s.title,
        summary: s.summary,
      });
    }
    return {
      window: {
        startIdx: globalStartIdx,
        endIdx: globalEndIdx,
        bodyStartIdx: globalBodyStartIdx,
      },
      ok: true,
      sections: remapped,
      model: wr.model,
    };
  });
}

// Turn analysis sections into rendered chunks, absorbing gaps.
//
// The LLM's section ranges (startIndex/endIndex) don't always cover every
// entry — short audio especially produces gaps where the model skipped
// filler between coherent topics. A naive slice(start, end + 1) would hide
// those lines. Instead: chunk 0 backfills to entry 0, each non-final chunk
// extends to one before the NEXT section's start, and the final chunk
// extends to the end of the transcript, so no transcript line is hidden.
function buildGapAbsorbingChunks(sections, canonicalEntries) {
  const sortedSections = sections
    .slice()
    .sort((a, b) => (a.startIndex ?? 0) - (b.startIndex ?? 0));
  const lastEntryIdx = canonicalEntries.length - 1;
  return sortedSections
    .map((s, idx, arr) => {
      let start = Math.max(0, s.startIndex ?? 0);
      let end = Math.min(lastEntryIdx, s.endIndex ?? start);
      // First chunk absorbs any pre-topic preamble.
      if (idx === 0) start = 0;
      if (idx + 1 < arr.length) {
        // Take the MAX of (LLM-provided end, next-1) so a section the
        // LLM intentionally made wider than the gap is never shrunk.
        const nextStart = arr[idx + 1].startIndex ?? canonicalEntries.length;
        end = Math.max(end, Math.min(lastEntryIdx, nextStart - 1));
      } else {
        // Last section absorbs any trailing entries.
        end = lastEntryIdx;
      }
      // Defensive clamp in case the LLM emitted backwards ranges.
      if (end < start) end = start;
      const slice = canonicalEntries.slice(start, end + 1);
      return {
        title: s.title,
        summary: s.summary,
        entries: slice,
        startTime: slice[0]?.offset || 0,
      };
    })
    .filter((c) => c.entries.length > 0);
}

// Stamp speaker / speaker_confidence / speaker_uncertain onto each chunk
// entry (mutates the entries in place). No-op without segments.
//
// `entry.offset` is an INTEGER seconds value parsed from a [m:ss] bracket
// that was emitted with Math.floor(seg.start), while segments keep float
// precision. Strict containment (`seg.start <= t <= seg.end`) therefore
// misses entries whose source segment starts at a fractional second, so
// lookup goes: floored-start match → containing segment (±0.5s) →
// nearest preceding speaker-bearing segment within 5s.
function attachSpeakersToChunks(chunks, segments) {
  if (!Array.isArray(segments) || !segments.length) return;
  const sortedSegs = segments
    .slice()
    .sort((a, b) => (a.start || 0) - (b.start || 0));
  // First segment wins when several share the same floored start.
  const segByFlooredStart = new Map();
  for (const seg of sortedSegs) {
    const k = Math.floor(seg.start || 0);
    if (!segByFlooredStart.has(k)) segByFlooredStart.set(k, seg);
  }
  const pickSpeaker = (t) => {
    // Primary: exact floored-start match — this is the segment
    // that produced the bracket the entry was parsed from.
    const exact = segByFlooredStart.get(t);
    if (exact && exact.speaker) return exact;
    // Secondary: containing segment (covers cases where the
    // bracket-source segment had no speaker but a neighboring
    // segment does cover this timestamp).
    for (const seg of sortedSegs) {
      if ((seg.start || 0) > t + 5) break;
      if ((seg.start || 0) - 0.5 <= t && t <= (seg.end || 0) + 0.5) {
        if (seg.speaker) return seg;
      }
    }
    // Tertiary: nearest preceding segment within 5s window.
    let bestPrev = null;
    let bestDist = Infinity;
    for (const seg of sortedSegs) {
      if ((seg.start || 0) > t) break;
      const dist = t - (seg.start || 0);
      if (dist < bestDist && seg.speaker) {
        bestDist = dist;
        bestPrev = seg;
      }
    }
    if (bestPrev && bestDist <= 5) return bestPrev;
    return null;
  };
  for (const chunk of chunks) {
    for (const entry of chunk.entries) {
      const found = pickSpeaker(entry.offset || 0);
      if (found && found.speaker) {
        entry.speaker = found.speaker;
        entry.speaker_confidence = found.speaker_confidence ?? null;
        entry.speaker_uncertain = !!found.speaker_uncertain;
      }
    }
  }
}
// ─── Router ─────────────────────────────────────────────────────────
export function internalMeetingsRouter({ dataDir }) {
const router = express.Router();
// Multer config: write uploads to OS tmp dir; we move/process and
// delete in the pipeline. memoryStorage would buffer the whole
// file in RAM — fine for short meetings, risky for 4-hour ones.
const upload = multer({
  limits: { fileSize: MAX_UPLOAD_BYTES },
  storage: multer.diskStorage({
    // A fresh "relay-meeting-*" directory under the OS temp dir is created
    // every time a destination is requested.
    destination(_req, _file, done) {
      const tmpPrefix = path.join(os.tmpdir(), "relay-meeting-");
      fs.mkdtemp(tmpPrefix)
        .then((tmpDir) => done(null, tmpDir))
        .catch((err) => done(err));
    },
    // Stored name is always "upload" + the original extension (".bin"
    // when there is none) so the extension is still available for
    // mimeType detection downstream.
    filename(_req, file, done) {
      const originalExt = path.extname(file.originalname || "");
      const storedName = `upload${originalExt || ".bin"}`;
      done(null, storedName);
    },
  }),
});
// POST /admin/internal-meetings/upload
// multipart fields:
// file — audio (required)
// title — string (optional)
// participants — CSV string of attendee names (optional, treated
// as HINTS only by the LLM)
// notes — free-form prose context for the LLM. Used by
// the name-inference + extras prompts as a soft
// signal for who-said-what attribution.
// Accepts one multipart audio upload, registers a job, starts the pipeline
// in the background, and responds immediately with the job id.
// Responds 400 when no `file` part was uploaded.
router.post("/upload", upload.single("file"), async (req, res) => {
  const file = req.file;
  if (!file) {
    return res.status(400).json({ error: "no file uploaded" });
  }
  const title = (req.body?.title || "").toString().slice(0, 200);
  const participantsCsv = (req.body?.participants || "").toString();
  const participants = participantsCsv
    .split(",")
    .map((s) => s.trim())
    .filter(Boolean)
    .slice(0, 30); // soft cap
  // Notes — free-form context the operator wants the LLM to factor
  // in. Trimmed + clamped to 4000 chars (matches the cap in
  // post-cluster-polish.js / meeting-extras.js so we don't store
  // more than we'll ever send to the model). Empty string when
  // not provided.
  const notes = (req.body?.notes || "").toString().trim().slice(0, 4000);
  // Probe audio duration up-front so the pipeline knows the
  // window plan size. Falls back to 0 if probe fails (pipeline
  // will single-shot in that case).
  let audioSec = 0;
  try {
    audioSec = await getAudioDurationSeconds(file.path);
  } catch (err) {
    console.warn(
      `[internal-meetings] duration probe failed for ${file.path}: ${err?.message || err}`
    );
  }
  const job = createJob({
    kind: "internal-meeting",
    installId: "internal-meetings",
    metadata: {
      title,
      participants,
      notes,
      audio_seconds: audioSec,
      original_filename: file.originalname || null,
      size_bytes: file.size,
    },
  });
  console.log(
    `[internal-meetings ${job.id.slice(0, 8)}] upload received — ${file.originalname || "(no name)"} ${Math.round((file.size || 0) / (1024 * 1024))}MB, ${Math.round(audioSec / 60)} min audio, title="${title}"${notes ? `, notes=${notes.length} chars` : ""}`
  );
  // Kick off pipeline in the background. Don't await — we return
  // the job ID immediately so the client can subscribe via SSE.
  (async () => {
    try {
      await runMeetingPipeline({
        dataDir,
        jobId: job.id,
        audioPath: file.path,
        mimeType: file.mimetype || "audio/mpeg",
        audioSec,
        title,
        participants,
        notes,
      });
    } catch (err) {
      const msg = (err?.message || String(err)).slice(0, 400);
      markFailed(job.id, "meeting_failed: " + msg);
      console.error(
        `[internal-meetings ${job.id.slice(0, 8)}] worker crashed:`,
        err
      );
    } finally {
      // Best-effort cleanup of the temp file + its per-upload directory.
      // This runs on success as well as failure: the multer destination
      // creates a dedicated mkdtemp directory for every upload, and
      // removing it only in the failure path left an empty
      // "relay-meeting-*" directory behind in the OS temp dir after every
      // successful run.
      try {
        // Usually already removed by the pipeline; a missing file is fine.
        await fs.unlink(file.path);
      } catch {}
      try {
        // rmdir only removes an empty directory, so nothing else can be lost.
        await fs.rmdir(path.dirname(file.path));
      } catch {}
    }
  })();
  res.json({
    job_id: job.id,
    title,
    audio_seconds: audioSec,
    participants,
    notes,
  });
});
// GET /admin/internal-meetings/jobs/:id/stream
// SSE stream of live progress events. Same event types
// summarize-url emits: progress, queued, transcribe_complete,
// window_complete, done, error.
router.get("/jobs/:id/stream", (req, res) => {
  const job = getJob(req.params.id);
  if (!job) {
    res.status(404).json({ error: "no such job" });
    return;
  }

  // Server-sent-events response headers.
  const sseHeaders = {
    "Content-Type": "text/event-stream",
    "Cache-Control": "no-cache, no-transform",
    Connection: "keep-alive",
  };
  for (const [headerName, headerValue] of Object.entries(sseHeaders)) {
    res.setHeader(headerName, headerValue);
  }
  res.flushHeaders?.();

  // Write one job event as an SSE frame. Any failure while serialising
  // or writing is ignored (best effort).
  function writeFrame(ev) {
    try {
      res.write(`event: ${ev.type}\ndata: ${JSON.stringify(ev.data || {})}\n\n`);
    } catch {}
  }

  // Replay history first so a late subscriber sees everything that
  // happened before it connected.
  for (const pastEvent of job.events) {
    writeFrame(pastEvent);
  }

  // Job already finished: the replay is the whole stream.
  const alreadyTerminal = ["complete", "failed"].includes(job.status);
  if (alreadyTerminal) {
    res.end();
    return;
  }

  // Live phase: forward each event, closing the response after a
  // "done" or "error" event.
  const closingEventTypes = ["done", "error"];
  const unsubscribe = subscribeToJob(job.id, (liveEvent) => {
    writeFrame(liveEvent);
    if (!closingEventTypes.includes(liveEvent.type)) return;
    try {
      res.end();
    } catch {}
  });

  // Drop the subscription once the client connection closes.
  req.on("close", () => {
    try {
      unsubscribe();
    } catch {}
  });
});
// GET /admin/internal-meetings/jobs/:id
router.get("/jobs/:id", (req, res) => {
  const job = getJob(req.params.id);
  if (!job) {
    return res.status(404).json({ error: "no such job" });
  }
  // Expose only this fixed subset of the job's fields.
  const {
    id,
    kind,
    status,
    progress,
    started_at,
    completed_at,
    error,
    result,
  } = job;
  res.json({
    id,
    kind,
    status,
    progress,
    started_at,
    completed_at,
    error,
    result,
  });
});
// GET /admin/internal-meetings — list saved meetings
router.get("/", async (_req, res) => {
  // Respond with whatever listMeetings() returns, wrapped as { meetings }.
  const meetings = await listMeetings(dataDir);
  res.json({ meetings });
});
// GET /admin/internal-meetings/:id — get full record JSON
router.get("/:id", async (req, res) => {
  const { id } = req.params;
  const meeting = await loadMeeting(dataDir, id);
  // A falsy load result is reported as 404.
  if (!meeting) {
    return res.status(404).json({ error: "not found" });
  }
  res.json(meeting);
});
// GET /admin/internal-meetings/:id/markdown — markdown download
router.get("/:id/markdown", async (req, res) => {
  const meeting = await loadMeeting(dataDir, req.params.id);
  if (!meeting) {
    return res.status(404).send("not found");
  }
  const markdownBody = meetingToMarkdown(meeting);
  // Filename base: each run of characters outside [a-zA-Z0-9-_.] in the
  // title becomes a single "-", and the result is capped at 80 characters.
  const titleForFile = meeting.title || "meeting";
  const safeTitle = titleForFile
    .replace(/[^a-zA-Z0-9-_.]+/g, "-")
    .slice(0, 80);
  const disposition = `attachment; filename="${safeTitle}.md"`;
  res.setHeader("Content-Type", "text/markdown; charset=utf-8");
  res.setHeader("Content-Disposition", disposition);
  res.send(markdownBody);
});
// PATCH /admin/internal-meetings/:id/entries — per-line operator
// speaker re-assignment. Body shape:
// { overrides: [{ chunk_idx, entry_idx, speaker_id }] }
// - speaker_id = a known cluster id ("Speaker_A", "Speaker_Unknown")
// sets entry.speaker_override
// - speaker_id = "" CLEARS the override (reverts the line to its
// original diarization-assigned speaker, if any)
//
// We persist the override as a separate field (entry.speaker_override)
// rather than overwriting entry.speaker so provenance is never
// destroyed — the original Sortformer attribution stays for
// debugging and so the operator can always revert. All downstream
// rendering (dashboard chips, .html / .md downloads) uses
// (entry.speaker_override || entry.speaker) so corrections show
// up everywhere.
//
// Why per-line and not just per-cluster: diarization on 4+
// speakers gets noisy in real meetings — similar voices get
// merged, mid-sentence speaker swaps get assigned to the wrong
// person, and the LLM name-inference compounds the issue when it
// confidently guesses the wrong person from limited context.
// Per-line override is the only knob that lets the operator
// perfectly correct the transcript without re-running anything
// upstream.
router.patch("/:id/entries", express.json(), async (req, res) => {
  const meeting = await loadMeeting(dataDir, req.params.id);
  if (!meeting) {
    return res.status(404).json({ error: "not found" });
  }
  const overrides = req.body?.overrides;
  if (!Array.isArray(overrides)) {
    return res.status(400).json({ error: "overrides array required" });
  }

  // Accepted speaker ids: every cluster on the record, plus the
  // "Speaker_Unknown" bucket whether or not the record lists it.
  const allowedIds = new Set([
    ...Object.keys(meeting.speakers || {}),
    "Speaker_Unknown",
  ]);

  // Apply a single override op; returns true only when the entry changed.
  // Malformed ops, out-of-range indices and unknown ids are ignored silently.
  const applyOverride = (op) => {
    if (!op || typeof op !== "object") return false;
    const { chunk_idx: chunkIdx, entry_idx: entryIdx } = op;
    if (!Number.isInteger(chunkIdx) || chunkIdx < 0) return false;
    if (!Number.isInteger(entryIdx) || entryIdx < 0) return false;
    const entry = meeting.chunks?.[chunkIdx]?.entries?.[entryIdx];
    if (!entry) return false;

    const speakerId =
      typeof op.speaker_id === "string" ? op.speaker_id.trim() : "";
    if (speakerId === "") {
      // Empty id clears an existing override.
      if (!entry.speaker_override) return false;
      delete entry.speaker_override;
      return true;
    }
    if (!allowedIds.has(speakerId)) return false;
    if (entry.speaker_override === speakerId) return false;
    entry.speaker_override = speakerId;
    return true;
  };

  let changed = 0;
  for (const op of overrides) {
    if (applyOverride(op)) changed += 1;
  }

  // Only touch disk when something actually changed.
  if (changed > 0) {
    meeting.meta = meeting.meta || {};
    meeting.meta.entries_edited_at = Date.now();
    await saveMeeting(dataDir, req.params.id, meeting);
  }
  res.json({ ok: true, changed });
});
// PATCH /admin/internal-meetings/:id/speakers — operator-driven
// speaker rename. Body: { speaker_names: { Speaker_A: "Matt", ... } }
// Persists into rec.speaker_names (the same field the polish-pass
// name inference writes). Markdown/HTML downloads and the
// dashboard speaker chips/legend pick this up immediately.
//
// Rationale: the LLM name inference is a best-guess from the first
// ~6 minutes of transcript context. On internal calls where
// participants speak each other's names rarely (or where the same
// first name is shared by two people), it gets things wrong. This
// endpoint lets the operator correct them after the fact. The
// change is a single field rewrite — no entry-rewriting, no
// re-clustering — because Speaker_X cluster IDs are stable; we
// just relabel the display name.
//
// Speaker MERGES (Speaker_A and Speaker_C are actually the same
// person) are a separate, harder change — they require rewriting
// every entry's .speaker and recomputing stats — and are handled by
// PATCH /:id/merge-speakers, not by this endpoint. Giving two
// clusters the same display name here does NOT merge them: they
// render with the same name on the legend but stay as distinct
// chips on the per-line attribution.
router.patch("/:id/speakers", express.json(), async (req, res) => {
  // Applies operator-supplied display names to rec.speaker_names.
  // Responses:
  //   404 — loadMeeting returned nothing for :id
  //   400 — body.speaker_names is missing or is not a key/value object
  //   500 — the edited record could not be saved
  //   200 — { ok, changed, speaker_names }; `changed` counts names that
  //         were set to a new value or cleared
  const rec = await loadMeeting(dataDir, req.params.id);
  if (!rec) return res.status(404).json({ error: "not found" });
  const incoming = req.body?.speaker_names;
  // typeof [] is "object" as well, so arrays are rejected explicitly
  // instead of being accepted and then ignored key by key.
  if (!incoming || typeof incoming !== "object" || Array.isArray(incoming)) {
    return res.status(400).json({ error: "speaker_names object required" });
  }
  // Edit a shallow copy; rec.speaker_names is only replaced (and the
  // record only saved) when at least one name actually changed.
  const current = rec.speaker_names && typeof rec.speaker_names === "object"
    ? { ...rec.speaker_names }
    : {};
  // Only allow updating keys that look like cluster IDs to avoid
  // junk-write attacks. Empty string clears the name (falls back
  // to "Speaker X" display).
  const knownIds = new Set(Object.keys(rec.speakers || {}));
  let changed = 0;
  for (const [id, name] of Object.entries(incoming)) {
    if (!/^Speaker_[A-Z]+$/.test(id)) continue;
    if (!knownIds.has(id)) continue;
    // Non-string values are treated like an empty string (clear).
    // Names are trimmed and capped at 60 characters.
    const trimmed = typeof name === "string" ? name.trim().slice(0, 60) : "";
    if (trimmed) {
      if (current[id] !== trimmed) {
        current[id] = trimmed;
        changed += 1;
      }
    } else if (current[id]) {
      delete current[id];
      changed += 1;
    }
  }
  if (changed > 0) {
    rec.speaker_names = current;
    // Mark that this was operator-edited so we can preserve the
    // edit if/when the pipeline ever re-runs name inference.
    rec.meta = rec.meta || {};
    rec.meta.speaker_names_edited_at = Date.now();
    try {
      await saveMeeting(dataDir, req.params.id, rec);
    } catch (err) {
      // Match the merge/recluster/repolish handlers: log and answer
      // with a JSON 500 rather than letting the rejection escape the
      // async handler.
      console.error(
        `[internal-meetings] speakers update failed for ${req.params.id}: ${err?.message || err}`
      );
      return res.status(500).json({ error: "speaker rename failed" });
    }
  }
  res.json({
    ok: true,
    changed,
    speaker_names: current,
  });
});
// PATCH /admin/internal-meetings/:id/merge-speakers — fold one or
// more clusters that diarization mistakenly split apart into a single
// speaker. Body: { survivor: "Speaker_A", absorbed: ["Speaker_C"] }.
// Rewrites every label reference (segments, entries, overrides),
// sums the per-cluster stats, and rewrites extras attributions. The
// survivor keeps its own display name (inheriting the absorbed name
// only if it had none). This is the inverse of a re-cluster: use it
// when ONE person was diarized as two; use re-cluster when TWO people
// were diarized as one.
router.patch("/:id/merge-speakers", express.json(), async (req, res) => {
  // Load the record, let mergeSpeakersInRecord mutate it in place,
  // then persist. Errors tagged BAD_REQUEST become a 400 carrying the
  // error's own message; anything else is logged and reported as 500.
  const meetingId = req.params.id;
  const record = await loadMeeting(dataDir, meetingId);
  if (!record) {
    return res.status(404).json({ error: "not found" });
  }
  const { survivor, absorbed } = req.body ?? {};
  try {
    const outcome = mergeSpeakersInRecord(record, survivor, absorbed);
    await saveMeeting(dataDir, meetingId, record);
    res.json({ ok: true, ...outcome });
  } catch (err) {
    const isBadRequest = err?.code === "BAD_REQUEST";
    if (!isBadRequest) {
      console.error(
        `[internal-meetings] merge-speakers failed for ${meetingId}: ${err?.message || err}`
      );
      res.status(500).json({ error: "merge failed" });
      return;
    }
    return res.status(400).json({ error: err.message });
  }
});
// POST /admin/internal-meetings/:id/recluster — re-run the cross-chunk
// voice clustering on the persisted per-chunk fingerprints at a new
// strictness threshold, to separate speakers that were over-merged
// into one cluster. Fully offline (no audio, no Spark Control). Body:
// { threshold, anchorMinSpeakingSec?, smallClusterMaxSpeakingSec?,
// uncertainMarginPct? }
// Unspecified suppression knobs fall back to the operator's global
// hardware defaults. Re-clustering changes cluster identity, so the
// inferred names, per-line overrides, and extras speaker tags are
// RESET — the operator re-labels via the legend afterward. Returns
// 400 when the meeting has no saved fingerprints (diarization was off
// or it predates fingerprint capture).
router.post("/:id/recluster", express.json(), async (req, res) => {
  // Re-clusters a stored meeting with knobs taken from the request
  // body, each falling back to the config snapshot and then to a
  // hard-coded default. Responses:
  //   404 — loadMeeting returned nothing for :id
  //   400 — reclusterMeetingRecord threw NO_FINGERPRINTS or BAD_REQUEST
  //   500 — any other failure (config load, recluster, save)
  //   200 — { ok, speakers, cluster_count, threshold }
  const rec = await loadMeeting(dataDir, req.params.id);
  if (!rec) return res.status(404).json({ error: "not found" });
  // A knob counts as "specified" only when it is a finite number or a
  // non-blank numeric string. Number() alone maps null, "", [] and
  // false to 0 (and true to 1), which would silently override the
  // configured default with 0 when a form field is left blank.
  const numOr = (v, fallback) => {
    if (typeof v !== "number" && typeof v !== "string") return fallback;
    if (typeof v === "string" && v.trim() === "") return fallback;
    const n = Number(v);
    return Number.isFinite(n) ? n : fallback;
  };
  try {
    // Loaded inside the try so a config failure is logged and reported
    // as a JSON 500 like every other failure in this handler.
    const cfg = await getConfigSnapshot();
    const result = reclusterMeetingRecord(rec, {
      threshold: numOr(
        req.body?.threshold,
        cfg.relay_hardware_voice_clustering_threshold ?? 70
      ),
      anchorMinSpeakingSec: numOr(
        req.body?.anchorMinSpeakingSec,
        cfg.relay_hardware_anchor_min_speaking_sec ?? 30
      ),
      smallClusterMaxSpeakingSec: numOr(
        req.body?.smallClusterMaxSpeakingSec,
        cfg.relay_hardware_small_cluster_max_speaking_sec ?? 15
      ),
      uncertainMarginPct: numOr(
        req.body?.uncertainMarginPct,
        cfg.relay_hardware_uncertain_margin_pct ?? 10
      ),
    });
    // The record is saved only after reclusterMeetingRecord returns
    // without throwing.
    await saveMeeting(dataDir, req.params.id, rec);
    res.json({
      ok: true,
      speakers: result.speakers,
      cluster_count: result.clusterCount,
      threshold: result.threshold,
    });
  } catch (err) {
    if (err?.code === "NO_FINGERPRINTS") {
      return res.status(400).json({ error: err.message, code: "NO_FINGERPRINTS" });
    }
    if (err?.code === "BAD_REQUEST") {
      return res.status(400).json({ error: err.message });
    }
    console.error(
      `[internal-meetings] recluster failed for ${req.params.id}: ${err?.message || err}`
    );
    res.status(500).json({ error: "recluster failed" });
  }
});
// POST /admin/internal-meetings/:id/repolish — re-run the Phase-2
// summary polish using the meeting's CURRENT speaker names. After the
// operator corrects names in the legend (or merges/re-runs detection),
// the topic summaries still attribute statements to the old names —
// this rewrites each topic summary against the corrected roster. It
// does NOT re-infer names (uses rec.speaker_names as-is) and does NOT
// touch transcripts, entries, or per-line overrides — only the topic
// summary text. Needs the operator's analyze hardware online (it's an
// LLM pass, one call per analysis window).
router.post("/:id/repolish", express.json(), async (req, res) => {
  // Re-runs runSummaryPolish over the stored analysis sections using
  // rec.speaker_names as they are now, then saves the record.
  // Responses:
  //   404 — loadMeeting returned nothing for :id
  //   400 — no analysis sections, no transcript, or no named speakers
  //   503 — resolved hardware config has no analyze URL
  //   500 — any failure inside the polish/save path
  //   200 — { ok, polished_count }
  const rec = await loadMeeting(dataDir, req.params.id);
  if (!rec) return res.status(404).json({ error: "not found" });
  if (!rec.analysis || !Array.isArray(rec.analysis.sections) || rec.analysis.sections.length === 0) {
    return res.status(400).json({ error: "this meeting has no analysis sections to re-polish" });
  }
  if (typeof rec.transcript !== "string" || !rec.transcript.trim()) {
    return res.status(400).json({ error: "this meeting has no transcript to polish against" });
  }
  // Count only non-blank string names; a roster with none is rejected.
  const namedCount = Object.values(rec.speaker_names || {}).filter(
    (v) => typeof v === "string" && v.trim()
  ).length;
  if (namedCount === 0) {
    return res.status(400).json({
      error: "no named speakers yet — rename speakers in the legend first, then re-polish",
    });
  }
  try {
    const cfg = await getConfigSnapshot();
    const hw = await resolveHardwareConfig(cfg);
    if (!hw?.analyze?.url) {
      return res.status(503).json({
        error: "analyze hardware not available — Spark Control isn't reporting a ready analyze endpoint",
      });
    }
    // Only the analyze endpoint is required here. The transcribe
    // fields are passed through when present and default to "" so a
    // missing transcribe section cannot fail this request with a
    // TypeError.
    const analyzeBackend = createHardwareBackend({
      parakeetBaseURL: hw.transcribe?.url || "",
      gemmaBaseURL: hw.analyze.url,
      sparkControlBaseURL: hw.sparkBase || "",
      parakeetModel: hw.transcribe?.model || "",
      gemmaModel: hw.analyze.model || "",
      anMaxTokens: cfg.relay_hardware_an_max_tokens || 16000,
    });
    // NOTE(review): windows are re-planned from the CURRENT config; if
    // these settings changed since the meeting was processed the plan
    // may not line up with the stored sections — confirm.
    const bodyMin = cfg.relay_hardware_analyze_window_minutes || 18;
    const overlapMin = cfg.relay_hardware_analyze_overlap_minutes || 2;
    const cutoffMin = cfg.relay_analyze_cutoff_minutes || 25;
    const anConcurrency = cfg.relay_hardware_analyze_concurrency || 8;
    const windows = planWindowsByDuration({
      totalAudioSec: rec.audio_seconds || 0,
      bodySeconds: bodyMin * 60,
      overlapSeconds: overlapMin * 60,
      cutoffSeconds: cutoffMin * 60,
    });
    const canonicalEntries = parseBracketedTranscript(rec.transcript);
    const polished = await runSummaryPolish({
      sections: rec.analysis.sections,
      canonicalEntries,
      windows,
      transcriptSegments: rec.transcript_segments || [],
      speakerNames: rec.speaker_names || {},
      speakerStats: rec.speakers || {},
      backend: analyzeBackend,
      concurrency: anConcurrency,
      pipelineBackend: "hardware",
      jobId: `repolish-${req.params.id}`,
      batchId: null,
      mediaUrl: null,
      installId: "internal-meetings",
      licenseFingerprint: null,
      source: "internal-meetings",
      // No cost accounting on this path: always reports zero
      // tokens and zero cost.
      computeCostDetails: () => ({
        input_tokens: 0,
        output_tokens: 0,
        thinking_tokens: 0,
        cost_usd: 0,
      }),
    });
    // applyPolishedSummaries' return value is what the response
    // reports as polished_count.
    const changed = applyPolishedSummaries(rec, polished);
    rec.meta = rec.meta || {};
    rec.meta.repolished_at = Date.now();
    rec.meta.polish_done = true;
    await saveMeeting(dataDir, req.params.id, rec);
    res.json({ ok: true, polished_count: changed });
  } catch (err) {
    console.error(
      `[internal-meetings] repolish failed for ${req.params.id}: ${err?.message || err}`
    );
    res.status(500).json({ error: "re-polish failed: " + (err?.message || "unknown error") });
  }
});
// GET /admin/internal-meetings/:id/html — self-contained HTML
// download. Renders the same Recaps-style layout the dashboard uses,
// with inlined CSS so the file is a single shareable artifact.
router.get("/:id/html", async (req, res) => {
  // Serves the meetingToHtml rendering of a stored meeting. It is sent
  // as an attachment named after the sanitised title unless the
  // request carries ?inline=1, in which case no Content-Disposition
  // header is set.
  const meeting = await loadMeeting(dataDir, req.params.id);
  if (!meeting) {
    return res.status(404).send("not found");
  }
  const markup = meetingToHtml(meeting);
  // Collapse every run of characters outside [a-zA-Z0-9-_.] into a
  // single "-" and cap the result at 80 characters.
  const rawTitle = meeting.title || "meeting";
  const baseName = rawTitle.replace(/[^a-zA-Z0-9-_.]+/g, "-").slice(0, 80);
  const wantsInline = req.query.inline === "1";
  // The content type is the same for both modes.
  res.setHeader("Content-Type", "text/html; charset=utf-8");
  if (!wantsInline) {
    res.setHeader(
      "Content-Disposition",
      `attachment; filename="${baseName}.html"`
    );
  }
  res.send(markup);
});
// GET /admin/internal-meetings/:id/download — raw JSON download
router.get("/:id/download", async (req, res) => {
  // Sends the stored record as a pretty-printed (2-space) JSON
  // attachment named after the sanitised meeting title.
  const meeting = await loadMeeting(dataDir, req.params.id);
  if (!meeting) {
    return res.status(404).send("not found");
  }
  // Collapse every run of characters outside [a-zA-Z0-9-_.] into a
  // single "-" and cap the result at 80 characters.
  const rawTitle = meeting.title || "meeting";
  const baseName = rawTitle.replace(/[^a-zA-Z0-9-_.]+/g, "-").slice(0, 80);
  const disposition = `attachment; filename="${baseName}.json"`;
  res.setHeader("Content-Type", "application/json; charset=utf-8");
  res.setHeader("Content-Disposition", disposition);
  const payload = JSON.stringify(meeting, null, 2);
  res.send(payload);
});
// DELETE /admin/internal-meetings/:id
router.delete("/:id", async (req, res) => {
  // deleteMeeting's falsy result is reported as 404; a truthy result
  // confirms the deletion to the caller.
  const removed = await deleteMeeting(dataDir, req.params.id);
  if (removed) {
    res.json({ deleted: true });
    return;
  }
  return res.status(404).json({ error: "not found" });
});
return router;
}