recap-relay/server/meeting-extras.js

// Phase 2 of Path 2A — meeting extras analysis.
//
// Runs a single LLM pass AFTER transcribe → diarize → cluster →
// analyze → name-inference → summary-polish complete. Pulls out five
// categories of structured information that operators consistently
// want at the top of an internal meeting recap:
//
//   - tldr            : 2-4 sentence executive summary plus the primary speakers
//   - decisions       : what was agreed on (with the offset where it was settled)
//   - action_items    : who owes what, by when (best-effort due_hint)
//   - open_questions  : questions raised that didn't get resolved
//   - key_quotes      : notable statements worth surfacing verbatim
//
// Decisions, action items, and key quotes carry a `supporting_offset`
// (or `offset` for quotes) in seconds so the dashboard can render the
// timestamp as a clickable jump to the corresponding transcript line;
// open questions carry no offset. Items also carry speaker IDs
// (cluster ids like Speaker_A) so the renderer can show the speaker's
// colored chip + display name, and so an operator-rename or per-line
// override propagates here too.
//
// Returns:
//   {
//     tldr:           { summary, primary_speakers[] } | null,
//     decisions:      [{ statement, agreed_by[], supporting_offset }],
//     action_items:   [{ description, owner, due_hint, supporting_offset }],
//     open_questions: [{ question, raised_by, answered }],
//     key_quotes:     [{ speaker, offset, quote, why_notable }],
//   }
//
// or null on total failure. Failure is non-fatal — the meeting still
// saves with rec.extras = null and the dashboard just hides the
// extras section.

import { recordCall } from "./audit-log.js";

// Total number of backend.analyzeText calls runMeetingExtras will make
// for one meeting (first try included). An attempt counts as failed when
// the call throws or its text does not pass safeParseExtras.
const EXTRAS_MAX_ATTEMPTS = 3;

// Default prompt for the extras pass; runMeetingExtras uses it whenever
// no non-blank promptOverride is supplied. fillTemplate substitutes the
// {{title}}, {{duration}}, {{operatorContext}}, {{speakerRoster}},
// {{topics}} and {{transcript}} placeholders. {{operatorContext}} sits
// directly in front of "SPEAKERS" with no separator because the value
// substituted there is either empty or already ends in a blank line.
// The JSON shape requested at the bottom is what safeParseExtras
// validates, so keep the two in sync when editing either.
export const DEFAULT_MEETING_EXTRAS_PROMPT_TEMPLATE = `You are extracting structured information from an internal team meeting transcript. The transcript below has been pre-tagged with speaker labels like [A], [B], [C] (anonymous voice-clustering labels) and inferred real names where available.

MEETING METADATA:
- Title: {{title}}
- Duration: {{duration}}

{{operatorContext}}SPEAKERS (from voice clustering, with operator-confirmed names where present):
{{speakerRoster}}

TOPIC SUMMARIES (already produced — for context only, do not duplicate):
{{topics}}

TRANSCRIPT (each line is "[<letter> <MM:SS>] text"):
{{transcript}}

INSTRUCTIONS:
Extract FIVE categories of information from the meeting. Return EMPTY ARRAYS for categories that don't apply — do NOT invent items.

1. TLDR — A 2-4 sentence executive summary of the entire meeting: what it was about, the key discussion arc, and the bottom-line outcome. Write in past tense, third person. Keep it dense — every clause should carry information. Skip pleasantries and procedural opening/closing chatter. If a meeting was genuinely substanceless (a 3-minute check-in, audio test, etc.), write one factual descriptor sentence instead of padding. This is the only required category — even the most trivial meeting gets a one-sentence TLDR.
   - summary: the 2-4 sentence executive summary
   - primary_speakers: array of Speaker_X ids who drove the conversation (the 1-3 people most central to the discussion, in rough order of contribution). Empty array if unclear.

2. DECISIONS — Things explicitly decided / agreed during the meeting. Include only clear commitments ("we will do X", "let's go with Y"), not casual mentions. For each:
   - statement: the decision in one sentence
   - agreed_by: array of Speaker_X ids who explicitly agreed (use the chip-letter notation, e.g. ["Speaker_A", "Speaker_C"]). Empty array if unclear.
   - supporting_offset: integer SECONDS where this decision was made (use the [<letter> <MM:SS>] timestamp from the most relevant transcript line — convert MM:SS to total seconds)

3. ACTION_ITEMS — Specific commitments where someone said they would do something. Include only explicit ownership ("I'll send the doc", "Matt will follow up"), not vague "someone should...". For each:
   - description: the action in imperative form
   - owner: the Speaker_X id of the person taking it on (e.g. "Speaker_A"), or null if unclear
   - due_hint: the deadline as a string if mentioned ("by Friday", "end of week", "before next call"), or null
   - supporting_offset: integer seconds where the commitment was made

4. OPEN_QUESTIONS — Questions raised that were NOT clearly answered during the meeting. Skip rhetorical questions and questions that got direct answers. For each:
   - question: the question, rephrased to be self-contained
   - raised_by: the Speaker_X id who asked (or null if unclear)
   - answered: false (always — if it was answered, don't include it)

5. KEY_QUOTES — Statements worth surfacing verbatim because they are pivotal, particularly insightful, or capture a strong opinion. Limit to 3-6 quotes max. Skip filler and conversational text. For each:
   - speaker: the Speaker_X id of the speaker
   - offset: integer seconds where the quote occurs
   - quote: the verbatim quote (trim to the substantive sentence, 4-30 words)
   - why_notable: one short clause on why this is worth surfacing

Be conservative across all five. Better to return an empty array (or for TLDR, a single factual sentence) than to fabricate. A 5-minute small-talk call may legitimately have 0 decisions, 0 action items, 0 open questions, 0 key quotes — but it still gets a TLDR.

Respond with ONLY valid JSON in this exact shape, no other text:
{
  "tldr": {"summary": "...", "primary_speakers": ["Speaker_A", "Speaker_B"]},
  "decisions": [{"statement": "...", "agreed_by": ["Speaker_A"], "supporting_offset": 123}],
  "action_items": [{"description": "...", "owner": "Speaker_B", "due_hint": "by Friday", "supporting_offset": 234}],
  "open_questions": [{"question": "...", "raised_by": "Speaker_C", "answered": false}],
  "key_quotes": [{"speaker": "Speaker_A", "offset": 345, "quote": "...", "why_notable": "..."}]
}`;

// Substitute `{{ name }}` placeholders in `template` with values from
// `vars`.
//
//   template : any value; a falsy template is treated as "", anything
//              else is coerced with String().
//   vars     : object mapping placeholder name → value (values are
//              coerced with String()); null/undefined means "no values".
//
// Only OWN properties of `vars` are consulted, so an operator-supplied
// template containing `{{constructor}}` or `{{toString}}` is left as-is
// instead of splicing Object.prototype members into the prompt.
// Unknown placeholders are re-emitted as `{{name}}` (inner whitespace
// dropped). Substituted values are not re-scanned for placeholders.
function fillTemplate(template, vars) {
  const values = vars ?? {};
  return String(template || "").replace(/\{\{\s*(\w+)\s*\}\}/g, (_match, key) => {
    return Object.prototype.hasOwnProperty.call(values, key)
      ? String(values[key])
      : `{{${key}}}`;
  });
}

// Render a duration given in seconds as "Hh Mm Ss", "Mm Ss" or "Ss",
// dropping leading units that are zero. Fractions are floored; missing,
// NaN or negative input renders as "0s".
function formatDuration(seconds) {
  const total = Math.max(0, Math.floor(seconds || 0));
  const hours = Math.floor(total / 3600);
  const minutes = Math.floor((total % 3600) / 60);
  const secs = total % 60;

  // Build right-to-left: seconds are always shown, minutes whenever a
  // larger-or-equal unit is non-zero, hours only when non-zero.
  const parts = [`${secs}s`];
  if (hours > 0 || minutes > 0) parts.unshift(`${minutes}m`);
  if (hours > 0) parts.unshift(`${hours}h`);
  return parts.join(" ");
}

// Render transcript segments as prompt lines, one per segment:
//
//   [<letter> <M:SS>] <trimmed text>
//
// <letter> is the suffix of a `Speaker_<LETTERS>` id, or "?" when the
// segment's speaker does not match that pattern. Minutes are not
// zero-padded and keep counting past 59 (e.g. "62:05").
//
// Returns "" for a non-array or empty input. Malformed entries are
// skipped rather than thrown on, because this runs before the LLM call
// and a throw here would abort the whole extras pass:
//   - null / non-object segments and segments whose text is missing,
//     blank, or not a string are dropped;
//   - a start time that is negative or not a finite number is rendered
//     as 0:00.
function formatLabeledTranscript(segments) {
  if (!Array.isArray(segments) || segments.length === 0) return "";
  const lines = [];
  for (const seg of segments) {
    if (!seg || typeof seg !== "object") continue;
    const text = typeof seg.text === "string" ? seg.text.trim() : "";
    if (!text) continue;

    // Number() keeps accepting numeric strings the way Math.floor did.
    const rawStart = Number(seg.start);
    const secInt = Number.isFinite(rawStart) && rawStart > 0 ? Math.floor(rawStart) : 0;

    const speakerMatch = /^Speaker_([A-Z]+)$/.exec(String(seg.speaker || ""));
    const letter = speakerMatch ? speakerMatch[1] : "?";

    const mm = Math.floor(secInt / 60);
    const ss = String(secInt % 60).padStart(2, "0");
    lines.push(`[${letter} ${mm}:${ss}] ${text}`);
  }
  return lines.join("\n");
}

// Trim a too-large transcript by keeping the head and tail. Keeps
// the meeting's opening (introductions, agenda) AND closing (wrap-up,
// next steps) which are where most extras-worthy content lives.
//
//   text     : the formatted transcript string
//   maxChars : upper bound on the returned length; a non-finite value
//              (undefined, NaN, Infinity) means "no cap"
//
// Each kept half is floor(maxChars / 2) - 50 characters; the 50-char
// allowance per side leaves room for the truncation marker so the
// result never exceeds maxChars. When maxChars is too small for that
// layout (half would be <= 0) the text is simply cut to its first
// maxChars characters. Without that branch a zero half makes
// `slice(-0)` return the ENTIRE text, i.e. output longer than the input.
// Cuts are by character count and may land mid-line.
function capTranscript(text, maxChars) {
  if (!Number.isFinite(maxChars) || text.length <= maxChars) return text;
  const half = Math.floor(maxChars / 2) - 50;
  if (half <= 0) return text.slice(0, Math.max(0, Math.floor(maxChars)));
  return (
    text.slice(0, half) +
    "\n\n…[middle truncated for prompt length]…\n\n" +
    text.slice(-half)
  );
}

// Parse and sanitize the LLM's extras response.
//
//   text : raw response text from the backend
//
// Returns { tldr, decisions, action_items, open_questions, key_quotes }
// or null when no usable JSON OBJECT can be recovered. null is what
// tells the caller the attempt failed, so anything that is not an
// object — including a top-level array such as `[]` — is rejected
// rather than being reported as a successful, empty result.
//
// JSON is located in this order:
//   1. the contents of the first ``` / ```json fence, if any;
//   2. the trimmed text as a whole;
//   3. fallback: the span from the first "{" to the last "}", which
//      recovers responses where the model wrapped the JSON in prose.
//
// Individual entries that fail validation are dropped instead of
// failing the whole pass; strings are trimmed and length-clamped,
// speaker ids must match Speaker_<LETTERS>, and offsets must be finite
// numbers (floored, clamped to >= 0) or they become null.
function safeParseExtras(text) {
  if (!text || typeof text !== "string") return null;
  let s = text.trim();
  const fence = s.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (fence) s = fence[1].trim();

  // JSON.parse never yields undefined, so undefined is a safe
  // "did not parse" sentinel.
  const tryParse = (candidate) => {
    try {
      return JSON.parse(candidate);
    } catch {
      return undefined;
    }
  };
  let parsed = tryParse(s);
  if (parsed === undefined) {
    const open = s.indexOf("{");
    const close = s.lastIndexOf("}");
    if (open !== -1 && close > open) parsed = tryParse(s.slice(open, close + 1));
  }
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return null;

  const SPEAKER_ID = /^Speaker_[A-Z]+$/;
  const asArray = (v) => (Array.isArray(v) ? v : []);
  const cleanText = (v) => (typeof v === "string" ? v.trim() : "");
  const speakerOrNull = (v) => (typeof v === "string" && SPEAKER_ID.test(v) ? v : null);
  const speakerList = (v, max) =>
    asArray(v)
      .filter((x) => typeof x === "string" && SPEAKER_ID.test(x))
      .slice(0, max);
  const offsetOrNull = (v) => (Number.isFinite(v) ? Math.max(0, Math.floor(v)) : null);

  // TLDR — exactly one object (not an array). Required category;
  // we accept any well-formed shape and clamp to safe bounds. If
  // the LLM omitted it entirely we leave it null so the renderer
  // can show "TLDR unavailable" rather than fabricating.
  let tldr = null;
  if (parsed.tldr && typeof parsed.tldr === "object" && !Array.isArray(parsed.tldr)) {
    const summary = cleanText(parsed.tldr.summary);
    if (summary) {
      tldr = {
        summary: summary.slice(0, 800),
        primary_speakers: speakerList(parsed.tldr.primary_speakers, 5),
      };
    }
  }

  // Coerce + clamp each category to a sane shape. Drop entries
  // that fail validation rather than failing the whole pass.
  const decisions = asArray(parsed.decisions)
    .map((d) => {
      if (!d || typeof d !== "object") return null;
      const statement = cleanText(d.statement);
      if (!statement) return null;
      return {
        statement: statement.slice(0, 400),
        agreed_by: speakerList(d.agreed_by, 10),
        supporting_offset: offsetOrNull(d.supporting_offset),
      };
    })
    .filter(Boolean)
    .slice(0, 20);

  const action_items = asArray(parsed.action_items)
    .map((a) => {
      if (!a || typeof a !== "object") return null;
      const description = cleanText(a.description);
      if (!description) return null;
      return {
        description: description.slice(0, 400),
        owner: speakerOrNull(a.owner),
        // Blank or non-string hints collapse to null.
        due_hint: cleanText(a.due_hint).slice(0, 80) || null,
        supporting_offset: offsetOrNull(a.supporting_offset),
      };
    })
    .filter(Boolean)
    .slice(0, 30);

  const open_questions = asArray(parsed.open_questions)
    .map((q) => {
      if (!q || typeof q !== "object") return null;
      const question = cleanText(q.question);
      if (!question) return null;
      return {
        question: question.slice(0, 400),
        raised_by: speakerOrNull(q.raised_by),
        answered: q.answered === true,
      };
    })
    .filter(Boolean)
    .slice(0, 20);

  const key_quotes = asArray(parsed.key_quotes)
    .map((q) => {
      if (!q || typeof q !== "object") return null;
      const quote = cleanText(q.quote);
      if (!quote) return null;
      return {
        speaker: speakerOrNull(q.speaker),
        offset: offsetOrNull(q.offset),
        quote: quote.slice(0, 400),
        why_notable: cleanText(q.why_notable).slice(0, 200),
      };
    })
    .filter(Boolean)
    .slice(0, 10);

  return { tldr, decisions, action_items, open_questions, key_quotes };
}

// Build the SPEAKERS roster block: one line per `Speaker_<LETTERS>` key
// of `speakers`, sorted by id, e.g.
//   - Speaker_A (chip [A], 12m 34s speaking, 40 turns): "Matt Hill"
// Speakers without an entry in `speakerNames` are shown as (unknown).
// Returns "" when there are no matching keys.
function buildSpeakerRoster(speakers, speakerNames) {
  return Object.keys(speakers || {})
    .filter((k) => /^Speaker_[A-Z]+$/.test(k))
    .sort()
    .map((k) => {
      const stats = speakers[k] || {};
      const secs = Math.round(stats.total_speaking_seconds || 0);
      const mins = Math.floor(secs / 60);
      const rem = secs % 60;
      const timeStr = mins > 0 ? `${mins}m ${rem}s` : `${rem}s`;
      const letter = k.replace("Speaker_", "");
      const name = speakerNames && speakerNames[k] ? `"${speakerNames[k]}"` : "(unknown)";
      return `- ${k} (chip [${letter}], ${timeStr} speaking, ${stats.turns || 0} turns): ${name}`;
    })
    .join("\n");
}

// Build the TOPIC SUMMARIES block: "<n>. [M:SS] <title> — <summary>" per
// topic, or "(no topics)" for a missing/empty list. A null or non-object
// entry is rendered as an untitled topic at 0:00 instead of throwing,
// since this runs before the LLM call and outside its retry handling.
function buildTopicsBlock(topics) {
  if (!Array.isArray(topics) || topics.length === 0) return "(no topics)";
  return topics
    .map((t, i) => {
      const topic = t && typeof t === "object" ? t : {};
      const startSec = topic.startTime || 0;
      const mm = Math.floor(startSec / 60);
      const ss = Math.floor(startSec % 60);
      const tStr = `${mm}:${String(ss).padStart(2, "0")}`;
      return `${i + 1}. [${tStr}] ${topic.title || "(untitled)"} — ${topic.summary || ""}`;
    })
    .join("\n");
}

// Compose the OPERATOR HINTS block — same shape as the name-
// inference pipeline so the LLM gets consistent framing across
// both passes. Returns "" when neither hint is supplied; otherwise the
// block ends with a blank line so it can sit directly in front of the
// next prompt section. operatorNotes is capped at 4000 characters.
function buildOperatorContext(participantHints, operatorNotes) {
  const hintsParts = [];
  if (participantHints && String(participantHints).trim()) {
    hintsParts.push(
      `Possible participants in this meeting (operator-supplied — may be incomplete):\n${String(participantHints).trim()}`,
    );
  }
  if (operatorNotes && String(operatorNotes).trim()) {
    const trimmed = String(operatorNotes).trim().slice(0, 4000);
    hintsParts.push(
      `Operator notes (may describe who said what — use as soft context, verify against the transcript before extracting decisions / action items / quotes):\n${trimmed}`,
    );
  }
  return hintsParts.length
    ? `OPERATOR HINTS (treat as suggestions only — verify against the transcript):\n\n${hintsParts.join("\n\n")}\n\n`
    : "";
}

// Run the meeting-extras LLM pass for one meeting.
//
// Renders the prompt (promptOverride when non-blank, otherwise
// DEFAULT_MEETING_EXTRAS_PROMPT_TEMPLATE), calls backend.analyzeText up
// to EXTRAS_MAX_ATTEMPTS times until safeParseExtras accepts the
// response, then writes one audit record via recordCall describing the
// outcome.
//
// Returns the sanitized extras object from safeParseExtras, or null
// when `backend` is missing, `transcriptSegments` is empty / not an
// array (both return before any LLM call or audit record), or every
// attempt failed.
//
// The audit record carries token/cost details from computeCostDetails
// only when parsing succeeded; on failure all cost fields are zero.
// NOTE(review): recordCall is awaited without a try/catch, so a
// rejection from it propagates to the caller even if extraction
// succeeded — confirm that is intended.
export async function runMeetingExtras({
  title,
  audioSec,
  speakers,
  speakerNames,
  transcriptSegments,
  topics, // array of { title, summary, startTime } from analyze-then-polish
  promptOverride = "",
  // Operator-supplied hints (internal meetings only). participantHints
  // is a CSV-ish string of expected attendees; operatorNotes is free-
  // form prose describing who-said-what. Both are framed as hints in
  // the rendered prompt — the LLM is instructed to use them as soft
  // signals and verify against the transcript before quoting or
  // attributing. Empty → no OPERATOR HINTS block appears.
  participantHints = "",
  operatorNotes = "",
  backend,
  pipelineBackend,
  jobId,
  installId,
  licenseFingerprint = null,
  source,
  computeCostDetails,
}) {
  if (!backend) return null;
  if (!Array.isArray(transcriptSegments) || transcriptSegments.length === 0) return null;

  const templateSource =
    typeof promptOverride === "string" && promptOverride.trim()
      ? promptOverride
      : DEFAULT_MEETING_EXTRAS_PROMPT_TEMPLATE;
  const cappedTranscript = capTranscript(formatLabeledTranscript(transcriptSegments), 25000);
  const prompt = fillTemplate(templateSource, {
    title: title || "(untitled)",
    duration: formatDuration(audioSec),
    operatorContext: buildOperatorContext(participantHints, operatorNotes),
    speakerRoster: buildSpeakerRoster(speakers, speakerNames) || "(no speakers identified)",
    topics: buildTopicsBlock(topics),
    transcript: cappedTranscript || "(empty)",
  });

  const t0 = Date.now();
  let r = null;
  let parsed = null;
  let lastErr = null;
  for (let attempt = 0; attempt < EXTRAS_MAX_ATTEMPTS; attempt++) {
    try {
      r = await backend.analyzeText({ prompt });
      parsed = safeParseExtras(r.text);
      if (parsed) {
        lastErr = null;
        break;
      }
      lastErr = "invalid JSON in extras response";
    } catch (err) {
      lastErr = (err?.message || String(err)).slice(0, 280);
      r = null;
    }
    // The final failure is reported once, after the loop.
    if (attempt < EXTRAS_MAX_ATTEMPTS - 1) {
      console.warn(
        `[meeting-extras] attempt ${attempt + 1} failed (${lastErr}) — retrying`
      );
    }
  }
  const dur = Date.now() - t0;
  const cost =
    parsed && r
      ? computeCostDetails(r.model, r.usage)
      : { input_tokens: 0, output_tokens: 0, thinking_tokens: 0, cost_usd: 0 };
  await recordCall({
    install_id: installId,
    license_fingerprint: licenseFingerprint,
    tier: "core",
    pipeline: "meeting_extras",
    backend: pipelineBackend,
    model: r?.model || null,
    status: parsed ? "success" : "error",
    duration_ms: dur,
    audio_seconds: 0,
    job_id: jobId,
    batch_id: null,
    source,
    media_url: null,
    error: parsed ? null : lastErr || "extras analysis failed",
    ...cost,
  });
  if (!parsed) {
    console.warn(
      `[meeting-extras] all ${EXTRAS_MAX_ATTEMPTS} attempts failed (${lastErr}) — extras unavailable`
    );
    return null;
  }
  console.log(
    `[meeting-extras] extracted ${parsed.tldr ? "tldr + " : "(no tldr) + "}${parsed.decisions.length} decision(s), ${parsed.action_items.length} action(s), ${parsed.open_questions.length} question(s), ${parsed.key_quotes.length} quote(s) in ${(dur / 1000).toFixed(1)}s`
  );
  return parsed;
}