/**
 * Lenient JSON parser for incremental rendering of in-flight LLM output.
 *
 * The model emits JSON one token at a time, so strict `JSON.parse` fails
 * until the very last `}` arrives. `lenientJsonParse` instead produces a
 * best-effort snapshot of the object so far:
 *
 *   1. If the text contains a ``` fence (optionally tagged `json`, possibly
 *      still unclosed), the fenced content is tried first. If that yields
 *      nothing, the raw text is tried, so that triple backticks appearing
 *      inside a JSON string value do not hide the real object.
 *   2. Locates the first `{`.
 *   3. Walks the buffer tracking quote state and an open-bracket stack so
 *      we know what to close and in what order. If the root object closes,
 *      anything after it (trailing prose, a second object) is ignored.
 *   4. Closes any open string with `"`.
 *   5. Trims a partial trailing keyword (`true`/`false`/`null` prefix), a
 *      trailing comma, a dangling `"key":` whose value has not started, and
 *      a trailing key that has no `:` yet (including a half-typed key).
 *   6. Closes open structures in reverse-of-opening order (so `[{` closes
 *      as `}]`, not `]}`).
 *   7. Runs `JSON.parse` on the result.
 *
 * This is intentionally simple: partial numbers (e.g. `-2.`) and partial
 * escape sequences (a string ending in `\`) still return null until the
 * next chunk makes them well-formed.
 *
 * @param raw - Text received so far; may include prose and/or a code fence
 *   around the JSON.
 * @returns The parsed snapshot (always an object when non-null, because
 *   parsing starts at the first `{`), or `null` if nothing parseable could
 *   be recovered.
 */
export function lenientJsonParse(raw: string): unknown | null {
  if (!raw) return null;

  // Prefer fenced content when a fence is present. Tolerates an unclosed
  // trailing fence (still streaming).
  const fenced = raw.match(/```(?:json)?\s*([\s\S]*?)(?:\s*```|$)/);
  if (fenced) {
    const fromFence = parseFromFirstBrace(fenced[1] ?? '');
    if (fromFence !== null) return fromFence;
  }

  // No fence, or the "fence" was really backticks inside a JSON string and
  // its content was not parseable: fall back to the unmodified text.
  return parseFromFirstBrace(raw);
}

/** Strict `JSON.parse` that reports failure as `null` instead of throwing. */
function strictParseOrNull(text: string): unknown | null {
  try {
    return JSON.parse(text);
  } catch {
    return null;
  }
}

/**
 * Parses the object that starts at the first `{` of `text`, repairing an
 * unfinished tail where possible. Returns `null` when there is no `{` or the
 * repaired text is still not valid JSON.
 */
function parseFromFirstBrace(text: string): unknown | null {
  const startIdx = text.indexOf('{');
  if (startIdx < 0) return null;
  const s = text.slice(startIdx);

  // Quick path: maybe it's already valid (rare during streaming, common
  // after the stream completes). `s` starts with `{`, so a successful parse
  // is an object and can never be confused with the `null` failure marker.
  const quick = strictParseOrNull(s);
  if (quick !== null) return quick;

  // Walk the buffer tracking the open-bracket stack. Mismatched closers are
  // ignored rather than recovered from; we just never pop more than we have.
  const stack: Array<'{' | '['> = [];
  let inStr = false;
  let escape = false;
  let rootEnd = -1;

  for (let i = 0; i < s.length; i++) {
    const c = s[i];

    if (inStr) {
      // Backslash escapes only have meaning inside a string.
      if (escape) escape = false;
      else if (c === '\\') escape = true;
      else if (c === '"') inStr = false;
      continue;
    }

    if (c === '"') {
      inStr = true;
    } else if (c === '{' || c === '[') {
      stack.push(c);
    } else if (c === '}' || c === ']') {
      const opener = c === '}' ? '{' : '[';
      if (stack[stack.length - 1] === opener) {
        stack.pop();
        if (stack.length === 0) {
          // Root object is complete; whatever follows is not part of it.
          rootEnd = i + 1;
          break;
        }
      }
    }
  }

  if (rootEnd >= 0) return strictParseOrNull(s.slice(0, rootEnd));

  // Close any open string at the tail.
  let candidate = inStr ? s + '"' : s;

  // Trim trailing whitespace.
  candidate = candidate.replace(/\s+$/, '');

  // Drop a partial trailing keyword (`true`/`false`/`null` prefix) sitting
  // after a `:`, `,`, or `[`.
  candidate = candidate.replace(
    /([:,[])\s*(?:t|tr|tru|f|fa|fal|fals|n|nu|nul)$/,
    '$1',
  );

  // Drop a trailing comma (no value follows yet).
  candidate = candidate.replace(/,\s*$/, '');

  // Drop a dangling key + colon (value not started yet).
  candidate = candidate.replace(/"[^"\\]*(?:\\.[^"\\]*)*"\s*:\s*$/, '');

  // Drop a trailing key that has no colon yet (possibly half-typed and just
  // closed above). Only applies when the innermost open container is an
  // object: a string directly after `{` or `,` there must be a key, whereas
  // the same shape inside an array is a legitimate element and is kept.
  if (stack[stack.length - 1] === '{') {
    candidate = candidate.replace(/([{,])\s*"[^"\\]*(?:\\.[^"\\]*)*"$/, '$1');
  }

  // Drop another trailing comma that may now be exposed.
  candidate = candidate.replace(/,\s*$/, '');

  // Close the stack in reverse-of-opening order: `[{` becomes `}]`, not
  // `]}` (the bug a plain depth counter would have).
  for (let i = stack.length - 1; i >= 0; i--) {
    candidate += stack[i] === '{' ? '}' : ']';
  }

  return strictParseOrNull(candidate);
}