Files
proof-of-work/proof-of-work/lib/ai/lenientJson.ts
T
Keysat dba478aa23 v1.1.0:3 — AI upgrades: history context, test connection, cost estimator, streaming preview
Four incremental upgrades to the AI program generator. No schema change, no /data migration.

1. History as context (the killer feature)
   - lib/ai/historyContext.ts builds a 90-day per-exercise rollup:
     frequency, recent weights, estimated 1RM (Epley), avg RPE,
     days-since-last, plus a STAGNANT flag when the heaviest weight in
     the new half doesn't beat the old half.
   - Generate page surfaces an "Include my workout history as context"
     checkbox (default on at >=10 logged workouts). When checked, the
     ~1-3 KB summary is appended to the system prompt so the model can
     recommend things like "you've stalled bench at 245 — try paused reps."
   - We deliberately don't ship raw set logs (privacy + token cost).

2. Test connection
   - POST /api/ai/test sends a tiny "say hi in 3 words" prompt and
     reports latency + first sample, or the error inline.
   - "Test connection" button next to "Save AI config" in
     Settings -> AI integration. Verifies provider/model/key/baseUrl
     without going through full program generation.

3. Cost estimator
   - lib/ai/pricing.ts ships a price table for major models
     (Claude 3.5/3.7/4/4.5, GPT-4o/5/o1/o3/o4-mini, Gemini 1.5/2.0/2.5).
     Ollama always returns 0; openai-compatible returns null.
   - Generation history shows per-row cost + a 30-day rolling total
     at the top of the page.

4. Streaming preview render
   - lib/ai/lenientJson.ts: stack-aware partial-JSON parser that
     auto-closes open strings/brackets/braces in reverse-of-opening
     order, drops dangling key:value pairs and partial keywords.
     Returns a best-effort snapshot of the program-so-far on each chunk.
   - Generate UI now renders a live "Building program..." panel that
     updates as weeks/days/exercises arrive instead of just showing
     raw text and waiting for stream end.

Tests: 26 new (ai-historyContext.test.ts, ai-lenientJson.test.ts,
ai-pricing.test.ts). 161 total pass.
2026-05-10 22:17:35 -05:00

117 lines
3.5 KiB
TypeScript

/**
 * Lenient JSON parser for incremental rendering of in-flight LLM
 * output.
 *
 * The model emits JSON one token at a time, so strict `JSON.parse`
 * fails until the very last `}` arrives. `lenientJsonParse` instead
 * returns a best-effort snapshot of the object written so far:
 *
 * 1. Locate the root `{`. An opening code fence (three backticks,
 *    optionally tagged `json`) that is directly followed by `{` wins;
 *    otherwise the first `{` in the input is used. Backticks that
 *    appear inside a JSON string are therefore never mistaken for a
 *    fence.
 * 2. Walk the buffer tracking string/escape state and a stack of open
 *    brackets. As soon as the root object closes, parse exactly that
 *    span and ignore whatever follows (trailing prose, a closing
 *    fence, or a closing fence that is still streaming in).
 * 3. If the tail is an object key that has no `:` yet (half-typed or
 *    complete), cut the buffer just before it. Otherwise close an
 *    open string with `"` so partially written string values are
 *    visible.
 * 4. Trim a partial trailing keyword (`true`/`false`/`null` prefix),
 *    a trailing comma, and a dangling `"key":` whose value has not
 *    started.
 * 5. Close open structures in reverse-of-opening order (so `[{`
 *    closes as `}]`, not `]}`).
 * 6. `JSON.parse` the result; return `null` if it still fails.
 *
 * Intentionally simple: partial numbers (e.g. `-2.`) and partial
 * escape sequences return `null` until the next chunk makes them
 * well-formed. Mismatched closers are not repaired; they are ignored
 * during the walk and the final parse decides.
 *
 * This is meant for live previews only. The result is unvalidated and
 * may be truncated, so the completed response should still go through
 * a strict parser.
 *
 * @param raw - Text received from the model so far; may include prose
 *   and Markdown code fences around the JSON object.
 * @returns The parsed (possibly partial) value, or `null` when no
 *   object has started or the buffer cannot be repaired.
 */
export function lenientJsonParse(raw: string): unknown | null {
  if (!raw) return null;

  // Every candidate starts with `{`, so a successful parse is always
  // an object and `null` can safely double as the failure signal.
  const tryParse = (text: string): unknown | null => {
    try {
      return JSON.parse(text);
    } catch {
      return null;
    }
  };

  // Only the opening fence matters here; the walk below stops at the
  // root close, so a closing fence never reaches the parser.
  const fenceOpen = /```(?:json)?\s*\{/i.exec(raw);
  const startIdx = fenceOpen
    ? fenceOpen.index + fenceOpen[0].length - 1
    : raw.indexOf('{');
  if (startIdx < 0) return null;
  const s = raw.slice(startIdx);

  // Quick path: already valid (rare during streaming, common after
  // the stream completes).
  const direct = tryParse(s);
  if (direct !== null) return direct;

  const stack: Array<'{' | '['> = [];
  let inStr = false;
  let escape = false;
  // Last non-whitespace character seen outside a string; a closing
  // quote is recorded as `"`.
  let prevSig = '';
  // Start index of an object key that has not been followed by `:`
  // yet, or -1 when the tail is not such a key.
  let keyStart = -1;

  for (let i = 0; i < s.length; i++) {
    const c = s[i];

    if (inStr) {
      // Escapes only exist inside strings.
      if (escape) escape = false;
      else if (c === '\\') escape = true;
      else if (c === '"') {
        inStr = false;
        prevSig = '"';
      }
      continue;
    }

    if (c === ' ' || c === '\n' || c === '\r' || c === '\t') continue;

    if (c === '"') {
      inStr = true;
      // A string that opens right after `{` or `,` inside an object
      // is a key, not a value.
      const inObject = stack[stack.length - 1] === '{';
      keyStart = inObject && (prevSig === '{' || prevSig === ',') ? i : -1;
      continue;
    }

    // Any other significant character (normally `:`) means the key,
    // if there was one, is no longer the tail.
    keyStart = -1;

    if (c === '{') stack.push('{');
    else if (c === '[') stack.push('[');
    else if (c === '}') {
      if (stack[stack.length - 1] === '{') {
        stack.pop();
        // Root object closed: everything after it is not JSON.
        if (stack.length === 0) return tryParse(s.slice(0, i + 1));
      }
    } else if (c === ']') {
      // Never pop more than we have; mismatched closers are ignored.
      if (stack[stack.length - 1] === '[') stack.pop();
    }
    prevSig = c;
  }

  let candidate: string;
  if (keyStart >= 0) {
    // Tail is a key without a colon: drop it entirely.
    candidate = s.slice(0, keyStart);
  } else {
    // Close any open string at the tail so partial text is visible.
    candidate = inStr ? s + '"' : s;
  }

  // Trim trailing whitespace.
  candidate = candidate.replace(/\s+$/, '');
  // Drop a partial trailing keyword (`true`/`false`/`null` prefix)
  // sitting after a `:`, `,`, or `[`.
  candidate = candidate.replace(
    /([:,[])\s*(?:t|tr|tru|f|fa|fal|fals|n|nu|nul)$/,
    '$1',
  );
  // Drop a trailing comma (no value follows yet).
  candidate = candidate.replace(/,\s*$/, '');
  // Drop a dangling key + colon (value not started yet).
  candidate = candidate.replace(/"[^"\\]*(?:\\.[^"\\]*)*"\s*:\s*$/, '');
  // Drop another trailing comma that may now be exposed.
  candidate = candidate.replace(/,\s*$/, '');

  // Close in reverse-of-opening order: `[{` becomes `}]`, not `]}`.
  for (let i = stack.length - 1; i >= 0; i--) {
    candidate += stack[i] === '{' ? '}' : ']';
  }

  return tryParse(candidate);
}