Package v0.2.12→v0.2.124: manifest, actions, version graph

2026-06-13 13:36:30 -05:00
parent 318c6c4b81
commit 1243f4414c
126 changed files with 2052 additions and 441 deletions
@@ -0,0 +1,12 @@
+import { VersionInfo } from '@start9labs/start-sdk'
+
+export const v_0_2_16 = VersionInfo.of({ // Version entry for release 0.2.16, built with VersionInfo.of from the start-sdk import above
+  version: '0.2.16:0', // NOTE(review): the ':0' suffix is presumably a package/downstream revision — confirm against the SDK's version format
+  releaseNotes: { // Release notes keyed by locale; only en_US is supplied in this entry
+    en_US: 'Analyze pipeline now talks to operator-hardware LLMs in a much faster mode. Two changes to the chat-completion request the relay sends to vLLM: (1) response_format is set to JSON-object mode, which constrains the model to emit valid JSON instead of wrapping the output in prose preamble like "Here are the topics I identified:" — saves real decode tokens and avoids parse failures. (2) chat_template_kwargs.enable_thinking is set to false, which disables Qwen3.6\'s reasoning mode for this task. Thinking mode is great for math but pure latency-noise for structured extraction. Together these typically cut analyze wall-time on operator hardware by 30-50%. Both fields are vLLM-specific; non-Qwen / non-vLLM backends ignore them, so this is safe across other operator-hardware setups.',
+  },
+  migrations: { // Both hooks below have empty bodies, so this entry performs no migration work in either direction
+    up: async ({ effects }) => {}, // no-op; `effects` is destructured but never used
+    down: async ({ effects }) => {}, // no-op; `effects` is destructured but never used
+  },
+})