Package v0.2.12→v0.2.124: manifest, actions, version graph
This commit is contained in:
@@ -0,0 +1,12 @@
|
||||
import { VersionInfo } from '@start9labs/start-sdk'
|
||||
|
||||
// English (en_US) release note text for 0.2.16, kept in one place so the
// version entry below stays short enough to scan.
const RELEASE_NOTES_EN_US =
  'Analyze pipeline now talks to operator-hardware LLMs in a much faster mode. Two changes to the chat-completion request the relay sends to vLLM: (1) response_format is set to JSON-object mode, which constrains the model to emit valid JSON instead of wrapping the output in prose preamble like "Here are the topics I identified:" — saves real decode tokens and avoids parse failures. (2) chat_template_kwargs.enable_thinking is set to false, which disables Qwen3.6\'s reasoning mode for this task. Thinking mode is great for math but pure latency-noise for structured extraction. Together these typically cut analyze wall-time on operator hardware by 30-50%. Both fields are vLLM-specific; non-Qwen / non-vLLM backends ignore them, so this is safe across other operator-hardware setups.'

/**
 * Version entry for `0.2.16:0`.
 *
 * Carries the en_US release notes and a pair of migrations. Both migration
 * callbacks have empty bodies, so this version performs no migration work
 * in either direction.
 */
export const v_0_2_16 = VersionInfo.of({
  version: '0.2.16:0',
  releaseNotes: { en_US: RELEASE_NOTES_EN_US },
  migrations: {
    // Intentionally empty: nothing to do when moving up to this version.
    up: async ({ effects }) => {},
    // Intentionally empty: nothing to undo when moving down from this version.
    down: async ({ effects }) => {},
  },
})
|
||||
Reference in New Issue
Block a user