Files
Keysat 7ae6ab3ba8 v0.25.0:0 - cluster coordination layer (swap lock + webhook + schedule registry)
GPU-arbiter safety layer for when automation, not just the dashboard, swaps
models:
- swap reservation lock (POST/GET/DELETE /api/swap/lock); 423-enforced in
  post_swap via a single-read gate, TTL-bounded, secret-token auth, human
  force-release override + dashboard banner
- swap webhook (swap_complete/swap_failed) fired outside the swap lock, optional
  HMAC signature, configurable URL+secret
- read-only schedule registry (GET/POST/DELETE /api/schedule) + dashboard panel

New module image/app/coordination.py; docs/COORDINATION.md for consumers; 22
offline tests in test_coordination.py.
2026-06-18 07:07:08 -05:00

51 lines
2.1 KiB
TypeScript

import { FileHelper } from '@start9labs/start-sdk'
import { z } from 'zod'
import { sdk } from '../sdk'
/**
 * Creates the validator used by every setting in the schema: a string that
 * falls back to '' whenever the incoming value fails string validation
 * (for example when the key is missing).
 */
const blankableString = () => z.string().catch('')

/**
 * Schema for the Spark configuration object.
 *
 * Every field is a string that defaults to ''; a blank value means
 * "not configured / use the default" as described per field below.
 */
export const sparkConfigSchema = z.object({
  // Host and user for Spark 1 and Spark 2.
  spark1_host: blankableString(),
  spark1_user: blankableString(),
  spark2_host: blankableString(),
  spark2_user: blankableString(),

  // vLLM port override (Spark 1), optional.
  // Blank => 8888 (launch-cluster.sh default).
  vllm_port: blankableString(),
  // vLLM container-name override (Spark 1), optional.
  // Blank => "vllm_node".
  vllm_container: blankableString(),

  // Comma-separated list of built-in services to switch off, optional
  // (parakeet, kokoro, embeddings, qdrant). Blank => all enabled.
  disabled_services: blankableString(),

  // Per-service overrides, optional.
  // Blank => use spark2_host / spark2_user.
  parakeet_host: blankableString(),
  parakeet_user: blankableString(),
  parakeet_container: blankableString(),
  kokoro_host: blankableString(),
  kokoro_user: blankableString(),
  kokoro_container: blankableString(),

  // Overrides for the embedding server (spark-embed) + Qdrant, optional.
  embed_host: blankableString(),
  embed_user: blankableString(),
  embed_container: blankableString(),
  qdrant_host: blankableString(),
  qdrant_user: blankableString(),
  qdrant_container: blankableString(),
  qdrant_collection: blankableString(),

  // matrix-bridge bot, optional. Blank => no tile. Host reuses Spark 2.
  matrix_bridge_user: blankableString(),

  // Open WebUI deep-link, optional.
  open_webui_url: blankableString(),

  // NGC API key for pulling NIM containers from nvcr.io/nim/..., optional.
  ngc_api_key: blankableString(),

  // Coordination webhook, optional: POSTed on swap_complete/swap_failed so
  // downstream consumers re-point their model config. Blank => disabled.
  swap_webhook_url: blankableString(),
  // Shared secret, optional; if set, the webhook body is HMAC-signed.
  swap_webhook_secret: blankableString(),
})
// Static TypeScript type derived from sparkConfigSchema via z.infer, so the
// type stays in sync with the schema's fields without a hand-written interface.
export type SparkConfig = z.infer<typeof sparkConfigSchema>
// YAML file helper for 'config.yaml', located relative to sdk.volumes.main and
// paired with sparkConfigSchema as its shape/validator.
// NOTE(review): presumably the helper applies the schema on read/write of the
// file — confirm against the start-sdk FileHelper documentation.
export const sparkConfigYaml = FileHelper.yaml(
{ base: sdk.volumes.main, subpath: 'config.yaml' },
sparkConfigSchema,
)