v0.13.0:4 - redaction gateway, embeddings proxy, expanded audio API

- Add redaction gateway (redaction_gateway.py, redaction/ scrub + tests)
- Add embeddings proxy and spark_embed service (Dockerfile + main.py)
- Expand audio_proxy with speaker-aware handling; deep_health/health/server updates
- Package: configureSparks action + sparkConfig model updates, manifest/main wiring
- Docs: AUDIO_API, EMBEDDINGS, REDACTION_GATEWAY; HANDOFF and runbook/known-issues refresh
2026-06-11 17:45:21 -05:00
parent 4a75274db3
commit 8d839e3714
37 changed files with 3763 additions and 197 deletions
@@ -25,7 +25,7 @@ const inputSpec = InputSpec.of({
  spark2_host: Value.text({
    name: 'Spark 2 hostname or IP',
    description:
-      'The worker node of your DGX Spark cluster (also runs always-on services like Parakeet/Magpie). Enter its LAN IP or hostname.',
+      'The worker node of your DGX Spark cluster (also runs always-on services like Parakeet and Kokoro). Enter its LAN IP or hostname.',
    required: true,
    default: null,
    placeholder: 'e.g. 192.168.1.11',
@@ -58,22 +58,65 @@ const inputSpec = InputSpec.of({
    placeholder: 'parakeet-asr',
    masked: false,
  }),
-  magpie_host: Value.text({
-    name: 'Magpie host (optional)',
+  kokoro_host: Value.text({
+    name: 'Kokoro host (optional)',
    description:
-      'Override the host running the Magpie TTS container. Leave blank if Magpie runs on Spark 2.',
+      'Override the host running the Kokoro TTS container. Leave blank if Kokoro runs on Spark 2.',
    required: false,
    default: null,
    placeholder: 'leave blank to use Spark 2',
    masked: false,
  }),
-  magpie_container: Value.text({
-    name: 'Magpie container name (optional)',
+  kokoro_container: Value.text({
+    name: 'Kokoro container name (optional)',
    description:
-      'Docker container name for Magpie. Defaults to "magpie-tts".',
+      'Docker container name for Kokoro. Defaults to "kokoro-tts".',
    required: false,
    default: null,
-    placeholder: 'magpie-tts',
+    placeholder: 'kokoro-tts',
+    masked: false,
+  }),
+  embed_host: Value.text({
+    name: 'Embedding server host (optional)',
+    description:
+      'Override the host running the spark-embed container (bge-m3 dense embeddings + reranker). Leave blank if it runs on Spark 2.',
+    required: false,
+    default: null,
+    placeholder: 'leave blank to use Spark 2',
+    masked: false,
+  }),
+  embed_container: Value.text({
+    name: 'Embedding container name (optional)',
+    description: 'Docker container name for the embedding server. Defaults to "spark-embed".',
+    required: false,
+    default: null,
+    placeholder: 'spark-embed',
+    masked: false,
+  }),
+  qdrant_host: Value.text({
+    name: 'Qdrant host (optional)',
+    description:
+      'Override the host running the Qdrant vector database. Leave blank if it runs on Spark 2.',
+    required: false,
+    default: null,
+    placeholder: 'leave blank to use Spark 2',
+    masked: false,
+  }),
+  qdrant_container: Value.text({
+    name: 'Qdrant container name (optional)',
+    description: 'Docker container name for Qdrant. Defaults to "qdrant".',
+    required: false,
+    default: null,
+    placeholder: 'qdrant',
+    masked: false,
+  }),
+  qdrant_collection: Value.text({
+    name: 'Default Qdrant collection (optional)',
+    description:
+      'Default collection name used by /api/search when a request does not specify one. Leave blank to require callers to pass a collection.',
+    required: false,
+    default: null,
+    placeholder: 'e.g. crm_chunks',
    masked: false,
  }),
  open_webui_url: Value.text({
@@ -88,7 +131,7 @@ const inputSpec = InputSpec.of({
  ngc_api_key: Value.text({
    name: 'NGC API key (optional)',
    description:
-      'NVIDIA NGC personal API key — needed to install NIM containers (Parakeet, Magpie, etc.) from nvcr.io. Get one free at https://ngc.nvidia.com/setup/personal-key. Stored only on this Start9 server; passed to docker as the NGC_API_KEY env var when installing NIM services.',
+      'NVIDIA NGC personal API key — needed to install NIM containers (Parakeet, etc.) from nvcr.io. Get one free at https://ngc.nvidia.com/setup/personal-key. Stored only on this Start9 server; passed to docker as the NGC_API_KEY env var when installing NIM services. (Kokoro TTS is Apache 2.0 and does not need an NGC key.)',
    required: false,
    default: null,
    placeholder: 'starts with "nvapi-..."',
@@ -11,9 +11,17 @@ export const sparkConfigSchema = z.object({
  parakeet_host: z.string().catch(''),
  parakeet_user: z.string().catch(''),
  parakeet_container: z.string().catch(''),
-  magpie_host: z.string().catch(''),
-  magpie_user: z.string().catch(''),
-  magpie_container: z.string().catch(''),
+  kokoro_host: z.string().catch(''),
+  kokoro_user: z.string().catch(''),
+  kokoro_container: z.string().catch(''),
+  // Optional overrides for the embedding server (spark-embed) + Qdrant.
+  embed_host: z.string().catch(''),
+  embed_user: z.string().catch(''),
+  embed_container: z.string().catch(''),
+  qdrant_host: z.string().catch(''),
+  qdrant_user: z.string().catch(''),
+  qdrant_container: z.string().catch(''),
+  qdrant_collection: z.string().catch(''),
  // Optional Open WebUI deep-link
  open_webui_url: z.string().catch(''),
  // Optional NGC API key for pulling NIM containers from nvcr.io/nim/...
@@ -17,7 +17,7 @@ const dict = {

  // interfaces.ts (api)
  'OpenAI-compatible API': 8,
-  'Service-discovery JSON at /api/endpoints. Other apps on the LAN can GET this to learn the current vLLM, Parakeet, and Magpie URLs.': 9,
+  'Service-discovery JSON at /api/endpoints. Other apps on the LAN can GET this to learn the current vLLM, Parakeet, and Kokoro URLs.': 9,
 } as const

 /**
@@ -22,7 +22,7 @@ export const setInterfaces = sdk.setupInterfaces(async ({ effects }) => {
    name: i18n('OpenAI-compatible API'),
    id: 'api',
    description: i18n(
-      'Service-discovery JSON at /api/endpoints. Other apps on the LAN can GET this to learn the current vLLM, Parakeet, and Magpie URLs.',
+      'Service-discovery JSON at /api/endpoints. Other apps on the LAN can GET this to learn the current vLLM, Parakeet, and Kokoro URLs.',
    ),
    type: 'api',
    masked: false,
@@ -16,9 +16,16 @@ export const main = sdk.setupMain(async ({ effects }) => {
    parakeet_host: '',
    parakeet_user: '',
    parakeet_container: '',
-    magpie_host: '',
-    magpie_user: '',
-    magpie_container: '',
+    kokoro_host: '',
+    kokoro_user: '',
+    kokoro_container: '',
+    embed_host: '',
+    embed_user: '',
+    embed_container: '',
+    qdrant_host: '',
+    qdrant_user: '',
+    qdrant_container: '',
+    qdrant_collection: '',
    open_webui_url: '',
    ngc_api_key: '',
  }
@@ -45,9 +52,16 @@ export const main = sdk.setupMain(async ({ effects }) => {
        PARAKEET_HOST: cfg.parakeet_host,
        PARAKEET_USER: cfg.parakeet_user,
        PARAKEET_CONTAINER: cfg.parakeet_container,
-        MAGPIE_HOST: cfg.magpie_host,
-        MAGPIE_USER: cfg.magpie_user,
-        MAGPIE_CONTAINER: cfg.magpie_container,
+        KOKORO_HOST: cfg.kokoro_host,
+        KOKORO_USER: cfg.kokoro_user,
+        KOKORO_CONTAINER: cfg.kokoro_container,
+        EMBED_HOST: cfg.embed_host,
+        EMBED_USER: cfg.embed_user,
+        EMBED_CONTAINER: cfg.embed_container,
+        QDRANT_HOST: cfg.qdrant_host,
+        QDRANT_USER: cfg.qdrant_user,
+        QDRANT_CONTAINER: cfg.qdrant_container,
+        QDRANT_COLLECTION: cfg.qdrant_collection,
        MODELS_OVERRIDES: '/data/models-overrides.yaml',
        SERVICES_OVERRIDES: '/data/services-overrides.yaml',
        CONNECTIVITY_LOG: '/data/connectivity.json',
@@ -5,10 +5,14 @@ export const manifest = setupManifest({
  id: 'spark-control',
  title: 'Spark Control',
  license: 'MIT',
-  packageRepo: 'https://github.com/grant/spark-control',
-  upstreamRepo: 'https://github.com/grant/spark-control',
-  marketingUrl: 'https://github.com/grant/spark-control',
-  donationUrl: 'https://github.com/grant/spark-control',
+  // Placeholder URLs — replace with a real repo before publishing the package
+  // publicly. The StartOS UI shows these as "Source" and "Marketing" links;
+  // example.com is RFC 2606 reserved-for-documentation so it's an obvious
+  // "fill me in" signal rather than pointing at anyone's personal account.
+  packageRepo: 'https://example.com',
+  upstreamRepo: 'https://example.com',
+  marketingUrl: 'https://example.com',
+  donationUrl: null,
  docsUrls: [],
  description: { short, long },
  volumes: ['main'],
@@ -1,10 +1,10 @@
 import { VersionInfo, IMPOSSIBLE } from '@start9labs/start-sdk'

 export const v0_1_0 = VersionInfo.of({
-  version: '0.13.0:3',
+  version: '0.18.0:0',
  releaseNotes: {
    en_US:
-      'v0.13.0:3 — chat-completions proxy. Adds POST /v1/chat/completions (and /v1/completions for the legacy endpoint) to Spark Control that forwards to whichever vLLM is currently loaded on Spark 1. Supports SSE streaming when stream=true in the request body. Request body is passed through unchanged — any vLLM-supported field works (model, messages, max_tokens, temperature, response_format, chat_template_kwargs, tools, ...). Closes the last gap that forced clients to know about both Spark Control AND the direct vLLM URL — recap-relay and friends can now use one trusted host for everything (transcribe, diarize, analyze) with one cert and one allowlist. 30-min request timeout to accommodate large-context completions. No parakeet container changes; no Reapply patches needed.',
+      'v0.18.0 — dual-channel mode for POST /api/audio/label-merge. Instead of one mixed-mono file, a caller (Ten31 Transcripts) can send two sample-aligned tracks: mic_file (the local user) + system_file (everyone else, from screen capture). Rather than force the diarizer to re-disentangle a mono mix (which over-segments — proven: a stereo clip of 2 clean voices returned 3 speakers), we split the problem so each model gets the easiest mono input. The mic track yields the local user\'s words, gated to windows where the mic is genuinely the user speaking (mic louder than system — a self-VAD computed server-side per-window, or supplied via self_vad); this gate is load-bearing because the mic picks up the remote audio as quiet bleed. The system track is diarized (only has to separate the remote people) and named via the visual timeline + voiceprints. The user\'s clean voiceprint is enrolled from the mic track and injected into the voiceprint library, so a system cluster that is the user dialed in from a second device (dual-login) resolves to the user, not a stranger. Validated on a real misattributing call: fixes both mono-mix misattributions, recovers the dropped-to-Unknown local line, and correctly splits overlapping speech (two people saying "Hello" at once) that the coarse ground truth itself conflated. New form fields: mic_file + system_file (dual mode), self_name, self_vad (optional). The mono file path is unchanged and fully backward-compatible. Response gains a "mode" field (mono | dual_channel). Known limit: if loud remote bleed masks a quiet local word, the mic-track ASR may miss it — mitigated by a cleaner mic (headphones) or future echo-cancellation. See docs/AUDIO_API.md.',
  },
  migrations: {
    up: async ({ effects }) => {},