# Commit 7e0759846f
#
# Move the ~20 optional cluster knobs out of the StartOS "Configure Sparks"
# action (now just the 4 required fields) and into a dashboard ⚙ Settings gear,
# backed by a /data/app_settings.json overlay keyed by env-var names. One shared
# mutable Settings instance + Settings.reload() applies edits live without a
# restart; existing installs' values migrate automatically on first boot.
# Also: support-service ports (parakeet/kokoro/embed/qdrant + vllm) are now
# configurable, and GET /api/swap/lock no longer 404s (it was shadowed by the
# /api/swap/{job_id} catch-all). WebhookNotifier is re-pointed on save so its
# url/secret reload live too.
#
# 287 lines, 14 KiB, Python
"""App-owned settings overlay: the in-dashboard 'gear' knobs.

Spark Control's *required* wiring — the two Spark IPs and SSH users — is set once
via the StartOS "Configure Sparks" action and arrives as env vars. Everything
else (ports, container names, support-service hosts, integrations, webhook) is
optional and lives here: a small JSON overlay on /data that the dashboard gear
reads and writes, so an operator never has to open StartOS actions to tune the
cluster. This follows the StartOS 0.4 convention (minimal setup action; routine
config in the app's own UI) and stays inside the package's backup volume, so the
file is backed up and restored for free.

Each overlay entry is keyed by the *same env var name* config.Settings already
reads, so the overlay is simply an env-var override store. Precedence (see
config._effective_env): process env first, this overlay on top — so a knob set
in the gear wins, while an un-touched knob falls through to whatever the StartOS
action injected, then to the code default.

First-run migration: when the overlay file doesn't exist yet (e.g. an existing
install upgrading into this version), it's seeded from the current env so any
value previously set via the StartOS action carries over into the gear with no
operator action and nothing lost.
"""
|
|
from __future__ import annotations
|
|
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
import tempfile
|
|
from pathlib import Path
|
|
from typing import Mapping
|
|
|
|
# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)
|
|
|
|
# Declarative description of every gear knob. The same metadata feeds the
# /api/settings response (the front-end renders its form generically from it)
# and the light server-side validation below.
#   key  — the env var name the knob overrides
#   type — one of text | int | csv | secret; `secret` values are write-only and
#          are never echoed back to the browser
FIELDS: list[dict] = [
    # --- vLLM (Spark 1) ---
    {
        "group": "vLLM (Spark 1)",
        "key": "VLLM_PORT",
        "label": "vLLM port",
        "type": "int",
        "placeholder": "8888",
        "help": "Port your vLLM listens on. Blank ⇒ 8888 (the bundled launch-cluster.sh). Set 8000 for vanilla vLLM, or wherever yours listens.",
    },
    {
        "group": "vLLM (Spark 1)",
        "key": "VLLM_CONTAINER",
        "label": "vLLM container name",
        "type": "text",
        "placeholder": "vllm_node",
        "help": "Docker container the swappable vLLM runs in. Blank ⇒ vllm_node. The swap log-tail and pre-flight validator exec into it by name.",
    },

    # --- Monitoring ---
    {
        "group": "Monitoring",
        "key": "DISABLED_SERVICES",
        "label": "Services to hide",
        "type": "csv",
        "placeholder": "e.g. parakeet,kokoro",
        "help": "Comma-separated built-in services your cluster doesn't run, so their tiles are hidden and never probed. Valid: parakeet, kokoro, embeddings, qdrant. Blank ⇒ monitor all.",
    },

    # --- Parakeet (STT) ---
    {
        "group": "Parakeet (STT)",
        "key": "PARAKEET_HOST",
        "label": "Host",
        "type": "text",
        "placeholder": "leave blank for Spark 2",
        "help": "Host running the Parakeet STT container. Blank ⇒ Spark 2.",
    },
    {
        "group": "Parakeet (STT)",
        "key": "PARAKEET_PORT",
        "label": "Port",
        "type": "int",
        "placeholder": "8000",
        "help": "Port Parakeet listens on. Blank ⇒ 8000. Set this if you remapped it (e.g. because your vLLM holds 8000).",
    },
    {
        "group": "Parakeet (STT)",
        "key": "PARAKEET_CONTAINER",
        "label": "Container name",
        "type": "text",
        "placeholder": "parakeet-asr",
        "help": "Docker container name for Parakeet. Blank ⇒ parakeet-asr.",
    },
    {
        "group": "Parakeet (STT)",
        "key": "PARAKEET_USER",
        "label": "SSH user",
        "type": "text",
        "placeholder": "leave blank for Spark 2 user",
        "help": "SSH user that owns the Parakeet container. Blank ⇒ your Spark 2 user.",
    },

    # --- Kokoro (TTS) ---
    {
        "group": "Kokoro (TTS)",
        "key": "KOKORO_HOST",
        "label": "Host",
        "type": "text",
        "placeholder": "leave blank for Spark 2",
        "help": "Host running the Kokoro TTS container. Blank ⇒ Spark 2.",
    },
    {
        "group": "Kokoro (TTS)",
        "key": "KOKORO_PORT",
        "label": "Port",
        "type": "int",
        "placeholder": "8880",
        "help": "Port Kokoro listens on. Blank ⇒ 8880.",
    },
    {
        "group": "Kokoro (TTS)",
        "key": "KOKORO_CONTAINER",
        "label": "Container name",
        "type": "text",
        "placeholder": "kokoro-tts",
        "help": "Docker container name for Kokoro. Blank ⇒ kokoro-tts.",
    },
    {
        "group": "Kokoro (TTS)",
        "key": "KOKORO_USER",
        "label": "SSH user",
        "type": "text",
        "placeholder": "leave blank for Spark 2 user",
        "help": "SSH user that owns the Kokoro container. Blank ⇒ your Spark 2 user.",
    },

    # --- Embeddings ---
    {
        "group": "Embeddings",
        "key": "EMBED_HOST",
        "label": "Host",
        "type": "text",
        "placeholder": "leave blank for Spark 2",
        "help": "Host running the spark-embed container (bge-m3 + reranker). Blank ⇒ Spark 2.",
    },
    {
        "group": "Embeddings",
        "key": "EMBED_PORT",
        "label": "Port",
        "type": "int",
        "placeholder": "8088",
        "help": "Port the embedding server listens on. Blank ⇒ 8088.",
    },
    {
        "group": "Embeddings",
        "key": "EMBED_CONTAINER",
        "label": "Container name",
        "type": "text",
        "placeholder": "spark-embed",
        "help": "Docker container name for the embedding server. Blank ⇒ spark-embed.",
    },
    {
        "group": "Embeddings",
        "key": "EMBED_USER",
        "label": "SSH user",
        "type": "text",
        "placeholder": "leave blank for Spark 2 user",
        "help": "SSH user that owns the embedding container. Blank ⇒ your Spark 2 user.",
    },

    # --- Qdrant ---
    {
        "group": "Qdrant",
        "key": "QDRANT_HOST",
        "label": "Host",
        "type": "text",
        "placeholder": "leave blank for Spark 2",
        "help": "Host running the Qdrant vector database. Blank ⇒ Spark 2.",
    },
    {
        "group": "Qdrant",
        "key": "QDRANT_PORT",
        "label": "Port",
        "type": "int",
        "placeholder": "6333",
        "help": "Port Qdrant's REST API listens on. Blank ⇒ 6333.",
    },
    {
        "group": "Qdrant",
        "key": "QDRANT_CONTAINER",
        "label": "Container name",
        "type": "text",
        "placeholder": "qdrant",
        "help": "Docker container name for Qdrant. Blank ⇒ qdrant.",
    },
    {
        "group": "Qdrant",
        "key": "QDRANT_USER",
        "label": "SSH user",
        "type": "text",
        "placeholder": "leave blank for Spark 2 user",
        "help": "SSH user that owns the Qdrant container. Blank ⇒ your Spark 2 user.",
    },
    {
        "group": "Qdrant",
        "key": "QDRANT_COLLECTION",
        "label": "Default collection",
        "type": "text",
        "placeholder": "e.g. crm_chunks",
        "help": "Collection used by /api/search when a request doesn't name one. Blank ⇒ callers must pass a collection.",
    },

    # --- Integrations ---
    {
        "group": "Integrations",
        "key": "OPEN_WEBUI_URL",
        "label": "Open WebUI URL",
        "type": "text",
        "placeholder": "e.g. https://open-webui.yourserver.local",
        "help": "If set, the header shows a one-click 'Open chat' button to your Open WebUI.",
    },
    {
        "group": "Integrations",
        "key": "MATRIX_BRIDGE_USER",
        "label": "matrix-bridge bot SSH user",
        "type": "text",
        "placeholder": "e.g. modelo",
        "help": "SSH user owning the bot's ~/matrix-bridge clone (Spark 2). Set this to show the bot tile (update/restart/logs). Blank ⇒ tile hidden.",
    },
    {
        "group": "Integrations",
        "key": "NGC_API_KEY",
        "label": "NGC API key",
        "type": "secret",
        "placeholder": "starts with nvapi-…",
        "help": "NVIDIA NGC personal key, needed only to install NIM containers from nvcr.io. Stored on this server.",
    },
    {
        "group": "Integrations",
        "key": "SWAP_WEBHOOK_URL",
        "label": "Swap webhook URL",
        "type": "text",
        "placeholder": "e.g. https://my-service.local/spark-swap",
        "help": "POSTed a small JSON event (swap_complete / swap_failed) after every model swap, so automation can re-point to the new model. Blank ⇒ disabled.",
    },
    {
        "group": "Integrations",
        "key": "SWAP_WEBHOOK_SECRET",
        "label": "Swap webhook secret",
        "type": "secret",
        "placeholder": "a random shared string",
        "help": "If set, each webhook is HMAC-signed (X-Spark-Signature) so the receiver can verify it. Blank ⇒ unsigned.",
    },
]
|
|
|
|
# Lookup tables derived once from FIELDS: spec-by-key, plus the key sets for
# the two field types that get special treatment (secrets and ints).
_BY_KEY = {spec["key"]: spec for spec in FIELDS}
_SECRET_KEYS = frozenset(name for name, spec in _BY_KEY.items() if spec["type"] == "secret")
_INT_KEYS = frozenset(name for name, spec in _BY_KEY.items() if spec["type"] == "int")

# Control characters (newlines included) are refused outright: these values end
# up in env vars, URLs, and SSH command lines. They are quoted at the sink, so
# this is defence in depth.
_BAD_CHARS = re.compile(r"[\x00-\x1f\x7f]")

# Secrets are never echoed to the browser, so a blank submit has to mean "keep
# the stored value" (the operator cannot see it to retype it). To actually
# *remove* a stored secret the UI sends this sentinel instead of a real value.
# public_view hands it to the front-end so both sides stay in sync.
CLEAR_SENTINEL = "__clear__"
|
|
|
|
|
|
def _path() -> Path:
    """Return the location of the overlay file.

    The ``APP_SETTINGS_FILE`` environment variable is consulted on every call.
    When it is unset, empty, or whitespace-only the default
    ``/data/app_settings.json`` is used.

    Returns:
        The overlay file path as a ``Path``.
    """
    configured = os.environ.get("APP_SETTINGS_FILE", "")
    # An empty value would otherwise become Path(""), i.e. the current
    # directory, so every read and write would target a directory instead of
    # a file. Treat a blank value exactly like an unset one.
    if not configured.strip():
        return Path("/data/app_settings.json")
    return Path(configured)
|
|
|
|
|
|
def field_keys() -> frozenset[str]:
    """Return the env-var names of all knobs declared in FIELDS as an immutable set."""
    return frozenset(_BY_KEY.keys())
|
|
|
|
|
|
def load_overlay() -> dict[str, str]:
    """Read the overlay file and return it as ``{ENV_KEY: value}``.

    Only keys declared in FIELDS are kept, and entries whose value is ``None``
    or the empty string are dropped; every surviving value is converted with
    ``str()``.

    This is a pure read with no side effects — it runs on every Settings
    (re)build, so it must never write. A missing file, an unreadable or
    unparsable file, or a JSON document that is not an object all yield ``{}``.
    The file is tiny.
    """
    overlay_file = _path()
    if not overlay_file.exists():
        return {}

    try:
        text = overlay_file.read_text()
        parsed = json.loads(text)
    except (ValueError, OSError) as exc:
        log.warning("ignoring unreadable %s: %s", overlay_file, exc)
        return {}

    if not isinstance(parsed, dict):
        return {}

    result: dict[str, str] = {}
    for name, stored in parsed.items():
        if name not in _BY_KEY:
            continue
        if stored is None or stored == "":
            continue
        result[name] = str(stored)
    return result
|
|
|
|
|
|
def seed_from_env(env: Mapping[str, str]) -> None:
    """Seed the overlay from *env* the first time the app boots with this code.

    Intended to be called once at startup. If the overlay file already exists
    nothing happens. Otherwise every known knob with a non-empty value in
    *env* is copied into a new overlay, so optional values previously set via
    the StartOS action show up in the gear automatically (nothing lost on
    upgrade). When *env* carries no usable knob no file is written — a fresh
    install then starts with no overlay and pure defaults.

    Each value goes through the same ``_validate`` check as ``apply()``; one
    that fails with ``SettingsError`` (e.g. a paste-error port) is logged and
    skipped rather than written, matching the gear's own guards.
    """
    if _path().exists():
        return

    migrated: dict[str, str] = {}
    for name in _BY_KEY:
        raw = env.get(name)
        if not raw:
            continue
        try:
            value = _validate(name, raw)
        except SettingsError as exc:
            log.warning("skipping invalid env value while seeding overlay: %s", exc)
            continue
        # The clear sentinel is an instruction, not a value — never persist it.
        if value and value != CLEAR_SENTINEL:
            migrated[name] = value

    if not migrated:
        return
    _write(migrated)
    log.info("seeded settings overlay from env (%d keys): %s", len(migrated), _path())
|
|
|
|
|
|
def _write(overlay: dict[str, str]) -> None:
    """Persist *overlay* to the settings file atomically and durably.

    The JSON is written to a temporary file in the same directory as the
    target, flushed and fsynced, and only then renamed over the real file with
    ``os.replace``. A reader therefore sees either the old file or the
    complete new one, never a partial write.

    Args:
        overlay: The full ``{ENV_KEY: value}`` mapping to store; it replaces
            the previous file content entirely.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written, synced, or renamed. The temporary file is removed
            (best effort) before the error propagates.
    """
    p = _path()
    p.parent.mkdir(parents=True, exist_ok=True)
    # The temp file must live in the target directory so os.replace is a
    # same-filesystem rename.
    fd, tmp = tempfile.mkstemp(dir=str(p.parent), prefix=".app_settings.", suffix=".tmp")
    try:
        with os.fdopen(fd, "w") as fh:
            json.dump(overlay, fh, indent=2, sort_keys=True)
            # Force the bytes to disk *before* the rename. Without this a
            # crash or power loss shortly after os.replace can leave the new
            # name pointing at an empty or truncated file.
            fh.flush()
            os.fsync(fh.fileno())
        os.replace(tmp, p)
    except BaseException:
        # Clean up the temp file on any failure (including KeyboardInterrupt),
        # then let the original error propagate.
        try:
            os.unlink(tmp)
        except OSError:
            pass
        raise
|
|
|
|
|
|
def public_view() -> dict:
    """Build the gear form payload for the browser.

    Returns a dict with two entries:

    - ``"groups"``: the groups in the order they first appear in FIELDS, each
      ``{"name": ..., "fields": [...]}``, where every field carries its key,
      label, type, placeholder and help text.
    - ``"clear_sentinel"``: the value the UI must submit to delete a secret.

    Non-secret fields include their current overlay ``value`` ('' when not
    set). Secret values are never sent — only a boolean ``set`` flag.
    """
    stored = load_overlay()
    # dicts keep insertion order, so the groups come out in FIELDS order.
    by_group: dict[str, dict] = {}
    for spec in FIELDS:
        group_name = spec["group"]
        bucket = by_group.setdefault(group_name, {"name": group_name, "fields": []})

        name = spec["key"]
        item = {
            "key": name,
            "label": spec["label"],
            "type": spec["type"],
            "placeholder": spec.get("placeholder", ""),
            "help": spec.get("help", ""),
        }
        if spec["type"] == "secret":
            item["set"] = bool(stored.get(name))
        else:
            item["value"] = stored.get(name, "")
        bucket["fields"].append(item)

    return {"groups": list(by_group.values()), "clear_sentinel": CLEAR_SENTINEL}
|
|
|
|
|
|
class SettingsError(ValueError):
    """Raised for invalid input to apply(); the endpoint reports it as a 422."""
|
|
|
|
|
|
def _validate(key: str, value) -> str:
    """Clean and validate a single setting value.

    Args:
        key: Env-var name of the setting; must be declared in FIELDS.
        value: Submitted value. ``None`` is treated as ''; anything else is
            converted with ``str()`` and stripped of surrounding whitespace.

    Returns:
        The stripped string. '' is valid and means 'unset'. CLEAR_SENTINEL is
        passed through unchanged for the caller to interpret (secret removal).

    Raises:
        SettingsError: If the key is unknown, the value contains control
            characters, or an int-typed field is not a port number between
            1 and 65535 written in ASCII digits.
    """
    if key not in _BY_KEY:
        raise SettingsError(f"unknown setting: {key}")
    val = ("" if value is None else str(value)).strip()
    if val == CLEAR_SENTINEL:
        return val
    if _BAD_CHARS.search(val):
        raise SettingsError(f"{key}: control characters are not allowed")
    if key in _INT_KEYS and val:
        # str.isdigit() alone is not enough: it is also true for non-ASCII
        # digits such as "²", on which int() raises a plain ValueError that
        # callers catching SettingsError would miss. Requiring ASCII and
        # bounding the length before converting (leading zeros do not count)
        # also keeps very long digit strings away from int().
        significant = val.lstrip("0")
        is_port = (
            val.isascii()
            and val.isdigit()
            and 0 < len(significant) <= 5
            and int(significant) <= 65535
        )
        if not is_port:
            raise SettingsError(f"{key}: must be a port number between 1 and 65535")
    return val
|
|
|
|
|
|
def apply(updates: Mapping[str, str]) -> dict[str, str]:
    """Validate *updates*, merge them into the stored overlay, and persist it.

    Per-key rules:
      - unknown key / bad int / control chars → rejected (SettingsError from
        _validate, surfaced as 422)
      - secret + CLEAR_SENTINEL → the stored secret is deleted
      - secret + blank value → the stored secret is left unchanged (not wiped)
      - non-secret + blank (or CLEAR_SENTINEL) → the key is deleted, reverting
        to env/default
      - anything else → the key is set

    Nothing is written if any value fails validation, because the file is
    only written after every update has been processed.

    Returns the new overlay. The caller reloads Settings so the change goes live.
    """
    merged = load_overlay()
    for name, submitted in updates.items():
        cleaned = _validate(name, submitted)

        # A blank secret means "keep what is stored": the value is never
        # echoed back, so the operator cannot retype it.
        if name in _SECRET_KEYS and not cleaned:
            continue

        if cleaned == CLEAR_SENTINEL or not cleaned:
            merged.pop(name, None)
        else:
            merged[name] = cleaned

    _write(merged)
    return merged
|