v0.13.0:4 - redaction gateway, embeddings proxy, expanded audio API
- Add redaction gateway (redaction_gateway.py, redaction/ scrub + tests)
- Add embeddings proxy and spark_embed service (Dockerfile + main.py)
- Expand audio_proxy with speaker-aware handling; deep_health/health/server updates
- Package: configureSparks action + sparkConfig model updates, manifest/main wiring
- Docs: AUDIO_API, EMBEDDINGS, REDACTION_GATEWAY; HANDOFF and runbook/known-issues refresh
This commit is contained in:
+68
-19
@@ -17,8 +17,10 @@ from .deep_health import DeepHealth
|
||||
from .disk import delete_from_disk, probe_disk
|
||||
from .download import DownloadManager
|
||||
from .llm_proxy import build_router as build_llm_router
|
||||
from .embeddings_proxy import build_router as build_embeddings_router
|
||||
from .redaction_gateway import build_router as build_redaction_router, MapStore
|
||||
from .hardware import HardwareProbe
|
||||
from .health import check_magpie, check_parakeet, check_vllm
|
||||
from .health import check_kokoro, check_parakeet, check_vllm, check_embeddings, check_qdrant
|
||||
from .models import load_catalog
|
||||
from .nim import SUGGESTED_NIMS, CATALOG_URL, NimManager
|
||||
from .overrides import add_custom, delete_custom, extract_knobs_from_args, load_overrides, set_knobs
|
||||
@@ -60,7 +62,7 @@ app.mount("/static", StaticFiles(directory=_STATIC_DIR), name="static")
|
||||
|
||||
# OpenAI-compatible audio proxy: /v1/audio/speech, /v1/audio/transcriptions, /v1/models.
|
||||
# Lets Open WebUI, Home Assistant, and any other OpenAI-shaped client talk to
|
||||
# Parakeet (STT) and Magpie (TTS) through a single spark-control URL.
|
||||
# Parakeet (STT) and Kokoro (TTS) through a single spark-control URL.
|
||||
# Passing deep_health lets the proxy fire an immediate wedge-detect + auto-restart
|
||||
# when Parakeet returns 500, instead of waiting up to 5 min for the periodic probe.
|
||||
app.include_router(build_audio_router(settings, deep_health=deep_health))
|
||||
@@ -71,6 +73,20 @@ app.include_router(build_audio_router(settings, deep_health=deep_health))
|
||||
# as the audio proxy — clients only need one URL for everything.
|
||||
app.include_router(build_llm_router(settings))
|
||||
|
||||
# OpenAI-compatible embeddings + rerank + hybrid search proxy:
|
||||
# /v1/embeddings -> spark-embed (bge-m3 dense), /v1/rerank -> spark-embed
|
||||
# (bge-reranker-v2-m3), /api/search -> orchestrated dense(+sparse) retrieval
|
||||
# from Qdrant with optional cross-encoder rerank. Same single-trusted-host
|
||||
# model as the LLM and audio proxies.
|
||||
app.include_router(build_embeddings_router(settings))
|
||||
|
||||
# Redaction gateway: /scrub + /rehydrate. The privacy boundary between sovereign
|
||||
# LP data and the Claude API — de-identify context before it leaves the box,
|
||||
# re-identify Claude's response locally. The pseudonym map (the de-anon key) is
|
||||
# held server-side in a TTL-swept store on /data and never leaves this host.
|
||||
redaction_map_store = MapStore(settings.redaction_map_db, settings.redaction_map_ttl)
|
||||
app.include_router(build_redaction_router(settings, redaction_map_store))
|
||||
|
||||
|
||||
@app.get("/", include_in_schema=False)
|
||||
async def index() -> FileResponse:
|
||||
@@ -274,7 +290,7 @@ async def run_deep_health(service: str) -> dict:
|
||||
|
||||
|
||||
class HealthEventBody(BaseModel):
|
||||
service: str # e.g. "parakeet", "magpie", "vllm"
|
||||
service: str # e.g. "parakeet", "kokoro", "vllm"
|
||||
ok: bool # true on success, false on failure
|
||||
source: str | None = None # what app reported (e.g. "open-webui")
|
||||
error: str | None = None # optional detail
|
||||
@@ -344,7 +360,7 @@ async def wake_spark(name: str) -> dict:
|
||||
|
||||
@app.get("/api/services")
|
||||
async def get_services() -> dict:
|
||||
"""Lifecycle state of always-on support services (Parakeet, Magpie, …).
|
||||
"""Lifecycle state of always-on support services (Parakeet, Kokoro, …).
|
||||
|
||||
Each entry includes:
|
||||
- host/port/container/user (configured)
|
||||
@@ -362,8 +378,15 @@ async def get_services() -> dict:
|
||||
docker = await docker_state(settings, svc)
|
||||
if name == "parakeet":
|
||||
http = await check_parakeet(settings)
|
||||
elif name == "kokoro":
|
||||
http = await check_kokoro(settings)
|
||||
elif name == "embeddings":
|
||||
http = await check_embeddings(settings)
|
||||
elif name == "qdrant":
|
||||
http = await check_qdrant(settings)
|
||||
else:
|
||||
http = await check_magpie(settings)
|
||||
# Custom services expose a /health endpoint by convention.
|
||||
http = await check_kokoro(settings) if svc.kind == "tts" else {"ok": None, "base_url": svc.host and f"http://{svc.host}:{svc.port}"}
|
||||
return name, {
|
||||
"host": svc.host,
|
||||
"user": svc.user,
|
||||
@@ -372,7 +395,10 @@ async def get_services() -> dict:
|
||||
"kind": svc.kind,
|
||||
"base_url": http.get("base_url"),
|
||||
"http_ready": bool(http.get("ok")),
|
||||
"model": (http.get("detail") or {}).get("model") if isinstance(http.get("detail"), dict) else None,
|
||||
# Prefer the check fn's own top-level model key (embeddings reports
|
||||
# it there); fall back to a model field inside detail for services
|
||||
# whose /health embeds it (parakeet).
|
||||
"model": http.get("model") or ((http.get("detail") or {}).get("model") if isinstance(http.get("detail"), dict) else None),
|
||||
"docker_state": docker.get("state"),
|
||||
"restart_count": docker.get("restart_count"),
|
||||
"started_at": docker.get("started_at"),
|
||||
@@ -484,8 +510,8 @@ async def stream_nim_install(job_id: str):
|
||||
|
||||
@app.delete("/api/services/{name}")
|
||||
async def del_service(name: str) -> dict:
|
||||
# Only allow deleting custom services (not the bundled parakeet/magpie keys)
|
||||
if name in ("parakeet", "magpie"):
|
||||
# Only allow deleting custom services (not the bundled built-in keys)
|
||||
if name in ("parakeet", "kokoro", "embeddings", "qdrant"):
|
||||
raise HTTPException(400, "built-in service; cannot delete (use Configure Sparks to point at a different host)")
|
||||
delete_custom_service(name)
|
||||
return {"ok": True, "name": name}
|
||||
@@ -551,12 +577,15 @@ async def post_speech_models_restart() -> dict:
|
||||
@app.get("/api/endpoints")
|
||||
async def get_endpoints() -> dict:
|
||||
"""Service-discovery summary. Stable shape; other apps on the LAN can poll this
|
||||
to learn the OpenAI-compatible vLLM endpoint, the Parakeet STT endpoint, and the
|
||||
Magpie TTS endpoint without needing to know the individual Spark IPs."""
|
||||
vllm, parakeet, magpie = await asyncio.gather(
|
||||
to learn the OpenAI-compatible vLLM endpoint, the Parakeet STT endpoint, the
|
||||
Kokoro TTS endpoint, and the embeddings + Qdrant retrieval endpoints without
|
||||
needing to know the individual Spark IPs."""
|
||||
vllm, parakeet, kokoro, embeddings, qdrant = await asyncio.gather(
|
||||
check_vllm(settings),
|
||||
check_parakeet(settings),
|
||||
check_magpie(settings),
|
||||
check_kokoro(settings),
|
||||
check_embeddings(settings),
|
||||
check_qdrant(settings),
|
||||
)
|
||||
return {
|
||||
"vllm": {
|
||||
@@ -571,31 +600,51 @@ async def get_endpoints() -> dict:
|
||||
"kind": "stt",
|
||||
"model": (parakeet.get("detail") or {}).get("model") if isinstance(parakeet.get("detail"), dict) else None,
|
||||
},
|
||||
"magpie": {
|
||||
"ready": bool(magpie.get("ok")),
|
||||
"base_url": magpie.get("base_url"),
|
||||
"kokoro": {
|
||||
"ready": bool(kokoro.get("ok")),
|
||||
"base_url": kokoro.get("base_url"),
|
||||
"kind": "tts",
|
||||
},
|
||||
"embeddings": {
|
||||
"ready": bool(embeddings.get("ok")),
|
||||
"base_url": embeddings.get("base_url"),
|
||||
"kind": "embedding",
|
||||
"model": embeddings.get("model"),
|
||||
# The proxied OpenAI-compatible endpoints live on Spark Control itself.
|
||||
"openai_endpoints": ["/v1/embeddings", "/v1/rerank", "/api/search"],
|
||||
},
|
||||
"qdrant": {
|
||||
"ready": bool(qdrant.get("ok")),
|
||||
"base_url": qdrant.get("base_url"),
|
||||
"kind": "vectordb",
|
||||
"collection": settings.qdrant_collection or None,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@app.get("/api/status")
|
||||
async def get_status() -> dict:
|
||||
vllm, parakeet, magpie = await asyncio.gather(
|
||||
vllm, parakeet, kokoro, embeddings, qdrant = await asyncio.gather(
|
||||
check_vllm(settings),
|
||||
check_parakeet(settings),
|
||||
check_magpie(settings),
|
||||
check_kokoro(settings),
|
||||
check_embeddings(settings),
|
||||
check_qdrant(settings),
|
||||
)
|
||||
# Feed health into the connectivity log (deduped — only logs on transition)
|
||||
record_state("vllm", bool(vllm.get("ok")))
|
||||
record_state("parakeet", bool(parakeet.get("ok")))
|
||||
record_state("magpie", bool(magpie.get("ok")))
|
||||
record_state("kokoro", bool(kokoro.get("ok")))
|
||||
record_state("embeddings", bool(embeddings.get("ok")))
|
||||
record_state("qdrant", bool(qdrant.get("ok")))
|
||||
current_key = _identify_current_model(vllm.get("current_model"))
|
||||
return {
|
||||
"configured": settings.configured,
|
||||
"vllm": vllm,
|
||||
"parakeet": parakeet,
|
||||
"magpie": magpie,
|
||||
"kokoro": kokoro,
|
||||
"embeddings": embeddings,
|
||||
"qdrant": qdrant,
|
||||
"current_model_key": current_key,
|
||||
"current_swap_job": swap_manager.current_job_id,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user