feat(#400): add Docker Compose swarm/prod deployment for speech services
All checks were successful
ci/woodpecker/push/infra Pipeline was successful

Add docker/docker-compose.sample.speech.yml for standalone speech services
deployment in Docker Swarm with Portainer compatibility:

- Speaches (STT + basic TTS) with Whisper model configuration
- Kokoro TTS (default high-quality TTS) always deployed
- Chatterbox TTS (premium, GPU) commented out as optional
- Traefik labels for reverse proxy routing with TLS
- Health checks on all services
- Volume persistence for Whisper models
- GPU reservation via Swarm generic resources for Chatterbox
- Environment variable substitution for Portainer
- Comprehensive header documentation

Fixes #400
This commit is contained in:
2026-02-15 02:51:13 -06:00
parent 527262af38
commit b3d6d73348

View File

@@ -0,0 +1,164 @@
# ==============================================
# Speech Services - Sample Swarm Deployment
# ==============================================
#
# Standalone speech services deployment for use with Mosaic Stack.
# This is SEPARATE infrastructure — not part of the Mosaic Stack itself.
# Mosaic connects to it via SPEACHES_URL and TTS_URL environment variables.
#
# Provides:
# - Speaches: Speech-to-Text (Whisper) + basic TTS fallback
# - Kokoro TTS: Default high-quality text-to-speech
# - Chatterbox TTS: Premium TTS with voice cloning (optional, requires GPU)
#
# Usage (Docker Swarm via Portainer):
# 1. Create a new stack in Portainer
# 2. Paste this file or point to the repo
# 3. Set environment variables in Portainer's env var section
# 4. Deploy the stack
#
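# Usage (Docker Swarm CLI):
# 1. Create .env file with variables below
# 2. Export the variables into your shell first, because
#    `docker stack deploy` does not read .env files on its own:
#      set -a && . ./.env && set +a
# 3. docker stack deploy -c docker-compose.sample.speech.yml speech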
#
# Required Environment Variables:
# STT_DOMAIN=stt.example.com # Domain for Speaches (STT + basic TTS)
# TTS_DOMAIN=tts.example.com # Domain for Kokoro TTS (default TTS)
#
# Optional Environment Variables:
# WHISPER_MODEL=Systran/faster-whisper-large-v3-turbo # Whisper model for STT
# CHATTERBOX_TTS_DOMAIN=tts-premium.example.com # Domain for Chatterbox (premium TTS)
# TRAEFIK_ENTRYPOINT=websecure # Traefik entrypoint name
# TRAEFIK_CERTRESOLVER=letsencrypt # Traefik cert resolver
# TRAEFIK_DOCKER_NETWORK=traefik-public # Traefik network name
# TRAEFIK_TLS_ENABLED=true # Enable TLS on Traefik routers
#
# Connecting to Mosaic Stack:
# Add to your Mosaic Stack .env:
# SPEACHES_URL=http://speaches:8000 (if same Docker network)
# SPEACHES_URL=https://stt.example.com (if external)
# TTS_URL=http://kokoro-tts:8880 (if same Docker network)
# TTS_URL=https://tts.example.com (if external)
#
# GPU Requirements (Chatterbox only):
# - NVIDIA GPU with CUDA support
# - nvidia-container-toolkit installed on Docker host
# - Docker runtime configured for GPU access
# - Note: Docker Swarm requires "generic resources" for GPU scheduling.
# See: https://docs.docker.com/engine/daemon/nvidia-gpu/#configure-gpus-for-docker-swarm
#
# ==============================================
services:
# ======================
# Speaches (STT + basic TTS)
# ======================
# Primary speech-to-text service using Whisper.
# Also provides basic TTS as a fallback.
speaches:
  # Speech-to-text service (Whisper) that also acts as the basic TTS fallback.
  # Listens on container port 8000 and is routed by Traefik under ${STT_DOMAIN}.
  image: ghcr.io/speaches-ai/speaches:latest
  environment:
    # Model identifier passed to the container; override with WHISPER_MODEL.
    # Quoted so the interpolated value is always treated as a string.
    WHISPER__MODEL: "${WHISPER_MODEL:-Systran/faster-whisper-large-v3-turbo}"
  volumes:
    # Persist downloaded models across restarts and redeploys.
    # The upstream Speaches install docs mount the Hugging Face hub cache at
    # /home/ubuntu/.cache/huggingface/hub (the image runs as the non-root
    # "ubuntu" user), so a mount under /root would not capture the models.
    - speaches-models:/home/ubuntu/.cache/huggingface/hub
  healthcheck:
    # Probe the HTTP health endpoint from inside the container.
    test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
    interval: 30s
    timeout: 10s
    retries: 5
    # Failures during the first two minutes do not count against retries.
    start_period: 120s
  networks:
    - internal
    - traefik-public
  deploy:
    restart_policy:
      condition: on-failure
      delay: 10s
    # Swarm service labels read by Traefik to build the router and service.
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.speech-stt.rule=Host(`${STT_DOMAIN}`)"
      - "traefik.http.routers.speech-stt.entrypoints=${TRAEFIK_ENTRYPOINT:-websecure}"
      - "traefik.http.routers.speech-stt.tls=${TRAEFIK_TLS_ENABLED:-true}"
      # Empty unless TRAEFIK_CERTRESOLVER is set.
      - "traefik.http.routers.speech-stt.tls.certresolver=${TRAEFIK_CERTRESOLVER:-}"
      - "traefik.http.services.speech-stt.loadbalancer.server.port=8000"
      # Network Traefik should use to reach this service.
      - "traefik.docker.network=${TRAEFIK_DOCKER_NETWORK:-traefik-public}"
# ======================
# Kokoro TTS (Default TTS)
# ======================
# High-quality text-to-speech engine. Always deployed alongside Speaches.
kokoro-tts:
  # Default text-to-speech engine (CPU image tag). Serves on container port
  # 8880 and is exposed through Traefik under ${TTS_DOMAIN}.
  image: 'ghcr.io/remsky/kokoro-fastapi:latest-cpu'
  networks:
    - internal
    - traefik-public
  deploy:
    # Swarm service labels consumed by Traefik for routing.
    labels:
      - 'traefik.enable=true'
      - 'traefik.docker.network=${TRAEFIK_DOCKER_NETWORK:-traefik-public}'
      - 'traefik.http.services.speech-tts.loadbalancer.server.port=8880'
      - 'traefik.http.routers.speech-tts.rule=Host(`${TTS_DOMAIN}`)'
      - 'traefik.http.routers.speech-tts.entrypoints=${TRAEFIK_ENTRYPOINT:-websecure}'
      - 'traefik.http.routers.speech-tts.tls=${TRAEFIK_TLS_ENABLED:-true}'
      - 'traefik.http.routers.speech-tts.tls.certresolver=${TRAEFIK_CERTRESOLVER:-}'
    # Restart only after a failure, waiting 10 seconds between attempts.
    restart_policy:
      delay: 10s
      condition: on-failure
  healthcheck:
    # Shell-form probe against the in-container health endpoint.
    test:
      - CMD-SHELL
      - 'curl -f http://localhost:8880/health || exit 1'
    start_period: 120s
    interval: 30s
    timeout: 10s
    retries: 5
# ======================
# Chatterbox TTS (Premium TTS - Optional)
# ======================
# Premium TTS with voice cloning capabilities. Requires NVIDIA GPU.
#
# To enable: Uncomment this service and set CHATTERBOX_TTS_DOMAIN.
#
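# For Docker Swarm GPU scheduling, configure generic resources on the node:
# /etc/docker/daemon.json:
# { "runtimes": { "nvidia": { ... } },
# "node-generic-resources": ["NVIDIA-GPU=0"] }
# NOTE(review): an integer after "=" appears to be read by Swarm as a resource
# count, so "NVIDIA-GPU=0" may advertise zero GPUs and leave the reservation
# below unschedulable. Confirm, and prefer the real GPU count or a named
# resource such as "NVIDIA-GPU=<GPU-UUID>".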
#
# chatterbox-tts:
# image: devnen/chatterbox-tts-server:latest
# healthcheck:
# test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
# interval: 30s
# timeout: 10s
# retries: 5
# start_period: 180s
# networks:
# - internal
# - traefik-public
# deploy:
# restart_policy:
# condition: on-failure
# delay: 10s
# resources:
# reservations:
# generic_resources:
# - discrete_resource_spec:
# kind: "NVIDIA-GPU"
# value: 1
# labels:
# - "traefik.enable=true"
# - "traefik.http.routers.speech-tts-premium.rule=Host(`${CHATTERBOX_TTS_DOMAIN}`)"
# - "traefik.http.routers.speech-tts-premium.entrypoints=${TRAEFIK_ENTRYPOINT:-websecure}"
# - "traefik.http.routers.speech-tts-premium.tls=${TRAEFIK_TLS_ENABLED:-true}"
# - "traefik.http.routers.speech-tts-premium.tls.certresolver=${TRAEFIK_CERTRESOLVER:-}"
# - "traefik.http.services.speech-tts-premium.loadbalancer.server.port=8000"
# - "traefik.docker.network=${TRAEFIK_DOCKER_NETWORK:-traefik-public}"
volumes:
  # Named volume mounted by the speaches service for its model cache.
  # No options are set, so it is created with the default settings.
  speaches-models: {}
networks:
  # Pre-existing proxy network shared with Traefik. It is declared external,
  # so this stack does not create it. The real network name is overridable.
  traefik-public:
    name: "${TRAEFIK_DOCKER_NETWORK:-traefik-public}"
    external: true
  # Overlay network created by this stack and joined by the speech services.
  internal:
    driver: overlay