feat(#400): add Docker Compose swarm/prod deployment for speech services
All checks were successful
ci/woodpecker/push/infra Pipeline was successful
All checks were successful
ci/woodpecker/push/infra Pipeline was successful
Add docker/docker-compose.sample.speech.yml for standalone speech services deployment in Docker Swarm with Portainer compatibility: - Speaches (STT + basic TTS) with Whisper model configuration - Kokoro TTS (default high-quality TTS) always deployed - Chatterbox TTS (premium, GPU) commented out as optional - Traefik labels for reverse proxy routing with TLS - Health checks on all services - Volume persistence for Whisper models - GPU reservation via Swarm generic resources for Chatterbox - Environment variable substitution for Portainer - Comprehensive header documentation Fixes #400
This commit is contained in:
164
docker/docker-compose.sample.speech.yml
Normal file
164
docker/docker-compose.sample.speech.yml
Normal file
@@ -0,0 +1,164 @@
# ==============================================
# Speech Services - Sample Swarm Deployment
# ==============================================
#
# Standalone speech services deployment for use with Mosaic Stack.
# This is SEPARATE infrastructure — not part of the Mosaic Stack itself.
# Mosaic connects to it via SPEACHES_URL and TTS_URL environment variables.
#
# Provides:
#   - Speaches: Speech-to-Text (Whisper) + basic TTS fallback
#   - Kokoro TTS: Default high-quality text-to-speech
#   - Chatterbox TTS: Premium TTS with voice cloning (optional, requires GPU)
#
# Usage (Docker Swarm via Portainer):
#   1. Create a new stack in Portainer
#   2. Paste this file or point to the repo
#   3. Set environment variables in Portainer's env var section
#   4. Deploy the stack
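#
# Usage (Docker Swarm CLI):
#   1. Create a .env file with the variables below
#   2. Export it into your shell, because `docker stack deploy` does not
#      read .env files on its own:
#        set -a; . ./.env; set +a
#   3. docker stack deploy -c docker-compose.sample.speech.yml speech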
#
# Required Environment Variables:
#   STT_DOMAIN=stt.example.com    # Domain for Speaches (STT + basic TTS)
#   TTS_DOMAIN=tts.example.com    # Domain for Kokoro TTS (default TTS)
#
# Optional Environment Variables:
#   (Values shown are the defaults used by this file, except
#   TRAEFIK_CERTRESOLVER, which defaults to empty, and CHATTERBOX_TTS_DOMAIN,
#   which has no default and is only used by the optional Chatterbox service.)
#   WHISPER_MODEL=Systran/faster-whisper-large-v3-turbo   # Whisper model for STT
#   CHATTERBOX_TTS_DOMAIN=tts-premium.example.com         # Domain for Chatterbox (premium TTS)
#   TRAEFIK_ENTRYPOINT=websecure                          # Traefik entrypoint name
#   TRAEFIK_CERTRESOLVER=letsencrypt                      # Traefik cert resolver
#   TRAEFIK_DOCKER_NETWORK=traefik-public                 # Traefik network name
#   TRAEFIK_TLS_ENABLED=true                              # Enable TLS on Traefik routers
#
# Connecting to Mosaic Stack:
#   Add to your Mosaic Stack .env:
#     SPEACHES_URL=http://speaches:8000     (if same Docker network)
#     SPEACHES_URL=https://stt.example.com  (if external)
#     TTS_URL=http://kokoro-tts:8880        (if same Docker network)
#     TTS_URL=https://tts.example.com       (if external)
#
# GPU Requirements (Chatterbox only):
#   - NVIDIA GPU with CUDA support
#   - nvidia-container-toolkit installed on Docker host
#   - Docker runtime configured for GPU access
#   - Note: Docker Swarm requires "generic resources" for GPU scheduling.
#     See: https://docs.docker.com/engine/daemon/nvidia-gpu/#configure-gpus-for-docker-swarm
#
# ==============================================

services:
  # ======================
  # Speaches (STT + basic TTS)
  # ======================
  # Primary speech-to-text service using Whisper.
  # Also provides basic TTS as a fallback.
  speaches:
    image: ghcr.io/speaches-ai/speaches:latest
    environment:
      # Double underscore is the variable name the service is configured with;
      # the model falls back to the turbo large-v3 build when WHISPER_MODEL is unset.
      WHISPER__MODEL: ${WHISPER_MODEL:-Systran/faster-whisper-large-v3-turbo}
    volumes:
      # Persist downloaded models across container restarts/redeploys.
      # NOTE(review): upstream Speaches documentation mounts the Hugging Face
      # hub cache at /home/ubuntu/.cache/huggingface/hub (the image is
      # documented as running as a non-root user), so a mount under /root would
      # not capture the downloads. Verify against the image tag you deploy. If
      # a volume was already created with the previous /root mount point,
      # remove and recreate it so it is initialized with correct ownership.
      - speaches-models:/home/ubuntu/.cache/huggingface/hub
    healthcheck:
      # Requires `curl` inside the image; the container is unhealthy if
      # GET /health on port 8000 does not succeed.
      test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5
      # Grace period before failed probes count against the retry budget.
      start_period: 120s
    networks:
      - internal
      - traefik-public
    deploy:
      restart_policy:
        condition: on-failure
        delay: 10s
      # Labels live under `deploy` so they are attached to the Swarm service.
      labels:
        - "traefik.enable=true"
        # `:?` aborts the deploy with a clear message when the required domain
        # is missing, instead of silently producing an empty Host() rule.
        - "traefik.http.routers.speech-stt.rule=Host(`${STT_DOMAIN:?STT_DOMAIN must be set}`)"
        - "traefik.http.routers.speech-stt.entrypoints=${TRAEFIK_ENTRYPOINT:-websecure}"
        - "traefik.http.routers.speech-stt.tls=${TRAEFIK_TLS_ENABLED:-true}"
        - "traefik.http.routers.speech-stt.tls.certresolver=${TRAEFIK_CERTRESOLVER:-}"
        - "traefik.http.services.speech-stt.loadbalancer.server.port=8000"
        - "traefik.docker.network=${TRAEFIK_DOCKER_NETWORK:-traefik-public}"

  # ======================
  # Kokoro TTS (Default TTS)
  # ======================
  # High-quality text-to-speech engine. Always deployed alongside Speaches.
  kokoro-tts:
    image: ghcr.io/remsky/kokoro-fastapi:latest-cpu
    healthcheck:
      # Requires `curl` inside the image; probes GET /health on port 8880.
      test: ["CMD-SHELL", "curl -f http://localhost:8880/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 120s
    networks:
      - internal
      - traefik-public
    deploy:
      restart_policy:
        condition: on-failure
        delay: 10s
      # Labels live under `deploy` so they are attached to the Swarm service.
      labels:
        - "traefik.enable=true"
        # `:?` aborts the deploy when the required domain is missing.
        - "traefik.http.routers.speech-tts.rule=Host(`${TTS_DOMAIN:?TTS_DOMAIN must be set}`)"
        - "traefik.http.routers.speech-tts.entrypoints=${TRAEFIK_ENTRYPOINT:-websecure}"
        - "traefik.http.routers.speech-tts.tls=${TRAEFIK_TLS_ENABLED:-true}"
        - "traefik.http.routers.speech-tts.tls.certresolver=${TRAEFIK_CERTRESOLVER:-}"
        - "traefik.http.services.speech-tts.loadbalancer.server.port=8880"
        - "traefik.docker.network=${TRAEFIK_DOCKER_NETWORK:-traefik-public}"

  # ======================
  # Chatterbox TTS (Premium TTS - Optional)
  # ======================
  # Premium TTS with voice cloning capabilities. Requires NVIDIA GPU.
  #
  # To enable: Uncomment this service and set CHATTERBOX_TTS_DOMAIN.
  #
  # For Docker Swarm GPU scheduling, configure generic resources on the node:
  #   /etc/docker/daemon.json:
  #   { "runtimes": { "nvidia": { ... } },
  #     "node-generic-resources": ["NVIDIA-GPU=0"] }
  #
  # chatterbox-tts:
  #   image: devnen/chatterbox-tts-server:latest
  #   healthcheck:
  #     test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
  #     interval: 30s
  #     timeout: 10s
  #     retries: 5
  #     start_period: 180s
  #   networks:
  #     - internal
  #     - traefik-public
  #   deploy:
  #     restart_policy:
  #       condition: on-failure
  #       delay: 10s
  #     resources:
  #       reservations:
  #         generic_resources:
  #           - discrete_resource_spec:
  #               kind: "NVIDIA-GPU"
  #               value: 1
  #     labels:
  #       - "traefik.enable=true"
  #       - "traefik.http.routers.speech-tts-premium.rule=Host(`${CHATTERBOX_TTS_DOMAIN:?CHATTERBOX_TTS_DOMAIN must be set}`)"
  #       - "traefik.http.routers.speech-tts-premium.entrypoints=${TRAEFIK_ENTRYPOINT:-websecure}"
  #       - "traefik.http.routers.speech-tts-premium.tls=${TRAEFIK_TLS_ENABLED:-true}"
  #       - "traefik.http.routers.speech-tts-premium.tls.certresolver=${TRAEFIK_CERTRESOLVER:-}"
  #       - "traefik.http.services.speech-tts-premium.loadbalancer.server.port=8000"
  #       - "traefik.docker.network=${TRAEFIK_DOCKER_NETWORK:-traefik-public}"

volumes:
  # Named volume mounted by the speaches service to keep downloaded models.
  speaches-models: {}

networks:
  # Overlay network created by this stack and joined by every speech service.
  internal:
    driver: overlay

  # Existing network that Traefik is attached to. It is not created by this
  # stack; its real name comes from TRAEFIK_DOCKER_NETWORK
  # (default: traefik-public).
  traefik-public:
    name: ${TRAEFIK_DOCKER_NETWORK:-traefik-public}
    external: true
Reference in New Issue
Block a user