All checks were successful
ci/woodpecker/push/infra Pipeline was successful
Add docker/docker-compose.sample.speech.yml for standalone speech services deployment in Docker Swarm with Portainer compatibility: - Speaches (STT + basic TTS) with Whisper model configuration - Kokoro TTS (default high-quality TTS) always deployed - Chatterbox TTS (premium, GPU) commented out as optional - Traefik labels for reverse proxy routing with TLS - Health checks on all services - Volume persistence for Whisper models - GPU reservation via Swarm generic resources for Chatterbox - Environment variable substitution for Portainer - Comprehensive header documentation Fixes #400
165 lines
6.3 KiB
YAML
165 lines
6.3 KiB
YAML
# ==============================================
# Speech Services - Sample Swarm Deployment
# ==============================================
#
# Standalone speech services deployment for use with Mosaic Stack.
# This is SEPARATE infrastructure — not part of the Mosaic Stack itself.
# Mosaic connects to it via SPEACHES_URL and TTS_URL environment variables.
#
# Provides:
#   - Speaches: Speech-to-Text (Whisper) + basic TTS fallback
#   - Kokoro TTS: Default high-quality text-to-speech
#   - Chatterbox TTS: Premium TTS with voice cloning (optional, requires GPU)
#
# Usage (Docker Swarm via Portainer):
#   1. Create a new stack in Portainer
#   2. Paste this file or point to the repo
#   3. Set environment variables in Portainer's env var section
#   4. Deploy the stack
#
# Usage (Docker Swarm CLI):
#   1. Create .env file with variables below
#   2. Export the variables into your shell. `docker stack deploy` only
#      substitutes variables from the environment; it does not read .env
#      files on its own:
#        set -a && . ./.env && set +a
#   3. docker stack deploy -c docker-compose.sample.speech.yml speech
#
# Required Environment Variables:
#   STT_DOMAIN=stt.example.com   # Domain for Speaches (STT + basic TTS)
#   TTS_DOMAIN=tts.example.com   # Domain for Kokoro TTS (default TTS)
#
# Optional Environment Variables (example values):
#   WHISPER_MODEL=Systran/faster-whisper-large-v3-turbo   # Whisper model for STT
#   CHATTERBOX_TTS_DOMAIN=tts-premium.example.com         # Domain for Chatterbox (premium TTS)
#   TRAEFIK_ENTRYPOINT=websecure                          # Traefik entrypoint name
#   TRAEFIK_CERTRESOLVER=letsencrypt                      # Traefik cert resolver (empty if unset)
#   TRAEFIK_DOCKER_NETWORK=traefik-public                 # Traefik network name
#   TRAEFIK_TLS_ENABLED=true                              # Enable TLS on Traefik routers
#
# Connecting to Mosaic Stack:
#   Add to your Mosaic Stack .env:
#     SPEACHES_URL=http://speaches:8000      (if same Docker network)
#     SPEACHES_URL=https://stt.example.com   (if external)
#     TTS_URL=http://kokoro-tts:8880         (if same Docker network)
#     TTS_URL=https://tts.example.com        (if external)
#
# GPU Requirements (Chatterbox only):
#   - NVIDIA GPU with CUDA support
#   - nvidia-container-toolkit installed on Docker host
#   - Docker runtime configured for GPU access
#   - Note: Docker Swarm requires "generic resources" for GPU scheduling.
#     See: https://docs.docker.com/engine/daemon/nvidia-gpu/#configure-gpus-for-docker-swarm
#
# ==============================================

services:
  # ======================
  # Speaches (STT + basic TTS)
  # ======================
  # Primary speech-to-text service using Whisper.
  # Also provides basic TTS as a fallback.
  speaches:
    image: ghcr.io/speaches-ai/speaches:latest
    environment:
      # Whisper model to use; override with WHISPER_MODEL.
      # Quoted so the substituted value is always a YAML string.
      WHISPER__MODEL: "${WHISPER_MODEL:-Systran/faster-whisper-large-v3-turbo}"
    volumes:
      # Keep the Hugging Face cache on a named volume so it outlives the
      # container.
      - speaches-models:/root/.cache/huggingface
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5
      # Probe failures during this window do not count against `retries`.
      start_period: 120s
    networks:
      - internal
      - traefik-public
    deploy:
      restart_policy:
        condition: on-failure
        delay: 10s
      # Routing labels live under `deploy` so they are set on the Swarm
      # service rather than on individual containers.
      labels:
        - "traefik.enable=true"
        # `:?` aborts the deploy with a clear message when the required
        # domain is missing, instead of creating a router with an empty
        # Host() rule.
        - "traefik.http.routers.speech-stt.rule=Host(`${STT_DOMAIN:?STT_DOMAIN must be set}`)"
        - "traefik.http.routers.speech-stt.entrypoints=${TRAEFIK_ENTRYPOINT:-websecure}"
        - "traefik.http.routers.speech-stt.tls=${TRAEFIK_TLS_ENABLED:-true}"
        - "traefik.http.routers.speech-stt.tls.certresolver=${TRAEFIK_CERTRESOLVER:-}"
        - "traefik.http.services.speech-stt.loadbalancer.server.port=8000"
        - "traefik.docker.network=${TRAEFIK_DOCKER_NETWORK:-traefik-public}"

  # ======================
  # Kokoro TTS (Default TTS)
  # ======================
  # High-quality text-to-speech engine. Always deployed alongside Speaches.
  kokoro-tts:
    image: ghcr.io/remsky/kokoro-fastapi:latest-cpu
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:8880/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5
      # Probe failures during this window do not count against `retries`.
      start_period: 120s
    networks:
      - internal
      - traefik-public
    deploy:
      restart_policy:
        condition: on-failure
        delay: 10s
      # Routing labels live under `deploy` so they are set on the Swarm
      # service rather than on individual containers.
      labels:
        - "traefik.enable=true"
        # `:?` aborts the deploy with a clear message when the required
        # domain is missing, instead of creating a router with an empty
        # Host() rule.
        - "traefik.http.routers.speech-tts.rule=Host(`${TTS_DOMAIN:?TTS_DOMAIN must be set}`)"
        - "traefik.http.routers.speech-tts.entrypoints=${TRAEFIK_ENTRYPOINT:-websecure}"
        - "traefik.http.routers.speech-tts.tls=${TRAEFIK_TLS_ENABLED:-true}"
        - "traefik.http.routers.speech-tts.tls.certresolver=${TRAEFIK_CERTRESOLVER:-}"
        - "traefik.http.services.speech-tts.loadbalancer.server.port=8880"
        - "traefik.docker.network=${TRAEFIK_DOCKER_NETWORK:-traefik-public}"

  # ======================
  # Chatterbox TTS (Premium TTS - Optional)
  # ======================
  # Premium TTS with voice cloning capabilities. Requires NVIDIA GPU.
  #
  # To enable: Uncomment this service and set CHATTERBOX_TTS_DOMAIN.
  #
  # For Docker Swarm GPU scheduling, configure generic resources on the node:
  #   /etc/docker/daemon.json:
  #     { "runtimes": { "nvidia": { ... } },
  #       "node-generic-resources": ["NVIDIA-GPU=0"] }
  #
  # chatterbox-tts:
  #   image: devnen/chatterbox-tts-server:latest
  #   healthcheck:
  #     test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
  #     interval: 30s
  #     timeout: 10s
  #     retries: 5
  #     start_period: 180s
  #   networks:
  #     - internal
  #     - traefik-public
  #   deploy:
  #     restart_policy:
  #       condition: on-failure
  #       delay: 10s
  #     resources:
  #       reservations:
  #         generic_resources:
  #           - discrete_resource_spec:
  #               kind: "NVIDIA-GPU"
  #               value: 1
  #     labels:
  #       - "traefik.enable=true"
  #       - "traefik.http.routers.speech-tts-premium.rule=Host(`${CHATTERBOX_TTS_DOMAIN}`)"
  #       - "traefik.http.routers.speech-tts-premium.entrypoints=${TRAEFIK_ENTRYPOINT:-websecure}"
  #       - "traefik.http.routers.speech-tts-premium.tls=${TRAEFIK_TLS_ENABLED:-true}"
  #       - "traefik.http.routers.speech-tts-premium.tls.certresolver=${TRAEFIK_CERTRESOLVER:-}"
  #       - "traefik.http.services.speech-tts-premium.loadbalancer.server.port=8000"
  #       - "traefik.docker.network=${TRAEFIK_DOCKER_NETWORK:-traefik-public}"

volumes:
  # Named volume mounted at /root/.cache/huggingface in the `speaches`
  # service. The empty mapping selects the default volume settings.
  speaches-models: {}

networks:
  # Overlay network created with this stack.
  internal:
    driver: overlay

  # Network that must already exist (`external: true`). Its real name
  # comes from TRAEFIK_DOCKER_NETWORK and falls back to `traefik-public`.
  traefik-public:
    name: "${TRAEFIK_DOCKER_NETWORK:-traefik-public}"
    external: true