# ==============================================
# Speech Services - Sample Swarm Deployment
# ==============================================
#
# Standalone speech services deployment for use with Mosaic Stack.
# This is SEPARATE infrastructure — not part of the Mosaic Stack itself.
# Mosaic connects to it via SPEACHES_URL and TTS_URL environment variables.
#
# Provides:
#   - Speaches: Speech-to-Text (Whisper) + basic TTS fallback
#   - Kokoro TTS: Default high-quality text-to-speech
#   - Chatterbox TTS: Premium TTS with voice cloning (optional, requires GPU)
#
# Usage (Docker Swarm via Portainer):
#   1. Create a new stack in Portainer
#   2. Paste this file or point to the repo
#   3. Set environment variables in Portainer's env var section
#   4. Deploy the stack
#
# Usage (Docker Swarm CLI):
#   1. Create .env file with variables below
#   2. Export the variables into your shell. `docker stack deploy` does not
#      load .env files by itself, so unexported variables are interpolated
#      as empty strings:
#        set -a && . ./.env && set +a
#   3. docker stack deploy -c docker-compose.sample.speech.yml speech
#
# Required Environment Variables:
#   STT_DOMAIN=stt.example.com              # Domain for Speaches (STT + basic TTS)
#   TTS_DOMAIN=tts.example.com              # Domain for Kokoro TTS (default TTS)
#
# Optional Environment Variables:
#   WHISPER_MODEL=Systran/faster-whisper-large-v3-turbo  # Whisper model for STT
#   CHATTERBOX_TTS_DOMAIN=tts-premium.example.com        # Domain for Chatterbox (premium TTS)
#   TRAEFIK_ENTRYPOINT=websecure            # Traefik entrypoint name
#   TRAEFIK_CERTRESOLVER=letsencrypt        # Traefik cert resolver
#   TRAEFIK_DOCKER_NETWORK=traefik-public   # Traefik network name
#   TRAEFIK_TLS_ENABLED=true                # Enable TLS on Traefik routers
#
# Connecting to Mosaic Stack:
#   Add to your Mosaic Stack .env:
#     SPEACHES_URL=http://speaches:8000       (if same Docker network)
#     SPEACHES_URL=https://stt.example.com    (if external)
#     TTS_URL=http://kokoro-tts:8880          (if same Docker network)
#     TTS_URL=https://tts.example.com         (if external)
#
# GPU Requirements (Chatterbox only):
#   - NVIDIA GPU with CUDA support
#   - nvidia-container-toolkit installed on Docker host
#   - Docker runtime configured for GPU access
#   - Note: Docker Swarm requires "generic resources" for GPU scheduling.
#     See: https://docs.docker.com/engine/daemon/nvidia-gpu/#configure-gpus-for-docker-swarm
#
# ==============================================

services:
  # ======================
  # Speaches (STT + basic TTS)
  # ======================
  # Primary speech-to-text service using Whisper.
  # Also provides basic TTS as a fallback.
  speaches:
    image: ghcr.io/speaches-ai/speaches:latest
    environment:
      # Quoted so the interpolated value is always a string.
      WHISPER__MODEL: "${WHISPER_MODEL:-Systran/faster-whisper-large-v3-turbo}"
    volumes:
      # Keeps downloaded models across container restarts.
      # NOTE(review): this path assumes the container runs as root — confirm
      # against the image, otherwise the cache may live elsewhere.
      - speaches-models:/root/.cache/huggingface
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5
      # Generous grace period before failed probes count against the service.
      start_period: 120s
    networks:
      - internal
      - traefik-public
    deploy:
      restart_policy:
        condition: on-failure
        delay: 10s
      # Swarm mode: Traefik labels are declared under deploy (service-level).
      labels:
        - "traefik.enable=true"
        - "traefik.http.routers.speech-stt.rule=Host(`${STT_DOMAIN}`)"
        - "traefik.http.routers.speech-stt.entrypoints=${TRAEFIK_ENTRYPOINT:-websecure}"
        - "traefik.http.routers.speech-stt.tls=${TRAEFIK_TLS_ENABLED:-true}"
        - "traefik.http.routers.speech-stt.tls.certresolver=${TRAEFIK_CERTRESOLVER:-}"
        - "traefik.http.services.speech-stt.loadbalancer.server.port=8000"
        - "traefik.docker.network=${TRAEFIK_DOCKER_NETWORK:-traefik-public}"

  # ======================
  # Kokoro TTS (Default TTS)
  # ======================
  # High-quality text-to-speech engine. Always deployed alongside Speaches.
  kokoro-tts:
    image: ghcr.io/remsky/kokoro-fastapi:latest-cpu
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:8880/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 120s
    networks:
      - internal
      - traefik-public
    deploy:
      restart_policy:
        condition: on-failure
        delay: 10s
      labels:
        - "traefik.enable=true"
        - "traefik.http.routers.speech-tts.rule=Host(`${TTS_DOMAIN}`)"
        - "traefik.http.routers.speech-tts.entrypoints=${TRAEFIK_ENTRYPOINT:-websecure}"
        - "traefik.http.routers.speech-tts.tls=${TRAEFIK_TLS_ENABLED:-true}"
        - "traefik.http.routers.speech-tts.tls.certresolver=${TRAEFIK_CERTRESOLVER:-}"
        - "traefik.http.services.speech-tts.loadbalancer.server.port=8880"
        - "traefik.docker.network=${TRAEFIK_DOCKER_NETWORK:-traefik-public}"

  # ======================
  # Chatterbox TTS (Premium TTS - Optional)
  # ======================
  # Premium TTS with voice cloning capabilities. Requires NVIDIA GPU.
  #
  # To enable: Uncomment this service and set CHATTERBOX_TTS_DOMAIN.
  #
  # For Docker Swarm GPU scheduling, configure generic resources on the node:
  #   /etc/docker/daemon.json:
  #     { "runtimes": { "nvidia": { ... } },
  #       "node-generic-resources": ["NVIDIA-GPU=0"] }
  #
  # NOTE(review): confirm the generic-resource value format against the Docker
  # docs linked in the file header — a bare integer may be read as a resource
  # count rather than a GPU index, in which case "=0" would advertise no GPUs
  # and the reservation of 1 below could never be scheduled.
  #
  # chatterbox-tts:
  #   image: devnen/chatterbox-tts-server:latest
  #   healthcheck:
  #     test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
  #     interval: 30s
  #     timeout: 10s
  #     retries: 5
  #     start_period: 180s
  #   networks:
  #     - internal
  #     - traefik-public
  #   deploy:
  #     restart_policy:
  #       condition: on-failure
  #       delay: 10s
  #     resources:
  #       reservations:
  #         generic_resources:
  #           - discrete_resource_spec:
  #               kind: "NVIDIA-GPU"
  #               value: 1
  #     labels:
  #       - "traefik.enable=true"
  #       - "traefik.http.routers.speech-tts-premium.rule=Host(`${CHATTERBOX_TTS_DOMAIN}`)"
  #       - "traefik.http.routers.speech-tts-premium.entrypoints=${TRAEFIK_ENTRYPOINT:-websecure}"
  #       - "traefik.http.routers.speech-tts-premium.tls=${TRAEFIK_TLS_ENABLED:-true}"
  #       - "traefik.http.routers.speech-tts-premium.tls.certresolver=${TRAEFIK_CERTRESOLVER:-}"
  #       - "traefik.http.services.speech-tts-premium.loadbalancer.server.port=8000"
  #       - "traefik.docker.network=${TRAEFIK_DOCKER_NETWORK:-traefik-public}"

volumes:
  # Named volume backing the Speaches model cache mount.
  speaches-models:

networks:
  # Stack-private overlay network shared by the speech services.
  internal:
    driver: overlay
  # Pre-existing Traefik network; it must be created outside this stack.
  traefik-public:
    external: true
    name: "${TRAEFIK_DOCKER_NETWORK:-traefik-public}"