# stack/docker/docker-compose.sample.speech.yml

# ==============================================
# Speech Services - Sample Swarm Deployment
# ==============================================
#
# Standalone speech services deployment for use with Mosaic Stack.
# This is SEPARATE infrastructure — not part of the Mosaic Stack itself.
# Mosaic connects to it via SPEACHES_URL and TTS_URL environment variables.
#
# Provides:
#   - Speaches: Speech-to-Text (Whisper) + basic TTS fallback
#   - Kokoro TTS: Default high-quality text-to-speech
#   - Chatterbox TTS: Premium TTS with voice cloning (optional, requires GPU)
#
# Usage (Docker Swarm via Portainer):
#   1. Create a new stack in Portainer
#   2. Paste this file or point to the repo
#   3. Set environment variables in Portainer's env var section
#   4. Deploy the stack
#
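# Usage (Docker Swarm CLI):
#   1. Create .env file with variables below
#   2. Export the variables into your shell. `docker stack deploy` does not
#      read .env files; it only substitutes variables from the environment:
#        set -a; . ./.env; set +a
#   3. docker stack deploy -c docker-compose.sample.speech.yml speech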
#
# Required Environment Variables:
#   STT_DOMAIN=stt.example.com              # Domain for Speaches (STT + basic TTS)
#   TTS_DOMAIN=tts.example.com              # Domain for Kokoro TTS (default TTS)
#
# Optional Environment Variables:
#   WHISPER_MODEL=Systran/faster-whisper-large-v3-turbo  # Whisper model for STT
#   CHATTERBOX_TTS_DOMAIN=tts-premium.example.com       # Domain for Chatterbox (premium TTS)
#   TRAEFIK_ENTRYPOINT=websecure            # Traefik entrypoint name
#   TRAEFIK_CERTRESOLVER=letsencrypt        # Traefik cert resolver
#   TRAEFIK_DOCKER_NETWORK=traefik-public   # Traefik network name
#   TRAEFIK_TLS_ENABLED=true                # Enable TLS on Traefik routers
#
# Connecting to Mosaic Stack:
#   Add to your Mosaic Stack .env:
#     SPEACHES_URL=http://speaches:8000      (if same Docker network)
#     SPEACHES_URL=https://stt.example.com   (if external)
#     TTS_URL=http://kokoro-tts:8880         (if same Docker network)
#     TTS_URL=https://tts.example.com        (if external)
#
# GPU Requirements (Chatterbox only):
#   - NVIDIA GPU with CUDA support
#   - nvidia-container-toolkit installed on Docker host
#   - Docker runtime configured for GPU access
#   - Note: Docker Swarm requires "generic resources" for GPU scheduling.
#     See: https://docs.docker.com/engine/daemon/nvidia-gpu/#configure-gpus-for-docker-swarm
#
# ==============================================

services:
  # ======================
  # Speaches (STT + basic TTS)
  # ======================
  # Primary speech-to-text service using Whisper.
  # Also provides basic TTS as a fallback.
  speaches:
    # Floating tag: pin a specific version or digest for reproducible deploys.
    image: ghcr.io/speaches-ai/speaches:latest
    environment:
      # Overridable through WHISPER_MODEL; the value after `:-` is used when
      # the variable is unset or empty.
      WHISPER__MODEL: ${WHISPER_MODEL:-Systran/faster-whisper-large-v3-turbo}
    volumes:
      # Named volume so the contents of this directory outlive the container.
      # Presumably this is where the image caches downloaded models.
      # NOTE(review): confirm the path matches the home directory of the user
      # the image actually runs as.
      - speaches-models:/root/.cache/huggingface
    healthcheck:
      # Relies on `curl` being available inside the image.
      test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5
      # Failures during the first 120s do not count towards `retries`.
      start_period: 120s
    networks:
      # `internal` for service-to-service traffic, `traefik-public` so the
      # reverse proxy can reach the container.
      - internal
      - traefik-public
    deploy:
      restart_policy:
        condition: on-failure
        delay: 10s
      # In Swarm mode the Traefik labels must live under `deploy` (service
      # labels), not on the container.
      labels:
        - "traefik.enable=true"
        # STT_DOMAIN is required: `:?` aborts the deploy with this message
        # instead of silently producing an empty Host() rule.
        - "traefik.http.routers.speech-stt.rule=Host(`${STT_DOMAIN:?STT_DOMAIN must be set}`)"
        - "traefik.http.routers.speech-stt.entrypoints=${TRAEFIK_ENTRYPOINT:-websecure}"
        - "traefik.http.routers.speech-stt.tls=${TRAEFIK_TLS_ENABLED:-true}"
        # Empty by default, i.e. no ACME resolver is named.
        # NOTE(review): verify how Traefik treats an empty certresolver label.
        - "traefik.http.routers.speech-stt.tls.certresolver=${TRAEFIK_CERTRESOLVER:-}"
        # Port Traefik forwards to; matches the port probed by the healthcheck.
        - "traefik.http.services.speech-stt.loadbalancer.server.port=8000"
        - "traefik.docker.network=${TRAEFIK_DOCKER_NETWORK:-traefik-public}"

  # ======================
  # Kokoro TTS (Default TTS)
  # ======================
  # High-quality text-to-speech engine. Always deployed alongside Speaches.
  kokoro-tts:
    # CPU build of the image; floating tag, so pin a version or digest for
    # reproducible deploys.
    image: ghcr.io/remsky/kokoro-fastapi:latest-cpu
    healthcheck:
      # Relies on `curl` being available inside the image.
      test: ["CMD-SHELL", "curl -f http://localhost:8880/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5
      # Failures during the first 120s do not count towards `retries`.
      start_period: 120s
    networks:
      # `internal` for service-to-service traffic, `traefik-public` so the
      # reverse proxy can reach the container.
      - internal
      - traefik-public
    deploy:
      restart_policy:
        condition: on-failure
        delay: 10s
      # In Swarm mode the Traefik labels must live under `deploy` (service
      # labels), not on the container.
      labels:
        - "traefik.enable=true"
        # TTS_DOMAIN is required: `:?` aborts the deploy with this message
        # instead of silently producing an empty Host() rule.
        - "traefik.http.routers.speech-tts.rule=Host(`${TTS_DOMAIN:?TTS_DOMAIN must be set}`)"
        - "traefik.http.routers.speech-tts.entrypoints=${TRAEFIK_ENTRYPOINT:-websecure}"
        - "traefik.http.routers.speech-tts.tls=${TRAEFIK_TLS_ENABLED:-true}"
        # Empty by default, i.e. no ACME resolver is named.
        # NOTE(review): verify how Traefik treats an empty certresolver label.
        - "traefik.http.routers.speech-tts.tls.certresolver=${TRAEFIK_CERTRESOLVER:-}"
        # Port Traefik forwards to; matches the port probed by the healthcheck.
        - "traefik.http.services.speech-tts.loadbalancer.server.port=8880"
        - "traefik.docker.network=${TRAEFIK_DOCKER_NETWORK:-traefik-public}"

  # ======================
  # Chatterbox TTS (Premium TTS - Optional)
  # ======================
  # Premium TTS with voice cloning capabilities. Requires NVIDIA GPU.
  #
  # To enable: Uncomment this service and set CHATTERBOX_TTS_DOMAIN.
  #
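  # For Docker Swarm GPU scheduling, advertise the GPU as a generic resource on
  # the node, using the GPU UUID reported by `nvidia-smi -a`:
  #   /etc/docker/daemon.json:
  #     { "runtimes": { "nvidia": { ... } },
  #       "node-generic-resources": ["NVIDIA-GPU=<GPU-UUID>"] }
  # NOTE(review): a bare integer value appears to be read as a resource count,
  # so "NVIDIA-GPU=0" would advertise zero GPUs and the reservation below
  # could never be scheduled. Confirm against the Docker documentation.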
  #
  # chatterbox-tts:
  #   image: devnen/chatterbox-tts-server:latest
  #   healthcheck:
  #     test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
  #     interval: 30s
  #     timeout: 10s
  #     retries: 5
  #     start_period: 180s
  #   networks:
  #     - internal
  #     - traefik-public
  #   deploy:
  #     restart_policy:
  #       condition: on-failure
  #       delay: 10s
  #     resources:
  #       reservations:
  #         generic_resources:
  #           - discrete_resource_spec:
  #               kind: "NVIDIA-GPU"
  #               value: 1
  #     labels:
  #       - "traefik.enable=true"
  #       - "traefik.http.routers.speech-tts-premium.rule=Host(`${CHATTERBOX_TTS_DOMAIN}`)"
  #       - "traefik.http.routers.speech-tts-premium.entrypoints=${TRAEFIK_ENTRYPOINT:-websecure}"
  #       - "traefik.http.routers.speech-tts-premium.tls=${TRAEFIK_TLS_ENABLED:-true}"
  #       - "traefik.http.routers.speech-tts-premium.tls.certresolver=${TRAEFIK_CERTRESOLVER:-}"
  #       - "traefik.http.services.speech-tts-premium.loadbalancer.server.port=8000"
  #       - "traefik.docker.network=${TRAEFIK_DOCKER_NETWORK:-traefik-public}"

# Named volumes, created with the default driver and no extra options.
volumes:
  # Mounted by the `speaches` service at /root/.cache/huggingface.
  speaches-models: {}

networks:
  # Pre-existing network that this stack joins but does not create.
  # Its real name comes from TRAEFIK_DOCKER_NETWORK (default: traefik-public).
  traefik-public:
    name: ${TRAEFIK_DOCKER_NETWORK:-traefik-public}
    external: true

  # Stack-scoped overlay network shared by the speech services.
  internal:
    driver: overlay