# ------------------------------------------------------------------
# Speech services: Docker Compose overlay for local development
# ------------------------------------------------------------------
# Layers speech-to-text (STT) and text-to-speech (TTS) containers on
# top of the base docker-compose.yml.
#
# Usage
#   STT + default TTS:
#     docker compose -f docker-compose.yml -f docker-compose.speech.yml up -d
#
#   Also start the premium TTS service (requires a GPU):
#     docker compose -f docker-compose.yml -f docker-compose.speech.yml --profile premium-tts up -d
#
#   Equivalent Makefile targets:
#     make speech-up     # start the basic speech services
#     make speech-down   # stop the speech services
#     make speech-logs   # view the speech service logs
# ------------------------------------------------------------------

services:
  # ----------------------------------------------------------------
  # Speaches: STT (Whisper) plus basic TTS
  # ----------------------------------------------------------------
  speaches:
    image: ghcr.io/speaches-ai/speaches:latest
    container_name: mosaic-speaches
    restart: unless-stopped
    environment:
      # Override with SPEACHES_WHISPER_MODEL; the value after ":-" is the
      # fallback used when that variable is unset or empty.
      WHISPER__MODEL: '${SPEACHES_WHISPER_MODEL:-Systran/faster-whisper-large-v3-turbo}'
    ports:
      # Host port (SPEACHES_PORT, default 8090) -> container port 8000.
      - '${SPEACHES_PORT:-8090}:8000'
    volumes:
      # Named volume mounted over the Hugging Face cache directory so its
      # contents outlive the container.
      - 'speaches_models:/root/.cache/huggingface'
    networks:
      - mosaic-internal
    healthcheck:
      # Runs inside the container.
      # NOTE(review): relies on curl being present in the image - confirm.
      test:
        - CMD-SHELL
        - 'curl -f http://localhost:8000/health || exit 1'
      start_period: 120s
      interval: 30s
      timeout: 10s
      retries: 5
    labels:
      - 'com.mosaic.service=speech-stt'
      - 'com.mosaic.description=Speaches STT (Whisper) and basic TTS'

  # ----------------------------------------------------------------
  # Kokoro: default TTS engine (CPU image tag)
  # ----------------------------------------------------------------
  kokoro-tts:
    image: ghcr.io/remsky/kokoro-fastapi:latest-cpu
    container_name: mosaic-kokoro-tts
    restart: unless-stopped
    ports:
      # Host port (KOKORO_TTS_PORT, default 8880) -> container port 8880.
      - '${KOKORO_TTS_PORT:-8880}:8880'
    networks:
      - mosaic-internal
    healthcheck:
      # Runs inside the container.
      # NOTE(review): relies on curl being present in the image - confirm.
      test:
        - CMD-SHELL
        - 'curl -f http://localhost:8880/health || exit 1'
      start_period: 120s
      interval: 30s
      timeout: 10s
      retries: 5
    labels:
      - 'com.mosaic.service=speech-tts'
      - 'com.mosaic.description=Kokoro FastAPI TTS engine'

  # ----------------------------------------------------------------
  # Chatterbox: premium TTS (optional)
  # ----------------------------------------------------------------
  # Started only when the "premium-tts" profile is enabled
  # (--profile premium-tts). Requires an NVIDIA GPU and the Docker
  # NVIDIA runtime.
  chatterbox-tts:
    image: devnen/chatterbox-tts-server:latest
    container_name: mosaic-chatterbox-tts
    restart: unless-stopped
    profiles:
      - premium-tts
    ports:
      # Host port (CHATTERBOX_TTS_PORT, default 8881) -> container port 8000.
      - '${CHATTERBOX_TTS_PORT:-8881}:8000'
    networks:
      - mosaic-internal
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities:
                - gpu
    healthcheck:
      # Runs inside the container; the start period is longer than the
      # other services' (180s vs 120s).
      # NOTE(review): relies on curl being present in the image - confirm.
      test:
        - CMD-SHELL
        - 'curl -f http://localhost:8000/health || exit 1'
      start_period: 180s
      interval: 30s
      timeout: 10s
      retries: 5
    labels:
      - 'com.mosaic.service=speech-tts-premium'
      - 'com.mosaic.description=Chatterbox premium TTS with voice cloning (GPU)'

# ------------------------------------------------------------------
# Volumes
# ------------------------------------------------------------------
volumes:
  # Backs the speaches service's Hugging Face cache mount.
  speaches_models:
    name: mosaic-speaches-models
    driver: local

# ------------------------------------------------------------------
# Networks
# ------------------------------------------------------------------
networks:
  # Declared external: this overlay attaches to an existing network named
  # mosaic-internal and does not create it.
  mosaic-internal:
    external: true
    name: mosaic-internal