Files
stack/docker-compose.speech.yml
Jason Woltje 52553c8266 feat(#399): add Docker Compose dev overlay for speech services
Add docker-compose.speech.yml with three speech services:
- Speaches (STT via Whisper + basic TTS) on port 8090
- Kokoro-FastAPI (default TTS) on port 8880
- Chatterbox TTS (premium, GPU-required) on port 8881 behind
  the premium-tts profile

All services include health checks, connect to the mosaic-internal
network, and follow existing naming/labeling conventions. Makefile
targets added: speech-up, speech-down, speech-logs.

Fixes #399

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 02:06:21 -06:00

114 lines
3.2 KiB
YAML

# ==============================================
# Speech Services - Docker Compose Dev Overlay
# ==============================================
#
# Adds STT and TTS services for local development.
#
# Usage:
#   Basic (STT + default TTS):
#     docker compose -f docker-compose.yml -f docker-compose.speech.yml up -d
#
#   With premium TTS (requires GPU):
#     docker compose -f docker-compose.yml -f docker-compose.speech.yml --profile premium-tts up -d
#
#   Or use Makefile targets:
#     make speech-up    # Basic speech services
#     make speech-down  # Stop speech services
#     make speech-logs  # View speech service logs
# ==============================================
services:
  # ======================
  # Speaches (STT + basic TTS)
  # ======================
  speaches:
    image: ghcr.io/speaches-ai/speaches:latest
    container_name: mosaic-speaches
    restart: unless-stopped
    environment:
      # Whisper model identifier; override via SPEACHES_WHISPER_MODEL.
      WHISPER__MODEL: ${SPEACHES_WHISPER_MODEL:-Systran/faster-whisper-large-v3-turbo}
    ports:
      # Host port (default 8090) -> container port 8000.
      - "${SPEACHES_PORT:-8090}:8000"
    volumes:
      # Named volume mounted over the Hugging Face cache directory.
      # NOTE(review): the path assumes the container runs as root -
      # confirm against the image's runtime user.
      - speaches_models:/root/.cache/huggingface
    healthcheck:
      # NOTE(review): assumes curl is available inside the image - confirm.
      test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5
      # Failed probes during this window do not count toward `retries`.
      start_period: 120s
    networks:
      - mosaic-internal
    labels:
      - "com.mosaic.service=speech-stt"
      - "com.mosaic.description=Speaches STT (Whisper) and basic TTS"

  # ======================
  # Kokoro TTS (Default TTS)
  # ======================
  kokoro-tts:
    image: ghcr.io/remsky/kokoro-fastapi:latest-cpu
    container_name: mosaic-kokoro-tts
    restart: unless-stopped
    ports:
      # Host port (default 8880) -> container port 8880.
      - "${KOKORO_TTS_PORT:-8880}:8880"
    healthcheck:
      # NOTE(review): assumes curl is available inside the image - confirm.
      test: ["CMD-SHELL", "curl -f http://localhost:8880/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 120s
    networks:
      - mosaic-internal
    labels:
      - "com.mosaic.service=speech-tts"
      - "com.mosaic.description=Kokoro FastAPI TTS engine"

  # ======================
  # Chatterbox TTS (Premium TTS - Optional)
  # ======================
  # Only starts with: --profile premium-tts
  # Requires NVIDIA GPU with docker nvidia runtime
  chatterbox-tts:
    image: devnen/chatterbox-tts-server:latest
    container_name: mosaic-chatterbox-tts
    restart: unless-stopped
    ports:
      # Host port (default 8881) -> container port 8000.
      # NOTE(review): confirm 8000 is the port this image listens on.
      - "${CHATTERBOX_TTS_PORT:-8881}:8000"
    profiles:
      # Keeps the service out of a plain `up`; it is only created when
      # the premium-tts profile is enabled.
      - premium-tts
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      # NOTE(review): assumes curl is available inside the image - confirm.
      test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5
      # Longer grace window than the other speech services (180s vs 120s).
      start_period: 180s
    networks:
      - mosaic-internal
    labels:
      - "com.mosaic.service=speech-tts-premium"
      - "com.mosaic.description=Chatterbox premium TTS with voice cloning (GPU)"
# ======================
# Volumes
# ======================
volumes:
  # Mounted by the speaches service at /root/.cache/huggingface.
  speaches_models:
    # Explicit name, so the volume is not prefixed with the Compose
    # project name.
    name: mosaic-speaches-models
    driver: local
# ======================
# Networks
# ======================
networks:
  # Declared external: Compose expects this network to exist already
  # and does not create or remove it.
  mosaic-internal:
    external: true
    name: mosaic-internal