Add docker-compose.speech.yml with three speech services:

- Speaches (STT via Whisper + basic TTS) on port 8090
- Kokoro-FastAPI (default TTS) on port 8880
- Chatterbox TTS (premium, GPU-required) on port 8881, behind the premium-tts profile

All services include health checks, connect to the mosaic-internal network, and follow the existing naming/labeling conventions.

Makefile targets added: speech-up, speech-down, speech-logs.

Fixes #399

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
114 lines
3.2 KiB
YAML
114 lines
3.2 KiB
YAML
# ==============================================
# Speech Services - Docker Compose Dev Overlay
# ==============================================
#
# Adds speech-to-text (STT) and text-to-speech (TTS) services for local
# development.
#
# Usage:
#   Basic (STT + default TTS):
#     docker compose -f docker-compose.yml -f docker-compose.speech.yml up -d
#
#   With premium TTS (requires GPU):
#     docker compose -f docker-compose.yml -f docker-compose.speech.yml --profile premium-tts up -d
#
#   Or use Makefile targets:
#     make speech-up     # Basic speech services
#     make speech-down   # Stop speech services
#     make speech-logs   # View speech service logs
# ==============================================

services:
  # ======================
  # Speaches (STT + basic TTS)
  # ======================
  # NOTE(review): all three images use floating tags (`latest` / `latest-cpu`);
  # consider pinning versions or digests for reproducible environments.
  speaches:
    image: ghcr.io/speaches-ai/speaches:latest
    container_name: mosaic-speaches
    restart: unless-stopped
    environment:
      # Whisper model to use; override with SPEACHES_WHISPER_MODEL.
      WHISPER__MODEL: "${SPEACHES_WHISPER_MODEL:-Systran/faster-whisper-large-v3-turbo}"
    ports:
      # Host port (default 8090, override with SPEACHES_PORT) -> container 8000.
      - "${SPEACHES_PORT:-8090}:8000"
    volumes:
      # Named volume mounted over the Hugging Face cache directory.
      # NOTE(review): presumably keeps downloaded models across container
      # re-creation; confirm the image actually runs as root and uses this
      # cache path.
      - speaches_models:/root/.cache/huggingface
    healthcheck:
      # NOTE(review): relies on `curl` being available inside the image.
      test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5
      # Failures during the first 120s do not count towards `retries`.
      start_period: 120s
    networks:
      - mosaic-internal
    labels:
      - "com.mosaic.service=speech-stt"
      - "com.mosaic.description=Speaches STT (Whisper) and basic TTS"

  # ======================
  # Kokoro TTS (Default TTS)
  # ======================
  kokoro-tts:
    image: ghcr.io/remsky/kokoro-fastapi:latest-cpu
    container_name: mosaic-kokoro-tts
    restart: unless-stopped
    ports:
      # Host port (default 8880, override with KOKORO_TTS_PORT) -> container 8880.
      - "${KOKORO_TTS_PORT:-8880}:8880"
    healthcheck:
      # NOTE(review): relies on `curl` being available inside the image.
      test: ["CMD-SHELL", "curl -f http://localhost:8880/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5
      # Failures during the first 120s do not count towards `retries`.
      start_period: 120s
    networks:
      - mosaic-internal
    labels:
      - "com.mosaic.service=speech-tts"
      - "com.mosaic.description=Kokoro FastAPI TTS engine"

  # ======================
  # Chatterbox TTS (Premium TTS - Optional)
  # ======================
  # Only starts with: --profile premium-tts
  # Requires NVIDIA GPU with docker nvidia runtime
  chatterbox-tts:
    image: devnen/chatterbox-tts-server:latest
    container_name: mosaic-chatterbox-tts
    restart: unless-stopped
    ports:
      # Host port (default 8881, override with CHATTERBOX_TTS_PORT) -> container 8000.
      # NOTE(review): confirm the server inside this image listens on 8000;
      # the port mapping and the health check below both assume it.
      - "${CHATTERBOX_TTS_PORT:-8881}:8000"
    profiles:
      # Service is skipped unless this profile is enabled.
      - premium-tts
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      # NOTE(review): relies on `curl` being available inside the image.
      test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5
      # Longer grace period (180s) than the other speech services.
      start_period: 180s
    networks:
      - mosaic-internal
    labels:
      - "com.mosaic.service=speech-tts-premium"
      - "com.mosaic.description=Chatterbox premium TTS with voice cloning (GPU)"

# ======================
# Volumes
# ======================
volumes:
  # Named volume mounted by the speaches service at /root/.cache/huggingface.
  # The explicit `name` fixes the volume name to mosaic-speaches-models instead
  # of letting Compose prefix it with the project name.
  speaches_models:
    name: mosaic-speaches-models
    driver: local

# ======================
# Networks
# ======================
networks:
  # Declared as external: Compose will not create this network, so a network
  # named mosaic-internal must already exist when this overlay is brought up.
  # NOTE(review): presumably created by the base docker-compose.yml stack;
  # confirm the base file's definition does not conflict with `external: true`.
  mosaic-internal:
    external: true
    name: mosaic-internal