feat(#399): add Docker Compose dev overlay for speech services

Add docker-compose.speech.yml with three speech services:
- Speaches (STT via Whisper + basic TTS) on port 8090
- Kokoro-FastAPI (default TTS) on port 8880
- Chatterbox TTS (premium, GPU-required) on port 8881 behind
  the premium-tts profile

All services include health checks, connect to the mosaic-internal
network, and follow existing naming/labeling conventions. Makefile
targets added: speech-up, speech-down, speech-logs.

Fixes #399

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-15 02:06:21 -06:00
parent 4cc43bece6
commit 52553c8266
2 changed files with 129 additions and 1 deletions

View File

@@ -1,4 +1,4 @@
.PHONY: help install dev build test docker-up docker-down docker-logs docker-ps docker-build docker-restart docker-test clean .PHONY: help install dev build test docker-up docker-down docker-logs docker-ps docker-build docker-restart docker-test speech-up speech-down speech-logs clean
# Default target # Default target
help: help:
@@ -24,6 +24,11 @@ help:
@echo " make docker-test Run Docker smoke test" @echo " make docker-test Run Docker smoke test"
@echo " make docker-test-traefik Run Traefik integration tests" @echo " make docker-test-traefik Run Traefik integration tests"
@echo "" @echo ""
@echo "Speech Services:"
@echo " make speech-up Start speech services (STT + TTS)"
@echo " make speech-down Stop speech services"
@echo " make speech-logs View speech service logs"
@echo ""
@echo "Database:" @echo "Database:"
@echo " make db-migrate Run database migrations" @echo " make db-migrate Run database migrations"
@echo " make db-seed Seed development data" @echo " make db-seed Seed development data"
@@ -85,6 +90,16 @@ docker-test:
docker-test-traefik: docker-test-traefik:
./tests/integration/docker/traefik.test.sh all ./tests/integration/docker/traefik.test.sh all
# Speech services
# All three targets act on the base compose file plus the speech overlay and
# name the same two services, so they never touch the rest of the stack.
# The profile-gated chatterbox-tts service is not managed by these targets.
SPEECH_COMPOSE := docker compose -f docker-compose.yml -f docker-compose.speech.yml
SPEECH_SERVICES := speaches kokoro-tts

# Start the speech services in the background.
speech-up:
	$(SPEECH_COMPOSE) up -d $(SPEECH_SERVICES)

# Stop and remove only the speech containers. A project-wide `down` here
# would also take down every service defined in docker-compose.yml.
speech-down:
	$(SPEECH_COMPOSE) rm --stop --force $(SPEECH_SERVICES)

# Follow the logs of the speech services.
speech-logs:
	$(SPEECH_COMPOSE) logs -f $(SPEECH_SERVICES)
# Database operations # Database operations
db-migrate: db-migrate:
cd apps/api && pnpm prisma:migrate cd apps/api && pnpm prisma:migrate

113
docker-compose.speech.yml Normal file
View File

@@ -0,0 +1,113 @@
# ==============================================
# Speech Services - Docker Compose Dev Overlay
# ==============================================
#
# Adds STT and TTS services for local development.
#
# Usage:
# Basic (starts the base stack plus STT + default TTS):
# docker compose -f docker-compose.yml -f docker-compose.speech.yml up -d
#
# With premium TTS (requires GPU):
# docker compose -f docker-compose.yml -f docker-compose.speech.yml --profile premium-tts up -d
#
# Or use Makefile targets:
# make speech-up # Basic speech services
# make speech-down # Stop speech services
# make speech-logs # View speech service logs
# ==============================================
services:
  # ======================
  # Speaches (STT + basic TTS)
  # ======================
  # Published on host port SPEACHES_PORT (default 8090), mapped to
  # container port 8000.
  # NOTE(review): `latest` is a moving tag; consider pinning a release.
  speaches:
    image: ghcr.io/speaches-ai/speaches:latest
    container_name: mosaic-speaches
    restart: unless-stopped
    environment:
      # Override the model with SPEACHES_WHISPER_MODEL.
      WHISPER__MODEL: "${SPEACHES_WHISPER_MODEL:-Systran/faster-whisper-large-v3-turbo}"
    ports:
      - "${SPEACHES_PORT:-8090}:8000"
    volumes:
      # Persist downloaded models across container re-creation.
      # NOTE(review): upstream Speaches docs mount the Hugging Face cache at
      # this path (the image runs as a non-root user); confirm against the
      # image version actually deployed.
      - speaches_models:/home/ubuntu/.cache/huggingface/hub
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5
      # Long grace period before failed probes count against the container.
      start_period: 120s
    networks:
      - mosaic-internal
    labels:
      - "com.mosaic.service=speech-stt"
      - "com.mosaic.description=Speaches STT (Whisper) and basic TTS"

  # ======================
  # Kokoro TTS (Default TTS)
  # ======================
  # Published on host port KOKORO_TTS_PORT (default 8880), mapped to
  # container port 8880.
  # NOTE(review): the health check assumes `curl` exists in the image; verify.
  kokoro-tts:
    image: ghcr.io/remsky/kokoro-fastapi:latest-cpu
    container_name: mosaic-kokoro-tts
    restart: unless-stopped
    ports:
      - "${KOKORO_TTS_PORT:-8880}:8880"
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:8880/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 120s
    networks:
      - mosaic-internal
    labels:
      - "com.mosaic.service=speech-tts"
      - "com.mosaic.description=Kokoro FastAPI TTS engine"

  # ======================
  # Chatterbox TTS (Premium TTS - Optional)
  # ======================
  # Only starts with: --profile premium-tts
  # Requires NVIDIA GPU with docker nvidia runtime
  # NOTE(review): confirm the container really listens on 8000 and serves
  # /health; both the port mapping and the health check depend on it.
  chatterbox-tts:
    image: devnen/chatterbox-tts-server:latest
    container_name: mosaic-chatterbox-tts
    restart: unless-stopped
    ports:
      - "${CHATTERBOX_TTS_PORT:-8881}:8000"
    profiles:
      - premium-tts
    deploy:
      resources:
        reservations:
          devices:
            # Reserve a single NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5
      # Longer grace period than the other speech services.
      start_period: 180s
    networks:
      - mosaic-internal
    labels:
      - "com.mosaic.service=speech-tts-premium"
      - "com.mosaic.description=Chatterbox premium TTS with voice cloning (GPU)"
# ======================
# Volumes
# ======================
volumes:
  # Named volume mounted by the speaches service for its model cache.
  # The explicit name keeps it stable regardless of the Compose project name.
  speaches_models:
    driver: local
    name: mosaic-speaches-models
# ======================
# Networks
# ======================
networks:
  # Declared external: Compose attaches to an existing network with this
  # name instead of creating one.
  # NOTE(review): this file is meant to be layered on docker-compose.yml;
  # confirm the base file's definition of this network merges cleanly
  # with `external: true`.
  mosaic-internal:
    name: mosaic-internal
    external: true