From 52553c8266b4580d3bfc5a0f398fa1937ad9eb9b Mon Sep 17 00:00:00 2001
From: Jason Woltje
Date: Sun, 15 Feb 2026 02:06:21 -0600
Subject: [PATCH] feat(#399): add Docker Compose dev overlay for speech services

Add docker-compose.speech.yml with three speech services:
- Speaches (STT via Whisper + basic TTS) on port 8090
- Kokoro-FastAPI (default TTS) on port 8880
- Chatterbox TTS (premium, GPU-required) on port 8881 behind
  the premium-tts profile

All services include health checks, connect to the mosaic-internal
network, and follow existing naming/labeling conventions.

Makefile targets added: speech-up, speech-down, speech-logs.
speech-down stops and removes only the speech containers, so the base
stack defined in docker-compose.yml keeps running.

Fixes #399

Co-Authored-By: Claude Opus 4.6
---
Review note: speech-down previously ran "down --remove-orphans" against the
merged project, which tore down every base service as well. The Makefile
index hashes below predate that fix.

 Makefile                  |  18 +++++-
 docker-compose.speech.yml | 113 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 130 insertions(+), 1 deletion(-)
 create mode 100644 docker-compose.speech.yml

diff --git a/Makefile b/Makefile
index 3375fee..c6fbb30 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: help install dev build test docker-up docker-down docker-logs docker-ps docker-build docker-restart docker-test clean
+.PHONY: help install dev build test docker-up docker-down docker-logs docker-ps docker-build docker-restart docker-test speech-up speech-down speech-logs clean
 
 # Default target
 help:
@@ -24,6 +24,11 @@ help:
 	@echo " make docker-test Run Docker smoke test"
 	@echo " make docker-test-traefik Run Traefik integration tests"
 	@echo ""
+	@echo "Speech Services:"
+	@echo " make speech-up Start speech services (STT + TTS)"
+	@echo " make speech-down Stop speech services"
+	@echo " make speech-logs View speech service logs"
+	@echo ""
 	@echo "Database:"
 	@echo " make db-migrate Run database migrations"
 	@echo " make db-seed Seed development data"
@@ -85,6 +90,17 @@ docker-test:
 docker-test-traefik:
 	./tests/integration/docker/traefik.test.sh all
 
+# Speech services
+speech-up:
+	docker compose -f docker-compose.yml -f docker-compose.speech.yml up -d speaches kokoro-tts
+
+# Remove only the speech containers; a bare "down" would also stop the base stack.
+speech-down:
+	docker compose -f docker-compose.yml -f docker-compose.speech.yml --profile premium-tts rm --stop --force speaches kokoro-tts chatterbox-tts
+
+speech-logs:
+	docker compose -f docker-compose.yml -f docker-compose.speech.yml logs -f speaches kokoro-tts
+
 # Database operations
 db-migrate:
 	cd apps/api && pnpm prisma:migrate
diff --git a/docker-compose.speech.yml b/docker-compose.speech.yml
new file mode 100644
index 0000000..855a947
--- /dev/null
+++ b/docker-compose.speech.yml
@@ -0,0 +1,113 @@
+# ==============================================
+# Speech Services - Docker Compose Dev Overlay
+# ==============================================
+#
+# Adds STT and TTS services for local development.
+#
+# Usage:
+#   Basic (STT + default TTS):
+#     docker compose -f docker-compose.yml -f docker-compose.speech.yml up -d
+#
+#   With premium TTS (requires GPU):
+#     docker compose -f docker-compose.yml -f docker-compose.speech.yml --profile premium-tts up -d
+#
+#   Or use Makefile targets:
+#     make speech-up    # Basic speech services
+#     make speech-down  # Stop speech services
+#     make speech-logs  # View speech service logs
+# ==============================================
+
+services:
+  # ======================
+  # Speaches (STT + basic TTS)
+  # ======================
+  speaches:
+    image: ghcr.io/speaches-ai/speaches:latest
+    container_name: mosaic-speaches
+    restart: unless-stopped
+    environment:
+      WHISPER__MODEL: ${SPEACHES_WHISPER_MODEL:-Systran/faster-whisper-large-v3-turbo}
+    ports:
+      - "${SPEACHES_PORT:-8090}:8000"
+    volumes:
+      - speaches_models:/root/.cache/huggingface
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 120s
+    networks:
+      - mosaic-internal
+    labels:
+      - "com.mosaic.service=speech-stt"
+      - "com.mosaic.description=Speaches STT (Whisper) and basic TTS"
+
+  # ======================
+  # Kokoro TTS (Default TTS)
+  # ======================
+  kokoro-tts:
+    image: ghcr.io/remsky/kokoro-fastapi:latest-cpu
+    container_name: mosaic-kokoro-tts
+    restart: unless-stopped
+    ports:
+      - "${KOKORO_TTS_PORT:-8880}:8880"
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://localhost:8880/health || exit 1"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 120s
+    networks:
+      - mosaic-internal
+    labels:
+      - "com.mosaic.service=speech-tts"
+      - "com.mosaic.description=Kokoro FastAPI TTS engine"
+
+  # ======================
+  # Chatterbox TTS (Premium TTS - Optional)
+  # ======================
+  # Only starts with: --profile premium-tts
+  # Requires NVIDIA GPU with docker nvidia runtime
+  chatterbox-tts:
+    image: devnen/chatterbox-tts-server:latest
+    container_name: mosaic-chatterbox-tts
+    restart: unless-stopped
+    ports:
+      - "${CHATTERBOX_TTS_PORT:-8881}:8000"
+    profiles:
+      - premium-tts
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 180s
+    networks:
+      - mosaic-internal
+    labels:
+      - "com.mosaic.service=speech-tts-premium"
+      - "com.mosaic.description=Chatterbox premium TTS with voice cloning (GPU)"
+
+# ======================
+# Volumes
+# ======================
+volumes:
+  speaches_models:
+    name: mosaic-speaches-models
+    driver: local
+
+# ======================
+# Networks
+# ======================
+networks:
+  mosaic-internal:
+    external: true
+    name: mosaic-internal