feat(#399): add Docker Compose dev overlay for speech services

Add docker-compose.speech.yml with three speech services: - Speaches (STT via Whisper + basic TTS) on port 8090 - Kokoro-FastAPI (default TTS) on port 8880 - Chatterbox TTS (premium, GPU-required) on port 8881 behind the premium-tts profile All services include health checks, connect to the mosaic-internal network, and follow existing naming/labeling conventions. Makefile targets added: speech-up, speech-down, speech-logs. Fixes #399 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 02:06:21 -06:00
parent 4cc43bece6
commit 52553c8266
2 changed files with 129 additions and 1 deletions
--- a/17
+++ b/17
@@ -1,4 +1,4 @@
-.PHONY: help install dev build test docker-up docker-down docker-logs docker-ps docker-build docker-restart docker-test clean
+.PHONY: help install dev build test docker-up docker-down docker-logs docker-ps docker-build docker-restart docker-test speech-up speech-down speech-logs clean
 # Default target
 help:
@@ -24,6 +24,11 @@ help:
 	@echo "  make docker-test            Run Docker smoke test"
 	@echo "  make docker-test-traefik    Run Traefik integration tests"
 	@echo ""
 	@echo "Speech Services:"
 	@echo "  make speech-up              Start speech services (STT + TTS)"
 	@echo "  make speech-down            Stop speech services"
 	@echo "  make speech-logs            View speech service logs"
 	@echo ""
 	@echo "Database:"
 	@echo "  make db-migrate       Run database migrations"
 	@echo "  make db-seed          Seed development data"
@@ -85,6 +90,16 @@ docker-test:
 docker-test-traefik:
 	./tests/integration/docker/traefik.test.sh all
 # Speech services
 # Every speech target layers docker-compose.speech.yml on top of the base
 # compose file, so the invocation is kept in one place.
 SPEECH_COMPOSE := docker compose -f docker-compose.yml -f docker-compose.speech.yml
 # Services started by default; chatterbox-tts is gated behind the
 # premium-tts profile and is not started by speech-up.
 SPEECH_SERVICES := speaches kokoro-tts
 # Start the default speech services in the background.
 speech-up:
 	$(SPEECH_COMPOSE) up -d $(SPEECH_SERVICES)
 # Stop and remove only the speech containers. A bare `down` on the merged
 # project would also tear down every service defined in docker-compose.yml.
 # The premium-tts profile is enabled so a manually started chatterbox-tts
 # container is cleaned up as well.
 speech-down:
 	$(SPEECH_COMPOSE) --profile premium-tts rm --stop --force $(SPEECH_SERVICES) chatterbox-tts
 # Follow the logs of the default speech services.
 speech-logs:
 	$(SPEECH_COMPOSE) logs -f $(SPEECH_SERVICES)
 # Database operations
 db-migrate:
 	cd apps/api && pnpm prisma:migrate
--- a/docker-compose.speech.yml
+++ b/docker-compose.speech.yml
@@ -0,0 +1,113 @@
 # ==============================================
 # Speech Services - Docker Compose Dev Overlay
 # ==============================================
 #
 # Adds STT and TTS services for local development.
 #
 # Usage:
 #   Basic (STT + default TTS):
 #     docker compose -f docker-compose.yml -f docker-compose.speech.yml up -d
 #
 #   With premium TTS (requires GPU):
 #     docker compose -f docker-compose.yml -f docker-compose.speech.yml --profile premium-tts up -d
 #
 #   Or use Makefile targets:
 #     make speech-up              # Basic speech services
 #     make speech-down            # Stop speech services
 #     make speech-logs            # View speech service logs
 # ==============================================
 services:
  # ======================
  # Speaches (STT + basic TTS)
  # ======================
  speaches:
    # Speaches container: published on the host at SPEACHES_PORT (default 8090),
    # mapped to port 8000 inside the container.
    container_name: mosaic-speaches
    image: ghcr.io/speaches-ai/speaches:latest
    restart: unless-stopped
    networks:
      - mosaic-internal
    ports:
      - "${SPEACHES_PORT:-8090}:8000"
    environment:
      # Passed through as WHISPER__MODEL; override with SPEACHES_WHISPER_MODEL.
      - "WHISPER__MODEL=${SPEACHES_WHISPER_MODEL:-Systran/faster-whisper-large-v3-turbo}"
    volumes:
      # Named volume mounted over the Hugging Face cache directory.
      # NOTE(review): this path assumes the container process runs as root;
      # confirm against the image, otherwise downloads will not persist.
      - speaches_models:/root/.cache/huggingface
    healthcheck:
      # A string test is executed through the container's shell (CMD-SHELL),
      # so curl must be available inside the image.
      test: "curl -f http://localhost:8000/health || exit 1"
      start_period: 120s
      interval: 30s
      timeout: 10s
      retries: 5
    labels:
      com.mosaic.service: "speech-stt"
      com.mosaic.description: "Speaches STT (Whisper) and basic TTS"
  # ======================
  # Kokoro TTS (Default TTS)
  # ======================
  kokoro-tts:
    # Kokoro FastAPI container: published on the host at KOKORO_TTS_PORT
    # (default 8880), mapped to port 8880 inside the container.
    container_name: mosaic-kokoro-tts
    # NOTE(review): confirm this repository/tag combination is actually
    # published; the CPU build may live under a differently named image.
    image: ghcr.io/remsky/kokoro-fastapi:latest-cpu
    restart: unless-stopped
    networks:
      - mosaic-internal
    ports:
      - "${KOKORO_TTS_PORT:-8880}:8880"
    healthcheck:
      # A string test is executed through the container's shell (CMD-SHELL),
      # so curl must be available inside the image.
      test: "curl -f http://localhost:8880/health || exit 1"
      start_period: 120s
      interval: 30s
      timeout: 10s
      retries: 5
    labels:
      com.mosaic.service: "speech-tts"
      com.mosaic.description: "Kokoro FastAPI TTS engine"
  # ======================
  # Chatterbox TTS (Premium TTS - Optional)
  # ======================
  # Only starts with: --profile premium-tts
  # Requires NVIDIA GPU with docker nvidia runtime
  chatterbox-tts:
    # Chatterbox container: published on the host at CHATTERBOX_TTS_PORT
    # (default 8881), mapped to port 8000 inside the container.
    # NOTE(review): the port mapping and healthcheck assume the server listens
    # on 8000 and serves /health; confirm against the image.
    container_name: mosaic-chatterbox-tts
    image: devnen/chatterbox-tts-server:latest
    restart: unless-stopped
    # Not part of the default service set; enabled via the premium-tts profile.
    profiles:
      - premium-tts
    networks:
      - mosaic-internal
    ports:
      - "${CHATTERBOX_TTS_PORT:-8881}:8000"
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU device for this container.
            - driver: nvidia
              count: 1
              capabilities:
                - gpu
    healthcheck:
      # A string test is executed through the container's shell (CMD-SHELL),
      # so curl must be available inside the image.
      test: "curl -f http://localhost:8000/health || exit 1"
      start_period: 180s
      interval: 30s
      timeout: 10s
      retries: 5
    labels:
      com.mosaic.service: "speech-tts-premium"
      com.mosaic.description: "Chatterbox premium TTS with voice cloning (GPU)"
 # ======================
 # Volumes
 # ======================
 volumes:
  # Mounted by the speaches service; the explicit name keeps the volume
  # identifier fixed instead of prefixing it with the Compose project name.
  speaches_models:
    driver: local
    name: mosaic-speaches-models
 # ======================
 # Networks
 # ======================
 networks:
  # Declared external: Compose will not create or remove this network, so
  # mosaic-internal must already exist when these services are started.
  mosaic-internal:
    name: mosaic-internal
    external: true