From 52553c8266b4580d3bfc5a0f398fa1937ad9eb9b Mon Sep 17 00:00:00 2001
From: Jason Woltje <jason@diversecanvas.com>
Date: Sun, 15 Feb 2026 02:06:21 -0600
Subject: [PATCH] feat(#399): add Docker Compose dev overlay for speech
 services

Add docker-compose.speech.yml with three speech services:
- Speaches (STT via Whisper + basic TTS) on port 8090
- Kokoro-FastAPI (default TTS) on port 8880
- Chatterbox TTS (premium, GPU-required) on port 8881 behind
  the premium-tts profile

All services include health checks, connect to the mosaic-internal
network, and follow existing naming/labeling conventions. Makefile
targets added: speech-up, speech-down, speech-logs.

Fixes #399

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 Makefile                  |  17 +++++-
 docker-compose.speech.yml | 113 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 129 insertions(+), 1 deletion(-)
 create mode 100644 docker-compose.speech.yml

diff --git a/Makefile b/Makefile
index 3375fee..c6fbb30 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: help install dev build test docker-up docker-down docker-logs docker-ps docker-build docker-restart docker-test clean
+.PHONY: help install dev build test docker-up docker-down docker-logs docker-ps docker-build docker-restart docker-test speech-up speech-down speech-logs clean
 
 # Default target
 help:
@@ -24,6 +24,11 @@ help:
 	@echo "  make docker-test            Run Docker smoke test"
 	@echo "  make docker-test-traefik    Run Traefik integration tests"
 	@echo ""
+	@echo "Speech Services:"
+	@echo "  make speech-up              Start speech services (STT + TTS)"
+	@echo "  make speech-down            Stop speech services"
+	@echo "  make speech-logs            View speech service logs"
+	@echo ""
 	@echo "Database:"
 	@echo "  make db-migrate       Run database migrations"
 	@echo "  make db-seed          Seed development data"
@@ -85,6 +90,16 @@ docker-test:
 docker-test-traefik:
 	./tests/integration/docker/traefik.test.sh all
 
+# Speech services. speech-down removes only the speech containers ("down" would stop the whole merged stack).
+speech-up:
+	docker compose -f docker-compose.yml -f docker-compose.speech.yml up -d speaches kokoro-tts
+
+speech-down:
+	docker compose -f docker-compose.yml -f docker-compose.speech.yml rm --stop --force speaches kokoro-tts
+
+speech-logs:
+	docker compose -f docker-compose.yml -f docker-compose.speech.yml logs -f speaches kokoro-tts
+
 # Database operations
 db-migrate:
 	cd apps/api && pnpm prisma:migrate
diff --git a/docker-compose.speech.yml b/docker-compose.speech.yml
new file mode 100644
index 0000000..855a947
--- /dev/null
+++ b/docker-compose.speech.yml
@@ -0,0 +1,113 @@
+# ==============================================
+# Speech Services - Docker Compose Dev Overlay
+# ==============================================
+#
+# Adds STT and TTS services for local development.
+#
+# Usage:
+#   Base stack plus speech (STT + default TTS):
+#     docker compose -f docker-compose.yml -f docker-compose.speech.yml up -d
+#
+#   Base stack plus speech and premium TTS (requires GPU):
+#     docker compose -f docker-compose.yml -f docker-compose.speech.yml --profile premium-tts up -d
+#
+#   Or use Makefile targets:
+#     make speech-up              # Basic speech services
+#     make speech-down            # Stop speech services
+#     make speech-logs            # View speech service logs
+# ==============================================
+
+services:
+  # ======================
+  # Speaches (STT + basic TTS) - published on host port SPEACHES_PORT (default 8090)
+  # ======================
+  speaches:
+    image: ghcr.io/speaches-ai/speaches:latest  # NOTE(review): upstream documents latest-cpu / latest-cuda tags; confirm a bare "latest" tag exists
+    container_name: mosaic-speaches
+    restart: unless-stopped
+    environment:
+      WHISPER__MODEL: ${SPEACHES_WHISPER_MODEL:-Systran/faster-whisper-large-v3-turbo}  # overridable via SPEACHES_WHISPER_MODEL
+    ports:
+      - "${SPEACHES_PORT:-8090}:8000"  # host:container
+    volumes:
+      - speaches_models:/home/ubuntu/.cache/huggingface/hub  # model cache; upstream image runs as user "ubuntu", not root
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]  # probes the container-side port
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 120s  # failed probes during this window do not count toward retries
+    networks:
+      - mosaic-internal
+    labels:
+      - "com.mosaic.service=speech-stt"
+      - "com.mosaic.description=Speaches STT (Whisper) and basic TTS"
+
+  # ======================
+  # Kokoro TTS (Default TTS) - published on host port KOKORO_TTS_PORT (default 8880)
+  # ======================
+  kokoro-tts:
+    image: ghcr.io/remsky/kokoro-fastapi-cpu:latest  # upstream publishes separate -cpu / -gpu images rather than a "latest-cpu" tag
+    container_name: mosaic-kokoro-tts
+    restart: unless-stopped
+    ports:
+      - "${KOKORO_TTS_PORT:-8880}:8880"  # host:container
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://localhost:8880/health || exit 1"]  # probes the container-side port
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 120s  # failed probes during this window do not count toward retries
+    networks:
+      - mosaic-internal
+    labels:
+      - "com.mosaic.service=speech-tts"
+      - "com.mosaic.description=Kokoro FastAPI TTS engine"
+
+  # ======================
+  # Chatterbox TTS (Premium TTS - Optional) - published on host port CHATTERBOX_TTS_PORT (default 8881)
+  # ======================
+  # Not started by default; enabled only with: --profile premium-tts
+  # Reserves one NVIDIA GPU device, so the host needs the Docker NVIDIA runtime
+  chatterbox-tts:
+    image: devnen/chatterbox-tts-server:latest  # NOTE(review): confirm this image is published on a registry; upstream may expect a local build
+    container_name: mosaic-chatterbox-tts
+    restart: unless-stopped
+    ports:
+      - "${CHATTERBOX_TTS_PORT:-8881}:8000"  # NOTE(review): upstream docs appear to default to port 8004; confirm the container listens on 8000
+    profiles:
+      - premium-tts
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]  # NOTE(review): confirm the image ships curl and serves /health on this port
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 180s  # longer grace window than the other speech services (120s)
+    networks:
+      - mosaic-internal
+    labels:
+      - "com.mosaic.service=speech-tts-premium"
+      - "com.mosaic.description=Chatterbox premium TTS with voice cloning (GPU)"
+
+# ======================
+# Volumes (named storage mounted by the speaches service)
+# ======================
+volumes:
+  speaches_models:
+    driver: local
+    name: mosaic-speaches-models
+
+# ======================
+# Networks (pre-existing; Compose will not create an external network)
+# ======================
+networks:
+  mosaic-internal:
+    external: true  # NOTE(review): if docker-compose.yml also defines mosaic-internal as non-external, confirm the merged config is valid
+    name: mosaic-internal