feat(#401): add speech services config and env vars

Add SpeechConfig with typed configuration and startup validation for STT (Whisper/Speaches), TTS default (Kokoro), TTS premium (Chatterbox), and TTS fallback (Piper/OpenedAI). Includes registerAs factory for NestJS ConfigModule integration, .env.example documentation, and 51 unit tests covering all validation paths. Refs #401
2026-02-15 02:03:21 -06:00
parent fb53272fa9
commit 4cc43bece6
4 changed files with 814 additions and 6 deletions
--- a/.env.example
+++ b/.env.example
@@ -350,6 +350,45 @@ OLLAMA_MODEL=llama3.1:latest
 # Get your API key from: https://platform.openai.com/api-keys
 # OPENAI_API_KEY=sk-...

+# ======================
+# Speech Services (STT / TTS)
+# ======================
+# Speech-to-Text (STT) - Whisper via Speaches
+# Set STT_ENABLED=true to enable speech-to-text transcription
+# STT_BASE_URL is required when STT_ENABLED=true
+STT_ENABLED=true
+STT_BASE_URL=http://speaches:8000/v1
+STT_MODEL=Systran/faster-whisper-large-v3-turbo
+STT_LANGUAGE=en
+
+# Text-to-Speech (TTS) - Default Engine (Kokoro)
+# Set TTS_ENABLED=true to enable text-to-speech synthesis
+# TTS_DEFAULT_URL is required when TTS_ENABLED=true
+TTS_ENABLED=true
+TTS_DEFAULT_URL=http://kokoro-tts:8880/v1
+TTS_DEFAULT_VOICE=af_heart
+TTS_DEFAULT_FORMAT=mp3
+
+# Text-to-Speech (TTS) - Premium Engine (Chatterbox) - Optional
+# Higher-quality voice-cloning engine, disabled by default
+# TTS_PREMIUM_URL is required when TTS_PREMIUM_ENABLED=true
+TTS_PREMIUM_ENABLED=false
+TTS_PREMIUM_URL=http://chatterbox-tts:8881/v1
+
+# Text-to-Speech (TTS) - Fallback Engine (Piper/OpenedAI) - Optional
+# Lightweight fallback engine, disabled by default
+# TTS_FALLBACK_URL is required when TTS_FALLBACK_ENABLED=true
+TTS_FALLBACK_ENABLED=false
+TTS_FALLBACK_URL=http://openedai-speech:8000/v1
+
+# Speech Service Limits
+# Maximum upload file size in bytes (default: 25000000 = 25 MB, decimal)
+SPEECH_MAX_UPLOAD_SIZE=25000000
+# Maximum audio duration in seconds (default: 600 = 10 minutes)
+SPEECH_MAX_DURATION_SECONDS=600
+# Maximum text length for TTS in characters (default: 4096)
+SPEECH_MAX_TEXT_LENGTH=4096
+
 # ======================
 # Logging & Debugging
 # ======================