feat(#401): add speech services config and env vars
All checks were successful
ci/woodpecker/push/api Pipeline was successful
All checks were successful
ci/woodpecker/push/api Pipeline was successful
Add SpeechConfig with typed configuration and startup validation for STT (Whisper/Speaches), TTS default (Kokoro), TTS premium (Chatterbox), and TTS fallback (Piper/OpenedAI). Includes registerAs factory for NestJS ConfigModule integration, .env.example documentation, and 51 unit tests covering all validation paths. Refs #401
This commit is contained in:
39
.env.example
39
.env.example
@@ -350,6 +350,45 @@ OLLAMA_MODEL=llama3.1:latest
|
||||
# Get your API key from: https://platform.openai.com/api-keys
|
||||
# OPENAI_API_KEY=sk-...
|
||||
|
||||
# ======================
|
||||
# Speech Services (STT / TTS)
|
||||
# ======================
|
||||
# Speech-to-Text (STT) - Whisper via Speaches
|
||||
# Set STT_ENABLED=true to enable speech-to-text transcription
|
||||
# STT_BASE_URL is required when STT_ENABLED=true
|
||||
STT_ENABLED=true
|
||||
STT_BASE_URL=http://speaches:8000/v1
|
||||
STT_MODEL=Systran/faster-whisper-large-v3-turbo
|
||||
STT_LANGUAGE=en
|
||||
|
||||
# Text-to-Speech (TTS) - Default Engine (Kokoro)
|
||||
# Set TTS_ENABLED=true to enable text-to-speech synthesis
|
||||
# TTS_DEFAULT_URL is required when TTS_ENABLED=true
|
||||
TTS_ENABLED=true
|
||||
TTS_DEFAULT_URL=http://kokoro-tts:8880/v1
|
||||
TTS_DEFAULT_VOICE=af_heart
|
||||
TTS_DEFAULT_FORMAT=mp3
|
||||
|
||||
# Text-to-Speech (TTS) - Premium Engine (Chatterbox) - Optional
|
||||
# Higher-quality voice-cloning engine, disabled by default
|
||||
# TTS_PREMIUM_URL is required when TTS_PREMIUM_ENABLED=true
|
||||
TTS_PREMIUM_ENABLED=false
|
||||
TTS_PREMIUM_URL=http://chatterbox-tts:8881/v1
|
||||
|
||||
# Text-to-Speech (TTS) - Fallback Engine (Piper/OpenedAI) - Optional
|
||||
# Lightweight fallback engine, disabled by default
|
||||
# TTS_FALLBACK_URL is required when TTS_FALLBACK_ENABLED=true
|
||||
TTS_FALLBACK_ENABLED=false
|
||||
TTS_FALLBACK_URL=http://openedai-speech:8000/v1
|
||||
|
||||
# Speech Service Limits
|
||||
# Maximum upload file size in bytes (default: 25000000 = 25 MB)
|
||||
SPEECH_MAX_UPLOAD_SIZE=25000000
|
||||
# Maximum audio duration in seconds (default: 600 = 10 minutes)
|
||||
SPEECH_MAX_DURATION_SECONDS=600
|
||||
# Maximum text length for TTS in characters (default: 4096)
|
||||
SPEECH_MAX_TEXT_LENGTH=4096
|
||||
|
||||
# ======================
|
||||
# Logging & Debugging
|
||||
# ======================
|
||||
|
||||
@@ -4,15 +4,22 @@
|
||||
|
||||
## Patterns
|
||||
|
||||
<!-- Add module-specific patterns as you discover them -->
|
||||
- **Config validation pattern**: Config files use exported validation functions + typed getter functions (not class-validator). See `auth.config.ts`, `federation.config.ts`, `speech/speech.config.ts`. Pattern: export `isXEnabled()`, `validateXConfig()`, and `getXConfig()` functions.
|
||||
- **Config registerAs**: `speech.config.ts` also exports a `registerAs("speech", ...)` factory for NestJS ConfigModule namespaced injection. Use `ConfigModule.forFeature(speechConfig)` in module imports and access via `this.config.get<string>('speech.stt.baseUrl')`.
|
||||
- **Conditional config validation**: When a service has an enabled flag (e.g., `STT_ENABLED`), URL/connection vars are only required when enabled. Validation throws with a helpful message suggesting how to disable.
|
||||
- **Boolean env parsing**: Use the `value === "true" || value === "1"` pattern. Flags never default to true -- every service is disabled when its env var is unset.
|
||||
|
||||
## Gotchas
|
||||
|
||||
<!-- Add things that trip up agents in this module -->
|
||||
- **Prisma client must be generated** before `tsc --noEmit` will pass. Run `pnpm prisma:generate` first. Pre-existing type errors from Prisma are expected in worktrees without generated client.
|
||||
- **Pre-commit hooks**: lint-staged runs on staged files. If other packages' files are staged, their lint must pass too. Only stage files you intend to commit.
|
||||
- **vitest runs all test files**: Even when targeting a specific test file, vitest loads all spec files. Many will fail if Prisma client isn't generated -- this is expected. Check only your target file's pass/fail status.
|
||||
|
||||
## Key Files
|
||||
|
||||
| File | Purpose |
|
||||
| ---- | ------- |
|
||||
|
||||
<!-- Add important files in this directory -->
|
||||
| File | Purpose |
|
||||
| ------------------------------------- | ---------------------------------------------------------------------- |
|
||||
| `src/speech/speech.config.ts` | Speech services env var validation and typed config (STT, TTS, limits) |
|
||||
| `src/speech/speech.config.spec.ts` | Unit tests for speech config validation (51 tests) |
|
||||
| `src/auth/auth.config.ts` | Auth/OIDC config validation (reference pattern) |
|
||||
| `src/federation/federation.config.ts` | Federation config validation (reference pattern) |
|
||||
|
||||
458
apps/api/src/speech/speech.config.spec.ts
Normal file
458
apps/api/src/speech/speech.config.spec.ts
Normal file
@@ -0,0 +1,458 @@
|
||||
/**
|
||||
* Speech Configuration Tests
|
||||
*
|
||||
* Issue #401: Tests for speech services environment variable validation
|
||||
* Tests cover STT, TTS (default, premium, fallback), and speech limits configuration.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
||||
import {
|
||||
isSttEnabled,
|
||||
isTtsEnabled,
|
||||
isTtsPremiumEnabled,
|
||||
isTtsFallbackEnabled,
|
||||
validateSpeechConfig,
|
||||
getSpeechConfig,
|
||||
type SpeechConfig,
|
||||
} from "./speech.config";
|
||||
|
||||
describe("speech.config", () => {
|
||||
// Snapshot of the environment taken once, when the suite is defined.
const originalEnv = { ...process.env };

beforeEach(() => {
  // Every test starts with no speech-related variables defined.
  const speechEnvVars = [
    "STT_ENABLED",
    "STT_BASE_URL",
    "STT_MODEL",
    "STT_LANGUAGE",
    "TTS_ENABLED",
    "TTS_DEFAULT_URL",
    "TTS_DEFAULT_VOICE",
    "TTS_DEFAULT_FORMAT",
    "TTS_PREMIUM_ENABLED",
    "TTS_PREMIUM_URL",
    "TTS_FALLBACK_ENABLED",
    "TTS_FALLBACK_URL",
    "SPEECH_MAX_UPLOAD_SIZE",
    "SPEECH_MAX_DURATION_SECONDS",
    "SPEECH_MAX_TEXT_LENGTH",
  ];

  for (const name of speechEnvVars) {
    delete process.env[name];
  }
});

afterEach(() => {
  // Put back a copy of the snapshot so changes made by a test do not leak.
  process.env = { ...originalEnv };
});
|
||||
|
||||
// ==========================================
|
||||
// STT enabled check
|
||||
// ==========================================
|
||||
describe("isSttEnabled", () => {
  it("should return false when STT_ENABLED is not set", () => {
    expect(isSttEnabled()).toBe(false);
  });

  // [test title, raw STT_ENABLED value, expected result]
  const flagCases: Array<[string, string, boolean]> = [
    ["should return false when STT_ENABLED is 'false'", "false", false],
    ["should return false when STT_ENABLED is '0'", "0", false],
    ["should return false when STT_ENABLED is empty string", "", false],
    ["should return true when STT_ENABLED is 'true'", "true", true],
    ["should return true when STT_ENABLED is '1'", "1", true],
  ];

  it.each(flagCases)("%s", (_title, rawFlag, expected) => {
    process.env.STT_ENABLED = rawFlag;
    expect(isSttEnabled()).toBe(expected);
  });
});
|
||||
|
||||
// ==========================================
|
||||
// TTS enabled check
|
||||
// ==========================================
|
||||
describe("isTtsEnabled", () => {
  it("should return false when TTS_ENABLED is not set", () => {
    expect(isTtsEnabled()).toBe(false);
  });

  // [test title, raw TTS_ENABLED value, expected result]
  const flagCases: Array<[string, string, boolean]> = [
    ["should return false when TTS_ENABLED is 'false'", "false", false],
    ["should return true when TTS_ENABLED is 'true'", "true", true],
    ["should return true when TTS_ENABLED is '1'", "1", true],
  ];

  it.each(flagCases)("%s", (_title, rawFlag, expected) => {
    process.env.TTS_ENABLED = rawFlag;
    expect(isTtsEnabled()).toBe(expected);
  });
});
|
||||
|
||||
// ==========================================
|
||||
// TTS premium enabled check
|
||||
// ==========================================
|
||||
describe("isTtsPremiumEnabled", () => {
  it("should return false when TTS_PREMIUM_ENABLED is not set", () => {
    expect(isTtsPremiumEnabled()).toBe(false);
  });

  // [test title, raw TTS_PREMIUM_ENABLED value, expected result]
  const flagCases: Array<[string, string, boolean]> = [
    ["should return false when TTS_PREMIUM_ENABLED is 'false'", "false", false],
    ["should return true when TTS_PREMIUM_ENABLED is 'true'", "true", true],
  ];

  it.each(flagCases)("%s", (_title, rawFlag, expected) => {
    process.env.TTS_PREMIUM_ENABLED = rawFlag;
    expect(isTtsPremiumEnabled()).toBe(expected);
  });
});
|
||||
|
||||
// ==========================================
|
||||
// TTS fallback enabled check
|
||||
// ==========================================
|
||||
describe("isTtsFallbackEnabled", () => {
  it("should return false when TTS_FALLBACK_ENABLED is not set", () => {
    expect(isTtsFallbackEnabled()).toBe(false);
  });

  // [test title, raw TTS_FALLBACK_ENABLED value, expected result]
  const flagCases: Array<[string, string, boolean]> = [
    ["should return false when TTS_FALLBACK_ENABLED is 'false'", "false", false],
    ["should return true when TTS_FALLBACK_ENABLED is 'true'", "true", true],
  ];

  it.each(flagCases)("%s", (_title, rawFlag, expected) => {
    process.env.TTS_FALLBACK_ENABLED = rawFlag;
    expect(isTtsFallbackEnabled()).toBe(expected);
  });
});
|
||||
|
||||
// ==========================================
|
||||
// validateSpeechConfig
|
||||
// ==========================================
|
||||
describe("validateSpeechConfig", () => {
|
||||
describe("when all services are disabled", () => {
  it("should not throw when no speech services are enabled", () => {
    expect(validateSpeechConfig).not.toThrow();
  });

  it("should not throw when services are explicitly disabled", () => {
    const disabledFlags = [
      "STT_ENABLED",
      "TTS_ENABLED",
      "TTS_PREMIUM_ENABLED",
      "TTS_FALLBACK_ENABLED",
    ];
    for (const flag of disabledFlags) {
      process.env[flag] = "false";
    }

    expect(validateSpeechConfig).not.toThrow();
  });
});
|
||||
|
||||
describe("STT validation", () => {
  // STT is switched on for every test in this group; STT_BASE_URL starts out unset.
  beforeEach(() => {
    process.env.STT_ENABLED = "true";
  });

  it("should throw when STT is enabled but STT_BASE_URL is missing", () => {
    expect(validateSpeechConfig).toThrow("STT_BASE_URL");
    expect(validateSpeechConfig).toThrow(
      "STT is enabled (STT_ENABLED=true) but required environment variables are missing"
    );
  });

  // [test title, blank STT_BASE_URL value that must be rejected]
  it.each([
    ["should throw when STT_BASE_URL is empty string", ""],
    ["should throw when STT_BASE_URL is whitespace only", " "],
  ])("%s", (_title, blankUrl) => {
    process.env.STT_BASE_URL = blankUrl;
    expect(validateSpeechConfig).toThrow("STT_BASE_URL");
  });

  it("should not throw when STT is enabled and STT_BASE_URL is set", () => {
    process.env.STT_BASE_URL = "http://speaches:8000/v1";
    expect(validateSpeechConfig).not.toThrow();
  });

  it("should suggest disabling STT in error message", () => {
    expect(validateSpeechConfig).toThrow("STT_ENABLED=false");
  });
});
|
||||
|
||||
describe("TTS default validation", () => {
  // The default TTS engine is switched on for every test in this group.
  beforeEach(() => {
    process.env.TTS_ENABLED = "true";
  });

  it("should throw when TTS is enabled but TTS_DEFAULT_URL is missing", () => {
    const expectedFragments = [
      "TTS_DEFAULT_URL",
      "TTS is enabled (TTS_ENABLED=true) but required environment variables are missing",
    ];
    for (const fragment of expectedFragments) {
      expect(validateSpeechConfig).toThrow(fragment);
    }
  });

  it("should throw when TTS_DEFAULT_URL is empty string", () => {
    process.env.TTS_DEFAULT_URL = "";
    expect(validateSpeechConfig).toThrow("TTS_DEFAULT_URL");
  });

  it("should not throw when TTS is enabled and TTS_DEFAULT_URL is set", () => {
    process.env.TTS_DEFAULT_URL = "http://kokoro-tts:8880/v1";
    expect(validateSpeechConfig).not.toThrow();
  });

  it("should suggest disabling TTS in error message", () => {
    expect(validateSpeechConfig).toThrow("TTS_ENABLED=false");
  });
});
|
||||
|
||||
describe("TTS premium validation", () => {
  // The premium TTS engine is switched on for every test in this group.
  beforeEach(() => {
    process.env.TTS_PREMIUM_ENABLED = "true";
  });

  it("should throw when TTS premium is enabled but TTS_PREMIUM_URL is missing", () => {
    const expectedFragments = [
      "TTS_PREMIUM_URL",
      "TTS premium is enabled (TTS_PREMIUM_ENABLED=true) but required environment variables are missing",
    ];
    for (const fragment of expectedFragments) {
      expect(validateSpeechConfig).toThrow(fragment);
    }
  });

  it("should throw when TTS_PREMIUM_URL is empty string", () => {
    process.env.TTS_PREMIUM_URL = "";
    expect(validateSpeechConfig).toThrow("TTS_PREMIUM_URL");
  });

  it("should not throw when TTS premium is enabled and TTS_PREMIUM_URL is set", () => {
    process.env.TTS_PREMIUM_URL = "http://chatterbox-tts:8881/v1";
    expect(validateSpeechConfig).not.toThrow();
  });

  it("should suggest disabling TTS premium in error message", () => {
    expect(validateSpeechConfig).toThrow("TTS_PREMIUM_ENABLED=false");
  });
});
|
||||
|
||||
describe("TTS fallback validation", () => {
  // The fallback TTS engine is switched on for every test in this group.
  beforeEach(() => {
    process.env.TTS_FALLBACK_ENABLED = "true";
  });

  it("should throw when TTS fallback is enabled but TTS_FALLBACK_URL is missing", () => {
    const expectedFragments = [
      "TTS_FALLBACK_URL",
      "TTS fallback is enabled (TTS_FALLBACK_ENABLED=true) but required environment variables are missing",
    ];
    for (const fragment of expectedFragments) {
      expect(validateSpeechConfig).toThrow(fragment);
    }
  });

  it("should throw when TTS_FALLBACK_URL is empty string", () => {
    process.env.TTS_FALLBACK_URL = "";
    expect(validateSpeechConfig).toThrow("TTS_FALLBACK_URL");
  });

  it("should not throw when TTS fallback is enabled and TTS_FALLBACK_URL is set", () => {
    process.env.TTS_FALLBACK_URL = "http://openedai-speech:8000/v1";
    expect(validateSpeechConfig).not.toThrow();
  });

  it("should suggest disabling TTS fallback in error message", () => {
    expect(validateSpeechConfig).toThrow("TTS_FALLBACK_ENABLED=false");
  });
});
|
||||
|
||||
describe("multiple services enabled simultaneously", () => {
  it("should validate all enabled services", () => {
    // Both services are on, and neither STT_BASE_URL nor TTS_DEFAULT_URL is provided.
    Object.assign(process.env, { STT_ENABLED: "true", TTS_ENABLED: "true" });

    expect(validateSpeechConfig).toThrow("STT_BASE_URL");
  });

  it("should pass when all enabled services are properly configured", () => {
    Object.assign(process.env, {
      STT_ENABLED: "true",
      STT_BASE_URL: "http://speaches:8000/v1",
      TTS_ENABLED: "true",
      TTS_DEFAULT_URL: "http://kokoro-tts:8880/v1",
      TTS_PREMIUM_ENABLED: "true",
      TTS_PREMIUM_URL: "http://chatterbox-tts:8881/v1",
      TTS_FALLBACK_ENABLED: "true",
      TTS_FALLBACK_URL: "http://openedai-speech:8000/v1",
    });

    expect(validateSpeechConfig).not.toThrow();
  });
});
|
||||
|
||||
describe("limits validation", () => {
  it("should throw when SPEECH_MAX_UPLOAD_SIZE is not a valid number", () => {
    process.env.SPEECH_MAX_UPLOAD_SIZE = "not-a-number";
    expect(validateSpeechConfig).toThrow("SPEECH_MAX_UPLOAD_SIZE");
    expect(validateSpeechConfig).toThrow("must be a positive integer");
  });

  // [test title, limit variable, rejected raw value]
  it.each([
    ["should throw when SPEECH_MAX_UPLOAD_SIZE is negative", "SPEECH_MAX_UPLOAD_SIZE", "-100"],
    ["should throw when SPEECH_MAX_UPLOAD_SIZE is zero", "SPEECH_MAX_UPLOAD_SIZE", "0"],
    [
      "should throw when SPEECH_MAX_DURATION_SECONDS is not a valid number",
      "SPEECH_MAX_DURATION_SECONDS",
      "abc",
    ],
    [
      "should throw when SPEECH_MAX_TEXT_LENGTH is not a valid number",
      "SPEECH_MAX_TEXT_LENGTH",
      "xyz",
    ],
  ])("%s", (_title, limitVar, rawValue) => {
    process.env[limitVar] = rawValue;
    // The error message is expected to name the offending variable.
    expect(validateSpeechConfig).toThrow(limitVar);
  });

  it("should not throw when limits are valid positive integers", () => {
    Object.assign(process.env, {
      SPEECH_MAX_UPLOAD_SIZE: "50000000",
      SPEECH_MAX_DURATION_SECONDS: "1200",
      SPEECH_MAX_TEXT_LENGTH: "8192",
    });
    expect(validateSpeechConfig).not.toThrow();
  });

  it("should not throw when limits are not set (uses defaults)", () => {
    expect(validateSpeechConfig).not.toThrow();
  });
});
|
||||
});
|
||||
|
||||
// ==========================================
|
||||
// getSpeechConfig
|
||||
// ==========================================
|
||||
describe("getSpeechConfig", () => {
  it("should return default values when no env vars are set", () => {
    expect(getSpeechConfig()).toMatchObject({
      stt: {
        enabled: false,
        baseUrl: "http://speaches:8000/v1",
        model: "Systran/faster-whisper-large-v3-turbo",
        language: "en",
      },
      tts: {
        default: {
          enabled: false,
          url: "http://kokoro-tts:8880/v1",
          voice: "af_heart",
          format: "mp3",
        },
        premium: { enabled: false, url: "http://chatterbox-tts:8881/v1" },
        fallback: { enabled: false, url: "http://openedai-speech:8000/v1" },
      },
      limits: {
        maxUploadSize: 25000000,
        maxDurationSeconds: 600,
        maxTextLength: 4096,
      },
    });
  });

  it("should use custom env var values when set", () => {
    Object.assign(process.env, {
      STT_ENABLED: "true",
      STT_BASE_URL: "http://custom-stt:9000/v1",
      STT_MODEL: "custom-model",
      STT_LANGUAGE: "fr",
      TTS_ENABLED: "true",
      TTS_DEFAULT_URL: "http://custom-tts:9001/v1",
      TTS_DEFAULT_VOICE: "custom_voice",
      TTS_DEFAULT_FORMAT: "wav",
      TTS_PREMIUM_ENABLED: "true",
      TTS_PREMIUM_URL: "http://custom-premium:9002/v1",
      TTS_FALLBACK_ENABLED: "true",
      TTS_FALLBACK_URL: "http://custom-fallback:9003/v1",
      SPEECH_MAX_UPLOAD_SIZE: "50000000",
      SPEECH_MAX_DURATION_SECONDS: "1200",
      SPEECH_MAX_TEXT_LENGTH: "8192",
    });

    expect(getSpeechConfig()).toMatchObject({
      stt: {
        enabled: true,
        baseUrl: "http://custom-stt:9000/v1",
        model: "custom-model",
        language: "fr",
      },
      tts: {
        default: {
          enabled: true,
          url: "http://custom-tts:9001/v1",
          voice: "custom_voice",
          format: "wav",
        },
        premium: { enabled: true, url: "http://custom-premium:9002/v1" },
        fallback: { enabled: true, url: "http://custom-fallback:9003/v1" },
      },
      limits: {
        maxUploadSize: 50000000,
        maxDurationSeconds: 1200,
        maxTextLength: 8192,
      },
    });
  });

  it("should return typed SpeechConfig object", () => {
    const config: SpeechConfig = getSpeechConfig();

    // The top-level and TTS sections named by the SpeechConfig type must all be present.
    for (const section of ["stt", "tts", "limits"]) {
      expect(config).toHaveProperty(section);
    }
    for (const engine of ["default", "premium", "fallback"]) {
      expect(config.tts).toHaveProperty(engine);
    }
  });

  it("should handle partial env var overrides", () => {
    // Only the flag and URL are overridden; STT_MODEL and STT_LANGUAGE stay unset.
    Object.assign(process.env, {
      STT_ENABLED: "true",
      STT_BASE_URL: "http://custom-stt:9000/v1",
    });

    expect(getSpeechConfig().stt).toMatchObject({
      enabled: true,
      baseUrl: "http://custom-stt:9000/v1",
      model: "Systran/faster-whisper-large-v3-turbo",
      language: "en",
    });
  });

  it("should parse numeric limits correctly", () => {
    process.env.SPEECH_MAX_UPLOAD_SIZE = "10000000";

    const { maxUploadSize } = getSpeechConfig().limits;

    expect(typeof maxUploadSize).toBe("number");
    expect(maxUploadSize).toBe(10000000);
  });
});
|
||||
|
||||
// ==========================================
|
||||
// registerAs integration
|
||||
// ==========================================
|
||||
describe("speechConfig (registerAs factory)", () => {
  // Each test imports the module dynamically and picks out the exported factory.
  const loadFactory = async () => (await import("./speech.config")).speechConfig;

  it("should be importable as a config namespace factory", async () => {
    const factory = await loadFactory();

    expect(factory).toBeDefined();
    expect(factory.KEY).toBe("CONFIGURATION(speech)");
  });

  it("should return config object when called", async () => {
    const factory = await loadFactory();
    const config = factory() as SpeechConfig;

    for (const section of ["stt", "tts", "limits"]) {
      expect(config).toHaveProperty(section);
    }
  });
});
|
||||
});
|
||||
304
apps/api/src/speech/speech.config.ts
Normal file
304
apps/api/src/speech/speech.config.ts
Normal file
@@ -0,0 +1,304 @@
|
||||
/**
|
||||
* Speech Services Configuration
|
||||
*
|
||||
* Issue #401: Environment variables and validation for STT (speech-to-text),
|
||||
* TTS (text-to-speech), and speech service limits.
|
||||
*
|
||||
* Validates conditional requirements at startup:
|
||||
* - STT_BASE_URL is required when STT_ENABLED=true
|
||||
* - TTS_DEFAULT_URL is required when TTS_ENABLED=true
|
||||
* - TTS_PREMIUM_URL is required when TTS_PREMIUM_ENABLED=true
|
||||
* - TTS_FALLBACK_URL is required when TTS_FALLBACK_ENABLED=true
|
||||
*/
|
||||
|
||||
import { registerAs } from "@nestjs/config";
|
||||
|
||||
// ==========================================
|
||||
// Default values
|
||||
// ==========================================
|
||||
|
||||
/** STT values used by getSpeechConfig() when the matching STT_* variable is undefined. */
const STT_DEFAULTS = {
  baseUrl: "http://speaches:8000/v1",
  model: "Systran/faster-whisper-large-v3-turbo",
  language: "en",
} as const;

/** Values for the default TTS engine, used when the matching TTS_DEFAULT_* variable is undefined. */
const TTS_DEFAULT_DEFAULTS = {
  url: "http://kokoro-tts:8880/v1",
  voice: "af_heart",
  format: "mp3",
} as const;

/** URL of the premium TTS engine, used when TTS_PREMIUM_URL is undefined. */
const TTS_PREMIUM_DEFAULTS = {
  url: "http://chatterbox-tts:8881/v1",
} as const;

/** URL of the fallback TTS engine, used when TTS_FALLBACK_URL is undefined. */
const TTS_FALLBACK_DEFAULTS = {
  url: "http://openedai-speech:8000/v1",
} as const;

/** Limits used when the matching SPEECH_MAX_* variable is undefined. */
const LIMITS_DEFAULTS = {
  // Upload size in bytes.
  maxUploadSize: 25_000_000,
  // Audio duration in seconds.
  maxDurationSeconds: 600,
  // TTS input length in characters.
  maxTextLength: 4096,
} as const;
|
||||
|
||||
// ==========================================
|
||||
// Types
|
||||
// ==========================================
|
||||
|
||||
/** Speech-to-text settings as produced by getSpeechConfig(). */
export interface SttConfig {
  /** Parsed value of STT_ENABLED. */
  enabled: boolean;
  /** Value of STT_BASE_URL, or the built-in default. */
  baseUrl: string;
  /** Value of STT_MODEL, or the built-in default. */
  model: string;
  /** Value of STT_LANGUAGE, or the built-in default. */
  language: string;
}
|
||||
|
||||
/** Settings for the default text-to-speech engine. */
export interface TtsDefaultConfig {
  /** Parsed value of TTS_ENABLED. */
  enabled: boolean;
  /** Value of TTS_DEFAULT_URL, or the built-in default. */
  url: string;
  /** Value of TTS_DEFAULT_VOICE, or the built-in default. */
  voice: string;
  /** Value of TTS_DEFAULT_FORMAT, or the built-in default. */
  format: string;
}
|
||||
|
||||
/** Settings for the optional premium text-to-speech engine. */
export interface TtsPremiumConfig {
  /** Parsed value of TTS_PREMIUM_ENABLED. */
  enabled: boolean;
  /** Value of TTS_PREMIUM_URL, or the built-in default. */
  url: string;
}
|
||||
|
||||
/** Settings for the optional fallback text-to-speech engine. */
export interface TtsFallbackConfig {
  /** Parsed value of TTS_FALLBACK_ENABLED. */
  enabled: boolean;
  /** Value of TTS_FALLBACK_URL, or the built-in default. */
  url: string;
}
|
||||
|
||||
/** Groups the three text-to-speech engine configurations. */
export interface TtsConfig {
  /** Default engine, controlled by TTS_ENABLED. */
  default: TtsDefaultConfig;
  /** Premium engine, controlled by TTS_PREMIUM_ENABLED. */
  premium: TtsPremiumConfig;
  /** Fallback engine, controlled by TTS_FALLBACK_ENABLED. */
  fallback: TtsFallbackConfig;
}
|
||||
|
||||
/** Numeric limits shared by the speech services. */
export interface SpeechLimitsConfig {
  /** From SPEECH_MAX_UPLOAD_SIZE (bytes). */
  maxUploadSize: number;
  /** From SPEECH_MAX_DURATION_SECONDS (seconds). */
  maxDurationSeconds: number;
  /** From SPEECH_MAX_TEXT_LENGTH (characters). */
  maxTextLength: number;
}
|
||||
|
||||
/** Complete speech configuration returned by getSpeechConfig(). */
export interface SpeechConfig {
  /** Speech-to-text settings. */
  stt: SttConfig;
  /** Text-to-speech settings for the default, premium and fallback engines. */
  tts: TtsConfig;
  /** Upload size, audio duration and text length limits. */
  limits: SpeechLimitsConfig;
}
|
||||
|
||||
// ==========================================
|
||||
// Helper: parse boolean env var
|
||||
// ==========================================
|
||||
|
||||
/**
 * Interpret a raw environment value as an on/off flag.
 *
 * Only the exact strings "true" and "1" count as enabled. Every other value,
 * including undefined and the empty string, is treated as disabled.
 *
 * @param value - raw environment variable value, possibly undefined
 * @returns true only for "true" or "1"
 */
function parseBooleanEnv(value: string | undefined): boolean {
  switch (value) {
    case "true":
    case "1":
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
// ==========================================
|
||||
// Enabled checks
|
||||
// ==========================================
|
||||
|
||||
/**
 * Report whether speech-to-text (STT) is switched on.
 *
 * @returns the STT_ENABLED environment variable as interpreted by parseBooleanEnv
 */
export function isSttEnabled(): boolean {
  const { STT_ENABLED: rawFlag } = process.env;
  return parseBooleanEnv(rawFlag);
}
|
||||
|
||||
/**
 * Report whether the default text-to-speech (TTS) engine is switched on.
 *
 * @returns the TTS_ENABLED environment variable as interpreted by parseBooleanEnv
 */
export function isTtsEnabled(): boolean {
  const { TTS_ENABLED: rawFlag } = process.env;
  return parseBooleanEnv(rawFlag);
}
|
||||
|
||||
/**
 * Report whether the premium TTS engine (Chatterbox) is switched on.
 *
 * @returns the TTS_PREMIUM_ENABLED environment variable as interpreted by parseBooleanEnv
 */
export function isTtsPremiumEnabled(): boolean {
  const { TTS_PREMIUM_ENABLED: rawFlag } = process.env;
  return parseBooleanEnv(rawFlag);
}
|
||||
|
||||
/**
 * Report whether the fallback TTS engine (Piper/OpenedAI) is switched on.
 *
 * @returns the TTS_FALLBACK_ENABLED environment variable as interpreted by parseBooleanEnv
 */
export function isTtsFallbackEnabled(): boolean {
  const { TTS_FALLBACK_ENABLED: rawFlag } = process.env;
  return parseBooleanEnv(rawFlag);
}
|
||||
|
||||
// ==========================================
|
||||
// Validation helpers
|
||||
// ==========================================
|
||||
|
||||
/**
 * Tell whether an environment variable carries a usable value.
 *
 * @param envVar - name of the environment variable to inspect
 * @returns false when the variable is undefined, empty, or whitespace-only; true otherwise
 */
function isEnvVarSet(envVar: string): boolean {
  const raw = process.env[envVar];
  if (raw === undefined) {
    return false;
  }
  return raw.trim().length > 0;
}
|
||||
|
||||
/**
 * Ensure every variable an enabled service depends on has a non-empty value.
 *
 * @param serviceName - human-readable service label used at the start of the error message
 * @param enabledFlag - name of the flag variable, quoted in the message as the way to disable
 * @param requiredVars - names of the environment variables that must be set
 * @throws Error listing all missing or empty variables and how to disable the service
 */
function validateRequiredVars(
  serviceName: string,
  enabledFlag: string,
  requiredVars: string[]
): void {
  const missingVars = requiredVars.filter((name) => !isEnvVarSet(name));

  if (missingVars.length === 0) {
    return;
  }

  const problem = `${serviceName} is enabled (${enabledFlag}=true) but required environment variables are missing or empty: ${missingVars.join(", ")}. `;
  const remedy = `Either set these variables or disable by setting ${enabledFlag}=false.`;
  throw new Error(problem + remedy);
}
|
||||
|
||||
/**
 * Check that a numeric environment variable, when provided, is a positive integer.
 *
 * An undefined, empty, or whitespace-only value is accepted without error.
 * Otherwise the trimmed value must be exactly the decimal form of an integer
 * greater than zero, so inputs such as "4.5", "007" or "12abc" are rejected.
 *
 * @param envVar - name of the environment variable to check
 * @throws Error naming the variable and quoting its current value when invalid
 */
function validatePositiveInteger(envVar: string): void {
  const raw = process.env[envVar];
  if (raw === undefined) {
    return;
  }

  const trimmed = raw.trim();
  if (trimmed === "") {
    return;
  }

  const parsed = Number.parseInt(raw, 10);
  // Comparing the round-tripped string rejects partial parses like "12abc" -> 12.
  const isCanonicalPositive =
    !Number.isNaN(parsed) && parsed > 0 && String(parsed) === trimmed;

  if (!isCanonicalPositive) {
    throw new Error(`${envVar} must be a positive integer. Current value: "${raw}".`);
  }
}
|
||||
|
||||
// ==========================================
|
||||
// Main validation
|
||||
// ==========================================
|
||||
|
||||
/**
 * Fail-fast validation of the speech environment, intended to be called
 * during module initialization.
 *
 * For each enabled service its URL variable must be set:
 * - STT_ENABLED          -> STT_BASE_URL
 * - TTS_ENABLED          -> TTS_DEFAULT_URL
 * - TTS_PREMIUM_ENABLED  -> TTS_PREMIUM_URL
 * - TTS_FALLBACK_ENABLED -> TTS_FALLBACK_URL
 *
 * The three SPEECH_MAX_* limits, when set, must be positive integers.
 * Checks run in the order above and the first failure throws.
 *
 * @throws Error if any required configuration is missing or invalid
 */
export function validateSpeechConfig(): void {
  const serviceChecks: Array<{
    isEnabled: () => boolean;
    label: string;
    flag: string;
    required: string[];
  }> = [
    { isEnabled: isSttEnabled, label: "STT", flag: "STT_ENABLED", required: ["STT_BASE_URL"] },
    { isEnabled: isTtsEnabled, label: "TTS", flag: "TTS_ENABLED", required: ["TTS_DEFAULT_URL"] },
    {
      isEnabled: isTtsPremiumEnabled,
      label: "TTS premium",
      flag: "TTS_PREMIUM_ENABLED",
      required: ["TTS_PREMIUM_URL"],
    },
    {
      isEnabled: isTtsFallbackEnabled,
      label: "TTS fallback",
      flag: "TTS_FALLBACK_ENABLED",
      required: ["TTS_FALLBACK_URL"],
    },
  ];

  for (const check of serviceChecks) {
    if (check.isEnabled()) {
      validateRequiredVars(check.label, check.flag, check.required);
    }
  }

  // Limits are checked regardless of which services are enabled.
  const limitVars = [
    "SPEECH_MAX_UPLOAD_SIZE",
    "SPEECH_MAX_DURATION_SECONDS",
    "SPEECH_MAX_TEXT_LENGTH",
  ];
  for (const limitVar of limitVars) {
    validatePositiveInteger(limitVar);
  }
}
|
||||
|
||||
// ==========================================
|
||||
// Config getter
|
||||
// ==========================================
|
||||
|
||||
/**
 * Build the full speech configuration from the current environment.
 *
 * String settings use the environment value when it is defined and the
 * built-in default otherwise. An empty string is kept as given.
 *
 * Numeric limits use the built-in default when the variable is undefined,
 * empty, or whitespace-only. This matches validatePositiveInteger(), which
 * accepts such values as "not set"; previously a blank value was passed to
 * parseInt() and produced NaN.
 *
 * This function does not validate; call validateSpeechConfig() for that.
 *
 * @returns SpeechConfig with all STT, TTS, and limits configuration
 */
export function getSpeechConfig(): SpeechConfig {
  // Read a numeric limit, treating blank values the same as an unset variable.
  const readLimit = (envVar: string, fallback: number): number => {
    const raw = process.env[envVar];
    if (raw === undefined || raw.trim() === "") {
      return fallback;
    }
    return parseInt(raw, 10);
  };

  return {
    stt: {
      enabled: isSttEnabled(),
      baseUrl: process.env.STT_BASE_URL ?? STT_DEFAULTS.baseUrl,
      model: process.env.STT_MODEL ?? STT_DEFAULTS.model,
      language: process.env.STT_LANGUAGE ?? STT_DEFAULTS.language,
    },
    tts: {
      default: {
        enabled: isTtsEnabled(),
        url: process.env.TTS_DEFAULT_URL ?? TTS_DEFAULT_DEFAULTS.url,
        voice: process.env.TTS_DEFAULT_VOICE ?? TTS_DEFAULT_DEFAULTS.voice,
        format: process.env.TTS_DEFAULT_FORMAT ?? TTS_DEFAULT_DEFAULTS.format,
      },
      premium: {
        enabled: isTtsPremiumEnabled(),
        url: process.env.TTS_PREMIUM_URL ?? TTS_PREMIUM_DEFAULTS.url,
      },
      fallback: {
        enabled: isTtsFallbackEnabled(),
        url: process.env.TTS_FALLBACK_URL ?? TTS_FALLBACK_DEFAULTS.url,
      },
    },
    limits: {
      maxUploadSize: readLimit("SPEECH_MAX_UPLOAD_SIZE", LIMITS_DEFAULTS.maxUploadSize),
      maxDurationSeconds: readLimit(
        "SPEECH_MAX_DURATION_SECONDS",
        LIMITS_DEFAULTS.maxDurationSeconds
      ),
      maxTextLength: readLimit("SPEECH_MAX_TEXT_LENGTH", LIMITS_DEFAULTS.maxTextLength),
    },
  };
}
|
||||
|
||||
// ==========================================
|
||||
// NestJS ConfigModule registerAs factory
|
||||
// ==========================================
|
||||
|
||||
/**
 * Namespaced NestJS ConfigModule factory registered under the "speech" key.
 * Calling it returns the result of getSpeechConfig().
 *
 * Register it in a module:
 * ```typescript
 * import { speechConfig } from './speech.config';
 *
 * @Module({
 *   imports: [ConfigModule.forFeature(speechConfig)],
 * })
 * export class SpeechModule {}
 * ```
 *
 * Read values through ConfigService:
 * ```typescript
 * constructor(private config: ConfigService) {
 *   const sttUrl = this.config.get<string>('speech.stt.baseUrl');
 * }
 * ```
 */
export const speechConfig = registerAs("speech", (): SpeechConfig => getSpeechConfig());
|
||||
Reference in New Issue
Block a user