diff --git a/.env.example b/.env.example index 9ca59fd..05a7d8d 100644 --- a/.env.example +++ b/.env.example @@ -350,6 +350,45 @@ OLLAMA_MODEL=llama3.1:latest # Get your API key from: https://platform.openai.com/api-keys # OPENAI_API_KEY=sk-... +# ====================== +# Speech Services (STT / TTS) +# ====================== +# Speech-to-Text (STT) - Whisper via Speaches +# Set STT_ENABLED=true to enable speech-to-text transcription +# STT_BASE_URL is required when STT_ENABLED=true +STT_ENABLED=true +STT_BASE_URL=http://speaches:8000/v1 +STT_MODEL=Systran/faster-whisper-large-v3-turbo +STT_LANGUAGE=en + +# Text-to-Speech (TTS) - Default Engine (Kokoro) +# Set TTS_ENABLED=true to enable text-to-speech synthesis +# TTS_DEFAULT_URL is required when TTS_ENABLED=true +TTS_ENABLED=true +TTS_DEFAULT_URL=http://kokoro-tts:8880/v1 +TTS_DEFAULT_VOICE=af_heart +TTS_DEFAULT_FORMAT=mp3 + +# Text-to-Speech (TTS) - Premium Engine (Chatterbox) - Optional +# Higher quality voice cloning engine, disabled by default +# TTS_PREMIUM_URL is required when TTS_PREMIUM_ENABLED=true +TTS_PREMIUM_ENABLED=false +TTS_PREMIUM_URL=http://chatterbox-tts:8881/v1 + +# Text-to-Speech (TTS) - Fallback Engine (Piper/OpenedAI) - Optional +# Lightweight fallback engine, disabled by default +# TTS_FALLBACK_URL is required when TTS_FALLBACK_ENABLED=true +TTS_FALLBACK_ENABLED=false +TTS_FALLBACK_URL=http://openedai-speech:8000/v1 + +# Speech Service Limits +# Maximum upload file size in bytes (default: 25MB) +SPEECH_MAX_UPLOAD_SIZE=25000000 +# Maximum audio duration in seconds (default: 600 = 10 minutes) +SPEECH_MAX_DURATION_SECONDS=600 +# Maximum text length for TTS in characters (default: 4096) +SPEECH_MAX_TEXT_LENGTH=4096 + # ====================== # Logging & Debugging # ====================== diff --git a/apps/api/AGENTS.md b/apps/api/AGENTS.md index 7c937ef..db1a989 100644 --- a/apps/api/AGENTS.md +++ b/apps/api/AGENTS.md @@ -4,15 +4,22 @@ ## Patterns - +- **Config validation pattern**: Config files 
use exported validation functions + typed getter functions (not class-validator). See `auth.config.ts`, `federation.config.ts`, `speech/speech.config.ts`. Pattern: export `isXEnabled()`, `validateXConfig()`, and `getXConfig()` functions. +- **Config registerAs**: `speech.config.ts` also exports a `registerAs("speech", ...)` factory for NestJS ConfigModule namespaced injection. Use `ConfigModule.forFeature(speechConfig)` in module imports and access via `this.config.get('speech.stt.baseUrl')`. +- **Conditional config validation**: When a service has an enabled flag (e.g., `STT_ENABLED`), URL/connection vars are only required when enabled. Validation throws with a helpful message suggesting how to disable. +- **Boolean env parsing**: Use `value === "true" || value === "1"` pattern. No default-true -- all services default to disabled when env var is unset. ## Gotchas - +- **Prisma client must be generated** before `tsc --noEmit` will pass. Run `pnpm prisma:generate` first. Pre-existing type errors from Prisma are expected in worktrees without generated client. +- **Pre-commit hooks**: lint-staged runs on staged files. If other packages' files are staged, their lint must pass too. Only stage files you intend to commit. +- **vitest runs all test files**: Even when targeting a specific test file, vitest loads all spec files. Many will fail if Prisma client isn't generated -- this is expected. Check only your target file's pass/fail status. 
## Key Files -| File | Purpose | -| ---- | ------- | - - +| File | Purpose | +| ------------------------------------- | ---------------------------------------------------------------------- | +| `src/speech/speech.config.ts` | Speech services env var validation and typed config (STT, TTS, limits) | +| `src/speech/speech.config.spec.ts` | Unit tests for speech config validation (51 tests) | +| `src/auth/auth.config.ts` | Auth/OIDC config validation (reference pattern) | +| `src/federation/federation.config.ts` | Federation config validation (reference pattern) | diff --git a/apps/api/src/speech/speech.config.spec.ts b/apps/api/src/speech/speech.config.spec.ts new file mode 100644 index 0000000..f88be85 --- /dev/null +++ b/apps/api/src/speech/speech.config.spec.ts @@ -0,0 +1,458 @@ +/** + * Speech Configuration Tests + * + * Issue #401: Tests for speech services environment variable validation + * Tests cover STT, TTS (default, premium, fallback), and speech limits configuration. + */ + +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { + isSttEnabled, + isTtsEnabled, + isTtsPremiumEnabled, + isTtsFallbackEnabled, + validateSpeechConfig, + getSpeechConfig, + type SpeechConfig, +} from "./speech.config"; + +describe("speech.config", () => { + const originalEnv = { ...process.env }; + + beforeEach(() => { + // Clear all speech-related env vars before each test + delete process.env.STT_ENABLED; + delete process.env.STT_BASE_URL; + delete process.env.STT_MODEL; + delete process.env.STT_LANGUAGE; + delete process.env.TTS_ENABLED; + delete process.env.TTS_DEFAULT_URL; + delete process.env.TTS_DEFAULT_VOICE; + delete process.env.TTS_DEFAULT_FORMAT; + delete process.env.TTS_PREMIUM_ENABLED; + delete process.env.TTS_PREMIUM_URL; + delete process.env.TTS_FALLBACK_ENABLED; + delete process.env.TTS_FALLBACK_URL; + delete process.env.SPEECH_MAX_UPLOAD_SIZE; + delete process.env.SPEECH_MAX_DURATION_SECONDS; + delete 
process.env.SPEECH_MAX_TEXT_LENGTH; + }); + + afterEach(() => { + process.env = { ...originalEnv }; + }); + + // ========================================== + // STT enabled check + // ========================================== + describe("isSttEnabled", () => { + it("should return false when STT_ENABLED is not set", () => { + expect(isSttEnabled()).toBe(false); + }); + + it("should return false when STT_ENABLED is 'false'", () => { + process.env.STT_ENABLED = "false"; + expect(isSttEnabled()).toBe(false); + }); + + it("should return false when STT_ENABLED is '0'", () => { + process.env.STT_ENABLED = "0"; + expect(isSttEnabled()).toBe(false); + }); + + it("should return false when STT_ENABLED is empty string", () => { + process.env.STT_ENABLED = ""; + expect(isSttEnabled()).toBe(false); + }); + + it("should return true when STT_ENABLED is 'true'", () => { + process.env.STT_ENABLED = "true"; + expect(isSttEnabled()).toBe(true); + }); + + it("should return true when STT_ENABLED is '1'", () => { + process.env.STT_ENABLED = "1"; + expect(isSttEnabled()).toBe(true); + }); + }); + + // ========================================== + // TTS enabled check + // ========================================== + describe("isTtsEnabled", () => { + it("should return false when TTS_ENABLED is not set", () => { + expect(isTtsEnabled()).toBe(false); + }); + + it("should return false when TTS_ENABLED is 'false'", () => { + process.env.TTS_ENABLED = "false"; + expect(isTtsEnabled()).toBe(false); + }); + + it("should return true when TTS_ENABLED is 'true'", () => { + process.env.TTS_ENABLED = "true"; + expect(isTtsEnabled()).toBe(true); + }); + + it("should return true when TTS_ENABLED is '1'", () => { + process.env.TTS_ENABLED = "1"; + expect(isTtsEnabled()).toBe(true); + }); + }); + + // ========================================== + // TTS premium enabled check + // ========================================== + describe("isTtsPremiumEnabled", () => { + it("should return false when 
TTS_PREMIUM_ENABLED is not set", () => { + expect(isTtsPremiumEnabled()).toBe(false); + }); + + it("should return false when TTS_PREMIUM_ENABLED is 'false'", () => { + process.env.TTS_PREMIUM_ENABLED = "false"; + expect(isTtsPremiumEnabled()).toBe(false); + }); + + it("should return true when TTS_PREMIUM_ENABLED is 'true'", () => { + process.env.TTS_PREMIUM_ENABLED = "true"; + expect(isTtsPremiumEnabled()).toBe(true); + }); + }); + + // ========================================== + // TTS fallback enabled check + // ========================================== + describe("isTtsFallbackEnabled", () => { + it("should return false when TTS_FALLBACK_ENABLED is not set", () => { + expect(isTtsFallbackEnabled()).toBe(false); + }); + + it("should return false when TTS_FALLBACK_ENABLED is 'false'", () => { + process.env.TTS_FALLBACK_ENABLED = "false"; + expect(isTtsFallbackEnabled()).toBe(false); + }); + + it("should return true when TTS_FALLBACK_ENABLED is 'true'", () => { + process.env.TTS_FALLBACK_ENABLED = "true"; + expect(isTtsFallbackEnabled()).toBe(true); + }); + }); + + // ========================================== + // validateSpeechConfig + // ========================================== + describe("validateSpeechConfig", () => { + describe("when all services are disabled", () => { + it("should not throw when no speech services are enabled", () => { + expect(() => validateSpeechConfig()).not.toThrow(); + }); + + it("should not throw when services are explicitly disabled", () => { + process.env.STT_ENABLED = "false"; + process.env.TTS_ENABLED = "false"; + process.env.TTS_PREMIUM_ENABLED = "false"; + process.env.TTS_FALLBACK_ENABLED = "false"; + expect(() => validateSpeechConfig()).not.toThrow(); + }); + }); + + describe("STT validation", () => { + beforeEach(() => { + process.env.STT_ENABLED = "true"; + }); + + it("should throw when STT is enabled but STT_BASE_URL is missing", () => { + expect(() => validateSpeechConfig()).toThrow("STT_BASE_URL"); + expect(() => 
validateSpeechConfig()).toThrow( + "STT is enabled (STT_ENABLED=true) but required environment variables are missing" + ); + }); + + it("should throw when STT_BASE_URL is empty string", () => { + process.env.STT_BASE_URL = ""; + expect(() => validateSpeechConfig()).toThrow("STT_BASE_URL"); + }); + + it("should throw when STT_BASE_URL is whitespace only", () => { + process.env.STT_BASE_URL = " "; + expect(() => validateSpeechConfig()).toThrow("STT_BASE_URL"); + }); + + it("should not throw when STT is enabled and STT_BASE_URL is set", () => { + process.env.STT_BASE_URL = "http://speaches:8000/v1"; + expect(() => validateSpeechConfig()).not.toThrow(); + }); + + it("should suggest disabling STT in error message", () => { + expect(() => validateSpeechConfig()).toThrow("STT_ENABLED=false"); + }); + }); + + describe("TTS default validation", () => { + beforeEach(() => { + process.env.TTS_ENABLED = "true"; + }); + + it("should throw when TTS is enabled but TTS_DEFAULT_URL is missing", () => { + expect(() => validateSpeechConfig()).toThrow("TTS_DEFAULT_URL"); + expect(() => validateSpeechConfig()).toThrow( + "TTS is enabled (TTS_ENABLED=true) but required environment variables are missing" + ); + }); + + it("should throw when TTS_DEFAULT_URL is empty string", () => { + process.env.TTS_DEFAULT_URL = ""; + expect(() => validateSpeechConfig()).toThrow("TTS_DEFAULT_URL"); + }); + + it("should not throw when TTS is enabled and TTS_DEFAULT_URL is set", () => { + process.env.TTS_DEFAULT_URL = "http://kokoro-tts:8880/v1"; + expect(() => validateSpeechConfig()).not.toThrow(); + }); + + it("should suggest disabling TTS in error message", () => { + expect(() => validateSpeechConfig()).toThrow("TTS_ENABLED=false"); + }); + }); + + describe("TTS premium validation", () => { + beforeEach(() => { + process.env.TTS_PREMIUM_ENABLED = "true"; + }); + + it("should throw when TTS premium is enabled but TTS_PREMIUM_URL is missing", () => { + expect(() => 
validateSpeechConfig()).toThrow("TTS_PREMIUM_URL"); + expect(() => validateSpeechConfig()).toThrow( + "TTS premium is enabled (TTS_PREMIUM_ENABLED=true) but required environment variables are missing" + ); + }); + + it("should throw when TTS_PREMIUM_URL is empty string", () => { + process.env.TTS_PREMIUM_URL = ""; + expect(() => validateSpeechConfig()).toThrow("TTS_PREMIUM_URL"); + }); + + it("should not throw when TTS premium is enabled and TTS_PREMIUM_URL is set", () => { + process.env.TTS_PREMIUM_URL = "http://chatterbox-tts:8881/v1"; + expect(() => validateSpeechConfig()).not.toThrow(); + }); + + it("should suggest disabling TTS premium in error message", () => { + expect(() => validateSpeechConfig()).toThrow("TTS_PREMIUM_ENABLED=false"); + }); + }); + + describe("TTS fallback validation", () => { + beforeEach(() => { + process.env.TTS_FALLBACK_ENABLED = "true"; + }); + + it("should throw when TTS fallback is enabled but TTS_FALLBACK_URL is missing", () => { + expect(() => validateSpeechConfig()).toThrow("TTS_FALLBACK_URL"); + expect(() => validateSpeechConfig()).toThrow( + "TTS fallback is enabled (TTS_FALLBACK_ENABLED=true) but required environment variables are missing" + ); + }); + + it("should throw when TTS_FALLBACK_URL is empty string", () => { + process.env.TTS_FALLBACK_URL = ""; + expect(() => validateSpeechConfig()).toThrow("TTS_FALLBACK_URL"); + }); + + it("should not throw when TTS fallback is enabled and TTS_FALLBACK_URL is set", () => { + process.env.TTS_FALLBACK_URL = "http://openedai-speech:8000/v1"; + expect(() => validateSpeechConfig()).not.toThrow(); + }); + + it("should suggest disabling TTS fallback in error message", () => { + expect(() => validateSpeechConfig()).toThrow("TTS_FALLBACK_ENABLED=false"); + }); + }); + + describe("multiple services enabled simultaneously", () => { + it("should validate all enabled services", () => { + process.env.STT_ENABLED = "true"; + process.env.TTS_ENABLED = "true"; + // Missing both STT_BASE_URL and 
TTS_DEFAULT_URL + + expect(() => validateSpeechConfig()).toThrow("STT_BASE_URL"); + }); + + it("should pass when all enabled services are properly configured", () => { + process.env.STT_ENABLED = "true"; + process.env.STT_BASE_URL = "http://speaches:8000/v1"; + process.env.TTS_ENABLED = "true"; + process.env.TTS_DEFAULT_URL = "http://kokoro-tts:8880/v1"; + process.env.TTS_PREMIUM_ENABLED = "true"; + process.env.TTS_PREMIUM_URL = "http://chatterbox-tts:8881/v1"; + process.env.TTS_FALLBACK_ENABLED = "true"; + process.env.TTS_FALLBACK_URL = "http://openedai-speech:8000/v1"; + + expect(() => validateSpeechConfig()).not.toThrow(); + }); + }); + + describe("limits validation", () => { + it("should throw when SPEECH_MAX_UPLOAD_SIZE is not a valid number", () => { + process.env.SPEECH_MAX_UPLOAD_SIZE = "not-a-number"; + expect(() => validateSpeechConfig()).toThrow("SPEECH_MAX_UPLOAD_SIZE"); + expect(() => validateSpeechConfig()).toThrow("must be a positive integer"); + }); + + it("should throw when SPEECH_MAX_UPLOAD_SIZE is negative", () => { + process.env.SPEECH_MAX_UPLOAD_SIZE = "-100"; + expect(() => validateSpeechConfig()).toThrow("SPEECH_MAX_UPLOAD_SIZE"); + }); + + it("should throw when SPEECH_MAX_UPLOAD_SIZE is zero", () => { + process.env.SPEECH_MAX_UPLOAD_SIZE = "0"; + expect(() => validateSpeechConfig()).toThrow("SPEECH_MAX_UPLOAD_SIZE"); + }); + + it("should throw when SPEECH_MAX_DURATION_SECONDS is not a valid number", () => { + process.env.SPEECH_MAX_DURATION_SECONDS = "abc"; + expect(() => validateSpeechConfig()).toThrow("SPEECH_MAX_DURATION_SECONDS"); + }); + + it("should throw when SPEECH_MAX_TEXT_LENGTH is not a valid number", () => { + process.env.SPEECH_MAX_TEXT_LENGTH = "xyz"; + expect(() => validateSpeechConfig()).toThrow("SPEECH_MAX_TEXT_LENGTH"); + }); + + it("should not throw when limits are valid positive integers", () => { + process.env.SPEECH_MAX_UPLOAD_SIZE = "50000000"; + process.env.SPEECH_MAX_DURATION_SECONDS = "1200"; + 
process.env.SPEECH_MAX_TEXT_LENGTH = "8192"; + expect(() => validateSpeechConfig()).not.toThrow(); + }); + + it("should not throw when limits are not set (uses defaults)", () => { + expect(() => validateSpeechConfig()).not.toThrow(); + }); + }); + }); + + // ========================================== + // getSpeechConfig + // ========================================== + describe("getSpeechConfig", () => { + it("should return default values when no env vars are set", () => { + const config = getSpeechConfig(); + + expect(config.stt.enabled).toBe(false); + expect(config.stt.baseUrl).toBe("http://speaches:8000/v1"); + expect(config.stt.model).toBe("Systran/faster-whisper-large-v3-turbo"); + expect(config.stt.language).toBe("en"); + + expect(config.tts.default.enabled).toBe(false); + expect(config.tts.default.url).toBe("http://kokoro-tts:8880/v1"); + expect(config.tts.default.voice).toBe("af_heart"); + expect(config.tts.default.format).toBe("mp3"); + + expect(config.tts.premium.enabled).toBe(false); + expect(config.tts.premium.url).toBe("http://chatterbox-tts:8881/v1"); + + expect(config.tts.fallback.enabled).toBe(false); + expect(config.tts.fallback.url).toBe("http://openedai-speech:8000/v1"); + + expect(config.limits.maxUploadSize).toBe(25000000); + expect(config.limits.maxDurationSeconds).toBe(600); + expect(config.limits.maxTextLength).toBe(4096); + }); + + it("should use custom env var values when set", () => { + process.env.STT_ENABLED = "true"; + process.env.STT_BASE_URL = "http://custom-stt:9000/v1"; + process.env.STT_MODEL = "custom-model"; + process.env.STT_LANGUAGE = "fr"; + + process.env.TTS_ENABLED = "true"; + process.env.TTS_DEFAULT_URL = "http://custom-tts:9001/v1"; + process.env.TTS_DEFAULT_VOICE = "custom_voice"; + process.env.TTS_DEFAULT_FORMAT = "wav"; + + process.env.TTS_PREMIUM_ENABLED = "true"; + process.env.TTS_PREMIUM_URL = "http://custom-premium:9002/v1"; + + process.env.TTS_FALLBACK_ENABLED = "true"; + process.env.TTS_FALLBACK_URL = 
"http://custom-fallback:9003/v1"; + + process.env.SPEECH_MAX_UPLOAD_SIZE = "50000000"; + process.env.SPEECH_MAX_DURATION_SECONDS = "1200"; + process.env.SPEECH_MAX_TEXT_LENGTH = "8192"; + + const config = getSpeechConfig(); + + expect(config.stt.enabled).toBe(true); + expect(config.stt.baseUrl).toBe("http://custom-stt:9000/v1"); + expect(config.stt.model).toBe("custom-model"); + expect(config.stt.language).toBe("fr"); + + expect(config.tts.default.enabled).toBe(true); + expect(config.tts.default.url).toBe("http://custom-tts:9001/v1"); + expect(config.tts.default.voice).toBe("custom_voice"); + expect(config.tts.default.format).toBe("wav"); + + expect(config.tts.premium.enabled).toBe(true); + expect(config.tts.premium.url).toBe("http://custom-premium:9002/v1"); + + expect(config.tts.fallback.enabled).toBe(true); + expect(config.tts.fallback.url).toBe("http://custom-fallback:9003/v1"); + + expect(config.limits.maxUploadSize).toBe(50000000); + expect(config.limits.maxDurationSeconds).toBe(1200); + expect(config.limits.maxTextLength).toBe(8192); + }); + + it("should return typed SpeechConfig object", () => { + const config: SpeechConfig = getSpeechConfig(); + + // Verify structure matches the SpeechConfig type + expect(config).toHaveProperty("stt"); + expect(config).toHaveProperty("tts"); + expect(config).toHaveProperty("limits"); + expect(config.tts).toHaveProperty("default"); + expect(config.tts).toHaveProperty("premium"); + expect(config.tts).toHaveProperty("fallback"); + }); + + it("should handle partial env var overrides", () => { + process.env.STT_ENABLED = "true"; + process.env.STT_BASE_URL = "http://custom-stt:9000/v1"; + // STT_MODEL and STT_LANGUAGE not set, should use defaults + + const config = getSpeechConfig(); + + expect(config.stt.enabled).toBe(true); + expect(config.stt.baseUrl).toBe("http://custom-stt:9000/v1"); + expect(config.stt.model).toBe("Systran/faster-whisper-large-v3-turbo"); + expect(config.stt.language).toBe("en"); + }); + + it("should parse 
numeric limits correctly", () => { + process.env.SPEECH_MAX_UPLOAD_SIZE = "10000000"; + const config = getSpeechConfig(); + expect(typeof config.limits.maxUploadSize).toBe("number"); + expect(config.limits.maxUploadSize).toBe(10000000); + }); + }); + + // ========================================== + // registerAs integration + // ========================================== + describe("speechConfig (registerAs factory)", () => { + it("should be importable as a config namespace factory", async () => { + const { speechConfig } = await import("./speech.config"); + expect(speechConfig).toBeDefined(); + expect(speechConfig.KEY).toBe("CONFIGURATION(speech)"); + }); + + it("should return config object when called", async () => { + const { speechConfig } = await import("./speech.config"); + const config = speechConfig() as SpeechConfig; + expect(config).toHaveProperty("stt"); + expect(config).toHaveProperty("tts"); + expect(config).toHaveProperty("limits"); + }); + }); +}); diff --git a/apps/api/src/speech/speech.config.ts b/apps/api/src/speech/speech.config.ts new file mode 100644 index 0000000..48487de --- /dev/null +++ b/apps/api/src/speech/speech.config.ts @@ -0,0 +1,304 @@ +/** + * Speech Services Configuration + * + * Issue #401: Environment variables and validation for STT (speech-to-text), + * TTS (text-to-speech), and speech service limits. 
+ * + * Validates conditional requirements at startup: + * - STT_BASE_URL is required when STT_ENABLED=true + * - TTS_DEFAULT_URL is required when TTS_ENABLED=true + * - TTS_PREMIUM_URL is required when TTS_PREMIUM_ENABLED=true + * - TTS_FALLBACK_URL is required when TTS_FALLBACK_ENABLED=true + */ + +import { registerAs } from "@nestjs/config"; + +// ========================================== +// Default values +// ========================================== + +const STT_DEFAULTS = { + baseUrl: "http://speaches:8000/v1", + model: "Systran/faster-whisper-large-v3-turbo", + language: "en", +} as const; + +const TTS_DEFAULT_DEFAULTS = { + url: "http://kokoro-tts:8880/v1", + voice: "af_heart", + format: "mp3", +} as const; + +const TTS_PREMIUM_DEFAULTS = { + url: "http://chatterbox-tts:8881/v1", +} as const; + +const TTS_FALLBACK_DEFAULTS = { + url: "http://openedai-speech:8000/v1", +} as const; + +const LIMITS_DEFAULTS = { + maxUploadSize: 25_000_000, + maxDurationSeconds: 600, + maxTextLength: 4096, +} as const; + +// ========================================== +// Types +// ========================================== + +export interface SttConfig { + enabled: boolean; + baseUrl: string; + model: string; + language: string; +} + +export interface TtsDefaultConfig { + enabled: boolean; + url: string; + voice: string; + format: string; +} + +export interface TtsPremiumConfig { + enabled: boolean; + url: string; +} + +export interface TtsFallbackConfig { + enabled: boolean; + url: string; +} + +export interface TtsConfig { + default: TtsDefaultConfig; + premium: TtsPremiumConfig; + fallback: TtsFallbackConfig; +} + +export interface SpeechLimitsConfig { + maxUploadSize: number; + maxDurationSeconds: number; + maxTextLength: number; +} + +export interface SpeechConfig { + stt: SttConfig; + tts: TtsConfig; + limits: SpeechLimitsConfig; +} + +// ========================================== +// Helper: parse boolean env var +// ========================================== + 
/**
 * Interpret an environment variable as a boolean flag.
 *
 * Only the exact strings "true" and "1" count as enabled; anything else
 * (including undefined and the empty string) is treated as disabled.
 */
function parseBooleanEnv(value: string | undefined): boolean {
  return value === "true" || value === "1";
}

// ==========================================
// Enabled checks
// ==========================================

/**
 * Check if speech-to-text (STT) is enabled via the STT_ENABLED environment variable.
 */
export function isSttEnabled(): boolean {
  return parseBooleanEnv(process.env.STT_ENABLED);
}

/**
 * Check if the text-to-speech (TTS) default engine is enabled via the
 * TTS_ENABLED environment variable.
 */
export function isTtsEnabled(): boolean {
  return parseBooleanEnv(process.env.TTS_ENABLED);
}

/**
 * Check if the TTS premium engine (Chatterbox) is enabled via the
 * TTS_PREMIUM_ENABLED environment variable.
 */
export function isTtsPremiumEnabled(): boolean {
  return parseBooleanEnv(process.env.TTS_PREMIUM_ENABLED);
}

/**
 * Check if the TTS fallback engine (Piper/OpenedAI) is enabled via the
 * TTS_FALLBACK_ENABLED environment variable.
 */
export function isTtsFallbackEnabled(): boolean {
  return parseBooleanEnv(process.env.TTS_FALLBACK_ENABLED);
}

// ==========================================
// Validation helpers
// ==========================================

/**
 * Check if an environment variable has a non-empty value.
 * Whitespace-only values count as unset.
 */
function isEnvVarSet(envVar: string): boolean {
  const value = process.env[envVar];
  return value !== undefined && value.trim() !== "";
}

/**
 * Read a string environment variable, falling back to a default when the
 * variable is unset or blank. Uses the same "blank means unset" rule as
 * isEnvVarSet so the getter and the validators agree. The returned value
 * is trimmed.
 */
function readEnvString(envVar: string, fallback: string): string {
  const trimmed = process.env[envVar]?.trim();
  return trimmed === undefined || trimmed === "" ? fallback : trimmed;
}

/**
 * Read an integer environment variable, falling back to a default when the
 * variable is unset or blank.
 *
 * A plain `??` fallback is not enough here: an empty string is not nullish,
 * and parseInt("") yields NaN even though validatePositiveInteger accepts a
 * blank value as "use the default".
 */
function readEnvInteger(envVar: string, fallback: number): number {
  const trimmed = process.env[envVar]?.trim();
  if (trimmed === undefined || trimmed === "") {
    return fallback;
  }
  return parseInt(trimmed, 10);
}

/**
 * Validate that required env vars are set when a service is enabled.
 * Throws with a helpful error message listing missing vars and how to disable.
 *
 * @param serviceName - Human-readable service name used in the error message
 * @param enabledFlag - Name of the env var that enables the service
 * @param requiredVars - Names of env vars that must be non-empty
 * @throws Error if any of the required variables is unset or blank
 */
function validateRequiredVars(
  serviceName: string,
  enabledFlag: string,
  requiredVars: string[]
): void {
  const missingVars = requiredVars.filter((envVar) => !isEnvVarSet(envVar));

  if (missingVars.length > 0) {
    throw new Error(
      `${serviceName} is enabled (${enabledFlag}=true) but required environment variables are missing or empty: ${missingVars.join(", ")}. ` +
        `Either set these variables or disable by setting ${enabledFlag}=false.`
    );
  }
}

/**
 * Validate that a numeric env var, if set, is a positive integer.
 * Unset or blank values are accepted because the default is used instead.
 *
 * @throws Error if the value is set but is not a positive integer
 */
function validatePositiveInteger(envVar: string): void {
  const value = process.env[envVar];
  if (value === undefined || value.trim() === "") {
    return; // Not set, will use default
  }

  const parsed = parseInt(value, 10);
  // The round-trip comparison rejects inputs parseInt would silently
  // truncate or normalize, such as "10abc", "1.5" or "007".
  if (Number.isNaN(parsed) || parsed <= 0 || String(parsed) !== value.trim()) {
    throw new Error(`${envVar} must be a positive integer. Current value: "${value}".`);
  }
}

// ==========================================
// Main validation
// ==========================================

/**
 * Validates speech configuration at startup.
 * Call this during module initialization to fail fast if misconfigured.
 *
 * Validates:
 * - STT_BASE_URL is set when STT_ENABLED=true
 * - TTS_DEFAULT_URL is set when TTS_ENABLED=true
 * - TTS_PREMIUM_URL is set when TTS_PREMIUM_ENABLED=true
 * - TTS_FALLBACK_URL is set when TTS_FALLBACK_ENABLED=true
 * - Numeric limits are positive integers (when set)
 *
 * Checks run in the order listed and the first failure is thrown.
 *
 * @throws Error if any required configuration is missing or invalid
 */
export function validateSpeechConfig(): void {
  // STT validation
  if (isSttEnabled()) {
    validateRequiredVars("STT", "STT_ENABLED", ["STT_BASE_URL"]);
  }

  // TTS default validation
  if (isTtsEnabled()) {
    validateRequiredVars("TTS", "TTS_ENABLED", ["TTS_DEFAULT_URL"]);
  }

  // TTS premium validation
  if (isTtsPremiumEnabled()) {
    validateRequiredVars("TTS premium", "TTS_PREMIUM_ENABLED", ["TTS_PREMIUM_URL"]);
  }

  // TTS fallback validation
  if (isTtsFallbackEnabled()) {
    validateRequiredVars("TTS fallback", "TTS_FALLBACK_ENABLED", ["TTS_FALLBACK_URL"]);
  }

  // Limits validation (only if set, otherwise defaults are used)
  validatePositiveInteger("SPEECH_MAX_UPLOAD_SIZE");
  validatePositiveInteger("SPEECH_MAX_DURATION_SECONDS");
  validatePositiveInteger("SPEECH_MAX_TEXT_LENGTH");
}

// ==========================================
// Config getter
// ==========================================

/**
 * Get the full speech configuration object with typed values and defaults.
 *
 * Unset or blank environment variables fall back to the built-in defaults,
 * matching what validateSpeechConfig treats as "not set". This function does
 * not validate: a non-blank, non-numeric limit is passed through parseInt
 * and may come back as NaN, so run validateSpeechConfig first.
 *
 * @returns SpeechConfig with all STT, TTS, and limits configuration
 */
export function getSpeechConfig(): SpeechConfig {
  return {
    stt: {
      enabled: isSttEnabled(),
      baseUrl: readEnvString("STT_BASE_URL", STT_DEFAULTS.baseUrl),
      model: readEnvString("STT_MODEL", STT_DEFAULTS.model),
      language: readEnvString("STT_LANGUAGE", STT_DEFAULTS.language),
    },
    tts: {
      default: {
        enabled: isTtsEnabled(),
        url: readEnvString("TTS_DEFAULT_URL", TTS_DEFAULT_DEFAULTS.url),
        voice: readEnvString("TTS_DEFAULT_VOICE", TTS_DEFAULT_DEFAULTS.voice),
        format: readEnvString("TTS_DEFAULT_FORMAT", TTS_DEFAULT_DEFAULTS.format),
      },
      premium: {
        enabled: isTtsPremiumEnabled(),
        url: readEnvString("TTS_PREMIUM_URL", TTS_PREMIUM_DEFAULTS.url),
      },
      fallback: {
        enabled: isTtsFallbackEnabled(),
        url: readEnvString("TTS_FALLBACK_URL", TTS_FALLBACK_DEFAULTS.url),
      },
    },
    limits: {
      maxUploadSize: readEnvInteger("SPEECH_MAX_UPLOAD_SIZE", LIMITS_DEFAULTS.maxUploadSize),
      maxDurationSeconds: readEnvInteger(
        "SPEECH_MAX_DURATION_SECONDS",
        LIMITS_DEFAULTS.maxDurationSeconds
      ),
      maxTextLength: readEnvInteger("SPEECH_MAX_TEXT_LENGTH", LIMITS_DEFAULTS.maxTextLength),
    },
  };
}

// ==========================================
// NestJS ConfigModule registerAs factory
// ==========================================

/**
 * NestJS ConfigModule namespace factory for speech configuration.
 * Registered under the "speech" namespace; the factory returns the result
 * of getSpeechConfig().
 *
 * Usage in a module:
 * ```typescript
 * import { speechConfig } from './speech.config';
 *
 * @Module({
 *   imports: [ConfigModule.forFeature(speechConfig)],
 * })
 * export class SpeechModule {}
 * ```
 *
 * Then inject via ConfigService:
 * ```typescript
 * constructor(private config: ConfigService) {
 *   const sttUrl = this.config.get('speech.stt.baseUrl');
 * }
 * ```
 */
export const speechConfig = registerAs("speech", (): SpeechConfig => {
  return getSpeechConfig();
});