All checks were successful
ci/woodpecker/push/api Pipeline was successful
Add SpeechConfig with typed configuration and startup validation for STT (Whisper/Speaches), TTS default (Kokoro), TTS premium (Chatterbox), and TTS fallback (Piper/OpenedAI). Includes registerAs factory for NestJS ConfigModule integration, .env.example documentation, and 51 unit tests covering all validation paths. Refs #401
305 lines
8.1 KiB
TypeScript
305 lines
8.1 KiB
TypeScript
/**
 * Speech Services Configuration
 *
 * Issue #401: Environment variables and validation for STT (speech-to-text),
 * TTS (text-to-speech), and speech service limits.
 *
 * Validates conditional requirements at startup:
 * - STT_BASE_URL is required when STT_ENABLED=true
 * - TTS_DEFAULT_URL is required when TTS_ENABLED=true
 * - TTS_PREMIUM_URL is required when TTS_PREMIUM_ENABLED=true
 * - TTS_FALLBACK_URL is required when TTS_FALLBACK_ENABLED=true
 */
|
|
|
|
import { registerAs } from "@nestjs/config";
|
|
|
|
// ==========================================
|
|
// Default values
|
|
// ==========================================
|
|
|
|
/**
 * Fallback values for the speech-to-text settings, used by `getSpeechConfig`
 * when the corresponding `STT_*` environment variable is not provided.
 */
const STT_DEFAULTS = {
  // Fallback for STT_BASE_URL
  baseUrl: "http://speaches:8000/v1",
  // Fallback for STT_MODEL
  model: "Systran/faster-whisper-large-v3-turbo",
  // Fallback for STT_LANGUAGE
  language: "en",
} as const;
|
|
|
|
/**
 * Fallback values for the default text-to-speech engine, used by
 * `getSpeechConfig` when the corresponding `TTS_DEFAULT_*` environment
 * variable is not provided.
 */
const TTS_DEFAULT_DEFAULTS = {
  // Fallback for TTS_DEFAULT_URL
  url: "http://kokoro-tts:8880/v1",
  // Fallback for TTS_DEFAULT_VOICE
  voice: "af_heart",
  // Fallback for TTS_DEFAULT_FORMAT
  format: "mp3",
} as const;
|
|
|
|
/**
 * Fallback values for the premium text-to-speech engine, used by
 * `getSpeechConfig` when `TTS_PREMIUM_URL` is not provided.
 */
const TTS_PREMIUM_DEFAULTS = {
  // Fallback for TTS_PREMIUM_URL
  url: "http://chatterbox-tts:8881/v1",
} as const;
|
|
|
|
/**
 * Fallback values for the fallback text-to-speech engine, used by
 * `getSpeechConfig` when `TTS_FALLBACK_URL` is not provided.
 */
const TTS_FALLBACK_DEFAULTS = {
  // Fallback for TTS_FALLBACK_URL
  url: "http://openedai-speech:8000/v1",
} as const;
|
|
|
|
/**
 * Fallback values for the speech service limits, used by `getSpeechConfig`
 * when the corresponding `SPEECH_MAX_*` environment variable is not provided.
 */
const LIMITS_DEFAULTS = {
  // Fallback for SPEECH_MAX_UPLOAD_SIZE (presumably bytes — confirm with the consumer)
  maxUploadSize: 25_000_000,
  // Fallback for SPEECH_MAX_DURATION_SECONDS
  maxDurationSeconds: 600,
  // Fallback for SPEECH_MAX_TEXT_LENGTH (presumably characters — confirm with the consumer)
  maxTextLength: 4096,
} as const;
|
|
|
|
// ==========================================
|
|
// Types
|
|
// ==========================================
|
|
|
|
/** Speech-to-text (STT) settings as resolved by `getSpeechConfig`. */
export interface SttConfig {
  /** Whether STT is switched on (derived from `STT_ENABLED`). */
  enabled: boolean;
  /** Base URL of the STT service (from `STT_BASE_URL`). */
  baseUrl: string;
  /** Model identifier (from `STT_MODEL`). */
  model: string;
  /** Language setting (from `STT_LANGUAGE`). */
  language: string;
}
|
|
|
|
/** Settings for the default text-to-speech engine as resolved by `getSpeechConfig`. */
export interface TtsDefaultConfig {
  /** Whether the default TTS engine is switched on (derived from `TTS_ENABLED`). */
  enabled: boolean;
  /** URL of the default TTS service (from `TTS_DEFAULT_URL`). */
  url: string;
  /** Voice identifier (from `TTS_DEFAULT_VOICE`). */
  voice: string;
  /** Output format (from `TTS_DEFAULT_FORMAT`). */
  format: string;
}
|
|
|
|
/** Settings for the premium text-to-speech engine as resolved by `getSpeechConfig`. */
export interface TtsPremiumConfig {
  /** Whether the premium TTS engine is switched on (derived from `TTS_PREMIUM_ENABLED`). */
  enabled: boolean;
  /** URL of the premium TTS service (from `TTS_PREMIUM_URL`). */
  url: string;
}
|
|
|
|
/** Settings for the fallback text-to-speech engine as resolved by `getSpeechConfig`. */
export interface TtsFallbackConfig {
  /** Whether the fallback TTS engine is switched on (derived from `TTS_FALLBACK_ENABLED`). */
  enabled: boolean;
  /** URL of the fallback TTS service (from `TTS_FALLBACK_URL`). */
  url: string;
}
|
|
|
|
/** Groups the settings of the three text-to-speech engines. */
export interface TtsConfig {
  /** Default engine settings. */
  default: TtsDefaultConfig;
  /** Premium engine settings. */
  premium: TtsPremiumConfig;
  /** Fallback engine settings. */
  fallback: TtsFallbackConfig;
}
|
|
|
|
/** Numeric limits for the speech services as resolved by `getSpeechConfig`. */
export interface SpeechLimitsConfig {
  /** Maximum upload size (from `SPEECH_MAX_UPLOAD_SIZE`). */
  maxUploadSize: number;
  /** Maximum duration in seconds (from `SPEECH_MAX_DURATION_SECONDS`). */
  maxDurationSeconds: number;
  /** Maximum text length (from `SPEECH_MAX_TEXT_LENGTH`). */
  maxTextLength: number;
}
|
|
|
|
/** Root shape of the speech configuration returned by `getSpeechConfig`. */
export interface SpeechConfig {
  /** Speech-to-text settings. */
  stt: SttConfig;
  /** Text-to-speech settings for the default, premium, and fallback engines. */
  tts: TtsConfig;
  /** Upload size, duration, and text length limits. */
  limits: SpeechLimitsConfig;
}
|
|
|
|
// ==========================================
|
|
// Helper: parse boolean env var
|
|
// ==========================================
|
|
|
|
/**
 * Interpret a raw environment variable value as a boolean flag.
 *
 * The match is exact: no trimming and no case folding is applied.
 *
 * @param value - Raw environment variable value, or `undefined` when unset
 * @returns `true` only for the exact strings `"true"` and `"1"`; `false` otherwise
 */
function parseBooleanEnv(value: string | undefined): boolean {
  switch (value) {
    case "true":
    case "1":
      return true;
    default:
      return false;
  }
}
|
|
|
|
// ==========================================
|
|
// Enabled checks
|
|
// ==========================================
|
|
|
|
/**
 * Report whether speech-to-text (STT) is switched on.
 *
 * @returns the result of `parseBooleanEnv` applied to the `STT_ENABLED` environment variable
 */
export function isSttEnabled(): boolean {
  const { STT_ENABLED: flag } = process.env;
  return parseBooleanEnv(flag);
}
|
|
|
|
/**
 * Report whether the default text-to-speech (TTS) engine is switched on.
 *
 * @returns the result of `parseBooleanEnv` applied to the `TTS_ENABLED` environment variable
 */
export function isTtsEnabled(): boolean {
  const { TTS_ENABLED: flag } = process.env;
  return parseBooleanEnv(flag);
}
|
|
|
|
/**
 * Report whether the premium TTS engine (Chatterbox) is switched on.
 *
 * @returns the result of `parseBooleanEnv` applied to the `TTS_PREMIUM_ENABLED` environment variable
 */
export function isTtsPremiumEnabled(): boolean {
  const { TTS_PREMIUM_ENABLED: flag } = process.env;
  return parseBooleanEnv(flag);
}
|
|
|
|
/**
 * Report whether the fallback TTS engine (Piper/OpenedAI) is switched on.
 *
 * @returns the result of `parseBooleanEnv` applied to the `TTS_FALLBACK_ENABLED` environment variable
 */
export function isTtsFallbackEnabled(): boolean {
  const { TTS_FALLBACK_ENABLED: flag } = process.env;
  return parseBooleanEnv(flag);
}
|
|
|
|
// ==========================================
|
|
// Validation helpers
|
|
// ==========================================
|
|
|
|
/**
 * Tell whether an environment variable carries a usable value.
 *
 * @param envVar - Name of the environment variable to look up in `process.env`
 * @returns `false` when the variable is unset, empty, or whitespace-only; `true` otherwise
 */
function isEnvVarSet(envVar: string): boolean {
  const raw = process.env[envVar];
  if (raw === undefined) {
    return false;
  }
  // Whitespace-only values count as "not set"
  return raw.trim().length > 0;
}
|
|
|
|
/**
 * Ensure every environment variable a service depends on has a non-empty value.
 *
 * Callers are expected to invoke this only after checking that the service is
 * enabled; this function does not inspect `enabledFlag` itself and only uses
 * it to build the error message.
 *
 * @param serviceName - Human-readable service name used in the error message
 * @param enabledFlag - Name of the environment variable that enables the service
 * @param requiredVars - Names of the environment variables that must be set
 * @throws Error listing every missing or empty variable and how to disable the service
 */
function validateRequiredVars(
  serviceName: string,
  enabledFlag: string,
  requiredVars: string[]
): void {
  const missingVars = requiredVars.filter((name) => !isEnvVarSet(name));

  if (missingVars.length === 0) {
    return;
  }

  throw new Error(
    `${serviceName} is enabled (${enabledFlag}=true) but required environment variables are missing or empty: ${missingVars.join(", ")}. ` +
      `Either set these variables or disable by setting ${enabledFlag}=false.`
  );
}
|
|
|
|
/**
 * Check that a numeric environment variable, when provided, holds a positive integer.
 *
 * Unset, empty, and whitespace-only values are accepted without error. A value
 * is valid only if, after trimming, it is exactly the decimal rendering of an
 * integer greater than zero, so forms such as "0", "-5", "1.5", "12px", "+7",
 * and "007" are rejected.
 *
 * @param envVar - Name of the environment variable to look up in `process.env`
 * @throws Error when the value is present but not a positive integer
 */
function validatePositiveInteger(envVar: string): void {
  const value = process.env[envVar];
  if (value === undefined) {
    return;
  }

  const canonical = value.trim();
  if (canonical === "") {
    return; // Blank counts as not set
  }

  const parsed = Number.parseInt(value, 10);
  const isPositive = !Number.isNaN(parsed) && parsed > 0;
  // Round-tripping through String() rejects trailing garbage, signs, and leading zeros
  const roundTrips = String(parsed) === canonical;
  if (isPositive && roundTrips) {
    return;
  }

  throw new Error(`${envVar} must be a positive integer. Current value: "${value}".`);
}
|
|
|
|
// ==========================================
|
|
// Main validation
|
|
// ==========================================
|
|
|
|
/**
 * Fail-fast check of the speech-related environment variables.
 *
 * Intended to be called during module initialization. Checks run in this
 * order and stop at the first failure:
 * 1. STT_BASE_URL must be set when STT_ENABLED is on
 * 2. TTS_DEFAULT_URL must be set when TTS_ENABLED is on
 * 3. TTS_PREMIUM_URL must be set when TTS_PREMIUM_ENABLED is on
 * 4. TTS_FALLBACK_URL must be set when TTS_FALLBACK_ENABLED is on
 * 5. SPEECH_MAX_UPLOAD_SIZE, SPEECH_MAX_DURATION_SECONDS, and
 *    SPEECH_MAX_TEXT_LENGTH must be positive integers when provided
 *
 * @throws Error if any required configuration is missing or invalid
 */
export function validateSpeechConfig(): void {
  // One row per service: the URL is only required while the service is enabled
  const conditionalRequirements: Array<{
    isEnabled: () => boolean;
    serviceName: string;
    enabledFlag: string;
    requiredVars: string[];
  }> = [
    {
      isEnabled: isSttEnabled,
      serviceName: "STT",
      enabledFlag: "STT_ENABLED",
      requiredVars: ["STT_BASE_URL"],
    },
    {
      isEnabled: isTtsEnabled,
      serviceName: "TTS",
      enabledFlag: "TTS_ENABLED",
      requiredVars: ["TTS_DEFAULT_URL"],
    },
    {
      isEnabled: isTtsPremiumEnabled,
      serviceName: "TTS premium",
      enabledFlag: "TTS_PREMIUM_ENABLED",
      requiredVars: ["TTS_PREMIUM_URL"],
    },
    {
      isEnabled: isTtsFallbackEnabled,
      serviceName: "TTS fallback",
      enabledFlag: "TTS_FALLBACK_ENABLED",
      requiredVars: ["TTS_FALLBACK_URL"],
    },
  ];

  for (const { isEnabled, serviceName, enabledFlag, requiredVars } of conditionalRequirements) {
    if (isEnabled()) {
      validateRequiredVars(serviceName, enabledFlag, requiredVars);
    }
  }

  // Limits are optional; they are only checked when a value is present
  const limitVars = [
    "SPEECH_MAX_UPLOAD_SIZE",
    "SPEECH_MAX_DURATION_SECONDS",
    "SPEECH_MAX_TEXT_LENGTH",
  ];
  for (const limitVar of limitVars) {
    validatePositiveInteger(limitVar);
  }
}
|
|
|
|
// ==========================================
|
|
// Config getter
|
|
// ==========================================
|
|
|
|
/**
 * Read an environment variable, substituting `fallback` when the variable is
 * unset, empty, or whitespace-only. Non-blank values are returned unmodified.
 */
function envOrDefault(name: string, fallback: string): string {
  const raw = process.env[name];
  return raw === undefined || raw.trim() === "" ? fallback : raw;
}

/**
 * Read an integer environment variable with `parseInt(..., 10)`, substituting
 * `fallback` when the variable is unset, empty, or whitespace-only.
 */
function envIntOrDefault(name: string, fallback: number): number {
  return parseInt(envOrDefault(name, String(fallback)), 10);
}

/**
 * Get the full speech configuration object with typed values and defaults.
 *
 * A variable that is unset, empty, or whitespace-only is treated as "not set"
 * and replaced by its default. This is the same rule `isEnvVarSet` and
 * `validatePositiveInteger` apply, so a blank entry such as
 * `SPEECH_MAX_UPLOAD_SIZE=` (which validation accepts) resolves to the default
 * rather than to `NaN`, and a blank string setting resolves to its default
 * rather than to `""`.
 *
 * No validation is performed here: a malformed numeric value is passed through
 * `parseInt` as-is. Call `validateSpeechConfig()` beforehand to reject such
 * values.
 *
 * @returns SpeechConfig with all STT, TTS, and limits configuration
 */
export function getSpeechConfig(): SpeechConfig {
  return {
    stt: {
      enabled: isSttEnabled(),
      baseUrl: envOrDefault("STT_BASE_URL", STT_DEFAULTS.baseUrl),
      model: envOrDefault("STT_MODEL", STT_DEFAULTS.model),
      language: envOrDefault("STT_LANGUAGE", STT_DEFAULTS.language),
    },
    tts: {
      default: {
        enabled: isTtsEnabled(),
        url: envOrDefault("TTS_DEFAULT_URL", TTS_DEFAULT_DEFAULTS.url),
        voice: envOrDefault("TTS_DEFAULT_VOICE", TTS_DEFAULT_DEFAULTS.voice),
        format: envOrDefault("TTS_DEFAULT_FORMAT", TTS_DEFAULT_DEFAULTS.format),
      },
      premium: {
        enabled: isTtsPremiumEnabled(),
        url: envOrDefault("TTS_PREMIUM_URL", TTS_PREMIUM_DEFAULTS.url),
      },
      fallback: {
        enabled: isTtsFallbackEnabled(),
        url: envOrDefault("TTS_FALLBACK_URL", TTS_FALLBACK_DEFAULTS.url),
      },
    },
    limits: {
      maxUploadSize: envIntOrDefault("SPEECH_MAX_UPLOAD_SIZE", LIMITS_DEFAULTS.maxUploadSize),
      maxDurationSeconds: envIntOrDefault(
        "SPEECH_MAX_DURATION_SECONDS",
        LIMITS_DEFAULTS.maxDurationSeconds
      ),
      maxTextLength: envIntOrDefault("SPEECH_MAX_TEXT_LENGTH", LIMITS_DEFAULTS.maxTextLength),
    },
  };
}
|
|
|
|
// ==========================================
|
|
// NestJS ConfigModule registerAs factory
|
|
// ==========================================
|
|
|
|
/**
 * Speech configuration registered under the `"speech"` namespace for the
 * NestJS ConfigModule. The factory simply returns `getSpeechConfig()`; it does
 * not run `validateSpeechConfig()`.
 *
 * Registering it in a module:
 * ```typescript
 * import { speechConfig } from './speech.config';
 *
 * @Module({
 *   imports: [ConfigModule.forFeature(speechConfig)],
 * })
 * export class SpeechModule {}
 * ```
 *
 * Reading a value through ConfigService:
 * ```typescript
 * constructor(private config: ConfigService) {
 *   const sttUrl = this.config.get<string>('speech.stt.baseUrl');
 * }
 * ```
 */
export const speechConfig = registerAs("speech", (): SpeechConfig => getSpeechConfig());
|