Files
stack/apps/api/src/speech/speech.config.ts
Jason Woltje af9c5799af
All checks were successful
ci/woodpecker/push/web Pipeline was successful
ci/woodpecker/push/api Pipeline was successful
fix(#388): address PR review findings — fix WebSocket/REST bugs, improve error handling, fix types and comments
Critical fixes:
- Fix FormData field name mismatch (audio -> file) to match backend FileInterceptor
- Add /speech namespace to WebSocket connection URL
- Pass auth token in WebSocket handshake options
- Wrap audio.play() in try-catch for NotAllowedError and DOMException handling
- Replace bare catch block with named error parameter and descriptive message
- Add connect_error and disconnect event handlers to WebSocket
- Update JSDoc to accurately describe batch transcription (not real-time partial)

Important fixes:
- Emit transcription-error before disconnect in gateway auth failures
- Capture MediaRecorder error details and clean up media tracks on error
- Change TtsDefaultConfig.format type from string to AudioFormat
- Define canonical SPEECH_TIERS and AUDIO_FORMATS arrays as single source of truth
- Fix voice count from 54 to 53 in provider, AGENTS.md, and docs
- Fix inaccurate comments (Piper formats, tier prop, SpeachesProvider, TextValidationPipe)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 03:44:33 -06:00

306 lines
8.2 KiB
TypeScript

/**
* Speech Services Configuration
*
* Issue #401: Environment variables and validation for STT (speech-to-text),
* TTS (text-to-speech), and speech service limits.
*
* Validates conditional requirements at startup:
* - STT_BASE_URL is required when STT_ENABLED=true
* - TTS_DEFAULT_URL is required when TTS_ENABLED=true
* - TTS_PREMIUM_URL is required when TTS_PREMIUM_ENABLED=true
* - TTS_FALLBACK_URL is required when TTS_FALLBACK_ENABLED=true
*/
import { registerAs } from "@nestjs/config";
import type { AudioFormat } from "./interfaces/speech-types";
// ==========================================
// Default values
// ==========================================
/**
 * Built-in speech-to-text settings. getSpeechConfig() uses these when
 * STT_BASE_URL, STT_MODEL or STT_LANGUAGE are undefined.
 */
const STT_DEFAULTS = {
baseUrl: "http://speaches:8000/v1",
model: "Systran/faster-whisper-large-v3-turbo",
language: "en",
} as const;
/**
 * Built-in settings for the default text-to-speech engine. getSpeechConfig()
 * uses these when TTS_DEFAULT_URL, TTS_DEFAULT_VOICE or TTS_DEFAULT_FORMAT
 * are undefined.
 */
const TTS_DEFAULT_DEFAULTS = {
url: "http://kokoro-tts:8880/v1",
voice: "af_heart",
format: "mp3",
} as const;
/**
 * Built-in settings for the premium text-to-speech engine. getSpeechConfig()
 * uses the URL when TTS_PREMIUM_URL is undefined.
 */
const TTS_PREMIUM_DEFAULTS = {
url: "http://chatterbox-tts:8881/v1",
} as const;
/**
 * Built-in settings for the fallback text-to-speech engine. getSpeechConfig()
 * uses the URL when TTS_FALLBACK_URL is undefined.
 */
const TTS_FALLBACK_DEFAULTS = {
url: "http://openedai-speech:8000/v1",
} as const;
/**
 * Built-in speech service limits. getSpeechConfig() uses these when the
 * SPEECH_MAX_UPLOAD_SIZE, SPEECH_MAX_DURATION_SECONDS or
 * SPEECH_MAX_TEXT_LENGTH variables are undefined.
 */
const LIMITS_DEFAULTS = {
// NOTE(review): presumably bytes (25 MB) — confirm against the upload handler.
maxUploadSize: 25_000_000,
// 600 seconds = 10 minutes.
maxDurationSeconds: 600,
// NOTE(review): presumably a character count — confirm against the text validation.
maxTextLength: 4096,
} as const;
// ==========================================
// Types
// ==========================================
/** Speech-to-text (STT) settings as produced by getSpeechConfig(). */
export interface SttConfig {
/** Whether STT is switched on (derived from STT_ENABLED). */
enabled: boolean;
/** Base URL of the STT service (STT_BASE_URL, else the built-in default). */
baseUrl: string;
/** Model identifier (STT_MODEL, else the built-in default). */
model: string;
/** Language setting (STT_LANGUAGE, else the built-in default). */
language: string;
}
/** Settings for the default text-to-speech engine as produced by getSpeechConfig(). */
export interface TtsDefaultConfig {
/** Whether the default TTS engine is switched on (derived from TTS_ENABLED). */
enabled: boolean;
/** Engine URL (TTS_DEFAULT_URL, else the built-in default). */
url: string;
/** Voice name (TTS_DEFAULT_VOICE, else the built-in default). */
voice: string;
/**
 * Output audio format (TTS_DEFAULT_FORMAT, else the built-in default).
 * getSpeechConfig() casts the environment value to AudioFormat without
 * checking it.
 */
format: AudioFormat;
}
/** Settings for the premium text-to-speech engine as produced by getSpeechConfig(). */
export interface TtsPremiumConfig {
/** Whether the premium engine is switched on (derived from TTS_PREMIUM_ENABLED). */
enabled: boolean;
/** Engine URL (TTS_PREMIUM_URL, else the built-in default). */
url: string;
}
/** Settings for the fallback text-to-speech engine as produced by getSpeechConfig(). */
export interface TtsFallbackConfig {
/** Whether the fallback engine is switched on (derived from TTS_FALLBACK_ENABLED). */
enabled: boolean;
/** Engine URL (TTS_FALLBACK_URL, else the built-in default). */
url: string;
}
/** Groups the settings of the three text-to-speech engines. */
export interface TtsConfig {
/** Default engine settings. */
default: TtsDefaultConfig;
/** Premium engine settings. */
premium: TtsPremiumConfig;
/** Fallback engine settings. */
fallback: TtsFallbackConfig;
}
/** Numeric limits for the speech services as produced by getSpeechConfig(). */
export interface SpeechLimitsConfig {
/** From SPEECH_MAX_UPLOAD_SIZE. NOTE(review): presumably bytes — confirm with the consumer. */
maxUploadSize: number;
/** From SPEECH_MAX_DURATION_SECONDS, in seconds. */
maxDurationSeconds: number;
/** From SPEECH_MAX_TEXT_LENGTH. NOTE(review): presumably characters — confirm with the consumer. */
maxTextLength: number;
}
/** Root speech configuration object returned by getSpeechConfig(). */
export interface SpeechConfig {
/** Speech-to-text settings. */
stt: SttConfig;
/** Text-to-speech settings for all engines. */
tts: TtsConfig;
/** Upload size, duration and text length limits. */
limits: SpeechLimitsConfig;
}
// ==========================================
// Helper: parse boolean env var
// ==========================================
/**
 * Interpret a raw environment variable value as an on/off flag.
 *
 * Only the exact strings "true" and "1" count as enabled. Every other value,
 * including an unset variable, different casing, or surrounding whitespace,
 * is treated as disabled.
 *
 * @param value - raw environment variable value, or undefined when unset
 * @returns true when the value is exactly "true" or "1"
 */
function parseBooleanEnv(value: string | undefined): boolean {
  switch (value) {
    case "true":
    case "1":
      return true;
    default:
      return false;
  }
}
// ==========================================
// Enabled checks
// ==========================================
/**
 * Report whether the speech-to-text (STT) service is switched on.
 *
 * @returns the STT_ENABLED environment variable interpreted as a boolean flag
 */
export function isSttEnabled(): boolean {
  const flag = process.env.STT_ENABLED;
  return parseBooleanEnv(flag);
}
/**
 * Report whether the default text-to-speech (TTS) engine is switched on.
 *
 * @returns the TTS_ENABLED environment variable interpreted as a boolean flag
 */
export function isTtsEnabled(): boolean {
  const flag = process.env.TTS_ENABLED;
  return parseBooleanEnv(flag);
}
/**
 * Report whether the premium TTS engine (Chatterbox) is switched on.
 *
 * @returns the TTS_PREMIUM_ENABLED environment variable interpreted as a boolean flag
 */
export function isTtsPremiumEnabled(): boolean {
  const flag = process.env.TTS_PREMIUM_ENABLED;
  return parseBooleanEnv(flag);
}
/**
 * Report whether the fallback TTS engine (Piper/OpenedAI) is switched on.
 *
 * @returns the TTS_FALLBACK_ENABLED environment variable interpreted as a boolean flag
 */
export function isTtsFallbackEnabled(): boolean {
  const flag = process.env.TTS_FALLBACK_ENABLED;
  return parseBooleanEnv(flag);
}
// ==========================================
// Validation helpers
// ==========================================
/**
 * Tell whether the named environment variable carries a usable value.
 *
 * A variable that is unset, empty, or whitespace-only counts as not set.
 *
 * @param envVar - name of the environment variable to inspect
 * @returns true when the variable holds at least one non-whitespace character
 */
function isEnvVarSet(envVar: string): boolean {
  const raw = process.env[envVar];
  if (raw === undefined) {
    return false;
  }
  return raw.trim().length > 0;
}
/**
 * Ensure every environment variable a service depends on has a value.
 *
 * Intended to be called only after the caller has established that the
 * service is enabled; this function does not inspect the enabled flag itself,
 * it only uses its name in the error text.
 *
 * @param serviceName - human-readable service label used in the error message
 * @param enabledFlag - name of the env var that switches the service on
 * @param requiredVars - names of the env vars that must be non-empty
 * @throws Error listing every missing or empty variable and how to disable the service
 */
function validateRequiredVars(
  serviceName: string,
  enabledFlag: string,
  requiredVars: string[]
): void {
  const unset = requiredVars.filter((name) => !isEnvVarSet(name));
  if (unset.length === 0) {
    return;
  }

  const problem = `${serviceName} is enabled (${enabledFlag}=true) but required environment variables are missing or empty: ${unset.join(", ")}. `;
  const remedy = `Either set these variables or disable by setting ${enabledFlag}=false.`;
  throw new Error(problem + remedy);
}
/**
 * Check that a numeric environment variable, when provided, holds a positive
 * integer written in plain decimal form.
 *
 * An unset, empty, or whitespace-only variable is accepted without error.
 * Surrounding whitespace is tolerated, but the trimmed text must match the
 * parsed number exactly, so "007", "+5", "1.5" and "10px" are all rejected.
 *
 * @param envVar - name of the environment variable to check
 * @throws Error when the value is present but is not a positive integer
 */
function validatePositiveInteger(envVar: string): void {
  const raw = process.env[envVar];
  if (raw === undefined) {
    return;
  }

  const trimmed = raw.trim();
  if (trimmed === "") {
    return;
  }

  const numeric = parseInt(raw, 10);
  // Round-tripping through String() rejects partial parses and non-canonical spellings.
  const isCanonicalPositive = !Number.isNaN(numeric) && numeric > 0 && String(numeric) === trimmed;
  if (!isCanonicalPositive) {
    throw new Error(`${envVar} must be a positive integer. Current value: "${raw}".`);
  }
}
// ==========================================
// Main validation
// ==========================================
/**
 * Fail-fast check of the speech-related environment variables.
 * Meant to be invoked during module initialization.
 *
 * Checks, in this order, stopping at the first failure:
 * - STT_BASE_URL is set when STT_ENABLED is on
 * - TTS_DEFAULT_URL is set when TTS_ENABLED is on
 * - TTS_PREMIUM_URL is set when TTS_PREMIUM_ENABLED is on
 * - TTS_FALLBACK_URL is set when TTS_FALLBACK_ENABLED is on
 * - SPEECH_MAX_UPLOAD_SIZE, SPEECH_MAX_DURATION_SECONDS and
 *   SPEECH_MAX_TEXT_LENGTH are positive integers (only when set)
 *
 * @throws Error if any required configuration is missing or invalid
 */
export function validateSpeechConfig(): void {
  // One row per service: which flag turns it on and what it then requires.
  const conditionalRequirements: Array<{
    isEnabled: () => boolean;
    serviceName: string;
    enabledFlag: string;
    requiredVars: string[];
  }> = [
    {
      isEnabled: isSttEnabled,
      serviceName: "STT",
      enabledFlag: "STT_ENABLED",
      requiredVars: ["STT_BASE_URL"],
    },
    {
      isEnabled: isTtsEnabled,
      serviceName: "TTS",
      enabledFlag: "TTS_ENABLED",
      requiredVars: ["TTS_DEFAULT_URL"],
    },
    {
      isEnabled: isTtsPremiumEnabled,
      serviceName: "TTS premium",
      enabledFlag: "TTS_PREMIUM_ENABLED",
      requiredVars: ["TTS_PREMIUM_URL"],
    },
    {
      isEnabled: isTtsFallbackEnabled,
      serviceName: "TTS fallback",
      enabledFlag: "TTS_FALLBACK_ENABLED",
      requiredVars: ["TTS_FALLBACK_URL"],
    },
  ];

  for (const requirement of conditionalRequirements) {
    if (requirement.isEnabled()) {
      validateRequiredVars(
        requirement.serviceName,
        requirement.enabledFlag,
        requirement.requiredVars
      );
    }
  }

  // Limits are optional; they are only checked when a value is present.
  const limitVars = [
    "SPEECH_MAX_UPLOAD_SIZE",
    "SPEECH_MAX_DURATION_SECONDS",
    "SPEECH_MAX_TEXT_LENGTH",
  ];
  for (const limitVar of limitVars) {
    validatePositiveInteger(limitVar);
  }
}
// ==========================================
// Config getter
// ==========================================
/**
 * Build the full speech configuration object from environment variables,
 * substituting the built-in defaults for anything that is not provided.
 *
 * String settings fall back to their default only when the variable is
 * undefined. Numeric limits also fall back when the variable is empty or
 * whitespace-only: validatePositiveInteger accepts such values as "not set",
 * so parsing them here would otherwise produce NaN and silently disable the
 * limit.
 *
 * This function performs no validation. A non-blank limit that is not a
 * number still parses to NaN unless validateSpeechConfig() has rejected it
 * beforehand.
 *
 * @returns SpeechConfig with all STT, TTS, and limits configuration
 */
export function getSpeechConfig(): SpeechConfig {
  // Resolve a numeric limit: missing or blank -> default, otherwise base-10 parse.
  const readLimit = (envVar: string, fallback: number): number => {
    const raw = process.env[envVar];
    if (raw === undefined || raw.trim() === "") {
      return fallback;
    }
    return parseInt(raw, 10);
  };

  return {
    stt: {
      enabled: isSttEnabled(),
      baseUrl: process.env.STT_BASE_URL ?? STT_DEFAULTS.baseUrl,
      model: process.env.STT_MODEL ?? STT_DEFAULTS.model,
      language: process.env.STT_LANGUAGE ?? STT_DEFAULTS.language,
    },
    tts: {
      default: {
        enabled: isTtsEnabled(),
        url: process.env.TTS_DEFAULT_URL ?? TTS_DEFAULT_DEFAULTS.url,
        voice: process.env.TTS_DEFAULT_VOICE ?? TTS_DEFAULT_DEFAULTS.voice,
        // NOTE(review): the environment value is cast, not checked, so an
        // unsupported format string passes through unchanged.
        format: (process.env.TTS_DEFAULT_FORMAT ?? TTS_DEFAULT_DEFAULTS.format) as AudioFormat,
      },
      premium: {
        enabled: isTtsPremiumEnabled(),
        url: process.env.TTS_PREMIUM_URL ?? TTS_PREMIUM_DEFAULTS.url,
      },
      fallback: {
        enabled: isTtsFallbackEnabled(),
        url: process.env.TTS_FALLBACK_URL ?? TTS_FALLBACK_DEFAULTS.url,
      },
    },
    limits: {
      maxUploadSize: readLimit("SPEECH_MAX_UPLOAD_SIZE", LIMITS_DEFAULTS.maxUploadSize),
      maxDurationSeconds: readLimit(
        "SPEECH_MAX_DURATION_SECONDS",
        LIMITS_DEFAULTS.maxDurationSeconds
      ),
      maxTextLength: readLimit("SPEECH_MAX_TEXT_LENGTH", LIMITS_DEFAULTS.maxTextLength),
    },
  };
}
// ==========================================
// NestJS ConfigModule registerAs factory
// ==========================================
/**
 * Speech configuration registered with the NestJS ConfigModule under the
 * "speech" namespace. The factory returns the result of getSpeechConfig().
 *
 * Usage in a module:
 * ```typescript
 * import { speechConfig } from './speech.config';
 *
 * @Module({
 *   imports: [ConfigModule.forFeature(speechConfig)],
 * })
 * export class SpeechModule {}
 * ```
 *
 * Then inject via ConfigService:
 * ```typescript
 * constructor(private config: ConfigService) {
 *   const sttUrl = this.config.get<string>('speech.stt.baseUrl');
 * }
 * ```
 */
export const speechConfig = registerAs("speech", (): SpeechConfig => getSpeechConfig());