Critical fixes: - Fix FormData field name mismatch (audio -> file) to match backend FileInterceptor - Add /speech namespace to WebSocket connection URL - Pass auth token in WebSocket handshake options - Wrap audio.play() in try-catch for NotAllowedError and DOMException handling - Replace bare catch block with named error parameter and descriptive message - Add connect_error and disconnect event handlers to WebSocket - Update JSDoc to accurately describe batch transcription (not real-time partial) Important fixes: - Emit transcription-error before disconnect in gateway auth failures - Capture MediaRecorder error details and clean up media tracks on error - Change TtsDefaultConfig.format type from string to AudioFormat - Define canonical SPEECH_TIERS and AUDIO_FORMATS arrays as single source of truth - Fix voice count from 54 to 53 in provider, AGENTS.md, and docs - Fix inaccurate comments (Piper formats, tier prop, SpeachesProvider, TextValidationPipe) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
76 lines
2.6 KiB
TypeScript
76 lines
2.6 KiB
TypeScript
/**
 * TTS Provider Factory
 *
 * Creates and registers TTS providers based on the speech configuration.
 * Reads the enabled flags and URLs from the config and instantiates the
 * appropriate provider for each tier.
 *
 * Each tier maps to a specific TTS engine:
 * - default: Kokoro-FastAPI (CPU, always available)
 * - premium: Chatterbox (GPU, voice cloning)
 * - fallback: Piper via OpenedAI Speech (ultra-lightweight CPU)
 *
 * Issue #391
 */
|
|
|
|
import { Logger } from "@nestjs/common";
|
|
import { ChatterboxTTSProvider } from "./chatterbox-tts.provider";
|
|
import { KokoroTtsProvider } from "./kokoro-tts.provider";
|
|
import { PiperTtsProvider } from "./piper-tts.provider";
|
|
import type { ITTSProvider } from "../interfaces/tts-provider.interface";
|
|
import type { SpeechTier } from "../interfaces/speech-types";
|
|
import type { SpeechConfig } from "../speech.config";
|
|
|
|
// ==========================================
|
|
// Factory function
|
|
// ==========================================
|
|
|
|
// Module-level logger shared by the factory; "TTSProviderFactory" is the name passed to the Logger constructor.
const logger = new Logger("TTSProviderFactory");
|
|
|
|
/**
 * Build the tier-to-provider registry from the speech configuration.
 *
 * A provider is constructed only for tiers whose `enabled` flag is set in
 * `config.tts`; disabled tiers are simply absent from the result. Each
 * registration is logged, followed by a summary line (or a warning when no
 * tier is enabled). The returned Map is keyed by SpeechTier for use with the
 * TTS_PROVIDERS injection token.
 *
 * @param config - Speech configuration with TTS tier settings
 * @returns Map of enabled TTS providers keyed by tier (empty if none are enabled)
 */
export function createTTSProviders(config: SpeechConfig): Map<SpeechTier, ITTSProvider> {
  const { default: defaultTier, premium: premiumTier, fallback: fallbackTier } = config.tts;
  const registry = new Map<SpeechTier, ITTSProvider>();

  // Default tier -> Kokoro (the only provider that also receives voice and format)
  if (defaultTier.enabled) {
    registry.set(
      "default",
      new KokoroTtsProvider(defaultTier.url, defaultTier.voice, defaultTier.format)
    );
    logger.log(`Registered default TTS provider: kokoro at ${defaultTier.url}`);
  }

  // Premium tier -> Chatterbox
  if (premiumTier.enabled) {
    registry.set("premium", new ChatterboxTTSProvider(premiumTier.url));
    logger.log(`Registered premium TTS provider: chatterbox at ${premiumTier.url}`);
  }

  // Fallback tier -> Piper
  if (fallbackTier.enabled) {
    registry.set("fallback", new PiperTtsProvider(fallbackTier.url));
    logger.log(`Registered fallback TTS provider: piper at ${fallbackTier.url}`);
  }

  // Nothing enabled: warn and hand back the empty registry.
  if (registry.size === 0) {
    logger.warn("No TTS providers are enabled. TTS synthesis will not be available.");
    return registry;
  }

  // Map preserves insertion order, so tiers are listed in registration order.
  const tierNames = Array.from(registry.keys()).join(", ");
  logger.log(`TTS providers ready: ${tierNames} (${String(registry.size)} total)`);
  return registry;
}
|