Files
stack/apps/api/src/speech/providers/tts-provider.factory.ts
Jason Woltje af9c5799af
All checks were successful
ci/woodpecker/push/web Pipeline was successful
ci/woodpecker/push/api Pipeline was successful
fix(#388): address PR review findings — fix WebSocket/REST bugs, improve error handling, fix types and comments
Critical fixes:
- Fix FormData field name mismatch (audio -> file) to match backend FileInterceptor
- Add /speech namespace to WebSocket connection URL
- Pass auth token in WebSocket handshake options
- Wrap audio.play() in try-catch for NotAllowedError and DOMException handling
- Replace bare catch block with named error parameter and descriptive message
- Add connect_error and disconnect event handlers to WebSocket
- Update JSDoc to accurately describe batch transcription (not real-time partial)

Important fixes:
- Emit transcription-error before disconnect in gateway auth failures
- Capture MediaRecorder error details and clean up media tracks on error
- Change TtsDefaultConfig.format type from string to AudioFormat
- Define canonical SPEECH_TIERS and AUDIO_FORMATS arrays as single source of truth
- Fix voice count from 54 to 53 in provider, AGENTS.md, and docs
- Fix inaccurate comments (Piper formats, tier prop, SpeachesProvider, TextValidationPipe)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 03:44:33 -06:00

76 lines
2.6 KiB
TypeScript

/**
* TTS Provider Factory
*
* Creates and registers TTS providers based on speech configuration.
* Reads enabled flags and URLs from config and instantiates the appropriate
* provider for each tier.
*
* Each tier maps to a specific TTS engine:
* - default: Kokoro-FastAPI (CPU, always available)
* - premium: Chatterbox (GPU, voice cloning)
* - fallback: Piper via OpenedAI Speech (ultra-lightweight CPU)
*
* Issue #391
*/
import { Logger } from "@nestjs/common";
import { ChatterboxTTSProvider } from "./chatterbox-tts.provider";
import { KokoroTtsProvider } from "./kokoro-tts.provider";
import { PiperTtsProvider } from "./piper-tts.provider";
import type { ITTSProvider } from "../interfaces/tts-provider.interface";
import type { SpeechTier } from "../interfaces/speech-types";
import type { SpeechConfig } from "../speech.config";
// ==========================================
// Factory function
// ==========================================
// Module-level logger used by the factory function below; constructed with the "TTSProviderFactory" label.
const logger = new Logger("TTSProviderFactory");
/**
 * Build the set of TTS providers that the speech configuration enables.
 *
 * Tiers are examined in the order default, premium, fallback. A provider is
 * instantiated and stored in the result only when its tier's `enabled` flag
 * is truthy, and each registration is logged together with the tier's URL.
 * After all tiers have been examined, either a summary line listing the
 * registered tiers is logged, or a warning when none were enabled.
 *
 * The returned Map is meant to be used with the TTS_PROVIDERS injection token.
 *
 * @param config - Speech configuration with TTS tier settings
 * @returns Map of enabled TTS providers keyed by tier (empty when no tier is enabled)
 */
export function createTTSProviders(config: SpeechConfig): Map<SpeechTier, ITTSProvider> {
  const registry = new Map<SpeechTier, ITTSProvider>();
  const { default: kokoro, premium: chatterbox, fallback: piper } = config.tts;

  // Default tier: Kokoro (URL, voice and output format are all taken from config)
  if (kokoro.enabled) {
    registry.set("default", new KokoroTtsProvider(kokoro.url, kokoro.voice, kokoro.format));
    logger.log(`Registered default TTS provider: kokoro at ${kokoro.url}`);
  }

  // Premium tier: Chatterbox (only the URL is passed through)
  if (chatterbox.enabled) {
    registry.set("premium", new ChatterboxTTSProvider(chatterbox.url));
    logger.log(`Registered premium TTS provider: chatterbox at ${chatterbox.url}`);
  }

  // Fallback tier: Piper (only the URL is passed through)
  if (piper.enabled) {
    registry.set("fallback", new PiperTtsProvider(piper.url));
    logger.log(`Registered fallback TTS provider: piper at ${piper.url}`);
  }

  const enabledCount = registry.size;
  if (enabledCount > 0) {
    // Map iteration follows insertion order, so tiers are listed in registration order.
    const tierList = Array.from(registry.keys()).join(", ");
    logger.log(`TTS providers ready: ${tierList} (${String(enabledCount)} total)`);
    return registry;
  }

  logger.warn("No TTS providers are enabled. TTS synthesis will not be available.");
  return registry;
}