feat(#389): create SpeechModule with provider abstraction layer
All checks were successful
ci/woodpecker/push/api Pipeline was successful

Add SpeechModule with provider interfaces and service skeleton for
multi-tier TTS fallback (premium -> default -> fallback) and STT
transcription support. Includes 27 unit tests covering provider
selection, fallback logic, and availability checks.

- ISTTProvider interface with transcribe/isHealthy methods
- ITTSProvider interface with synthesize/listVoices/isHealthy methods
- Shared types: SpeechTier, TranscriptionResult, SynthesisResult, etc.
- SpeechService with graceful TTS fallback chain
- NestJS injection tokens (STT_PROVIDER, TTS_PROVIDERS)
- SpeechModule registered in AppModule
- ConfigModule integration via speechConfig registerAs factory

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-15 02:09:45 -06:00
parent 52553c8266
commit c40373fa3b
9 changed files with 1129 additions and 0 deletions

View File

@@ -0,0 +1,231 @@
/**
* SpeechService
*
* High-level service for speech-to-text (STT) and text-to-speech (TTS) operations.
* Manages provider selection and graceful fallback for TTS tiers.
*
* Fallback chain for TTS: premium -> default -> fallback
* Each tier is only attempted if enabled in config and a provider is registered.
*
* Issue #389
*/
import { Injectable, Inject, Optional, Logger, ServiceUnavailableException } from "@nestjs/common";
import { STT_PROVIDER, TTS_PROVIDERS } from "./speech.constants";
import { speechConfig, type SpeechConfig } from "./speech.config";
import type { ISTTProvider } from "./interfaces/stt-provider.interface";
import type { ITTSProvider } from "./interfaces/tts-provider.interface";
import type {
SpeechTier,
TranscribeOptions,
TranscriptionResult,
SynthesizeOptions,
SynthesisResult,
VoiceInfo,
} from "./interfaces/speech-types";
/**
 * TTS tiers ordered from most to least preferred.
 *
 * A synthesis request starts at its requested tier's position in this list
 * and, on failure, moves on to each later entry in turn.
 */
const TTS_FALLBACK_ORDER: readonly SpeechTier[] = [
  "premium",
  "default",
  "fallback",
];
/**
 * High-level entry point for speech-to-text (STT) and text-to-speech (TTS).
 *
 * STT is delegated to a single, optional provider. TTS is delegated to one
 * provider per tier; when a tier fails, synthesis moves on to the next tier
 * in `TTS_FALLBACK_ORDER` that is both enabled in config and has a provider.
 */
@Injectable()
export class SpeechService {
  private readonly logger = new Logger(SpeechService.name);

  /**
   * @param config - Speech configuration (per-feature and per-tier `enabled` flags)
   * @param sttProvider - STT provider, or `null`/`undefined` when none is bound
   * @param ttsProviders - Registered TTS providers keyed by tier
   */
  constructor(
    @Inject(speechConfig.KEY)
    private readonly config: SpeechConfig,

    // Nest resolves a missing @Optional() dependency to `undefined`, while
    // manual construction may pass `null`; the type admits both.
    @Optional()
    @Inject(STT_PROVIDER)
    private readonly sttProvider: ISTTProvider | null | undefined,

    @Inject(TTS_PROVIDERS)
    private readonly ttsProviders: Map<SpeechTier, ITTSProvider>
  ) {
    this.logger.log("Speech service initialized");

    if (this.sttProvider) {
      this.logger.log(`STT provider registered: ${this.sttProvider.name}`);
    }

    if (this.ttsProviders.size > 0) {
      const tierNames = Array.from(this.ttsProviders.keys()).join(", ");
      this.logger.log(`TTS providers registered: ${tierNames}`);
    }
  }

  // ==========================================
  // STT Operations
  // ==========================================

  /**
   * Transcribe audio data to text using the registered STT provider.
   *
   * Any error raised by the provider is logged and re-thrown as a
   * `ServiceUnavailableException` carrying the original message.
   *
   * @param audio - Raw audio data as a Buffer
   * @param options - Optional transcription parameters, passed to the provider unchanged
   * @returns Transcription result produced by the provider
   * @throws {ServiceUnavailableException} If STT is disabled, no provider is
   *   registered, or the provider fails
   */
  async transcribe(audio: Buffer, options?: TranscribeOptions): Promise<TranscriptionResult> {
    if (!this.config.stt.enabled) {
      throw new ServiceUnavailableException("Speech-to-text is not enabled");
    }

    if (!this.sttProvider) {
      throw new ServiceUnavailableException("No STT provider is registered");
    }

    try {
      return await this.sttProvider.transcribe(audio, options);
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      this.logger.error(`STT transcription failed: ${message}`);
      throw new ServiceUnavailableException(`Transcription failed: ${message}`);
    }
  }

  // ==========================================
  // TTS Operations
  // ==========================================

  /**
   * Synthesize text to audio using TTS providers with graceful fallback.
   *
   * The chain starts at the requested tier (`"default"` when none is given)
   * and continues through the later entries of `TTS_FALLBACK_ORDER`. Tiers
   * that are disabled or have no registered provider are skipped. The first
   * provider that succeeds wins; each failure is logged as a warning.
   *
   * @param text - Text to convert to speech
   * @param options - Optional synthesis parameters (including `tier`), passed to the provider unchanged
   * @returns Synthesis result from the first provider that succeeds
   * @throws {ServiceUnavailableException} If no tier is usable, or every
   *   attempted provider fails (the message includes the last failure)
   */
  async synthesize(text: string, options?: SynthesizeOptions): Promise<SynthesisResult> {
    const requestedTier = options?.tier ?? "default";
    const fallbackChain = this.buildFallbackChain(requestedTier);

    if (fallbackChain.length === 0) {
      throw new ServiceUnavailableException(
        "No TTS providers are available. Check that TTS is enabled and providers are registered."
      );
    }

    let lastError: Error | undefined;

    for (const tier of fallbackChain) {
      const provider = this.ttsProviders.get(tier);
      if (!provider) {
        // Defensive: the chain is already filtered to tiers with a provider.
        continue;
      }

      try {
        return await provider.synthesize(text, options);
      } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        this.logger.warn(`TTS tier "${tier}" (${provider.name}) failed: ${message}`);
        lastError = error instanceof Error ? error : new Error(message);
      }
    }

    const errorMessage = lastError?.message ?? "No providers available";
    throw new ServiceUnavailableException(`All TTS providers failed: ${errorMessage}`);
  }

  /**
   * List available voices across TTS providers, optionally filtered by tier.
   *
   * This is best-effort: a provider that fails to list its voices is logged
   * as a warning and contributes no voices; the call itself never rejects
   * because of a provider error. Unlike `synthesize`, the per-tier `enabled`
   * config flags are not consulted here.
   *
   * @param tier - Optional tier filter. If omitted, voices from all
   *   registered providers are returned in provider registration order.
   * @returns Array of voice information objects (empty if the tier has no provider)
   */
  async listVoices(tier?: SpeechTier): Promise<VoiceInfo[]> {
    if (tier) {
      const provider = this.ttsProviders.get(tier);
      return provider ? this.listVoicesForTier(tier, provider) : [];
    }

    // Providers are independent, so query them concurrently. Promise.all keeps
    // the input order, and the helper never rejects, so one failing provider
    // cannot discard the others' results.
    const voicesPerTier = await Promise.all(
      Array.from(this.ttsProviders, ([providerTier, provider]) =>
        this.listVoicesForTier(providerTier, provider)
      )
    );

    return voicesPerTier.flat();
  }

  // ==========================================
  // Availability Checks
  // ==========================================

  /**
   * Check if STT is available (enabled in config and provider registered).
   *
   * Treats both `null` and `undefined` as "no provider", matching the guard
   * in `transcribe`, since an unresolved optional dependency is `undefined`.
   */
  isSTTAvailable(): boolean {
    const hasProvider = this.sttProvider !== null && this.sttProvider !== undefined;
    return this.config.stt.enabled && hasProvider;
  }

  /**
   * Check if TTS is available (at least one tier enabled with a registered provider).
   */
  isTTSAvailable(): boolean {
    return this.getEnabledTiers().some((tier) => this.ttsProviders.has(tier));
  }

  // ==========================================
  // Private helpers
  // ==========================================

  /**
   * Fetch one provider's voices, converting any failure into a logged
   * warning and an empty list so callers never have to handle a rejection.
   */
  private async listVoicesForTier(tier: SpeechTier, provider: ITTSProvider): Promise<VoiceInfo[]> {
    try {
      return await provider.listVoices();
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      this.logger.warn(`Failed to list voices for tier "${tier}": ${message}`);
      return [];
    }
  }

  /**
   * Build the fallback chain starting from the requested tier.
   * Only includes tiers that are enabled in config and have a registered provider.
   * Returns an empty chain when the requested tier is not in `TTS_FALLBACK_ORDER`.
   */
  private buildFallbackChain(requestedTier: SpeechTier): SpeechTier[] {
    const startIndex = TTS_FALLBACK_ORDER.indexOf(requestedTier);
    if (startIndex === -1) {
      return [];
    }

    const enabledTiers = this.getEnabledTiers();

    return TTS_FALLBACK_ORDER.slice(startIndex).filter(
      (tier) => enabledTiers.includes(tier) && this.ttsProviders.has(tier)
    );
  }

  /**
   * Get the list of TTS tiers that are enabled in the configuration.
   * The order of the result carries no meaning; callers only test membership.
   */
  private getEnabledTiers(): SpeechTier[] {
    const tiers: SpeechTier[] = [];

    if (this.config.tts.default.enabled) {
      tiers.push("default");
    }

    if (this.config.tts.premium.enabled) {
      tiers.push("premium");
    }

    if (this.config.tts.fallback.enabled) {
      tiers.push("fallback");
    }

    return tiers;
  }
}