All checks were successful
ci/woodpecker/push/api Pipeline was successful
Add SpeechModule with provider interfaces and service skeleton for multi-tier TTS fallback (premium -> default -> fallback) and STT transcription support. Includes 27 unit tests covering provider selection, fallback logic, and availability checks. - ISTTProvider interface with transcribe/isHealthy methods - ITTSProvider interface with synthesize/listVoices/isHealthy methods - Shared types: SpeechTier, TranscriptionResult, SynthesisResult, etc. - SpeechService with graceful TTS fallback chain - NestJS injection tokens (STT_PROVIDER, TTS_PROVIDERS) - SpeechModule registered in AppModule - ConfigModule integration via speechConfig registerAs factory Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
232 lines
7.3 KiB
TypeScript
232 lines
7.3 KiB
TypeScript
/**
|
|
* SpeechService
|
|
*
|
|
* High-level service for speech-to-text (STT) and text-to-speech (TTS) operations.
|
|
* Manages provider selection and graceful fallback for TTS tiers.
|
|
*
|
|
* Fallback chain for TTS: premium -> default -> fallback
|
|
* Each tier is only attempted if enabled in config and a provider is registered.
|
|
*
|
|
* Issue #389
|
|
*/
|
|
|
|
import { Injectable, Inject, Optional, Logger, ServiceUnavailableException } from "@nestjs/common";
|
|
import { STT_PROVIDER, TTS_PROVIDERS } from "./speech.constants";
|
|
import { speechConfig, type SpeechConfig } from "./speech.config";
|
|
import type { ISTTProvider } from "./interfaces/stt-provider.interface";
|
|
import type { ITTSProvider } from "./interfaces/tts-provider.interface";
|
|
import type {
|
|
SpeechTier,
|
|
TranscribeOptions,
|
|
TranscriptionResult,
|
|
SynthesizeOptions,
|
|
SynthesisResult,
|
|
VoiceInfo,
|
|
} from "./interfaces/speech-types";
|
|
|
|
/**
 * TTS tiers listed from most to least preferred.
 *
 * A fallback chain is a suffix of this list: synthesis starts at the requested
 * tier and, on failure, moves on to each tier that follows it here.
 */
const TTS_FALLBACK_ORDER: ReadonlyArray<SpeechTier> = ["premium", "default", "fallback"];
|
|
|
|
@Injectable()
export class SpeechService {
  private readonly logger = new Logger(SpeechService.name);

  /**
   * @param config - Speech configuration; read for `stt.enabled` and the
   *   per-tier `tts.<tier>.enabled` flags
   * @param sttProvider - Optional STT provider. Because the dependency is
   *   marked `@Optional()`, it may arrive as `undefined` (or `null`) when no
   *   provider is registered.
   * @param ttsProviders - Registered TTS providers keyed by tier
   */
  constructor(
    @Inject(speechConfig.KEY)
    private readonly config: SpeechConfig,

    @Optional()
    @Inject(STT_PROVIDER)
    private readonly sttProvider: ISTTProvider | null | undefined,

    @Inject(TTS_PROVIDERS)
    private readonly ttsProviders: Map<SpeechTier, ITTSProvider>
  ) {
    this.logger.log("Speech service initialized");

    if (this.sttProvider) {
      this.logger.log(`STT provider registered: ${this.sttProvider.name}`);
    }

    if (this.ttsProviders.size > 0) {
      const tierNames = Array.from(this.ttsProviders.keys()).join(", ");
      this.logger.log(`TTS providers registered: ${tierNames}`);
    }
  }

  // ==========================================
  // STT Operations
  // ==========================================

  /**
   * Transcribe audio data to text using the registered STT provider.
   *
   * Any error thrown by the provider is logged and re-thrown as a
   * `ServiceUnavailableException` carrying the original message.
   *
   * @param audio - Raw audio data as a Buffer
   * @param options - Optional transcription parameters, passed through to the provider
   * @returns Transcription result produced by the provider
   * @throws {ServiceUnavailableException} If STT is disabled, no provider is
   *   registered, or the provider fails
   */
  async transcribe(audio: Buffer, options?: TranscribeOptions): Promise<TranscriptionResult> {
    if (!this.config.stt.enabled) {
      throw new ServiceUnavailableException("Speech-to-text is not enabled");
    }

    if (!this.sttProvider) {
      throw new ServiceUnavailableException("No STT provider is registered");
    }

    try {
      return await this.sttProvider.transcribe(audio, options);
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      this.logger.error(`STT transcription failed: ${message}`);
      throw new ServiceUnavailableException(`Transcription failed: ${message}`);
    }
  }

  // ==========================================
  // TTS Operations
  // ==========================================

  /**
   * Synthesize text to audio using TTS providers with graceful fallback.
   *
   * The chain starts at the requested tier (`options.tier`, defaulting to
   * "default") and continues through the tiers that follow it in
   * `TTS_FALLBACK_ORDER`. Tiers ranked above the requested one are never
   * attempted. Only enabled tiers with a registered provider take part.
   *
   * @param text - Text to convert to speech
   * @param options - Optional synthesis parameters; passed unchanged to each provider tried
   * @returns Synthesis result from the first provider that succeeds
   * @throws {ServiceUnavailableException} If the chain is empty or every provider in it fails
   */
  async synthesize(text: string, options?: SynthesizeOptions): Promise<SynthesisResult> {
    const requestedTier = options?.tier ?? "default";
    const fallbackChain = this.buildFallbackChain(requestedTier);

    if (fallbackChain.length === 0) {
      throw new ServiceUnavailableException(
        "No TTS providers are available. Check that TTS is enabled and providers are registered."
      );
    }

    let lastError: Error | undefined;

    for (const tier of fallbackChain) {
      const provider = this.ttsProviders.get(tier);
      if (!provider) {
        // The chain is already filtered by registration; this guard only narrows the type.
        continue;
      }

      try {
        return await provider.synthesize(text, options);
      } catch (error: unknown) {
        // A failing tier is not fatal: record it and fall through to the next tier.
        const message = error instanceof Error ? error.message : String(error);
        this.logger.warn(`TTS tier "${tier}" (${provider.name}) failed: ${message}`);
        lastError = error instanceof Error ? error : new Error(message);
      }
    }

    const errorMessage = lastError?.message ?? "No providers available";
    throw new ServiceUnavailableException(`All TTS providers failed: ${errorMessage}`);
  }

  /**
   * List available voices across all TTS providers, optionally filtered by tier.
   *
   * Failures are never propagated: a provider that cannot list its voices is
   * logged as a warning and contributes no voices.
   *
   * @param tier - Optional tier filter. If omitted, voices from all registered
   *   providers are returned, in provider registration order.
   * @returns Array of voice information objects (empty if nothing is available)
   */
  async listVoices(tier?: SpeechTier): Promise<VoiceInfo[]> {
    if (tier) {
      const provider = this.ttsProviders.get(tier);
      return provider ? this.listProviderVoices(tier, provider) : [];
    }

    // Providers are independent, so query them concurrently. Promise.all keeps
    // the results in Map iteration order, so the aggregated order is stable.
    const voicesPerTier = await Promise.all(
      Array.from(this.ttsProviders, ([providerTier, provider]) =>
        this.listProviderVoices(providerTier, provider)
      )
    );

    const voices: VoiceInfo[] = [];
    for (const tierVoices of voicesPerTier) {
      voices.push(...tierVoices);
    }
    return voices;
  }

  // ==========================================
  // Availability Checks
  // ==========================================

  /**
   * Check if STT is available (enabled in config and provider registered).
   *
   * The provider is compared against both `null` and `undefined`: an
   * unresolved `@Optional()` dependency is injected as `undefined`, so a
   * `!== null` check alone would wrongly report STT as available.
   */
  isSTTAvailable(): boolean {
    const hasProvider = this.sttProvider !== null && this.sttProvider !== undefined;
    return this.config.stt.enabled && hasProvider;
  }

  /**
   * Check if TTS is available (at least one tier enabled with a registered provider).
   */
  isTTSAvailable(): boolean {
    return this.getEnabledTiers().some((tier) => this.ttsProviders.has(tier));
  }

  // ==========================================
  // Private helpers
  // ==========================================

  /**
   * List the voices of a single provider, converting any failure into a
   * logged warning and an empty result.
   */
  private async listProviderVoices(tier: SpeechTier, provider: ITTSProvider): Promise<VoiceInfo[]> {
    try {
      return await provider.listVoices();
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      this.logger.warn(`Failed to list voices for tier "${tier}": ${message}`);
      return [];
    }
  }

  /**
   * Build the fallback chain starting from the requested tier.
   * Only includes tiers that are enabled in config and have a registered provider.
   * Returns an empty chain when the requested tier is not in `TTS_FALLBACK_ORDER`.
   */
  private buildFallbackChain(requestedTier: SpeechTier): SpeechTier[] {
    const startIndex = TTS_FALLBACK_ORDER.indexOf(requestedTier);
    if (startIndex === -1) {
      return [];
    }

    const enabledTiers = this.getEnabledTiers();

    return TTS_FALLBACK_ORDER.slice(startIndex).filter(
      (tier) => enabledTiers.includes(tier) && this.ttsProviders.has(tier)
    );
  }

  /**
   * Get the list of TTS tiers that are enabled in the configuration.
   * The order of the returned list carries no meaning; callers only test membership.
   */
  private getEnabledTiers(): SpeechTier[] {
    const tiers: SpeechTier[] = [];

    if (this.config.tts.default.enabled) {
      tiers.push("default");
    }
    if (this.config.tts.premium.enabled) {
      tiers.push("premium");
    }
    if (this.config.tts.fallback.enabled) {
      tiers.push("fallback");
    }

    return tiers;
  }
}
|