feat(#389): create SpeechModule with provider abstraction layer
All checks were successful
ci/woodpecker/push/api Pipeline was successful
All checks were successful
ci/woodpecker/push/api Pipeline was successful
Add SpeechModule with provider interfaces and service skeleton for multi-tier TTS fallback (premium -> default -> fallback) and STT transcription support. Includes 27 unit tests covering provider selection, fallback logic, and availability checks.

- ISTTProvider interface with transcribe/isHealthy methods
- ITTSProvider interface with synthesize/listVoices/isHealthy methods
- Shared types: SpeechTier, TranscriptionResult, SynthesisResult, etc.
- SpeechService with graceful TTS fallback chain
- NestJS injection tokens (STT_PROVIDER, TTS_PROVIDERS)
- SpeechModule registered in AppModule
- ConfigModule integration via speechConfig registerAs factory

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -37,6 +37,7 @@ import { JobStepsModule } from "./job-steps/job-steps.module";
|
|||||||
import { CoordinatorIntegrationModule } from "./coordinator-integration/coordinator-integration.module";
|
import { CoordinatorIntegrationModule } from "./coordinator-integration/coordinator-integration.module";
|
||||||
import { FederationModule } from "./federation/federation.module";
|
import { FederationModule } from "./federation/federation.module";
|
||||||
import { CredentialsModule } from "./credentials/credentials.module";
|
import { CredentialsModule } from "./credentials/credentials.module";
|
||||||
|
import { SpeechModule } from "./speech/speech.module";
|
||||||
import { RlsContextInterceptor } from "./common/interceptors/rls-context.interceptor";
|
import { RlsContextInterceptor } from "./common/interceptors/rls-context.interceptor";
|
||||||
|
|
||||||
@Module({
|
@Module({
|
||||||
@@ -97,6 +98,7 @@ import { RlsContextInterceptor } from "./common/interceptors/rls-context.interce
|
|||||||
CoordinatorIntegrationModule,
|
CoordinatorIntegrationModule,
|
||||||
FederationModule,
|
FederationModule,
|
||||||
CredentialsModule,
|
CredentialsModule,
|
||||||
|
SpeechModule,
|
||||||
],
|
],
|
||||||
controllers: [AppController, CsrfController],
|
controllers: [AppController, CsrfController],
|
||||||
providers: [
|
providers: [
|
||||||
|
|||||||
18
apps/api/src/speech/interfaces/index.ts
Normal file
18
apps/api/src/speech/interfaces/index.ts
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
/**
|
||||||
|
* Speech interfaces barrel export.
|
||||||
|
*
|
||||||
|
* Issue #389
|
||||||
|
*/
|
||||||
|
|
||||||
|
export type { ISTTProvider } from "./stt-provider.interface";
|
||||||
|
export type { ITTSProvider } from "./tts-provider.interface";
|
||||||
|
export type {
|
||||||
|
SpeechTier,
|
||||||
|
AudioFormat,
|
||||||
|
TranscribeOptions,
|
||||||
|
TranscriptionResult,
|
||||||
|
TranscriptionSegment,
|
||||||
|
SynthesizeOptions,
|
||||||
|
SynthesisResult,
|
||||||
|
VoiceInfo,
|
||||||
|
} from "./speech-types";
|
||||||
149
apps/api/src/speech/interfaces/speech-types.ts
Normal file
149
apps/api/src/speech/interfaces/speech-types.ts
Normal file
@@ -0,0 +1,149 @@
|
|||||||
|
/**
|
||||||
|
* Speech Types
|
||||||
|
*
|
||||||
|
* Shared types for speech-to-text (STT) and text-to-speech (TTS) services.
|
||||||
|
* Used by provider interfaces and the SpeechService.
|
||||||
|
*
|
||||||
|
* Issue #389
|
||||||
|
*/
|
||||||
|
|
||||||
|
// ==========================================
|
||||||
|
// Enums / Discriminators
|
||||||
|
// ==========================================
|
||||||
|
|
||||||
|
/**
 * TTS provider tier.
 *
 * Identifies which TTS engine a provider, voice, or synthesis result
 * belongs to.
 *
 * - default: Primary TTS engine (e.g., Kokoro)
 * - premium: Higher quality TTS engine (e.g., Chatterbox)
 * - fallback: Backup TTS engine (e.g., Piper/OpenedAI)
 */
export type SpeechTier = "default" | "premium" | "fallback";
|
||||||
|
|
||||||
|
/**
 * Audio output format for TTS synthesis.
 *
 * Closed set of format identifiers accepted in synthesis options and
 * reported back on synthesis results.
 */
export type AudioFormat = "mp3" | "wav" | "opus" | "flac" | "aac" | "pcm";
|
||||||
|
|
||||||
|
// ==========================================
|
||||||
|
// STT Types
|
||||||
|
// ==========================================
|
||||||
|
|
||||||
|
/**
 * Options for speech-to-text transcription.
 *
 * Every field is optional; an empty object is a valid value.
 */
export interface TranscribeOptions {
  /** Language code (e.g., "en", "fr", "de") */
  language?: string;

  /** Model to use for transcription */
  model?: string;

  /** MIME type of the audio (e.g., "audio/mp3", "audio/wav") */
  mimeType?: string;

  /** Optional prompt to guide transcription */
  prompt?: string;

  /** Temperature for transcription (0.0 - 1.0) */
  temperature?: number;
}
|
||||||
|
|
||||||
|
/**
 * Result of a speech-to-text transcription.
 *
 * `text` and `language` are always present; the remaining fields are
 * optional metadata.
 */
export interface TranscriptionResult {
  /** Transcribed text */
  text: string;

  /** Language detected or used */
  language: string;

  /** Duration of the audio in seconds */
  durationSeconds?: number;

  /** Confidence score (0.0 - 1.0, if available) */
  confidence?: number;

  /** Individual word or segment timings (if available) */
  segments?: TranscriptionSegment[];
}
|
||||||
|
|
||||||
|
/**
 * A segment within a transcription result.
 *
 * Carries the segment text together with its start and end offsets.
 */
export interface TranscriptionSegment {
  /** Segment text */
  text: string;

  /** Start time in seconds */
  start: number;

  /** End time in seconds */
  end: number;

  /** Confidence for this segment */
  confidence?: number;
}
|
||||||
|
|
||||||
|
// ==========================================
|
||||||
|
// TTS Types
|
||||||
|
// ==========================================
|
||||||
|
|
||||||
|
/**
 * Options for text-to-speech synthesis.
 *
 * Every field is optional; an empty object is a valid value.
 */
export interface SynthesizeOptions {
  /** Voice ID to use */
  voice?: string;

  /** Desired audio format */
  format?: AudioFormat;

  /** Speech speed multiplier (0.5 - 2.0) */
  speed?: number;

  /** Preferred TTS tier */
  tier?: SpeechTier;
}
|
||||||
|
|
||||||
|
/**
 * Result of a text-to-speech synthesis.
 *
 * Bundles the audio payload with the format, voice, and tier that
 * produced it.
 */
export interface SynthesisResult {
  /** Synthesized audio data */
  audio: Buffer;

  /** Audio format of the result */
  format: AudioFormat;

  /** Voice used for synthesis */
  voice: string;

  /** Tier that produced the synthesis */
  tier: SpeechTier;

  /** Duration of the generated audio in seconds (if available) */
  durationSeconds?: number;
}
|
||||||
|
|
||||||
|
/**
 * Information about an available TTS voice.
 *
 * Each voice is tagged with the tier it belongs to.
 */
export interface VoiceInfo {
  /** Voice identifier */
  id: string;

  /** Human-readable voice name */
  name: string;

  /** Language code */
  language?: string;

  /** Tier this voice belongs to */
  tier: SpeechTier;

  /** Whether this is the default voice for its tier */
  isDefault?: boolean;
}
|
||||||
52
apps/api/src/speech/interfaces/stt-provider.interface.ts
Normal file
52
apps/api/src/speech/interfaces/stt-provider.interface.ts
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
/**
|
||||||
|
* STT Provider Interface
|
||||||
|
*
|
||||||
|
* Defines the contract for speech-to-text provider implementations.
|
||||||
|
* All STT providers (e.g., Speaches/faster-whisper) must implement this interface.
|
||||||
|
*
|
||||||
|
* Issue #389
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type { TranscribeOptions, TranscriptionResult } from "./speech-types";
|
||||||
|
|
||||||
|
/**
 * Interface for speech-to-text providers.
 *
 * Implementations wrap an OpenAI-compatible API endpoint for transcription.
 *
 * @example
 * ```typescript
 * class SpeachesProvider implements ISTTProvider {
 *   readonly name = "speaches";
 *
 *   async transcribe(audio: Buffer, options?: TranscribeOptions): Promise<TranscriptionResult> {
 *     // Call speaches API via OpenAI SDK
 *   }
 *
 *   async isHealthy(): Promise<boolean> {
 *     // Check endpoint health
 *   }
 * }
 * ```
 */
export interface ISTTProvider {
  /** Provider name for logging and identification */
  readonly name: string;

  /**
   * Transcribe audio data to text.
   *
   * @param audio - Raw audio data as a Buffer
   * @param options - Optional transcription parameters
   * @returns Transcription result with text and metadata
   * @throws {Error} If transcription fails
   */
  transcribe(audio: Buffer, options?: TranscribeOptions): Promise<TranscriptionResult>;

  /**
   * Check if the provider is healthy and available.
   *
   * @returns true if the provider endpoint is reachable and ready
   */
  isHealthy(): Promise<boolean>;
}
|
||||||
68
apps/api/src/speech/interfaces/tts-provider.interface.ts
Normal file
68
apps/api/src/speech/interfaces/tts-provider.interface.ts
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
/**
|
||||||
|
* TTS Provider Interface
|
||||||
|
*
|
||||||
|
* Defines the contract for text-to-speech provider implementations.
|
||||||
|
* All TTS providers (e.g., Kokoro, Chatterbox, Piper/OpenedAI) must implement this interface.
|
||||||
|
*
|
||||||
|
* Issue #389
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type { SynthesizeOptions, SynthesisResult, VoiceInfo, SpeechTier } from "./speech-types";
|
||||||
|
|
||||||
|
/**
 * Interface for text-to-speech providers.
 *
 * Implementations wrap an OpenAI-compatible API endpoint for speech synthesis.
 * Each provider is associated with a SpeechTier (default, premium, fallback).
 *
 * @example
 * ```typescript
 * class KokoroProvider implements ITTSProvider {
 *   readonly name = "kokoro";
 *   readonly tier = "default";
 *
 *   async synthesize(text: string, options?: SynthesizeOptions): Promise<SynthesisResult> {
 *     // Call Kokoro API via OpenAI SDK
 *   }
 *
 *   async listVoices(): Promise<VoiceInfo[]> {
 *     // Return available voices
 *   }
 *
 *   async isHealthy(): Promise<boolean> {
 *     // Check endpoint health
 *   }
 * }
 * ```
 */
export interface ITTSProvider {
  /** Provider name for logging and identification */
  readonly name: string;

  /** Tier this provider serves (default, premium, fallback) */
  readonly tier: SpeechTier;

  /**
   * Synthesize text to audio.
   *
   * @param text - Text to convert to speech
   * @param options - Optional synthesis parameters (voice, format, speed, tier)
   * @returns Synthesis result with audio buffer and metadata
   * @throws {Error} If synthesis fails
   */
  synthesize(text: string, options?: SynthesizeOptions): Promise<SynthesisResult>;

  /**
   * List available voices for this provider.
   *
   * @returns Array of voice information objects
   */
  listVoices(): Promise<VoiceInfo[]>;

  /**
   * Check if the provider is healthy and available.
   *
   * @returns true if the provider endpoint is reachable and ready
   */
  isHealthy(): Promise<boolean>;
}
|
||||||
19
apps/api/src/speech/speech.constants.ts
Normal file
19
apps/api/src/speech/speech.constants.ts
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
/**
|
||||||
|
* Speech Module Constants
|
||||||
|
*
|
||||||
|
* NestJS injection tokens for speech providers.
|
||||||
|
*
|
||||||
|
* Issue #389
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Injection token for the STT (speech-to-text) provider.
 * Providers implementing ISTTProvider register under this token.
 *
 * Created with `Symbol(...)`, so the token is unique to this module
 * instance and never equal to a same-named symbol created elsewhere.
 */
export const STT_PROVIDER = Symbol("STT_PROVIDER");
|
||||||
|
|
||||||
|
/**
 * Injection token for TTS (text-to-speech) providers map.
 * Registered as Map<SpeechTier, ITTSProvider>.
 *
 * Created with `Symbol(...)`, so the token is unique to this module
 * instance and never equal to a same-named symbol created elsewhere.
 */
export const TTS_PROVIDERS = Symbol("TTS_PROVIDERS");
|
||||||
49
apps/api/src/speech/speech.module.ts
Normal file
49
apps/api/src/speech/speech.module.ts
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
/**
|
||||||
|
* SpeechModule
|
||||||
|
*
|
||||||
|
* NestJS module for speech-to-text (STT) and text-to-speech (TTS) services.
|
||||||
|
* Provides a provider abstraction layer with graceful fallback for TTS tiers.
|
||||||
|
*
|
||||||
|
* Imports:
|
||||||
|
* - ConfigModule.forFeature(speechConfig) for speech configuration
|
||||||
|
*
|
||||||
|
* Providers:
|
||||||
|
* - SpeechService: High-level speech operations with provider selection
|
||||||
|
* - TTS_PROVIDERS: Empty Map<SpeechTier, ITTSProvider> (populated by provider modules)
|
||||||
|
*
|
||||||
|
* Exports:
|
||||||
|
* - SpeechService for use by other modules (e.g., controllers, brain)
|
||||||
|
*
|
||||||
|
* Issue #389
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { Module, type OnModuleInit, Logger } from "@nestjs/common";
|
||||||
|
import { ConfigModule } from "@nestjs/config";
|
||||||
|
import { speechConfig, validateSpeechConfig } from "./speech.config";
|
||||||
|
import { SpeechService } from "./speech.service";
|
||||||
|
import { TTS_PROVIDERS } from "./speech.constants";
|
||||||
|
import type { SpeechTier } from "./interfaces/speech-types";
|
||||||
|
import type { ITTSProvider } from "./interfaces/tts-provider.interface";
|
||||||
|
|
||||||
|
/**
 * NestJS module wiring for speech services.
 *
 * Imports the `speechConfig` feature configuration, provides
 * `SpeechService`, and binds `TTS_PROVIDERS` to a factory that returns a
 * new, empty `Map<SpeechTier, ITTSProvider>`. Only `SpeechService` is
 * exported.
 */
@Module({
  imports: [ConfigModule.forFeature(speechConfig)],
  providers: [
    SpeechService,
    // Default empty TTS providers map. Provider modules (Kokoro, Chatterbox, etc.)
    // will register their providers in subsequent tasks.
    {
      provide: TTS_PROVIDERS,
      useFactory: (): Map<SpeechTier, ITTSProvider> => new Map(),
    },
  ],
  exports: [SpeechService],
})
export class SpeechModule implements OnModuleInit {
  private readonly logger = new Logger(SpeechModule.name);

  /**
   * Lifecycle hook: validates the speech configuration, then logs that
   * the module is initialized.
   *
   * NOTE(review): `validateSpeechConfig` is defined in ./speech.config and
   * is presumably expected to throw on invalid configuration (fail fast) —
   * confirm against its implementation.
   */
  onModuleInit(): void {
    // Validate configuration at startup (fail fast)
    validateSpeechConfig();
    this.logger.log("Speech module initialized");
  }
}
|
||||||
541
apps/api/src/speech/speech.service.spec.ts
Normal file
541
apps/api/src/speech/speech.service.spec.ts
Normal file
@@ -0,0 +1,541 @@
|
|||||||
|
/**
|
||||||
|
* SpeechService Tests
|
||||||
|
*
|
||||||
|
* Issue #389: Tests for provider abstraction layer with fallback logic.
|
||||||
|
* Written FIRST following TDD (Red-Green-Refactor).
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, it, expect, beforeEach, vi } from "vitest";
|
||||||
|
import { Test, TestingModule } from "@nestjs/testing";
|
||||||
|
import { ServiceUnavailableException } from "@nestjs/common";
|
||||||
|
import { SpeechService } from "./speech.service";
|
||||||
|
import { STT_PROVIDER, TTS_PROVIDERS } from "./speech.constants";
|
||||||
|
import { speechConfig } from "./speech.config";
|
||||||
|
import type { ISTTProvider } from "./interfaces/stt-provider.interface";
|
||||||
|
import type { ITTSProvider } from "./interfaces/tts-provider.interface";
|
||||||
|
import type {
|
||||||
|
SpeechTier,
|
||||||
|
TranscriptionResult,
|
||||||
|
SynthesisResult,
|
||||||
|
VoiceInfo,
|
||||||
|
} from "./interfaces/speech-types";
|
||||||
|
|
||||||
|
// ==========================================
|
||||||
|
// Mock provider factories
|
||||||
|
// ==========================================
|
||||||
|
|
||||||
|
/**
 * Build a mock STT provider for tests.
 *
 * By default `transcribe` resolves to a fixed "Hello world" / "en" result
 * and `isHealthy` resolves to `true`.
 *
 * @param overrides - Members that replace the defaults; spread last, so any
 *   key present here wins.
 * @returns A mock object satisfying ISTTProvider
 */
function createMockSttProvider(overrides?: Partial<ISTTProvider>): ISTTProvider {
  return {
    name: "mock-stt",
    transcribe: vi.fn().mockResolvedValue({
      text: "Hello world",
      language: "en",
      durationSeconds: 2.5,
    } satisfies TranscriptionResult),
    isHealthy: vi.fn().mockResolvedValue(true),
    ...overrides,
  };
}
|
||||||
|
|
||||||
|
/**
 * Build a mock TTS provider for the given tier.
 *
 * By default `synthesize` resolves to a fixed mp3 result tagged with
 * `tier`, `listVoices` resolves to a single default voice for that tier,
 * and `isHealthy` resolves to `true`.
 *
 * @param tier - Tier used for the provider's name, `tier` field, and the
 *   default mock results
 * @param overrides - Members that replace the defaults; spread last, so any
 *   key present here wins.
 * @returns A mock object satisfying ITTSProvider
 */
function createMockTtsProvider(tier: SpeechTier, overrides?: Partial<ITTSProvider>): ITTSProvider {
  return {
    name: `mock-tts-${tier}`,
    tier,
    synthesize: vi.fn().mockResolvedValue({
      audio: Buffer.from("fake-audio"),
      format: "mp3",
      voice: "test-voice",
      tier,
    } satisfies SynthesisResult),
    listVoices: vi
      .fn()
      .mockResolvedValue([
        { id: `${tier}-voice-1`, name: `${tier} Voice 1`, tier, isDefault: true },
      ] satisfies VoiceInfo[]),
    isHealthy: vi.fn().mockResolvedValue(true),
    ...overrides,
  };
}
|
||||||
|
|
||||||
|
// ==========================================
|
||||||
|
// Default config for tests
|
||||||
|
// ==========================================
|
||||||
|
|
||||||
|
/**
 * Build the default speech configuration used by the tests.
 *
 * STT and all three TTS tiers are enabled. A new object is created on
 * every call, so a test may mutate its copy without affecting others.
 *
 * @returns A config object asserted to the shape produced by `speechConfig`
 */
function createTestConfig(): ReturnType<typeof speechConfig> {
  // The assertion is kept from the original: the real config type is
  // declared in ./speech.config and may carry fields not listed here.
  return {
    stt: {
      enabled: true,
      baseUrl: "http://localhost:8000/v1",
      model: "test-model",
      language: "en",
    },
    tts: {
      default: {
        enabled: true,
        url: "http://localhost:8880/v1",
        voice: "test-voice",
        format: "mp3",
      },
      premium: {
        enabled: true,
        url: "http://localhost:8881/v1",
      },
      fallback: {
        enabled: true,
        url: "http://localhost:8882/v1",
      },
    },
    limits: {
      maxUploadSize: 25_000_000,
      maxDurationSeconds: 600,
      maxTextLength: 4096,
    },
  } as ReturnType<typeof speechConfig>;
}
|
||||||
|
|
||||||
|
// ==========================================
|
||||||
|
// Test helper: create testing module
|
||||||
|
// ==========================================
|
||||||
|
|
||||||
|
/**
 * Compile a NestJS testing module containing SpeechService and its
 * injected dependencies.
 *
 * `speechConfig.KEY` and `TTS_PROVIDERS` are always registered.
 * `STT_PROVIDER` is registered only when `options.sttProvider` is not
 * `undefined`; passing `null` registers the token with a `null` value.
 *
 * @param options.sttProvider - STT provider to register, `null`, or omitted
 * @param options.ttsProviders - TTS provider map; defaults to an empty Map
 * @param options.config - Speech config; defaults to `createTestConfig()`
 * @returns The compiled testing module
 */
async function createTestModule(options: {
  sttProvider?: ISTTProvider | null;
  ttsProviders?: Map<SpeechTier, ITTSProvider>;
  config?: ReturnType<typeof speechConfig>;
}): Promise<TestingModule> {
  const config = options.config ?? createTestConfig();
  const ttsProviders = options.ttsProviders ?? new Map<SpeechTier, ITTSProvider>();

  const providers: Array<{ provide: symbol | string; useValue: unknown }> = [
    { provide: speechConfig.KEY, useValue: config },
    { provide: TTS_PROVIDERS, useValue: ttsProviders },
  ];

  // `undefined` means "do not register the token at all"; `null` is registered as-is.
  if (options.sttProvider !== undefined) {
    providers.push({ provide: STT_PROVIDER, useValue: options.sttProvider });
  }

  return Test.createTestingModule({
    providers: [SpeechService, ...providers],
  }).compile();
}
|
||||||
|
|
||||||
|
// ==========================================
|
||||||
|
// Tests
|
||||||
|
// ==========================================
|
||||||
|
|
||||||
|
describe("SpeechService", () => {
|
||||||
|
// ==========================================
|
||||||
|
// Construction and initialization
|
||||||
|
// ==========================================
|
||||||
|
describe("construction", () => {
|
||||||
|
it("should be defined when all providers are injected", async () => {
|
||||||
|
const module = await createTestModule({
|
||||||
|
sttProvider: createMockSttProvider(),
|
||||||
|
ttsProviders: new Map([["default", createMockTtsProvider("default")]]),
|
||||||
|
});
|
||||||
|
|
||||||
|
const service = module.get<SpeechService>(SpeechService);
|
||||||
|
expect(service).toBeDefined();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should be defined with no STT provider", async () => {
|
||||||
|
const module = await createTestModule({
|
||||||
|
sttProvider: null,
|
||||||
|
ttsProviders: new Map([["default", createMockTtsProvider("default")]]),
|
||||||
|
});
|
||||||
|
|
||||||
|
const service = module.get<SpeechService>(SpeechService);
|
||||||
|
expect(service).toBeDefined();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should be defined with empty TTS providers map", async () => {
|
||||||
|
const module = await createTestModule({
|
||||||
|
sttProvider: createMockSttProvider(),
|
||||||
|
ttsProviders: new Map(),
|
||||||
|
});
|
||||||
|
|
||||||
|
const service = module.get<SpeechService>(SpeechService);
|
||||||
|
expect(service).toBeDefined();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// ==========================================
|
||||||
|
// transcribe()
|
||||||
|
// ==========================================
|
||||||
|
describe("transcribe", () => {
|
||||||
|
let service: SpeechService;
|
||||||
|
let mockStt: ISTTProvider;
|
||||||
|
|
||||||
|
beforeEach(async () => {
|
||||||
|
mockStt = createMockSttProvider();
|
||||||
|
const module = await createTestModule({ sttProvider: mockStt });
|
||||||
|
service = module.get<SpeechService>(SpeechService);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should delegate to the STT provider", async () => {
|
||||||
|
const audio = Buffer.from("test-audio");
|
||||||
|
const result = await service.transcribe(audio);
|
||||||
|
|
||||||
|
expect(mockStt.transcribe).toHaveBeenCalledWith(audio, undefined);
|
||||||
|
expect(result.text).toBe("Hello world");
|
||||||
|
expect(result.language).toBe("en");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should pass options to the STT provider", async () => {
|
||||||
|
const audio = Buffer.from("test-audio");
|
||||||
|
const options = { language: "fr", model: "custom-model" };
|
||||||
|
await service.transcribe(audio, options);
|
||||||
|
|
||||||
|
expect(mockStt.transcribe).toHaveBeenCalledWith(audio, options);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should throw ServiceUnavailableException when STT is disabled in config", async () => {
|
||||||
|
const config = createTestConfig();
|
||||||
|
config.stt.enabled = false;
|
||||||
|
const module = await createTestModule({ sttProvider: mockStt, config });
|
||||||
|
service = module.get<SpeechService>(SpeechService);
|
||||||
|
|
||||||
|
await expect(service.transcribe(Buffer.from("audio"))).rejects.toThrow(
|
||||||
|
ServiceUnavailableException
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should throw ServiceUnavailableException when no STT provider is registered", async () => {
|
||||||
|
const module = await createTestModule({ sttProvider: null });
|
||||||
|
service = module.get<SpeechService>(SpeechService);
|
||||||
|
|
||||||
|
await expect(service.transcribe(Buffer.from("audio"))).rejects.toThrow(
|
||||||
|
ServiceUnavailableException
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should propagate provider errors as ServiceUnavailableException", async () => {
|
||||||
|
const failingStt = createMockSttProvider({
|
||||||
|
transcribe: vi.fn().mockRejectedValue(new Error("Connection refused")),
|
||||||
|
});
|
||||||
|
const module = await createTestModule({ sttProvider: failingStt });
|
||||||
|
service = module.get<SpeechService>(SpeechService);
|
||||||
|
|
||||||
|
await expect(service.transcribe(Buffer.from("audio"))).rejects.toThrow(
|
||||||
|
ServiceUnavailableException
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// ==========================================
|
||||||
|
// synthesize()
|
||||||
|
// ==========================================
|
||||||
|
describe("synthesize", () => {
|
||||||
|
let service: SpeechService;
|
||||||
|
let defaultProvider: ITTSProvider;
|
||||||
|
let premiumProvider: ITTSProvider;
|
||||||
|
let fallbackProvider: ITTSProvider;
|
||||||
|
|
||||||
|
beforeEach(async () => {
|
||||||
|
defaultProvider = createMockTtsProvider("default");
|
||||||
|
premiumProvider = createMockTtsProvider("premium");
|
||||||
|
fallbackProvider = createMockTtsProvider("fallback");
|
||||||
|
|
||||||
|
const ttsProviders = new Map<SpeechTier, ITTSProvider>([
|
||||||
|
["default", defaultProvider],
|
||||||
|
["premium", premiumProvider],
|
||||||
|
["fallback", fallbackProvider],
|
||||||
|
]);
|
||||||
|
|
||||||
|
const module = await createTestModule({ ttsProviders });
|
||||||
|
service = module.get<SpeechService>(SpeechService);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should use the default tier when no tier is specified", async () => {
|
||||||
|
const result = await service.synthesize("Hello world");
|
||||||
|
|
||||||
|
expect(defaultProvider.synthesize).toHaveBeenCalledWith("Hello world", undefined);
|
||||||
|
expect(result.tier).toBe("default");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should use the requested tier when specified", async () => {
|
||||||
|
const result = await service.synthesize("Hello world", { tier: "premium" });
|
||||||
|
|
||||||
|
expect(premiumProvider.synthesize).toHaveBeenCalled();
|
||||||
|
expect(result.tier).toBe("premium");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should pass options to the TTS provider", async () => {
|
||||||
|
const options = { voice: "custom-voice", format: "wav" as const };
|
||||||
|
await service.synthesize("Hello", options);
|
||||||
|
|
||||||
|
expect(defaultProvider.synthesize).toHaveBeenCalledWith("Hello", options);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should throw ServiceUnavailableException when TTS default is disabled and no tier specified", async () => {
|
||||||
|
const config = createTestConfig();
|
||||||
|
config.tts.default.enabled = false;
|
||||||
|
config.tts.premium.enabled = false;
|
||||||
|
config.tts.fallback.enabled = false;
|
||||||
|
const module = await createTestModule({
|
||||||
|
ttsProviders: new Map([["default", defaultProvider]]),
|
||||||
|
config,
|
||||||
|
});
|
||||||
|
service = module.get<SpeechService>(SpeechService);
|
||||||
|
|
||||||
|
await expect(service.synthesize("Hello")).rejects.toThrow(ServiceUnavailableException);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should throw ServiceUnavailableException when no TTS providers are registered", async () => {
|
||||||
|
const module = await createTestModule({ ttsProviders: new Map() });
|
||||||
|
service = module.get<SpeechService>(SpeechService);
|
||||||
|
|
||||||
|
await expect(service.synthesize("Hello")).rejects.toThrow(ServiceUnavailableException);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// ==========================================
|
||||||
|
// synthesize() fallback logic
|
||||||
|
// ==========================================
|
||||||
|
describe("synthesize fallback", () => {
|
||||||
|
it("should fall back from premium to default when premium provider fails", async () => {
|
||||||
|
const failingPremium = createMockTtsProvider("premium", {
|
||||||
|
synthesize: vi.fn().mockRejectedValue(new Error("Premium unavailable")),
|
||||||
|
});
|
||||||
|
const defaultProvider = createMockTtsProvider("default");
|
||||||
|
|
||||||
|
const ttsProviders = new Map<SpeechTier, ITTSProvider>([
|
||||||
|
["premium", failingPremium],
|
||||||
|
["default", defaultProvider],
|
||||||
|
]);
|
||||||
|
|
||||||
|
const module = await createTestModule({ ttsProviders });
|
||||||
|
const service = module.get<SpeechService>(SpeechService);
|
||||||
|
|
||||||
|
const result = await service.synthesize("Hello", { tier: "premium" });
|
||||||
|
|
||||||
|
expect(failingPremium.synthesize).toHaveBeenCalled();
|
||||||
|
expect(defaultProvider.synthesize).toHaveBeenCalled();
|
||||||
|
expect(result.tier).toBe("default");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should fall back from default to fallback when default provider fails", async () => {
|
||||||
|
const failingDefault = createMockTtsProvider("default", {
|
||||||
|
synthesize: vi.fn().mockRejectedValue(new Error("Default unavailable")),
|
||||||
|
});
|
||||||
|
const fallbackProvider = createMockTtsProvider("fallback");
|
||||||
|
|
||||||
|
const ttsProviders = new Map<SpeechTier, ITTSProvider>([
|
||||||
|
["default", failingDefault],
|
||||||
|
["fallback", fallbackProvider],
|
||||||
|
]);
|
||||||
|
|
||||||
|
const module = await createTestModule({ ttsProviders });
|
||||||
|
const service = module.get<SpeechService>(SpeechService);
|
||||||
|
|
||||||
|
const result = await service.synthesize("Hello");
|
||||||
|
|
||||||
|
expect(failingDefault.synthesize).toHaveBeenCalled();
|
||||||
|
expect(fallbackProvider.synthesize).toHaveBeenCalled();
|
||||||
|
expect(result.tier).toBe("fallback");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should fall back premium -> default -> fallback", async () => {
|
||||||
|
const failingPremium = createMockTtsProvider("premium", {
|
||||||
|
synthesize: vi.fn().mockRejectedValue(new Error("Premium fail")),
|
||||||
|
});
|
||||||
|
const failingDefault = createMockTtsProvider("default", {
|
||||||
|
synthesize: vi.fn().mockRejectedValue(new Error("Default fail")),
|
||||||
|
});
|
||||||
|
const fallbackProvider = createMockTtsProvider("fallback");
|
||||||
|
|
||||||
|
const ttsProviders = new Map<SpeechTier, ITTSProvider>([
|
||||||
|
["premium", failingPremium],
|
||||||
|
["default", failingDefault],
|
||||||
|
["fallback", fallbackProvider],
|
||||||
|
]);
|
||||||
|
|
||||||
|
const module = await createTestModule({ ttsProviders });
|
||||||
|
const service = module.get<SpeechService>(SpeechService);
|
||||||
|
|
||||||
|
const result = await service.synthesize("Hello", { tier: "premium" });
|
||||||
|
|
||||||
|
expect(failingPremium.synthesize).toHaveBeenCalled();
|
||||||
|
expect(failingDefault.synthesize).toHaveBeenCalled();
|
||||||
|
expect(fallbackProvider.synthesize).toHaveBeenCalled();
|
||||||
|
expect(result.tier).toBe("fallback");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should throw ServiceUnavailableException when all tiers fail", async () => {
|
||||||
|
const failingDefault = createMockTtsProvider("default", {
|
||||||
|
synthesize: vi.fn().mockRejectedValue(new Error("Default fail")),
|
||||||
|
});
|
||||||
|
const failingFallback = createMockTtsProvider("fallback", {
|
||||||
|
synthesize: vi.fn().mockRejectedValue(new Error("Fallback fail")),
|
||||||
|
});
|
||||||
|
|
||||||
|
const ttsProviders = new Map<SpeechTier, ITTSProvider>([
|
||||||
|
["default", failingDefault],
|
||||||
|
["fallback", failingFallback],
|
||||||
|
]);
|
||||||
|
|
||||||
|
const module = await createTestModule({ ttsProviders });
|
||||||
|
const service = module.get<SpeechService>(SpeechService);
|
||||||
|
|
||||||
|
await expect(service.synthesize("Hello")).rejects.toThrow(ServiceUnavailableException);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should skip unavailable tiers in fallback chain", async () => {
  // premium is requested and rejects; default is disabled in config and has no
  // registered provider, so the chain must go straight from premium to fallback.
  const failingPremium = createMockTtsProvider("premium", {
    synthesize: vi.fn().mockRejectedValue(new Error("Premium fail")),
  });
  const fallbackProvider = createMockTtsProvider("fallback");

  const config = createTestConfig();
  config.tts.default.enabled = false;

  const ttsProviders = new Map<SpeechTier, ITTSProvider>([
    ["premium", failingPremium],
    ["fallback", fallbackProvider],
  ]);

  const module = await createTestModule({ ttsProviders, config });
  const service = module.get<SpeechService>(SpeechService);

  const result = await service.synthesize("Hello", { tier: "premium" });

  // The requested tier must be attempted before the service falls through.
  expect(failingPremium.synthesize).toHaveBeenCalled();
  expect(fallbackProvider.synthesize).toHaveBeenCalled();
  expect(result.tier).toBe("fallback");
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// ==========================================
|
||||||
|
// listVoices()
|
||||||
|
// ==========================================
|
||||||
|
describe("listVoices", () => {
  it("should aggregate voices from all registered TTS providers", async () => {
    // Two voices on the default tier, one on premium; results are expected in
    // provider registration order.
    const defaultVoices = [
      { id: "voice-1", name: "Voice 1", tier: "default" as SpeechTier, isDefault: true },
      { id: "voice-2", name: "Voice 2", tier: "default" as SpeechTier },
    ];
    const premiumVoices = [
      { id: "voice-3", name: "Voice 3", tier: "premium" as SpeechTier, isDefault: true },
    ];

    const defaultProvider = createMockTtsProvider("default", {
      listVoices: vi.fn().mockResolvedValue(defaultVoices),
    });
    const premiumProvider = createMockTtsProvider("premium", {
      listVoices: vi.fn().mockResolvedValue(premiumVoices),
    });

    const ttsProviders = new Map<SpeechTier, ITTSProvider>();
    ttsProviders.set("default", defaultProvider);
    ttsProviders.set("premium", premiumProvider);

    const testingModule = await createTestModule({ ttsProviders });
    const speech = testingModule.get<SpeechService>(SpeechService);

    const listed = await speech.listVoices();
    const listedIds = listed.map((voice) => voice.id);

    expect(listed).toHaveLength(3);
    expect(listedIds).toEqual(["voice-1", "voice-2", "voice-3"]);
  });

  it("should filter voices by tier when specified", async () => {
    const defaultProvider = createMockTtsProvider("default", {
      listVoices: vi
        .fn()
        .mockResolvedValue([{ id: "voice-1", name: "Voice 1", tier: "default" as SpeechTier }]),
    });
    const premiumProvider = createMockTtsProvider("premium", {
      listVoices: vi
        .fn()
        .mockResolvedValue([{ id: "voice-2", name: "Voice 2", tier: "premium" as SpeechTier }]),
    });

    const ttsProviders = new Map<SpeechTier, ITTSProvider>();
    ttsProviders.set("default", defaultProvider);
    ttsProviders.set("premium", premiumProvider);

    const testingModule = await createTestModule({ ttsProviders });
    const speech = testingModule.get<SpeechService>(SpeechService);

    const premiumOnly = await speech.listVoices("premium");

    expect(premiumOnly).toHaveLength(1);
    expect(premiumOnly[0].id).toBe("voice-2");
    // A tier filter must query that tier's provider and leave the others alone.
    expect(premiumProvider.listVoices).toHaveBeenCalled();
    expect(defaultProvider.listVoices).not.toHaveBeenCalled();
  });

  it("should return empty array when no TTS providers are registered", async () => {
    const testingModule = await createTestModule({ ttsProviders: new Map() });
    const speech = testingModule.get<SpeechService>(SpeechService);

    const listed = await speech.listVoices();

    expect(listed).toEqual([]);
  });

  it("should return empty array when requested tier has no provider", async () => {
    // Only the default tier is registered; premium is requested.
    const ttsProviders = new Map<SpeechTier, ITTSProvider>();
    ttsProviders.set("default", createMockTtsProvider("default"));

    const testingModule = await createTestModule({ ttsProviders });
    const speech = testingModule.get<SpeechService>(SpeechService);

    const listed = await speech.listVoices("premium");

    expect(listed).toEqual([]);
  });
});
|
||||||
|
|
||||||
|
// ==========================================
|
||||||
|
// isSTTAvailable / isTTSAvailable
|
||||||
|
// ==========================================
|
||||||
|
describe("availability checks", () => {
  it("should report STT as available when enabled and provider registered", async () => {
    const sttProvider = createMockSttProvider();
    const testingModule = await createTestModule({ sttProvider });
    const speech = testingModule.get<SpeechService>(SpeechService);

    const available = speech.isSTTAvailable();

    expect(available).toBe(true);
  });

  it("should report STT as unavailable when disabled in config", async () => {
    // A provider is present, so only the config flag can make STT unavailable.
    const config = createTestConfig();
    config.stt.enabled = false;
    const sttProvider = createMockSttProvider();

    const testingModule = await createTestModule({ sttProvider, config });
    const speech = testingModule.get<SpeechService>(SpeechService);

    const available = speech.isSTTAvailable();

    expect(available).toBe(false);
  });

  it("should report STT as unavailable when no provider registered", async () => {
    const testingModule = await createTestModule({ sttProvider: null });
    const speech = testingModule.get<SpeechService>(SpeechService);

    const available = speech.isSTTAvailable();

    expect(available).toBe(false);
  });

  it("should report TTS as available when at least one tier is enabled with a provider", async () => {
    const ttsProviders = new Map<SpeechTier, ITTSProvider>();
    ttsProviders.set("default", createMockTtsProvider("default"));

    const testingModule = await createTestModule({ ttsProviders });
    const speech = testingModule.get<SpeechService>(SpeechService);

    const available = speech.isTTSAvailable();

    expect(available).toBe(true);
  });

  it("should report TTS as unavailable when no providers registered", async () => {
    // Every tier is switched off and the provider map is empty.
    const config = createTestConfig();
    config.tts.default.enabled = false;
    config.tts.premium.enabled = false;
    config.tts.fallback.enabled = false;
    const ttsProviders = new Map();

    const testingModule = await createTestModule({ ttsProviders, config });
    const speech = testingModule.get<SpeechService>(SpeechService);

    const available = speech.isTTSAvailable();

    expect(available).toBe(false);
  });
});
|
||||||
|
});
|
||||||
231
apps/api/src/speech/speech.service.ts
Normal file
231
apps/api/src/speech/speech.service.ts
Normal file
@@ -0,0 +1,231 @@
|
|||||||
|
/**
|
||||||
|
* SpeechService
|
||||||
|
*
|
||||||
|
* High-level service for speech-to-text (STT) and text-to-speech (TTS) operations.
|
||||||
|
* Manages provider selection and graceful fallback for TTS tiers.
|
||||||
|
*
|
||||||
|
* Fallback chain for TTS: premium -> default -> fallback
|
||||||
|
* Each tier is only attempted if enabled in config and a provider is registered.
|
||||||
|
*
|
||||||
|
* Issue #389
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { Injectable, Inject, Optional, Logger, ServiceUnavailableException } from "@nestjs/common";
|
||||||
|
import { STT_PROVIDER, TTS_PROVIDERS } from "./speech.constants";
|
||||||
|
import { speechConfig, type SpeechConfig } from "./speech.config";
|
||||||
|
import type { ISTTProvider } from "./interfaces/stt-provider.interface";
|
||||||
|
import type { ITTSProvider } from "./interfaces/tts-provider.interface";
|
||||||
|
import type {
|
||||||
|
SpeechTier,
|
||||||
|
TranscribeOptions,
|
||||||
|
TranscriptionResult,
|
||||||
|
SynthesizeOptions,
|
||||||
|
SynthesisResult,
|
||||||
|
VoiceInfo,
|
||||||
|
} from "./interfaces/speech-types";
|
||||||
|
|
||||||
|
/**
 * TTS tiers in the order they are tried.
 * A failed tier hands over to the entry that follows it in this list.
 */
const TTS_FALLBACK_ORDER: readonly SpeechTier[] = [
  "premium",
  "default",
  "fallback",
] as const;
|
||||||
|
|
||||||
|
/**
 * High-level entry point for speech-to-text (STT) and text-to-speech (TTS).
 *
 * STT is delegated to a single, optionally injected provider. TTS requests are
 * routed through a tier chain (see `TTS_FALLBACK_ORDER`): the requested tier is
 * tried first, then each later tier, skipping any tier that is disabled in
 * config or has no registered provider.
 */
@Injectable()
export class SpeechService {
  private readonly logger = new Logger(SpeechService.name);

  /**
   * @param config - Speech configuration; `stt.enabled` and `tts.<tier>.enabled` are read here
   * @param sttProvider - STT provider. The dependency is `@Optional()`, so it may be
   *   `null` or `undefined` when nothing is bound to `STT_PROVIDER`
   * @param ttsProviders - Registered TTS providers keyed by tier
   */
  constructor(
    @Inject(speechConfig.KEY)
    private readonly config: SpeechConfig,

    @Optional()
    @Inject(STT_PROVIDER)
    private readonly sttProvider: ISTTProvider | null | undefined,

    @Inject(TTS_PROVIDERS)
    private readonly ttsProviders: Map<SpeechTier, ITTSProvider>
  ) {
    this.logger.log("Speech service initialized");

    if (this.sttProvider) {
      this.logger.log(`STT provider registered: ${this.sttProvider.name}`);
    }

    if (this.ttsProviders.size > 0) {
      const tierNames = Array.from(this.ttsProviders.keys()).join(", ");
      this.logger.log(`TTS providers registered: ${tierNames}`);
    }
  }

  // ==========================================
  // STT Operations
  // ==========================================

  /**
   * Transcribe audio data to text using the registered STT provider.
   *
   * @param audio - Raw audio data as a Buffer
   * @param options - Optional transcription parameters, passed through to the provider
   * @returns Transcription result produced by the provider
   * @throws {ServiceUnavailableException} If STT is disabled, no provider is
   *   registered, or the provider itself fails
   */
  async transcribe(audio: Buffer, options?: TranscribeOptions): Promise<TranscriptionResult> {
    if (!this.config.stt.enabled) {
      throw new ServiceUnavailableException("Speech-to-text is not enabled");
    }

    if (!this.sttProvider) {
      throw new ServiceUnavailableException("No STT provider is registered");
    }

    try {
      return await this.sttProvider.transcribe(audio, options);
    } catch (error: unknown) {
      // Provider errors are logged and re-surfaced as a 503-style exception.
      const message = error instanceof Error ? error.message : String(error);
      this.logger.error(`STT transcription failed: ${message}`);
      throw new ServiceUnavailableException(`Transcription failed: ${message}`);
    }
  }

  // ==========================================
  // TTS Operations
  // ==========================================

  /**
   * Synthesize text to audio using TTS providers with graceful fallback.
   *
   * The chain starts at the requested tier (`"default"` when none is given) and
   * continues through the later entries of `TTS_FALLBACK_ORDER`. Only enabled
   * tiers with registered providers are attempted.
   *
   * @param text - Text to convert to speech
   * @param options - Optional synthesis parameters; passed unchanged to every provider tried
   * @returns Result of the first provider that succeeds
   * @throws {ServiceUnavailableException} If the chain is empty or every provider in it fails
   */
  async synthesize(text: string, options?: SynthesizeOptions): Promise<SynthesisResult> {
    const requestedTier = options?.tier ?? "default";
    const fallbackChain = this.buildFallbackChain(requestedTier);

    if (fallbackChain.length === 0) {
      throw new ServiceUnavailableException(
        "No TTS providers are available. Check that TTS is enabled and providers are registered."
      );
    }

    let lastError: Error | undefined;

    for (const tier of fallbackChain) {
      const provider = this.ttsProviders.get(tier);
      if (!provider) {
        continue;
      }

      try {
        return await provider.synthesize(text, options);
      } catch (error: unknown) {
        // Remember the failure and move on to the next tier in the chain.
        const message = error instanceof Error ? error.message : String(error);
        this.logger.warn(`TTS tier "${tier}" (${provider.name}) failed: ${message}`);
        lastError = error instanceof Error ? error : new Error(message);
      }
    }

    const errorMessage = lastError?.message ?? "No providers available";
    throw new ServiceUnavailableException(`All TTS providers failed: ${errorMessage}`);
  }

  /**
   * List available voices across all TTS providers, optionally filtered by tier.
   *
   * A provider whose lookup fails contributes no voices; the failure is logged
   * as a warning and never propagated to the caller.
   *
   * @param tier - Optional tier filter. If omitted, voices from all registered
   *   providers are returned in provider registration order.
   * @returns Array of voice information objects (empty if the tier has no provider)
   */
  async listVoices(tier?: SpeechTier): Promise<VoiceInfo[]> {
    if (tier) {
      const provider = this.ttsProviders.get(tier);
      return provider ? this.listProviderVoices(tier, provider) : [];
    }

    // Query all providers concurrently; Promise.all keeps the results in the
    // Map's iteration order, so the aggregated list is stable.
    const perProvider = await Promise.all(
      Array.from(this.ttsProviders, ([providerTier, provider]) =>
        this.listProviderVoices(providerTier, provider)
      )
    );

    const voices: VoiceInfo[] = [];
    for (const tierVoices of perProvider) {
      voices.push(...tierVoices);
    }
    return voices;
  }

  // ==========================================
  // Availability Checks
  // ==========================================

  /**
   * Check if STT is available (enabled in config and provider registered).
   *
   * Uses a loose `!= null` comparison so that both `null` and `undefined`
   * (what an unresolved `@Optional()` dependency yields) count as "no provider".
   */
  isSTTAvailable(): boolean {
    return this.config.stt.enabled && this.sttProvider != null;
  }

  /**
   * Check if TTS is available (at least one tier enabled with a registered provider).
   */
  isTTSAvailable(): boolean {
    return this.getEnabledTiers().some((tier) => this.ttsProviders.has(tier));
  }

  // ==========================================
  // Private helpers
  // ==========================================

  /**
   * Fetch the voices of a single provider, converting any failure into an
   * empty list plus a warning log.
   */
  private async listProviderVoices(tier: SpeechTier, provider: ITTSProvider): Promise<VoiceInfo[]> {
    try {
      return await provider.listVoices();
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      this.logger.warn(`Failed to list voices for tier "${tier}": ${message}`);
      return [];
    }
  }

  /**
   * Build the fallback chain starting from the requested tier.
   * Only includes tiers that are enabled in config and have a registered provider.
   */
  private buildFallbackChain(requestedTier: SpeechTier): SpeechTier[] {
    const startIndex = TTS_FALLBACK_ORDER.indexOf(requestedTier);
    if (startIndex === -1) {
      // Unknown tier value at runtime: nothing can be attempted.
      return [];
    }

    const enabledTiers = this.getEnabledTiers();

    return TTS_FALLBACK_ORDER.slice(startIndex).filter(
      (tier) => enabledTiers.includes(tier) && this.ttsProviders.has(tier)
    );
  }

  /**
   * Get the list of TTS tiers that are enabled in the configuration,
   * in `TTS_FALLBACK_ORDER` order.
   */
  private getEnabledTiers(): SpeechTier[] {
    return TTS_FALLBACK_ORDER.filter((tier) => this.config.tts[tier].enabled);
  }
}
|
||||||
Reference in New Issue
Block a user