feat(#389): create SpeechModule with provider abstraction layer
All checks were successful
ci/woodpecker/push/api Pipeline was successful

Add SpeechModule with provider interfaces and service skeleton for
multi-tier TTS fallback (premium -> default -> fallback) and STT
transcription support. Includes 27 unit tests covering provider
selection, fallback logic, and availability checks.

- ISTTProvider interface with transcribe/isHealthy methods
- ITTSProvider interface with synthesize/listVoices/isHealthy methods
- Shared types: SpeechTier, TranscriptionResult, SynthesisResult, etc.
- SpeechService with graceful TTS fallback chain
- NestJS injection tokens (STT_PROVIDER, TTS_PROVIDERS)
- SpeechModule registered in AppModule
- ConfigModule integration via speechConfig registerAs factory

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-15 02:09:45 -06:00
parent 52553c8266
commit c40373fa3b
9 changed files with 1129 additions and 0 deletions

View File

@@ -0,0 +1,18 @@
/**
* Speech interfaces barrel export.
*
* Issue #389
*/
export type { ISTTProvider } from "./stt-provider.interface";
export type { ITTSProvider } from "./tts-provider.interface";
export type {
SpeechTier,
AudioFormat,
TranscribeOptions,
TranscriptionResult,
TranscriptionSegment,
SynthesizeOptions,
SynthesisResult,
VoiceInfo,
} from "./speech-types";

View File

@@ -0,0 +1,149 @@
/**
* Speech Types
*
* Shared types for speech-to-text (STT) and text-to-speech (TTS) services.
* Used by provider interfaces and the SpeechService.
*
* Issue #389
*/
// ==========================================
// Enums / Discriminators
// ==========================================
/**
 * Quality tier of a TTS provider; selects which engine handles synthesis.
 *
 * - `"default"`: primary TTS engine (e.g., Kokoro)
 * - `"premium"`: higher quality TTS engine (e.g., Chatterbox)
 * - `"fallback"`: backup TTS engine (e.g., Piper/OpenedAI)
 */
export type SpeechTier =
  | "default"
  | "premium"
  | "fallback";
/**
 * Audio container/codec identifiers accepted as TTS output formats.
 */
export type AudioFormat =
  | "mp3"
  | "wav"
  | "opus"
  | "flac"
  | "aac"
  | "pcm";
// ==========================================
// STT Types
// ==========================================
/**
 * Optional knobs for a speech-to-text request.
 *
 * Every field may be omitted; an empty object is a valid value.
 */
export interface TranscribeOptions {
  /** Language code of the audio (e.g., "en", "fr", "de"). */
  language?: string;

  /** Name of the transcription model to use. */
  model?: string;

  /** MIME type of the supplied audio (e.g., "audio/mp3", "audio/wav"). */
  mimeType?: string;

  /** Free-text prompt used to guide the transcription. */
  prompt?: string;

  /** Sampling temperature for transcription (0.0 - 1.0). */
  temperature?: number;
}
/**
 * Outcome of a speech-to-text transcription.
 *
 * `text` and `language` are always present; the remaining fields are
 * populated only when available.
 */
export interface TranscriptionResult {
  /** The transcribed text. */
  text: string;

  /** Language that was detected or used for the transcription. */
  language: string;

  /** Length of the source audio, in seconds. */
  durationSeconds?: number;

  /** Confidence score (0.0 - 1.0, if available). */
  confidence?: number;

  /** Per-word or per-segment timings (if available). */
  segments?: TranscriptionSegment[];
}
/**
 * One timed slice of a transcription result.
 */
export interface TranscriptionSegment {
  /** Text spoken during this segment. */
  text: string;

  /** Segment start time, in seconds. */
  start: number;

  /** Segment end time, in seconds. */
  end: number;

  /** Confidence for this segment, when reported. */
  confidence?: number;
}
// ==========================================
// TTS Types
// ==========================================
/**
 * Optional knobs for a text-to-speech request.
 *
 * Every field may be omitted; an empty object is a valid value.
 */
export interface SynthesizeOptions {
  /** Identifier of the voice to synthesize with. */
  voice?: string;

  /** Audio format the caller would like back. */
  format?: AudioFormat;

  /** Speech speed multiplier (0.5 - 2.0). */
  speed?: number;

  /** TTS tier the caller prefers. */
  tier?: SpeechTier;
}
/**
 * Outcome of a text-to-speech synthesis.
 */
export interface SynthesisResult {
  /** The synthesized audio bytes. */
  audio: Buffer;

  /** Format of the bytes in `audio`. */
  format: AudioFormat;

  /** Voice that was used for synthesis. */
  voice: string;

  /** Tier that produced this synthesis. */
  tier: SpeechTier;

  /** Length of the generated audio in seconds (if available). */
  durationSeconds?: number;
}
/**
 * Describes one voice offered by a TTS provider.
 *
 * Required members are listed first, optional ones after.
 */
export interface VoiceInfo {
  /** Identifier of the voice. */
  id: string;

  /** Human-readable name of the voice. */
  name: string;

  /** Tier this voice belongs to. */
  tier: SpeechTier;

  /** Language code of the voice. */
  language?: string;

  /** Whether this is the default voice for its tier. */
  isDefault?: boolean;
}

View File

@@ -0,0 +1,52 @@
/**
* STT Provider Interface
*
* Defines the contract for speech-to-text provider implementations.
* All STT providers (e.g., Speaches/faster-whisper) must implement this interface.
*
* Issue #389
*/
import type { TranscribeOptions, TranscriptionResult } from "./speech-types";
/**
 * Contract every speech-to-text provider must fulfil.
 *
 * Implementations wrap an OpenAI-compatible API endpoint for transcription.
 *
 * @example
 * ```typescript
 * class SpeachesProvider implements ISTTProvider {
 *   readonly name = "speaches";
 *
 *   async transcribe(audio: Buffer, options?: TranscribeOptions): Promise<TranscriptionResult> {
 *     // Call speaches API via OpenAI SDK
 *   }
 *
 *   async isHealthy(): Promise<boolean> {
 *     // Check endpoint health
 *   }
 * }
 * ```
 */
export interface ISTTProvider {
  /** Name of the provider, used for logging and identification. */
  readonly name: string;

  /**
   * Turn audio data into text.
   *
   * @param audio - Raw audio data as a Buffer
   * @param options - Optional transcription parameters
   * @returns The transcription text together with its metadata
   * @throws {Error} If transcription fails
   */
  transcribe(audio: Buffer, options?: TranscribeOptions): Promise<TranscriptionResult>;

  /**
   * Report whether the provider is healthy and available.
   *
   * @returns true if the provider endpoint is reachable and ready
   */
  isHealthy(): Promise<boolean>;
}

View File

@@ -0,0 +1,68 @@
/**
* TTS Provider Interface
*
* Defines the contract for text-to-speech provider implementations.
* All TTS providers (e.g., Kokoro, Chatterbox, Piper/OpenedAI) must implement this interface.
*
* Issue #389
*/
import type { SynthesizeOptions, SynthesisResult, VoiceInfo, SpeechTier } from "./speech-types";
/**
 * Contract every text-to-speech provider must fulfil.
 *
 * Implementations wrap an OpenAI-compatible API endpoint for speech synthesis.
 * Each provider is associated with a SpeechTier (default, premium, fallback).
 *
 * @example
 * ```typescript
 * class KokoroProvider implements ITTSProvider {
 *   readonly name = "kokoro";
 *   readonly tier = "default";
 *
 *   async synthesize(text: string, options?: SynthesizeOptions): Promise<SynthesisResult> {
 *     // Call Kokoro API via OpenAI SDK
 *   }
 *
 *   async listVoices(): Promise<VoiceInfo[]> {
 *     // Return available voices
 *   }
 *
 *   async isHealthy(): Promise<boolean> {
 *     // Check endpoint health
 *   }
 * }
 * ```
 */
export interface ITTSProvider {
  /** Name of the provider, used for logging and identification. */
  readonly name: string;

  /** Tier served by this provider (default, premium, fallback). */
  readonly tier: SpeechTier;

  /**
   * Turn text into audio.
   *
   * @param text - Text to convert to speech
   * @param options - Optional synthesis parameters (voice, format, speed)
   * @returns The audio buffer together with its metadata
   * @throws {Error} If synthesis fails
   */
  synthesize(text: string, options?: SynthesizeOptions): Promise<SynthesisResult>;

  /**
   * Enumerate the voices this provider offers.
   *
   * @returns Array of voice information objects
   */
  listVoices(): Promise<VoiceInfo[]>;

  /**
   * Report whether the provider is healthy and available.
   *
   * @returns true if the provider endpoint is reachable and ready
   */
  isHealthy(): Promise<boolean>;
}

View File

@@ -0,0 +1,19 @@
/**
* Speech Module Constants
*
* NestJS injection tokens for speech providers.
*
* Issue #389
*/
/**
 * NestJS injection token under which the STT (speech-to-text) provider
 * is registered. The registered value implements ISTTProvider.
 */
export const STT_PROVIDER: unique symbol = Symbol("STT_PROVIDER");
/**
 * NestJS injection token for the map of TTS (text-to-speech) providers.
 * The registered value is a Map<SpeechTier, ITTSProvider>.
 */
export const TTS_PROVIDERS: unique symbol = Symbol("TTS_PROVIDERS");

View File

@@ -0,0 +1,49 @@
/**
* SpeechModule
*
* NestJS module for speech-to-text (STT) and text-to-speech (TTS) services.
* Provides a provider abstraction layer with graceful fallback for TTS tiers.
*
* Imports:
* - ConfigModule.forFeature(speechConfig) for speech configuration
*
* Providers:
* - SpeechService: High-level speech operations with provider selection
* - TTS_PROVIDERS: Empty Map<SpeechTier, ITTSProvider> (populated by provider modules)
*
* Exports:
* - SpeechService for use by other modules (e.g., controllers, brain)
*
* Issue #389
*/
import { Module, type OnModuleInit, Logger } from "@nestjs/common";
import { ConfigModule } from "@nestjs/config";
import { speechConfig, validateSpeechConfig } from "./speech.config";
import { SpeechService } from "./speech.service";
import { TTS_PROVIDERS } from "./speech.constants";
import type { SpeechTier } from "./interfaces/speech-types";
import type { ITTSProvider } from "./interfaces/tts-provider.interface";
@Module({
  imports: [ConfigModule.forFeature(speechConfig)],
  providers: [
    SpeechService,
    {
      // Starts out as an empty tier -> provider map. Provider modules
      // (Kokoro, Chatterbox, etc.) will register their providers in
      // subsequent tasks.
      provide: TTS_PROVIDERS,
      useFactory(): Map<SpeechTier, ITTSProvider> {
        return new Map<SpeechTier, ITTSProvider>();
      },
    },
  ],
  exports: [SpeechService],
})
/**
 * Wires SpeechService together with the speech configuration and an
 * (initially empty) TTS providers map.
 *
 * No STT_PROVIDER is registered by this module itself.
 */
export class SpeechModule implements OnModuleInit {
  private readonly logger = new Logger(SpeechModule.name);

  /**
   * Lifecycle hook: runs validateSpeechConfig() at startup so configuration
   * problems surface early (fail fast), then logs that the module is ready.
   */
  onModuleInit(): void {
    validateSpeechConfig();
    this.logger.log("Speech module initialized");
  }
}

View File

@@ -0,0 +1,541 @@
/**
* SpeechService Tests
*
* Issue #389: Tests for provider abstraction layer with fallback logic.
* Written FIRST following TDD (Red-Green-Refactor).
*/
import { describe, it, expect, beforeEach, vi } from "vitest";
import { Test, TestingModule } from "@nestjs/testing";
import { ServiceUnavailableException } from "@nestjs/common";
import { SpeechService } from "./speech.service";
import { STT_PROVIDER, TTS_PROVIDERS } from "./speech.constants";
import { speechConfig } from "./speech.config";
import type { ISTTProvider } from "./interfaces/stt-provider.interface";
import type { ITTSProvider } from "./interfaces/tts-provider.interface";
import type {
SpeechTier,
TranscriptionResult,
SynthesisResult,
VoiceInfo,
} from "./interfaces/speech-types";
// ==========================================
// Mock provider factories
// ==========================================
/**
 * Build a mock STT provider whose methods are vitest spies.
 *
 * By default `transcribe` resolves to a fixed "Hello world" result and
 * `isHealthy` resolves to true.
 *
 * @param overrides - Members that replace the defaults (e.g., a failing `transcribe`)
 * @returns A provider object satisfying ISTTProvider
 */
function createMockSttProvider(overrides?: Partial<ISTTProvider>): ISTTProvider {
  const cannedTranscription = {
    text: "Hello world",
    language: "en",
    durationSeconds: 2.5,
  } satisfies TranscriptionResult;

  const defaults: ISTTProvider = {
    name: "mock-stt",
    transcribe: vi.fn().mockResolvedValue(cannedTranscription),
    isHealthy: vi.fn().mockResolvedValue(true),
  };

  // Overrides are spread last so they win over the defaults.
  return { ...defaults, ...overrides };
}
/**
 * Build a mock TTS provider for the given tier whose methods are vitest spies.
 *
 * By default `synthesize` resolves to a small fake mp3 result tagged with
 * `tier`, `listVoices` resolves to a single default voice for `tier`, and
 * `isHealthy` resolves to true.
 *
 * @param tier - Tier the mock provider reports and tags its results with
 * @param overrides - Members that replace the defaults (e.g., a failing `synthesize`)
 * @returns A provider object satisfying ITTSProvider
 */
function createMockTtsProvider(tier: SpeechTier, overrides?: Partial<ITTSProvider>): ITTSProvider {
  const cannedSynthesis = {
    audio: Buffer.from("fake-audio"),
    format: "mp3",
    voice: "test-voice",
    tier,
  } satisfies SynthesisResult;

  const cannedVoices = [
    { id: `${tier}-voice-1`, name: `${tier} Voice 1`, tier, isDefault: true },
  ] satisfies VoiceInfo[];

  const defaults: ITTSProvider = {
    name: `mock-tts-${tier}`,
    tier,
    synthesize: vi.fn().mockResolvedValue(cannedSynthesis),
    listVoices: vi.fn().mockResolvedValue(cannedVoices),
    isHealthy: vi.fn().mockResolvedValue(true),
  };

  // Overrides are spread last so they win over the defaults.
  return { ...defaults, ...overrides };
}
// ==========================================
// Default config for tests
// ==========================================
/**
 * Produce a fresh speech configuration object for tests.
 *
 * STT and all three TTS tiers are enabled. A new object is returned on
 * every call, so tests can flip flags without affecting each other.
 */
function createTestConfig(): ReturnType<typeof speechConfig> {
  const testConfig = {
    stt: {
      enabled: true,
      baseUrl: "http://localhost:8000/v1",
      model: "test-model",
      language: "en",
    },
    tts: {
      default: {
        enabled: true,
        url: "http://localhost:8880/v1",
        voice: "test-voice",
        format: "mp3",
      },
      premium: {
        enabled: true,
        url: "http://localhost:8881/v1",
      },
      fallback: {
        enabled: true,
        url: "http://localhost:8882/v1",
      },
    },
    limits: {
      maxUploadSize: 25_000_000,
      maxDurationSeconds: 600,
      maxTextLength: 4096,
    },
  } as ReturnType<typeof speechConfig>;

  return testConfig;
}
// ==========================================
// Test helper: create testing module
// ==========================================
/**
 * Compile a NestJS testing module containing SpeechService and its dependencies.
 *
 * @param options.sttProvider - Value registered under STT_PROVIDER. When left
 *   `undefined` the token is not registered at all; `null` is registered as-is.
 * @param options.ttsProviders - Map registered under TTS_PROVIDERS (defaults to an empty map)
 * @param options.config - Speech configuration (defaults to createTestConfig())
 * @returns The compiled testing module
 */
async function createTestModule(options: {
  sttProvider?: ISTTProvider | null;
  ttsProviders?: Map<SpeechTier, ITTSProvider>;
  config?: ReturnType<typeof speechConfig>;
}): Promise<TestingModule> {
  const resolvedConfig = options.config ?? createTestConfig();
  const resolvedTtsProviders = options.ttsProviders ?? new Map<SpeechTier, ITTSProvider>();

  // Only register the STT token when the caller supplied a value (including null).
  const sttRegistration =
    options.sttProvider === undefined
      ? []
      : [{ provide: STT_PROVIDER, useValue: options.sttProvider }];

  const builder = Test.createTestingModule({
    providers: [
      SpeechService,
      { provide: speechConfig.KEY, useValue: resolvedConfig },
      { provide: TTS_PROVIDERS, useValue: resolvedTtsProviders },
      ...sttRegistration,
    ],
  });

  return builder.compile();
}
// ==========================================
// Tests
// ==========================================
describe("SpeechService", () => {
// ==========================================
// Construction and initialization
// ==========================================
describe("construction", () => {
  // Compiles a module from the given options and checks that SpeechService resolves.
  const expectServiceToResolve = async (
    options: Parameters<typeof createTestModule>[0]
  ): Promise<void> => {
    const testingModule = await createTestModule(options);
    const resolved = testingModule.get<SpeechService>(SpeechService);

    expect(resolved).toBeDefined();
  };

  // A providers map holding only a mock "default" tier provider.
  const defaultOnlyProviders = (): Map<SpeechTier, ITTSProvider> =>
    new Map<SpeechTier, ITTSProvider>([["default", createMockTtsProvider("default")]]);

  it("should be defined when all providers are injected", async () => {
    await expectServiceToResolve({
      sttProvider: createMockSttProvider(),
      ttsProviders: defaultOnlyProviders(),
    });
  });

  it("should be defined with no STT provider", async () => {
    await expectServiceToResolve({
      sttProvider: null,
      ttsProviders: defaultOnlyProviders(),
    });
  });

  it("should be defined with empty TTS providers map", async () => {
    await expectServiceToResolve({
      sttProvider: createMockSttProvider(),
      ttsProviders: new Map<SpeechTier, ITTSProvider>(),
    });
  });
});
// ==========================================
// transcribe()
// ==========================================
describe("transcribe", () => {
  let sttMock: ISTTProvider;
  let speech: SpeechService;

  // Compiles a fresh module from the options and returns its SpeechService.
  const resolveService = async (
    options: Parameters<typeof createTestModule>[0]
  ): Promise<SpeechService> => {
    const testingModule = await createTestModule(options);
    return testingModule.get<SpeechService>(SpeechService);
  };

  beforeEach(async () => {
    sttMock = createMockSttProvider();
    speech = await resolveService({ sttProvider: sttMock });
  });

  it("should delegate to the STT provider", async () => {
    const audio = Buffer.from("test-audio");

    const transcription = await speech.transcribe(audio);

    expect(sttMock.transcribe).toHaveBeenCalledWith(audio, undefined);
    expect(transcription.text).toBe("Hello world");
    expect(transcription.language).toBe("en");
  });

  it("should pass options to the STT provider", async () => {
    const audio = Buffer.from("test-audio");
    const options = { language: "fr", model: "custom-model" };

    await speech.transcribe(audio, options);

    expect(sttMock.transcribe).toHaveBeenCalledWith(audio, options);
  });

  it("should throw ServiceUnavailableException when STT is disabled in config", async () => {
    const config = createTestConfig();
    config.stt.enabled = false;
    const disabledSpeech = await resolveService({ sttProvider: sttMock, config });

    const pending = disabledSpeech.transcribe(Buffer.from("audio"));

    await expect(pending).rejects.toThrow(ServiceUnavailableException);
  });

  it("should throw ServiceUnavailableException when no STT provider is registered", async () => {
    const providerlessSpeech = await resolveService({ sttProvider: null });

    const pending = providerlessSpeech.transcribe(Buffer.from("audio"));

    await expect(pending).rejects.toThrow(ServiceUnavailableException);
  });

  it("should propagate provider errors as ServiceUnavailableException", async () => {
    const failingStt = createMockSttProvider({
      transcribe: vi.fn().mockRejectedValue(new Error("Connection refused")),
    });
    const failingSpeech = await resolveService({ sttProvider: failingStt });

    const pending = failingSpeech.transcribe(Buffer.from("audio"));

    await expect(pending).rejects.toThrow(ServiceUnavailableException);
  });
});
// ==========================================
// synthesize()
// ==========================================
describe("synthesize", () => {
  let speech: SpeechService;
  let defaultTts: ITTSProvider;
  let premiumTts: ITTSProvider;
  let fallbackTts: ITTSProvider;

  // Every test starts with healthy providers registered for all three tiers.
  beforeEach(async () => {
    defaultTts = createMockTtsProvider("default");
    premiumTts = createMockTtsProvider("premium");
    fallbackTts = createMockTtsProvider("fallback");

    const allTiers = new Map<SpeechTier, ITTSProvider>();
    allTiers.set("default", defaultTts);
    allTiers.set("premium", premiumTts);
    allTiers.set("fallback", fallbackTts);

    const testingModule = await createTestModule({ ttsProviders: allTiers });
    speech = testingModule.get<SpeechService>(SpeechService);
  });

  it("should use the default tier when no tier is specified", async () => {
    const synthesis = await speech.synthesize("Hello world");

    expect(defaultTts.synthesize).toHaveBeenCalledWith("Hello world", undefined);
    expect(synthesis.tier).toBe("default");
  });

  it("should use the requested tier when specified", async () => {
    const synthesis = await speech.synthesize("Hello world", { tier: "premium" });

    expect(premiumTts.synthesize).toHaveBeenCalled();
    expect(synthesis.tier).toBe("premium");
  });

  it("should pass options to the TTS provider", async () => {
    const options = { voice: "custom-voice", format: "wav" as const };

    await speech.synthesize("Hello", options);

    expect(defaultTts.synthesize).toHaveBeenCalledWith("Hello", options);
  });

  it("should throw ServiceUnavailableException when TTS default is disabled and no tier specified", async () => {
    // Every tier is switched off in config; only a default provider is registered.
    const config = createTestConfig();
    config.tts.default.enabled = false;
    config.tts.premium.enabled = false;
    config.tts.fallback.enabled = false;
    const testingModule = await createTestModule({
      ttsProviders: new Map<SpeechTier, ITTSProvider>([["default", defaultTts]]),
      config,
    });
    const disabledSpeech = testingModule.get<SpeechService>(SpeechService);

    const pending = disabledSpeech.synthesize("Hello");

    await expect(pending).rejects.toThrow(ServiceUnavailableException);
  });

  it("should throw ServiceUnavailableException when no TTS providers are registered", async () => {
    const testingModule = await createTestModule({
      ttsProviders: new Map<SpeechTier, ITTSProvider>(),
    });
    const providerlessSpeech = testingModule.get<SpeechService>(SpeechService);

    const pending = providerlessSpeech.synthesize("Hello");

    await expect(pending).rejects.toThrow(ServiceUnavailableException);
  });
});
// ==========================================
// synthesize() fallback logic
// ==========================================
describe("synthesize fallback", () => {
  // Builds a TTS provider for `tier` whose synthesize() always rejects with `reason`.
  const createFailingTts = (tier: SpeechTier, reason: string): ITTSProvider =>
    createMockTtsProvider(tier, {
      synthesize: vi.fn().mockRejectedValue(new Error(reason)),
    });

  // Registers the providers under their own tier (in the given order) and
  // returns the SpeechService from the compiled module.
  const resolveService = async (
    providers: ITTSProvider[],
    config?: ReturnType<typeof speechConfig>
  ): Promise<SpeechService> => {
    const ttsProviders = new Map<SpeechTier, ITTSProvider>(
      providers.map((provider): [SpeechTier, ITTSProvider] => [provider.tier, provider])
    );
    const testingModule = await createTestModule(
      config ? { ttsProviders, config } : { ttsProviders }
    );
    return testingModule.get<SpeechService>(SpeechService);
  };

  it("should fall back from premium to default when premium provider fails", async () => {
    const brokenPremium = createFailingTts("premium", "Premium unavailable");
    const workingDefault = createMockTtsProvider("default");
    const speech = await resolveService([brokenPremium, workingDefault]);

    const synthesis = await speech.synthesize("Hello", { tier: "premium" });

    expect(brokenPremium.synthesize).toHaveBeenCalled();
    expect(workingDefault.synthesize).toHaveBeenCalled();
    expect(synthesis.tier).toBe("default");
  });

  it("should fall back from default to fallback when default provider fails", async () => {
    const brokenDefault = createFailingTts("default", "Default unavailable");
    const workingFallback = createMockTtsProvider("fallback");
    const speech = await resolveService([brokenDefault, workingFallback]);

    const synthesis = await speech.synthesize("Hello");

    expect(brokenDefault.synthesize).toHaveBeenCalled();
    expect(workingFallback.synthesize).toHaveBeenCalled();
    expect(synthesis.tier).toBe("fallback");
  });

  it("should fall back premium -> default -> fallback", async () => {
    const brokenPremium = createFailingTts("premium", "Premium fail");
    const brokenDefault = createFailingTts("default", "Default fail");
    const workingFallback = createMockTtsProvider("fallback");
    const speech = await resolveService([brokenPremium, brokenDefault, workingFallback]);

    const synthesis = await speech.synthesize("Hello", { tier: "premium" });

    expect(brokenPremium.synthesize).toHaveBeenCalled();
    expect(brokenDefault.synthesize).toHaveBeenCalled();
    expect(workingFallback.synthesize).toHaveBeenCalled();
    expect(synthesis.tier).toBe("fallback");
  });

  it("should throw ServiceUnavailableException when all tiers fail", async () => {
    const brokenDefault = createFailingTts("default", "Default fail");
    const brokenFallback = createFailingTts("fallback", "Fallback fail");
    const speech = await resolveService([brokenDefault, brokenFallback]);

    const pending = speech.synthesize("Hello");

    await expect(pending).rejects.toThrow(ServiceUnavailableException);
  });

  it("should skip unavailable tiers in fallback chain", async () => {
    // premium requested, but only fallback registered (no default),
    // and the default tier is also disabled in config.
    const brokenPremium = createFailingTts("premium", "Premium fail");
    const workingFallback = createMockTtsProvider("fallback");
    const config = createTestConfig();
    config.tts.default.enabled = false;
    const speech = await resolveService([brokenPremium, workingFallback], config);

    const synthesis = await speech.synthesize("Hello", { tier: "premium" });

    expect(synthesis.tier).toBe("fallback");
  });
});
// ==========================================
// listVoices()
// ==========================================
describe("listVoices", () => {
  // Builds a TTS provider for `tier` whose listVoices() resolves to `voices`.
  const createProviderWithVoices = (tier: SpeechTier, voices: VoiceInfo[]): ITTSProvider =>
    createMockTtsProvider(tier, {
      listVoices: vi.fn().mockResolvedValue(voices),
    });

  // Registers the providers under their own tier (in the given order) and
  // returns the SpeechService from the compiled module.
  const resolveService = async (providers: ITTSProvider[]): Promise<SpeechService> => {
    const ttsProviders = new Map<SpeechTier, ITTSProvider>(
      providers.map((provider): [SpeechTier, ITTSProvider] => [provider.tier, provider])
    );
    const testingModule = await createTestModule({ ttsProviders });
    return testingModule.get<SpeechService>(SpeechService);
  };

  it("should aggregate voices from all registered TTS providers", async () => {
    const defaultTts = createProviderWithVoices("default", [
      { id: "voice-1", name: "Voice 1", tier: "default", isDefault: true },
      { id: "voice-2", name: "Voice 2", tier: "default" },
    ]);
    const premiumTts = createProviderWithVoices("premium", [
      { id: "voice-3", name: "Voice 3", tier: "premium", isDefault: true },
    ]);
    const speech = await resolveService([defaultTts, premiumTts]);

    const voices = await speech.listVoices();

    expect(voices).toHaveLength(3);
    expect(voices.map((v) => v.id)).toEqual(["voice-1", "voice-2", "voice-3"]);
  });

  it("should filter voices by tier when specified", async () => {
    const defaultTts = createProviderWithVoices("default", [
      { id: "voice-1", name: "Voice 1", tier: "default" },
    ]);
    const premiumTts = createProviderWithVoices("premium", [
      { id: "voice-2", name: "Voice 2", tier: "premium" },
    ]);
    const speech = await resolveService([defaultTts, premiumTts]);

    const voices = await speech.listVoices("premium");

    expect(voices).toHaveLength(1);
    expect(voices[0].id).toBe("voice-2");
    // Only the premium provider should have been called
    expect(premiumTts.listVoices).toHaveBeenCalled();
    expect(defaultTts.listVoices).not.toHaveBeenCalled();
  });

  it("should return empty array when no TTS providers are registered", async () => {
    const speech = await resolveService([]);

    const voices = await speech.listVoices();

    expect(voices).toEqual([]);
  });

  it("should return empty array when requested tier has no provider", async () => {
    const speech = await resolveService([createMockTtsProvider("default")]);

    const voices = await speech.listVoices("premium");

    expect(voices).toEqual([]);
  });
});
// ==========================================
// isSTTAvailable / isTTSAvailable
// ==========================================
describe("availability checks", () => {
  // Compiles a fresh module from the options and returns its SpeechService.
  const resolveService = async (
    options: Parameters<typeof createTestModule>[0]
  ): Promise<SpeechService> => {
    const testingModule = await createTestModule(options);
    return testingModule.get<SpeechService>(SpeechService);
  };

  it("should report STT as available when enabled and provider registered", async () => {
    const speech = await resolveService({ sttProvider: createMockSttProvider() });

    expect(speech.isSTTAvailable()).toBe(true);
  });

  it("should report STT as unavailable when disabled in config", async () => {
    const config = createTestConfig();
    config.stt.enabled = false;
    const speech = await resolveService({ sttProvider: createMockSttProvider(), config });

    expect(speech.isSTTAvailable()).toBe(false);
  });

  it("should report STT as unavailable when no provider registered", async () => {
    const speech = await resolveService({ sttProvider: null });

    expect(speech.isSTTAvailable()).toBe(false);
  });

  it("should report TTS as available when at least one tier is enabled with a provider", async () => {
    const speech = await resolveService({
      ttsProviders: new Map<SpeechTier, ITTSProvider>([
        ["default", createMockTtsProvider("default")],
      ]),
    });

    expect(speech.isTTSAvailable()).toBe(true);
  });

  it("should report TTS as unavailable when no providers registered", async () => {
    // Both conditions at once: every tier disabled AND nothing registered.
    const config = createTestConfig();
    config.tts.default.enabled = false;
    config.tts.premium.enabled = false;
    config.tts.fallback.enabled = false;
    const speech = await resolveService({
      ttsProviders: new Map<SpeechTier, ITTSProvider>(),
      config,
    });

    expect(speech.isTTSAvailable()).toBe(false);
  });

  // The two tests below isolate each condition that the test above combines,
  // so a regression in either half of the availability check is caught.

  it("should report TTS as unavailable when tiers are enabled but no providers are registered", async () => {
    // Default test config leaves every tier enabled; only the registry is empty.
    const speech = await resolveService({
      ttsProviders: new Map<SpeechTier, ITTSProvider>(),
    });

    expect(speech.isTTSAvailable()).toBe(false);
  });

  it("should report TTS as unavailable when the only registered provider's tier is disabled", async () => {
    const config = createTestConfig();
    config.tts.default.enabled = false;
    const speech = await resolveService({
      ttsProviders: new Map<SpeechTier, ITTSProvider>([
        ["default", createMockTtsProvider("default")],
      ]),
      config,
    });

    expect(speech.isTTSAvailable()).toBe(false);
  });
});
});

View File

@@ -0,0 +1,231 @@
/**
* SpeechService
*
* High-level service for speech-to-text (STT) and text-to-speech (TTS) operations.
* Manages provider selection and graceful fallback for TTS tiers.
*
* Fallback chain for TTS: premium -> default -> fallback
* Each tier is only attempted if enabled in config and a provider is registered.
*
* Issue #389
*/
import { Injectable, Inject, Optional, Logger, ServiceUnavailableException } from "@nestjs/common";
import { STT_PROVIDER, TTS_PROVIDERS } from "./speech.constants";
import { speechConfig, type SpeechConfig } from "./speech.config";
import type { ISTTProvider } from "./interfaces/stt-provider.interface";
import type { ITTSProvider } from "./interfaces/tts-provider.interface";
import type {
SpeechTier,
TranscribeOptions,
TranscriptionResult,
SynthesizeOptions,
SynthesisResult,
VoiceInfo,
} from "./interfaces/speech-types";
/**
 * Order in which TTS tiers are tried.
 * If a tier fails, the tier that follows it in this list is attempted next.
 */
const TTS_FALLBACK_ORDER: ReadonlyArray<SpeechTier> = [
  "premium",
  "default",
  "fallback",
];
@Injectable()
export class SpeechService {
  private readonly logger = new Logger(SpeechService.name);

  /**
   * @param config - Speech configuration injected under `speechConfig.KEY`
   * @param sttProvider - Optional STT provider. Because the dependency is
   *   marked `@Optional()`, it is `undefined` when nothing is registered under
   *   STT_PROVIDER; callers may also register an explicit `null`.
   * @param ttsProviders - Registered TTS providers keyed by tier
   */
  constructor(
    @Inject(speechConfig.KEY)
    private readonly config: SpeechConfig,
    @Optional()
    @Inject(STT_PROVIDER)
    private readonly sttProvider: ISTTProvider | null | undefined,
    @Inject(TTS_PROVIDERS)
    private readonly ttsProviders: Map<SpeechTier, ITTSProvider>
  ) {
    this.logger.log("Speech service initialized");

    if (this.sttProvider) {
      this.logger.log(`STT provider registered: ${this.sttProvider.name}`);
    }

    if (this.ttsProviders.size > 0) {
      const tierNames = Array.from(this.ttsProviders.keys()).join(", ");
      this.logger.log(`TTS providers registered: ${tierNames}`);
    }
  }

  // ==========================================
  // STT Operations
  // ==========================================

  /**
   * Transcribe audio data to text using the registered STT provider.
   *
   * Any error thrown by the provider is logged and re-thrown as a
   * ServiceUnavailableException carrying the provider's message.
   *
   * @param audio - Raw audio data as a Buffer
   * @param options - Optional transcription parameters, forwarded unchanged
   * @returns Transcription result with text and metadata
   * @throws {ServiceUnavailableException} If STT is disabled, no provider is
   *   registered, or the provider call fails
   */
  async transcribe(audio: Buffer, options?: TranscribeOptions): Promise<TranscriptionResult> {
    if (!this.config.stt.enabled) {
      throw new ServiceUnavailableException("Speech-to-text is not enabled");
    }

    if (!this.sttProvider) {
      throw new ServiceUnavailableException("No STT provider is registered");
    }

    try {
      return await this.sttProvider.transcribe(audio, options);
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      this.logger.error(`STT transcription failed: ${message}`);
      throw new ServiceUnavailableException(`Transcription failed: ${message}`);
    }
  }

  // ==========================================
  // TTS Operations
  // ==========================================

  /**
   * Synthesize text to audio using TTS providers with graceful fallback.
   *
   * The chain starts at the requested tier (or "default" when none is given)
   * and continues through the tiers that follow it in TTS_FALLBACK_ORDER;
   * tiers earlier in that order are never tried. Only tiers that are enabled
   * in config and have a registered provider are attempted.
   *
   * @param text - Text to convert to speech
   * @param options - Optional synthesis parameters (voice, format, tier)
   * @returns Synthesis result from the first provider that succeeds
   * @throws {ServiceUnavailableException} If no TTS provider can fulfill the request
   */
  async synthesize(text: string, options?: SynthesizeOptions): Promise<SynthesisResult> {
    const requestedTier = options?.tier ?? "default";
    const fallbackChain = this.buildFallbackChain(requestedTier);

    if (fallbackChain.length === 0) {
      throw new ServiceUnavailableException(
        "No TTS providers are available. Check that TTS is enabled and providers are registered."
      );
    }

    let lastError: Error | undefined;

    for (const tier of fallbackChain) {
      const provider = this.ttsProviders.get(tier);
      if (!provider) {
        continue;
      }

      try {
        // The caller's options are forwarded unchanged to every provider tried,
        // including the originally requested tier and voice.
        return await provider.synthesize(text, options);
      } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        this.logger.warn(`TTS tier "${tier}" (${provider.name}) failed: ${message}`);
        lastError = error instanceof Error ? error : new Error(message);
      }
    }

    const errorMessage = lastError?.message ?? "No providers available";
    throw new ServiceUnavailableException(`All TTS providers failed: ${errorMessage}`);
  }

  /**
   * List available voices across all TTS providers, optionally filtered by tier.
   *
   * Looks only at registered providers; the per-tier `enabled` config flags
   * are not consulted here. A provider whose `listVoices()` fails is logged
   * and contributes no voices instead of failing the whole call.
   *
   * @param tier - Optional tier filter. If omitted, voices from all tiers are returned.
   * @returns Array of voice information objects
   */
  async listVoices(tier?: SpeechTier): Promise<VoiceInfo[]> {
    if (tier) {
      const provider = this.ttsProviders.get(tier);
      if (!provider) {
        return [];
      }

      try {
        return await provider.listVoices();
      } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        this.logger.warn(`Failed to list voices for tier "${tier}": ${message}`);
        return [];
      }
    }

    // Aggregate voices from all providers, in the map's iteration order.
    const voices: VoiceInfo[] = [];

    for (const [providerTier, provider] of this.ttsProviders) {
      try {
        const tierVoices = await provider.listVoices();
        voices.push(...tierVoices);
      } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        this.logger.warn(`Failed to list voices for tier "${providerTier}": ${message}`);
      }
    }

    return voices;
  }

  // ==========================================
  // Availability Checks
  // ==========================================

  /**
   * Check if STT is available (enabled in config and provider registered).
   *
   * Both `null` and `undefined` count as "no provider": an unresolved
   * `@Optional()` dependency arrives as `undefined`, so comparing against
   * `null` alone would report STT as available with nothing registered.
   */
  isSTTAvailable(): boolean {
    const hasProvider = this.sttProvider !== null && this.sttProvider !== undefined;
    return this.config.stt.enabled && hasProvider;
  }

  /**
   * Check if TTS is available (at least one tier enabled with a registered provider).
   */
  isTTSAvailable(): boolean {
    return this.getEnabledTiers().some((tier) => this.ttsProviders.has(tier));
  }

  // ==========================================
  // Private helpers
  // ==========================================

  /**
   * Build the fallback chain starting from the requested tier.
   * Only includes tiers that are enabled in config and have a registered provider.
   *
   * @param requestedTier - Tier at which the chain starts
   * @returns Tiers to attempt, in TTS_FALLBACK_ORDER order; empty if the
   *   requested tier is not part of that order or nothing qualifies
   */
  private buildFallbackChain(requestedTier: SpeechTier): SpeechTier[] {
    const startIndex = TTS_FALLBACK_ORDER.indexOf(requestedTier);
    if (startIndex === -1) {
      return [];
    }

    const enabledTiers = this.getEnabledTiers();

    return TTS_FALLBACK_ORDER.slice(startIndex).filter(
      (tier) => enabledTiers.includes(tier) && this.ttsProviders.has(tier)
    );
  }

  /**
   * Get the list of TTS tiers that are enabled in the configuration.
   *
   * @returns Enabled tiers, listed as default, premium, fallback
   */
  private getEnabledTiers(): SpeechTier[] {
    const tiers: SpeechTier[] = [];

    if (this.config.tts.default.enabled) {
      tiers.push("default");
    }

    if (this.config.tts.premium.enabled) {
      tiers.push("premium");
    }

    if (this.config.tts.fallback.enabled) {
      tiers.push("fallback");
    }

    return tiers;
  }
}