Compare commits

...

2 Commits

Author SHA1 Message Date
b5edb4f37e feat(#391): add base TTS provider and factory classes
All checks were successful
ci/woodpecker/push/api Pipeline was successful
Add the BaseTTSProvider abstract class and TTS provider factory that were
part of the tiered TTS architecture but missed from the previous commit.

- BaseTTSProvider: abstract base with synthesize(), listVoices(), isHealthy()
- tts-provider.factory: creates Kokoro/Chatterbox/Piper providers from config
- 30 tests (22 base provider + 8 factory)

Refs #391
2026-02-15 02:20:24 -06:00
3ae9e53bcc feat(#391): implement tiered TTS provider architecture with base class
Add abstract BaseTTSProvider class that implements common OpenAI-compatible
TTS logic using the OpenAI SDK with configurable baseURL. Includes synthesize(),
listVoices(), and isHealthy() methods. Create TTS provider factory that
dynamically registers Kokoro (default), Chatterbox (premium), and Piper
(fallback) providers based on configuration. Update SpeechModule to use
the factory for TTS_PROVIDERS injection token.

Also fixes lint error in speaches-stt.provider.ts (Array<T> -> T[]).

30 tests added (22 base provider + 8 factory), all passing.

Fixes #391
2026-02-15 02:19:46 -06:00
7 changed files with 1591 additions and 10 deletions

View File

@@ -0,0 +1,329 @@
/**
* BaseTTSProvider Unit Tests
*
* Tests the abstract base class for OpenAI-compatible TTS providers.
* Uses a concrete test implementation to exercise the base class logic.
*
* Issue #391
*/
import { describe, it, expect, beforeEach, vi, type Mock } from "vitest";
import { BaseTTSProvider } from "./base-tts.provider";
import type { SpeechTier, SynthesizeOptions, AudioFormat } from "../interfaces/speech-types";
// ==========================================
// Mock OpenAI SDK
// ==========================================
// vi.mock() factories are hoisted above every other statement in the file,
// so anything a factory closes over must be created via vi.hoisted() to be
// guaranteed to exist before the factory runs. The previous version captured
// a plain top-level `const` and only worked because the class-field
// initializer reads it lazily; using vi.hoisted() is robust regardless of
// evaluation timing and matches the pattern already used in
// speaches-stt.provider.spec.ts.
const mockCreate = vi.hoisted(() => vi.fn());
vi.mock("openai", () => {
  class MockOpenAI {
    audio = {
      speech: {
        create: mockCreate,
      },
    };
  }
  return { default: MockOpenAI };
});
// ==========================================
// Concrete test implementation
// ==========================================
/**
 * Minimal concrete subclass used to exercise BaseTTSProvider's shared logic.
 *
 * The base constructor signature (baseURL, defaultVoice?, defaultFormat?) is
 * inherited directly — the previous explicit constructor only forwarded its
 * arguments to super() unchanged, so it was redundant and has been removed.
 */
class TestTTSProvider extends BaseTTSProvider {
  readonly name = "test-provider";
  readonly tier: SpeechTier = "default";
}
// ==========================================
// Test helpers
// ==========================================
/**
 * Create a mock Response-like object that mimics OpenAI SDK's audio.speech.create() return.
 * The OpenAI SDK returns a Response object with arrayBuffer() method.
 */
function createMockAudioResponse(audioData: Uint8Array): { arrayBuffer: Mock } {
  // Slice the exact view range. Returning `audioData.buffer` directly would
  // expose the whole underlying ArrayBuffer, which is wrong for views created
  // with subarray()/a byteOffset — the mocked audio would gain extra bytes.
  const buffer = audioData.buffer.slice(
    audioData.byteOffset,
    audioData.byteOffset + audioData.byteLength
  );
  return {
    arrayBuffer: vi.fn().mockResolvedValue(buffer),
  };
}
describe("BaseTTSProvider", () => {
// Shared fixture: every test gets a fresh provider configured like a local
// Kokoro endpoint (af_heart voice, mp3 output).
let provider: TestTTSProvider;
const testBaseURL = "http://localhost:8880/v1";
const testVoice = "af_heart";
const testFormat: AudioFormat = "mp3";
beforeEach(() => {
// Reset call counts/implementations so expectations don't leak across tests.
vi.clearAllMocks();
provider = new TestTTSProvider(testBaseURL, testVoice, testFormat);
});
// ==========================================
// Constructor
// ==========================================
describe("constructor", () => {
it("should create an instance with provided configuration", () => {
expect(provider).toBeDefined();
expect(provider.name).toBe("test-provider");
expect(provider.tier).toBe("default");
});
it("should use default voice 'alloy' when none provided", () => {
const defaultProvider = new TestTTSProvider(testBaseURL);
expect(defaultProvider).toBeDefined();
});
it("should use default format 'mp3' when none provided", () => {
const defaultProvider = new TestTTSProvider(testBaseURL, "voice-1");
expect(defaultProvider).toBeDefined();
});
});
// ==========================================
// synthesize()
// ==========================================
describe("synthesize", () => {
it("should synthesize text and return a SynthesisResult with audio buffer", async () => {
const audioBytes = new Uint8Array([0x49, 0x44, 0x33, 0x04, 0x00]);
mockCreate.mockResolvedValue(createMockAudioResponse(audioBytes));
const result = await provider.synthesize("Hello, world!");
expect(result).toBeDefined();
expect(result.audio).toBeInstanceOf(Buffer);
expect(result.audio.length).toBe(audioBytes.length);
expect(result.format).toBe("mp3");
expect(result.voice).toBe("af_heart");
expect(result.tier).toBe("default");
});
it("should pass correct parameters to OpenAI SDK", async () => {
const audioBytes = new Uint8Array([0x01, 0x02]);
mockCreate.mockResolvedValue(createMockAudioResponse(audioBytes));
await provider.synthesize("Test text");
expect(mockCreate).toHaveBeenCalledWith({
model: "tts-1",
input: "Test text",
voice: "af_heart",
response_format: "mp3",
speed: 1.0,
});
});
it("should use custom voice from options", async () => {
const audioBytes = new Uint8Array([0x01]);
mockCreate.mockResolvedValue(createMockAudioResponse(audioBytes));
const options: SynthesizeOptions = { voice: "custom_voice" };
const result = await provider.synthesize("Hello", options);
expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({ voice: "custom_voice" }));
expect(result.voice).toBe("custom_voice");
});
it("should use custom format from options", async () => {
const audioBytes = new Uint8Array([0x01]);
mockCreate.mockResolvedValue(createMockAudioResponse(audioBytes));
const options: SynthesizeOptions = { format: "wav" };
const result = await provider.synthesize("Hello", options);
expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({ response_format: "wav" }));
expect(result.format).toBe("wav");
});
it("should use custom speed from options", async () => {
const audioBytes = new Uint8Array([0x01]);
mockCreate.mockResolvedValue(createMockAudioResponse(audioBytes));
const options: SynthesizeOptions = { speed: 1.5 };
await provider.synthesize("Hello", options);
expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({ speed: 1.5 }));
});
// Error paths: the base class wraps any failure in a provider-tagged Error.
it("should throw an error when synthesis fails", async () => {
mockCreate.mockRejectedValue(new Error("Connection refused"));
await expect(provider.synthesize("Hello")).rejects.toThrow(
"TTS synthesis failed for test-provider: Connection refused"
);
});
it("should throw an error when response arrayBuffer fails", async () => {
const mockResponse = {
arrayBuffer: vi.fn().mockRejectedValue(new Error("Read error")),
};
mockCreate.mockResolvedValue(mockResponse);
await expect(provider.synthesize("Hello")).rejects.toThrow(
"TTS synthesis failed for test-provider: Read error"
);
});
it("should handle empty text input gracefully", async () => {
const audioBytes = new Uint8Array([]);
mockCreate.mockResolvedValue(createMockAudioResponse(audioBytes));
const result = await provider.synthesize("");
expect(result.audio).toBeInstanceOf(Buffer);
expect(result.audio.length).toBe(0);
});
it("should handle non-Error exceptions", async () => {
mockCreate.mockRejectedValue("string error");
await expect(provider.synthesize("Hello")).rejects.toThrow(
"TTS synthesis failed for test-provider: string error"
);
});
});
// ==========================================
// listVoices()
// ==========================================
describe("listVoices", () => {
it("should return default voice list with the configured default voice", async () => {
const voices = await provider.listVoices();
expect(voices).toBeInstanceOf(Array);
expect(voices.length).toBeGreaterThan(0);
const defaultVoice = voices.find((v) => v.isDefault === true);
expect(defaultVoice).toBeDefined();
expect(defaultVoice?.id).toBe("af_heart");
expect(defaultVoice?.tier).toBe("default");
});
it("should set tier correctly on all returned voices", async () => {
const voices = await provider.listVoices();
for (const voice of voices) {
expect(voice.tier).toBe("default");
}
});
});
// ==========================================
// isHealthy()
// ==========================================
// isHealthy() probes the server with the global fetch API, so each test
// stubs the global and restores it afterwards via unstubAllGlobals().
describe("isHealthy", () => {
it("should return true when the TTS server is reachable", async () => {
// Mock global fetch for health check
const mockFetch = vi.fn().mockResolvedValue({
ok: true,
status: 200,
});
vi.stubGlobal("fetch", mockFetch);
const healthy = await provider.isHealthy();
expect(healthy).toBe(true);
expect(mockFetch).toHaveBeenCalled();
vi.unstubAllGlobals();
});
it("should return false when the TTS server is unreachable", async () => {
const mockFetch = vi.fn().mockRejectedValue(new Error("ECONNREFUSED"));
vi.stubGlobal("fetch", mockFetch);
const healthy = await provider.isHealthy();
expect(healthy).toBe(false);
vi.unstubAllGlobals();
});
it("should return false when the TTS server returns an error status", async () => {
const mockFetch = vi.fn().mockResolvedValue({
ok: false,
status: 503,
});
vi.stubGlobal("fetch", mockFetch);
const healthy = await provider.isHealthy();
expect(healthy).toBe(false);
vi.unstubAllGlobals();
});
it("should use the base URL for the health check", async () => {
const mockFetch = vi.fn().mockResolvedValue({ ok: true, status: 200 });
vi.stubGlobal("fetch", mockFetch);
await provider.isHealthy();
// Should call a health-related endpoint at the base URL
const calledUrl = mockFetch.mock.calls[0][0] as string;
expect(calledUrl).toContain("localhost:8880");
vi.unstubAllGlobals();
});
it("should set a timeout for the health check", async () => {
const mockFetch = vi.fn().mockResolvedValue({ ok: true, status: 200 });
vi.stubGlobal("fetch", mockFetch);
await provider.isHealthy();
// Should pass an AbortSignal for timeout
const fetchOptions = mockFetch.mock.calls[0][1] as RequestInit;
expect(fetchOptions.signal).toBeDefined();
vi.unstubAllGlobals();
});
});
// ==========================================
// Default values
// ==========================================
describe("default values", () => {
it("should use 'alloy' as default voice when none specified", async () => {
const defaultProvider = new TestTTSProvider(testBaseURL);
const audioBytes = new Uint8Array([0x01]);
mockCreate.mockResolvedValue(createMockAudioResponse(audioBytes));
await defaultProvider.synthesize("Hello");
expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({ voice: "alloy" }));
});
it("should use 'mp3' as default format when none specified", async () => {
const defaultProvider = new TestTTSProvider(testBaseURL);
const audioBytes = new Uint8Array([0x01]);
mockCreate.mockResolvedValue(createMockAudioResponse(audioBytes));
await defaultProvider.synthesize("Hello");
expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({ response_format: "mp3" }));
});
it("should use speed 1.0 as default speed", async () => {
const audioBytes = new Uint8Array([0x01]);
mockCreate.mockResolvedValue(createMockAudioResponse(audioBytes));
await provider.synthesize("Hello");
expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({ speed: 1.0 }));
});
});
});

View File

@@ -0,0 +1,189 @@
/**
* Base TTS Provider
*
* Abstract base class implementing common OpenAI-compatible TTS logic.
* All concrete TTS providers (Kokoro, Chatterbox, Piper) extend this class.
*
* Uses the OpenAI SDK with a configurable baseURL to communicate with
* OpenAI-compatible speech synthesis endpoints.
*
* Issue #391
*/
import { Logger } from "@nestjs/common";
import OpenAI from "openai";
import type { ITTSProvider } from "../interfaces/tts-provider.interface";
import type {
SpeechTier,
SynthesizeOptions,
SynthesisResult,
VoiceInfo,
AudioFormat,
} from "../interfaces/speech-types";
/** Default TTS model identifier used for OpenAI-compatible APIs */
const DEFAULT_MODEL = "tts-1";
/** Default voice when none is configured ("alloy" is OpenAI's canonical voice id) */
const DEFAULT_VOICE = "alloy";
/** Default audio format when none is configured */
const DEFAULT_FORMAT: AudioFormat = "mp3";
/** Default speech speed multiplier (1.0 = normal speed) */
const DEFAULT_SPEED = 1.0;
/** Health check timeout in milliseconds (enforced via AbortController in isHealthy) */
const HEALTH_CHECK_TIMEOUT_MS = 5000;
/**
 * Abstract base class for OpenAI-compatible TTS providers.
 *
 * Provides common logic for:
 * - Synthesizing text to audio via OpenAI SDK's audio.speech.create()
 * - Listing available voices (with a default implementation)
 * - Health checking the TTS endpoint
 *
 * Subclasses must set `name` and `tier` properties and may override
 * `listVoices()` to provide provider-specific voice lists.
 *
 * @example
 * ```typescript
 * class KokoroProvider extends BaseTTSProvider {
 *   readonly name = "kokoro";
 *   readonly tier: SpeechTier = "default";
 *
 *   constructor(baseURL: string) {
 *     super(baseURL, "af_heart", "mp3");
 *   }
 * }
 * ```
 */
export abstract class BaseTTSProvider implements ITTSProvider {
  abstract readonly name: string;
  abstract readonly tier: SpeechTier;
  protected readonly logger: Logger;
  protected readonly client: OpenAI;
  protected readonly baseURL: string;
  protected readonly defaultVoice: string;
  protected readonly defaultFormat: AudioFormat;

  /**
   * Create a new BaseTTSProvider.
   *
   * @param baseURL - The base URL for the OpenAI-compatible TTS endpoint
   * @param defaultVoice - Default voice ID to use when none is specified in options
   * @param defaultFormat - Default audio format to use when none is specified in options
   */
  constructor(
    baseURL: string,
    defaultVoice: string = DEFAULT_VOICE,
    defaultFormat: AudioFormat = DEFAULT_FORMAT
  ) {
    this.baseURL = baseURL;
    this.defaultVoice = defaultVoice;
    this.defaultFormat = defaultFormat;
    // Logger is named after the concrete subclass (e.g. "KokoroProvider").
    this.logger = new Logger(this.constructor.name);
    this.client = new OpenAI({
      baseURL,
      apiKey: "not-needed", // Self-hosted services don't require an API key
    });
  }

  /**
   * Synthesize text to audio using the OpenAI-compatible TTS endpoint.
   *
   * Calls `client.audio.speech.create()` with the provided text and options,
   * then converts the response to a Buffer.
   *
   * @param text - Text to convert to speech
   * @param options - Optional synthesis parameters (voice, format, speed)
   * @returns Synthesis result with audio buffer and metadata
   * @throws {Error} If synthesis fails
   */
  async synthesize(text: string, options?: SynthesizeOptions): Promise<SynthesisResult> {
    const voice = options?.voice ?? this.defaultVoice;
    const format = options?.format ?? this.defaultFormat;
    const speed = options?.speed ?? DEFAULT_SPEED;
    try {
      const response = await this.client.audio.speech.create({
        model: DEFAULT_MODEL,
        input: text,
        voice,
        response_format: format,
        speed,
      });
      // The SDK returns a fetch-style Response; materialize the audio bytes.
      const arrayBuffer = await response.arrayBuffer();
      const audio = Buffer.from(arrayBuffer);
      return {
        audio,
        format,
        voice,
        tier: this.tier,
      };
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      this.logger.error(`TTS synthesis failed: ${message}`);
      throw new Error(`TTS synthesis failed for ${this.name}: ${message}`);
    }
  }

  /**
   * List available voices for this provider.
   *
   * Default implementation returns the configured default voice.
   * Subclasses should override this to provide a full voice list
   * from their specific TTS engine.
   *
   * @returns Array of voice information objects
   */
  listVoices(): Promise<VoiceInfo[]> {
    return Promise.resolve([
      {
        id: this.defaultVoice,
        name: this.defaultVoice,
        tier: this.tier,
        isDefault: true,
      },
    ]);
  }

  /**
   * Check if the TTS server is reachable and healthy.
   *
   * Performs a GET against the server's `/v1/models` endpoint with a
   * timeout to verify the server is running and responding.
   *
   * @returns true if the server is reachable, false otherwise
   */
  async isHealthy(): Promise<boolean> {
    try {
      // Build the /v1/models health URL. The previous
      // `replace(/\/v1\/?$/, "/v1/models")` was a silent no-op whenever the
      // configured baseURL did not end in "/v1", leaving the health check
      // pointed at the bare base URL; append the path in that case instead.
      const healthUrl = /\/v1\/?$/.test(this.baseURL)
        ? this.baseURL.replace(/\/v1\/?$/, "/v1/models")
        : `${this.baseURL.replace(/\/+$/, "")}/v1/models`;
      const controller = new AbortController();
      const timeoutId = setTimeout(() => {
        controller.abort();
      }, HEALTH_CHECK_TIMEOUT_MS);
      try {
        const response = await fetch(healthUrl, {
          method: "GET",
          signal: controller.signal,
        });
        return response.ok;
      } finally {
        // Always clear the timer so a fast response doesn't leave it pending.
        clearTimeout(timeoutId);
      }
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      this.logger.warn(`Health check failed for ${this.name}: ${message}`);
      return false;
    }
  }
}

View File

@@ -0,0 +1,468 @@
/**
* SpeachesSttProvider Tests
*
* TDD tests for the Speaches/faster-whisper STT provider.
* Tests cover transcription, error handling, health checks, and config injection.
*
* Issue #390
*/
import { describe, it, expect, beforeEach, vi } from "vitest";
import { SpeachesSttProvider } from "./speaches-stt.provider";
import type { SpeechConfig } from "../speech.config";
import type { TranscribeOptions } from "../interfaces/speech-types";
// ==========================================
// Mock OpenAI SDK
// ==========================================
// vi.mock() factories are hoisted, so everything they capture is created via
// vi.hoisted() to guarantee it exists before the factory can run.
const { mockCreate, mockModelsList, mockToFile, mockOpenAIConstructorCalls } = vi.hoisted(() => {
  const mockCreate = vi.fn();
  const mockModelsList = vi.fn();
  // Mirror openai's toFile() helper faithfully: forward the options bag so
  // the produced File carries the caller's MIME type (the previous mock
  // silently dropped it, leaving File.type empty).
  const mockToFile = vi
    .fn()
    .mockImplementation(async (buffer: Buffer, name: string, options?: { type?: string }) => {
      return new File([buffer], name, options);
    });
  // Records every config object passed to `new OpenAI(...)` for assertions.
  const mockOpenAIConstructorCalls: Record<string, unknown>[] = [];
  return { mockCreate, mockModelsList, mockToFile, mockOpenAIConstructorCalls };
});
vi.mock("openai", () => {
  class MockOpenAI {
    audio = {
      transcriptions: {
        create: mockCreate,
      },
    };
    models = {
      list: mockModelsList,
    };
    constructor(config: Record<string, unknown>) {
      mockOpenAIConstructorCalls.push(config);
    }
  }
  return {
    default: MockOpenAI,
    toFile: mockToFile,
  };
});
// ==========================================
// Test helpers
// ==========================================
/**
 * Build a SpeechConfig fixture: STT enabled against a Speaches server, all
 * TTS tiers disabled, and standard upload/duration/text limits. Individual
 * STT fields can be overridden per test.
 */
function createTestConfig(overrides?: Partial<SpeechConfig["stt"]>): SpeechConfig {
  const stt = {
    enabled: true,
    baseUrl: "http://speaches:8000/v1",
    model: "Systran/faster-whisper-large-v3-turbo",
    language: "en",
    ...overrides,
  };
  const tts = {
    default: { enabled: false, url: "", voice: "", format: "" },
    premium: { enabled: false, url: "" },
    fallback: { enabled: false, url: "" },
  };
  const limits = {
    maxUploadSize: 25_000_000,
    maxDurationSeconds: 600,
    maxTextLength: 4096,
  };
  return { stt, tts, limits };
}
/**
 * Build a verbose_json-shaped transcription response ("Hello, world!", en,
 * 3.5s, one segment). Any top-level field can be replaced via `overrides`
 * (including setting it to undefined).
 */
function createMockVerboseResponse(overrides?: Record<string, unknown>): Record<string, unknown> {
  const defaultSegment = {
    id: 0,
    text: "Hello, world!",
    start: 0.0,
    end: 3.5,
    avg_logprob: -0.25,
    compression_ratio: 1.2,
    no_speech_prob: 0.01,
    seek: 0,
    temperature: 0.0,
    tokens: [1, 2, 3],
  };
  const base: Record<string, unknown> = {
    text: "Hello, world!",
    language: "en",
    duration: 3.5,
    segments: [defaultSegment],
  };
  return { ...base, ...overrides };
}
describe("SpeachesSttProvider", () => {
// Shared fixture: a fresh config + provider per test; the hoisted
// constructor-call log is cleared so config-injection assertions see only
// the calls made by the current test's setup.
let provider: SpeachesSttProvider;
let config: SpeechConfig;
beforeEach(() => {
vi.clearAllMocks();
mockOpenAIConstructorCalls.length = 0;
config = createTestConfig();
provider = new SpeachesSttProvider(config);
});
// ==========================================
// Provider identity
// ==========================================
describe("name", () => {
it("should have the name 'speaches'", () => {
expect(provider.name).toBe("speaches");
});
});
// ==========================================
// transcribe
// ==========================================
describe("transcribe", () => {
it("should call OpenAI audio.transcriptions.create with correct parameters", async () => {
const mockResponse = createMockVerboseResponse();
mockCreate.mockResolvedValueOnce(mockResponse);
const audio = Buffer.from("fake-audio-data");
await provider.transcribe(audio);
expect(mockCreate).toHaveBeenCalledOnce();
const callArgs = mockCreate.mock.calls[0][0];
expect(callArgs.model).toBe("Systran/faster-whisper-large-v3-turbo");
expect(callArgs.language).toBe("en");
expect(callArgs.response_format).toBe("verbose_json");
});
it("should convert Buffer to File using toFile", async () => {
const mockResponse = createMockVerboseResponse();
mockCreate.mockResolvedValueOnce(mockResponse);
const audio = Buffer.from("fake-audio-data");
await provider.transcribe(audio);
expect(mockToFile).toHaveBeenCalledWith(audio, "audio.wav", {
type: "audio/wav",
});
});
it("should return TranscriptionResult with text and language", async () => {
const mockResponse = createMockVerboseResponse();
mockCreate.mockResolvedValueOnce(mockResponse);
const audio = Buffer.from("fake-audio-data");
const result = await provider.transcribe(audio);
expect(result.text).toBe("Hello, world!");
expect(result.language).toBe("en");
});
it("should return durationSeconds from verbose response", async () => {
const mockResponse = createMockVerboseResponse({ duration: 5.25 });
mockCreate.mockResolvedValueOnce(mockResponse);
const audio = Buffer.from("fake-audio-data");
const result = await provider.transcribe(audio);
expect(result.durationSeconds).toBe(5.25);
});
it("should map segments from verbose response", async () => {
const mockResponse = createMockVerboseResponse({
segments: [
{
id: 0,
text: "Hello,",
start: 0.0,
end: 1.5,
avg_logprob: -0.2,
compression_ratio: 1.1,
no_speech_prob: 0.01,
seek: 0,
temperature: 0.0,
tokens: [1, 2],
},
{
id: 1,
text: " world!",
start: 1.5,
end: 3.5,
avg_logprob: -0.3,
compression_ratio: 1.3,
no_speech_prob: 0.02,
seek: 0,
temperature: 0.0,
tokens: [3, 4],
},
],
});
mockCreate.mockResolvedValueOnce(mockResponse);
const audio = Buffer.from("fake-audio-data");
const result = await provider.transcribe(audio);
// Only text/start/end survive the mapping; whisper-internal fields
// (avg_logprob, tokens, ...) are dropped.
expect(result.segments).toHaveLength(2);
expect(result.segments?.[0]).toEqual({
text: "Hello,",
start: 0.0,
end: 1.5,
});
expect(result.segments?.[1]).toEqual({
text: " world!",
start: 1.5,
end: 3.5,
});
});
it("should handle response without segments gracefully", async () => {
const mockResponse = createMockVerboseResponse({ segments: undefined });
mockCreate.mockResolvedValueOnce(mockResponse);
const audio = Buffer.from("fake-audio-data");
const result = await provider.transcribe(audio);
expect(result.text).toBe("Hello, world!");
expect(result.segments).toBeUndefined();
});
it("should handle response without duration gracefully", async () => {
const mockResponse = createMockVerboseResponse({ duration: undefined });
mockCreate.mockResolvedValueOnce(mockResponse);
const audio = Buffer.from("fake-audio-data");
const result = await provider.transcribe(audio);
expect(result.text).toBe("Hello, world!");
expect(result.durationSeconds).toBeUndefined();
});
// ------------------------------------------
// Options override
// ------------------------------------------
describe("options override", () => {
it("should use custom model from options when provided", async () => {
const mockResponse = createMockVerboseResponse();
mockCreate.mockResolvedValueOnce(mockResponse);
const audio = Buffer.from("fake-audio-data");
const options: TranscribeOptions = { model: "custom-whisper-model" };
await provider.transcribe(audio, options);
const callArgs = mockCreate.mock.calls[0][0];
expect(callArgs.model).toBe("custom-whisper-model");
});
it("should use custom language from options when provided", async () => {
const mockResponse = createMockVerboseResponse({ language: "fr" });
mockCreate.mockResolvedValueOnce(mockResponse);
const audio = Buffer.from("fake-audio-data");
const options: TranscribeOptions = { language: "fr" };
await provider.transcribe(audio, options);
const callArgs = mockCreate.mock.calls[0][0];
expect(callArgs.language).toBe("fr");
});
it("should pass through prompt option", async () => {
const mockResponse = createMockVerboseResponse();
mockCreate.mockResolvedValueOnce(mockResponse);
const audio = Buffer.from("fake-audio-data");
const options: TranscribeOptions = { prompt: "This is a meeting about project planning." };
await provider.transcribe(audio, options);
const callArgs = mockCreate.mock.calls[0][0];
expect(callArgs.prompt).toBe("This is a meeting about project planning.");
});
it("should pass through temperature option", async () => {
const mockResponse = createMockVerboseResponse();
mockCreate.mockResolvedValueOnce(mockResponse);
const audio = Buffer.from("fake-audio-data");
const options: TranscribeOptions = { temperature: 0.3 };
await provider.transcribe(audio, options);
const callArgs = mockCreate.mock.calls[0][0];
expect(callArgs.temperature).toBe(0.3);
});
it("should use custom mimeType for file conversion when provided", async () => {
const mockResponse = createMockVerboseResponse();
mockCreate.mockResolvedValueOnce(mockResponse);
const audio = Buffer.from("fake-audio-data");
const options: TranscribeOptions = { mimeType: "audio/mp3" };
await provider.transcribe(audio, options);
expect(mockToFile).toHaveBeenCalledWith(audio, "audio.mp3", {
type: "audio/mp3",
});
});
});
// ------------------------------------------
// Simple response fallback
// ------------------------------------------
describe("simple response fallback", () => {
it("should handle simple Transcription response (text only, no verbose fields)", async () => {
// Some configurations may return just { text: "..." } without verbose fields
const simpleResponse = { text: "Simple transcription result." };
mockCreate.mockResolvedValueOnce(simpleResponse);
const audio = Buffer.from("fake-audio-data");
const result = await provider.transcribe(audio);
expect(result.text).toBe("Simple transcription result.");
expect(result.language).toBe("en"); // Falls back to config language
expect(result.durationSeconds).toBeUndefined();
expect(result.segments).toBeUndefined();
});
});
});
// ==========================================
// Error handling
// ==========================================
describe("error handling", () => {
it("should throw a descriptive error on connection refused", async () => {
const connectionError = new Error("connect ECONNREFUSED 127.0.0.1:8000");
mockCreate.mockRejectedValueOnce(connectionError);
const audio = Buffer.from("fake-audio-data");
await expect(provider.transcribe(audio)).rejects.toThrow(
"STT transcription failed: connect ECONNREFUSED 127.0.0.1:8000"
);
});
it("should throw a descriptive error on timeout", async () => {
const timeoutError = new Error("Request timed out");
mockCreate.mockRejectedValueOnce(timeoutError);
const audio = Buffer.from("fake-audio-data");
await expect(provider.transcribe(audio)).rejects.toThrow(
"STT transcription failed: Request timed out"
);
});
it("should throw a descriptive error on API error", async () => {
const apiError = new Error("Invalid model: nonexistent-model");
mockCreate.mockRejectedValueOnce(apiError);
const audio = Buffer.from("fake-audio-data");
await expect(provider.transcribe(audio)).rejects.toThrow(
"STT transcription failed: Invalid model: nonexistent-model"
);
});
it("should handle non-Error thrown values", async () => {
mockCreate.mockRejectedValueOnce("unexpected string error");
const audio = Buffer.from("fake-audio-data");
await expect(provider.transcribe(audio)).rejects.toThrow(
"STT transcription failed: unexpected string error"
);
});
});
// ==========================================
// isHealthy
// ==========================================
describe("isHealthy", () => {
it("should return true when the server is reachable", async () => {
mockModelsList.mockResolvedValueOnce({ data: [{ id: "whisper-1" }] });
const healthy = await provider.isHealthy();
expect(healthy).toBe(true);
});
it("should return false when the server is unreachable", async () => {
mockModelsList.mockRejectedValueOnce(new Error("connect ECONNREFUSED"));
const healthy = await provider.isHealthy();
expect(healthy).toBe(false);
});
it("should not throw on health check failure", async () => {
mockModelsList.mockRejectedValueOnce(new Error("Network error"));
await expect(provider.isHealthy()).resolves.toBe(false);
});
it("should return false on unexpected error types", async () => {
mockModelsList.mockRejectedValueOnce("string error");
const healthy = await provider.isHealthy();
expect(healthy).toBe(false);
});
});
// ==========================================
// Config injection
// ==========================================
// These tests read mockOpenAIConstructorCalls, which the hoisted mock fills
// with every config object passed to `new OpenAI(...)`.
describe("config injection", () => {
it("should create OpenAI client with baseURL from config", () => {
// The constructor was called in beforeEach
expect(mockOpenAIConstructorCalls).toHaveLength(1);
expect(mockOpenAIConstructorCalls[0]).toEqual(
expect.objectContaining({
baseURL: "http://speaches:8000/v1",
})
);
});
it("should use custom baseURL from config", () => {
mockOpenAIConstructorCalls.length = 0;
const customConfig = createTestConfig({
baseUrl: "http://custom-speaches:9000/v1",
});
new SpeachesSttProvider(customConfig);
expect(mockOpenAIConstructorCalls).toHaveLength(1);
expect(mockOpenAIConstructorCalls[0]).toEqual(
expect.objectContaining({
baseURL: "http://custom-speaches:9000/v1",
})
);
});
it("should use default model from config for transcription", async () => {
const customConfig = createTestConfig({
model: "Systran/faster-whisper-small",
});
const customProvider = new SpeachesSttProvider(customConfig);
const mockResponse = createMockVerboseResponse();
mockCreate.mockResolvedValueOnce(mockResponse);
const audio = Buffer.from("fake-audio-data");
await customProvider.transcribe(audio);
const callArgs = mockCreate.mock.calls[0][0];
expect(callArgs.model).toBe("Systran/faster-whisper-small");
});
it("should use default language from config for transcription", async () => {
const customConfig = createTestConfig({ language: "de" });
const customProvider = new SpeachesSttProvider(customConfig);
const mockResponse = createMockVerboseResponse({ language: "de" });
mockCreate.mockResolvedValueOnce(mockResponse);
const audio = Buffer.from("fake-audio-data");
await customProvider.transcribe(audio);
const callArgs = mockCreate.mock.calls[0][0];
expect(callArgs.language).toBe("de");
});
it("should set a dummy API key for local Speaches server", () => {
expect(mockOpenAIConstructorCalls).toHaveLength(1);
expect(mockOpenAIConstructorCalls[0]).toEqual(
expect.objectContaining({
apiKey: "not-needed",
})
);
});
});
});

View File

@@ -0,0 +1,180 @@
/**
* SpeachesSttProvider
*
* Speech-to-text provider using Speaches (faster-whisper backend).
* Connects to the Speaches server via its OpenAI-compatible
* `/v1/audio/transcriptions` endpoint using the OpenAI SDK.
*
* Issue #390
*/
import { Injectable, Inject, Logger } from "@nestjs/common";
import OpenAI from "openai";
import { toFile } from "openai";
import { speechConfig, type SpeechConfig } from "../speech.config";
import type { ISTTProvider } from "../interfaces/stt-provider.interface";
import type {
TranscribeOptions,
TranscriptionResult,
TranscriptionSegment,
} from "../interfaces/speech-types";
/**
 * Lookup table from audio MIME type to file extension.
 * Hoisted to module scope so it is built once instead of on every call.
 */
const MIME_EXTENSION_MAP: Record<string, string> = {
  "audio/wav": "wav",
  "audio/wave": "wav",
  "audio/x-wav": "wav",
  "audio/mp3": "mp3",
  "audio/mpeg": "mp3",
  "audio/mp4": "mp4",
  "audio/m4a": "m4a",
  "audio/ogg": "ogg",
  "audio/flac": "flac",
  "audio/webm": "webm",
  "audio/mpga": "mpga",
};
/**
 * Derive file extension from a MIME type for use in the uploaded file name.
 * Unknown MIME types fall back to "wav".
 *
 * @param mimeType - Audio MIME type (e.g. "audio/mpeg")
 * @returns File extension without a leading dot
 */
function extensionFromMimeType(mimeType: string): string {
  return MIME_EXTENSION_MAP[mimeType] ?? "wav";
}
/**
* STT provider backed by a Speaches (faster-whisper) server.
*
* Speaches exposes an OpenAI-compatible `/v1/audio/transcriptions` endpoint,
* so we re-use the official OpenAI SDK with a custom `baseURL`.
*
* @example
* ```typescript
* const provider = new SpeachesSttProvider(speechConfig);
* const result = await provider.transcribe(audioBuffer, { language: "en" });
* console.log(result.text);
* ```
*/
@Injectable()
export class SpeachesSttProvider implements ISTTProvider {
readonly name = "speaches";
private readonly logger = new Logger(SpeachesSttProvider.name);
private readonly client: OpenAI;
private readonly config: SpeechConfig;
/**
 * @param config - Injected speech configuration providing the STT base URL,
 *   default model, and default language.
 */
constructor(
@Inject(speechConfig.KEY)
config: SpeechConfig
) {
this.config = config;
// Speaches speaks the OpenAI transcription API, so the official SDK is
// reused with a custom baseURL pointing at the Speaches server.
this.client = new OpenAI({
baseURL: config.stt.baseUrl,
apiKey: "not-needed", // Speaches does not require an API key
});
this.logger.log(
`Speaches STT provider initialized (endpoint: ${config.stt.baseUrl}, model: ${config.stt.model})`
);
}
/**
 * Transcribe audio data to text using the Speaches server.
 *
 * Sends the audio buffer to the `/v1/audio/transcriptions` endpoint with
 * `response_format=verbose_json` so segment and duration metadata is
 * included when the server provides it.
 *
 * @param audio - Raw audio data as a Buffer
 * @param options - Optional transcription parameters (model, language, prompt, temperature)
 * @returns Transcription result with text, language, duration, and optional segments
 * @throws {Error} If transcription fails (connection error, API error, etc.)
 */
async transcribe(audio: Buffer, options?: TranscribeOptions): Promise<TranscriptionResult> {
  const sttDefaults = this.config.stt;
  const model = options?.model ?? sttDefaults.model;
  const language = options?.language ?? sttDefaults.language;
  const mimeType = options?.mimeType ?? "audio/wav";
  try {
    // Wrap the raw buffer as an uploadable file, named after its MIME type.
    const uploadFile = await toFile(audio, `audio.${extensionFromMimeType(mimeType)}`, {
      type: mimeType,
    });
    const response = await this.client.audio.transcriptions.create({
      file: uploadFile,
      model,
      language,
      response_format: "verbose_json",
      // Only forward optional tuning parameters the caller actually set.
      ...(options?.prompt === undefined ? {} : { prompt: options.prompt }),
      ...(options?.temperature === undefined ? {} : { temperature: options.temperature }),
    });
    return this.mapResponse(response, language);
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    this.logger.error(`Transcription failed: ${message}`);
    throw new Error(`STT transcription failed: ${message}`);
  }
}
/**
* Check if the Speaches server is healthy and reachable.
*
* Attempts to list models from the server. Returns true if the request
* succeeds, false otherwise.
*
* @returns true if the Speaches server is reachable and ready
*/
async isHealthy(): Promise<boolean> {
try {
await this.client.models.list();
return true;
} catch (error: unknown) {
const message = error instanceof Error ? error.message : String(error);
this.logger.warn(`Speaches health check failed: ${message}`);
return false;
}
}
/**
* Map the OpenAI SDK transcription response to our TranscriptionResult type.
*
* Handles both verbose responses (with duration, segments) and simple
* responses (text only).
*/
private mapResponse(
response: OpenAI.Audio.Transcriptions.TranscriptionVerbose | Record<string, unknown>,
fallbackLanguage: string
): TranscriptionResult {
const text = (response as { text: string }).text;
const verboseResponse = response as {
text: string;
language?: string;
duration?: number;
segments?: {
text: string;
start: number;
end: number;
}[];
};
const result: TranscriptionResult = {
text,
language: verboseResponse.language ?? fallbackLanguage,
};
if (verboseResponse.duration !== undefined) {
result.durationSeconds = verboseResponse.duration;
}
if (verboseResponse.segments !== undefined && Array.isArray(verboseResponse.segments)) {
result.segments = verboseResponse.segments.map(
(segment): TranscriptionSegment => ({
text: segment.text,
start: segment.start,
end: segment.end,
})
);
}
return result;
}
}

View File

@@ -0,0 +1,279 @@
/**
* TTS Provider Factory Unit Tests
*
* Tests the factory that creates and registers TTS providers based on config.
*
* Issue #391
*/
import { describe, it, expect, vi } from "vitest";
import { createTTSProviders } from "./tts-provider.factory";
import type { SpeechConfig } from "../speech.config";
import type { SpeechTier } from "../interfaces/speech-types";
// ==========================================
// Mock OpenAI SDK
// ==========================================
// NOTE(review): vitest hoists vi.mock() calls above the imports, so this
// factory must stay self-contained (no references to variables declared
// elsewhere in the module).
vi.mock("openai", () => {
  // Minimal stand-in for the OpenAI client: only the surface the TTS
  // providers touch (client.audio.speech.create) is stubbed.
  class MockOpenAI {
    audio = {
      speech: {
        create: vi.fn(),
      },
    };
  }
  // Replace the package's default export (the OpenAI class).
  return { default: MockOpenAI };
});
// ==========================================
// Test helpers
// ==========================================

/**
 * Build a complete SpeechConfig for tests, with every feature disabled.
 * Top-level sections (stt / tts / limits) can be swapped out via `overrides`.
 */
function createTestConfig(overrides?: Partial<SpeechConfig>): SpeechConfig {
  const base: SpeechConfig = {
    stt: {
      enabled: false,
      baseUrl: "http://speaches:8000/v1",
      model: "whisper",
      language: "en",
    },
    tts: {
      default: {
        enabled: false,
        url: "http://kokoro-tts:8880/v1",
        voice: "af_heart",
        format: "mp3",
      },
      premium: { enabled: false, url: "http://chatterbox-tts:8881/v1" },
      fallback: { enabled: false, url: "http://openedai-speech:8000/v1" },
    },
    limits: {
      maxUploadSize: 25_000_000,
      maxDurationSeconds: 600,
      maxTextLength: 4096,
    },
  };
  // Shallow merge: an override replaces its whole top-level section.
  return { ...base, ...overrides };
}
describe("createTTSProviders", () => {
// ==========================================
// Empty map when nothing enabled
// ==========================================
describe("when no TTS tiers are enabled", () => {
it("should return an empty map", () => {
const config = createTestConfig();
const providers = createTTSProviders(config);
expect(providers).toBeInstanceOf(Map);
expect(providers.size).toBe(0);
});
});
// ==========================================
// Default tier
// ==========================================
describe("when default tier is enabled", () => {
it("should create a provider for the default tier", () => {
const config = createTestConfig({
tts: {
default: {
enabled: true,
url: "http://kokoro-tts:8880/v1",
voice: "af_heart",
format: "mp3",
},
premium: { enabled: false, url: "" },
fallback: { enabled: false, url: "" },
},
});
const providers = createTTSProviders(config);
expect(providers.size).toBe(1);
expect(providers.has("default")).toBe(true);
const provider = providers.get("default");
expect(provider).toBeDefined();
expect(provider?.tier).toBe("default");
expect(provider?.name).toBe("kokoro");
});
});
// ==========================================
// Premium tier
// ==========================================
describe("when premium tier is enabled", () => {
it("should create a provider for the premium tier", () => {
const config = createTestConfig({
tts: {
default: { enabled: false, url: "", voice: "", format: "" },
premium: {
enabled: true,
url: "http://chatterbox-tts:8881/v1",
},
fallback: { enabled: false, url: "" },
},
});
const providers = createTTSProviders(config);
expect(providers.size).toBe(1);
expect(providers.has("premium")).toBe(true);
const provider = providers.get("premium");
expect(provider).toBeDefined();
expect(provider?.tier).toBe("premium");
expect(provider?.name).toBe("chatterbox");
});
});
// ==========================================
// Fallback tier
// ==========================================
describe("when fallback tier is enabled", () => {
it("should create a provider for the fallback tier", () => {
const config = createTestConfig({
tts: {
default: { enabled: false, url: "", voice: "", format: "" },
premium: { enabled: false, url: "" },
fallback: {
enabled: true,
url: "http://openedai-speech:8000/v1",
},
},
});
const providers = createTTSProviders(config);
expect(providers.size).toBe(1);
expect(providers.has("fallback")).toBe(true);
const provider = providers.get("fallback");
expect(provider).toBeDefined();
expect(provider?.tier).toBe("fallback");
expect(provider?.name).toBe("piper");
});
});
// ==========================================
// Multiple tiers
// ==========================================
describe("when multiple tiers are enabled", () => {
it("should create providers for all enabled tiers", () => {
const config = createTestConfig({
tts: {
default: {
enabled: true,
url: "http://kokoro-tts:8880/v1",
voice: "af_heart",
format: "mp3",
},
premium: {
enabled: true,
url: "http://chatterbox-tts:8881/v1",
},
fallback: {
enabled: true,
url: "http://openedai-speech:8000/v1",
},
},
});
const providers = createTTSProviders(config);
expect(providers.size).toBe(3);
expect(providers.has("default")).toBe(true);
expect(providers.has("premium")).toBe(true);
expect(providers.has("fallback")).toBe(true);
});
it("should create providers only for enabled tiers", () => {
const config = createTestConfig({
tts: {
default: {
enabled: true,
url: "http://kokoro-tts:8880/v1",
voice: "af_heart",
format: "mp3",
},
premium: { enabled: false, url: "" },
fallback: {
enabled: true,
url: "http://openedai-speech:8000/v1",
},
},
});
const providers = createTTSProviders(config);
expect(providers.size).toBe(2);
expect(providers.has("default")).toBe(true);
expect(providers.has("premium")).toBe(false);
expect(providers.has("fallback")).toBe(true);
});
});
// ==========================================
// Provider properties
// ==========================================
describe("provider properties", () => {
it("should implement ITTSProvider interface methods", () => {
const config = createTestConfig({
tts: {
default: {
enabled: true,
url: "http://kokoro-tts:8880/v1",
voice: "af_heart",
format: "mp3",
},
premium: { enabled: false, url: "" },
fallback: { enabled: false, url: "" },
},
});
const providers = createTTSProviders(config);
const provider = providers.get("default");
expect(provider).toBeDefined();
expect(typeof provider?.synthesize).toBe("function");
expect(typeof provider?.listVoices).toBe("function");
expect(typeof provider?.isHealthy).toBe("function");
});
it("should return providers as a Map<SpeechTier, ITTSProvider>", () => {
const config = createTestConfig({
tts: {
default: {
enabled: true,
url: "http://kokoro-tts:8880/v1",
voice: "af_heart",
format: "mp3",
},
premium: { enabled: false, url: "" },
fallback: { enabled: false, url: "" },
},
});
const providers = createTTSProviders(config);
// Verify the map keys are valid SpeechTier values
for (const [tier] of providers) {
expect(["default", "premium", "fallback"]).toContain(tier as SpeechTier);
}
});
});
});

View File

@@ -0,0 +1,112 @@
/**
* TTS Provider Factory
*
* Creates and registers TTS providers based on speech configuration.
* Reads enabled flags and URLs from config and instantiates the appropriate
* provider for each tier.
*
* Each tier maps to a specific TTS engine:
* - default: Kokoro-FastAPI (CPU, always available)
* - premium: Chatterbox (GPU, voice cloning)
* - fallback: Piper via OpenedAI Speech (ultra-lightweight CPU)
*
* Issue #391
*/
import { Logger } from "@nestjs/common";
import { BaseTTSProvider } from "./base-tts.provider";
import type { ITTSProvider } from "../interfaces/tts-provider.interface";
import type { SpeechTier, AudioFormat } from "../interfaces/speech-types";
import type { SpeechConfig } from "../speech.config";
// ==========================================
// Concrete provider classes
// ==========================================

/**
 * Kokoro TTS provider (default tier).
 * CPU-based, always available, Apache 2.0 license.
 *
 * Declares no constructor of its own: the factory passes
 * (baseURL, voice, format) straight through to BaseTTSProvider.
 */
class KokoroProvider extends BaseTTSProvider {
  readonly name = "kokoro";
  readonly tier: SpeechTier = "default";
}
/**
 * Chatterbox TTS provider (premium tier).
 * GPU required, voice cloning capable, MIT license.
 *
 * The premium tier config carries only a URL, so voice ("default") and
 * output format ("mp3") are fixed here.
 * NOTE(review): presumably the Chatterbox OpenAI-compatible shim accepts
 * "default" as a voice name — confirm against the server's voice list.
 */
class ChatterboxProvider extends BaseTTSProvider {
  readonly name = "chatterbox";
  readonly tier: SpeechTier = "premium";
  constructor(baseURL: string) {
    // Hard-coded voice/format; only the base URL is configurable.
    super(baseURL, "default", "mp3");
  }
}
/**
 * Piper TTS provider via OpenedAI Speech (fallback tier).
 * Ultra-lightweight CPU, GPL license.
 *
 * The fallback tier config carries only a URL, so voice ("alloy") and
 * output format ("mp3") are fixed here.
 * NOTE(review): "alloy" looks like OpenedAI Speech's OpenAI-compatible
 * voice alias — confirm it maps to the intended Piper voice.
 */
class PiperProvider extends BaseTTSProvider {
  readonly name = "piper";
  readonly tier: SpeechTier = "fallback";
  constructor(baseURL: string) {
    // Hard-coded voice/format; only the base URL is configurable.
    super(baseURL, "alloy", "mp3");
  }
}
// ==========================================
// Factory function
// ==========================================
const logger = new Logger("TTSProviderFactory");

/**
 * Create and register TTS providers based on the speech configuration.
 *
 * Only creates providers for tiers that are enabled in the config.
 * Returns a Map keyed by SpeechTier for use with the TTS_PROVIDERS injection token.
 *
 * @param config - Speech configuration with TTS tier settings
 * @returns Map of enabled TTS providers keyed by tier
 */
export function createTTSProviders(config: SpeechConfig): Map<SpeechTier, ITTSProvider> {
  const providers = new Map<SpeechTier, ITTSProvider>();
  const { default: defaultTier, premium, fallback } = config.tts;

  // Default tier: Kokoro (the only tier whose voice/format are configurable)
  if (defaultTier.enabled) {
    providers.set(
      "default",
      new KokoroProvider(defaultTier.url, defaultTier.voice, defaultTier.format as AudioFormat)
    );
    logger.log(`Registered default TTS provider: kokoro at ${defaultTier.url}`);
  }

  // Premium tier: Chatterbox
  if (premium.enabled) {
    providers.set("premium", new ChatterboxProvider(premium.url));
    logger.log(`Registered premium TTS provider: chatterbox at ${premium.url}`);
  }

  // Fallback tier: Piper
  if (fallback.enabled) {
    providers.set("fallback", new PiperProvider(fallback.url));
    logger.log(`Registered fallback TTS provider: piper at ${fallback.url}`);
  }

  // Summarize what was registered (or warn when synthesis is unavailable).
  if (providers.size > 0) {
    const tierNames = [...providers.keys()].join(", ");
    logger.log(`TTS providers ready: ${tierNames} (${String(providers.size)} total)`);
  } else {
    logger.warn("No TTS providers are enabled. TTS synthesis will not be available.");
  }

  return providers;
}

View File

@@ -4,36 +4,60 @@
* NestJS module for speech-to-text (STT) and text-to-speech (TTS) services.
* Provides a provider abstraction layer with graceful fallback for TTS tiers.
*
* TTS providers are created dynamically based on configuration:
* - default: Kokoro-FastAPI (CPU, always available)
* - premium: Chatterbox (GPU, voice cloning)
* - fallback: Piper via OpenedAI Speech (ultra-lightweight CPU)
*
* Imports:
* - ConfigModule.forFeature(speechConfig) for speech configuration
*
* Providers:
* - SpeechService: High-level speech operations with provider selection
* - TTS_PROVIDERS: Empty Map<SpeechTier, ITTSProvider> (populated by provider modules)
* - TTS_PROVIDERS: Map<SpeechTier, ITTSProvider> populated by factory based on config
*
* Exports:
* - SpeechService for use by other modules (e.g., controllers, brain)
*
* Issue #389
* Issue #389, #390, #391
*/
import { Module, type OnModuleInit, Logger } from "@nestjs/common";
import { ConfigModule } from "@nestjs/config";
import { speechConfig, validateSpeechConfig } from "./speech.config";
import { ConfigModule, ConfigService } from "@nestjs/config";
import {
speechConfig,
validateSpeechConfig,
isSttEnabled,
type SpeechConfig,
} from "./speech.config";
import { SpeechService } from "./speech.service";
import { TTS_PROVIDERS } from "./speech.constants";
import type { SpeechTier } from "./interfaces/speech-types";
import type { ITTSProvider } from "./interfaces/tts-provider.interface";
import { STT_PROVIDER, TTS_PROVIDERS } from "./speech.constants";
import { SpeachesSttProvider } from "./providers/speaches-stt.provider";
import { createTTSProviders } from "./providers/tts-provider.factory";
@Module({
imports: [ConfigModule.forFeature(speechConfig)],
providers: [
SpeechService,
// Default empty TTS providers map. Provider modules (Kokoro, Chatterbox, etc.)
// will register their providers in subsequent tasks.
// STT provider: conditionally register SpeachesSttProvider when STT is enabled
...(isSttEnabled()
? [
{
provide: STT_PROVIDER,
useClass: SpeachesSttProvider,
},
]
: []),
{
provide: TTS_PROVIDERS,
useFactory: (): Map<SpeechTier, ITTSProvider> => new Map(),
useFactory: (configService: ConfigService) => {
const config = configService.get<SpeechConfig>("speech");
if (!config) {
return new Map();
}
return createTTSProviders(config);
},
inject: [ConfigService],
},
],
exports: [SpeechService],