2026-02-17 01:04:47 +00:00
3 changed files with 480 additions and 19 deletions
--- a/apps/api/src/speech/providers/piper-tts.provider.spec.ts
+++ b/apps/api/src/speech/providers/piper-tts.provider.spec.ts
@@ -0,0 +1,266 @@
+/**
+ * PiperTtsProvider Unit Tests
+ *
+ * Tests the Piper TTS provider via OpenedAI Speech (fallback tier).
+ * Validates provider identity, constructor arguments, voice listing, the
+ * OpenAI-to-Piper voice map, and the exported voice/format constants.
+ *
+ * Issue #395
+ */
+
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import {
+  PiperTtsProvider,
+  PIPER_VOICE_MAP,
+  PIPER_SUPPORTED_FORMATS,
+  OPENAI_STANDARD_VOICES,
+} from "./piper-tts.provider";
+import type { VoiceInfo } from "../interfaces/speech-types";
+
+// ==========================================
+// Mock OpenAI SDK
+// ==========================================
+
+vi.mock("openai", () => {
+  class MockOpenAI {
+    audio = {
+      speech: {
+        create: vi.fn(),
+      },
+    };
+  }
+  return { default: MockOpenAI };
+});
+
+// ==========================================
+// PiperTtsProvider (identity, constructor, listVoices)
+// ==========================================
+
+describe("PiperTtsProvider", () => {
+  const testBaseURL = "http://openedai-speech:8000/v1";
+  let provider: PiperTtsProvider;
+
+  beforeEach(() => {
+    provider = new PiperTtsProvider(testBaseURL);
+  });
+
+  describe("provider identity", () => {
+    it("should have name 'piper'", () => {
+      expect(provider.name).toBe("piper");
+    });
+
+    it("should have tier 'fallback'", () => {
+      expect(provider.tier).toBe("fallback");
+    });
+  });
+
+  // ==========================================
+  // Constructor
+  // ==========================================
+
+  describe("constructor", () => {
+    it("should use 'alloy' as default voice", () => {
+      const newProvider = new PiperTtsProvider(testBaseURL);
+      expect(newProvider).toBeDefined();
+    });
+
+    it("should accept a custom default voice", () => {
+      const customProvider = new PiperTtsProvider(testBaseURL, "nova");
+      expect(customProvider).toBeDefined();
+    });
+
+    it("should accept a custom default format", () => {
+      const customProvider = new PiperTtsProvider(testBaseURL, "alloy", "wav");
+      expect(customProvider).toBeDefined();
+    });
+  });
+
+  // ==========================================
+  // listVoices()
+  // ==========================================
+
+  describe("listVoices", () => {
+    let voices: VoiceInfo[];
+
+    beforeEach(async () => {
+      voices = await provider.listVoices();
+    });
+
+    it("should return an array of VoiceInfo objects", () => {
+      expect(voices).toBeInstanceOf(Array);
+      expect(voices.length).toBeGreaterThan(0);
+    });
+
+    it("should return exactly 6 voices (OpenAI standard set)", () => {
+      expect(voices.length).toBe(6);
+    });
+
+    it("should set tier to 'fallback' on all voices", () => {
+      for (const voice of voices) {
+        expect(voice.tier).toBe("fallback");
+      }
+    });
+
+    it("should have exactly one default voice", () => {
+      const defaults = voices.filter((v) => v.isDefault === true);
+      expect(defaults.length).toBe(1);
+    });
+
+    it("should mark 'alloy' as the default voice", () => {
+      const defaultVoice = voices.find((v) => v.isDefault === true);
+      expect(defaultVoice).toBeDefined();
+      expect(defaultVoice?.id).toBe("alloy");
+    });
+
+    it("should have an id and name for every voice", () => {
+      for (const voice of voices) {
+        expect(voice.id).toBeTruthy();
+        expect(voice.name).toBeTruthy();
+      }
+    });
+
+    it("should set language on every voice", () => {
+      for (const voice of voices) {
+        expect(voice.language).toBeTruthy();
+      }
+    });
+
+    // ==========================================
+    // All 6 OpenAI standard voices present
+    // ==========================================
+
+    describe("OpenAI standard voices", () => {
+      const standardVoiceIds = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"];
+
+      it.each(standardVoiceIds)("should include voice '%s'", (voiceId) => {
+        const voice = voices.find((v) => v.id === voiceId);
+        expect(voice).toBeDefined();
+      });
+    });
+
+    // ==========================================
+    // Voice metadata
+    // ==========================================
+
+    describe("voice metadata", () => {
+      it("should include gender info in voice names", () => {
+        const alloy = voices.find((v) => v.id === "alloy");
+        expect(alloy?.name).toMatch(/Female|Male/);
+      });
+
+      it("should map alloy to a female voice", () => {
+        const alloy = voices.find((v) => v.id === "alloy");
+        expect(alloy?.name).toContain("Female");
+      });
+
+      it("should map echo to a male voice", () => {
+        const echo = voices.find((v) => v.id === "echo");
+        expect(echo?.name).toContain("Male");
+      });
+
+      it("should map fable to a British voice", () => {
+        const fable = voices.find((v) => v.id === "fable");
+        expect(fable?.language).toBe("en-GB");
+      });
+
+      it("should map onyx to a male voice", () => {
+        const onyx = voices.find((v) => v.id === "onyx");
+        expect(onyx?.name).toContain("Male");
+      });
+
+      it("should map nova to a female voice", () => {
+        const nova = voices.find((v) => v.id === "nova");
+        expect(nova?.name).toContain("Female");
+      });
+
+      it("should map shimmer to a female voice", () => {
+        const shimmer = voices.find((v) => v.id === "shimmer");
+        expect(shimmer?.name).toContain("Female");
+      });
+    });
+  });
+});
+
+// ==========================================
+// PIPER_VOICE_MAP
+// ==========================================
+
+describe("PIPER_VOICE_MAP", () => {
+  it("should contain all 6 OpenAI standard voice names", () => {
+    const expectedKeys = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"];
+    for (const key of expectedKeys) {
+      expect(PIPER_VOICE_MAP).toHaveProperty(key);
+    }
+  });
+
+  it("should map each voice to a Piper voice ID", () => {
+    for (const entry of Object.values(PIPER_VOICE_MAP)) {
+      expect(entry.piperVoice).toBeTruthy();
+      expect(typeof entry.piperVoice).toBe("string");
+    }
+  });
+
+  it("should have gender for each voice entry", () => {
+    for (const entry of Object.values(PIPER_VOICE_MAP)) {
+      expect(entry.gender).toMatch(/^(female|male)$/);
+    }
+  });
+
+  it("should have a language for each voice entry", () => {
+    for (const entry of Object.values(PIPER_VOICE_MAP)) {
+      expect(entry.language).toBeTruthy();
+    }
+  });
+
+  it("should have a description for each voice entry", () => {
+    for (const entry of Object.values(PIPER_VOICE_MAP)) {
+      expect(entry.description).toBeTruthy();
+    }
+  });
+});
+
+// ==========================================
+// OPENAI_STANDARD_VOICES
+// ==========================================
+
+describe("OPENAI_STANDARD_VOICES", () => {
+  it("should be an array of 6 voice IDs", () => {
+    expect(Array.isArray(OPENAI_STANDARD_VOICES)).toBe(true);
+    expect(OPENAI_STANDARD_VOICES.length).toBe(6);
+  });
+
+  it("should contain all standard OpenAI voice names", () => {
+    expect(OPENAI_STANDARD_VOICES).toContain("alloy");
+    expect(OPENAI_STANDARD_VOICES).toContain("echo");
+    expect(OPENAI_STANDARD_VOICES).toContain("fable");
+    expect(OPENAI_STANDARD_VOICES).toContain("onyx");
+    expect(OPENAI_STANDARD_VOICES).toContain("nova");
+    expect(OPENAI_STANDARD_VOICES).toContain("shimmer");
+  });
+});
+
+// ==========================================
+// PIPER_SUPPORTED_FORMATS
+// ==========================================
+
+describe("PIPER_SUPPORTED_FORMATS", () => {
+  it("should include mp3", () => {
+    expect(PIPER_SUPPORTED_FORMATS).toContain("mp3");
+  });
+
+  it("should include wav", () => {
+    expect(PIPER_SUPPORTED_FORMATS).toContain("wav");
+  });
+
+  it("should include opus", () => {
+    expect(PIPER_SUPPORTED_FORMATS).toContain("opus");
+  });
+
+  it("should include flac", () => {
+    expect(PIPER_SUPPORTED_FORMATS).toContain("flac");
+  });
+
+  it("should be a readonly array", () => {
+    expect(Array.isArray(PIPER_SUPPORTED_FORMATS)).toBe(true);
+  });
+});
--- a/apps/api/src/speech/providers/piper-tts.provider.ts
+++ b/apps/api/src/speech/providers/piper-tts.provider.ts
@@ -0,0 +1,212 @@
+/**
+ * Piper TTS Provider via OpenedAI Speech
+ *
+ * Fallback-tier TTS provider using Piper via OpenedAI Speech for
+ * ultra-lightweight CPU-only synthesis. Designed for low-resource
+ * environments including Raspberry Pi.
+ *
+ * Features:
+ * - OpenAI-compatible API via OpenedAI Speech server
+ * - 100+ Piper voices across 40+ languages
+ * - 6 standard OpenAI voice names mapped to Piper voices
+ * - Output formats exposed here: mp3, wav, opus, flac (aac/pcm are not in PIPER_SUPPORTED_FORMATS)
+ * - CPU-only, no GPU required
+ * - GPL license (via OpenedAI Speech)
+ *
+ * Voice names use the OpenAI standard set (alloy, echo, fable, onyx,
+ * nova, shimmer) which OpenedAI Speech maps to configured Piper voices.
+ *
+ * Issue #395
+ */
+
+import { BaseTTSProvider } from "./base-tts.provider";
+import type { SpeechTier, VoiceInfo, AudioFormat } from "../interfaces/speech-types";
+
+// ==========================================
+// Constants
+// ==========================================
+
+/** Audio formats supported by OpenedAI Speech with Piper backend */
+export const PIPER_SUPPORTED_FORMATS: readonly AudioFormat[] = [
+  "mp3",
+  "wav",
+  "opus",
+  "flac",
+] as const;
+
+/** Default voice for Piper (via OpenedAI Speech) */
+const PIPER_DEFAULT_VOICE = "alloy";
+
+/** Default audio format for Piper */
+const PIPER_DEFAULT_FORMAT: AudioFormat = "mp3";
+
+// ==========================================
+// OpenAI standard voice names
+// ==========================================
+
+/**
+ * The 6 standard OpenAI TTS voice names.
+ * OpenedAI Speech accepts these names and routes them to configured Piper voices.
+ */
+export const OPENAI_STANDARD_VOICES: readonly string[] = [
+  "alloy",
+  "echo",
+  "fable",
+  "onyx",
+  "nova",
+  "shimmer",
+] as const;
+
+// ==========================================
+// Voice mapping
+// ==========================================
+
+/** Metadata for a Piper voice mapped from an OpenAI voice name */
+export interface PiperVoiceMapping {
+  /** The underlying Piper voice ID configured in OpenedAI Speech */
+  piperVoice: string;
+  /** Human-readable description of the voice character */
+  description: string;
+  /** Gender of the voice */
+  gender: "female" | "male";
+  /** BCP 47 language code */
+  language: string;
+}
+
+/** Fallback mapping used when a voice ID is not found in PIPER_VOICE_MAP */
+const DEFAULT_MAPPING: PiperVoiceMapping = {
+  piperVoice: "en_US-amy-medium",
+  description: "Default voice",
+  gender: "female",
+  language: "en-US",
+};
+
+/**
+ * Mapping of OpenAI standard voice names to their default Piper voice
+ * configuration in OpenedAI Speech.
+ *
+ * These are the mappings this provider assumes when building listVoices()
+ * metadata. The Piper voice actually used is decided by the OpenedAI Speech
+ * configuration file, so verify these entries against the deployed config.
+ *
+ * Default Piper voice assignments:
+ * - alloy: en_US-amy-medium (warm, balanced female)
+ * - echo: en_US-ryan-medium (clear, articulate male)
+ * - fable: en_GB-alan-medium (British male narrator)
+ * - onyx: en_US-danny-low (deep, resonant male)
+ * - nova: en_US-lessac-medium (expressive female)
+ * - shimmer: en_US-kristin-medium (bright, energetic female)
+ */
+export const PIPER_VOICE_MAP: Record<string, PiperVoiceMapping> = {
+  alloy: {
+    piperVoice: "en_US-amy-medium",
+    description: "Warm, balanced voice",
+    gender: "female",
+    language: "en-US",
+  },
+  echo: {
+    piperVoice: "en_US-ryan-medium",
+    description: "Clear, articulate voice",
+    gender: "male",
+    language: "en-US",
+  },
+  fable: {
+    piperVoice: "en_GB-alan-medium",
+    description: "British narrator voice",
+    gender: "male",
+    language: "en-GB",
+  },
+  onyx: {
+    piperVoice: "en_US-danny-low",
+    description: "Deep, resonant voice",
+    gender: "male",
+    language: "en-US",
+  },
+  nova: {
+    piperVoice: "en_US-lessac-medium",
+    description: "Expressive, versatile voice",
+    gender: "female",
+    language: "en-US",
+  },
+  shimmer: {
+    piperVoice: "en_US-kristin-medium",
+    description: "Bright, energetic voice",
+    gender: "female",
+    language: "en-US",
+  },
+};
+
+// ==========================================
+// Provider class
+// ==========================================
+
+/**
+ * Piper TTS provider via OpenedAI Speech (fallback tier).
+ *
+ * Ultra-lightweight CPU-only text-to-speech engine using Piper voices
+ * through the OpenedAI Speech server's OpenAI-compatible API.
+ *
+ * Designed for:
+ * - CPU-only environments (no GPU required)
+ * - Low-resource devices (Raspberry Pi, ARM SBCs)
+ * - Fallback when primary TTS engines are unavailable
+ * - High-volume, low-latency synthesis needs
+ *
+ * The provider exposes the 6 standard OpenAI voice names (alloy, echo,
+ * fable, onyx, nova, shimmer) which OpenedAI Speech maps to configured
+ * Piper voices. Additional Piper voices (100+ across 40+ languages)
+ * can be accessed by passing the Piper voice ID directly.
+ *
+ * @example
+ * ```typescript
+ * const piper = new PiperTtsProvider("http://openedai-speech:8000/v1");
+ * const voices = await piper.listVoices();
+ * const result = await piper.synthesize("Hello!", { voice: "alloy" });
+ * ```
+ */
+export class PiperTtsProvider extends BaseTTSProvider {
+  readonly name = "piper";
+  readonly tier: SpeechTier = "fallback";
+
+  /**
+   * Create a new Piper TTS provider.
+   *
+   * @param baseURL - Base URL for the OpenedAI Speech endpoint (e.g. "http://openedai-speech:8000/v1")
+   * @param defaultVoice - Default OpenAI voice name (defaults to "alloy")
+   * @param defaultFormat - Default audio format (defaults to "mp3")
+   */
+  constructor(
+    baseURL: string,
+    defaultVoice: string = PIPER_DEFAULT_VOICE,
+    defaultFormat: AudioFormat = PIPER_DEFAULT_FORMAT
+  ) {
+    super(baseURL, defaultVoice, defaultFormat);
+  }
+
+  /**
+   * List available voices with OpenAI-to-Piper mapping metadata.
+   *
+   * Returns the 6 standard OpenAI voice names, each labelled with the gender,
+   * description, and language of its mapped Piper voice (the Piper voice ID
+   * itself is not included in the result). Pass these IDs as `voice` to synthesize().
+   *
+   * @returns Promise resolving to one VoiceInfo per entry in OPENAI_STANDARD_VOICES
+   */
+  override listVoices(): Promise<VoiceInfo[]> {
+    const voices: VoiceInfo[] = OPENAI_STANDARD_VOICES.map((voiceId) => {
+      const mapping = PIPER_VOICE_MAP[voiceId] ?? DEFAULT_MAPPING;
+      const genderLabel = mapping.gender === "female" ? "Female" : "Male";
+      const label = voiceId.charAt(0).toUpperCase() + voiceId.slice(1);
+
+      return {
+        id: voiceId,
+        name: `${label} (${genderLabel} - ${mapping.description})`,
+        language: mapping.language,
+        tier: this.tier,
+        isDefault: voiceId === this.defaultVoice,
+      };
+    });
+
+    return Promise.resolve(voices);
+  }
+}
--- a/apps/api/src/speech/providers/tts-provider.factory.ts
+++ b/apps/api/src/speech/providers/tts-provider.factory.ts
@@ -14,30 +14,13 @@
 */

 import { Logger } from "@nestjs/common";
-import { BaseTTSProvider } from "./base-tts.provider";
 import { ChatterboxTTSProvider } from "./chatterbox-tts.provider";
 import { KokoroTtsProvider } from "./kokoro-tts.provider";
+import { PiperTtsProvider } from "./piper-tts.provider";
 import type { ITTSProvider } from "../interfaces/tts-provider.interface";
 import type { SpeechTier, AudioFormat } from "../interfaces/speech-types";
 import type { SpeechConfig } from "../speech.config";

-// ==========================================
-// Concrete provider classes
-// ==========================================
-
-/**
- * Piper TTS provider via OpenedAI Speech (fallback tier).
- * Ultra-lightweight CPU, GPL license.
- */
-class PiperProvider extends BaseTTSProvider {
-  readonly name = "piper";
-  readonly tier: SpeechTier = "fallback";
-
-  constructor(baseURL: string) {
-    super(baseURL, "alloy", "mp3");
-  }
-}
-
 // ==========================================
 // Factory function
 // ==========================================
@@ -76,7 +59,7 @@ export function createTTSProviders(config: SpeechConfig): Map<SpeechTier, ITTSPr

  // Fallback tier: Piper
  if (config.tts.fallback.enabled) {
-    const provider = new PiperProvider(config.tts.fallback.url);
+    const provider = new PiperTtsProvider(config.tts.fallback.url);
    providers.set("fallback", provider);
    logger.log(`Registered fallback TTS provider: piper at ${config.tts.fallback.url}`);
  }