chore: upgrade Node.js runtime to v24 across codebase #419
266
apps/api/src/speech/providers/piper-tts.provider.spec.ts
Normal file
266
apps/api/src/speech/providers/piper-tts.provider.spec.ts
Normal file
@@ -0,0 +1,266 @@
|
||||
/**
|
||||
* PiperTtsProvider Unit Tests
|
||||
*
|
||||
* Tests the Piper TTS provider via OpenedAI Speech (fallback tier).
|
||||
* Validates provider identity, OpenAI voice name mapping, voice listing,
|
||||
* and ultra-lightweight CPU-only design characteristics.
|
||||
*
|
||||
* Issue #395
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi, beforeEach } from "vitest";
|
||||
import {
|
||||
PiperTtsProvider,
|
||||
PIPER_VOICE_MAP,
|
||||
PIPER_SUPPORTED_FORMATS,
|
||||
OPENAI_STANDARD_VOICES,
|
||||
} from "./piper-tts.provider";
|
||||
import type { VoiceInfo } from "../interfaces/speech-types";
|
||||
|
||||
// ==========================================
|
||||
// Mock OpenAI SDK
|
||||
// ==========================================
|
||||
|
||||
/**
 * Replace the "openai" module with a stub whose default export is a class
 * exposing only `audio.speech.create` as a vitest mock function.
 * NOTE(review): presumably this keeps provider construction independent of
 * the real SDK; confirm against BaseTTSProvider.
 */
vi.mock("openai", () => {
  class MockOpenAI {
    audio = {
      speech: {
        create: vi.fn(),
      },
    };
  }

  return { default: MockOpenAI };
});
|
||||
|
||||
// ==========================================
|
||||
// Provider identity
|
||||
// ==========================================
|
||||
|
||||
describe("PiperTtsProvider", () => {
  const testBaseURL = "http://openedai-speech:8000/v1";
  let provider: PiperTtsProvider;

  // Build a fresh provider (default voice/format) before every test.
  beforeEach(() => {
    provider = new PiperTtsProvider(testBaseURL);
  });

  describe("provider identity", () => {
    it("should have name 'piper'", () => {
      expect(provider.name).toBe("piper");
    });

    it("should have tier 'fallback'", () => {
      expect(provider.tier).toBe("fallback");
    });
  });

  // ==========================================
  // Constructor
  // ==========================================

  describe("constructor", () => {
    /** Ids of the voices that a provider flags as default in listVoices(). */
    const defaultVoiceIds = async (target: PiperTtsProvider): Promise<string[]> => {
      const listed = await target.listVoices();
      return listed.filter((v) => v.isDefault === true).map((v) => v.id);
    };

    it("should use 'alloy' as default voice", async () => {
      const newProvider = new PiperTtsProvider(testBaseURL);
      expect(newProvider).toBeDefined();
      // Observe the default through listVoices() rather than only checking construction.
      expect(await defaultVoiceIds(newProvider)).toEqual(["alloy"]);
    });

    it("should accept a custom default voice", async () => {
      const customProvider = new PiperTtsProvider(testBaseURL, "nova");
      expect(customProvider).toBeDefined();
      // The custom voice must be the one (and only one) reported as default.
      expect(await defaultVoiceIds(customProvider)).toEqual(["nova"]);
    });

    it("should accept a custom default format", () => {
      // The default format is not observable through anything exercised in this
      // spec, so this only verifies that construction succeeds.
      const customProvider = new PiperTtsProvider(testBaseURL, "alloy", "wav");
      expect(customProvider).toBeDefined();
    });
  });

  // ==========================================
  // listVoices()
  // ==========================================

  describe("listVoices", () => {
    let voices: VoiceInfo[];

    /** Look up a listed voice by id; reads `voices` at call time. */
    const findVoice = (voiceId: string): VoiceInfo | undefined =>
      voices.find((v) => v.id === voiceId);

    beforeEach(async () => {
      voices = await provider.listVoices();
    });

    it("should return an array of VoiceInfo objects", () => {
      expect(voices).toBeInstanceOf(Array);
      expect(voices.length).toBeGreaterThan(0);
    });

    it("should return exactly 6 voices (OpenAI standard set)", () => {
      expect(voices.length).toBe(6);
    });

    it("should set tier to 'fallback' on all voices", () => {
      for (const voice of voices) {
        expect(voice.tier).toBe("fallback");
      }
    });

    it("should have exactly one default voice", () => {
      const defaults = voices.filter((v) => v.isDefault === true);
      expect(defaults.length).toBe(1);
    });

    it("should mark 'alloy' as the default voice", () => {
      const defaultVoice = voices.find((v) => v.isDefault === true);
      expect(defaultVoice).toBeDefined();
      expect(defaultVoice?.id).toBe("alloy");
    });

    it("should have an id and name for every voice", () => {
      for (const voice of voices) {
        expect(voice.id).toBeTruthy();
        expect(voice.name).toBeTruthy();
      }
    });

    it("should set language on every voice", () => {
      for (const voice of voices) {
        expect(voice.language).toBeTruthy();
      }
    });

    // ==========================================
    // All 6 OpenAI standard voices present
    // ==========================================

    describe("OpenAI standard voices", () => {
      const standardVoiceIds = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"];

      it.each(standardVoiceIds)("should include voice '%s'", (voiceId) => {
        expect(findVoice(voiceId)).toBeDefined();
      });
    });

    // ==========================================
    // Voice metadata
    // ==========================================

    describe("voice metadata", () => {
      it("should include gender info in voice names", () => {
        expect(findVoice("alloy")?.name).toMatch(/Female|Male/);
      });

      it("should map alloy to a female voice", () => {
        expect(findVoice("alloy")?.name).toContain("Female");
      });

      it("should map echo to a male voice", () => {
        // Case-sensitive: "Female" does not contain the capitalised "Male".
        expect(findVoice("echo")?.name).toContain("Male");
      });

      it("should map fable to a British voice", () => {
        expect(findVoice("fable")?.language).toBe("en-GB");
      });

      it("should map onyx to a male voice", () => {
        expect(findVoice("onyx")?.name).toContain("Male");
      });

      it("should map nova to a female voice", () => {
        expect(findVoice("nova")?.name).toContain("Female");
      });

      it("should map shimmer to a female voice", () => {
        expect(findVoice("shimmer")?.name).toContain("Female");
      });
    });
  });
});
|
||||
|
||||
// ==========================================
|
||||
// PIPER_VOICE_MAP
|
||||
// ==========================================
|
||||
|
||||
describe("PIPER_VOICE_MAP", () => {
  // Every per-entry check below iterates the same set of mapping objects.
  const entries = Object.values(PIPER_VOICE_MAP);

  it("should contain all 6 OpenAI standard voice names", () => {
    const expectedKeys = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"];
    for (const key of expectedKeys) {
      expect(PIPER_VOICE_MAP).toHaveProperty(key);
    }
  });

  it("should map each voice to a Piper voice ID", () => {
    for (const entry of entries) {
      expect(entry.piperVoice).toBeTruthy();
      expect(typeof entry.piperVoice).toBe("string");
    }
  });

  it("should have gender for each voice entry", () => {
    for (const entry of entries) {
      expect(entry.gender).toMatch(/^(female|male)$/);
    }
  });

  it("should have a language for each voice entry", () => {
    for (const entry of entries) {
      expect(entry.language).toBeTruthy();
    }
  });

  it("should have a description for each voice entry", () => {
    for (const entry of entries) {
      expect(entry.description).toBeTruthy();
    }
  });
});
|
||||
|
||||
// ==========================================
|
||||
// OPENAI_STANDARD_VOICES
|
||||
// ==========================================
|
||||
|
||||
describe("OPENAI_STANDARD_VOICES", () => {
  it("should be an array of 6 voice IDs", () => {
    expect(Array.isArray(OPENAI_STANDARD_VOICES)).toBe(true);
    expect(OPENAI_STANDARD_VOICES.length).toBe(6);
  });

  it("should contain all standard OpenAI voice names", () => {
    // Hard-coded on purpose: an independent oracle for the exported constant.
    const expectedNames = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"];
    for (const voiceName of expectedNames) {
      expect(OPENAI_STANDARD_VOICES).toContain(voiceName);
    }
  });
});
|
||||
|
||||
// ==========================================
|
||||
// PIPER_SUPPORTED_FORMATS
|
||||
// ==========================================
|
||||
|
||||
describe("PIPER_SUPPORTED_FORMATS", () => {
  // Generates "should include mp3", "should include wav", "should include opus",
  // "should include flac" — the same titles as four hand-written tests.
  it.each(["mp3", "wav", "opus", "flac"])("should include %s", (format) => {
    expect(PIPER_SUPPORTED_FORMATS).toContain(format);
  });

  it("should be a readonly array", () => {
    // `readonly` is a compile-time property; at runtime only array-ness is checkable.
    expect(Array.isArray(PIPER_SUPPORTED_FORMATS)).toBe(true);
  });
});
|
||||
212
apps/api/src/speech/providers/piper-tts.provider.ts
Normal file
212
apps/api/src/speech/providers/piper-tts.provider.ts
Normal file
@@ -0,0 +1,212 @@
|
||||
/**
|
||||
* Piper TTS Provider via OpenedAI Speech
|
||||
*
|
||||
* Fallback-tier TTS provider using Piper via OpenedAI Speech for
|
||||
* ultra-lightweight CPU-only synthesis. Designed for low-resource
|
||||
* environments including Raspberry Pi.
|
||||
*
|
||||
* Features:
|
||||
* - OpenAI-compatible API via OpenedAI Speech server
|
||||
* - 100+ Piper voices across 40+ languages
|
||||
* - 6 standard OpenAI voice names mapped to Piper voices
|
||||
* - Output formats: mp3, wav, opus, flac, aac, pcm (PIPER_SUPPORTED_FORMATS below lists only mp3, wav, opus, flac)
|
||||
* - CPU-only, no GPU required
|
||||
* - GPL license (via OpenedAI Speech)
|
||||
*
|
||||
* Voice names use the OpenAI standard set (alloy, echo, fable, onyx,
|
||||
* nova, shimmer) which OpenedAI Speech maps to configured Piper voices.
|
||||
*
|
||||
* Issue #395
|
||||
*/
|
||||
|
||||
import { BaseTTSProvider } from "./base-tts.provider";
|
||||
import type { SpeechTier, VoiceInfo, AudioFormat } from "../interfaces/speech-types";
|
||||
|
||||
// ==========================================
|
||||
// Constants
|
||||
// ==========================================
|
||||
|
||||
/**
 * Audio formats supported by OpenedAI Speech with Piper backend, as exposed
 * by this module: mp3, wav, opus and flac.
 */
export const PIPER_SUPPORTED_FORMATS: readonly AudioFormat[] = [
  "mp3",
  "wav",
  "opus",
  "flac",
] as const;
|
||||
|
||||
/** Default voice for Piper (via OpenedAI Speech); used when the constructor gets no voice. */
const PIPER_DEFAULT_VOICE = "alloy";

/** Default audio format for Piper; used when the constructor gets no format. */
const PIPER_DEFAULT_FORMAT: AudioFormat = "mp3";
|
||||
|
||||
// ==========================================
|
||||
// OpenAI standard voice names
|
||||
// ==========================================
|
||||
|
||||
/**
 * The 6 standard OpenAI TTS voice names.
 * OpenedAI Speech accepts these names and routes them to configured Piper voices.
 *
 * The order here is the order in which `listVoices()` reports the voices.
 */
export const OPENAI_STANDARD_VOICES: readonly string[] = [
  "alloy",
  "echo",
  "fable",
  "onyx",
  "nova",
  "shimmer",
] as const;
|
||||
|
||||
// ==========================================
|
||||
// Voice mapping
|
||||
// ==========================================
|
||||
|
||||
/** Metadata for a Piper voice mapped from an OpenAI voice name */
export interface PiperVoiceMapping {
  /** The underlying Piper voice ID configured in OpenedAI Speech */
  piperVoice: string;

  /** Human-readable description of the voice character */
  description: string;

  /** Gender of the voice */
  gender: "female" | "male";

  /** BCP 47 language code */
  language: string;
}
|
||||
|
||||
/** Fallback mapping used when a voice ID is not found in PIPER_VOICE_MAP */
const DEFAULT_MAPPING: PiperVoiceMapping = {
  piperVoice: "en_US-amy-medium",
  description: "Default voice",
  gender: "female",
  language: "en-US",
};
|
||||
|
||||
/**
 * Mapping of OpenAI standard voice names to their default Piper voice
 * configuration in OpenedAI Speech.
 *
 * These are the default mappings that OpenedAI Speech uses when configured
 * with Piper as the TTS backend. The actual Piper voice used can be
 * customized in the OpenedAI Speech configuration file.
 *
 * Default Piper voice assignments:
 * - alloy: en_US-amy-medium (warm, balanced female)
 * - echo: en_US-ryan-medium (clear, articulate male)
 * - fable: en_GB-alan-medium (British male narrator)
 * - onyx: en_US-danny-low (deep, resonant male)
 * - nova: en_US-lessac-medium (expressive female)
 * - shimmer: en_US-kristin-medium (bright, energetic female)
 */
export const PIPER_VOICE_MAP: Record<string, PiperVoiceMapping> = {
  alloy: {
    piperVoice: "en_US-amy-medium",
    description: "Warm, balanced voice",
    gender: "female",
    language: "en-US",
  },
  echo: {
    piperVoice: "en_US-ryan-medium",
    description: "Clear, articulate voice",
    gender: "male",
    language: "en-US",
  },
  fable: {
    piperVoice: "en_GB-alan-medium",
    description: "British narrator voice",
    gender: "male",
    language: "en-GB",
  },
  onyx: {
    piperVoice: "en_US-danny-low",
    description: "Deep, resonant voice",
    gender: "male",
    language: "en-US",
  },
  nova: {
    piperVoice: "en_US-lessac-medium",
    description: "Expressive, versatile voice",
    gender: "female",
    language: "en-US",
  },
  shimmer: {
    piperVoice: "en_US-kristin-medium",
    description: "Bright, energetic voice",
    gender: "female",
    language: "en-US",
  },
};
|
||||
|
||||
// ==========================================
|
||||
// Provider class
|
||||
// ==========================================
|
||||
|
||||
/**
 * Piper TTS provider via OpenedAI Speech (fallback tier).
 *
 * Ultra-lightweight CPU-only text-to-speech engine using Piper voices
 * through the OpenedAI Speech server's OpenAI-compatible API.
 *
 * Designed for:
 * - CPU-only environments (no GPU required)
 * - Low-resource devices (Raspberry Pi, ARM SBCs)
 * - Fallback when primary TTS engines are unavailable
 * - High-volume, low-latency synthesis needs
 *
 * The provider exposes the 6 standard OpenAI voice names (alloy, echo,
 * fable, onyx, nova, shimmer) which OpenedAI Speech maps to configured
 * Piper voices. Additional Piper voices (100+ across 40+ languages)
 * can be accessed by passing the Piper voice ID directly.
 *
 * @example
 * ```typescript
 * const piper = new PiperTtsProvider("http://openedai-speech:8000/v1");
 * const voices = await piper.listVoices();
 * const result = await piper.synthesize("Hello!", { voice: "alloy" });
 * ```
 */
export class PiperTtsProvider extends BaseTTSProvider {
  readonly name = "piper";
  readonly tier: SpeechTier = "fallback";

  /**
   * Create a new Piper TTS provider.
   *
   * All three arguments are forwarded unchanged to the base provider constructor.
   *
   * @param baseURL - Base URL for the OpenedAI Speech endpoint (e.g. "http://openedai-speech:8000/v1")
   * @param defaultVoice - Default OpenAI voice name (defaults to "alloy")
   * @param defaultFormat - Default audio format (defaults to "mp3")
   */
  constructor(
    baseURL: string,
    defaultVoice: string = PIPER_DEFAULT_VOICE,
    defaultFormat: AudioFormat = PIPER_DEFAULT_FORMAT
  ) {
    super(baseURL, defaultVoice, defaultFormat);
  }

  /**
   * List available voices with OpenAI-to-Piper mapping metadata.
   *
   * Produces one entry per name in OPENAI_STANDARD_VOICES, in that order.
   * Each entry's `name` has the form `<Id> (<Female|Male> - <description>)`
   * with the id's first letter upper-cased, and `isDefault` is true only for
   * the entry whose id equals `this.defaultVoice`. No request is made here;
   * the list is built from the local constants.
   *
   * @returns Promise resolving to the VoiceInfo objects for all mapped Piper voices
   */
  override listVoices(): Promise<VoiceInfo[]> {
    const voices: VoiceInfo[] = OPENAI_STANDARD_VOICES.map((voiceId) => {
      // Names without an explicit mapping fall back to DEFAULT_MAPPING.
      const mapping = PIPER_VOICE_MAP[voiceId] ?? DEFAULT_MAPPING;
      const genderLabel = mapping.gender === "female" ? "Female" : "Male";
      const label = voiceId.charAt(0).toUpperCase() + voiceId.slice(1);

      return {
        id: voiceId,
        name: `${label} (${genderLabel} - ${mapping.description})`,
        language: mapping.language,
        tier: this.tier,
        isDefault: voiceId === this.defaultVoice,
      };
    });

    return Promise.resolve(voices);
  }
}
|
||||
@@ -14,30 +14,13 @@
|
||||
*/
|
||||
|
||||
import { Logger } from "@nestjs/common";
|
||||
import { BaseTTSProvider } from "./base-tts.provider";
|
||||
import { ChatterboxTTSProvider } from "./chatterbox-tts.provider";
|
||||
import { KokoroTtsProvider } from "./kokoro-tts.provider";
|
||||
import { PiperTtsProvider } from "./piper-tts.provider";
|
||||
import type { ITTSProvider } from "../interfaces/tts-provider.interface";
|
||||
import type { SpeechTier, AudioFormat } from "../interfaces/speech-types";
|
||||
import type { SpeechConfig } from "../speech.config";
|
||||
|
||||
// ==========================================
|
||||
// Concrete provider classes
|
||||
// ==========================================
|
||||
|
||||
/**
 * Piper TTS provider via OpenedAI Speech (fallback tier).
 * Ultra-lightweight CPU, GPL license.
 *
 * Minimal subclass: it fixes the provider name and tier and hands the base
 * constructor the given URL together with the "alloy" voice and "mp3" format.
 */
class PiperProvider extends BaseTTSProvider {
  readonly name = "piper";
  readonly tier: SpeechTier = "fallback";

  /**
   * @param baseURL - Base URL passed through to the base provider
   */
  constructor(baseURL: string) {
    super(baseURL, "alloy", "mp3");
  }
}
|
||||
|
||||
// ==========================================
|
||||
// Factory function
|
||||
// ==========================================
|
||||
@@ -76,7 +59,7 @@ export function createTTSProviders(config: SpeechConfig): Map<SpeechTier, ITTSPr
|
||||
|
||||
// Fallback tier: Piper
|
||||
if (config.tts.fallback.enabled) {
|
||||
const provider = new PiperProvider(config.tts.fallback.url);
|
||||
const provider = new PiperTtsProvider(config.tts.fallback.url);
|
||||
providers.set("fallback", provider);
|
||||
logger.log(`Registered fallback TTS provider: piper at ${config.tts.fallback.url}`);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user