feat(#395): implement Piper TTS provider via OpenedAI Speech
All checks were successful
ci/woodpecker/push/api Pipeline was successful
All checks were successful
ci/woodpecker/push/api Pipeline was successful
Add fallback-tier TTS provider using Piper via OpenedAI Speech for ultra-lightweight CPU-only synthesis. Maps 6 standard OpenAI voice names (alloy, echo, fable, onyx, nova, shimmer) to Piper voices. Update factory to use the new PiperTtsProvider class, replacing the inline stub. Includes 37 unit tests covering provider identity, voice mapping, and voice listing. Fixes #395. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
212
apps/api/src/speech/providers/piper-tts.provider.ts
Normal file
212
apps/api/src/speech/providers/piper-tts.provider.ts
Normal file
@@ -0,0 +1,212 @@
|
||||
/**
 * Piper TTS Provider via OpenedAI Speech
 *
 * Fallback-tier TTS provider using Piper via OpenedAI Speech for
 * ultra-lightweight CPU-only synthesis. Designed for low-resource
 * environments including Raspberry Pi.
 *
 * Features:
 * - OpenAI-compatible API via OpenedAI Speech server
 * - 100+ Piper voices across 40+ languages
 * - 6 standard OpenAI voice names mapped to Piper voices
 * - Output formats: mp3, wav, opus, flac, aac, pcm
 * - CPU-only, no GPU required
 * - GPL license (via OpenedAI Speech)
 *
 * Voice names use the OpenAI standard set (alloy, echo, fable, onyx,
 * nova, shimmer) which OpenedAI Speech maps to configured Piper voices.
 *
 * Issue #395
 */
|
||||
|
||||
import { BaseTTSProvider } from "./base-tts.provider";
|
||||
import type { SpeechTier, VoiceInfo, AudioFormat } from "../interfaces/speech-types";
|
||||
|
||||
// ==========================================
// Constants
// ==========================================
|
||||
|
||||
/**
 * Audio formats supported by OpenedAI Speech with the Piper backend, as
 * advertised by this provider.
 *
 * NOTE(review): the file header lists "aac" and "pcm" as output formats as
 * well, but they are not part of this list — confirm whether the omission is
 * intentional before relying on either.
 */
export const PIPER_SUPPORTED_FORMATS: readonly AudioFormat[] = [
  "mp3",
  "wav",
  "opus",
  "flac",
] as const;
|
||||
|
||||
/** Default voice for Piper (via OpenedAI Speech); one of the OpenAI standard voice names. */
const PIPER_DEFAULT_VOICE = "alloy";

/** Default audio format for Piper. */
const PIPER_DEFAULT_FORMAT: AudioFormat = "mp3";
|
||||
|
||||
// ==========================================
// OpenAI standard voice names
// ==========================================
|
||||
|
||||
/**
 * The 6 standard OpenAI TTS voice names.
 *
 * OpenedAI Speech accepts these names and routes them to configured Piper
 * voices. The order here is the order in which voices are listed to callers.
 */
export const OPENAI_STANDARD_VOICES: readonly string[] = [
  "alloy",
  "echo",
  "fable",
  "onyx",
  "nova",
  "shimmer",
] as const;
|
||||
|
||||
// ==========================================
// Voice mapping
// ==========================================
|
||||
|
||||
/** Metadata for a Piper voice mapped from an OpenAI voice name. */
export interface PiperVoiceMapping {
  /** The underlying Piper voice ID configured in OpenedAI Speech (e.g. "en_US-amy-medium"). */
  piperVoice: string;

  /** Human-readable description of the voice character. */
  description: string;

  /** Gender of the voice. */
  gender: "female" | "male";

  /** BCP 47 language code (e.g. "en-US"). */
  language: string;
}
|
||||
|
||||
/**
 * Fallback mapping used when a voice ID is not found in PIPER_VOICE_MAP.
 *
 * Carries a generic description so a missing entry still produces a usable
 * voice listing.
 */
const DEFAULT_MAPPING: PiperVoiceMapping = {
  piperVoice: "en_US-amy-medium",
  description: "Default voice",
  gender: "female",
  language: "en-US",
};
|
||||
|
||||
/**
 * Mapping of OpenAI standard voice names to their default Piper voice
 * configuration in OpenedAI Speech.
 *
 * These are the default mappings that OpenedAI Speech uses when configured
 * with Piper as the TTS backend. The actual Piper voice used can be
 * customized in the OpenedAI Speech configuration file, so this table is
 * descriptive metadata rather than a guarantee of what the server will play.
 *
 * Default Piper voice assignments:
 * - alloy:   en_US-amy-medium     (warm, balanced female)
 * - echo:    en_US-ryan-medium    (clear, articulate male)
 * - fable:   en_GB-alan-medium    (British male narrator)
 * - onyx:    en_US-danny-low      (deep, resonant male)
 * - nova:    en_US-lessac-medium  (expressive female)
 * - shimmer: en_US-kristin-medium (bright, energetic female)
 */
export const PIPER_VOICE_MAP: Record<string, PiperVoiceMapping> = {
  alloy: {
    piperVoice: "en_US-amy-medium",
    description: "Warm, balanced voice",
    gender: "female",
    language: "en-US",
  },
  echo: {
    piperVoice: "en_US-ryan-medium",
    description: "Clear, articulate voice",
    gender: "male",
    language: "en-US",
  },
  fable: {
    piperVoice: "en_GB-alan-medium",
    description: "British narrator voice",
    gender: "male",
    language: "en-GB",
  },
  onyx: {
    piperVoice: "en_US-danny-low",
    description: "Deep, resonant voice",
    gender: "male",
    language: "en-US",
  },
  nova: {
    piperVoice: "en_US-lessac-medium",
    description: "Expressive, versatile voice",
    gender: "female",
    language: "en-US",
  },
  shimmer: {
    piperVoice: "en_US-kristin-medium",
    description: "Bright, energetic voice",
    gender: "female",
    language: "en-US",
  },
};
|
||||
|
||||
// ==========================================
// Provider class
// ==========================================
|
||||
|
||||
/**
 * Piper TTS provider via OpenedAI Speech (fallback tier).
 *
 * Ultra-lightweight CPU-only text-to-speech engine using Piper voices
 * through the OpenedAI Speech server's OpenAI-compatible API.
 *
 * Designed for:
 * - CPU-only environments (no GPU required)
 * - Low-resource devices (Raspberry Pi, ARM SBCs)
 * - Fallback when primary TTS engines are unavailable
 * - High-volume, low-latency synthesis needs
 *
 * The provider exposes the 6 standard OpenAI voice names (alloy, echo,
 * fable, onyx, nova, shimmer) which OpenedAI Speech maps to configured
 * Piper voices. Additional Piper voices (100+ across 40+ languages)
 * can be accessed by passing the Piper voice ID directly.
 *
 * @example
 * ```typescript
 * const piper = new PiperTtsProvider("http://openedai-speech:8000/v1");
 * const voices = await piper.listVoices();
 * const result = await piper.synthesize("Hello!", { voice: "alloy" });
 * ```
 */
export class PiperTtsProvider extends BaseTTSProvider {
  /** Provider identifier. */
  readonly name = "piper";

  /** Speech tier this provider is registered under. */
  readonly tier: SpeechTier = "fallback";

  /**
   * Create a new Piper TTS provider.
   *
   * All three arguments are forwarded unchanged to the base provider's
   * constructor; this class only supplies the Piper-specific defaults.
   *
   * @param baseURL - Base URL for the OpenedAI Speech endpoint (e.g. "http://openedai-speech:8000/v1")
   * @param defaultVoice - Default OpenAI voice name (defaults to "alloy")
   * @param defaultFormat - Default audio format (defaults to "mp3")
   */
  constructor(
    baseURL: string,
    defaultVoice: string = PIPER_DEFAULT_VOICE,
    defaultFormat: AudioFormat = PIPER_DEFAULT_FORMAT
  ) {
    super(baseURL, defaultVoice, defaultFormat);
  }

  /**
   * List available voices with OpenAI-to-Piper mapping metadata.
   *
   * Returns one entry per name in OPENAI_STANDARD_VOICES, in that order,
   * enriched with the gender, description and language recorded in
   * PIPER_VOICE_MAP. The list is built from in-module constants only; no
   * request is made from this method.
   *
   * An entry is flagged `isDefault` only when its ID equals
   * `this.defaultVoice`, so if the provider was constructed with a voice
   * outside the standard set, no entry is flagged.
   *
   * @returns Array of VoiceInfo objects for all mapped Piper voices
   */
  override listVoices(): Promise<VoiceInfo[]> {
    const voices: VoiceInfo[] = OPENAI_STANDARD_VOICES.map((voiceId) => {
      // Fall back to a generic mapping if a standard name has no table entry.
      const mapping = PIPER_VOICE_MAP[voiceId] ?? DEFAULT_MAPPING;
      const genderLabel = mapping.gender === "female" ? "Female" : "Male";
      // Capitalize the voice ID for display, e.g. "alloy" -> "Alloy".
      const label = voiceId.charAt(0).toUpperCase() + voiceId.slice(1);

      return {
        id: voiceId,
        name: `${label} (${genderLabel} - ${mapping.description})`,
        language: mapping.language,
        tier: this.tier,
        // `defaultVoice` is not declared in this class; presumably it is set
        // by the base constructor from the `defaultVoice` argument.
        isDefault: voiceId === this.defaultVoice,
      };
    });

    // The list is computed synchronously; wrap it to satisfy the
    // promise-returning signature.
    return Promise.resolve(voices);
  }
}
|
||||
Reference in New Issue
Block a user