/**
 * Piper TTS Provider via OpenedAI Speech
 *
 * Fallback-tier TTS provider using Piper via OpenedAI Speech for
 * ultra-lightweight CPU-only synthesis. Designed for low-resource
 * environments including Raspberry Pi.
 *
 * Features:
 * - OpenAI-compatible API via OpenedAI Speech server
 * - 100+ Piper voices across 40+ languages
 * - 6 standard OpenAI voice names mapped to Piper voices
 * - Output formats: mp3, wav, opus, flac
 * - CPU-only, no GPU required
 * - GPL license (via OpenedAI Speech)
 *
 * Voice names use the OpenAI standard set (alloy, echo, fable, onyx,
 * nova, shimmer) which OpenedAI Speech maps to configured Piper voices.
 *
 * Issue #395
 */

import { BaseTTSProvider } from "./base-tts.provider";
import type { SpeechTier, VoiceInfo, AudioFormat } from "../interfaces/speech-types";

// ==========================================
// Constants
// ==========================================

/** Audio formats supported by OpenedAI Speech with Piper backend */
export const PIPER_SUPPORTED_FORMATS: readonly AudioFormat[] = [
  "mp3",
  "wav",
  "opus",
  "flac",
] as const;

/** Default voice for Piper (via OpenedAI Speech) */
const PIPER_DEFAULT_VOICE = "alloy";

/** Default audio format for Piper */
const PIPER_DEFAULT_FORMAT: AudioFormat = "mp3";

// ==========================================
// OpenAI standard voice names
// ==========================================
/**
 * The 6 standard OpenAI TTS voice names.
 * OpenedAI Speech accepts these names and routes them to configured Piper voices.
 */
export const OPENAI_STANDARD_VOICES: readonly string[] = [
  "alloy",
  "echo",
  "fable",
  "onyx",
  "nova",
  "shimmer",
] as const;

// ==========================================
// Voice mapping
// ==========================================

/** Metadata for a Piper voice mapped from an OpenAI voice name */
export interface PiperVoiceMapping {
  /** The underlying Piper voice ID configured in OpenedAI Speech */
  piperVoice: string;
  /** Human-readable description of the voice character */
  description: string;
  /** Gender of the voice */
  gender: "female" | "male";
  /** BCP 47 language code */
  language: string;
}

/** Fallback mapping used when a voice ID is not found in PIPER_VOICE_MAP */
const DEFAULT_MAPPING: PiperVoiceMapping = {
  piperVoice: "en_US-amy-medium",
  description: "Default voice",
  gender: "female",
  language: "en-US",
};
/**
 * Mapping of OpenAI standard voice names to their default Piper voice
 * configuration in OpenedAI Speech.
 *
 * These are the default mappings that OpenedAI Speech uses when configured
 * with Piper as the TTS backend. The actual Piper voice used can be
 * customized in the OpenedAI Speech configuration file.
 *
 * Default Piper voice assignments:
 * - alloy:   en_US-amy-medium     (warm, balanced female)
 * - echo:    en_US-ryan-medium    (clear, articulate male)
 * - fable:   en_GB-alan-medium    (British male narrator)
 * - onyx:    en_US-danny-low      (deep, resonant male)
 * - nova:    en_US-lessac-medium  (expressive female)
 * - shimmer: en_US-kristin-medium (bright, energetic female)
 *
 * Keyed by plain `string` so that arbitrary voice IDs can be looked up;
 * a lookup for an unmapped ID yields `undefined` at runtime.
 */
export const PIPER_VOICE_MAP: Record<string, PiperVoiceMapping> = {
  alloy: {
    piperVoice: "en_US-amy-medium",
    description: "Warm, balanced voice",
    gender: "female",
    language: "en-US",
  },
  echo: {
    piperVoice: "en_US-ryan-medium",
    description: "Clear, articulate voice",
    gender: "male",
    language: "en-US",
  },
  fable: {
    piperVoice: "en_GB-alan-medium",
    description: "British narrator voice",
    gender: "male",
    language: "en-GB",
  },
  onyx: {
    piperVoice: "en_US-danny-low",
    description: "Deep, resonant voice",
    gender: "male",
    language: "en-US",
  },
  nova: {
    piperVoice: "en_US-lessac-medium",
    description: "Expressive, versatile voice",
    gender: "female",
    language: "en-US",
  },
  shimmer: {
    piperVoice: "en_US-kristin-medium",
    description: "Bright, energetic voice",
    gender: "female",
    language: "en-US",
  },
};

// ==========================================
// Provider class
// ==========================================
/**
 * Piper TTS provider via OpenedAI Speech (fallback tier).
 *
 * Ultra-lightweight CPU-only text-to-speech engine using Piper voices
 * through the OpenedAI Speech server's OpenAI-compatible API.
 *
 * Designed for:
 * - CPU-only environments (no GPU required)
 * - Low-resource devices (Raspberry Pi, ARM SBCs)
 * - Fallback when primary TTS engines are unavailable
 * - High-volume, low-latency synthesis needs
 *
 * The provider exposes the 6 standard OpenAI voice names (alloy, echo,
 * fable, onyx, nova, shimmer) which OpenedAI Speech maps to configured
 * Piper voices. Additional Piper voices (100+ across 40+ languages)
 * can be accessed by passing the Piper voice ID directly.
 *
 * @example
 * ```typescript
 * const piper = new PiperTtsProvider("http://openedai-speech:8000/v1");
 * const voices = await piper.listVoices();
 * const result = await piper.synthesize("Hello!", { voice: "alloy" });
 * ```
 */
export class PiperTtsProvider extends BaseTTSProvider {
  readonly name = "piper";
  readonly tier: SpeechTier = "fallback";

  /**
   * Create a new Piper TTS provider.
   *
   * @param baseURL - Base URL for the OpenedAI Speech endpoint (e.g. "http://openedai-speech:8000/v1")
   * @param defaultVoice - Default OpenAI voice name (defaults to "alloy")
   * @param defaultFormat - Default audio format (defaults to "mp3")
   */
  constructor(
    baseURL: string,
    defaultVoice: string = PIPER_DEFAULT_VOICE,
    defaultFormat: AudioFormat = PIPER_DEFAULT_FORMAT
  ) {
    super(baseURL, defaultVoice, defaultFormat);
  }

  /**
   * List available voices with OpenAI-to-Piper mapping metadata.
   *
   * Returns the 6 standard OpenAI voice names with information about
   * the underlying Piper voice, gender, and language. These are the
   * voices that can be specified in the `voice` parameter of synthesize().
   *
   * The list is built from the static tables in this module, so the
   * returned promise is already resolved.
   *
   * @returns Array of VoiceInfo objects for all mapped Piper voices
   */
  override listVoices(): Promise<VoiceInfo[]> {
    const voices: VoiceInfo[] = OPENAI_STANDARD_VOICES.map((voiceId) => {
      // Fall back to the default mapping if a standard voice has no map entry.
      const mapping = PIPER_VOICE_MAP[voiceId] ?? DEFAULT_MAPPING;
      const genderLabel = mapping.gender === "female" ? "Female" : "Male";
      // Capitalize the voice ID for display, e.g. "alloy" -> "Alloy".
      const label = voiceId.charAt(0).toUpperCase() + voiceId.slice(1);

      return {
        id: voiceId,
        name: `${label} (${genderLabel} - ${mapping.description})`,
        language: mapping.language,
        tier: this.tier,
        isDefault: voiceId === this.defaultVoice,
      };
    });

    return Promise.resolve(voices);
  }
}