stack/apps/api/src/speech/providers/piper-tts.provider.ts

/**
 * Piper TTS Provider via OpenedAI Speech
 *
 * Fallback-tier TTS provider using Piper via OpenedAI Speech for
 * ultra-lightweight CPU-only synthesis. Designed for low-resource
 * environments including Raspberry Pi.
 *
 * Features:
 * - OpenAI-compatible API via OpenedAI Speech server
 * - 100+ Piper voices across 40+ languages
 * - 6 standard OpenAI voice names mapped to Piper voices
 * - Output formats: mp3, wav, opus, flac
 * - CPU-only, no GPU required
 * - GPL license (via OpenedAI Speech)
 *
 * Voice names use the OpenAI standard set (alloy, echo, fable, onyx,
 * nova, shimmer) which OpenedAI Speech maps to configured Piper voices.
 *
 * Issue #395
 */

import { BaseTTSProvider } from "./base-tts.provider";
import type { SpeechTier, VoiceInfo, AudioFormat } from "../interfaces/speech-types";

// ==========================================
// Constants
// ==========================================

/**
 * Audio formats supported by OpenedAI Speech with Piper backend.
 *
 * The explicit `readonly AudioFormat[]` annotation takes precedence over the
 * `as const` literal type, so consumers see a read-only array of
 * `AudioFormat` values rather than a fixed-length tuple of string literals.
 */
export const PIPER_SUPPORTED_FORMATS: readonly AudioFormat[] = [
  "mp3",
  "wav",
  "opus",
  "flac",
] as const;

/**
 * Default voice for Piper (via OpenedAI Speech).
 *
 * Supplies the default value of the `defaultVoice` constructor parameter of
 * `PiperTtsProvider`. "alloy" is one of the standard OpenAI voice names
 * listed in `OPENAI_STANDARD_VOICES`.
 */
const PIPER_DEFAULT_VOICE = "alloy";

/**
 * Default audio format for Piper.
 *
 * Supplies the default value of the `defaultFormat` constructor parameter of
 * `PiperTtsProvider`. "mp3" is one of the entries of `PIPER_SUPPORTED_FORMATS`.
 */
const PIPER_DEFAULT_FORMAT: AudioFormat = "mp3";

// ==========================================
// OpenAI standard voice names
// ==========================================

/**
 * The 6 standard OpenAI TTS voice names.
 * OpenedAI Speech accepts these names and routes them to configured Piper voices.
 *
 * Order is significant: `PiperTtsProvider.listVoices()` walks this list front
 * to back, so voices are reported in the order written here. Each name has a
 * matching entry in `PIPER_VOICE_MAP`.
 *
 * The annotation keeps the exported type as `readonly string[]` (not a tuple
 * of literals), so membership checks against arbitrary strings type-check.
 */
export const OPENAI_STANDARD_VOICES: readonly string[] = [
  "alloy",
  "echo",
  "fable",
  "onyx",
  "nova",
  "shimmer",
] as const;

// ==========================================
// Voice mapping
// ==========================================

/**
 * Metadata for a Piper voice mapped from an OpenAI voice name.
 *
 * This is the value type of `PIPER_VOICE_MAP` and of the module-private
 * fallback mapping used when a voice name has no entry in that map.
 */
export interface PiperVoiceMapping {
  /** The underlying Piper voice ID configured in OpenedAI Speech (e.g. "en_US-amy-medium") */
  piperVoice: string;
  /** Human-readable description of the voice character (e.g. "Warm, balanced voice") */
  description: string;
  /** Gender of the voice; shown as "Female" / "Male" in the display names built by listVoices() */
  gender: "female" | "male";
  /** BCP 47 language code (e.g. "en-US", "en-GB") */
  language: string;
}

/**
 * Fallback mapping used when a voice ID is not found in PIPER_VOICE_MAP.
 *
 * `PiperTtsProvider.listVoices()` substitutes this (via `??`) whenever a
 * lookup in `PIPER_VOICE_MAP` yields `undefined`. It uses the same Piper
 * voice, gender and language as the "alloy" entry, with a generic description.
 */
const DEFAULT_MAPPING: PiperVoiceMapping = {
  piperVoice: "en_US-amy-medium",
  description: "Default voice",
  gender: "female",
  language: "en-US",
};

/**
 * Mapping of OpenAI standard voice names to their default Piper voice
 * configuration in OpenedAI Speech.
 *
 * These are the default mappings that OpenedAI Speech uses when configured
 * with Piper as the TTS backend. The actual Piper voice used can be
 * customized in the OpenedAI Speech configuration file.
 *
 * Default Piper voice assignments:
 * - alloy: en_US-amy-medium (warm, balanced female)
 * - echo: en_US-ryan-medium (clear, articulate male)
 * - fable: en_GB-alan-medium (British male narrator)
 * - onyx: en_US-danny-low (deep, resonant male)
 * - nova: en_US-lessac-medium (expressive female)
 * - shimmer: en_US-kristin-medium (bright, energetic female)
 *
 * Keys are the names listed in `OPENAI_STANDARD_VOICES`. The `description`
 * strings deliberately omit gender: `PiperTtsProvider.listVoices()` adds a
 * "Female"/"Male" label itself when it builds each display name.
 *
 * In the code visible in this file the table is only read for display
 * metadata (description, gender, language); `piperVoice` is not sent anywhere.
 * NOTE(review): confirm no other module relies on `piperVoice` for routing
 * before changing these IDs.
 */
export const PIPER_VOICE_MAP: Record<string, PiperVoiceMapping> = {
  alloy: {
    piperVoice: "en_US-amy-medium",
    description: "Warm, balanced voice",
    gender: "female",
    language: "en-US",
  },
  echo: {
    piperVoice: "en_US-ryan-medium",
    description: "Clear, articulate voice",
    gender: "male",
    language: "en-US",
  },
  fable: {
    piperVoice: "en_GB-alan-medium",
    description: "British narrator voice",
    gender: "male",
    language: "en-GB",
  },
  onyx: {
    piperVoice: "en_US-danny-low",
    description: "Deep, resonant voice",
    gender: "male",
    language: "en-US",
  },
  nova: {
    piperVoice: "en_US-lessac-medium",
    description: "Expressive, versatile voice",
    gender: "female",
    language: "en-US",
  },
  shimmer: {
    piperVoice: "en_US-kristin-medium",
    description: "Bright, energetic voice",
    gender: "female",
    language: "en-US",
  },
};

// ==========================================
// Provider class
// ==========================================

/**
 * Fallback-tier TTS provider backed by Piper through OpenedAI Speech.
 *
 * Targets an OpenedAI Speech server, which exposes an OpenAI-compatible API
 * in front of Piper, an ultra-lightweight CPU-only text-to-speech engine.
 *
 * Intended for:
 * - Hosts without a GPU
 * - Low-resource devices (Raspberry Pi, ARM SBCs)
 * - Use as a fallback when primary TTS engines are unavailable
 * - High-volume, low-latency synthesis needs
 *
 * This class only supplies the provider identity (`name`, `tier`), the
 * Piper-specific constructor defaults, and a static voice list; everything
 * else comes from `BaseTTSProvider`.
 *
 * The voice list covers the 6 standard OpenAI voice names (alloy, echo,
 * fable, onyx, nova, shimmer), which OpenedAI Speech maps to its configured
 * Piper voices. Additional Piper voices (100+ across 40+ languages) can be
 * accessed by passing the Piper voice ID directly.
 *
 * @example
 * ```typescript
 * const piper = new PiperTtsProvider("http://openedai-speech:8000/v1");
 * const voices = await piper.listVoices();
 * const result = await piper.synthesize("Hello!", { voice: "alloy" });
 * ```
 */
export class PiperTtsProvider extends BaseTTSProvider {
  readonly name = "piper";
  readonly tier: SpeechTier = "fallback";

  /**
   * Build a provider pointed at an OpenedAI Speech endpoint.
   *
   * All three arguments are forwarded unchanged to the base class; this
   * constructor exists to supply the Piper-specific defaults.
   *
   * @param baseURL - Base URL for the OpenedAI Speech endpoint (e.g. "http://openedai-speech:8000/v1")
   * @param defaultVoice - Default OpenAI voice name (defaults to "alloy")
   * @param defaultFormat - Default audio format (defaults to "mp3")
   */
  constructor(
    baseURL: string,
    defaultVoice: string = PIPER_DEFAULT_VOICE,
    defaultFormat: AudioFormat = PIPER_DEFAULT_FORMAT
  ) {
    super(baseURL, defaultVoice, defaultFormat);
  }

  /**
   * Describe the voices this provider advertises.
   *
   * Produces one entry per standard OpenAI voice name, in the order of
   * `OPENAI_STANDARD_VOICES`, enriched with the gender, description and
   * language recorded for its Piper mapping. The entry whose id equals the
   * provider's default voice is flagged with `isDefault`.
   *
   * The list is built from module constants only; no request is made here.
   *
   * @returns Promise resolving to the VoiceInfo entries for all mapped voices
   */
  override listVoices(): Promise<VoiceInfo[]> {
    const catalogue: VoiceInfo[] = [];

    for (const openAiName of OPENAI_STANDARD_VOICES) {
      // Fall back to the generic mapping if a name has no entry in the map.
      const { description, gender, language } = PIPER_VOICE_MAP[openAiName] ?? DEFAULT_MAPPING;

      // Capitalise the voice id for display, e.g. "alloy" -> "Alloy".
      const displayName = `${openAiName.charAt(0).toUpperCase()}${openAiName.slice(1)}`;
      const genderText = gender !== "female" ? "Male" : "Female";

      catalogue.push({
        id: openAiName,
        name: `${displayName} (${genderText} - ${description})`,
        language,
        tier: this.tier,
        isDefault: this.defaultVoice === openAiName,
      });
    }

    // Already-computed value wrapped in a resolved promise to match the declared return type.
    return Promise.resolve(catalogue);
  }
}