feat(#393): implement Kokoro-FastAPI TTS provider with voice catalog

Extract KokoroTtsProvider from factory into its own module with:

- Full voice catalog of 54 built-in voices across 8 languages
- Voice metadata parsing from ID prefix (language, gender, accent)
- Exported constants for supported formats and speed range
- Comprehensive unit tests (48 tests)
- Fix lint/type errors in chatterbox provider (Prettier + unsafe cast)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 02:27:47 -06:00
parent b5edb4f37e
commit 79b1d81d27
4 changed files with 767 additions and 24 deletions
--- a/apps/api/src/speech/providers/kokoro-tts.provider.spec.ts
+++ b/apps/api/src/speech/providers/kokoro-tts.provider.spec.ts
@@ -0,0 +1,316 @@
+/**
+ * KokoroTtsProvider Unit Tests
+ *
+ * Tests the Kokoro-FastAPI TTS provider with full voice catalog,
+ * voice metadata parsing, and Kokoro-specific feature constants.
+ *
+ * Issue #393
+ */
+
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import {
+  KokoroTtsProvider,
+  KOKORO_SUPPORTED_FORMATS,
+  KOKORO_SPEED_RANGE,
+  KOKORO_VOICES,
+  parseVoicePrefix,
+} from "./kokoro-tts.provider";
+import type { VoiceInfo } from "../interfaces/speech-types";
+
+// ==========================================
+// Mock OpenAI SDK
+// ==========================================
+
+// Replace the "openai" module with a stub: its default export is a class
+// whose instances expose `audio.speech.create` as a `vi.fn()` spy.
+vi.mock("openai", () => {
+  class MockOpenAI {
+    // Field initializer runs per instance, so every client gets its own spy.
+    audio = { speech: { create: vi.fn() } };
+  }
+
+  return { default: MockOpenAI };
+});
+
+// ==========================================
+// KokoroTtsProvider: identity, voice catalog, constructor
+// ==========================================
+
+describe("KokoroTtsProvider", () => {
+  const testBaseURL = "http://kokoro-tts:8880/v1";
+  let provider: KokoroTtsProvider;
+
+  // Every test starts from a freshly constructed provider.
+  beforeEach(() => {
+    provider = new KokoroTtsProvider(testBaseURL);
+  });
+
+  describe("provider identity", () => {
+    // The provider must report these exact identifiers.
+    it("should have name 'kokoro'", () => {
+      expect(provider.name).toBe("kokoro");
+    });
+
+    it("should have tier 'default'", () => {
+      expect(provider.tier).toBe("default");
+    });
+  });
+
+  // ==========================================
+  // listVoices()
+  // ==========================================
+
+  describe("listVoices", () => {
+    // Catalog reported by the provider, re-fetched before every test.
+    let catalog: VoiceInfo[];
+
+    // Resolves an id against whichever catalog the latest beforeEach fetched.
+    const byId = (id: string) => catalog.find((entry) => entry.id === id);
+
+    beforeEach(async () => {
+      catalog = await provider.listVoices();
+    });
+
+    it("should return an array of VoiceInfo objects", () => {
+      expect(catalog).toBeInstanceOf(Array);
+      expect(catalog.length).toBeGreaterThan(0);
+    });
+
+    it("should return at least 10 voices", () => {
+      // Lower bound only. The ids the issue names (af_heart ... bm_lewis) are
+      // checked one by one in the "required voices" group below.
+      expect(catalog.length).toBeGreaterThanOrEqual(10);
+    });
+
+    it("should set tier to 'default' on all voices", () => {
+      catalog.forEach((entry) => {
+        expect(entry.tier).toBe("default");
+      });
+    });
+
+    it("should have exactly one default voice", () => {
+      const flagged = catalog.filter((entry) => entry.isDefault === true);
+      expect(flagged.length).toBe(1);
+    });
+
+    it("should mark af_heart as the default voice", () => {
+      const flagged = catalog.find((entry) => entry.isDefault === true);
+      expect(flagged).toBeDefined();
+      expect(flagged?.id).toBe("af_heart");
+    });
+
+    it("should have an id and name for every voice", () => {
+      catalog.forEach(({ id, name }) => {
+        expect(id).toBeTruthy();
+        expect(name).toBeTruthy();
+      });
+    });
+
+    it("should set language on every voice", () => {
+      catalog.forEach(({ language }) => {
+        expect(language).toBeTruthy();
+      });
+    });
+
+    // ==========================================
+    // Required voices from the issue
+    // (each id must be present in the catalog)
+    // ==========================================
+
+    describe("required voices", () => {
+      const requiredVoiceIds = [
+        "af_heart",
+        "af_bella",
+        "af_nicole",
+        "af_sarah",
+        "af_sky",
+        "am_adam",
+        "am_michael",
+        "bf_emma",
+        "bf_isabella",
+        "bm_george",
+        "bm_lewis",
+      ];
+
+      it.each(requiredVoiceIds)("should include voice '%s'", (voiceId) => {
+        expect(byId(voiceId)).toBeDefined();
+      });
+    });
+
+    // ==========================================
+    // Voice metadata from prefix
+    // (language tag and gender label for af_, am_, bf_, bm_)
+    // ==========================================
+
+    describe("voice metadata from prefix", () => {
+      it("should set language to 'en-US' for af_ prefix voices", () => {
+        expect(byId("af_heart")?.language).toBe("en-US");
+      });
+
+      it("should set language to 'en-US' for am_ prefix voices", () => {
+        expect(byId("am_adam")?.language).toBe("en-US");
+      });
+
+      it("should set language to 'en-GB' for bf_ prefix voices", () => {
+        expect(byId("bf_emma")?.language).toBe("en-GB");
+      });
+
+      it("should set language to 'en-GB' for bm_ prefix voices", () => {
+        expect(byId("bm_george")?.language).toBe("en-GB");
+      });
+
+      it("should include gender in voice name for af_ prefix", () => {
+        expect(byId("af_heart")?.name).toContain("Female");
+      });
+
+      it("should include gender in voice name for am_ prefix", () => {
+        expect(byId("am_adam")?.name).toContain("Male");
+      });
+
+      it("should include gender in voice name for bf_ prefix", () => {
+        expect(byId("bf_emma")?.name).toContain("Female");
+      });
+
+      it("should include gender in voice name for bm_ prefix", () => {
+        expect(byId("bm_george")?.name).toContain("Male");
+      });
+    });
+
+    // ==========================================
+    // Voice name formatting
+    // (capitalized voice name plus accent label)
+    // ==========================================
+
+    describe("voice name formatting", () => {
+      it("should capitalize the voice name portion", () => {
+        expect(byId("af_heart")?.name).toContain("Heart");
+      });
+
+      it("should include the accent/language label in the name", () => {
+        expect(byId("af_heart")?.name).toContain("American");
+        expect(byId("bf_emma")?.name).toContain("British");
+      });
+    });
+  });
+
+  // ==========================================
+  // Custom constructor
+  // ==========================================
+
+  describe("constructor", () => {
+    // The two "accept" tests are smoke checks: construction must not throw.
+    it("should accept custom default voice", () => {
+      const customProvider = new KokoroTtsProvider(testBaseURL, "af_bella");
+      expect(customProvider).toBeDefined();
+    });
+
+    it("should accept custom default format", () => {
+      const customProvider = new KokoroTtsProvider(testBaseURL, "af_heart", "wav");
+      expect(customProvider).toBeDefined();
+    });
+
+    it("should use af_heart as default voice when none specified", async () => {
+      // Observe the default through the catalog rather than only constructing.
+      // NOTE(review): assumes isDefault in listVoices() mirrors this default — confirm.
+      const defaultProvider = new KokoroTtsProvider(testBaseURL);
+      const flagged = (await defaultProvider.listVoices()).find((v) => v.isDefault === true);
+      expect(flagged?.id).toBe("af_heart");
+    });
+  });
+});
+
+// ==========================================
+// parseVoicePrefix: id prefix -> language, gender, accent
+// ==========================================
+
+describe("parseVoicePrefix", () => {
+  it("should parse af_ as American English Female", () => {
+    const { language, gender, accent } = parseVoicePrefix("af_heart");
+    expect(language).toBe("en-US");
+    expect(gender).toBe("female");
+    expect(accent).toBe("American");
+  });
+
+  it("should parse am_ as American English Male", () => {
+    const { language, gender, accent } = parseVoicePrefix("am_adam");
+    expect(language).toBe("en-US");
+    expect(gender).toBe("male");
+    expect(accent).toBe("American");
+  });
+
+  it("should parse bf_ as British English Female", () => {
+    const { language, gender, accent } = parseVoicePrefix("bf_emma");
+    expect(language).toBe("en-GB");
+    expect(gender).toBe("female");
+    expect(accent).toBe("British");
+  });
+
+  it("should parse bm_ as British English Male", () => {
+    const { language, gender, accent } = parseVoicePrefix("bm_george");
+    expect(language).toBe("en-GB");
+    expect(gender).toBe("male");
+    expect(accent).toBe("British");
+  });
+
+  it("should return unknown for unrecognized prefix", () => {
+    const { language, gender, accent } = parseVoicePrefix("xx_unknown");
+    expect(language).toBe("unknown");
+    expect(gender).toBe("unknown");
+    expect(accent).toBe("Unknown");
+  });
+});
+
+// ==========================================
+// Exported constants
+// ==========================================
+
+describe("KOKORO_SUPPORTED_FORMATS", () => {
+  // The exported list must contain each of the four formats checked below.
+  const formats = KOKORO_SUPPORTED_FORMATS;
+
+  it("should include mp3", () => {
+    expect(formats).toContain("mp3");
+  });
+  it("should include wav", () => {
+    expect(formats).toContain("wav");
+  });
+  it("should include opus", () => {
+    expect(formats).toContain("opus");
+  });
+  it("should include flac", () => {
+    expect(formats).toContain("flac");
+  });
+  // NOTE(review): only array-ness is asserted below; immutability is not checked at runtime.
+  it("should be a readonly array", () => {
+    expect(Array.isArray(formats)).toBe(true);
+  });
+});
+
+describe("KOKORO_SPEED_RANGE", () => {
+  const { min, max } = KOKORO_SPEED_RANGE; // both bounds are pinned to exact values
+  it("should have min speed of 0.25", () => {
+    expect(min).toBe(0.25);
+  });
+  it("should have max speed of 4.0", () => {
+    expect(max).toBe(4.0);
+  });
+});
+
+describe("KOKORO_VOICES", () => {
+  it("should be a non-empty array", () => {
+    expect(Array.isArray(KOKORO_VOICES)).toBe(true);
+    expect(KOKORO_VOICES.length).toBeGreaterThan(0);
+  });
+
+  it("should contain voice entries with id and label", () => {
+    KOKORO_VOICES.forEach(({ id, label }) => {
+      expect(id).toBeTruthy();
+      expect(label).toBeTruthy();
+    });
+  });
+  // A prefix is the first two characters of an id; at least four distinct ones must exist.
+  it("should include voices from multiple language prefixes", () => {
+    const distinctPrefixes = new Set(KOKORO_VOICES.map(({ id }) => id.slice(0, 2)));
+    expect(distinctPrefixes.size).toBeGreaterThanOrEqual(4);
+  });
+});