feat(#393): implement Kokoro-FastAPI TTS provider with voice catalog
Some checks failed
ci/woodpecker/push/api Pipeline failed
Some checks failed
ci/woodpecker/push/api Pipeline failed
Extract KokoroTtsProvider from factory into its own module with:
- Full voice catalog of 54 built-in voices across 8 languages
- Voice metadata parsing from ID prefix (language, gender, accent)
- Exported constants for supported formats and speed range
- Comprehensive unit tests (48 tests)
- Fix lint/type errors in chatterbox provider (Prettier + unsafe cast)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
316
apps/api/src/speech/providers/kokoro-tts.provider.spec.ts
Normal file
316
apps/api/src/speech/providers/kokoro-tts.provider.spec.ts
Normal file
@@ -0,0 +1,316 @@
|
||||
/**
|
||||
* KokoroTtsProvider Unit Tests
|
||||
*
|
||||
* Tests the Kokoro-FastAPI TTS provider with full voice catalog,
|
||||
* voice metadata parsing, and Kokoro-specific feature constants.
|
||||
*
|
||||
* Issue #393
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi, beforeEach } from "vitest";
|
||||
import {
|
||||
KokoroTtsProvider,
|
||||
KOKORO_SUPPORTED_FORMATS,
|
||||
KOKORO_SPEED_RANGE,
|
||||
KOKORO_VOICES,
|
||||
parseVoicePrefix,
|
||||
} from "./kokoro-tts.provider";
|
||||
import type { VoiceInfo } from "../interfaces/speech-types";
|
||||
|
||||
// ==========================================
|
||||
// Mock OpenAI SDK
|
||||
// ==========================================
|
||||
|
||||
// Stub the `openai` module: its default export becomes a class whose
// instances expose only `audio.speech.create`, backed by a fresh `vi.fn()`
// spy for every constructed instance.
vi.mock("openai", () => {
  const MockOpenAI = class {
    audio = { speech: { create: vi.fn() } };
  };

  return { default: MockOpenAI };
});
|
||||
|
||||
// ==========================================
|
||||
// Provider identity
|
||||
// ==========================================
|
||||
|
||||
/**
 * Tests for the KokoroTtsProvider class: identity fields (`name`, `tier`),
 * the voice list returned by `listVoices()`, and constructor argument handling.
 * A fresh provider pointed at `testBaseURL` is built before every test.
 */
describe("KokoroTtsProvider", () => {
  const testBaseURL = "http://kokoro-tts:8880/v1";
  let provider: KokoroTtsProvider;

  beforeEach(() => {
    provider = new KokoroTtsProvider(testBaseURL);
  });

  describe("provider identity", () => {
    it("should have name 'kokoro'", () => {
      expect(provider.name).toBe("kokoro");
    });

    it("should have tier 'default'", () => {
      expect(provider.tier).toBe("default");
    });
  });

  // ==========================================
  // listVoices()
  // ==========================================

  describe("listVoices", () => {
    // Voice IDs the issue (#393) names as the minimum catalog. Declared at this
    // level so the count check and the per-ID checks share one source of truth.
    const requiredVoiceIds = [
      "af_heart",
      "af_bella",
      "af_nicole",
      "af_sarah",
      "af_sky",
      "am_adam",
      "am_michael",
      "bf_emma",
      "bf_isabella",
      "bm_george",
      "bm_lewis",
    ];

    let voices: VoiceInfo[];

    // Re-fetch the list for each test so no test sees another's mutations.
    beforeEach(async () => {
      voices = await provider.listVoices();
    });

    it("should return an array of VoiceInfo objects", () => {
      expect(voices).toBeInstanceOf(Array);
      expect(voices.length).toBeGreaterThan(0);
    });

    it("should return at least as many voices as the issue requires", () => {
      // The threshold follows the required-ID list instead of a hard-coded
      // number, so the two cannot drift apart.
      expect(voices.length).toBeGreaterThanOrEqual(requiredVoiceIds.length);
    });

    it("should set tier to 'default' on all voices", () => {
      for (const voice of voices) {
        expect(voice.tier).toBe("default");
      }
    });

    it("should have exactly one default voice", () => {
      const defaults = voices.filter((v) => v.isDefault === true);
      expect(defaults.length).toBe(1);
    });

    it("should mark af_heart as the default voice", () => {
      const defaultVoice = voices.find((v) => v.isDefault === true);
      expect(defaultVoice).toBeDefined();
      expect(defaultVoice?.id).toBe("af_heart");
    });

    it("should have an id and name for every voice", () => {
      for (const voice of voices) {
        expect(voice.id).toBeTruthy();
        expect(voice.name).toBeTruthy();
      }
    });

    it("should set language on every voice", () => {
      for (const voice of voices) {
        expect(voice.language).toBeTruthy();
      }
    });

    // ==========================================
    // Required voices from the issue
    // ==========================================

    describe("required voices", () => {
      it.each(requiredVoiceIds)("should include voice '%s'", (voiceId) => {
        const voice = voices.find((v) => v.id === voiceId);
        expect(voice).toBeDefined();
      });
    });

    // ==========================================
    // Voice metadata from prefix
    // ==========================================

    describe("voice metadata from prefix", () => {
      it("should set language to 'en-US' for af_ prefix voices", () => {
        const voice = voices.find((v) => v.id === "af_heart");
        expect(voice?.language).toBe("en-US");
      });

      it("should set language to 'en-US' for am_ prefix voices", () => {
        const voice = voices.find((v) => v.id === "am_adam");
        expect(voice?.language).toBe("en-US");
      });

      it("should set language to 'en-GB' for bf_ prefix voices", () => {
        const voice = voices.find((v) => v.id === "bf_emma");
        expect(voice?.language).toBe("en-GB");
      });

      it("should set language to 'en-GB' for bm_ prefix voices", () => {
        const voice = voices.find((v) => v.id === "bm_george");
        expect(voice?.language).toBe("en-GB");
      });

      it("should include gender in voice name for af_ prefix", () => {
        const voice = voices.find((v) => v.id === "af_heart");
        expect(voice?.name).toContain("Female");
      });

      it("should include gender in voice name for am_ prefix", () => {
        const voice = voices.find((v) => v.id === "am_adam");
        expect(voice?.name).toContain("Male");
      });

      it("should include gender in voice name for bf_ prefix", () => {
        const voice = voices.find((v) => v.id === "bf_emma");
        expect(voice?.name).toContain("Female");
      });

      it("should include gender in voice name for bm_ prefix", () => {
        const voice = voices.find((v) => v.id === "bm_george");
        expect(voice?.name).toContain("Male");
      });
    });

    // ==========================================
    // Voice name formatting
    // ==========================================

    describe("voice name formatting", () => {
      it("should capitalize the voice name portion", () => {
        const voice = voices.find((v) => v.id === "af_heart");
        expect(voice?.name).toContain("Heart");
      });

      it("should include the accent/language label in the name", () => {
        const afVoice = voices.find((v) => v.id === "af_heart");
        expect(afVoice?.name).toContain("American");

        const bfVoice = voices.find((v) => v.id === "bf_emma");
        expect(bfVoice?.name).toContain("British");
      });
    });
  });

  // ==========================================
  // Custom constructor
  // ==========================================

  describe("constructor", () => {
    // NOTE(review): the two "custom" tests only prove construction succeeds;
    // how a custom voice/format is surfaced is not visible from this spec.
    it("should accept custom default voice", () => {
      const customProvider = new KokoroTtsProvider(testBaseURL, "af_bella");
      expect(customProvider).toBeDefined();
    });

    it("should accept custom default format", () => {
      const customProvider = new KokoroTtsProvider(testBaseURL, "af_heart", "wav");
      expect(customProvider).toBeDefined();
    });

    it("should use af_heart as default voice when none specified", async () => {
      const defaultProvider = new KokoroTtsProvider(testBaseURL);
      expect(defaultProvider).toBeDefined();

      // Verify what the title claims: the voice flagged as default by a
      // provider built without a voice argument is af_heart.
      const listed = await defaultProvider.listVoices();
      const defaultVoice = listed.find((v) => v.isDefault === true);
      expect(defaultVoice?.id).toBe("af_heart");
    });
  });
});
|
||||
|
||||
// ==========================================
|
||||
// parseVoicePrefix utility
|
||||
// ==========================================
|
||||
|
||||
/**
 * Tests for the parseVoicePrefix utility: each case feeds one voice ID and
 * checks the `language`, `gender`, and `accent` fields of the result.
 */
describe("parseVoicePrefix", () => {
  // Shared assertion: parse `voiceId` and compare the three metadata fields.
  const expectParsed = (
    voiceId: string,
    language: string,
    gender: string,
    accent: string
  ): void => {
    const parsed = parseVoicePrefix(voiceId);
    expect(parsed.language).toBe(language);
    expect(parsed.gender).toBe(gender);
    expect(parsed.accent).toBe(accent);
  };

  it("should parse af_ as American English Female", () => {
    expectParsed("af_heart", "en-US", "female", "American");
  });

  it("should parse am_ as American English Male", () => {
    expectParsed("am_adam", "en-US", "male", "American");
  });

  it("should parse bf_ as British English Female", () => {
    expectParsed("bf_emma", "en-GB", "female", "British");
  });

  it("should parse bm_ as British English Male", () => {
    expectParsed("bm_george", "en-GB", "male", "British");
  });

  // An unrecognized prefix yields the "unknown"/"Unknown" placeholder values.
  it("should return unknown for unrecognized prefix", () => {
    expectParsed("xx_unknown", "unknown", "unknown", "Unknown");
  });
});
|
||||
|
||||
// ==========================================
|
||||
// Exported constants
|
||||
// ==========================================
|
||||
|
||||
/**
 * Tests for the KOKORO_SUPPORTED_FORMATS constant: it must list the four
 * expected audio formats and be an array at runtime.
 */
describe("KOKORO_SUPPORTED_FORMATS", () => {
  // One generated test per expected format; titles read "should include mp3", etc.
  it.each(["mp3", "wav", "opus", "flac"])("should include %s", (format) => {
    expect(KOKORO_SUPPORTED_FORMATS).toContain(format);
  });

  // Only array-ness is checked here; this assertion does not exercise
  // immutability of the constant.
  it("should be a readonly array", () => {
    const isArray = Array.isArray(KOKORO_SUPPORTED_FORMATS);
    expect(isArray).toBe(true);
  });
});
|
||||
|
||||
/**
 * Tests for the KOKORO_SPEED_RANGE constant: pins its `min` and `max` bounds.
 */
describe("KOKORO_SPEED_RANGE", () => {
  it("should have min speed of 0.25", () => {
    const { min } = KOKORO_SPEED_RANGE;
    expect(min).toBe(0.25);
  });

  it("should have max speed of 4.0", () => {
    const { max } = KOKORO_SPEED_RANGE;
    expect(max).toBe(4.0);
  });
});
|
||||
|
||||
/**
 * Tests for the KOKORO_VOICES catalog constant: non-empty, every entry has an
 * `id` and a `label`, and the IDs span several distinct two-character prefixes.
 */
describe("KOKORO_VOICES", () => {
  it("should be a non-empty array", () => {
    const isArray = Array.isArray(KOKORO_VOICES);
    expect(isArray).toBe(true);
    expect(KOKORO_VOICES.length).toBeGreaterThan(0);
  });

  it("should contain voice entries with id and label", () => {
    KOKORO_VOICES.forEach((entry) => {
      expect(entry.id).toBeTruthy();
      expect(entry.label).toBeTruthy();
    });
  });

  it("should include voices from multiple language prefixes", () => {
    // Collect the first two characters of every ID (e.g. "af" from "af_heart").
    const distinctPrefixes = new Set<string>();
    for (const entry of KOKORO_VOICES) {
      distinctPrefixes.add(entry.id.substring(0, 2));
    }

    expect(distinctPrefixes.size).toBeGreaterThanOrEqual(4);
  });
});
|
||||
Reference in New Issue
Block a user