Files
stack/apps/api/src/speech/speech.controller.spec.ts
Jason Woltje 527262af38
All checks were successful
ci/woodpecker/push/api Pipeline was successful
feat(#392): create /api/speech/transcribe REST endpoint
Add SpeechController with POST /api/speech/transcribe for audio
transcription and GET /api/speech/health for provider status.
Uses AudioValidationPipe for file upload validation and returns
results in standard { data: T } envelope.

Includes 10 unit tests covering transcribe with options, error
propagation, and all health status combinations.

Fixes #392

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 02:47:52 -06:00

438 lines
14 KiB
TypeScript

import { describe, it, expect, beforeEach, vi } from "vitest";
import { StreamableFile, ServiceUnavailableException } from "@nestjs/common";
import { SpeechController } from "./speech.controller";
import { SpeechService } from "./speech.service";
import type { TranscribeDto } from "./dto/transcribe.dto";
import type { SynthesizeDto } from "./dto/synthesize.dto";
import type { TranscriptionResult, SynthesisResult, VoiceInfo } from "./interfaces/speech-types";
describe("SpeechController", () => {
// Controller under test; a new instance is built before every test.
let controller: SpeechController;
// The stub below, viewed through the SpeechService type the controller constructor takes.
let service: SpeechService;

/**
 * Stand-in for SpeechService. Each method is a bare vi.fn() with no default
 * behaviour, so every test has to stub the calls it depends on.
 */
const mockSpeechService = {
  transcribe: vi.fn(),
  synthesize: vi.fn(),
  listVoices: vi.fn(),
  isSTTAvailable: vi.fn(),
  isTTSAvailable: vi.fn(),
};

// Fixed identifiers handed to the controller as the workspace and user arguments.
const mockWorkspaceId = "550e8400-e29b-41d4-a716-446655440001";
const mockUserId = "550e8400-e29b-41d4-a716-446655440002";
const mockUser = {
  id: mockUserId,
  email: "test@example.com",
  name: "Test User",
  workspaceId: mockWorkspaceId,
};

// Upload fixture. The transcribe assertions compare against its buffer and
// its "audio/wav" mimetype; the remaining fields only satisfy the type.
const mockFile: Express.Multer.File = {
  buffer: Buffer.from("fake-audio-data"),
  mimetype: "audio/wav",
  size: 1024,
  originalname: "test.wav",
  fieldname: "file",
  encoding: "7bit",
  // Typed placeholder: none of the assertions in this file touch the stream.
  stream: null as never,
  destination: "",
  filename: "",
  path: "",
};

// Value the stubbed transcribe() resolves with in the success-path tests.
const mockTranscriptionResult: TranscriptionResult = {
  text: "Hello, world!",
  language: "en",
  durationSeconds: 2.5,
  confidence: 0.95,
};

beforeEach(() => {
  // Double cast: the stub is a plain object, not a SpeechService instance
  // (presumably it covers only part of the real class — confirm against the service).
  service = mockSpeechService as unknown as SpeechService;
  controller = new SpeechController(service);
  // resetAllMocks, unlike clearAllMocks, also drops stubbed return values and
  // rejections in addition to call history. The vi.fn() instances above are
  // shared by the whole file, so without this a test could pass only because
  // an earlier test left a stub behind.
  vi.resetAllMocks();
});

// Smoke test: the controller can be constructed with the stubbed service.
it("should be defined", () => {
  expect(controller).toBeDefined();
});
describe("transcribe", () => {
  /** Calls the endpoint with the shared upload fixture, workspace id and user. */
  const callTranscribe = (dto: TranscribeDto) =>
    controller.transcribe(mockFile, dto, mockWorkspaceId, mockUser);

  it("should transcribe audio file and return data wrapper", async () => {
    mockSpeechService.transcribe.mockResolvedValue(mockTranscriptionResult);

    const response = await callTranscribe({});

    // The service result comes back wrapped in the { data } envelope.
    expect(response).toEqual({ data: mockTranscriptionResult });
    expect(mockSpeechService.transcribe).toHaveBeenCalledWith(mockFile.buffer, {
      mimeType: "audio/wav",
    });
  });

  // Each case pairs a DTO with the options object the service should receive:
  // the DTO fields plus the mime type of the uploaded file.
  const passThroughCases: Array<{
    title: string;
    dto: TranscribeDto;
    forwarded: Record<string, unknown>;
  }> = [
    {
      title: "should pass language override from DTO to service",
      dto: { language: "fr" },
      forwarded: { language: "fr", mimeType: "audio/wav" },
    },
    {
      title: "should pass model override from DTO to service",
      dto: { model: "whisper-large-v3" },
      forwarded: { model: "whisper-large-v3", mimeType: "audio/wav" },
    },
    {
      title: "should pass all DTO options to service",
      dto: {
        language: "de",
        model: "whisper-large-v3",
        prompt: "Meeting notes",
        temperature: 0.5,
      },
      forwarded: {
        language: "de",
        model: "whisper-large-v3",
        prompt: "Meeting notes",
        temperature: 0.5,
        mimeType: "audio/wav",
      },
    },
  ];

  for (const { title, dto, forwarded } of passThroughCases) {
    it(title, async () => {
      mockSpeechService.transcribe.mockResolvedValue(mockTranscriptionResult);

      await callTranscribe(dto);

      expect(mockSpeechService.transcribe).toHaveBeenCalledWith(mockFile.buffer, forwarded);
    });
  }

  it("should propagate service errors", async () => {
    mockSpeechService.transcribe.mockRejectedValue(new Error("STT unavailable"));

    // A rejection from the service must surface to the caller unchanged.
    const pending = callTranscribe({});

    await expect(pending).rejects.toThrow("STT unavailable");
  });
});
describe("health", () => {
  // Every combination of STT/TTS availability; `label` completes the test title.
  const scenarios = [
    { label: "both providers available", stt: true, tts: true },
    { label: "STT unavailable", stt: false, tts: true },
    { label: "TTS unavailable", stt: true, tts: false },
    { label: "both providers unavailable", stt: false, tts: false },
  ];

  for (const { label, stt, tts } of scenarios) {
    it(`should return health status with ${label}`, async () => {
      mockSpeechService.isSTTAvailable.mockReturnValue(stt);
      mockSpeechService.isTTSAvailable.mockReturnValue(tts);

      const status = await controller.health(mockWorkspaceId);

      // Each provider flag is reported under its own key inside the { data } envelope.
      expect(status).toEqual({
        data: {
          stt: { available: stt },
          tts: { available: tts },
        },
      });
    });
  }
});
// ==============================================
// POST /api/speech/synthesize (Issue #396)
// ==============================================
describe("synthesize", () => {
  const mockAudioBuffer = Buffer.from("fake-audio-data");

  // Default service result: mp3 audio backed by the buffer above.
  const mockSynthesisResult: SynthesisResult = {
    audio: mockAudioBuffer,
    format: "mp3",
    voice: "af_heart",
    tier: "default",
    durationSeconds: 2.5,
  };

  /** Stubs the service to resolve with `serviceResult`, then calls the endpoint. */
  const runSynthesize = async (dto: SynthesizeDto, serviceResult: SynthesisResult) => {
    mockSpeechService.synthesize.mockResolvedValue(serviceResult);
    return controller.synthesize(dto, mockWorkspaceId, mockUser);
  };

  it("should synthesize text and return a StreamableFile", async () => {
    const response = await runSynthesize({ text: "Hello world" }, mockSynthesisResult);

    // With only `text` in the DTO the options object is empty.
    expect(mockSpeechService.synthesize).toHaveBeenCalledWith("Hello world", {});
    expect(response).toBeInstanceOf(StreamableFile);
  });

  it("should pass voice, speed, format, and tier options to the service", async () => {
    const request: SynthesizeDto = {
      text: "Test with options",
      voice: "af_heart",
      speed: 1.5,
      format: "wav",
      tier: "premium",
    };
    const premiumWav: SynthesisResult = {
      audio: mockAudioBuffer,
      format: "wav",
      voice: "af_heart",
      tier: "premium",
    };

    const response = await runSynthesize(request, premiumWav);

    expect(mockSpeechService.synthesize).toHaveBeenCalledWith("Test with options", {
      voice: "af_heart",
      speed: 1.5,
      format: "wav",
      tier: "premium",
    });
    expect(response).toBeInstanceOf(StreamableFile);
  });

  it("should set correct Content-Type for mp3 format", async () => {
    const response = await runSynthesize({ text: "Hello", format: "mp3" }, mockSynthesisResult);

    expect(response).toBeInstanceOf(StreamableFile);
    expect(response.getHeaders().type).toBe("audio/mpeg");
  });

  // Remaining formats: the DTO carries no format, so the header is checked
  // against the format reported in the service result.
  const formatContentTypes: Array<[SynthesisResult["format"], string]> = [
    ["wav", "audio/wav"],
    ["opus", "audio/opus"],
    ["flac", "audio/flac"],
    ["aac", "audio/aac"],
    ["pcm", "audio/pcm"],
  ];

  for (const [format, contentType] of formatContentTypes) {
    it(`should set correct Content-Type for ${format} format`, async () => {
      const response = await runSynthesize({ text: "Hello" }, { ...mockSynthesisResult, format });

      expect(response.getHeaders().type).toBe(contentType);
    });
  }

  it("should set Content-Disposition header for download with correct extension", async () => {
    const response = await runSynthesize({ text: "Hello" }, mockSynthesisResult);

    const { disposition } = response.getHeaders();
    expect(disposition).toContain("attachment");
    expect(disposition).toContain("speech.mp3");
  });

  it("should set Content-Disposition with correct file extension for wav", async () => {
    const response = await runSynthesize(
      { text: "Hello" },
      { ...mockSynthesisResult, format: "wav" }
    );

    expect(response.getHeaders().disposition).toContain("speech.wav");
  });

  it("should set Content-Length header based on audio buffer size", async () => {
    const response = await runSynthesize({ text: "Hello" }, mockSynthesisResult);

    expect(response.getHeaders().length).toBe(mockAudioBuffer.length);
  });

  it("should propagate ServiceUnavailableException from service", async () => {
    mockSpeechService.synthesize.mockRejectedValue(
      new ServiceUnavailableException("No TTS providers are available")
    );

    // The exception type thrown by the service must reach the caller.
    const pending = controller.synthesize({ text: "Hello" }, mockWorkspaceId, mockUser);

    await expect(pending).rejects.toThrow(ServiceUnavailableException);
  });
});
// ==============================================
// GET /api/speech/voices (Issue #396)
// ==============================================
describe("getVoices", () => {
  // Two default-tier voices and one premium-tier voice.
  const mockVoices: VoiceInfo[] = [
    { id: "af_heart", name: "Heart", language: "en", tier: "default", isDefault: true },
    { id: "af_sky", name: "Sky", language: "en", tier: "default", isDefault: false },
    {
      id: "chatterbox-voice",
      name: "Chatterbox Default",
      language: "en",
      tier: "premium",
      isDefault: true,
    },
  ];

  it("should return all voices when no tier filter is provided", async () => {
    mockSpeechService.listVoices.mockResolvedValue(mockVoices);

    const response = await controller.getVoices(mockWorkspaceId);

    // With no tier argument the service is called with undefined.
    expect(mockSpeechService.listVoices).toHaveBeenCalledWith(undefined);
    expect(response).toEqual({ data: mockVoices });
  });

  for (const tier of ["default", "premium"] as const) {
    it(`should filter voices by ${tier} tier`, async () => {
      const matching = mockVoices.filter((voice) => voice.tier === tier);
      mockSpeechService.listVoices.mockResolvedValue(matching);

      const response = await controller.getVoices(mockWorkspaceId, tier);

      expect(mockSpeechService.listVoices).toHaveBeenCalledWith(tier);
      expect(response).toEqual({ data: matching });
    });
  }

  it("should return empty array when no voices are available", async () => {
    mockSpeechService.listVoices.mockResolvedValue([]);

    const response = await controller.getVoices(mockWorkspaceId);

    expect(response).toEqual({ data: [] });
  });

  it("should return empty array when filtering by tier with no matching voices", async () => {
    mockSpeechService.listVoices.mockResolvedValue([]);

    const response = await controller.getVoices(mockWorkspaceId, "fallback");

    expect(mockSpeechService.listVoices).toHaveBeenCalledWith("fallback");
    expect(response).toEqual({ data: [] });
  });
});
});