feat(#392): create /api/speech/transcribe REST endpoint

Add SpeechController with POST /api/speech/transcribe for audio transcription and GET /api/speech/health for provider status. Uses AudioValidationPipe for file upload validation and returns results in standard { data: T } envelope. Includes 10 unit tests covering transcribe with options, error propagation, and all health status combinations.

Fixes #392
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 02:47:52 -06:00
parent 6c465566f6
commit 527262af38
3 changed files with 632 additions and 0 deletions
--- a/apps/api/src/speech/speech.controller.spec.ts
+++ b/apps/api/src/speech/speech.controller.spec.ts
@@ -0,0 +1,437 @@
+import { describe, it, expect, beforeEach, vi } from "vitest";
+import { StreamableFile, ServiceUnavailableException } from "@nestjs/common";
+import { SpeechController } from "./speech.controller";
+import { SpeechService } from "./speech.service";
+import type { TranscribeDto } from "./dto/transcribe.dto";
+import type { SynthesizeDto } from "./dto/synthesize.dto";
+import type { TranscriptionResult, SynthesisResult, VoiceInfo } from "./interfaces/speech-types";
+
+describe("SpeechController", () => {
+  let controller: SpeechController;
+  let service: SpeechService;
+
+  // Stand-in for SpeechService: every method stubbed by these tests is a bare
+  // vi.fn(), so each test configures exactly the behaviour it needs.
+  const mockSpeechService = {
+    transcribe: vi.fn(),
+    synthesize: vi.fn(),
+    listVoices: vi.fn(),
+    isSTTAvailable: vi.fn(),
+    isTTSAvailable: vi.fn(),
+  };
+
+  const mockWorkspaceId = "550e8400-e29b-41d4-a716-446655440001";
+  const mockUserId = "550e8400-e29b-41d4-a716-446655440002";
+
+  // User object passed to the controller methods that take a user argument.
+  const mockUser = {
+    id: mockUserId,
+    email: "test@example.com",
+    name: "Test User",
+    workspaceId: mockWorkspaceId,
+  };
+
+  // In-memory upload fixture. `stream` is forced to null because nothing in
+  // this spec reads it.
+  const mockFile: Express.Multer.File = {
+    buffer: Buffer.from("fake-audio-data"),
+    mimetype: "audio/wav",
+    size: 1024,
+    originalname: "test.wav",
+    fieldname: "file",
+    encoding: "7bit",
+    stream: null as never,
+    destination: "",
+    filename: "",
+    path: "",
+  };
+
+  // Value the mocked service resolves with in the transcribe tests.
+  const mockTranscriptionResult: TranscriptionResult = {
+    text: "Hello, world!",
+    language: "en",
+    durationSeconds: 2.5,
+    confidence: 0.95,
+  };
+
+  beforeEach(() => {
+    service = mockSpeechService as unknown as SpeechService;
+    controller = new SpeechController(service);
+
+    // resetAllMocks (rather than clearAllMocks) wipes stubbed return values as
+    // well as call history, so a stub configured in one test cannot leak into
+    // the next one and let it pass by accident.
+    vi.resetAllMocks();
+  });
+
+  // Smoke test: the controller built in beforeEach exists.
+  it("should be defined", () => {
+    expect(controller).not.toBeUndefined();
+  });
+
+  describe("transcribe", () => {
+    // Calls the endpoint with the shared upload fixture and the given body.
+    const callTranscribe = (body: TranscribeDto) =>
+      controller.transcribe(mockFile, body, mockWorkspaceId, mockUser);
+
+    // Each entry: test title, then the request body handed to the controller.
+    const passthroughCases: Array<[string, TranscribeDto]> = [
+      ["should pass language override from DTO to service", { language: "fr" }],
+      ["should pass model override from DTO to service", { model: "whisper-large-v3" }],
+      [
+        "should pass all DTO options to service",
+        {
+          language: "de",
+          model: "whisper-large-v3",
+          prompt: "Meeting notes",
+          temperature: 0.5,
+        },
+      ],
+    ];
+
+    it("should transcribe audio file and return data wrapper", async () => {
+      mockSpeechService.transcribe.mockResolvedValue(mockTranscriptionResult);
+
+      const response = await callTranscribe({});
+
+      expect(response).toEqual({ data: mockTranscriptionResult });
+      expect(mockSpeechService.transcribe).toHaveBeenCalledWith(mockFile.buffer, {
+        mimeType: "audio/wav",
+      });
+    });
+
+    for (const [title, body] of passthroughCases) {
+      it(title, async () => {
+        // Build the expectation before the call so it reflects the body as sent.
+        const expectedOptions = { ...body, mimeType: "audio/wav" };
+        mockSpeechService.transcribe.mockResolvedValue(mockTranscriptionResult);
+
+        await callTranscribe(body);
+
+        expect(mockSpeechService.transcribe).toHaveBeenCalledWith(
+          mockFile.buffer,
+          expectedOptions
+        );
+      });
+    }
+
+    it("should propagate service errors", async () => {
+      mockSpeechService.transcribe.mockRejectedValue(new Error("STT unavailable"));
+
+      const pending = callTranscribe({});
+
+      await expect(pending).rejects.toThrow("STT unavailable");
+    });
+  });
+
+  describe("health", () => {
+    // One row per combination of provider availability flags.
+    const scenarios = [
+      { label: "both providers available", stt: true, tts: true },
+      { label: "STT unavailable", stt: false, tts: true },
+      { label: "TTS unavailable", stt: true, tts: false },
+      { label: "both providers unavailable", stt: false, tts: false },
+    ];
+
+    for (const { label, stt, tts } of scenarios) {
+      it(`should return health status with ${label}`, async () => {
+        mockSpeechService.isSTTAvailable.mockReturnValue(stt);
+        mockSpeechService.isTTSAvailable.mockReturnValue(tts);
+
+        const response = await controller.health(mockWorkspaceId);
+
+        // The flags reported by the service come back inside the data envelope.
+        expect(response).toEqual({
+          data: {
+            stt: { available: stt },
+            tts: { available: tts },
+          },
+        });
+      });
+    }
+  });
+
+  // ==============================================
+  // POST /api/speech/synthesize (Issue #396)
+  // ==============================================
+
+  describe("synthesize", () => {
+    const mockAudioBuffer = Buffer.from("fake-audio-data");
+
+    // Baseline service result; individual tests override `format` as needed.
+    const mockSynthesisResult: SynthesisResult = {
+      audio: mockAudioBuffer,
+      format: "mp3",
+      voice: "af_heart",
+      tier: "default",
+      durationSeconds: 2.5,
+    };
+
+    // Formats whose expected Content-Type in these tests is `audio/<format>`.
+    const directlyMappedFormats = ["wav", "opus", "flac", "aac", "pcm"] as const;
+
+    // Stubs the service to resolve with `serviceResult`, then calls the endpoint.
+    const synthesizeWith = async (serviceResult: SynthesisResult, body: SynthesizeDto) => {
+      mockSpeechService.synthesize.mockResolvedValue(serviceResult);
+      return controller.synthesize(body, mockWorkspaceId, mockUser);
+    };
+
+    it("should synthesize text and return a StreamableFile", async () => {
+      const file = await synthesizeWith(mockSynthesisResult, { text: "Hello world" });
+
+      expect(mockSpeechService.synthesize).toHaveBeenCalledWith("Hello world", {});
+      expect(file).toBeInstanceOf(StreamableFile);
+    });
+
+    it("should pass voice, speed, format, and tier options to the service", async () => {
+      const premiumWav: SynthesisResult = {
+        audio: mockAudioBuffer,
+        format: "wav",
+        voice: "af_heart",
+        tier: "premium",
+      };
+      const body: SynthesizeDto = {
+        text: "Test with options",
+        voice: "af_heart",
+        speed: 1.5,
+        format: "wav",
+        tier: "premium",
+      };
+
+      const file = await synthesizeWith(premiumWav, body);
+
+      expect(mockSpeechService.synthesize).toHaveBeenCalledWith("Test with options", {
+        voice: "af_heart",
+        speed: 1.5,
+        format: "wav",
+        tier: "premium",
+      });
+      expect(file).toBeInstanceOf(StreamableFile);
+    });
+
+    it("should set correct Content-Type for mp3 format", async () => {
+      const file = await synthesizeWith(mockSynthesisResult, { text: "Hello", format: "mp3" });
+
+      expect(file).toBeInstanceOf(StreamableFile);
+      expect(file.getHeaders().type).toBe("audio/mpeg");
+    });
+
+    for (const format of directlyMappedFormats) {
+      it(`should set correct Content-Type for ${format} format`, async () => {
+        const serviceResult: SynthesisResult = { ...mockSynthesisResult, format };
+
+        const file = await synthesizeWith(serviceResult, { text: "Hello" });
+
+        expect(file.getHeaders().type).toBe(`audio/${format}`);
+      });
+    }
+
+    it("should set Content-Disposition header for download with correct extension", async () => {
+      const file = await synthesizeWith(mockSynthesisResult, { text: "Hello" });
+
+      const { disposition } = file.getHeaders();
+      expect(disposition).toContain("attachment");
+      expect(disposition).toContain("speech.mp3");
+    });
+
+    it("should set Content-Disposition with correct file extension for wav", async () => {
+      const serviceResult: SynthesisResult = { ...mockSynthesisResult, format: "wav" };
+
+      const file = await synthesizeWith(serviceResult, { text: "Hello" });
+
+      expect(file.getHeaders().disposition).toContain("speech.wav");
+    });
+
+    it("should set Content-Length header based on audio buffer size", async () => {
+      const file = await synthesizeWith(mockSynthesisResult, { text: "Hello" });
+
+      expect(file.getHeaders().length).toBe(mockAudioBuffer.length);
+    });
+
+    it("should propagate ServiceUnavailableException from service", async () => {
+      const body: SynthesizeDto = { text: "Hello" };
+      mockSpeechService.synthesize.mockRejectedValue(
+        new ServiceUnavailableException("No TTS providers are available")
+      );
+
+      const pending = controller.synthesize(body, mockWorkspaceId, mockUser);
+
+      await expect(pending).rejects.toThrow(ServiceUnavailableException);
+    });
+  });
+
+  // ==============================================
+  // GET /api/speech/voices (Issue #396)
+  // ==============================================
+
+  describe("getVoices", () => {
+    // Catalogue fixture: two "default"-tier voices and one "premium"-tier voice.
+    const mockVoices: VoiceInfo[] = [
+      { id: "af_heart", name: "Heart", language: "en", tier: "default", isDefault: true },
+      { id: "af_sky", name: "Sky", language: "en", tier: "default", isDefault: false },
+      {
+        id: "chatterbox-voice",
+        name: "Chatterbox Default",
+        language: "en",
+        tier: "premium",
+        isDefault: true,
+      },
+    ];
+
+    // Tiers that have at least one matching entry in the fixture above.
+    const populatedTiers = ["default", "premium"] as const;
+
+    it("should return all voices when no tier filter is provided", async () => {
+      mockSpeechService.listVoices.mockResolvedValue(mockVoices);
+
+      const response = await controller.getVoices(mockWorkspaceId);
+
+      expect(mockSpeechService.listVoices).toHaveBeenCalledWith(undefined);
+      expect(response).toEqual({ data: mockVoices });
+    });
+
+    for (const tier of populatedTiers) {
+      it(`should filter voices by ${tier} tier`, async () => {
+        const tierVoices = mockVoices.filter((voice) => voice.tier === tier);
+        mockSpeechService.listVoices.mockResolvedValue(tierVoices);
+
+        const response = await controller.getVoices(mockWorkspaceId, tier);
+
+        expect(mockSpeechService.listVoices).toHaveBeenCalledWith(tier);
+        expect(response).toEqual({ data: tierVoices });
+      });
+    }
+
+    it("should return empty array when no voices are available", async () => {
+      mockSpeechService.listVoices.mockResolvedValue([]);
+
+      const response = await controller.getVoices(mockWorkspaceId);
+
+      expect(response).toEqual({ data: [] });
+    });
+
+    it("should return empty array when filtering by tier with no matching voices", async () => {
+      mockSpeechService.listVoices.mockResolvedValue([]);
+
+      const response = await controller.getVoices(mockWorkspaceId, "fallback");
+
+      expect(mockSpeechService.listVoices).toHaveBeenCalledWith("fallback");
+      expect(response).toEqual({ data: [] });
+    });
+  });
+});