2026-02-15 18:37:54 +00:00
1 changed files with 933 additions and 0 deletions
--- a/apps/api/src/speech/speech.integration.spec.ts
+++ b/apps/api/src/speech/speech.integration.spec.ts
@@ -0,0 +1,933 @@
 /**
 * Speech Services E2E Integration Tests
 *
 * Tests the full speech pipeline from API endpoints through to mocked external providers.
 * Covers REST transcription, synthesis, provider fallback, WebSocket streaming,
 * audio validation, file size limits, authentication, voice listing, and health checks.
 *
 * Uses NestJS testing module with supertest for HTTP testing and direct gateway
 * invocation for WebSocket streaming tests.
 *
 * Issue #405
 */
 import { describe, it, expect, beforeAll, beforeEach, afterAll, vi } from "vitest";
 import { Test } from "@nestjs/testing";
 import {
  type INestApplication,
  type CanActivate,
  type ExecutionContext,
  UnauthorizedException,
  ValidationPipe,
 } from "@nestjs/common";
 import request from "supertest";
 import type { App } from "supertest/types";
 import { SpeechController } from "./speech.controller";
 import { SpeechService } from "./speech.service";
 import { SpeechGateway } from "./speech.gateway";
 import { STT_PROVIDER, TTS_PROVIDERS } from "./speech.constants";
 import { speechConfig } from "./speech.config";
 import type { SpeechConfig } from "./speech.config";
 import type { ISTTProvider } from "./interfaces/stt-provider.interface";
 import type { ITTSProvider } from "./interfaces/tts-provider.interface";
 import type {
  TranscriptionResult,
  SynthesisResult,
  VoiceInfo,
  SpeechTier,
 } from "./interfaces/speech-types";
 import { AuthGuard } from "../auth/guards/auth.guard";
 import { WorkspaceGuard, PermissionGuard } from "../common/guards";
 import { AuthService } from "../auth/auth.service";
 import { PrismaService } from "../prisma/prisma.service";
 // ==========================================
 // Test Fixtures
 // ==========================================
 /**
 * Placeholder audio payload: 1024 zero bytes.
 * It contains no WAV header or real samples; the tests only need some bytes to upload.
 */
 const TEST_AUDIO_BUFFER = Buffer.alloc(1024, 0);
 // Identifiers shared by the guard stubs and the mocked AuthService / PrismaService values
 const MOCK_WORKSPACE_ID = "550e8400-e29b-41d4-a716-446655440001";
 const MOCK_USER_ID = "550e8400-e29b-41d4-a716-446655440002";
 // User object that TestAuthGuard copies onto `req.user` for accepted requests
 const MOCK_USER = {
  id: MOCK_USER_ID,
  email: "test@example.com",
  name: "Test User",
  workspaceId: MOCK_WORKSPACE_ID,
 };
 // Default resolved value of the mocked STT provider's `transcribe`
 const MOCK_TRANSCRIPTION_RESULT: TranscriptionResult = {
  text: "Hello, this is a test transcription.",
  language: "en",
  durationSeconds: 3.2,
  confidence: 0.97,
  segments: [
    { text: "Hello, this is a test transcription.", start: 0, end: 3.2, confidence: 0.97 },
  ],
 };
 // Base resolved value of the mocked TTS providers' `synthesize`; each mock overrides `tier`
 const MOCK_SYNTHESIS_RESULT: SynthesisResult = {
  audio: Buffer.from("fake-synthesized-audio-data-mp3"),
  format: "mp3",
  voice: "af_heart",
  tier: "default" as SpeechTier,
  durationSeconds: 2.1,
 };
 // Voice catalogue: two default-tier voices and one premium-tier voice (no fallback-tier entries)
 const MOCK_VOICES: VoiceInfo[] = [
  { id: "af_heart", name: "Heart", language: "en", tier: "default", isDefault: true },
  { id: "af_sky", name: "Sky", language: "en", tier: "default", isDefault: false },
  {
    id: "chatterbox-default",
    name: "Chatterbox",
    language: "en",
    tier: "premium",
    isDefault: true,
  },
 ];
 // Configuration injected under `speechConfig.KEY`; every STT/TTS tier is enabled
 const MOCK_SPEECH_CONFIG: SpeechConfig = {
  stt: {
    enabled: true,
    baseUrl: "http://speaches:8000/v1",
    model: "test-model",
    language: "en",
  },
  tts: {
    default: { enabled: true, url: "http://kokoro:8880/v1", voice: "af_heart", format: "mp3" },
    premium: { enabled: true, url: "http://chatterbox:8881/v1" },
    fallback: { enabled: true, url: "http://openedai:8000/v1" },
  },
  limits: {
    // Upload cap in bytes; the file-size scenario uploads one byte more than this
    maxUploadSize: 25_000_000,
    maxDurationSeconds: 600,
    maxTextLength: 4096,
  },
 };
 // ==========================================
 // Mock Providers
 // ==========================================
 /**
 * Builds an STT provider stub backed by vitest mocks.
 * `transcribe` resolves to MOCK_TRANSCRIPTION_RESULT and `isHealthy` resolves to true.
 */
 function createMockSTTProvider(): ISTTProvider {
  const transcribe = vi.fn().mockResolvedValue(MOCK_TRANSCRIPTION_RESULT);
  const isHealthy = vi.fn().mockResolvedValue(true);
  return { name: "mock-stt", transcribe, isHealthy };
 }
 /**
 * Builds a TTS provider stub for one tier, backed by vitest mocks.
 *
 * @param tier - Tier reported by the stub and stamped onto its synthesis result.
 * @param name - Provider name reported by the stub.
 * @returns A provider whose `synthesize` resolves to MOCK_SYNTHESIS_RESULT with `tier` replaced,
 * whose `listVoices` resolves to the MOCK_VOICES entries of that tier, and whose `isHealthy`
 * resolves to true.
 */
 function createMockTTSProvider(tier: SpeechTier, name: string): ITTSProvider {
  const tierVoices = MOCK_VOICES.filter((voice) => voice.tier === tier);
  const synthesisResult = { ...MOCK_SYNTHESIS_RESULT, tier };
  return {
    name,
    tier,
    synthesize: vi.fn().mockResolvedValue(synthesisResult),
    listVoices: vi.fn().mockResolvedValue(tierVoices),
    isHealthy: vi.fn().mockResolvedValue(true),
  };
 }
 // ==========================================
 // Test Guards
 // ==========================================
 /**
 * Auth guard stub used in place of the real AuthGuard.
 * A request is accepted when it carries `Authorization: Bearer test-token` or a
 * `better-auth.session_token` cookie equal to `test-token`; a copy of MOCK_USER is then
 * attached as `req.user`. Every other request is rejected with UnauthorizedException.
 */
 class TestAuthGuard implements CanActivate {
  canActivate(context: ExecutionContext): boolean {
    type AuthRequest = {
      headers: Record<string, string | undefined>;
      user?: typeof MOCK_USER;
      cookies?: Record<string, string>;
    };
    const httpRequest = context.switchToHttp().getRequest<AuthRequest>();
    const bearerAccepted = httpRequest.headers.authorization === "Bearer test-token";
    const cookieAccepted = httpRequest.cookies?.["better-auth.session_token"] === "test-token";
    // Guard clause: anything without one of the two accepted credentials is rejected
    if (!bearerAccepted && !cookieAccepted) {
      throw new UnauthorizedException("No authentication token provided");
    }
    // Copy so a test mutating `req.user` cannot alter the shared fixture
    httpRequest.user = { ...MOCK_USER };
    return true;
  }
 }
 /**
 * Workspace guard stub: never rejects, and records the targeted workspace on the request.
 * The id comes from the `x-workspace-id` header when present, otherwise MOCK_WORKSPACE_ID.
 */
 class TestWorkspaceGuard implements CanActivate {
  canActivate(context: ExecutionContext): boolean {
    type WorkspaceRequest = {
      headers: Record<string, string | undefined>;
      workspace?: { id: string };
    };
    const httpRequest = context.switchToHttp().getRequest<WorkspaceRequest>();
    // `??` already narrows the header value to a string, so no cast is needed
    const id = httpRequest.headers["x-workspace-id"] ?? MOCK_WORKSPACE_ID;
    httpRequest.workspace = { id };
    return true;
  }
 }
 /**
 * Permission guard stub used in place of the real PermissionGuard.
 * It grants every request unconditionally.
 */
 class TestPermissionGuard implements CanActivate {
  canActivate(): boolean {
    const granted = true;
    return granted;
  }
 }
 // ==========================================
 // Tests
 // ==========================================
 describe("Speech Services E2E Integration", () => {
  // Application under test and the mocked providers injected into it
  let app: INestApplication;
  let mockSTTProvider: ISTTProvider;
  let defaultTTSProvider: ITTSProvider;
  let premiumTTSProvider: ITTSProvider;
  let fallbackTTSProvider: ITTSProvider;
  let ttsProvidersMap: Map<SpeechTier, ITTSProvider>;
  // Gateway instance that the WebSocket scenario invokes directly
  let speechGateway: SpeechGateway;
  // Builds the Nest testing module once for the whole suite: the real SpeechController and
  // SpeechService, mocked STT/TTS providers, stubbed AuthService/PrismaService, and the three
  // guards replaced by the test guards defined in this file.
  beforeAll(async () => {
    // One mock per provider slot so individual tests can script success or failure per tier
    mockSTTProvider = createMockSTTProvider();
    defaultTTSProvider = createMockTTSProvider("default", "mock-kokoro");
    premiumTTSProvider = createMockTTSProvider("premium", "mock-chatterbox");
    fallbackTTSProvider = createMockTTSProvider("fallback", "mock-piper");
    ttsProvidersMap = new Map<SpeechTier, ITTSProvider>([
      ["default", defaultTTSProvider],
      ["premium", premiumTTSProvider],
      ["fallback", fallbackTTSProvider],
    ]);
    const moduleRef = await Test.createTestingModule({
      controllers: [SpeechController],
      providers: [
        SpeechService,
        {
          provide: speechConfig.KEY,
          useValue: MOCK_SPEECH_CONFIG,
        },
        {
          provide: STT_PROVIDER,
          useValue: mockSTTProvider,
        },
        {
          provide: TTS_PROVIDERS,
          useValue: ttsProvidersMap,
        },
        // Gateway dependencies (not tested via HTTP but needed for DI)
        {
          provide: SpeechGateway,
          useFactory: (
            authService: AuthService,
            prisma: PrismaService,
            speechService: SpeechService,
            config: SpeechConfig
          ): SpeechGateway => {
            return new SpeechGateway(authService, prisma, speechService, config);
          },
          inject: [AuthService, PrismaService, SpeechService, speechConfig.KEY],
        },
        {
          // Stub: every session verification resolves to the mock user
          provide: AuthService,
          useValue: {
            verifySession: vi.fn().mockResolvedValue({
              user: { id: MOCK_USER_ID, email: "test@example.com", name: "Test User" },
              session: { id: "test-session" },
            }),
          },
        },
        {
          // Stub: every membership lookup resolves to the mock user in the mock workspace
          provide: PrismaService,
          useValue: {
            workspaceMember: {
              findFirst: vi.fn().mockResolvedValue({
                userId: MOCK_USER_ID,
                workspaceId: MOCK_WORKSPACE_ID,
                role: "MEMBER",
              }),
            },
          },
        },
      ],
    })
      .overrideGuard(AuthGuard)
      .useClass(TestAuthGuard)
      .overrideGuard(WorkspaceGuard)
      .useClass(TestWorkspaceGuard)
      .overrideGuard(PermissionGuard)
      .useClass(TestPermissionGuard)
      .compile();
    app = moduleRef.createNestApplication();
    app.useGlobalPipes(new ValidationPipe({ transform: true, whitelist: true }));
    await app.init();
    // The WebSocket scenario calls gateway handlers directly, so keep a handle on the instance
    speechGateway = moduleRef.get(SpeechGateway);
  });
  // Clears recorded calls, then re-installs every provider mock's default resolved value so a
  // rejection scripted by one test does not carry over into the next.
  beforeEach(() => {
    const asMock = (fn: unknown) => fn as ReturnType<typeof vi.fn>;
    const voicesFor = (tier: SpeechTier) => MOCK_VOICES.filter((v) => v.tier === tier);
    vi.clearAllMocks();
    // Reset default mock behaviors
    asMock(mockSTTProvider.transcribe).mockResolvedValue(MOCK_TRANSCRIPTION_RESULT);
    // provider, tier stamped on its synthesis result, voices it lists
    const ttsDefaults: [ITTSProvider, SpeechTier, VoiceInfo[]][] = [
      [defaultTTSProvider, "default", voicesFor("default")],
      [premiumTTSProvider, "premium", voicesFor("premium")],
      [fallbackTTSProvider, "fallback", []],
    ];
    for (const [provider, tier, voices] of ttsDefaults) {
      asMock(provider.synthesize).mockResolvedValue({ ...MOCK_SYNTHESIS_RESULT, tier });
      asMock(provider.listVoices).mockResolvedValue(voices);
    }
  });
  // Shuts the Nest application down; skipped when setup failed before `app` was assigned
  afterAll(async () => {
    if (!app) {
      return;
    }
    await app.close();
  });
  // ==========================================
  // Scenario 1: REST Transcription
  // ==========================================
  describe("Scenario 1: REST Transcription (POST /speech/transcribe)", () => {
    // Starts an authenticated POST to the transcription endpoint; each test adds its own parts
    const postTranscribe = () =>
      request(app.getHttpServer() as App)
        .post("/speech/transcribe")
        .set("Authorization", "Bearer test-token");
    it("should transcribe an uploaded audio file and return the transcription result", async () => {
      const upload = { filename: "test.wav", contentType: "audio/wav" };
      const response = await postTranscribe().attach("file", TEST_AUDIO_BUFFER, upload).expect(201);
      // The payload is wrapped in `data` and mirrors what the mocked provider returned
      const { text, language, durationSeconds, confidence } = MOCK_TRANSCRIPTION_RESULT;
      expect(response.body).toHaveProperty("data");
      expect(response.body.data).toMatchObject({ text, language, durationSeconds, confidence });
      expect(response.body.data.segments).toBeDefined();
      expect(response.body.data.segments).toHaveLength(1);
      // The provider receives the uploaded bytes together with the upload's MIME type
      expect(mockSTTProvider.transcribe).toHaveBeenCalledWith(
        expect.any(Buffer),
        expect.objectContaining({ mimeType: "audio/wav" })
      );
    });
    it("should pass optional transcription parameters to the service", async () => {
      const upload = { filename: "test.mp3", contentType: "audio/mpeg" };
      const response = await postTranscribe()
        .attach("file", TEST_AUDIO_BUFFER, upload)
        .field("language", "fr")
        .field("model", "whisper-large-v3")
        .field("prompt", "Meeting transcript")
        .field("temperature", "0.3")
        .expect(201);
      expect(response.body.data.text).toBe(MOCK_TRANSCRIPTION_RESULT.text);
      // `temperature` is sent as the string "0.3" and must reach the provider as a number
      const forwardedOptions = expect.objectContaining({
        mimeType: "audio/mpeg",
        language: "fr",
        model: "whisper-large-v3",
        prompt: "Meeting transcript",
        temperature: 0.3,
      });
      expect(mockSTTProvider.transcribe).toHaveBeenCalledWith(expect.any(Buffer), forwardedOptions);
    });
    it("should reject request without an audio file", async () => {
      const response = await postTranscribe().expect(400);
      expect(response.body).toHaveProperty("message");
    });
  });
  // ==========================================
  // Scenario 2: REST Synthesis
  // ==========================================
  describe("Scenario 2: REST Synthesis (POST /speech/synthesize)", () => {
    it("should synthesize text and return audio binary response", async () => {
      const response = await request(app.getHttpServer() as App)
        .post("/speech/synthesize")
        .set("Authorization", "Bearer test-token")
        .send({ text: "Hello, world!" })
        .expect(201);
      // Response should be binary audio
      expect(response.headers["content-type"]).toContain("audio/mpeg");
      expect(response.headers["content-disposition"]).toContain("attachment");
      expect(response.headers["content-disposition"]).toContain("speech.mp3");
      expect(response.body).toBeDefined();
      expect(Buffer.isBuffer(response.body)).toBe(true);
    });
    it("should pass voice, speed, format, and tier options to the service", async () => {
      (defaultTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockResolvedValue({
        audio: Buffer.from("wav-audio-data"),
        format: "wav",
        voice: "af_sky",
        tier: "default",
        durationSeconds: 1.5,
      });
      const response = await request(app.getHttpServer() as App)
        .post("/speech/synthesize")
        .set("Authorization", "Bearer test-token")
        .send({
          text: "Test with options",
          voice: "af_sky",
          speed: 1.5,
          format: "wav",
          tier: "default",
        })
        .expect(201);
      expect(response.headers["content-type"]).toContain("audio/wav");
      expect(response.headers["content-disposition"]).toContain("speech.wav");
      // The response headers alone could come from the scripted mock result, so also check
      // that the requested options actually reached the provider selected by `tier`.
      // NOTE(review): assumes the provider is invoked as synthesize(text, options), mirroring
      // the transcribe(buffer, options) call shape asserted in Scenario 1 - confirm.
      expect(defaultTTSProvider.synthesize).toHaveBeenCalledWith(
        "Test with options",
        expect.objectContaining({ voice: "af_sky", speed: 1.5, format: "wav" })
      );
      expect(premiumTTSProvider.synthesize).not.toHaveBeenCalled();
    });
    it("should accept empty text (validation delegated to service)", async () => {
      // The SynthesizeDto allows empty strings (no @IsNotEmpty decorator).
      // The service/provider handles empty text semantics.
      const response = await request(app.getHttpServer() as App)
        .post("/speech/synthesize")
        .set("Authorization", "Bearer test-token")
        .send({ text: "" })
        .expect(201);
      expect(response.headers["content-type"]).toContain("audio/mpeg");
    });
    it("should reject missing text field", async () => {
      await request(app.getHttpServer() as App)
        .post("/speech/synthesize")
        .set("Authorization", "Bearer test-token")
        .send({})
        .expect(400);
    });
  });
  // ==========================================
  // Scenario 3: Provider Fallback
  // ==========================================
  describe("Scenario 3: Provider Fallback", () => {
    // Views a provider's `synthesize` as a vitest mock so its outcome can be scripted
    const synthesizeOf = (provider: ITTSProvider) =>
      provider.synthesize as ReturnType<typeof vi.fn>;
    // Sends an authenticated synthesis request that asks for the premium tier
    const requestPremium = (text: string) =>
      request(app.getHttpServer() as App)
        .post("/speech/synthesize")
        .set("Authorization", "Bearer test-token")
        .send({ text, tier: "premium" });
    it("should fall back from premium to default when premium fails", async () => {
      // Premium rejects, default answers
      synthesizeOf(premiumTTSProvider).mockRejectedValue(new Error("Premium provider unavailable"));
      synthesizeOf(defaultTTSProvider).mockResolvedValue({
        audio: Buffer.from("fallback-audio"),
        format: "mp3",
        voice: "af_heart",
        tier: "default",
      });
      const response = await requestPremium("Fallback test").expect(201);
      // Both tiers were tried and the request still produced audio
      expect(premiumTTSProvider.synthesize).toHaveBeenCalled();
      expect(defaultTTSProvider.synthesize).toHaveBeenCalled();
      expect(response.headers["content-type"]).toContain("audio/mpeg");
    });
    it("should fall back through entire chain: premium -> default -> fallback", async () => {
      // Premium and default reject, only the fallback tier answers
      synthesizeOf(premiumTTSProvider).mockRejectedValue(new Error("Premium down"));
      synthesizeOf(defaultTTSProvider).mockRejectedValue(new Error("Default down"));
      synthesizeOf(fallbackTTSProvider).mockResolvedValue({
        audio: Buffer.from("fallback-piper-audio"),
        format: "mp3",
        voice: "piper-default",
        tier: "fallback",
      });
      const response = await requestPremium("Full fallback chain test").expect(201);
      expect(premiumTTSProvider.synthesize).toHaveBeenCalled();
      expect(defaultTTSProvider.synthesize).toHaveBeenCalled();
      expect(fallbackTTSProvider.synthesize).toHaveBeenCalled();
      expect(response.headers["content-type"]).toContain("audio/mpeg");
    });
    it("should return 503 when all TTS providers fail", async () => {
      // Every tier rejects, so there is nothing left to fall back to
      synthesizeOf(premiumTTSProvider).mockRejectedValue(new Error("Premium down"));
      synthesizeOf(defaultTTSProvider).mockRejectedValue(new Error("Default down"));
      synthesizeOf(fallbackTTSProvider).mockRejectedValue(new Error("Fallback down"));
      const response = await requestPremium("All providers down").expect(503);
      expect(response.body).toHaveProperty("message");
      expect(response.body.message).toContain("All TTS providers failed");
    });
  });
  // ==========================================
  // Scenario 4: WebSocket Streaming Transcription
  // ==========================================
  describe("Scenario 4: WebSocket Streaming Transcription", () => {
    // Minimal stand-in for a socket: only the members these tests read or assert on.
    // It is passed to the gateway handlers with an `as never` cast.
    interface MockSocket {
      id: string;
      join: ReturnType<typeof vi.fn>;
      leave: ReturnType<typeof vi.fn>;
      emit: ReturnType<typeof vi.fn>;
      disconnect: ReturnType<typeof vi.fn>;
      data: { userId?: string; workspaceId?: string };
      handshake: {
        auth: Record<string, unknown>;
        query: Record<string, unknown>;
        headers: Record<string, unknown>;
      };
    }
    // Builds a socket stub whose handshake carries `auth.token = "valid-token"`.
    // `overrides` is shallow-merged last, so passing `handshake` replaces the whole object.
    function createTestSocket(overrides?: Partial<MockSocket>): MockSocket {
      return {
        id: "e2e-test-socket",
        join: vi.fn(),
        leave: vi.fn(),
        emit: vi.fn(),
        disconnect: vi.fn(),
        data: {},
        handshake: {
          auth: { token: "valid-token" },
          query: {},
          headers: {},
        },
        ...overrides,
      };
    }
    it("should complete the full streaming transcription lifecycle", async () => {
      const client = createTestSocket();
      // Authenticate the client
      await speechGateway.handleConnection(client as never);
      expect(client.data.userId).toBe(MOCK_USER_ID);
      expect(client.data.workspaceId).toBe(MOCK_WORKSPACE_ID);
      expect(client.disconnect).not.toHaveBeenCalled();
      // Start transcription session
      speechGateway.handleStartTranscription(client as never, { language: "en" });
      expect(client.emit).toHaveBeenCalledWith(
        "transcription-started",
        expect.objectContaining({ sessionId: "e2e-test-socket" })
      );
      // Send audio chunks
      const chunk1 = Buffer.from("audio-data-chunk-1");
      const chunk2 = Buffer.from("audio-data-chunk-2");
      const chunk3 = Buffer.from("audio-data-chunk-3");
      speechGateway.handleAudioChunk(client as never, chunk1);
      speechGateway.handleAudioChunk(client as never, chunk2);
      speechGateway.handleAudioChunk(client as never, chunk3);
      // No errors should have been emitted for chunks
      const errorCalls = client.emit.mock.calls.filter(
        (call: unknown[]) => call[0] === "transcription-error"
      );
      expect(errorCalls).toHaveLength(0);
      // Drop the call history recorded so far (including `client.emit`) so the assertions
      // below only see calls made while stopping, then re-arm the transcribe mock
      vi.clearAllMocks();
      (mockSTTProvider.transcribe as ReturnType<typeof vi.fn>).mockResolvedValue(
        MOCK_TRANSCRIPTION_RESULT
      );
      // Stop transcription - should trigger the full transcription pipeline
      await speechGateway.handleStopTranscription(client as never);
      // Verify the provider received a Buffer plus the session language; the buffer's
      // contents (i.e. that the three chunks were concatenated) are not asserted here
      expect(mockSTTProvider.transcribe).toHaveBeenCalledWith(
        expect.any(Buffer),
        expect.objectContaining({ language: "en" })
      );
      // Verify the final result was emitted
      expect(client.emit).toHaveBeenCalledWith(
        "transcription-final",
        expect.objectContaining({
          text: MOCK_TRANSCRIPTION_RESULT.text,
          language: "en",
          durationSeconds: 3.2,
          confidence: 0.97,
        })
      );
    });
    it("should clean up session on disconnect", async () => {
      const client = createTestSocket({ id: "disconnect-test" });
      await speechGateway.handleConnection(client as never);
      speechGateway.handleStartTranscription(client as never, {});
      speechGateway.handleAudioChunk(client as never, Buffer.from("data"));
      // Disconnect
      speechGateway.handleDisconnect(client as never);
      // Trying to send more chunks should fail (session cleaned up)
      // Clear history first so the error asserted below can only come from the late chunk
      vi.clearAllMocks();
      speechGateway.handleAudioChunk(client as never, Buffer.from("more-data"));
      expect(client.emit).toHaveBeenCalledWith(
        "transcription-error",
        expect.objectContaining({
          message: expect.stringContaining("No active transcription session"),
        })
      );
    });
    it("should reject unauthenticated WebSocket clients", async () => {
      // Replacing `handshake` wholesale removes the default auth token
      const client = createTestSocket({
        id: "unauth-ws-client",
        handshake: { auth: {}, query: {}, headers: {} },
      });
      await speechGateway.handleConnection(client as never);
      expect(client.disconnect).toHaveBeenCalled();
      expect(client.data.userId).toBeUndefined();
    });
  });
  // ==========================================
  // Scenario 5: Audio Validation (Invalid MIME Type)
  // ==========================================
  describe("Scenario 5: Audio Validation", () => {
    // Uploads `content` to the transcription endpoint under the given filename and MIME type
    const uploadAs = (content: Buffer, filename: string, contentType: string) =>
      request(app.getHttpServer() as App)
        .post("/speech/transcribe")
        .set("Authorization", "Bearer test-token")
        .attach("file", content, { filename, contentType });
    it("should reject files with unsupported MIME types", async () => {
      const pdf = Buffer.from("not-audio");
      const response = await uploadAs(pdf, "document.pdf", "application/pdf").expect(400);
      // The error names the offending MIME type
      expect(response.body).toHaveProperty("message");
      expect(response.body.message).toContain("Unsupported audio format");
      expect(response.body.message).toContain("application/pdf");
    });
    it("should reject files with text/plain MIME type", async () => {
      const notes = Buffer.from("plain text content");
      const response = await uploadAs(notes, "notes.txt", "text/plain").expect(400);
      expect(response.body.message).toContain("Unsupported audio format");
    });
    it("should reject video MIME types", async () => {
      const video = Buffer.from("video-data");
      const response = await uploadAs(video, "video.mp4", "video/mp4").expect(400);
      expect(response.body.message).toContain("Unsupported audio format");
    });
    it("should accept valid audio MIME types", async () => {
      // MIME type -> file extension used for the uploaded filename
      const extensionByMime: Record<string, string> = {
        "audio/wav": "wav",
        "audio/mpeg": "mp3",
        "audio/webm": "webm",
        "audio/ogg": "ogg",
        "audio/flac": "flac",
      };
      // Uploads run one after another so a failure points at a single MIME type
      for (const [mime, ext] of Object.entries(extensionByMime)) {
        const response = await uploadAs(TEST_AUDIO_BUFFER, `test.${ext}`, mime).expect(201);
        expect(response.body).toHaveProperty("data");
        expect(response.body.data.text).toBe(MOCK_TRANSCRIPTION_RESULT.text);
      }
    });
  });
  // ==========================================
  // Scenario 6: File Size Limits
  // ==========================================
  describe("Scenario 6: File Size Limits", () => {
    it("should reject files exceeding the maximum upload size (25 MB)", async () => {
      // One byte over the configured limit; derived from the injected config so the test
      // keeps exercising the boundary if the fixture's limit is ever changed
      const oversizedBuffer = Buffer.alloc(MOCK_SPEECH_CONFIG.limits.maxUploadSize + 1, 0);
      const response = await request(app.getHttpServer() as App)
        .post("/speech/transcribe")
        .set("Authorization", "Bearer test-token")
        .attach("file", oversizedBuffer, {
          filename: "large-audio.wav",
          contentType: "audio/wav",
        })
        .expect(400);
      expect(response.body).toHaveProperty("message");
      expect(response.body.message).toContain("exceeds maximum allowed size");
    });
    it("should accept files within the size limit", async () => {
      // A 1 KiB payload, far below the limit; the exact-limit boundary is not exercised here
      const smallBuffer = Buffer.alloc(1024, 0);
      const response = await request(app.getHttpServer() as App)
        .post("/speech/transcribe")
        .set("Authorization", "Bearer test-token")
        .attach("file", smallBuffer, {
          filename: "acceptable-audio.wav",
          contentType: "audio/wav",
        })
        .expect(201);
      expect(response.body).toHaveProperty("data");
    });
  });
  // ==========================================
  // Scenario 7: Authentication
  // ==========================================
  describe("Scenario 7: Authentication", () => {
    // Message TestAuthGuard throws for every rejected request, including wrong tokens
    const rejectionMessage = "No authentication token provided";
    // Supertest client bound to the running application; no credentials are attached
    const api = () => request(app.getHttpServer() as App);
    it("should reject POST /speech/transcribe without authentication", async () => {
      const upload = { filename: "test.wav", contentType: "audio/wav" };
      const response = await api()
        .post("/speech/transcribe")
        .attach("file", TEST_AUDIO_BUFFER, upload)
        .expect(401);
      expect(response.body).toHaveProperty("message");
      expect(response.body.message).toContain(rejectionMessage);
    });
    it("should reject POST /speech/synthesize without authentication", async () => {
      const response = await api().post("/speech/synthesize").send({ text: "Hello" }).expect(401);
      expect(response.body.message).toContain(rejectionMessage);
    });
    it("should reject GET /speech/voices without authentication", async () => {
      const response = await api().get("/speech/voices").expect(401);
      expect(response.body.message).toContain(rejectionMessage);
    });
    it("should reject GET /speech/health without authentication", async () => {
      const response = await api().get("/speech/health").expect(401);
      expect(response.body.message).toContain(rejectionMessage);
    });
    it("should reject requests with an invalid token", async () => {
      const response = await api()
        .get("/speech/voices")
        .set("Authorization", "Bearer invalid-token-xyz")
        .expect(401);
      expect(response.body.message).toContain(rejectionMessage);
    });
  });
  // ==========================================
  // Scenario 8: Voice Listing
  // ==========================================
  describe("Scenario 8: Voice Listing (GET /speech/voices)", () => {
    // Issues an authenticated GET for `path` and requires a 200 response
    const getOk = (path: string) =>
      request(app.getHttpServer() as App)
        .get(path)
        .set("Authorization", "Bearer test-token")
        .expect(200);
    it("should return all voices when no tier filter is provided", async () => {
      const response = await getOk("/speech/voices");
      expect(response.body).toHaveProperty("data");
      expect(Array.isArray(response.body.data)).toBe(true);
      // At least one provider contributed voices
      const voices = response.body.data as VoiceInfo[];
      expect(voices.length).toBeGreaterThan(0);
      // Every entry exposes the identifying fields
      voices.forEach((voice) => {
        expect(voice).toHaveProperty("id");
        expect(voice).toHaveProperty("name");
        expect(voice).toHaveProperty("tier");
      });
    });
    it("should filter voices by tier when tier query param is provided", async () => {
      const response = await getOk("/speech/voices?tier=default");
      const voices = response.body.data as VoiceInfo[];
      expect(voices.length).toBeGreaterThan(0);
      voices.forEach((voice) => {
        expect(voice.tier).toBe("default");
      });
      expect(defaultTTSProvider.listVoices).toHaveBeenCalled();
    });
    it("should return empty array for tier with no voices", async () => {
      // The fallback provider's `listVoices` mock resolves to an empty list
      const response = await getOk("/speech/voices?tier=fallback");
      expect(response.body.data).toEqual([]);
    });
    it("should include voice metadata (id, name, language, tier, isDefault)", async () => {
      const response = await getOk("/speech/voices?tier=default");
      const voices = response.body.data as VoiceInfo[];
      const defaultVoice = voices.find((voice) => voice.isDefault === true);
      const expectedMetadata = {
        id: "af_heart",
        name: "Heart",
        language: "en",
        tier: "default",
        isDefault: true,
      };
      expect(defaultVoice).toBeDefined();
      expect(defaultVoice).toMatchObject(expectedMetadata);
    });
  });
  // ==========================================
  // Scenario 9: Health Check
  // ==========================================
  describe("Scenario 9: Health Check (GET /speech/health)", () => {
    // Issues an authenticated health request and requires a 200 response
    const fetchHealth = () =>
      request(app.getHttpServer() as App)
        .get("/speech/health")
        .set("Authorization", "Bearer test-token")
        .expect(200);
    it("should return health status for both STT and TTS providers", async () => {
      const response = await fetchHealth();
      expect(response.body).toHaveProperty("data");
      expect(response.body.data).toHaveProperty("stt");
      expect(response.body.data).toHaveProperty("tts");
      expect(response.body.data.stt).toHaveProperty("available");
      expect(response.body.data.tts).toHaveProperty("available");
      // Mock providers are registered and the injected config enables both services
      expect(response.body.data.stt.available).toBe(true);
      expect(response.body.data.tts.available).toBe(true);
    });
    it("should return consistent health check format", async () => {
      const response = await fetchHealth();
      // Exact shape: nothing besides `available` under each service
      const expectedShape = {
        data: {
          stt: { available: expect.any(Boolean) },
          tts: { available: expect.any(Boolean) },
        },
      };
      expect(response.body).toEqual(expectedShape);
    });
  });
 });