Files
stack/apps/api/src/speech/speech.integration.spec.ts
Jason Woltje d2c7602430
All checks were successful
ci/woodpecker/push/api Pipeline was successful
test(#405): add E2E integration tests for speech services
Adds comprehensive integration tests covering all 9 required scenarios:
1. REST transcription (POST /speech/transcribe)
2. REST synthesis (POST /speech/synthesize)
3. Provider fallback (premium -> default -> fallback chain)
4. WebSocket streaming transcription lifecycle
5. Audio MIME type validation (reject invalid formats)
6. File size limit enforcement (25 MB max)
7. Authentication on all endpoints (401 without token)
8. Voice listing with tier filtering (GET /speech/voices)
9. Health check status (GET /speech/health)

Uses NestJS testing module with mocked providers (CI-compatible).
30 test cases, all passing.

Fixes #405
2026-02-15 03:26:05 -06:00

934 lines
32 KiB
TypeScript

/**
* Speech Services E2E Integration Tests
*
* Tests the full speech pipeline from API endpoints through to mocked external providers.
* Covers REST transcription, synthesis, provider fallback, WebSocket streaming,
* audio validation, file size limits, authentication, voice listing, and health checks.
*
* Uses NestJS testing module with supertest for HTTP testing and direct gateway
* invocation for WebSocket streaming tests.
*
* Issue #405
*/
import { describe, it, expect, beforeAll, beforeEach, afterAll, vi } from "vitest";
import { Test } from "@nestjs/testing";
import {
type INestApplication,
type CanActivate,
type ExecutionContext,
UnauthorizedException,
ValidationPipe,
} from "@nestjs/common";
import request from "supertest";
import type { App } from "supertest/types";
import { SpeechController } from "./speech.controller";
import { SpeechService } from "./speech.service";
import { SpeechGateway } from "./speech.gateway";
import { STT_PROVIDER, TTS_PROVIDERS } from "./speech.constants";
import { speechConfig } from "./speech.config";
import type { SpeechConfig } from "./speech.config";
import type { ISTTProvider } from "./interfaces/stt-provider.interface";
import type { ITTSProvider } from "./interfaces/tts-provider.interface";
import type {
TranscriptionResult,
SynthesisResult,
VoiceInfo,
SpeechTier,
} from "./interfaces/speech-types";
import { AuthGuard } from "../auth/guards/auth.guard";
import { WorkspaceGuard, PermissionGuard } from "../common/guards";
import { AuthService } from "../auth/auth.service";
import { PrismaService } from "../prisma/prisma.service";
// ==========================================
// Test Fixtures
// ==========================================
/**
 * 1 KiB zero-filled buffer used as stand-in uploaded audio.
 * NOTE(review): this is NOT a WAV header (it is all zeros, not 44 header
 * bytes as previously claimed); the STT provider is mocked, so only the
 * declared MIME type and byte size matter to these tests.
 */
const TEST_AUDIO_BUFFER = Buffer.alloc(1024, 0);
// Fixed UUIDs so assertions can rely on stable identifiers.
const MOCK_WORKSPACE_ID = "550e8400-e29b-41d4-a716-446655440001";
const MOCK_USER_ID = "550e8400-e29b-41d4-a716-446655440002";
// User object attached to authenticated requests by TestAuthGuard.
const MOCK_USER = {
  id: MOCK_USER_ID,
  email: "test@example.com",
  name: "Test User",
  workspaceId: MOCK_WORKSPACE_ID,
};
// Canonical STT result resolved by the mocked transcribe().
const MOCK_TRANSCRIPTION_RESULT: TranscriptionResult = {
  text: "Hello, this is a test transcription.",
  language: "en",
  durationSeconds: 3.2,
  confidence: 0.97,
  segments: [
    { text: "Hello, this is a test transcription.", start: 0, end: 3.2, confidence: 0.97 },
  ],
};
// Canonical TTS result; per-tier mocks override `tier` as needed.
const MOCK_SYNTHESIS_RESULT: SynthesisResult = {
  audio: Buffer.from("fake-synthesized-audio-data-mp3"),
  format: "mp3",
  voice: "af_heart",
  tier: "default" as SpeechTier,
  durationSeconds: 2.1,
};
// Voice catalog: two "default"-tier voices, one "premium", none for "fallback"
// (Scenario 8 relies on the fallback tier being empty).
const MOCK_VOICES: VoiceInfo[] = [
  { id: "af_heart", name: "Heart", language: "en", tier: "default", isDefault: true },
  { id: "af_sky", name: "Sky", language: "en", tier: "default", isDefault: false },
  {
    id: "chatterbox-default",
    name: "Chatterbox",
    language: "en",
    tier: "premium",
    isDefault: true,
  },
];
// Config fixture bound to speechConfig.KEY: all tiers enabled, 25 MB upload cap.
const MOCK_SPEECH_CONFIG: SpeechConfig = {
  stt: {
    enabled: true,
    baseUrl: "http://speaches:8000/v1",
    model: "test-model",
    language: "en",
  },
  tts: {
    default: { enabled: true, url: "http://kokoro:8880/v1", voice: "af_heart", format: "mp3" },
    premium: { enabled: true, url: "http://chatterbox:8881/v1" },
    fallback: { enabled: true, url: "http://openedai:8000/v1" },
  },
  limits: {
    maxUploadSize: 25_000_000,
    maxDurationSeconds: 600,
    maxTextLength: 4096,
  },
};
// ==========================================
// Mock Providers
// ==========================================
/**
 * Builds an STT provider stub: transcription always resolves with the
 * fixture result and the health probe always reports healthy.
 */
function createMockSTTProvider(): ISTTProvider {
  const transcribe = vi.fn().mockResolvedValue(MOCK_TRANSCRIPTION_RESULT);
  const isHealthy = vi.fn().mockResolvedValue(true);
  return { name: "mock-stt", transcribe, isHealthy };
}
function createMockTTSProvider(tier: SpeechTier, name: string): ITTSProvider {
const voices = MOCK_VOICES.filter((v) => v.tier === tier);
return {
name,
tier,
synthesize: vi.fn().mockResolvedValue({
...MOCK_SYNTHESIS_RESULT,
tier,
}),
listVoices: vi.fn().mockResolvedValue(voices),
isHealthy: vi.fn().mockResolvedValue(true),
};
}
// ==========================================
// Test Guards
// ==========================================
/**
 * Conditional auth guard for testing.
 * A request passes when it carries `Authorization: Bearer test-token` or a
 * `better-auth.session_token=test-token` cookie; the mock user is then
 * attached to the request. Everything else gets an UnauthorizedException.
 */
class TestAuthGuard implements CanActivate {
  canActivate(context: ExecutionContext): boolean {
    const request = context.switchToHttp().getRequest<{
      headers: Record<string, string | undefined>;
      user?: typeof MOCK_USER;
      cookies?: Record<string, string>;
    }>();
    const hasBearer = request.headers.authorization === "Bearer test-token";
    const hasCookie = request.cookies?.["better-auth.session_token"] === "test-token";
    if (!hasBearer && !hasCookie) {
      throw new UnauthorizedException("No authentication token provided");
    }
    request.user = { ...MOCK_USER };
    return true;
  }
}
/**
 * Test workspace guard.
 * Resolves the workspace from the `x-workspace-id` header when present,
 * otherwise falls back to the mock workspace, and attaches it to the request.
 * Always allows the request through.
 */
class TestWorkspaceGuard implements CanActivate {
  canActivate(context: ExecutionContext): boolean {
    const request = context.switchToHttp().getRequest<{
      workspace?: { id: string };
      headers: Record<string, string | undefined>;
    }>();
    const headerValue = request.headers["x-workspace-id"];
    request.workspace = { id: (headerValue ?? MOCK_WORKSPACE_ID) as string };
    return true;
  }
}
/**
 * Test permission guard that always allows access.
 * Permission semantics are out of scope for this suite.
 */
class TestPermissionGuard implements CanActivate {
  canActivate(): boolean {
    return true;
  }
}
// ==========================================
// Tests
// ==========================================
describe("Speech Services E2E Integration", () => {
// Nest application under test; HTTP endpoints are exercised via supertest.
let app: INestApplication;
// Mocked external providers bound to the speech DI tokens.
let mockSTTProvider: ISTTProvider;
let defaultTTSProvider: ITTSProvider;
let premiumTTSProvider: ITTSProvider;
let fallbackTTSProvider: ITTSProvider;
// Tier -> provider map supplied for the TTS_PROVIDERS token.
let ttsProvidersMap: Map<SpeechTier, ITTSProvider>;
// WebSocket gateway test dependencies (the gateway is invoked directly,
// not over a real socket transport).
let speechGateway: SpeechGateway;
// NOTE(review): captured in beforeAll but not referenced by any visible
// test — confirm it is unused before removing.
let mockSpeechService: SpeechService;
beforeAll(async () => {
  // Create mock providers
  mockSTTProvider = createMockSTTProvider();
  defaultTTSProvider = createMockTTSProvider("default", "mock-kokoro");
  premiumTTSProvider = createMockTTSProvider("premium", "mock-chatterbox");
  fallbackTTSProvider = createMockTTSProvider("fallback", "mock-piper");
  // One provider per tier; this map backs the TTS_PROVIDERS injection token.
  ttsProvidersMap = new Map<SpeechTier, ITTSProvider>([
    ["default", defaultTTSProvider],
    ["premium", premiumTTSProvider],
    ["fallback", fallbackTTSProvider],
  ]);
  const moduleRef = await Test.createTestingModule({
    controllers: [SpeechController],
    providers: [
      SpeechService,
      // Static config fixture in place of the registered speech configuration.
      {
        provide: speechConfig.KEY,
        useValue: MOCK_SPEECH_CONFIG,
      },
      {
        provide: STT_PROVIDER,
        useValue: mockSTTProvider,
      },
      {
        provide: TTS_PROVIDERS,
        useValue: ttsProvidersMap,
      },
      // Gateway dependencies (not tested via HTTP but needed for DI)
      {
        provide: SpeechGateway,
        useFactory: (
          authService: AuthService,
          prisma: PrismaService,
          speechService: SpeechService,
          config: SpeechConfig
        ): SpeechGateway => {
          return new SpeechGateway(authService, prisma, speechService, config);
        },
        inject: [AuthService, PrismaService, SpeechService, speechConfig.KEY],
      },
      // AuthService stub: any session token verifies as the mock user.
      {
        provide: AuthService,
        useValue: {
          verifySession: vi.fn().mockResolvedValue({
            user: { id: MOCK_USER_ID, email: "test@example.com", name: "Test User" },
            session: { id: "test-session" },
          }),
        },
      },
      // PrismaService stub: workspace-membership lookups always succeed.
      {
        provide: PrismaService,
        useValue: {
          workspaceMember: {
            findFirst: vi.fn().mockResolvedValue({
              userId: MOCK_USER_ID,
              workspaceId: MOCK_WORKSPACE_ID,
              role: "MEMBER",
            }),
          },
        },
      },
    ],
  })
    // Replace the real HTTP guards with deterministic test doubles.
    .overrideGuard(AuthGuard)
    .useClass(TestAuthGuard)
    .overrideGuard(WorkspaceGuard)
    .useClass(TestWorkspaceGuard)
    .overrideGuard(PermissionGuard)
    .useClass(TestPermissionGuard)
    .compile();
  app = moduleRef.createNestApplication();
  // Mirror production pipe settings so DTO validation behaves realistically.
  app.useGlobalPipes(new ValidationPipe({ transform: true, whitelist: true }));
  await app.init();
  // Capture references for WebSocket tests
  speechGateway = moduleRef.get(SpeechGateway);
  mockSpeechService = moduleRef.get(SpeechService);
});
beforeEach(() => {
vi.clearAllMocks();
// Reset default mock behaviors
(mockSTTProvider.transcribe as ReturnType<typeof vi.fn>).mockResolvedValue(
MOCK_TRANSCRIPTION_RESULT
);
(defaultTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockResolvedValue({
...MOCK_SYNTHESIS_RESULT,
tier: "default",
});
(premiumTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockResolvedValue({
...MOCK_SYNTHESIS_RESULT,
tier: "premium",
});
(fallbackTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockResolvedValue({
...MOCK_SYNTHESIS_RESULT,
tier: "fallback",
});
(defaultTTSProvider.listVoices as ReturnType<typeof vi.fn>).mockResolvedValue(
MOCK_VOICES.filter((v) => v.tier === "default")
);
(premiumTTSProvider.listVoices as ReturnType<typeof vi.fn>).mockResolvedValue(
MOCK_VOICES.filter((v) => v.tier === "premium")
);
(fallbackTTSProvider.listVoices as ReturnType<typeof vi.fn>).mockResolvedValue([]);
});
afterAll(async () => {
  // Tear down the Nest application if beforeAll got far enough to create it.
  await app?.close();
});
// ==========================================
// Scenario 1: REST Transcription
// ==========================================
describe("Scenario 1: REST Transcription (POST /speech/transcribe)", () => {
  it("should transcribe an uploaded audio file and return the transcription result", async () => {
    const response = await request(app.getHttpServer() as App)
      .post("/speech/transcribe")
      .set("Authorization", "Bearer test-token")
      .attach("file", TEST_AUDIO_BUFFER, {
        filename: "test.wav",
        contentType: "audio/wav",
      })
      .expect(201);
    expect(response.body).toHaveProperty("data");
    expect(response.body.data).toMatchObject({
      text: MOCK_TRANSCRIPTION_RESULT.text,
      language: MOCK_TRANSCRIPTION_RESULT.language,
      durationSeconds: MOCK_TRANSCRIPTION_RESULT.durationSeconds,
      confidence: MOCK_TRANSCRIPTION_RESULT.confidence,
    });
    expect(response.body.data.segments).toBeDefined();
    expect(response.body.data.segments).toHaveLength(1);
    // The uploaded bytes and declared MIME type must reach the provider.
    expect(mockSTTProvider.transcribe).toHaveBeenCalledWith(
      expect.any(Buffer),
      expect.objectContaining({ mimeType: "audio/wav" })
    );
  });
  it("should pass optional transcription parameters to the service", async () => {
    const response = await request(app.getHttpServer() as App)
      .post("/speech/transcribe")
      .set("Authorization", "Bearer test-token")
      .attach("file", TEST_AUDIO_BUFFER, {
        filename: "test.mp3",
        contentType: "audio/mpeg",
      })
      .field("language", "fr")
      .field("model", "whisper-large-v3")
      .field("prompt", "Meeting transcript")
      .field("temperature", "0.3")
      .expect(201);
    expect(response.body.data.text).toBe(MOCK_TRANSCRIPTION_RESULT.text);
    // Multipart form fields arrive as strings; the expectation below implies
    // "temperature" is coerced to a number somewhere in the pipeline.
    expect(mockSTTProvider.transcribe).toHaveBeenCalledWith(
      expect.any(Buffer),
      expect.objectContaining({
        mimeType: "audio/mpeg",
        language: "fr",
        model: "whisper-large-v3",
        prompt: "Meeting transcript",
        temperature: 0.3,
      })
    );
  });
  it("should reject request without an audio file", async () => {
    // No multipart attachment at all -> 400 with an error message body.
    const response = await request(app.getHttpServer() as App)
      .post("/speech/transcribe")
      .set("Authorization", "Bearer test-token")
      .expect(400);
    expect(response.body).toHaveProperty("message");
  });
});
// ==========================================
// Scenario 2: REST Synthesis
// ==========================================
describe("Scenario 2: REST Synthesis (POST /speech/synthesize)", () => {
  it("should synthesize text and return audio binary response", async () => {
    const response = await request(app.getHttpServer() as App)
      .post("/speech/synthesize")
      .set("Authorization", "Bearer test-token")
      .send({ text: "Hello, world!" })
      .expect(201);
    // Response should be binary audio served as a download attachment.
    expect(response.headers["content-type"]).toContain("audio/mpeg");
    expect(response.headers["content-disposition"]).toContain("attachment");
    expect(response.headers["content-disposition"]).toContain("speech.mp3");
    expect(response.body).toBeDefined();
    // Fix: the original asserted `Buffer.isBuffer(x) || x instanceof Buffer`,
    // which is the same predicate written twice — the second operand could
    // never change the outcome. A single check expresses the intent.
    expect(Buffer.isBuffer(response.body)).toBe(true);
  });
  it("should pass voice, speed, format, and tier options to the service", async () => {
    // Override the default provider so the response format differs from mp3.
    (defaultTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockResolvedValue({
      audio: Buffer.from("wav-audio-data"),
      format: "wav",
      voice: "af_sky",
      tier: "default",
      durationSeconds: 1.5,
    });
    const response = await request(app.getHttpServer() as App)
      .post("/speech/synthesize")
      .set("Authorization", "Bearer test-token")
      .send({
        text: "Test with options",
        voice: "af_sky",
        speed: 1.5,
        format: "wav",
      })
      .expect(201);
    // Content type and download filename must track the requested format.
    expect(response.headers["content-type"]).toContain("audio/wav");
    expect(response.headers["content-disposition"]).toContain("speech.wav");
  });
  it("should accept empty text (validation delegated to service)", async () => {
    // The SynthesizeDto allows empty strings (no @IsNotEmpty decorator).
    // The service/provider handles empty text semantics.
    const response = await request(app.getHttpServer() as App)
      .post("/speech/synthesize")
      .set("Authorization", "Bearer test-token")
      .send({ text: "" })
      .expect(201);
    expect(response.headers["content-type"]).toContain("audio/mpeg");
  });
  it("should reject missing text field", async () => {
    // `text` is required; omitting it must fail DTO validation with 400.
    await request(app.getHttpServer() as App)
      .post("/speech/synthesize")
      .set("Authorization", "Bearer test-token")
      .send({})
      .expect(400);
  });
});
// ==========================================
// Scenario 3: Provider Fallback
// ==========================================
describe("Scenario 3: Provider Fallback", () => {
  it("should fall back from premium to default when premium fails", async () => {
    // Make premium provider fail
    (premiumTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockRejectedValue(
      new Error("Premium provider unavailable")
    );
    // Default provider should succeed
    (defaultTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockResolvedValue({
      audio: Buffer.from("fallback-audio"),
      format: "mp3",
      voice: "af_heart",
      tier: "default",
    });
    const response = await request(app.getHttpServer() as App)
      .post("/speech/synthesize")
      .set("Authorization", "Bearer test-token")
      .send({ text: "Fallback test", tier: "premium" })
      .expect(201);
    // Premium was attempted first
    expect(premiumTTSProvider.synthesize).toHaveBeenCalled();
    // Then default succeeded
    expect(defaultTTSProvider.synthesize).toHaveBeenCalled();
    expect(response.headers["content-type"]).toContain("audio/mpeg");
  });
  it("should fall back through entire chain: premium -> default -> fallback", async () => {
    // Make premium and default fail
    (premiumTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockRejectedValue(
      new Error("Premium down")
    );
    (defaultTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockRejectedValue(
      new Error("Default down")
    );
    // Fallback should succeed
    (fallbackTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockResolvedValue({
      audio: Buffer.from("fallback-piper-audio"),
      format: "mp3",
      voice: "piper-default",
      tier: "fallback",
    });
    const response = await request(app.getHttpServer() as App)
      .post("/speech/synthesize")
      .set("Authorization", "Bearer test-token")
      .send({ text: "Full fallback chain test", tier: "premium" })
      .expect(201);
    // All three tiers must have been attempted before the request succeeded.
    expect(premiumTTSProvider.synthesize).toHaveBeenCalled();
    expect(defaultTTSProvider.synthesize).toHaveBeenCalled();
    expect(fallbackTTSProvider.synthesize).toHaveBeenCalled();
    expect(response.headers["content-type"]).toContain("audio/mpeg");
  });
  it("should return 503 when all TTS providers fail", async () => {
    // Exhausting the whole chain should surface as Service Unavailable.
    (premiumTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockRejectedValue(
      new Error("Premium down")
    );
    (defaultTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockRejectedValue(
      new Error("Default down")
    );
    (fallbackTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockRejectedValue(
      new Error("Fallback down")
    );
    const response = await request(app.getHttpServer() as App)
      .post("/speech/synthesize")
      .set("Authorization", "Bearer test-token")
      .send({ text: "All providers down", tier: "premium" })
      .expect(503);
    expect(response.body).toHaveProperty("message");
    expect(response.body.message).toContain("All TTS providers failed");
  });
});
// ==========================================
// Scenario 4: WebSocket Streaming Transcription
// ==========================================
describe("Scenario 4: WebSocket Streaming Transcription", () => {
  // Minimal stand-in for a socket.io client socket. The gateway handlers are
  // invoked directly, so no real WebSocket transport is involved.
  interface MockSocket {
    id: string;
    join: ReturnType<typeof vi.fn>;
    leave: ReturnType<typeof vi.fn>;
    emit: ReturnType<typeof vi.fn>;
    disconnect: ReturnType<typeof vi.fn>;
    data: { userId?: string; workspaceId?: string };
    handshake: {
      auth: Record<string, unknown>;
      query: Record<string, unknown>;
      headers: Record<string, unknown>;
    };
  }
  // Builds an authenticated-by-default mock socket; pass overrides per test.
  function createTestSocket(overrides?: Partial<MockSocket>): MockSocket {
    return {
      id: "e2e-test-socket",
      join: vi.fn(),
      leave: vi.fn(),
      emit: vi.fn(),
      disconnect: vi.fn(),
      data: {},
      handshake: {
        auth: { token: "valid-token" },
        query: {},
        headers: {},
      },
      ...overrides,
    };
  }
  it("should complete the full streaming transcription lifecycle", async () => {
    const client = createTestSocket();
    // Authenticate the client
    await speechGateway.handleConnection(client as never);
    expect(client.data.userId).toBe(MOCK_USER_ID);
    expect(client.data.workspaceId).toBe(MOCK_WORKSPACE_ID);
    expect(client.disconnect).not.toHaveBeenCalled();
    // Start transcription session
    speechGateway.handleStartTranscription(client as never, { language: "en" });
    // The emitted sessionId mirrors the socket id.
    expect(client.emit).toHaveBeenCalledWith(
      "transcription-started",
      expect.objectContaining({ sessionId: "e2e-test-socket" })
    );
    // Send audio chunks
    const chunk1 = Buffer.from("audio-data-chunk-1");
    const chunk2 = Buffer.from("audio-data-chunk-2");
    const chunk3 = Buffer.from("audio-data-chunk-3");
    speechGateway.handleAudioChunk(client as never, chunk1);
    speechGateway.handleAudioChunk(client as never, chunk2);
    speechGateway.handleAudioChunk(client as never, chunk3);
    // No errors should have been emitted for chunks
    const errorCalls = client.emit.mock.calls.filter(
      (call: unknown[]) => call[0] === "transcription-error"
    );
    expect(errorCalls).toHaveLength(0);
    // Reset call history so the stop-stage assertions are unambiguous;
    // clearAllMocks also wipes mock behaviors, so re-arm the STT mock.
    vi.clearAllMocks();
    (mockSTTProvider.transcribe as ReturnType<typeof vi.fn>).mockResolvedValue(
      MOCK_TRANSCRIPTION_RESULT
    );
    // Stop transcription - should trigger the full transcription pipeline
    await speechGateway.handleStopTranscription(client as never);
    // Verify transcription was called with concatenated audio
    expect(mockSTTProvider.transcribe).toHaveBeenCalledWith(
      expect.any(Buffer),
      expect.objectContaining({ language: "en" })
    );
    // Verify the final result was emitted
    expect(client.emit).toHaveBeenCalledWith(
      "transcription-final",
      expect.objectContaining({
        text: MOCK_TRANSCRIPTION_RESULT.text,
        language: "en",
        durationSeconds: 3.2,
        confidence: 0.97,
      })
    );
  });
  it("should clean up session on disconnect", async () => {
    const client = createTestSocket({ id: "disconnect-test" });
    await speechGateway.handleConnection(client as never);
    speechGateway.handleStartTranscription(client as never, {});
    speechGateway.handleAudioChunk(client as never, Buffer.from("data"));
    // Disconnect
    speechGateway.handleDisconnect(client as never);
    // Trying to send more chunks should fail (session cleaned up)
    vi.clearAllMocks();
    speechGateway.handleAudioChunk(client as never, Buffer.from("more-data"));
    expect(client.emit).toHaveBeenCalledWith(
      "transcription-error",
      expect.objectContaining({
        message: expect.stringContaining("No active transcription session"),
      })
    );
  });
  it("should reject unauthenticated WebSocket clients", async () => {
    // Empty handshake auth: the gateway must disconnect without attaching a user.
    const client = createTestSocket({
      id: "unauth-ws-client",
      handshake: { auth: {}, query: {}, headers: {} },
    });
    await speechGateway.handleConnection(client as never);
    expect(client.disconnect).toHaveBeenCalled();
    expect(client.data.userId).toBeUndefined();
  });
});
// ==========================================
// Scenario 5: Audio Validation (Invalid MIME Type)
// ==========================================
describe("Scenario 5: Audio Validation", () => {
  it("should reject files with unsupported MIME types", async () => {
    const response = await request(app.getHttpServer() as App)
      .post("/speech/transcribe")
      .set("Authorization", "Bearer test-token")
      .attach("file", Buffer.from("not-audio"), {
        filename: "document.pdf",
        contentType: "application/pdf",
      })
      .expect(400);
    expect(response.body).toHaveProperty("message");
    // The error must name both the problem and the offending MIME type.
    expect(response.body.message).toContain("Unsupported audio format");
    expect(response.body.message).toContain("application/pdf");
  });
  it("should reject files with text/plain MIME type", async () => {
    const response = await request(app.getHttpServer() as App)
      .post("/speech/transcribe")
      .set("Authorization", "Bearer test-token")
      .attach("file", Buffer.from("plain text content"), {
        filename: "notes.txt",
        contentType: "text/plain",
      })
      .expect(400);
    expect(response.body.message).toContain("Unsupported audio format");
  });
  it("should reject video MIME types", async () => {
    // video/* must not be treated as audio even though mp4 can carry audio.
    const response = await request(app.getHttpServer() as App)
      .post("/speech/transcribe")
      .set("Authorization", "Bearer test-token")
      .attach("file", Buffer.from("video-data"), {
        filename: "video.mp4",
        contentType: "video/mp4",
      })
      .expect(400);
    expect(response.body.message).toContain("Unsupported audio format");
  });
  it("should accept valid audio MIME types", async () => {
    const validMimeTypes = [
      { mime: "audio/wav", ext: "wav" },
      { mime: "audio/mpeg", ext: "mp3" },
      { mime: "audio/webm", ext: "webm" },
      { mime: "audio/ogg", ext: "ogg" },
      { mime: "audio/flac", ext: "flac" },
    ];
    // Sequential requests (not Promise.all) keep failures attributable to one type.
    for (const { mime, ext } of validMimeTypes) {
      const response = await request(app.getHttpServer() as App)
        .post("/speech/transcribe")
        .set("Authorization", "Bearer test-token")
        .attach("file", TEST_AUDIO_BUFFER, {
          filename: `test.${ext}`,
          contentType: mime,
        })
        .expect(201);
      expect(response.body).toHaveProperty("data");
      expect(response.body.data.text).toBe(MOCK_TRANSCRIPTION_RESULT.text);
    }
  });
});
// ==========================================
// Scenario 6: File Size Limits
// ==========================================
describe("Scenario 6: File Size Limits", () => {
  it("should reject files exceeding the maximum upload size (25 MB)", async () => {
    // Create a buffer slightly over the 25 MB limit (maxUploadSize: 25_000_000)
    const oversizedBuffer = Buffer.alloc(25_000_001, 0);
    const response = await request(app.getHttpServer() as App)
      .post("/speech/transcribe")
      .set("Authorization", "Bearer test-token")
      .attach("file", oversizedBuffer, {
        filename: "large-audio.wav",
        contentType: "audio/wav",
      })
      .expect(400);
    expect(response.body).toHaveProperty("message");
    expect(response.body.message).toContain("exceeds maximum allowed size");
  });
  it("should accept files within the size limit", async () => {
    // A small 1 KiB buffer, comfortably under the limit.
    // NOTE(review): the original comment claimed "at the exact limit", but
    // this is 1024 bytes — the exact 25_000_000-byte boundary is untested.
    const maxBuffer = Buffer.alloc(1024, 0);
    const response = await request(app.getHttpServer() as App)
      .post("/speech/transcribe")
      .set("Authorization", "Bearer test-token")
      .attach("file", maxBuffer, {
        filename: "acceptable-audio.wav",
        contentType: "audio/wav",
      })
      .expect(201);
    expect(response.body).toHaveProperty("data");
  });
});
// ==========================================
// Scenario 7: Authentication
// ==========================================
describe("Scenario 7: Authentication", () => {
  // Every endpoint is guarded by TestAuthGuard, which answers 401 with the
  // same message for any request lacking the test bearer token or cookie.
  it("should reject POST /speech/transcribe without authentication", async () => {
    const res = await request(app.getHttpServer() as App)
      .post("/speech/transcribe")
      .attach("file", TEST_AUDIO_BUFFER, {
        filename: "test.wav",
        contentType: "audio/wav",
      })
      .expect(401);
    expect(res.body).toHaveProperty("message");
    expect(res.body.message).toContain("No authentication token provided");
  });
  it("should reject POST /speech/synthesize without authentication", async () => {
    const res = await request(app.getHttpServer() as App)
      .post("/speech/synthesize")
      .send({ text: "Hello" })
      .expect(401);
    expect(res.body.message).toContain("No authentication token provided");
  });
  it("should reject GET /speech/voices without authentication", async () => {
    const res = await request(app.getHttpServer() as App).get("/speech/voices").expect(401);
    expect(res.body.message).toContain("No authentication token provided");
  });
  it("should reject GET /speech/health without authentication", async () => {
    const res = await request(app.getHttpServer() as App).get("/speech/health").expect(401);
    expect(res.body.message).toContain("No authentication token provided");
  });
  it("should reject requests with an invalid token", async () => {
    // Any bearer value other than "test-token" is treated as no token at all.
    const res = await request(app.getHttpServer() as App)
      .get("/speech/voices")
      .set("Authorization", "Bearer invalid-token-xyz")
      .expect(401);
    expect(res.body.message).toContain("No authentication token provided");
  });
});
// ==========================================
// Scenario 8: Voice Listing
// ==========================================
describe("Scenario 8: Voice Listing (GET /speech/voices)", () => {
  it("should return all voices when no tier filter is provided", async () => {
    const response = await request(app.getHttpServer() as App)
      .get("/speech/voices")
      .set("Authorization", "Bearer test-token")
      .expect(200);
    expect(response.body).toHaveProperty("data");
    expect(Array.isArray(response.body.data)).toBe(true);
    // Should have voices from all providers that returned voices
    const voices = response.body.data as VoiceInfo[];
    expect(voices.length).toBeGreaterThan(0);
    // Verify voice structure
    for (const voice of voices) {
      expect(voice).toHaveProperty("id");
      expect(voice).toHaveProperty("name");
      expect(voice).toHaveProperty("tier");
    }
  });
  it("should filter voices by tier when tier query param is provided", async () => {
    const response = await request(app.getHttpServer() as App)
      .get("/speech/voices?tier=default")
      .set("Authorization", "Bearer test-token")
      .expect(200);
    const voices = response.body.data as VoiceInfo[];
    expect(voices.length).toBeGreaterThan(0);
    // Only default-tier voices may appear when the filter is applied.
    for (const voice of voices) {
      expect(voice.tier).toBe("default");
    }
    expect(defaultTTSProvider.listVoices).toHaveBeenCalled();
  });
  it("should return empty array for tier with no voices", async () => {
    // The fallback provider's listVoices mock resolves to [] (see beforeEach).
    const response = await request(app.getHttpServer() as App)
      .get("/speech/voices?tier=fallback")
      .set("Authorization", "Bearer test-token")
      .expect(200);
    expect(response.body.data).toEqual([]);
  });
  it("should include voice metadata (id, name, language, tier, isDefault)", async () => {
    const response = await request(app.getHttpServer() as App)
      .get("/speech/voices?tier=default")
      .set("Authorization", "Bearer test-token")
      .expect(200);
    const voices = response.body.data as VoiceInfo[];
    // "af_heart" is the fixture's flagged default voice for this tier.
    const defaultVoice = voices.find((v) => v.isDefault === true);
    expect(defaultVoice).toBeDefined();
    expect(defaultVoice).toMatchObject({
      id: "af_heart",
      name: "Heart",
      language: "en",
      tier: "default",
      isDefault: true,
    });
  });
});
// ==========================================
// Scenario 9: Health Check
// ==========================================
describe("Scenario 9: Health Check (GET /speech/health)", () => {
  it("should return health status for both STT and TTS providers", async () => {
    const res = await request(app.getHttpServer() as App)
      .get("/speech/health")
      .set("Authorization", "Bearer test-token")
      .expect(200);
    const body = res.body as {
      data?: { stt?: { available?: boolean }; tts?: { available?: boolean } };
    };
    expect(body).toHaveProperty("data");
    expect(body.data).toHaveProperty("stt");
    expect(body.data).toHaveProperty("tts");
    expect(body.data?.stt).toHaveProperty("available");
    expect(body.data?.tts).toHaveProperty("available");
    // Mock providers are registered and enabled in config, so both sides
    // should report as available.
    expect(body.data?.stt?.available).toBe(true);
    expect(body.data?.tts?.available).toBe(true);
  });
  it("should return consistent health check format", async () => {
    const res = await request(app.getHttpServer() as App)
      .get("/speech/health")
      .set("Authorization", "Bearer test-token")
      .expect(200);
    // The envelope must be exactly { data: { stt, tts } } with boolean flags.
    expect(res.body).toEqual({
      data: {
        stt: { available: expect.any(Boolean) },
        tts: { available: expect.any(Boolean) },
      },
    });
  });
});
});