chore: upgrade Node.js runtime to v24 across codebase #419

Merged
jason.woltje merged 438 commits from fix/auth-frontend-remediation into main 2026-02-17 01:04:47 +00:00
Showing only changes of commit d2c7602430 - Show all commits

View File

@@ -0,0 +1,933 @@
/**
* Speech Services E2E Integration Tests
*
* Tests the full speech pipeline from API endpoints through to mocked external providers.
* Covers REST transcription, synthesis, provider fallback, WebSocket streaming,
* audio validation, file size limits, authentication, voice listing, and health checks.
*
* Uses NestJS testing module with supertest for HTTP testing and direct gateway
* invocation for WebSocket streaming tests.
*
* Issue #405
*/
import { describe, it, expect, beforeAll, beforeEach, afterAll, vi } from "vitest";
import { Test } from "@nestjs/testing";
import {
type INestApplication,
type CanActivate,
type ExecutionContext,
UnauthorizedException,
ValidationPipe,
} from "@nestjs/common";
import request from "supertest";
import type { App } from "supertest/types";
import { SpeechController } from "./speech.controller";
import { SpeechService } from "./speech.service";
import { SpeechGateway } from "./speech.gateway";
import { STT_PROVIDER, TTS_PROVIDERS } from "./speech.constants";
import { speechConfig } from "./speech.config";
import type { SpeechConfig } from "./speech.config";
import type { ISTTProvider } from "./interfaces/stt-provider.interface";
import type { ITTSProvider } from "./interfaces/tts-provider.interface";
import type {
TranscriptionResult,
SynthesisResult,
VoiceInfo,
SpeechTier,
} from "./interfaces/speech-types";
import { AuthGuard } from "../auth/guards/auth.guard";
import { WorkspaceGuard, PermissionGuard } from "../common/guards";
import { AuthService } from "../auth/auth.service";
import { PrismaService } from "../prisma/prisma.service";
// ==========================================
// Test Fixtures
// ==========================================
/**
 * 1 KiB zero-filled buffer used as the uploaded "audio" payload.
 * NOTE(review): this is NOT a WAV header (contrary to the original comment) —
 * it is all zeroes. That is fine here because the STT provider is mocked and
 * only the multipart upload/validation path sees the bytes.
 */
const TEST_AUDIO_BUFFER = Buffer.alloc(1024, 0);
// Stable UUIDs so assertions can match deterministic identifiers.
const MOCK_WORKSPACE_ID = "550e8400-e29b-41d4-a716-446655440001";
const MOCK_USER_ID = "550e8400-e29b-41d4-a716-446655440002";
// User object the test auth guard attaches to authenticated requests.
const MOCK_USER = {
  id: MOCK_USER_ID,
  email: "test@example.com",
  name: "Test User",
  workspaceId: MOCK_WORKSPACE_ID,
};
// Canned result the mock STT provider resolves for every transcription.
const MOCK_TRANSCRIPTION_RESULT: TranscriptionResult = {
  text: "Hello, this is a test transcription.",
  language: "en",
  durationSeconds: 3.2,
  confidence: 0.97,
  segments: [
    { text: "Hello, this is a test transcription.", start: 0, end: 3.2, confidence: 0.97 },
  ],
};
// Canned result the mock TTS providers resolve (tier is overridden per provider).
const MOCK_SYNTHESIS_RESULT: SynthesisResult = {
  audio: Buffer.from("fake-synthesized-audio-data-mp3"),
  format: "mp3",
  voice: "af_heart",
  tier: "default" as SpeechTier,
  durationSeconds: 2.1,
};
// Voice catalogue split across tiers. Note: no "fallback"-tier voices exist,
// which Scenario 8 relies on for its empty-array assertion.
const MOCK_VOICES: VoiceInfo[] = [
  { id: "af_heart", name: "Heart", language: "en", tier: "default", isDefault: true },
  { id: "af_sky", name: "Sky", language: "en", tier: "default", isDefault: false },
  {
    id: "chatterbox-default",
    name: "Chatterbox",
    language: "en",
    tier: "premium",
    isDefault: true,
  },
];
// Static module config injected in place of the real speechConfig values;
// all services enabled, 25 MB upload cap (see Scenario 6).
const MOCK_SPEECH_CONFIG: SpeechConfig = {
  stt: {
    enabled: true,
    baseUrl: "http://speaches:8000/v1",
    model: "test-model",
    language: "en",
  },
  tts: {
    default: { enabled: true, url: "http://kokoro:8880/v1", voice: "af_heart", format: "mp3" },
    premium: { enabled: true, url: "http://chatterbox:8881/v1" },
    fallback: { enabled: true, url: "http://openedai:8000/v1" },
  },
  limits: {
    maxUploadSize: 25_000_000,
    maxDurationSeconds: 600,
    maxTextLength: 4096,
  },
};
// ==========================================
// Mock Providers
// ==========================================
/**
 * Builds an ISTTProvider test double: `transcribe` resolves to the canned
 * transcription fixture and `isHealthy` always reports true.
 */
function createMockSTTProvider(): ISTTProvider {
  const transcribe = vi.fn().mockResolvedValue(MOCK_TRANSCRIPTION_RESULT);
  const isHealthy = vi.fn().mockResolvedValue(true);
  return { name: "mock-stt", transcribe, isHealthy };
}
/**
 * Builds an ITTSProvider test double for one tier: `synthesize` resolves to
 * the canned synthesis fixture re-tagged with the tier, `listVoices` resolves
 * to the subset of MOCK_VOICES belonging to that tier, and `isHealthy` is
 * always true.
 */
function createMockTTSProvider(tier: SpeechTier, name: string): ITTSProvider {
  const tierVoices = MOCK_VOICES.filter((voice) => voice.tier === tier);
  const synthesize = vi.fn().mockResolvedValue({ ...MOCK_SYNTHESIS_RESULT, tier });
  return {
    name,
    tier,
    synthesize,
    listVoices: vi.fn().mockResolvedValue(tierVoices),
    isHealthy: vi.fn().mockResolvedValue(true),
  };
}
// ==========================================
// Test Guards
// ==========================================
/**
 * Replacement for AuthGuard in tests.
 * Accepts either an `Authorization: Bearer test-token` header or a
 * `better-auth.session_token=test-token` cookie; on success a copy of
 * MOCK_USER is attached to the request. Any other request is rejected
 * with an UnauthorizedException (HTTP 401).
 */
class TestAuthGuard implements CanActivate {
  canActivate(context: ExecutionContext): boolean {
    const request = context.switchToHttp().getRequest<{
      headers: Record<string, string | undefined>;
      user?: typeof MOCK_USER;
      cookies?: Record<string, string>;
    }>();
    const bearerOk = request.headers.authorization === "Bearer test-token";
    const cookieOk = request.cookies?.["better-auth.session_token"] === "test-token";
    if (!bearerOk && !cookieOk) {
      throw new UnauthorizedException("No authentication token provided");
    }
    request.user = { ...MOCK_USER };
    return true;
  }
}
/**
 * Replacement for WorkspaceGuard in tests.
 * Attaches `{ id }` to `req.workspace`, taking the id from the
 * `x-workspace-id` header when present and falling back to the mock
 * workspace id otherwise. Always allows the request through.
 */
class TestWorkspaceGuard implements CanActivate {
  canActivate(context: ExecutionContext): boolean {
    const req = context.switchToHttp().getRequest<{
      workspace?: { id: string };
      headers: Record<string, string | undefined>;
    }>();
    // `?? MOCK_WORKSPACE_ID` already narrows the type to `string`;
    // the original `as string` assertion was redundant and has been removed.
    const workspaceId = req.headers["x-workspace-id"] ?? MOCK_WORKSPACE_ID;
    req.workspace = { id: workspaceId };
    return true;
  }
}
/**
 * Replacement for PermissionGuard in tests: unconditionally grants access,
 * so permission checks never interfere with the scenarios under test.
 */
class TestPermissionGuard implements CanActivate {
  canActivate(): boolean {
    return true;
  }
}
// ==========================================
// Tests
// ==========================================
describe("Speech Services E2E Integration", () => {
let app: INestApplication;
let mockSTTProvider: ISTTProvider;
let defaultTTSProvider: ITTSProvider;
let premiumTTSProvider: ITTSProvider;
let fallbackTTSProvider: ITTSProvider;
let ttsProvidersMap: Map<SpeechTier, ITTSProvider>;
// WebSocket gateway test dependencies
let speechGateway: SpeechGateway;
let mockSpeechService: SpeechService;
beforeAll(async () => {
  // Build one mock TTS provider per tier plus the STT provider; the Map
  // mirrors the shape the real module registers under the TTS_PROVIDERS token.
  mockSTTProvider = createMockSTTProvider();
  defaultTTSProvider = createMockTTSProvider("default", "mock-kokoro");
  premiumTTSProvider = createMockTTSProvider("premium", "mock-chatterbox");
  fallbackTTSProvider = createMockTTSProvider("fallback", "mock-piper");
  ttsProvidersMap = new Map<SpeechTier, ITTSProvider>([
    ["default", defaultTTSProvider],
    ["premium", premiumTTSProvider],
    ["fallback", fallbackTTSProvider],
  ]);
  const moduleRef = await Test.createTestingModule({
    controllers: [SpeechController],
    providers: [
      SpeechService,
      // Static test config instead of environment-derived values.
      {
        provide: speechConfig.KEY,
        useValue: MOCK_SPEECH_CONFIG,
      },
      {
        provide: STT_PROVIDER,
        useValue: mockSTTProvider,
      },
      {
        provide: TTS_PROVIDERS,
        useValue: ttsProvidersMap,
      },
      // Gateway dependencies (not tested via HTTP but needed for DI)
      {
        provide: SpeechGateway,
        useFactory: (
          authService: AuthService,
          prisma: PrismaService,
          speechService: SpeechService,
          config: SpeechConfig
        ): SpeechGateway => {
          return new SpeechGateway(authService, prisma, speechService, config);
        },
        inject: [AuthService, PrismaService, SpeechService, speechConfig.KEY],
      },
      // AuthService mock: every session lookup succeeds with the mock user,
      // which is what lets WebSocket handshakes authenticate in Scenario 4.
      {
        provide: AuthService,
        useValue: {
          verifySession: vi.fn().mockResolvedValue({
            user: { id: MOCK_USER_ID, email: "test@example.com", name: "Test User" },
            session: { id: "test-session" },
          }),
        },
      },
      // PrismaService mock: workspace-membership lookups always find a MEMBER row.
      {
        provide: PrismaService,
        useValue: {
          workspaceMember: {
            findFirst: vi.fn().mockResolvedValue({
              userId: MOCK_USER_ID,
              workspaceId: MOCK_WORKSPACE_ID,
              role: "MEMBER",
            }),
          },
        },
      },
    ],
  })
    // Swap the real HTTP guards for deterministic test doubles.
    .overrideGuard(AuthGuard)
    .useClass(TestAuthGuard)
    .overrideGuard(WorkspaceGuard)
    .useClass(TestWorkspaceGuard)
    .overrideGuard(PermissionGuard)
    .useClass(TestPermissionGuard)
    .compile();
  app = moduleRef.createNestApplication();
  // Mirror production pipe settings so DTO validation behaves realistically.
  app.useGlobalPipes(new ValidationPipe({ transform: true, whitelist: true }));
  await app.init();
  // Capture references for WebSocket tests
  // NOTE(review): mockSpeechService is captured but never used below — confirm
  // whether it can be dropped or is reserved for future assertions.
  speechGateway = moduleRef.get(SpeechGateway);
  mockSpeechService = moduleRef.get(SpeechService);
});
beforeEach(() => {
vi.clearAllMocks();
// Reset default mock behaviors
(mockSTTProvider.transcribe as ReturnType<typeof vi.fn>).mockResolvedValue(
MOCK_TRANSCRIPTION_RESULT
);
(defaultTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockResolvedValue({
...MOCK_SYNTHESIS_RESULT,
tier: "default",
});
(premiumTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockResolvedValue({
...MOCK_SYNTHESIS_RESULT,
tier: "premium",
});
(fallbackTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockResolvedValue({
...MOCK_SYNTHESIS_RESULT,
tier: "fallback",
});
(defaultTTSProvider.listVoices as ReturnType<typeof vi.fn>).mockResolvedValue(
MOCK_VOICES.filter((v) => v.tier === "default")
);
(premiumTTSProvider.listVoices as ReturnType<typeof vi.fn>).mockResolvedValue(
MOCK_VOICES.filter((v) => v.tier === "premium")
);
(fallbackTTSProvider.listVoices as ReturnType<typeof vi.fn>).mockResolvedValue([]);
});
afterAll(async () => {
  // Guard clause: beforeAll may have failed before `app` was assigned.
  if (!app) return;
  await app.close();
});
// ==========================================
// Scenario 1: REST Transcription
// ==========================================
describe("Scenario 1: REST Transcription (POST /speech/transcribe)", () => {
  it("should transcribe an uploaded audio file and return the transcription result", async () => {
    // Authenticated multipart upload with a supported audio MIME type.
    const response = await request(app.getHttpServer() as App)
      .post("/speech/transcribe")
      .set("Authorization", "Bearer test-token")
      .attach("file", TEST_AUDIO_BUFFER, {
        filename: "test.wav",
        contentType: "audio/wav",
      })
      .expect(201);
    // Response envelope: { data: TranscriptionResult }.
    expect(response.body).toHaveProperty("data");
    expect(response.body.data).toMatchObject({
      text: MOCK_TRANSCRIPTION_RESULT.text,
      language: MOCK_TRANSCRIPTION_RESULT.language,
      durationSeconds: MOCK_TRANSCRIPTION_RESULT.durationSeconds,
      confidence: MOCK_TRANSCRIPTION_RESULT.confidence,
    });
    expect(response.body.data.segments).toBeDefined();
    expect(response.body.data.segments).toHaveLength(1);
    // The provider receives the raw bytes plus the upload's MIME type.
    expect(mockSTTProvider.transcribe).toHaveBeenCalledWith(
      expect.any(Buffer),
      expect.objectContaining({ mimeType: "audio/wav" })
    );
  });
  it("should pass optional transcription parameters to the service", async () => {
    const response = await request(app.getHttpServer() as App)
      .post("/speech/transcribe")
      .set("Authorization", "Bearer test-token")
      .attach("file", TEST_AUDIO_BUFFER, {
        filename: "test.mp3",
        contentType: "audio/mpeg",
      })
      .field("language", "fr")
      .field("model", "whisper-large-v3")
      .field("prompt", "Meeting transcript")
      .field("temperature", "0.3")
      .expect(201);
    expect(response.body.data.text).toBe(MOCK_TRANSCRIPTION_RESULT.text);
    // Form fields are forwarded to the provider; note "temperature" arrives
    // as the number 0.3 even though it was sent as a string field.
    expect(mockSTTProvider.transcribe).toHaveBeenCalledWith(
      expect.any(Buffer),
      expect.objectContaining({
        mimeType: "audio/mpeg",
        language: "fr",
        model: "whisper-large-v3",
        prompt: "Meeting transcript",
        temperature: 0.3,
      })
    );
  });
  it("should reject request without an audio file", async () => {
    // No multipart attachment at all -> 400 with an error message.
    const response = await request(app.getHttpServer() as App)
      .post("/speech/transcribe")
      .set("Authorization", "Bearer test-token")
      .expect(400);
    expect(response.body).toHaveProperty("message");
  });
});
// ==========================================
// Scenario 2: REST Synthesis
// ==========================================
describe("Scenario 2: REST Synthesis (POST /speech/synthesize)", () => {
  it("should synthesize text and return audio binary response", async () => {
    const response = await request(app.getHttpServer() as App)
      .post("/speech/synthesize")
      .set("Authorization", "Bearer test-token")
      .send({ text: "Hello, world!" })
      .expect(201);
    // Response should be binary audio served as a download.
    expect(response.headers["content-type"]).toContain("audio/mpeg");
    expect(response.headers["content-disposition"]).toContain("attachment");
    expect(response.headers["content-disposition"]).toContain("speech.mp3");
    expect(response.body).toBeDefined();
    // Supertest parses binary bodies into a Buffer. The original asserted
    // `Buffer.isBuffer(x) || x instanceof Buffer`, which is the same
    // predicate twice; one check is sufficient.
    expect(Buffer.isBuffer(response.body)).toBe(true);
  });
  it("should pass voice, speed, format, and tier options to the service", async () => {
    // Override the default mock so the provider reports a wav result,
    // letting us assert the response headers track the requested format.
    (defaultTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockResolvedValue({
      audio: Buffer.from("wav-audio-data"),
      format: "wav",
      voice: "af_sky",
      tier: "default",
      durationSeconds: 1.5,
    });
    const response = await request(app.getHttpServer() as App)
      .post("/speech/synthesize")
      .set("Authorization", "Bearer test-token")
      .send({
        text: "Test with options",
        voice: "af_sky",
        speed: 1.5,
        format: "wav",
      })
      .expect(201);
    expect(response.headers["content-type"]).toContain("audio/wav");
    expect(response.headers["content-disposition"]).toContain("speech.wav");
  });
  it("should accept empty text (validation delegated to service)", async () => {
    // The SynthesizeDto allows empty strings (no @IsNotEmpty decorator).
    // The service/provider handles empty text semantics.
    const response = await request(app.getHttpServer() as App)
      .post("/speech/synthesize")
      .set("Authorization", "Bearer test-token")
      .send({ text: "" })
      .expect(201);
    expect(response.headers["content-type"]).toContain("audio/mpeg");
  });
  it("should reject missing text field", async () => {
    // `text` is required by the DTO, so omitting it fails validation.
    await request(app.getHttpServer() as App)
      .post("/speech/synthesize")
      .set("Authorization", "Bearer test-token")
      .send({})
      .expect(400);
  });
});
// ==========================================
// Scenario 3: Provider Fallback
// ==========================================
describe("Scenario 3: Provider Fallback", () => {
  it("should fall back from premium to default when premium fails", async () => {
    // Make premium provider fail
    (premiumTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockRejectedValue(
      new Error("Premium provider unavailable")
    );
    // Default provider should succeed
    (defaultTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockResolvedValue({
      audio: Buffer.from("fallback-audio"),
      format: "mp3",
      voice: "af_heart",
      tier: "default",
    });
    // Request explicitly targets the premium tier; the service is expected
    // to degrade to the default tier transparently.
    const response = await request(app.getHttpServer() as App)
      .post("/speech/synthesize")
      .set("Authorization", "Bearer test-token")
      .send({ text: "Fallback test", tier: "premium" })
      .expect(201);
    // Premium was attempted first
    expect(premiumTTSProvider.synthesize).toHaveBeenCalled();
    // Then default succeeded
    expect(defaultTTSProvider.synthesize).toHaveBeenCalled();
    expect(response.headers["content-type"]).toContain("audio/mpeg");
  });
  it("should fall back through entire chain: premium -> default -> fallback", async () => {
    // Make premium and default fail
    (premiumTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockRejectedValue(
      new Error("Premium down")
    );
    (defaultTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockRejectedValue(
      new Error("Default down")
    );
    // Fallback should succeed
    (fallbackTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockResolvedValue({
      audio: Buffer.from("fallback-piper-audio"),
      format: "mp3",
      voice: "piper-default",
      tier: "fallback",
    });
    const response = await request(app.getHttpServer() as App)
      .post("/speech/synthesize")
      .set("Authorization", "Bearer test-token")
      .send({ text: "Full fallback chain test", tier: "premium" })
      .expect(201);
    // All three tiers were attempted in order before success.
    expect(premiumTTSProvider.synthesize).toHaveBeenCalled();
    expect(defaultTTSProvider.synthesize).toHaveBeenCalled();
    expect(fallbackTTSProvider.synthesize).toHaveBeenCalled();
    expect(response.headers["content-type"]).toContain("audio/mpeg");
  });
  it("should return 503 when all TTS providers fail", async () => {
    // Every tier rejects -> the service must surface Service Unavailable.
    (premiumTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockRejectedValue(
      new Error("Premium down")
    );
    (defaultTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockRejectedValue(
      new Error("Default down")
    );
    (fallbackTTSProvider.synthesize as ReturnType<typeof vi.fn>).mockRejectedValue(
      new Error("Fallback down")
    );
    const response = await request(app.getHttpServer() as App)
      .post("/speech/synthesize")
      .set("Authorization", "Bearer test-token")
      .send({ text: "All providers down", tier: "premium" })
      .expect(503);
    expect(response.body).toHaveProperty("message");
    expect(response.body.message).toContain("All TTS providers failed");
  });
});
// ==========================================
// Scenario 4: WebSocket Streaming Transcription
// ==========================================
describe("Scenario 4: WebSocket Streaming Transcription", () => {
  /**
   * Minimal structural stand-in for a socket.io Socket: only the members the
   * gateway touches (room ops, emit, disconnect, per-socket data, and the
   * handshake used for authentication).
   */
  interface MockSocket {
    id: string;
    join: ReturnType<typeof vi.fn>;
    leave: ReturnType<typeof vi.fn>;
    emit: ReturnType<typeof vi.fn>;
    disconnect: ReturnType<typeof vi.fn>;
    data: { userId?: string; workspaceId?: string };
    handshake: {
      auth: Record<string, unknown>;
      query: Record<string, unknown>;
      headers: Record<string, unknown>;
    };
  }
  // Builds an authenticated-by-default socket; `overrides` lets individual
  // tests change the id or strip the handshake token.
  function createTestSocket(overrides?: Partial<MockSocket>): MockSocket {
    return {
      id: "e2e-test-socket",
      join: vi.fn(),
      leave: vi.fn(),
      emit: vi.fn(),
      disconnect: vi.fn(),
      data: {},
      handshake: {
        auth: { token: "valid-token" },
        query: {},
        headers: {},
      },
      ...overrides,
    };
  }
  it("should complete the full streaming transcription lifecycle", async () => {
    const client = createTestSocket();
    // Authenticate the client (the AuthService mock always verifies).
    await speechGateway.handleConnection(client as never);
    expect(client.data.userId).toBe(MOCK_USER_ID);
    expect(client.data.workspaceId).toBe(MOCK_WORKSPACE_ID);
    expect(client.disconnect).not.toHaveBeenCalled();
    // Start transcription session — the socket id doubles as the session id.
    speechGateway.handleStartTranscription(client as never, { language: "en" });
    expect(client.emit).toHaveBeenCalledWith(
      "transcription-started",
      expect.objectContaining({ sessionId: "e2e-test-socket" })
    );
    // Send audio chunks
    const chunk1 = Buffer.from("audio-data-chunk-1");
    const chunk2 = Buffer.from("audio-data-chunk-2");
    const chunk3 = Buffer.from("audio-data-chunk-3");
    speechGateway.handleAudioChunk(client as never, chunk1);
    speechGateway.handleAudioChunk(client as never, chunk2);
    speechGateway.handleAudioChunk(client as never, chunk3);
    // No errors should have been emitted for chunks
    const errorCalls = client.emit.mock.calls.filter(
      (call: unknown[]) => call[0] === "transcription-error"
    );
    expect(errorCalls).toHaveLength(0);
    // Reset call history so the assertions below only see calls triggered by
    // stopping the session; re-arm the STT mock since clearAllMocks wiped it.
    vi.clearAllMocks();
    (mockSTTProvider.transcribe as ReturnType<typeof vi.fn>).mockResolvedValue(
      MOCK_TRANSCRIPTION_RESULT
    );
    // Stop transcription - should trigger the full transcription pipeline
    await speechGateway.handleStopTranscription(client as never);
    // Verify transcription was called with concatenated audio
    expect(mockSTTProvider.transcribe).toHaveBeenCalledWith(
      expect.any(Buffer),
      expect.objectContaining({ language: "en" })
    );
    // Verify the final result was emitted
    expect(client.emit).toHaveBeenCalledWith(
      "transcription-final",
      expect.objectContaining({
        text: MOCK_TRANSCRIPTION_RESULT.text,
        language: "en",
        durationSeconds: 3.2,
        confidence: 0.97,
      })
    );
  });
  it("should clean up session on disconnect", async () => {
    const client = createTestSocket({ id: "disconnect-test" });
    await speechGateway.handleConnection(client as never);
    speechGateway.handleStartTranscription(client as never, {});
    speechGateway.handleAudioChunk(client as never, Buffer.from("data"));
    // Disconnect
    speechGateway.handleDisconnect(client as never);
    // Trying to send more chunks should fail (session cleaned up)
    vi.clearAllMocks();
    speechGateway.handleAudioChunk(client as never, Buffer.from("more-data"));
    expect(client.emit).toHaveBeenCalledWith(
      "transcription-error",
      expect.objectContaining({
        message: expect.stringContaining("No active transcription session"),
      })
    );
  });
  it("should reject unauthenticated WebSocket clients", async () => {
    // No token anywhere in the handshake -> gateway must drop the connection
    // without populating any per-socket auth data.
    const client = createTestSocket({
      id: "unauth-ws-client",
      handshake: { auth: {}, query: {}, headers: {} },
    });
    await speechGateway.handleConnection(client as never);
    expect(client.disconnect).toHaveBeenCalled();
    expect(client.data.userId).toBeUndefined();
  });
});
// ==========================================
// Scenario 5: Audio Validation (Invalid MIME Type)
// ==========================================
describe("Scenario 5: Audio Validation", () => {
  it("should reject files with unsupported MIME types", async () => {
    // A PDF upload must be rejected and the offending type echoed back.
    const response = await request(app.getHttpServer() as App)
      .post("/speech/transcribe")
      .set("Authorization", "Bearer test-token")
      .attach("file", Buffer.from("not-audio"), {
        filename: "document.pdf",
        contentType: "application/pdf",
      })
      .expect(400);
    expect(response.body).toHaveProperty("message");
    expect(response.body.message).toContain("Unsupported audio format");
    expect(response.body.message).toContain("application/pdf");
  });
  it("should reject files with text/plain MIME type", async () => {
    const response = await request(app.getHttpServer() as App)
      .post("/speech/transcribe")
      .set("Authorization", "Bearer test-token")
      .attach("file", Buffer.from("plain text content"), {
        filename: "notes.txt",
        contentType: "text/plain",
      })
      .expect(400);
    expect(response.body.message).toContain("Unsupported audio format");
  });
  it("should reject video MIME types", async () => {
    // video/* is not audio even though players often accept such containers.
    const response = await request(app.getHttpServer() as App)
      .post("/speech/transcribe")
      .set("Authorization", "Bearer test-token")
      .attach("file", Buffer.from("video-data"), {
        filename: "video.mp4",
        contentType: "video/mp4",
      })
      .expect(400);
    expect(response.body.message).toContain("Unsupported audio format");
  });
  it("should accept valid audio MIME types", async () => {
    // Each supported format should pass validation and reach the STT mock.
    // Requests run sequentially so per-iteration assertions stay attributable.
    const validMimeTypes = [
      { mime: "audio/wav", ext: "wav" },
      { mime: "audio/mpeg", ext: "mp3" },
      { mime: "audio/webm", ext: "webm" },
      { mime: "audio/ogg", ext: "ogg" },
      { mime: "audio/flac", ext: "flac" },
    ];
    for (const { mime, ext } of validMimeTypes) {
      const response = await request(app.getHttpServer() as App)
        .post("/speech/transcribe")
        .set("Authorization", "Bearer test-token")
        .attach("file", TEST_AUDIO_BUFFER, {
          filename: `test.${ext}`,
          contentType: mime,
        })
        .expect(201);
      expect(response.body).toHaveProperty("data");
      expect(response.body.data.text).toBe(MOCK_TRANSCRIPTION_RESULT.text);
    }
  });
});
// ==========================================
// Scenario 6: File Size Limits
// ==========================================
describe("Scenario 6: File Size Limits", () => {
  it("should reject files exceeding the maximum upload size (25 MB)", async () => {
    // One byte over MOCK_SPEECH_CONFIG.limits.maxUploadSize (25_000_000).
    const oversizedBuffer = Buffer.alloc(25_000_001, 0);
    const response = await request(app.getHttpServer() as App)
      .post("/speech/transcribe")
      .set("Authorization", "Bearer test-token")
      .attach("file", oversizedBuffer, {
        filename: "large-audio.wav",
        contentType: "audio/wav",
      })
      .expect(400);
    expect(response.body).toHaveProperty("message");
    expect(response.body.message).toContain("exceeds maximum allowed size");
  });
  it("should accept files within the size limit", async () => {
    // 1 KiB buffer, comfortably under the 25 MB cap.
    // NOTE(review): the original comment claimed this was "at the exact
    // limit"; it is not — a boundary test at exactly maxUploadSize bytes
    // would be a useful addition.
    const maxBuffer = Buffer.alloc(1024, 0);
    const response = await request(app.getHttpServer() as App)
      .post("/speech/transcribe")
      .set("Authorization", "Bearer test-token")
      .attach("file", maxBuffer, {
        filename: "acceptable-audio.wav",
        contentType: "audio/wav",
      })
      .expect(201);
    expect(response.body).toHaveProperty("data");
  });
});
// ==========================================
// Scenario 7: Authentication
// ==========================================
// TestAuthGuard rejects any request lacking `Bearer test-token` (or the
// matching session cookie) with the 401 message asserted below.
describe("Scenario 7: Authentication", () => {
  it("should reject POST /speech/transcribe without authentication", async () => {
    const response = await request(app.getHttpServer() as App)
      .post("/speech/transcribe")
      .attach("file", TEST_AUDIO_BUFFER, {
        filename: "test.wav",
        contentType: "audio/wav",
      })
      .expect(401);
    expect(response.body).toHaveProperty("message");
    expect(response.body.message).toContain("No authentication token provided");
  });
  it("should reject POST /speech/synthesize without authentication", async () => {
    const response = await request(app.getHttpServer() as App)
      .post("/speech/synthesize")
      .send({ text: "Hello" })
      .expect(401);
    expect(response.body.message).toContain("No authentication token provided");
  });
  it("should reject GET /speech/voices without authentication", async () => {
    const response = await request(app.getHttpServer() as App)
      .get("/speech/voices")
      .expect(401);
    expect(response.body.message).toContain("No authentication token provided");
  });
  it("should reject GET /speech/health without authentication", async () => {
    const response = await request(app.getHttpServer() as App)
      .get("/speech/health")
      .expect(401);
    expect(response.body.message).toContain("No authentication token provided");
  });
  it("should reject requests with an invalid token", async () => {
    // A present-but-wrong bearer token is treated the same as no token.
    const response = await request(app.getHttpServer() as App)
      .get("/speech/voices")
      .set("Authorization", "Bearer invalid-token-xyz")
      .expect(401);
    expect(response.body.message).toContain("No authentication token provided");
  });
});
// ==========================================
// Scenario 8: Voice Listing
// ==========================================
describe("Scenario 8: Voice Listing (GET /speech/voices)", () => {
  it("should return all voices when no tier filter is provided", async () => {
    const response = await request(app.getHttpServer() as App)
      .get("/speech/voices")
      .set("Authorization", "Bearer test-token")
      .expect(200);
    expect(response.body).toHaveProperty("data");
    expect(Array.isArray(response.body.data)).toBe(true);
    // Should have voices from all providers that returned voices
    const voices = response.body.data as VoiceInfo[];
    expect(voices.length).toBeGreaterThan(0);
    // Verify voice structure
    for (const voice of voices) {
      expect(voice).toHaveProperty("id");
      expect(voice).toHaveProperty("name");
      expect(voice).toHaveProperty("tier");
    }
  });
  it("should filter voices by tier when tier query param is provided", async () => {
    const response = await request(app.getHttpServer() as App)
      .get("/speech/voices?tier=default")
      .set("Authorization", "Bearer test-token")
      .expect(200);
    const voices = response.body.data as VoiceInfo[];
    expect(voices.length).toBeGreaterThan(0);
    // Every returned voice must belong to the requested tier.
    for (const voice of voices) {
      expect(voice.tier).toBe("default");
    }
    expect(defaultTTSProvider.listVoices).toHaveBeenCalled();
  });
  it("should return empty array for tier with no voices", async () => {
    // beforeEach configures the fallback provider's listVoices to resolve [].
    const response = await request(app.getHttpServer() as App)
      .get("/speech/voices?tier=fallback")
      .set("Authorization", "Bearer test-token")
      .expect(200);
    expect(response.body.data).toEqual([]);
  });
  it("should include voice metadata (id, name, language, tier, isDefault)", async () => {
    const response = await request(app.getHttpServer() as App)
      .get("/speech/voices?tier=default")
      .set("Authorization", "Bearer test-token")
      .expect(200);
    const voices = response.body.data as VoiceInfo[];
    // The tier's default voice carries the full metadata from MOCK_VOICES.
    const defaultVoice = voices.find((v) => v.isDefault === true);
    expect(defaultVoice).toBeDefined();
    expect(defaultVoice).toMatchObject({
      id: "af_heart",
      name: "Heart",
      language: "en",
      tier: "default",
      isDefault: true,
    });
  });
});
// ==========================================
// Scenario 9: Health Check
// ==========================================
describe("Scenario 9: Health Check (GET /speech/health)", () => {
  it("should return health status for both STT and TTS providers", async () => {
    const res = await request(app.getHttpServer() as App)
      .get("/speech/health")
      .set("Authorization", "Bearer test-token")
      .expect(200);
    expect(res.body).toHaveProperty("data");
    expect(res.body.data).toHaveProperty("stt");
    expect(res.body.data).toHaveProperty("tts");
    expect(res.body.data.stt).toHaveProperty("available");
    expect(res.body.data.tts).toHaveProperty("available");
    // Mock providers report healthy and the test config enables every service.
    expect(res.body.data.stt.available).toBe(true);
    expect(res.body.data.tts.available).toBe(true);
  });
  it("should return consistent health check format", async () => {
    const res = await request(app.getHttpServer() as App)
      .get("/speech/health")
      .set("Authorization", "Bearer test-token")
      .expect(200);
    // toEqual pins the exact payload shape — no extra keys allowed.
    const expectedShape = {
      data: {
        stt: { available: expect.any(Boolean) },
        tts: { available: expect.any(Boolean) },
      },
    };
    expect(res.body).toEqual(expectedShape);
  });
});
});