/**
 * Audio output formats accepted by `SynthesizeDto.format`.
 * Also interpolated into the validation error message for that field.
 */
const VALID_AUDIO_FORMATS: readonly AudioFormat[] = [
  "mp3",
  "wav",
  "opus",
  "flac",
  "aac",
  "pcm",
] as const;

/**
 * Tier names accepted by `SynthesizeDto.tier`.
 * Also interpolated into the validation error message for that field.
 */
const VALID_SPEECH_TIERS: readonly SpeechTier[] = ["default", "premium", "fallback"] as const;

/**
 * Request body for text-to-speech synthesis.
 *
 * Only `text` is required; every other field is skipped by validation when
 * absent (`@IsOptional`) and is documented as falling back to the speech
 * config or tier defaults downstream.
 */
export class SynthesizeDto {
  /**
   * Text to convert to speech.
   *
   * This DTO only enforces "is a string" and an upper bound of 4096
   * characters. Empty / whitespace-only text is NOT rejected here; that check
   * lives in TextValidationPipe (NOTE(review): confirm the endpoint applies it).
   */
  @IsString({ message: "text must be a string" })
  @MaxLength(4096, { message: "text must not exceed 4096 characters" })
  text!: string; // definite-assignment: populated by the request deserializer, not a constructor

  /**
   * Voice ID to use for synthesis (at most 100 characters).
   * Available voices depend on the selected tier and provider.
   * If omitted, the default voice from speech config is used.
   */
  @IsOptional()
  @IsString({ message: "voice must be a string" })
  @MaxLength(100, { message: "voice must not exceed 100 characters" })
  voice?: string;

  /**
   * Speech speed multiplier, inclusive range 0.5 to 2.0.
   * 1.0 is normal speed, <1.0 is slower, >1.0 is faster.
   *
   * `@Type(() => Number)` asks class-transformer to coerce the incoming value
   * to a number before the numeric validators run (presumably so string
   * inputs such as "1.5" are accepted — requires transformation to be enabled
   * on the validation pipe; verify against the controller setup).
   */
  @IsOptional()
  @Type(() => Number)
  @IsNumber({}, { message: "speed must be a number" })
  @Min(0.5, { message: "speed must be at least 0.5" })
  @Max(2.0, { message: "speed must not exceed 2.0" })
  speed?: number;

  /**
   * Desired audio output format; must be one of VALID_AUDIO_FORMATS
   * (mp3, wav, opus, flac, aac, pcm).
   * If omitted, the default format from speech config is used.
   */
  @IsOptional()
  @IsString({ message: "format must be a string" })
  @IsIn(VALID_AUDIO_FORMATS, {
    message: `format must be one of: ${VALID_AUDIO_FORMATS.join(", ")}`,
  })
  format?: AudioFormat;

  /**
   * TTS tier to use for synthesis; must be one of VALID_SPEECH_TIERS.
   * Controls which provider is used: default (Kokoro), premium (Chatterbox), or fallback (Piper).
   * If the selected tier is unavailable, the service falls back to the next available tier.
   */
  @IsOptional()
  @IsString({ message: "tier must be a string" })
  @IsIn(VALID_SPEECH_TIERS, {
    message: `tier must be one of: ${VALID_SPEECH_TIERS.join(", ")}`,
  })
  tier?: SpeechTier;
}

// ---- apps/api/src/speech/dto/transcribe.dto.ts ----

/**
 * Optional parameters accompanying a speech-to-text transcription request.
 *
 * Every field is optional. The audio file itself is not part of this DTO; it
 * is handled by Multer (FileInterceptor) and validated by AudioValidationPipe.
 */
export class TranscribeDto {
  /**
   * Language code for transcription (e.g., "en", "fr", "de"), at most 10 characters.
   * Only the type and length are validated here, not the code itself.
   * If omitted, the default from speech config is used.
   */
  @IsOptional()
  @IsString({ message: "language must be a string" })
  @MaxLength(10, { message: "language must not exceed 10 characters" })
  language?: string;

  /**
   * Model override for transcription (at most 200 characters).
   * If omitted, the default model from speech config is used.
   */
  @IsOptional()
  @IsString({ message: "model must be a string" })
  @MaxLength(200, { message: "model must not exceed 200 characters" })
  model?: string;

  /**
   * Optional prompt to guide the transcription model (at most 1000 characters).
   * Useful for providing context or expected vocabulary.
   */
  @IsOptional()
  @IsString({ message: "prompt must be a string" })
  @MaxLength(1000, { message: "prompt must not exceed 1000 characters" })
  prompt?: string;

  /**
   * Temperature for transcription, inclusive range 0.0 to 1.0.
   * Lower values produce more deterministic results.
   *
   * `@Type(() => Number)` coerces the raw value to a number before validation
   * (presumably because form fields sent alongside the uploaded file arrive
   * as strings — verify against the controller setup).
   */
  @IsOptional()
  @Type(() => Number)
  @IsNumber({}, { message: "temperature must be a number" })
  @Min(0, { message: "temperature must be at least 0" })
  @Max(1, { message: "temperature must not exceed 1" })
  temperature?: number;
}
/**
 * Build a mock Express.Multer.File for the pipe under test.
 *
 * Defaults describe a small, valid MP3 upload; pass `overrides` to change only
 * the fields a test cares about (typically `mimetype` or `size`).
 */
function createMockFile(overrides: Partial<Express.Multer.File> = {}): Express.Multer.File {
  return {
    fieldname: "file",
    originalname: "test.mp3",
    encoding: "7bit",
    mimetype: "audio/mpeg",
    size: 1024,
    destination: "",
    filename: "",
    path: "",
    buffer: Buffer.from("fake-audio-data"),
    // The pipe never reads the stream, so no real Readable is constructed.
    stream: undefined as never,
    ...overrides,
  };
}

/**
 * Run `action`, require that it throws a BadRequestException, and return the
 * exception's message payload (the response itself when it is a string,
 * otherwise its `message` property).
 */
function captureBadRequestMessage(action: () => unknown): unknown {
  try {
    action();
  } catch (error) {
    expect(error).toBeInstanceOf(BadRequestException);
    const response = (error as BadRequestException).getResponse();
    return typeof response === "string"
      ? response
      : (response as Record<string, unknown>).message;
  }
  return expect.fail("Expected BadRequestException");
}

describe("AudioValidationPipe", () => {
  // ==========================================
  // Default config (25MB max)
  // ==========================================
  describe("with default config", () => {
    let pipe: AudioValidationPipe;

    beforeEach(() => {
      pipe = new AudioValidationPipe();
    });

    // ==========================================
    // MIME type validation
    // ==========================================
    describe("MIME type validation", () => {
      it.each([
        "audio/wav",
        "audio/mp3",
        "audio/mpeg",
        "audio/webm",
        "audio/ogg",
        "audio/flac",
        "audio/x-m4a",
      ])("should accept %s", (mimetype) => {
        const file = createMockFile({ mimetype });
        expect(pipe.transform(file)).toBe(file);
      });

      it("should reject unsupported MIME types with descriptive error", () => {
        const file = createMockFile({ mimetype: "video/mp4" });
        expect(() => pipe.transform(file)).toThrow(BadRequestException);
        expect(() => pipe.transform(file)).toThrow(/Unsupported audio format.*video\/mp4/);
      });

      it.each(["application/octet-stream", "text/plain", "image/png"])(
        "should reject %s",
        (mimetype) => {
          const file = createMockFile({ mimetype });
          expect(() => pipe.transform(file)).toThrow(BadRequestException);
        }
      );

      it("should include supported formats in error message", () => {
        const file = createMockFile({ mimetype: "video/mp4" });
        const message = captureBadRequestMessage(() => pipe.transform(file));
        expect(message).toContain("audio/wav");
        expect(message).toContain("audio/mpeg");
      });
    });

    // ==========================================
    // File size validation
    // ==========================================
    describe("file size validation", () => {
      it("should accept files under the size limit", () => {
        const file = createMockFile({ size: 1024 * 1024 }); // 1 MiB, well under the limit
        expect(pipe.transform(file)).toBe(file);
      });

      it("should accept files exactly at the size limit", () => {
        const file = createMockFile({ size: 25_000_000 }); // default limit, inclusive
        expect(pipe.transform(file)).toBe(file);
      });

      it("should reject files exceeding the size limit", () => {
        const file = createMockFile({ size: 25_000_001 }); // 1 byte over
        expect(() => pipe.transform(file)).toThrow(BadRequestException);
        expect(() => pipe.transform(file)).toThrow(/exceeds maximum/);
      });

      it("should include human-readable sizes in error message", () => {
        const file = createMockFile({ size: 30_000_000 });
        const message = captureBadRequestMessage(() => pipe.transform(file));
        // Only the unit suffix is asserted; the exact figures depend on the
        // pipe's byte-formatting convention.
        expect(message).toContain("MB");
      });

      it("should accept zero-size files (MIME check still applies)", () => {
        const file = createMockFile({ size: 0 });
        expect(pipe.transform(file)).toBe(file);
      });
    });

    // ==========================================
    // Edge cases
    // ==========================================
    describe("edge cases", () => {
      it("should throw if no file is provided (null)", () => {
        expect(() => pipe.transform(null as unknown as Express.Multer.File)).toThrow(
          BadRequestException
        );
        expect(() => pipe.transform(null as unknown as Express.Multer.File)).toThrow(
          /No audio file provided/
        );
      });

      it("should throw if no file is provided (undefined)", () => {
        expect(() => pipe.transform(undefined as unknown as Express.Multer.File)).toThrow(
          BadRequestException
        );
      });
    });
  });

  // ==========================================
  // Custom config
  // ==========================================
  describe("with custom config", () => {
    it("should use custom max file size", () => {
      const pipe = new AudioValidationPipe({ maxFileSize: 1_000_000 });
      const smallFile = createMockFile({ size: 500_000 });
      expect(pipe.transform(smallFile)).toBe(smallFile);

      const largeFile = createMockFile({ size: 1_000_001 });
      expect(() => pipe.transform(largeFile)).toThrow(BadRequestException);
    });

    it("should allow overriding accepted MIME types", () => {
      const pipe = new AudioValidationPipe({
        allowedMimeTypes: ["audio/wav"],
      });

      const wavFile = createMockFile({ mimetype: "audio/wav" });
      expect(pipe.transform(wavFile)).toBe(wavFile);

      // audio/mpeg is in the default list but not in the override.
      const mp3File = createMockFile({ mimetype: "audio/mpeg" });
      expect(() => pipe.transform(mp3File)).toThrow(BadRequestException);
    });
  });
});
/**
 * Default accepted MIME types for audio uploads.
 */
const DEFAULT_ALLOWED_MIME_TYPES: readonly string[] = [
  "audio/wav",
  "audio/mp3",
  "audio/mpeg",
  "audio/webm",
  "audio/ogg",
  "audio/flac",
  "audio/x-m4a",
] as const;

/**
 * Default maximum upload size in bytes (25 MB, decimal: 25 × 1,000,000).
 */
const DEFAULT_MAX_FILE_SIZE = 25_000_000;

/**
 * Decimal (SI) unit sizes used when rendering byte counts.
 * Decimal rather than 1024-based so that the default limit above is reported
 * as "25.0 MB", matching how it is documented, instead of "23.8 MB".
 */
const BYTES_PER_KB = 1_000;
const BYTES_PER_MB = 1_000_000;

/**
 * Options for customizing AudioValidationPipe behavior.
 */
export interface AudioValidationPipeOptions {
  /** Maximum file size in bytes (inclusive). Defaults to 25 MB (25,000,000 bytes). */
  maxFileSize?: number;

  /** List of accepted MIME types (exact, case-sensitive match). Defaults to common audio formats. */
  allowedMimeTypes?: string[];
}

/**
 * Format a byte count as a human-readable string using decimal units
 * (e.g., 25_000_000 -> "25.0 MB").
 *
 * @param bytes - Size in bytes
 * @returns Whole bytes below 1 KB ("999 B"); otherwise one decimal place in KB or MB
 */
function formatBytes(bytes: number): string {
  if (bytes < BYTES_PER_KB) {
    return `${String(bytes)} B`;
  }
  if (bytes < BYTES_PER_MB) {
    return `${(bytes / BYTES_PER_KB).toFixed(1)} KB`;
  }
  return `${(bytes / BYTES_PER_MB).toFixed(1)} MB`;
}

/**
 * NestJS pipe that validates an uploaded audio file.
 *
 * Checks, in order: that a file is present, that its MIME type is in the
 * allow-list, and that its size does not exceed the configured maximum.
 * The file is returned untouched when all checks pass.
 */
export class AudioValidationPipe implements PipeTransform {
  private readonly maxFileSize: number;
  private readonly allowedMimeTypes: readonly string[];

  /**
   * @param options - Optional overrides; any omitted field uses the module default
   */
  constructor(options?: AudioValidationPipeOptions) {
    this.maxFileSize = options?.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
    this.allowedMimeTypes = options?.allowedMimeTypes ?? DEFAULT_ALLOWED_MIME_TYPES;
  }

  /**
   * Validate the uploaded file's MIME type and size.
   *
   * @param file - The uploaded file from Multer
   * @returns The validated file, unchanged
   * @throws {BadRequestException} If the file is missing, has an unsupported MIME type, or exceeds the size limit
   */
  transform(file: Express.Multer.File | undefined): Express.Multer.File {
    // Also covers null at runtime, even though the type only admits undefined.
    if (!file) {
      throw new BadRequestException("No audio file provided");
    }

    // Validate MIME type (exact string match against the allow-list)
    if (!this.allowedMimeTypes.includes(file.mimetype)) {
      throw new BadRequestException(
        `Unsupported audio format: ${file.mimetype}. ` +
          `Supported formats: ${this.allowedMimeTypes.join(", ")}`
      );
    }

    // Validate file size; a file exactly at the limit is accepted
    if (file.size > this.maxFileSize) {
      throw new BadRequestException(
        `File size ${formatBytes(file.size)} exceeds maximum allowed size of ${formatBytes(this.maxFileSize)}`
      );
    }

    return file;
  }
}
describe("TextValidationPipe", () => {
  // ==========================================
  // Default config (4096 max length)
  // ==========================================
  describe("with default config", () => {
    let pipe: TextValidationPipe;

    beforeEach(() => {
      pipe = new TextValidationPipe();
    });

    // ==========================================
    // Valid text
    // ==========================================
    describe("valid text", () => {
      it("should accept normal text", () => {
        const text = "Hello, world!";
        expect(pipe.transform(text)).toBe(text);
      });

      it("should accept text at exactly the max length", () => {
        const text = "a".repeat(4096);
        expect(pipe.transform(text)).toBe(text);
      });

      it("should accept single character text", () => {
        expect(pipe.transform("a")).toBe("a");
      });

      it("should accept text with unicode characters", () => {
        const text = "Hello, world! 你好世界";
        expect(pipe.transform(text)).toBe(text);
      });

      it("should accept multi-line text", () => {
        const text = "Line one.\nLine two.\nLine three.";
        expect(pipe.transform(text)).toBe(text);
      });
    });

    // ==========================================
    // Text length validation
    // ==========================================
    describe("text length validation", () => {
      it("should reject text exceeding max length", () => {
        const text = "a".repeat(4097); // one character over the default limit
        expect(() => pipe.transform(text)).toThrow(BadRequestException);
        expect(() => pipe.transform(text)).toThrow(/exceeds maximum/);
      });

      it("should include length details in error message", () => {
        const text = "a".repeat(5000);
        try {
          pipe.transform(text);
          expect.fail("Expected BadRequestException");
        } catch (error) {
          expect(error).toBeInstanceOf(BadRequestException);
          const response = (error as BadRequestException).getResponse();
          // getResponse() is either the bare message string or an object carrying it.
          const message =
            typeof response === "string"
              ? response
              : (response as Record<string, unknown>).message;
          expect(message).toContain("5000");
          expect(message).toContain("4096");
        }
      });
    });

    // ==========================================
    // Empty text validation
    // ==========================================
    describe("empty text validation", () => {
      it("should reject empty string", () => {
        expect(() => pipe.transform("")).toThrow(BadRequestException);
        expect(() => pipe.transform("")).toThrow(/Text cannot be empty/);
      });

      it("should reject whitespace-only string", () => {
        expect(() => pipe.transform(" ")).toThrow(BadRequestException);
        expect(() => pipe.transform(" ")).toThrow(/Text cannot be empty/);
      });

      it("should reject tabs and newlines only", () => {
        expect(() => pipe.transform("\t\n\r")).toThrow(BadRequestException);
      });

      it("should reject null", () => {
        expect(() => pipe.transform(null as unknown as string)).toThrow(BadRequestException);
      });

      it("should reject undefined", () => {
        expect(() => pipe.transform(undefined as unknown as string)).toThrow(BadRequestException);
      });
    });

    // ==========================================
    // Text with leading/trailing whitespace
    // ==========================================
    describe("whitespace handling", () => {
      it("should accept text with leading/trailing whitespace (preserves it)", () => {
        const text = " Hello, world! ";
        expect(pipe.transform(text)).toBe(text);
      });
    });
  });

  // ==========================================
  // Custom config
  // ==========================================
  describe("with custom config", () => {
    it("should use custom max text length", () => {
      const pipe = new TextValidationPipe({ maxTextLength: 100 });

      const shortText = "Hello";
      expect(pipe.transform(shortText)).toBe(shortText);

      const longText = "a".repeat(101);
      expect(() => pipe.transform(longText)).toThrow(BadRequestException);
    });

    it("should accept text at exact custom limit", () => {
      const pipe = new TextValidationPipe({ maxTextLength: 50 });
      const text = "a".repeat(50);
      expect(pipe.transform(text)).toBe(text);
    });
  });
});
/**
 * Default maximum text length for TTS input (4096 characters).
 */
const DEFAULT_MAX_TEXT_LENGTH = 4096;

/**
 * Options for customizing TextValidationPipe behavior.
 */
export interface TextValidationPipeOptions {
  /** Maximum text length in characters (inclusive). Defaults to 4096. */
  maxTextLength?: number;
}

/**
 * NestJS pipe that validates text input for TTS synthesis.
 *
 * Rejects missing, non-string, empty, whitespace-only, and over-long input
 * with a BadRequestException; valid text is returned exactly as received
 * (leading/trailing whitespace is preserved).
 */
export class TextValidationPipe implements PipeTransform {
  private readonly maxTextLength: number;

  /**
   * @param options - Optional overrides; `maxTextLength` defaults to 4096
   */
  constructor(options?: TextValidationPipeOptions) {
    this.maxTextLength = options?.maxTextLength ?? DEFAULT_MAX_TEXT_LENGTH;
  }

  /**
   * Validate the text input for TTS synthesis.
   *
   * The parameter is typed `unknown` because the value comes straight from the
   * request body and may be any JSON value, not just a string.
   *
   * @param text - The value to validate
   * @returns The validated text, unchanged
   * @throws {BadRequestException} If text is missing, not a string, empty,
   *   whitespace-only, or exceeds the max length
   */
  transform(text: unknown): string {
    if (text === null || text === undefined) {
      throw new BadRequestException("Text cannot be empty");
    }

    // Without this guard a number/object/array body value would reach
    // `.trim()` and fail with a TypeError (HTTP 500) instead of a 400.
    if (typeof text !== "string") {
      throw new BadRequestException("Text must be a string");
    }

    // Trim only for the emptiness check; the original text is what gets returned.
    if (text.trim().length === 0) {
      throw new BadRequestException("Text cannot be empty");
    }

    // Length is measured on the untrimmed string (UTF-16 code units).
    if (text.length > this.maxTextLength) {
      throw new BadRequestException(
        `Text length ${String(text.length)} exceeds maximum allowed length of ${String(this.maxTextLength)} characters`
      );
    }

    return text;
  }
}