feat(#398): add audio/text validation pipes and speech DTOs

Create AudioValidationPipe for MIME type and file size validation, TextValidationPipe for TTS text input validation, and DTOs for transcribe/synthesize endpoints. Includes 36 unit tests. Fixes #398.
2026-02-15 02:37:54 -06:00
parent d37c78f503
commit 7b4fda6011
8 changed files with 665 additions and 0 deletions
--- a/apps/api/src/speech/dto/index.ts
+++ b/apps/api/src/speech/dto/index.ts
@@ -0,0 +1,8 @@
+/**
+ * Speech DTOs barrel export
+ *
+ * Issue #398
+ */
+
+export { TranscribeDto } from "./transcribe.dto";
+export { SynthesizeDto } from "./synthesize.dto";
--- a/apps/api/src/speech/dto/synthesize.dto.ts
+++ b/apps/api/src/speech/dto/synthesize.dto.ts
@@ -0,0 +1,85 @@
+/**
+ * SynthesizeDto
+ *
+ * DTO for text-to-speech synthesis requests.
+ * The text field is capped at 4096 characters here and is also validated by TextValidationPipe for length/emptiness.
+ * Additional options control voice, speed, format, and tier selection.
+ *
+ * Issue #398
+ */
+
+import { IsString, IsOptional, IsNumber, IsIn, Min, Max, MaxLength } from "class-validator";
+import { Type } from "class-transformer";
+import type { AudioFormat, SpeechTier } from "../interfaces/speech-types";
+
+/**
+ * Allow-list of TTS output formats; backs the format field's IsIn check and its error message.
+ */
+const VALID_AUDIO_FORMATS: readonly AudioFormat[] = [
+  "mp3",
+  "wav",
+  "opus",
+  "flac",
+  "aac",
+  "pcm",
+] as const;
+
+/**
+ * Allow-list of TTS tiers; backs the tier field's IsIn check and its error message.
+ */
+const VALID_SPEECH_TIERS: readonly SpeechTier[] = ["default", "premium", "fallback"] as const;
+
+export class SynthesizeDto {
+  /**
+   * Text to convert to speech. Required: must be a string of at most 4096 characters.
+   * TextValidationPipe (not shown here) is expected to check length and emptiness separately.
+   */
+  @IsString({ message: "text must be a string" })
+  @MaxLength(4096, { message: "text must not exceed 4096 characters" })
+  text!: string;
+
+  /**
+   * Optional voice ID to use for synthesis; a string of at most 100 characters.
+   * Available voices depend on the selected tier and provider.
+   * If omitted, the default voice from speech config is used.
+   */
+  @IsOptional()
+  @IsString({ message: "voice must be a string" })
+  @MaxLength(100, { message: "voice must not exceed 100 characters" })
+  voice?: string;
+
+  /**
+   * Optional speech speed multiplier, 0.5 to 2.0 inclusive (typed as Number for class-transformer).
+   * 1.0 is normal speed, <1.0 is slower, >1.0 is faster.
+   */
+  @IsOptional()
+  @Type(() => Number)
+  @IsNumber({}, { message: "speed must be a number" })
+  @Min(0.5, { message: "speed must be at least 0.5" })
+  @Max(2.0, { message: "speed must not exceed 2.0" })
+  speed?: number;
+
+  /**
+   * Optional audio output format; must be one of VALID_AUDIO_FORMATS.
+   * Supported: mp3, wav, opus, flac, aac, pcm.
+   * If omitted, the default format from speech config is used.
+   */
+  @IsOptional()
+  @IsString({ message: "format must be a string" })
+  @IsIn(VALID_AUDIO_FORMATS, {
+    message: `format must be one of: ${VALID_AUDIO_FORMATS.join(", ")}`,
+  })
+  format?: AudioFormat;
+
+  /**
+   * Optional TTS tier to use for synthesis; must be one of VALID_SPEECH_TIERS.
+   * Controls which provider is used: default (Kokoro), premium (Chatterbox), or fallback (Piper).
+   * If the selected tier is unavailable, the service falls back to the next available tier.
+   */
+  @IsOptional()
+  @IsString({ message: "tier must be a string" })
+  @IsIn(VALID_SPEECH_TIERS, {
+    message: `tier must be one of: ${VALID_SPEECH_TIERS.join(", ")}`,
+  })
+  tier?: SpeechTier;
+}
--- a/apps/api/src/speech/dto/transcribe.dto.ts
+++ b/apps/api/src/speech/dto/transcribe.dto.ts
@@ -0,0 +1,54 @@
+/**
+ * TranscribeDto
+ *
+ * DTO for speech-to-text transcription requests.
+ * Supports optional language and model overrides.
+ *
+ * The audio file itself is handled by Multer (FileInterceptor)
+ * and validated by AudioValidationPipe.
+ *
+ * Issue #398
+ */
+
+import { IsString, IsOptional, IsNumber, Min, Max, MaxLength } from "class-validator";
+import { Type } from "class-transformer";
+
+export class TranscribeDto {
+  /**
+   * Optional language code for transcription (e.g., "en", "fr", "de"); at most 10 characters.
+   * If omitted, the default from speech config is used.
+   */
+  @IsOptional()
+  @IsString({ message: "language must be a string" })
+  @MaxLength(10, { message: "language must not exceed 10 characters" })
+  language?: string;
+
+  /**
+   * Optional model override for transcription; a string of at most 200 characters.
+   * If omitted, the default model from speech config is used.
+   */
+  @IsOptional()
+  @IsString({ message: "model must be a string" })
+  @MaxLength(200, { message: "model must not exceed 200 characters" })
+  model?: string;
+
+  /**
+   * Optional prompt to guide the transcription model; at most 1000 characters.
+   * Useful for providing context or expected vocabulary.
+   */
+  @IsOptional()
+  @IsString({ message: "prompt must be a string" })
+  @MaxLength(1000, { message: "prompt must not exceed 1000 characters" })
+  prompt?: string;
+
+  /**
+   * Optional transcription temperature, 0 to 1 inclusive (typed as Number for class-transformer).
+   * Lower values produce more deterministic results.
+   */
+  @IsOptional()
+  @Type(() => Number)
+  @IsNumber({}, { message: "temperature must be a number" })
+  @Min(0, { message: "temperature must be at least 0" })
+  @Max(1, { message: "temperature must not exceed 1" })
+  temperature?: number;
+}