feat(#398): add audio/text validation pipes and speech DTOs
All checks were successful
ci/woodpecker/push/api Pipeline was successful

Create AudioValidationPipe for MIME type and file size validation,
TextValidationPipe for TTS text input validation, and DTOs for
transcribe/synthesize endpoints. Includes 36 unit tests.

Fixes #398
This commit is contained in:
2026-02-15 02:37:54 -06:00
parent d37c78f503
commit 7b4fda6011
8 changed files with 665 additions and 0 deletions

View File

@@ -0,0 +1,8 @@
/**
* Speech DTOs barrel export
*
* Issue #398
*/
export { TranscribeDto } from "./transcribe.dto";
export { SynthesizeDto } from "./synthesize.dto";

View File

@@ -0,0 +1,85 @@
/**
* SynthesizeDto
*
* DTO for text-to-speech synthesis requests.
* The text field is checked here for string type and a 4096-character maximum;
* TextValidationPipe additionally validates it for length/emptiness.
* Additional options control voice, speed, format, and tier selection.
*
* Issue #398
*/
import { IsString, IsOptional, IsNumber, IsIn, Min, Max, MaxLength } from "class-validator";
import { Type } from "class-transformer";
import type { AudioFormat, SpeechTier } from "../interfaces/speech-types";
/**
 * Allow-list of audio output formats accepted for TTS synthesis.
 * Consumed by the `@IsIn` check on `SynthesizeDto.format` and by its
 * error message, which joins these values with ", ".
 */
const VALID_AUDIO_FORMATS: readonly AudioFormat[] = ["mp3", "wav", "opus", "flac", "aac", "pcm"];
/**
 * Allow-list of TTS tiers a request may select.
 * Consumed by the `@IsIn` check on `SynthesizeDto.tier` and by its
 * error message, which joins these values with ", ".
 */
const VALID_SPEECH_TIERS: readonly SpeechTier[] = [
  "default",
  "premium",
  "fallback",
];
/**
 * Request body for text-to-speech synthesis.
 *
 * Only `text` is required; every other field is optional and is skipped by
 * validation when it is `undefined` or `null` (`@IsOptional`).
 */
export class SynthesizeDto {
  /**
   * The text to synthesize.
   *
   * Must be a string of at most 4096 characters. Emptiness is not checked
   * here; per the module docs that is left to TextValidationPipe.
   */
  @IsString({ message: "text must be a string" })
  @MaxLength(4096, { message: "text must not exceed 4096 characters" })
  text!: string;

  /**
   * Identifier of the voice to synthesize with (string, max 100 characters).
   *
   * Which voices exist depends on the tier/provider; when omitted the
   * default voice from the speech config is expected to apply.
   */
  @IsOptional()
  @IsString({ message: "voice must be a string" })
  @MaxLength(100, { message: "voice must not exceed 100 characters" })
  voice?: string;

  /**
   * Playback speed multiplier, constrained to the inclusive range 0.5–2.0.
   *
   * 1.0 is normal speed; lower is slower, higher is faster.
   * `@Type(() => Number)` asks class-transformer to coerce the incoming
   * value to a number before the numeric checks run.
   */
  @IsOptional()
  @Type(() => Number)
  @IsNumber({}, { message: "speed must be a number" })
  @Min(0.5, { message: "speed must be at least 0.5" })
  @Max(2, { message: "speed must not exceed 2.0" })
  speed?: number;

  /**
   * Requested audio output format; must be one of VALID_AUDIO_FORMATS
   * (mp3, wav, opus, flac, aac, pcm).
   *
   * When omitted the default format from the speech config is expected to apply.
   */
  @IsOptional()
  @IsString({ message: "format must be a string" })
  @IsIn(VALID_AUDIO_FORMATS, {
    message: "format must be one of: " + VALID_AUDIO_FORMATS.join(", "),
  })
  format?: AudioFormat;

  /**
   * TTS tier to synthesize with; must be one of VALID_SPEECH_TIERS.
   *
   * The tier selects the provider: default (Kokoro), premium (Chatterbox),
   * or fallback (Piper). If the chosen tier is unavailable, the service is
   * documented to fall back to the next available tier.
   */
  @IsOptional()
  @IsString({ message: "tier must be a string" })
  @IsIn(VALID_SPEECH_TIERS, {
    message: "tier must be one of: " + VALID_SPEECH_TIERS.join(", "),
  })
  tier?: SpeechTier;
}

View File

@@ -0,0 +1,54 @@
/**
* TranscribeDto
*
* DTO for speech-to-text transcription requests.
* Supports optional language and model overrides.
*
* The audio file itself is handled by Multer (FileInterceptor)
* and validated by AudioValidationPipe.
*
* Issue #398
*/
import { IsString, IsOptional, IsNumber, Min, Max, MaxLength } from "class-validator";
import { Type } from "class-transformer";
/**
 * Request fields accompanying a speech-to-text transcription upload.
 *
 * Every field is optional and is skipped by validation when it is
 * `undefined` or `null` (`@IsOptional`). The audio file itself is not part
 * of this DTO.
 */
export class TranscribeDto {
  /**
   * Language code of the audio, e.g. "en", "fr", "de"
   * (string, max 10 characters).
   *
   * When omitted the default from the speech config is expected to apply.
   */
  @IsOptional()
  @IsString({ message: "language must be a string" })
  @MaxLength(10, { message: "language must not exceed 10 characters" })
  language?: string;

  /**
   * Name of the transcription model to use instead of the configured
   * default (string, max 200 characters).
   */
  @IsOptional()
  @IsString({ message: "model must be a string" })
  @MaxLength(200, { message: "model must not exceed 200 characters" })
  model?: string;

  /**
   * Free-form hint passed to the transcription model, e.g. context or
   * expected vocabulary (string, max 1000 characters).
   */
  @IsOptional()
  @IsString({ message: "prompt must be a string" })
  @MaxLength(1000, { message: "prompt must not exceed 1000 characters" })
  prompt?: string;

  /**
   * Sampling temperature, constrained to the inclusive range 0–1.
   * Lower values give more deterministic output.
   *
   * `@Type(() => Number)` asks class-transformer to coerce the incoming
   * value to a number before the numeric checks run.
   */
  @IsOptional()
  @Type(() => Number)
  @IsNumber({}, { message: "temperature must be a number" })
  @Min(0, { message: "temperature must be at least 0" })
  @Max(1, { message: "temperature must not exceed 1" })
  temperature?: number;
}