feat(#398): add audio/text validation pipes and speech DTOs
All checks were successful
ci/woodpecker/push/api Pipeline was successful
All checks were successful
ci/woodpecker/push/api Pipeline was successful
Create AudioValidationPipe for MIME type and file size validation, TextValidationPipe for TTS text input validation, and DTOs for transcribe/synthesize endpoints. Includes 36 unit tests. Fixes #398
This commit is contained in:
8
apps/api/src/speech/dto/index.ts
Normal file
8
apps/api/src/speech/dto/index.ts
Normal file
@@ -0,0 +1,8 @@
|
||||
/**
 * Speech DTOs barrel export.
 *
 * Re-exports the request DTOs of the speech module so that they can be
 * imported from this directory's index instead of from the individual files.
 *
 * Issue #398
 */

export { TranscribeDto } from "./transcribe.dto";
export { SynthesizeDto } from "./synthesize.dto";
|
||||
85
apps/api/src/speech/dto/synthesize.dto.ts
Normal file
85
apps/api/src/speech/dto/synthesize.dto.ts
Normal file
@@ -0,0 +1,85 @@
|
||||
/**
 * SynthesizeDto
 *
 * DTO for text-to-speech synthesis requests.
 * The text field is validated by TextValidationPipe for length/emptiness.
 * Additional options control voice, speed, format, and tier selection.
 *
 * Issue #398
 */
|
||||
|
||||
import { IsString, IsOptional, IsNumber, IsIn, Min, Max, MaxLength } from "class-validator";
|
||||
import { Type } from "class-transformer";
|
||||
import type { AudioFormat, SpeechTier } from "../interfaces/speech-types";
|
||||
|
||||
/**
 * Valid audio output formats for TTS synthesis.
 *
 * Serves as the allow-list for the `format` field of SynthesizeDto and is
 * joined into that field's validation error message.
 */
const VALID_AUDIO_FORMATS: readonly AudioFormat[] = [
  "mp3",
  "wav",
  "opus",
  "flac",
  "aac",
  "pcm",
] as const;
|
||||
|
||||
/**
 * Valid TTS tiers for provider selection.
 *
 * Serves as the allow-list for the `tier` field of SynthesizeDto and is
 * joined into that field's validation error message.
 */
const VALID_SPEECH_TIERS: readonly SpeechTier[] = ["default", "premium", "fallback"] as const;
|
||||
|
||||
/**
 * Request body for a text-to-speech synthesis call.
 *
 * Only `text` is required; every other field is optional and, when omitted,
 * is skipped by validation (`@IsOptional`). Constraints are declared with
 * class-validator decorators, and `speed` is coerced to a number through
 * class-transformer's `@Type` before it is range-checked.
 */
export class SynthesizeDto {
  /**
   * Text to convert to speech.
   *
   * This DTO enforces the string type and a 4096-character upper bound.
   * Length and emptiness are additionally validated by TextValidationPipe
   * (defined outside this file).
   */
  @IsString({ message: "text must be a string" })
  @MaxLength(4096, { message: "text must not exceed 4096 characters" })
  text!: string;

  /**
   * Voice ID to use for synthesis (at most 100 characters).
   *
   * Available voices depend on the selected tier and provider.
   * If omitted, the default voice from speech config is used.
   */
  @IsOptional()
  @IsString({ message: "voice must be a string" })
  @MaxLength(100, { message: "voice must not exceed 100 characters" })
  voice?: string;

  /**
   * Speech speed multiplier, inclusive range 0.5 to 2.0.
   *
   * 1.0 is normal speed, <1.0 is slower, >1.0 is faster.
   * `@Type(() => Number)` converts the incoming value to a number when the
   * payload is transformed, so the numeric checks below run on a number.
   */
  @IsOptional()
  @Type(() => Number)
  @IsNumber({}, { message: "speed must be a number" })
  @Min(0.5, { message: "speed must be at least 0.5" })
  @Max(2.0, { message: "speed must not exceed 2.0" })
  speed?: number;

  /**
   * Desired audio output format.
   *
   * Must be one of VALID_AUDIO_FORMATS: mp3, wav, opus, flac, aac, pcm.
   * If omitted, the default format from speech config is used.
   */
  @IsOptional()
  @IsString({ message: "format must be a string" })
  @IsIn(VALID_AUDIO_FORMATS, {
    message: `format must be one of: ${VALID_AUDIO_FORMATS.join(", ")}`,
  })
  format?: AudioFormat;

  /**
   * TTS tier to use for synthesis; must be one of VALID_SPEECH_TIERS.
   *
   * Controls which provider is used: default (Kokoro), premium (Chatterbox), or fallback (Piper).
   * If the selected tier is unavailable, the service falls back to the next available tier.
   */
  @IsOptional()
  @IsString({ message: "tier must be a string" })
  @IsIn(VALID_SPEECH_TIERS, {
    message: `tier must be one of: ${VALID_SPEECH_TIERS.join(", ")}`,
  })
  tier?: SpeechTier;
}
|
||||
54
apps/api/src/speech/dto/transcribe.dto.ts
Normal file
54
apps/api/src/speech/dto/transcribe.dto.ts
Normal file
@@ -0,0 +1,54 @@
|
||||
/**
 * TranscribeDto
 *
 * DTO for speech-to-text transcription requests.
 * Supports optional language and model overrides.
 *
 * The audio file itself is handled by Multer (FileInterceptor)
 * and validated by AudioValidationPipe.
 *
 * Issue #398
 */
|
||||
|
||||
import { IsString, IsOptional, IsNumber, Min, Max, MaxLength } from "class-validator";
|
||||
import { Type } from "class-transformer";
|
||||
|
||||
/**
 * Optional parameters that accompany a speech-to-text transcription request.
 *
 * Every field is optional and is skipped by validation when omitted
 * (`@IsOptional`). The audio payload is not part of this DTO.
 * `temperature` is coerced to a number through class-transformer's `@Type`
 * before it is range-checked.
 */
export class TranscribeDto {
  /**
   * Language code for transcription (e.g., "en", "fr", "de"), at most
   * 10 characters.
   *
   * If omitted, the default from speech config is used.
   */
  @IsOptional()
  @IsString({ message: "language must be a string" })
  @MaxLength(10, { message: "language must not exceed 10 characters" })
  language?: string;

  /**
   * Model override for transcription (at most 200 characters).
   *
   * If omitted, the default model from speech config is used.
   */
  @IsOptional()
  @IsString({ message: "model must be a string" })
  @MaxLength(200, { message: "model must not exceed 200 characters" })
  model?: string;

  /**
   * Optional prompt to guide the transcription model (at most 1000
   * characters).
   *
   * Useful for providing context or expected vocabulary.
   */
  @IsOptional()
  @IsString({ message: "prompt must be a string" })
  @MaxLength(1000, { message: "prompt must not exceed 1000 characters" })
  prompt?: string;

  /**
   * Temperature for transcription, inclusive range 0.0 to 1.0.
   *
   * Lower values produce more deterministic results.
   * `@Type(() => Number)` converts the incoming value to a number when the
   * payload is transformed, so the numeric checks below run on a number.
   */
  @IsOptional()
  @Type(() => Number)
  @IsNumber({}, { message: "temperature must be a number" })
  @Min(0, { message: "temperature must be at least 0" })
  @Max(1, { message: "temperature must not exceed 1" })
  temperature?: number;
}
|
||||
Reference in New Issue
Block a user