fix(#388): address PR review findings — fix WebSocket/REST bugs, improve error handling, fix types and comments
All checks were successful
ci/woodpecker/push/web Pipeline was successful
ci/woodpecker/push/api Pipeline was successful

Critical fixes:
- Fix FormData field name mismatch (audio -> file) to match backend FileInterceptor
- Add /speech namespace to WebSocket connection URL
- Pass auth token in WebSocket handshake options
- Wrap audio.play() in try-catch to handle NotAllowedError and other DOMException failures
- Replace bare catch block with named error parameter and descriptive message
- Add connect_error and disconnect event handlers to WebSocket
- Update JSDoc to accurately describe batch transcription (not real-time partial results)

Important fixes:
- Emit transcription-error before disconnect in gateway auth failures
- Capture MediaRecorder error details and clean up media tracks on error
- Change TtsDefaultConfig.format type from string to AudioFormat
- Define canonical SPEECH_TIERS and AUDIO_FORMATS arrays as single source of truth
- Fix voice count from 54 to 53 in provider, AGENTS.md, and docs
- Fix inaccurate comments (Piper formats, tier prop, SpeachesProvider, TextValidationPipe)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-15 03:44:33 -06:00
parent dcbc8d1053
commit af9c5799af
14 changed files with 91 additions and 53 deletions

View File

@@ -2,7 +2,7 @@
* SynthesizeDto
*
* DTO for text-to-speech synthesis requests.
* The text field is validated by TextValidationPipe for length/emptiness.
* Text and option fields are validated by class-validator decorators.
* Additional options control voice, speed, format, and tier selection.
*
* Issue #398
@@ -10,29 +10,13 @@
import { IsString, IsOptional, IsNumber, IsIn, Min, Max, MaxLength } from "class-validator";
import { Type } from "class-transformer";
import { AUDIO_FORMATS, SPEECH_TIERS } from "../interfaces/speech-types";
import type { AudioFormat, SpeechTier } from "../interfaces/speech-types";
/**
* Valid audio output formats for TTS synthesis.
*/
const VALID_AUDIO_FORMATS: readonly AudioFormat[] = [
"mp3",
"wav",
"opus",
"flac",
"aac",
"pcm",
] as const;
/**
* Valid TTS tiers for provider selection.
*/
const VALID_SPEECH_TIERS: readonly SpeechTier[] = ["default", "premium", "fallback"] as const;
export class SynthesizeDto {
/**
* Text to convert to speech.
* Validated separately by TextValidationPipe for length and emptiness.
* Validated by class-validator decorators for type and maximum length.
*/
@IsString({ message: "text must be a string" })
@MaxLength(4096, { message: "text must not exceed 4096 characters" })
@@ -66,8 +50,8 @@ export class SynthesizeDto {
*/
@IsOptional()
@IsString({ message: "format must be a string" })
@IsIn(VALID_AUDIO_FORMATS, {
message: `format must be one of: ${VALID_AUDIO_FORMATS.join(", ")}`,
@IsIn(AUDIO_FORMATS, {
message: `format must be one of: ${AUDIO_FORMATS.join(", ")}`,
})
format?: AudioFormat;
@@ -78,8 +62,8 @@ export class SynthesizeDto {
*/
@IsOptional()
@IsString({ message: "tier must be a string" })
@IsIn(VALID_SPEECH_TIERS, {
message: `tier must be one of: ${VALID_SPEECH_TIERS.join(", ")}`,
@IsIn(SPEECH_TIERS, {
message: `tier must be one of: ${SPEECH_TIERS.join(", ")}`,
})
tier?: SpeechTier;
}