fix(#388): address PR review findings — fix WebSocket/REST bugs, improve error handling, fix types and comments

Critical fixes:
- Fix FormData field name mismatch (audio -> file) to match backend FileInterceptor
- Add /speech namespace to WebSocket connection URL
- Pass auth token in WebSocket handshake options
- Wrap audio.play() in try-catch for NotAllowedError and DOMException handling
- Replace bare catch block with named error parameter and descriptive message
- Add connect_error and disconnect event handlers to WebSocket
- Update JSDoc to accurately describe batch transcription (not real-time partial)

Important fixes:
- Emit transcription-error before disconnect in gateway auth failures
- Capture MediaRecorder error details and clean up media tracks on error
- Change TtsDefaultConfig.format type from string to AudioFormat
- Define canonical SPEECH_TIERS and AUDIO_FORMATS arrays as single source of truth
- Fix voice count from 54 to 53 in provider, AGENTS.md, and docs
- Fix inaccurate comments (Piper formats, tier prop, SpeachesProvider, TextValidationPipe)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 03:44:33 -06:00
parent dcbc8d1053
commit af9c5799af
14 changed files with 91 additions and 53 deletions
--- a/apps/api/src/speech/dto/synthesize.dto.ts
+++ b/apps/api/src/speech/dto/synthesize.dto.ts
@@ -2,7 +2,7 @@
 * SynthesizeDto
 *
 * DTO for text-to-speech synthesis requests.
- * The text field is validated by TextValidationPipe for length/emptiness.
+ * Text and option fields are validated by class-validator decorators.
 * Additional options control voice, speed, format, and tier selection.
 *
 * Issue #398
@@ -10,29 +10,13 @@

 import { IsString, IsOptional, IsNumber, IsIn, Min, Max, MaxLength } from "class-validator";
 import { Type } from "class-transformer";
+import { AUDIO_FORMATS, SPEECH_TIERS } from "../interfaces/speech-types";
 import type { AudioFormat, SpeechTier } from "../interfaces/speech-types";

-/**
- * Valid audio output formats for TTS synthesis.
- */
-const VALID_AUDIO_FORMATS: readonly AudioFormat[] = [
-  "mp3",
-  "wav",
-  "opus",
-  "flac",
-  "aac",
-  "pcm",
-] as const;
-
-/**
- * Valid TTS tiers for provider selection.
- */
-const VALID_SPEECH_TIERS: readonly SpeechTier[] = ["default", "premium", "fallback"] as const;
-
 export class SynthesizeDto {
  /**
   * Text to convert to speech.
-   * Validated separately by TextValidationPipe for length and emptiness.
+   * Validated by class-validator decorators for type and maximum length.
   */
  @IsString({ message: "text must be a string" })
  @MaxLength(4096, { message: "text must not exceed 4096 characters" })
@@ -66,8 +50,8 @@ export class SynthesizeDto {
   */
  @IsOptional()
  @IsString({ message: "format must be a string" })
-  @IsIn(VALID_AUDIO_FORMATS, {
-    message: `format must be one of: ${VALID_AUDIO_FORMATS.join(", ")}`,
+  @IsIn(AUDIO_FORMATS, {
+    message: `format must be one of: ${AUDIO_FORMATS.join(", ")}`,
  })
  format?: AudioFormat;

@@ -78,8 +62,8 @@ export class SynthesizeDto {
   */
  @IsOptional()
  @IsString({ message: "tier must be a string" })
-  @IsIn(VALID_SPEECH_TIERS, {
-    message: `tier must be one of: ${VALID_SPEECH_TIERS.join(", ")}`,
+  @IsIn(SPEECH_TIERS, {
+    message: `tier must be one of: ${SPEECH_TIERS.join(", ")}`,
  })
  tier?: SpeechTier;
 }