fix(#388): address PR review findings — fix WebSocket/REST bugs, improve error handling, fix types and comments

Critical fixes:
- Fix FormData field name mismatch (audio -> file) to match backend FileInterceptor
- Add /speech namespace to WebSocket connection URL
- Pass auth token in WebSocket handshake options
- Wrap audio.play() in try-catch for NotAllowedError and DOMException handling
- Replace bare catch block with named error parameter and descriptive message
- Add connect_error and disconnect event handlers to WebSocket
- Update JSDoc to accurately describe batch transcription (not real-time partial)

Important fixes:
- Emit transcription-error before disconnect in gateway auth failures
- Capture MediaRecorder error details and clean up media tracks on error
- Change TtsDefaultConfig.format type from string to AudioFormat
- Define canonical SPEECH_TIERS and AUDIO_FORMATS arrays as single source of truth
- Fix voice count from 54 to 53 in provider, AGENTS.md, and docs
- Fix inaccurate comments (Piper formats, tier prop, SpeachesProvider, TextValidationPipe)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 03:44:33 -06:00
parent dcbc8d1053
commit af9c5799af
14 changed files with 91 additions and 53 deletions
--- a/apps/api/src/speech/interfaces/index.ts
+++ b/apps/api/src/speech/interfaces/index.ts
@@ -6,6 +6,7 @@

 export type { ISTTProvider } from "./stt-provider.interface";
 export type { ITTSProvider } from "./tts-provider.interface";
+export { SPEECH_TIERS, AUDIO_FORMATS } from "./speech-types";
 export type {
  SpeechTier,
  AudioFormat,
--- a/apps/api/src/speech/interfaces/speech-types.ts
+++ b/apps/api/src/speech/interfaces/speech-types.ts
@@ -12,19 +12,21 @@
 // ==========================================

 /**
- * TTS provider tier.
+ * Canonical array of TTS provider tiers.
 * Determines which TTS engine is used for synthesis.
 *
 * - default: Primary TTS engine (e.g., Kokoro)
 * - premium: Higher quality TTS engine (e.g., Chatterbox)
 * - fallback: Backup TTS engine (e.g., Piper/OpenedAI)
 */
-export type SpeechTier = "default" | "premium" | "fallback";
+export const SPEECH_TIERS = ["default", "premium", "fallback"] as const;
+export type SpeechTier = (typeof SPEECH_TIERS)[number];

 /**
- * Audio output format for TTS synthesis.
+ * Canonical array of audio output formats for TTS synthesis.
 */
-export type AudioFormat = "mp3" | "wav" | "opus" | "flac" | "aac" | "pcm";
+export const AUDIO_FORMATS = ["mp3", "wav", "opus", "flac", "aac", "pcm"] as const;
+export type AudioFormat = (typeof AUDIO_FORMATS)[number];

 // ==========================================
 // STT Types
--- a/apps/api/src/speech/interfaces/stt-provider.interface.ts
+++ b/apps/api/src/speech/interfaces/stt-provider.interface.ts
@@ -16,7 +16,7 @@ import type { TranscribeOptions, TranscriptionResult } from "./speech-types";
 *
 * @example
 * ```typescript
- * class SpeachesProvider implements ISTTProvider {
+ * class SpeachesSttProvider implements ISTTProvider {
 *   readonly name = "speaches";
 *
 *   async transcribe(audio: Buffer, options?: TranscribeOptions): Promise<TranscriptionResult> {