fix(#388): address PR review findings — fix WebSocket/REST bugs, improve error handling, fix types and comments
Critical fixes:
- Fix FormData field name mismatch (audio -> file) to match backend FileInterceptor
- Add /speech namespace to WebSocket connection URL
- Pass auth token in WebSocket handshake options
- Wrap audio.play() in try-catch for NotAllowedError and DOMException handling
- Replace bare catch block with named error parameter and descriptive message
- Add connect_error and disconnect event handlers to WebSocket
- Update JSDoc to accurately describe batch transcription (not real-time partial)

Important fixes:
- Emit transcription-error before disconnect in gateway auth failures
- Capture MediaRecorder error details and clean up media tracks on error
- Change TtsDefaultConfig.format type from string to AudioFormat
- Define canonical SPEECH_TIERS and AUDIO_FORMATS arrays as single source of truth
- Fix voice count from 54 to 53 in provider, AGENTS.md, and docs
- Fix inaccurate comments (Piper formats, tier prop, SpeachesProvider, TextValidationPipe)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -19,7 +19,7 @@ export interface TextToSpeechButtonProps {
|
||||
text: string;
|
||||
/** Optional voice ID to use */
|
||||
voice?: string;
|
||||
/** Optional tier (e.g. "standard", "premium") */
|
||||
/** Optional tier (e.g. "default", "premium", "fallback") */
|
||||
tier?: string;
|
||||
/** Optional className for the container */
|
||||
className?: string;
|
||||
|
||||
@@ -173,8 +173,17 @@ export function useTextToSpeech(): UseTextToSpeechReturn {
|
||||
const play = useCallback(async (): Promise<void> => {
|
||||
const audio = audioRef.current;
|
||||
if (audio) {
|
||||
await audio.play();
|
||||
setIsPlaying(true);
|
||||
try {
|
||||
await audio.play();
|
||||
setIsPlaying(true);
|
||||
} catch (err) {
|
||||
const message =
|
||||
err instanceof DOMException && err.name === "NotAllowedError"
|
||||
? "Playback was blocked by the browser. Try interacting with the page first."
|
||||
: "Unable to play audio. The format may not be supported.";
|
||||
setError(message);
|
||||
setIsPlaying(false);
|
||||
}
|
||||
}
|
||||
}, []);
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
/**
|
||||
* useVoiceInput hook
|
||||
*
|
||||
* Custom hook for microphone capture and real-time transcription.
|
||||
* Supports WebSocket streaming for real-time partial transcriptions
|
||||
* Custom hook for microphone capture and speech-to-text transcription.
|
||||
* Supports WebSocket streaming with batch transcription on stop,
|
||||
* with REST upload fallback when WebSocket is unavailable.
|
||||
*/
|
||||
|
||||
@@ -20,6 +20,8 @@ export interface UseVoiceInputOptions {
|
||||
useWebSocket?: boolean;
|
||||
/** Audio sample rate in Hz (default: 16000) */
|
||||
sampleRate?: number;
|
||||
/** Authentication token for WebSocket connection */
|
||||
token?: string;
|
||||
}
|
||||
|
||||
/** Return type for the useVoiceInput hook */
|
||||
@@ -75,14 +77,14 @@ function getAudioMimeType(): string {
|
||||
}
|
||||
|
||||
/**
|
||||
* Hook for microphone capture and real-time speech-to-text transcription.
|
||||
* Hook for microphone capture and speech-to-text transcription.
|
||||
*
|
||||
* Uses WebSocket streaming by default for real-time partial transcriptions.
|
||||
* Uses WebSocket streaming by default with batch transcription on stop.
|
||||
* Falls back to REST upload (POST /api/speech/transcribe) if WebSocket
|
||||
* is disabled or unavailable.
|
||||
*/
|
||||
export function useVoiceInput(options: UseVoiceInputOptions = {}): UseVoiceInputReturn {
|
||||
const { onTranscript, useWebSocket: useWs = true, sampleRate = 16000 } = options;
|
||||
const { onTranscript, useWebSocket: useWs = true, sampleRate = 16000, token } = options;
|
||||
|
||||
const [isRecording, setIsRecording] = useState(false);
|
||||
const [transcript, setTranscript] = useState("");
|
||||
@@ -143,9 +145,12 @@ export function useVoiceInput(options: UseVoiceInputOptions = {}): UseVoiceInput
|
||||
};
|
||||
|
||||
animationFrameRef.current = requestAnimationFrame(updateLevel);
|
||||
} catch {
|
||||
} catch (err) {
|
||||
// Audio analysis is non-critical; continue without it
|
||||
console.warn("Audio analysis not available");
|
||||
console.warn(
|
||||
"Audio level visualization unavailable:",
|
||||
err instanceof Error ? err.message : String(err)
|
||||
);
|
||||
}
|
||||
}, []);
|
||||
|
||||
@@ -169,11 +174,14 @@ export function useVoiceInput(options: UseVoiceInputOptions = {}): UseVoiceInput
|
||||
* Connect to the speech WebSocket namespace
|
||||
*/
|
||||
const connectSocket = useCallback((): Socket => {
|
||||
const socket = io(API_BASE_URL, {
|
||||
const socket = io(`${API_BASE_URL}/speech`, {
|
||||
path: "/socket.io",
|
||||
transports: ["websocket", "polling"],
|
||||
...(token ? { auth: { token } } : {}),
|
||||
});
|
||||
|
||||
// Future use: the gateway does not currently emit transcription-partial,
|
||||
// but the listener is registered for when real-time partial transcription is added.
|
||||
socket.on("transcription-partial", (data: TranscriptionPartialPayload) => {
|
||||
setPartialTranscript(data.text);
|
||||
});
|
||||
@@ -188,9 +196,19 @@ export function useVoiceInput(options: UseVoiceInputOptions = {}): UseVoiceInput
|
||||
setError(data.message);
|
||||
});
|
||||
|
||||
socket.on("connect_error", (err: Error) => {
|
||||
setError(`WebSocket connection failed: ${err.message}`);
|
||||
});
|
||||
|
||||
socket.on("disconnect", (reason: string) => {
|
||||
if (reason !== "io client disconnect") {
|
||||
setError(`WebSocket disconnected unexpectedly: ${reason}`);
|
||||
}
|
||||
});
|
||||
|
||||
socketRef.current = socket;
|
||||
return socket;
|
||||
}, []);
|
||||
}, [token]);
|
||||
|
||||
/**
|
||||
* Disconnect the WebSocket
|
||||
@@ -200,6 +218,8 @@ export function useVoiceInput(options: UseVoiceInputOptions = {}): UseVoiceInput
|
||||
socketRef.current.off("transcription-partial");
|
||||
socketRef.current.off("transcription-final");
|
||||
socketRef.current.off("transcription-error");
|
||||
socketRef.current.off("connect_error");
|
||||
socketRef.current.off("disconnect");
|
||||
socketRef.current.disconnect();
|
||||
socketRef.current = null;
|
||||
}
|
||||
@@ -211,7 +231,7 @@ export function useVoiceInput(options: UseVoiceInputOptions = {}): UseVoiceInput
|
||||
const sendAudioViaRest = useCallback(async (audioBlob: Blob): Promise<void> => {
|
||||
try {
|
||||
const formData = new FormData();
|
||||
formData.append("audio", audioBlob, "recording.webm");
|
||||
formData.append("file", audioBlob, "recording.webm");
|
||||
|
||||
const response = await apiPostFormData<TranscribeResponse>(
|
||||
"/api/speech/transcribe",
|
||||
@@ -315,10 +335,16 @@ export function useVoiceInput(options: UseVoiceInputOptions = {}): UseVoiceInput
|
||||
});
|
||||
|
||||
// Handle errors
|
||||
mediaRecorder.addEventListener("error", () => {
|
||||
setError("Recording encountered an issue. Please try again.");
|
||||
mediaRecorder.addEventListener("error", (event: Event) => {
|
||||
let errorMessage = "Recording encountered an issue. Please try again.";
|
||||
if ("error" in event && event.error instanceof DOMException) {
|
||||
errorMessage = `Recording error: ${event.error.name} - ${event.error.message}`;
|
||||
}
|
||||
setError(errorMessage);
|
||||
setIsRecording(false);
|
||||
isRecordingRef.current = false;
|
||||
stopMediaTracks();
|
||||
cleanupAudioAnalysis();
|
||||
});
|
||||
|
||||
// Start recording with timeslice for streaming chunks (250ms intervals)
|
||||
|
||||
Reference in New Issue
Block a user