Critical fixes: - Fix FormData field name mismatch (audio -> file) to match backend FileInterceptor - Add /speech namespace to WebSocket connection URL - Pass auth token in WebSocket handshake options - Wrap audio.play() in try-catch for NotAllowedError and DOMException handling - Replace bare catch block with named error parameter and descriptive message - Add connect_error and disconnect event handlers to WebSocket - Update JSDoc to accurately describe batch transcription (not real-time partial) Important fixes: - Emit transcription-error before disconnect in gateway auth failures - Capture MediaRecorder error details and clean up media tracks on error - Change TtsDefaultConfig.format type from string to AudioFormat - Define canonical SPEECH_TIERS and AUDIO_FORMATS arrays as single source of truth - Fix voice count from 54 to 53 in provider, AGENTS.md, and docs - Fix inaccurate comments (Piper formats, tier prop, SpeachesProvider, TextValidationPipe) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
249 lines
6.5 KiB
TypeScript
/**
|
|
* useTextToSpeech hook
|
|
* Manages TTS API integration with synthesis, caching, and playback state
|
|
*/
|
|
|
|
import { useState, useCallback, useRef, useEffect } from "react";
|
|
import { synthesizeSpeech } from "@/lib/api/speech";
|
|
|
|
/** Options forwarded to the TTS synthesis endpoint. All fields are optional. */
export interface SynthesizeOptions {
  /** Voice identifier to synthesize with — presumably a backend voice name; verify against the speech API. */
  voice?: string;
  /** Playback speed multiplier — assumed 1 = normal; confirm the accepted range with the backend. */
  speed?: number;
  /** Audio container/codec for the response — typed as a free-form string here; the backend likely accepts only a fixed set (TODO confirm). */
  format?: string;
  /** Quality/latency tier — typed as a free-form string; confirm valid values with the backend. */
  tier?: string;
}
|
|
|
|
/** Public API returned by the useTextToSpeech hook. */
export interface UseTextToSpeechReturn {
  /** Synthesize text to speech audio. Resolves on both success and failure; failures surface via `error`, not a rejected promise. */
  synthesize: (text: string, options?: SynthesizeOptions) => Promise<void>;
  /** Object URL of the most recently synthesized audio blob, or null before the first success / after a failure. */
  audioUrl: string | null;
  /** True while a network synthesis request is in flight (cache hits never set this). */
  isLoading: boolean;
  /** Human-readable message from the last synthesis or playback failure, or null. */
  error: string | null;
  /** Start or resume audio playback; a no-op when nothing has been synthesized yet. */
  play: () => Promise<void>;
  /** Pause audio playback, keeping the current position. */
  pause: () => void;
  /** Stop audio and reset the position to the beginning. */
  stop: () => void;
  /** Whether audio is currently playing. */
  isPlaying: boolean;
  /** Total duration of the loaded audio in seconds (0 until metadata loads). */
  duration: number;
  /** Current playback position in seconds. */
  currentTime: number;
}
|
|
|
|
/** Cache key generator for text + options combination */
|
|
function getCacheKey(text: string, options?: SynthesizeOptions): string {
|
|
return JSON.stringify({ text, ...options });
|
|
}
|
|
|
|
/**
 * Hook for text-to-speech API integration with caching and playback controls.
 *
 * Synthesized audio blobs are cached per (text, options) combination so
 * repeated requests for the same phrase skip the network entirely. Every
 * blob URL ever created is revoked on unmount.
 */
export function useTextToSpeech(): UseTextToSpeechReturn {
  const [audioUrl, setAudioUrl] = useState<string | null>(null);
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [isPlaying, setIsPlaying] = useState(false);
  const [duration, setDuration] = useState(0);
  const [currentTime, setCurrentTime] = useState(0);

  // Audio element ref for playback control
  const audioRef = useRef<HTMLAudioElement | null>(null);

  // Cache: maps cache key -> blob URL
  const cacheRef = useRef<Map<string, string>>(new Map());

  // Track all blob URLs ever created so unmount can revoke every one,
  // independently of cache semantics.
  const blobUrlsRef = useRef<Set<string>>(new Set());

  /**
   * Clean up audio element event listeners and state.
   *
   * NOTE(review): memoized with an empty dep list, so this closure captures
   * the FIRST render's handleEnded/handleTimeUpdate/handleLoadedMetadata
   * instances (the function declarations below are recreated each render).
   * That is consistent here only because setupAudio is likewise anchored to
   * the first render and attaches those same instances — the add/remove
   * pairing silently relies on it; confirm exhaustive-deps lint is
   * intentionally suppressed.
   */
  const cleanupAudio = useCallback(() => {
    const audio = audioRef.current;
    if (audio) {
      audio.pause();
      audio.removeEventListener("ended", handleEnded);
      audio.removeEventListener("timeupdate", handleTimeUpdate);
      audio.removeEventListener("loadedmetadata", handleLoadedMetadata);
      audioRef.current = null;
    }
    setIsPlaying(false);
  }, []);

  // The three handlers below are function declarations (hoisted), which is
  // why cleanupAudio above may reference them before their definition site.
  // They only touch stable setState setters and refs.

  /**
   * Handle the audio "ended" event: mark stopped and rewind the displayed
   * position.
   */
  function handleEnded(): void {
    setIsPlaying(false);
    setCurrentTime(0);
  }

  /**
   * Handle the audio "timeupdate" event: mirror the element's position into
   * state.
   */
  function handleTimeUpdate(): void {
    const audio = audioRef.current;
    if (audio) {
      setCurrentTime(audio.currentTime);
    }
  }

  /**
   * Handle the "loadedmetadata" event: publish duration once known. The
   * isFinite guard skips NaN/Infinity durations (e.g. streams).
   */
  function handleLoadedMetadata(): void {
    const audio = audioRef.current;
    if (audio && isFinite(audio.duration)) {
      setDuration(audio.duration);
    }
  }

  /**
   * Set up a new Audio element for a given URL, tearing down any previous
   * one first so at most one element holds listeners at a time.
   */
  const setupAudio = useCallback(
    (url: string) => {
      cleanupAudio();

      const audio = new Audio(url);
      audio.addEventListener("ended", handleEnded);
      audio.addEventListener("timeupdate", handleTimeUpdate);
      audio.addEventListener("loadedmetadata", handleLoadedMetadata);
      audioRef.current = audio;
    },
    [cleanupAudio]
  );

  /**
   * Synthesize text to speech.
   *
   * Resolves on both success and failure; failures are reported through the
   * `error` state rather than a rejected promise. Cache hits return
   * immediately and never touch `isLoading`.
   */
  const synthesize = useCallback(
    async (text: string, options?: SynthesizeOptions): Promise<void> => {
      setError(null);

      // Check cache first
      const cacheKey = getCacheKey(text, options);
      const cachedUrl = cacheRef.current.get(cacheKey);

      if (cachedUrl) {
        setAudioUrl(cachedUrl);
        setupAudio(cachedUrl);
        return;
      }

      setIsLoading(true);

      try {
        // Conditional spreads keep `undefined` fields out of the request
        // payload entirely.
        const blob = await synthesizeSpeech({
          text,
          ...(options?.voice !== undefined && { voice: options.voice }),
          ...(options?.speed !== undefined && { speed: options.speed }),
          ...(options?.format !== undefined && { format: options.format }),
          ...(options?.tier !== undefined && { tier: options.tier }),
        });

        const url = URL.createObjectURL(blob);

        // Store in cache and track for cleanup
        cacheRef.current.set(cacheKey, url);
        blobUrlsRef.current.add(url);

        setAudioUrl(url);
        setupAudio(url);
      } catch (err) {
        const errorMsg = err instanceof Error ? err.message : "Speech synthesis failed";
        setError(errorMsg);
        // NOTE(review): audioRef still holds the previous clip here while
        // audioUrl is reset to null, so play() would replay stale audio after
        // a failed synthesis; consider calling cleanupAudio() as well.
        setAudioUrl(null);
      } finally {
        setIsLoading(false);
      }
    },
    [setupAudio]
  );

  /**
   * Start or resume audio playback. A no-op when nothing has been
   * synthesized yet. Autoplay rejections (NotAllowedError) get a dedicated
   * user-facing message; other failures fall back to a generic one.
   */
  const play = useCallback(async (): Promise<void> => {
    const audio = audioRef.current;
    if (audio) {
      try {
        await audio.play();
        setIsPlaying(true);
      } catch (err) {
        const message =
          err instanceof DOMException && err.name === "NotAllowedError"
            ? "Playback was blocked by the browser. Try interacting with the page first."
            : "Unable to play audio. The format may not be supported.";
        setError(message);
        setIsPlaying(false);
      }
    }
  }, []);

  /**
   * Pause audio playback, keeping the current position.
   */
  const pause = useCallback((): void => {
    const audio = audioRef.current;
    if (audio) {
      audio.pause();
      setIsPlaying(false);
    }
  }, []);

  /**
   * Stop audio and reset to beginning.
   */
  const stop = useCallback((): void => {
    const audio = audioRef.current;
    if (audio) {
      audio.pause();
      audio.currentTime = 0;
      setIsPlaying(false);
      setCurrentTime(0);
    }
  }, []);

  // Cleanup on unmount: revoke all blob URLs and clean up audio.
  // NOTE(review): this duplicates cleanupAudio minus the setIsPlaying call —
  // presumably to avoid a state update on an unmounted component; confirm.
  useEffect((): (() => void) => {
    return (): void => {
      // Clean up audio element
      const audio = audioRef.current;
      if (audio) {
        audio.pause();
        audio.removeEventListener("ended", handleEnded);
        audio.removeEventListener("timeupdate", handleTimeUpdate);
        audio.removeEventListener("loadedmetadata", handleLoadedMetadata);
        audioRef.current = null;
      }

      // Revoke all blob URLs
      for (const url of blobUrlsRef.current) {
        URL.revokeObjectURL(url);
      }
      blobUrlsRef.current.clear();
      cacheRef.current.clear();
    };
  }, []);

  return {
    synthesize,
    audioUrl,
    isLoading,
    error,
    play,
    pause,
    stop,
    isPlaying,
    duration,
    currentTime,
  };
}
|