Files
stack/apps/web/src/hooks/useTextToSpeech.ts
Jason Woltje af9c5799af
All checks were successful
ci/woodpecker/push/web Pipeline was successful
ci/woodpecker/push/api Pipeline was successful
fix(#388): address PR review findings — fix WebSocket/REST bugs, improve error handling, fix types and comments
Critical fixes:
- Fix FormData field name mismatch (audio -> file) to match backend FileInterceptor
- Add /speech namespace to WebSocket connection URL
- Pass auth token in WebSocket handshake options
- Wrap audio.play() in try-catch for NotAllowedError and DOMException handling
- Replace bare catch block with named error parameter and descriptive message
- Add connect_error and disconnect event handlers to WebSocket
- Update JSDoc to accurately describe batch transcription (not real-time partial)

Important fixes:
- Emit transcription-error before disconnect in gateway auth failures
- Capture MediaRecorder error details and clean up media tracks on error
- Change TtsDefaultConfig.format type from string to AudioFormat
- Define canonical SPEECH_TIERS and AUDIO_FORMATS arrays as single source of truth
- Fix voice count from 54 to 53 in provider, AGENTS.md, and docs
- Fix inaccurate comments (Piper formats, tier prop, SpeachesProvider, TextValidationPipe)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 03:44:33 -06:00

249 lines
6.5 KiB
TypeScript

/**
* useTextToSpeech hook
* Manages TTS API integration with synthesis, caching, and playback state
*/
import { useState, useCallback, useRef, useEffect } from "react";
import { synthesizeSpeech } from "@/lib/api/speech";
/**
 * Optional tuning parameters for a single `synthesize` call.
 *
 * Every field may be omitted. Fields that are defined are forwarded under the
 * same name to the speech API request, and the whole object takes part in the
 * hook's cache key, so different option values yield separately cached audio.
 */
export interface SynthesizeOptions {
  /** Output format forwarded as `format`; accepted values are defined by the API. */
  format?: string;
  /** Speed value forwarded as `speed`; the valid range is defined by the API. */
  speed?: number;
  /** Tier forwarded as `tier`; accepted values are defined by the API. */
  tier?: string;
  /** Voice forwarded as `voice`; accepted values are defined by the API. */
  voice?: string;
}
/**
 * Value returned by `useTextToSpeech`: synthesis state, playback state and the
 * controls that drive them.
 */
export interface UseTextToSpeechReturn {
  // --- Synthesis ---------------------------------------------------------

  /** Request audio for `text`; resolves once the request has settled. */
  synthesize: (text: string, options?: SynthesizeOptions) => Promise<void>;
  /** Blob URL of the most recently synthesized audio, or `null` if none. */
  audioUrl: string | null;
  /** `true` while a synthesis request is in flight. */
  isLoading: boolean;
  /** Human-readable message for the latest failure, or `null`. */
  error: string | null;

  // --- Playback controls -------------------------------------------------

  /** Begin playback, or resume it after a pause. */
  play: () => Promise<void>;
  /** Halt playback, keeping the current position. */
  pause: () => void;
  /** Halt playback and rewind to the start. */
  stop: () => void;

  // --- Playback state ----------------------------------------------------

  /** `true` while audio is playing. */
  isPlaying: boolean;
  /** Position of the playhead, in seconds. */
  currentTime: number;
  /** Length of the loaded audio, in seconds. */
  duration: number;
}
/**
 * Build the cache key for a text + options combination.
 *
 * The key is independent of the order in which option properties were written
 * and ignores properties whose value is `undefined`, so logically identical
 * requests always map to the same cache entry. The text is kept in its own
 * slot so that no option name can collide with it.
 *
 * @param text - The text to be synthesized.
 * @param options - Optional synthesis options that distinguish cache entries.
 * @returns A deterministic string key.
 */
function getCacheKey(text: string, options?: SynthesizeOptions): string {
  const definedOptions = Object.entries(options ?? {})
    .filter(([, value]) => value !== undefined)
    // Sort by property name so insertion order cannot change the key.
    .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0));
  return JSON.stringify([text, definedOptions]);
}
/**
 * Hook for text-to-speech API integration with caching and playback controls.
 *
 * - `synthesize` requests audio for a text/options pair, caches the resulting
 *   blob URL per pair, and prepares an `Audio` element for it. When several
 *   calls overlap, only the most recent one updates the hook's state.
 * - `play` / `pause` / `stop` control the prepared element.
 * - All blob URLs created by this hook instance are revoked on unmount.
 *
 * @returns Synthesis state, playback state and the control callbacks.
 */
export function useTextToSpeech(): UseTextToSpeechReturn {
  const [audioUrl, setAudioUrl] = useState<string | null>(null);
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [isPlaying, setIsPlaying] = useState(false);
  const [duration, setDuration] = useState(0);
  const [currentTime, setCurrentTime] = useState(0);

  // Audio element ref for playback control
  const audioRef = useRef<HTMLAudioElement | null>(null);
  // Removes the listeners registered on the current audio element
  const detachListenersRef = useRef<(() => void) | null>(null);
  // Cache: maps cache key -> blob URL
  const cacheRef = useRef<Map<string, string>>(new Map());
  // Track all blob URLs for cleanup
  const blobUrlsRef = useRef<Set<string>>(new Set());
  // Id of the most recent synthesize() call; older calls must not touch state
  const latestRequestRef = useRef(0);
  // False once the component has unmounted
  const isMountedRef = useRef(true);

  /**
   * Pause and detach the current audio element without touching React state.
   * Safe to call during unmount.
   */
  const releaseAudio = useCallback((): void => {
    const audio = audioRef.current;
    if (audio) {
      audio.pause();
    }
    detachListenersRef.current?.();
    detachListenersRef.current = null;
    audioRef.current = null;
  }, []);

  /**
   * Clean up audio element event listeners and state
   */
  const cleanupAudio = useCallback((): void => {
    releaseAudio();
    setIsPlaying(false);
  }, [releaseAudio]);

  /**
   * Set up a new Audio element for a given URL
   */
  const setupAudio = useCallback(
    (url: string): void => {
      cleanupAudio();
      // Do not show the previous clip's position/length for the new clip.
      setCurrentTime(0);
      setDuration(0);

      const audio = new Audio(url);

      // Listeners are created per element and close over it, so exactly the
      // same function objects are used for add and remove.
      const handleEnded = (): void => {
        setIsPlaying(false);
        setCurrentTime(0);
      };
      const handleTimeUpdate = (): void => {
        setCurrentTime(audio.currentTime);
      };
      const handleLoadedMetadata = (): void => {
        // Duration can be NaN/Infinity until metadata is known.
        if (Number.isFinite(audio.duration)) {
          setDuration(audio.duration);
        }
      };

      audio.addEventListener("ended", handleEnded);
      audio.addEventListener("timeupdate", handleTimeUpdate);
      audio.addEventListener("loadedmetadata", handleLoadedMetadata);

      detachListenersRef.current = (): void => {
        audio.removeEventListener("ended", handleEnded);
        audio.removeEventListener("timeupdate", handleTimeUpdate);
        audio.removeEventListener("loadedmetadata", handleLoadedMetadata);
      };
      audioRef.current = audio;
    },
    [cleanupAudio]
  );

  /**
   * Synthesize text to speech
   */
  const synthesize = useCallback(
    async (text: string, options?: SynthesizeOptions): Promise<void> => {
      const requestId = ++latestRequestRef.current;
      const isCurrent = (): boolean =>
        isMountedRef.current && requestId === latestRequestRef.current;

      setError(null);

      // Check cache first
      const cacheKey = getCacheKey(text, options);
      const cachedUrl = cacheRef.current.get(cacheKey);
      if (cachedUrl !== undefined) {
        // An older in-flight request is now superseded and will not reset the
        // loading flag itself, so reset it here.
        setIsLoading(false);
        setAudioUrl(cachedUrl);
        setupAudio(cachedUrl);
        return;
      }

      setIsLoading(true);
      try {
        const blob = await synthesizeSpeech({
          text,
          ...(options?.voice !== undefined && { voice: options.voice }),
          ...(options?.speed !== undefined && { speed: options.speed }),
          ...(options?.format !== undefined && { format: options.format }),
          ...(options?.tier !== undefined && { tier: options.tier }),
        });

        // After unmount every blob URL has been revoked; creating another one
        // would leak it.
        if (!isMountedRef.current) {
          return;
        }

        // Store in cache and track for cleanup. A concurrent identical request
        // may already have filled the entry; reuse it instead of duplicating.
        let url = cacheRef.current.get(cacheKey);
        if (url === undefined) {
          url = URL.createObjectURL(blob);
          cacheRef.current.set(cacheKey, url);
          blobUrlsRef.current.add(url);
        }

        // A newer synthesize() call owns the visible state; keep the cached
        // result but do not replace the newer audio.
        if (!isCurrent()) {
          return;
        }
        setAudioUrl(url);
        setupAudio(url);
      } catch (err) {
        if (!isCurrent()) {
          return;
        }
        const errorMsg = err instanceof Error ? err.message : "Speech synthesis failed";
        setError(errorMsg);
        setAudioUrl(null);
        // Keep the element in sync with audioUrl: without this, play() would
        // still play the previous clip although no audio is reported.
        cleanupAudio();
      } finally {
        if (isCurrent()) {
          setIsLoading(false);
        }
      }
    },
    [setupAudio, cleanupAudio]
  );

  /**
   * Start or resume audio playback
   */
  const play = useCallback(async (): Promise<void> => {
    const audio = audioRef.current;
    if (!audio) {
      return;
    }
    try {
      await audio.play();
      // The element may have been replaced or released while play() was pending.
      if (audioRef.current === audio) {
        setIsPlaying(true);
      }
    } catch (err) {
      // Interrupted by pause() or by the element being replaced/released:
      // not a playback failure, and whoever interrupted already updated state.
      if (audioRef.current !== audio) {
        return;
      }
      if (err instanceof DOMException && err.name === "AbortError") {
        return;
      }
      const message =
        err instanceof DOMException && err.name === "NotAllowedError"
          ? "Playback was blocked by the browser. Try interacting with the page first."
          : "Unable to play audio. The format may not be supported.";
      setError(message);
      setIsPlaying(false);
    }
  }, []);

  /**
   * Pause audio playback
   */
  const pause = useCallback((): void => {
    const audio = audioRef.current;
    if (audio) {
      audio.pause();
      setIsPlaying(false);
    }
  }, []);

  /**
   * Stop audio and reset to beginning
   */
  const stop = useCallback((): void => {
    const audio = audioRef.current;
    if (audio) {
      audio.pause();
      audio.currentTime = 0;
      setIsPlaying(false);
      setCurrentTime(0);
    }
  }, []);

  // Cleanup on unmount: revoke all blob URLs and clean up audio
  useEffect((): (() => void) => {
    // Re-arm on (re)mount so a mount/unmount/mount cycle keeps working.
    isMountedRef.current = true;
    const blobUrls = blobUrlsRef.current;
    const cache = cacheRef.current;
    return (): void => {
      isMountedRef.current = false;
      // Clean up audio element
      releaseAudio();
      // Revoke all blob URLs
      for (const url of blobUrls) {
        URL.revokeObjectURL(url);
      }
      blobUrls.clear();
      cache.clear();
    };
  }, [releaseAudio]);

  return {
    synthesize,
    audioUrl,
    isLoading,
    error,
    play,
    pause,
    stop,
    isPlaying,
    duration,
    currentTime,
  };
}