Critical fixes: - Fix FormData field name mismatch (audio -> file) to match backend FileInterceptor - Add /speech namespace to WebSocket connection URL - Pass auth token in WebSocket handshake options - Wrap audio.play() in try-catch for NotAllowedError and DOMException handling - Replace bare catch block with named error parameter and descriptive message - Add connect_error and disconnect event handlers to WebSocket - Update JSDoc to accurately describe batch transcription (not real-time partial) Important fixes: - Emit transcription-error before disconnect in gateway auth failures - Capture MediaRecorder error details and clean up media tracks on error - Change TtsDefaultConfig.format type from string to AudioFormat - Define canonical SPEECH_TIERS and AUDIO_FORMATS arrays as single source of truth - Fix voice count from 54 to 53 in provider, AGENTS.md, and docs - Fix inaccurate comments (Piper formats, tier prop, SpeachesProvider, TextValidationPipe) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
249 lines
6.5 KiB
TypeScript
/**
|
|
* useTextToSpeech hook
|
|
* Manages TTS API integration with synthesis, caching, and playback state
|
|
*/
|
|
|
|
import { useState, useCallback, useRef, useEffect } from "react";
|
|
import { synthesizeSpeech } from "@/lib/api/speech";
|
|
|
|
/** Options forwarded to the TTS synthesis endpoint. All fields are optional. */
export interface SynthesizeOptions {
  /** Voice identifier to synthesize with — presumably a backend voice name; verify against the speech API. */
  voice?: string;
  /** Playback speed multiplier — assumed 1 = normal; confirm the accepted range with the backend. */
  speed?: number;
  /** Audio container/codec for the response — typed as a free-form string here; the backend likely accepts only a fixed set (TODO confirm). */
  format?: string;
  /** Quality/latency tier — typed as a free-form string; confirm valid values with the backend. */
  tier?: string;
}
|
|
|
|
/** Public API returned by the useTextToSpeech hook. */
export interface UseTextToSpeechReturn {
  /** Synthesize text to speech audio. Resolves on both success and failure; failures surface via `error`, not a rejected promise. */
  synthesize: (text: string, options?: SynthesizeOptions) => Promise<void>;
  /** Object URL of the most recently synthesized audio blob, or null before the first success / after a failure. */
  audioUrl: string | null;
  /** True while a network synthesis request is in flight (cache hits never set this). */
  isLoading: boolean;
  /** Human-readable message from the last synthesis or playback failure, or null. */
  error: string | null;
  /** Start or resume audio playback; a no-op when nothing has been synthesized yet. */
  play: () => Promise<void>;
  /** Pause audio playback, keeping the current position. */
  pause: () => void;
  /** Stop audio and reset the position to the beginning. */
  stop: () => void;
  /** Whether audio is currently playing. */
  isPlaying: boolean;
  /** Total duration of the loaded audio in seconds (0 until metadata loads). */
  duration: number;
  /** Current playback position in seconds. */
  currentTime: number;
}
|
|
|
|
/** Cache key generator for text + options combination */
|
|
function getCacheKey(text: string, options?: SynthesizeOptions): string {
|
|
return JSON.stringify({ text, ...options });
|
|
}
|
|
|
|
/**
 * Hook for text-to-speech API integration with caching and playback controls.
 *
 * Synthesized audio blobs are cached per (text, options) combination so
 * repeated requests for the same phrase skip the network entirely. Every
 * blob URL ever created is revoked on unmount.
 */
export function useTextToSpeech(): UseTextToSpeechReturn {
  const [audioUrl, setAudioUrl] = useState<string | null>(null);
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [isPlaying, setIsPlaying] = useState(false);
  const [duration, setDuration] = useState(0);
  const [currentTime, setCurrentTime] = useState(0);

  // Audio element ref for playback control
  const audioRef = useRef<HTMLAudioElement | null>(null);

  // Cache: maps cache key -> blob URL
  const cacheRef = useRef<Map<string, string>>(new Map());

  // Track all blob URLs ever created so unmount can revoke every one,
  // independently of cache semantics.
  const blobUrlsRef = useRef<Set<string>>(new Set());

  /**
   * Clean up audio element event listeners and state.
   *
   * NOTE(review): memoized with an empty dep list, so this closure captures
   * the FIRST render's handleEnded/handleTimeUpdate/handleLoadedMetadata
   * instances (the function declarations below are recreated each render).
   * That is consistent here only because setupAudio is likewise anchored to
   * the first render and attaches those same instances — the add/remove
   * pairing silently relies on it; confirm exhaustive-deps lint is
   * intentionally suppressed.
   */
  const cleanupAudio = useCallback(() => {
    const audio = audioRef.current;
    if (audio) {
      audio.pause();
      audio.removeEventListener("ended", handleEnded);
      audio.removeEventListener("timeupdate", handleTimeUpdate);
      audio.removeEventListener("loadedmetadata", handleLoadedMetadata);
      audioRef.current = null;
    }
    setIsPlaying(false);
  }, []);

  // The three handlers below are function declarations (hoisted), which is
  // why cleanupAudio above may reference them before their definition site.
  // They only touch stable setState setters and refs.

  /**
   * Handle the audio "ended" event: mark stopped and rewind the displayed
   * position.
   */
  function handleEnded(): void {
    setIsPlaying(false);
    setCurrentTime(0);
  }

  /**
   * Handle the audio "timeupdate" event: mirror the element's position into
   * state.
   */
  function handleTimeUpdate(): void {
    const audio = audioRef.current;
    if (audio) {
      setCurrentTime(audio.currentTime);
    }
  }

  /**
   * Handle the "loadedmetadata" event: publish duration once known. The
   * isFinite guard skips NaN/Infinity durations (e.g. streams).
   */
  function handleLoadedMetadata(): void {
    const audio = audioRef.current;
    if (audio && isFinite(audio.duration)) {
      setDuration(audio.duration);
    }
  }

  /**
   * Set up a new Audio element for a given URL, tearing down any previous
   * one first so at most one element holds listeners at a time.
   */
  const setupAudio = useCallback(
    (url: string) => {
      cleanupAudio();

      const audio = new Audio(url);
      audio.addEventListener("ended", handleEnded);
      audio.addEventListener("timeupdate", handleTimeUpdate);
      audio.addEventListener("loadedmetadata", handleLoadedMetadata);
      audioRef.current = audio;
    },
    [cleanupAudio]
  );

  /**
   * Synthesize text to speech.
   *
   * Resolves on both success and failure; failures are reported through the
   * `error` state rather than a rejected promise. Cache hits return
   * immediately and never touch `isLoading`.
   */
  const synthesize = useCallback(
    async (text: string, options?: SynthesizeOptions): Promise<void> => {
      setError(null);

      // Check cache first
      const cacheKey = getCacheKey(text, options);
      const cachedUrl = cacheRef.current.get(cacheKey);

      if (cachedUrl) {
        setAudioUrl(cachedUrl);
        setupAudio(cachedUrl);
        return;
      }

      setIsLoading(true);

      try {
        // Conditional spreads keep `undefined` fields out of the request
        // payload entirely.
        const blob = await synthesizeSpeech({
          text,
          ...(options?.voice !== undefined && { voice: options.voice }),
          ...(options?.speed !== undefined && { speed: options.speed }),
          ...(options?.format !== undefined && { format: options.format }),
          ...(options?.tier !== undefined && { tier: options.tier }),
        });

        const url = URL.createObjectURL(blob);

        // Store in cache and track for cleanup
        cacheRef.current.set(cacheKey, url);
        blobUrlsRef.current.add(url);

        setAudioUrl(url);
        setupAudio(url);
      } catch (err) {
        const errorMsg = err instanceof Error ? err.message : "Speech synthesis failed";
        setError(errorMsg);
        // NOTE(review): audioRef still holds the previous clip here while
        // audioUrl is reset to null, so play() would replay stale audio after
        // a failed synthesis; consider calling cleanupAudio() as well.
        setAudioUrl(null);
      } finally {
        setIsLoading(false);
      }
    },
    [setupAudio]
  );

  /**
   * Start or resume audio playback. A no-op when nothing has been
   * synthesized yet. Autoplay rejections (NotAllowedError) get a dedicated
   * user-facing message; other failures fall back to a generic one.
   */
  const play = useCallback(async (): Promise<void> => {
    const audio = audioRef.current;
    if (audio) {
      try {
        await audio.play();
        setIsPlaying(true);
      } catch (err) {
        const message =
          err instanceof DOMException && err.name === "NotAllowedError"
            ? "Playback was blocked by the browser. Try interacting with the page first."
            : "Unable to play audio. The format may not be supported.";
        setError(message);
        setIsPlaying(false);
      }
    }
  }, []);

  /**
   * Pause audio playback, keeping the current position.
   */
  const pause = useCallback((): void => {
    const audio = audioRef.current;
    if (audio) {
      audio.pause();
      setIsPlaying(false);
    }
  }, []);

  /**
   * Stop audio and reset to beginning.
   */
  const stop = useCallback((): void => {
    const audio = audioRef.current;
    if (audio) {
      audio.pause();
      audio.currentTime = 0;
      setIsPlaying(false);
      setCurrentTime(0);
    }
  }, []);

  // Cleanup on unmount: revoke all blob URLs and clean up audio.
  // NOTE(review): this duplicates cleanupAudio minus the setIsPlaying call —
  // presumably to avoid a state update on an unmounted component; confirm.
  useEffect((): (() => void) => {
    return (): void => {
      // Clean up audio element
      const audio = audioRef.current;
      if (audio) {
        audio.pause();
        audio.removeEventListener("ended", handleEnded);
        audio.removeEventListener("timeupdate", handleTimeUpdate);
        audio.removeEventListener("loadedmetadata", handleLoadedMetadata);
        audioRef.current = null;
      }

      // Revoke all blob URLs
      for (const url of blobUrlsRef.current) {
        URL.revokeObjectURL(url);
      }
      blobUrlsRef.current.clear();
      cacheRef.current.clear();
    };
  }, []);

  return {
    synthesize,
    audioUrl,
    isLoading,
    error,
    play,
    pause,
    stop,
    isPlaying,
    duration,
    currentTime,
  };
}
|