feat: M13-SpeechServices — TTS & STT integration #409

Merged
jason.woltje merged 20 commits from feature/m13-speech-services into develop 2026-02-15 18:37:54 +00:00
14 changed files with 2664 additions and 0 deletions
Showing only changes of commit 74d6c1092e - Show all commits

View File

@@ -0,0 +1,178 @@
/**
* @file AudioPlayer.test.tsx
* @description Tests for the AudioPlayer component that provides inline TTS audio playback
*/
import { describe, it, expect, vi, beforeEach } from "vitest";
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { AudioPlayer } from "./AudioPlayer";
// Lightweight stand-in for HTMLAudioElement so the tests can drive playback
// without a real media stack in jsdom.
class MockAudio {
  src = "";
  currentTime = 0;
  duration = 60;
  paused = true;
  playbackRate = 1;
  volume = 1;
  onended: (() => void) | null = null;
  ontimeupdate: (() => void) | null = null;
  onloadedmetadata: (() => void) | null = null;
  onerror: ((e: unknown) => void) | null = null;
  /** Pretend playback started; resolves immediately like a granted play(). */
  play(): Promise<void> {
    this.paused = false;
    return Promise.resolve();
  }
  /** Pretend playback stopped. */
  pause(): void {
    this.paused = true;
  }
  /** Capture the listener on the matching on* slot so tests can fire it manually. */
  addEventListener(event: string, listener: () => void): void {
    switch (event) {
      case "ended":
        this.onended = listener;
        break;
      case "timeupdate":
        this.ontimeupdate = listener;
        break;
      case "loadedmetadata":
        this.onloadedmetadata = listener;
        break;
      case "error":
        this.onerror = listener;
        break;
      default:
        break;
    }
  }
  /** Listener removal is irrelevant for these tests. */
  removeEventListener(): void {
    // no-op for tests
  }
}
// Install MockAudio as the global Audio constructor so `new Audio(src)`
// inside AudioPlayer resolves to the mock above.
vi.stubGlobal("Audio", MockAudio);
describe("AudioPlayer", () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });
  // Static rendering of each individual control.
  describe("rendering", () => {
    it("should render play button", () => {
      render(<AudioPlayer src="blob:test-audio" />);
      const playButton = screen.getByRole("button", { name: "Play audio" });
      expect(playButton).toBeInTheDocument();
    });
    it("should render download button", () => {
      render(<AudioPlayer src="blob:test-audio" />);
      const downloadButton = screen.getByRole("button", { name: /download/i });
      expect(downloadButton).toBeInTheDocument();
    });
    it("should render time display showing 0:00", () => {
      render(<AudioPlayer src="blob:test-audio" />);
      expect(screen.getByText("0:00")).toBeInTheDocument();
    });
    it("should render speed control", () => {
      render(<AudioPlayer src="blob:test-audio" />);
      const speedButton = screen.getByRole("button", { name: "Playback speed" });
      expect(speedButton).toBeInTheDocument();
    });
    it("should render progress bar", () => {
      render(<AudioPlayer src="blob:test-audio" />);
      const progressBar = screen.getByRole("progressbar");
      expect(progressBar).toBeInTheDocument();
    });
    // A null src is the component's "render nothing" contract.
    it("should not render when src is null", () => {
      const { container } = render(<AudioPlayer src={null} />);
      expect(container.firstChild).toBeNull();
    });
  });
  describe("play/pause", () => {
    it("should toggle to pause button when playing", async () => {
      const user = userEvent.setup();
      render(<AudioPlayer src="blob:test-audio" />);
      const playButton = screen.getByRole("button", { name: "Play audio" });
      await user.click(playButton);
      expect(screen.getByRole("button", { name: "Pause audio" })).toBeInTheDocument();
    });
  });
  describe("speed control", () => {
    // The speed button cycles 1x -> 1.5x -> 2x -> 0.5x -> back to 1x.
    it("should cycle through speed options on click", async () => {
      const user = userEvent.setup();
      render(<AudioPlayer src="blob:test-audio" />);
      const speedButton = screen.getByRole("button", { name: "Playback speed" });
      // Default should be 1x
      expect(speedButton).toHaveTextContent("1x");
      // Click to go to 1.5x
      await user.click(speedButton);
      expect(speedButton).toHaveTextContent("1.5x");
      // Click to go to 2x
      await user.click(speedButton);
      expect(speedButton).toHaveTextContent("2x");
      // Click to go to 0.5x
      await user.click(speedButton);
      expect(speedButton).toHaveTextContent("0.5x");
      // Click to go back to 1x
      await user.click(speedButton);
      expect(speedButton).toHaveTextContent("1x");
    });
  });
  describe("accessibility", () => {
    it("should have proper aria labels on controls", () => {
      render(<AudioPlayer src="blob:test-audio" />);
      expect(screen.getByRole("button", { name: "Play audio" })).toBeInTheDocument();
      expect(screen.getByRole("button", { name: /download/i })).toBeInTheDocument();
      expect(screen.getByRole("button", { name: "Playback speed" })).toBeInTheDocument();
      expect(screen.getByRole("progressbar")).toHaveAttribute("aria-label");
    });
    it("should have region role on the player container", () => {
      render(<AudioPlayer src="blob:test-audio" />);
      expect(screen.getByRole("region", { name: /audio player/i })).toBeInTheDocument();
    });
  });
  describe("design", () => {
    // PDA-friendly palette: no red Tailwind utility classes anywhere in the tree.
    it("should not use aggressive red colors", () => {
      const { container } = render(<AudioPlayer src="blob:test-audio" />);
      const allElements = container.querySelectorAll("*");
      allElements.forEach((el) => {
        const className = el.className;
        // className can be an SVGAnimatedString on SVG nodes, so only
        // plain-string values are matched here.
        if (typeof className === "string") {
          expect(className).not.toMatch(/bg-red-|text-red-|border-red-/);
        }
      });
    });
  });
  describe("callbacks", () => {
    it("should call onPlayStateChange when play state changes", async () => {
      const onPlayStateChange = vi.fn();
      const user = userEvent.setup();
      render(<AudioPlayer src="blob:test-audio" onPlayStateChange={onPlayStateChange} />);
      const playButton = screen.getByRole("button", { name: "Play audio" });
      await user.click(playButton);
      expect(onPlayStateChange).toHaveBeenCalledWith(true);
    });
  });
});

View File

@@ -0,0 +1,250 @@
/**
* AudioPlayer Component
* Inline audio player for TTS content with play/pause, progress,
* speed control, download, and duration display.
*
* Follows PDA-friendly design: no aggressive colors, calm interface.
*/
import { useState, useRef, useEffect, useCallback } from "react";
import type { ReactElement } from "react";
/** Playback speed options, in the order the speed button cycles through them. */
const SPEED_OPTIONS = [1, 1.5, 2, 0.5] as const;
/** Props for the AudioPlayer component. */
export interface AudioPlayerProps {
  /** URL of the audio to play (blob URL or HTTP URL). If null, nothing renders. */
  src: string | null;
  /** Whether to auto-play when src changes (default: false) */
  autoPlay?: boolean;
  /** Callback when play state changes; receives true on play, false on pause/end */
  onPlayStateChange?: (isPlaying: boolean) => void;
  /** Optional className for the container */
  className?: string;
}
/**
 * Format a duration in seconds as `M:SS` (e.g. 75.5 -> "1:15").
 * Non-finite or negative input renders as "0:00".
 */
function formatTime(seconds: number): string {
  if (!isFinite(seconds) || seconds < 0) return "0:00";
  const whole = Math.floor(seconds);
  const minutes = Math.floor(whole / 60);
  const remainder = whole % 60;
  return `${String(minutes)}:${String(remainder).padStart(2, "0")}`;
}
/**
 * AudioPlayer displays an inline audio player with controls for
 * play/pause, progress tracking, speed adjustment, and download.
 *
 * Renders nothing when `src` is null.
 */
export function AudioPlayer({
  src,
  autoPlay = false,
  onPlayStateChange,
  className = "",
}: AudioPlayerProps): ReactElement | null {
  const [isPlaying, setIsPlaying] = useState(false);
  const [currentTime, setCurrentTime] = useState(0);
  const [duration, setDuration] = useState(0);
  const [speedIndex, setSpeedIndex] = useState(0);
  const audioRef = useRef<HTMLAudioElement | null>(null);
  // Latest play-state callback. Kept in a ref so a new function identity from
  // the parent (e.g. an inline arrow prop) does not re-run the setup effect
  // below — previously that tore down and re-created the audio element on
  // every parent render, losing playback position.
  const onPlayStateChangeRef = useRef(onPlayStateChange);
  useEffect((): void => {
    onPlayStateChangeRef.current = onPlayStateChange;
  }, [onPlayStateChange]);
  // Mirror of speedIndex so a newly created audio element can pick up the
  // currently selected speed without the setup effect depending on speedIndex.
  const speedIndexRef = useRef(0);
  /**
   * Create a fresh audio element whenever src changes; tear it down on cleanup.
   */
  useEffect((): (() => void) | undefined => {
    if (!src) return undefined;
    // Clear state left over from a previous source so the UI does not show a
    // stale time/duration or a "playing" state for audio that no longer exists.
    setIsPlaying(false);
    setCurrentTime(0);
    setDuration(0);
    const audio = new Audio(src);
    // Carry the user's chosen speed over to the new element (previously a new
    // source always played at 1x while the button still showed the old speed).
    audio.playbackRate = SPEED_OPTIONS[speedIndexRef.current] ?? 1;
    audioRef.current = audio;
    const onLoadedMetadata = (): void => {
      // duration can be NaN/Infinity for streams before metadata settles.
      if (isFinite(audio.duration)) {
        setDuration(audio.duration);
      }
    };
    const onTimeUpdate = (): void => {
      setCurrentTime(audio.currentTime);
    };
    const onEnded = (): void => {
      setIsPlaying(false);
      setCurrentTime(0);
      onPlayStateChangeRef.current?.(false);
    };
    audio.addEventListener("loadedmetadata", onLoadedMetadata);
    audio.addEventListener("timeupdate", onTimeUpdate);
    audio.addEventListener("ended", onEnded);
    if (autoPlay) {
      void audio
        .play()
        .then(() => {
          setIsPlaying(true);
          onPlayStateChangeRef.current?.(true);
        })
        .catch(() => {
          // Autoplay may be blocked by browser policy; stay paused instead of
          // surfacing an unhandled promise rejection.
        });
    }
    return (): void => {
      audio.pause();
      audio.removeEventListener("loadedmetadata", onLoadedMetadata);
      audio.removeEventListener("timeupdate", onTimeUpdate);
      audio.removeEventListener("ended", onEnded);
      audioRef.current = null;
    };
  }, [src, autoPlay]);
  /**
   * Toggle play/pause. play() can reject (e.g. the browser requires a user
   * gesture); in that case the paused UI state is kept.
   */
  const togglePlayPause = useCallback(async (): Promise<void> => {
    const audio = audioRef.current;
    if (!audio) return;
    if (isPlaying) {
      audio.pause();
      setIsPlaying(false);
      onPlayStateChangeRef.current?.(false);
    } else {
      try {
        await audio.play();
      } catch {
        // Playback was refused; do not flip the UI into the playing state.
        return;
      }
      setIsPlaying(true);
      onPlayStateChangeRef.current?.(true);
    }
  }, [isPlaying]);
  /**
   * Cycle to the next entry in SPEED_OPTIONS and apply it immediately.
   */
  const cycleSpeed = useCallback((): void => {
    const nextIndex = (speedIndexRef.current + 1) % SPEED_OPTIONS.length;
    speedIndexRef.current = nextIndex;
    setSpeedIndex(nextIndex);
    const audio = audioRef.current;
    if (audio) {
      audio.playbackRate = SPEED_OPTIONS[nextIndex] ?? 1;
    }
  }, []);
  /**
   * Seek within the track based on where the progress bar was clicked.
   */
  const handleProgressClick = useCallback(
    (event: React.MouseEvent<HTMLDivElement>): void => {
      const audio = audioRef.current;
      if (!audio || !duration) return;
      const rect = event.currentTarget.getBoundingClientRect();
      const clickX = event.clientX - rect.left;
      const fraction = clickX / rect.width;
      audio.currentTime = fraction * duration;
      setCurrentTime(audio.currentTime);
    },
    [duration]
  );
  /**
   * Trigger a browser download of the current source via a temporary anchor.
   */
  const handleDownload = useCallback((): void => {
    if (!src) return;
    const link = document.createElement("a");
    link.href = src;
    link.download = "speech-audio.mp3";
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
  }, [src]);
  // Don't render if no source
  if (!src) return null;
  const progress = duration > 0 ? (currentTime / duration) * 100 : 0;
  const currentSpeed = SPEED_OPTIONS[speedIndex] ?? 1;
  return (
    <div
      role="region"
      aria-label="Audio player"
      className={`flex items-center gap-2 rounded-lg border border-gray-200 bg-gray-50 px-3 py-2 ${className}`}
    >
      {/* Play/Pause Button */}
      <button
        type="button"
        onClick={() => void togglePlayPause()}
        aria-label={isPlaying ? "Pause audio" : "Play audio"}
        className="flex h-8 w-8 shrink-0 items-center justify-center rounded-full bg-blue-500 text-white transition-colors hover:bg-blue-600 focus:outline-none focus:ring-2 focus:ring-blue-300"
      >
        {isPlaying ? (
          <svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true">
            <rect x="6" y="4" width="4" height="16" rx="1" />
            <rect x="14" y="4" width="4" height="16" rx="1" />
          </svg>
        ) : (
          <svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true">
            <polygon points="6,4 20,12 6,20" />
          </svg>
        )}
      </button>
      {/* Time Display */}
      <span className="min-w-[3.5rem] text-xs text-gray-500 tabular-nums">
        {formatTime(currentTime)}
        {duration > 0 && <span className="text-gray-400"> / {formatTime(duration)}</span>}
      </span>
      {/* Progress Bar (click to seek) */}
      <div
        role="progressbar"
        aria-label="Audio progress"
        aria-valuenow={Math.round(progress)}
        aria-valuemin={0}
        aria-valuemax={100}
        className="relative h-1.5 flex-1 cursor-pointer rounded-full bg-gray-200"
        onClick={handleProgressClick}
      >
        <div
          className="absolute left-0 top-0 h-full rounded-full bg-blue-400 transition-all"
          style={{ width: `${String(Math.min(progress, 100))}%` }}
        />
      </div>
      {/* Speed Control */}
      <button
        type="button"
        onClick={cycleSpeed}
        aria-label="Playback speed"
        className="min-w-[2.5rem] rounded px-1.5 py-0.5 text-xs font-medium text-gray-600 transition-colors hover:bg-gray-200 focus:outline-none focus:ring-2 focus:ring-blue-300"
      >
        {String(currentSpeed)}x
      </button>
      {/* Download Button */}
      <button
        type="button"
        onClick={handleDownload}
        aria-label="Download audio"
        className="flex h-7 w-7 shrink-0 items-center justify-center rounded text-gray-500 transition-colors hover:bg-gray-200 hover:text-gray-700 focus:outline-none focus:ring-2 focus:ring-blue-300"
      >
        <svg
          width="14"
          height="14"
          viewBox="0 0 24 24"
          fill="none"
          stroke="currentColor"
          strokeWidth="2"
          strokeLinecap="round"
          strokeLinejoin="round"
          aria-hidden="true"
        >
          <path d="M21 15v4a2 2 0 01-2 2H5a2 2 0 01-2-2v-4" />
          <polyline points="7 10 12 15 17 10" />
          <line x1="12" y1="15" x2="12" y2="3" />
        </svg>
      </button>
    </div>
  );
}
export default AudioPlayer;

View File

@@ -0,0 +1,70 @@
import { describe, it, expect } from "vitest";
import { render, screen } from "@testing-library/react";
import { AudioVisualizer } from "./AudioVisualizer";
describe("AudioVisualizer", (): void => {
  it("should render the visualizer container", (): void => {
    render(<AudioVisualizer audioLevel={0} isActive={false} />);
    expect(screen.getByTestId("audio-visualizer")).toBeInTheDocument();
  });
  it("should render visualization bars", (): void => {
    render(<AudioVisualizer audioLevel={0.5} isActive={true} />);
    expect(screen.getAllByTestId("visualizer-bar").length).toBeGreaterThan(0);
  });
  it("should show inactive state when not active", (): void => {
    render(<AudioVisualizer audioLevel={0} isActive={false} />);
    expect(screen.getByTestId("audio-visualizer")).toBeInTheDocument();
    // Even when idle, every bar keeps an inline height (the minimum).
    for (const bar of screen.getAllByTestId("visualizer-bar")) {
      expect(bar.getAttribute("style")).toContain("height");
    }
  });
  it("should reflect audio level in bar heights when active", (): void => {
    render(<AudioVisualizer audioLevel={0.8} isActive={true} />);
    const bars = screen.getAllByTestId("visualizer-bar");
    // At a high level, at least one bar must exceed the 4px idle height.
    const grewPastIdle = bars.some((bar) => {
      const inlineStyle = bar.getAttribute("style") ?? "";
      const match = /height:\s*(\d+)/.exec(inlineStyle);
      return match?.[1] ? parseInt(match[1], 10) > 4 : false;
    });
    expect(grewPastIdle).toBe(true);
  });
  it("should use calm colors (no aggressive reds)", (): void => {
    render(<AudioVisualizer audioLevel={0.5} isActive={true} />);
    const root = screen.getByTestId("audio-visualizer");
    for (const node of Array.from(root.querySelectorAll("*"))) {
      expect((node as HTMLElement).className).not.toMatch(/bg-red-|text-red-/);
    }
  });
  it("should accept custom className", (): void => {
    render(<AudioVisualizer audioLevel={0.5} isActive={true} className="custom-class" />);
    expect(screen.getByTestId("audio-visualizer").className).toContain("custom-class");
  });
  it("should render with configurable bar count", (): void => {
    render(<AudioVisualizer audioLevel={0.5} isActive={true} barCount={8} />);
    expect(screen.getAllByTestId("visualizer-bar")).toHaveLength(8);
  });
});

View File

@@ -0,0 +1,87 @@
/**
* AudioVisualizer component
*
* Displays a simple audio level visualization using bars.
* Uses the Web Audio API's AnalyserNode data (passed as audioLevel)
* to show microphone input levels during recording.
*
* Design: Calm, non-aggressive colors following PDA-friendly guidelines.
*/
import { useMemo } from "react";
/** Props for the AudioVisualizer component. */
export interface AudioVisualizerProps {
  /** Current audio level, normalized to the range 0-1 */
  audioLevel: number;
  /** Whether the visualizer is actively listening; when false, bars collapse to minimum height */
  isActive: boolean;
  /** Number of bars to display (default: 5) */
  barCount?: number;
  /** Additional CSS classes */
  className?: string;
}
/**
 * Generate bar heights (in px) based on an audio level.
 * Creates a natural-looking wave pattern where center bars are taller.
 *
 * @param level Audio level in [0, 1]; scales bars between 4px and 24px.
 * @param count Number of bars (>= 1).
 * @returns One rounded pixel height per bar.
 */
function generateBarHeights(level: number, count: number): number[] {
  // Min height 4px, max height 24px when fully active.
  const minHeight = 4;
  const maxHeight = 24;
  const heights: number[] = [];
  const center = (count - 1) / 2;
  for (let i = 0; i < count; i++) {
    // Distance from center, normalized to 0-1. Guard the single-bar case:
    // center is 0 there, and dividing by it produced NaN heights before.
    const distFromCenter = center === 0 ? 0 : Math.abs(i - center) / center;
    // Center bars are taller, edge bars shorter
    const multiplier = 1 - distFromCenter * 0.5;
    const height = minHeight + level * (maxHeight - minHeight) * multiplier;
    heights.push(Math.round(height));
  }
  return heights;
}
/**
 * Audio level visualizer with animated bars.
 * Shows microphone input levels during voice recording.
 */
export function AudioVisualizer({
  audioLevel,
  isActive,
  barCount = 5,
  className = "",
}: AudioVisualizerProps): React.JSX.Element {
  // Idle bars collapse to the 4px minimum; active bars scale with the level.
  const barHeights = useMemo(
    () =>
      isActive
        ? generateBarHeights(audioLevel, barCount)
        : Array.from({ length: barCount }, () => 4),
    [audioLevel, isActive, barCount]
  );
  const label = isActive
    ? `Audio level: ${String(Math.round(audioLevel * 100))}%`
    : "Audio visualizer inactive";
  // Calm palette only (PDA-friendly): sky when active, muted slate when idle.
  const barTone = isActive ? "bg-sky-400" : "bg-slate-300 dark:bg-slate-600";
  return (
    <div
      data-testid="audio-visualizer"
      className={`flex items-center gap-0.5 ${className}`}
      role="img"
      aria-label={label}
    >
      {barHeights.map((height, index) => (
        <div
          key={index}
          data-testid="visualizer-bar"
          className={`w-1 rounded-full transition-all duration-150 ease-out ${barTone}`}
          style={{ height: `${height.toString()}px` }}
        />
      ))}
    </div>
  );
}

View File

@@ -0,0 +1,218 @@
/**
* @file TextToSpeechButton.test.tsx
* @description Tests for the TextToSpeechButton "Read aloud" component
*/
import { describe, it, expect, vi, beforeEach } from "vitest";
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { TextToSpeechButton } from "./TextToSpeechButton";
// Mock the useTextToSpeech hook
const mockSynthesize = vi.fn();
const mockPlay = vi.fn();
const mockPause = vi.fn();
const mockStop = vi.fn();
// vitest hoists vi.mock above the imports; the factory may only reference
// the lazily evaluated vi.fn() handles declared above.
vi.mock("@/hooks/useTextToSpeech", () => ({
  useTextToSpeech: vi.fn(() => ({
    synthesize: mockSynthesize,
    play: mockPlay,
    pause: mockPause,
    stop: mockStop,
    audioUrl: null,
    isLoading: false,
    error: null,
    isPlaying: false,
    duration: 0,
    currentTime: 0,
  })),
}));
// Import after mocking
import { useTextToSpeech } from "@/hooks/useTextToSpeech";
// Use vi.mocked() rather than the previous unsafe
// `as ReturnType<typeof vi.fn>` cast: it keeps the hook's signature on the
// mock and matches how VoiceInput.test.tsx wraps its mocked hook.
const mockUseTextToSpeech = vi.mocked(useTextToSpeech);
// Minimal HTMLAudioElement substitute so the AudioPlayer rendered inside
// TextToSpeechButton can mount without a real media implementation.
class MockAudio {
  src = "";
  currentTime = 0;
  duration = 60;
  paused = true;
  playbackRate = 1;
  volume = 1;
  onended: (() => void) | null = null;
  ontimeupdate: (() => void) | null = null;
  onloadedmetadata: (() => void) | null = null;
  onerror: ((e: unknown) => void) | null = null;
  /** Flip to the playing state and resolve, mimicking a granted play(). */
  async play(): Promise<void> {
    this.paused = false;
  }
  /** Flip back to the paused state. */
  pause(): void {
    this.paused = true;
  }
  /** Event wiring is irrelevant for these tests. */
  addEventListener(): void {
    // no-op
  }
  /** Event wiring is irrelevant for these tests. */
  removeEventListener(): void {
    // no-op
  }
}
// Install MockAudio globally for the AudioPlayer embedded in TextToSpeechButton.
vi.stubGlobal("Audio", MockAudio);
describe("TextToSpeechButton", () => {
  beforeEach(() => {
    vi.clearAllMocks();
    // Restore the idle hook state before every test; individual tests
    // override this to simulate loading/success/error states.
    mockUseTextToSpeech.mockReturnValue({
      synthesize: mockSynthesize,
      play: mockPlay,
      pause: mockPause,
      stop: mockStop,
      audioUrl: null,
      isLoading: false,
      error: null,
      isPlaying: false,
      duration: 0,
      currentTime: 0,
    });
  });
  describe("rendering", () => {
    it("should render a read aloud button", () => {
      render(<TextToSpeechButton text="Hello world" />);
      const button = screen.getByRole("button", { name: /read aloud/i });
      expect(button).toBeInTheDocument();
    });
    it("should not render AudioPlayer initially when no audio is synthesized", () => {
      render(<TextToSpeechButton text="Hello world" />);
      expect(screen.queryByRole("region", { name: /audio player/i })).not.toBeInTheDocument();
    });
  });
  describe("click behavior", () => {
    it("should call synthesize with text on click", async () => {
      const user = userEvent.setup();
      mockSynthesize.mockResolvedValueOnce(undefined);
      render(<TextToSpeechButton text="Hello world" />);
      const button = screen.getByRole("button", { name: /read aloud/i });
      await user.click(button);
      // No voice/tier props: the options argument must be exactly undefined.
      expect(mockSynthesize).toHaveBeenCalledWith("Hello world", undefined);
    });
    it("should pass voice and tier options when provided", async () => {
      const user = userEvent.setup();
      mockSynthesize.mockResolvedValueOnce(undefined);
      render(<TextToSpeechButton text="Hello" voice="alloy" tier="premium" />);
      const button = screen.getByRole("button", { name: /read aloud/i });
      await user.click(button);
      expect(mockSynthesize).toHaveBeenCalledWith("Hello", {
        voice: "alloy",
        tier: "premium",
      });
    });
  });
  describe("loading state", () => {
    it("should show loading indicator while synthesizing", () => {
      mockUseTextToSpeech.mockReturnValue({
        synthesize: mockSynthesize,
        play: mockPlay,
        pause: mockPause,
        stop: mockStop,
        audioUrl: null,
        isLoading: true,
        error: null,
        isPlaying: false,
        duration: 0,
        currentTime: 0,
      });
      render(<TextToSpeechButton text="Hello world" />);
      // While loading, the button's aria-label switches to "Synthesizing speech".
      const button = screen.getByRole("button", { name: /synthesizing/i });
      expect(button).toBeInTheDocument();
      expect(button).toBeDisabled();
    });
  });
  describe("audio player integration", () => {
    it("should show AudioPlayer when audio is available", () => {
      mockUseTextToSpeech.mockReturnValue({
        synthesize: mockSynthesize,
        play: mockPlay,
        pause: mockPause,
        stop: mockStop,
        audioUrl: "blob:mock-url",
        isLoading: false,
        error: null,
        isPlaying: false,
        duration: 30,
        currentTime: 0,
      });
      render(<TextToSpeechButton text="Hello world" />);
      expect(screen.getByRole("region", { name: /audio player/i })).toBeInTheDocument();
    });
  });
  describe("error state", () => {
    it("should display error message when synthesis fails", () => {
      mockUseTextToSpeech.mockReturnValue({
        synthesize: mockSynthesize,
        play: mockPlay,
        pause: mockPause,
        stop: mockStop,
        audioUrl: null,
        isLoading: false,
        error: "Synthesis failed",
        isPlaying: false,
        duration: 0,
        currentTime: 0,
      });
      render(<TextToSpeechButton text="Hello world" />);
      expect(screen.getByText(/synthesis failed/i)).toBeInTheDocument();
    });
  });
  describe("accessibility", () => {
    it("should have proper aria label on button", () => {
      render(<TextToSpeechButton text="Hello world" />);
      const button = screen.getByRole("button", { name: /read aloud/i });
      expect(button).toBeInTheDocument();
    });
  });
  describe("design", () => {
    // PDA-friendly palette: no red Tailwind utility classes anywhere.
    it("should not use aggressive colors", () => {
      const { container } = render(<TextToSpeechButton text="Hello world" />);
      const allElements = container.querySelectorAll("*");
      allElements.forEach((el) => {
        const className = el.className;
        // className can be an SVGAnimatedString on SVG nodes, so only
        // plain-string values are matched here.
        if (typeof className === "string") {
          expect(className).not.toMatch(/bg-red-|text-red-|border-red-/);
        }
      });
    });
  });
});

View File

@@ -0,0 +1,126 @@
/**
* TextToSpeechButton Component
* "Read aloud" button that synthesizes text and plays it via AudioPlayer.
*
* Accepts text as a prop, with optional voice and tier selection.
* Shows loading state during synthesis and integrates AudioPlayer for playback.
*
* Follows PDA-friendly design: no aggressive colors, calm interface.
*/
import { useCallback } from "react";
import type { ReactElement } from "react";
import { useTextToSpeech } from "@/hooks/useTextToSpeech";
import type { SynthesizeOptions } from "@/hooks/useTextToSpeech";
import { AudioPlayer } from "./AudioPlayer";
/** Props for the TextToSpeechButton component. */
export interface TextToSpeechButtonProps {
  /** The text to synthesize to speech */
  text: string;
  /** Optional voice ID to use; omitted from the synthesize options when undefined */
  voice?: string;
  /** Optional tier (e.g. "standard", "premium"); omitted from the synthesize options when undefined */
  tier?: string;
  /** Optional className for the container */
  className?: string;
}
/**
* TextToSpeechButton provides a "Read aloud" button that synthesizes
* the given text and displays an AudioPlayer for playback control.
*/
export function TextToSpeechButton({
text,
voice,
tier,
className = "",
}: TextToSpeechButtonProps): ReactElement {
const { synthesize, audioUrl, isLoading, error } = useTextToSpeech();
/**
* Handle read aloud button click
*/
const handleClick = useCallback(async (): Promise<void> => {
let options: SynthesizeOptions | undefined;
if (voice !== undefined || tier !== undefined) {
options = {};
if (voice !== undefined) options.voice = voice;
if (tier !== undefined) options.tier = tier;
}
await synthesize(text, options);
}, [text, voice, tier, synthesize]);
return (
<div className={`flex flex-col gap-2 ${className}`}>
{/* Read Aloud Button */}
<button
type="button"
onClick={() => void handleClick()}
disabled={isLoading}
aria-label={isLoading ? "Synthesizing speech" : "Read aloud"}
className="inline-flex items-center gap-2 rounded-lg border border-gray-200 bg-white px-3 py-1.5 text-sm font-medium text-gray-700 transition-colors hover:bg-gray-50 focus:outline-none focus:ring-2 focus:ring-blue-300 disabled:cursor-not-allowed disabled:opacity-50"
>
{isLoading ? (
<>
{/* Spinner */}
<svg
className="h-4 w-4 animate-spin text-gray-500"
viewBox="0 0 24 24"
fill="none"
aria-hidden="true"
>
<circle
cx="12"
cy="12"
r="10"
stroke="currentColor"
strokeWidth="3"
className="opacity-25"
/>
<path
fill="currentColor"
d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z"
className="opacity-75"
/>
</svg>
<span>Synthesizing...</span>
</>
) : (
<>
{/* Speaker Icon */}
<svg
width="16"
height="16"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
strokeWidth="2"
strokeLinecap="round"
strokeLinejoin="round"
aria-hidden="true"
>
<polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5" />
<path d="M15.54 8.46a5 5 0 010 7.07" />
<path d="M19.07 4.93a10 10 0 010 14.14" />
</svg>
<span>Read aloud</span>
</>
)}
</button>
{/* Error Display */}
{error && (
<p className="text-sm text-amber-600" role="alert">
{error}
</p>
)}
{/* Audio Player (shown after synthesis) */}
{audioUrl && <AudioPlayer src={audioUrl} />}
</div>
);
}
export default TextToSpeechButton;

View File

@@ -0,0 +1,228 @@
import { describe, it, expect, vi, beforeEach } from "vitest";
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { VoiceInput } from "./VoiceInput";
// Mock the useVoiceInput hook
const mockStartRecording = vi.fn();
const mockStopRecording = vi.fn();
// vitest hoists vi.mock above the imports; the factory may only reference
// the lazily evaluated vi.fn() handles declared above.
vi.mock("@/hooks/useVoiceInput", () => ({
  useVoiceInput: vi.fn(() => ({
    isRecording: false,
    startRecording: mockStartRecording,
    stopRecording: mockStopRecording,
    transcript: "",
    partialTranscript: "",
    error: null,
    audioLevel: 0,
  })),
}));
// We need to import after mocking
import { useVoiceInput } from "@/hooks/useVoiceInput";
describe("VoiceInput", (): void => {
  beforeEach((): void => {
    vi.clearAllMocks();
    // Reset mock implementation to default (idle, not recording); individual
    // tests override this to simulate recording/transcript/error states.
    vi.mocked(useVoiceInput).mockReturnValue({
      isRecording: false,
      startRecording: mockStartRecording,
      stopRecording: mockStopRecording,
      transcript: "",
      partialTranscript: "",
      error: null,
      audioLevel: 0,
    });
  });
  it("should render a microphone button", (): void => {
    render(<VoiceInput />);
    const button = screen.getByRole("button", {
      name: /start voice input/i,
    });
    expect(button).toBeInTheDocument();
  });
  it("should have accessible aria label", (): void => {
    render(<VoiceInput />);
    const button = screen.getByRole("button", {
      name: /start voice input/i,
    });
    expect(button).toHaveAttribute("aria-label", "Start voice input");
  });
  it("should call startRecording when mic button is clicked", async (): Promise<void> => {
    const user = userEvent.setup();
    render(<VoiceInput />);
    const button = screen.getByRole("button", {
      name: /start voice input/i,
    });
    await user.click(button);
    expect(mockStartRecording).toHaveBeenCalledTimes(1);
  });
  it("should show recording state when isRecording is true", (): void => {
    // While recording, the button's aria-label flips to "Stop voice input".
    vi.mocked(useVoiceInput).mockReturnValue({
      isRecording: true,
      startRecording: mockStartRecording,
      stopRecording: mockStopRecording,
      transcript: "",
      partialTranscript: "",
      error: null,
      audioLevel: 0.5,
    });
    render(<VoiceInput />);
    const button = screen.getByRole("button", {
      name: /stop voice input/i,
    });
    expect(button).toBeInTheDocument();
  });
  it("should call stopRecording when mic button is clicked while recording", async (): Promise<void> => {
    const user = userEvent.setup();
    vi.mocked(useVoiceInput).mockReturnValue({
      isRecording: true,
      startRecording: mockStartRecording,
      stopRecording: mockStopRecording,
      transcript: "",
      partialTranscript: "",
      error: null,
      audioLevel: 0.5,
    });
    render(<VoiceInput />);
    const button = screen.getByRole("button", {
      name: /stop voice input/i,
    });
    await user.click(button);
    expect(mockStopRecording).toHaveBeenCalledTimes(1);
  });
  it("should display partial transcription text", (): void => {
    // While recording, the interim (partial) transcript is shown.
    vi.mocked(useVoiceInput).mockReturnValue({
      isRecording: true,
      startRecording: mockStartRecording,
      stopRecording: mockStopRecording,
      transcript: "",
      partialTranscript: "hello worl",
      error: null,
      audioLevel: 0.3,
    });
    render(<VoiceInput />);
    expect(screen.getByText("hello worl")).toBeInTheDocument();
  });
  it("should display final transcript text", (): void => {
    // After recording stops, the final transcript is shown.
    vi.mocked(useVoiceInput).mockReturnValue({
      isRecording: false,
      startRecording: mockStartRecording,
      stopRecording: mockStopRecording,
      transcript: "hello world",
      partialTranscript: "",
      error: null,
      audioLevel: 0,
    });
    render(<VoiceInput />);
    expect(screen.getByText("hello world")).toBeInTheDocument();
  });
  it("should display error message", (): void => {
    vi.mocked(useVoiceInput).mockReturnValue({
      isRecording: false,
      startRecording: mockStartRecording,
      stopRecording: mockStopRecording,
      transcript: "",
      partialTranscript: "",
      error: "Microphone access not available",
      audioLevel: 0,
    });
    render(<VoiceInput />);
    expect(screen.getByText("Microphone access not available")).toBeInTheDocument();
  });
  it("should call onTranscript callback prop", (): void => {
    const onTranscript = vi.fn();
    vi.mocked(useVoiceInput).mockReturnValue({
      isRecording: false,
      startRecording: mockStartRecording,
      stopRecording: mockStopRecording,
      transcript: "final text",
      partialTranscript: "",
      error: null,
      audioLevel: 0,
    });
    render(<VoiceInput onTranscript={onTranscript} />);
    // The onTranscript prop is passed to the hook - we verify the prop is accepted
    expect(useVoiceInput).toHaveBeenCalledWith(
      expect.objectContaining({
        onTranscript,
      })
    );
  });
  it("should use calm, non-aggressive design for recording indicator", (): void => {
    vi.mocked(useVoiceInput).mockReturnValue({
      isRecording: true,
      startRecording: mockStartRecording,
      stopRecording: mockStopRecording,
      transcript: "",
      partialTranscript: "",
      error: null,
      audioLevel: 0.5,
    });
    render(<VoiceInput />);
    // Check there are no aggressive red colors in the recording state
    const button = screen.getByRole("button", { name: /stop voice input/i });
    const className = button.className;
    expect(className).not.toMatch(/bg-red-|text-red-|border-red-/);
  });
  it("should use calm design for error display", (): void => {
    vi.mocked(useVoiceInput).mockReturnValue({
      isRecording: false,
      startRecording: mockStartRecording,
      stopRecording: mockStopRecording,
      transcript: "",
      partialTranscript: "",
      error: "Something went wrong",
      audioLevel: 0,
    });
    render(<VoiceInput />);
    // Errors should use a muted palette (amber), never red classes.
    const errorEl = screen.getByText("Something went wrong");
    const className = errorEl.className;
    expect(className).not.toMatch(/text-red-600|bg-red-/);
  });
  it("should be disabled when disabled prop is true", (): void => {
    render(<VoiceInput disabled />);
    const button = screen.getByRole("button", {
      name: /start voice input/i,
    });
    expect(button).toBeDisabled();
  });
});

View File

@@ -0,0 +1,146 @@
/**
* VoiceInput component
*
* Provides a microphone button with visual feedback for voice input.
* Click to start/stop recording with real-time transcription display.
*
* Design principles:
* - PDA-friendly: calm, non-aggressive colors
* - Gentle pulsing animation for recording state (blue/green)
* - Mobile-friendly touch interaction
* - Accessible with proper aria labels
*/
import { useVoiceInput } from "@/hooks/useVoiceInput";
import type { UseVoiceInputOptions } from "@/hooks/useVoiceInput";
import { AudioVisualizer } from "./AudioVisualizer";
import { Mic, MicOff } from "lucide-react";
/** Props for the VoiceInput component. */
export interface VoiceInputProps {
  /** Callback fired when final transcription is received; forwarded to useVoiceInput */
  onTranscript?: (text: string) => void;
  /** Whether to use WebSocket streaming (default: true); forwarded to useVoiceInput */
  useWebSocket?: boolean;
  /** Whether the input is disabled (default: false) */
  disabled?: boolean;
  /** Additional CSS classes for the container */
  className?: string;
}
/**
 * Voice input component with microphone capture and real-time transcription.
 * Shows a mic button that toggles recording, with visual feedback
 * and transcription text display.
 *
 * State (recording flag, transcripts, error, audio level) comes entirely from
 * the useVoiceInput hook; this component is presentation + click wiring only.
 */
export function VoiceInput({
  onTranscript,
  useWebSocket: useWs,
  disabled = false,
  className = "",
}: VoiceInputProps): React.JSX.Element {
  // Build the options object conditionally so undefined props are omitted
  // entirely rather than passed as explicit `undefined` keys.
  const hookOptions: UseVoiceInputOptions = {};
  if (onTranscript !== undefined) {
    hookOptions.onTranscript = onTranscript;
  }
  if (useWs !== undefined) {
    hookOptions.useWebSocket = useWs;
  }
  const {
    isRecording,
    startRecording,
    stopRecording,
    transcript,
    partialTranscript,
    error,
    audioLevel,
  } = useVoiceInput(hookOptions);
  // Toggle recording. startRecording is async (mic-permission prompt); it is
  // fired with `void` since failures surface through the hook's `error` state.
  const handleClick = (): void => {
    if (isRecording) {
      stopRecording();
    } else {
      void startRecording();
    }
  };
  // While recording, show the live partial transcript; afterwards the final one.
  const displayText = isRecording ? partialTranscript : transcript;
  return (
    <div className={`flex flex-col items-center gap-3 ${className}`}>
      {/* Mic button with recording indicator */}
      <div className="relative flex items-center gap-2">
        {/* Pulsing ring animation when recording */}
        {isRecording && (
          <div
            className="absolute inset-0 -m-1 rounded-full bg-sky-400/20 animate-pulse"
            aria-hidden="true"
          />
        )}
        <button
          type="button"
          onClick={handleClick}
          disabled={disabled}
          aria-label={isRecording ? "Stop voice input" : "Start voice input"}
          className={`
            relative z-10 flex items-center justify-center
            w-10 h-10 rounded-full transition-all duration-200
            focus:outline-none focus:ring-2 focus:ring-sky-400 focus:ring-offset-2
            disabled:opacity-50 disabled:cursor-not-allowed
            ${
              isRecording
                ? "bg-sky-500 text-white hover:bg-sky-600 shadow-md"
                : "bg-slate-100 text-slate-600 hover:bg-slate-200 dark:bg-slate-700 dark:text-slate-300 dark:hover:bg-slate-600"
            }
          `}
        >
          {/* Icons are decorative; the aria-label on the button carries meaning */}
          {isRecording ? (
            <MicOff className="w-5 h-5" aria-hidden="true" />
          ) : (
            <Mic className="w-5 h-5" aria-hidden="true" />
          )}
        </button>
        {/* Audio level visualizer - shown during recording */}
        {isRecording && (
          <AudioVisualizer audioLevel={audioLevel} isActive={isRecording} barCount={5} />
        )}
      </div>
      {/* Recording status indicator */}
      {isRecording && (
        <div className="flex items-center gap-1.5 text-xs text-sky-600 dark:text-sky-400">
          <span className="w-2 h-2 rounded-full bg-sky-500 animate-pulse" aria-hidden="true" />
          <span>Listening...</span>
        </div>
      )}
      {/* Transcription text display */}
      {displayText && (
        <p
          className={`
            text-sm max-w-md text-center px-3 py-1.5 rounded-lg
            ${
              isRecording
                ? "text-slate-500 dark:text-slate-400 bg-slate-50 dark:bg-slate-800/50 italic"
                : "text-slate-700 dark:text-slate-200 bg-slate-100 dark:bg-slate-800"
            }
          `}
        >
          {displayText}
        </p>
      )}
      {/* Error display - calm, non-aggressive (amber, not red, per PDA-friendly design) */}
      {error && (
        <p
          className="text-sm text-amber-700 dark:text-amber-400 bg-amber-50 dark:bg-amber-900/20 px-3 py-1.5 rounded-lg max-w-md text-center"
          role="alert"
        >
          {error}
        </p>
      )}
    </div>
  );
}

View File

@@ -0,0 +1,8 @@
// Barrel file for the speech UI building blocks.
// Speech-to-text (microphone capture + visualization)
export { VoiceInput } from "./VoiceInput";
export type { VoiceInputProps } from "./VoiceInput";
export { AudioVisualizer } from "./AudioVisualizer";
export type { AudioVisualizerProps } from "./AudioVisualizer";
// Text-to-speech (playback controls)
export { AudioPlayer } from "./AudioPlayer";
export type { AudioPlayerProps } from "./AudioPlayer";
export { TextToSpeechButton } from "./TextToSpeechButton";
export type { TextToSpeechButtonProps } from "./TextToSpeechButton";

View File

@@ -0,0 +1,285 @@
/**
* @file useTextToSpeech.test.ts
* @description Tests for the useTextToSpeech hook that manages TTS API integration
*/
import { renderHook, act } from "@testing-library/react";
import { describe, it, expect, beforeEach, vi, afterEach } from "vitest";
import { useTextToSpeech } from "./useTextToSpeech";
import * as speechApi from "@/lib/api/speech";
// Mock the speech API module so no network requests are made
vi.mock("@/lib/api/speech", () => ({
  synthesizeSpeech: vi.fn(),
  getVoices: vi.fn(),
}));
// Mock URL.createObjectURL and URL.revokeObjectURL (not implemented in the
// test DOM environment); reinstalled before each test in case a test replaces them
const mockCreateObjectURL = vi.fn().mockReturnValue("blob:mock-audio-url");
const mockRevokeObjectURL = vi.fn();
beforeEach(() => {
  global.URL.createObjectURL = mockCreateObjectURL;
  global.URL.revokeObjectURL = mockRevokeObjectURL;
});
/**
 * Minimal stand-in for HTMLAudioElement.
 * Mirrors only the surface the hook under test touches: src/time/duration
 * state, play/pause, and the four events registered via addEventListener
 * (stored on the matching on* property so tests can fire them manually).
 */
class MockAudio {
  src = "";
  currentTime = 0;
  duration = 120;
  paused = true;
  playbackRate = 1;
  volume = 1;
  onended: (() => void) | null = null;
  ontimeupdate: (() => void) | null = null;
  onloadedmetadata: (() => void) | null = null;
  onerror: ((e: unknown) => void) | null = null;
  play(): Promise<void> {
    // Resolve immediately; the real element's async autoplay checks are irrelevant here.
    this.paused = false;
    return Promise.resolve();
  }
  pause(): void {
    this.paused = true;
  }
  addEventListener(event: string, handler: () => void): void {
    switch (event) {
      case "ended":
        this.onended = handler;
        break;
      case "timeupdate":
        this.ontimeupdate = handler;
        break;
      case "loadedmetadata":
        this.onloadedmetadata = handler;
        break;
      case "error":
        this.onerror = handler;
        break;
    }
  }
  removeEventListener(): void {
    // Intentionally empty — these tests never assert listener removal.
  }
}
// Replace the global Audio constructor with the mock for every test in this file
vi.stubGlobal("Audio", MockAudio);
// Typed handle to the mocked API function for configuring per-test resolutions
const mockSynthesizeSpeech = speechApi.synthesizeSpeech as ReturnType<typeof vi.fn>;
describe("useTextToSpeech", () => {
  beforeEach(() => {
    vi.clearAllMocks();
    // clearAllMocks wipes mockReturnValue, so re-arm the blob URL stub
    mockCreateObjectURL.mockReturnValue("blob:mock-audio-url");
  });
  afterEach(() => {
    vi.restoreAllMocks();
  });
  // The hook's full public surface before any synthesis happens
  describe("initial state", () => {
    it("should return correct initial interface", () => {
      const { result } = renderHook(() => useTextToSpeech());
      expect(result.current.synthesize).toBeTypeOf("function");
      expect(result.current.play).toBeTypeOf("function");
      expect(result.current.pause).toBeTypeOf("function");
      expect(result.current.stop).toBeTypeOf("function");
      expect(result.current.audioUrl).toBeNull();
      expect(result.current.isLoading).toBe(false);
      expect(result.current.error).toBeNull();
      expect(result.current.isPlaying).toBe(false);
      expect(result.current.duration).toBe(0);
      expect(result.current.currentTime).toBe(0);
    });
  });
  // API invocation, option pass-through, loading flag, errors, and caching
  describe("synthesize", () => {
    it("should call API and return audio blob URL", async () => {
      const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
      mockSynthesizeSpeech.mockResolvedValueOnce(mockBlob);
      const { result } = renderHook(() => useTextToSpeech());
      await act(async () => {
        await result.current.synthesize("Hello world");
      });
      expect(mockSynthesizeSpeech).toHaveBeenCalledWith({
        text: "Hello world",
      });
      expect(result.current.audioUrl).toBe("blob:mock-audio-url");
      expect(result.current.isLoading).toBe(false);
      expect(result.current.error).toBeNull();
    });
    it("should pass voice and tier options to API", async () => {
      const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
      mockSynthesizeSpeech.mockResolvedValueOnce(mockBlob);
      const { result } = renderHook(() => useTextToSpeech());
      await act(async () => {
        await result.current.synthesize("Hello", {
          voice: "alloy",
          tier: "premium",
          speed: 1.5,
        });
      });
      expect(mockSynthesizeSpeech).toHaveBeenCalledWith({
        text: "Hello",
        voice: "alloy",
        tier: "premium",
        speed: 1.5,
      });
    });
    it("should set loading state while synthesizing", async () => {
      // Hold the API promise open manually so the in-flight state is observable
      let resolvePromise: ((value: Blob) => void) | undefined;
      const pendingPromise = new Promise<Blob>((resolve) => {
        resolvePromise = resolve;
      });
      mockSynthesizeSpeech.mockReturnValueOnce(pendingPromise);
      const { result } = renderHook(() => useTextToSpeech());
      act(() => {
        void result.current.synthesize("Hello");
      });
      expect(result.current.isLoading).toBe(true);
      await act(async () => {
        resolvePromise?.(new Blob(["audio"], { type: "audio/mpeg" }));
        await pendingPromise;
      });
      expect(result.current.isLoading).toBe(false);
    });
    it("should handle API errors gracefully", async () => {
      mockSynthesizeSpeech.mockRejectedValueOnce(new Error("Synthesis failed"));
      const { result } = renderHook(() => useTextToSpeech());
      await act(async () => {
        await result.current.synthesize("Hello");
      });
      // The hook surfaces the message as state rather than rethrowing
      expect(result.current.error).toBe("Synthesis failed");
      expect(result.current.isLoading).toBe(false);
      expect(result.current.audioUrl).toBeNull();
    });
    it("should cache audio for repeated synthesis of same text", async () => {
      const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
      mockSynthesizeSpeech.mockResolvedValue(mockBlob);
      const { result } = renderHook(() => useTextToSpeech());
      // First call
      await act(async () => {
        await result.current.synthesize("Hello world");
      });
      // Second call with same text
      await act(async () => {
        await result.current.synthesize("Hello world");
      });
      // API should only be called once due to caching
      expect(mockSynthesizeSpeech).toHaveBeenCalledTimes(1);
    });
    it("should not cache when options differ", async () => {
      const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
      mockSynthesizeSpeech.mockResolvedValue(mockBlob);
      const { result } = renderHook(() => useTextToSpeech());
      await act(async () => {
        await result.current.synthesize("Hello", { voice: "alloy" });
      });
      await act(async () => {
        await result.current.synthesize("Hello", { voice: "nova" });
      });
      expect(mockSynthesizeSpeech).toHaveBeenCalledTimes(2);
    });
  });
  // play/pause/stop state transitions (backed by the MockAudio stub above)
  describe("playback controls", () => {
    it("should play audio after synthesis", async () => {
      const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
      mockSynthesizeSpeech.mockResolvedValueOnce(mockBlob);
      const { result } = renderHook(() => useTextToSpeech());
      await act(async () => {
        await result.current.synthesize("Hello");
      });
      await act(async () => {
        await result.current.play();
      });
      expect(result.current.isPlaying).toBe(true);
    });
    it("should pause audio playback", async () => {
      const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
      mockSynthesizeSpeech.mockResolvedValueOnce(mockBlob);
      const { result } = renderHook(() => useTextToSpeech());
      await act(async () => {
        await result.current.synthesize("Hello");
      });
      await act(async () => {
        await result.current.play();
      });
      act(() => {
        result.current.pause();
      });
      expect(result.current.isPlaying).toBe(false);
    });
    it("should stop and reset playback", async () => {
      const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
      mockSynthesizeSpeech.mockResolvedValueOnce(mockBlob);
      const { result } = renderHook(() => useTextToSpeech());
      await act(async () => {
        await result.current.synthesize("Hello");
      });
      await act(async () => {
        await result.current.play();
      });
      act(() => {
        result.current.stop();
      });
      expect(result.current.isPlaying).toBe(false);
      expect(result.current.currentTime).toBe(0);
    });
  });
  // Blob URLs must be released when the consuming component unmounts
  describe("cleanup", () => {
    it("should revoke object URLs on unmount", async () => {
      const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
      mockSynthesizeSpeech.mockResolvedValueOnce(mockBlob);
      const { result, unmount } = renderHook(() => useTextToSpeech());
      await act(async () => {
        await result.current.synthesize("Hello");
      });
      unmount();
      expect(mockRevokeObjectURL).toHaveBeenCalled();
    });
  });
});

View File

@@ -0,0 +1,239 @@
/**
* useTextToSpeech hook
* Manages TTS API integration with synthesis, caching, and playback state
*/
import { useState, useCallback, useRef, useEffect } from "react";
import { synthesizeSpeech } from "@/lib/api/speech";
/** Optional parameters forwarded verbatim to the TTS synthesis endpoint. */
export interface SynthesizeOptions {
  /** Voice identifier (e.g. "alloy", "nova"); server default when omitted */
  voice?: string;
  /** Playback speed multiplier requested from the synthesizer */
  speed?: number;
  /** Audio format — presumably a container/codec name; confirm against the speech API */
  format?: string;
  /** Quality/cost tier (e.g. "premium") */
  tier?: string;
}
/** Public API returned by the useTextToSpeech hook. */
export interface UseTextToSpeechReturn {
  /** Synthesize text to speech audio */
  synthesize: (text: string, options?: SynthesizeOptions) => Promise<void>;
  /** The URL of the synthesized audio blob (null until a synthesis succeeds) */
  audioUrl: string | null;
  /** Whether synthesis is in progress */
  isLoading: boolean;
  /** Error message if synthesis failed */
  error: string | null;
  /** Start or resume audio playback */
  play: () => Promise<void>;
  /** Pause audio playback */
  pause: () => void;
  /** Stop audio and reset to beginning */
  stop: () => void;
  /** Whether audio is currently playing */
  isPlaying: boolean;
  /** Total duration of the audio in seconds */
  duration: number;
  /** Current playback position in seconds */
  currentTime: number;
}
/**
 * Cache key generator for a text + options combination.
 *
 * Serializes the fields in a fixed order so that option objects with the same
 * values but different property insertion order (e.g. `{ voice, speed }` vs
 * `{ speed, voice }`) map to the same key. A plain
 * `JSON.stringify({ text, ...options })` follows insertion order and would
 * yield different keys — causing spurious cache misses and duplicate API calls.
 * Omitted options serialize as null, so `{}`, `undefined`, and
 * `{ voice: undefined }` all share one key (matching the previous behavior,
 * where JSON.stringify dropped undefined-valued properties).
 */
function getCacheKey(text: string, options?: SynthesizeOptions): string {
  return JSON.stringify([
    text,
    options?.voice ?? null,
    options?.speed ?? null,
    options?.format ?? null,
    options?.tier ?? null,
  ]);
}
/**
 * Hook for text-to-speech API integration with caching and playback controls.
 *
 * synthesize() fetches audio for a text (memoized per text+options in a
 * Map of blob URLs), exposes the resulting blob URL, and wires an
 * HTMLAudioElement for play/pause/stop with duration/currentTime state.
 * All blob URLs created during the session are revoked on unmount.
 */
export function useTextToSpeech(): UseTextToSpeechReturn {
  const [audioUrl, setAudioUrl] = useState<string | null>(null);
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [isPlaying, setIsPlaying] = useState(false);
  const [duration, setDuration] = useState(0);
  const [currentTime, setCurrentTime] = useState(0);
  // Audio element ref for playback control
  const audioRef = useRef<HTMLAudioElement | null>(null);
  // Cache: maps cache key -> blob URL
  const cacheRef = useRef<Map<string, string>>(new Map());
  // Track all blob URLs for cleanup (superset of cache values)
  const blobUrlsRef = useRef<Set<string>>(new Set());
  /**
   * Clean up audio element event listeners and state.
   *
   * NOTE(review): this relies on hoisting of the handler function
   * declarations below, and — because the dependency list is empty — it
   * captures the first render's handler identities. setupAudio and the
   * unmount effect capture those same first-render identities, so the
   * add/remove pairs currently match, but this is fragile; consider moving
   * the handlers into stable useCallbacks before refactoring.
   */
  const cleanupAudio = useCallback(() => {
    const audio = audioRef.current;
    if (audio) {
      audio.pause();
      audio.removeEventListener("ended", handleEnded);
      audio.removeEventListener("timeupdate", handleTimeUpdate);
      audio.removeEventListener("loadedmetadata", handleLoadedMetadata);
      audioRef.current = null;
    }
    setIsPlaying(false);
  }, []);
  /**
   * Handle audio ended event: reset playing state and position.
   */
  function handleEnded(): void {
    setIsPlaying(false);
    setCurrentTime(0);
  }
  /**
   * Handle audio time update event: mirror the element's position into state.
   */
  function handleTimeUpdate(): void {
    const audio = audioRef.current;
    if (audio) {
      setCurrentTime(audio.currentTime);
    }
  }
  /**
   * Handle audio metadata loaded event.
   * Guards against non-finite durations (e.g. streams report Infinity).
   */
  function handleLoadedMetadata(): void {
    const audio = audioRef.current;
    if (audio && isFinite(audio.duration)) {
      setDuration(audio.duration);
    }
  }
  /**
   * Set up a new Audio element for a given URL, tearing down any previous one.
   */
  const setupAudio = useCallback(
    (url: string) => {
      cleanupAudio();
      const audio = new Audio(url);
      audio.addEventListener("ended", handleEnded);
      audio.addEventListener("timeupdate", handleTimeUpdate);
      audio.addEventListener("loadedmetadata", handleLoadedMetadata);
      audioRef.current = audio;
    },
    [cleanupAudio]
  );
  /**
   * Synthesize text to speech.
   * Cache hits skip the API call (and the loading state) entirely;
   * failures are captured into `error` state rather than rethrown.
   */
  const synthesize = useCallback(
    async (text: string, options?: SynthesizeOptions): Promise<void> => {
      setError(null);
      // Check cache first
      const cacheKey = getCacheKey(text, options);
      const cachedUrl = cacheRef.current.get(cacheKey);
      if (cachedUrl) {
        setAudioUrl(cachedUrl);
        setupAudio(cachedUrl);
        return;
      }
      setIsLoading(true);
      try {
        // Spread options conditionally so undefined values are omitted from
        // the request payload rather than sent as explicit undefined keys.
        const blob = await synthesizeSpeech({
          text,
          ...(options?.voice !== undefined && { voice: options.voice }),
          ...(options?.speed !== undefined && { speed: options.speed }),
          ...(options?.format !== undefined && { format: options.format }),
          ...(options?.tier !== undefined && { tier: options.tier }),
        });
        const url = URL.createObjectURL(blob);
        // Store in cache and track for cleanup
        cacheRef.current.set(cacheKey, url);
        blobUrlsRef.current.add(url);
        setAudioUrl(url);
        setupAudio(url);
      } catch (err) {
        const errorMsg = err instanceof Error ? err.message : "Speech synthesis failed";
        setError(errorMsg);
        setAudioUrl(null);
      } finally {
        setIsLoading(false);
      }
    },
    [setupAudio]
  );
  /**
   * Start or resume audio playback. No-op when nothing has been synthesized.
   */
  const play = useCallback(async (): Promise<void> => {
    const audio = audioRef.current;
    if (audio) {
      await audio.play();
      setIsPlaying(true);
    }
  }, []);
  /**
   * Pause audio playback, keeping the current position.
   */
  const pause = useCallback((): void => {
    const audio = audioRef.current;
    if (audio) {
      audio.pause();
      setIsPlaying(false);
    }
  }, []);
  /**
   * Stop audio and reset to beginning.
   */
  const stop = useCallback((): void => {
    const audio = audioRef.current;
    if (audio) {
      audio.pause();
      audio.currentTime = 0;
      setIsPlaying(false);
      setCurrentTime(0);
    }
  }, []);
  // Cleanup on unmount: revoke all blob URLs and clean up audio
  useEffect((): (() => void) => {
    return (): void => {
      // Clean up audio element (inlined rather than calling cleanupAudio to
      // avoid setState calls during unmount)
      const audio = audioRef.current;
      if (audio) {
        audio.pause();
        audio.removeEventListener("ended", handleEnded);
        audio.removeEventListener("timeupdate", handleTimeUpdate);
        audio.removeEventListener("loadedmetadata", handleLoadedMetadata);
        audioRef.current = null;
      }
      // Revoke all blob URLs
      for (const url of blobUrlsRef.current) {
        URL.revokeObjectURL(url);
      }
      blobUrlsRef.current.clear();
      cacheRef.current.clear();
    };
  }, []);
  return {
    synthesize,
    audioUrl,
    isLoading,
    error,
    play,
    pause,
    stop,
    isPlaying,
    duration,
    currentTime,
  };
}

View File

@@ -0,0 +1,362 @@
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import { renderHook, act, waitFor } from "@testing-library/react";
import { useVoiceInput } from "./useVoiceInput";
import type { Socket } from "socket.io-client";
import { io } from "socket.io-client";
// Mock socket.io-client
vi.mock("socket.io-client");
// Mock MediaRecorder: one shared object standing in for the recorder instance.
// addEventListener stores handlers on the matching on* property so tests can
// invoke them manually.
const mockMediaRecorder = {
  start: vi.fn(),
  stop: vi.fn(),
  pause: vi.fn(),
  resume: vi.fn(),
  state: "inactive" as RecordingState,
  ondataavailable: null as ((event: BlobEvent) => void) | null,
  onstop: null as (() => void) | null,
  onerror: null as ((event: Event) => void) | null,
  addEventListener: vi.fn((event: string, handler: EventListenerOrEventListenerObject) => {
    if (event === "dataavailable") {
      mockMediaRecorder.ondataavailable = handler as (event: BlobEvent) => void;
    } else if (event === "stop") {
      mockMediaRecorder.onstop = handler as () => void;
    } else if (event === "error") {
      mockMediaRecorder.onerror = handler as (event: Event) => void;
    }
  }),
  removeEventListener: vi.fn(),
  stream: {
    getTracks: vi.fn(() => [{ stop: vi.fn() }]),
  },
};
// Mock AnalyserNode with getByteFrequencyData for audio-level monitoring
const mockAnalyserNode = {
  fftSize: 256,
  frequencyBinCount: 128,
  getByteFrequencyData: vi.fn((array: Uint8Array) => {
    // Simulate some audio data (constant mid-level signal)
    for (let i = 0; i < array.length; i++) {
      array[i] = 128;
    }
  }),
  connect: vi.fn(),
  disconnect: vi.fn(),
};
const mockMediaStreamSource = {
  connect: vi.fn(),
  disconnect: vi.fn(),
};
const mockAudioContext = {
  createAnalyser: vi.fn(() => mockAnalyserNode),
  createMediaStreamSource: vi.fn(() => mockMediaStreamSource),
  close: vi.fn(),
  state: "running",
};
// Mock getUserMedia (resolution configured per-test in beforeEach)
const mockGetUserMedia = vi.fn();
// Set up global mocks: navigator.mediaDevices does not exist in the test DOM
Object.defineProperty(global.navigator, "mediaDevices", {
  value: {
    getUserMedia: mockGetUserMedia,
  },
  writable: true,
  configurable: true,
});
// Mock AudioContext
vi.stubGlobal(
  "AudioContext",
  vi.fn(() => mockAudioContext)
);
// Mock MediaRecorder constructor
vi.stubGlobal(
  "MediaRecorder",
  vi.fn(() => mockMediaRecorder)
);
// Add isTypeSupported static method (vi.fn() constructors lack statics)
(
  global.MediaRecorder as unknown as { isTypeSupported: (type: string) => boolean }
).isTypeSupported = vi.fn(() => true);
describe("useVoiceInput", (): void => {
  let mockSocket: Partial<Socket>;
  // Handlers the hook registers via socket.on, keyed by event name, so tests
  // can simulate server-sent transcription events.
  let socketEventHandlers: Record<string, (data: unknown) => void>;
  beforeEach((): void => {
    socketEventHandlers = {};
    mockSocket = {
      on: vi.fn((event: string, handler: (...args: unknown[]) => void) => {
        socketEventHandlers[event] = handler;
        return mockSocket;
      }) as unknown as Socket["on"],
      off: vi.fn(() => mockSocket) as unknown as Socket["off"],
      emit: vi.fn() as unknown as Socket["emit"],
      connect: vi.fn(),
      disconnect: vi.fn(),
      connected: true,
    };
    (io as unknown as ReturnType<typeof vi.fn>).mockReturnValue(mockSocket);
    // Reset MediaRecorder mock state
    mockMediaRecorder.state = "inactive";
    mockMediaRecorder.ondataavailable = null;
    mockMediaRecorder.onstop = null;
    mockMediaRecorder.onerror = null;
    // Default: getUserMedia succeeds
    const mockStream = {
      getTracks: vi.fn(() => [{ stop: vi.fn() }]),
    } as unknown as MediaStream;
    mockGetUserMedia.mockResolvedValue(mockStream);
  });
  afterEach((): void => {
    vi.clearAllMocks();
  });
  it("should return the correct interface", (): void => {
    const { result } = renderHook(() => useVoiceInput());
    expect(result.current).toHaveProperty("isRecording");
    expect(result.current).toHaveProperty("startRecording");
    expect(result.current).toHaveProperty("stopRecording");
    expect(result.current).toHaveProperty("transcript");
    expect(result.current).toHaveProperty("partialTranscript");
    expect(result.current).toHaveProperty("error");
    expect(result.current).toHaveProperty("audioLevel");
  });
  it("should start with default state", (): void => {
    const { result } = renderHook(() => useVoiceInput());
    expect(result.current.isRecording).toBe(false);
    expect(result.current.transcript).toBe("");
    expect(result.current.partialTranscript).toBe("");
    expect(result.current.error).toBeNull();
    expect(result.current.audioLevel).toBe(0);
  });
  it("should start recording when startRecording is called", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());
    await act(async () => {
      await result.current.startRecording();
    });
    expect(result.current.isRecording).toBe(true);
    // Pins the exact audio constraints the hook requests from the browser
    expect(mockGetUserMedia).toHaveBeenCalledWith({
      audio: {
        echoCancellation: true,
        noiseSuppression: true,
        sampleRate: 16000,
      },
    });
  });
  it("should stop recording when stopRecording is called", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());
    await act(async () => {
      await result.current.startRecording();
    });
    expect(result.current.isRecording).toBe(true);
    act(() => {
      result.current.stopRecording();
    });
    expect(result.current.isRecording).toBe(false);
  });
  it("should set error when microphone access is denied", async (): Promise<void> => {
    mockGetUserMedia.mockRejectedValueOnce(
      new DOMException("Permission denied", "NotAllowedError")
    );
    const { result } = renderHook(() => useVoiceInput());
    await act(async () => {
      await result.current.startRecording();
    });
    expect(result.current.isRecording).toBe(false);
    expect(result.current.error).toBeTruthy();
    // Only asserts the message mentions the microphone, not exact wording
    expect(result.current.error).toContain("microphone");
  });
  it("should connect to speech WebSocket namespace", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());
    await act(async () => {
      await result.current.startRecording();
    });
    expect(io).toHaveBeenCalledWith(
      expect.any(String),
      expect.objectContaining({
        path: "/socket.io",
      })
    );
  });
  it("should emit start-transcription when recording begins", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());
    await act(async () => {
      await result.current.startRecording();
    });
    expect(mockSocket.emit).toHaveBeenCalledWith(
      "start-transcription",
      expect.objectContaining({
        // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
        format: expect.any(String),
      })
    );
  });
  it("should emit stop-transcription when recording stops", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());
    await act(async () => {
      await result.current.startRecording();
    });
    act(() => {
      result.current.stopRecording();
    });
    expect(mockSocket.emit).toHaveBeenCalledWith("stop-transcription");
  });
  it("should handle partial transcription events", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());
    await act(async () => {
      await result.current.startRecording();
    });
    // Fire the handler the hook registered for the partial-result event
    act(() => {
      socketEventHandlers["transcription-partial"]?.({
        text: "hello world",
      });
    });
    await waitFor(() => {
      expect(result.current.partialTranscript).toBe("hello world");
    });
  });
  it("should handle final transcription events", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());
    await act(async () => {
      await result.current.startRecording();
    });
    act(() => {
      socketEventHandlers["transcription-final"]?.({
        text: "hello world final",
      });
    });
    await waitFor(() => {
      expect(result.current.transcript).toBe("hello world final");
    });
  });
  it("should handle transcription error events", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());
    await act(async () => {
      await result.current.startRecording();
    });
    act(() => {
      socketEventHandlers["transcription-error"]?.({
        message: "Transcription failed",
      });
    });
    await waitFor(() => {
      expect(result.current.error).toBe("Transcription failed");
    });
  });
  it("should call onTranscript callback when final transcription received", async (): Promise<void> => {
    const onTranscript = vi.fn();
    const { result } = renderHook(() => useVoiceInput({ onTranscript }));
    await act(async () => {
      await result.current.startRecording();
    });
    act(() => {
      socketEventHandlers["transcription-final"]?.({
        text: "final text",
      });
    });
    await waitFor(() => {
      expect(onTranscript).toHaveBeenCalledWith("final text");
    });
  });
  it("should clean up on unmount", async (): Promise<void> => {
    const { result, unmount } = renderHook(() => useVoiceInput());
    await act(async () => {
      await result.current.startRecording();
    });
    unmount();
    expect(mockSocket.disconnect).toHaveBeenCalled();
  });
  it("should not start recording if already recording", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());
    await act(async () => {
      await result.current.startRecording();
    });
    // Reset the call count
    mockGetUserMedia.mockClear();
    await act(async () => {
      await result.current.startRecording();
    });
    // Should not have called getUserMedia again
    expect(mockGetUserMedia).not.toHaveBeenCalled();
  });
  describe("REST fallback", (): void => {
    it("should fall back to REST when WebSocket is unavailable", async (): Promise<void> => {
      // Simulate socket not connecting
      (mockSocket as { connected: boolean }).connected = false;
      const { result } = renderHook(() => useVoiceInput({ useWebSocket: false }));
      // Should still be able to start recording (REST mode)
      await act(async () => {
        await result.current.startRecording();
      });
      expect(result.current.isRecording).toBe(true);
    });
  });
});

View File

@@ -0,0 +1,409 @@
/**
* useVoiceInput hook
*
* Custom hook for microphone capture and real-time transcription.
* Supports WebSocket streaming for real-time partial transcriptions
* with REST upload fallback when WebSocket is unavailable.
*/
import { useState, useCallback, useRef, useEffect } from "react";
import type { Socket } from "socket.io-client";
import { io } from "socket.io-client";
import { API_BASE_URL } from "@/lib/config";
import { apiPostFormData } from "@/lib/api/client";
/** Options for the useVoiceInput hook */
export interface UseVoiceInputOptions {
  /** Callback fired when final transcription is received */
  onTranscript?: (text: string) => void;
  /** Whether to use WebSocket streaming (default: true); when false, audio is uploaded via REST after recording stops */
  useWebSocket?: boolean;
  /** Audio sample rate in Hz (default: 16000) */
  sampleRate?: number;
}
/** Return type for the useVoiceInput hook */
export interface UseVoiceInputReturn {
  /** Whether the microphone is currently recording */
  isRecording: boolean;
  /** Start microphone capture and transcription (async: awaits mic permission) */
  startRecording: () => Promise<void>;
  /** Stop microphone capture and transcription */
  stopRecording: () => void;
  /** The final transcription text */
  transcript: string;
  /** Partial transcription text (updates in real-time while recording) */
  partialTranscript: string;
  /** Error message if something went wrong */
  error: string | null;
  /** Current audio input level (0-1), for visualizers */
  audioLevel: number;
}
/** Payload of the "transcription-partial" WebSocket event. */
interface TranscriptionPartialPayload {
  text: string;
}
/** Payload of the "transcription-final" WebSocket event. */
interface TranscriptionFinalPayload {
  text: string;
}
/** Payload of the "transcription-error" WebSocket event. */
interface TranscriptionErrorPayload {
  message: string;
}
/** Response body shape of POST /api/speech/transcribe (REST fallback). */
interface TranscribeResponse {
  data: {
    text: string;
  };
}
/**
 * Pick the most suitable MIME type for MediaRecorder audio capture.
 *
 * Probes a preference-ordered list of opus/webm/ogg/mp4 types and returns the
 * first one the browser supports. Falls back to "audio/webm" when
 * MediaRecorder is unavailable (e.g. during SSR) or nothing matches.
 */
function getAudioMimeType(): string {
  const fallback = "audio/webm";
  if (typeof MediaRecorder === "undefined") {
    return fallback;
  }
  const preferred = ["audio/webm;codecs=opus", "audio/webm", "audio/ogg;codecs=opus", "audio/mp4"];
  const supported = preferred.find((candidate) => MediaRecorder.isTypeSupported(candidate));
  return supported ?? fallback;
}
/**
* Hook for microphone capture and real-time speech-to-text transcription.
*
* Uses WebSocket streaming by default for real-time partial transcriptions.
* Falls back to REST upload (POST /api/speech/transcribe) if WebSocket
* is disabled or unavailable.
*/
export function useVoiceInput(options: UseVoiceInputOptions = {}): UseVoiceInputReturn {
const { onTranscript, useWebSocket: useWs = true, sampleRate = 16000 } = options;
const [isRecording, setIsRecording] = useState(false);
const [transcript, setTranscript] = useState("");
const [partialTranscript, setPartialTranscript] = useState("");
const [error, setError] = useState<string | null>(null);
const [audioLevel, setAudioLevel] = useState(0);
// Refs to hold mutable state without re-renders
const socketRef = useRef<Socket | null>(null);
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
const streamRef = useRef<MediaStream | null>(null);
const audioContextRef = useRef<AudioContext | null>(null);
const analyserRef = useRef<AnalyserNode | null>(null);
const animationFrameRef = useRef<number | null>(null);
const onTranscriptRef = useRef(onTranscript);
const recordedChunksRef = useRef<Blob[]>([]);
const isRecordingRef = useRef(false);
// Keep callback ref up to date
useEffect(() => {
onTranscriptRef.current = onTranscript;
}, [onTranscript]);
/**
* Set up audio analysis for visualizing input level
*/
const setupAudioAnalysis = useCallback((stream: MediaStream): void => {
try {
const audioContext = new AudioContext();
const analyser = audioContext.createAnalyser();
const source = audioContext.createMediaStreamSource(stream);
analyser.fftSize = 256;
source.connect(analyser);
audioContextRef.current = audioContext;
analyserRef.current = analyser;
// Start level monitoring
const dataArray = new Uint8Array(analyser.frequencyBinCount);
const updateLevel = (): void => {
if (!isRecordingRef.current) {
return;
}
analyser.getByteFrequencyData(dataArray);
// Calculate average level
let sum = 0;
for (const value of dataArray) {
sum += value;
}
const average = sum / dataArray.length / 255;
setAudioLevel(average);
animationFrameRef.current = requestAnimationFrame(updateLevel);
};
animationFrameRef.current = requestAnimationFrame(updateLevel);
} catch {
// Audio analysis is non-critical; continue without it
console.warn("Audio analysis not available");
}
}, []);
/**
* Clean up audio analysis resources
*/
const cleanupAudioAnalysis = useCallback((): void => {
if (animationFrameRef.current !== null) {
cancelAnimationFrame(animationFrameRef.current);
animationFrameRef.current = null;
}
if (audioContextRef.current) {
void audioContextRef.current.close();
audioContextRef.current = null;
}
analyserRef.current = null;
setAudioLevel(0);
}, []);
/**
* Connect to the speech WebSocket namespace
*/
const connectSocket = useCallback((): Socket => {
const socket = io(API_BASE_URL, {
path: "/socket.io",
transports: ["websocket", "polling"],
});
socket.on("transcription-partial", (data: TranscriptionPartialPayload) => {
setPartialTranscript(data.text);
});
socket.on("transcription-final", (data: TranscriptionFinalPayload) => {
setTranscript(data.text);
setPartialTranscript("");
onTranscriptRef.current?.(data.text);
});
socket.on("transcription-error", (data: TranscriptionErrorPayload) => {
setError(data.message);
});
socketRef.current = socket;
return socket;
}, []);
/**
* Disconnect the WebSocket
*/
const disconnectSocket = useCallback((): void => {
if (socketRef.current) {
socketRef.current.off("transcription-partial");
socketRef.current.off("transcription-final");
socketRef.current.off("transcription-error");
socketRef.current.disconnect();
socketRef.current = null;
}
}, []);
/**
* Send recorded audio via REST API as fallback
*/
const sendAudioViaRest = useCallback(async (audioBlob: Blob): Promise<void> => {
try {
const formData = new FormData();
formData.append("audio", audioBlob, "recording.webm");
const response = await apiPostFormData<TranscribeResponse>(
"/api/speech/transcribe",
formData
);
if (response.data.text) {
setTranscript(response.data.text);
setPartialTranscript("");
onTranscriptRef.current?.(response.data.text);
}
} catch (err) {
const message = err instanceof Error ? err.message : "Transcription request failed";
setError(message);
}
}, []);
/**
* Stop all media tracks on the stream
*/
const stopMediaTracks = useCallback((): void => {
if (streamRef.current) {
streamRef.current.getTracks().forEach((track) => {
track.stop();
});
streamRef.current = null;
}
}, []);
/**
* Start microphone capture and transcription
*/
const startRecording = useCallback(async (): Promise<void> => {
// Prevent double-start
if (isRecordingRef.current) {
return;
}
setError(null);
setPartialTranscript("");
recordedChunksRef.current = [];
try {
// Request microphone access
const stream = await navigator.mediaDevices.getUserMedia({
audio: {
echoCancellation: true,
noiseSuppression: true,
sampleRate,
},
});
streamRef.current = stream;
// Set up audio level visualization
setupAudioAnalysis(stream);
// Determine MIME type
const mimeType = getAudioMimeType();
// Create MediaRecorder
const mediaRecorder = new MediaRecorder(stream, { mimeType });
mediaRecorderRef.current = mediaRecorder;
// Connect WebSocket if enabled
let socket: Socket | null = null;
if (useWs) {
socket = connectSocket();
// Emit start-transcription event
socket.emit("start-transcription", {
format: mimeType,
sampleRate,
});
}
// Handle audio data chunks
mediaRecorder.addEventListener("dataavailable", (event: BlobEvent) => {
if (event.data.size > 0) {
if (socket?.connected) {
// Stream chunks via WebSocket
socket.emit("audio-chunk", event.data);
} else {
// Collect chunks for REST upload
recordedChunksRef.current.push(event.data);
}
}
});
// Handle recording stop
mediaRecorder.addEventListener("stop", () => {
// If using REST fallback, send collected audio
if (!useWs || !socket?.connected) {
if (recordedChunksRef.current.length > 0) {
const audioBlob = new Blob(recordedChunksRef.current, {
type: mimeType,
});
void sendAudioViaRest(audioBlob);
}
}
});
// Handle errors
mediaRecorder.addEventListener("error", () => {
setError("Recording encountered an issue. Please try again.");
setIsRecording(false);
isRecordingRef.current = false;
});
// Start recording with timeslice for streaming chunks (250ms intervals)
mediaRecorder.start(250);
setIsRecording(true);
isRecordingRef.current = true;
} catch (err) {
// Handle specific error types
if (err instanceof DOMException) {
if (err.name === "NotAllowedError") {
setError(
"Microphone access was not granted. Please allow microphone access to use voice input."
);
} else if (err.name === "NotFoundError") {
setError("No microphone found. Please connect a microphone and try again.");
} else {
setError("Unable to access the microphone. Please check your device settings.");
}
} else {
setError("Unable to start voice input. Please try again.");
}
// Clean up on failure
stopMediaTracks();
cleanupAudioAnalysis();
}
}, [
useWs,
sampleRate,
setupAudioAnalysis,
connectSocket,
sendAudioViaRest,
stopMediaTracks,
cleanupAudioAnalysis,
]);
/**
 * Stop microphone capture and transcription, releasing all resources.
 */
const stopRecording = useCallback((): void => {
  setIsRecording(false);
  isRecordingRef.current = false;
  // Halt the MediaRecorder if it is still active
  const recorder = mediaRecorderRef.current;
  if (recorder && recorder.state !== "inactive") {
    recorder.stop();
    mediaRecorderRef.current = null;
  }
  // Release the microphone and the level-visualization pipeline
  stopMediaTracks();
  cleanupAudioAnalysis();
  // Tell the server we are done, then tear the socket down after a short
  // grace period so the final chunk can still be processed
  const socket = socketRef.current;
  if (socket) {
    socket.emit("stop-transcription");
    setTimeout(() => {
      disconnectSocket();
    }, 500);
  }
}, [stopMediaTracks, cleanupAudioAnalysis, disconnectSocket]);
// On unmount: stop any in-flight recording and release mic, analyser, socket.
useEffect(() => {
  const teardown = (): void => {
    isRecordingRef.current = false;
    const recorder = mediaRecorderRef.current;
    if (recorder && recorder.state !== "inactive") {
      recorder.stop();
    }
    stopMediaTracks();
    cleanupAudioAnalysis();
    disconnectSocket();
  };
  return teardown;
}, [stopMediaTracks, cleanupAudioAnalysis, disconnectSocket]);
// Hook return value: recording state + controls, the final and streaming
// (partial) transcripts, the latest error message, and the current mic
// audio level — NOTE(review): level range not visible here; presumably
// 0..1 from the analyser — confirm in setupAudioAnalysis.
return {
  isRecording,
  startRecording,
  stopRecording,
  transcript,
  partialTranscript,
  error,
  audioLevel,
};
}

View File

@@ -0,0 +1,58 @@
/**
* Speech API client
* Handles text-to-speech synthesis and voice listing via /api/speech
*/
import { apiGet } from "./client";
import { API_BASE_URL } from "../config";
/** Metadata for a single TTS voice as returned by GET /api/speech/voices. */
export interface VoiceInfo {
  /** Stable voice identifier; pass as SynthesizeOptions.voice. */
  id: string;
  /** Human-readable display name. */
  name: string;
  /** Language of the voice — NOTE(review): exact tag format (e.g. BCP-47) not visible here; confirm against the API. */
  language: string;
  gender?: string;
  /** URL of a short audio sample, when the provider supplies one. */
  preview_url?: string;
}
/** Request body for POST /api/speech/synthesize (sent as JSON). */
export interface SynthesizeOptions {
  /** Text to convert to speech. */
  text: string;
  /** Voice to use — presumably a VoiceInfo.id; server picks a default when omitted — confirm. */
  voice?: string;
  /** Playback speed multiplier — NOTE(review): valid range defined server-side; confirm. */
  speed?: number;
  /** Output audio format — NOTE(review): accepted values defined server-side; confirm. */
  format?: string;
  tier?: string;
}
/** Envelope for the voice-listing endpoint: the voices live under `data`. */
export interface VoicesResponse {
  data: VoiceInfo[];
}
/**
 * Retrieve the list of TTS voices offered by the speech service.
 */
export async function getVoices(): Promise<VoicesResponse> {
  const voices = await apiGet<VoicesResponse>("/api/speech/voices");
  return voices;
}
/**
 * Synthesize text to speech audio.
 *
 * POSTs the options as JSON to /api/speech/synthesize and returns the binary
 * audio payload as a Blob (the endpoint responds with raw audio, not JSON).
 *
 * @throws Error when the server responds with a non-2xx status; the message
 *   now includes the HTTP status code so callers/logs can distinguish 4xx
 *   (bad request / quota) from 5xx (provider failure).
 */
export async function synthesizeSpeech(options: SynthesizeOptions): Promise<Blob> {
  const url = `${API_BASE_URL}/api/speech/synthesize`;
  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    // Send cookies so the session-authenticated API accepts the request
    credentials: "include",
    body: JSON.stringify(options),
  });
  if (!response.ok) {
    // Body text is best-effort: it may be empty, JSON, or an HTML error page
    const errorText = await response.text().catch(() => "Unknown error");
    throw new Error(`Speech synthesis failed (${response.status}): ${errorText}`);
  }
  return response.blob();
}