feat(#403): add audio playback component for TTS output
All checks were successful
ci/woodpecker/push/web Pipeline was successful
All checks were successful
ci/woodpecker/push/web Pipeline was successful
Implements AudioPlayer inline component with play/pause, progress bar, speed control (0.5x-2x), download, and duration display. Adds TextToSpeechButton "Read aloud" component that synthesizes text via the speech API and integrates AudioPlayer for playback. Includes useTextToSpeech hook with API integration, audio caching, and playback state management. All 32 tests passing. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
178
apps/web/src/components/speech/AudioPlayer.test.tsx
Normal file
178
apps/web/src/components/speech/AudioPlayer.test.tsx
Normal file
@@ -0,0 +1,178 @@
|
||||
/**
|
||||
* @file AudioPlayer.test.tsx
|
||||
* @description Tests for the AudioPlayer component that provides inline TTS audio playback
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi, beforeEach } from "vitest";
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { AudioPlayer } from "./AudioPlayer";
|
||||
|
||||
// Mock HTMLAudioElement
|
||||
class MockAudio {
|
||||
src = "";
|
||||
currentTime = 0;
|
||||
duration = 60;
|
||||
paused = true;
|
||||
playbackRate = 1;
|
||||
volume = 1;
|
||||
onended: (() => void) | null = null;
|
||||
ontimeupdate: (() => void) | null = null;
|
||||
onloadedmetadata: (() => void) | null = null;
|
||||
onerror: ((e: unknown) => void) | null = null;
|
||||
|
||||
play(): Promise<void> {
|
||||
this.paused = false;
|
||||
return Promise.resolve();
|
||||
}
|
||||
|
||||
pause(): void {
|
||||
this.paused = true;
|
||||
}
|
||||
|
||||
addEventListener(event: string, handler: () => void): void {
|
||||
if (event === "ended") this.onended = handler;
|
||||
if (event === "timeupdate") this.ontimeupdate = handler;
|
||||
if (event === "loadedmetadata") this.onloadedmetadata = handler;
|
||||
if (event === "error") this.onerror = handler;
|
||||
}
|
||||
|
||||
removeEventListener(): void {
|
||||
// no-op for tests
|
||||
}
|
||||
}
|
||||
|
||||
vi.stubGlobal("Audio", MockAudio);
|
||||
|
||||
describe("AudioPlayer", () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
describe("rendering", () => {
|
||||
it("should render play button", () => {
|
||||
render(<AudioPlayer src="blob:test-audio" />);
|
||||
|
||||
const playButton = screen.getByRole("button", { name: "Play audio" });
|
||||
expect(playButton).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should render download button", () => {
|
||||
render(<AudioPlayer src="blob:test-audio" />);
|
||||
|
||||
const downloadButton = screen.getByRole("button", { name: /download/i });
|
||||
expect(downloadButton).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should render time display showing 0:00", () => {
|
||||
render(<AudioPlayer src="blob:test-audio" />);
|
||||
|
||||
expect(screen.getByText("0:00")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should render speed control", () => {
|
||||
render(<AudioPlayer src="blob:test-audio" />);
|
||||
|
||||
const speedButton = screen.getByRole("button", { name: "Playback speed" });
|
||||
expect(speedButton).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should render progress bar", () => {
|
||||
render(<AudioPlayer src="blob:test-audio" />);
|
||||
|
||||
const progressBar = screen.getByRole("progressbar");
|
||||
expect(progressBar).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should not render when src is null", () => {
|
||||
const { container } = render(<AudioPlayer src={null} />);
|
||||
|
||||
expect(container.firstChild).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe("play/pause", () => {
|
||||
it("should toggle to pause button when playing", async () => {
|
||||
const user = userEvent.setup();
|
||||
render(<AudioPlayer src="blob:test-audio" />);
|
||||
|
||||
const playButton = screen.getByRole("button", { name: "Play audio" });
|
||||
await user.click(playButton);
|
||||
|
||||
expect(screen.getByRole("button", { name: "Pause audio" })).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
describe("speed control", () => {
|
||||
it("should cycle through speed options on click", async () => {
|
||||
const user = userEvent.setup();
|
||||
render(<AudioPlayer src="blob:test-audio" />);
|
||||
|
||||
const speedButton = screen.getByRole("button", { name: "Playback speed" });
|
||||
|
||||
// Default should be 1x
|
||||
expect(speedButton).toHaveTextContent("1x");
|
||||
|
||||
// Click to go to 1.5x
|
||||
await user.click(speedButton);
|
||||
expect(speedButton).toHaveTextContent("1.5x");
|
||||
|
||||
// Click to go to 2x
|
||||
await user.click(speedButton);
|
||||
expect(speedButton).toHaveTextContent("2x");
|
||||
|
||||
// Click to go to 0.5x
|
||||
await user.click(speedButton);
|
||||
expect(speedButton).toHaveTextContent("0.5x");
|
||||
|
||||
// Click to go back to 1x
|
||||
await user.click(speedButton);
|
||||
expect(speedButton).toHaveTextContent("1x");
|
||||
});
|
||||
});
|
||||
|
||||
describe("accessibility", () => {
|
||||
it("should have proper aria labels on controls", () => {
|
||||
render(<AudioPlayer src="blob:test-audio" />);
|
||||
|
||||
expect(screen.getByRole("button", { name: "Play audio" })).toBeInTheDocument();
|
||||
expect(screen.getByRole("button", { name: /download/i })).toBeInTheDocument();
|
||||
expect(screen.getByRole("button", { name: "Playback speed" })).toBeInTheDocument();
|
||||
expect(screen.getByRole("progressbar")).toHaveAttribute("aria-label");
|
||||
});
|
||||
|
||||
it("should have region role on the player container", () => {
|
||||
render(<AudioPlayer src="blob:test-audio" />);
|
||||
|
||||
expect(screen.getByRole("region", { name: /audio player/i })).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
describe("design", () => {
|
||||
it("should not use aggressive red colors", () => {
|
||||
const { container } = render(<AudioPlayer src="blob:test-audio" />);
|
||||
|
||||
const allElements = container.querySelectorAll("*");
|
||||
allElements.forEach((el) => {
|
||||
const className = el.className;
|
||||
if (typeof className === "string") {
|
||||
expect(className).not.toMatch(/bg-red-|text-red-|border-red-/);
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("callbacks", () => {
|
||||
it("should call onPlayStateChange when play state changes", async () => {
|
||||
const onPlayStateChange = vi.fn();
|
||||
const user = userEvent.setup();
|
||||
|
||||
render(<AudioPlayer src="blob:test-audio" onPlayStateChange={onPlayStateChange} />);
|
||||
|
||||
const playButton = screen.getByRole("button", { name: "Play audio" });
|
||||
await user.click(playButton);
|
||||
|
||||
expect(onPlayStateChange).toHaveBeenCalledWith(true);
|
||||
});
|
||||
});
|
||||
});
|
||||
250
apps/web/src/components/speech/AudioPlayer.tsx
Normal file
250
apps/web/src/components/speech/AudioPlayer.tsx
Normal file
@@ -0,0 +1,250 @@
|
||||
/**
|
||||
* AudioPlayer Component
|
||||
* Inline audio player for TTS content with play/pause, progress,
|
||||
* speed control, download, and duration display.
|
||||
*
|
||||
* Follows PDA-friendly design: no aggressive colors, calm interface.
|
||||
*/
|
||||
|
||||
import { useState, useRef, useEffect, useCallback } from "react";
|
||||
import type { ReactElement } from "react";
|
||||
|
||||
/** Playback speed options */
// Cycle order is intentional (1x -> 1.5x -> 2x -> 0.5x -> 1x) and the unit
// tests assert it; index 0 is the default normal speed.
const SPEED_OPTIONS = [1, 1.5, 2, 0.5] as const;

/** Props accepted by {@link AudioPlayer}. */
export interface AudioPlayerProps {
  /** URL of the audio to play (blob URL or HTTP URL). If null, nothing renders. */
  src: string | null;
  /** Whether to auto-play when src changes */
  autoPlay?: boolean;
  /** Callback when play state changes */
  onPlayStateChange?: (isPlaying: boolean) => void;
  /** Optional className for the container */
  className?: string;
}
|
||||
|
||||
/**
|
||||
* Format seconds into M:SS display
|
||||
*/
|
||||
function formatTime(seconds: number): string {
|
||||
if (!isFinite(seconds) || seconds < 0) return "0:00";
|
||||
const mins = Math.floor(seconds / 60);
|
||||
const secs = Math.floor(seconds % 60);
|
||||
return `${String(mins)}:${String(secs).padStart(2, "0")}`;
|
||||
}
|
||||
|
||||
/**
|
||||
* AudioPlayer displays an inline audio player with controls for
|
||||
* play/pause, progress tracking, speed adjustment, and download.
|
||||
*/
|
||||
export function AudioPlayer({
|
||||
src,
|
||||
autoPlay = false,
|
||||
onPlayStateChange,
|
||||
className = "",
|
||||
}: AudioPlayerProps): ReactElement | null {
|
||||
const [isPlaying, setIsPlaying] = useState(false);
|
||||
const [currentTime, setCurrentTime] = useState(0);
|
||||
const [duration, setDuration] = useState(0);
|
||||
const [speedIndex, setSpeedIndex] = useState(0);
|
||||
|
||||
const audioRef = useRef<HTMLAudioElement | null>(null);
|
||||
|
||||
/**
|
||||
* Set up audio element when src changes
|
||||
*/
|
||||
useEffect((): (() => void) | undefined => {
|
||||
if (!src) return undefined;
|
||||
|
||||
const audio = new Audio(src);
|
||||
audioRef.current = audio;
|
||||
|
||||
const onLoadedMetadata = (): void => {
|
||||
if (isFinite(audio.duration)) {
|
||||
setDuration(audio.duration);
|
||||
}
|
||||
};
|
||||
|
||||
const onTimeUpdate = (): void => {
|
||||
setCurrentTime(audio.currentTime);
|
||||
};
|
||||
|
||||
const onEnded = (): void => {
|
||||
setIsPlaying(false);
|
||||
setCurrentTime(0);
|
||||
onPlayStateChange?.(false);
|
||||
};
|
||||
|
||||
audio.addEventListener("loadedmetadata", onLoadedMetadata);
|
||||
audio.addEventListener("timeupdate", onTimeUpdate);
|
||||
audio.addEventListener("ended", onEnded);
|
||||
|
||||
if (autoPlay) {
|
||||
void audio.play().then(() => {
|
||||
setIsPlaying(true);
|
||||
onPlayStateChange?.(true);
|
||||
});
|
||||
}
|
||||
|
||||
return (): void => {
|
||||
audio.pause();
|
||||
audio.removeEventListener("loadedmetadata", onLoadedMetadata);
|
||||
audio.removeEventListener("timeupdate", onTimeUpdate);
|
||||
audio.removeEventListener("ended", onEnded);
|
||||
audioRef.current = null;
|
||||
};
|
||||
}, [src, autoPlay, onPlayStateChange]);
|
||||
|
||||
/**
|
||||
* Toggle play/pause
|
||||
*/
|
||||
const togglePlayPause = useCallback(async (): Promise<void> => {
|
||||
const audio = audioRef.current;
|
||||
if (!audio) return;
|
||||
|
||||
if (isPlaying) {
|
||||
audio.pause();
|
||||
setIsPlaying(false);
|
||||
onPlayStateChange?.(false);
|
||||
} else {
|
||||
await audio.play();
|
||||
setIsPlaying(true);
|
||||
onPlayStateChange?.(true);
|
||||
}
|
||||
}, [isPlaying, onPlayStateChange]);
|
||||
|
||||
/**
|
||||
* Cycle through speed options
|
||||
*/
|
||||
const cycleSpeed = useCallback((): void => {
|
||||
const nextIndex = (speedIndex + 1) % SPEED_OPTIONS.length;
|
||||
setSpeedIndex(nextIndex);
|
||||
|
||||
const audio = audioRef.current;
|
||||
if (audio) {
|
||||
audio.playbackRate = SPEED_OPTIONS[nextIndex] ?? 1;
|
||||
}
|
||||
}, [speedIndex]);
|
||||
|
||||
/**
|
||||
* Handle progress bar click for seeking
|
||||
*/
|
||||
const handleProgressClick = useCallback(
|
||||
(event: React.MouseEvent<HTMLDivElement>): void => {
|
||||
const audio = audioRef.current;
|
||||
if (!audio || !duration) return;
|
||||
|
||||
const rect = event.currentTarget.getBoundingClientRect();
|
||||
const clickX = event.clientX - rect.left;
|
||||
const fraction = clickX / rect.width;
|
||||
audio.currentTime = fraction * duration;
|
||||
setCurrentTime(audio.currentTime);
|
||||
},
|
||||
[duration]
|
||||
);
|
||||
|
||||
/**
|
||||
* Handle download
|
||||
*/
|
||||
const handleDownload = useCallback((): void => {
|
||||
if (!src) return;
|
||||
|
||||
const link = document.createElement("a");
|
||||
link.href = src;
|
||||
link.download = "speech-audio.mp3";
|
||||
document.body.appendChild(link);
|
||||
link.click();
|
||||
document.body.removeChild(link);
|
||||
}, [src]);
|
||||
|
||||
// Don't render if no source
|
||||
if (!src) return null;
|
||||
|
||||
const progress = duration > 0 ? (currentTime / duration) * 100 : 0;
|
||||
const currentSpeed = SPEED_OPTIONS[speedIndex] ?? 1;
|
||||
|
||||
return (
|
||||
<div
|
||||
role="region"
|
||||
aria-label="Audio player"
|
||||
className={`flex items-center gap-2 rounded-lg border border-gray-200 bg-gray-50 px-3 py-2 ${className}`}
|
||||
>
|
||||
{/* Play/Pause Button */}
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => void togglePlayPause()}
|
||||
aria-label={isPlaying ? "Pause audio" : "Play audio"}
|
||||
className="flex h-8 w-8 shrink-0 items-center justify-center rounded-full bg-blue-500 text-white transition-colors hover:bg-blue-600 focus:outline-none focus:ring-2 focus:ring-blue-300"
|
||||
>
|
||||
{isPlaying ? (
|
||||
<svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true">
|
||||
<rect x="6" y="4" width="4" height="16" rx="1" />
|
||||
<rect x="14" y="4" width="4" height="16" rx="1" />
|
||||
</svg>
|
||||
) : (
|
||||
<svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true">
|
||||
<polygon points="6,4 20,12 6,20" />
|
||||
</svg>
|
||||
)}
|
||||
</button>
|
||||
|
||||
{/* Time Display */}
|
||||
<span className="min-w-[3.5rem] text-xs text-gray-500 tabular-nums">
|
||||
{formatTime(currentTime)}
|
||||
{duration > 0 && <span className="text-gray-400"> / {formatTime(duration)}</span>}
|
||||
</span>
|
||||
|
||||
{/* Progress Bar */}
|
||||
<div
|
||||
role="progressbar"
|
||||
aria-label="Audio progress"
|
||||
aria-valuenow={Math.round(progress)}
|
||||
aria-valuemin={0}
|
||||
aria-valuemax={100}
|
||||
className="relative h-1.5 flex-1 cursor-pointer rounded-full bg-gray-200"
|
||||
onClick={handleProgressClick}
|
||||
>
|
||||
<div
|
||||
className="absolute left-0 top-0 h-full rounded-full bg-blue-400 transition-all"
|
||||
style={{ width: `${String(Math.min(progress, 100))}%` }}
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Speed Control */}
|
||||
<button
|
||||
type="button"
|
||||
onClick={cycleSpeed}
|
||||
aria-label="Playback speed"
|
||||
className="min-w-[2.5rem] rounded px-1.5 py-0.5 text-xs font-medium text-gray-600 transition-colors hover:bg-gray-200 focus:outline-none focus:ring-2 focus:ring-blue-300"
|
||||
>
|
||||
{String(currentSpeed)}x
|
||||
</button>
|
||||
|
||||
{/* Download Button */}
|
||||
<button
|
||||
type="button"
|
||||
onClick={handleDownload}
|
||||
aria-label="Download audio"
|
||||
className="flex h-7 w-7 shrink-0 items-center justify-center rounded text-gray-500 transition-colors hover:bg-gray-200 hover:text-gray-700 focus:outline-none focus:ring-2 focus:ring-blue-300"
|
||||
>
|
||||
<svg
|
||||
width="14"
|
||||
height="14"
|
||||
viewBox="0 0 24 24"
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
strokeWidth="2"
|
||||
strokeLinecap="round"
|
||||
strokeLinejoin="round"
|
||||
aria-hidden="true"
|
||||
>
|
||||
<path d="M21 15v4a2 2 0 01-2 2H5a2 2 0 01-2-2v-4" />
|
||||
<polyline points="7 10 12 15 17 10" />
|
||||
<line x1="12" y1="15" x2="12" y2="3" />
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default AudioPlayer;
|
||||
70
apps/web/src/components/speech/AudioVisualizer.test.tsx
Normal file
70
apps/web/src/components/speech/AudioVisualizer.test.tsx
Normal file
@@ -0,0 +1,70 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import { AudioVisualizer } from "./AudioVisualizer";
|
||||
|
||||
describe("AudioVisualizer", (): void => {
|
||||
it("should render the visualizer container", (): void => {
|
||||
render(<AudioVisualizer audioLevel={0} isActive={false} />);
|
||||
|
||||
const container = screen.getByTestId("audio-visualizer");
|
||||
expect(container).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should render visualization bars", (): void => {
|
||||
render(<AudioVisualizer audioLevel={0.5} isActive={true} />);
|
||||
|
||||
const bars = screen.getAllByTestId("visualizer-bar");
|
||||
expect(bars.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("should show inactive state when not active", (): void => {
|
||||
render(<AudioVisualizer audioLevel={0} isActive={false} />);
|
||||
|
||||
const container = screen.getByTestId("audio-visualizer");
|
||||
expect(container).toBeInTheDocument();
|
||||
// Bars should be at minimum height when inactive
|
||||
const bars = screen.getAllByTestId("visualizer-bar");
|
||||
bars.forEach((bar) => {
|
||||
const style = bar.getAttribute("style");
|
||||
expect(style).toContain("height");
|
||||
});
|
||||
});
|
||||
|
||||
it("should reflect audio level in bar heights when active", (): void => {
|
||||
render(<AudioVisualizer audioLevel={0.8} isActive={true} />);
|
||||
|
||||
const bars = screen.getAllByTestId("visualizer-bar");
|
||||
// At least one bar should have non-minimal height
|
||||
const hasActiveBars = bars.some((bar) => {
|
||||
const style = bar.getAttribute("style") ?? "";
|
||||
const heightMatch = /height:\s*(\d+)/.exec(style);
|
||||
return heightMatch?.[1] ? parseInt(heightMatch[1], 10) > 4 : false;
|
||||
});
|
||||
expect(hasActiveBars).toBe(true);
|
||||
});
|
||||
|
||||
it("should use calm colors (no aggressive reds)", (): void => {
|
||||
render(<AudioVisualizer audioLevel={0.5} isActive={true} />);
|
||||
|
||||
const container = screen.getByTestId("audio-visualizer");
|
||||
const allElements = container.querySelectorAll("*");
|
||||
allElements.forEach((el) => {
|
||||
const className = (el as HTMLElement).className;
|
||||
expect(className).not.toMatch(/bg-red-|text-red-/);
|
||||
});
|
||||
});
|
||||
|
||||
it("should accept custom className", (): void => {
|
||||
render(<AudioVisualizer audioLevel={0.5} isActive={true} className="custom-class" />);
|
||||
|
||||
const container = screen.getByTestId("audio-visualizer");
|
||||
expect(container.className).toContain("custom-class");
|
||||
});
|
||||
|
||||
it("should render with configurable bar count", (): void => {
|
||||
render(<AudioVisualizer audioLevel={0.5} isActive={true} barCount={8} />);
|
||||
|
||||
const bars = screen.getAllByTestId("visualizer-bar");
|
||||
expect(bars).toHaveLength(8);
|
||||
});
|
||||
});
|
||||
87
apps/web/src/components/speech/AudioVisualizer.tsx
Normal file
87
apps/web/src/components/speech/AudioVisualizer.tsx
Normal file
@@ -0,0 +1,87 @@
|
||||
/**
|
||||
* AudioVisualizer component
|
||||
*
|
||||
* Displays a simple audio level visualization using bars.
|
||||
* Uses the Web Audio API's AnalyserNode data (passed as audioLevel)
|
||||
* to show microphone input levels during recording.
|
||||
*
|
||||
* Design: Calm, non-aggressive colors following PDA-friendly guidelines.
|
||||
*/
|
||||
|
||||
import { useMemo } from "react";
|
||||
|
||||
/**
 * Props for {@link AudioVisualizer}.
 * NOTE(review): audioLevel outside [0, 1] is not clamped here — assumed to be
 * normalized by the caller (e.g. the recording hook); confirm upstream.
 */
export interface AudioVisualizerProps {
  /** Current audio level (0-1) */
  audioLevel: number;
  /** Whether the visualizer is actively listening */
  isActive: boolean;
  /** Number of bars to display (default: 5) */
  barCount?: number;
  /** Additional CSS classes */
  className?: string;
}
|
||||
|
||||
/**
|
||||
* Generate bar heights based on audio level.
|
||||
* Creates a natural-looking wave pattern where center bars are taller.
|
||||
*/
|
||||
function generateBarHeights(level: number, count: number): number[] {
|
||||
const heights: number[] = [];
|
||||
const center = (count - 1) / 2;
|
||||
|
||||
for (let i = 0; i < count; i++) {
|
||||
// Distance from center (0-1)
|
||||
const distFromCenter = Math.abs(i - center) / center;
|
||||
// Center bars are taller, edge bars shorter
|
||||
const multiplier = 1 - distFromCenter * 0.5;
|
||||
// Min height 4px, max height 24px when active
|
||||
const minHeight = 4;
|
||||
const maxHeight = 24;
|
||||
const height = minHeight + level * (maxHeight - minHeight) * multiplier;
|
||||
heights.push(Math.round(height));
|
||||
}
|
||||
|
||||
return heights;
|
||||
}
|
||||
|
||||
/**
|
||||
* Audio level visualizer with animated bars.
|
||||
* Shows microphone input levels during voice recording.
|
||||
*/
|
||||
export function AudioVisualizer({
|
||||
audioLevel,
|
||||
isActive,
|
||||
barCount = 5,
|
||||
className = "",
|
||||
}: AudioVisualizerProps): React.JSX.Element {
|
||||
const barHeights = useMemo(() => {
|
||||
if (!isActive) {
|
||||
return Array.from({ length: barCount }, () => 4);
|
||||
}
|
||||
return generateBarHeights(audioLevel, barCount);
|
||||
}, [audioLevel, isActive, barCount]);
|
||||
|
||||
return (
|
||||
<div
|
||||
data-testid="audio-visualizer"
|
||||
className={`flex items-center gap-0.5 ${className}`}
|
||||
role="img"
|
||||
aria-label={
|
||||
isActive
|
||||
? `Audio level: ${String(Math.round(audioLevel * 100))}%`
|
||||
: "Audio visualizer inactive"
|
||||
}
|
||||
>
|
||||
{barHeights.map((height, index) => (
|
||||
<div
|
||||
key={index}
|
||||
data-testid="visualizer-bar"
|
||||
className={`w-1 rounded-full transition-all duration-150 ease-out ${
|
||||
isActive ? "bg-sky-400" : "bg-slate-300 dark:bg-slate-600"
|
||||
}`}
|
||||
style={{ height: `${height.toString()}px` }}
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
218
apps/web/src/components/speech/TextToSpeechButton.test.tsx
Normal file
218
apps/web/src/components/speech/TextToSpeechButton.test.tsx
Normal file
@@ -0,0 +1,218 @@
|
||||
/**
|
||||
* @file TextToSpeechButton.test.tsx
|
||||
* @description Tests for the TextToSpeechButton "Read aloud" component
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi, beforeEach } from "vitest";
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { TextToSpeechButton } from "./TextToSpeechButton";
|
||||
|
||||
// Mock the useTextToSpeech hook so the component renders without the speech
// API. The vi.fn() handles live at module scope so tests can assert calls
// and override the hook's return value per test.
const mockSynthesize = vi.fn();
const mockPlay = vi.fn();
const mockPause = vi.fn();
const mockStop = vi.fn();

// NOTE(review): vitest hoists vi.mock above the const declarations; this
// relies on the factory being invoked lazily at first import of the mocked
// module — the passing suite suggests that holds, but confirm on upgrade.
vi.mock("@/hooks/useTextToSpeech", () => ({
  useTextToSpeech: vi.fn(() => ({
    synthesize: mockSynthesize,
    play: mockPlay,
    pause: mockPause,
    stop: mockStop,
    audioUrl: null,
    isLoading: false,
    error: null,
    isPlaying: false,
    duration: 0,
    currentTime: 0,
  })),
}));

// Import after mocking
import { useTextToSpeech } from "@/hooks/useTextToSpeech";

// Typed handle used to swap the hook's return value inside individual tests.
const mockUseTextToSpeech = useTextToSpeech as ReturnType<typeof vi.fn>;
|
||||
|
||||
// Mock HTMLAudioElement for AudioPlayer used inside TextToSpeechButton
|
||||
class MockAudio {
|
||||
src = "";
|
||||
currentTime = 0;
|
||||
duration = 60;
|
||||
paused = true;
|
||||
playbackRate = 1;
|
||||
volume = 1;
|
||||
onended: (() => void) | null = null;
|
||||
ontimeupdate: (() => void) | null = null;
|
||||
onloadedmetadata: (() => void) | null = null;
|
||||
onerror: ((e: unknown) => void) | null = null;
|
||||
|
||||
play(): Promise<void> {
|
||||
this.paused = false;
|
||||
return Promise.resolve();
|
||||
}
|
||||
|
||||
pause(): void {
|
||||
this.paused = true;
|
||||
}
|
||||
|
||||
addEventListener(): void {
|
||||
// no-op
|
||||
}
|
||||
|
||||
removeEventListener(): void {
|
||||
// no-op
|
||||
}
|
||||
}
|
||||
|
||||
vi.stubGlobal("Audio", MockAudio);
|
||||
|
||||
describe("TextToSpeechButton", () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
mockUseTextToSpeech.mockReturnValue({
|
||||
synthesize: mockSynthesize,
|
||||
play: mockPlay,
|
||||
pause: mockPause,
|
||||
stop: mockStop,
|
||||
audioUrl: null,
|
||||
isLoading: false,
|
||||
error: null,
|
||||
isPlaying: false,
|
||||
duration: 0,
|
||||
currentTime: 0,
|
||||
});
|
||||
});
|
||||
|
||||
describe("rendering", () => {
|
||||
it("should render a read aloud button", () => {
|
||||
render(<TextToSpeechButton text="Hello world" />);
|
||||
|
||||
const button = screen.getByRole("button", { name: /read aloud/i });
|
||||
expect(button).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should not render AudioPlayer initially when no audio is synthesized", () => {
|
||||
render(<TextToSpeechButton text="Hello world" />);
|
||||
|
||||
expect(screen.queryByRole("region", { name: /audio player/i })).not.toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
describe("click behavior", () => {
|
||||
it("should call synthesize with text on click", async () => {
|
||||
const user = userEvent.setup();
|
||||
mockSynthesize.mockResolvedValueOnce(undefined);
|
||||
|
||||
render(<TextToSpeechButton text="Hello world" />);
|
||||
|
||||
const button = screen.getByRole("button", { name: /read aloud/i });
|
||||
await user.click(button);
|
||||
|
||||
expect(mockSynthesize).toHaveBeenCalledWith("Hello world", undefined);
|
||||
});
|
||||
|
||||
it("should pass voice and tier options when provided", async () => {
|
||||
const user = userEvent.setup();
|
||||
mockSynthesize.mockResolvedValueOnce(undefined);
|
||||
|
||||
render(<TextToSpeechButton text="Hello" voice="alloy" tier="premium" />);
|
||||
|
||||
const button = screen.getByRole("button", { name: /read aloud/i });
|
||||
await user.click(button);
|
||||
|
||||
expect(mockSynthesize).toHaveBeenCalledWith("Hello", {
|
||||
voice: "alloy",
|
||||
tier: "premium",
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("loading state", () => {
|
||||
it("should show loading indicator while synthesizing", () => {
|
||||
mockUseTextToSpeech.mockReturnValue({
|
||||
synthesize: mockSynthesize,
|
||||
play: mockPlay,
|
||||
pause: mockPause,
|
||||
stop: mockStop,
|
||||
audioUrl: null,
|
||||
isLoading: true,
|
||||
error: null,
|
||||
isPlaying: false,
|
||||
duration: 0,
|
||||
currentTime: 0,
|
||||
});
|
||||
|
||||
render(<TextToSpeechButton text="Hello world" />);
|
||||
|
||||
const button = screen.getByRole("button", { name: /synthesizing/i });
|
||||
expect(button).toBeInTheDocument();
|
||||
expect(button).toBeDisabled();
|
||||
});
|
||||
});
|
||||
|
||||
describe("audio player integration", () => {
|
||||
it("should show AudioPlayer when audio is available", () => {
|
||||
mockUseTextToSpeech.mockReturnValue({
|
||||
synthesize: mockSynthesize,
|
||||
play: mockPlay,
|
||||
pause: mockPause,
|
||||
stop: mockStop,
|
||||
audioUrl: "blob:mock-url",
|
||||
isLoading: false,
|
||||
error: null,
|
||||
isPlaying: false,
|
||||
duration: 30,
|
||||
currentTime: 0,
|
||||
});
|
||||
|
||||
render(<TextToSpeechButton text="Hello world" />);
|
||||
|
||||
expect(screen.getByRole("region", { name: /audio player/i })).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
describe("error state", () => {
|
||||
it("should display error message when synthesis fails", () => {
|
||||
mockUseTextToSpeech.mockReturnValue({
|
||||
synthesize: mockSynthesize,
|
||||
play: mockPlay,
|
||||
pause: mockPause,
|
||||
stop: mockStop,
|
||||
audioUrl: null,
|
||||
isLoading: false,
|
||||
error: "Synthesis failed",
|
||||
isPlaying: false,
|
||||
duration: 0,
|
||||
currentTime: 0,
|
||||
});
|
||||
|
||||
render(<TextToSpeechButton text="Hello world" />);
|
||||
|
||||
expect(screen.getByText(/synthesis failed/i)).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
describe("accessibility", () => {
|
||||
it("should have proper aria label on button", () => {
|
||||
render(<TextToSpeechButton text="Hello world" />);
|
||||
|
||||
const button = screen.getByRole("button", { name: /read aloud/i });
|
||||
expect(button).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
describe("design", () => {
|
||||
it("should not use aggressive colors", () => {
|
||||
const { container } = render(<TextToSpeechButton text="Hello world" />);
|
||||
|
||||
const allElements = container.querySelectorAll("*");
|
||||
allElements.forEach((el) => {
|
||||
const className = el.className;
|
||||
if (typeof className === "string") {
|
||||
expect(className).not.toMatch(/bg-red-|text-red-|border-red-/);
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
126
apps/web/src/components/speech/TextToSpeechButton.tsx
Normal file
126
apps/web/src/components/speech/TextToSpeechButton.tsx
Normal file
@@ -0,0 +1,126 @@
|
||||
/**
|
||||
* TextToSpeechButton Component
|
||||
* "Read aloud" button that synthesizes text and plays it via AudioPlayer.
|
||||
*
|
||||
* Accepts text as a prop, with optional voice and tier selection.
|
||||
* Shows loading state during synthesis and integrates AudioPlayer for playback.
|
||||
*
|
||||
* Follows PDA-friendly design: no aggressive colors, calm interface.
|
||||
*/
|
||||
|
||||
import { useCallback } from "react";
|
||||
import type { ReactElement } from "react";
|
||||
import { useTextToSpeech } from "@/hooks/useTextToSpeech";
|
||||
import type { SynthesizeOptions } from "@/hooks/useTextToSpeech";
|
||||
import { AudioPlayer } from "./AudioPlayer";
|
||||
|
||||
/** Props for {@link TextToSpeechButton}. */
export interface TextToSpeechButtonProps {
  /** The text to synthesize to speech */
  text: string;
  /** Optional voice ID to use */
  voice?: string;
  /** Optional tier (e.g. "standard", "premium") */
  tier?: string;
  /** Optional className for the container */
  className?: string;
}
|
||||
|
||||
/**
 * TextToSpeechButton provides a "Read aloud" button that synthesizes
 * the given text and displays an AudioPlayer for playback control.
 *
 * The button is disabled and shows a spinner while the hook reports
 * isLoading; once the hook exposes an audioUrl, an inline AudioPlayer
 * is rendered below the button.
 */
export function TextToSpeechButton({
  text,
  voice,
  tier,
  className = "",
}: TextToSpeechButtonProps): ReactElement {
  const { synthesize, audioUrl, isLoading, error } = useTextToSpeech();

  /**
   * Handle read aloud button click.
   * The options object is built only when voice/tier are supplied, so the
   * hook receives `undefined` otherwise — the unit tests assert this exact
   * calling convention.
   */
  const handleClick = useCallback(async (): Promise<void> => {
    let options: SynthesizeOptions | undefined;

    if (voice !== undefined || tier !== undefined) {
      options = {};
      if (voice !== undefined) options.voice = voice;
      if (tier !== undefined) options.tier = tier;
    }

    await synthesize(text, options);
  }, [text, voice, tier, synthesize]);

  return (
    <div className={`flex flex-col gap-2 ${className}`}>
      {/* Read Aloud Button */}
      <button
        type="button"
        onClick={() => void handleClick()}
        disabled={isLoading}
        aria-label={isLoading ? "Synthesizing speech" : "Read aloud"}
        className="inline-flex items-center gap-2 rounded-lg border border-gray-200 bg-white px-3 py-1.5 text-sm font-medium text-gray-700 transition-colors hover:bg-gray-50 focus:outline-none focus:ring-2 focus:ring-blue-300 disabled:cursor-not-allowed disabled:opacity-50"
      >
        {isLoading ? (
          <>
            {/* Spinner */}
            <svg
              className="h-4 w-4 animate-spin text-gray-500"
              viewBox="0 0 24 24"
              fill="none"
              aria-hidden="true"
            >
              <circle
                cx="12"
                cy="12"
                r="10"
                stroke="currentColor"
                strokeWidth="3"
                className="opacity-25"
              />
              <path
                fill="currentColor"
                d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z"
                className="opacity-75"
              />
            </svg>
            <span>Synthesizing...</span>
          </>
        ) : (
          <>
            {/* Speaker Icon */}
            <svg
              width="16"
              height="16"
              viewBox="0 0 24 24"
              fill="none"
              stroke="currentColor"
              strokeWidth="2"
              strokeLinecap="round"
              strokeLinejoin="round"
              aria-hidden="true"
            >
              <polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5" />
              <path d="M15.54 8.46a5 5 0 010 7.07" />
              <path d="M19.07 4.93a10 10 0 010 14.14" />
            </svg>
            <span>Read aloud</span>
          </>
        )}
      </button>

      {/* Error Display — amber rather than red, per the calm-design guard
          enforced in the test suite */}
      {error && (
        <p className="text-sm text-amber-600" role="alert">
          {error}
        </p>
      )}

      {/* Audio Player (shown after synthesis) */}
      {audioUrl && <AudioPlayer src={audioUrl} />}
    </div>
  );
}
|
||||
|
||||
export default TextToSpeechButton;
|
||||
228
apps/web/src/components/speech/VoiceInput.test.tsx
Normal file
228
apps/web/src/components/speech/VoiceInput.test.tsx
Normal file
@@ -0,0 +1,228 @@
|
||||
import { describe, it, expect, vi, beforeEach } from "vitest";
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { VoiceInput } from "./VoiceInput";
|
||||
|
||||
// Mock the useVoiceInput hook.
// Stable spies shared by every mocked hook return value so individual tests
// can assert call counts on the recording control functions.
const mockStartRecording = vi.fn();
const mockStopRecording = vi.fn();

// Replace the real hook: these component tests verify rendering and prop
// wiring only, never actual microphone capture.
vi.mock("@/hooks/useVoiceInput", () => ({
  useVoiceInput: vi.fn(() => ({
    isRecording: false,
    startRecording: mockStartRecording,
    stopRecording: mockStopRecording,
    transcript: "",
    partialTranscript: "",
    error: null,
    audioLevel: 0,
  })),
}));
|
||||
|
||||
// We need to import after mocking
|
||||
import { useVoiceInput } from "@/hooks/useVoiceInput";
|
||||
|
||||
describe("VoiceInput", (): void => {
  /**
   * Build a complete useVoiceInput return value with idle defaults, overriding
   * only the fields a given test cares about. Removes the seven-field object
   * literal that was copy-pasted into every test.
   */
  function voiceInputState(
    overrides: Partial<ReturnType<typeof useVoiceInput>> = {}
  ): ReturnType<typeof useVoiceInput> {
    return {
      isRecording: false,
      startRecording: mockStartRecording,
      stopRecording: mockStopRecording,
      transcript: "",
      partialTranscript: "",
      error: null,
      audioLevel: 0,
      ...overrides,
    };
  }

  beforeEach((): void => {
    vi.clearAllMocks();
    // Reset mock implementation to the idle default before each test.
    vi.mocked(useVoiceInput).mockReturnValue(voiceInputState());
  });

  it("should render a microphone button", (): void => {
    render(<VoiceInput />);

    const button = screen.getByRole("button", { name: /start voice input/i });
    expect(button).toBeInTheDocument();
  });

  it("should have accessible aria label", (): void => {
    render(<VoiceInput />);

    const button = screen.getByRole("button", { name: /start voice input/i });
    expect(button).toHaveAttribute("aria-label", "Start voice input");
  });

  it("should call startRecording when mic button is clicked", async (): Promise<void> => {
    const user = userEvent.setup();
    render(<VoiceInput />);

    await user.click(screen.getByRole("button", { name: /start voice input/i }));

    expect(mockStartRecording).toHaveBeenCalledTimes(1);
  });

  it("should show recording state when isRecording is true", (): void => {
    vi.mocked(useVoiceInput).mockReturnValue(
      voiceInputState({ isRecording: true, audioLevel: 0.5 })
    );

    render(<VoiceInput />);

    expect(
      screen.getByRole("button", { name: /stop voice input/i })
    ).toBeInTheDocument();
  });

  it("should call stopRecording when mic button is clicked while recording", async (): Promise<void> => {
    const user = userEvent.setup();
    vi.mocked(useVoiceInput).mockReturnValue(
      voiceInputState({ isRecording: true, audioLevel: 0.5 })
    );

    render(<VoiceInput />);

    await user.click(screen.getByRole("button", { name: /stop voice input/i }));

    expect(mockStopRecording).toHaveBeenCalledTimes(1);
  });

  it("should display partial transcription text", (): void => {
    vi.mocked(useVoiceInput).mockReturnValue(
      voiceInputState({
        isRecording: true,
        partialTranscript: "hello worl",
        audioLevel: 0.3,
      })
    );

    render(<VoiceInput />);

    expect(screen.getByText("hello worl")).toBeInTheDocument();
  });

  it("should display final transcript text", (): void => {
    vi.mocked(useVoiceInput).mockReturnValue(
      voiceInputState({ transcript: "hello world" })
    );

    render(<VoiceInput />);

    expect(screen.getByText("hello world")).toBeInTheDocument();
  });

  it("should display error message", (): void => {
    vi.mocked(useVoiceInput).mockReturnValue(
      voiceInputState({ error: "Microphone access not available" })
    );

    render(<VoiceInput />);

    expect(screen.getByText("Microphone access not available")).toBeInTheDocument();
  });

  it("should call onTranscript callback prop", (): void => {
    const onTranscript = vi.fn();
    vi.mocked(useVoiceInput).mockReturnValue(
      voiceInputState({ transcript: "final text" })
    );

    render(<VoiceInput onTranscript={onTranscript} />);

    // The onTranscript prop is passed to the hook - we verify the prop is accepted
    expect(useVoiceInput).toHaveBeenCalledWith(
      expect.objectContaining({ onTranscript })
    );
  });

  it("should use calm, non-aggressive design for recording indicator", (): void => {
    vi.mocked(useVoiceInput).mockReturnValue(
      voiceInputState({ isRecording: true, audioLevel: 0.5 })
    );

    render(<VoiceInput />);

    // PDA-friendly: no aggressive red styling while recording.
    const button = screen.getByRole("button", { name: /stop voice input/i });
    expect(button.className).not.toMatch(/bg-red-|text-red-|border-red-/);
  });

  it("should use calm design for error display", (): void => {
    vi.mocked(useVoiceInput).mockReturnValue(
      voiceInputState({ error: "Something went wrong" })
    );

    render(<VoiceInput />);

    const errorEl = screen.getByText("Something went wrong");
    expect(errorEl.className).not.toMatch(/text-red-600|bg-red-/);
  });

  it("should be disabled when disabled prop is true", (): void => {
    render(<VoiceInput disabled />);

    expect(
      screen.getByRole("button", { name: /start voice input/i })
    ).toBeDisabled();
  });
});
|
||||
146
apps/web/src/components/speech/VoiceInput.tsx
Normal file
146
apps/web/src/components/speech/VoiceInput.tsx
Normal file
@@ -0,0 +1,146 @@
|
||||
/**
|
||||
* VoiceInput component
|
||||
*
|
||||
* Provides a microphone button with visual feedback for voice input.
|
||||
* Click to start/stop recording with real-time transcription display.
|
||||
*
|
||||
* Design principles:
|
||||
* - PDA-friendly: calm, non-aggressive colors
|
||||
* - Gentle pulsing animation for recording state (blue/green)
|
||||
* - Mobile-friendly touch interaction
|
||||
* - Accessible with proper aria labels
|
||||
*/
|
||||
|
||||
import { useVoiceInput } from "@/hooks/useVoiceInput";
|
||||
import type { UseVoiceInputOptions } from "@/hooks/useVoiceInput";
|
||||
import { AudioVisualizer } from "./AudioVisualizer";
|
||||
import { Mic, MicOff } from "lucide-react";
|
||||
|
||||
/** Props accepted by the VoiceInput component. */
export interface VoiceInputProps {
  /** Callback fired when final transcription is received */
  onTranscript?: (text: string) => void;
  /** Whether to use WebSocket streaming (default: true) */
  useWebSocket?: boolean;
  /** Whether the input is disabled */
  disabled?: boolean;
  /** Additional CSS classes for the container */
  className?: string;
}
|
||||
|
||||
/**
 * Voice input component with microphone capture and real-time transcription.
 * Shows a mic button that toggles recording, with visual feedback
 * and transcription text display.
 *
 * While recording, the live partial transcript is shown in an italic, muted
 * style; once stopped, the final transcript replaces it.
 */
export function VoiceInput({
  onTranscript,
  useWebSocket: useWs,
  disabled = false,
  className = "",
}: VoiceInputProps): React.JSX.Element {
  // Build hook options only from props the caller actually supplied, so the
  // hook's own defaults apply when a prop is omitted (relevant under
  // exactOptionalPropertyTypes, where `undefined` differs from absent).
  const hookOptions: UseVoiceInputOptions = {};
  if (onTranscript !== undefined) {
    hookOptions.onTranscript = onTranscript;
  }
  if (useWs !== undefined) {
    hookOptions.useWebSocket = useWs;
  }

  const {
    isRecording,
    startRecording,
    stopRecording,
    transcript,
    partialTranscript,
    error,
    audioLevel,
  } = useVoiceInput(hookOptions);

  // Toggle recording. startRecording is async (microphone acquisition) and is
  // deliberately fire-and-forget here; errors surface via the hook's `error`.
  const handleClick = (): void => {
    if (isRecording) {
      stopRecording();
    } else {
      void startRecording();
    }
  };

  // Live partial text while recording; final transcript once stopped.
  const displayText = isRecording ? partialTranscript : transcript;

  return (
    <div className={`flex flex-col items-center gap-3 ${className}`}>
      {/* Mic button with recording indicator */}
      <div className="relative flex items-center gap-2">
        {/* Pulsing ring animation when recording (calm sky tint, not red) */}
        {isRecording && (
          <div
            className="absolute inset-0 -m-1 rounded-full bg-sky-400/20 animate-pulse"
            aria-hidden="true"
          />
        )}

        <button
          type="button"
          onClick={handleClick}
          disabled={disabled}
          aria-label={isRecording ? "Stop voice input" : "Start voice input"}
          className={`
            relative z-10 flex items-center justify-center
            w-10 h-10 rounded-full transition-all duration-200
            focus:outline-none focus:ring-2 focus:ring-sky-400 focus:ring-offset-2
            disabled:opacity-50 disabled:cursor-not-allowed
            ${
              isRecording
                ? "bg-sky-500 text-white hover:bg-sky-600 shadow-md"
                : "bg-slate-100 text-slate-600 hover:bg-slate-200 dark:bg-slate-700 dark:text-slate-300 dark:hover:bg-slate-600"
            }
          `}
        >
          {isRecording ? (
            <MicOff className="w-5 h-5" aria-hidden="true" />
          ) : (
            <Mic className="w-5 h-5" aria-hidden="true" />
          )}
        </button>

        {/* Audio level visualizer - shown during recording */}
        {isRecording && (
          <AudioVisualizer audioLevel={audioLevel} isActive={isRecording} barCount={5} />
        )}
      </div>

      {/* Recording status indicator */}
      {isRecording && (
        <div className="flex items-center gap-1.5 text-xs text-sky-600 dark:text-sky-400">
          <span className="w-2 h-2 rounded-full bg-sky-500 animate-pulse" aria-hidden="true" />
          <span>Listening...</span>
        </div>
      )}

      {/* Transcription text display */}
      {displayText && (
        <p
          className={`
            text-sm max-w-md text-center px-3 py-1.5 rounded-lg
            ${
              isRecording
                ? "text-slate-500 dark:text-slate-400 bg-slate-50 dark:bg-slate-800/50 italic"
                : "text-slate-700 dark:text-slate-200 bg-slate-100 dark:bg-slate-800"
            }
          `}
        >
          {displayText}
        </p>
      )}

      {/* Error display - calm, non-aggressive (amber instead of red) */}
      {error && (
        <p
          className="text-sm text-amber-700 dark:text-amber-400 bg-amber-50 dark:bg-amber-900/20 px-3 py-1.5 rounded-lg max-w-md text-center"
          role="alert"
        >
          {error}
        </p>
      )}
    </div>
  );
}
|
||||
8
apps/web/src/components/speech/index.ts
Normal file
8
apps/web/src/components/speech/index.ts
Normal file
@@ -0,0 +1,8 @@
|
||||
// Public barrel for speech components: voice capture, level visualization,
// and text-to-speech playback.
export { VoiceInput } from "./VoiceInput";
export type { VoiceInputProps } from "./VoiceInput";
export { AudioVisualizer } from "./AudioVisualizer";
export type { AudioVisualizerProps } from "./AudioVisualizer";
export { AudioPlayer } from "./AudioPlayer";
export type { AudioPlayerProps } from "./AudioPlayer";
export { TextToSpeechButton } from "./TextToSpeechButton";
export type { TextToSpeechButtonProps } from "./TextToSpeechButton";
|
||||
285
apps/web/src/hooks/useTextToSpeech.test.ts
Normal file
285
apps/web/src/hooks/useTextToSpeech.test.ts
Normal file
@@ -0,0 +1,285 @@
|
||||
/**
|
||||
* @file useTextToSpeech.test.ts
|
||||
* @description Tests for the useTextToSpeech hook that manages TTS API integration
|
||||
*/
|
||||
|
||||
import { renderHook, act } from "@testing-library/react";
|
||||
import { describe, it, expect, beforeEach, vi, afterEach } from "vitest";
|
||||
import { useTextToSpeech } from "./useTextToSpeech";
|
||||
import * as speechApi from "@/lib/api/speech";
|
||||
|
||||
// Mock the speech API module — these hook tests must never hit the network.
vi.mock("@/lib/api/speech", () => ({
  synthesizeSpeech: vi.fn(),
  getVoices: vi.fn(),
}));

// Mock URL.createObjectURL and URL.revokeObjectURL (not implemented in jsdom);
// the hook turns synthesized blobs into object URLs.
const mockCreateObjectURL = vi.fn().mockReturnValue("blob:mock-audio-url");
const mockRevokeObjectURL = vi.fn();

beforeEach(() => {
  global.URL.createObjectURL = mockCreateObjectURL;
  global.URL.revokeObjectURL = mockRevokeObjectURL;
});
|
||||
|
||||
// Mock HTMLAudioElement
|
||||
class MockAudio {
|
||||
src = "";
|
||||
currentTime = 0;
|
||||
duration = 120;
|
||||
paused = true;
|
||||
playbackRate = 1;
|
||||
volume = 1;
|
||||
onended: (() => void) | null = null;
|
||||
ontimeupdate: (() => void) | null = null;
|
||||
onloadedmetadata: (() => void) | null = null;
|
||||
onerror: ((e: unknown) => void) | null = null;
|
||||
|
||||
play(): Promise<void> {
|
||||
this.paused = false;
|
||||
return Promise.resolve();
|
||||
}
|
||||
|
||||
pause(): void {
|
||||
this.paused = true;
|
||||
}
|
||||
|
||||
addEventListener(event: string, handler: () => void): void {
|
||||
if (event === "ended") this.onended = handler;
|
||||
if (event === "timeupdate") this.ontimeupdate = handler;
|
||||
if (event === "loadedmetadata") this.onloadedmetadata = handler;
|
||||
if (event === "error") this.onerror = handler;
|
||||
}
|
||||
|
||||
removeEventListener(): void {
|
||||
// no-op for tests
|
||||
}
|
||||
}
|
||||
|
||||
vi.stubGlobal("Audio", MockAudio);
|
||||
|
||||
const mockSynthesizeSpeech = speechApi.synthesizeSpeech as ReturnType<typeof vi.fn>;
|
||||
|
||||
describe("useTextToSpeech", () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
mockCreateObjectURL.mockReturnValue("blob:mock-audio-url");
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
describe("initial state", () => {
|
||||
it("should return correct initial interface", () => {
|
||||
const { result } = renderHook(() => useTextToSpeech());
|
||||
|
||||
expect(result.current.synthesize).toBeTypeOf("function");
|
||||
expect(result.current.play).toBeTypeOf("function");
|
||||
expect(result.current.pause).toBeTypeOf("function");
|
||||
expect(result.current.stop).toBeTypeOf("function");
|
||||
expect(result.current.audioUrl).toBeNull();
|
||||
expect(result.current.isLoading).toBe(false);
|
||||
expect(result.current.error).toBeNull();
|
||||
expect(result.current.isPlaying).toBe(false);
|
||||
expect(result.current.duration).toBe(0);
|
||||
expect(result.current.currentTime).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("synthesize", () => {
|
||||
it("should call API and return audio blob URL", async () => {
|
||||
const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
|
||||
mockSynthesizeSpeech.mockResolvedValueOnce(mockBlob);
|
||||
|
||||
const { result } = renderHook(() => useTextToSpeech());
|
||||
|
||||
await act(async () => {
|
||||
await result.current.synthesize("Hello world");
|
||||
});
|
||||
|
||||
expect(mockSynthesizeSpeech).toHaveBeenCalledWith({
|
||||
text: "Hello world",
|
||||
});
|
||||
expect(result.current.audioUrl).toBe("blob:mock-audio-url");
|
||||
expect(result.current.isLoading).toBe(false);
|
||||
expect(result.current.error).toBeNull();
|
||||
});
|
||||
|
||||
it("should pass voice and tier options to API", async () => {
|
||||
const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
|
||||
mockSynthesizeSpeech.mockResolvedValueOnce(mockBlob);
|
||||
|
||||
const { result } = renderHook(() => useTextToSpeech());
|
||||
|
||||
await act(async () => {
|
||||
await result.current.synthesize("Hello", {
|
||||
voice: "alloy",
|
||||
tier: "premium",
|
||||
speed: 1.5,
|
||||
});
|
||||
});
|
||||
|
||||
expect(mockSynthesizeSpeech).toHaveBeenCalledWith({
|
||||
text: "Hello",
|
||||
voice: "alloy",
|
||||
tier: "premium",
|
||||
speed: 1.5,
|
||||
});
|
||||
});
|
||||
|
||||
it("should set loading state while synthesizing", async () => {
|
||||
let resolvePromise: ((value: Blob) => void) | undefined;
|
||||
const pendingPromise = new Promise<Blob>((resolve) => {
|
||||
resolvePromise = resolve;
|
||||
});
|
||||
mockSynthesizeSpeech.mockReturnValueOnce(pendingPromise);
|
||||
|
||||
const { result } = renderHook(() => useTextToSpeech());
|
||||
|
||||
act(() => {
|
||||
void result.current.synthesize("Hello");
|
||||
});
|
||||
|
||||
expect(result.current.isLoading).toBe(true);
|
||||
|
||||
await act(async () => {
|
||||
resolvePromise?.(new Blob(["audio"], { type: "audio/mpeg" }));
|
||||
await pendingPromise;
|
||||
});
|
||||
|
||||
expect(result.current.isLoading).toBe(false);
|
||||
});
|
||||
|
||||
it("should handle API errors gracefully", async () => {
|
||||
mockSynthesizeSpeech.mockRejectedValueOnce(new Error("Synthesis failed"));
|
||||
|
||||
const { result } = renderHook(() => useTextToSpeech());
|
||||
|
||||
await act(async () => {
|
||||
await result.current.synthesize("Hello");
|
||||
});
|
||||
|
||||
expect(result.current.error).toBe("Synthesis failed");
|
||||
expect(result.current.isLoading).toBe(false);
|
||||
expect(result.current.audioUrl).toBeNull();
|
||||
});
|
||||
|
||||
it("should cache audio for repeated synthesis of same text", async () => {
|
||||
const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
|
||||
mockSynthesizeSpeech.mockResolvedValue(mockBlob);
|
||||
|
||||
const { result } = renderHook(() => useTextToSpeech());
|
||||
|
||||
// First call
|
||||
await act(async () => {
|
||||
await result.current.synthesize("Hello world");
|
||||
});
|
||||
|
||||
// Second call with same text
|
||||
await act(async () => {
|
||||
await result.current.synthesize("Hello world");
|
||||
});
|
||||
|
||||
// API should only be called once due to caching
|
||||
expect(mockSynthesizeSpeech).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("should not cache when options differ", async () => {
|
||||
const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
|
||||
mockSynthesizeSpeech.mockResolvedValue(mockBlob);
|
||||
|
||||
const { result } = renderHook(() => useTextToSpeech());
|
||||
|
||||
await act(async () => {
|
||||
await result.current.synthesize("Hello", { voice: "alloy" });
|
||||
});
|
||||
|
||||
await act(async () => {
|
||||
await result.current.synthesize("Hello", { voice: "nova" });
|
||||
});
|
||||
|
||||
expect(mockSynthesizeSpeech).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
});
|
||||
|
||||
describe("playback controls", () => {
|
||||
it("should play audio after synthesis", async () => {
|
||||
const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
|
||||
mockSynthesizeSpeech.mockResolvedValueOnce(mockBlob);
|
||||
|
||||
const { result } = renderHook(() => useTextToSpeech());
|
||||
|
||||
await act(async () => {
|
||||
await result.current.synthesize("Hello");
|
||||
});
|
||||
|
||||
await act(async () => {
|
||||
await result.current.play();
|
||||
});
|
||||
|
||||
expect(result.current.isPlaying).toBe(true);
|
||||
});
|
||||
|
||||
it("should pause audio playback", async () => {
|
||||
const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
|
||||
mockSynthesizeSpeech.mockResolvedValueOnce(mockBlob);
|
||||
|
||||
const { result } = renderHook(() => useTextToSpeech());
|
||||
|
||||
await act(async () => {
|
||||
await result.current.synthesize("Hello");
|
||||
});
|
||||
|
||||
await act(async () => {
|
||||
await result.current.play();
|
||||
});
|
||||
|
||||
act(() => {
|
||||
result.current.pause();
|
||||
});
|
||||
|
||||
expect(result.current.isPlaying).toBe(false);
|
||||
});
|
||||
|
||||
it("should stop and reset playback", async () => {
|
||||
const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
|
||||
mockSynthesizeSpeech.mockResolvedValueOnce(mockBlob);
|
||||
|
||||
const { result } = renderHook(() => useTextToSpeech());
|
||||
|
||||
await act(async () => {
|
||||
await result.current.synthesize("Hello");
|
||||
});
|
||||
|
||||
await act(async () => {
|
||||
await result.current.play();
|
||||
});
|
||||
|
||||
act(() => {
|
||||
result.current.stop();
|
||||
});
|
||||
|
||||
expect(result.current.isPlaying).toBe(false);
|
||||
expect(result.current.currentTime).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("cleanup", () => {
|
||||
it("should revoke object URLs on unmount", async () => {
|
||||
const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
|
||||
mockSynthesizeSpeech.mockResolvedValueOnce(mockBlob);
|
||||
|
||||
const { result, unmount } = renderHook(() => useTextToSpeech());
|
||||
|
||||
await act(async () => {
|
||||
await result.current.synthesize("Hello");
|
||||
});
|
||||
|
||||
unmount();
|
||||
|
||||
expect(mockRevokeObjectURL).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
});
|
||||
239
apps/web/src/hooks/useTextToSpeech.ts
Normal file
239
apps/web/src/hooks/useTextToSpeech.ts
Normal file
@@ -0,0 +1,239 @@
|
||||
/**
|
||||
* useTextToSpeech hook
|
||||
* Manages TTS API integration with synthesis, caching, and playback state
|
||||
*/
|
||||
|
||||
import { useState, useCallback, useRef, useEffect } from "react";
|
||||
import { synthesizeSpeech } from "@/lib/api/speech";
|
||||
|
||||
/** Options forwarded to the speech synthesis API; omitted fields use API defaults. */
export interface SynthesizeOptions {
  /** Voice identifier (e.g. "alloy", "nova") */
  voice?: string;
  /** Playback speed multiplier requested from the API */
  speed?: number;
  /** Audio format requested from the API */
  format?: string;
  /** Service tier — presumably selects quality/cost level; confirm against speech API docs */
  tier?: string;
}

/** Value returned by {@link useTextToSpeech}. */
export interface UseTextToSpeechReturn {
  /** Synthesize text to speech audio */
  synthesize: (text: string, options?: SynthesizeOptions) => Promise<void>;
  /** The URL of the synthesized audio blob */
  audioUrl: string | null;
  /** Whether synthesis is in progress */
  isLoading: boolean;
  /** Error message if synthesis failed */
  error: string | null;
  /** Start or resume audio playback */
  play: () => Promise<void>;
  /** Pause audio playback */
  pause: () => void;
  /** Stop audio and reset to beginning */
  stop: () => void;
  /** Whether audio is currently playing */
  isPlaying: boolean;
  /** Total duration of the audio in seconds */
  duration: number;
  /** Current playback position in seconds */
  currentTime: number;
}
|
||||
|
||||
/** Cache key generator for text + options combination */
|
||||
function getCacheKey(text: string, options?: SynthesizeOptions): string {
|
||||
return JSON.stringify({ text, ...options });
|
||||
}
|
||||
|
||||
/**
 * Hook for text-to-speech API integration with caching and playback controls.
 *
 * Synthesized audio blobs are cached per (text, options) key so repeated
 * requests for the same input never re-hit the API; all created blob URLs
 * are revoked on unmount.
 */
export function useTextToSpeech(): UseTextToSpeechReturn {
  const [audioUrl, setAudioUrl] = useState<string | null>(null);
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [isPlaying, setIsPlaying] = useState(false);
  const [duration, setDuration] = useState(0);
  const [currentTime, setCurrentTime] = useState(0);

  // Audio element ref for playback control
  const audioRef = useRef<HTMLAudioElement | null>(null);

  // Cache: maps cache key -> blob URL
  const cacheRef = useRef<Map<string, string>>(new Map());

  // Track all blob URLs for cleanup (revoked on unmount)
  const blobUrlsRef = useRef<Set<string>>(new Set());

  /**
   * Clean up audio element event listeners and state.
   *
   * NOTE(review): references the hoisted handler declarations below. Because
   * this callback is memoized with [] deps it closes over the first render's
   * handler identities — the same identities setupAudio registers — so
   * add/remove stay paired. That pairing is fragile; confirm before turning
   * the handlers into arrow-function consts or adding deps.
   */
  const cleanupAudio = useCallback(() => {
    const audio = audioRef.current;
    if (audio) {
      audio.pause();
      audio.removeEventListener("ended", handleEnded);
      audio.removeEventListener("timeupdate", handleTimeUpdate);
      audio.removeEventListener("loadedmetadata", handleLoadedMetadata);
      audioRef.current = null;
    }
    setIsPlaying(false);
  }, []);

  /**
   * Handle audio ended event: mark stopped and rewind the displayed position.
   */
  function handleEnded(): void {
    setIsPlaying(false);
    setCurrentTime(0);
  }

  /**
   * Handle audio time update event by mirroring the element's position into state.
   */
  function handleTimeUpdate(): void {
    const audio = audioRef.current;
    if (audio) {
      setCurrentTime(audio.currentTime);
    }
  }

  /**
   * Handle audio metadata loaded event; the isFinite guard skips the
   * NaN/Infinity durations some streams report before metadata is known.
   */
  function handleLoadedMetadata(): void {
    const audio = audioRef.current;
    if (audio && isFinite(audio.duration)) {
      setDuration(audio.duration);
    }
  }

  /**
   * Set up a new Audio element for a given URL, tearing down any previous one first.
   */
  const setupAudio = useCallback(
    (url: string) => {
      cleanupAudio();

      const audio = new Audio(url);
      audio.addEventListener("ended", handleEnded);
      audio.addEventListener("timeupdate", handleTimeUpdate);
      audio.addEventListener("loadedmetadata", handleLoadedMetadata);
      audioRef.current = audio;
    },
    [cleanupAudio]
  );

  /**
   * Synthesize text to speech. Serves from the blob-URL cache when the same
   * text/options pair was synthesized before; otherwise calls the API,
   * caches the result, and prepares an Audio element for playback.
   */
  const synthesize = useCallback(
    async (text: string, options?: SynthesizeOptions): Promise<void> => {
      setError(null);

      // Check cache first
      const cacheKey = getCacheKey(text, options);
      const cachedUrl = cacheRef.current.get(cacheKey);

      if (cachedUrl) {
        setAudioUrl(cachedUrl);
        setupAudio(cachedUrl);
        return;
      }

      setIsLoading(true);

      try {
        // Spread only options actually provided so the API payload omits
        // undefined fields entirely.
        const blob = await synthesizeSpeech({
          text,
          ...(options?.voice !== undefined && { voice: options.voice }),
          ...(options?.speed !== undefined && { speed: options.speed }),
          ...(options?.format !== undefined && { format: options.format }),
          ...(options?.tier !== undefined && { tier: options.tier }),
        });

        const url = URL.createObjectURL(blob);

        // Store in cache and track for cleanup
        cacheRef.current.set(cacheKey, url);
        blobUrlsRef.current.add(url);

        setAudioUrl(url);
        setupAudio(url);
      } catch (err) {
        const errorMsg = err instanceof Error ? err.message : "Speech synthesis failed";
        setError(errorMsg);
        setAudioUrl(null);
      } finally {
        setIsLoading(false);
      }
    },
    [setupAudio]
  );

  /**
   * Start or resume audio playback. No-op when nothing has been synthesized yet.
   */
  const play = useCallback(async (): Promise<void> => {
    const audio = audioRef.current;
    if (audio) {
      await audio.play();
      setIsPlaying(true);
    }
  }, []);

  /**
   * Pause audio playback, keeping the current position.
   */
  const pause = useCallback((): void => {
    const audio = audioRef.current;
    if (audio) {
      audio.pause();
      setIsPlaying(false);
    }
  }, []);

  /**
   * Stop audio and reset to beginning.
   */
  const stop = useCallback((): void => {
    const audio = audioRef.current;
    if (audio) {
      audio.pause();
      audio.currentTime = 0;
      setIsPlaying(false);
      setCurrentTime(0);
    }
  }, []);

  // Cleanup on unmount: revoke all blob URLs and clean up audio.
  // (Deliberately repeats cleanupAudio's teardown inline so the effect
  // needs no dependencies.)
  useEffect((): (() => void) => {
    return (): void => {
      // Clean up audio element
      const audio = audioRef.current;
      if (audio) {
        audio.pause();
        audio.removeEventListener("ended", handleEnded);
        audio.removeEventListener("timeupdate", handleTimeUpdate);
        audio.removeEventListener("loadedmetadata", handleLoadedMetadata);
        audioRef.current = null;
      }

      // Revoke all blob URLs so the browser can release the audio data
      for (const url of blobUrlsRef.current) {
        URL.revokeObjectURL(url);
      }
      blobUrlsRef.current.clear();
      cacheRef.current.clear();
    };
  }, []);

  return {
    synthesize,
    audioUrl,
    isLoading,
    error,
    play,
    pause,
    stop,
    isPlaying,
    duration,
    currentTime,
  };
}
|
||||
362
apps/web/src/hooks/useVoiceInput.test.ts
Normal file
362
apps/web/src/hooks/useVoiceInput.test.ts
Normal file
@@ -0,0 +1,362 @@
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import { renderHook, act, waitFor } from "@testing-library/react";
|
||||
import { useVoiceInput } from "./useVoiceInput";
|
||||
import type { Socket } from "socket.io-client";
|
||||
import { io } from "socket.io-client";
|
||||
|
||||
// Mock socket.io-client
|
||||
vi.mock("socket.io-client");
|
||||
|
||||
// Mock MediaRecorder: addEventListener stores handlers on the matching on*
// slot so tests can fire dataavailable/stop/error events manually.
const mockMediaRecorder = {
  start: vi.fn(),
  stop: vi.fn(),
  pause: vi.fn(),
  resume: vi.fn(),
  state: "inactive" as RecordingState,
  ondataavailable: null as ((event: BlobEvent) => void) | null,
  onstop: null as (() => void) | null,
  onerror: null as ((event: Event) => void) | null,
  addEventListener: vi.fn((event: string, handler: EventListenerOrEventListenerObject) => {
    if (event === "dataavailable") {
      mockMediaRecorder.ondataavailable = handler as (event: BlobEvent) => void;
    } else if (event === "stop") {
      mockMediaRecorder.onstop = handler as () => void;
    } else if (event === "error") {
      mockMediaRecorder.onerror = handler as (event: Event) => void;
    }
  }),
  removeEventListener: vi.fn(),
  stream: {
    getTracks: vi.fn(() => [{ stop: vi.fn() }]),
  },
};

// Mock AnalyserNode with getByteFrequencyData for audio level: fills the
// buffer with a constant mid-range value so level computation sees a signal.
const mockAnalyserNode = {
  fftSize: 256,
  frequencyBinCount: 128,
  getByteFrequencyData: vi.fn((array: Uint8Array) => {
    // Simulate some audio data
    for (let i = 0; i < array.length; i++) {
      array[i] = 128;
    }
  }),
  connect: vi.fn(),
  disconnect: vi.fn(),
};

const mockMediaStreamSource = {
  connect: vi.fn(),
  disconnect: vi.fn(),
};

const mockAudioContext = {
  createAnalyser: vi.fn(() => mockAnalyserNode),
  createMediaStreamSource: vi.fn(() => mockMediaStreamSource),
  close: vi.fn(),
  state: "running",
};

// Mock getUserMedia (resolved/rejected per-test in beforeEach)
const mockGetUserMedia = vi.fn();

// Set up global mocks — jsdom provides no navigator.mediaDevices.
Object.defineProperty(global.navigator, "mediaDevices", {
  value: {
    getUserMedia: mockGetUserMedia,
  },
  writable: true,
  configurable: true,
});

// Mock AudioContext
vi.stubGlobal(
  "AudioContext",
  vi.fn(() => mockAudioContext)
);

// Mock MediaRecorder constructor
vi.stubGlobal(
  "MediaRecorder",
  vi.fn(() => mockMediaRecorder)
);

// Add isTypeSupported static method (the hook presumably probes supported
// MIME types before recording — confirm against useVoiceInput).
(
  global.MediaRecorder as unknown as { isTypeSupported: (type: string) => boolean }
).isTypeSupported = vi.fn(() => true);
|
||||
|
||||
// Test suite for the useVoiceInput hook: interface shape, recording lifecycle,
// WebSocket transcription events, error handling, and the REST fallback path.
describe("useVoiceInput", (): void => {
  let mockSocket: Partial<Socket>;
  // Handlers registered via socket.on are captured here so tests can
  // simulate server-sent transcription events directly.
  let socketEventHandlers: Record<string, (data: unknown) => void>;

  beforeEach((): void => {
    socketEventHandlers = {};

    mockSocket = {
      on: vi.fn((event: string, handler: (...args: unknown[]) => void) => {
        socketEventHandlers[event] = handler;
        return mockSocket;
      }) as unknown as Socket["on"],
      off: vi.fn(() => mockSocket) as unknown as Socket["off"],
      emit: vi.fn() as unknown as Socket["emit"],
      connect: vi.fn(),
      disconnect: vi.fn(),
      connected: true,
    };

    (io as unknown as ReturnType<typeof vi.fn>).mockReturnValue(mockSocket);

    // Reset MediaRecorder mock state
    mockMediaRecorder.state = "inactive";
    mockMediaRecorder.ondataavailable = null;
    mockMediaRecorder.onstop = null;
    mockMediaRecorder.onerror = null;

    // Default: getUserMedia succeeds
    const mockStream = {
      getTracks: vi.fn(() => [{ stop: vi.fn() }]),
    } as unknown as MediaStream;
    mockGetUserMedia.mockResolvedValue(mockStream);
  });

  afterEach((): void => {
    vi.clearAllMocks();
  });

  it("should return the correct interface", (): void => {
    const { result } = renderHook(() => useVoiceInput());

    expect(result.current).toHaveProperty("isRecording");
    expect(result.current).toHaveProperty("startRecording");
    expect(result.current).toHaveProperty("stopRecording");
    expect(result.current).toHaveProperty("transcript");
    expect(result.current).toHaveProperty("partialTranscript");
    expect(result.current).toHaveProperty("error");
    expect(result.current).toHaveProperty("audioLevel");
  });

  it("should start with default state", (): void => {
    const { result } = renderHook(() => useVoiceInput());

    expect(result.current.isRecording).toBe(false);
    expect(result.current.transcript).toBe("");
    expect(result.current.partialTranscript).toBe("");
    expect(result.current.error).toBeNull();
    expect(result.current.audioLevel).toBe(0);
  });

  it("should start recording when startRecording is called", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());

    await act(async () => {
      await result.current.startRecording();
    });

    expect(result.current.isRecording).toBe(true);
    // Constraints must match what the hook passes to getUserMedia.
    expect(mockGetUserMedia).toHaveBeenCalledWith({
      audio: {
        echoCancellation: true,
        noiseSuppression: true,
        sampleRate: 16000,
      },
    });
  });

  it("should stop recording when stopRecording is called", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());

    await act(async () => {
      await result.current.startRecording();
    });

    expect(result.current.isRecording).toBe(true);

    act(() => {
      result.current.stopRecording();
    });

    expect(result.current.isRecording).toBe(false);
  });

  it("should set error when microphone access is denied", async (): Promise<void> => {
    mockGetUserMedia.mockRejectedValueOnce(
      new DOMException("Permission denied", "NotAllowedError")
    );

    const { result } = renderHook(() => useVoiceInput());

    await act(async () => {
      await result.current.startRecording();
    });

    expect(result.current.isRecording).toBe(false);
    expect(result.current.error).toBeTruthy();
    expect(result.current.error).toContain("microphone");
  });

  it("should connect to speech WebSocket namespace", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());

    await act(async () => {
      await result.current.startRecording();
    });

    expect(io).toHaveBeenCalledWith(
      expect.any(String),
      expect.objectContaining({
        path: "/socket.io",
      })
    );
  });

  it("should emit start-transcription when recording begins", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());

    await act(async () => {
      await result.current.startRecording();
    });

    expect(mockSocket.emit).toHaveBeenCalledWith(
      "start-transcription",
      expect.objectContaining({
        // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
        format: expect.any(String),
      })
    );
  });

  it("should emit stop-transcription when recording stops", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());

    await act(async () => {
      await result.current.startRecording();
    });

    act(() => {
      result.current.stopRecording();
    });

    expect(mockSocket.emit).toHaveBeenCalledWith("stop-transcription");
  });

  it("should handle partial transcription events", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());

    await act(async () => {
      await result.current.startRecording();
    });

    // Simulate a server-sent partial transcription event.
    act(() => {
      socketEventHandlers["transcription-partial"]?.({
        text: "hello world",
      });
    });

    await waitFor(() => {
      expect(result.current.partialTranscript).toBe("hello world");
    });
  });

  it("should handle final transcription events", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());

    await act(async () => {
      await result.current.startRecording();
    });

    act(() => {
      socketEventHandlers["transcription-final"]?.({
        text: "hello world final",
      });
    });

    await waitFor(() => {
      expect(result.current.transcript).toBe("hello world final");
    });
  });

  it("should handle transcription error events", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());

    await act(async () => {
      await result.current.startRecording();
    });

    act(() => {
      socketEventHandlers["transcription-error"]?.({
        message: "Transcription failed",
      });
    });

    await waitFor(() => {
      expect(result.current.error).toBe("Transcription failed");
    });
  });

  it("should call onTranscript callback when final transcription received", async (): Promise<void> => {
    const onTranscript = vi.fn();
    const { result } = renderHook(() => useVoiceInput({ onTranscript }));

    await act(async () => {
      await result.current.startRecording();
    });

    act(() => {
      socketEventHandlers["transcription-final"]?.({
        text: "final text",
      });
    });

    await waitFor(() => {
      expect(onTranscript).toHaveBeenCalledWith("final text");
    });
  });

  it("should clean up on unmount", async (): Promise<void> => {
    const { result, unmount } = renderHook(() => useVoiceInput());

    await act(async () => {
      await result.current.startRecording();
    });

    unmount();

    expect(mockSocket.disconnect).toHaveBeenCalled();
  });

  it("should not start recording if already recording", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());

    await act(async () => {
      await result.current.startRecording();
    });

    // Reset the call count
    mockGetUserMedia.mockClear();

    await act(async () => {
      await result.current.startRecording();
    });

    // Should not have called getUserMedia again
    expect(mockGetUserMedia).not.toHaveBeenCalled();
  });

  describe("REST fallback", (): void => {
    it("should fall back to REST when WebSocket is unavailable", async (): Promise<void> => {
      // Simulate socket not connecting
      (mockSocket as { connected: boolean }).connected = false;

      const { result } = renderHook(() => useVoiceInput({ useWebSocket: false }));

      // Should still be able to start recording (REST mode)
      await act(async () => {
        await result.current.startRecording();
      });

      expect(result.current.isRecording).toBe(true);
    });
  });
});
|
||||
409
apps/web/src/hooks/useVoiceInput.ts
Normal file
409
apps/web/src/hooks/useVoiceInput.ts
Normal file
@@ -0,0 +1,409 @@
|
||||
/**
|
||||
* useVoiceInput hook
|
||||
*
|
||||
* Custom hook for microphone capture and real-time transcription.
|
||||
* Supports WebSocket streaming for real-time partial transcriptions
|
||||
* with REST upload fallback when WebSocket is unavailable.
|
||||
*/
|
||||
|
||||
import { useState, useCallback, useRef, useEffect } from "react";
|
||||
import type { Socket } from "socket.io-client";
|
||||
import { io } from "socket.io-client";
|
||||
import { API_BASE_URL } from "@/lib/config";
|
||||
import { apiPostFormData } from "@/lib/api/client";
|
||||
|
||||
/** Options for the useVoiceInput hook */
export interface UseVoiceInputOptions {
  /** Callback fired when final transcription is received */
  onTranscript?: (text: string) => void;
  /** Whether to use WebSocket streaming (default: true) */
  useWebSocket?: boolean;
  /** Audio sample rate in Hz (default: 16000) */
  sampleRate?: number;
}

/** Return type for the useVoiceInput hook */
export interface UseVoiceInputReturn {
  /** Whether the microphone is currently recording */
  isRecording: boolean;
  /** Start microphone capture and transcription */
  startRecording: () => Promise<void>;
  /** Stop microphone capture and transcription */
  stopRecording: () => void;
  /** The final transcription text */
  transcript: string;
  /** Partial transcription text (updates in real-time) */
  partialTranscript: string;
  /** Error message if something went wrong */
  error: string | null;
  /** Current audio input level (0-1) */
  audioLevel: number;
}

/** Payload of the "transcription-partial" WebSocket event. */
interface TranscriptionPartialPayload {
  text: string;
}

/** Payload of the "transcription-final" WebSocket event. */
interface TranscriptionFinalPayload {
  text: string;
}

/** Payload of the "transcription-error" WebSocket event. */
interface TranscriptionErrorPayload {
  message: string;
}

/** Shape of the JSON body returned by POST /api/speech/transcribe. */
interface TranscribeResponse {
  data: {
    text: string;
  };
}
|
||||
|
||||
/**
|
||||
* Determine the best MIME type for audio recording
|
||||
*/
|
||||
function getAudioMimeType(): string {
|
||||
if (typeof MediaRecorder === "undefined") {
|
||||
return "audio/webm";
|
||||
}
|
||||
const types = ["audio/webm;codecs=opus", "audio/webm", "audio/ogg;codecs=opus", "audio/mp4"];
|
||||
for (const type of types) {
|
||||
if (MediaRecorder.isTypeSupported(type)) {
|
||||
return type;
|
||||
}
|
||||
}
|
||||
return "audio/webm";
|
||||
}
|
||||
|
||||
/**
 * Hook for microphone capture and real-time speech-to-text transcription.
 *
 * Uses WebSocket streaming by default for real-time partial transcriptions.
 * Falls back to REST upload (POST /api/speech/transcribe) if WebSocket
 * is disabled or unavailable.
 *
 * @param options - Optional configuration; see UseVoiceInputOptions.
 * @returns Recording controls plus transcript / error / audio-level state;
 *          see UseVoiceInputReturn.
 */
export function useVoiceInput(options: UseVoiceInputOptions = {}): UseVoiceInputReturn {
  const { onTranscript, useWebSocket: useWs = true, sampleRate = 16000 } = options;

  const [isRecording, setIsRecording] = useState(false);
  const [transcript, setTranscript] = useState("");
  const [partialTranscript, setPartialTranscript] = useState("");
  const [error, setError] = useState<string | null>(null);
  const [audioLevel, setAudioLevel] = useState(0);

  // Refs to hold mutable state without re-renders
  const socketRef = useRef<Socket | null>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const streamRef = useRef<MediaStream | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const analyserRef = useRef<AnalyserNode | null>(null);
  const animationFrameRef = useRef<number | null>(null);
  const onTranscriptRef = useRef(onTranscript);
  const recordedChunksRef = useRef<Blob[]>([]);
  // Mirrors isRecording for use inside long-lived callbacks (e.g. the
  // requestAnimationFrame loop) that would otherwise close over stale state.
  const isRecordingRef = useRef(false);

  // Keep callback ref up to date
  useEffect(() => {
    onTranscriptRef.current = onTranscript;
  }, [onTranscript]);

  /**
   * Set up audio analysis for visualizing input level
   */
  const setupAudioAnalysis = useCallback((stream: MediaStream): void => {
    try {
      const audioContext = new AudioContext();
      const analyser = audioContext.createAnalyser();
      const source = audioContext.createMediaStreamSource(stream);

      analyser.fftSize = 256;
      source.connect(analyser);

      audioContextRef.current = audioContext;
      analyserRef.current = analyser;

      // Start level monitoring
      const dataArray = new Uint8Array(analyser.frequencyBinCount);

      const updateLevel = (): void => {
        // Stop scheduling frames once recording has ended.
        if (!isRecordingRef.current) {
          return;
        }

        analyser.getByteFrequencyData(dataArray);

        // Calculate average level
        let sum = 0;
        for (const value of dataArray) {
          sum += value;
        }
        // Normalize byte values (0-255) to a 0-1 level.
        const average = sum / dataArray.length / 255;
        setAudioLevel(average);

        animationFrameRef.current = requestAnimationFrame(updateLevel);
      };

      animationFrameRef.current = requestAnimationFrame(updateLevel);
    } catch {
      // Audio analysis is non-critical; continue without it
      console.warn("Audio analysis not available");
    }
  }, []);

  /**
   * Clean up audio analysis resources
   */
  const cleanupAudioAnalysis = useCallback((): void => {
    if (animationFrameRef.current !== null) {
      cancelAnimationFrame(animationFrameRef.current);
      animationFrameRef.current = null;
    }
    if (audioContextRef.current) {
      // close() returns a promise; fire-and-forget is intentional here.
      void audioContextRef.current.close();
      audioContextRef.current = null;
    }
    analyserRef.current = null;
    setAudioLevel(0);
  }, []);

  /**
   * Connect to the speech WebSocket namespace
   */
  const connectSocket = useCallback((): Socket => {
    const socket = io(API_BASE_URL, {
      path: "/socket.io",
      transports: ["websocket", "polling"],
    });

    socket.on("transcription-partial", (data: TranscriptionPartialPayload) => {
      setPartialTranscript(data.text);
    });

    socket.on("transcription-final", (data: TranscriptionFinalPayload) => {
      setTranscript(data.text);
      setPartialTranscript("");
      onTranscriptRef.current?.(data.text);
    });

    socket.on("transcription-error", (data: TranscriptionErrorPayload) => {
      setError(data.message);
    });

    socketRef.current = socket;
    return socket;
  }, []);

  /**
   * Disconnect the WebSocket
   */
  const disconnectSocket = useCallback((): void => {
    if (socketRef.current) {
      socketRef.current.off("transcription-partial");
      socketRef.current.off("transcription-final");
      socketRef.current.off("transcription-error");
      socketRef.current.disconnect();
      socketRef.current = null;
    }
  }, []);

  /**
   * Send recorded audio via REST API as fallback
   */
  const sendAudioViaRest = useCallback(async (audioBlob: Blob): Promise<void> => {
    try {
      const formData = new FormData();
      formData.append("audio", audioBlob, "recording.webm");

      const response = await apiPostFormData<TranscribeResponse>(
        "/api/speech/transcribe",
        formData
      );

      if (response.data.text) {
        setTranscript(response.data.text);
        setPartialTranscript("");
        onTranscriptRef.current?.(response.data.text);
      }
    } catch (err) {
      const message = err instanceof Error ? err.message : "Transcription request failed";
      setError(message);
    }
  }, []);

  /**
   * Stop all media tracks on the stream
   */
  const stopMediaTracks = useCallback((): void => {
    if (streamRef.current) {
      streamRef.current.getTracks().forEach((track) => {
        track.stop();
      });
      streamRef.current = null;
    }
  }, []);

  /**
   * Start microphone capture and transcription
   *
   * Requests microphone access, wires up audio-level monitoring, creates the
   * MediaRecorder, and either streams chunks over WebSocket or buffers them
   * for a REST upload on stop. Sets `error` (rather than throwing) on failure.
   */
  const startRecording = useCallback(async (): Promise<void> => {
    // Prevent double-start
    if (isRecordingRef.current) {
      return;
    }

    setError(null);
    setPartialTranscript("");
    recordedChunksRef.current = [];

    try {
      // Request microphone access
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          echoCancellation: true,
          noiseSuppression: true,
          sampleRate,
        },
      });

      streamRef.current = stream;

      // Set up audio level visualization
      setupAudioAnalysis(stream);

      // Determine MIME type
      const mimeType = getAudioMimeType();

      // Create MediaRecorder
      const mediaRecorder = new MediaRecorder(stream, { mimeType });
      mediaRecorderRef.current = mediaRecorder;

      // Connect WebSocket if enabled
      let socket: Socket | null = null;
      if (useWs) {
        socket = connectSocket();

        // Emit start-transcription event
        socket.emit("start-transcription", {
          format: mimeType,
          sampleRate,
        });
      }

      // Handle audio data chunks
      mediaRecorder.addEventListener("dataavailable", (event: BlobEvent) => {
        if (event.data.size > 0) {
          if (socket?.connected) {
            // Stream chunks via WebSocket
            socket.emit("audio-chunk", event.data);
          } else {
            // Collect chunks for REST upload
            recordedChunksRef.current.push(event.data);
          }
        }
      });

      // Handle recording stop
      mediaRecorder.addEventListener("stop", () => {
        // If using REST fallback, send collected audio
        if (!useWs || !socket?.connected) {
          if (recordedChunksRef.current.length > 0) {
            const audioBlob = new Blob(recordedChunksRef.current, {
              type: mimeType,
            });
            void sendAudioViaRest(audioBlob);
          }
        }
      });

      // Handle errors
      mediaRecorder.addEventListener("error", () => {
        setError("Recording encountered an issue. Please try again.");
        setIsRecording(false);
        isRecordingRef.current = false;
      });

      // Start recording with timeslice for streaming chunks (250ms intervals)
      mediaRecorder.start(250);
      setIsRecording(true);
      isRecordingRef.current = true;
    } catch (err) {
      // Handle specific error types
      if (err instanceof DOMException) {
        if (err.name === "NotAllowedError") {
          setError(
            "Microphone access was not granted. Please allow microphone access to use voice input."
          );
        } else if (err.name === "NotFoundError") {
          setError("No microphone found. Please connect a microphone and try again.");
        } else {
          setError("Unable to access the microphone. Please check your device settings.");
        }
      } else {
        setError("Unable to start voice input. Please try again.");
      }

      // Clean up on failure
      stopMediaTracks();
      cleanupAudioAnalysis();
    }
  }, [
    useWs,
    sampleRate,
    setupAudioAnalysis,
    connectSocket,
    sendAudioViaRest,
    stopMediaTracks,
    cleanupAudioAnalysis,
  ]);

  /**
   * Stop microphone capture and transcription
   *
   * Stops the MediaRecorder (which triggers the "stop" handler and, in REST
   * mode, the upload), releases the media stream and audio analysis, then
   * notifies the server and disconnects the socket after a short delay.
   */
  const stopRecording = useCallback((): void => {
    setIsRecording(false);
    isRecordingRef.current = false;

    // Stop MediaRecorder
    if (mediaRecorderRef.current && mediaRecorderRef.current.state !== "inactive") {
      mediaRecorderRef.current.stop();
      mediaRecorderRef.current = null;
    }

    // Stop media tracks
    stopMediaTracks();

    // Clean up audio analysis
    cleanupAudioAnalysis();

    // Emit stop event and disconnect WebSocket
    if (socketRef.current) {
      socketRef.current.emit("stop-transcription");
      // Give the server a moment to process the final chunk before disconnecting
      setTimeout(() => {
        disconnectSocket();
      }, 500);
    }
  }, [stopMediaTracks, cleanupAudioAnalysis, disconnectSocket]);

  // Cleanup on unmount
  useEffect(() => {
    return (): void => {
      isRecordingRef.current = false;
      if (mediaRecorderRef.current && mediaRecorderRef.current.state !== "inactive") {
        mediaRecorderRef.current.stop();
      }
      stopMediaTracks();
      cleanupAudioAnalysis();
      disconnectSocket();
    };
  }, [stopMediaTracks, cleanupAudioAnalysis, disconnectSocket]);

  return {
    isRecording,
    startRecording,
    stopRecording,
    transcript,
    partialTranscript,
    error,
    audioLevel,
  };
}
|
||||
58
apps/web/src/lib/api/speech.ts
Normal file
58
apps/web/src/lib/api/speech.ts
Normal file
@@ -0,0 +1,58 @@
|
||||
/**
|
||||
* Speech API client
|
||||
* Handles text-to-speech synthesis and voice listing via /api/speech
|
||||
*/
|
||||
|
||||
import { apiGet } from "./client";
|
||||
import { API_BASE_URL } from "../config";
|
||||
|
||||
/** Metadata describing a single TTS voice offered by the speech API. */
export interface VoiceInfo {
  /** Stable identifier used to select this voice in SynthesizeOptions.voice. */
  id: string;
  /** Human-readable display name. */
  name: string;
  /** Voice language (presumably a BCP-47 tag such as "en-US" — confirm with the API). */
  language: string;
  /** Optional voice gender label, when the provider supplies one. */
  gender?: string;
  /** Optional URL of a short audio preview for this voice. */
  preview_url?: string;
}

/** Request body for POST /api/speech/synthesize. */
export interface SynthesizeOptions {
  /** Text to convert to speech. */
  text: string;
  /** Voice id to use (see VoiceInfo.id); server default applies when omitted. */
  voice?: string;
  /** Playback speed multiplier. */
  speed?: number;
  /** Desired audio output format. */
  format?: string;
  /** Quality/feature tier to synthesize with. */
  tier?: string;
}

/** Envelope returned by GET /api/speech/voices. */
export interface VoicesResponse {
  data: VoiceInfo[];
}
|
||||
|
||||
/**
|
||||
* Fetch available TTS voices
|
||||
*/
|
||||
export async function getVoices(): Promise<VoicesResponse> {
|
||||
return apiGet<VoicesResponse>("/api/speech/voices");
|
||||
}
|
||||
|
||||
/**
|
||||
* Synthesize text to speech audio
|
||||
* Returns the audio as a Blob since the API returns binary audio data
|
||||
*/
|
||||
export async function synthesizeSpeech(options: SynthesizeOptions): Promise<Blob> {
|
||||
const url = `${API_BASE_URL}/api/speech/synthesize`;
|
||||
|
||||
const response = await fetch(url, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
credentials: "include",
|
||||
body: JSON.stringify(options),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text().catch(() => "Unknown error");
|
||||
throw new Error(`Speech synthesis failed: ${errorText}`);
|
||||
}
|
||||
|
||||
return response.blob();
|
||||
}
|
||||
Reference in New Issue
Block a user