chore: upgrade Node.js runtime to v24 across codebase #419
178
apps/web/src/components/speech/AudioPlayer.test.tsx
Normal file
178
apps/web/src/components/speech/AudioPlayer.test.tsx
Normal file
@@ -0,0 +1,178 @@
|
||||
/**
|
||||
* @file AudioPlayer.test.tsx
|
||||
* @description Tests for the AudioPlayer component that provides inline TTS audio playback
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi, beforeEach } from "vitest";
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { AudioPlayer } from "./AudioPlayer";
|
||||
|
||||
// Mock HTMLAudioElement
|
||||
class MockAudio {
|
||||
src = "";
|
||||
currentTime = 0;
|
||||
duration = 60;
|
||||
paused = true;
|
||||
playbackRate = 1;
|
||||
volume = 1;
|
||||
onended: (() => void) | null = null;
|
||||
ontimeupdate: (() => void) | null = null;
|
||||
onloadedmetadata: (() => void) | null = null;
|
||||
onerror: ((e: unknown) => void) | null = null;
|
||||
|
||||
play(): Promise<void> {
|
||||
this.paused = false;
|
||||
return Promise.resolve();
|
||||
}
|
||||
|
||||
pause(): void {
|
||||
this.paused = true;
|
||||
}
|
||||
|
||||
addEventListener(event: string, handler: () => void): void {
|
||||
if (event === "ended") this.onended = handler;
|
||||
if (event === "timeupdate") this.ontimeupdate = handler;
|
||||
if (event === "loadedmetadata") this.onloadedmetadata = handler;
|
||||
if (event === "error") this.onerror = handler;
|
||||
}
|
||||
|
||||
removeEventListener(): void {
|
||||
// no-op for tests
|
||||
}
|
||||
}
|
||||
|
||||
// Replace the global Audio constructor so AudioPlayer's `new Audio(src)`
// creates a MockAudio instead of touching a real media element.
vi.stubGlobal("Audio", MockAudio);

describe("AudioPlayer", () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });

  // Static rendering: every control should be present for a valid src,
  // and the component should render nothing at all for a null src.
  describe("rendering", () => {
    it("should render play button", () => {
      render(<AudioPlayer src="blob:test-audio" />);

      const playButton = screen.getByRole("button", { name: "Play audio" });
      expect(playButton).toBeInTheDocument();
    });

    it("should render download button", () => {
      render(<AudioPlayer src="blob:test-audio" />);

      const downloadButton = screen.getByRole("button", { name: /download/i });
      expect(downloadButton).toBeInTheDocument();
    });

    it("should render time display showing 0:00", () => {
      render(<AudioPlayer src="blob:test-audio" />);

      expect(screen.getByText("0:00")).toBeInTheDocument();
    });

    it("should render speed control", () => {
      render(<AudioPlayer src="blob:test-audio" />);

      const speedButton = screen.getByRole("button", { name: "Playback speed" });
      expect(speedButton).toBeInTheDocument();
    });

    it("should render progress bar", () => {
      render(<AudioPlayer src="blob:test-audio" />);

      const progressBar = screen.getByRole("progressbar");
      expect(progressBar).toBeInTheDocument();
    });

    it("should not render when src is null", () => {
      const { container } = render(<AudioPlayer src={null} />);

      expect(container.firstChild).toBeNull();
    });
  });

  describe("play/pause", () => {
    it("should toggle to pause button when playing", async () => {
      const user = userEvent.setup();
      render(<AudioPlayer src="blob:test-audio" />);

      const playButton = screen.getByRole("button", { name: "Play audio" });
      await user.click(playButton);

      // aria-label flips from "Play audio" to "Pause audio" while playing.
      expect(screen.getByRole("button", { name: "Pause audio" })).toBeInTheDocument();
    });
  });

  // Speed cycles through SPEED_OPTIONS = [1, 1.5, 2, 0.5] and wraps.
  describe("speed control", () => {
    it("should cycle through speed options on click", async () => {
      const user = userEvent.setup();
      render(<AudioPlayer src="blob:test-audio" />);

      const speedButton = screen.getByRole("button", { name: "Playback speed" });

      // Default should be 1x
      expect(speedButton).toHaveTextContent("1x");

      // Click to go to 1.5x
      await user.click(speedButton);
      expect(speedButton).toHaveTextContent("1.5x");

      // Click to go to 2x
      await user.click(speedButton);
      expect(speedButton).toHaveTextContent("2x");

      // Click to go to 0.5x
      await user.click(speedButton);
      expect(speedButton).toHaveTextContent("0.5x");

      // Click to go back to 1x
      await user.click(speedButton);
      expect(speedButton).toHaveTextContent("1x");
    });
  });

  describe("accessibility", () => {
    it("should have proper aria labels on controls", () => {
      render(<AudioPlayer src="blob:test-audio" />);

      expect(screen.getByRole("button", { name: "Play audio" })).toBeInTheDocument();
      expect(screen.getByRole("button", { name: /download/i })).toBeInTheDocument();
      expect(screen.getByRole("button", { name: "Playback speed" })).toBeInTheDocument();
      expect(screen.getByRole("progressbar")).toHaveAttribute("aria-label");
    });

    it("should have region role on the player container", () => {
      render(<AudioPlayer src="blob:test-audio" />);

      expect(screen.getByRole("region", { name: /audio player/i })).toBeInTheDocument();
    });
  });

  // PDA-friendly design guard: no red Tailwind utility classes anywhere.
  describe("design", () => {
    it("should not use aggressive red colors", () => {
      const { container } = render(<AudioPlayer src="blob:test-audio" />);

      const allElements = container.querySelectorAll("*");
      allElements.forEach((el) => {
        // SVG elements expose className as SVGAnimatedString, hence the guard.
        const className = el.className;
        if (typeof className === "string") {
          expect(className).not.toMatch(/bg-red-|text-red-|border-red-/);
        }
      });
    });
  });

  describe("callbacks", () => {
    it("should call onPlayStateChange when play state changes", async () => {
      const onPlayStateChange = vi.fn();
      const user = userEvent.setup();

      render(<AudioPlayer src="blob:test-audio" onPlayStateChange={onPlayStateChange} />);

      const playButton = screen.getByRole("button", { name: "Play audio" });
      await user.click(playButton);

      expect(onPlayStateChange).toHaveBeenCalledWith(true);
    });
  });
});
|
||||
250
apps/web/src/components/speech/AudioPlayer.tsx
Normal file
250
apps/web/src/components/speech/AudioPlayer.tsx
Normal file
@@ -0,0 +1,250 @@
|
||||
/**
|
||||
* AudioPlayer Component
|
||||
* Inline audio player for TTS content with play/pause, progress,
|
||||
* speed control, download, and duration display.
|
||||
*
|
||||
* Follows PDA-friendly design: no aggressive colors, calm interface.
|
||||
*/
|
||||
|
||||
import { useState, useRef, useEffect, useCallback } from "react";
|
||||
import type { ReactElement } from "react";
|
||||
|
||||
/** Playback speed options */
|
||||
const SPEED_OPTIONS = [1, 1.5, 2, 0.5] as const;
|
||||
|
||||
export interface AudioPlayerProps {
|
||||
/** URL of the audio to play (blob URL or HTTP URL). If null, nothing renders. */
|
||||
src: string | null;
|
||||
/** Whether to auto-play when src changes */
|
||||
autoPlay?: boolean;
|
||||
/** Callback when play state changes */
|
||||
onPlayStateChange?: (isPlaying: boolean) => void;
|
||||
/** Optional className for the container */
|
||||
className?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Format seconds into M:SS display
|
||||
*/
|
||||
function formatTime(seconds: number): string {
|
||||
if (!isFinite(seconds) || seconds < 0) return "0:00";
|
||||
const mins = Math.floor(seconds / 60);
|
||||
const secs = Math.floor(seconds % 60);
|
||||
return `${String(mins)}:${String(secs).padStart(2, "0")}`;
|
||||
}
|
||||
|
||||
/**
|
||||
* AudioPlayer displays an inline audio player with controls for
|
||||
* play/pause, progress tracking, speed adjustment, and download.
|
||||
*/
|
||||
export function AudioPlayer({
|
||||
src,
|
||||
autoPlay = false,
|
||||
onPlayStateChange,
|
||||
className = "",
|
||||
}: AudioPlayerProps): ReactElement | null {
|
||||
const [isPlaying, setIsPlaying] = useState(false);
|
||||
const [currentTime, setCurrentTime] = useState(0);
|
||||
const [duration, setDuration] = useState(0);
|
||||
const [speedIndex, setSpeedIndex] = useState(0);
|
||||
|
||||
const audioRef = useRef<HTMLAudioElement | null>(null);
|
||||
|
||||
/**
|
||||
* Set up audio element when src changes
|
||||
*/
|
||||
useEffect((): (() => void) | undefined => {
|
||||
if (!src) return undefined;
|
||||
|
||||
const audio = new Audio(src);
|
||||
audioRef.current = audio;
|
||||
|
||||
const onLoadedMetadata = (): void => {
|
||||
if (isFinite(audio.duration)) {
|
||||
setDuration(audio.duration);
|
||||
}
|
||||
};
|
||||
|
||||
const onTimeUpdate = (): void => {
|
||||
setCurrentTime(audio.currentTime);
|
||||
};
|
||||
|
||||
const onEnded = (): void => {
|
||||
setIsPlaying(false);
|
||||
setCurrentTime(0);
|
||||
onPlayStateChange?.(false);
|
||||
};
|
||||
|
||||
audio.addEventListener("loadedmetadata", onLoadedMetadata);
|
||||
audio.addEventListener("timeupdate", onTimeUpdate);
|
||||
audio.addEventListener("ended", onEnded);
|
||||
|
||||
if (autoPlay) {
|
||||
void audio.play().then(() => {
|
||||
setIsPlaying(true);
|
||||
onPlayStateChange?.(true);
|
||||
});
|
||||
}
|
||||
|
||||
return (): void => {
|
||||
audio.pause();
|
||||
audio.removeEventListener("loadedmetadata", onLoadedMetadata);
|
||||
audio.removeEventListener("timeupdate", onTimeUpdate);
|
||||
audio.removeEventListener("ended", onEnded);
|
||||
audioRef.current = null;
|
||||
};
|
||||
}, [src, autoPlay, onPlayStateChange]);
|
||||
|
||||
/**
|
||||
* Toggle play/pause
|
||||
*/
|
||||
const togglePlayPause = useCallback(async (): Promise<void> => {
|
||||
const audio = audioRef.current;
|
||||
if (!audio) return;
|
||||
|
||||
if (isPlaying) {
|
||||
audio.pause();
|
||||
setIsPlaying(false);
|
||||
onPlayStateChange?.(false);
|
||||
} else {
|
||||
await audio.play();
|
||||
setIsPlaying(true);
|
||||
onPlayStateChange?.(true);
|
||||
}
|
||||
}, [isPlaying, onPlayStateChange]);
|
||||
|
||||
/**
|
||||
* Cycle through speed options
|
||||
*/
|
||||
const cycleSpeed = useCallback((): void => {
|
||||
const nextIndex = (speedIndex + 1) % SPEED_OPTIONS.length;
|
||||
setSpeedIndex(nextIndex);
|
||||
|
||||
const audio = audioRef.current;
|
||||
if (audio) {
|
||||
audio.playbackRate = SPEED_OPTIONS[nextIndex] ?? 1;
|
||||
}
|
||||
}, [speedIndex]);
|
||||
|
||||
/**
|
||||
* Handle progress bar click for seeking
|
||||
*/
|
||||
const handleProgressClick = useCallback(
|
||||
(event: React.MouseEvent<HTMLDivElement>): void => {
|
||||
const audio = audioRef.current;
|
||||
if (!audio || !duration) return;
|
||||
|
||||
const rect = event.currentTarget.getBoundingClientRect();
|
||||
const clickX = event.clientX - rect.left;
|
||||
const fraction = clickX / rect.width;
|
||||
audio.currentTime = fraction * duration;
|
||||
setCurrentTime(audio.currentTime);
|
||||
},
|
||||
[duration]
|
||||
);
|
||||
|
||||
/**
|
||||
* Handle download
|
||||
*/
|
||||
const handleDownload = useCallback((): void => {
|
||||
if (!src) return;
|
||||
|
||||
const link = document.createElement("a");
|
||||
link.href = src;
|
||||
link.download = "speech-audio.mp3";
|
||||
document.body.appendChild(link);
|
||||
link.click();
|
||||
document.body.removeChild(link);
|
||||
}, [src]);
|
||||
|
||||
// Don't render if no source
|
||||
if (!src) return null;
|
||||
|
||||
const progress = duration > 0 ? (currentTime / duration) * 100 : 0;
|
||||
const currentSpeed = SPEED_OPTIONS[speedIndex] ?? 1;
|
||||
|
||||
return (
|
||||
<div
|
||||
role="region"
|
||||
aria-label="Audio player"
|
||||
className={`flex items-center gap-2 rounded-lg border border-gray-200 bg-gray-50 px-3 py-2 ${className}`}
|
||||
>
|
||||
{/* Play/Pause Button */}
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => void togglePlayPause()}
|
||||
aria-label={isPlaying ? "Pause audio" : "Play audio"}
|
||||
className="flex h-8 w-8 shrink-0 items-center justify-center rounded-full bg-blue-500 text-white transition-colors hover:bg-blue-600 focus:outline-none focus:ring-2 focus:ring-blue-300"
|
||||
>
|
||||
{isPlaying ? (
|
||||
<svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true">
|
||||
<rect x="6" y="4" width="4" height="16" rx="1" />
|
||||
<rect x="14" y="4" width="4" height="16" rx="1" />
|
||||
</svg>
|
||||
) : (
|
||||
<svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true">
|
||||
<polygon points="6,4 20,12 6,20" />
|
||||
</svg>
|
||||
)}
|
||||
</button>
|
||||
|
||||
{/* Time Display */}
|
||||
<span className="min-w-[3.5rem] text-xs text-gray-500 tabular-nums">
|
||||
{formatTime(currentTime)}
|
||||
{duration > 0 && <span className="text-gray-400"> / {formatTime(duration)}</span>}
|
||||
</span>
|
||||
|
||||
{/* Progress Bar */}
|
||||
<div
|
||||
role="progressbar"
|
||||
aria-label="Audio progress"
|
||||
aria-valuenow={Math.round(progress)}
|
||||
aria-valuemin={0}
|
||||
aria-valuemax={100}
|
||||
className="relative h-1.5 flex-1 cursor-pointer rounded-full bg-gray-200"
|
||||
onClick={handleProgressClick}
|
||||
>
|
||||
<div
|
||||
className="absolute left-0 top-0 h-full rounded-full bg-blue-400 transition-all"
|
||||
style={{ width: `${String(Math.min(progress, 100))}%` }}
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Speed Control */}
|
||||
<button
|
||||
type="button"
|
||||
onClick={cycleSpeed}
|
||||
aria-label="Playback speed"
|
||||
className="min-w-[2.5rem] rounded px-1.5 py-0.5 text-xs font-medium text-gray-600 transition-colors hover:bg-gray-200 focus:outline-none focus:ring-2 focus:ring-blue-300"
|
||||
>
|
||||
{String(currentSpeed)}x
|
||||
</button>
|
||||
|
||||
{/* Download Button */}
|
||||
<button
|
||||
type="button"
|
||||
onClick={handleDownload}
|
||||
aria-label="Download audio"
|
||||
className="flex h-7 w-7 shrink-0 items-center justify-center rounded text-gray-500 transition-colors hover:bg-gray-200 hover:text-gray-700 focus:outline-none focus:ring-2 focus:ring-blue-300"
|
||||
>
|
||||
<svg
|
||||
width="14"
|
||||
height="14"
|
||||
viewBox="0 0 24 24"
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
strokeWidth="2"
|
||||
strokeLinecap="round"
|
||||
strokeLinejoin="round"
|
||||
aria-hidden="true"
|
||||
>
|
||||
<path d="M21 15v4a2 2 0 01-2 2H5a2 2 0 01-2-2v-4" />
|
||||
<polyline points="7 10 12 15 17 10" />
|
||||
<line x1="12" y1="15" x2="12" y2="3" />
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default AudioPlayer;
|
||||
70
apps/web/src/components/speech/AudioVisualizer.test.tsx
Normal file
70
apps/web/src/components/speech/AudioVisualizer.test.tsx
Normal file
@@ -0,0 +1,70 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import { AudioVisualizer } from "./AudioVisualizer";
|
||||
|
||||
describe("AudioVisualizer", (): void => {
  it("should render the visualizer container", (): void => {
    render(<AudioVisualizer audioLevel={0} isActive={false} />);

    const container = screen.getByTestId("audio-visualizer");
    expect(container).toBeInTheDocument();
  });

  it("should render visualization bars", (): void => {
    render(<AudioVisualizer audioLevel={0.5} isActive={true} />);

    const bars = screen.getAllByTestId("visualizer-bar");
    expect(bars.length).toBeGreaterThan(0);
  });

  it("should show inactive state when not active", (): void => {
    render(<AudioVisualizer audioLevel={0} isActive={false} />);

    const container = screen.getByTestId("audio-visualizer");
    expect(container).toBeInTheDocument();
    // Bars should be at minimum height when inactive
    const bars = screen.getAllByTestId("visualizer-bar");
    bars.forEach((bar) => {
      const style = bar.getAttribute("style");
      expect(style).toContain("height");
    });
  });

  it("should reflect audio level in bar heights when active", (): void => {
    render(<AudioVisualizer audioLevel={0.8} isActive={true} />);

    const bars = screen.getAllByTestId("visualizer-bar");
    // At least one bar should have non-minimal height
    // (4px is the inactive/minimum bar height in the component).
    const hasActiveBars = bars.some((bar) => {
      const style = bar.getAttribute("style") ?? "";
      const heightMatch = /height:\s*(\d+)/.exec(style);
      return heightMatch?.[1] ? parseInt(heightMatch[1], 10) > 4 : false;
    });
    expect(hasActiveBars).toBe(true);
  });

  // PDA-friendly design guard: no red Tailwind utility classes.
  it("should use calm colors (no aggressive reds)", (): void => {
    render(<AudioVisualizer audioLevel={0.5} isActive={true} />);

    const container = screen.getByTestId("audio-visualizer");
    const allElements = container.querySelectorAll("*");
    allElements.forEach((el) => {
      const className = (el as HTMLElement).className;
      expect(className).not.toMatch(/bg-red-|text-red-/);
    });
  });

  it("should accept custom className", (): void => {
    render(<AudioVisualizer audioLevel={0.5} isActive={true} className="custom-class" />);

    const container = screen.getByTestId("audio-visualizer");
    expect(container.className).toContain("custom-class");
  });

  it("should render with configurable bar count", (): void => {
    render(<AudioVisualizer audioLevel={0.5} isActive={true} barCount={8} />);

    const bars = screen.getAllByTestId("visualizer-bar");
    expect(bars).toHaveLength(8);
  });
});
|
||||
87
apps/web/src/components/speech/AudioVisualizer.tsx
Normal file
87
apps/web/src/components/speech/AudioVisualizer.tsx
Normal file
@@ -0,0 +1,87 @@
|
||||
/**
|
||||
* AudioVisualizer component
|
||||
*
|
||||
* Displays a simple audio level visualization using bars.
|
||||
* Uses the Web Audio API's AnalyserNode data (passed as audioLevel)
|
||||
* to show microphone input levels during recording.
|
||||
*
|
||||
* Design: Calm, non-aggressive colors following PDA-friendly guidelines.
|
||||
*/
|
||||
|
||||
import { useMemo } from "react";
|
||||
|
||||
export interface AudioVisualizerProps {
|
||||
/** Current audio level (0-1) */
|
||||
audioLevel: number;
|
||||
/** Whether the visualizer is actively listening */
|
||||
isActive: boolean;
|
||||
/** Number of bars to display (default: 5) */
|
||||
barCount?: number;
|
||||
/** Additional CSS classes */
|
||||
className?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate bar heights based on audio level.
|
||||
* Creates a natural-looking wave pattern where center bars are taller.
|
||||
*/
|
||||
function generateBarHeights(level: number, count: number): number[] {
|
||||
const heights: number[] = [];
|
||||
const center = (count - 1) / 2;
|
||||
|
||||
for (let i = 0; i < count; i++) {
|
||||
// Distance from center (0-1)
|
||||
const distFromCenter = Math.abs(i - center) / center;
|
||||
// Center bars are taller, edge bars shorter
|
||||
const multiplier = 1 - distFromCenter * 0.5;
|
||||
// Min height 4px, max height 24px when active
|
||||
const minHeight = 4;
|
||||
const maxHeight = 24;
|
||||
const height = minHeight + level * (maxHeight - minHeight) * multiplier;
|
||||
heights.push(Math.round(height));
|
||||
}
|
||||
|
||||
return heights;
|
||||
}
|
||||
|
||||
/**
|
||||
* Audio level visualizer with animated bars.
|
||||
* Shows microphone input levels during voice recording.
|
||||
*/
|
||||
export function AudioVisualizer({
|
||||
audioLevel,
|
||||
isActive,
|
||||
barCount = 5,
|
||||
className = "",
|
||||
}: AudioVisualizerProps): React.JSX.Element {
|
||||
const barHeights = useMemo(() => {
|
||||
if (!isActive) {
|
||||
return Array.from({ length: barCount }, () => 4);
|
||||
}
|
||||
return generateBarHeights(audioLevel, barCount);
|
||||
}, [audioLevel, isActive, barCount]);
|
||||
|
||||
return (
|
||||
<div
|
||||
data-testid="audio-visualizer"
|
||||
className={`flex items-center gap-0.5 ${className}`}
|
||||
role="img"
|
||||
aria-label={
|
||||
isActive
|
||||
? `Audio level: ${String(Math.round(audioLevel * 100))}%`
|
||||
: "Audio visualizer inactive"
|
||||
}
|
||||
>
|
||||
{barHeights.map((height, index) => (
|
||||
<div
|
||||
key={index}
|
||||
data-testid="visualizer-bar"
|
||||
className={`w-1 rounded-full transition-all duration-150 ease-out ${
|
||||
isActive ? "bg-sky-400" : "bg-slate-300 dark:bg-slate-600"
|
||||
}`}
|
||||
style={{ height: `${height.toString()}px` }}
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
218
apps/web/src/components/speech/TextToSpeechButton.test.tsx
Normal file
218
apps/web/src/components/speech/TextToSpeechButton.test.tsx
Normal file
@@ -0,0 +1,218 @@
|
||||
/**
|
||||
* @file TextToSpeechButton.test.tsx
|
||||
* @description Tests for the TextToSpeechButton "Read aloud" component
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi, beforeEach } from "vitest";
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { TextToSpeechButton } from "./TextToSpeechButton";
|
||||
|
||||
// Mock the useTextToSpeech hook
|
||||
// Stable spies shared across tests; reset in beforeEach via clearAllMocks.
const mockSynthesize = vi.fn();
const mockPlay = vi.fn();
const mockPause = vi.fn();
const mockStop = vi.fn();

// Replace the real hook module with a factory returning an idle TTS state.
// Individual tests override via mockUseTextToSpeech.mockReturnValue(...).
vi.mock("@/hooks/useTextToSpeech", () => ({
  useTextToSpeech: vi.fn(() => ({
    synthesize: mockSynthesize,
    play: mockPlay,
    pause: mockPause,
    stop: mockStop,
    audioUrl: null,
    isLoading: false,
    error: null,
    isPlaying: false,
    duration: 0,
    currentTime: 0,
  })),
}));

// Import after mocking
import { useTextToSpeech } from "@/hooks/useTextToSpeech";

// NOTE(review): this cast types the mock as the vi.fn factory rather than
// the mocked hook; `vi.mocked(useTextToSpeech)` would be the tighter idiom.
const mockUseTextToSpeech = useTextToSpeech as ReturnType<typeof vi.fn>;
|
||||
|
||||
// Mock HTMLAudioElement for AudioPlayer used inside TextToSpeechButton
|
||||
class MockAudio {
|
||||
src = "";
|
||||
currentTime = 0;
|
||||
duration = 60;
|
||||
paused = true;
|
||||
playbackRate = 1;
|
||||
volume = 1;
|
||||
onended: (() => void) | null = null;
|
||||
ontimeupdate: (() => void) | null = null;
|
||||
onloadedmetadata: (() => void) | null = null;
|
||||
onerror: ((e: unknown) => void) | null = null;
|
||||
|
||||
play(): Promise<void> {
|
||||
this.paused = false;
|
||||
return Promise.resolve();
|
||||
}
|
||||
|
||||
pause(): void {
|
||||
this.paused = true;
|
||||
}
|
||||
|
||||
addEventListener(): void {
|
||||
// no-op
|
||||
}
|
||||
|
||||
removeEventListener(): void {
|
||||
// no-op
|
||||
}
|
||||
}
|
||||
|
||||
// The inner AudioPlayer constructs `new Audio(src)`; route it to the mock.
vi.stubGlobal("Audio", MockAudio);

describe("TextToSpeechButton", () => {
  beforeEach(() => {
    vi.clearAllMocks();
    // Re-establish the idle hook state after clearAllMocks wipes it.
    mockUseTextToSpeech.mockReturnValue({
      synthesize: mockSynthesize,
      play: mockPlay,
      pause: mockPause,
      stop: mockStop,
      audioUrl: null,
      isLoading: false,
      error: null,
      isPlaying: false,
      duration: 0,
      currentTime: 0,
    });
  });

  describe("rendering", () => {
    it("should render a read aloud button", () => {
      render(<TextToSpeechButton text="Hello world" />);

      const button = screen.getByRole("button", { name: /read aloud/i });
      expect(button).toBeInTheDocument();
    });

    it("should not render AudioPlayer initially when no audio is synthesized", () => {
      render(<TextToSpeechButton text="Hello world" />);

      // AudioPlayer only mounts once the hook reports an audioUrl.
      expect(screen.queryByRole("region", { name: /audio player/i })).not.toBeInTheDocument();
    });
  });

  describe("click behavior", () => {
    it("should call synthesize with text on click", async () => {
      const user = userEvent.setup();
      mockSynthesize.mockResolvedValueOnce(undefined);

      render(<TextToSpeechButton text="Hello world" />);

      const button = screen.getByRole("button", { name: /read aloud/i });
      await user.click(button);

      // With neither voice nor tier set, options must be undefined, not {}.
      expect(mockSynthesize).toHaveBeenCalledWith("Hello world", undefined);
    });

    it("should pass voice and tier options when provided", async () => {
      const user = userEvent.setup();
      mockSynthesize.mockResolvedValueOnce(undefined);

      render(<TextToSpeechButton text="Hello" voice="alloy" tier="premium" />);

      const button = screen.getByRole("button", { name: /read aloud/i });
      await user.click(button);

      expect(mockSynthesize).toHaveBeenCalledWith("Hello", {
        voice: "alloy",
        tier: "premium",
      });
    });
  });

  describe("loading state", () => {
    it("should show loading indicator while synthesizing", () => {
      mockUseTextToSpeech.mockReturnValue({
        synthesize: mockSynthesize,
        play: mockPlay,
        pause: mockPause,
        stop: mockStop,
        audioUrl: null,
        isLoading: true,
        error: null,
        isPlaying: false,
        duration: 0,
        currentTime: 0,
      });

      render(<TextToSpeechButton text="Hello world" />);

      // While loading the aria-label switches to "Synthesizing speech".
      const button = screen.getByRole("button", { name: /synthesizing/i });
      expect(button).toBeInTheDocument();
      expect(button).toBeDisabled();
    });
  });

  describe("audio player integration", () => {
    it("should show AudioPlayer when audio is available", () => {
      mockUseTextToSpeech.mockReturnValue({
        synthesize: mockSynthesize,
        play: mockPlay,
        pause: mockPause,
        stop: mockStop,
        audioUrl: "blob:mock-url",
        isLoading: false,
        error: null,
        isPlaying: false,
        duration: 30,
        currentTime: 0,
      });

      render(<TextToSpeechButton text="Hello world" />);

      expect(screen.getByRole("region", { name: /audio player/i })).toBeInTheDocument();
    });
  });

  describe("error state", () => {
    it("should display error message when synthesis fails", () => {
      mockUseTextToSpeech.mockReturnValue({
        synthesize: mockSynthesize,
        play: mockPlay,
        pause: mockPause,
        stop: mockStop,
        audioUrl: null,
        isLoading: false,
        error: "Synthesis failed",
        isPlaying: false,
        duration: 0,
        currentTime: 0,
      });

      render(<TextToSpeechButton text="Hello world" />);

      expect(screen.getByText(/synthesis failed/i)).toBeInTheDocument();
    });
  });

  describe("accessibility", () => {
    it("should have proper aria label on button", () => {
      render(<TextToSpeechButton text="Hello world" />);

      const button = screen.getByRole("button", { name: /read aloud/i });
      expect(button).toBeInTheDocument();
    });
  });

  // PDA-friendly design guard: no red Tailwind utility classes.
  describe("design", () => {
    it("should not use aggressive colors", () => {
      const { container } = render(<TextToSpeechButton text="Hello world" />);

      const allElements = container.querySelectorAll("*");
      allElements.forEach((el) => {
        // SVG elements expose className as SVGAnimatedString, hence the guard.
        const className = el.className;
        if (typeof className === "string") {
          expect(className).not.toMatch(/bg-red-|text-red-|border-red-/);
        }
      });
    });
  });
});
|
||||
126
apps/web/src/components/speech/TextToSpeechButton.tsx
Normal file
126
apps/web/src/components/speech/TextToSpeechButton.tsx
Normal file
@@ -0,0 +1,126 @@
|
||||
/**
|
||||
* TextToSpeechButton Component
|
||||
* "Read aloud" button that synthesizes text and plays it via AudioPlayer.
|
||||
*
|
||||
* Accepts text as a prop, with optional voice and tier selection.
|
||||
* Shows loading state during synthesis and integrates AudioPlayer for playback.
|
||||
*
|
||||
* Follows PDA-friendly design: no aggressive colors, calm interface.
|
||||
*/
|
||||
|
||||
import { useCallback } from "react";
|
||||
import type { ReactElement } from "react";
|
||||
import { useTextToSpeech } from "@/hooks/useTextToSpeech";
|
||||
import type { SynthesizeOptions } from "@/hooks/useTextToSpeech";
|
||||
import { AudioPlayer } from "./AudioPlayer";
|
||||
|
||||
export interface TextToSpeechButtonProps {
|
||||
/** The text to synthesize to speech */
|
||||
text: string;
|
||||
/** Optional voice ID to use */
|
||||
voice?: string;
|
||||
/** Optional tier (e.g. "standard", "premium") */
|
||||
tier?: string;
|
||||
/** Optional className for the container */
|
||||
className?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* TextToSpeechButton provides a "Read aloud" button that synthesizes
|
||||
* the given text and displays an AudioPlayer for playback control.
|
||||
*/
|
||||
/**
 * TextToSpeechButton provides a "Read aloud" button that synthesizes
 * the given text and displays an AudioPlayer for playback control.
 * Renders, top to bottom: the trigger button (with spinner while
 * loading), an amber error line when the hook reports an error, and an
 * AudioPlayer once an audioUrl is available.
 */
export function TextToSpeechButton({
  text,
  voice,
  tier,
  className = "",
}: TextToSpeechButtonProps): ReactElement {
  const { synthesize, audioUrl, isLoading, error } = useTextToSpeech();

  /**
   * Handle read aloud button click: build the options object only when at
   * least one option is set, so synthesize receives `undefined` (not `{}`)
   * in the default case — the tests assert exactly that.
   */
  const handleClick = useCallback(async (): Promise<void> => {
    let options: SynthesizeOptions | undefined;

    if (voice !== undefined || tier !== undefined) {
      options = {};
      // Assign keys individually so absent props stay absent
      // (compatible with exactOptionalPropertyTypes).
      if (voice !== undefined) options.voice = voice;
      if (tier !== undefined) options.tier = tier;
    }

    await synthesize(text, options);
  }, [text, voice, tier, synthesize]);

  return (
    <div className={`flex flex-col gap-2 ${className}`}>
      {/* Read Aloud Button */}
      <button
        type="button"
        onClick={() => void handleClick()}
        disabled={isLoading}
        aria-label={isLoading ? "Synthesizing speech" : "Read aloud"}
        className="inline-flex items-center gap-2 rounded-lg border border-gray-200 bg-white px-3 py-1.5 text-sm font-medium text-gray-700 transition-colors hover:bg-gray-50 focus:outline-none focus:ring-2 focus:ring-blue-300 disabled:cursor-not-allowed disabled:opacity-50"
      >
        {isLoading ? (
          <>
            {/* Spinner */}
            <svg
              className="h-4 w-4 animate-spin text-gray-500"
              viewBox="0 0 24 24"
              fill="none"
              aria-hidden="true"
            >
              <circle
                cx="12"
                cy="12"
                r="10"
                stroke="currentColor"
                strokeWidth="3"
                className="opacity-25"
              />
              <path
                fill="currentColor"
                d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z"
                className="opacity-75"
              />
            </svg>
            <span>Synthesizing...</span>
          </>
        ) : (
          <>
            {/* Speaker Icon */}
            <svg
              width="16"
              height="16"
              viewBox="0 0 24 24"
              fill="none"
              stroke="currentColor"
              strokeWidth="2"
              strokeLinecap="round"
              strokeLinejoin="round"
              aria-hidden="true"
            >
              <polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5" />
              <path d="M15.54 8.46a5 5 0 010 7.07" />
              <path d="M19.07 4.93a10 10 0 010 14.14" />
            </svg>
            <span>Read aloud</span>
          </>
        )}
      </button>

      {/* Error Display — amber, not red, per PDA-friendly design */}
      {error && (
        <p className="text-sm text-amber-600" role="alert">
          {error}
        </p>
      )}

      {/* Audio Player (shown after synthesis) */}
      {audioUrl && <AudioPlayer src={audioUrl} />}
    </div>
  );
}
|
||||
|
||||
export default TextToSpeechButton;
|
||||
228
apps/web/src/components/speech/VoiceInput.test.tsx
Normal file
228
apps/web/src/components/speech/VoiceInput.test.tsx
Normal file
@@ -0,0 +1,228 @@
|
||||
import { describe, it, expect, vi, beforeEach } from "vitest";
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { VoiceInput } from "./VoiceInput";
|
||||
|
||||
// Mock the useVoiceInput hook
|
||||
// Mock the useVoiceInput hook.
// Shared spies so individual tests can assert call counts on the hook's
// start/stop functions regardless of which mockReturnValue is active.
const mockStartRecording = vi.fn();
const mockStopRecording = vi.fn();

// NOTE: vitest hoists vi.mock() calls above the file's imports, so this
// factory is registered before the component module resolves the real hook.
vi.mock("@/hooks/useVoiceInput", () => ({
  useVoiceInput: vi.fn(() => ({
    isRecording: false,
    startRecording: mockStartRecording,
    stopRecording: mockStopRecording,
    transcript: "",
    partialTranscript: "",
    error: null,
    audioLevel: 0,
  })),
}));

// We need to import after mocking — this binding resolves to the mocked
// module above, letting tests drive it via vi.mocked(useVoiceInput).
import { useVoiceInput } from "@/hooks/useVoiceInput";
|
||||
|
||||
describe("VoiceInput", (): void => {
  /** Canonical (idle) return value of the mocked useVoiceInput hook. */
  const baseHookState: ReturnType<typeof useVoiceInput> = {
    isRecording: false,
    startRecording: mockStartRecording,
    stopRecording: mockStopRecording,
    transcript: "",
    partialTranscript: "",
    error: null,
    audioLevel: 0,
  };

  /**
   * Build a hook return value from the idle defaults plus per-test overrides.
   * Replaces nine hand-copied literals of the same object, so adding a field
   * to the hook's return type only needs one edit here.
   */
  function hookState(
    overrides: Partial<ReturnType<typeof useVoiceInput>> = {}
  ): ReturnType<typeof useVoiceInput> {
    return { ...baseHookState, ...overrides };
  }

  beforeEach((): void => {
    vi.clearAllMocks();
    // Reset mock implementation to default
    vi.mocked(useVoiceInput).mockReturnValue(hookState());
  });

  it("should render a microphone button", (): void => {
    render(<VoiceInput />);

    const button = screen.getByRole("button", {
      name: /start voice input/i,
    });
    expect(button).toBeInTheDocument();
  });

  it("should have accessible aria label", (): void => {
    render(<VoiceInput />);

    const button = screen.getByRole("button", {
      name: /start voice input/i,
    });
    expect(button).toHaveAttribute("aria-label", "Start voice input");
  });

  it("should call startRecording when mic button is clicked", async (): Promise<void> => {
    const user = userEvent.setup();
    render(<VoiceInput />);

    const button = screen.getByRole("button", {
      name: /start voice input/i,
    });
    await user.click(button);

    expect(mockStartRecording).toHaveBeenCalledTimes(1);
  });

  it("should show recording state when isRecording is true", (): void => {
    vi.mocked(useVoiceInput).mockReturnValue(
      hookState({ isRecording: true, audioLevel: 0.5 })
    );

    render(<VoiceInput />);

    const button = screen.getByRole("button", {
      name: /stop voice input/i,
    });
    expect(button).toBeInTheDocument();
  });

  it("should call stopRecording when mic button is clicked while recording", async (): Promise<void> => {
    const user = userEvent.setup();

    vi.mocked(useVoiceInput).mockReturnValue(
      hookState({ isRecording: true, audioLevel: 0.5 })
    );

    render(<VoiceInput />);

    const button = screen.getByRole("button", {
      name: /stop voice input/i,
    });
    await user.click(button);

    expect(mockStopRecording).toHaveBeenCalledTimes(1);
  });

  it("should display partial transcription text", (): void => {
    vi.mocked(useVoiceInput).mockReturnValue(
      hookState({ isRecording: true, partialTranscript: "hello worl", audioLevel: 0.3 })
    );

    render(<VoiceInput />);

    expect(screen.getByText("hello worl")).toBeInTheDocument();
  });

  it("should display final transcript text", (): void => {
    vi.mocked(useVoiceInput).mockReturnValue(hookState({ transcript: "hello world" }));

    render(<VoiceInput />);

    expect(screen.getByText("hello world")).toBeInTheDocument();
  });

  it("should display error message", (): void => {
    vi.mocked(useVoiceInput).mockReturnValue(
      hookState({ error: "Microphone access not available" })
    );

    render(<VoiceInput />);

    expect(screen.getByText("Microphone access not available")).toBeInTheDocument();
  });

  it("should call onTranscript callback prop", (): void => {
    const onTranscript = vi.fn();

    vi.mocked(useVoiceInput).mockReturnValue(hookState({ transcript: "final text" }));

    render(<VoiceInput onTranscript={onTranscript} />);

    // The onTranscript prop is passed to the hook - we verify the prop is accepted
    expect(useVoiceInput).toHaveBeenCalledWith(
      expect.objectContaining({
        onTranscript,
      })
    );
  });

  it("should use calm, non-aggressive design for recording indicator", (): void => {
    vi.mocked(useVoiceInput).mockReturnValue(
      hookState({ isRecording: true, audioLevel: 0.5 })
    );

    render(<VoiceInput />);

    // Check there are no aggressive red colors in the recording state
    const button = screen.getByRole("button", { name: /stop voice input/i });
    const className = button.className;
    expect(className).not.toMatch(/bg-red-|text-red-|border-red-/);
  });

  it("should use calm design for error display", (): void => {
    vi.mocked(useVoiceInput).mockReturnValue(
      hookState({ error: "Something went wrong" })
    );

    render(<VoiceInput />);

    const errorEl = screen.getByText("Something went wrong");
    const className = errorEl.className;
    expect(className).not.toMatch(/text-red-600|bg-red-/);
  });

  it("should be disabled when disabled prop is true", (): void => {
    render(<VoiceInput disabled />);

    const button = screen.getByRole("button", {
      name: /start voice input/i,
    });
    expect(button).toBeDisabled();
  });
});
|
||||
146
apps/web/src/components/speech/VoiceInput.tsx
Normal file
146
apps/web/src/components/speech/VoiceInput.tsx
Normal file
@@ -0,0 +1,146 @@
|
||||
/**
|
||||
* VoiceInput component
|
||||
*
|
||||
* Provides a microphone button with visual feedback for voice input.
|
||||
* Click to start/stop recording with real-time transcription display.
|
||||
*
|
||||
* Design principles:
|
||||
* - PDA-friendly: calm, non-aggressive colors
|
||||
* - Gentle pulsing animation for recording state (blue/green)
|
||||
* - Mobile-friendly touch interaction
|
||||
* - Accessible with proper aria labels
|
||||
*/
|
||||
|
||||
import { useVoiceInput } from "@/hooks/useVoiceInput";
|
||||
import type { UseVoiceInputOptions } from "@/hooks/useVoiceInput";
|
||||
import { AudioVisualizer } from "./AudioVisualizer";
|
||||
import { Mic, MicOff } from "lucide-react";
|
||||
|
||||
/** Props for the {@link VoiceInput} microphone/transcription component. */
export interface VoiceInputProps {
  /** Callback fired when final transcription is received */
  onTranscript?: (text: string) => void;
  /** Whether to use WebSocket streaming (default: true) */
  useWebSocket?: boolean;
  /** Whether the input is disabled (disables the mic button) */
  disabled?: boolean;
  /** Additional CSS classes for the container */
  className?: string;
}
|
||||
|
||||
/**
 * Voice input component with microphone capture and real-time transcription.
 * Shows a mic button that toggles recording, with visual feedback
 * and transcription text display.
 *
 * State (recording flag, transcripts, error, audio level) comes entirely from
 * the useVoiceInput hook; this component is presentation + click wiring only.
 */
export function VoiceInput({
  onTranscript,
  useWebSocket: useWs,
  disabled = false,
  className = "",
}: VoiceInputProps): React.JSX.Element {
  // Build the hook options conditionally so undefined props are omitted
  // entirely rather than passed as explicit `undefined` values.
  const hookOptions: UseVoiceInputOptions = {};
  if (onTranscript !== undefined) {
    hookOptions.onTranscript = onTranscript;
  }
  if (useWs !== undefined) {
    hookOptions.useWebSocket = useWs;
  }

  const {
    isRecording,
    startRecording,
    stopRecording,
    transcript,
    partialTranscript,
    error,
    audioLevel,
  } = useVoiceInput(hookOptions);

  // Toggle recording. startRecording returns a promise (it is called with
  // `void` to mark the result as intentionally ignored); stopRecording is sync.
  const handleClick = (): void => {
    if (isRecording) {
      stopRecording();
    } else {
      void startRecording();
    }
  };

  // While recording show the live partial text; once stopped, show the final
  // transcript. Note: if partialTranscript is empty mid-recording, nothing is
  // shown even when a previous final transcript exists.
  const displayText = isRecording ? partialTranscript : transcript;

  return (
    <div className={`flex flex-col items-center gap-3 ${className}`}>
      {/* Mic button with recording indicator */}
      <div className="relative flex items-center gap-2">
        {/* Pulsing ring animation when recording */}
        {isRecording && (
          <div
            className="absolute inset-0 -m-1 rounded-full bg-sky-400/20 animate-pulse"
            aria-hidden="true"
          />
        )}

        <button
          type="button"
          onClick={handleClick}
          disabled={disabled}
          aria-label={isRecording ? "Stop voice input" : "Start voice input"}
          className={`
            relative z-10 flex items-center justify-center
            w-10 h-10 rounded-full transition-all duration-200
            focus:outline-none focus:ring-2 focus:ring-sky-400 focus:ring-offset-2
            disabled:opacity-50 disabled:cursor-not-allowed
            ${
              isRecording
                ? "bg-sky-500 text-white hover:bg-sky-600 shadow-md"
                : "bg-slate-100 text-slate-600 hover:bg-slate-200 dark:bg-slate-700 dark:text-slate-300 dark:hover:bg-slate-600"
            }
          `}
        >
          {isRecording ? (
            <MicOff className="w-5 h-5" aria-hidden="true" />
          ) : (
            <Mic className="w-5 h-5" aria-hidden="true" />
          )}
        </button>

        {/* Audio level visualizer - shown during recording */}
        {isRecording && (
          <AudioVisualizer audioLevel={audioLevel} isActive={isRecording} barCount={5} />
        )}
      </div>

      {/* Recording status indicator */}
      {isRecording && (
        <div className="flex items-center gap-1.5 text-xs text-sky-600 dark:text-sky-400">
          <span className="w-2 h-2 rounded-full bg-sky-500 animate-pulse" aria-hidden="true" />
          <span>Listening...</span>
        </div>
      )}

      {/* Transcription text display: italic/muted while partial, solid when final */}
      {displayText && (
        <p
          className={`
            text-sm max-w-md text-center px-3 py-1.5 rounded-lg
            ${
              isRecording
                ? "text-slate-500 dark:text-slate-400 bg-slate-50 dark:bg-slate-800/50 italic"
                : "text-slate-700 dark:text-slate-200 bg-slate-100 dark:bg-slate-800"
            }
          `}
        >
          {displayText}
        </p>
      )}

      {/* Error display - calm, non-aggressive (amber, not red; see tests) */}
      {error && (
        <p
          className="text-sm text-amber-700 dark:text-amber-400 bg-amber-50 dark:bg-amber-900/20 px-3 py-1.5 rounded-lg max-w-md text-center"
          role="alert"
        >
          {error}
        </p>
      )}
    </div>
  );
}
|
||||
8
apps/web/src/components/speech/index.ts
Normal file
8
apps/web/src/components/speech/index.ts
Normal file
@@ -0,0 +1,8 @@
|
||||
/**
 * Barrel file for the speech components: re-exports the public components
 * (voice input, audio visualization, TTS playback) and their prop types.
 */
export { VoiceInput } from "./VoiceInput";
export type { VoiceInputProps } from "./VoiceInput";
export { AudioVisualizer } from "./AudioVisualizer";
export type { AudioVisualizerProps } from "./AudioVisualizer";
export { AudioPlayer } from "./AudioPlayer";
export type { AudioPlayerProps } from "./AudioPlayer";
export { TextToSpeechButton } from "./TextToSpeechButton";
export type { TextToSpeechButtonProps } from "./TextToSpeechButton";
|
||||
285
apps/web/src/hooks/useTextToSpeech.test.ts
Normal file
285
apps/web/src/hooks/useTextToSpeech.test.ts
Normal file
@@ -0,0 +1,285 @@
|
||||
/**
|
||||
* @file useTextToSpeech.test.ts
|
||||
* @description Tests for the useTextToSpeech hook that manages TTS API integration
|
||||
*/
|
||||
|
||||
import { renderHook, act } from "@testing-library/react";
|
||||
import { describe, it, expect, beforeEach, vi, afterEach } from "vitest";
|
||||
import { useTextToSpeech } from "./useTextToSpeech";
|
||||
import * as speechApi from "@/lib/api/speech";
|
||||
|
||||
// Mock the speech API module
|
||||
vi.mock("@/lib/api/speech", () => ({
|
||||
synthesizeSpeech: vi.fn(),
|
||||
getVoices: vi.fn(),
|
||||
}));
|
||||
|
||||
// Mock URL.createObjectURL and URL.revokeObjectURL
|
||||
// Spies replacing URL.createObjectURL / revokeObjectURL so tests can assert
// blob-URL creation and cleanup without real Blob URL support in jsdom/node.
const mockCreateObjectURL = vi.fn().mockReturnValue("blob:mock-audio-url");
const mockRevokeObjectURL = vi.fn();

beforeEach(() => {
  // Re-install before every test; vi.restoreAllMocks in afterEach would
  // otherwise leave the globals pointing at restored originals.
  global.URL.createObjectURL = mockCreateObjectURL;
  global.URL.revokeObjectURL = mockRevokeObjectURL;
});
|
||||
|
||||
// Mock HTMLAudioElement
|
||||
class MockAudio {
|
||||
src = "";
|
||||
currentTime = 0;
|
||||
duration = 120;
|
||||
paused = true;
|
||||
playbackRate = 1;
|
||||
volume = 1;
|
||||
onended: (() => void) | null = null;
|
||||
ontimeupdate: (() => void) | null = null;
|
||||
onloadedmetadata: (() => void) | null = null;
|
||||
onerror: ((e: unknown) => void) | null = null;
|
||||
|
||||
play(): Promise<void> {
|
||||
this.paused = false;
|
||||
return Promise.resolve();
|
||||
}
|
||||
|
||||
pause(): void {
|
||||
this.paused = true;
|
||||
}
|
||||
|
||||
addEventListener(event: string, handler: () => void): void {
|
||||
if (event === "ended") this.onended = handler;
|
||||
if (event === "timeupdate") this.ontimeupdate = handler;
|
||||
if (event === "loadedmetadata") this.onloadedmetadata = handler;
|
||||
if (event === "error") this.onerror = handler;
|
||||
}
|
||||
|
||||
removeEventListener(): void {
|
||||
// no-op for tests
|
||||
}
|
||||
}
|
||||
|
||||
// Replace the global Audio constructor so the hook creates MockAudio instances.
vi.stubGlobal("Audio", MockAudio);

// Typed alias for the mocked API function so tests get mockResolvedValueOnce
// et al. without repeated casts.
const mockSynthesizeSpeech = speechApi.synthesizeSpeech as ReturnType<typeof vi.fn>;
|
||||
|
||||
describe("useTextToSpeech", () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
mockCreateObjectURL.mockReturnValue("blob:mock-audio-url");
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
describe("initial state", () => {
|
||||
it("should return correct initial interface", () => {
|
||||
const { result } = renderHook(() => useTextToSpeech());
|
||||
|
||||
expect(result.current.synthesize).toBeTypeOf("function");
|
||||
expect(result.current.play).toBeTypeOf("function");
|
||||
expect(result.current.pause).toBeTypeOf("function");
|
||||
expect(result.current.stop).toBeTypeOf("function");
|
||||
expect(result.current.audioUrl).toBeNull();
|
||||
expect(result.current.isLoading).toBe(false);
|
||||
expect(result.current.error).toBeNull();
|
||||
expect(result.current.isPlaying).toBe(false);
|
||||
expect(result.current.duration).toBe(0);
|
||||
expect(result.current.currentTime).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("synthesize", () => {
|
||||
it("should call API and return audio blob URL", async () => {
|
||||
const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
|
||||
mockSynthesizeSpeech.mockResolvedValueOnce(mockBlob);
|
||||
|
||||
const { result } = renderHook(() => useTextToSpeech());
|
||||
|
||||
await act(async () => {
|
||||
await result.current.synthesize("Hello world");
|
||||
});
|
||||
|
||||
expect(mockSynthesizeSpeech).toHaveBeenCalledWith({
|
||||
text: "Hello world",
|
||||
});
|
||||
expect(result.current.audioUrl).toBe("blob:mock-audio-url");
|
||||
expect(result.current.isLoading).toBe(false);
|
||||
expect(result.current.error).toBeNull();
|
||||
});
|
||||
|
||||
it("should pass voice and tier options to API", async () => {
|
||||
const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
|
||||
mockSynthesizeSpeech.mockResolvedValueOnce(mockBlob);
|
||||
|
||||
const { result } = renderHook(() => useTextToSpeech());
|
||||
|
||||
await act(async () => {
|
||||
await result.current.synthesize("Hello", {
|
||||
voice: "alloy",
|
||||
tier: "premium",
|
||||
speed: 1.5,
|
||||
});
|
||||
});
|
||||
|
||||
expect(mockSynthesizeSpeech).toHaveBeenCalledWith({
|
||||
text: "Hello",
|
||||
voice: "alloy",
|
||||
tier: "premium",
|
||||
speed: 1.5,
|
||||
});
|
||||
});
|
||||
|
||||
it("should set loading state while synthesizing", async () => {
|
||||
let resolvePromise: ((value: Blob) => void) | undefined;
|
||||
const pendingPromise = new Promise<Blob>((resolve) => {
|
||||
resolvePromise = resolve;
|
||||
});
|
||||
mockSynthesizeSpeech.mockReturnValueOnce(pendingPromise);
|
||||
|
||||
const { result } = renderHook(() => useTextToSpeech());
|
||||
|
||||
act(() => {
|
||||
void result.current.synthesize("Hello");
|
||||
});
|
||||
|
||||
expect(result.current.isLoading).toBe(true);
|
||||
|
||||
await act(async () => {
|
||||
resolvePromise?.(new Blob(["audio"], { type: "audio/mpeg" }));
|
||||
await pendingPromise;
|
||||
});
|
||||
|
||||
expect(result.current.isLoading).toBe(false);
|
||||
});
|
||||
|
||||
it("should handle API errors gracefully", async () => {
|
||||
mockSynthesizeSpeech.mockRejectedValueOnce(new Error("Synthesis failed"));
|
||||
|
||||
const { result } = renderHook(() => useTextToSpeech());
|
||||
|
||||
await act(async () => {
|
||||
await result.current.synthesize("Hello");
|
||||
});
|
||||
|
||||
expect(result.current.error).toBe("Synthesis failed");
|
||||
expect(result.current.isLoading).toBe(false);
|
||||
expect(result.current.audioUrl).toBeNull();
|
||||
});
|
||||
|
||||
it("should cache audio for repeated synthesis of same text", async () => {
|
||||
const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
|
||||
mockSynthesizeSpeech.mockResolvedValue(mockBlob);
|
||||
|
||||
const { result } = renderHook(() => useTextToSpeech());
|
||||
|
||||
// First call
|
||||
await act(async () => {
|
||||
await result.current.synthesize("Hello world");
|
||||
});
|
||||
|
||||
// Second call with same text
|
||||
await act(async () => {
|
||||
await result.current.synthesize("Hello world");
|
||||
});
|
||||
|
||||
// API should only be called once due to caching
|
||||
expect(mockSynthesizeSpeech).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("should not cache when options differ", async () => {
|
||||
const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
|
||||
mockSynthesizeSpeech.mockResolvedValue(mockBlob);
|
||||
|
||||
const { result } = renderHook(() => useTextToSpeech());
|
||||
|
||||
await act(async () => {
|
||||
await result.current.synthesize("Hello", { voice: "alloy" });
|
||||
});
|
||||
|
||||
await act(async () => {
|
||||
await result.current.synthesize("Hello", { voice: "nova" });
|
||||
});
|
||||
|
||||
expect(mockSynthesizeSpeech).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
});
|
||||
|
||||
describe("playback controls", () => {
|
||||
it("should play audio after synthesis", async () => {
|
||||
const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
|
||||
mockSynthesizeSpeech.mockResolvedValueOnce(mockBlob);
|
||||
|
||||
const { result } = renderHook(() => useTextToSpeech());
|
||||
|
||||
await act(async () => {
|
||||
await result.current.synthesize("Hello");
|
||||
});
|
||||
|
||||
await act(async () => {
|
||||
await result.current.play();
|
||||
});
|
||||
|
||||
expect(result.current.isPlaying).toBe(true);
|
||||
});
|
||||
|
||||
it("should pause audio playback", async () => {
|
||||
const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
|
||||
mockSynthesizeSpeech.mockResolvedValueOnce(mockBlob);
|
||||
|
||||
const { result } = renderHook(() => useTextToSpeech());
|
||||
|
||||
await act(async () => {
|
||||
await result.current.synthesize("Hello");
|
||||
});
|
||||
|
||||
await act(async () => {
|
||||
await result.current.play();
|
||||
});
|
||||
|
||||
act(() => {
|
||||
result.current.pause();
|
||||
});
|
||||
|
||||
expect(result.current.isPlaying).toBe(false);
|
||||
});
|
||||
|
||||
it("should stop and reset playback", async () => {
|
||||
const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
|
||||
mockSynthesizeSpeech.mockResolvedValueOnce(mockBlob);
|
||||
|
||||
const { result } = renderHook(() => useTextToSpeech());
|
||||
|
||||
await act(async () => {
|
||||
await result.current.synthesize("Hello");
|
||||
});
|
||||
|
||||
await act(async () => {
|
||||
await result.current.play();
|
||||
});
|
||||
|
||||
act(() => {
|
||||
result.current.stop();
|
||||
});
|
||||
|
||||
expect(result.current.isPlaying).toBe(false);
|
||||
expect(result.current.currentTime).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("cleanup", () => {
|
||||
it("should revoke object URLs on unmount", async () => {
|
||||
const mockBlob = new Blob(["audio-data"], { type: "audio/mpeg" });
|
||||
mockSynthesizeSpeech.mockResolvedValueOnce(mockBlob);
|
||||
|
||||
const { result, unmount } = renderHook(() => useTextToSpeech());
|
||||
|
||||
await act(async () => {
|
||||
await result.current.synthesize("Hello");
|
||||
});
|
||||
|
||||
unmount();
|
||||
|
||||
expect(mockRevokeObjectURL).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
});
|
||||
239
apps/web/src/hooks/useTextToSpeech.ts
Normal file
239
apps/web/src/hooks/useTextToSpeech.ts
Normal file
@@ -0,0 +1,239 @@
|
||||
/**
|
||||
* useTextToSpeech hook
|
||||
* Manages TTS API integration with synthesis, caching, and playback state
|
||||
*/
|
||||
|
||||
import { useState, useCallback, useRef, useEffect } from "react";
|
||||
import { synthesizeSpeech } from "@/lib/api/speech";
|
||||
|
||||
/** Options forwarded to the speech-synthesis API by {@link useTextToSpeech}. */
export interface SynthesizeOptions {
  /** Voice identifier passed through to the API (e.g. "alloy", "nova"). */
  voice?: string;
  /** Playback speed requested from the API (e.g. 1.5). */
  speed?: number;
  /** Audio format requested from the API — passed through verbatim. */
  format?: string;
  /** Service tier requested from the API (e.g. "premium"). */
  tier?: string;
}

/** Return surface of {@link useTextToSpeech}. */
export interface UseTextToSpeechReturn {
  /** Synthesize text to speech audio */
  synthesize: (text: string, options?: SynthesizeOptions) => Promise<void>;
  /** The URL of the synthesized audio blob */
  audioUrl: string | null;
  /** Whether synthesis is in progress */
  isLoading: boolean;
  /** Error message if synthesis failed */
  error: string | null;
  /** Start or resume audio playback */
  play: () => Promise<void>;
  /** Pause audio playback */
  pause: () => void;
  /** Stop audio and reset to beginning */
  stop: () => void;
  /** Whether audio is currently playing */
  isPlaying: boolean;
  /** Total duration of the audio in seconds */
  duration: number;
  /** Current playback position in seconds */
  currentTime: number;
}
|
||||
|
||||
/** Cache key generator for text + options combination */
|
||||
function getCacheKey(text: string, options?: SynthesizeOptions): string {
|
||||
return JSON.stringify({ text, ...options });
|
||||
}
|
||||
|
||||
/**
|
||||
* Hook for text-to-speech API integration with caching and playback controls
|
||||
*/
|
||||
export function useTextToSpeech(): UseTextToSpeechReturn {
|
||||
const [audioUrl, setAudioUrl] = useState<string | null>(null);
|
||||
const [isLoading, setIsLoading] = useState(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [isPlaying, setIsPlaying] = useState(false);
|
||||
const [duration, setDuration] = useState(0);
|
||||
const [currentTime, setCurrentTime] = useState(0);
|
||||
|
||||
// Audio element ref for playback control
|
||||
const audioRef = useRef<HTMLAudioElement | null>(null);
|
||||
|
||||
// Cache: maps cache key -> blob URL
|
||||
const cacheRef = useRef<Map<string, string>>(new Map());
|
||||
|
||||
// Track all blob URLs for cleanup
|
||||
const blobUrlsRef = useRef<Set<string>>(new Set());
|
||||
|
||||
/**
|
||||
* Clean up audio element event listeners and state
|
||||
*/
|
||||
const cleanupAudio = useCallback(() => {
|
||||
const audio = audioRef.current;
|
||||
if (audio) {
|
||||
audio.pause();
|
||||
audio.removeEventListener("ended", handleEnded);
|
||||
audio.removeEventListener("timeupdate", handleTimeUpdate);
|
||||
audio.removeEventListener("loadedmetadata", handleLoadedMetadata);
|
||||
audioRef.current = null;
|
||||
}
|
||||
setIsPlaying(false);
|
||||
}, []);
|
||||
|
||||
/**
|
||||
* Handle audio ended event
|
||||
*/
|
||||
function handleEnded(): void {
|
||||
setIsPlaying(false);
|
||||
setCurrentTime(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle audio time update event
|
||||
*/
|
||||
function handleTimeUpdate(): void {
|
||||
const audio = audioRef.current;
|
||||
if (audio) {
|
||||
setCurrentTime(audio.currentTime);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle audio metadata loaded event
|
||||
*/
|
||||
function handleLoadedMetadata(): void {
|
||||
const audio = audioRef.current;
|
||||
if (audio && isFinite(audio.duration)) {
|
||||
setDuration(audio.duration);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up a new Audio element for a given URL
|
||||
*/
|
||||
const setupAudio = useCallback(
|
||||
(url: string) => {
|
||||
cleanupAudio();
|
||||
|
||||
const audio = new Audio(url);
|
||||
audio.addEventListener("ended", handleEnded);
|
||||
audio.addEventListener("timeupdate", handleTimeUpdate);
|
||||
audio.addEventListener("loadedmetadata", handleLoadedMetadata);
|
||||
audioRef.current = audio;
|
||||
},
|
||||
[cleanupAudio]
|
||||
);
|
||||
|
||||
/**
|
||||
* Synthesize text to speech
|
||||
*/
|
||||
const synthesize = useCallback(
|
||||
async (text: string, options?: SynthesizeOptions): Promise<void> => {
|
||||
setError(null);
|
||||
|
||||
// Check cache first
|
||||
const cacheKey = getCacheKey(text, options);
|
||||
const cachedUrl = cacheRef.current.get(cacheKey);
|
||||
|
||||
if (cachedUrl) {
|
||||
setAudioUrl(cachedUrl);
|
||||
setupAudio(cachedUrl);
|
||||
return;
|
||||
}
|
||||
|
||||
setIsLoading(true);
|
||||
|
||||
try {
|
||||
const blob = await synthesizeSpeech({
|
||||
text,
|
||||
...(options?.voice !== undefined && { voice: options.voice }),
|
||||
...(options?.speed !== undefined && { speed: options.speed }),
|
||||
...(options?.format !== undefined && { format: options.format }),
|
||||
...(options?.tier !== undefined && { tier: options.tier }),
|
||||
});
|
||||
|
||||
const url = URL.createObjectURL(blob);
|
||||
|
||||
// Store in cache and track for cleanup
|
||||
cacheRef.current.set(cacheKey, url);
|
||||
blobUrlsRef.current.add(url);
|
||||
|
||||
setAudioUrl(url);
|
||||
setupAudio(url);
|
||||
} catch (err) {
|
||||
const errorMsg = err instanceof Error ? err.message : "Speech synthesis failed";
|
||||
setError(errorMsg);
|
||||
setAudioUrl(null);
|
||||
} finally {
|
||||
setIsLoading(false);
|
||||
}
|
||||
},
|
||||
[setupAudio]
|
||||
);
|
||||
|
||||
/**
|
||||
* Start or resume audio playback
|
||||
*/
|
||||
const play = useCallback(async (): Promise<void> => {
|
||||
const audio = audioRef.current;
|
||||
if (audio) {
|
||||
await audio.play();
|
||||
setIsPlaying(true);
|
||||
}
|
||||
}, []);
|
||||
|
||||
/**
|
||||
* Pause audio playback
|
||||
*/
|
||||
const pause = useCallback((): void => {
|
||||
const audio = audioRef.current;
|
||||
if (audio) {
|
||||
audio.pause();
|
||||
setIsPlaying(false);
|
||||
}
|
||||
}, []);
|
||||
|
||||
/**
|
||||
* Stop audio and reset to beginning
|
||||
*/
|
||||
const stop = useCallback((): void => {
|
||||
const audio = audioRef.current;
|
||||
if (audio) {
|
||||
audio.pause();
|
||||
audio.currentTime = 0;
|
||||
setIsPlaying(false);
|
||||
setCurrentTime(0);
|
||||
}
|
||||
}, []);
|
||||
|
||||
// Cleanup on unmount: revoke all blob URLs and clean up audio
|
||||
useEffect((): (() => void) => {
|
||||
return (): void => {
|
||||
// Clean up audio element
|
||||
const audio = audioRef.current;
|
||||
if (audio) {
|
||||
audio.pause();
|
||||
audio.removeEventListener("ended", handleEnded);
|
||||
audio.removeEventListener("timeupdate", handleTimeUpdate);
|
||||
audio.removeEventListener("loadedmetadata", handleLoadedMetadata);
|
||||
audioRef.current = null;
|
||||
}
|
||||
|
||||
// Revoke all blob URLs
|
||||
for (const url of blobUrlsRef.current) {
|
||||
URL.revokeObjectURL(url);
|
||||
}
|
||||
blobUrlsRef.current.clear();
|
||||
cacheRef.current.clear();
|
||||
};
|
||||
}, []);
|
||||
|
||||
return {
|
||||
synthesize,
|
||||
audioUrl,
|
||||
isLoading,
|
||||
error,
|
||||
play,
|
||||
pause,
|
||||
stop,
|
||||
isPlaying,
|
||||
duration,
|
||||
currentTime,
|
||||
};
|
||||
}
|
||||
362
apps/web/src/hooks/useVoiceInput.test.ts
Normal file
362
apps/web/src/hooks/useVoiceInput.test.ts
Normal file
@@ -0,0 +1,362 @@
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import { renderHook, act, waitFor } from "@testing-library/react";
|
||||
import { useVoiceInput } from "./useVoiceInput";
|
||||
import type { Socket } from "socket.io-client";
|
||||
import { io } from "socket.io-client";
|
||||
|
||||
// Mock socket.io-client
|
||||
vi.mock("socket.io-client");
|
||||
|
||||
// Mock MediaRecorder
|
||||
// Mock MediaRecorder instance shared by all tests. addEventListener stores
// the latest handler per event so tests can fire dataavailable/stop/error
// manually via the on* fields.
const mockMediaRecorder = {
  start: vi.fn(),
  stop: vi.fn(),
  pause: vi.fn(),
  resume: vi.fn(),
  state: "inactive" as RecordingState,
  ondataavailable: null as ((event: BlobEvent) => void) | null,
  onstop: null as (() => void) | null,
  onerror: null as ((event: Event) => void) | null,
  addEventListener: vi.fn((event: string, handler: EventListenerOrEventListenerObject) => {
    if (event === "dataavailable") {
      mockMediaRecorder.ondataavailable = handler as (event: BlobEvent) => void;
    } else if (event === "stop") {
      mockMediaRecorder.onstop = handler as () => void;
    } else if (event === "error") {
      mockMediaRecorder.onerror = handler as (event: Event) => void;
    }
  }),
  removeEventListener: vi.fn(),
  stream: {
    getTracks: vi.fn(() => [{ stop: vi.fn() }]),
  },
};

// Mock AnalyserNode with getByteFrequencyData producing a constant mid-level
// signal (128 in every bin) so audio-level computations yield a stable value.
const mockAnalyserNode = {
  fftSize: 256,
  frequencyBinCount: 128,
  getByteFrequencyData: vi.fn((array: Uint8Array) => {
    // Simulate some audio data
    for (let i = 0; i < array.length; i++) {
      array[i] = 128;
    }
  }),
  connect: vi.fn(),
  disconnect: vi.fn(),
};

// Mock MediaStreamAudioSourceNode — only the graph-wiring methods are needed.
const mockMediaStreamSource = {
  connect: vi.fn(),
  disconnect: vi.fn(),
};

// Mock AudioContext returning the analyser/source mocks above.
const mockAudioContext = {
  createAnalyser: vi.fn(() => mockAnalyserNode),
  createMediaStreamSource: vi.fn(() => mockMediaStreamSource),
  close: vi.fn(),
  state: "running",
};

// Mock getUserMedia — resolved/rejected per test via beforeEach defaults.
const mockGetUserMedia = vi.fn();

// Set up global mocks: navigator.mediaDevices does not exist in the test
// environment, so define it with the mocked getUserMedia.
Object.defineProperty(global.navigator, "mediaDevices", {
  value: {
    getUserMedia: mockGetUserMedia,
  },
  writable: true,
  configurable: true,
});

// Mock AudioContext constructor
vi.stubGlobal(
  "AudioContext",
  vi.fn(() => mockAudioContext)
);

// Mock MediaRecorder constructor — every `new MediaRecorder(...)` returns the
// single shared mock instance above.
vi.stubGlobal(
  "MediaRecorder",
  vi.fn(() => mockMediaRecorder)
);

// Add the isTypeSupported static method the real MediaRecorder class exposes;
// always report support so codec selection in the hook succeeds.
(
  global.MediaRecorder as unknown as { isTypeSupported: (type: string) => boolean }
).isTypeSupported = vi.fn(() => true);
|
||||
|
||||
describe("useVoiceInput", (): void => {
  // Socket.IO client double returned by the mocked io() factory.
  let mockSocket: Partial<Socket>;
  // Handlers registered via socket.on(), keyed by event name, so tests can
  // fire server-side events synthetically.
  let socketEventHandlers: Record<string, (data: unknown) => void>;

  beforeEach((): void => {
    socketEventHandlers = {};

    mockSocket = {
      // Record each subscription so tests can invoke the handler directly.
      on: vi.fn((event: string, handler: (...args: unknown[]) => void) => {
        socketEventHandlers[event] = handler;
        return mockSocket;
      }) as unknown as Socket["on"],
      off: vi.fn(() => mockSocket) as unknown as Socket["off"],
      emit: vi.fn() as unknown as Socket["emit"],
      connect: vi.fn(),
      disconnect: vi.fn(),
      connected: true,
    };

    // Make the module-mocked io() hand back our socket double.
    (io as unknown as ReturnType<typeof vi.fn>).mockReturnValue(mockSocket);

    // Reset MediaRecorder mock state
    mockMediaRecorder.state = "inactive";
    mockMediaRecorder.ondataavailable = null;
    mockMediaRecorder.onstop = null;
    mockMediaRecorder.onerror = null;

    // Default: getUserMedia succeeds
    const mockStream = {
      getTracks: vi.fn(() => [{ stop: vi.fn() }]),
    } as unknown as MediaStream;
    mockGetUserMedia.mockResolvedValue(mockStream);
  });

  afterEach((): void => {
    vi.clearAllMocks();
  });

  it("should return the correct interface", (): void => {
    const { result } = renderHook(() => useVoiceInput());

    expect(result.current).toHaveProperty("isRecording");
    expect(result.current).toHaveProperty("startRecording");
    expect(result.current).toHaveProperty("stopRecording");
    expect(result.current).toHaveProperty("transcript");
    expect(result.current).toHaveProperty("partialTranscript");
    expect(result.current).toHaveProperty("error");
    expect(result.current).toHaveProperty("audioLevel");
  });

  it("should start with default state", (): void => {
    const { result } = renderHook(() => useVoiceInput());

    expect(result.current.isRecording).toBe(false);
    expect(result.current.transcript).toBe("");
    expect(result.current.partialTranscript).toBe("");
    expect(result.current.error).toBeNull();
    expect(result.current.audioLevel).toBe(0);
  });

  it("should start recording when startRecording is called", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());

    await act(async () => {
      await result.current.startRecording();
    });

    // The hook should request audio with the constraints it documents.
    expect(result.current.isRecording).toBe(true);
    expect(mockGetUserMedia).toHaveBeenCalledWith({
      audio: {
        echoCancellation: true,
        noiseSuppression: true,
        sampleRate: 16000,
      },
    });
  });

  it("should stop recording when stopRecording is called", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());

    await act(async () => {
      await result.current.startRecording();
    });

    expect(result.current.isRecording).toBe(true);

    act(() => {
      result.current.stopRecording();
    });

    expect(result.current.isRecording).toBe(false);
  });

  it("should set error when microphone access is denied", async (): Promise<void> => {
    // Simulate the browser permission prompt being declined.
    mockGetUserMedia.mockRejectedValueOnce(
      new DOMException("Permission denied", "NotAllowedError")
    );

    const { result } = renderHook(() => useVoiceInput());

    await act(async () => {
      await result.current.startRecording();
    });

    expect(result.current.isRecording).toBe(false);
    expect(result.current.error).toBeTruthy();
    expect(result.current.error).toContain("microphone");
  });

  it("should connect to speech WebSocket namespace", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());

    await act(async () => {
      await result.current.startRecording();
    });

    expect(io).toHaveBeenCalledWith(
      expect.any(String),
      expect.objectContaining({
        path: "/socket.io",
      })
    );
  });

  it("should emit start-transcription when recording begins", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());

    await act(async () => {
      await result.current.startRecording();
    });

    expect(mockSocket.emit).toHaveBeenCalledWith(
      "start-transcription",
      expect.objectContaining({
        // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
        format: expect.any(String),
      })
    );
  });

  it("should emit stop-transcription when recording stops", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());

    await act(async () => {
      await result.current.startRecording();
    });

    act(() => {
      result.current.stopRecording();
    });

    expect(mockSocket.emit).toHaveBeenCalledWith("stop-transcription");
  });

  it("should handle partial transcription events", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());

    await act(async () => {
      await result.current.startRecording();
    });

    // Fire the captured server-event handler directly.
    act(() => {
      socketEventHandlers["transcription-partial"]?.({
        text: "hello world",
      });
    });

    await waitFor(() => {
      expect(result.current.partialTranscript).toBe("hello world");
    });
  });

  it("should handle final transcription events", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());

    await act(async () => {
      await result.current.startRecording();
    });

    act(() => {
      socketEventHandlers["transcription-final"]?.({
        text: "hello world final",
      });
    });

    await waitFor(() => {
      expect(result.current.transcript).toBe("hello world final");
    });
  });

  it("should handle transcription error events", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());

    await act(async () => {
      await result.current.startRecording();
    });

    act(() => {
      socketEventHandlers["transcription-error"]?.({
        message: "Transcription failed",
      });
    });

    await waitFor(() => {
      expect(result.current.error).toBe("Transcription failed");
    });
  });

  it("should call onTranscript callback when final transcription received", async (): Promise<void> => {
    const onTranscript = vi.fn();
    const { result } = renderHook(() => useVoiceInput({ onTranscript }));

    await act(async () => {
      await result.current.startRecording();
    });

    act(() => {
      socketEventHandlers["transcription-final"]?.({
        text: "final text",
      });
    });

    await waitFor(() => {
      expect(onTranscript).toHaveBeenCalledWith("final text");
    });
  });

  it("should clean up on unmount", async (): Promise<void> => {
    const { result, unmount } = renderHook(() => useVoiceInput());

    await act(async () => {
      await result.current.startRecording();
    });

    unmount();

    expect(mockSocket.disconnect).toHaveBeenCalled();
  });

  it("should not start recording if already recording", async (): Promise<void> => {
    const { result } = renderHook(() => useVoiceInput());

    await act(async () => {
      await result.current.startRecording();
    });

    // Reset the call count
    mockGetUserMedia.mockClear();

    await act(async () => {
      await result.current.startRecording();
    });

    // Should not have called getUserMedia again
    expect(mockGetUserMedia).not.toHaveBeenCalled();
  });

  describe("REST fallback", (): void => {
    it("should fall back to REST when WebSocket is unavailable", async (): Promise<void> => {
      // Simulate socket not connecting
      (mockSocket as { connected: boolean }).connected = false;

      const { result } = renderHook(() => useVoiceInput({ useWebSocket: false }));

      // Should still be able to start recording (REST mode)
      await act(async () => {
        await result.current.startRecording();
      });

      expect(result.current.isRecording).toBe(true);
    });
  });
});
|
||||
409
apps/web/src/hooks/useVoiceInput.ts
Normal file
409
apps/web/src/hooks/useVoiceInput.ts
Normal file
@@ -0,0 +1,409 @@
|
||||
/**
|
||||
* useVoiceInput hook
|
||||
*
|
||||
* Custom hook for microphone capture and real-time transcription.
|
||||
* Supports WebSocket streaming for real-time partial transcriptions
|
||||
* with REST upload fallback when WebSocket is unavailable.
|
||||
*/
|
||||
|
||||
import { useState, useCallback, useRef, useEffect } from "react";
|
||||
import type { Socket } from "socket.io-client";
|
||||
import { io } from "socket.io-client";
|
||||
import { API_BASE_URL } from "@/lib/config";
|
||||
import { apiPostFormData } from "@/lib/api/client";
|
||||
|
||||
/** Options for the useVoiceInput hook */
export interface UseVoiceInputOptions {
  /** Callback fired when final transcription is received */
  onTranscript?: (text: string) => void;
  /** Whether to use WebSocket streaming (default: true); when false, audio is uploaded via REST after recording stops */
  useWebSocket?: boolean;
  /** Audio sample rate in Hz (default: 16000); forwarded to getUserMedia and to the start-transcription event */
  sampleRate?: number;
}
|
||||
|
||||
/** Return type for the useVoiceInput hook */
export interface UseVoiceInputReturn {
  /** Whether the microphone is currently recording */
  isRecording: boolean;
  /** Start microphone capture and transcription (no-op if already recording) */
  startRecording: () => Promise<void>;
  /** Stop microphone capture and transcription */
  stopRecording: () => void;
  /** The final transcription text */
  transcript: string;
  /** Partial transcription text (updates in real-time; cleared once a final result arrives) */
  partialTranscript: string;
  /** Error message if something went wrong; null when there is no error */
  error: string | null;
  /** Current audio input level (0-1, averaged over frequency bins); reset to 0 when recording stops */
  audioLevel: number;
}
|
||||
|
||||
/** Payload of the "transcription-partial" socket event (interim text). */
interface TranscriptionPartialPayload {
  text: string;
}

/** Payload of the "transcription-final" socket event (committed text). */
interface TranscriptionFinalPayload {
  text: string;
}

/** Payload of the "transcription-error" socket event. */
interface TranscriptionErrorPayload {
  message: string;
}

/** Envelope returned by POST /api/speech/transcribe (REST fallback). */
interface TranscribeResponse {
  data: {
    text: string;
  };
}
|
||||
|
||||
/**
|
||||
* Determine the best MIME type for audio recording
|
||||
*/
|
||||
function getAudioMimeType(): string {
|
||||
if (typeof MediaRecorder === "undefined") {
|
||||
return "audio/webm";
|
||||
}
|
||||
const types = ["audio/webm;codecs=opus", "audio/webm", "audio/ogg;codecs=opus", "audio/mp4"];
|
||||
for (const type of types) {
|
||||
if (MediaRecorder.isTypeSupported(type)) {
|
||||
return type;
|
||||
}
|
||||
}
|
||||
return "audio/webm";
|
||||
}
|
||||
|
||||
/**
|
||||
* Hook for microphone capture and real-time speech-to-text transcription.
|
||||
*
|
||||
* Uses WebSocket streaming by default for real-time partial transcriptions.
|
||||
* Falls back to REST upload (POST /api/speech/transcribe) if WebSocket
|
||||
* is disabled or unavailable.
|
||||
*/
|
||||
export function useVoiceInput(options: UseVoiceInputOptions = {}): UseVoiceInputReturn {
|
||||
const { onTranscript, useWebSocket: useWs = true, sampleRate = 16000 } = options;
|
||||
|
||||
const [isRecording, setIsRecording] = useState(false);
|
||||
const [transcript, setTranscript] = useState("");
|
||||
const [partialTranscript, setPartialTranscript] = useState("");
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [audioLevel, setAudioLevel] = useState(0);
|
||||
|
||||
// Refs to hold mutable state without re-renders
|
||||
const socketRef = useRef<Socket | null>(null);
|
||||
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
|
||||
const streamRef = useRef<MediaStream | null>(null);
|
||||
const audioContextRef = useRef<AudioContext | null>(null);
|
||||
const analyserRef = useRef<AnalyserNode | null>(null);
|
||||
const animationFrameRef = useRef<number | null>(null);
|
||||
const onTranscriptRef = useRef(onTranscript);
|
||||
const recordedChunksRef = useRef<Blob[]>([]);
|
||||
const isRecordingRef = useRef(false);
|
||||
|
||||
// Keep callback ref up to date
|
||||
useEffect(() => {
|
||||
onTranscriptRef.current = onTranscript;
|
||||
}, [onTranscript]);
|
||||
|
||||
/**
|
||||
* Set up audio analysis for visualizing input level
|
||||
*/
|
||||
const setupAudioAnalysis = useCallback((stream: MediaStream): void => {
|
||||
try {
|
||||
const audioContext = new AudioContext();
|
||||
const analyser = audioContext.createAnalyser();
|
||||
const source = audioContext.createMediaStreamSource(stream);
|
||||
|
||||
analyser.fftSize = 256;
|
||||
source.connect(analyser);
|
||||
|
||||
audioContextRef.current = audioContext;
|
||||
analyserRef.current = analyser;
|
||||
|
||||
// Start level monitoring
|
||||
const dataArray = new Uint8Array(analyser.frequencyBinCount);
|
||||
|
||||
const updateLevel = (): void => {
|
||||
if (!isRecordingRef.current) {
|
||||
return;
|
||||
}
|
||||
|
||||
analyser.getByteFrequencyData(dataArray);
|
||||
|
||||
// Calculate average level
|
||||
let sum = 0;
|
||||
for (const value of dataArray) {
|
||||
sum += value;
|
||||
}
|
||||
const average = sum / dataArray.length / 255;
|
||||
setAudioLevel(average);
|
||||
|
||||
animationFrameRef.current = requestAnimationFrame(updateLevel);
|
||||
};
|
||||
|
||||
animationFrameRef.current = requestAnimationFrame(updateLevel);
|
||||
} catch {
|
||||
// Audio analysis is non-critical; continue without it
|
||||
console.warn("Audio analysis not available");
|
||||
}
|
||||
}, []);
|
||||
|
||||
/**
|
||||
* Clean up audio analysis resources
|
||||
*/
|
||||
const cleanupAudioAnalysis = useCallback((): void => {
|
||||
if (animationFrameRef.current !== null) {
|
||||
cancelAnimationFrame(animationFrameRef.current);
|
||||
animationFrameRef.current = null;
|
||||
}
|
||||
if (audioContextRef.current) {
|
||||
void audioContextRef.current.close();
|
||||
audioContextRef.current = null;
|
||||
}
|
||||
analyserRef.current = null;
|
||||
setAudioLevel(0);
|
||||
}, []);
|
||||
|
||||
/**
|
||||
* Connect to the speech WebSocket namespace
|
||||
*/
|
||||
const connectSocket = useCallback((): Socket => {
|
||||
const socket = io(API_BASE_URL, {
|
||||
path: "/socket.io",
|
||||
transports: ["websocket", "polling"],
|
||||
});
|
||||
|
||||
socket.on("transcription-partial", (data: TranscriptionPartialPayload) => {
|
||||
setPartialTranscript(data.text);
|
||||
});
|
||||
|
||||
socket.on("transcription-final", (data: TranscriptionFinalPayload) => {
|
||||
setTranscript(data.text);
|
||||
setPartialTranscript("");
|
||||
onTranscriptRef.current?.(data.text);
|
||||
});
|
||||
|
||||
socket.on("transcription-error", (data: TranscriptionErrorPayload) => {
|
||||
setError(data.message);
|
||||
});
|
||||
|
||||
socketRef.current = socket;
|
||||
return socket;
|
||||
}, []);
|
||||
|
||||
/**
|
||||
* Disconnect the WebSocket
|
||||
*/
|
||||
const disconnectSocket = useCallback((): void => {
|
||||
if (socketRef.current) {
|
||||
socketRef.current.off("transcription-partial");
|
||||
socketRef.current.off("transcription-final");
|
||||
socketRef.current.off("transcription-error");
|
||||
socketRef.current.disconnect();
|
||||
socketRef.current = null;
|
||||
}
|
||||
}, []);
|
||||
|
||||
/**
|
||||
* Send recorded audio via REST API as fallback
|
||||
*/
|
||||
const sendAudioViaRest = useCallback(async (audioBlob: Blob): Promise<void> => {
|
||||
try {
|
||||
const formData = new FormData();
|
||||
formData.append("audio", audioBlob, "recording.webm");
|
||||
|
||||
const response = await apiPostFormData<TranscribeResponse>(
|
||||
"/api/speech/transcribe",
|
||||
formData
|
||||
);
|
||||
|
||||
if (response.data.text) {
|
||||
setTranscript(response.data.text);
|
||||
setPartialTranscript("");
|
||||
onTranscriptRef.current?.(response.data.text);
|
||||
}
|
||||
} catch (err) {
|
||||
const message = err instanceof Error ? err.message : "Transcription request failed";
|
||||
setError(message);
|
||||
}
|
||||
}, []);
|
||||
|
||||
/**
|
||||
* Stop all media tracks on the stream
|
||||
*/
|
||||
const stopMediaTracks = useCallback((): void => {
|
||||
if (streamRef.current) {
|
||||
streamRef.current.getTracks().forEach((track) => {
|
||||
track.stop();
|
||||
});
|
||||
streamRef.current = null;
|
||||
}
|
||||
}, []);
|
||||
|
||||
/**
|
||||
* Start microphone capture and transcription
|
||||
*/
|
||||
const startRecording = useCallback(async (): Promise<void> => {
|
||||
// Prevent double-start
|
||||
if (isRecordingRef.current) {
|
||||
return;
|
||||
}
|
||||
|
||||
setError(null);
|
||||
setPartialTranscript("");
|
||||
recordedChunksRef.current = [];
|
||||
|
||||
try {
|
||||
// Request microphone access
|
||||
const stream = await navigator.mediaDevices.getUserMedia({
|
||||
audio: {
|
||||
echoCancellation: true,
|
||||
noiseSuppression: true,
|
||||
sampleRate,
|
||||
},
|
||||
});
|
||||
|
||||
streamRef.current = stream;
|
||||
|
||||
// Set up audio level visualization
|
||||
setupAudioAnalysis(stream);
|
||||
|
||||
// Determine MIME type
|
||||
const mimeType = getAudioMimeType();
|
||||
|
||||
// Create MediaRecorder
|
||||
const mediaRecorder = new MediaRecorder(stream, { mimeType });
|
||||
mediaRecorderRef.current = mediaRecorder;
|
||||
|
||||
// Connect WebSocket if enabled
|
||||
let socket: Socket | null = null;
|
||||
if (useWs) {
|
||||
socket = connectSocket();
|
||||
|
||||
// Emit start-transcription event
|
||||
socket.emit("start-transcription", {
|
||||
format: mimeType,
|
||||
sampleRate,
|
||||
});
|
||||
}
|
||||
|
||||
// Handle audio data chunks
|
||||
mediaRecorder.addEventListener("dataavailable", (event: BlobEvent) => {
|
||||
if (event.data.size > 0) {
|
||||
if (socket?.connected) {
|
||||
// Stream chunks via WebSocket
|
||||
socket.emit("audio-chunk", event.data);
|
||||
} else {
|
||||
// Collect chunks for REST upload
|
||||
recordedChunksRef.current.push(event.data);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Handle recording stop
|
||||
mediaRecorder.addEventListener("stop", () => {
|
||||
// If using REST fallback, send collected audio
|
||||
if (!useWs || !socket?.connected) {
|
||||
if (recordedChunksRef.current.length > 0) {
|
||||
const audioBlob = new Blob(recordedChunksRef.current, {
|
||||
type: mimeType,
|
||||
});
|
||||
void sendAudioViaRest(audioBlob);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Handle errors
|
||||
mediaRecorder.addEventListener("error", () => {
|
||||
setError("Recording encountered an issue. Please try again.");
|
||||
setIsRecording(false);
|
||||
isRecordingRef.current = false;
|
||||
});
|
||||
|
||||
// Start recording with timeslice for streaming chunks (250ms intervals)
|
||||
mediaRecorder.start(250);
|
||||
setIsRecording(true);
|
||||
isRecordingRef.current = true;
|
||||
} catch (err) {
|
||||
// Handle specific error types
|
||||
if (err instanceof DOMException) {
|
||||
if (err.name === "NotAllowedError") {
|
||||
setError(
|
||||
"Microphone access was not granted. Please allow microphone access to use voice input."
|
||||
);
|
||||
} else if (err.name === "NotFoundError") {
|
||||
setError("No microphone found. Please connect a microphone and try again.");
|
||||
} else {
|
||||
setError("Unable to access the microphone. Please check your device settings.");
|
||||
}
|
||||
} else {
|
||||
setError("Unable to start voice input. Please try again.");
|
||||
}
|
||||
|
||||
// Clean up on failure
|
||||
stopMediaTracks();
|
||||
cleanupAudioAnalysis();
|
||||
}
|
||||
}, [
|
||||
useWs,
|
||||
sampleRate,
|
||||
setupAudioAnalysis,
|
||||
connectSocket,
|
||||
sendAudioViaRest,
|
||||
stopMediaTracks,
|
||||
cleanupAudioAnalysis,
|
||||
]);
|
||||
|
||||
/**
|
||||
* Stop microphone capture and transcription
|
||||
*/
|
||||
const stopRecording = useCallback((): void => {
|
||||
setIsRecording(false);
|
||||
isRecordingRef.current = false;
|
||||
|
||||
// Stop MediaRecorder
|
||||
if (mediaRecorderRef.current && mediaRecorderRef.current.state !== "inactive") {
|
||||
mediaRecorderRef.current.stop();
|
||||
mediaRecorderRef.current = null;
|
||||
}
|
||||
|
||||
// Stop media tracks
|
||||
stopMediaTracks();
|
||||
|
||||
// Clean up audio analysis
|
||||
cleanupAudioAnalysis();
|
||||
|
||||
// Emit stop event and disconnect WebSocket
|
||||
if (socketRef.current) {
|
||||
socketRef.current.emit("stop-transcription");
|
||||
// Give the server a moment to process the final chunk before disconnecting
|
||||
setTimeout(() => {
|
||||
disconnectSocket();
|
||||
}, 500);
|
||||
}
|
||||
}, [stopMediaTracks, cleanupAudioAnalysis, disconnectSocket]);
|
||||
|
||||
// Cleanup on unmount
|
||||
useEffect(() => {
|
||||
return (): void => {
|
||||
isRecordingRef.current = false;
|
||||
if (mediaRecorderRef.current && mediaRecorderRef.current.state !== "inactive") {
|
||||
mediaRecorderRef.current.stop();
|
||||
}
|
||||
stopMediaTracks();
|
||||
cleanupAudioAnalysis();
|
||||
disconnectSocket();
|
||||
};
|
||||
}, [stopMediaTracks, cleanupAudioAnalysis, disconnectSocket]);
|
||||
|
||||
return {
|
||||
isRecording,
|
||||
startRecording,
|
||||
stopRecording,
|
||||
transcript,
|
||||
partialTranscript,
|
||||
error,
|
||||
audioLevel,
|
||||
};
|
||||
}
|
||||
58
apps/web/src/lib/api/speech.ts
Normal file
58
apps/web/src/lib/api/speech.ts
Normal file
@@ -0,0 +1,58 @@
|
||||
/**
|
||||
* Speech API client
|
||||
* Handles text-to-speech synthesis and voice listing via /api/speech
|
||||
*/
|
||||
|
||||
import { apiGet } from "./client";
|
||||
import { API_BASE_URL } from "../config";
|
||||
|
||||
/** A single TTS voice as reported by the speech service. */
export interface VoiceInfo {
  /** Voice identifier. */
  id: string;
  /** Human-readable voice name. */
  name: string;
  /** Language of the voice. */
  language: string;
  /** Voice gender, when the service reports one. */
  gender?: string;
  /** Sample clip URL — presumably a playable preview; verify against the API. */
  preview_url?: string;
}

/** Request body for POST /api/speech/synthesize. */
export interface SynthesizeOptions {
  /** Text to synthesize into speech. */
  text: string;
  /** Voice identifier to use — presumably the server picks a default when omitted; confirm. */
  voice?: string;
  /** Playback speed — NOTE(review): assumed 1 is normal speed; confirm against the API. */
  speed?: number;
  /** Desired output audio format. */
  format?: string;
  /** Service tier selector. */
  tier?: string;
}

/** Envelope returned by GET /api/speech/voices. */
export interface VoicesResponse {
  data: VoiceInfo[];
}
|
||||
|
||||
/**
|
||||
* Fetch available TTS voices
|
||||
*/
|
||||
export async function getVoices(): Promise<VoicesResponse> {
|
||||
return apiGet<VoicesResponse>("/api/speech/voices");
|
||||
}
|
||||
|
||||
/**
|
||||
* Synthesize text to speech audio
|
||||
* Returns the audio as a Blob since the API returns binary audio data
|
||||
*/
|
||||
export async function synthesizeSpeech(options: SynthesizeOptions): Promise<Blob> {
|
||||
const url = `${API_BASE_URL}/api/speech/synthesize`;
|
||||
|
||||
const response = await fetch(url, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
credentials: "include",
|
||||
body: JSON.stringify(options),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text().catch(() => "Unknown error");
|
||||
throw new Error(`Speech synthesis failed: ${errorText}`);
|
||||
}
|
||||
|
||||
return response.blob();
|
||||
}
|
||||
Reference in New Issue
Block a user