Skip to content

Commit 4ffcec9

Browse files
committed
refactor: improve useVoiceInput clarity and touch detection
- Rename isMobile to HAS_TOUCH_DICTATION with clear doc comment - Remove screen size check (iPads have dictation regardless of size) - Add section headers for visual organization - Extract releaseStream helper to reduce duplication - Improve variable names (recorder, chunks, buffer) - Add early returns to reduce nesting in transcribe() - Rename refs for clarity (shouldSendRef, wasCancelledRef) - Better comments explaining the state machine and logic
1 parent 8c56c9c commit 4ffcec9

File tree

1 file changed

+135
-67
lines changed

1 file changed

+135
-67
lines changed

src/browser/hooks/useVoiceInput.ts

Lines changed: 135 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
/**
2-
* Hook for voice input using OpenAI Whisper API via MediaRecorder.
2+
* Voice input via OpenAI transcription (gpt-4o-transcribe).
33
*
4-
* Records audio, sends to backend for Whisper transcription, returns text.
5-
* Hidden on mobile (native keyboards have built-in dictation).
4+
* State machine: idle → recording → transcribing → idle
5+
*
6+
* Hidden on touch devices where native keyboard dictation is available.
67
*/
78

89
import { useState, useCallback, useRef, useEffect } from "react";
@@ -12,6 +13,7 @@ export type VoiceInputState = "idle" | "recording" | "transcribing";
1213
export interface UseVoiceInputOptions {
1314
onTranscript: (text: string) => void;
1415
onError?: (error: string) => void;
16+
/** Called after successful transcription if stop({ send: true }) was used */
1517
onSend?: () => void;
1618
openAIKeySet: boolean;
1719
}
@@ -20,69 +22,120 @@ export interface UseVoiceInputResult {
2022
state: VoiceInputState;
2123
isSupported: boolean;
2224
isApiKeySet: boolean;
23-
/** Show UI on supported desktop platforms (mobile has native dictation) */
25+
/** False on touch devices (they have native keyboard dictation) */
2426
shouldShowUI: boolean;
2527
start: () => void;
2628
stop: (options?: { send?: boolean }) => void;
27-
/** Cancel recording without transcribing (discard audio) */
2829
cancel: () => void;
2930
toggle: () => void;
3031
}
3132

32-
// Platform checks (evaluated once)
33-
const isMobile =
34-
typeof window !== "undefined" &&
35-
("ontouchstart" in window || navigator.maxTouchPoints > 0) &&
36-
window.innerWidth < 768;
33+
// =============================================================================
34+
// Platform Detection
35+
// =============================================================================
36+
37+
/**
38+
* Detect touch devices where native keyboard dictation is typically available.
39+
* This includes phones, tablets (iPad), and touch-enabled laptops in tablet mode.
40+
* We hide our voice UI on these devices to avoid redundancy with system dictation.
41+
*/
42+
function hasTouchDictation(): boolean {
43+
if (typeof window === "undefined") return false;
44+
const hasTouch = "ontouchstart" in window || navigator.maxTouchPoints > 0;
45+
// Touch-only check: most touch devices have native dictation.
46+
// We don't check screen size because iPads are large but still have dictation.
47+
return hasTouch;
48+
}
49+
50+
const HAS_TOUCH_DICTATION = hasTouchDictation();
51+
const HAS_MEDIA_RECORDER = typeof window !== "undefined" && typeof MediaRecorder !== "undefined";
3752

38-
const isSupported = typeof window !== "undefined" && typeof MediaRecorder !== "undefined";
53+
// =============================================================================
54+
// Hook
55+
// =============================================================================
3956

4057
export function useVoiceInput(options: UseVoiceInputOptions): UseVoiceInputResult {
4158
const [state, setState] = useState<VoiceInputState>("idle");
4259

43-
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
44-
const audioChunksRef = useRef<Blob[]>([]);
60+
// Refs for MediaRecorder lifecycle
61+
const recorderRef = useRef<MediaRecorder | null>(null);
4562
const streamRef = useRef<MediaStream | null>(null);
46-
const sendAfterTranscribeRef = useRef(false);
47-
const cancelledRef = useRef(false);
63+
const chunksRef = useRef<Blob[]>([]);
4864

49-
// Store callbacks in refs to avoid stale closures
65+
// Flags set before stopping to control post-stop behavior
66+
const shouldSendRef = useRef(false);
67+
const wasCancelledRef = useRef(false);
68+
69+
// Keep callbacks fresh without recreating functions
5070
const callbacksRef = useRef(options);
5171
useEffect(() => {
5272
callbacksRef.current = options;
5373
}, [options]);
5474

75+
// ---------------------------------------------------------------------------
76+
// Transcription
77+
// ---------------------------------------------------------------------------
78+
5579
const transcribe = useCallback(async (audioBlob: Blob) => {
5680
setState("transcribing");
57-
const shouldSend = sendAfterTranscribeRef.current;
58-
sendAfterTranscribeRef.current = false;
81+
82+
// Capture and reset flags
83+
const shouldSend = shouldSendRef.current;
84+
shouldSendRef.current = false;
5985

6086
try {
61-
const arrayBuffer = await audioBlob.arrayBuffer();
87+
// Encode audio as base64 for IPC transport
88+
const buffer = await audioBlob.arrayBuffer();
6289
const base64 = btoa(
63-
new Uint8Array(arrayBuffer).reduce((data, byte) => data + String.fromCharCode(byte), "")
90+
new Uint8Array(buffer).reduce((str, byte) => str + String.fromCharCode(byte), "")
6491
);
6592

6693
const result = await window.api.voice.transcribe(base64);
6794

68-
if (result.success && result.data.trim()) {
69-
callbacksRef.current.onTranscript(result.data);
70-
if (shouldSend) {
71-
setTimeout(() => callbacksRef.current.onSend?.(), 0);
72-
}
73-
} else if (!result.success) {
95+
if (!result.success) {
7496
callbacksRef.current.onError?.(result.error);
97+
return;
98+
}
99+
100+
const text = result.data.trim();
101+
if (!text) return; // Empty transcription, nothing to do
102+
103+
callbacksRef.current.onTranscript(text);
104+
105+
// If stop({ send: true }) was called, trigger send after React flushes
106+
if (shouldSend) {
107+
setTimeout(() => callbacksRef.current.onSend?.(), 0);
75108
}
76109
} catch (err) {
77-
const message = err instanceof Error ? err.message : String(err);
78-
callbacksRef.current.onError?.(`Transcription failed: ${message}`);
110+
const msg = err instanceof Error ? err.message : String(err);
111+
callbacksRef.current.onError?.(`Transcription failed: ${msg}`);
79112
} finally {
80113
setState("idle");
81114
}
82115
}, []);
83116

117+
// ---------------------------------------------------------------------------
118+
// Release microphone and clean up recorder
119+
// ---------------------------------------------------------------------------
120+
121+
const releaseStream = useCallback(() => {
122+
streamRef.current?.getTracks().forEach((t) => t.stop());
123+
streamRef.current = null;
124+
}, []);
125+
126+
// ---------------------------------------------------------------------------
127+
// Start Recording
128+
// ---------------------------------------------------------------------------
129+
84130
const start = useCallback(async () => {
85-
if (!isSupported || isMobile || state !== "idle" || !callbacksRef.current.openAIKeySet) return;
131+
// Guard: only start from idle state with valid configuration
132+
const canStart =
133+
HAS_MEDIA_RECORDER &&
134+
!HAS_TOUCH_DICTATION &&
135+
state === "idle" &&
136+
callbacksRef.current.openAIKeySet;
137+
138+
if (!canStart) return;
86139

87140
try {
88141
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
@@ -93,20 +146,22 @@ export function useVoiceInput(options: UseVoiceInputOptions): UseVoiceInputResul
93146
: "audio/webm";
94147

95148
const recorder = new MediaRecorder(stream, { mimeType });
96-
audioChunksRef.current = [];
149+
chunksRef.current = [];
97150

98151
recorder.ondataavailable = (e) => {
99-
if (e.data.size > 0) audioChunksRef.current.push(e.data);
152+
if (e.data.size > 0) chunksRef.current.push(e.data);
100153
};
101154

102155
recorder.onstop = () => {
103-
const wasCancelled = cancelledRef.current;
104-
cancelledRef.current = false;
105-
const blob = new Blob(audioChunksRef.current, { type: mimeType });
106-
audioChunksRef.current = [];
107-
stream.getTracks().forEach((t) => t.stop());
108-
streamRef.current = null;
109-
if (wasCancelled) {
156+
// Check if this was a cancel (discard audio) or normal stop (transcribe)
157+
const cancelled = wasCancelledRef.current;
158+
wasCancelledRef.current = false;
159+
160+
const blob = new Blob(chunksRef.current, { type: mimeType });
161+
chunksRef.current = [];
162+
releaseStream();
163+
164+
if (cancelled) {
110165
setState("idle");
111166
} else {
112167
void transcribe(blob);
@@ -115,63 +170,76 @@ export function useVoiceInput(options: UseVoiceInputOptions): UseVoiceInputResul
115170

116171
recorder.onerror = () => {
117172
callbacksRef.current.onError?.("Recording failed");
173+
releaseStream();
118174
setState("idle");
119-
stream.getTracks().forEach((t) => t.stop());
120-
streamRef.current = null;
121175
};
122176

123-
mediaRecorderRef.current = recorder;
177+
recorderRef.current = recorder;
124178
recorder.start();
125179
setState("recording");
126180
} catch (err) {
127-
const message = err instanceof Error ? err.message : String(err);
128-
const isPermissionError =
129-
message.includes("Permission denied") || message.includes("NotAllowedError");
181+
const msg = err instanceof Error ? err.message : String(err);
182+
const isPermissionDenied = msg.includes("Permission denied") || msg.includes("NotAllowed");
183+
130184
callbacksRef.current.onError?.(
131-
isPermissionError
185+
isPermissionDenied
132186
? "Microphone access denied. Please allow microphone access and try again."
133-
: `Failed to start recording: ${message}`
187+
: `Failed to start recording: ${msg}`
134188
);
135189
}
136-
}, [state, transcribe]);
190+
}, [state, transcribe, releaseStream]);
191+
192+
// ---------------------------------------------------------------------------
193+
// Stop Recording (triggers transcription)
194+
// ---------------------------------------------------------------------------
137195

138196
const stop = useCallback((options?: { send?: boolean }) => {
139-
if (options?.send) sendAfterTranscribeRef.current = true;
140-
if (mediaRecorderRef.current?.state !== "inactive") {
141-
mediaRecorderRef.current?.stop();
142-
mediaRecorderRef.current = null;
197+
if (options?.send) shouldSendRef.current = true;
198+
199+
if (recorderRef.current?.state !== "inactive") {
200+
recorderRef.current?.stop();
201+
recorderRef.current = null;
143202
}
144203
}, []);
145204

205+
// ---------------------------------------------------------------------------
206+
// Cancel Recording (discard audio, no transcription)
207+
// ---------------------------------------------------------------------------
208+
146209
const cancel = useCallback(() => {
147-
cancelledRef.current = true;
148-
if (mediaRecorderRef.current?.state !== "inactive") {
149-
mediaRecorderRef.current?.stop();
150-
mediaRecorderRef.current = null;
151-
}
152-
}, []);
210+
wasCancelledRef.current = true;
211+
stop();
212+
}, [stop]);
213+
214+
// ---------------------------------------------------------------------------
215+
// Toggle (convenience for keybinds)
216+
// ---------------------------------------------------------------------------
153217

154218
const toggle = useCallback(() => {
155-
if (state === "recording") {
156-
stop();
157-
} else if (state === "idle") {
158-
void start();
159-
}
219+
if (state === "recording") stop();
220+
else if (state === "idle") void start();
160221
}, [state, start, stop]);
161222

223+
// ---------------------------------------------------------------------------
162224
// Cleanup on unmount
225+
// ---------------------------------------------------------------------------
226+
163227
useEffect(() => {
164228
return () => {
165-
mediaRecorderRef.current?.stop();
166-
streamRef.current?.getTracks().forEach((t) => t.stop());
229+
recorderRef.current?.stop();
230+
releaseStream();
167231
};
168-
}, []);
232+
}, [releaseStream]);
233+
234+
// ---------------------------------------------------------------------------
235+
// Return
236+
// ---------------------------------------------------------------------------
169237

170238
return {
171239
state,
172-
isSupported,
240+
isSupported: HAS_MEDIA_RECORDER,
173241
isApiKeySet: callbacksRef.current.openAIKeySet,
174-
shouldShowUI: isSupported && !isMobile,
242+
shouldShowUI: HAS_MEDIA_RECORDER && !HAS_TOUCH_DICTATION,
175243
start: () => void start(),
176244
stop,
177245
cancel,

0 commit comments

Comments (0)