11/**
2- * Hook for voice input using OpenAI Whisper API via MediaRecorder .
2+ * Voice input via OpenAI transcription (gpt-4o-transcribe) .
33 *
4- * Records audio, sends to backend for Whisper transcription, returns text.
5- * Hidden on mobile (native keyboards have built-in dictation).
4+ * State machine: idle → recording → transcribing → idle
5+ *
6+ * Hidden on touch devices where native keyboard dictation is available.
67 */
78
89import { useState , useCallback , useRef , useEffect } from "react" ;
@@ -12,6 +13,7 @@ export type VoiceInputState = "idle" | "recording" | "transcribing";
/** Configuration and callbacks accepted by the voice-input hook. */
export interface UseVoiceInputOptions {
  /** Receives the trimmed text of a successful, non-empty transcription. */
  onTranscript: (text: string) => void;

  /** Receives a human-readable message when recording or transcription fails. */
  onError?: (error: string) => void;

  /**
   * Invoked (deferred with a zero-delay timer) after `onTranscript` when the
   * recording was ended with `stop({ send: true })`.
   */
  onSend?: () => void;

  /** Whether an OpenAI API key is configured; recording will not start without it. */
  openAIKeySet: boolean;
}
@@ -20,69 +22,120 @@ export interface UseVoiceInputResult {
2022 state : VoiceInputState ;
2123 isSupported : boolean ;
2224 isApiKeySet : boolean ;
23- /** Show UI on supported desktop platforms (mobile has native dictation) */
25+ /** False on touch devices (they have native keyboard dictation) */
2426 shouldShowUI : boolean ;
2527 start : ( ) => void ;
2628 stop : ( options ?: { send ?: boolean } ) => void ;
27- /** Cancel recording without transcribing (discard audio) */
2829 cancel : ( ) => void ;
2930 toggle : ( ) => void ;
3031}
3132
32- // Platform checks (evaluated once)
33- const isMobile =
34- typeof window !== "undefined" &&
35- ( "ontouchstart" in window || navigator . maxTouchPoints > 0 ) &&
36- window . innerWidth < 768 ;
33+ // =============================================================================
34+ // Platform Detection
35+ // =============================================================================
36+
37+ /**
38+ * Detect touch devices where native keyboard dictation is typically available.
39+ * This includes phones, tablets (iPad), and touch-enabled laptops in tablet mode.
40+ * We hide our voice UI on these devices to avoid redundancy with system dictation.
41+ */
/**
 * Reports whether the current environment looks touch-capable.
 *
 * Touch capability is used as a proxy for "the system keyboard probably
 * offers dictation", in which case the in-app voice UI is redundant.
 *
 * @returns `false` when there is no `window` (e.g. non-browser rendering);
 *   otherwise `true` if the window exposes `ontouchstart` or the navigator
 *   reports at least one touch point.
 */
function hasTouchDictation(): boolean {
  // Nothing to probe outside a browser-like environment.
  if (typeof window === "undefined") {
    return false;
  }

  // Screen size is deliberately not consulted: large tablets are still touch devices.
  if ("ontouchstart" in window) {
    return true;
  }
  return navigator.maxTouchPoints > 0;
}
49+
/** Touch-capability probe result, computed once when the module loads. */
const HAS_TOUCH_DICTATION: boolean = hasTouchDictation();

/** True only in a window context where the MediaRecorder API is defined. */
const HAS_MEDIA_RECORDER: boolean = !(
  typeof window === "undefined" || typeof MediaRecorder === "undefined"
);
3752
38- const isSupported = typeof window !== "undefined" && typeof MediaRecorder !== "undefined" ;
53+ // =============================================================================
54+ // Hook
55+ // =============================================================================
3956
4057export function useVoiceInput ( options : UseVoiceInputOptions ) : UseVoiceInputResult {
4158 const [ state , setState ] = useState < VoiceInputState > ( "idle" ) ;
4259
43- const mediaRecorderRef = useRef < MediaRecorder | null > ( null ) ;
44- const audioChunksRef = useRef < Blob [ ] > ( [ ] ) ;
60+ // Refs for MediaRecorder lifecycle
61+ const recorderRef = useRef < MediaRecorder | null > ( null ) ;
4562 const streamRef = useRef < MediaStream | null > ( null ) ;
46- const sendAfterTranscribeRef = useRef ( false ) ;
47- const cancelledRef = useRef ( false ) ;
63+ const chunksRef = useRef < Blob [ ] > ( [ ] ) ;
4864
49- // Store callbacks in refs to avoid stale closures
65+ // Flags set before stopping to control post-stop behavior
66+ const shouldSendRef = useRef ( false ) ;
67+ const wasCancelledRef = useRef ( false ) ;
68+
69+ // Keep callbacks fresh without recreating functions
5070 const callbacksRef = useRef ( options ) ;
5171 useEffect ( ( ) => {
5272 callbacksRef . current = options ;
5373 } , [ options ] ) ;
5474
75+ // ---------------------------------------------------------------------------
76+ // Transcription
77+ // ---------------------------------------------------------------------------
78+
/**
 * Sends a finished recording to the backend for transcription and reports
 * the outcome through the option callbacks.
 *
 * The hook state is "transcribing" for the duration of the call and always
 * returns to "idle" afterwards, whether the call succeeded, failed or threw.
 *
 * @param audioBlob - The recorded audio to transcribe.
 */
const transcribe = useCallback(async (audioBlob: Blob) => {
  setState("transcribing");

  // Read and clear the "send afterwards" flag up front so it only ever
  // applies to the recording being processed right now.
  const sendWhenDone = shouldSendRef.current;
  shouldSendRef.current = false;

  try {
    // The audio travels to the backend as a base64 string: build a binary
    // string one byte at a time, then encode it.
    const bytes = new Uint8Array(await audioBlob.arrayBuffer());
    let binary = "";
    for (const byte of bytes) {
      binary += String.fromCharCode(byte);
    }

    const outcome = await window.api.voice.transcribe(btoa(binary));

    if (outcome.success) {
      const transcript = outcome.data.trim();
      // A blank transcription is silently ignored.
      if (transcript) {
        callbacksRef.current.onTranscript(transcript);

        // Defer the send by one timer tick so it runs after the transcript
        // has been handed over.
        if (sendWhenDone) {
          setTimeout(() => callbacksRef.current.onSend?.(), 0);
        }
      }
    } else {
      callbacksRef.current.onError?.(outcome.error);
    }
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    callbacksRef.current.onError?.(`Transcription failed: ${detail}`);
  } finally {
    setState("idle");
  }
}, []);
83116
117+ // ---------------------------------------------------------------------------
118+ // Release microphone and clean up recorder
119+ // ---------------------------------------------------------------------------
120+
/**
 * Stops every track of the currently held media stream (if any) and forgets
 * the stream. Safe to call when no stream is held.
 */
const releaseStream = useCallback(() => {
  const activeStream = streamRef.current;
  if (activeStream) {
    for (const track of activeStream.getTracks()) {
      track.stop();
    }
  }
  streamRef.current = null;
}, []);
125+
126+ // ---------------------------------------------------------------------------
127+ // Start Recording
128+ // ---------------------------------------------------------------------------
129+
84130 const start = useCallback ( async ( ) => {
85- if ( ! isSupported || isMobile || state !== "idle" || ! callbacksRef . current . openAIKeySet ) return ;
131+ // Guard: only start from idle state with valid configuration
132+ const canStart =
133+ HAS_MEDIA_RECORDER &&
134+ ! HAS_TOUCH_DICTATION &&
135+ state === "idle" &&
136+ callbacksRef . current . openAIKeySet ;
137+
138+ if ( ! canStart ) return ;
86139
87140 try {
88141 const stream = await navigator . mediaDevices . getUserMedia ( { audio : true } ) ;
@@ -93,20 +146,22 @@ export function useVoiceInput(options: UseVoiceInputOptions): UseVoiceInputResul
93146 : "audio/webm" ;
94147
95148 const recorder = new MediaRecorder ( stream , { mimeType } ) ;
96- audioChunksRef . current = [ ] ;
149+ chunksRef . current = [ ] ;
97150
98151 recorder . ondataavailable = ( e ) => {
99- if ( e . data . size > 0 ) audioChunksRef . current . push ( e . data ) ;
152+ if ( e . data . size > 0 ) chunksRef . current . push ( e . data ) ;
100153 } ;
101154
102155 recorder . onstop = ( ) => {
103- const wasCancelled = cancelledRef . current ;
104- cancelledRef . current = false ;
105- const blob = new Blob ( audioChunksRef . current , { type : mimeType } ) ;
106- audioChunksRef . current = [ ] ;
107- stream . getTracks ( ) . forEach ( ( t ) => t . stop ( ) ) ;
108- streamRef . current = null ;
109- if ( wasCancelled ) {
156+ // Check if this was a cancel (discard audio) or normal stop (transcribe)
157+ const cancelled = wasCancelledRef . current ;
158+ wasCancelledRef . current = false ;
159+
160+ const blob = new Blob ( chunksRef . current , { type : mimeType } ) ;
161+ chunksRef . current = [ ] ;
162+ releaseStream ( ) ;
163+
164+ if ( cancelled ) {
110165 setState ( "idle" ) ;
111166 } else {
112167 void transcribe ( blob ) ;
@@ -115,63 +170,76 @@ export function useVoiceInput(options: UseVoiceInputOptions): UseVoiceInputResul
115170
116171 recorder . onerror = ( ) => {
117172 callbacksRef . current . onError ?.( "Recording failed" ) ;
173+ releaseStream ( ) ;
118174 setState ( "idle" ) ;
119- stream . getTracks ( ) . forEach ( ( t ) => t . stop ( ) ) ;
120- streamRef . current = null ;
121175 } ;
122176
123- mediaRecorderRef . current = recorder ;
177+ recorderRef . current = recorder ;
124178 recorder . start ( ) ;
125179 setState ( "recording" ) ;
126180 } catch ( err ) {
127- const message = err instanceof Error ? err . message : String ( err ) ;
128- const isPermissionError =
129- message . includes ( "Permission denied" ) || message . includes ( "NotAllowedError" ) ;
181+ const msg = err instanceof Error ? err . message : String ( err ) ;
182+ const isPermissionDenied = msg . includes ( "Permission denied" ) || msg . includes ( "NotAllowed" ) ;
183+
130184 callbacksRef . current . onError ?.(
131- isPermissionError
185+ isPermissionDenied
132186 ? "Microphone access denied. Please allow microphone access and try again."
133- : `Failed to start recording: ${ message } `
187+ : `Failed to start recording: ${ msg } `
134188 ) ;
135189 }
136- } , [ state , transcribe ] ) ;
190+ } , [ state , transcribe , releaseStream ] ) ;
191+
192+ // ---------------------------------------------------------------------------
193+ // Stop Recording (triggers transcription)
194+ // ---------------------------------------------------------------------------
137195
/**
 * Stops the active recording. The recorder's `onstop` handler then decides
 * whether the captured audio is transcribed or discarded.
 *
 * Does nothing when no recording is in progress, so a stray call (while idle
 * or while a transcription is already running) cannot leave the "send" flag
 * armed for a later, unrelated recording.
 *
 * @param options - Optional behaviour switches.
 *   `send: true` requests that `onSend` be invoked once this recording has
 *   been transcribed successfully.
 */
const stop = useCallback((options?: { send?: boolean }) => {
  const recorder = recorderRef.current;
  if (!recorder || recorder.state === "inactive") return;

  // Only arm the flag when there is a recording it can apply to.
  if (options?.send) shouldSendRef.current = true;

  // Drop our reference before stopping; the handlers attached to the
  // recorder itself still run when its stop event fires.
  recorderRef.current = null;
  recorder.stop();
}, []);
145204
205+ // ---------------------------------------------------------------------------
206+ // Cancel Recording (discard audio, no transcription)
207+ // ---------------------------------------------------------------------------
208+
/**
 * Aborts the active recording and discards its audio (no transcription).
 *
 * Does nothing when no recording is in progress. Without this guard the
 * cancel flag would stay set after a stray call, and the `onstop` handler of
 * the next recording would read it and throw that recording's audio away.
 */
const cancel = useCallback(() => {
  const recorder = recorderRef.current;
  if (!recorder || recorder.state === "inactive") return;

  // Must be set before stopping: the recorder's onstop handler reads it to
  // choose between discarding and transcribing.
  wasCancelledRef.current = true;
  stop();
}, [stop]);
213+
214+ // ---------------------------------------------------------------------------
215+ // Toggle (convenience for keybinds)
216+ // ---------------------------------------------------------------------------
153217
/**
 * Flips between recording and not recording: stops an active recording, or
 * starts one when idle. Ignored while a transcription is in progress.
 */
const toggle = useCallback(() => {
  switch (state) {
    case "recording":
      stop();
      break;
    case "idle":
      void start();
      break;
    default:
      // "transcribing": nothing to toggle.
      break;
  }
}, [state, start, stop]);
161222
223+ // ---------------------------------------------------------------------------
162224 // Cleanup on unmount
225+ // ---------------------------------------------------------------------------
226+
// On unmount: abandon any in-progress recording and release the microphone.
useEffect(() => {
  return () => {
    const recorder = recorderRef.current;
    recorderRef.current = null;

    // Only stop a recorder that is actually running; stopping an inactive
    // one is rejected with InvalidStateError by some engines.
    if (recorder && recorder.state !== "inactive") {
      // Mark the stop as a cancel so the onstop handler discards the audio
      // instead of starting a transcription for a component that is gone.
      wasCancelledRef.current = true;
      recorder.stop();
    }

    releaseStream();
  };
}, [releaseStream]);
233+
234+ // ---------------------------------------------------------------------------
235+ // Return
236+ // ---------------------------------------------------------------------------
169237
170238 return {
171239 state,
172- isSupported,
240+ isSupported : HAS_MEDIA_RECORDER ,
173241 isApiKeySet : callbacksRef . current . openAIKeySet ,
174- shouldShowUI : isSupported && ! isMobile ,
242+ shouldShowUI : HAS_MEDIA_RECORDER && ! HAS_TOUCH_DICTATION ,
175243 start : ( ) => void start ( ) ,
176244 stop,
177245 cancel,
0 commit comments