diff --git a/apps/llm/app/voice_chat/index.tsx b/apps/llm/app/voice_chat/index.tsx
index 79a713c93..0bf4c9b30 100644
--- a/apps/llm/app/voice_chat/index.tsx
+++ b/apps/llm/app/voice_chat/index.tsx
@@ -76,7 +76,11 @@ function VoiceChatScreen() {
       });
       recorder.start();
       const transcription = await speechToText.stream();
-      await llm.sendMessage(transcription);
+      await llm.sendMessage(
+        typeof transcription === 'string'
+          ? transcription
+          : transcription.map((w) => w.word).join(' ')
+      );
     }
   };

@@ -105,7 +109,13 @@ function VoiceChatScreen() {
             ...llm.messageHistory,
             {
               role: 'user',
-              content: speechToText.committedTranscription,
+              content:
+                typeof speechToText.committedTranscription ===
+                'string'
+                  ? speechToText.committedTranscription
+                  : speechToText.committedTranscription
+                      .map((w) => w.word)
+                      .join(' '),
             },
           ]
         : llm.messageHistory
diff --git a/apps/speech/screens/SpeechToTextScreen.tsx b/apps/speech/screens/SpeechToTextScreen.tsx
index 1e4525986..98fff26a4 100644
--- a/apps/speech/screens/SpeechToTextScreen.tsx
+++ b/apps/speech/screens/SpeechToTextScreen.tsx
@@ -8,9 +8,14 @@ import {
   TextInput,
   KeyboardAvoidingView,
   Platform,
+  Switch,
 } from 'react-native';
 import { SafeAreaProvider, SafeAreaView } from 'react-native-safe-area-context';
-import { useSpeechToText, WHISPER_TINY_EN } from 'react-native-executorch';
+import {
+  useSpeechToText,
+  WHISPER_TINY_EN,
+  Word,
+} from 'react-native-executorch';
 import FontAwesome from '@expo/vector-icons/FontAwesome';
 import {
   AudioManager,
@@ -28,7 +33,10 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => {
     model: WHISPER_TINY_EN,
   });

-  const [transcription, setTranscription] = useState('');
+  const [transcription, setTranscription] = useState<string | Word[]>('');
+
+  const [enableTimestamps, setEnableTimestamps] = useState(false);
+
   const [audioURL, setAudioURL] = useState('');
   const [liveTranscribing, setLiveTranscribing] = useState(false);
   const scrollViewRef = useRef(null);
@@ -50,6 +58,15 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => {
     AudioManager.requestRecordingPermissions();
   }, []);

+  const getText = (data: string | Word[] | undefined) => {
+    if (!data) return '';
+    if (typeof data === 'string') return data;
+
+    return data
+      .map((w) => `${w.word} (${w.start.toFixed(2)}s - ${w.end.toFixed(2)}s)\n`)
+      .join('');
+  };
+
   const handleTranscribeFromURL = async () => {
     if (!audioURL.trim()) {
       console.warn('Please provide a valid audio file URL');
@@ -66,7 +83,11 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => {
     try {
       const decodedAudioData = await audioContext.decodeAudioDataSource(uri);
       const audioBuffer = decodedAudioData.getChannelData(0);
-      setTranscription(await model.transcribe(audioBuffer));
+
+      const result = await model.transcribe(audioBuffer, {
+        enableTimestamps: enableTimestamps as any,
+      });
+      setTranscription(result);
     } catch (error) {
       console.error('Error decoding audio data', error);
       console.warn('Note: Supported file formats: mp3, wav, flac');
@@ -76,14 +97,15 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => {

   const handleStartTranscribeFromMicrophone = async () => {
     setLiveTranscribing(true);
-    setTranscription('');
+    setTranscription(enableTimestamps ? [] : '');
+
     recorder.onAudioReady(({ buffer }) => {
       model.streamInsert(buffer.getChannelData(0));
     });
     recorder.start();
     try {
-      await model.stream();
+      await model.stream({ enableTimestamps: enableTimestamps });
     } catch (error) {
       console.error('Error during live transcription:', error);
     }
@@ -106,6 +128,13 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => {
   const readyToTranscribe = !model.isGenerating && model.isReady;
   const recordingButtonDisabled = isSimulator || !readyToTranscribe;

+  const hasResult = transcription.length > 0;
+
+  const displayedText = hasResult
+    ? getText(transcription)
+    : getText(model.committedTranscription) +
+      getText(model.nonCommittedTranscription);
+
   return (
@@ -126,6 +155,20 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => {
           Status: {getModelStatus()}
+          <View style={styles.toggleContainer}>
+            <Text style={styles.toggleLabel}>Enable Timestamps</Text>
+            <Switch
+              value={enableTimestamps}
+              onValueChange={(val) => {
+                setEnableTimestamps(val);
+                setTranscription(val ? [] : '');
+              }}
+              trackColor={{ false: '#767577', true: '#0f186e' }}
+              thumbColor={enableTimestamps ? '#fff' : '#f4f3f4'}
+              disabled={model.isGenerating}
+            />
+          </View>
           Transcription
@@ … @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => {
             scrollViewRef.current?.scrollToEnd({ animated: true })
           }
         >
-          {transcription !== ''
-            ? transcription
-            : model.committedTranscription +
-              model.nonCommittedTranscription}
+          {displayedText}
@@ -229,6 +267,17 @@ const styles = StyleSheet.create({
     marginTop: 12,
     alignItems: 'center',
   },
+  toggleContainer: {
+    flexDirection: 'row',
+    alignItems: 'center',
+    marginTop: 10,
+    marginBottom: 5,
+  },
+  toggleLabel: {
+    fontSize: 16,
+    marginRight: 10,
+    color: '#0f186e',
+  },
   transcriptionContainer: {
     flex: 1,
     width: '100%',
diff --git a/docs/docs/02-hooks/01-natural-language-processing/useSpeechToText.md b/docs/docs/02-hooks/01-natural-language-processing/useSpeechToText.md
index d94c96a66..b3171c77f 100644
--- a/docs/docs/02-hooks/01-natural-language-processing/useSpeechToText.md
+++ b/docs/docs/02-hooks/01-natural-language-processing/useSpeechToText.md
@@ -75,25 +75,31 @@ For more information on loading resources, take a look at [loading models](../..

 ### Returns

-| Field | Type | Description |
-| ----- | ---- | ----------- |
-| `transcribe` | `(waveform: Float32Array \| number[], options?: DecodingOptions \| undefined) => Promise<string>` | Starts a transcription process for a given input array, which should be a waveform at 16kHz. The second argument is an options object, e.g. `{ language: 'es' }` for multilingual models. Resolves a promise with the output transcription when the model is finished. Passing `number[]` is deprecated. |
-| `stream` | `(options?: DecodingOptions \| undefined) => Promise<void>` | Starts a streaming transcription process. Use in combination with `streamInsert` to feed audio chunks and `streamStop` to end the stream. The argument is an options object, e.g. `{ language: 'es' }` for multilingual models. Updates `committedTranscription` and `nonCommittedTranscription` as transcription progresses. |
-| `streamInsert` | `(waveform: Float32Array \| number[]) => void` | Inserts a chunk of audio data (sampled at 16kHz) into the ongoing streaming transcription. Call this repeatedly as new audio data becomes available. Passing `number[]` is deprecated. |
-| `streamStop` | `() => void` | Stops the ongoing streaming transcription process. |
-| `encode` | `(waveform: Float32Array \| number[]) => Promise<Float32Array>` | Runs the encoding part of the model on the provided waveform. Passing `number[]` is deprecated. |
-| `decode` | `(tokens: number[] \| Int32Array, encoderOutput: Float32Array \| number[]) => Promise<Float32Array>` | Runs the decoder of the model. Passing `number[]` is deprecated. |
-| `committedTranscription` | `string` | Contains the part of the transcription that is finalized and will not change. Useful for displaying stable results during streaming. |
-| `nonCommittedTranscription` | `string` | Contains the part of the transcription that is still being processed and may change. Useful for displaying live, partial results during streaming. |
-| `error` | `string \| null` | Contains the error message if the model failed to load. |
-| `isGenerating` | `boolean` | Indicates whether the model is currently processing an inference. |
-| `isReady` | `boolean` | Indicates whether the model has successfully loaded and is ready for inference. |
-| `downloadProgress` | `number` | Tracks the progress of the model download process. |
+| Field | Type | Description |
+| ----- | ---- | ----------- |
+| `transcribe` | `(waveform: Float32Array \| number[], options?: DecodingOptions & { enableTimestamps?: boolean }) => Promise<string \| Word[]>` | Starts a transcription process for a given input array, which should be a waveform at 16kHz. For multilingual models, specify the language in `options`, e.g. `{ language: 'es' }`. If `enableTimestamps` is true, the transcription is returned with word-level timestamps (`Word[]`); if false (default), it is returned as a string. Resolves a promise with the output transcription when the model is finished. Passing `number[]` is deprecated. |
+| `stream` | `(options?: DecodingOptions & { enableTimestamps?: boolean }) => Promise<void>` | Starts a streaming transcription process. Use in combination with `streamInsert` to feed audio chunks and `streamStop` to end the stream. The argument is an options object, e.g. `{ language: 'es' }` for multilingual models. As in `transcribe`, set `enableTimestamps` to choose whether the transcription includes word-level timestamps. Updates `committedTranscription` and `nonCommittedTranscription` as transcription progresses. |
+| `streamInsert` | `(waveform: Float32Array \| number[]) => void` | Inserts a chunk of audio data (sampled at 16kHz) into the ongoing streaming transcription. Call this repeatedly as new audio data becomes available. Passing `number[]` is deprecated. |
+| `streamStop` | `() => void` | Stops the ongoing streaming transcription process. |
+| `encode` | `(waveform: Float32Array \| number[]) => Promise<Float32Array>` | Runs the encoding part of the model on the provided waveform. Passing `number[]` is deprecated. |
+| `decode` | `(tokens: number[] \| Int32Array, encoderOutput: Float32Array \| number[]) => Promise<Float32Array>` | Runs the decoder of the model. Passing `number[]` is deprecated. |
+| `committedTranscription` | `string \| Word[]` | Contains the part of the transcription that is finalized and will not change. Useful for displaying stable results during streaming. |
+| `nonCommittedTranscription` | `string \| Word[]` | Contains the part of the transcription that is still being processed and may change. Useful for displaying live, partial results during streaming. |
+| `error` | `string \| null` | Contains the error message if the model failed to load. |
+| `isGenerating` | `boolean` | Indicates whether the model is currently processing an inference. |
+| `isReady` | `boolean` | Indicates whether the model has successfully loaded and is ready for inference. |
+| `downloadProgress` | `number` | Tracks the progress of the model download process. |

 <details>
 <summary>Type definitions</summary>

 ```typescript
+interface Word {
+  word: string;
+  start: number;
+  end: number;
+}
+
 // Languages supported by whisper (Multilingual)
 type SpeechToTextLanguage =
   | 'af'
@@ -174,6 +180,7 @@ type SpeechToTextLanguage =

 interface DecodingOptions {
   language?: SpeechToTextLanguage;
+  enableTimestamps?: boolean;
 }

 interface SpeechToTextModelConfig {
@@ -204,12 +211,25 @@ const model = useSpeechToText({
 const transcription = await model.transcribe(spanishAudio, { language: 'es' });
 ```

+### Timestamps
+
+You can obtain word-level timestamps by setting `enableTimestamps: true` in the options. This changes the return type from a string to an array of `Word` objects.
+
+```typescript
+const words = await model.transcribe(audioBuffer, { enableTimestamps: true });
+// words: [{ word: "Hello", start: 0.0, end: 0.4 }, ...]
+```
+
 ## Example

 ```tsx
 import React, { useState } from 'react';
-import { Button, Text } from 'react-native';
-import { useSpeechToText, WHISPER_TINY_EN } from 'react-native-executorch';
+import { Button, Text, View } from 'react-native';
+import {
+  useSpeechToText,
+  WHISPER_TINY_EN,
+  Word,
+} from 'react-native-executorch';
 import { AudioContext } from 'react-native-audio-api';
 import * as FileSystem from 'expo-file-system';

@@ -218,7 +238,7 @@ function App() {
     model: WHISPER_TINY_EN,
   });

-  const [transcription, setTranscription] = useState('');
+  const [transcription, setTranscription] = useState<string | Word[]>('');

   const loadAudio = async () => {
     const { uri } = await FileSystem.downloadAsync(
@@ -235,14 +255,38 @@ const handleTranscribe = async () => {
     const audio = await loadAudio();
-    await model.transcribe(audio);
+    // Default text transcription
+    const result = await model.transcribe(audio);
+    setTranscription(result);
+  };
+
+  const handleTranscribeWithTimestamps = async () => {
+    const audio = await loadAudio();
+    // Transcription with timestamps
+    const result = await model.transcribe(audio, { enableTimestamps: true });
+    setTranscription(result);
+  };
+
+  const renderContent = () => {
+    if (typeof transcription === 'string') {
+      return <Text>{transcription}</Text>;
+    }
+    return transcription.map((w, i) => (
+      <Text key={i}>
+        {w.word} ({w.start.toFixed(2)}s)
+      </Text>
+    ));
+  };

   return (
-    <>
-      <Text>{transcription}</Text>
-
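For reference, a minimal sketch of consuming the timestamped output downstream, assuming only the `Word` shape documented above (`word`, plus `start`/`end` in seconds); the helper names and the grouping threshold are illustrative, not part of the library API:

```typescript
// Shape of a single timestamped word, mirroring the `Word` type documented above.
interface Word {
  word: string;
  start: number;
  end: number;
}

// Join words back into caption-style lines, starting a new line whenever the
// pause between consecutive words exceeds `maxGapSeconds` (an arbitrary choice).
function toCaptionLines(words: Word[], maxGapSeconds = 0.8): string[] {
  const lines: string[] = [];
  let current: Word[] = [];

  for (const w of words) {
    const prev = current[current.length - 1];
    if (prev && w.start - prev.end > maxGapSeconds) {
      lines.push(formatLine(current));
      current = [];
    }
    current.push(w);
  }
  if (current.length > 0) {
    lines.push(formatLine(current));
  }
  return lines;
}

// Render one group of words as "[start - end] text".
function formatLine(words: Word[]): string {
  const start = words[0].start.toFixed(2);
  const end = words[words.length - 1].end.toFixed(2);
  const text = words
    .map((w) => w.word)
    .join(' ')
    .trim();
  return `[${start}s - ${end}s] ${text}`;
}
```

Grouping on pauses keeps the formatting logic independent of how the words were produced, so the same helper works for a `transcribe` result and for committed streaming output alike.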