@@ -56,6 +56,7 @@ struct TfLiteConfig {
5656 TfLiteAudioFeatureProvider* featureProvider = nullptr ;
5757 const char ** labels = nullptr ;
5858 bool useAllOpsResolver = false ;
59+ // callback for command handler
5960 void (*respondToCommand)(const char * found_command, uint8_t score,
6061 bool is_new_command) = nullptr ;
6162
@@ -86,21 +87,40 @@ struct TfLiteConfig {
8687 int kFeatureSliceDurationMs = 30 ;
8788
8889 // number of new slices to collect before evaluating the model
89- int kSlicesToProcess = 3 ;
90+ int kSlicesToProcess = 2 ;
91+
92+ // Parameters for RecognizeCommands
93+ int32_t average_window_duration_ms = 1000 ;
94+ uint8_t detection_threshold = 200 ;
95+ int32_t suppression_ms = 1500 ;
96+ int32_t minimum_count = 3 ;
97+
98+ // input for FrontendConfig
// These fields are copied one-to-one into the frontend `config` object
// (filterbank, noise_reduction, pcan_gain_control, log_scale) before
// FrontendPopulateState() is called.
// NOTE(review): noise_reduction_smoothing_bits and pcan_gain_control_gain_bits
// hold whole-number bit counts but are declared float; the destination fields
// are presumably integers, so the assignment would convert implicitly -
// TODO confirm against the FrontendConfig declaration.
99+ float filterbank_lower_band_limit = 125.0 ;
100+ float filterbank_upper_band_limit = 7500.0 ;
101+ float noise_reduction_smoothing_bits = 10 ;
102+ float noise_reduction_even_smoothing = 0.025 ;
103+ float noise_reduction_odd_smoothing = 0.06 ;
104+ float noise_reduction_min_signal_remaining = 0.05 ;
105+ bool pcan_gain_control_enable_pcan = 1 ;
106+ float pcan_gain_control_strength = 0.95 ;
107+ float pcan_gain_control_offset = 80.0 ;
108+ float pcan_gain_control_gain_bits = 21 ;
109+ bool log_scale_enable_log = 1 ;
110+ uint8_t log_scale_scale_shift = 6 ;
111+
// Returns the total number of feature elements: the size of one feature
// slice multiplied by the number of slices (kFeatureSliceSize *
// kFeatureSliceCount).
112+ int featureElementCount () {
113+ return kFeatureSliceSize * kFeatureSliceCount ;
114+ }
90115
91- int featureElementCount () { return kFeatureSliceSize * kFeatureSliceCount ; }
// Returns the number of audio samples covered by one feature slice:
// kFeatureSliceDurationMs multiplied by the samples per millisecond.
// NOTE(review): if sample_rate is an integer type (declared outside this
// view), sample_rate / 1000 truncates for rates that are not a multiple of
// 1000 - TODO confirm this is intended.
92116 int audioSampleSize () {
93117 return kFeatureSliceDurationMs * (sample_rate / 1000 );
94118 }
119+
// Returns the number of audio samples between the start of consecutive
// feature slices: kFeatureSliceStrideMs multiplied by the samples per
// millisecond.
// NOTE(review): if sample_rate is an integer type (declared outside this
// view), sample_rate / 1000 truncates for rates that are not a multiple of
// 1000 - TODO confirm this is intended.
95120 int strideSampleSize () {
96121 return kFeatureSliceStrideMs * (sample_rate / 1000 );
97122 }
98123
99- // Parameters for RecognizeCommands
100- int32_t average_window_duration_ms = 1000 ;
101- uint8_t detection_threshold = 200 ;
102- int32_t suppression_ms = 1500 ;
103- int32_t minimum_count = 3 ;
104124};
105125
106126// Partial implementation of std::deque, just providing the functionality
@@ -512,20 +532,19 @@ class TfLiteAudioFeatureProvider {
512532 LOGD (LOG_METHOD);
513533 config.window .size_ms = cfg.kFeatureSliceDurationMs ;
514534 config.window .step_size_ms = cfg.kFeatureSliceStrideMs ;
515- config.noise_reduction .smoothing_bits = 10 ;
516535 config.filterbank .num_channels = cfg.kFeatureSliceSize ;
517- config.filterbank .lower_band_limit = 125.0 ;
518- config.filterbank .upper_band_limit = 7500.0 ;
519- config.noise_reduction .smoothing_bits = 10 ;
520- config.noise_reduction .even_smoothing = 0.025 ;
521- config.noise_reduction .odd_smoothing = 0.06 ;
522- config.noise_reduction .min_signal_remaining = 0.05 ;
523- config.pcan_gain_control .enable_pcan = 1 ;
524- config.pcan_gain_control .strength = 0.95 ;
525- config.pcan_gain_control .offset = 80.0 ;
526- config.pcan_gain_control .gain_bits = 21 ;
527- config.log_scale .enable_log = 1 ;
528- config.log_scale .scale_shift = 6 ;
536+ config.filterbank .lower_band_limit = cfg. filterbank_lower_band_limit ;
537+ config.filterbank .upper_band_limit = cfg. filterbank_upper_band_limit ;
538+ config.noise_reduction .smoothing_bits = cfg. noise_reduction_smoothing_bits ;
539+ config.noise_reduction .even_smoothing = cfg. noise_reduction_even_smoothing ;
540+ config.noise_reduction .odd_smoothing = cfg. noise_reduction_odd_smoothing ;
541+ config.noise_reduction .min_signal_remaining = cfg. noise_reduction_min_signal_remaining ;
542+ config.pcan_gain_control .enable_pcan = cfg. pcan_gain_control_enable_pcan ;
543+ config.pcan_gain_control .strength = cfg. pcan_gain_control_strength ;
544+ config.pcan_gain_control .offset = cfg. pcan_gain_control_offset ;
545+ config.pcan_gain_control .gain_bits = cfg. pcan_gain_control_gain_bits ;
546+ config.log_scale .enable_log = cfg. log_scale_enable_log ;
547+ config.log_scale .scale_shift = cfg. log_scale_scale_shift ;
529548 if (!FrontendPopulateState (&config, &g_micro_features_state,
530549 cfg.sample_rate )) {
531550 LOGE (" frontendPopulateState() failed" );
@@ -715,6 +734,7 @@ class TfLiteAudioOutput : public AudioPrint {
715734 current_time += cfg.kFeatureSliceStrideMs ;
716735 // determine slice
717736 total_slice_count++;
737+
718738 int8_t * feature_buffer = feature_provider->addSlice ();
719739 if (total_slice_count >= cfg.kSlicesToProcess ) {
720740 processSlices (feature_buffer);
0 commit comments