diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp
index 2a1aba7..285ea69 100644
--- a/NAM/wavenet.cpp
+++ b/NAM/wavenet.cpp
@@ -13,12 +13,14 @@ void nam::wavenet::_Layer::SetMaxBufferSize(const int maxBufferSize)
 {
   _conv.SetMaxBufferSize(maxBufferSize);
   _input_mixin.SetMaxBufferSize(maxBufferSize);
-  _z.resize(this->_conv.get_out_channels(), maxBufferSize);
+  const long z_channels = this->_conv.get_out_channels(); // This is 2*bottleneck when gated, bottleneck when not
+  _z.resize(z_channels, maxBufferSize);
   _1x1.SetMaxBufferSize(maxBufferSize);
   // Pre-allocate output buffers
   const long channels = this->get_channels();
   this->_output_next_layer.resize(channels, maxBufferSize);
-  this->_output_head.resize(channels, maxBufferSize);
+  // _output_head stores the activated portion: bottleneck rows (the actual bottleneck value, not doubled)
+  this->_output_head.resize(this->_bottleneck, maxBufferSize);
 }

 void nam::wavenet::_Layer::set_weights_(std::vector<float>::iterator& weights)
@@ -30,7 +32,7 @@ void nam::wavenet::_Layer::set_weights_(std::vector<float>::iterator& weights)
 void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::MatrixXf& condition,
                                    const int num_frames)
 {
-  const long channels = this->get_channels();
+  const long bottleneck = this->_bottleneck; // Use the actual bottleneck value, not the doubled output channels

   // Step 1: input convolutions
   this->_conv.Process(input, num_frames);
@@ -50,19 +52,20 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
     // do this column-wise:
     for (int i = 0; i < num_frames; i++)
     {
-      this->_activation->apply(this->_z.block(0, i, channels, 1));
+      this->_activation->apply(this->_z.block(0, i, bottleneck, 1));
       // TODO Need to support other activation functions here instead of hardcoded sigmoid
-      activations::Activation::get_activation("Sigmoid")->apply(this->_z.block(channels, i, channels, 1));
+      activations::Activation::get_activation("Sigmoid")->apply(this->_z.block(bottleneck, i, bottleneck, 1));
     }
-    this->_z.block(0, 0, channels, num_frames).array() *= this->_z.block(channels, 0, channels, num_frames).array();
-    _1x1.process_(_z.topRows(channels), num_frames); // Might not be RT safe
+    this->_z.block(0, 0, bottleneck, num_frames).array() *=
+      this->_z.block(bottleneck, 0, bottleneck, num_frames).array();
+    _1x1.process_(_z.topRows(bottleneck), num_frames); // Might not be RT safe
   }

   // Store output to head (skip connection: activated conv output)
   if (!this->_gated)
     this->_output_head.leftCols(num_frames).noalias() = this->_z.leftCols(num_frames);
   else
-    this->_output_head.leftCols(num_frames).noalias() = this->_z.topRows(channels).leftCols(num_frames);
+    this->_output_head.leftCols(num_frames).noalias() = this->_z.topRows(bottleneck).leftCols(num_frames);
   // Store output to next layer (residual connection: input + _1x1 output)
   this->_output_next_layer.leftCols(num_frames).noalias() =
     input.leftCols(num_frames) + _1x1.GetOutput().leftCols(num_frames);
@@ -72,15 +75,17 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
 // LayerArray =================================================================
 nam::wavenet::_LayerArray::_LayerArray(const int input_size, const int condition_size, const int head_size,
-                                       const int channels, const int kernel_size, const std::vector<int>& dilations,
-                                       const std::string activation, const bool gated, const bool head_bias,
-                                       const int groups_input, const int groups_1x1)
+                                       const int channels, const int bottleneck, const int kernel_size,
+                                       const std::vector<int>& dilations, const std::string activation,
+                                       const bool gated, const bool head_bias, const int groups_input,
+                                       const int groups_1x1)
 : _rechannel(input_size, channels, false)
-, _head_rechannel(channels, head_size, head_bias)
+, _head_rechannel(bottleneck, head_size, head_bias)
+, _bottleneck(bottleneck)
 {
   for (size_t i = 0; i < dilations.size(); i++)
-    this->_layers.push_back(
-      _Layer(condition_size, channels, kernel_size, dilations[i], activation, gated, groups_input, groups_1x1));
+    this->_layers.push_back(_Layer(
+      condition_size, channels, bottleneck, kernel_size, dilations[i], activation, gated, groups_input, groups_1x1));
 }

 void nam::wavenet::_LayerArray::SetMaxBufferSize(const int maxBufferSize)
@@ -94,7 +99,7 @@ void nam::wavenet::_LayerArray::SetMaxBufferSize(const int maxBufferSize)
   // Pre-allocate output buffers
   const long channels = this->_get_channels();
   this->_layer_outputs.resize(channels, maxBufferSize);
-  this->_head_inputs.resize(channels, maxBufferSize);
+  this->_head_inputs.resize(this->_bottleneck, maxBufferSize);
 }

@@ -199,9 +204,9 @@ nam::wavenet::WaveNet::WaveNet(const std::vector<LayerArrayParams>&
 {
   this->_layer_arrays.push_back(nam::wavenet::_LayerArray(
     layer_array_params[i].input_size, layer_array_params[i].condition_size, layer_array_params[i].head_size,
-    layer_array_params[i].channels, layer_array_params[i].kernel_size, layer_array_params[i].dilations,
-    layer_array_params[i].activation, layer_array_params[i].gated, layer_array_params[i].head_bias,
-    layer_array_params[i].groups_input, layer_array_params[i].groups_1x1));
+    layer_array_params[i].channels, layer_array_params[i].bottleneck, layer_array_params[i].kernel_size,
+    layer_array_params[i].dilations, layer_array_params[i].activation, layer_array_params[i].gated,
+    layer_array_params[i].head_bias, layer_array_params[i].groups_input, layer_array_params[i].groups_1x1));
   if (i > 0)
     if (layer_array_params[i].channels != layer_array_params[i - 1].head_size)
     {
@@ -300,8 +305,10 @@ std::unique_ptr<DSP> nam::wavenet::Factory(const nlohmann::json& config, st
     nlohmann::json layer_config = config["layers"][i];
     const int groups = layer_config.value("groups", 1); // defaults to 1
     const int groups_1x1 = layer_config.value("groups_1x1", 1); // defaults to 1
+    const int channels = layer_config["channels"];
+    const int bottleneck = layer_config.value("bottleneck", channels); // defaults to channels if not present
     layer_array_params.push_back(nam::wavenet::LayerArrayParams(
-      layer_config["input_size"], layer_config["condition_size"], layer_config["head_size"], layer_config["channels"],
+      layer_config["input_size"], layer_config["condition_size"], layer_config["head_size"], channels, bottleneck,
       layer_config["kernel_size"], layer_config["dilations"], layer_config["activation"], layer_config["gated"],
       layer_config["head_bias"], groups, groups_1x1));
   }
diff --git a/NAM/wavenet.h b/NAM/wavenet.h
index 71d2eff..832673b 100644
--- a/NAM/wavenet.h
+++ b/NAM/wavenet.h
@@ -16,13 +16,14 @@ namespace wavenet
 class _Layer
 {
 public:
-  _Layer(const int condition_size, const int channels, const int kernel_size, const int dilation,
+  _Layer(const int condition_size, const int channels, const int bottleneck, const int kernel_size, const int dilation,
          const std::string activation, const bool gated, const int groups_input, const int groups_1x1)
-  : _conv(channels, gated ? 2 * channels : channels, kernel_size, true, dilation, groups_input)
-  , _input_mixin(condition_size, gated ? 2 * channels : channels, false)
-  , _1x1(channels, channels, true, groups_1x1)
+  : _conv(channels, gated ? 2 * bottleneck : bottleneck, kernel_size, true, dilation, groups_input)
+  , _input_mixin(condition_size, gated ? 2 * bottleneck : bottleneck, false)
+  , _1x1(bottleneck, channels, true, groups_1x1)
   , _activation(activations::Activation::get_activation(activation)) // needs to support activations with parameters
-  , _gated(gated) {};
+  , _gated(gated)
+  , _bottleneck(bottleneck) {};
   // Resize all arrays to be able to process `maxBufferSize` frames.
   void SetMaxBufferSize(const int maxBufferSize);
   // Set the parameters of this module
@@ -71,18 +72,21 @@ class _Layer
   activations::Activation* _activation;
   const bool _gated;
+  const int _bottleneck; // Internal channel count (not doubled when gated)
 };

 class LayerArrayParams
 {
 public:
   LayerArrayParams(const int input_size_, const int condition_size_, const int head_size_, const int channels_,
-                   const int kernel_size_, const std::vector<int>&& dilations_, const std::string activation_,
-                   const bool gated_, const bool head_bias_, const int groups_input, const int groups_1x1_)
+                   const int bottleneck_, const int kernel_size_, const std::vector<int>&& dilations_,
+                   const std::string activation_, const bool gated_, const bool head_bias_, const int groups_input,
+                   const int groups_1x1_)
   : input_size(input_size_)
   , condition_size(condition_size_)
   , head_size(head_size_)
   , channels(channels_)
+  , bottleneck(bottleneck_)
   , kernel_size(kernel_size_)
   , dilations(std::move(dilations_))
   , activation(activation_)
@@ -97,6 +101,7 @@ class LayerArrayParams
   const int condition_size;
   const int head_size;
   const int channels;
+  const int bottleneck;
   const int kernel_size;
   std::vector<int> dilations;
   const std::string activation;
@@ -111,8 +116,9 @@ class _LayerArray
 {
 public:
   _LayerArray(const int input_size, const int condition_size, const int head_size, const int channels,
-              const int kernel_size, const std::vector<int>& dilations, const std::string activation, const bool gated,
-              const bool head_bias, const int groups_input, const int groups_1x1);
+              const int bottleneck, const int kernel_size, const std::vector<int>& dilations,
+              const std::string activation, const bool gated, const bool head_bias, const int groups_input,
+              const int groups_1x1);

   void SetMaxBufferSize(const int maxBufferSize);

@@ -150,12 +156,15 @@ class _LayerArray
   std::vector<_Layer> _layers;
   // Output from last layer (for next layer array)
   Eigen::MatrixXf _layer_outputs;
-  // Accumulated head inputs from all layers
+  // Accumulated head inputs from all layers (bottleneck channels)
   Eigen::MatrixXf _head_inputs;
-  // Rechannel for the head
+  // Rechannel for the head (bottleneck -> head_size)
   Conv1x1 _head_rechannel;
+  // Bottleneck size (internal channel count)
+  const int _bottleneck;
+
   long _get_channels() const;
   // Common processing logic after head inputs are set
   void ProcessInner(const Eigen::MatrixXf& layer_inputs, const Eigen::MatrixXf& condition, const int num_frames);
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 01cf211..1fd5802 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -20,7 +20,7 @@ set_target_properties(run_tests PROPERTIES COMPILE_OPTIONS "-O0")
 # Release/RelWithDebInfo/MinSizeRel build types automatically define NDEBUG
 # We use a compile option to undefine it, which works on GCC, Clang, and MSVC
 target_compile_options(run_tests PRIVATE
-  $<$<OR:$<CONFIG:Release>,$<CONFIG:RelWithDebInfo>,$<CONFIG:MinSizeRel>>:-U_NDEBUG>
+  $<$<OR:$<CONFIG:Release>,$<CONFIG:RelWithDebInfo>,$<CONFIG:MinSizeRel>>:-UNDEBUG>
 )

 source_group(NAM ${CMAKE_CURRENT_SOURCE_DIR} FILES ${NAM_SOURCES})
diff --git a/tools/run_tests.cpp b/tools/run_tests.cpp
index aa28629..33c4d45 100644
--- a/tools/run_tests.cpp
+++ b/tools/run_tests.cpp
@@ -104,6 +104,8 @@ int main()
   test_wavenet::test_layer::test_non_gated_layer();
   test_wavenet::test_layer::test_layer_activations();
   test_wavenet::test_layer::test_layer_multichannel();
+  test_wavenet::test_layer::test_layer_bottleneck();
+  test_wavenet::test_layer::test_layer_bottleneck_gated();
   test_wavenet::test_layer_array::test_layer_array_basic();
   test_wavenet::test_layer_array::test_layer_array_receptive_field();
   test_wavenet::test_layer_array::test_layer_array_with_head_input();
@@ -118,6 +120,7 @@ int main()
   test_wavenet::test_conv1d_grouped_process_realtime_safe();
   test_wavenet::test_conv1d_grouped_dilated_process_realtime_safe();
   test_wavenet::test_layer_process_realtime_safe();
+  test_wavenet::test_layer_bottleneck_process_realtime_safe();
   test_wavenet::test_layer_grouped_process_realtime_safe();
   test_wavenet::test_layer_array_process_realtime_safe();
   test_wavenet::test_process_realtime_safe();
diff --git a/tools/test/test_wavenet/test_full.cpp b/tools/test/test_wavenet/test_full.cpp
index 3d20679..d75ae1c 100644
--- a/tools/test/test_wavenet/test_full.cpp
+++ b/tools/test/test_wavenet/test_full.cpp
@@ -19,6 +19,7 @@ void test_wavenet_model()
   const int condition_size = 1;
   const int head_size = 1;
   const int channels = 1;
+  const int bottleneck = channels;
   const int kernel_size = 1;
   std::vector<int> dilations{1};
   const std::string activation = "ReLU";
@@ -29,7 +30,7 @@ void test_wavenet_model()
   const int groups = 1;
   const int groups_1x1 = 1;

-  nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size,
+  nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size,
                                         std::move(dilations), activation, gated, head_bias, groups, groups_1x1);
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   layer_array_params.push_back(std::move(params));
@@ -85,15 +86,16 @@ void test_wavenet_multiple_arrays()
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   // First array
   std::vector<int> dilations1{1};
+  const int bottleneck = channels;
   const int groups_1x1 = 1;
   layer_array_params.push_back(nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, channels,
-                                                              kernel_size, std::move(dilations1), activation, gated,
-                                                              head_bias, groups, groups_1x1));
+                                                              bottleneck, kernel_size, std::move(dilations1), activation,
+                                                              gated, head_bias, groups, groups_1x1));
   // Second array (head_size of first must match channels of second)
   std::vector<int> dilations2{1};
   layer_array_params.push_back(nam::wavenet::LayerArrayParams(head_size, condition_size, head_size, channels,
-                                                              kernel_size, std::move(dilations2), activation, gated,
-                                                              head_bias, groups, groups_1x1));
+                                                              bottleneck, kernel_size, std::move(dilations2), activation,
+                                                              gated, head_bias, groups, groups_1x1));

   std::vector<float> weights;
   // Array 0: rechannel, layer, head_rechannel
@@ -127,6 +129,7 @@ void test_wavenet_zero_input()
   const int condition_size = 1;
   const int head_size = 1;
   const int channels = 1;
+  const int bottleneck = channels;
   const int kernel_size = 1;
   std::vector<int> dilations{1};
   const std::string activation = "ReLU";
@@ -137,7 +140,7 @@ void test_wavenet_zero_input()
   const int groups = 1;
   const int groups_1x1 = 1;

-  nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size,
+  nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size,
                                         std::move(dilations), activation, gated, head_bias, groups, groups_1x1);
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   layer_array_params.push_back(std::move(params));
@@ -168,6 +171,7 @@ void test_wavenet_different_buffer_sizes()
   const int condition_size = 1;
   const int head_size = 1;
   const int channels = 1;
+  const int bottleneck = channels;
   const int kernel_size = 1;
   std::vector<int> dilations{1};
   const std::string activation = "ReLU";
@@ -178,7 +182,7 @@ void test_wavenet_different_buffer_sizes()
   const int groups = 1;
   const int groups_1x1 = 1;

-  nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size,
+  nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size,
                                         std::move(dilations), activation, gated, head_bias, groups, groups_1x1);
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   layer_array_params.push_back(std::move(params));
@@ -210,6 +214,7 @@ void test_wavenet_prewarm()
   const int condition_size = 1;
   const int head_size = 1;
   const int channels = 1;
+  const int bottleneck = channels;
   const int kernel_size = 3;
   std::vector<int> dilations{1, 2, 4};
   const std::string activation = "ReLU";
@@ -220,7 +225,7 @@ void test_wavenet_prewarm()
   const int groups = 1;
   const int groups_1x1 = 1;

-  nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size,
+  nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size,
                                         std::move(dilations), activation, gated, head_bias, groups, groups_1x1);
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   layer_array_params.push_back(std::move(params));
diff --git a/tools/test/test_wavenet/test_layer.cpp b/tools/test/test_wavenet/test_layer.cpp
index 10eccf4..5d53be4 100644
--- a/tools/test/test_wavenet/test_layer.cpp
+++ b/tools/test/test_wavenet/test_layer.cpp
@@ -18,14 +18,15 @@ void test_gated()
   // Issue 101
   const int conditionSize = 1;
   const int channels = 1;
+  const int bottleneck = channels;
   const int kernelSize = 1;
   const int dilation = 1;
   const std::string activation = "ReLU";
   const bool gated = true;
   const int groups_input = 1;
   const int groups_1x1 = 1;
-  auto layer =
-    nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input, groups_1x1);
+  auto layer = nam::wavenet::_Layer(
+    conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, groups_input, groups_1x1);

   // Conv, input mixin, 1x1
   std::vector<float> weights{
@@ -48,7 +49,7 @@ void test_gated()
   Eigen::MatrixXf input, condition, headInput, output;
   input.resize(channels, numFrames);
   condition.resize(conditionSize, numFrames);
-  headInput.resize(channels, numFrames);
+  headInput.resize(bottleneck, numFrames);
   output.resize(channels, numFrames);

   const float signalValue = 0.25f;
@@ -92,6 +93,7 @@ void test_layer_getters()
 {
   const int conditionSize = 2;
   const int channels = 4;
+  const int bottleneck = channels;
   const int kernelSize = 3;
   const int dilation = 2;
   const std::string activation = "Tanh";
@@ -99,8 +101,8 @@ void test_layer_getters()
   const int groups_input = 1;
   const int groups_1x1 = 1;

-  auto layer =
-    nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input, groups_1x1);
+  auto layer = nam::wavenet::_Layer(
+    conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, groups_input, groups_1x1);

   assert(layer.get_channels() == channels);
   assert(layer.get_kernel_size() == kernelSize);
@@ -112,6 +114,7 @@ void test_non_gated_layer()
 {
   const int conditionSize = 1;
   const int channels = 1;
+  const int bottleneck = channels;
   const int kernelSize = 1;
   const int dilation = 1;
   const std::string activation = "ReLU";
@@ -119,8 +122,8 @@ void test_non_gated_layer()
   const int groups_input = 1;
   const int groups_1x1 = 1;

-  auto layer =
-    nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input, groups_1x1);
+  auto layer = nam::wavenet::_Layer(
+    conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, groups_input, groups_1x1);

   // For non-gated: conv outputs 1 channel, input_mixin outputs 1 channel, 1x1 outputs 1 channel
   // Conv: (1,1,1) weight + (1,) bias
@@ -152,7 +155,7 @@ void test_non_gated_layer()
   assert(layer_output.rows() == channels);
   assert(layer_output.cols() == numFrames);
-  assert(head_output.rows() == channels);
+  assert(head_output.rows() == bottleneck);
   assert(head_output.cols() == numFrames);

   // With identity-like weights: input=1, condition=1
@@ -183,10 +186,11 @@ void test_layer_activations()
   // Test Tanh activation
   {
+    const int bottleneck = channels;
     const int groups_input = 1;
     const int groups_1x1 = 1;
-    auto layer =
-      nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, "Tanh", gated, groups_input, groups_1x1);
+    auto layer = nam::wavenet::_Layer(
+      conditionSize, channels, bottleneck, kernelSize, dilation, "Tanh", gated, groups_input, groups_1x1);

     std::vector<float> weights{1.0f, 0.0f, 1.0f, 1.0f, 0.0f};
     auto it = weights.begin();
     layer.set_weights_(it);
@@ -213,6 +217,7 @@ void test_layer_multichannel()
 {
   const int conditionSize = 2;
   const int channels = 3;
+  const int bottleneck = channels;
   const int kernelSize = 1;
   const int dilation = 1;
   const std::string activation = "ReLU";
@@ -220,8 +225,8 @@ void test_layer_multichannel()
   const int groups_input = 1;
   const int groups_1x1 = 1;

-  auto layer =
-    nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input, groups_1x1);
+  auto layer = nam::wavenet::_Layer(
+    conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, groups_input, groups_1x1);

   assert(layer.get_channels() == channels);

@@ -272,7 +277,158 @@ void test_layer_multichannel()

   assert(layer_output.rows() == channels);
   assert(layer_output.cols() == numFrames);
-  assert(head_output.rows() == channels);
+  assert(head_output.rows() == bottleneck);
+  assert(head_output.cols() == numFrames);
+}
+
+// Test layer with bottleneck different from channels
+void test_layer_bottleneck()
+{
+  const int conditionSize = 1;
+  const int channels = 4;
+  const int bottleneck = 2; // bottleneck < channels
+  const int kernelSize = 1;
+  const int dilation = 1;
+  const std::string activation = "ReLU";
+  const bool gated = false;
+  const int groups_input = 1;
+  const int groups_1x1 = 1;
+
+  auto layer = nam::wavenet::_Layer(
+    conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, groups_input, groups_1x1);
+
+  // With bottleneck < channels, the internal conv and input_mixin should have bottleneck channels,
+  // but the 1x1 should map from bottleneck back to channels
+  // Conv: (channels, bottleneck, kernelSize=1) + bias -> outputs bottleneck channels
+  // Input mixin: (conditionSize, bottleneck) -> outputs bottleneck channels
+  // 1x1: (bottleneck, channels) + bias -> outputs channels channels
+
+  // Set weights
+  std::vector<float> weights;
+  // Conv weights: out_channels x in_channels x kernelSize = bottleneck x channels x kernelSize = 2 x 4 x 1 = 8 weights
+  // Weight layout for Conv1D: for each out_channel, for each in_channel, for each kernel position
+  // Use identity-like pattern: out_channel i connects to in_channel i (for i < bottleneck)
+  for (int out_ch = 0; out_ch < bottleneck; out_ch++)
+  {
+    for (int in_ch = 0; in_ch < channels; in_ch++)
+    {
+      weights.push_back((out_ch == in_ch) ? 1.0f : 0.0f);
+    }
+  }
+  // Conv bias: bottleneck values
+  weights.insert(weights.end(), {0.0f, 0.0f});
+  // Input mixin: conditionSize x bottleneck = 1 x 2 = 2 weights
+  weights.insert(weights.end(), {1.0f, 1.0f});
+  // 1x1 weights: out_channels x in_channels = channels x bottleneck = 4 x 2 = 8 weights
+  // Weight layout for Conv1x1: for each out_channel, for each in_channel
+  // Identity-like pattern: out_channel i connects to in_channel i (for i < bottleneck)
+  for (int out_ch = 0; out_ch < channels; out_ch++)
+  {
+    for (int in_ch = 0; in_ch < bottleneck; in_ch++)
+    {
+      weights.push_back((out_ch == in_ch) ? 1.0f : 0.0f);
+    }
+  }
+  // 1x1 bias: channels values
+  weights.insert(weights.end(), {0.0f, 0.0f, 0.0f, 0.0f});
+
+  auto it = weights.begin();
+  layer.set_weights_(it);
+  assert(it == weights.end());
+
+  const int numFrames = 2;
+  layer.SetMaxBufferSize(numFrames);
+
+  Eigen::MatrixXf input(channels, numFrames);
+  Eigen::MatrixXf condition(conditionSize, numFrames);
+  input.fill(1.0f);
+  condition.fill(1.0f);
+
+  layer.Process(input, condition, numFrames);
+
+  auto layer_output = layer.GetOutputNextLayer().leftCols(numFrames);
+  auto head_output = layer.GetOutputHead().leftCols(numFrames);
+
+  // Layer output should have channels rows (for next layer)
+  assert(layer_output.rows() == channels);
+  assert(layer_output.cols() == numFrames);
+  // Head output should have bottleneck rows (internal channel count)
+  assert(head_output.rows() == bottleneck);
+  assert(head_output.cols() == numFrames);
+}
+
+// Test layer with bottleneck and gated activation
+void test_layer_bottleneck_gated()
+{
+  const int conditionSize = 1;
+  const int channels = 4;
+  const int bottleneck = 2; // bottleneck < channels
+  const int kernelSize = 1;
+  const int dilation = 1;
+  const std::string activation = "ReLU";
+  const bool gated = true; // gated doubles the internal bottleneck channels
+  const int groups_input = 1;
+  const int groups_1x1 = 1;
+
+  auto layer = nam::wavenet::_Layer(
+    conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, groups_input, groups_1x1);
+
+  // With gated=true and bottleneck=2, internal channels should be 2*bottleneck=4
+  // Conv: (channels, 2*bottleneck, kernelSize=1) = (4, 4, 1) + bias
+  // Input mixin: (conditionSize, 2*bottleneck) = (1, 4)
+  // 1x1: (bottleneck, channels) = (2, 4) + bias
+
+  // Set weights
+  std::vector<float> weights;
+  // Conv weights: out_channels x in_channels x kernelSize = (2*bottleneck) x channels x kernelSize = 4 x 4 x 1 = 16
+  // weights Weight layout for Conv1D: for each out_channel, for each in_channel, for each kernel position Identity
+  // pattern: out_channel i connects to in_channel i (for i < min(2*bottleneck, channels))
+  for (int out_ch = 0; out_ch < 2 * bottleneck; out_ch++)
+  {
+    for (int in_ch = 0; in_ch < channels; in_ch++)
+    {
+      weights.push_back((out_ch == in_ch) ? 1.0f : 0.0f);
+    }
+  }
+  // Conv bias: 2*bottleneck = 4 values
+  weights.insert(weights.end(), {0.0f, 0.0f, 0.0f, 0.0f});
+  // Input mixin: conditionSize x (2*bottleneck) = 1 x 4 = 4 weights
+  weights.insert(weights.end(), {1.0f, 1.0f, 1.0f, 1.0f});
+  // 1x1 weights: out_channels x in_channels = channels x bottleneck = 4 x 2 = 8 weights
+  // Weight layout for Conv1x1: for each out_channel, for each in_channel
+  // Identity pattern: out_channel i connects to in_channel i (for i < bottleneck)
+  for (int out_ch = 0; out_ch < channels; out_ch++)
+  {
+    for (int in_ch = 0; in_ch < bottleneck; in_ch++)
+    {
+      weights.push_back((out_ch == in_ch) ? 1.0f : 0.0f);
+    }
+  }
+  // 1x1 bias: channels = 4 values
+  weights.insert(weights.end(), {0.0f, 0.0f, 0.0f, 0.0f});
+
+  auto it = weights.begin();
+  layer.set_weights_(it);
+  assert(it == weights.end());
+
+  const int numFrames = 2;
+  layer.SetMaxBufferSize(numFrames);
+
+  Eigen::MatrixXf input(channels, numFrames);
+  Eigen::MatrixXf condition(conditionSize, numFrames);
+  input.fill(1.0f);
+  condition.fill(1.0f);
+
+  layer.Process(input, condition, numFrames);
+
+  auto layer_output = layer.GetOutputNextLayer().leftCols(numFrames);
+  auto head_output = layer.GetOutputHead().leftCols(numFrames);
+
+  // Layer output should have channels rows (for next layer)
+  assert(layer_output.rows() == channels);
+  assert(layer_output.cols() == numFrames);
+  // Head output should have bottleneck rows (the activated portion, not the full 2*bottleneck)
+  assert(head_output.rows() == bottleneck);
   assert(head_output.cols() == numFrames);
 }
 }; // namespace test_layer
diff --git a/tools/test/test_wavenet/test_layer_array.cpp b/tools/test/test_wavenet/test_layer_array.cpp
index 41c435a..a4581c2 100644
--- a/tools/test/test_wavenet/test_layer_array.cpp
+++ b/tools/test/test_wavenet/test_layer_array.cpp
@@ -19,6 +19,7 @@ void test_layer_array_basic()
   const int condition_size = 1;
   const int head_size = 1;
   const int channels = 1;
+  const int bottleneck = channels;
   const int kernel_size = 1;
   std::vector<int> dilations{1, 2};
   const std::string activation = "ReLU";
@@ -27,8 +28,8 @@ void test_layer_array_basic()
   const int groups = 1;
   const int groups_1x1 = 1;

-  auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, kernel_size, dilations,
-                                               activation, gated, head_bias, groups, groups_1x1);
+  auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, bottleneck, kernel_size,
+                                               dilations, activation, gated, head_bias, groups, groups_1x1);

   const int numFrames = 4;
   layer_array.SetMaxBufferSize(numFrames);
@@ -75,6 +76,7 @@ void test_layer_array_receptive_field()
   const int condition_size = 1;
   const int head_size = 1;
   const int channels = 1;
+  const int bottleneck = channels;
   const int kernel_size = 3;
   std::vector<int> dilations{1, 2, 4};
   const std::string activation = "ReLU";
@@ -83,8 +85,8 @@ void test_layer_array_receptive_field()
   const int groups = 1;
   const int groups_1x1 = 1;

-  auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, kernel_size, dilations,
-                                               activation, gated, head_bias, groups, groups_1x1);
+  auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, bottleneck, kernel_size,
+                                               dilations, activation, gated, head_bias, groups, groups_1x1);

   long rf = layer_array.get_receptive_field();
   // Expected: sum of dilation * (kernel_size - 1) for each layer
@@ -103,6 +105,7 @@ void test_layer_array_with_head_input()
   const int condition_size = 1;
   const int head_size = 1;
   const int channels = 1;
+  const int bottleneck = channels;
   const int kernel_size = 1;
   std::vector<int> dilations{1};
   const std::string activation = "ReLU";
@@ -111,8 +114,8 @@ void test_layer_array_with_head_input()
   const int groups = 1;
   const int groups_1x1 = 1;

-  auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, kernel_size, dilations,
-                                               activation, gated, head_bias, groups, groups_1x1);
+  auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, bottleneck, kernel_size,
+                                               dilations, activation, gated, head_bias, groups, groups_1x1);

   const int numFrames = 2;
   layer_array.SetMaxBufferSize(numFrames);
diff --git a/tools/test/test_wavenet/test_real_time_safe.cpp b/tools/test/test_wavenet/test_real_time_safe.cpp
index a7a5e8f..91d8628 100644
--- a/tools/test/test_wavenet/test_real_time_safe.cpp
+++ b/tools/test/test_wavenet/test_real_time_safe.cpp
@@ -429,6 +429,7 @@ void test_layer_process_realtime_safe()
   // Setup: Create a Layer
   const int condition_size = 1;
   const int channels = 1;
+  const int bottleneck = channels;
   const int kernel_size = 1;
   const int dilation = 1;
   const std::string activation = "ReLU";
@@ -436,8 +437,8 @@ void test_layer_process_realtime_safe()
   const int groups_input = 1;
   const int groups_1x1 = 1;

-  auto layer =
-    nam::wavenet::_Layer(condition_size, channels, kernel_size, dilation, activation, gated, groups_input, groups_1x1);
+  auto layer = nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gated,
+                                    groups_input, groups_1x1);

   // Set weights
   std::vector<float> weights{1.0f, 0.0f, // Conv (weight, bias)
@@ -477,12 +478,98 @@ void test_layer_process_realtime_safe()
   }
 }

+// Test that Layer::Process() method with bottleneck != channels does not allocate or free memory
+void test_layer_bottleneck_process_realtime_safe()
+{
+  // Setup: Create a Layer with bottleneck different from channels
+  const int condition_size = 1;
+  const int channels = 4;
+  const int bottleneck = 2; // bottleneck < channels
+  const int kernel_size = 1;
+  const int dilation = 1;
+  const std::string activation = "ReLU";
+  const bool gated = false;
+  const int groups_input = 1;
+  const int groups_1x1 = 1;
+
+  auto layer = nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gated,
+                                    groups_input, groups_1x1);
+
+  // Set weights for bottleneck != channels
+  // Conv: (channels, bottleneck, kernelSize=1) = (4, 2, 1) + bias
+  // Input mixin: (conditionSize, bottleneck) = (1, 2)
+  // 1x1: (bottleneck, channels) = (2, 4) + bias
+  std::vector<float> weights;
+  // Conv weights: out_channels x in_channels x kernelSize = bottleneck x channels x kernelSize = 2 x 4 x 1 = 8 weights
+  // Weight layout for Conv1D: for each out_channel, for each in_channel, for each kernel position
+  // Identity-like pattern: out_channel i connects to in_channel i (for i < bottleneck)
+  for (int out_ch = 0; out_ch < bottleneck; out_ch++)
+  {
+    for (int in_ch = 0; in_ch < channels; in_ch++)
+    {
+      weights.push_back((out_ch == in_ch) ? 1.0f : 0.0f);
+    }
+  }
+  // Conv bias: bottleneck values
+  weights.insert(weights.end(), {0.0f, 0.0f});
+  // Input mixin: conditionSize x bottleneck = 1 x 2 = 2 weights
+  weights.insert(weights.end(), {1.0f, 1.0f});
+  // 1x1 weights: out_channels x in_channels = channels x bottleneck = 4 x 2 = 8 weights
+  // Weight layout for Conv1x1: for each out_channel, for each in_channel
+  // Identity-like pattern: out_channel i connects to in_channel i (for i < bottleneck)
+  for (int out_ch = 0; out_ch < channels; out_ch++)
+  {
+    for (int in_ch = 0; in_ch < bottleneck; in_ch++)
+    {
+      weights.push_back((out_ch == in_ch) ? 1.0f : 0.0f);
+    }
+  }
+  // 1x1 bias: channels values
+  weights.insert(weights.end(), {0.0f, 0.0f, 0.0f, 0.0f});
+
+  auto it = weights.begin();
+  layer.set_weights_(it);
+
+  const int maxBufferSize = 256;
+  layer.SetMaxBufferSize(maxBufferSize);
+
+  // Test with several different buffer sizes
+  std::vector<int> buffer_sizes{1, 8, 16, 32, 64, 128, 256};
+
+  for (int buffer_size : buffer_sizes)
+  {
+    // Prepare input/condition matrices (allocate before tracking)
+    Eigen::MatrixXf input(channels, buffer_size);
+    Eigen::MatrixXf condition(condition_size, buffer_size);
+    input.setConstant(0.5f);
+    condition.setConstant(0.5f);
+
+    std::string test_name = "Layer Process (bottleneck=" + std::to_string(bottleneck) + ", channels="
+                            + std::to_string(channels) + ") - Buffer size " + std::to_string(buffer_size);
+    run_allocation_test_no_allocations(
+      nullptr, // No setup needed
+      [&]() {
+        // Call Process() - this should not allocate or free
+        layer.Process(input, condition, buffer_size);
+      },
+      nullptr, // No teardown needed
+      test_name.c_str());
+
+    // Verify output is valid
+    auto output = layer.GetOutputNextLayer().leftCols(buffer_size);
+    assert(output.rows() == channels && output.cols() == buffer_size);
+    assert(std::isfinite(output(0, 0)));
+    assert(std::isfinite(output(channels - 1, buffer_size - 1)));
+  }
+}
+
 // Test that Layer::Process() method with grouped convolution (groups_input > 1) does not allocate or free memory
 void test_layer_grouped_process_realtime_safe()
 {
   // Setup: Create a Layer with grouped convolution
   const int condition_size = 1;
   const int channels = 4; // Must be divisible by groups_input
+  const int bottleneck = channels;
   const int kernel_size = 2;
   const int dilation = 1;
   const std::string activation = "ReLU";
@@ -490,8 +577,8 @@ void test_layer_grouped_process_realtime_safe()
   const int groups_input = 2; // groups_input > 1
   const int groups_1x1 = 2; // 1x1 is also grouped

-  auto layer =
-    nam::wavenet::_Layer(condition_size, channels, kernel_size, dilation, activation, gated, groups_input, groups_1x1);
+  auto layer = nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gated,
+                                    groups_input, groups_1x1);

   // Set weights for grouped convolution
   // With groups_input=2, channels=4: each group has 2 in_channels and 2 out_channels
@@ -592,6 +679,7 @@ void test_layer_array_process_realtime_safe()
   const int condition_size = 1;
   const int head_size = 1;
   const int channels = 1;
+  const int bottleneck = channels;
   const int kernel_size = 1;
   std::vector<int> dilations{1};
   const std::string activation = "ReLU";
@@ -600,8 +688,8 @@ void test_layer_array_process_realtime_safe()
   const int groups = 1;
   const int groups_1x1 = 1;

-  auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, kernel_size, dilations,
-                                               activation, gated, head_bias, groups, groups_1x1);
+  auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, bottleneck, kernel_size,
+                                               dilations, activation, gated, head_bias, groups, groups_1x1);

   // Set weights: rechannel(1), layer(conv:1+1, input_mixin:1, 1x1:1+1), head_rechannel(1)
   std::vector<float> weights{1.0f, // Rechannel
@@ -666,15 +754,16 @@ void test_process_realtime_safe()
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   // First layer array
   std::vector<int> dilations1{1};
+  const int bottleneck = channels;
   const int groups_1x1 = 1;
   layer_array_params.push_back(nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, channels,
-                                                              kernel_size, std::move(dilations1), activation, gated,
-                                                              head_bias, groups, groups_1x1));
+                                                              bottleneck, kernel_size, std::move(dilations1), activation,
+                                                              gated, head_bias, groups, groups_1x1));
   // Second layer array (head_size of first must match channels of second)
   std::vector<int> dilations2{1};
   layer_array_params.push_back(nam::wavenet::LayerArrayParams(head_size, condition_size, head_size, channels,
-                                                              kernel_size, std::move(dilations2), activation, gated,
-                                                              head_bias, groups, groups_1x1));
+                                                              bottleneck, kernel_size, std::move(dilations2), activation,
+                                                              gated, head_bias, groups, groups_1x1));

   // Weights: Array 0: rechannel(1), layer(conv:1+1, input_mixin:1, 1x1:1+1), head_rechannel(1)
   // Array 1: same structure