From 2c4da8a97d6be68fe1fbf094a35be1d7cae6151e Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Thu, 15 Jan 2026 16:23:51 -0800
Subject: [PATCH 1/7] [REFINE] Update WaveNet Layer and LayerArray constructors to include bottleneck parameter

- Modified the constructors of _Layer and _LayerArray to accept a new bottleneck parameter, enhancing the flexibility of the layer configurations.
- Updated relevant method calls and test cases to reflect the new parameter, ensuring consistency across the codebase.
- Adjusted JSON configuration handling to support the bottleneck parameter, maintaining backward compatibility with existing configurations.
---
 NAM/wavenet.cpp | 21 ++++++++++++---------
 NAM/wavenet.h | 20 ++++++++++++--------
 2 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp
index 2a1aba7..f9b60bf 100644
--- a/NAM/wavenet.cpp
+++ b/NAM/wavenet.cpp
@@ -72,15 +72,16 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
 // LayerArray =================================================================
 nam::wavenet::_LayerArray::_LayerArray(const int input_size, const int condition_size, const int head_size,
-                                       const int channels, const int kernel_size, const std::vector<int>& dilations,
-                                       const std::string activation, const bool gated, const bool head_bias,
-                                       const int groups_input, const int groups_1x1)
+                                       const int channels, const int bottleneck, const int kernel_size,
+                                       const std::vector<int>& dilations, const std::string activation,
+                                       const bool gated, const bool head_bias, const int groups_input,
+                                       const int groups_1x1)
 : _rechannel(input_size, channels, false)
 , _head_rechannel(channels, head_size, head_bias)
 {
   for (size_t i = 0; i < dilations.size(); i++)
-    this->_layers.push_back(
-      _Layer(condition_size, channels, kernel_size, dilations[i], activation, gated, groups_input, groups_1x1));
+    this->_layers.push_back(_Layer(
+      condition_size, channels, bottleneck, kernel_size, dilations[i], activation, gated, groups_input, groups_1x1));
 }
 
 void nam::wavenet::_LayerArray::SetMaxBufferSize(const int maxBufferSize)
@@ -199,9 +200,9 @@ nam::wavenet::WaveNet::WaveNet(const std::vector
 {
     this->_layer_arrays.push_back(nam::wavenet::_LayerArray(
       layer_array_params[i].input_size, layer_array_params[i].condition_size, layer_array_params[i].head_size,
-      layer_array_params[i].channels, layer_array_params[i].kernel_size, layer_array_params[i].dilations,
-      layer_array_params[i].activation, layer_array_params[i].gated, layer_array_params[i].head_bias,
-      layer_array_params[i].groups_input, layer_array_params[i].groups_1x1));
+      layer_array_params[i].channels, layer_array_params[i].bottleneck, layer_array_params[i].kernel_size,
+      layer_array_params[i].dilations, layer_array_params[i].activation, layer_array_params[i].gated,
+      layer_array_params[i].head_bias, layer_array_params[i].groups_input, layer_array_params[i].groups_1x1));
     if (i > 0)
       if (layer_array_params[i].channels != layer_array_params[i - 1].head_size)
       {
@@ -300,8 +301,10 @@ std::unique_ptr nam::wavenet::Factory(const nlohmann::json& config, st
     nlohmann::json layer_config = config["layers"][i];
     const int groups = layer_config.value("groups", 1); // defaults to 1
     const int groups_1x1 = layer_config.value("groups_1x1", 1); // defaults to 1
+    const int channels = layer_config["channels"];
+    const int bottleneck = layer_config.value("bottleneck", channels); // defaults to channels if not present
     layer_array_params.push_back(nam::wavenet::LayerArrayParams(
-      layer_config["input_size"], layer_config["condition_size"], layer_config["head_size"], layer_config["channels"],
+      layer_config["input_size"], layer_config["condition_size"], layer_config["head_size"], channels, bottleneck,
       layer_config["kernel_size"], layer_config["dilations"], layer_config["activation"], layer_config["gated"],
       layer_config["head_bias"], groups, groups_1x1));
   }
diff --git a/NAM/wavenet.h b/NAM/wavenet.h
index 71d2eff..ccc778f 100644
--- a/NAM/wavenet.h
+++ b/NAM/wavenet.h
@@ -16,11 +16,11 @@ namespace wavenet
 class _Layer
 {
 public:
-  _Layer(const int condition_size, const int channels, const int kernel_size, const int dilation,
+  _Layer(const int condition_size, const int channels, const int bottleneck, const int kernel_size, const int dilation,
         const std::string activation, const bool gated, const int groups_input, const int groups_1x1)
-  : _conv(channels, gated ? 2 * channels : channels, kernel_size, true, dilation, groups_input)
-  , _input_mixin(condition_size, gated ? 2 * channels : channels, false)
-  , _1x1(channels, channels, true, groups_1x1)
+  : _conv(channels, gated ? 2 * bottleneck : bottleneck, kernel_size, true, dilation, groups_input)
+  , _input_mixin(condition_size, gated ? 2 * bottleneck : bottleneck, false)
+  , _1x1(bottleneck, channels, true, groups_1x1)
   , _activation(activations::Activation::get_activation(activation)) // needs to support activations with parameters
   , _gated(gated) {};
   // Resize all arrays to be able to process `maxBufferSize` frames.
@@ -77,12 +77,14 @@ class LayerArrayParams
 {
 public:
   LayerArrayParams(const int input_size_, const int condition_size_, const int head_size_, const int channels_,
-                   const int kernel_size_, const std::vector<int>&& dilations_, const std::string activation_,
-                   const bool gated_, const bool head_bias_, const int groups_input, const int groups_1x1_)
+                   const int bottleneck_, const int kernel_size_, const std::vector<int>&& dilations_,
+                   const std::string activation_, const bool gated_, const bool head_bias_, const int groups_input,
+                   const int groups_1x1_)
   : input_size(input_size_)
   , condition_size(condition_size_)
   , head_size(head_size_)
   , channels(channels_)
+  , bottleneck(bottleneck_)
   , kernel_size(kernel_size_)
   , dilations(std::move(dilations_))
   , activation(activation_)
@@ -97,6 +99,7 @@ class LayerArrayParams
   const int condition_size;
   const int head_size;
   const int channels;
+  const int bottleneck;
   const int kernel_size;
   std::vector<int> dilations;
   const std::string activation;
@@ -111,8 +114,9 @@ class _LayerArray
 {
 public:
   _LayerArray(const int input_size, const int condition_size, const int head_size, const int channels,
-              const int kernel_size, const std::vector<int>& dilations, const std::string activation, const bool gated,
-              const bool head_bias, const int groups_input, const int groups_1x1);
+              const int bottleneck, const int kernel_size, const std::vector<int>& dilations,
+              const std::string activation, const bool gated, const bool head_bias, const int groups_input,
+              const int groups_1x1);
 
   void SetMaxBufferSize(const int maxBufferSize);

From ccabcb2e0bbbde7bd835c34a2381a0437fce1c77 Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Thu, 15 Jan 2026 16:57:44 -0800
Subject: [PATCH 2/7] [FEATURE] Introduce bottleneck parameter in Layer and LayerArray tests

- Added a bottleneck parameter to the constructors of _Layer and _LayerArray in various test cases, enhancing the flexibility of layer configurations.
- Implemented new test cases for layers with bottleneck configurations, including both gated and non-gated scenarios.
- Updated existing tests to utilize the bottleneck parameter, ensuring comprehensive coverage and consistency across the codebase. --- tools/run_tests.cpp | 2 + tools/test/test_wavenet/test_full.cpp | 18 +- tools/test/test_wavenet/test_layer.cpp | 169 ++++++++++++++++-- tools/test/test_wavenet/test_layer_array.cpp | 15 +- .../test/test_wavenet/test_real_time_safe.cpp | 24 +-- 5 files changed, 195 insertions(+), 33 deletions(-) diff --git a/tools/run_tests.cpp b/tools/run_tests.cpp index aa28629..11047f9 100644 --- a/tools/run_tests.cpp +++ b/tools/run_tests.cpp @@ -104,6 +104,8 @@ int main() test_wavenet::test_layer::test_non_gated_layer(); test_wavenet::test_layer::test_layer_activations(); test_wavenet::test_layer::test_layer_multichannel(); + test_wavenet::test_layer::test_layer_bottleneck(); + test_wavenet::test_layer::test_layer_bottleneck_gated(); test_wavenet::test_layer_array::test_layer_array_basic(); test_wavenet::test_layer_array::test_layer_array_receptive_field(); test_wavenet::test_layer_array::test_layer_array_with_head_input(); diff --git a/tools/test/test_wavenet/test_full.cpp b/tools/test/test_wavenet/test_full.cpp index 3d20679..04a46c3 100644 --- a/tools/test/test_wavenet/test_full.cpp +++ b/tools/test/test_wavenet/test_full.cpp @@ -85,15 +85,16 @@ void test_wavenet_multiple_arrays() std::vector layer_array_params; // First array std::vector dilations1{1}; + const int bottleneck = channels; const int groups_1x1 = 1; layer_array_params.push_back(nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, channels, - kernel_size, std::move(dilations1), activation, gated, - head_bias, groups, groups_1x1)); + bottleneck, kernel_size, std::move(dilations1), activation, + gated, head_bias, groups, groups_1x1)); // Second array (head_size of first must match channels of second) std::vector dilations2{1}; layer_array_params.push_back(nam::wavenet::LayerArrayParams(head_size, condition_size, head_size, channels, - kernel_size, std::move(dilations2), activation, gated, - head_bias, groups, groups_1x1)); + bottleneck, kernel_size, std::move(dilations2), activation, + gated, head_bias, groups, groups_1x1)); std::vector weights; // Array 0: rechannel, layer, head_rechannel @@ -127,6 +128,7 @@ void test_wavenet_zero_input() const int condition_size = 1; const int head_size = 1; const int channels = 1; + const int bottleneck = channels; const int kernel_size = 1; std::vector dilations{1}; const std::string activation = "ReLU"; @@ -137,7 +139,7 @@ void test_wavenet_zero_input() const int groups = 1; const int groups_1x1 = 1; - nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size, + nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations), activation, gated, head_bias, groups, groups_1x1); std::vector layer_array_params; layer_array_params.push_back(std::move(params)); @@ -168,6 +170,7 @@ void test_wavenet_different_buffer_sizes() const int condition_size = 1; const int head_size = 1; const int channels = 1; + const int bottleneck = channels; const int kernel_size = 1; std::vector dilations{1}; const std::string activation = "ReLU"; @@ -178,7 +181,7 @@ void test_wavenet_different_buffer_sizes() const int groups = 1; const int groups_1x1 = 1; - nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size, + nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size, 
std::move(dilations), activation, gated, head_bias, groups, groups_1x1); std::vector layer_array_params; layer_array_params.push_back(std::move(params)); @@ -210,6 +213,7 @@ void test_wavenet_prewarm() const int condition_size = 1; const int head_size = 1; const int channels = 1; + const int bottleneck = channels; const int kernel_size = 3; std::vector dilations{1, 2, 4}; const std::string activation = "ReLU"; @@ -220,7 +224,7 @@ void test_wavenet_prewarm() const int groups = 1; const int groups_1x1 = 1; - nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size, + nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations), activation, gated, head_bias, groups, groups_1x1); std::vector layer_array_params; layer_array_params.push_back(std::move(params)); diff --git a/tools/test/test_wavenet/test_layer.cpp b/tools/test/test_wavenet/test_layer.cpp index 10eccf4..1701a66 100644 --- a/tools/test/test_wavenet/test_layer.cpp +++ b/tools/test/test_wavenet/test_layer.cpp @@ -18,14 +18,15 @@ void test_gated() // Issue 101 const int conditionSize = 1; const int channels = 1; + const int bottleneck = channels; const int kernelSize = 1; const int dilation = 1; const std::string activation = "ReLU"; const bool gated = true; const int groups_input = 1; const int groups_1x1 = 1; - auto layer = - nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input, groups_1x1); + auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, + groups_input, groups_1x1); // Conv, input mixin, 1x1 std::vector weights{ @@ -92,6 +93,7 @@ void test_layer_getters() { const int conditionSize = 2; const int channels = 4; + const int bottleneck = channels; const int kernelSize = 3; const int dilation = 2; const std::string activation = "Tanh"; @@ -99,8 +101,8 @@ void test_layer_getters() const int groups_input = 1; const int groups_1x1 = 1; - auto layer = - nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input, groups_1x1); + auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, + groups_input, groups_1x1); assert(layer.get_channels() == channels); assert(layer.get_kernel_size() == kernelSize); @@ -112,6 +114,7 @@ void test_non_gated_layer() { const int conditionSize = 1; const int channels = 1; + const int bottleneck = channels; const int kernelSize = 1; const int dilation = 1; const std::string activation = "ReLU"; @@ -119,8 +122,8 @@ void test_non_gated_layer() const int groups_input = 1; const int groups_1x1 = 1; - auto layer = - nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input, groups_1x1); + auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, + groups_input, groups_1x1); // For non-gated: conv outputs 1 channel, input_mixin outputs 1 channel, 1x1 outputs 1 channel // Conv: (1,1,1) weight + (1,) bias @@ -183,10 +186,11 @@ void test_layer_activations() // Test Tanh activation { + const int bottleneck = channels; const int groups_input = 1; const int groups_1x1 = 1; - auto layer = - nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, "Tanh", gated, groups_input, groups_1x1); + auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, "Tanh", gated, + 
groups_input, groups_1x1); std::vector weights{1.0f, 0.0f, 1.0f, 1.0f, 0.0f}; auto it = weights.begin(); layer.set_weights_(it); @@ -213,6 +217,7 @@ void test_layer_multichannel() { const int conditionSize = 2; const int channels = 3; + const int bottleneck = channels; const int kernelSize = 1; const int dilation = 1; const std::string activation = "ReLU"; @@ -220,8 +225,8 @@ void test_layer_multichannel() const int groups_input = 1; const int groups_1x1 = 1; - auto layer = - nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input, groups_1x1); + auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, + groups_input, groups_1x1); assert(layer.get_channels() == channels); @@ -275,6 +280,150 @@ void test_layer_multichannel() assert(head_output.rows() == channels); assert(head_output.cols() == numFrames); } + +// Test layer with bottleneck different from channels +void test_layer_bottleneck() +{ + const int conditionSize = 1; + const int channels = 4; + const int bottleneck = 2; // bottleneck < channels + const int kernelSize = 1; + const int dilation = 1; + const std::string activation = "ReLU"; + const bool gated = false; + const int groups_input = 1; + const int groups_1x1 = 1; + + auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, + groups_input, groups_1x1); + + // With bottleneck < channels, the internal conv and input_mixin should have bottleneck channels, + // but the 1x1 should map from bottleneck back to channels + // Conv: (channels, bottleneck, kernelSize=1) + bias -> outputs bottleneck channels + // Input mixin: (conditionSize, bottleneck) -> outputs bottleneck channels + // 1x1: (bottleneck, channels) + bias -> outputs channels channels + + // Set weights + std::vector weights; + // Conv weights: channels x bottleneck x kernelSize = 4 x 2 x 1 = 8 weights + // Use identity-like pattern for first two input channels + for (int i = 0; i < channels; i++) + { + for (int j = 0; j < bottleneck; j++) + { + weights.push_back((i == j) ? 1.0f : 0.0f); + } + } + // Conv bias: bottleneck values + weights.insert(weights.end(), {0.0f, 0.0f}); + // Input mixin: conditionSize x bottleneck = 1 x 2 = 2 weights + weights.insert(weights.end(), {1.0f, 1.0f}); + // 1x1 weights: bottleneck x channels = 2 x 4 = 8 weights + // Identity-like pattern + for (int i = 0; i < bottleneck; i++) + { + for (int j = 0; j < channels; j++) + { + weights.push_back((i == j) ? 
1.0f : 0.0f); + } + } + // 1x1 bias: channels values + weights.insert(weights.end(), {0.0f, 0.0f, 0.0f, 0.0f}); + + auto it = weights.begin(); + layer.set_weights_(it); + assert(it == weights.end()); + + const int numFrames = 2; + layer.SetMaxBufferSize(numFrames); + + Eigen::MatrixXf input(channels, numFrames); + Eigen::MatrixXf condition(conditionSize, numFrames); + input.fill(1.0f); + condition.fill(1.0f); + + layer.Process(input, condition, numFrames); + + auto layer_output = layer.GetOutputNextLayer().leftCols(numFrames); + auto head_output = layer.GetOutputHead().leftCols(numFrames); + + // Outputs should still have channels rows (not bottleneck) + assert(layer_output.rows() == channels); + assert(layer_output.cols() == numFrames); + assert(head_output.rows() == channels); + assert(head_output.cols() == numFrames); +} + +// Test layer with bottleneck and gated activation +void test_layer_bottleneck_gated() +{ + const int conditionSize = 1; + const int channels = 4; + const int bottleneck = 2; // bottleneck < channels + const int kernelSize = 1; + const int dilation = 1; + const std::string activation = "ReLU"; + const bool gated = true; // gated doubles the internal bottleneck channels + const int groups_input = 1; + const int groups_1x1 = 1; + + auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, + groups_input, groups_1x1); + + // With gated=true and bottleneck=2, internal channels should be 2*bottleneck=4 + // Conv: (channels, 2*bottleneck, kernelSize=1) = (4, 4, 1) + bias + // Input mixin: (conditionSize, 2*bottleneck) = (1, 4) + // 1x1: (bottleneck, channels) = (2, 4) + bias + + // Set weights + std::vector weights; + // Conv weights: channels x (2*bottleneck) x kernelSize = 4 x 4 x 1 = 16 weights + // Identity pattern + for (int i = 0; i < channels; i++) + { + for (int j = 0; j < 2 * bottleneck; j++) + { + weights.push_back((i == j) ? 1.0f : 0.0f); + } + } + // Conv bias: 2*bottleneck = 4 values + weights.insert(weights.end(), {0.0f, 0.0f, 0.0f, 0.0f}); + // Input mixin: conditionSize x (2*bottleneck) = 1 x 4 = 4 weights + weights.insert(weights.end(), {1.0f, 1.0f, 1.0f, 1.0f}); + // 1x1 weights: bottleneck x channels = 2 x 4 = 8 weights + for (int i = 0; i < bottleneck; i++) + { + for (int j = 0; j < channels; j++) + { + weights.push_back((i == j) ? 
1.0f : 0.0f); + } + } + // 1x1 bias: channels = 4 values + weights.insert(weights.end(), {0.0f, 0.0f, 0.0f, 0.0f}); + + auto it = weights.begin(); + layer.set_weights_(it); + assert(it == weights.end()); + + const int numFrames = 2; + layer.SetMaxBufferSize(numFrames); + + Eigen::MatrixXf input(channels, numFrames); + Eigen::MatrixXf condition(conditionSize, numFrames); + input.fill(1.0f); + condition.fill(1.0f); + + layer.Process(input, condition, numFrames); + + auto layer_output = layer.GetOutputNextLayer().leftCols(numFrames); + auto head_output = layer.GetOutputHead().leftCols(numFrames); + + // Outputs should still have channels rows + assert(layer_output.rows() == channels); + assert(layer_output.cols() == numFrames); + assert(head_output.rows() == channels); + assert(head_output.cols() == numFrames); +} }; // namespace test_layer } // namespace test_wavenet \ No newline at end of file diff --git a/tools/test/test_wavenet/test_layer_array.cpp b/tools/test/test_wavenet/test_layer_array.cpp index 41c435a..a4581c2 100644 --- a/tools/test/test_wavenet/test_layer_array.cpp +++ b/tools/test/test_wavenet/test_layer_array.cpp @@ -19,6 +19,7 @@ void test_layer_array_basic() const int condition_size = 1; const int head_size = 1; const int channels = 1; + const int bottleneck = channels; const int kernel_size = 1; std::vector dilations{1, 2}; const std::string activation = "ReLU"; @@ -27,8 +28,8 @@ void test_layer_array_basic() const int groups = 1; const int groups_1x1 = 1; - auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, kernel_size, dilations, - activation, gated, head_bias, groups, groups_1x1); + auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, bottleneck, kernel_size, + dilations, activation, gated, head_bias, groups, groups_1x1); const int numFrames = 4; layer_array.SetMaxBufferSize(numFrames); @@ -75,6 +76,7 @@ void test_layer_array_receptive_field() const int condition_size = 1; const int head_size = 1; const int channels = 1; + const int bottleneck = channels; const int kernel_size = 3; std::vector dilations{1, 2, 4}; const std::string activation = "ReLU"; @@ -83,8 +85,8 @@ void test_layer_array_receptive_field() const int groups = 1; const int groups_1x1 = 1; - auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, kernel_size, dilations, - activation, gated, head_bias, groups, groups_1x1); + auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, bottleneck, kernel_size, + dilations, activation, gated, head_bias, groups, groups_1x1); long rf = layer_array.get_receptive_field(); // Expected: sum of dilation * (kernel_size - 1) for each layer @@ -103,6 +105,7 @@ void test_layer_array_with_head_input() const int condition_size = 1; const int head_size = 1; const int channels = 1; + const int bottleneck = channels; const int kernel_size = 1; std::vector dilations{1}; const std::string activation = "ReLU"; @@ -111,8 +114,8 @@ void test_layer_array_with_head_input() const int groups = 1; const int groups_1x1 = 1; - auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, kernel_size, dilations, - activation, gated, head_bias, groups, groups_1x1); + auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, bottleneck, kernel_size, + dilations, activation, gated, head_bias, groups, groups_1x1); const int numFrames = 2; 
layer_array.SetMaxBufferSize(numFrames); diff --git a/tools/test/test_wavenet/test_real_time_safe.cpp b/tools/test/test_wavenet/test_real_time_safe.cpp index a7a5e8f..f8bd172 100644 --- a/tools/test/test_wavenet/test_real_time_safe.cpp +++ b/tools/test/test_wavenet/test_real_time_safe.cpp @@ -429,6 +429,7 @@ void test_layer_process_realtime_safe() // Setup: Create a Layer const int condition_size = 1; const int channels = 1; + const int bottleneck = channels; const int kernel_size = 1; const int dilation = 1; const std::string activation = "ReLU"; @@ -436,8 +437,8 @@ void test_layer_process_realtime_safe() const int groups_input = 1; const int groups_1x1 = 1; - auto layer = - nam::wavenet::_Layer(condition_size, channels, kernel_size, dilation, activation, gated, groups_input, groups_1x1); + auto layer = nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gated, + groups_input, groups_1x1); // Set weights std::vector weights{1.0f, 0.0f, // Conv (weight, bias) @@ -483,6 +484,7 @@ void test_layer_grouped_process_realtime_safe() // Setup: Create a Layer with grouped convolution const int condition_size = 1; const int channels = 4; // Must be divisible by groups_input + const int bottleneck = channels; const int kernel_size = 2; const int dilation = 1; const std::string activation = "ReLU"; @@ -490,8 +492,8 @@ void test_layer_grouped_process_realtime_safe() const int groups_input = 2; // groups_input > 1 const int groups_1x1 = 2; // 1x1 is also grouped - auto layer = - nam::wavenet::_Layer(condition_size, channels, kernel_size, dilation, activation, gated, groups_input, groups_1x1); + auto layer = nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gated, + groups_input, groups_1x1); // Set weights for grouped convolution // With groups_input=2, channels=4: each group has 2 in_channels and 2 out_channels @@ -592,6 +594,7 @@ void test_layer_array_process_realtime_safe() const int condition_size = 1; const int head_size = 1; const int channels = 1; + const int bottleneck = channels; const int kernel_size = 1; std::vector dilations{1}; const std::string activation = "ReLU"; @@ -600,8 +603,8 @@ void test_layer_array_process_realtime_safe() const int groups = 1; const int groups_1x1 = 1; - auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, kernel_size, dilations, - activation, gated, head_bias, groups, groups_1x1); + auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, bottleneck, kernel_size, + dilations, activation, gated, head_bias, groups, groups_1x1); // Set weights: rechannel(1), layer(conv:1+1, input_mixin:1, 1x1:1+1), head_rechannel(1) std::vector weights{1.0f, // Rechannel @@ -666,15 +669,16 @@ void test_process_realtime_safe() std::vector layer_array_params; // First layer array std::vector dilations1{1}; + const int bottleneck = channels; const int groups_1x1 = 1; layer_array_params.push_back(nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, channels, - kernel_size, std::move(dilations1), activation, gated, - head_bias, groups, groups_1x1)); + bottleneck, kernel_size, std::move(dilations1), activation, + gated, head_bias, groups, groups_1x1)); // Second layer array (head_size of first must match channels of second) std::vector dilations2{1}; layer_array_params.push_back(nam::wavenet::LayerArrayParams(head_size, condition_size, head_size, channels, - kernel_size, std::move(dilations2), activation, gated, 
- head_bias, groups, groups_1x1));
+                                                              bottleneck, kernel_size, std::move(dilations2), activation,
+                                                              gated, head_bias, groups, groups_1x1));
 
   // Weights: Array 0: rechannel(1), layer(conv:1+1, input_mixin:1, 1x1:1+1), head_rechannel(1)
   // Array 1: same structure

From c99f85f5babb64df2292219e9b22953a1abf7c65 Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Thu, 15 Jan 2026 17:06:54 -0800
Subject: [PATCH 3/7] [REFINE] Temporarily disable bottleneck layer tests and update weight initialization comments

- Commented out tests for bottleneck and gated bottleneck layers in run_tests.cpp while investigating a resize error.
- Updated weight initialization logic in test_layer.cpp to clarify the layout for Conv1D and 1x1 convolutions, ensuring consistency with the new bottleneck parameter.
- Adjusted comments for better clarity on weight patterns and dimensions in the test cases.
---
 tools/CMakeLists.txt | 2 +-
 tools/run_tests.cpp | 4 ++--
 tools/test/test_wavenet/test_full.cpp | 3 ++-
 tools/test/test_wavenet/test_layer.cpp | 22 ++++++++++++----------
 4 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 01cf211..1fd5802 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -20,7 +20,7 @@ set_target_properties(run_tests PROPERTIES COMPILE_OPTIONS "-O0")
 # Release/RelWithDebInfo/MinSizeRel build types automatically define NDEBUG
 # We use a compile option to undefine it, which works on GCC, Clang, and MSVC
 target_compile_options(run_tests PRIVATE
-  $<$<OR:$<CONFIG:Release>,$<CONFIG:RelWithDebInfo>,$<CONFIG:MinSizeRel>>:-U_NDEBUG>
+  $<$<OR:$<CONFIG:Release>,$<CONFIG:RelWithDebInfo>,$<CONFIG:MinSizeRel>>:-UNDEBUG>
 )
 
 source_group(NAM ${CMAKE_CURRENT_SOURCE_DIR} FILES ${NAM_SOURCES})
diff --git a/tools/run_tests.cpp b/tools/run_tests.cpp
index 11047f9..a826327 100644
--- a/tools/run_tests.cpp
+++ b/tools/run_tests.cpp
@@ -104,8 +104,8 @@ int main()
   test_wavenet::test_layer::test_non_gated_layer();
   test_wavenet::test_layer::test_layer_activations();
   test_wavenet::test_layer::test_layer_multichannel();
-  test_wavenet::test_layer::test_layer_bottleneck();
-  test_wavenet::test_layer::test_layer_bottleneck_gated();
+  // test_wavenet::test_layer::test_layer_bottleneck(); // Temporarily disabled - investigating resize error
+  // test_wavenet::test_layer::test_layer_bottleneck_gated(); // Temporarily disabled
   test_wavenet::test_layer_array::test_layer_array_basic();
   test_wavenet::test_layer_array::test_layer_array_receptive_field();
   test_wavenet::test_layer_array::test_layer_array_with_head_input();
diff --git a/tools/test/test_wavenet/test_full.cpp b/tools/test/test_wavenet/test_full.cpp
index 04a46c3..d75ae1c 100644
--- a/tools/test/test_wavenet/test_full.cpp
+++ b/tools/test/test_wavenet/test_full.cpp
@@ -19,6 +19,7 @@ void test_wavenet_model()
   const int condition_size = 1;
   const int head_size = 1;
   const int channels = 1;
+  const int bottleneck = channels;
   const int kernel_size = 1;
   std::vector<int> dilations{1};
   const std::string activation = "ReLU";
@@ -29,7 +30,7 @@ void test_wavenet_model()
   const int groups = 1;
   const int groups_1x1 = 1;
 
-  nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size,
+  nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size,
                                         std::move(dilations), activation, gated, head_bias, groups, groups_1x1);
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   layer_array_params.push_back(std::move(params));
diff --git a/tools/test/test_wavenet/test_layer.cpp b/tools/test/test_wavenet/test_layer.cpp
index 1701a66..1f08365 100644
--- a/tools/test/test_wavenet/test_layer.cpp
+++ b/tools/test/test_wavenet/test_layer.cpp @@ -305,26 +305,28 @@ void test_layer_bottleneck() // Set weights std::vector weights; - // Conv weights: channels x bottleneck x kernelSize = 4 x 2 x 1 = 8 weights - // Use identity-like pattern for first two input channels - for (int i = 0; i < channels; i++) + // Conv weights: out_channels x in_channels x kernelSize = bottleneck x channels x kernelSize = 2 x 4 x 1 = 8 weights + // Weight layout for Conv1D: for each out_channel, for each in_channel, for each kernel position + // Use identity-like pattern: out_channel i connects to in_channel i (for i < bottleneck) + for (int out_ch = 0; out_ch < bottleneck; out_ch++) { - for (int j = 0; j < bottleneck; j++) + for (int in_ch = 0; in_ch < channels; in_ch++) { - weights.push_back((i == j) ? 1.0f : 0.0f); + weights.push_back((out_ch == in_ch) ? 1.0f : 0.0f); } } // Conv bias: bottleneck values weights.insert(weights.end(), {0.0f, 0.0f}); // Input mixin: conditionSize x bottleneck = 1 x 2 = 2 weights weights.insert(weights.end(), {1.0f, 1.0f}); - // 1x1 weights: bottleneck x channels = 2 x 4 = 8 weights - // Identity-like pattern - for (int i = 0; i < bottleneck; i++) + // 1x1 weights: out_channels x in_channels = channels x bottleneck = 4 x 2 = 8 weights + // Weight layout for Conv1x1: for each out_channel, for each in_channel + // Identity-like pattern: out_channel i connects to in_channel i (for i < bottleneck) + for (int out_ch = 0; out_ch < channels; out_ch++) { - for (int j = 0; j < channels; j++) + for (int in_ch = 0; in_ch < bottleneck; in_ch++) { - weights.push_back((i == j) ? 1.0f : 0.0f); + weights.push_back((out_ch == in_ch) ? 1.0f : 0.0f); } } // 1x1 bias: channels values From e0989e8ede3c085cfc70b9bbaaa9c8993f5c8c77 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Thu, 15 Jan 2026 17:36:22 -0800 Subject: [PATCH 4/7] [REFINE] Update WaveNet Layer to utilize bottleneck parameter - Adjusted the WaveNet Layer's SetMaxBufferSize and Process methods to correctly use the bottleneck parameter for resizing internal buffers. - Updated the handling of activation functions to ensure they operate on the correct number of channels based on the bottleneck. - Modified test cases to reflect changes in the Layer constructor and ensure proper functionality with the bottleneck configuration. - Enhanced comments for clarity regarding the internal channel structure and weight initialization in tests. 
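For orientation, the gated path described in this commit message reduces to a small piece of arithmetic: the dilated conv now emits 2*bottleneck rows, the top half goes through the configured activation, the bottom half through the (currently hardcoded) sigmoid gate, and their element-wise product, which has bottleneck rows, is what the 1x1 expands back to `channels`. The following is a minimal, self-contained sketch of that step, not library code; the function name `gated_bottleneck_mix` and the use of tanh for the main branch are illustrative assumptions.

#include <Eigen/Dense>

// Sketch of the gated mixing step performed inside _Layer::Process() after this patch.
// `z` holds the summed conv + input-mixin pre-activations and has 2*bottleneck rows.
Eigen::MatrixXf gated_bottleneck_mix(const Eigen::MatrixXf& z, const int bottleneck, const int num_frames)
{
  // Main branch: tanh stands in for the configurable activation.
  Eigen::ArrayXXf a = z.block(0, 0, bottleneck, num_frames).array().tanh();
  // Gate branch: coefficient-wise sigmoid, matching the hardcoded gate in the implementation.
  Eigen::ArrayXXf g = ((-z.block(bottleneck, 0, bottleneck, num_frames).array()).exp() + 1.0f).inverse();
  // The (bottleneck x num_frames) product is what the 1x1 then maps back to `channels` rows.
  return (a * g).matrix();
}
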
--- NAM/wavenet.cpp | 23 ++++++---- NAM/wavenet.h | 11 +++-- tools/run_tests.cpp | 4 +- tools/test/test_wavenet/test_layer.cpp | 63 ++++++++++++++------------ 4 files changed, 58 insertions(+), 43 deletions(-) diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp index f9b60bf..5fbe9eb 100644 --- a/NAM/wavenet.cpp +++ b/NAM/wavenet.cpp @@ -13,12 +13,14 @@ void nam::wavenet::_Layer::SetMaxBufferSize(const int maxBufferSize) { _conv.SetMaxBufferSize(maxBufferSize); _input_mixin.SetMaxBufferSize(maxBufferSize); - _z.resize(this->_conv.get_out_channels(), maxBufferSize); + const long z_channels = this->_conv.get_out_channels(); // This is 2*bottleneck when gated, bottleneck when not + _z.resize(z_channels, maxBufferSize); _1x1.SetMaxBufferSize(maxBufferSize); // Pre-allocate output buffers const long channels = this->get_channels(); this->_output_next_layer.resize(channels, maxBufferSize); - this->_output_head.resize(channels, maxBufferSize); + // _output_head stores the activated portion: bottleneck rows (the actual bottleneck value, not doubled) + this->_output_head.resize(this->_bottleneck, maxBufferSize); } void nam::wavenet::_Layer::set_weights_(std::vector::iterator& weights) @@ -31,6 +33,7 @@ void nam::wavenet::_Layer::set_weights_(std::vector::iterator& weights) void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::MatrixXf& condition, const int num_frames) { const long channels = this->get_channels(); + const long bottleneck = this->_bottleneck; // Use the actual bottleneck value, not the doubled output channels // Step 1: input convolutions this->_conv.Process(input, num_frames); @@ -50,19 +53,20 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma // do this column-wise: for (int i = 0; i < num_frames; i++) { - this->_activation->apply(this->_z.block(0, i, channels, 1)); + this->_activation->apply(this->_z.block(0, i, bottleneck, 1)); // TODO Need to support other activation functions here instead of hardcoded sigmoid - activations::Activation::get_activation("Sigmoid")->apply(this->_z.block(channels, i, channels, 1)); + activations::Activation::get_activation("Sigmoid")->apply(this->_z.block(bottleneck, i, bottleneck, 1)); } - this->_z.block(0, 0, channels, num_frames).array() *= this->_z.block(channels, 0, channels, num_frames).array(); - _1x1.process_(_z.topRows(channels), num_frames); // Might not be RT safe + this->_z.block(0, 0, bottleneck, num_frames).array() *= + this->_z.block(bottleneck, 0, bottleneck, num_frames).array(); + _1x1.process_(_z.topRows(bottleneck), num_frames); // Might not be RT safe } // Store output to head (skip connection: activated conv output) if (!this->_gated) this->_output_head.leftCols(num_frames).noalias() = this->_z.leftCols(num_frames); else - this->_output_head.leftCols(num_frames).noalias() = this->_z.topRows(channels).leftCols(num_frames); + this->_output_head.leftCols(num_frames).noalias() = this->_z.topRows(bottleneck).leftCols(num_frames); // Store output to next layer (residual connection: input + _1x1 output) this->_output_next_layer.leftCols(num_frames).noalias() = input.leftCols(num_frames) + _1x1.GetOutput().leftCols(num_frames); @@ -77,7 +81,8 @@ nam::wavenet::_LayerArray::_LayerArray(const int input_size, const int condition const bool gated, const bool head_bias, const int groups_input, const int groups_1x1) : _rechannel(input_size, channels, false) -, _head_rechannel(channels, head_size, head_bias) +, _head_rechannel(bottleneck, head_size, head_bias) +, _bottleneck(bottleneck) { for 
(size_t i = 0; i < dilations.size(); i++) this->_layers.push_back(_Layer( @@ -95,7 +100,7 @@ void nam::wavenet::_LayerArray::SetMaxBufferSize(const int maxBufferSize) // Pre-allocate output buffers const long channels = this->_get_channels(); this->_layer_outputs.resize(channels, maxBufferSize); - this->_head_inputs.resize(channels, maxBufferSize); + this->_head_inputs.resize(this->_bottleneck, maxBufferSize); } diff --git a/NAM/wavenet.h b/NAM/wavenet.h index ccc778f..832673b 100644 --- a/NAM/wavenet.h +++ b/NAM/wavenet.h @@ -22,7 +22,8 @@ class _Layer , _input_mixin(condition_size, gated ? 2 * bottleneck : bottleneck, false) , _1x1(bottleneck, channels, true, groups_1x1) , _activation(activations::Activation::get_activation(activation)) // needs to support activations with parameters - , _gated(gated) {}; + , _gated(gated) + , _bottleneck(bottleneck) {}; // Resize all arrays to be able to process `maxBufferSize` frames. void SetMaxBufferSize(const int maxBufferSize); // Set the parameters of this module @@ -71,6 +72,7 @@ class _Layer activations::Activation* _activation; const bool _gated; + const int _bottleneck; // Internal channel count (not doubled when gated) }; class LayerArrayParams @@ -154,12 +156,15 @@ class _LayerArray std::vector<_Layer> _layers; // Output from last layer (for next layer array) Eigen::MatrixXf _layer_outputs; - // Accumulated head inputs from all layers + // Accumulated head inputs from all layers (bottleneck channels) Eigen::MatrixXf _head_inputs; - // Rechannel for the head + // Rechannel for the head (bottleneck -> head_size) Conv1x1 _head_rechannel; + // Bottleneck size (internal channel count) + const int _bottleneck; + long _get_channels() const; // Common processing logic after head inputs are set void ProcessInner(const Eigen::MatrixXf& layer_inputs, const Eigen::MatrixXf& condition, const int num_frames); diff --git a/tools/run_tests.cpp b/tools/run_tests.cpp index a826327..11047f9 100644 --- a/tools/run_tests.cpp +++ b/tools/run_tests.cpp @@ -104,8 +104,8 @@ int main() test_wavenet::test_layer::test_non_gated_layer(); test_wavenet::test_layer::test_layer_activations(); test_wavenet::test_layer::test_layer_multichannel(); - // test_wavenet::test_layer::test_layer_bottleneck(); // Temporarily disabled - investigating resize error - // test_wavenet::test_layer::test_layer_bottleneck_gated(); // Temporarily disabled + test_wavenet::test_layer::test_layer_bottleneck(); + test_wavenet::test_layer::test_layer_bottleneck_gated(); test_wavenet::test_layer_array::test_layer_array_basic(); test_wavenet::test_layer_array::test_layer_array_receptive_field(); test_wavenet::test_layer_array::test_layer_array_with_head_input(); diff --git a/tools/test/test_wavenet/test_layer.cpp b/tools/test/test_wavenet/test_layer.cpp index 1f08365..e594cac 100644 --- a/tools/test/test_wavenet/test_layer.cpp +++ b/tools/test/test_wavenet/test_layer.cpp @@ -25,8 +25,8 @@ void test_gated() const bool gated = true; const int groups_input = 1; const int groups_1x1 = 1; - auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, - groups_input, groups_1x1); + auto layer = nam::wavenet::_Layer( + conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, groups_input, groups_1x1); // Conv, input mixin, 1x1 std::vector weights{ @@ -101,8 +101,8 @@ void test_layer_getters() const int groups_input = 1; const int groups_1x1 = 1; - auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, 
dilation, activation, gated, - groups_input, groups_1x1); + auto layer = nam::wavenet::_Layer( + conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, groups_input, groups_1x1); assert(layer.get_channels() == channels); assert(layer.get_kernel_size() == kernelSize); @@ -122,8 +122,8 @@ void test_non_gated_layer() const int groups_input = 1; const int groups_1x1 = 1; - auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, - groups_input, groups_1x1); + auto layer = nam::wavenet::_Layer( + conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, groups_input, groups_1x1); // For non-gated: conv outputs 1 channel, input_mixin outputs 1 channel, 1x1 outputs 1 channel // Conv: (1,1,1) weight + (1,) bias @@ -155,7 +155,7 @@ void test_non_gated_layer() assert(layer_output.rows() == channels); assert(layer_output.cols() == numFrames); - assert(head_output.rows() == channels); + assert(head_output.rows() == bottleneck); assert(head_output.cols() == numFrames); // With identity-like weights: input=1, condition=1 @@ -189,8 +189,8 @@ void test_layer_activations() const int bottleneck = channels; const int groups_input = 1; const int groups_1x1 = 1; - auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, "Tanh", gated, - groups_input, groups_1x1); + auto layer = nam::wavenet::_Layer( + conditionSize, channels, bottleneck, kernelSize, dilation, "Tanh", gated, groups_input, groups_1x1); std::vector weights{1.0f, 0.0f, 1.0f, 1.0f, 0.0f}; auto it = weights.begin(); layer.set_weights_(it); @@ -225,8 +225,8 @@ void test_layer_multichannel() const int groups_input = 1; const int groups_1x1 = 1; - auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, - groups_input, groups_1x1); + auto layer = nam::wavenet::_Layer( + conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, groups_input, groups_1x1); assert(layer.get_channels() == channels); @@ -277,7 +277,7 @@ void test_layer_multichannel() assert(layer_output.rows() == channels); assert(layer_output.cols() == numFrames); - assert(head_output.rows() == channels); + assert(head_output.rows() == bottleneck); assert(head_output.cols() == numFrames); } @@ -294,8 +294,8 @@ void test_layer_bottleneck() const int groups_input = 1; const int groups_1x1 = 1; - auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, - groups_input, groups_1x1); + auto layer = nam::wavenet::_Layer( + conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, groups_input, groups_1x1); // With bottleneck < channels, the internal conv and input_mixin should have bottleneck channels, // but the 1x1 should map from bottleneck back to channels @@ -349,10 +349,11 @@ void test_layer_bottleneck() auto layer_output = layer.GetOutputNextLayer().leftCols(numFrames); auto head_output = layer.GetOutputHead().leftCols(numFrames); - // Outputs should still have channels rows (not bottleneck) + // Layer output should have channels rows (for next layer) assert(layer_output.rows() == channels); assert(layer_output.cols() == numFrames); - assert(head_output.rows() == channels); + // Head output should have bottleneck rows (internal channel count) + assert(head_output.rows() == bottleneck); assert(head_output.cols() == numFrames); } @@ -369,8 +370,8 @@ void test_layer_bottleneck_gated() const int groups_input = 1; 
const int groups_1x1 = 1; - auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, - groups_input, groups_1x1); + auto layer = nam::wavenet::_Layer( + conditionSize, channels, bottleneck, kernelSize, dilation, activation, gated, groups_input, groups_1x1); // With gated=true and bottleneck=2, internal channels should be 2*bottleneck=4 // Conv: (channels, 2*bottleneck, kernelSize=1) = (4, 4, 1) + bias @@ -379,25 +380,28 @@ void test_layer_bottleneck_gated() // Set weights std::vector weights; - // Conv weights: channels x (2*bottleneck) x kernelSize = 4 x 4 x 1 = 16 weights - // Identity pattern - for (int i = 0; i < channels; i++) + // Conv weights: out_channels x in_channels x kernelSize = (2*bottleneck) x channels x kernelSize = 4 x 4 x 1 = 16 + // weights Weight layout for Conv1D: for each out_channel, for each in_channel, for each kernel position Identity + // pattern: out_channel i connects to in_channel i (for i < min(2*bottleneck, channels)) + for (int out_ch = 0; out_ch < 2 * bottleneck; out_ch++) { - for (int j = 0; j < 2 * bottleneck; j++) + for (int in_ch = 0; in_ch < channels; in_ch++) { - weights.push_back((i == j) ? 1.0f : 0.0f); + weights.push_back((out_ch == in_ch) ? 1.0f : 0.0f); } } // Conv bias: 2*bottleneck = 4 values weights.insert(weights.end(), {0.0f, 0.0f, 0.0f, 0.0f}); // Input mixin: conditionSize x (2*bottleneck) = 1 x 4 = 4 weights weights.insert(weights.end(), {1.0f, 1.0f, 1.0f, 1.0f}); - // 1x1 weights: bottleneck x channels = 2 x 4 = 8 weights - for (int i = 0; i < bottleneck; i++) + // 1x1 weights: out_channels x in_channels = channels x bottleneck = 4 x 2 = 8 weights + // Weight layout for Conv1x1: for each out_channel, for each in_channel + // Identity pattern: out_channel i connects to in_channel i (for i < bottleneck) + for (int out_ch = 0; out_ch < channels; out_ch++) { - for (int j = 0; j < channels; j++) + for (int in_ch = 0; in_ch < bottleneck; in_ch++) { - weights.push_back((i == j) ? 1.0f : 0.0f); + weights.push_back((out_ch == in_ch) ? 1.0f : 0.0f); } } // 1x1 bias: channels = 4 values @@ -420,10 +424,11 @@ void test_layer_bottleneck_gated() auto layer_output = layer.GetOutputNextLayer().leftCols(numFrames); auto head_output = layer.GetOutputHead().leftCols(numFrames); - // Outputs should still have channels rows + // Layer output should have channels rows (for next layer) assert(layer_output.rows() == channels); assert(layer_output.cols() == numFrames); - assert(head_output.rows() == channels); + // Head output should have bottleneck rows (the activated portion, not the full 2*bottleneck) + assert(head_output.rows() == bottleneck); assert(head_output.cols() == numFrames); } }; // namespace test_layer From 962db0db74ee8e4eafd51ee9f7211446d7c69630 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Thu, 15 Jan 2026 19:09:11 -0800 Subject: [PATCH 5/7] [REFINE] Update headInput resizing in WaveNet layer test - Modified the headInput matrix resizing in test_layer.cpp to utilize the bottleneck parameter instead of channels, ensuring alignment with recent changes in the WaveNet layer configuration. - This adjustment enhances the accuracy of the test cases by reflecting the updated architecture that incorporates the bottleneck parameter. 
--- tools/test/test_wavenet/test_layer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/test/test_wavenet/test_layer.cpp b/tools/test/test_wavenet/test_layer.cpp index e594cac..5d53be4 100644 --- a/tools/test/test_wavenet/test_layer.cpp +++ b/tools/test/test_wavenet/test_layer.cpp @@ -49,7 +49,7 @@ void test_gated() Eigen::MatrixXf input, condition, headInput, output; input.resize(channels, numFrames); condition.resize(conditionSize, numFrames); - headInput.resize(channels, numFrames); + headInput.resize(bottleneck, numFrames); output.resize(channels, numFrames); const float signalValue = 0.25f; From 995de59bef609a4fabe1dd245ced584af99775f1 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Thu, 15 Jan 2026 19:22:16 -0800 Subject: [PATCH 6/7] Remove unused variable --- NAM/wavenet.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp index 5fbe9eb..285ea69 100644 --- a/NAM/wavenet.cpp +++ b/NAM/wavenet.cpp @@ -32,7 +32,6 @@ void nam::wavenet::_Layer::set_weights_(std::vector::iterator& weights) void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::MatrixXf& condition, const int num_frames) { - const long channels = this->get_channels(); const long bottleneck = this->_bottleneck; // Use the actual bottleneck value, not the doubled output channels // Step 1: input convolutions From d2c01278e9b1184276bede3bb3d4837afd036f46 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Thu, 15 Jan 2026 20:00:44 -0800 Subject: [PATCH 7/7] Add test for Layer::Process() with bottleneck configuration - Introduced a new test case, test_layer_bottleneck_process_realtime_safe(), to validate that the Layer::Process() method operates correctly when the bottleneck parameter differs from the number of channels. - Ensured that the test checks for memory allocation during processing, maintaining real-time safety. - Updated run_tests.cpp to include this new test, enhancing coverage for bottleneck scenarios in the WaveNet layer. 
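As a cross-check on the hand-built weight vectors in these tests, the serialized parameter count for one non-gated bottleneck layer can be tallied as below. This is an illustrative helper, not part of the library; it assumes the ordering used by set_weights_ (dilated conv weight then bias, input mixin, then 1x1 weight then bias), which is the order the tests fill by hand.

// Illustrative only: expected number of serialized floats for one non-gated _Layer.
int expected_layer_weight_count(int condition_size, int channels, int bottleneck, int kernel_size)
{
  const int conv = bottleneck * channels * kernel_size + bottleneck; // (out x in x k) weights + bias
  const int input_mixin = bottleneck * condition_size;               // no bias
  const int one_by_one = channels * bottleneck + channels;           // (out x in) weights + bias
  return conv + input_mixin + one_by_one;
}
// e.g. condition_size=1, channels=4, bottleneck=2, kernel_size=1:
// (8 + 2) + 2 + (8 + 4) = 24 floats, matching the vector built in the test added below.
// When gated, the conv and input-mixin terms double (2*bottleneck output rows).
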
--- tools/run_tests.cpp | 1 + .../test/test_wavenet/test_real_time_safe.cpp | 85 +++++++++++++++++++ 2 files changed, 86 insertions(+) diff --git a/tools/run_tests.cpp b/tools/run_tests.cpp index 11047f9..33c4d45 100644 --- a/tools/run_tests.cpp +++ b/tools/run_tests.cpp @@ -120,6 +120,7 @@ int main() test_wavenet::test_conv1d_grouped_process_realtime_safe(); test_wavenet::test_conv1d_grouped_dilated_process_realtime_safe(); test_wavenet::test_layer_process_realtime_safe(); + test_wavenet::test_layer_bottleneck_process_realtime_safe(); test_wavenet::test_layer_grouped_process_realtime_safe(); test_wavenet::test_layer_array_process_realtime_safe(); test_wavenet::test_process_realtime_safe(); diff --git a/tools/test/test_wavenet/test_real_time_safe.cpp b/tools/test/test_wavenet/test_real_time_safe.cpp index f8bd172..91d8628 100644 --- a/tools/test/test_wavenet/test_real_time_safe.cpp +++ b/tools/test/test_wavenet/test_real_time_safe.cpp @@ -478,6 +478,91 @@ void test_layer_process_realtime_safe() } } +// Test that Layer::Process() method with bottleneck != channels does not allocate or free memory +void test_layer_bottleneck_process_realtime_safe() +{ + // Setup: Create a Layer with bottleneck different from channels + const int condition_size = 1; + const int channels = 4; + const int bottleneck = 2; // bottleneck < channels + const int kernel_size = 1; + const int dilation = 1; + const std::string activation = "ReLU"; + const bool gated = false; + const int groups_input = 1; + const int groups_1x1 = 1; + + auto layer = nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gated, + groups_input, groups_1x1); + + // Set weights for bottleneck != channels + // Conv: (channels, bottleneck, kernelSize=1) = (4, 2, 1) + bias + // Input mixin: (conditionSize, bottleneck) = (1, 2) + // 1x1: (bottleneck, channels) = (2, 4) + bias + std::vector weights; + // Conv weights: out_channels x in_channels x kernelSize = bottleneck x channels x kernelSize = 2 x 4 x 1 = 8 weights + // Weight layout for Conv1D: for each out_channel, for each in_channel, for each kernel position + // Identity-like pattern: out_channel i connects to in_channel i (for i < bottleneck) + for (int out_ch = 0; out_ch < bottleneck; out_ch++) + { + for (int in_ch = 0; in_ch < channels; in_ch++) + { + weights.push_back((out_ch == in_ch) ? 1.0f : 0.0f); + } + } + // Conv bias: bottleneck values + weights.insert(weights.end(), {0.0f, 0.0f}); + // Input mixin: conditionSize x bottleneck = 1 x 2 = 2 weights + weights.insert(weights.end(), {1.0f, 1.0f}); + // 1x1 weights: out_channels x in_channels = channels x bottleneck = 4 x 2 = 8 weights + // Weight layout for Conv1x1: for each out_channel, for each in_channel + // Identity-like pattern: out_channel i connects to in_channel i (for i < bottleneck) + for (int out_ch = 0; out_ch < channels; out_ch++) + { + for (int in_ch = 0; in_ch < bottleneck; in_ch++) + { + weights.push_back((out_ch == in_ch) ? 
1.0f : 0.0f);
+    }
+  }
+  // 1x1 bias: channels values
+  weights.insert(weights.end(), {0.0f, 0.0f, 0.0f, 0.0f});
+
+  auto it = weights.begin();
+  layer.set_weights_(it);
+
+  const int maxBufferSize = 256;
+  layer.SetMaxBufferSize(maxBufferSize);
+
+  // Test with several different buffer sizes
+  std::vector<int> buffer_sizes{1, 8, 16, 32, 64, 128, 256};
+
+  for (int buffer_size : buffer_sizes)
+  {
+    // Prepare input/condition matrices (allocate before tracking)
+    Eigen::MatrixXf input(channels, buffer_size);
+    Eigen::MatrixXf condition(condition_size, buffer_size);
+    input.setConstant(0.5f);
+    condition.setConstant(0.5f);
+
+    std::string test_name = "Layer Process (bottleneck=" + std::to_string(bottleneck) + ", channels=" +
+                            std::to_string(channels) + ") - Buffer size " + std::to_string(buffer_size);
+    run_allocation_test_no_allocations(
+      nullptr, // No setup needed
+      [&]() {
+        // Call Process() - this should not allocate or free
+        layer.Process(input, condition, buffer_size);
+      },
+      nullptr, // No teardown needed
+      test_name.c_str());
+
+    // Verify output is valid
+    auto output = layer.GetOutputNextLayer().leftCols(buffer_size);
+    assert(output.rows() == channels && output.cols() == buffer_size);
+    assert(std::isfinite(output(0, 0)));
+    assert(std::isfinite(output(channels - 1, buffer_size - 1)));
+  }
+}
+
 // Test that Layer::Process() method with grouped convolution (groups_input > 1) does not allocate or free memory
 void test_layer_grouped_process_realtime_safe()
 {
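Finally, on the backward-compatibility point from patch 1: the Factory resolves the new field with a per-layer default, so older configs without a "bottleneck" key keep their original architecture. A minimal sketch of that lookup follows; the key names match those read in NAM/wavenet.cpp, while the helper name `resolve_bottleneck` is illustrative only.

#include <nlohmann/json.hpp>

// Sketch: how the new key is resolved for each layer array in the Factory.
int resolve_bottleneck(const nlohmann::json& layer_config)
{
  const int channels = layer_config["channels"];
  // A missing "bottleneck" key falls back to `channels`, i.e. the pre-patch architecture.
  return layer_config.value("bottleneck", channels);
}

// {"channels": 16}                  -> 16 (legacy config, architecture unchanged)
// {"channels": 16, "bottleneck": 8} -> 8  (conv/input-mixin shrink to 8, 1x1 maps 8 -> 16)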