118 changes: 110 additions & 8 deletions NAM/dsp.cpp
@@ -2,6 +2,7 @@
#include <cmath> // pow, tanh, expf
#include <filesystem>
#include <fstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <unordered_set>
@@ -206,8 +207,21 @@ std::unique_ptr<nam::DSP> nam::linear::Factory(const nlohmann::json& config, std

// Conv1x1 ====================================================================

nam::Conv1x1::Conv1x1(const int in_channels, const int out_channels, const bool _bias)
nam::Conv1x1::Conv1x1(const int in_channels, const int out_channels, const bool _bias, const int groups)
{
// Validate that channels divide evenly by groups
if (in_channels % groups != 0)
{
throw std::runtime_error("in_channels (" + std::to_string(in_channels) + ") must be divisible by numGroups ("
+ std::to_string(groups) + ")");
}
if (out_channels % groups != 0)
{
throw std::runtime_error("out_channels (" + std::to_string(out_channels) + ") must be divisible by numGroups ("
+ std::to_string(groups) + ")");
}

this->_num_groups = groups;
this->_weight.resize(out_channels, in_channels);
this->_do_bias = _bias;
if (_bias)
@@ -222,26 +236,114 @@ void nam::Conv1x1::SetMaxBufferSize(const int maxBufferSize)

void nam::Conv1x1::set_weights_(std::vector<float>::iterator& weights)
{
for (int i = 0; i < this->_weight.rows(); i++)
for (int j = 0; j < this->_weight.cols(); j++)
this->_weight(i, j) = *(weights++);
if (this->_weight.size() > 0)
{
const long out_channels = this->_weight.rows();
const long in_channels = this->_weight.cols();
const int numGroups = this->_num_groups;
const long out_per_group = out_channels / numGroups;
const long in_per_group = in_channels / numGroups;

// For grouped convolutions, weights are organized per group
// Weight layout: weights are [group0, group1, ..., groupN-1]
// Each group's weight matrix is (out_channels/numGroups, in_channels/numGroups)
for (int g = 0; g < numGroups; g++)
{
for (auto i = 0; i < out_per_group; i++)
{
for (auto j = 0; j < in_per_group; j++)
{
this->_weight(g * out_per_group + i, g * in_per_group + j) = *(weights++);
}
}
}
}
if (this->_do_bias)
for (int i = 0; i < this->_bias.size(); i++)
this->_bias(i) = *(weights++);
}

Eigen::MatrixXf nam::Conv1x1::process(const Eigen::MatrixXf& input, const int num_frames) const
{
if (this->_do_bias)
return (this->_weight * input.leftCols(num_frames)).colwise() + this->_bias;
const int numGroups = this->_num_groups;
const long in_channels = get_in_channels();
const long out_channels = get_out_channels();
const long in_per_group = in_channels / numGroups;
const long out_per_group = out_channels / numGroups;

Eigen::MatrixXf result(out_channels, num_frames);

if (numGroups == 1)
{
// Standard convolution (no grouping)
if (this->_do_bias)
result = (this->_weight * input.leftCols(num_frames)).colwise() + this->_bias;
else
result = this->_weight * input.leftCols(num_frames);
}
else
return this->_weight * input.leftCols(num_frames);
{
// Grouped convolution: process each group separately
result.setZero();
for (int g = 0; g < numGroups; g++)
{
// Extract input slice for this group
auto input_group = input.leftCols(num_frames).middleRows(g * in_per_group, in_per_group);

// Extract weight slice for this group
auto weight_group = this->_weight.block(g * out_per_group, g * in_per_group, out_per_group, in_per_group);

// Extract output slice for this group
auto output_group = result.middleRows(g * out_per_group, out_per_group);

// Perform grouped convolution: output_group = weight_group * input_group
output_group.noalias() = weight_group * input_group;
}

// Add bias if present
if (this->_do_bias)
result.colwise() += this->_bias;
}

return result;
}

void nam::Conv1x1::process_(const Eigen::MatrixXf& input, const int num_frames)
{
assert(num_frames <= _output.cols());
_output.leftCols(num_frames).noalias() = this->_weight * input.leftCols(num_frames);

const int numGroups = this->_num_groups;
const long in_channels = get_in_channels();
const long out_channels = get_out_channels();
const long in_per_group = in_channels / numGroups;
const long out_per_group = out_channels / numGroups;

if (numGroups == 1)
{
// Standard convolution (no grouping)
_output.leftCols(num_frames).noalias() = this->_weight * input.leftCols(num_frames);
}
else
{
// Grouped convolution: process each group separately
_output.leftCols(num_frames).setZero();
for (int g = 0; g < numGroups; g++)
{
// Extract input slice for this group
auto input_group = input.leftCols(num_frames).middleRows(g * in_per_group, in_per_group);

// Extract weight slice for this group
auto weight_group = this->_weight.block(g * out_per_group, g * in_per_group, out_per_group, in_per_group);

// Extract output slice for this group
auto output_group = _output.leftCols(num_frames).middleRows(g * out_per_group, out_per_group);

// Perform grouped convolution: output_group = weight_group * input_group
output_group.noalias() = weight_group * input_group;
}
}

// Add bias if present
if (this->_do_bias)
{
_output.leftCols(num_frames).colwise() += this->_bias;
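For reference, here is a minimal standalone sketch (not part of this diff; the main wrapper is illustrative only) of the grouped 1x1 semantics implemented above: with groups > 1 the effective weight matrix is block-diagonal, so multiplying group by group, as process and process_ do, gives the same result as a single multiply by the full block-diagonal matrix.

#include <Eigen/Dense>
#include <cassert>

int main()
{
  const int groups = 2, in_per_group = 2, out_per_group = 2;
  const int in_channels = groups * in_per_group;   // 4
  const int out_channels = groups * out_per_group; // 4
  const int num_frames = 8;

  // Per-group weight blocks, laid out as set_weights_ expects: [group0, group1, ...]
  Eigen::MatrixXf W = Eigen::MatrixXf::Zero(out_channels, in_channels);
  for (int g = 0; g < groups; g++)
    W.block(g * out_per_group, g * in_per_group, out_per_group, in_per_group).setRandom();

  const Eigen::MatrixXf x = Eigen::MatrixXf::Random(in_channels, num_frames);

  // Group-by-group product, as in Conv1x1::process above...
  Eigen::MatrixXf y(out_channels, num_frames);
  for (int g = 0; g < groups; g++)
    y.middleRows(g * out_per_group, out_per_group).noalias() =
      W.block(g * out_per_group, g * in_per_group, out_per_group, in_per_group)
      * x.middleRows(g * in_per_group, in_per_group);

  // ...matches a single multiply by the block-diagonal weight matrix.
  assert(y.isApprox(W * x));
  return 0;
}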
3 changes: 2 additions & 1 deletion NAM/dsp.h
@@ -177,7 +177,7 @@ std::unique_ptr<DSP> Factory(const nlohmann::json& config, std::vector<float>& w
class Conv1x1
{
public:
Conv1x1(const int in_channels, const int out_channels, const bool _bias);
Conv1x1(const int in_channels, const int out_channels, const bool _bias, const int groups = 1);
// Get the entire internal output buffer. This is intended for internal wiring
// between layers/arrays; callers should treat the buffer as pre-allocated
// storage and only consider the first `num_frames` columns valid for a given
@@ -199,6 +199,7 @@ class Conv1x1
protected:
Eigen::MatrixXf _weight;
Eigen::VectorXf _bias;
int _num_groups;

private:
Eigen::MatrixXf _output;
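A hedged usage sketch against the header above (not part of this diff; the function name is illustrative): the trailing groups argument defaults to 1, so existing call sites compile unchanged, and indivisible channel counts throw at construction.

#include <iostream>
#include <stdexcept>
#include "NAM/dsp.h"

void conv1x1_groups_demo()
{
  nam::Conv1x1 ungrouped(16, 16, true);    // groups defaults to 1
  nam::Conv1x1 grouped(16, 16, true, 4);   // four 4x4 diagonal weight blocks
  try
  {
    nam::Conv1x1 invalid(16, 16, true, 3); // 16 % 3 != 0, throws std::runtime_error
  }
  catch (const std::runtime_error& e)
  {
    std::cerr << e.what() << std::endl;
  }
}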
9 changes: 5 additions & 4 deletions NAM/wavenet.cpp
@@ -74,13 +74,13 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
nam::wavenet::_LayerArray::_LayerArray(const int input_size, const int condition_size, const int head_size,
const int channels, const int kernel_size, const std::vector<int>& dilations,
const std::string activation, const bool gated, const bool head_bias,
const int groups_input)
const int groups_input, const int groups_1x1)
: _rechannel(input_size, channels, false)
, _head_rechannel(channels, head_size, head_bias)
{
for (size_t i = 0; i < dilations.size(); i++)
this->_layers.push_back(
_Layer(condition_size, channels, kernel_size, dilations[i], activation, gated, groups_input));
_Layer(condition_size, channels, kernel_size, dilations[i], activation, gated, groups_input, groups_1x1));
}

void nam::wavenet::_LayerArray::SetMaxBufferSize(const int maxBufferSize)
Expand Down Expand Up @@ -201,7 +201,7 @@ nam::wavenet::WaveNet::WaveNet(const std::vector<nam::wavenet::LayerArrayParams>
layer_array_params[i].input_size, layer_array_params[i].condition_size, layer_array_params[i].head_size,
layer_array_params[i].channels, layer_array_params[i].kernel_size, layer_array_params[i].dilations,
layer_array_params[i].activation, layer_array_params[i].gated, layer_array_params[i].head_bias,
layer_array_params[i].groups_input));
layer_array_params[i].groups_input, layer_array_params[i].groups_1x1));
if (i > 0)
if (layer_array_params[i].channels != layer_array_params[i - 1].head_size)
{
@@ -299,10 +299,11 @@ std::unique_ptr<nam::DSP> nam::wavenet::Factory(const nlohmann::json& config, st
{
nlohmann::json layer_config = config["layers"][i];
const int groups = layer_config.value("groups", 1); // defaults to 1
const int groups_1x1 = layer_config.value("groups_1x1", 1); // defaults to 1
layer_array_params.push_back(nam::wavenet::LayerArrayParams(
layer_config["input_size"], layer_config["condition_size"], layer_config["head_size"], layer_config["channels"],
layer_config["kernel_size"], layer_config["dilations"], layer_config["activation"], layer_config["gated"],
layer_config["head_bias"], groups));
layer_config["head_bias"], groups, groups_1x1));
}
const bool with_head = !config["head"].is_null();
const float head_scale = config["head_scale"];
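A sketch of the config-side behavior (the "groups_1x1" key is from the Factory code above; the surrounding model-file shape is assumed): nlohmann::json::value falls back to the supplied default when the key is absent, so older model files without "groups_1x1" keep loading with a group count of 1.

#include <nlohmann/json.hpp>
#include <cassert>

void groups_1x1_config_demo()
{
  const nlohmann::json old_layer = {{"channels", 16}};
  const nlohmann::json new_layer = {{"channels", 16}, {"groups_1x1", 4}};
  // .value() returns the stored value if present, else the default.
  assert(old_layer.value("groups_1x1", 1) == 1);
  assert(new_layer.value("groups_1x1", 1) == 4);
}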
10 changes: 6 additions & 4 deletions NAM/wavenet.h
@@ -17,10 +17,10 @@ class _Layer
{
public:
_Layer(const int condition_size, const int channels, const int kernel_size, const int dilation,
const std::string activation, const bool gated, const int groups_input)
const std::string activation, const bool gated, const int groups_input, const int groups_1x1)
: _conv(channels, gated ? 2 * channels : channels, kernel_size, true, dilation, groups_input)
, _input_mixin(condition_size, gated ? 2 * channels : channels, false)
, _1x1(channels, channels, true)
, _1x1(channels, channels, true, groups_1x1)
, _activation(activations::Activation::get_activation(activation)) // needs to support activations with parameters
, _gated(gated) {};
// Resize all arrays to be able to process `maxBufferSize` frames.
@@ -78,7 +78,7 @@ class LayerArrayParams
public:
LayerArrayParams(const int input_size_, const int condition_size_, const int head_size_, const int channels_,
const int kernel_size_, const std::vector<int>&& dilations_, const std::string activation_,
const bool gated_, const bool head_bias_, const int groups_input)
const bool gated_, const bool head_bias_, const int groups_input, const int groups_1x1_)
: input_size(input_size_)
, condition_size(condition_size_)
, head_size(head_size_)
@@ -89,6 +89,7 @@
, gated(gated_)
, head_bias(head_bias_)
, groups_input(groups_input)
, groups_1x1(groups_1x1_)
{
}

@@ -102,6 +103,7 @@
const bool gated;
const bool head_bias;
const int groups_input;
const int groups_1x1;
};

// An array of layers with the same channels, kernel sizes, activations.
@@ -110,7 +112,7 @@ class _LayerArray
public:
_LayerArray(const int input_size, const int condition_size, const int head_size, const int channels,
const int kernel_size, const std::vector<int>& dilations, const std::string activation, const bool gated,
const bool head_bias, const int groups_input);
const bool head_bias, const int groups_input, const int groups_1x1);

void SetMaxBufferSize(const int maxBufferSize);

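A minimal construction sketch (argument values are illustrative, not from this PR) showing where the new trailing parameter lands: groups_input still configures the dilated input convolution, while groups_1x1 reaches each layer's 1x1 convolution.

#include "NAM/wavenet.h"

nam::wavenet::LayerArrayParams make_params_demo()
{
  return nam::wavenet::LayerArrayParams(
    /*input_size=*/1, /*condition_size=*/1, /*head_size=*/8, /*channels=*/16,
    /*kernel_size=*/3, /*dilations=*/{1, 2, 4, 8}, /*activation=*/"Tanh",
    /*gated=*/false, /*head_bias=*/true, /*groups_input=*/1, /*groups_1x1=*/4);
}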
16 changes: 16 additions & 0 deletions tools/run_tests.cpp
@@ -4,6 +4,7 @@
#include <iostream>
#include "test/test_activations.cpp"
#include "test/test_conv1d.cpp"
#include "test/test_conv_1x1.cpp"
#include "test/test_convnet.cpp"
#include "test/test_dsp.cpp"
#include "test/test_fast_lut.cpp"
@@ -83,6 +84,21 @@ int main()
test_conv1d::test_process_grouped_channel_isolation();
test_conv1d::test_get_num_weights_grouped();

test_conv_1x1::test_construct();
test_conv_1x1::test_construct_with_groups();
test_conv_1x1::test_construct_validation_in_channels();
test_conv_1x1::test_construct_validation_out_channels();
test_conv_1x1::test_process_basic();
test_conv_1x1::test_process_with_bias();
test_conv_1x1::test_process_underscore();
test_conv_1x1::test_process_grouped_basic();
test_conv_1x1::test_process_grouped_with_bias();
test_conv_1x1::test_process_grouped_multiple_groups();
test_conv_1x1::test_process_grouped_channel_isolation();
test_conv_1x1::test_process_underscore_grouped();
test_conv_1x1::test_set_max_buffer_size();
test_conv_1x1::test_process_multiple_calls();

test_wavenet::test_layer::test_gated();
test_wavenet::test_layer::test_layer_getters();
test_wavenet::test_layer::test_non_gated_layer();
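The new test file itself is collapsed out of this diff, so as a hypothetical reconstruction only: test_conv_1x1::test_process_grouped_channel_isolation presumably checks the defining property of grouped 1x1s, namely that input in one group cannot leak into another group's outputs. A sketch of that check (function name and values are assumptions):

#include <cassert>
#include <vector>
#include <Eigen/Dense>
#include "NAM/dsp.h"

void sketch_grouped_channel_isolation()
{
  nam::Conv1x1 conv(4, 4, false, 2);   // 2 groups, one 2x2 weight block each
  std::vector<float> weights(8, 1.0f); // 2 groups * (2 out * 2 in), all ones
  auto it = weights.begin();
  conv.set_weights_(it);

  Eigen::MatrixXf input = Eigen::MatrixXf::Zero(4, 3);
  input.topRows(2).setOnes(); // excite only group 0's input channels

  const Eigen::MatrixXf output = conv.process(input, 3);
  assert((output.topRows(2).array() == 2.0f).all());    // 1*1 + 1*1 per frame
  assert((output.bottomRows(2).array() == 0.0f).all()); // group 1 stays silent
}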