118 changes: 110 additions & 8 deletions NAM/dsp.cpp
@@ -2,6 +2,7 @@
#include <cmath> // pow, tanh, expf
#include <filesystem>
#include <fstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <unordered_set>
@@ -206,8 +207,21 @@ std::unique_ptr<nam::DSP> nam::linear::Factory(const nlohmann::json& config, std

// Conv1x1 ====================================================================

nam::Conv1x1::Conv1x1(const int in_channels, const int out_channels, const bool _bias)
nam::Conv1x1::Conv1x1(const int in_channels, const int out_channels, const bool _bias, const int groups)
{
// Validate that channels divide evenly by groups
if (in_channels % groups != 0)
{
throw std::runtime_error("in_channels (" + std::to_string(in_channels) + ") must be divisible by numGroups ("
+ std::to_string(groups) + ")");
}
if (out_channels % groups != 0)
{
throw std::runtime_error("out_channels (" + std::to_string(out_channels) + ") must be divisible by numGroups ("
+ std::to_string(groups) + ")");
}

this->_num_groups = groups;
this->_weight.resize(out_channels, in_channels);
this->_do_bias = _bias;
if (_bias)
@@ -222,26 +236,114 @@ void nam::Conv1x1::SetMaxBufferSize(const int maxBufferSize)

void nam::Conv1x1::set_weights_(std::vector<float>::iterator& weights)
{
for (int i = 0; i < this->_weight.rows(); i++)
for (int j = 0; j < this->_weight.cols(); j++)
this->_weight(i, j) = *(weights++);
if (this->_weight.size() > 0)
{
const long out_channels = this->_weight.rows();
const long in_channels = this->_weight.cols();
const int numGroups = this->_num_groups;
const long out_per_group = out_channels / numGroups;
const long in_per_group = in_channels / numGroups;

// For grouped convolutions, weights are organized per group
// Weight layout: weights are [group0, group1, ..., groupN-1]
// Each group's weight matrix is (out_channels/numGroups, in_channels/numGroups)
for (int g = 0; g < numGroups; g++)
{
for (auto i = 0; i < out_per_group; i++)
{
for (auto j = 0; j < in_per_group; j++)
{
this->_weight(g * out_per_group + i, g * in_per_group + j) = *(weights++);
}
}
}
}
if (this->_do_bias)
for (int i = 0; i < this->_bias.size(); i++)
this->_bias(i) = *(weights++);
}

Eigen::MatrixXf nam::Conv1x1::process(const Eigen::MatrixXf& input, const int num_frames) const
{
if (this->_do_bias)
return (this->_weight * input.leftCols(num_frames)).colwise() + this->_bias;
const int numGroups = this->_num_groups;
const long in_channels = get_in_channels();
const long out_channels = get_out_channels();
const long in_per_group = in_channels / numGroups;
const long out_per_group = out_channels / numGroups;

Eigen::MatrixXf result(out_channels, num_frames);

if (numGroups == 1)
{
// Standard convolution (no grouping)
if (this->_do_bias)
result = (this->_weight * input.leftCols(num_frames)).colwise() + this->_bias;
else
result = this->_weight * input.leftCols(num_frames);
}
else
return this->_weight * input.leftCols(num_frames);
{
// Grouped convolution: process each group separately
result.setZero();
for (int g = 0; g < numGroups; g++)
{
// Extract input slice for this group
auto input_group = input.leftCols(num_frames).middleRows(g * in_per_group, in_per_group);

// Extract weight slice for this group
auto weight_group = this->_weight.block(g * out_per_group, g * in_per_group, out_per_group, in_per_group);

// Extract output slice for this group
auto output_group = result.middleRows(g * out_per_group, out_per_group);

// Perform grouped convolution: output_group = weight_group * input_group
output_group.noalias() = weight_group * input_group;
}

// Add bias if present
if (this->_do_bias)
result.colwise() += this->_bias;
}

return result;
}

void nam::Conv1x1::process_(const Eigen::MatrixXf& input, const int num_frames)
{
assert(num_frames <= _output.cols());
_output.leftCols(num_frames).noalias() = this->_weight * input.leftCols(num_frames);

const int numGroups = this->_num_groups;
const long in_channels = get_in_channels();
const long out_channels = get_out_channels();
const long in_per_group = in_channels / numGroups;
const long out_per_group = out_channels / numGroups;

if (numGroups == 1)
{
// Standard convolution (no grouping)
_output.leftCols(num_frames).noalias() = this->_weight * input.leftCols(num_frames);
}
else
{
// Grouped convolution: process each group separately
_output.leftCols(num_frames).setZero();
for (int g = 0; g < numGroups; g++)
{
// Extract input slice for this group
auto input_group = input.leftCols(num_frames).middleRows(g * in_per_group, in_per_group);

// Extract weight slice for this group
auto weight_group = this->_weight.block(g * out_per_group, g * in_per_group, out_per_group, in_per_group);

// Extract output slice for this group
auto output_group = _output.leftCols(num_frames).middleRows(g * out_per_group, out_per_group);

// Perform grouped convolution: output_group = weight_group * input_group
output_group.noalias() = weight_group * input_group;
}
}

// Add bias if present
if (this->_do_bias)
{
_output.leftCols(num_frames).colwise() += this->_bias;
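For reference, here is a minimal standalone sketch (not part of this diff; the main wrapper is illustrative only) of the grouped 1x1 semantics implemented above: with groups > 1 the effective weight matrix is block-diagonal, so multiplying group by group, as process and process_ do, gives the same result as a single multiply by the full block-diagonal matrix.

#include <Eigen/Dense>
#include <cassert>

int main()
{
  const int groups = 2, in_per_group = 2, out_per_group = 2;
  const int in_channels = groups * in_per_group;   // 4
  const int out_channels = groups * out_per_group; // 4
  const int num_frames = 8;

  // Per-group weight blocks, laid out as set_weights_ expects: [group0, group1, ...]
  Eigen::MatrixXf W = Eigen::MatrixXf::Zero(out_channels, in_channels);
  for (int g = 0; g < groups; g++)
    W.block(g * out_per_group, g * in_per_group, out_per_group, in_per_group).setRandom();

  const Eigen::MatrixXf x = Eigen::MatrixXf::Random(in_channels, num_frames);

  // Group-by-group product, as in Conv1x1::process above...
  Eigen::MatrixXf y(out_channels, num_frames);
  for (int g = 0; g < groups; g++)
    y.middleRows(g * out_per_group, out_per_group).noalias() =
      W.block(g * out_per_group, g * in_per_group, out_per_group, in_per_group)
      * x.middleRows(g * in_per_group, in_per_group);

  // ...matches a single multiply by the block-diagonal weight matrix.
  assert(y.isApprox(W * x));
  return 0;
}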
3 changes: 2 additions & 1 deletion NAM/dsp.h
@@ -177,7 +177,7 @@ std::unique_ptr<DSP> Factory(const nlohmann::json& config, std::vector<float>& w
class Conv1x1
{
public:
Conv1x1(const int in_channels, const int out_channels, const bool _bias);
Conv1x1(const int in_channels, const int out_channels, const bool _bias, const int groups = 1);
// Get the entire internal output buffer. This is intended for internal wiring
// between layers/arrays; callers should treat the buffer as pre-allocated
// storage and only consider the first `num_frames` columns valid for a given
@@ -199,6 +199,7 @@ class Conv1x1
protected:
Eigen::MatrixXf _weight;
Eigen::VectorXf _bias;
int _num_groups;

private:
Eigen::MatrixXf _output;
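A hedged usage sketch against the header above (not part of this diff; the function name is illustrative): the trailing groups argument defaults to 1, so existing call sites compile unchanged, and indivisible channel counts throw at construction.

#include <iostream>
#include <stdexcept>
#include "NAM/dsp.h"

void conv1x1_groups_demo()
{
  nam::Conv1x1 ungrouped(16, 16, true);    // groups defaults to 1
  nam::Conv1x1 grouped(16, 16, true, 4);   // four 4x4 diagonal weight blocks
  try
  {
    nam::Conv1x1 invalid(16, 16, true, 3); // 16 % 3 != 0, throws std::runtime_error
  }
  catch (const std::runtime_error& e)
  {
    std::cerr << e.what() << std::endl;
  }
}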
9 changes: 5 additions & 4 deletions NAM/wavenet.cpp
@@ -74,13 +74,13 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
nam::wavenet::_LayerArray::_LayerArray(const int input_size, const int condition_size, const int head_size,
const int channels, const int kernel_size, const std::vector<int>& dilations,
const std::string activation, const bool gated, const bool head_bias,
const int groups_input)
const int groups_input, const int groups_1x1)
: _rechannel(input_size, channels, false)
, _head_rechannel(channels, head_size, head_bias)
{
for (size_t i = 0; i < dilations.size(); i++)
this->_layers.push_back(
_Layer(condition_size, channels, kernel_size, dilations[i], activation, gated, groups_input));
_Layer(condition_size, channels, kernel_size, dilations[i], activation, gated, groups_input, groups_1x1));
}

void nam::wavenet::_LayerArray::SetMaxBufferSize(const int maxBufferSize)
Expand Down Expand Up @@ -201,7 +201,7 @@ nam::wavenet::WaveNet::WaveNet(const std::vector<nam::wavenet::LayerArrayParams>
layer_array_params[i].input_size, layer_array_params[i].condition_size, layer_array_params[i].head_size,
layer_array_params[i].channels, layer_array_params[i].kernel_size, layer_array_params[i].dilations,
layer_array_params[i].activation, layer_array_params[i].gated, layer_array_params[i].head_bias,
layer_array_params[i].groups_input));
layer_array_params[i].groups_input, layer_array_params[i].groups_1x1));
if (i > 0)
if (layer_array_params[i].channels != layer_array_params[i - 1].head_size)
{
@@ -299,10 +299,11 @@ std::unique_ptr<nam::DSP> nam::wavenet::Factory(const nlohmann::json& config, st
{
nlohmann::json layer_config = config["layers"][i];
const int groups = layer_config.value("groups", 1); // defaults to 1
const int groups_1x1 = layer_config.value("groups_1x1", 1); // defaults to 1
layer_array_params.push_back(nam::wavenet::LayerArrayParams(
layer_config["input_size"], layer_config["condition_size"], layer_config["head_size"], layer_config["channels"],
layer_config["kernel_size"], layer_config["dilations"], layer_config["activation"], layer_config["gated"],
layer_config["head_bias"], groups));
layer_config["head_bias"], groups, groups_1x1));
}
const bool with_head = !config["head"].is_null();
const float head_scale = config["head_scale"];
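A sketch of the config-side behavior (the "groups_1x1" key is from the Factory code above; the surrounding model-file shape is assumed): nlohmann::json::value falls back to the supplied default when the key is absent, so older model files without "groups_1x1" keep loading with a group count of 1.

#include <nlohmann/json.hpp>
#include <cassert>

void groups_1x1_config_demo()
{
  const nlohmann::json old_layer = {{"channels", 16}};
  const nlohmann::json new_layer = {{"channels", 16}, {"groups_1x1", 4}};
  // .value() returns the stored value if present, else the default.
  assert(old_layer.value("groups_1x1", 1) == 1);
  assert(new_layer.value("groups_1x1", 1) == 4);
}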
10 changes: 6 additions & 4 deletions NAM/wavenet.h
@@ -17,10 +17,10 @@ class _Layer
{
public:
_Layer(const int condition_size, const int channels, const int kernel_size, const int dilation,
const std::string activation, const bool gated, const int groups_input)
const std::string activation, const bool gated, const int groups_input, const int groups_1x1)
: _conv(channels, gated ? 2 * channels : channels, kernel_size, true, dilation, groups_input)
, _input_mixin(condition_size, gated ? 2 * channels : channels, false)
, _1x1(channels, channels, true)
, _1x1(channels, channels, true, groups_1x1)
, _activation(activations::Activation::get_activation(activation)) // needs to support activations with parameters
, _gated(gated) {};
// Resize all arrays to be able to process `maxBufferSize` frames.
@@ -78,7 +78,7 @@ class LayerArrayParams
public:
LayerArrayParams(const int input_size_, const int condition_size_, const int head_size_, const int channels_,
const int kernel_size_, const std::vector<int>&& dilations_, const std::string activation_,
const bool gated_, const bool head_bias_, const int groups_input)
const bool gated_, const bool head_bias_, const int groups_input, const int groups_1x1_)
: input_size(input_size_)
, condition_size(condition_size_)
, head_size(head_size_)
@@ -89,6 +89,7 @@
, gated(gated_)
, head_bias(head_bias_)
, groups_input(groups_input)
, groups_1x1(groups_1x1_)
{
}

@@ -102,6 +103,7 @@
const bool gated;
const bool head_bias;
const int groups_input;
const int groups_1x1;
};

// An array of layers with the same channels, kernel sizes, activations.
@@ -110,7 +112,7 @@ class _LayerArray
public:
_LayerArray(const int input_size, const int condition_size, const int head_size, const int channels,
const int kernel_size, const std::vector<int>& dilations, const std::string activation, const bool gated,
const bool head_bias, const int groups_input);
const bool head_bias, const int groups_input, const int groups_1x1);

void SetMaxBufferSize(const int maxBufferSize);

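A minimal construction sketch (argument values are illustrative, not from this PR) showing where the new trailing parameter lands: groups_input still configures the dilated input convolution, while groups_1x1 reaches each layer's 1x1 convolution.

#include "NAM/wavenet.h"

nam::wavenet::LayerArrayParams make_params_demo()
{
  return nam::wavenet::LayerArrayParams(
    /*input_size=*/1, /*condition_size=*/1, /*head_size=*/8, /*channels=*/16,
    /*kernel_size=*/3, /*dilations=*/{1, 2, 4, 8}, /*activation=*/"Tanh",
    /*gated=*/false, /*head_bias=*/true, /*groups_input=*/1, /*groups_1x1=*/4);
}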
16 changes: 16 additions & 0 deletions tools/run_tests.cpp
@@ -4,6 +4,7 @@
#include <iostream>
#include "test/test_activations.cpp"
#include "test/test_conv1d.cpp"
#include "test/test_conv_1x1.cpp"
#include "test/test_convnet.cpp"
#include "test/test_dsp.cpp"
#include "test/test_fast_lut.cpp"
@@ -83,6 +84,21 @@ int main()
test_conv1d::test_process_grouped_channel_isolation();
test_conv1d::test_get_num_weights_grouped();

test_conv_1x1::test_construct();
test_conv_1x1::test_construct_with_groups();
test_conv_1x1::test_construct_validation_in_channels();
test_conv_1x1::test_construct_validation_out_channels();
test_conv_1x1::test_process_basic();
test_conv_1x1::test_process_with_bias();
test_conv_1x1::test_process_underscore();
test_conv_1x1::test_process_grouped_basic();
test_conv_1x1::test_process_grouped_with_bias();
test_conv_1x1::test_process_grouped_multiple_groups();
test_conv_1x1::test_process_grouped_channel_isolation();
test_conv_1x1::test_process_underscore_grouped();
test_conv_1x1::test_set_max_buffer_size();
test_conv_1x1::test_process_multiple_calls();

test_wavenet::test_layer::test_gated();
test_wavenet::test_layer::test_layer_getters();
test_wavenet::test_layer::test_non_gated_layer();
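The new test file itself is collapsed out of this diff, so as a hypothetical reconstruction only: test_conv_1x1::test_process_grouped_channel_isolation presumably checks the defining property of grouped 1x1s, namely that input in one group cannot leak into another group's outputs. A sketch of that check (function name and values are assumptions):

#include <cassert>
#include <vector>
#include <Eigen/Dense>
#include "NAM/dsp.h"

void sketch_grouped_channel_isolation()
{
  nam::Conv1x1 conv(4, 4, false, 2);   // 2 groups, one 2x2 weight block each
  std::vector<float> weights(8, 1.0f); // 2 groups * (2 out * 2 in), all ones
  auto it = weights.begin();
  conv.set_weights_(it);

  Eigen::MatrixXf input = Eigen::MatrixXf::Zero(4, 3);
  input.topRows(2).setOnes(); // excite only group 0's input channels

  const Eigen::MatrixXf output = conv.process(input, 3);
  assert((output.topRows(2).array() == 2.0f).all());    // 1*1 + 1*1 per frame
  assert((output.bottomRows(2).array() == 0.0f).all()); // group 1 stays silent
}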