From 02f5c053361b645356d81851efe51176e80ec794 Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Thu, 15 Jan 2026 21:26:52 -0800
Subject: [PATCH 01/19] [FEATURE] Multi-channel inputs and outputs

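process() now takes arrays of per-channel pointers, indexed as
input[channel][frame], instead of single mono buffers. A minimal sketch of
the new calling convention (buffer and variable names are illustrative):

    std::vector<std::vector<NAM_SAMPLE>> ins(model->NumInputChannels(),
                                             std::vector<NAM_SAMPLE>(numFrames, 0.0));
    std::vector<std::vector<NAM_SAMPLE>> outs(model->NumOutputChannels(),
                                              std::vector<NAM_SAMPLE>(numFrames, 0.0));
    std::vector<NAM_SAMPLE*> inPtrs, outPtrs;
    for (auto& v : ins)
      inPtrs.push_back(v.data());
    for (auto& v : outs)
      outPtrs.push_back(v.data());
    model->process(inPtrs.data(), outPtrs.data(), numFrames);

Mono models keep working: the factories default in_channels and out_channels
to 1 when the config does not specify them.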
---
 NAM/convnet.cpp                                 |  89 +++++--
 NAM/convnet.h                                   |  10 +-
 NAM/dsp.cpp                                     | 252 ++++++++++++++----
 NAM/dsp.h                                       |  65 ++---
 NAM/get_dsp.cpp                                 |  12 +-
 NAM/lstm.cpp                                    |  21 +-
 NAM/lstm.h                                      |   4 +-
 NAM/wavenet.cpp                                 |  34 ++-
 NAM/wavenet.h                                   |   6 +-
 tools/benchmodel.cpp                            |  23 +-
 tools/test/test_convnet.cpp                     |  46 ++--
 tools/test/test_dsp.cpp                         |  94 +++++--
 tools/test/test_wavenet/test_full.cpp           |  47 ++--
 tools/test/test_wavenet/test_real_time_safe.cpp |   6 +-
 14 files changed, 506 insertions(+), 203 deletions(-)

diff --git a/NAM/convnet.cpp b/NAM/convnet.cpp
index 3b8b18f..f82d6c1 100644
--- a/NAM/convnet.cpp
+++ b/NAM/convnet.cpp
@@ -146,22 +146,30 @@ void nam::convnet::_Head::process_(const Eigen::MatrixXf& input, Eigen::VectorXf
     output(i) = this->_bias + input.col(j).dot(this->_weight);
 }
 
-nam::convnet::ConvNet::ConvNet(const int channels, const std::vector<int>& dilations, const bool batchnorm,
+nam::convnet::ConvNet::ConvNet(const int in_channels, const int out_channels, const int channels, const std::vector<int>& dilations, const bool batchnorm,
                                const std::string activation, std::vector<float>& weights,
                                const double expected_sample_rate, const int groups)
-: Buffer(*std::max_element(dilations.begin(), dilations.end()), expected_sample_rate)
+: Buffer(in_channels, out_channels, *std::max_element(dilations.begin(), dilations.end()), expected_sample_rate)
 {
   this->_verify_weights(channels, dilations, batchnorm, weights.size());
   this->_blocks.resize(dilations.size());
   std::vector<float>::iterator it = weights.begin();
+  // First block takes in_channels input, subsequent blocks take channels input
   for (size_t i = 0; i < dilations.size(); i++)
-    this->_blocks[i].set_weights_(i == 0 ? 1 : channels, channels, dilations[i], batchnorm, activation, groups, it);
+    this->_blocks[i].set_weights_(i == 0 ? in_channels : channels, channels, dilations[i], batchnorm, activation, groups, it);
   // Only need _block_vals for the head (one entry)
   // Conv1D layers manage their own buffers now
   this->_block_vals.resize(1);
   this->_block_vals[0].setZero();
-  std::fill(this->_input_buffer.begin(), this->_input_buffer.end(), 0.0f);
-  this->_head = _Head(channels, it);
+
+  // Create heads for each output channel
+  this->_heads.resize(out_channels);
+  this->_head_outputs.resize(out_channels);
+  for (int ch = 0; ch < out_channels; ch++)
+  {
+    this->_heads[ch] = _Head(channels, it);
+  }
+
   if (it != weights.end())
     throw std::runtime_error("Didn't touch all the weights when initializing ConvNet");
 
@@ -171,18 +179,25 @@
 }
 
-void nam::convnet::ConvNet::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
+void nam::convnet::ConvNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames)
 {
   this->_update_buffers_(input, num_frames);
-  // Main computation!
-  const long i_start = this->_input_buffer_offset;
-  const long i_end = i_start + num_frames;
-
-  // Convert input buffer to matrix for first layer
-  Eigen::MatrixXf input_matrix(1, num_frames);
-  for (int i = 0; i < num_frames; i++)
-    input_matrix(0, i) = this->_input_buffer[i_start + i];
+  const int in_channels = NumInputChannels();
+  const int out_channels = NumOutputChannels();
+
+  // Multi-channel handling: the input channels are stacked into one matrix and
+  // mixed by the first conv layer; each output channel then gets its own head.
+  // This can be extended later for more sophisticated cross-channel processing
+
+  // Convert input buffers to matrix for first layer (stack input channels)
+  Eigen::MatrixXf input_matrix(in_channels, num_frames);
+  for (int ch = 0; ch < in_channels; ch++)
+  {
+    const long i_start = this->_input_buffer_offset[ch];
+    for (int i = 0; i < num_frames; i++)
+      input_matrix(ch, i) = this->_input_buffers[ch][i_start + i];
+  }
 
   // Process through ConvNetBlock layers
   // Each block now uses Conv1D's internal buffers via Process() and GetOutput()
@@ -206,23 +221,30 @@ void nam::convnet::ConvNet::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const
     this->_blocks[i].Process(block_input, num_frames);
   }
 
-  // Process head with output from last Conv1D
-  // Head still needs the old interface, so we need to provide it via a matrix
-  // We still need _block_vals[0] for the head interface
+  // Process heads for each output channel
+  // We need _block_vals[0] for the head interface
+  const long max_buffer_size = (long)this->_input_buffers[0].size();
   if (this->_block_vals[0].rows() != this->_blocks.back().get_out_channels()
-      || this->_block_vals[0].cols() != (long)this->_input_buffer.size())
+      || this->_block_vals[0].cols() != max_buffer_size)
   {
-    this->_block_vals[0].resize(this->_blocks.back().get_out_channels(), this->_input_buffer.size());
+    this->_block_vals[0].resize(this->_blocks.back().get_out_channels(), max_buffer_size);
   }
 
+  // Copy last block output to _block_vals for head
   auto last_output = this->_blocks.back().GetOutput(num_frames);
+  const long i_start = this->_input_buffer_offset[0]; // Use first channel's offset
+  const long i_end = i_start + num_frames;
   this->_block_vals[0].middleCols(i_start, num_frames) = last_output;
 
-  this->_head.process_(this->_block_vals[0], this->_head_output, i_start, i_end);
-
-  // Copy to required output array
-  for (int s = 0; s < num_frames; s++)
-    output[s] = this->_head_output(s);
+  // Process each output channel head
+  for (int ch = 0; ch < out_channels; ch++)
+  {
+    this->_heads[ch].process_(this->_block_vals[0], this->_head_outputs[ch], i_start, i_end);
+
+    // Copy to output array for this channel
+    for (int s = 0; s < num_frames; s++)
+      output[ch][s] = this->_head_outputs[ch](s);
+  }
 
   // Prepare for next call:
   nam::Buffer::_advance_input_buffer_(num_frames);
@@ -245,18 +267,24 @@ void nam::convnet::ConvNet::SetMaxBufferSize(const int maxBufferSize)
   }
 }
 
-void nam::convnet::ConvNet::_update_buffers_(NAM_SAMPLE* input, const int num_frames)
+void nam::convnet::ConvNet::_update_buffers_(NAM_SAMPLE** input, const int num_frames)
 {
   this->Buffer::_update_buffers_(input, num_frames);
 
-  const long buffer_size = (long)this->_input_buffer.size();
+  // Find maximum buffer size across input channels
+  long max_buffer_size = 0;
+  for (const auto& buf : this->_input_buffers)
+  {
+    if ((long)buf.size() > max_buffer_size)
+      max_buffer_size = (long)buf.size();
+  }
 
   // Only need _block_vals[0] for the head
   // Conv1D layers manage their own buffers now
   if (this->_block_vals[0].rows() != this->_blocks.back().get_out_channels()
-      || this->_block_vals[0].cols() != buffer_size)
+      || this->_block_vals[0].cols() != max_buffer_size)
   {
-    this->_block_vals[0].resize(this->_blocks.back().get_out_channels(), buffer_size);
+    this->_block_vals[0].resize(this->_blocks.back().get_out_channels(), max_buffer_size);
     this->_block_vals[0].setZero();
   }
 }
@@ -281,8 +309,11 @@ std::unique_ptr<nam::DSP> nam::convnet::Factory(const nlohmann::json& config, st
   const bool batchnorm = config["batchnorm"];
   const std::string activation = config["activation"];
   const int groups = config.value("groups", 1); // defaults to 1
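+  // Illustrative config snippet for a stereo model (field names as read by
+  // this factory; other fields omitted):
+  //   { "channels": 16, "dilations": [1, 2, 4], "batchnorm": true,
+  //     "activation": "ReLU", "in_channels": 2, "out_channels": 2 }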
+  // Default to 1 channel in/out for backward compatibility
+  const int in_channels = config.value("in_channels", 1);
+  const int out_channels = config.value("out_channels", 1);
   return std::make_unique<ConvNet>(
-    channels, dilations, batchnorm, activation, weights, expectedSampleRate, groups);
+    in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expectedSampleRate, groups);
 }
 
 namespace
diff --git a/NAM/convnet.h b/NAM/convnet.h
index ccc1edb..5f16ad6 100644
--- a/NAM/convnet.h
+++ b/NAM/convnet.h
@@ -77,21 +77,21 @@ class _Head
 class ConvNet : public Buffer
 {
 public:
-  ConvNet(const int channels, const std::vector<int>& dilations, const bool batchnorm, const std::string activation,
+  ConvNet(const int in_channels, const int out_channels, const int channels, const std::vector<int>& dilations, const bool batchnorm, const std::string activation,
           std::vector<float>& weights, const double expected_sample_rate = -1.0, const int groups = 1);
   ~ConvNet() = default;
 
-  void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) override;
+  void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override;
   void SetMaxBufferSize(const int maxBufferSize) override;
 
 protected:
   std::vector<ConvNetBlock> _blocks;
   std::vector<Eigen::MatrixXf> _block_vals;
-  Eigen::VectorXf _head_output;
-  _Head _head;
+  std::vector<Eigen::VectorXf> _head_outputs;
+  std::vector<_Head> _heads;
   void _verify_weights(const int channels, const std::vector<int>& dilations, const bool batchnorm,
                        const size_t actual_weights);
-  void _update_buffers_(NAM_SAMPLE* input, const int num_frames) override;
+  void _update_buffers_(NAM_SAMPLE** input, const int num_frames) override;
   void _rewind_buffers_() override;
 
   int mPrewarmSamples = 0; // Pre-compute during initialization
diff --git a/NAM/dsp.cpp b/NAM/dsp.cpp
index dc46891..f9b625e 100644
--- a/NAM/dsp.cpp
+++ b/NAM/dsp.cpp
@@ -15,9 +15,17 @@
 
 constexpr const long _INPUT_BUFFER_SAFETY_FACTOR = 32;
 
-nam::DSP::DSP(const double expected_sample_rate)
-: mExpectedSampleRate(expected_sample_rate)
+nam::DSP::DSP(const int in_channels, const int out_channels, const double expected_sample_rate)
+: mInChannels(in_channels)
+, mOutChannels(out_channels)
+, mExpectedSampleRate(expected_sample_rate)
+, mInputLevels(in_channels)
+, mOutputLevels(out_channels)
 {
+  if (in_channels <= 0 || out_channels <= 0)
+  {
+    throw std::runtime_error("Channel counts must be positive");
+  }
 }
 
 void nam::DSP::prewarm()
@@ -31,29 +39,47 @@ void nam::DSP::prewarm()
     return;
 
   const size_t bufferSize = std::max(mMaxBufferSize, 1);
-  std::vector<NAM_SAMPLE> inputBuffer, outputBuffer;
-  inputBuffer.resize(bufferSize);
-  outputBuffer.resize(bufferSize);
-  for (auto it = inputBuffer.begin(); it != inputBuffer.end(); ++it)
+  // Allocate buffers for all channels
+  std::vector<std::vector<NAM_SAMPLE>> inputBuffers(mInChannels);
+  std::vector<std::vector<NAM_SAMPLE>> outputBuffers(mOutChannels);
+  std::vector<NAM_SAMPLE*> inputPtrs(mInChannels);
+  std::vector<NAM_SAMPLE*> outputPtrs(mOutChannels);
+
+  for (int ch = 0; ch < mInChannels; ch++)
+  {
+    inputBuffers[ch].resize(bufferSize, (NAM_SAMPLE)0.0);
+    inputPtrs[ch] = inputBuffers[ch].data();
+  }
+  for (int ch = 0; ch < mOutChannels; ch++)
   {
-    (*it) = (NAM_SAMPLE)0.0;
+    outputBuffers[ch].resize(bufferSize, (NAM_SAMPLE)0.0);
+    outputPtrs[ch] = outputBuffers[ch].data();
   }
-  NAM_SAMPLE* inputPtr = inputBuffer.data();
-  NAM_SAMPLE* outputPtr = outputBuffer.data();
 
   int samplesProcessed = 0;
   while (samplesProcessed < prewarmSamples)
   {
-    this->process(inputPtr, outputPtr, bufferSize);
+    this->process(inputPtrs.data(), outputPtrs.data(), bufferSize);
     samplesProcessed += bufferSize;
   }
 }
 
-void nam::DSP::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
+void nam::DSP::process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames)
 {
-  // Default implementation is the null operation
-  for (int i = 0; i < num_frames; i++)
-    output[i] = input[i];
+  // Default implementation is the null operation: copy input to output
+  // For now, assume 1:1 channel mapping (first min(in_channels, out_channels) channels)
+  const int channelsToProcess = std::min(mInChannels, mOutChannels);
+  for (int ch = 0; ch < channelsToProcess; ch++)
+  {
+    for (int i = 0; i < num_frames; i++)
+      output[ch][i] = input[ch][i];
+  }
+  // Zero out any extra output channels
+  for (int ch = channelsToProcess; ch < mOutChannels; ch++)
+  {
+    for (int i = 0; i < num_frames; i++)
+      output[ch][i] = (NAM_SAMPLE)0.0;
+  }
 }
 
 double nam::DSP::GetLoudness() const
@@ -87,10 +113,67 @@ void nam::DSP::SetMaxBufferSize(const int maxBufferSize)
   mMaxBufferSize = maxBufferSize;
 }
 
+double nam::DSP::GetInputLevel(const int channel)
+{
+  if (channel < 0 || channel >= mInChannels)
+  {
+    throw std::runtime_error("Invalid input channel index");
+  }
+  return mInputLevels[channel].level;
+}
+
+double nam::DSP::GetOutputLevel(const int channel)
+{
+  if (channel < 0 || channel >= mOutChannels)
+  {
+    throw std::runtime_error("Invalid output channel index");
+  }
+  return mOutputLevels[channel].level;
+}
+
+bool nam::DSP::HasInputLevel(const int channel)
+{
+  // channel == -1 (the default): does any input channel have a level?
+  if (channel == -1)
+  {
+    for (int ch = 0; ch < mInChannels; ch++)
+      if (mInputLevels[ch].haveLevel)
+        return true;
+    return false;
+  }
+  if (channel < 0 || channel >= mInChannels)
+  {
+    throw std::runtime_error("Invalid input channel index");
+  }
+  return mInputLevels[channel].haveLevel;
+}
+
+bool nam::DSP::HasOutputLevel(const int channel)
+{
+  // channel == -1 (the default): does any output channel have a level?
+  if (channel == -1)
+  {
+    for (int ch = 0; ch < mOutChannels; ch++)
+      if (mOutputLevels[ch].haveLevel)
+        return true;
+    return false;
+  }
+  if (channel < 0 || channel >= mOutChannels)
+  {
+    throw std::runtime_error("Invalid output channel index");
+  }
+  return mOutputLevels[channel].haveLevel;
+}
+
+void nam::DSP::SetInputLevel(const int channel, const double inputLevel)
+{
+  if (channel < 0 || channel >= mInChannels)
+  {
+    throw std::runtime_error("Invalid input channel index");
+  }
+  mInputLevels[channel].haveLevel = true;
+  mInputLevels[channel].level = inputLevel;
+}
+
+void nam::DSP::SetOutputLevel(const int channel, const double outputLevel)
+{
+  if (channel < 0 || channel >= mOutChannels)
+  {
+    throw std::runtime_error("Invalid output channel index");
+  }
+  mOutputLevels[channel].haveLevel = true;
+  mOutputLevels[channel].level = outputLevel;
+}
+
 // Buffer =====================================================================
 
-nam::Buffer::Buffer(const int receptive_field, const double expected_sample_rate)
-: nam::DSP(expected_sample_rate)
+nam::Buffer::Buffer(const int in_channels, const int out_channels, const int receptive_field,
+                    const double expected_sample_rate)
+: nam::DSP(in_channels, out_channels, expected_sample_rate)
 {
   this->_set_receptive_field(receptive_field);
 }
 
@@ -103,68 +186,105 @@ void nam::Buffer::_set_receptive_field(const int new_receptive_field)
 void nam::Buffer::_set_receptive_field(const int new_receptive_field, const int input_buffer_size)
 {
   this->_receptive_field = new_receptive_field;
-  this->_input_buffer.resize(input_buffer_size);
-  std::fill(this->_input_buffer.begin(), this->_input_buffer.end(), 0.0f);
+  const int in_channels = NumInputChannels();
+  const int out_channels = NumOutputChannels();
+
+  // Resize buffers for all input channels
+  _input_buffers.resize(in_channels);
+  _input_buffer_offset.resize(in_channels);
+  for (int ch = 0; ch < in_channels; ch++)
+  {
+    _input_buffers[ch].resize(input_buffer_size);
+    std::fill(_input_buffers[ch].begin(), _input_buffers[ch].end(), 0.0f);
+  }
+
+  // Resize output buffers (though they'll be resized per call in _update_buffers_)
+  _output_buffers.resize(out_channels);
+
   this->_reset_input_buffer();
 }
 
-void nam::Buffer::_update_buffers_(NAM_SAMPLE* input, const int num_frames)
+void nam::Buffer::_update_buffers_(NAM_SAMPLE** input, const int num_frames)
 {
-  // Make sure that the buffer is big enough for the receptive field and the
-  // frames needed!
+  const int in_channels = NumInputChannels();
+  const int out_channels = NumOutputChannels();
+
+  // Make sure that the buffers are big enough for the receptive field and the
+  // frames needed for each channel!
+  const long minimum_input_buffer_size = (long)this->_receptive_field + _INPUT_BUFFER_SAFETY_FACTOR * num_frames;
+
+  for (int ch = 0; ch < in_channels; ch++)
   {
-    const long minimum_input_buffer_size = (long)this->_receptive_field + _INPUT_BUFFER_SAFETY_FACTOR * num_frames;
-    if ((long)this->_input_buffer.size() < minimum_input_buffer_size)
+    if ((long)this->_input_buffers[ch].size() < minimum_input_buffer_size)
     {
       long new_buffer_size = 2;
       while (new_buffer_size < minimum_input_buffer_size)
         new_buffer_size *= 2;
-      this->_input_buffer.resize(new_buffer_size);
-      std::fill(this->_input_buffer.begin(), this->_input_buffer.end(), 0.0f);
+      this->_input_buffers[ch].resize(new_buffer_size);
+      std::fill(this->_input_buffers[ch].begin(), this->_input_buffers[ch].end(), 0.0f);
     }
+
+    // If we'd run off the end of the input buffer, then we need to move the data
+    // back to the start of the buffer and start again.
+    if (this->_input_buffer_offset[ch] + num_frames > (long)this->_input_buffers[ch].size())
+      this->_rewind_buffers_();
+
+    // Put the new samples into the input buffer for this channel
+    for (long i = this->_input_buffer_offset[ch], j = 0; j < num_frames; i++, j++)
+      this->_input_buffers[ch][i] = (float)input[ch][j];
   }
 
-  // If we'd run off the end of the input buffer, then we need to move the data
-  // back to the start of the buffer and start again.
-  if (this->_input_buffer_offset + num_frames > (long)this->_input_buffer.size())
-    this->_rewind_buffers_();
-  // Put the new samples into the input buffer
-  for (long i = this->_input_buffer_offset, j = 0; j < num_frames; i++, j++)
-    this->_input_buffer[i] = input[j];
-  // And resize the output buffer:
-  this->_output_buffer.resize(num_frames);
-  std::fill(this->_output_buffer.begin(), this->_output_buffer.end(), 0.0f);
+  // Resize output buffers for all output channels
+  for (int ch = 0; ch < out_channels; ch++)
+  {
+    this->_output_buffers[ch].resize(num_frames);
+    std::fill(this->_output_buffers[ch].begin(), this->_output_buffers[ch].end(), 0.0f);
+  }
 }
 
 void nam::Buffer::_rewind_buffers_()
 {
-  // Copy the input buffer back
-  // RF-1 samples because we've got at least one new one inbound.
-  for (long i = 0, j = this->_input_buffer_offset - this->_receptive_field; i < this->_receptive_field; i++, j++)
-    this->_input_buffer[i] = this->_input_buffer[j];
-  // And reset the offset.
-  // Even though we could be stingy about that one sample that we won't be using
-  // (because a new set is incoming) it's probably not worth the
-  // hyper-optimization and liable for bugs. And the code looks way tidier this
-  // way.
-  this->_input_buffer_offset = this->_receptive_field;
+  const int in_channels = NumInputChannels();
+
+  // Rewind buffers for all input channels
+  for (int ch = 0; ch < in_channels; ch++)
+  {
+    // Copy the input buffer back
+    // RF-1 samples because we've got at least one new one inbound.
+    for (long i = 0, j = this->_input_buffer_offset[ch] - this->_receptive_field; i < this->_receptive_field; i++, j++)
+      this->_input_buffers[ch][i] = this->_input_buffers[ch][j];
+    // And reset the offset.
+    // Even though we could be stingy about that one sample that we won't be using
+    // (because a new set is incoming) it's probably not worth the
+    // hyper-optimization and liable for bugs. And the code looks way tidier this
+    // way.
+    this->_input_buffer_offset[ch] = this->_receptive_field;
+  }
 }
 
 void nam::Buffer::_reset_input_buffer()
 {
-  this->_input_buffer_offset = this->_receptive_field;
+  const int in_channels = NumInputChannels();
+  for (int ch = 0; ch < in_channels; ch++)
+  {
+    this->_input_buffer_offset[ch] = this->_receptive_field;
+  }
 }
 
 void nam::Buffer::_advance_input_buffer_(const int num_frames)
 {
-  this->_input_buffer_offset += num_frames;
+  const int in_channels = NumInputChannels();
+  for (int ch = 0; ch < in_channels; ch++)
+  {
+    this->_input_buffer_offset[ch] += num_frames;
+  }
 }
 
 // Linear =====================================================================
 
-nam::Linear::Linear(const int receptive_field, const bool _bias, const std::vector<float>& weights,
-                    const double expected_sample_rate)
-: nam::Buffer(receptive_field, expected_sample_rate)
+nam::Linear::Linear(const int in_channels, const int out_channels, const int receptive_field, const bool _bias,
+                    const std::vector<float>& weights, const double expected_sample_rate)
+: nam::Buffer(in_channels, out_channels, receptive_field, expected_sample_rate)
 {
   if ((int)weights.size() != (receptive_field + (_bias ? 1 : 0)))
     throw std::runtime_error(
@@ -178,16 +298,33 @@ nam::Linear::Linear(const int receptive_field, const bool _bias, const std::vect
   this->_bias = _bias ? weights[receptive_field] : (float)0.0;
 }
 
-void nam::Linear::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
+void nam::Linear::process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames)
 {
   this->nam::Buffer::_update_buffers_(input, num_frames);
+  const int in_channels = NumInputChannels();
+  const int out_channels = NumOutputChannels();
+
+  // For now, Linear processes each input channel independently to corresponding output channel
+  // This is a simple implementation - can be extended later for cross-channel mixing
+  const int channelsToProcess = std::min(in_channels, out_channels);
 
   // Main computation!
-  for (int i = 0; i < num_frames; i++)
+  for (int ch = 0; ch < channelsToProcess; ch++)
+  {
+    for (int i = 0; i < num_frames; i++)
+    {
+      const long offset = this->_input_buffer_offset[ch] - this->_weight.size() + i + 1;
+      auto input_vec = Eigen::Map<const Eigen::VectorXf>(&this->_input_buffers[ch][offset], this->_receptive_field);
+      output[ch][i] = this->_bias + this->_weight.dot(input_vec);
+    }
+  }
+
+  // Zero out any extra output channels
+  for (int ch = channelsToProcess; ch < out_channels; ch++)
   {
-    const long offset = this->_input_buffer_offset - this->_weight.size() + i + 1;
-    auto input = Eigen::Map<const Eigen::VectorXf>(&this->_input_buffer[offset], this->_receptive_field);
-    output[i] = this->_bias + this->_weight.dot(input);
+    for (int i = 0; i < num_frames; i++)
+      output[ch][i] = (NAM_SAMPLE)0.0;
   }
 
   // Prepare for next call:
@@ -200,7 +337,10 @@ std::unique_ptr<nam::DSP> nam::linear::Factory(const nlohmann::json& config, std
 {
   const int receptive_field = config["receptive_field"];
   const bool bias = config["bias"];
-  return std::make_unique<Linear>(receptive_field, bias, weights, expectedSampleRate);
+  // Default to 1 channel in/out for backward compatibility
+  const int in_channels = config.value("in_channels", 1);
+  const int out_channels = config.value("out_channels", 1);
+  return std::make_unique<Linear>(in_channels, out_channels, receptive_field, bias, weights, expectedSampleRate);
 }
 
 // NN modules =================================================================
diff --git a/NAM/dsp.h b/NAM/dsp.h
index f359a68..ef1eaf7 100644
--- a/NAM/dsp.h
+++ b/NAM/dsp.h
@@ -40,7 +40,7 @@ class DSP
   // Older models won't know, but newer ones will come with a loudness from the training based on their response to a
   // standardized input.
   // We may choose to have the models figure out for themselves how loud they are in here in the future.
-  DSP(const double expected_sample_rate);
+  DSP(const int in_channels, const int out_channels, const double expected_sample_rate);
   virtual ~DSP() = default;
   // prewarm() does any initial work required to "settle" model initial conditions
   // it can be somewhat expensive, so should not be called during realtime audio processing
@@ -54,25 +54,33 @@ class DSP
   // 1. The core DSP algorithm is run (This is what should probably be
   //    overridden in subclasses).
   // 2. The output level is applied and the result stored to `output`.
-  virtual void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames);
+  // `input` and `output` are double pointers where the first pointer indexes channels
+  // and the second indexes frames: input[channel][frame]
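+  // e.g., with 2 inputs, 2 outputs, and num_frames = 4, input[1][3] is the
+  // last frame of the second input channel.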
+  virtual void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames);
   // Expected sample rate, in Hz.
   // TODO throw if it doesn't know.
   double GetExpectedSampleRate() const { return mExpectedSampleRate; };
+  // Number of input channels
+  int NumInputChannels() const { return mInChannels; };
+  // Number of output channels
+  int NumOutputChannels() const { return mOutChannels; };
-  // Input Level, in dBu, corresponding to 0 dBFS for a sine wave
+  // Input Level, in dBu, corresponding to 0 dBFS for a sine wave, for a specific channel
   // You should call HasInputLevel() first to be safe.
-  double GetInputLevel() { return mInputLevel.level; };
+  double GetInputLevel(const int channel);
   // Get how loud this model is, in dB.
   // Throws a std::runtime_error if the model doesn't know how loud it is.
   double GetLoudness() const;
-  // Output Level, in dBu, corresponding to 0 dBFS for a sine wave
+  // Output Level, in dBu, corresponding to 0 dBFS for a sine wave, for a specific channel
   // You should call HasOutputLevel() first to be safe.
-  double GetOutputLevel() { return mOutputLevel.level; };
-  // Does this model know its output level?
-  bool HasInputLevel() { return mInputLevel.haveLevel; };
+  double GetOutputLevel(const int channel);
+  // Does this model know its input level for a specific channel?
+  // If channel == -1, returns true if any channel has a level set.
+  bool HasInputLevel(const int channel = -1);
   // Get whether the model knows how loud it is.
   bool HasLoudness() const { return mHasLoudness; };
-  // Does this model know its output level?
-  bool HasOutputLevel() { return mOutputLevel.haveLevel; };
+  // Does this model know its output level for a specific channel?
+  // If channel == -1, returns true if any channel has a level set.
+  bool HasOutputLevel(const int channel = -1);
   // General function for resetting the DSP unit.
   // This doesn't call prewarm(). If you want to do that, then you might want to use ResetAndPrewarm().
   // See https://github.com/sdatkinson/NeuralAmpModelerCore/issues/96 for the reasoning.
@@ -83,20 +91,12 @@ class DSP
     Reset(sampleRate, maxBufferSize);
     prewarm();
   }
-  void SetInputLevel(const double inputLevel)
-  {
-    mInputLevel.haveLevel = true;
-    mInputLevel.level = inputLevel;
-  };
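+  // Per-channel calibration levels; e.g., for a calibrated stereo model
+  // (illustrative values):
+  //   model->SetInputLevel(0, 19.0);
+  //   model->SetInputLevel(1, 19.0);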
+  void SetInputLevel(const int channel, const double inputLevel);
   // Set the loudness, in dB.
   // This is usually defined to be the loudness to a standardized input. The trainer has its own, but you can always
   // use this to define it a different way if you like yours better.
   void SetLoudness(const double loudness);
-  void SetOutputLevel(const double outputLevel)
-  {
-    mOutputLevel.haveLevel = true;
-    mOutputLevel.level = outputLevel;
-  };
+  void SetOutputLevel(const int channel, const double outputLevel);
 
 protected:
   bool mHasLoudness = false;
@@ -117,13 +117,15 @@ class DSP
   int GetMaxBufferSize() const { return mMaxBufferSize; };
 
 private:
+  const int mInChannels;
+  const int mOutChannels;
   struct Level
   {
     bool haveLevel = false;
    float level = 0.0;
   };
-  Level mInputLevel;
-  Level mOutputLevel;
+  std::vector<Level> mInputLevels;
+  std::vector<Level> mOutputLevels;
 };
 
 // Class where an input buffer is kept so that long-time effects can be
 class Buffer : public DSP
 {
 public:
-  Buffer(const int receptive_field, const double expected_sample_rate = -1.0);
+  Buffer(const int in_channels, const int out_channels, const int receptive_field, const double expected_sample_rate = -1.0);
 
 protected:
-  // Input buffer
-  const int _input_buffer_channels = 1; // Mono
   int _receptive_field;
-  // First location where we add new samples from the input
-  long _input_buffer_offset;
-  std::vector<float> _input_buffer;
-  std::vector<float> _output_buffer;
+  // First location where we add new samples from the input (per channel)
+  std::vector<long> _input_buffer_offset;
+  // Per-channel input buffers
+  std::vector<std::vector<float>> _input_buffers;
+  std::vector<std::vector<float>> _output_buffers;
 
   void _advance_input_buffer_(const int num_frames);
   void _set_receptive_field(const int new_receptive_field, const int input_buffer_size);
   void _set_receptive_field(const int new_receptive_field);
   void _reset_input_buffer();
   // Use this->_input_post_gain
-  virtual void _update_buffers_(NAM_SAMPLE* input, int num_frames);
+  virtual void _update_buffers_(NAM_SAMPLE** input, int num_frames);
   virtual void _rewind_buffers_();
 };
 
@@ -156,9 +157,9 @@ class Buffer : public DSP
 class Linear : public Buffer
 {
 public:
-  Linear(const int receptive_field, const bool _bias, const std::vector<float>& weights,
+  Linear(const int in_channels, const int out_channels, const int receptive_field, const bool _bias, const std::vector<float>& weights,
          const double expected_sample_rate = -1.0);
-  void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) override;
+  void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override;
 
 protected:
   Eigen::VectorXf _weight;
diff --git a/NAM/get_dsp.cpp b/NAM/get_dsp.cpp
index 99dd3a0..af1ef68 100644
--- a/NAM/get_dsp.cpp
+++ b/NAM/get_dsp.cpp
@@ -158,11 +158,19 @@ std::unique_ptr<DSP> get_dsp(dspData& conf)
   }
   if (inputLevel.have)
   {
-    out->SetInputLevel(inputLevel.value);
+    // Set the same level for all input channels (backward compatibility)
+    for (int ch = 0; ch < out->NumInputChannels(); ch++)
+    {
+      out->SetInputLevel(ch, inputLevel.value);
+    }
   }
   if (outputLevel.have)
   {
-    out->SetOutputLevel(outputLevel.value);
+    // Set the same level for all output channels (backward compatibility)
+    for (int ch = 0; ch < out->NumOutputChannels(); ch++)
+    {
+      out->SetOutputLevel(ch, outputLevel.value);
+    }
   }
 
   // "pre-warm" the model to settle initial conditions
diff --git a/NAM/lstm.cpp b/NAM/lstm.cpp
index 6fa33a2..72eb684 100644
--- a/NAM/lstm.cpp
+++ b/NAM/lstm.cpp
@@ -65,9 +65,9 @@ void nam::lstm::LSTMCell::process_(const Eigen::VectorXf& x)
   }
 }
 
-nam::lstm::LSTM::LSTM(const int num_layers, const int input_size, const int hidden_size, std::vector<float>& weights,
+nam::lstm::LSTM::LSTM(const int in_channels, const int out_channels, const int num_layers, const int input_size, const int hidden_size, std::vector<float>& weights,
                       const double expected_sample_rate)
-: DSP(expected_sample_rate)
+: DSP(in_channels, out_channels, expected_sample_rate)
 {
   this->_input.resize(1);
   std::vector<float>::iterator it = weights.begin();
@@ -80,10 +80,18 @@ nam::lstm::LSTM::LSTM(const int num_layers, const int input_size, const int hidd
   assert(it == weights.end());
 }
 
-void nam::lstm::LSTM::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
+void nam::lstm::LSTM::process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames)
 {
+  const int out_channels = NumOutputChannels();
+
+  // For now, process first input channel and replicate to all output channels
+  // Can be extended later for true multi-channel support
   for (int i = 0; i < num_frames; i++)
-    output[i] = this->_process_sample(input[i]);
+  {
+    const float sample = this->_process_sample(input[0][i]);
+    for (int ch = 0; ch < out_channels; ch++)
+      output[ch][i] = sample;
+  }
 }
 
 int nam::lstm::LSTM::PrewarmSamples()
@@ -112,7 +120,10 @@ std::unique_ptr<nam::DSP> nam::lstm::Factory(const nlohmann::json& config, std::
   const int num_layers = config["num_layers"];
   const int input_size = config["input_size"];
   const int hidden_size = config["hidden_size"];
-  return std::make_unique<LSTM>(num_layers, input_size, hidden_size, weights, expectedSampleRate);
+  // Default in_channels to input_size (1 for standard models) and out_channels to 1 for backward compatibility
+  const int in_channels = config.value("in_channels", input_size);
+  const int out_channels = config.value("out_channels", 1);
+  return std::make_unique<LSTM>(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expectedSampleRate);
 }
 
 // Register the factory
diff --git a/NAM/lstm.h b/NAM/lstm.h
index 17d0ada..e2123dc 100644
--- a/NAM/lstm.h
+++ b/NAM/lstm.h
@@ -51,7 +51,7 @@ class LSTMCell
 class LSTM : public DSP
 {
 public:
-  LSTM(const int num_layers, const int input_size, const int hidden_size, std::vector<float>& weights,
+  LSTM(const int in_channels, const int out_channels, const int num_layers, const int input_size, const int hidden_size, std::vector<float>& weights,
       const double expected_sample_rate = -1.0);
   ~LSTM() = default;
 
@@ -61,7 +61,7 @@ class LSTM : public DSP
   Eigen::VectorXf _head_weight;
   float _head_bias;
 
-  void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) override;
+  void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override;
 
   std::vector<LSTMCell> _layers;
 
   float _process_sample(const float x);
diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp
index 285ea69..3e4ff1c 100644
--- a/NAM/wavenet.cpp
+++ b/NAM/wavenet.cpp
@@ -192,10 +192,10 @@ long nam::wavenet::_LayerArray::_get_channels() const
 
 // WaveNet ====================================================================
 
-nam::wavenet::WaveNet::WaveNet(const std::vector<LayerArrayParams>& layer_array_params,
+nam::wavenet::WaveNet::WaveNet(const int in_channels, const int out_channels, const std::vector<LayerArrayParams>& layer_array_params,
                                const float head_scale, const bool with_head, std::vector<float> weights,
                                const double expected_sample_rate)
-: DSP(expected_sample_rate)
+: DSP(in_channels, out_channels, expected_sample_rate)
 , _head_scale(head_scale)
 {
   if (with_head)
@@ -251,17 +251,21 @@ void nam::wavenet::WaveNet::SetMaxBufferSize(const int maxBufferSize)
     this->_layer_arrays[i].SetMaxBufferSize(maxBufferSize);
 }
 
-void nam::wavenet::WaveNet::_set_condition_array(NAM_SAMPLE* input, const int num_frames)
+void nam::wavenet::WaveNet::_set_condition_array(NAM_SAMPLE** input, const int num_frames)
 {
+  // For now, use first input channel for conditioning
+  // Can be extended later to support multi-channel conditioning
   for (int j = 0; j < num_frames; j++)
   {
-    this->_condition(0, j) = input[j];
+    this->_condition(0, j) = input[0][j];
   }
 }
 
-void nam::wavenet::WaveNet::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
+void nam::wavenet::WaveNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames)
 {
   assert(num_frames <= mMaxBufferSize);
+  const int out_channels = NumOutputChannels();
+
   this->_set_condition_array(input, num_frames);
 
   // Main layer arrays:
@@ -287,11 +291,16 @@ void nam::wavenet::WaveNet::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const
   // (Head not implemented)
   auto& final_head_outputs = this->_layer_arrays.back().GetHeadOutputs();
-  assert(final_head_outputs.rows() == 1);
-  for (int s = 0; s < num_frames; s++)
+  assert(final_head_outputs.rows() == out_channels);
+
+  for (int ch = 0; ch < out_channels; ch++)
   {
-    const float out = this->_head_scale * final_head_outputs(0, s);
-    output[s] = out;
+    for (int s = 0; s < num_frames; s++)
+    {
+      const float out = this->_head_scale * final_head_outputs(ch, s);
+      output[ch][s] = out;
+    }
   }
 }
 
@@ -314,8 +323,13 @@ std::unique_ptr<nam::DSP> nam::wavenet::Factory(const nlohmann::json& config, st
   }
   const bool with_head = !config["head"].is_null();
   const float head_scale = config["head_scale"];
+
+  // Determine channels from first layer (input_size) and last layer (head_size)
+  const int in_channels = config.value("in_channels", layer_array_params[0].input_size);
+  const int out_channels = config.value("out_channels", layer_array_params.back().head_size);
+
   return std::make_unique<WaveNet>(
-    layer_array_params, head_scale, with_head, weights, expectedSampleRate);
+    in_channels, out_channels, layer_array_params, head_scale, with_head, weights, expectedSampleRate);
 }
 
 // Register the factory
diff --git a/NAM/wavenet.h b/NAM/wavenet.h
index 832673b..faffa74 100644
--- a/NAM/wavenet.h
+++ b/NAM/wavenet.h
@@ -174,10 +174,10 @@ class _LayerArray
 class WaveNet : public DSP
 {
 public:
-  WaveNet(const std::vector<LayerArrayParams>& layer_array_params, const float head_scale, const bool with_head,
+  WaveNet(const int in_channels, const int out_channels, const std::vector<LayerArrayParams>& layer_array_params, const float head_scale, const bool with_head,
           std::vector<float> weights, const double expected_sample_rate = -1.0);
   ~WaveNet() = default;
-  void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) override;
+  void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override;
   void set_weights_(std::vector<float>& weights);
 
 protected:
@@ -186,7 +186,7 @@ class WaveNet : public DSP
   void SetMaxBufferSize(const int maxBufferSize) override;
 
   // Fill in the "condition" array that's fed into the various parts of the net.
-  virtual void _set_condition_array(NAM_SAMPLE* input, const int num_frames);
+  virtual void _set_condition_array(NAM_SAMPLE** input, const int num_frames);
   // How many conditioning inputs are there.
   // Just one--the audio.
   virtual int _get_condition_dim() const { return 1; };
diff --git a/tools/benchmodel.cpp b/tools/benchmodel.cpp
index 5c3d60c..fb6bd09 100644
--- a/tools/benchmodel.cpp
+++ b/tools/benchmodel.cpp
@@ -40,18 +40,31 @@ int main(int argc, char* argv[])
   model->Reset(model->GetExpectedSampleRate(), bufferSize);
 
   size_t numBuffers = (48000 / bufferSize) * 2;
 
-  // Fill input buffer with zeroes.
-  // Output buffer doesn't matter.
-  for (int i = 0; i < AUDIO_BUFFER_SIZE; i++)
+  // Allocate multi-channel buffers
+  const int in_channels = model->NumInputChannels();
+  const int out_channels = model->NumOutputChannels();
+
+  std::vector<std::vector<NAM_SAMPLE>> inputBuffers(in_channels);
+  std::vector<std::vector<NAM_SAMPLE>> outputBuffers(out_channels);
+  std::vector<NAM_SAMPLE*> inputPtrs(in_channels);
+  std::vector<NAM_SAMPLE*> outputPtrs(out_channels);
+
+  for (int ch = 0; ch < in_channels; ch++)
   {
-    inputBuffer[i] = 0.0;
+    inputBuffers[ch].resize(AUDIO_BUFFER_SIZE, 0.0);
+    inputPtrs[ch] = inputBuffers[ch].data();
+  }
+  for (int ch = 0; ch < out_channels; ch++)
+  {
+    outputBuffers[ch].resize(AUDIO_BUFFER_SIZE, 0.0);
+    outputPtrs[ch] = outputBuffers[ch].data();
   }
 
   std::cout << "Running benchmark\n";
   auto t1 = high_resolution_clock::now();
   for (size_t i = 0; i < numBuffers; i++)
   {
-    model->process(inputBuffer, outputBuffer, AUDIO_BUFFER_SIZE);
+    model->process(inputPtrs.data(), outputPtrs.data(), AUDIO_BUFFER_SIZE);
   }
   auto t2 = high_resolution_clock::now();
   std::cout << "Finished\n";
diff --git a/tools/test/test_convnet.cpp b/tools/test/test_convnet.cpp
index ff11074..8966967 100644
--- a/tools/test/test_convnet.cpp
+++ b/tools/test/test_convnet.cpp
@@ -32,7 +32,7 @@ void test_convnet_basic()
   // Head weights (2 weights + 1 bias)
   weights.insert(weights.end(), {1.0f, 1.0f, 0.0f});
 
-  nam::convnet::ConvNet convnet(channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
 
   const int numFrames = 4;
   const int maxBufferSize = 64;
@@ -40,8 +40,10 @@ void test_convnet_basic()
 
   std::vector<NAM_SAMPLE> input(numFrames, 1.0f);
   std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
 
-  convnet.process(input.data(), output.data(), numFrames);
+  convnet.process(inputPtrs, outputPtrs, numFrames);
 
   // Verify output dimensions
   assert(output.size() == numFrames);
@@ -74,7 +76,7 @@ void test_convnet_batchnorm()
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});
 
-  nam::convnet::ConvNet convnet(channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
 
   const int numFrames = 4;
   const int maxBufferSize = 64;
@@ -82,8 +84,10 @@ void test_convnet_batchnorm()
 
   std::vector<NAM_SAMPLE> input(numFrames, 1.0f);
   std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
 
-  convnet.process(input.data(), output.data(), numFrames);
+  convnet.process(inputPtrs, outputPtrs, numFrames);
 
   assert(output.size() == numFrames);
   for (int i = 0; i < numFrames; i++)
@@ -117,7 +121,7 @@ void test_convnet_multiple_blocks()
   // Head weights
   weights.insert(weights.end(), {1.0f, 1.0f, 0.0f});
 
-  nam::convnet::ConvNet convnet(channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
 
   const int numFrames = 8;
   const int maxBufferSize = 64;
@@ -125,8 +129,10 @@ void test_convnet_multiple_blocks()
 
   std::vector<NAM_SAMPLE> input(numFrames, 0.5f);
   std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
 
-  convnet.process(input.data(), output.data(), numFrames);
+  convnet.process(inputPtrs, outputPtrs, numFrames);
 
   assert(output.size() == numFrames);
   for (int i = 0; i < numFrames; i++)
@@ -150,15 +156,17 @@ void test_convnet_zero_input()
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});
 
-  nam::convnet::ConvNet convnet(channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
 
   const int numFrames = 4;
   convnet.Reset(expected_sample_rate, numFrames);
 
   std::vector<NAM_SAMPLE> input(numFrames, 0.0f);
   std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
 
-  convnet.process(input.data(), output.data(), numFrames);
+  convnet.process(inputPtrs, outputPtrs, numFrames);
 
   // With zero input, output should be finite (may be zero or non-zero depending on bias)
   for (int i = 0; i < numFrames; i++)
@@ -182,18 +190,22 @@ void test_convnet_different_buffer_sizes()
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});
 
-  nam::convnet::ConvNet convnet(channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
 
   // Test with different buffer sizes
   convnet.Reset(expected_sample_rate, 64);
   std::vector<NAM_SAMPLE> input1(32, 1.0f);
   std::vector<NAM_SAMPLE> output1(32, 0.0f);
-  convnet.process(input1.data(), output1.data(), 32);
+  NAM_SAMPLE* inputPtrs1[] = {input1.data()};
+  NAM_SAMPLE* outputPtrs1[] = {output1.data()};
+  convnet.process(inputPtrs1, outputPtrs1, 32);
 
   convnet.Reset(expected_sample_rate, 128);
   std::vector<NAM_SAMPLE> input2(64, 1.0f);
   std::vector<NAM_SAMPLE> output2(64, 0.0f);
-  convnet.process(input2.data(), output2.data(), 64);
+  NAM_SAMPLE* inputPtrs2[] = {input2.data()};
+  NAM_SAMPLE* outputPtrs2[] = {output2.data()};
+  convnet.process(inputPtrs2, outputPtrs2, 64);
 
   // Both should work without errors
   assert(output1.size() == 32);
@@ -219,7 +231,7 @@ void test_convnet_prewarm()
   // Head weights (2 weights + 1 bias)
   weights.insert(weights.end(), {1.0f, 1.0f, 0.0f});
 
-  nam::convnet::ConvNet convnet(channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
 
   // Test that prewarm can be called without errors
   convnet.Reset(expected_sample_rate, 64);
@@ -229,7 +241,9 @@ void test_convnet_prewarm()
   const int numFrames = 4;
   std::vector<NAM_SAMPLE> input(numFrames, 1.0f);
   std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
-  convnet.process(input.data(), output.data(), numFrames);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
+  convnet.process(inputPtrs, outputPtrs, numFrames);
 
   // Output should be finite
   for (int i = 0; i < numFrames; i++)
@@ -253,7 +267,7 @@ void test_convnet_multiple_calls()
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});
 
-  nam::convnet::ConvNet convnet(channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
 
   const int numFrames = 2;
   convnet.Reset(expected_sample_rate, numFrames);
@@ -263,7 +277,9 @@ void test_convnet_multiple_calls()
   {
     std::vector<NAM_SAMPLE> input(numFrames, 1.0f);
     std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
-    convnet.process(input.data(), output.data(), numFrames);
+    NAM_SAMPLE* inputPtrs[] = {input.data()};
+    NAM_SAMPLE* outputPtrs[] = {output.data()};
+    convnet.process(inputPtrs, outputPtrs, numFrames);
 
     // Output should be finite
     for (int j = 0; j < numFrames; j++)
diff --git a/tools/test/test_dsp.cpp b/tools/test/test_dsp.cpp
index bbdee63..d71bd18 100644
--- a/tools/test/test_dsp.cpp
+++ b/tools/test/test_dsp.cpp
@@ -1,33 +1,41 @@
 // Tests for dsp
 
 #include "NAM/dsp.h"
+#include <vector>
 
 namespace test_dsp
 {
 // Simplest test: can I construct something!
 void test_construct()
 {
-  nam::DSP myDsp(48000.0);
+  nam::DSP myDsp(1, 1, 48000.0);
+}
+
+void test_channels()
+{
+  nam::DSP myDsp(2, 3, 48000.0);
+  assert(myDsp.NumInputChannels() == 2);
+  assert(myDsp.NumOutputChannels() == 3);
 }
 
 void test_get_input_level()
 {
-  nam::DSP myDsp(48000.0);
+  nam::DSP myDsp(2, 1, 48000.0);
   const double expected = 19.0;
-  myDsp.SetInputLevel(expected);
-  assert(myDsp.HasInputLevel());
-  const double actual = myDsp.GetInputLevel();
+  myDsp.SetInputLevel(0, expected);
+  assert(myDsp.HasInputLevel(0));
+  const double actual = myDsp.GetInputLevel(0);
   assert(actual == expected);
 }
 
 void test_get_output_level()
 {
-  nam::DSP myDsp(48000.0);
+  nam::DSP myDsp(1, 2, 48000.0);
   const double expected = 12.0;
-  myDsp.SetOutputLevel(expected);
-  assert(myDsp.HasOutputLevel());
-  const double actual = myDsp.GetOutputLevel();
+  myDsp.SetOutputLevel(1, expected);
+  assert(myDsp.HasOutputLevel(1));
+  const double actual = myDsp.GetOutputLevel(1);
   assert(actual == expected);
 }
 
@@ -35,32 +43,74 @@ void test_get_output_level()
 // Test correct function of DSP::HasInputLevel()
 void test_has_input_level()
 {
-  nam::DSP myDsp(48000.0);
-  assert(!myDsp.HasInputLevel());
-
-  myDsp.SetInputLevel(19.0);
-  assert(myDsp.HasInputLevel());
+  nam::DSP myDsp(2, 1, 48000.0);
+  myDsp.SetInputLevel(0, 19.0);
+  assert(myDsp.HasInputLevel(0));
+  assert(!myDsp.HasInputLevel(1));
 }
 
 void test_has_output_level()
 {
-  nam::DSP myDsp(48000.0);
-  assert(!myDsp.HasOutputLevel());
+  nam::DSP myDsp(1, 2, 48000.0);
 
-  myDsp.SetOutputLevel(12.0);
-  assert(myDsp.HasOutputLevel());
+  assert(!myDsp.HasOutputLevel(0));
+  assert(!myDsp.HasOutputLevel(1));
+
+  myDsp.SetOutputLevel(1, 12.0);
+  assert(!myDsp.HasOutputLevel(0));
+  assert(myDsp.HasOutputLevel(1));
 }
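+
+// Exercises the aggregate channel == -1 form documented in dsp.h:
+// HasInputLevel() with no argument asks whether any channel has a level set.
+void test_has_input_level_any()
+{
+  nam::DSP myDsp(2, 1, 48000.0);
+  assert(!myDsp.HasInputLevel());
+  myDsp.SetInputLevel(1, 19.0);
+  assert(myDsp.HasInputLevel());
+}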
 
 // Test correct function of DSP::SetInputLevel()
 void test_set_input_level()
 {
-  nam::DSP myDsp(48000.0);
-  myDsp.SetInputLevel(19.0);
+  nam::DSP myDsp(2, 1, 48000.0);
+  myDsp.SetInputLevel(0, 19.0);
+  myDsp.SetInputLevel(1, 20.0);
 }
 
 void test_set_output_level()
 {
-  nam::DSP myDsp(48000.0);
-  myDsp.SetOutputLevel(19.0);
+  nam::DSP myDsp(1, 2, 48000.0);
+  myDsp.SetOutputLevel(0, 19.0);
+  myDsp.SetOutputLevel(1, 20.0);
+}
+
+void test_process_multi_channel()
+{
+  nam::DSP myDsp(2, 2, 48000.0);
+  const int num_frames = 64;
+
+  // Allocate buffers
+  std::vector<std::vector<NAM_SAMPLE>> inputBuffers(2);
+  std::vector<std::vector<NAM_SAMPLE>> outputBuffers(2);
+  std::vector<NAM_SAMPLE*> inputPtrs(2);
+  std::vector<NAM_SAMPLE*> outputPtrs(2);
+
+  for (int ch = 0; ch < 2; ch++)
+  {
+    inputBuffers[ch].resize(num_frames);
+    outputBuffers[ch].resize(num_frames);
+    inputPtrs[ch] = inputBuffers[ch].data();
+    outputPtrs[ch] = outputBuffers[ch].data();
+
+    // Fill input with test data
+    for (int i = 0; i < num_frames; i++)
+    {
+      inputBuffers[ch][i] = (ch + 1) * 0.5 + i * 0.01;
+    }
+  }
+
+  // Process
+  myDsp.process(inputPtrs.data(), outputPtrs.data(), num_frames);
+
+  // Check that default implementation copied input to output
+  for (int ch = 0; ch < 2; ch++)
+  {
+    for (int i = 0; i < num_frames; i++)
+    {
+      assert(outputBuffers[ch][i] == inputBuffers[ch][i]);
+    }
+  }
 }
 }; // namespace test_dsp
diff --git a/tools/test/test_wavenet/test_full.cpp b/tools/test/test_wavenet/test_full.cpp
index d75ae1c..dcb1941 100644
--- a/tools/test/test_wavenet/test_full.cpp
+++ b/tools/test/test_wavenet/test_full.cpp
@@ -47,7 +47,8 @@ void test_wavenet_model()
   weights.push_back(1.0f); // Head rechannel
 
   weights.push_back(head_scale); // Head scale
 
-  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(layer_array_params, head_scale, with_head, weights, 48000.0);
+  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(
+    input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0);
 
   const int numFrames = 4;
   const int maxBufferSize = 64;
@@ -55,8 +56,10 @@ void test_wavenet_model()
 
   std::vector<NAM_SAMPLE> input(numFrames, 1.0f);
   std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
 
-  wavenet->process(input.data(), output.data(), numFrames);
+  wavenet->process(inputPtrs, outputPtrs, numFrames);
 
   // Verify output dimensions
   assert(output.size() == numFrames);
@@ -89,13 +92,13 @@ void test_wavenet_multiple_arrays()
   const int bottleneck = channels;
   const int groups_1x1 = 1;
   layer_array_params.push_back(nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, channels,
-                                                              bottleneck, kernel_size, std::move(dilations1), activation,
-                                                              gated, head_bias, groups, groups_1x1));
+                                                              bottleneck, kernel_size, std::move(dilations1),
+                                                              activation, gated, head_bias, groups, groups_1x1));
   // Second array (head_size of first must match channels of second)
   std::vector<int> dilations2{1};
   layer_array_params.push_back(nam::wavenet::LayerArrayParams(head_size, condition_size, head_size, channels,
-                                                              bottleneck, kernel_size, std::move(dilations2), activation,
-                                                              gated, head_bias, groups, groups_1x1));
+                                                              bottleneck, kernel_size, std::move(dilations2),
+                                                              activation, gated, head_bias, groups, groups_1x1));
 
   std::vector<float> weights;
   // Array 0: rechannel, layer, head_rechannel
@@ -104,7 +107,8 @@ void test_wavenet_multiple_arrays()
   weights.insert(weights.end(), {1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f});
   weights.push_back(head_scale);
 
-  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(layer_array_params, head_scale, with_head, weights, 48000.0);
+  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(
+    input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0);
 
   const int numFrames = 4;
   const int maxBufferSize = 64;
@@ -112,8 +116,10 @@ void test_wavenet_multiple_arrays()
 
   std::vector<NAM_SAMPLE> input(numFrames, 1.0f);
   std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
 
-  wavenet->process(input.data(), output.data(), numFrames);
+  wavenet->process(inputPtrs, outputPtrs, numFrames);
 
   assert(output.size() == numFrames);
   for (int i = 0; i < numFrames; i++)
@@ -147,15 +153,18 @@ void test_wavenet_zero_input()
 
   std::vector<float> weights{1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f, head_scale};
 
-  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(layer_array_params, head_scale, with_head, weights, 48000.0);
+  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(
+    input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0);
 
   const int numFrames = 4;
   wavenet->Reset(48000.0, numFrames);
 
   std::vector<NAM_SAMPLE> input(numFrames, 0.0f);
   std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
 
-  wavenet->process(input.data(), output.data(), numFrames);
+  wavenet->process(inputPtrs, outputPtrs, numFrames);
 
   // With zero input, output should be finite (may be zero or non-zero depending on bias)
   for (int i = 0; i < numFrames; i++)
@@ -189,18 +198,23 @@ void test_wavenet_different_buffer_sizes()
 
   std::vector<float> weights{1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f, head_scale};
 
-  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(layer_array_params, head_scale, with_head, weights, 48000.0);
+  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(
+    input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0);
 
   // Test with different buffer sizes
   wavenet->Reset(48000.0, 64);
   std::vector<NAM_SAMPLE> input1(32, 1.0f);
   std::vector<NAM_SAMPLE> output1(32, 0.0f);
-  wavenet->process(input1.data(), output1.data(), 32);
+  NAM_SAMPLE* inputPtrs1[] = {input1.data()};
+  NAM_SAMPLE* outputPtrs1[] = {output1.data()};
+  wavenet->process(inputPtrs1, outputPtrs1, 32);
 
   wavenet->Reset(48000.0, 128);
   std::vector<NAM_SAMPLE> input2(64, 1.0f);
   std::vector<NAM_SAMPLE> output2(64, 0.0f);
-  wavenet->process(input2.data(), output2.data(), 64);
+  NAM_SAMPLE* inputPtrs2[] = {input2.data()};
+  NAM_SAMPLE* outputPtrs2[] = {output2.data()};
+  wavenet->process(inputPtrs2, outputPtrs2, 64);
 
   // Both should work without errors
   assert(output1.size() == 32);
@@ -251,7 +265,8 @@ void test_wavenet_prewarm()
   weights.push_back(1.0f);
   weights.push_back(head_scale);
 
-  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(layer_array_params, head_scale, with_head, weights, 48000.0);
+  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(
+    input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0);
 
   // Test that prewarm can be called without errors
   wavenet->Reset(48000.0, 64);
@@ -261,7 +276,9 @@ void test_wavenet_prewarm()
   const int numFrames = 4;
   std::vector<NAM_SAMPLE> input(numFrames, 1.0f);
   std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
-  wavenet->process(input.data(), output.data(), numFrames);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
+  wavenet->process(inputPtrs, outputPtrs, numFrames);
 
   // Output should be finite
   for (int i = 0; i < numFrames; i++)
diff --git a/tools/test/test_wavenet/test_real_time_safe.cpp b/tools/test/test_wavenet/test_real_time_safe.cpp
index 91d8628..c35c97c 100644
--- a/tools/test/test_wavenet/test_real_time_safe.cpp
+++ b/tools/test/test_wavenet/test_real_time_safe.cpp
@@ -775,7 +775,7 @@ void test_process_realtime_safe()
   weights.insert(weights.end(), {1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f});
   weights.push_back(head_scale);
 
-  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(layer_array_params, head_scale, with_head, weights, 48000.0);
+  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0);
 
   const int maxBufferSize = 256;
   wavenet->Reset(48000.0, maxBufferSize);
@@ -794,7 +794,9 @@ void test_process_realtime_safe()
     nullptr, // No setup needed
     [&]() {
       // Call process() - this should not allocate or free
-      wavenet->process(input.data(), output.data(), buffer_size);
+      NAM_SAMPLE* inputPtrs[] = {input.data()};
+      NAM_SAMPLE* outputPtrs[] = {output.data()};
+      wavenet->process(inputPtrs, outputPtrs, buffer_size);
     },
     nullptr, // No teardown needed
     test_name.c_str());

From 6402f146eedef2cbbe2a198a1589f23a62fbc32f Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Thu, 15 Jan 2026 21:27:13 -0800
Subject: [PATCH 02/19] Formatting

---
 NAM/convnet.cpp                                 | 21 ++++++++-------
 NAM/convnet.h                                   |  5 ++--
 NAM/dsp.h                                       |  7 ++---
 NAM/lstm.cpp                                    |  9 ++++---
 NAM/lstm.h                                      |  4 +--
 NAM/wavenet.cpp                                 | 11 ++++----
 NAM/wavenet.h                                   |  5 ++--
 tools/benchmodel.cpp                            |  4 +--
 tools/test/test_wavenet/test_real_time_safe.cpp | 27 ++++++++++---------
 9 files changed, 50 insertions(+), 43 deletions(-)

diff --git a/NAM/convnet.cpp b/NAM/convnet.cpp
index f82d6c1..6c0f3b7 100644
--- a/NAM/convnet.cpp
+++ b/NAM/convnet.cpp
@@ -146,9 +146,9 @@ void nam::convnet::_Head::process_(const Eigen::MatrixXf& input, Eigen::VectorXf
   output(i) = this->_bias + input.col(j).dot(this->_weight);
 }
 
-nam::convnet::ConvNet::ConvNet(const int in_channels, const int out_channels, const int channels, const std::vector<int>& dilations, const bool batchnorm,
-                               const std::string activation, std::vector<float>& weights,
-                               const double expected_sample_rate, const int groups)
+nam::convnet::ConvNet::ConvNet(const int in_channels, const int out_channels, const int channels,
+                               const std::vector<int>& dilations, const bool batchnorm, const std::string activation,
+                               std::vector<float>& weights, const double expected_sample_rate, const int groups)
 : Buffer(in_channels, out_channels, *std::max_element(dilations.begin(), dilations.end()), expected_sample_rate)
 {
   this->_verify_weights(channels, dilations, batchnorm, weights.size());
@@ -156,12 +156,13 @@ nam::convnet::ConvNet::ConvNet(const int in_channels, const int out_channels, co
   std::vector<float>::iterator it = weights.begin();
   // First block takes in_channels input, subsequent blocks take channels input
   for (size_t i = 0; i < dilations.size(); i++)
-    this->_blocks[i].set_weights_(i == 0 ? in_channels : channels, channels, dilations[i], batchnorm, activation, groups, it);
+    this->_blocks[i].set_weights_(
+      i == 0 ? in_channels : channels, channels, dilations[i], batchnorm, activation, groups, it);
   // Only need _block_vals for the head (one entry)
   // Conv1D layers manage their own buffers now
   this->_block_vals.resize(1);
   this->_block_vals[0].setZero();
-  
+
   // Create heads for each output channel
   this->_heads.resize(out_channels);
   this->_head_outputs.resize(out_channels);
   for (int ch = 0; ch < out_channels; ch++)
   {
     this->_heads[ch] = _Head(channels, it);
   }
-  
+
   if (it != weights.end())
     throw std::runtime_error("Didn't touch all the weights when initializing ConvNet");
 
@@ -185,11 +186,11 @@ void nam::convnet::ConvNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, con
   this->_update_buffers_(input, num_frames);
   const int in_channels = NumInputChannels();
   const int out_channels = NumOutputChannels();
-  
+
   // Multi-channel handling: the input channels are stacked into one matrix and
   // mixed by the first conv layer; each output channel then gets its own head.
   // This can be extended later for more sophisticated cross-channel processing
-  
+
   // Convert input buffers to matrix for first layer (stack input channels)
   Eigen::MatrixXf input_matrix(in_channels, num_frames);
   for (int ch = 0; ch < in_channels; ch++)
@@ -229,7 +230,7 @@ void nam::convnet::ConvNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, con
   {
     this->_block_vals[0].resize(this->_blocks.back().get_out_channels(), max_buffer_size);
   }
-  
+
   // Copy last block output to _block_vals for head
   auto last_output = this->_blocks.back().GetOutput(num_frames);
   const long i_start = this->_input_buffer_offset[0]; // Use first channel's offset
@@ -240,7 +241,7 @@ void nam::convnet::ConvNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, con
   for (int ch = 0; ch < out_channels; ch++)
   {
     this->_heads[ch].process_(this->_block_vals[0], this->_head_outputs[ch], i_start, i_end);
-    
+
     // Copy to output array for this channel
     for (int s = 0; s < num_frames; s++)
       output[ch][s] = this->_head_outputs[ch](s);
   }
diff --git a/NAM/convnet.h b/NAM/convnet.h
index 5f16ad6..c14994f 100644
--- a/NAM/convnet.h
+++ b/NAM/convnet.h
@@ -77,8 +77,9 @@ class _Head
 class ConvNet : public Buffer
 {
 public:
-  ConvNet(const int in_channels, const int out_channels, const int channels, const std::vector<int>& dilations, const bool batchnorm, const std::string activation,
-          std::vector<float>& weights, const double expected_sample_rate = -1.0, const int groups = 1);
+  ConvNet(const int in_channels, const int out_channels, const int channels, const std::vector<int>& dilations,
+          const bool batchnorm, const std::string activation, std::vector<float>& weights,
-1.0, const int groups = 1); ~ConvNet() = default; void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override; diff --git a/NAM/dsp.h b/NAM/dsp.h index ef1eaf7..a15cbca 100644 --- a/NAM/dsp.h +++ b/NAM/dsp.h @@ -134,7 +134,8 @@ class DSP class Buffer : public DSP { public: - Buffer(const int in_channels, const int out_channels, const int receptive_field, const double expected_sample_rate = -1.0); + Buffer(const int in_channels, const int out_channels, const int receptive_field, + const double expected_sample_rate = -1.0); protected: int _receptive_field; @@ -157,8 +158,8 @@ class Buffer : public DSP class Linear : public Buffer { public: - Linear(const int in_channels, const int out_channels, const int receptive_field, const bool _bias, const std::vector& weights, - const double expected_sample_rate = -1.0); + Linear(const int in_channels, const int out_channels, const int receptive_field, const bool _bias, + const std::vector& weights, const double expected_sample_rate = -1.0); void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override; protected: diff --git a/NAM/lstm.cpp b/NAM/lstm.cpp index 72eb684..ada9580 100644 --- a/NAM/lstm.cpp +++ b/NAM/lstm.cpp @@ -65,8 +65,8 @@ void nam::lstm::LSTMCell::process_(const Eigen::VectorXf& x) } } -nam::lstm::LSTM::LSTM(const int in_channels, const int out_channels, const int num_layers, const int input_size, const int hidden_size, std::vector& weights, - const double expected_sample_rate) +nam::lstm::LSTM::LSTM(const int in_channels, const int out_channels, const int num_layers, const int input_size, + const int hidden_size, std::vector& weights, const double expected_sample_rate) : DSP(in_channels, out_channels, expected_sample_rate) { this->_input.resize(1); @@ -83,7 +83,7 @@ nam::lstm::LSTM::LSTM(const int in_channels, const int out_channels, const int n void nam::lstm::LSTM::process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) { const int out_channels = NumOutputChannels(); - + // For now, process first input channel and replicate to all output channels // Can be extended later for true multi-channel support for (int i = 0; i < num_frames; i++) @@ -123,7 +123,8 @@ std::unique_ptr nam::lstm::Factory(const nlohmann::json& config, std:: // Default to 1 channel in/out for backward compatibility const int in_channels = config.value("in_channels", input_size); const int out_channels = config.value("out_channels", 1); - return std::make_unique(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expectedSampleRate); + return std::make_unique( + in_channels, out_channels, num_layers, input_size, hidden_size, weights, expectedSampleRate); } // Register the factory diff --git a/NAM/lstm.h b/NAM/lstm.h index e2123dc..251e01b 100644 --- a/NAM/lstm.h +++ b/NAM/lstm.h @@ -51,8 +51,8 @@ class LSTMCell class LSTM : public DSP { public: - LSTM(const int in_channels, const int out_channels, const int num_layers, const int input_size, const int hidden_size, std::vector& weights, - const double expected_sample_rate = -1.0); + LSTM(const int in_channels, const int out_channels, const int num_layers, const int input_size, const int hidden_size, + std::vector& weights, const double expected_sample_rate = -1.0); ~LSTM() = default; protected: diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp index 3e4ff1c..0b07b79 100644 --- a/NAM/wavenet.cpp +++ b/NAM/wavenet.cpp @@ -192,7 +192,8 @@ long nam::wavenet::_LayerArray::_get_channels() const // WaveNet 
==================================================================== -nam::wavenet::WaveNet::WaveNet(const int in_channels, const int out_channels, const std::vector& layer_array_params, +nam::wavenet::WaveNet::WaveNet(const int in_channels, const int out_channels, + const std::vector& layer_array_params, const float head_scale, const bool with_head, std::vector weights, const double expected_sample_rate) : DSP(in_channels, out_channels, expected_sample_rate) @@ -265,7 +266,7 @@ void nam::wavenet::WaveNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, con { assert(num_frames <= mMaxBufferSize); const int out_channels = NumOutputChannels(); - + this->_set_condition_array(input, num_frames); // Main layer arrays: @@ -293,7 +294,7 @@ void nam::wavenet::WaveNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, con auto& final_head_outputs = this->_layer_arrays.back().GetHeadOutputs(); const int out_channels = NumOutputChannels(); assert(final_head_outputs.rows() == out_channels); - + for (int ch = 0; ch < out_channels; ch++) { for (int s = 0; s < num_frames; s++) @@ -323,11 +324,11 @@ std::unique_ptr nam::wavenet::Factory(const nlohmann::json& config, st } const bool with_head = !config["head"].is_null(); const float head_scale = config["head_scale"]; - + // Determine channels from first layer (input_size) and last layer (head_size) const int in_channels = config.value("in_channels", layer_array_params[0].input_size); const int out_channels = config.value("out_channels", layer_array_params.back().head_size); - + return std::make_unique( in_channels, out_channels, layer_array_params, head_scale, with_head, weights, expectedSampleRate); } diff --git a/NAM/wavenet.h b/NAM/wavenet.h index faffa74..4ca370f 100644 --- a/NAM/wavenet.h +++ b/NAM/wavenet.h @@ -174,8 +174,9 @@ class _LayerArray class WaveNet : public DSP { public: - WaveNet(const int in_channels, const int out_channels, const std::vector& layer_array_params, const float head_scale, const bool with_head, - std::vector weights, const double expected_sample_rate = -1.0); + WaveNet(const int in_channels, const int out_channels, const std::vector& layer_array_params, + const float head_scale, const bool with_head, std::vector weights, + const double expected_sample_rate = -1.0); ~WaveNet() = default; void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override; void set_weights_(std::vector& weights); diff --git a/tools/benchmodel.cpp b/tools/benchmodel.cpp index fb6bd09..d8a1690 100644 --- a/tools/benchmodel.cpp +++ b/tools/benchmodel.cpp @@ -43,12 +43,12 @@ int main(int argc, char* argv[]) // Allocate multi-channel buffers const int in_channels = model->NumInputChannels(); const int out_channels = model->NumOutputChannels(); - + std::vector> inputBuffers(in_channels); std::vector> outputBuffers(out_channels); std::vector inputPtrs(in_channels); std::vector outputPtrs(out_channels); - + for (int ch = 0; ch < in_channels; ch++) { inputBuffers[ch].resize(AUDIO_BUFFER_SIZE, 0.0); diff --git a/tools/test/test_wavenet/test_real_time_safe.cpp b/tools/test/test_wavenet/test_real_time_safe.cpp index c35c97c..e0c2261 100644 --- a/tools/test/test_wavenet/test_real_time_safe.cpp +++ b/tools/test/test_wavenet/test_real_time_safe.cpp @@ -437,8 +437,8 @@ void test_layer_process_realtime_safe() const int groups_input = 1; const int groups_1x1 = 1; - auto layer = nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gated, - groups_input, groups_1x1); + auto layer = nam::wavenet::_Layer( + 
condition_size, channels, bottleneck, kernel_size, dilation, activation, gated, groups_input, groups_1x1); // Set weights std::vector weights{1.0f, 0.0f, // Conv (weight, bias) @@ -492,8 +492,8 @@ void test_layer_bottleneck_process_realtime_safe() const int groups_input = 1; const int groups_1x1 = 1; - auto layer = nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gated, - groups_input, groups_1x1); + auto layer = nam::wavenet::_Layer( + condition_size, channels, bottleneck, kernel_size, dilation, activation, gated, groups_input, groups_1x1); // Set weights for bottleneck != channels // Conv: (channels, bottleneck, kernelSize=1) = (4, 2, 1) + bias @@ -544,8 +544,8 @@ void test_layer_bottleneck_process_realtime_safe() input.setConstant(0.5f); condition.setConstant(0.5f); - std::string test_name = "Layer Process (bottleneck=" + std::to_string(bottleneck) + ", channels=" + - std::to_string(channels) + ") - Buffer size " + std::to_string(buffer_size); + std::string test_name = "Layer Process (bottleneck=" + std::to_string(bottleneck) + ", channels=" + + std::to_string(channels) + ") - Buffer size " + std::to_string(buffer_size); run_allocation_test_no_allocations( nullptr, // No setup needed [&]() { @@ -577,8 +577,8 @@ void test_layer_grouped_process_realtime_safe() const int groups_input = 2; // groups_input > 1 const int groups_1x1 = 2; // 1x1 is also grouped - auto layer = nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gated, - groups_input, groups_1x1); + auto layer = nam::wavenet::_Layer( + condition_size, channels, bottleneck, kernel_size, dilation, activation, gated, groups_input, groups_1x1); // Set weights for grouped convolution // With groups_input=2, channels=4: each group has 2 in_channels and 2 out_channels @@ -757,13 +757,13 @@ void test_process_realtime_safe() const int bottleneck = channels; const int groups_1x1 = 1; layer_array_params.push_back(nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, channels, - bottleneck, kernel_size, std::move(dilations1), activation, - gated, head_bias, groups, groups_1x1)); + bottleneck, kernel_size, std::move(dilations1), + activation, gated, head_bias, groups, groups_1x1)); // Second layer array (head_size of first must match channels of second) std::vector dilations2{1}; layer_array_params.push_back(nam::wavenet::LayerArrayParams(head_size, condition_size, head_size, channels, - bottleneck, kernel_size, std::move(dilations2), activation, - gated, head_bias, groups, groups_1x1)); + bottleneck, kernel_size, std::move(dilations2), + activation, gated, head_bias, groups, groups_1x1)); // Weights: Array 0: rechannel(1), layer(conv:1+1, input_mixin:1, 1x1:1+1), head_rechannel(1) // Array 1: same structure @@ -775,7 +775,8 @@ void test_process_realtime_safe() weights.insert(weights.end(), {1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}); weights.push_back(head_scale); - auto wavenet = std::make_unique(input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0); + auto wavenet = std::make_unique( + input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0); const int maxBufferSize = 256; wavenet->Reset(48000.0, maxBufferSize); From 1cff233e18a758c025ab1be5a666bb3567b13e44 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Thu, 15 Jan 2026 22:44:33 -0800 Subject: [PATCH 03/19] [REFINE] Simplify DSP input/output level handling - Refactored DSP class to eliminate channel-specific input/output level 
methods, consolidating them into single methods for input and output levels. - Updated related methods and tests to reflect the new simplified interface, ensuring backward compatibility. - Adjusted buffer management in the Buffer class to accommodate the changes in DSP level handling, ensuring all channels use consistent buffer sizes. --- NAM/convnet.cpp | 95 +++++++++++++++++++++--------------- NAM/dsp.cpp | 103 ++++++++++++++-------------------------- NAM/get_dsp.cpp | 12 +---- NAM/wavenet.cpp | 41 ++++++++++++---- tools/test/test_dsp.cpp | 93 ++++++++++++++++++++++-------------- 5 files changed, 185 insertions(+), 159 deletions(-) diff --git a/NAM/convnet.cpp b/NAM/convnet.cpp index 6c0f3b7..3ee6fbe 100644 --- a/NAM/convnet.cpp +++ b/NAM/convnet.cpp @@ -129,21 +129,47 @@ long nam::convnet::ConvNetBlock::get_out_channels() const return this->conv.get_out_channels(); } -nam::convnet::_Head::_Head(const int channels, std::vector::iterator& weights) +nam::convnet::_Head::_Head(const int in_channels, const int out_channels, std::vector::iterator& weights) { - this->_weight.resize(channels); - for (int i = 0; i < channels; i++) - this->_weight[i] = *(weights++); - this->_bias = *(weights++); + // Weights are stored row-major: first row (output 0), then row 1 (output 1), etc. + // For each output channel: [w0, w1, ..., w_{in_channels-1}] + // Then biases: [bias0, bias1, ..., bias_{out_channels-1}] + this->_weight.resize(out_channels, in_channels); + for (int out_ch = 0; out_ch < out_channels; out_ch++) + { + for (int in_ch = 0; in_ch < in_channels; in_ch++) + { + this->_weight(out_ch, in_ch) = *(weights++); + } + } + + // Biases for each output channel + this->_bias.resize(out_channels); + for (int out_ch = 0; out_ch < out_channels; out_ch++) + { + this->_bias(out_ch) = *(weights++); + } } -void nam::convnet::_Head::process_(const Eigen::MatrixXf& input, Eigen::VectorXf& output, const long i_start, +void nam::convnet::_Head::process_(const Eigen::MatrixXf& input, Eigen::MatrixXf& output, const long i_start, const long i_end) const { const long length = i_end - i_start; - output.resize(length); - for (long i = 0, j = i_start; i < length; i++, j++) - output(i) = this->_bias + input.col(j).dot(this->_weight); + const long out_channels = this->_weight.rows(); + const long in_channels = this->_weight.cols(); + + // Resize output to (out_channels x length) + output.resize(out_channels, length); + + // Extract input slice: (in_channels x length) + Eigen::MatrixXf input_slice = input.middleCols(i_start, length); + + // Compute output = weight * input_slice: (out_channels x in_channels) * (in_channels x length) = (out_channels x length) + output.noalias() = this->_weight * input_slice; + + // Add bias to each column: output.colwise() += bias + // output is (out_channels x length), bias is (out_channels x 1), so colwise() += works + output.colwise() += this->_bias; } nam::convnet::ConvNet::ConvNet(const int in_channels, const int out_channels, const int channels, @@ -163,13 +189,8 @@ nam::convnet::ConvNet::ConvNet(const int in_channels, const int out_channels, co this->_block_vals.resize(1); this->_block_vals[0].setZero(); - // Create heads for each output channel - this->_heads.resize(out_channels); - this->_head_outputs.resize(out_channels); - for (int ch = 0; ch < out_channels; ch++) - { - this->_heads[ch] = _Head(channels, it); - } + // Create single head that outputs all channels + this->_head = _Head(channels, out_channels, it); if (it != weights.end()) throw std::runtime_error("Didn't touch all 
the weights when initializing ConvNet"); @@ -193,9 +214,9 @@ void nam::convnet::ConvNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, con // Convert input buffers to matrix for first layer (stack input channels) Eigen::MatrixXf input_matrix(in_channels, num_frames); + const long i_start = this->_input_buffer_offset; for (int ch = 0; ch < in_channels; ch++) { - const long i_start = this->_input_buffer_offset[ch]; for (int i = 0; i < num_frames; i++) input_matrix(ch, i) = this->_input_buffers[ch][i_start + i]; } @@ -222,29 +243,32 @@ void nam::convnet::ConvNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, con this->_blocks[i].Process(block_input, num_frames); } - // Process heads for each output channel + // Process head for all output channels at once // We need _block_vals[0] for the head interface - const long max_buffer_size = this->_input_buffers[0].size(); + const long buffer_size = (long)this->_input_buffers[0].size(); if (this->_block_vals[0].rows() != this->_blocks.back().get_out_channels() - || this->_block_vals[0].cols() != max_buffer_size) + || this->_block_vals[0].cols() != buffer_size) { - this->_block_vals[0].resize(this->_blocks.back().get_out_channels(), max_buffer_size); + this->_block_vals[0].resize(this->_blocks.back().get_out_channels(), buffer_size); } // Copy last block output to _block_vals for head auto last_output = this->_blocks.back().GetOutput(num_frames); - const long i_start = this->_input_buffer_offset[0]; // Use first channel's offset - const long i_end = i_start + num_frames; - this->_block_vals[0].middleCols(i_start, num_frames) = last_output; + const long buffer_offset = this->_input_buffer_offset; + const long buffer_i_end = buffer_offset + num_frames; + // last_output is (channels x num_frames), _block_vals[0] is (channels x buffer_size) + // Copy to the correct location in _block_vals + this->_block_vals[0].block(0, buffer_offset, last_output.rows(), num_frames) = last_output; + + // Process head - outputs all channels at once + // Head will resize _head_output internally + this->_head.process_(this->_block_vals[0], this->_head_output, buffer_offset, buffer_i_end); - // Process each output channel head + // Copy to output arrays for each channel for (int ch = 0; ch < out_channels; ch++) { - this->_heads[ch].process_(this->_block_vals[0], this->_head_outputs[ch], i_start, i_end); - - // Copy to output array for this channel for (int s = 0; s < num_frames; s++) - output[ch][s] = this->_head_outputs[ch](s); + output[ch][s] = this->_head_output(ch, s); } // Prepare for next call: @@ -272,20 +296,15 @@ void nam::convnet::ConvNet::_update_buffers_(NAM_SAMPLE** input, const int num_f { this->Buffer::_update_buffers_(input, num_frames); - // Find maximum buffer size across input channels - long max_buffer_size = 0; - for (const auto& buf : this->_input_buffers) - { - if ((long)buf.size() > max_buffer_size) - max_buffer_size = (long)buf.size(); - } + // All channels use the same buffer size + const long buffer_size = (long)this->_input_buffers[0].size(); // Only need _block_vals[0] for the head // Conv1D layers manage their own buffers now if (this->_block_vals[0].rows() != this->_blocks.back().get_out_channels() - || this->_block_vals[0].cols() != max_buffer_size) + || this->_block_vals[0].cols() != buffer_size) { - this->_block_vals[0].resize(this->_blocks.back().get_out_channels(), max_buffer_size); + this->_block_vals[0].resize(this->_blocks.back().get_out_channels(), buffer_size); this->_block_vals[0].setZero(); } } diff --git a/NAM/dsp.cpp 
b/NAM/dsp.cpp index f9b625e..d0a1c4c 100644 --- a/NAM/dsp.cpp +++ b/NAM/dsp.cpp @@ -19,8 +19,6 @@ nam::DSP::DSP(const int in_channels, const int out_channels, const double expect : mInChannels(in_channels) , mOutChannels(out_channels) , mExpectedSampleRate(expected_sample_rate) -, mInputLevels(in_channels) -, mOutputLevels(out_channels) { if (in_channels <= 0 || out_channels <= 0) { @@ -113,60 +111,36 @@ void nam::DSP::SetMaxBufferSize(const int maxBufferSize) mMaxBufferSize = maxBufferSize; } -double nam::DSP::GetInputLevel(const int channel) +double nam::DSP::GetInputLevel() { - if (channel < 0 || channel >= mInChannels) - { - throw std::runtime_error("Invalid input channel index"); - } - return mInputLevels[channel].level; + return mInputLevel.level; } -double nam::DSP::GetOutputLevel(const int channel) +double nam::DSP::GetOutputLevel() { - if (channel < 0 || channel >= mOutChannels) - { - throw std::runtime_error("Invalid output channel index"); - } - return mOutputLevels[channel].level; + return mOutputLevel.level; } -bool nam::DSP::HasInputLevel(const int channel) +bool nam::DSP::HasInputLevel() { - if (channel < 0 || channel >= mInChannels) - { - throw std::runtime_error("Invalid input channel index"); - } - return mInputLevels[channel].haveLevel; + return mInputLevel.haveLevel; } -bool nam::DSP::HasOutputLevel(const int channel) +bool nam::DSP::HasOutputLevel() { - if (channel < 0 || channel >= mOutChannels) - { - throw std::runtime_error("Invalid output channel index"); - } - return mOutputLevels[channel].haveLevel; + return mOutputLevel.haveLevel; } -void nam::DSP::SetInputLevel(const int channel, const double inputLevel) +void nam::DSP::SetInputLevel(const double inputLevel) { - if (channel < 0 || channel >= mInChannels) - { - throw std::runtime_error("Invalid input channel index"); - } - mInputLevels[channel].haveLevel = true; - mInputLevels[channel].level = inputLevel; + mInputLevel.haveLevel = true; + mInputLevel.level = inputLevel; } -void nam::DSP::SetOutputLevel(const int channel, const double outputLevel) +void nam::DSP::SetOutputLevel(const double outputLevel) { - if (channel < 0 || channel >= mOutChannels) - { - throw std::runtime_error("Invalid output channel index"); - } - mOutputLevels[channel].haveLevel = true; - mOutputLevels[channel].level = outputLevel; + mOutputLevel.haveLevel = true; + mOutputLevel.level = outputLevel; } // Buffer ===================================================================== @@ -191,7 +165,6 @@ void nam::Buffer::_set_receptive_field(const int new_receptive_field, const int // Resize buffers for all input channels _input_buffers.resize(in_channels); - _input_buffer_offset.resize(in_channels); for (int ch = 0; ch < in_channels; ch++) { _input_buffers[ch].resize(input_buffer_size); @@ -210,7 +183,7 @@ void nam::Buffer::_update_buffers_(NAM_SAMPLE** input, const int num_frames) const int out_channels = NumOutputChannels(); // Make sure that the buffers are big enough for the receptive field and the - // frames needed for each channel! + // frames needed. All channels use the same buffer size. 
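+  // Worked example (illustrative numbers only; the actual value of
+  // _INPUT_BUFFER_SAFETY_FACTOR is defined elsewhere in this file): with a
+  // receptive field of 8192 samples, num_frames = 512, and a safety factor
+  // of 32, the minimum buffer size is 8192 + 32 * 512 = 24576 samples, and
+  // every input channel uses that same size.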
const long minimum_input_buffer_size = (long)this->_receptive_field + _INPUT_BUFFER_SAFETY_FACTOR * num_frames; for (int ch = 0; ch < in_channels; ch++) @@ -223,14 +196,18 @@ void nam::Buffer::_update_buffers_(NAM_SAMPLE** input, const int num_frames) this->_input_buffers[ch].resize(new_buffer_size); std::fill(this->_input_buffers[ch].begin(), this->_input_buffers[ch].end(), 0.0f); } + } - // If we'd run off the end of the input buffer, then we need to move the data - // back to the start of the buffer and start again. - if (this->_input_buffer_offset[ch] + num_frames > (long)this->_input_buffers[ch].size()) - this->_rewind_buffers_(); + // If we'd run off the end of the input buffer, then we need to move the data + // back to the start of the buffer and start again. All channels move together. + const long buffer_size = (long)this->_input_buffers[0].size(); + if (this->_input_buffer_offset + num_frames > buffer_size) + this->_rewind_buffers_(); - // Put the new samples into the input buffer for this channel - for (long i = this->_input_buffer_offset[ch], j = 0; j < num_frames; i++, j++) + // Put the new samples into the input buffer for each channel + for (int ch = 0; ch < in_channels; ch++) + { + for (long i = this->_input_buffer_offset, j = 0; j < num_frames; i++, j++) this->_input_buffers[ch][i] = (float)input[ch][j]; } @@ -246,38 +223,30 @@ void nam::Buffer::_rewind_buffers_() { const int in_channels = NumInputChannels(); - // Rewind buffers for all input channels + // Rewind buffers for all input channels (they all move together) for (int ch = 0; ch < in_channels; ch++) { // Copy the input buffer back // RF-1 samples because we've got at least one new one inbound. - for (long i = 0, j = this->_input_buffer_offset[ch] - this->_receptive_field; i < this->_receptive_field; i++, j++) + for (long i = 0, j = this->_input_buffer_offset - this->_receptive_field; i < this->_receptive_field; i++, j++) this->_input_buffers[ch][i] = this->_input_buffers[ch][j]; - // And reset the offset. - // Even though we could be stingy about that one sample that we won't be using - // (because a new set is incoming) it's probably not worth the - // hyper-optimization and liable for bugs. And the code looks way tidier this - // way. - this->_input_buffer_offset[ch] = this->_receptive_field; } + // And reset the offset. + // Even though we could be stingy about that one sample that we won't be using + // (because a new set is incoming) it's probably not worth the + // hyper-optimization and liable for bugs. And the code looks way tidier this + // way. 
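+  // Concrete example (hypothetical sizes): with _receptive_field = 4 and the
+  // offset sitting at 60 in a 64-sample buffer, samples [56..59] are copied
+  // down to [0..3] in every channel, and the offset restarts at 4.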
+ this->_input_buffer_offset = this->_receptive_field; } void nam::Buffer::_reset_input_buffer() { - const int in_channels = NumInputChannels(); - for (int ch = 0; ch < in_channels; ch++) - { - this->_input_buffer_offset[ch] = this->_receptive_field; - } + this->_input_buffer_offset = this->_receptive_field; } void nam::Buffer::_advance_input_buffer_(const int num_frames) { - const int in_channels = NumInputChannels(); - for (int ch = 0; ch < in_channels; ch++) - { - this->_input_buffer_offset[ch] += num_frames; - } + this->_input_buffer_offset += num_frames; } // Linear ===================================================================== @@ -314,7 +283,7 @@ void nam::Linear::process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num { for (int i = 0; i < num_frames; i++) { - const long offset = this->_input_buffer_offset[ch] - this->_weight.size() + i + 1; + const long offset = this->_input_buffer_offset - this->_weight.size() + i + 1; auto input_vec = Eigen::Map(&this->_input_buffers[ch][offset], this->_receptive_field); output[ch][i] = this->_bias + this->_weight.dot(input_vec); } diff --git a/NAM/get_dsp.cpp b/NAM/get_dsp.cpp index af1ef68..99dd3a0 100644 --- a/NAM/get_dsp.cpp +++ b/NAM/get_dsp.cpp @@ -158,19 +158,11 @@ std::unique_ptr get_dsp(dspData& conf) } if (inputLevel.have) { - // Set the same level for all input channels (backward compatibility) - for (int ch = 0; ch < out->NumInputChannels(); ch++) - { - out->SetInputLevel(ch, inputLevel.value); - } + out->SetInputLevel(inputLevel.value); } if (outputLevel.have) { - // Set the same level for all output channels (backward compatibility) - for (int ch = 0; ch < out->NumOutputChannels(); ch++) - { - out->SetOutputLevel(ch, outputLevel.value); - } + out->SetOutputLevel(outputLevel.value); } // "pre-warm" the model to settle initial conditions diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp index 0b07b79..748a3c9 100644 --- a/NAM/wavenet.cpp +++ b/NAM/wavenet.cpp @@ -192,13 +192,18 @@ long nam::wavenet::_LayerArray::_get_channels() const // WaveNet ==================================================================== -nam::wavenet::WaveNet::WaveNet(const int in_channels, const int out_channels, +nam::wavenet::WaveNet::WaveNet(const int in_channels, const std::vector& layer_array_params, const float head_scale, const bool with_head, std::vector weights, const double expected_sample_rate) -: DSP(in_channels, out_channels, expected_sample_rate) +: DSP(in_channels, + layer_array_params.empty() ? 
throw std::runtime_error("WaveNet requires at least one layer array") + : layer_array_params.back().head_size, + expected_sample_rate) , _head_scale(head_scale) { + if (layer_array_params.empty()) + throw std::runtime_error("WaveNet requires at least one layer array"); if (with_head) throw std::runtime_error("Head not implemented!"); for (size_t i = 0; i < layer_array_params.size(); i++) @@ -254,11 +259,26 @@ void nam::wavenet::WaveNet::SetMaxBufferSize(const int maxBufferSize) void nam::wavenet::WaveNet::_set_condition_array(NAM_SAMPLE** input, const int num_frames) { - // For now, use first input channel for conditioning - // Can be extended later to support multi-channel conditioning - for (int j = 0; j < num_frames; j++) + const int in_channels = NumInputChannels(); + const int condition_dim = this->_get_condition_dim(); + + assert(in_channels <= condition_dim); + + // Fill condition array with input channels + for (int ch = 0; ch < in_channels; ch++) { - this->_condition(0, j) = input[0][j]; + for (int j = 0; j < num_frames; j++) + { + this->_condition(ch, j) = input[ch][j]; + } + } + // Zero-fill remaining condition channels if in_channels < condition_dim + for (int ch = in_channels; ch < condition_dim; ch++) + { + for (int j = 0; j < num_frames; j++) + { + this->_condition(ch, j) = 0.0f; + } } } @@ -292,7 +312,6 @@ void nam::wavenet::WaveNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, con // (Head not implemented) auto& final_head_outputs = this->_layer_arrays.back().GetHeadOutputs(); - const int out_channels = NumOutputChannels(); assert(final_head_outputs.rows() == out_channels); for (int ch = 0; ch < out_channels; ch++) @@ -325,12 +344,16 @@ std::unique_ptr nam::wavenet::Factory(const nlohmann::json& config, st const bool with_head = !config["head"].is_null(); const float head_scale = config["head_scale"]; - // Determine channels from first layer (input_size) and last layer (head_size) + if (layer_array_params.empty()) + throw std::runtime_error("WaveNet config requires at least one layer array"); + + // Determine input channels from config or first layer const int in_channels = config.value("in_channels", layer_array_params[0].input_size); const int out_channels = config.value("out_channels", layer_array_params.back().head_size); + // out_channels is determined from last layer array's head_size return std::make_unique( - in_channels, out_channels, layer_array_params, head_scale, with_head, weights, expectedSampleRate); + in_channels, layer_array_params, head_scale, with_head, weights, expectedSampleRate); } // Register the factory diff --git a/tools/test/test_dsp.cpp b/tools/test/test_dsp.cpp index d71bd18..d019a87 100644 --- a/tools/test/test_dsp.cpp +++ b/tools/test/test_dsp.cpp @@ -8,34 +8,42 @@ namespace test_dsp // Simplest test: can I construct something! 
 void test_construct()
 {
-  nam::DSP myDsp(1, 1, 48000.0);
+  const int in_channels = 1;
+  const int out_channels = 1;
+  nam::DSP myDsp(in_channels, out_channels, 48000.0);
 }
 
 void test_channels()
 {
-  nam::DSP myDsp(2, 3, 48000.0);
-  assert(myDsp.NumInputChannels() == 2);
-  assert(myDsp.NumOutputChannels() == 3);
+  const int in_channels = 2;
+  const int out_channels = 3;
+  nam::DSP myDsp(in_channels, out_channels, 48000.0);
+  assert(myDsp.NumInputChannels() == in_channels);
+  assert(myDsp.NumOutputChannels() == out_channels);
 }
 
 void test_get_input_level()
 {
-  nam::DSP myDsp(2, 1, 48000.0);
+  const int in_channels = 2;
+  const int out_channels = 1;
+  nam::DSP myDsp(in_channels, out_channels, 48000.0);
   const double expected = 19.0;
-  myDsp.SetInputLevel(0, expected);
-  assert(myDsp.HasInputLevel(0));
-  const double actual = myDsp.GetInputLevel(0);
+  myDsp.SetInputLevel(expected);
+  assert(myDsp.HasInputLevel());
+  const double actual = myDsp.GetInputLevel();
   assert(actual == expected);
 }
 
 void test_get_output_level()
 {
-  nam::DSP myDsp(1, 2, 48000.0);
+  const int in_channels = 1;
+  const int out_channels = 2;
+  nam::DSP myDsp(in_channels, out_channels, 48000.0);
   const double expected = 12.0;
-  myDsp.SetOutputLevel(1, expected);
-  assert(myDsp.HasOutputLevel(1));
-  const double actual = myDsp.GetOutputLevel(1);
+  myDsp.SetOutputLevel(expected);
+  assert(myDsp.HasOutputLevel());
+  const double actual = myDsp.GetOutputLevel();
   assert(actual == expected);
 }
@@ -43,51 +51,60 @@ void test_get_output_level()
 // Test correct function of DSP::HasInputLevel()
 void test_has_input_level()
 {
-  nam::DSP myDsp(2, 1, 48000.0);
-  myDsp.SetInputLevel(0, 19.0);
-  assert(myDsp.HasInputLevel(0));
-  assert(!myDsp.HasInputLevel(1));
+  const int in_channels = 2;
+  const int out_channels = 1;
+  nam::DSP myDsp(in_channels, out_channels, 48000.0);
+  assert(!myDsp.HasInputLevel());
+
+  const double level = 19.0;
+  myDsp.SetInputLevel(level);
+  assert(myDsp.HasInputLevel());
 }
 
 void test_has_output_level()
 {
-  nam::DSP myDsp(1, 2, 48000.0);
+  const int in_channels = 1;
+  const int out_channels = 2;
+  nam::DSP myDsp(in_channels, out_channels, 48000.0);
 
-  assert(!myDsp.HasOutputLevel(0));
-  assert(!myDsp.HasOutputLevel(1));
+  assert(!myDsp.HasOutputLevel());
 
-  myDsp.SetOutputLevel(1, 12.0);
-  assert(!myDsp.HasOutputLevel(0));
-  assert(myDsp.HasOutputLevel(1));
+  const double level = 12.0;
+  myDsp.SetOutputLevel(level);
+  assert(myDsp.HasOutputLevel());
 }
 
 // Test correct function of DSP::SetInputLevel()
 void test_set_input_level()
 {
-  nam::DSP myDsp(2, 1, 48000.0);
-  myDsp.SetInputLevel(0, 19.0);
-  myDsp.SetInputLevel(1, 20.0);
+  const int in_channels = 2;
+  const int out_channels = 1;
+  nam::DSP myDsp(in_channels, out_channels, 48000.0);
+  myDsp.SetInputLevel(19.0);
 }
 
 void test_set_output_level()
 {
-  nam::DSP myDsp(1, 2, 48000.0);
-  myDsp.SetOutputLevel(0, 19.0);
-  myDsp.SetOutputLevel(1, 20.0);
+  const int in_channels = 1;
+  const int out_channels = 2;
+  nam::DSP myDsp(in_channels, out_channels, 48000.0);
+  myDsp.SetOutputLevel(19.0);
 }
 
 void test_process_multi_channel()
 {
-  nam::DSP myDsp(2, 2, 48000.0);
+  const int in_channels = 2;
+  const int out_channels = 2;
+  nam::DSP myDsp(in_channels, out_channels, 48000.0);
   const int num_frames = 64;
 
   // Allocate buffers
-  std::vector<std::vector<NAM_SAMPLE>> inputBuffers(2);
-  std::vector<std::vector<NAM_SAMPLE>> outputBuffers(2);
-  std::vector<NAM_SAMPLE*> inputPtrs(2);
-  std::vector<NAM_SAMPLE*> outputPtrs(2);
+  std::vector<std::vector<NAM_SAMPLE>> inputBuffers(in_channels);
+  std::vector<std::vector<NAM_SAMPLE>> outputBuffers(out_channels);
+  std::vector<NAM_SAMPLE*> inputPtrs(in_channels);
+  std::vector<NAM_SAMPLE*>
outputPtrs(out_channels); - for (int ch = 0; ch < 2; ch++) + for (int ch = 0; ch < in_channels; ch++) { inputBuffers[ch].resize(num_frames); outputBuffers[ch].resize(num_frames); @@ -100,12 +117,18 @@ void test_process_multi_channel() inputBuffers[ch][i] = (ch + 1) * 0.5 + i * 0.01; } } + for (int ch = 0; ch < out_channels; ch++) + { + outputBuffers[ch].resize(num_frames); + outputPtrs[ch] = outputBuffers[ch].data(); + } // Process myDsp.process(inputPtrs.data(), outputPtrs.data(), num_frames); // Check that default implementation copied input to output - for (int ch = 0; ch < 2; ch++) + const int channelsToCheck = std::min(in_channels, out_channels); + for (int ch = 0; ch < channelsToCheck; ch++) { for (int i = 0; i < num_frames; i++) { From ed5bf8db4bd70a85e79d87fb7f77a144f28efab5 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Thu, 15 Jan 2026 22:45:31 -0800 Subject: [PATCH 04/19] Formatting --- NAM/convnet.cpp | 9 ++- NAM/convnet.h | 12 +-- NAM/dsp.h | 37 +++++---- NAM/lstm.cpp | 79 ++++++++++++++++--- NAM/lstm.h | 10 ++- NAM/wavenet.cpp | 4 +- NAM/wavenet.h | 5 +- tools/test/test_wavenet/test_full.cpp | 20 ++--- .../test/test_wavenet/test_real_time_safe.cpp | 4 +- 9 files changed, 120 insertions(+), 60 deletions(-) diff --git a/NAM/convnet.cpp b/NAM/convnet.cpp index 3ee6fbe..4e3b4fd 100644 --- a/NAM/convnet.cpp +++ b/NAM/convnet.cpp @@ -157,16 +157,17 @@ void nam::convnet::_Head::process_(const Eigen::MatrixXf& input, Eigen::MatrixXf const long length = i_end - i_start; const long out_channels = this->_weight.rows(); const long in_channels = this->_weight.cols(); - + // Resize output to (out_channels x length) output.resize(out_channels, length); // Extract input slice: (in_channels x length) Eigen::MatrixXf input_slice = input.middleCols(i_start, length); - - // Compute output = weight * input_slice: (out_channels x in_channels) * (in_channels x length) = (out_channels x length) + + // Compute output = weight * input_slice: (out_channels x in_channels) * (in_channels x length) = (out_channels x + // length) output.noalias() = this->_weight * input_slice; - + // Add bias to each column: output.colwise() += bias // output is (out_channels x length), bias is (out_channels x 1), so colwise() += works output.colwise() += this->_bias; diff --git a/NAM/convnet.h b/NAM/convnet.h index c14994f..d1e846c 100644 --- a/NAM/convnet.h +++ b/NAM/convnet.h @@ -66,12 +66,12 @@ class _Head { public: _Head() {}; - _Head(const int channels, std::vector::iterator& weights); - void process_(const Eigen::MatrixXf& input, Eigen::VectorXf& output, const long i_start, const long i_end) const; + _Head(const int in_channels, const int out_channels, std::vector::iterator& weights); + void process_(const Eigen::MatrixXf& input, Eigen::MatrixXf& output, const long i_start, const long i_end) const; private: - Eigen::VectorXf _weight; - float _bias = 0.0f; + Eigen::MatrixXf _weight; // (out_channels, in_channels) + Eigen::VectorXf _bias; // (out_channels,) }; class ConvNet : public Buffer @@ -88,8 +88,8 @@ class ConvNet : public Buffer protected: std::vector _blocks; std::vector _block_vals; - std::vector _head_outputs; - std::vector<_Head> _heads; + Eigen::MatrixXf _head_output; // (out_channels, num_frames) + _Head _head; void _verify_weights(const int channels, const std::vector& dilations, const bool batchnorm, const size_t actual_weights); void _update_buffers_(NAM_SAMPLE** input, const int num_frames) override; diff --git a/NAM/dsp.h b/NAM/dsp.h index a15cbca..5787212 100644 --- a/NAM/dsp.h +++ 
b/NAM/dsp.h @@ -64,23 +64,26 @@ class DSP int NumInputChannels() const { return mInChannels; }; // Number of output channels int NumOutputChannels() const { return mOutChannels; }; - // Input Level, in dBu, corresponding to 0 dBFS for a sine wave, for a specific channel + // Input Level, in dBu, corresponding to 0 dBFS for a sine wave // You should call HasInputLevel() first to be safe. - double GetInputLevel(const int channel); + // Note: input level is assumed global over all inputs. + double GetInputLevel(); // Get how loud this model is, in dB. // Throws a std::runtime_error if the model doesn't know how loud it is. + // Note: loudness is assumed global over all outputs. double GetLoudness() const; - // Output Level, in dBu, corresponding to 0 dBFS for a sine wave, for a specific channel + // Output Level, in dBu, corresponding to 0 dBFS for a sine wave // You should call HasOutputLevel() first to be safe. - double GetOutputLevel(const int channel); - // Does this model know its input level for a specific channel? - // If channel == -1, returns true if any channel has a level set. - bool HasInputLevel(const int channel = -1); + // Note: output level is assumed global over all outputs. + double GetOutputLevel(); + // Does this model know its input level? + // Note: input level is assumed global over all inputs. + bool HasInputLevel(); // Get whether the model knows how loud it is. bool HasLoudness() const { return mHasLoudness; }; - // Does this model know its output level for a specific channel? - // If channel == -1, returns true if any channel has a level set. - bool HasOutputLevel(const int channel = -1); + // Does this model know its output level? + // Note: output level is assumed global over all outputs. + bool HasOutputLevel(); // General function for resetting the DSP unit. // This doesn't call prewarm(). If you want to do that, then you might want to use ResetAndPrewarm(). // See https://github.com/sdatkinson/NeuralAmpModelerCore/issues/96 for the reasoning. @@ -91,12 +94,13 @@ class DSP Reset(sampleRate, maxBufferSize); prewarm(); } - void SetInputLevel(const int channel, const double inputLevel); + void SetInputLevel(const double inputLevel); // Set the loudness, in dB. // This is usually defined to be the loudness to a standardized input. The trainer has its own, but you can always // use this to define it a different way if you like yours better. + // Note: loudness is assumed global over all outputs. 
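+  // Usage sketch (values are illustrative): after loading a model, a host
+  // might call SetLoudness(-18.0) once; there is no per-channel variant, so
+  // the same loudness applies however many output channels the model has.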
void SetLoudness(const double loudness); - void SetOutputLevel(const int channel, const double outputLevel); + void SetOutputLevel(const double outputLevel); protected: bool mHasLoudness = false; @@ -124,8 +128,9 @@ class DSP bool haveLevel = false; float level = 0.0; }; - std::vector mInputLevels; - std::vector mOutputLevels; + // Note: input/output levels are assumed global over all inputs/outputs + Level mInputLevel; + Level mOutputLevel; }; // Class where an input buffer is kept so that long-time effects can be @@ -139,8 +144,8 @@ class Buffer : public DSP protected: int _receptive_field; - // First location where we add new samples from the input (per channel) - std::vector _input_buffer_offset; + // First location where we add new samples from the input (same for all channels) + long _input_buffer_offset; // Per-channel input buffers std::vector> _input_buffers; std::vector> _output_buffers; diff --git a/NAM/lstm.cpp b/NAM/lstm.cpp index ada9580..7104553 100644 --- a/NAM/lstm.cpp +++ b/NAM/lstm.cpp @@ -69,28 +69,59 @@ nam::lstm::LSTM::LSTM(const int in_channels, const int out_channels, const int n const int hidden_size, std::vector& weights, const double expected_sample_rate) : DSP(in_channels, out_channels, expected_sample_rate) { - this->_input.resize(1); + // Allocate input and output vectors + this->_input.resize(input_size); + this->_output.resize(out_channels); + + // Store input_size for first layer + this->_first_layer_input_size = input_size; + std::vector::iterator it = weights.begin(); for (int i = 0; i < num_layers; i++) this->_layers.push_back(LSTMCell(i == 0 ? input_size : hidden_size, hidden_size, it)); - this->_head_weight.resize(hidden_size); - for (int i = 0; i < hidden_size; i++) - this->_head_weight[i] = *(it++); - this->_head_bias = *(it++); + + // Load head weight as matrix (out_channels x hidden_size) + // Weights are stored row-major: first row (output 0), then row 1 (output 1), etc. + this->_head_weight.resize(out_channels, hidden_size); + for (int out_ch = 0; out_ch < out_channels; out_ch++) + { + for (int h = 0; h < hidden_size; h++) + { + this->_head_weight(out_ch, h) = *(it++); + } + } + + // Load head bias as vector (out_channels) + this->_head_bias.resize(out_channels); + for (int out_ch = 0; out_ch < out_channels; out_ch++) + { + this->_head_bias(out_ch) = *(it++); + } + assert(it == weights.end()); } void nam::lstm::LSTM::process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) { + const int in_channels = NumInputChannels(); const int out_channels = NumOutputChannels(); - // For now, process first input channel and replicate to all output channels - // Can be extended later for true multi-channel support for (int i = 0; i < num_frames; i++) { - const float sample = this->_process_sample(input[0][i]); + // Copy multi-channel input to _input vector + for (int ch = 0; ch < in_channels; ch++) + { + this->_input(ch) = input[ch][i]; + } + + // Process sample (stores result in _output) + this->_process_sample(); + + // Copy multi-channel output from _output to output arrays for (int ch = 0; ch < out_channels; ch++) - output[ch][i] = sample; + { + output[ch][i] = this->_output(ch); + } } } @@ -102,15 +133,37 @@ int nam::lstm::LSTM::PrewarmSamples() return result <= 0 ? 
1 : result; } -float nam::lstm::LSTM::_process_sample(const float x) +void nam::lstm::LSTM::_process_sample() { + const int in_channels = NumInputChannels(); + const int out_channels = NumOutputChannels(); + if (this->_layers.size() == 0) - return x; - this->_input(0) = x; + { + // No layers - pass input through to output (using first in_channels of output) + const int channels_to_copy = std::min(in_channels, out_channels); + for (int ch = 0; ch < channels_to_copy; ch++) + this->_output(ch) = this->_input(ch); + // Zero-fill remaining output channels if in_channels < out_channels + for (int ch = channels_to_copy; ch < out_channels; ch++) + this->_output(ch) = 0.0f; + return; + } + this->_layers[0].process_(this->_input); for (size_t i = 1; i < this->_layers.size(); i++) this->_layers[i].process_(this->_layers[i - 1].get_hidden_state()); - return this->_head_weight.dot(this->_layers[this->_layers.size() - 1].get_hidden_state()) + this->_head_bias; + + // Compute output using head weight matrix and bias vector + // _output = _head_weight * hidden_state + _head_bias + const Eigen::VectorXf& hidden_state = this->_layers[this->_layers.size() - 1].get_hidden_state(); + + // Compute matrix-vector product: (out_channels x hidden_size) * (hidden_size) = (out_channels) + // Store directly in _output (which is already sized correctly in constructor) + this->_output.noalias() = this->_head_weight * hidden_state; + + // Add bias: (out_channels) += (out_channels) + this->_output.noalias() += this->_head_bias; } // Factory to instantiate from nlohmann json diff --git a/NAM/lstm.h b/NAM/lstm.h index 251e01b..2eca8d4 100644 --- a/NAM/lstm.h +++ b/NAM/lstm.h @@ -59,16 +59,18 @@ class LSTM : public DSP // Hacky, but a half-second seems to work for most models. int PrewarmSamples() override; - Eigen::VectorXf _head_weight; - float _head_bias; + Eigen::MatrixXf _head_weight; // (out_channels x hidden_size) + Eigen::VectorXf _head_bias; // (out_channels) void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override; std::vector _layers; - float _process_sample(const float x); + void _process_sample(); // Input to the LSTM. 
- // Since this is assumed to not be a parametric model, its shape should be (1,) + // Since this is assumed to not be a parametric model, its shape should be (in_channels,) Eigen::VectorXf _input; + // Output from _process_sample - multi-channel output vector (size out_channels) + Eigen::VectorXf _output; }; // Factory to instantiate from nlohmann json diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp index 748a3c9..8405e27 100644 --- a/NAM/wavenet.cpp +++ b/NAM/wavenet.cpp @@ -261,9 +261,9 @@ void nam::wavenet::WaveNet::_set_condition_array(NAM_SAMPLE** input, const int n { const int in_channels = NumInputChannels(); const int condition_dim = this->_get_condition_dim(); - + assert(in_channels <= condition_dim); - + // Fill condition array with input channels for (int ch = 0; ch < in_channels; ch++) { diff --git a/NAM/wavenet.h b/NAM/wavenet.h index 4ca370f..c67b9a5 100644 --- a/NAM/wavenet.h +++ b/NAM/wavenet.h @@ -174,9 +174,8 @@ class _LayerArray class WaveNet : public DSP { public: - WaveNet(const int in_channels, const int out_channels, const std::vector& layer_array_params, - const float head_scale, const bool with_head, std::vector weights, - const double expected_sample_rate = -1.0); + WaveNet(const int in_channels, const std::vector& layer_array_params, const float head_scale, + const bool with_head, std::vector weights, const double expected_sample_rate = -1.0); ~WaveNet() = default; void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override; void set_weights_(std::vector& weights); diff --git a/tools/test/test_wavenet/test_full.cpp b/tools/test/test_wavenet/test_full.cpp index dcb1941..122ea0b 100644 --- a/tools/test/test_wavenet/test_full.cpp +++ b/tools/test/test_wavenet/test_full.cpp @@ -47,8 +47,8 @@ void test_wavenet_model() weights.push_back(1.0f); // Head rechannel weights.push_back(head_scale); // Head scale - auto wavenet = std::make_unique( - input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0); + auto wavenet = + std::make_unique(input_size, layer_array_params, head_scale, with_head, weights, 48000.0); const int numFrames = 4; const int maxBufferSize = 64; @@ -107,8 +107,8 @@ void test_wavenet_multiple_arrays() weights.insert(weights.end(), {1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}); weights.push_back(head_scale); - auto wavenet = std::make_unique( - input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0); + auto wavenet = + std::make_unique(input_size, layer_array_params, head_scale, with_head, weights, 48000.0); const int numFrames = 4; const int maxBufferSize = 64; @@ -153,8 +153,8 @@ void test_wavenet_zero_input() std::vector weights{1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f, head_scale}; - auto wavenet = std::make_unique( - input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0); + auto wavenet = + std::make_unique(input_size, layer_array_params, head_scale, with_head, weights, 48000.0); const int numFrames = 4; wavenet->Reset(48000.0, numFrames); @@ -198,8 +198,8 @@ void test_wavenet_different_buffer_sizes() std::vector weights{1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f, head_scale}; - auto wavenet = std::make_unique( - input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0); + auto wavenet = + std::make_unique(input_size, layer_array_params, head_scale, with_head, weights, 48000.0); // Test with different buffer sizes wavenet->Reset(48000.0, 64); @@ -265,8 +265,8 @@ void test_wavenet_prewarm() weights.push_back(1.0f); 
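  // Ordering note (per the weight-layout comments in these tests): the 1.0f
  // just above appears to be the head-rechannel weight for the final layer
  // array, and head_scale below is always the last entry in the weight vector.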
weights.push_back(head_scale); - auto wavenet = std::make_unique( - input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0); + auto wavenet = + std::make_unique(input_size, layer_array_params, head_scale, with_head, weights, 48000.0); // Test that prewarm can be called without errors wavenet->Reset(48000.0, 64); diff --git a/tools/test/test_wavenet/test_real_time_safe.cpp b/tools/test/test_wavenet/test_real_time_safe.cpp index e0c2261..fe0f6c3 100644 --- a/tools/test/test_wavenet/test_real_time_safe.cpp +++ b/tools/test/test_wavenet/test_real_time_safe.cpp @@ -775,8 +775,8 @@ void test_process_realtime_safe() weights.insert(weights.end(), {1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}); weights.push_back(head_scale); - auto wavenet = std::make_unique( - input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0); + auto wavenet = + std::make_unique(input_size, layer_array_params, head_scale, with_head, weights, 48000.0); const int maxBufferSize = 256; wavenet->Reset(48000.0, maxBufferSize); From 58457877fe43768c9adbacecfc6b49e631899e68 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Thu, 15 Jan 2026 22:52:01 -0800 Subject: [PATCH 05/19] Tweak how .gitignore for build/ directory --- .gitignore | 2 ++ build/.gitignore | 4 ---- 2 files changed, 2 insertions(+), 4 deletions(-) delete mode 100644 build/.gitignore diff --git a/.gitignore b/.gitignore index 8604b38..b3663e3 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,5 @@ *.app .vscode/ + +build/ diff --git a/build/.gitignore b/build/.gitignore deleted file mode 100644 index 86d0cb2..0000000 --- a/build/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -# Ignore everything in this directory -* -# Except this file -!.gitignore \ No newline at end of file From abba2d046510ffdb0f4996d14d239faf26bbf53e Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Thu, 15 Jan 2026 23:00:52 -0800 Subject: [PATCH 06/19] Fix warnings --- NAM/convnet.cpp | 1 - NAM/lstm.cpp | 3 --- NAM/wavenet.cpp | 1 - 3 files changed, 5 deletions(-) diff --git a/NAM/convnet.cpp b/NAM/convnet.cpp index 4e3b4fd..8bbcded 100644 --- a/NAM/convnet.cpp +++ b/NAM/convnet.cpp @@ -156,7 +156,6 @@ void nam::convnet::_Head::process_(const Eigen::MatrixXf& input, Eigen::MatrixXf { const long length = i_end - i_start; const long out_channels = this->_weight.rows(); - const long in_channels = this->_weight.cols(); // Resize output to (out_channels x length) output.resize(out_channels, length); diff --git a/NAM/lstm.cpp b/NAM/lstm.cpp index 7104553..5cf6f18 100644 --- a/NAM/lstm.cpp +++ b/NAM/lstm.cpp @@ -73,9 +73,6 @@ nam::lstm::LSTM::LSTM(const int in_channels, const int out_channels, const int n this->_input.resize(input_size); this->_output.resize(out_channels); - // Store input_size for first layer - this->_first_layer_input_size = input_size; - std::vector::iterator it = weights.begin(); for (int i = 0; i < num_layers; i++) this->_layers.push_back(LSTMCell(i == 0 ? 
input_size : hidden_size, hidden_size, it)); diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp index 8405e27..5dfe0f8 100644 --- a/NAM/wavenet.cpp +++ b/NAM/wavenet.cpp @@ -349,7 +349,6 @@ std::unique_ptr nam::wavenet::Factory(const nlohmann::json& config, st // Determine input channels from config or first layer const int in_channels = config.value("in_channels", layer_array_params[0].input_size); - const int out_channels = config.value("out_channels", layer_array_params.back().head_size); // out_channels is determined from last layer array's head_size return std::make_unique( From bd4e5a5ce40f0a9b4aa8ec3c9a66a41f5c8968eb Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Thu, 15 Jan 2026 23:06:37 -0800 Subject: [PATCH 07/19] Refactor benchmark_compare.sh to simplify build directory cleanup - Updated the script to remove all contents from the build directory instead of only untracked files, streamlining the cleanup process before running benchmarks. --- tools/benchmark_compare.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/benchmark_compare.sh b/tools/benchmark_compare.sh index e742fd1..20448a4 100755 --- a/tools/benchmark_compare.sh +++ b/tools/benchmark_compare.sh @@ -33,10 +33,9 @@ run_benchmark() { echo -e "${YELLOW}Running benchmark on branch: ${branch_name}${NC}" - # Clean build directory - remove only untracked files, preserve tracked files like .gitignore + # Clean build directory - remove all contents since nothing is tracked if [ -d "$BUILD_DIR" ]; then - # Remove files/directories that aren't tracked by git (process depth-first) - find "$BUILD_DIR" -mindepth 1 -depth -exec sh -c 'if ! git ls-files --error-unmatch "$1" >/dev/null 2>&1; then rm -rf "$1"; fi' _ {} \; + rm -rf "$BUILD_DIR"/* fi mkdir -p "$BUILD_DIR" From 46d146352c80179276723c3cd17add2eb5b14368 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Thu, 15 Jan 2026 23:08:02 -0800 Subject: [PATCH 08/19] Revert "Tweak how .gitignore for build/ directory" This reverts commit 58457877fe43768c9adbacecfc6b49e631899e68. --- .gitignore | 2 -- build/.gitignore | 4 ++++ 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 build/.gitignore diff --git a/.gitignore b/.gitignore index b3663e3..8604b38 100644 --- a/.gitignore +++ b/.gitignore @@ -32,5 +32,3 @@ *.app .vscode/ - -build/ diff --git a/build/.gitignore b/build/.gitignore new file mode 100644 index 0000000..86d0cb2 --- /dev/null +++ b/build/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore \ No newline at end of file From 629aeea8f2f4833bfb102d8c5b18dc7e3a639ab3 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Thu, 15 Jan 2026 23:08:15 -0800 Subject: [PATCH 09/19] Revert "Refactor benchmark_compare.sh to simplify build directory cleanup" This reverts commit bd4e5a5ce40f0a9b4aa8ec3c9a66a41f5c8968eb. 
--- tools/benchmark_compare.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/benchmark_compare.sh b/tools/benchmark_compare.sh index 20448a4..e742fd1 100755 --- a/tools/benchmark_compare.sh +++ b/tools/benchmark_compare.sh @@ -33,9 +33,10 @@ run_benchmark() { echo -e "${YELLOW}Running benchmark on branch: ${branch_name}${NC}" - # Clean build directory - remove all contents since nothing is tracked + # Clean build directory - remove only untracked files, preserve tracked files like .gitignore if [ -d "$BUILD_DIR" ]; then - rm -rf "$BUILD_DIR"/* + # Remove files/directories that aren't tracked by git (process depth-first) + find "$BUILD_DIR" -mindepth 1 -depth -exec sh -c 'if ! git ls-files --error-unmatch "$1" >/dev/null 2>&1; then rm -rf "$1"; fi' _ {} \; fi mkdir -p "$BUILD_DIR" From 05ff3667e40adaacc1721358fc6f0ee88e80ea05 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Fri, 16 Jan 2026 15:11:52 -0800 Subject: [PATCH 10/19] Add LSTM tests to run_tests.cpp - Included multiple LSTM test cases to validate various functionalities, including basic operations, handling of different buffer sizes, and state evolution. - Ensured comprehensive coverage for LSTM layers by adding tests for multichannel inputs and large hidden sizes. --- tools/run_tests.cpp | 14 ++ tools/test/test_lstm.cpp | 451 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 465 insertions(+) create mode 100644 tools/test/test_lstm.cpp diff --git a/tools/run_tests.cpp b/tools/run_tests.cpp index 33c4d45..065d7d9 100644 --- a/tools/run_tests.cpp +++ b/tools/run_tests.cpp @@ -18,6 +18,7 @@ #include "test/test_wavenet_gating_compatibility.cpp" #include "test/test_blending_detailed.cpp" #include "test/test_input_buffer_verification.cpp" +#include "test/test_lstm.cpp" int main() { @@ -133,6 +134,19 @@ int main() test_convnet::test_convnet_prewarm(); test_convnet::test_convnet_multiple_calls(); + // LSTM tests + test_lstm::test_lstm_basic(); + test_lstm::test_lstm_multiple_layers(); + test_lstm::test_lstm_zero_input(); + test_lstm::test_lstm_different_buffer_sizes(); + test_lstm::test_lstm_prewarm(); + test_lstm::test_lstm_multiple_calls(); + test_lstm::test_lstm_multichannel(); + test_lstm::test_lstm_large_hidden_size(); + test_lstm::test_lstm_different_input_size(); + test_lstm::test_lstm_state_evolution(); + test_lstm::test_lstm_no_layers(); + // Gating activations tests test_gating_activations::TestGatingActivation::test_basic_functionality(); test_gating_activations::TestGatingActivation::test_with_custom_activations(); diff --git a/tools/test/test_lstm.cpp b/tools/test/test_lstm.cpp new file mode 100644 index 0000000..8c655b9 --- /dev/null +++ b/tools/test/test_lstm.cpp @@ -0,0 +1,451 @@ +// Tests for LSTM + +#include +#include +#include +#include +#include + +#include "NAM/lstm.h" + +namespace test_lstm +{ +// Helper function to calculate weights needed for LSTM +// For each LSTMCell: +// - Weight matrix: (4 * hidden_size) x (input_size + hidden_size) in row-major order +// - Bias: 4 * hidden_size +// - Initial hidden state: hidden_size (stored in second half of _xh) +// - Initial cell state: hidden_size +// For the LSTM: +// - Head weight matrix: out_channels x hidden_size in row-major order +// - Head bias: out_channels +std::vector create_lstm_weights(int num_layers, int input_size, int hidden_size, int out_channels) +{ + std::vector weights; + + for (int layer = 0; layer < num_layers; layer++) + { + int layer_input_size = (layer == 0) ? 
input_size : hidden_size; + int w_rows = 4 * hidden_size; + int w_cols = layer_input_size + hidden_size; + + // Weight matrix (row-major) + for (int i = 0; i < w_rows * w_cols; i++) + { + weights.push_back(0.1f); // Small weights for stability + } + + // Bias vector + for (int i = 0; i < 4 * hidden_size; i++) + { + weights.push_back(0.0f); + } + + // Initial hidden state (stored in _xh) + for (int i = 0; i < hidden_size; i++) + { + weights.push_back(0.0f); + } + + // Initial cell state + for (int i = 0; i < hidden_size; i++) + { + weights.push_back(0.0f); + } + } + + // Head weight matrix (row-major: out_channels x hidden_size) + for (int out_ch = 0; out_ch < out_channels; out_ch++) + { + for (int h = 0; h < hidden_size; h++) + { + weights.push_back(0.1f); + } + } + + // Head bias + for (int out_ch = 0; out_ch < out_channels; out_ch++) + { + weights.push_back(0.0f); + } + + return weights; +} + +// Test basic LSTM construction and processing +void test_lstm_basic() +{ + const int in_channels = 1; + const int out_channels = 1; + const int num_layers = 1; + const int input_size = 1; + const int hidden_size = 4; + const double expected_sample_rate = 48000.0; + + std::vector weights = create_lstm_weights(num_layers, input_size, hidden_size, out_channels); + + nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expected_sample_rate); + + const int numFrames = 4; + const int maxBufferSize = 64; + lstm.Reset(expected_sample_rate, maxBufferSize); + + std::vector input(numFrames, 1.0f); + std::vector output(numFrames, 0.0f); + NAM_SAMPLE* inputPtrs[] = {input.data()}; + NAM_SAMPLE* outputPtrs[] = {output.data()}; + + lstm.process(inputPtrs, outputPtrs, numFrames); + + // Verify output dimensions + assert(output.size() == numFrames); + // Output should be non-zero and finite + for (int i = 0; i < numFrames; i++) + { + assert(std::isfinite(output[i])); + } +} + +// Test LSTM with multiple layers +void test_lstm_multiple_layers() +{ + const int in_channels = 1; + const int out_channels = 1; + const int num_layers = 2; + const int input_size = 1; + const int hidden_size = 4; + const double expected_sample_rate = 48000.0; + + std::vector weights = create_lstm_weights(num_layers, input_size, hidden_size, out_channels); + + nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expected_sample_rate); + + const int numFrames = 8; + const int maxBufferSize = 64; + lstm.Reset(expected_sample_rate, maxBufferSize); + + std::vector input(numFrames, 0.5f); + std::vector output(numFrames, 0.0f); + NAM_SAMPLE* inputPtrs[] = {input.data()}; + NAM_SAMPLE* outputPtrs[] = {output.data()}; + + lstm.process(inputPtrs, outputPtrs, numFrames); + + assert(output.size() == numFrames); + for (int i = 0; i < numFrames; i++) + { + assert(std::isfinite(output[i])); + } +} + +// Test LSTM with zero input +void test_lstm_zero_input() +{ + const int in_channels = 1; + const int out_channels = 1; + const int num_layers = 1; + const int input_size = 1; + const int hidden_size = 4; + const double expected_sample_rate = 48000.0; + + std::vector weights = create_lstm_weights(num_layers, input_size, hidden_size, out_channels); + + nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expected_sample_rate); + + const int numFrames = 4; + lstm.Reset(expected_sample_rate, numFrames); + + std::vector input(numFrames, 0.0f); + std::vector output(numFrames, 0.0f); + NAM_SAMPLE* inputPtrs[] = {input.data()}; + NAM_SAMPLE* 
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
+
+  lstm.process(inputPtrs, outputPtrs, numFrames);
+
+  // With zero input, output should be finite (may be zero or non-zero depending on bias)
+  for (int i = 0; i < numFrames; i++)
+  {
+    assert(std::isfinite(output[i]));
+  }
+}
+
+// Test LSTM with different buffer sizes
+void test_lstm_different_buffer_sizes()
+{
+  const int in_channels = 1;
+  const int out_channels = 1;
+  const int num_layers = 1;
+  const int input_size = 1;
+  const int hidden_size = 4;
+  const double expected_sample_rate = 48000.0;
+
+  std::vector<float> weights = create_lstm_weights(num_layers, input_size, hidden_size, out_channels);
+
+  nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expected_sample_rate);
+
+  // Test with different buffer sizes
+  lstm.Reset(expected_sample_rate, 64);
+  std::vector<NAM_SAMPLE> input1(32, 1.0f);
+  std::vector<NAM_SAMPLE> output1(32, 0.0f);
+  NAM_SAMPLE* inputPtrs1[] = {input1.data()};
+  NAM_SAMPLE* outputPtrs1[] = {output1.data()};
+  lstm.process(inputPtrs1, outputPtrs1, 32);
+
+  lstm.Reset(expected_sample_rate, 128);
+  std::vector<NAM_SAMPLE> input2(64, 1.0f);
+  std::vector<NAM_SAMPLE> output2(64, 0.0f);
+  NAM_SAMPLE* inputPtrs2[] = {input2.data()};
+  NAM_SAMPLE* outputPtrs2[] = {output2.data()};
+  lstm.process(inputPtrs2, outputPtrs2, 64);
+
+  // Both should work without errors
+  assert(output1.size() == 32);
+  assert(output2.size() == 64);
+}
+
+// Test LSTM prewarm functionality
+void test_lstm_prewarm()
+{
+  const int in_channels = 1;
+  const int out_channels = 1;
+  const int num_layers = 1;
+  const int input_size = 1;
+  const int hidden_size = 4;
+  const double expected_sample_rate = 48000.0;
+
+  std::vector<float> weights = create_lstm_weights(num_layers, input_size, hidden_size, out_channels);
+
+  nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expected_sample_rate);
+
+  // Test that prewarm can be called without errors
+  lstm.Reset(expected_sample_rate, 64);
+  lstm.prewarm();
+
+  // After prewarm, processing should work
+  const int numFrames = 4;
+  std::vector<NAM_SAMPLE> input(numFrames, 1.0f);
+  std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
+  lstm.process(inputPtrs, outputPtrs, numFrames);
+
+  // Output should be finite
+  for (int i = 0; i < numFrames; i++)
+  {
+    assert(std::isfinite(output[i]));
+  }
+}
+
+// Test multiple process() calls (state persistence)
+void test_lstm_multiple_calls()
+{
+  const int in_channels = 1;
+  const int out_channels = 1;
+  const int num_layers = 1;
+  const int input_size = 1;
+  const int hidden_size = 4;
+  const double expected_sample_rate = 48000.0;
+
+  std::vector<float> weights = create_lstm_weights(num_layers, input_size, hidden_size, out_channels);
+
+  nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expected_sample_rate);
+
+  const int numFrames = 2;
+  lstm.Reset(expected_sample_rate, numFrames);
+
+  // Multiple calls should work correctly with state persistence
+  for (int i = 0; i < 5; i++)
+  {
+    std::vector<NAM_SAMPLE> input(numFrames, 1.0f);
+    std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+    NAM_SAMPLE* inputPtrs[] = {input.data()};
+    NAM_SAMPLE* outputPtrs[] = {output.data()};
+    lstm.process(inputPtrs, outputPtrs, numFrames);
+
+    // Output should be finite
+    for (int j = 0; j < numFrames; j++)
+    {
+      assert(std::isfinite(output[j]));
+    }
+  }
+}
+
+// Test LSTM with multi-channel input/output
+void test_lstm_multichannel()
+{
+  const int in_channels = 2;
+  const int out_channels = 2;
+  const int num_layers = 1;
+  const int input_size = 2;
+  const int hidden_size = 4;
+  const double expected_sample_rate = 48000.0;
+
+  std::vector<float> weights = create_lstm_weights(num_layers, input_size, hidden_size, out_channels);
+
+  nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expected_sample_rate);
+
+  const int numFrames = 4;
+  lstm.Reset(expected_sample_rate, 64);
+
+  std::vector<NAM_SAMPLE> input1(numFrames, 0.5f);
+  std::vector<NAM_SAMPLE> input2(numFrames, 0.3f);
+  std::vector<NAM_SAMPLE> output1(numFrames, 0.0f);
+  std::vector<NAM_SAMPLE> output2(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input1.data(), input2.data()};
+  NAM_SAMPLE* outputPtrs[] = {output1.data(), output2.data()};
+
+  lstm.process(inputPtrs, outputPtrs, numFrames);
+
+  // Verify both output channels are finite
+  for (int i = 0; i < numFrames; i++)
+  {
+    assert(std::isfinite(output1[i]));
+    assert(std::isfinite(output2[i]));
+  }
+}
+
+// Test LSTM with larger hidden size
+void test_lstm_large_hidden_size()
+{
+  const int in_channels = 1;
+  const int out_channels = 1;
+  const int num_layers = 1;
+  const int input_size = 1;
+  const int hidden_size = 16;
+  const double expected_sample_rate = 48000.0;
+
+  std::vector<float> weights = create_lstm_weights(num_layers, input_size, hidden_size, out_channels);
+
+  nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expected_sample_rate);
+
+  const int numFrames = 4;
+  lstm.Reset(expected_sample_rate, 64);
+
+  std::vector<NAM_SAMPLE> input(numFrames, 1.0f);
+  std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
+
+  lstm.process(inputPtrs, outputPtrs, numFrames);
+
+  for (int i = 0; i < numFrames; i++)
+  {
+    assert(std::isfinite(output[i]));
+  }
+}
+
+// Test LSTM with different input sizes
+void test_lstm_different_input_size()
+{
+  const int in_channels = 3;
+  const int out_channels = 1;
+  const int num_layers = 1;
+  const int input_size = 3;
+  const int hidden_size = 4;
+  const double expected_sample_rate = 48000.0;
+
+  std::vector<float> weights = create_lstm_weights(num_layers, input_size, hidden_size, out_channels);
+
+  nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expected_sample_rate);
+
+  const int numFrames = 4;
+  lstm.Reset(expected_sample_rate, 64);
+
+  std::vector<NAM_SAMPLE> input1(numFrames, 0.1f);
+  std::vector<NAM_SAMPLE> input2(numFrames, 0.2f);
+  std::vector<NAM_SAMPLE> input3(numFrames, 0.3f);
+  std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input1.data(), input2.data(), input3.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
+
+  lstm.process(inputPtrs, outputPtrs, numFrames);
+
+  for (int i = 0; i < numFrames; i++)
+  {
+    assert(std::isfinite(output[i]));
+  }
+}
+
+// Test LSTM state evolution over time
+void test_lstm_state_evolution()
+{
+  const int in_channels = 1;
+  const int out_channels = 1;
+  const int num_layers = 1;
+  const int input_size = 1;
+  const int hidden_size = 4;
+  const double expected_sample_rate = 48000.0;
+
+  std::vector<float> weights = create_lstm_weights(num_layers, input_size, hidden_size, out_channels);
+
+  nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expected_sample_rate);
+
+  const int numFrames = 10;
+  lstm.Reset(expected_sample_rate, 64);
+
+  // Create a sine wave input
+  std::vector<NAM_SAMPLE> input(numFrames);
+  for (int i = 0; i < numFrames; i++)
+  {
+    input[i] = 0.5f * std::sin(2.0f * M_PI * i / numFrames);
+  }
+
+  std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
+
+  lstm.process(inputPtrs, outputPtrs, numFrames);
+
+  // Output should be finite and may vary over time as the internal state evolves
+  for (int i = 0; i < numFrames; i++)
+  {
+    assert(std::isfinite(output[i]));
+  }
+}
+
+// Test LSTM with no layers (edge case)
+void test_lstm_no_layers()
+{
+  const int in_channels = 1;
+  const int out_channels = 1;
+  const int num_layers = 0;
+  const int input_size = 1;
+  const int hidden_size = 4;
+  const double expected_sample_rate = 48000.0;
+
+  // With no layers, we still need head weights
+  std::vector<float> weights;
+  // Head weight matrix (row-major: out_channels x hidden_size)
+  for (int out_ch = 0; out_ch < out_channels; out_ch++)
+  {
+    for (int h = 0; h < hidden_size; h++)
+    {
+      weights.push_back(0.0f); // Zero head weights give zero contribution from the (empty) state
+    }
+  }
+  // Head bias
+  for (int out_ch = 0; out_ch < out_channels; out_ch++)
+  {
+    weights.push_back(0.0f);
+  }
+
+  nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expected_sample_rate);
+
+  const int numFrames = 4;
+  lstm.Reset(expected_sample_rate, 64);
+
+  std::vector<NAM_SAMPLE> input(numFrames, 1.0f);
+  std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
+
+  lstm.process(inputPtrs, outputPtrs, numFrames);
+
+  // With no LSTM layers and zero head weights/bias, processing should still produce finite output
+  for (int i = 0; i < numFrames; i++)
+  {
+    assert(std::isfinite(output[i]));
+  }
+}
+
+} // namespace test_lstm

From 747231777de847de2da46eb49fca4d228f5ea1a0 Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Fri, 16 Jan 2026 15:12:22 -0800
Subject: [PATCH 11/19] Refactor ConvNet test cases to use defined input and
 output channel constants

- Updated multiple ConvNet test functions to replace hardcoded input and
  output channel values with defined constants for better readability and
  maintainability.
- Ensured consistency across tests by using the same channel definitions in
  all relevant instances; the resulting pattern is sketched below.
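A minimal illustration (identifiers as used in the updated tests; the weight
setup is elided, so this is a sketch rather than a complete test):

  const int in_channels = 1; // named constants instead of bare 1, 1 arguments
  const int out_channels = 1;
  nam::convnet::ConvNet convnet(
    in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);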
---
 tools/test/test_convnet.cpp | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/tools/test/test_convnet.cpp b/tools/test/test_convnet.cpp
index 8966967..2c0d428 100644
--- a/tools/test/test_convnet.cpp
+++ b/tools/test/test_convnet.cpp
@@ -13,6 +13,8 @@ namespace test_convnet
 // Test basic ConvNet construction and processing
 void test_convnet_basic()
 {
+  const int in_channels = 1;
+  const int out_channels = 1;
   const int channels = 2;
   const std::vector<int> dilations{1, 2};
   const bool batchnorm = false;
@@ -32,7 +34,7 @@ void test_convnet_basic()
   // Head weights (2 weights + 1 bias)
   weights.insert(weights.end(), {1.0f, 1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   const int numFrames = 4;
   const int maxBufferSize = 64;
@@ -57,6 +59,8 @@
 // Test ConvNet with batchnorm
 void test_convnet_batchnorm()
 {
+  const int in_channels = 1;
+  const int out_channels = 1;
   const int channels = 1;
   const std::vector<int> dilations{1};
   const bool batchnorm = true;
@@ -76,7 +80,7 @@
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   const int numFrames = 4;
   const int maxBufferSize = 64;
@@ -99,6 +103,8 @@
 // Test ConvNet with multiple blocks
 void test_convnet_multiple_blocks()
 {
+  const int in_channels = 1;
+  const int out_channels = 1;
   const int channels = 2;
   const std::vector<int> dilations{1, 2, 4};
   const bool batchnorm = false;
@@ -121,7 +127,7 @@
   // Head weights
   weights.insert(weights.end(), {1.0f, 1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   const int numFrames = 8;
   const int maxBufferSize = 64;
@@ -144,6 +150,8 @@
 // Test ConvNet with zero input
 void test_convnet_zero_input()
 {
+  const int in_channels = 1;
+  const int out_channels = 1;
   const int channels = 1;
   const std::vector<int> dilations{1};
   const bool batchnorm = false;
@@ -156,7 +164,7 @@
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   const int numFrames = 4;
   convnet.Reset(expected_sample_rate, numFrames);
@@ -178,6 +186,8 @@
 // Test ConvNet with different buffer sizes
 void test_convnet_different_buffer_sizes()
 {
+  const int in_channels = 1;
+  const int out_channels = 1;
   const int channels = 1;
   const std::vector<int> dilations{1};
   const bool batchnorm = false;
@@ -190,7 +200,7 @@
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   // Test with different buffer sizes
   convnet.Reset(expected_sample_rate, 64);
@@ -215,6 +225,8 @@
 // Test ConvNet prewarm functionality
 void test_convnet_prewarm()
 {
+  const int in_channels = 1;
+  const int out_channels = 1;
   const int channels = 2;
   const std::vector<int> dilations{1, 2, 4};
   const bool batchnorm = false;
@@ -231,7 +243,7 @@
   // Head weights (2 weights + 1 bias)
   weights.insert(weights.end(), {1.0f, 1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   // Test that prewarm can be called without errors
   convnet.Reset(expected_sample_rate, 64);
@@ -255,6 +267,8 @@
 // Test multiple process() calls (ring buffer functionality)
 void test_convnet_multiple_calls()
 {
+  const int in_channels = 1;
+  const int out_channels = 1;
   const int channels = 1;
   const std::vector<int> dilations{1};
   const bool batchnorm = false;
@@ -267,7 +281,7 @@
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   const int numFrames = 2;
   convnet.Reset(expected_sample_rate, numFrames);

From 02403a635f714fca3832113d28dcf981a0b4a727 Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Fri, 16 Jan 2026 15:12:37 -0800
Subject: [PATCH 12/19] Formatting

---
 tools/test/test_convnet.cpp | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/tools/test/test_convnet.cpp b/tools/test/test_convnet.cpp
index 2c0d428..56bd5ec 100644
--- a/tools/test/test_convnet.cpp
+++ b/tools/test/test_convnet.cpp
@@ -34,7 +34,8 @@ void test_convnet_basic()
   // Head weights (2 weights + 1 bias)
   weights.insert(weights.end(), {1.0f, 1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(
+    in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   const int numFrames = 4;
   const int maxBufferSize = 64;
@@ -80,7 +81,8 @@
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(
+    in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   const int numFrames = 4;
   const int maxBufferSize = 64;
@@ -127,7 +129,8 @@
   // Head weights
   weights.insert(weights.end(), {1.0f, 1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(
+    in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   const int numFrames = 8;
   const int maxBufferSize = 64;
@@ -164,7 +167,8 @@
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(
+    in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   const int numFrames = 4;
   convnet.Reset(expected_sample_rate, numFrames);
@@ -200,7 +204,8 @@
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(
+    in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   // Test with different buffer sizes
   convnet.Reset(expected_sample_rate, 64);
@@ -243,7 +248,8 @@
   // Head weights (2 weights + 1 bias)
   weights.insert(weights.end(), {1.0f, 1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(
+    in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   // Test that prewarm can be called without errors
   convnet.Reset(expected_sample_rate, 64);
@@ -281,7 +287,8 @@
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(
+    in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   const int numFrames = 2;
   convnet.Reset(expected_sample_rate, numFrames);

From c9c32280dab7b0020e6cc0dda28ac4304deecbd8 Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Fri, 16 Jan 2026 15:28:50 -0800
Subject: [PATCH 13/19] LSTM: Default 1 input channel

---
 NAM/lstm.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NAM/lstm.cpp b/NAM/lstm.cpp
index 5cf6f18..d162d55 100644
--- a/NAM/lstm.cpp
+++ b/NAM/lstm.cpp
@@ -171,7 +171,7 @@ std::unique_ptr<DSP> nam::lstm::Factory(const nlohmann::json& config, std::vector<float>& weights,
   const int input_size = config["input_size"];
   const int hidden_size = config["hidden_size"];
   // Default to 1 channel in/out for backward compatibility
-  const int in_channels = config.value("in_channels", input_size);
+  const int in_channels = config.value("in_channels", 1);
   const int out_channels = config.value("out_channels", 1);
   return std::make_unique<LSTM>(
     in_channels, out_channels, num_layers, input_size, hidden_size, weights, expectedSampleRate);

From 7c5a97f108dd305ec28ee3faba3d59cb72fe970b Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Fri, 16 Jan 2026 15:30:43 -0800
Subject: [PATCH 14/19] Fix WaveNet::_set_condition_array()

---
 NAM/wavenet.cpp | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp
index 5dfe0f8..21fadfb 100644
--- a/NAM/wavenet.cpp
+++ b/NAM/wavenet.cpp
@@ -260,10 +260,6 @@ void nam::wavenet::WaveNet::SetMaxBufferSize(const int maxBufferSize)
 void nam::wavenet::WaveNet::_set_condition_array(NAM_SAMPLE** input, const int num_frames)
 {
   const int in_channels = NumInputChannels();
-  const int condition_dim = this->_get_condition_dim();
-
-  assert(in_channels <= condition_dim);
-
   // Fill condition array with input channels
   for (int ch = 0; ch < in_channels; ch++)
   {
@@ -272,14 +268,6 @@ void nam::wavenet::WaveNet::_set_condition_array(NAM_SAMPLE** input, const int n
       this->_condition(ch, j) = input[ch][j];
     }
   }
-  // Zero-fill remaining condition channels if in_channels < condition_dim
-  for (int ch = in_channels; ch < condition_dim; ch++)
-  {
-    for (int j = 0; j < num_frames; j++)
-    {
-      this->_condition(ch, j) = 0.0f;
-    }
-  }
 }

 void nam::wavenet::WaveNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames)

From 854842fc81d364334e9b806634be85a084b4f3c9 Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Fri, 16 Jan 2026 15:32:37 -0800
Subject: [PATCH 15/19] Fix WaveNet factory for backward compatibility

---
 NAM/wavenet.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp
index 21fadfb..6686f93 100644
--- a/NAM/wavenet.cpp
+++ b/NAM/wavenet.cpp
@@ -335,8 +335,8 @@ std::unique_ptr<DSP> nam::wavenet::Factory(const nlohmann::json& config, std::vector<float>& weights,
   if (layer_array_params.empty())
     throw std::runtime_error("WaveNet config requires at least one layer array");

-  // Determine input channels from config or first layer
-  const int in_channels = config.value("in_channels", layer_array_params[0].input_size);
+  // Backward compatibility: assume 1 input channel
+  const int in_channels = config.value("in_channels", 1);

   // out_channels is determined from last layer array's head_size
   return std::make_unique<WaveNet>(

From 1add97025138ab2b33566b47c46f94344fed7c7b Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Fri, 16 Jan 2026 15:41:39 -0800
Subject: [PATCH 16/19] Fix some issues

---
 NAM/dsp.cpp | 4 ++--
 NAM/lstm.h  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/NAM/dsp.cpp b/NAM/dsp.cpp
index d0a1c4c..023c42a 100644
--- a/NAM/dsp.cpp
+++ b/NAM/dsp.cpp
@@ -16,9 +16,9 @@ constexpr const long _INPUT_BUFFER_SAFETY_FACTOR = 32;

 nam::DSP::DSP(const int in_channels, const int out_channels, const double expected_sample_rate)
-: mInChannels(in_channels)
+: mExpectedSampleRate(expected_sample_rate)
+, mInChannels(in_channels)
 , mOutChannels(out_channels)
-, mExpectedSampleRate(expected_sample_rate)
 {
   if (in_channels <= 0 || out_channels <= 0)
   {

diff --git a/NAM/lstm.h b/NAM/lstm.h
index 2eca8d4..5c03853 100644
--- a/NAM/lstm.h
+++ b/NAM/lstm.h
@@ -54,6 +54,7 @@ class LSTM : public DSP
   LSTM(const int in_channels, const int out_channels, const int num_layers, const int input_size,
        const int hidden_size, std::vector<float>& weights, const double expected_sample_rate = -1.0);
   ~LSTM() = default;
+  void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override;

 protected:
   // Hacky, but a half-second seems to work for most models.
@@ -61,7 +62,6 @@ class LSTM : public DSP
   Eigen::MatrixXf _head_weight; // (out_channels x hidden_size)
   Eigen::VectorXf _head_bias; // (out_channels)

-  void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override;
   std::vector<LSTMCell> _layers;

   void _process_sample();

From be179024a291f48acd128af2ceaf630ee307259e Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Fri, 16 Jan 2026 15:42:26 -0800
Subject: [PATCH 17/19] Add test for WaveNet::process() with 3 input channels
 and 2 output channels

- Implemented a new test case to verify that the process method does not
  allocate or free memory when handling 3 input channels and 2 output
  channels.
- The test includes setup for weights and input/output buffers, ensuring the
  output is valid across various buffer sizes.
- This enhances the coverage of WaveNet functionality in real-time processing
  scenarios.
---
 .../test/test_wavenet/test_real_time_safe.cpp | 123 ++++++++++++++++++
 1 file changed, 123 insertions(+)

diff --git a/tools/test/test_wavenet/test_real_time_safe.cpp b/tools/test/test_wavenet/test_real_time_safe.cpp
index fe0f6c3..0a57539 100644
--- a/tools/test/test_wavenet/test_real_time_safe.cpp
+++ b/tools/test/test_wavenet/test_real_time_safe.cpp
@@ -809,4 +809,127 @@ void test_process_realtime_safe()
     }
   }
 }
+
+// Test that WaveNet::process() with 3 input channels and 2 output channels does not allocate or free memory
+void test_process_3in_2out_realtime_safe()
+{
+  // Setup: Create WaveNet with 3 input channels and 2 output channels
+  const int input_size = 3; // 3 input channels
+  const int condition_size = 3; // condition matches input channels
+  const int head_size = 2; // 2 output channels
+  const int channels = 4; // internal channels
+  const int bottleneck = 2; // bottleneck (will be used for head)
+  const int kernel_size = 1;
+  const std::string activation = "ReLU";
+  const bool gated = false;
+  const bool head_bias = false;
+  const float head_scale = 1.0f;
+  const bool with_head = false;
+  const int groups = 1;
+  const int groups_1x1 = 1;
+
+  std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
+  std::vector<int> dilations1{1};
+  layer_array_params.push_back(nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, channels,
+                                                              bottleneck, kernel_size, std::move(dilations1),
+                                                              activation, gated, head_bias, groups, groups_1x1));
+
+  // Calculate weights:
+  // _rechannel: Conv1x1(3, 4, bias=false) = 3*4 = 12 weights
+  // Layer:
+  //   _conv: Conv1D(4, 2, kernel_size=1, bias=true) = 1*(2*4) + 2 = 10 weights
+  //   _input_mixin: Conv1x1(3, 2, bias=false) = 3*2 = 6 weights
+  //   _1x1: Conv1x1(2, 4, bias=true) = 2*4 + 4 = 12 weights
+  // _head_rechannel: Conv1x1(2, 2, bias=false) = 2*2 = 4 weights
+  // Total: 12 + 10 + 6 + 12 + 4 = 44 weights
+  std::vector<float> weights;
+  // _rechannel weights (3->4): identity-like pattern
+  for (int out_ch = 0; out_ch < 4; out_ch++)
+  {
+    for (int in_ch = 0; in_ch < 3; in_ch++)
+    {
+      weights.push_back((out_ch < 3 && out_ch == in_ch) ? 1.0f : 0.0f);
+    }
+  }
+  // Layer: _conv weights (4->2, kernel_size=1, with bias)
+  // Weight layout: for each kernel position k, for each out_channel, for each in_channel
+  for (int out_ch = 0; out_ch < 2; out_ch++)
+  {
+    for (int in_ch = 0; in_ch < 4; in_ch++)
+    {
+      weights.push_back((out_ch == in_ch) ? 1.0f : 0.0f);
+    }
+  }
+  // _conv bias (2 values)
+  weights.insert(weights.end(), {0.0f, 0.0f});
+  // _input_mixin weights (3->2)
+  for (int out_ch = 0; out_ch < 2; out_ch++)
+  {
+    for (int in_ch = 0; in_ch < 3; in_ch++)
+    {
+      weights.push_back((out_ch == in_ch) ? 1.0f : 0.0f);
+    }
+  }
+  // _1x1 weights (2->4, with bias)
+  for (int out_ch = 0; out_ch < 4; out_ch++)
+  {
+    for (int in_ch = 0; in_ch < 2; in_ch++)
+    {
+      weights.push_back((out_ch < 2 && out_ch == in_ch) ? 1.0f : 0.0f);
+    }
+  }
+  // _1x1 bias (4 values)
+  weights.insert(weights.end(), {0.0f, 0.0f, 0.0f, 0.0f});
+  // _head_rechannel weights (2->2)
+  for (int out_ch = 0; out_ch < 2; out_ch++)
+  {
+    for (int in_ch = 0; in_ch < 2; in_ch++)
+    {
+      weights.push_back((out_ch == in_ch) ? 1.0f : 0.0f);
+    }
+  }
+  weights.push_back(head_scale);
+
+  const int in_channels = 3;
+  auto wavenet =
+    std::make_unique<nam::wavenet::WaveNet>(in_channels, layer_array_params, head_scale, with_head, weights, 48000.0);
+
+  const int maxBufferSize = 256;
+  wavenet->Reset(48000.0, maxBufferSize);
+
+  // Test with several different buffer sizes
+  std::vector<int> buffer_sizes{1, 8, 16, 32, 64, 128, 256};
+
+  for (int buffer_size : buffer_sizes)
+  {
+    // Prepare input/output buffers for 3 input channels and 2 output channels (allocate before tracking)
+    std::vector<std::vector<NAM_SAMPLE>> input(3, std::vector<NAM_SAMPLE>(buffer_size, 0.5f));
+    std::vector<std::vector<NAM_SAMPLE>> output(2, std::vector<NAM_SAMPLE>(buffer_size, 0.0f));
+    std::vector<NAM_SAMPLE*> inputPtrs(3);
+    std::vector<NAM_SAMPLE*> outputPtrs(2);
+    for (int ch = 0; ch < 3; ch++)
+      inputPtrs[ch] = input[ch].data();
+    for (int ch = 0; ch < 2; ch++)
+      outputPtrs[ch] = output[ch].data();
+
+    std::string test_name = "WaveNet process (3in, 2out) - Buffer size " + std::to_string(buffer_size);
+    run_allocation_test_no_allocations(
+      nullptr, // No setup needed
+      [&]() {
+        // Call process() - this should not allocate or free
+        wavenet->process(inputPtrs.data(), outputPtrs.data(), buffer_size);
+      },
+      nullptr, // No teardown needed
+      test_name.c_str());
+
+    // Verify output is valid
+    for (int ch = 0; ch < 2; ch++)
+    {
+      for (int i = 0; i < buffer_size; i++)
+      {
+        assert(std::isfinite(output[ch][i]));
+      }
+    }
+  }
+}
 } // namespace test_wavenet

From 3beaa07b49c88f470447caea0b4c076157657ef2 Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Fri, 16 Jan 2026 15:42:46 -0800
Subject: [PATCH 18/19] Add test to runner

---
 tools/run_tests.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/run_tests.cpp b/tools/run_tests.cpp
index 065d7d9..2a50c77 100644
--- a/tools/run_tests.cpp
+++ b/tools/run_tests.cpp
@@ -125,6 +125,7 @@ int main()
   test_wavenet::test_layer_grouped_process_realtime_safe();
   test_wavenet::test_layer_array_process_realtime_safe();
   test_wavenet::test_process_realtime_safe();
+  test_wavenet::test_process_3in_2out_realtime_safe();

   test_convnet::test_convnet_basic();
   test_convnet::test_convnet_batchnorm();

From aa1e00af7cd86d6870e132fadefbf0aa0323cefd Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Fri, 16 Jan 2026 16:09:45 -0800
Subject: [PATCH 19/19] Fix WaveNet::_get_condition_dim()

---
 NAM/wavenet.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/NAM/wavenet.h b/NAM/wavenet.h
index c67b9a5..2e99256 100644
--- a/NAM/wavenet.h
+++ b/NAM/wavenet.h
@@ -189,7 +189,7 @@ class WaveNet : public DSP
   virtual void _set_condition_array(NAM_SAMPLE** input, const int num_frames);
   // How many conditioning inputs are there.
-  // Just one--the audio.
-  virtual int _get_condition_dim() const { return 1; };
+  // One per input channel.
+  virtual int _get_condition_dim() const { return NumInputChannels(); };

 private:
   std::vector<_LayerArray> _layer_arrays;
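Note on the conditioning path: together with PATCH 14, this makes the
condition matrix carry exactly one row per input channel. A minimal sketch of
the invariant the two patches establish (member names taken from the diffs
above; illustrative only, not part of any patch):

  // With _get_condition_dim() == NumInputChannels(), _set_condition_array()
  // fills every row of _condition, so the zero-fill of trailing rows removed
  // in PATCH 14 is no longer needed.
  const int dim = _get_condition_dim(); // == NumInputChannels()
  for (int ch = 0; ch < dim; ch++)
    for (int j = 0; j < num_frames; j++)
      _condition(ch, j) = input[ch][j];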