From 02f5c053361b645356d81851efe51176e80ec794 Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Thu, 15 Jan 2026 21:26:52 -0800
Subject: [PATCH 01/19] [FEATURE] Multi-channel inputs and outputs

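process() now takes arrays of per-channel pointers, indexed as
input[channel][frame], instead of single mono buffers. A minimal sketch of
the new calling convention (buffer and variable names are illustrative):

    std::vector<std::vector<NAM_SAMPLE>> ins(model->NumInputChannels(),
                                             std::vector<NAM_SAMPLE>(numFrames, 0.0));
    std::vector<std::vector<NAM_SAMPLE>> outs(model->NumOutputChannels(),
                                              std::vector<NAM_SAMPLE>(numFrames, 0.0));
    std::vector<NAM_SAMPLE*> inPtrs, outPtrs;
    for (auto& v : ins)
      inPtrs.push_back(v.data());
    for (auto& v : outs)
      outPtrs.push_back(v.data());
    model->process(inPtrs.data(), outPtrs.data(), numFrames);

Mono models keep working: the factories default in_channels and out_channels
to 1 when the config does not specify them.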
---
 NAM/convnet.cpp                                 |  89 +++++--
 NAM/convnet.h                                   |  10 +-
 NAM/dsp.cpp                                     | 252 ++++++++++++++----
 NAM/dsp.h                                       |  65 ++---
 NAM/get_dsp.cpp                                 |  12 +-
 NAM/lstm.cpp                                    |  21 +-
 NAM/lstm.h                                      |   4 +-
 NAM/wavenet.cpp                                 |  34 ++-
 NAM/wavenet.h                                   |   6 +-
 tools/benchmodel.cpp                            |  23 +-
 tools/test/test_convnet.cpp                     |  46 ++--
 tools/test/test_dsp.cpp                         |  94 +++++--
 tools/test/test_wavenet/test_full.cpp           |  47 ++--
 tools/test/test_wavenet/test_real_time_safe.cpp |   6 +-
 14 files changed, 506 insertions(+), 203 deletions(-)

diff --git a/NAM/convnet.cpp b/NAM/convnet.cpp
index 3b8b18f..f82d6c1 100644
--- a/NAM/convnet.cpp
+++ b/NAM/convnet.cpp
@@ -146,22 +146,30 @@ void nam::convnet::_Head::process_(const Eigen::MatrixXf& input, Eigen::VectorXf
     output(i) = this->_bias + input.col(j).dot(this->_weight);
 }
 
-nam::convnet::ConvNet::ConvNet(const int channels, const std::vector<int>& dilations, const bool batchnorm,
+nam::convnet::ConvNet::ConvNet(const int in_channels, const int out_channels, const int channels, const std::vector<int>& dilations, const bool batchnorm,
                                const std::string activation, std::vector<float>& weights,
                                const double expected_sample_rate, const int groups)
-: Buffer(*std::max_element(dilations.begin(), dilations.end()), expected_sample_rate)
+: Buffer(in_channels, out_channels, *std::max_element(dilations.begin(), dilations.end()), expected_sample_rate)
 {
   this->_verify_weights(channels, dilations, batchnorm, weights.size());
   this->_blocks.resize(dilations.size());
   std::vector<float>::iterator it = weights.begin();
+  // First block takes in_channels input, subsequent blocks take channels input
   for (size_t i = 0; i < dilations.size(); i++)
-    this->_blocks[i].set_weights_(i == 0 ? 1 : channels, channels, dilations[i], batchnorm, activation, groups, it);
+    this->_blocks[i].set_weights_(i == 0 ? in_channels : channels, channels, dilations[i], batchnorm, activation, groups, it);
   // Only need _block_vals for the head (one entry)
   // Conv1D layers manage their own buffers now
   this->_block_vals.resize(1);
   this->_block_vals[0].setZero();
-  std::fill(this->_input_buffer.begin(), this->_input_buffer.end(), 0.0f);
-  this->_head = _Head(channels, it);
+
+  // Create heads for each output channel
+  this->_heads.resize(out_channels);
+  this->_head_outputs.resize(out_channels);
+  for (int ch = 0; ch < out_channels; ch++)
+  {
+    this->_heads[ch] = _Head(channels, it);
+  }
+
   if (it != weights.end())
     throw std::runtime_error("Didn't touch all the weights when initializing ConvNet");
 
@@ -171,18 +179,25 @@
 }
 
-void nam::convnet::ConvNet::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
+void nam::convnet::ConvNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames)
 {
   this->_update_buffers_(input, num_frames);
-  // Main computation!
-  const long i_start = this->_input_buffer_offset;
-  const long i_end = i_start + num_frames;
-
-  // Convert input buffer to matrix for first layer
-  Eigen::MatrixXf input_matrix(1, num_frames);
-  for (int i = 0; i < num_frames; i++)
-    input_matrix(0, i) = this->_input_buffer[i_start + i];
+  const int in_channels = NumInputChannels();
+  const int out_channels = NumOutputChannels();
+
+  // Multi-channel handling: the input channels are stacked into one matrix and
+  // mixed by the first conv layer; each output channel then gets its own head.
+  // This can be extended later for more sophisticated cross-channel processing
+
+  // Convert input buffers to matrix for first layer (stack input channels)
+  Eigen::MatrixXf input_matrix(in_channels, num_frames);
+  for (int ch = 0; ch < in_channels; ch++)
+  {
+    const long i_start = this->_input_buffer_offset[ch];
+    for (int i = 0; i < num_frames; i++)
+      input_matrix(ch, i) = this->_input_buffers[ch][i_start + i];
+  }
 
   // Process through ConvNetBlock layers
   // Each block now uses Conv1D's internal buffers via Process() and GetOutput()
@@ -206,23 +221,30 @@ void nam::convnet::ConvNet::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const
     this->_blocks[i].Process(block_input, num_frames);
   }
 
-  // Process head with output from last Conv1D
-  // Head still needs the old interface, so we need to provide it via a matrix
-  // We still need _block_vals[0] for the head interface
+  // Process heads for each output channel
+  // We need _block_vals[0] for the head interface
+  const long max_buffer_size = (long)this->_input_buffers[0].size();
   if (this->_block_vals[0].rows() != this->_blocks.back().get_out_channels()
-      || this->_block_vals[0].cols() != (long)this->_input_buffer.size())
+      || this->_block_vals[0].cols() != max_buffer_size)
   {
-    this->_block_vals[0].resize(this->_blocks.back().get_out_channels(), this->_input_buffer.size());
+    this->_block_vals[0].resize(this->_blocks.back().get_out_channels(), max_buffer_size);
   }
 
+  // Copy last block output to _block_vals for head
   auto last_output = this->_blocks.back().GetOutput(num_frames);
+  const long i_start = this->_input_buffer_offset[0]; // Use first channel's offset
+  const long i_end = i_start + num_frames;
   this->_block_vals[0].middleCols(i_start, num_frames) = last_output;
 
-  this->_head.process_(this->_block_vals[0], this->_head_output, i_start, i_end);
-
-  // Copy to required output array
-  for (int s = 0; s < num_frames; s++)
-    output[s] = this->_head_output(s);
+  // Process each output channel head
+  for (int ch = 0; ch < out_channels; ch++)
+  {
+    this->_heads[ch].process_(this->_block_vals[0], this->_head_outputs[ch], i_start, i_end);
+
+    // Copy to output array for this channel
+    for (int s = 0; s < num_frames; s++)
+      output[ch][s] = this->_head_outputs[ch](s);
+  }
 
   // Prepare for next call:
   nam::Buffer::_advance_input_buffer_(num_frames);
@@ -245,18 +267,24 @@ void nam::convnet::ConvNet::SetMaxBufferSize(const int maxBufferSize)
   }
 }
 
-void nam::convnet::ConvNet::_update_buffers_(NAM_SAMPLE* input, const int num_frames)
+void nam::convnet::ConvNet::_update_buffers_(NAM_SAMPLE** input, const int num_frames)
 {
   this->Buffer::_update_buffers_(input, num_frames);
 
-  const long buffer_size = (long)this->_input_buffer.size();
+  // Find maximum buffer size across input channels
+  long max_buffer_size = 0;
+  for (const auto& buf : this->_input_buffers)
+  {
+    if ((long)buf.size() > max_buffer_size)
+      max_buffer_size = (long)buf.size();
+  }
 
   // Only need _block_vals[0] for the head
   // Conv1D layers manage their own buffers now
   if (this->_block_vals[0].rows() != this->_blocks.back().get_out_channels()
-      || this->_block_vals[0].cols() != buffer_size)
+      || this->_block_vals[0].cols() != max_buffer_size)
   {
-    this->_block_vals[0].resize(this->_blocks.back().get_out_channels(), buffer_size);
+    this->_block_vals[0].resize(this->_blocks.back().get_out_channels(), max_buffer_size);
     this->_block_vals[0].setZero();
   }
 }
@@ -281,8 +309,11 @@ std::unique_ptr<nam::DSP> nam::convnet::Factory(const nlohmann::json& config, st
   const bool batchnorm = config["batchnorm"];
   const std::string activation = config["activation"];
   const int groups = config.value("groups", 1); // defaults to 1
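+  // Illustrative config snippet for a stereo model (field names as read by
+  // this factory; other fields omitted):
+  //   { "channels": 16, "dilations": [1, 2, 4], "batchnorm": true,
+  //     "activation": "ReLU", "in_channels": 2, "out_channels": 2 }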
+  // Default to 1 channel in/out for backward compatibility
+  const int in_channels = config.value("in_channels", 1);
+  const int out_channels = config.value("out_channels", 1);
   return std::make_unique<ConvNet>(
-    channels, dilations, batchnorm, activation, weights, expectedSampleRate, groups);
+    in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expectedSampleRate, groups);
 }
 
 namespace
diff --git a/NAM/convnet.h b/NAM/convnet.h
index ccc1edb..5f16ad6 100644
--- a/NAM/convnet.h
+++ b/NAM/convnet.h
@@ -77,21 +77,21 @@ class _Head
 class ConvNet : public Buffer
 {
 public:
-  ConvNet(const int channels, const std::vector<int>& dilations, const bool batchnorm, const std::string activation,
+  ConvNet(const int in_channels, const int out_channels, const int channels, const std::vector<int>& dilations, const bool batchnorm, const std::string activation,
           std::vector<float>& weights, const double expected_sample_rate = -1.0, const int groups = 1);
   ~ConvNet() = default;
 
-  void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) override;
+  void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override;
   void SetMaxBufferSize(const int maxBufferSize) override;
 
 protected:
   std::vector<ConvNetBlock> _blocks;
   std::vector<Eigen::MatrixXf> _block_vals;
-  Eigen::VectorXf _head_output;
-  _Head _head;
+  std::vector<Eigen::VectorXf> _head_outputs;
+  std::vector<_Head> _heads;
   void _verify_weights(const int channels, const std::vector<int>& dilations, const bool batchnorm,
                        const size_t actual_weights);
-  void _update_buffers_(NAM_SAMPLE* input, const int num_frames) override;
+  void _update_buffers_(NAM_SAMPLE** input, const int num_frames) override;
   void _rewind_buffers_() override;
 
   int mPrewarmSamples = 0; // Pre-compute during initialization
diff --git a/NAM/dsp.cpp b/NAM/dsp.cpp
index dc46891..f9b625e 100644
--- a/NAM/dsp.cpp
+++ b/NAM/dsp.cpp
@@ -15,9 +15,17 @@
 
 constexpr const long _INPUT_BUFFER_SAFETY_FACTOR = 32;
 
-nam::DSP::DSP(const double expected_sample_rate)
-: mExpectedSampleRate(expected_sample_rate)
+nam::DSP::DSP(const int in_channels, const int out_channels, const double expected_sample_rate)
+: mInChannels(in_channels)
+, mOutChannels(out_channels)
+, mExpectedSampleRate(expected_sample_rate)
+, mInputLevels(in_channels)
+, mOutputLevels(out_channels)
 {
+  if (in_channels <= 0 || out_channels <= 0)
+  {
+    throw std::runtime_error("Channel counts must be positive");
+  }
 }
 
 void nam::DSP::prewarm()
@@ -31,29 +39,47 @@ void nam::DSP::prewarm()
     return;
 
   const size_t bufferSize = std::max(mMaxBufferSize, 1);
-  std::vector<NAM_SAMPLE> inputBuffer, outputBuffer;
-  inputBuffer.resize(bufferSize);
-  outputBuffer.resize(bufferSize);
-  for (auto it = inputBuffer.begin(); it != inputBuffer.end(); ++it)
+  // Allocate buffers for all channels
+  std::vector<std::vector<NAM_SAMPLE>> inputBuffers(mInChannels);
+  std::vector<std::vector<NAM_SAMPLE>> outputBuffers(mOutChannels);
+  std::vector<NAM_SAMPLE*> inputPtrs(mInChannels);
+  std::vector<NAM_SAMPLE*> outputPtrs(mOutChannels);
+
+  for (int ch = 0; ch < mInChannels; ch++)
+  {
+    inputBuffers[ch].resize(bufferSize, (NAM_SAMPLE)0.0);
+    inputPtrs[ch] = inputBuffers[ch].data();
+  }
+  for (int ch = 0; ch < mOutChannels; ch++)
   {
-    (*it) = (NAM_SAMPLE)0.0;
+    outputBuffers[ch].resize(bufferSize, (NAM_SAMPLE)0.0);
+    outputPtrs[ch] = outputBuffers[ch].data();
   }
-  NAM_SAMPLE* inputPtr = inputBuffer.data();
-  NAM_SAMPLE* outputPtr = outputBuffer.data();
 
   int samplesProcessed = 0;
   while (samplesProcessed < prewarmSamples)
   {
-    this->process(inputPtr, outputPtr, bufferSize);
+    this->process(inputPtrs.data(), outputPtrs.data(), bufferSize);
     samplesProcessed += bufferSize;
   }
 }
 
-void nam::DSP::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
+void nam::DSP::process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames)
 {
-  // Default implementation is the null operation
-  for (int i = 0; i < num_frames; i++)
-    output[i] = input[i];
+  // Default implementation is the null operation: copy input to output
+  // For now, assume 1:1 channel mapping (first min(in_channels, out_channels) channels)
+  const int channelsToProcess = std::min(mInChannels, mOutChannels);
+  for (int ch = 0; ch < channelsToProcess; ch++)
+  {
+    for (int i = 0; i < num_frames; i++)
+      output[ch][i] = input[ch][i];
+  }
+  // Zero out any extra output channels
+  for (int ch = channelsToProcess; ch < mOutChannels; ch++)
+  {
+    for (int i = 0; i < num_frames; i++)
+      output[ch][i] = (NAM_SAMPLE)0.0;
+  }
 }
 
 double nam::DSP::GetLoudness() const
@@ -87,10 +113,67 @@ void nam::DSP::SetMaxBufferSize(const int maxBufferSize)
   mMaxBufferSize = maxBufferSize;
 }
 
+double nam::DSP::GetInputLevel(const int channel)
+{
+  if (channel < 0 || channel >= mInChannels)
+  {
+    throw std::runtime_error("Invalid input channel index");
+  }
+  return mInputLevels[channel].level;
+}
+
+double nam::DSP::GetOutputLevel(const int channel)
+{
+  if (channel < 0 || channel >= mOutChannels)
+  {
+    throw std::runtime_error("Invalid output channel index");
+  }
+  return mOutputLevels[channel].level;
+}
+
+bool nam::DSP::HasInputLevel(const int channel)
+{
+  // channel == -1 (the default): does any input channel have a level?
+  if (channel == -1)
+  {
+    for (int ch = 0; ch < mInChannels; ch++)
+      if (mInputLevels[ch].haveLevel)
+        return true;
+    return false;
+  }
+  if (channel < 0 || channel >= mInChannels)
+  {
+    throw std::runtime_error("Invalid input channel index");
+  }
+  return mInputLevels[channel].haveLevel;
+}
+
+bool nam::DSP::HasOutputLevel(const int channel)
+{
+  // channel == -1 (the default): does any output channel have a level?
+  if (channel == -1)
+  {
+    for (int ch = 0; ch < mOutChannels; ch++)
+      if (mOutputLevels[ch].haveLevel)
+        return true;
+    return false;
+  }
+  if (channel < 0 || channel >= mOutChannels)
+  {
+    throw std::runtime_error("Invalid output channel index");
+  }
+  return mOutputLevels[channel].haveLevel;
+}
+
+void nam::DSP::SetInputLevel(const int channel, const double inputLevel)
+{
+  if (channel < 0 || channel >= mInChannels)
+  {
+    throw std::runtime_error("Invalid input channel index");
+  }
+  mInputLevels[channel].haveLevel = true;
+  mInputLevels[channel].level = inputLevel;
+}
+
+void nam::DSP::SetOutputLevel(const int channel, const double outputLevel)
+{
+  if (channel < 0 || channel >= mOutChannels)
+  {
+    throw std::runtime_error("Invalid output channel index");
+  }
+  mOutputLevels[channel].haveLevel = true;
+  mOutputLevels[channel].level = outputLevel;
+}
+
 // Buffer =====================================================================
 
-nam::Buffer::Buffer(const int receptive_field, const double expected_sample_rate)
-: nam::DSP(expected_sample_rate)
+nam::Buffer::Buffer(const int in_channels, const int out_channels, const int receptive_field,
+                    const double expected_sample_rate)
+: nam::DSP(in_channels, out_channels, expected_sample_rate)
 {
   this->_set_receptive_field(receptive_field);
 }
 
@@ -103,68 +186,105 @@ void nam::Buffer::_set_receptive_field(const int new_receptive_field)
 void nam::Buffer::_set_receptive_field(const int new_receptive_field, const int input_buffer_size)
 {
   this->_receptive_field = new_receptive_field;
-  this->_input_buffer.resize(input_buffer_size);
-  std::fill(this->_input_buffer.begin(), this->_input_buffer.end(), 0.0f);
+  const int in_channels = NumInputChannels();
+  const int out_channels = NumOutputChannels();
+
+  // Resize buffers for all input channels
+  _input_buffers.resize(in_channels);
+  _input_buffer_offset.resize(in_channels);
+  for (int ch = 0; ch < in_channels; ch++)
+  {
+    _input_buffers[ch].resize(input_buffer_size);
+    std::fill(_input_buffers[ch].begin(), _input_buffers[ch].end(), 0.0f);
+  }
+
+  // Resize output buffers (though they'll be resized per call in _update_buffers_)
+  _output_buffers.resize(out_channels);
+
   this->_reset_input_buffer();
 }
 
-void nam::Buffer::_update_buffers_(NAM_SAMPLE* input, const int num_frames)
+void nam::Buffer::_update_buffers_(NAM_SAMPLE** input, const int num_frames)
 {
-  // Make sure that the buffer is big enough for the receptive field and the
-  // frames needed!
+  const int in_channels = NumInputChannels();
+  const int out_channels = NumOutputChannels();
+
+  // Make sure that the buffers are big enough for the receptive field and the
+  // frames needed for each channel!
+  const long minimum_input_buffer_size = (long)this->_receptive_field + _INPUT_BUFFER_SAFETY_FACTOR * num_frames;
+
+  for (int ch = 0; ch < in_channels; ch++)
   {
-    const long minimum_input_buffer_size = (long)this->_receptive_field + _INPUT_BUFFER_SAFETY_FACTOR * num_frames;
-    if ((long)this->_input_buffer.size() < minimum_input_buffer_size)
+    if ((long)this->_input_buffers[ch].size() < minimum_input_buffer_size)
     {
       long new_buffer_size = 2;
       while (new_buffer_size < minimum_input_buffer_size)
         new_buffer_size *= 2;
-      this->_input_buffer.resize(new_buffer_size);
-      std::fill(this->_input_buffer.begin(), this->_input_buffer.end(), 0.0f);
+      this->_input_buffers[ch].resize(new_buffer_size);
+      std::fill(this->_input_buffers[ch].begin(), this->_input_buffers[ch].end(), 0.0f);
     }
+
+    // If we'd run off the end of the input buffer, then we need to move the data
+    // back to the start of the buffer and start again.
+    if (this->_input_buffer_offset[ch] + num_frames > (long)this->_input_buffers[ch].size())
+      this->_rewind_buffers_();
+
+    // Put the new samples into the input buffer for this channel
+    for (long i = this->_input_buffer_offset[ch], j = 0; j < num_frames; i++, j++)
+      this->_input_buffers[ch][i] = (float)input[ch][j];
   }
 
-  // If we'd run off the end of the input buffer, then we need to move the data
-  // back to the start of the buffer and start again.
-  if (this->_input_buffer_offset + num_frames > (long)this->_input_buffer.size())
-    this->_rewind_buffers_();
-  // Put the new samples into the input buffer
-  for (long i = this->_input_buffer_offset, j = 0; j < num_frames; i++, j++)
-    this->_input_buffer[i] = input[j];
-  // And resize the output buffer:
-  this->_output_buffer.resize(num_frames);
-  std::fill(this->_output_buffer.begin(), this->_output_buffer.end(), 0.0f);
+  // Resize output buffers for all output channels
+  for (int ch = 0; ch < out_channels; ch++)
+  {
+    this->_output_buffers[ch].resize(num_frames);
+    std::fill(this->_output_buffers[ch].begin(), this->_output_buffers[ch].end(), 0.0f);
+  }
 }
 
 void nam::Buffer::_rewind_buffers_()
 {
-  // Copy the input buffer back
-  // RF-1 samples because we've got at least one new one inbound.
-  for (long i = 0, j = this->_input_buffer_offset - this->_receptive_field; i < this->_receptive_field; i++, j++)
-    this->_input_buffer[i] = this->_input_buffer[j];
-  // And reset the offset.
-  // Even though we could be stingy about that one sample that we won't be using
-  // (because a new set is incoming) it's probably not worth the
-  // hyper-optimization and liable for bugs. And the code looks way tidier this
-  // way.
-  this->_input_buffer_offset = this->_receptive_field;
+  const int in_channels = NumInputChannels();
+
+  // Rewind buffers for all input channels
+  for (int ch = 0; ch < in_channels; ch++)
+  {
+    // Copy the input buffer back
+    // RF-1 samples because we've got at least one new one inbound.
+    for (long i = 0, j = this->_input_buffer_offset[ch] - this->_receptive_field; i < this->_receptive_field; i++, j++)
+      this->_input_buffers[ch][i] = this->_input_buffers[ch][j];
+    // And reset the offset.
+    // Even though we could be stingy about that one sample that we won't be using
+    // (because a new set is incoming) it's probably not worth the
+    // hyper-optimization and liable for bugs. And the code looks way tidier this
+    // way.
+    this->_input_buffer_offset[ch] = this->_receptive_field;
+  }
 }
 
 void nam::Buffer::_reset_input_buffer()
 {
-  this->_input_buffer_offset = this->_receptive_field;
+  const int in_channels = NumInputChannels();
+  for (int ch = 0; ch < in_channels; ch++)
+  {
+    this->_input_buffer_offset[ch] = this->_receptive_field;
+  }
 }
 
 void nam::Buffer::_advance_input_buffer_(const int num_frames)
 {
-  this->_input_buffer_offset += num_frames;
+  const int in_channels = NumInputChannels();
+  for (int ch = 0; ch < in_channels; ch++)
+  {
+    this->_input_buffer_offset[ch] += num_frames;
+  }
 }
 
 // Linear =====================================================================
 
-nam::Linear::Linear(const int receptive_field, const bool _bias, const std::vector<float>& weights,
-                    const double expected_sample_rate)
-: nam::Buffer(receptive_field, expected_sample_rate)
+nam::Linear::Linear(const int in_channels, const int out_channels, const int receptive_field, const bool _bias,
+                    const std::vector<float>& weights, const double expected_sample_rate)
+: nam::Buffer(in_channels, out_channels, receptive_field, expected_sample_rate)
 {
   if ((int)weights.size() != (receptive_field + (_bias ? 1 : 0)))
     throw std::runtime_error(
@@ -178,16 +298,33 @@ nam::Linear::Linear(const int receptive_field, const bool _bias, const std::vect
   this->_bias = _bias ? weights[receptive_field] : (float)0.0;
 }
 
-void nam::Linear::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
+void nam::Linear::process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames)
 {
   this->nam::Buffer::_update_buffers_(input, num_frames);
+  const int in_channels = NumInputChannels();
+  const int out_channels = NumOutputChannels();
+
+  // For now, Linear processes each input channel independently to corresponding output channel
+  // This is a simple implementation - can be extended later for cross-channel mixing
+  const int channelsToProcess = std::min(in_channels, out_channels);
 
   // Main computation!
-  for (int i = 0; i < num_frames; i++)
+  for (int ch = 0; ch < channelsToProcess; ch++)
+  {
+    for (int i = 0; i < num_frames; i++)
+    {
+      const long offset = this->_input_buffer_offset[ch] - this->_weight.size() + i + 1;
+      auto input_vec = Eigen::Map<const Eigen::VectorXf>(&this->_input_buffers[ch][offset], this->_receptive_field);
+      output[ch][i] = this->_bias + this->_weight.dot(input_vec);
+    }
+  }
+
+  // Zero out any extra output channels
+  for (int ch = channelsToProcess; ch < out_channels; ch++)
   {
-    const long offset = this->_input_buffer_offset - this->_weight.size() + i + 1;
-    auto input = Eigen::Map<const Eigen::VectorXf>(&this->_input_buffer[offset], this->_receptive_field);
-    output[i] = this->_bias + this->_weight.dot(input);
+    for (int i = 0; i < num_frames; i++)
+      output[ch][i] = (NAM_SAMPLE)0.0;
   }
 
   // Prepare for next call:
@@ -200,7 +337,10 @@ std::unique_ptr<nam::DSP> nam::linear::Factory(const nlohmann::json& config, std
 {
   const int receptive_field = config["receptive_field"];
   const bool bias = config["bias"];
-  return std::make_unique<Linear>(receptive_field, bias, weights, expectedSampleRate);
+  // Default to 1 channel in/out for backward compatibility
+  const int in_channels = config.value("in_channels", 1);
+  const int out_channels = config.value("out_channels", 1);
+  return std::make_unique<Linear>(in_channels, out_channels, receptive_field, bias, weights, expectedSampleRate);
 }
 
 // NN modules =================================================================
diff --git a/NAM/dsp.h b/NAM/dsp.h
index f359a68..ef1eaf7 100644
--- a/NAM/dsp.h
+++ b/NAM/dsp.h
@@ -40,7 +40,7 @@ class DSP
   // Older models won't know, but newer ones will come with a loudness from the training based on their response to a
   // standardized input.
   // We may choose to have the models figure out for themselves how loud they are in here in the future.
-  DSP(const double expected_sample_rate);
+  DSP(const int in_channels, const int out_channels, const double expected_sample_rate);
   virtual ~DSP() = default;
   // prewarm() does any initial work required to "settle" model initial conditions
   // it can be somewhat expensive, so should not be called during realtime audio processing
@@ -54,25 +54,33 @@ class DSP
   // 1. The core DSP algorithm is run (This is what should probably be
   //    overridden in subclasses).
   // 2. The output level is applied and the result stored to `output`.
-  virtual void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames);
+  // `input` and `output` are double pointers where the first pointer indexes channels
+  // and the second indexes frames: input[channel][frame]
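+  // e.g., with 2 inputs, 2 outputs, and num_frames = 4, input[1][3] is the
+  // last frame of the second input channel.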
+  virtual void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames);
   // Expected sample rate, in Hz.
   // TODO throw if it doesn't know.
   double GetExpectedSampleRate() const { return mExpectedSampleRate; };
+  // Number of input channels
+  int NumInputChannels() const { return mInChannels; };
+  // Number of output channels
+  int NumOutputChannels() const { return mOutChannels; };
-  // Input Level, in dBu, corresponding to 0 dBFS for a sine wave
+  // Input Level, in dBu, corresponding to 0 dBFS for a sine wave, for a specific channel
   // You should call HasInputLevel() first to be safe.
-  double GetInputLevel() { return mInputLevel.level; };
+  double GetInputLevel(const int channel);
   // Get how loud this model is, in dB.
   // Throws a std::runtime_error if the model doesn't know how loud it is.
   double GetLoudness() const;
-  // Output Level, in dBu, corresponding to 0 dBFS for a sine wave
+  // Output Level, in dBu, corresponding to 0 dBFS for a sine wave, for a specific channel
   // You should call HasOutputLevel() first to be safe.
-  double GetOutputLevel() { return mOutputLevel.level; };
-  // Does this model know its output level?
-  bool HasInputLevel() { return mInputLevel.haveLevel; };
+  double GetOutputLevel(const int channel);
+  // Does this model know its input level for a specific channel?
+  // If channel == -1, returns true if any channel has a level set.
+  bool HasInputLevel(const int channel = -1);
   // Get whether the model knows how loud it is.
   bool HasLoudness() const { return mHasLoudness; };
-  // Does this model know its output level?
-  bool HasOutputLevel() { return mOutputLevel.haveLevel; };
+  // Does this model know its output level for a specific channel?
+  // If channel == -1, returns true if any channel has a level set.
+  bool HasOutputLevel(const int channel = -1);
   // General function for resetting the DSP unit.
   // This doesn't call prewarm(). If you want to do that, then you might want to use ResetAndPrewarm().
   // See https://github.com/sdatkinson/NeuralAmpModelerCore/issues/96 for the reasoning.
@@ -83,20 +91,12 @@ class DSP
     Reset(sampleRate, maxBufferSize);
     prewarm();
   }
-  void SetInputLevel(const double inputLevel)
-  {
-    mInputLevel.haveLevel = true;
-    mInputLevel.level = inputLevel;
-  };
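+  // Per-channel calibration levels; e.g., for a calibrated stereo model
+  // (illustrative values):
+  //   model->SetInputLevel(0, 19.0);
+  //   model->SetInputLevel(1, 19.0);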
+  void SetInputLevel(const int channel, const double inputLevel);
   // Set the loudness, in dB.
   // This is usually defined to be the loudness to a standardized input. The trainer has its own, but you can always
   // use this to define it a different way if you like yours better.
   void SetLoudness(const double loudness);
-  void SetOutputLevel(const double outputLevel)
-  {
-    mOutputLevel.haveLevel = true;
-    mOutputLevel.level = outputLevel;
-  };
+  void SetOutputLevel(const int channel, const double outputLevel);
 
 protected:
   bool mHasLoudness = false;
@@ -117,13 +117,15 @@ class DSP
   int GetMaxBufferSize() const { return mMaxBufferSize; };
 
 private:
+  const int mInChannels;
+  const int mOutChannels;
   struct Level
   {
     bool haveLevel = false;
    float level = 0.0;
   };
-  Level mInputLevel;
-  Level mOutputLevel;
+  std::vector<Level> mInputLevels;
+  std::vector<Level> mOutputLevels;
 };
 
 // Class where an input buffer is kept so that long-time effects can be
 class Buffer : public DSP
 {
 public:
-  Buffer(const int receptive_field, const double expected_sample_rate = -1.0);
+  Buffer(const int in_channels, const int out_channels, const int receptive_field, const double expected_sample_rate = -1.0);
 
 protected:
-  // Input buffer
-  const int _input_buffer_channels = 1; // Mono
   int _receptive_field;
-  // First location where we add new samples from the input
-  long _input_buffer_offset;
-  std::vector<float> _input_buffer;
-  std::vector<float> _output_buffer;
+  // First location where we add new samples from the input (per channel)
+  std::vector<long> _input_buffer_offset;
+  // Per-channel input buffers
+  std::vector<std::vector<float>> _input_buffers;
+  std::vector<std::vector<float>> _output_buffers;
 
   void _advance_input_buffer_(const int num_frames);
   void _set_receptive_field(const int new_receptive_field, const int input_buffer_size);
   void _set_receptive_field(const int new_receptive_field);
   void _reset_input_buffer();
   // Use this->_input_post_gain
-  virtual void _update_buffers_(NAM_SAMPLE* input, int num_frames);
+  virtual void _update_buffers_(NAM_SAMPLE** input, int num_frames);
   virtual void _rewind_buffers_();
 };
 
@@ -156,9 +157,9 @@ class Buffer : public DSP
 class Linear : public Buffer
 {
 public:
-  Linear(const int receptive_field, const bool _bias, const std::vector<float>& weights,
+  Linear(const int in_channels, const int out_channels, const int receptive_field, const bool _bias, const std::vector<float>& weights,
          const double expected_sample_rate = -1.0);
-  void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) override;
+  void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override;
 
 protected:
   Eigen::VectorXf _weight;
diff --git a/NAM/get_dsp.cpp b/NAM/get_dsp.cpp
index 99dd3a0..af1ef68 100644
--- a/NAM/get_dsp.cpp
+++ b/NAM/get_dsp.cpp
@@ -158,11 +158,19 @@ std::unique_ptr<DSP> get_dsp(dspData& conf)
   }
   if (inputLevel.have)
   {
-    out->SetInputLevel(inputLevel.value);
+    // Set the same level for all input channels (backward compatibility)
+    for (int ch = 0; ch < out->NumInputChannels(); ch++)
+    {
+      out->SetInputLevel(ch, inputLevel.value);
+    }
   }
   if (outputLevel.have)
   {
-    out->SetOutputLevel(outputLevel.value);
+    // Set the same level for all output channels (backward compatibility)
+    for (int ch = 0; ch < out->NumOutputChannels(); ch++)
+    {
+      out->SetOutputLevel(ch, outputLevel.value);
+    }
   }
 
   // "pre-warm" the model to settle initial conditions
diff --git a/NAM/lstm.cpp b/NAM/lstm.cpp
index 6fa33a2..72eb684 100644
--- a/NAM/lstm.cpp
+++ b/NAM/lstm.cpp
@@ -65,9 +65,9 @@ void nam::lstm::LSTMCell::process_(const Eigen::VectorXf& x)
   }
 }
 
-nam::lstm::LSTM::LSTM(const int num_layers, const int input_size, const int hidden_size, std::vector<float>& weights,
+nam::lstm::LSTM::LSTM(const int in_channels, const int out_channels, const int num_layers, const int input_size, const int hidden_size, std::vector<float>& weights,
                       const double expected_sample_rate)
-: DSP(expected_sample_rate)
+: DSP(in_channels, out_channels, expected_sample_rate)
 {
   this->_input.resize(1);
   std::vector<float>::iterator it = weights.begin();
@@ -80,10 +80,18 @@ nam::lstm::LSTM::LSTM(const int num_layers, const int input_size, const int hidd
   assert(it == weights.end());
 }
 
-void nam::lstm::LSTM::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
+void nam::lstm::LSTM::process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames)
 {
+  const int out_channels = NumOutputChannels();
+
+  // For now, process first input channel and replicate to all output channels
+  // Can be extended later for true multi-channel support
   for (int i = 0; i < num_frames; i++)
-    output[i] = this->_process_sample(input[i]);
+  {
+    const float sample = this->_process_sample(input[0][i]);
+    for (int ch = 0; ch < out_channels; ch++)
+      output[ch][i] = sample;
+  }
 }
 
 int nam::lstm::LSTM::PrewarmSamples()
@@ -112,7 +120,10 @@ std::unique_ptr<nam::DSP> nam::lstm::Factory(const nlohmann::json& config, std::
   const int num_layers = config["num_layers"];
   const int input_size = config["input_size"];
   const int hidden_size = config["hidden_size"];
-  return std::make_unique<LSTM>(num_layers, input_size, hidden_size, weights, expectedSampleRate);
+  // Default in_channels to input_size (1 for standard models) and out_channels to 1 for backward compatibility
+  const int in_channels = config.value("in_channels", input_size);
+  const int out_channels = config.value("out_channels", 1);
+  return std::make_unique<LSTM>(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expectedSampleRate);
 }
 
 // Register the factory
diff --git a/NAM/lstm.h b/NAM/lstm.h
index 17d0ada..e2123dc 100644
--- a/NAM/lstm.h
+++ b/NAM/lstm.h
@@ -51,7 +51,7 @@ class LSTMCell
 class LSTM : public DSP
 {
 public:
-  LSTM(const int num_layers, const int input_size, const int hidden_size, std::vector<float>& weights,
+  LSTM(const int in_channels, const int out_channels, const int num_layers, const int input_size, const int hidden_size, std::vector<float>& weights,
       const double expected_sample_rate = -1.0);
   ~LSTM() = default;
 
@@ -61,7 +61,7 @@ class LSTM : public DSP
   Eigen::VectorXf _head_weight;
   float _head_bias;
 
-  void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) override;
+  void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override;
 
   std::vector<LSTMCell> _layers;
 
   float _process_sample(const float x);
diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp
index 285ea69..3e4ff1c 100644
--- a/NAM/wavenet.cpp
+++ b/NAM/wavenet.cpp
@@ -192,10 +192,10 @@ long nam::wavenet::_LayerArray::_get_channels() const
 
 // WaveNet ====================================================================
 
-nam::wavenet::WaveNet::WaveNet(const std::vector<LayerArrayParams>& layer_array_params,
+nam::wavenet::WaveNet::WaveNet(const int in_channels, const int out_channels, const std::vector<LayerArrayParams>& layer_array_params,
                                const float head_scale, const bool with_head, std::vector<float> weights,
                                const double expected_sample_rate)
-: DSP(expected_sample_rate)
+: DSP(in_channels, out_channels, expected_sample_rate)
 , _head_scale(head_scale)
 {
   if (with_head)
@@ -251,17 +251,21 @@ void nam::wavenet::WaveNet::SetMaxBufferSize(const int maxBufferSize)
     this->_layer_arrays[i].SetMaxBufferSize(maxBufferSize);
 }
 
-void nam::wavenet::WaveNet::_set_condition_array(NAM_SAMPLE* input, const int num_frames)
+void nam::wavenet::WaveNet::_set_condition_array(NAM_SAMPLE** input, const int num_frames)
 {
+  // For now, use first input channel for conditioning
+  // Can be extended later to support multi-channel conditioning
   for (int j = 0; j < num_frames; j++)
   {
-    this->_condition(0, j) = input[j];
+    this->_condition(0, j) = input[0][j];
   }
 }
 
-void nam::wavenet::WaveNet::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
+void nam::wavenet::WaveNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames)
 {
   assert(num_frames <= mMaxBufferSize);
+  const int out_channels = NumOutputChannels();
+
   this->_set_condition_array(input, num_frames);
 
   // Main layer arrays:
@@ -287,11 +291,16 @@ void nam::wavenet::WaveNet::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const
   // (Head not implemented)
   auto& final_head_outputs = this->_layer_arrays.back().GetHeadOutputs();
-  assert(final_head_outputs.rows() == 1);
-  for (int s = 0; s < num_frames; s++)
+  assert(final_head_outputs.rows() == out_channels);
+
+  for (int ch = 0; ch < out_channels; ch++)
   {
-    const float out = this->_head_scale * final_head_outputs(0, s);
-    output[s] = out;
+    for (int s = 0; s < num_frames; s++)
+    {
+      const float out = this->_head_scale * final_head_outputs(ch, s);
+      output[ch][s] = out;
+    }
   }
 }
 
@@ -314,8 +323,13 @@ std::unique_ptr<nam::DSP> nam::wavenet::Factory(const nlohmann::json& config, st
   }
   const bool with_head = !config["head"].is_null();
   const float head_scale = config["head_scale"];
+
+  // Determine channels from first layer (input_size) and last layer (head_size)
+  const int in_channels = config.value("in_channels", layer_array_params[0].input_size);
+  const int out_channels = config.value("out_channels", layer_array_params.back().head_size);
+
   return std::make_unique<WaveNet>(
-    layer_array_params, head_scale, with_head, weights, expectedSampleRate);
+    in_channels, out_channels, layer_array_params, head_scale, with_head, weights, expectedSampleRate);
 }
 
 // Register the factory
diff --git a/NAM/wavenet.h b/NAM/wavenet.h
index 832673b..faffa74 100644
--- a/NAM/wavenet.h
+++ b/NAM/wavenet.h
@@ -174,10 +174,10 @@ class _LayerArray
 class WaveNet : public DSP
 {
 public:
-  WaveNet(const std::vector<LayerArrayParams>& layer_array_params, const float head_scale, const bool with_head,
+  WaveNet(const int in_channels, const int out_channels, const std::vector<LayerArrayParams>& layer_array_params, const float head_scale, const bool with_head,
           std::vector<float> weights, const double expected_sample_rate = -1.0);
   ~WaveNet() = default;
-  void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) override;
+  void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override;
   void set_weights_(std::vector<float>& weights);
 
 protected:
@@ -186,7 +186,7 @@ class WaveNet : public DSP
   void SetMaxBufferSize(const int maxBufferSize) override;
 
   // Fill in the "condition" array that's fed into the various parts of the net.
-  virtual void _set_condition_array(NAM_SAMPLE* input, const int num_frames);
+  virtual void _set_condition_array(NAM_SAMPLE** input, const int num_frames);
   // How many conditioning inputs are there.
   // Just one--the audio.
   virtual int _get_condition_dim() const { return 1; };
diff --git a/tools/benchmodel.cpp b/tools/benchmodel.cpp
index 5c3d60c..fb6bd09 100644
--- a/tools/benchmodel.cpp
+++ b/tools/benchmodel.cpp
@@ -40,18 +40,31 @@ int main(int argc, char* argv[])
   model->Reset(model->GetExpectedSampleRate(), bufferSize);
 
   size_t numBuffers = (48000 / bufferSize) * 2;
 
-  // Fill input buffer with zeroes.
-  // Output buffer doesn't matter.
-  for (int i = 0; i < AUDIO_BUFFER_SIZE; i++)
+  // Allocate multi-channel buffers
+  const int in_channels = model->NumInputChannels();
+  const int out_channels = model->NumOutputChannels();
+
+  std::vector<std::vector<NAM_SAMPLE>> inputBuffers(in_channels);
+  std::vector<std::vector<NAM_SAMPLE>> outputBuffers(out_channels);
+  std::vector<NAM_SAMPLE*> inputPtrs(in_channels);
+  std::vector<NAM_SAMPLE*> outputPtrs(out_channels);
+
+  for (int ch = 0; ch < in_channels; ch++)
   {
-    inputBuffer[i] = 0.0;
+    inputBuffers[ch].resize(AUDIO_BUFFER_SIZE, 0.0);
+    inputPtrs[ch] = inputBuffers[ch].data();
+  }
+  for (int ch = 0; ch < out_channels; ch++)
+  {
+    outputBuffers[ch].resize(AUDIO_BUFFER_SIZE, 0.0);
+    outputPtrs[ch] = outputBuffers[ch].data();
   }
 
   std::cout << "Running benchmark\n";
   auto t1 = high_resolution_clock::now();
   for (size_t i = 0; i < numBuffers; i++)
   {
-    model->process(inputBuffer, outputBuffer, AUDIO_BUFFER_SIZE);
+    model->process(inputPtrs.data(), outputPtrs.data(), AUDIO_BUFFER_SIZE);
   }
   auto t2 = high_resolution_clock::now();
   std::cout << "Finished\n";
diff --git a/tools/test/test_convnet.cpp b/tools/test/test_convnet.cpp
index ff11074..8966967 100644
--- a/tools/test/test_convnet.cpp
+++ b/tools/test/test_convnet.cpp
@@ -32,7 +32,7 @@ void test_convnet_basic()
   // Head weights (2 weights + 1 bias)
   weights.insert(weights.end(), {1.0f, 1.0f, 0.0f});
 
-  nam::convnet::ConvNet convnet(channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
 
   const int numFrames = 4;
   const int maxBufferSize = 64;
@@ -40,8 +40,10 @@ void test_convnet_basic()
 
   std::vector<NAM_SAMPLE> input(numFrames, 1.0f);
   std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
 
-  convnet.process(input.data(), output.data(), numFrames);
+  convnet.process(inputPtrs, outputPtrs, numFrames);
 
   // Verify output dimensions
   assert(output.size() == numFrames);
@@ -74,7 +76,7 @@ void test_convnet_batchnorm()
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});
 
-  nam::convnet::ConvNet convnet(channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
 
   const int numFrames = 4;
   const int maxBufferSize = 64;
@@ -82,8 +84,10 @@ void test_convnet_batchnorm()
 
   std::vector<NAM_SAMPLE> input(numFrames, 1.0f);
   std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
 
-  convnet.process(input.data(), output.data(), numFrames);
+  convnet.process(inputPtrs, outputPtrs, numFrames);
 
   assert(output.size() == numFrames);
   for (int i = 0; i < numFrames; i++)
@@ -117,7 +121,7 @@ void test_convnet_multiple_blocks()
   // Head weights
   weights.insert(weights.end(), {1.0f, 1.0f, 0.0f});
 
-  nam::convnet::ConvNet convnet(channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
 
   const int numFrames = 8;
   const int maxBufferSize = 64;
@@ -125,8 +129,10 @@ void test_convnet_multiple_blocks()
 
   std::vector<NAM_SAMPLE> input(numFrames, 0.5f);
   std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
 
-  convnet.process(input.data(), output.data(), numFrames);
+  convnet.process(inputPtrs, outputPtrs, numFrames);
 
   assert(output.size() == numFrames);
   for (int i = 0; i < numFrames; i++)
@@ -150,15 +156,17 @@ void test_convnet_zero_input()
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});
 
-  nam::convnet::ConvNet convnet(channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
 
   const int numFrames = 4;
   convnet.Reset(expected_sample_rate, numFrames);
 
   std::vector<NAM_SAMPLE> input(numFrames, 0.0f);
   std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
 
-  convnet.process(input.data(), output.data(), numFrames);
+  convnet.process(inputPtrs, outputPtrs, numFrames);
 
   // With zero input, output should be finite (may be zero or non-zero depending on bias)
   for (int i = 0; i < numFrames; i++)
@@ -182,18 +190,22 @@ void test_convnet_different_buffer_sizes()
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});
 
-  nam::convnet::ConvNet convnet(channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
 
   // Test with different buffer sizes
   convnet.Reset(expected_sample_rate, 64);
   std::vector<NAM_SAMPLE> input1(32, 1.0f);
   std::vector<NAM_SAMPLE> output1(32, 0.0f);
-  convnet.process(input1.data(), output1.data(), 32);
+  NAM_SAMPLE* inputPtrs1[] = {input1.data()};
+  NAM_SAMPLE* outputPtrs1[] = {output1.data()};
+  convnet.process(inputPtrs1, outputPtrs1, 32);
 
   convnet.Reset(expected_sample_rate, 128);
   std::vector<NAM_SAMPLE> input2(64, 1.0f);
   std::vector<NAM_SAMPLE> output2(64, 0.0f);
-  convnet.process(input2.data(), output2.data(), 64);
+  NAM_SAMPLE* inputPtrs2[] = {input2.data()};
+  NAM_SAMPLE* outputPtrs2[] = {output2.data()};
+  convnet.process(inputPtrs2, outputPtrs2, 64);
 
   // Both should work without errors
   assert(output1.size() == 32);
@@ -219,7 +231,7 @@ void test_convnet_prewarm()
   // Head weights (2 weights + 1 bias)
   weights.insert(weights.end(), {1.0f, 1.0f, 0.0f});
 
-  nam::convnet::ConvNet convnet(channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
 
   // Test that prewarm can be called without errors
   convnet.Reset(expected_sample_rate, 64);
@@ -229,7 +241,9 @@ void test_convnet_prewarm()
   const int numFrames = 4;
   std::vector<NAM_SAMPLE> input(numFrames, 1.0f);
   std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
-  convnet.process(input.data(), output.data(), numFrames);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
+  convnet.process(inputPtrs, outputPtrs, numFrames);
 
   // Output should be finite
   for (int i = 0; i < numFrames; i++)
@@ -253,7 +267,7 @@ void test_convnet_multiple_calls()
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});
 
-  nam::convnet::ConvNet convnet(channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
 
   const int numFrames = 2;
   convnet.Reset(expected_sample_rate, numFrames);
@@ -263,7 +277,9 @@ void test_convnet_multiple_calls()
   {
     std::vector<NAM_SAMPLE> input(numFrames, 1.0f);
     std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
-    convnet.process(input.data(), output.data(), numFrames);
+    NAM_SAMPLE* inputPtrs[] = {input.data()};
+    NAM_SAMPLE* outputPtrs[] = {output.data()};
+    convnet.process(inputPtrs, outputPtrs, numFrames);
 
     // Output should be finite
     for (int j = 0; j < numFrames; j++)
diff --git a/tools/test/test_dsp.cpp b/tools/test/test_dsp.cpp
index bbdee63..d71bd18 100644
--- a/tools/test/test_dsp.cpp
+++ b/tools/test/test_dsp.cpp
@@ -1,33 +1,41 @@
 // Tests for dsp
 
 #include "NAM/dsp.h"
+#include <vector>
 
 namespace test_dsp
 {
 // Simplest test: can I construct something!
 void test_construct()
 {
-  nam::DSP myDsp(48000.0);
+  nam::DSP myDsp(1, 1, 48000.0);
+}
+
+void test_channels()
+{
+  nam::DSP myDsp(2, 3, 48000.0);
+  assert(myDsp.NumInputChannels() == 2);
+  assert(myDsp.NumOutputChannels() == 3);
 }
 
 void test_get_input_level()
 {
-  nam::DSP myDsp(48000.0);
+  nam::DSP myDsp(2, 1, 48000.0);
   const double expected = 19.0;
-  myDsp.SetInputLevel(expected);
-  assert(myDsp.HasInputLevel());
-  const double actual = myDsp.GetInputLevel();
+  myDsp.SetInputLevel(0, expected);
+  assert(myDsp.HasInputLevel(0));
+  const double actual = myDsp.GetInputLevel(0);
   assert(actual == expected);
 }
 
 void test_get_output_level()
 {
-  nam::DSP myDsp(48000.0);
+  nam::DSP myDsp(1, 2, 48000.0);
   const double expected = 12.0;
-  myDsp.SetOutputLevel(expected);
-  assert(myDsp.HasOutputLevel());
-  const double actual = myDsp.GetOutputLevel();
+  myDsp.SetOutputLevel(1, expected);
+  assert(myDsp.HasOutputLevel(1));
+  const double actual = myDsp.GetOutputLevel(1);
   assert(actual == expected);
 }
 
@@ -35,32 +43,74 @@ void test_get_output_level()
 // Test correct function of DSP::HasInputLevel()
 void test_has_input_level()
 {
-  nam::DSP myDsp(48000.0);
-  assert(!myDsp.HasInputLevel());
-
-  myDsp.SetInputLevel(19.0);
-  assert(myDsp.HasInputLevel());
+  nam::DSP myDsp(2, 1, 48000.0);
+  myDsp.SetInputLevel(0, 19.0);
+  assert(myDsp.HasInputLevel(0));
+  assert(!myDsp.HasInputLevel(1));
 }
 
 void test_has_output_level()
 {
-  nam::DSP myDsp(48000.0);
-  assert(!myDsp.HasOutputLevel());
+  nam::DSP myDsp(1, 2, 48000.0);
 
-  myDsp.SetOutputLevel(12.0);
-  assert(myDsp.HasOutputLevel());
+  assert(!myDsp.HasOutputLevel(0));
+  assert(!myDsp.HasOutputLevel(1));
+
+  myDsp.SetOutputLevel(1, 12.0);
+  assert(!myDsp.HasOutputLevel(0));
+  assert(myDsp.HasOutputLevel(1));
 }
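+
+// Exercises the aggregate channel == -1 form documented in dsp.h:
+// HasInputLevel() with no argument asks whether any channel has a level set.
+void test_has_input_level_any()
+{
+  nam::DSP myDsp(2, 1, 48000.0);
+  assert(!myDsp.HasInputLevel());
+  myDsp.SetInputLevel(1, 19.0);
+  assert(myDsp.HasInputLevel());
+}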
 
 // Test correct function of DSP::SetInputLevel()
 void test_set_input_level()
 {
-  nam::DSP myDsp(48000.0);
-  myDsp.SetInputLevel(19.0);
+  nam::DSP myDsp(2, 1, 48000.0);
+  myDsp.SetInputLevel(0, 19.0);
+  myDsp.SetInputLevel(1, 20.0);
 }
 
 void test_set_output_level()
 {
-  nam::DSP myDsp(48000.0);
-  myDsp.SetOutputLevel(19.0);
+  nam::DSP myDsp(1, 2, 48000.0);
+  myDsp.SetOutputLevel(0, 19.0);
+  myDsp.SetOutputLevel(1, 20.0);
+}
+
+void test_process_multi_channel()
+{
+  nam::DSP myDsp(2, 2, 48000.0);
+  const int num_frames = 64;
+
+  // Allocate buffers
+  std::vector<std::vector<NAM_SAMPLE>> inputBuffers(2);
+  std::vector<std::vector<NAM_SAMPLE>> outputBuffers(2);
+  std::vector<NAM_SAMPLE*> inputPtrs(2);
+  std::vector<NAM_SAMPLE*> outputPtrs(2);
+
+  for (int ch = 0; ch < 2; ch++)
+  {
+    inputBuffers[ch].resize(num_frames);
+    outputBuffers[ch].resize(num_frames);
+    inputPtrs[ch] = inputBuffers[ch].data();
+    outputPtrs[ch] = outputBuffers[ch].data();
+
+    // Fill input with test data
+    for (int i = 0; i < num_frames; i++)
+    {
+      inputBuffers[ch][i] = (ch + 1) * 0.5 + i * 0.01;
+    }
+  }
+
+  // Process
+  myDsp.process(inputPtrs.data(), outputPtrs.data(), num_frames);
+
+  // Check that default implementation copied input to output
+  for (int ch = 0; ch < 2; ch++)
+  {
+    for (int i = 0; i < num_frames; i++)
+    {
+      assert(outputBuffers[ch][i] == inputBuffers[ch][i]);
+    }
+  }
 }
 }; // namespace test_dsp
diff --git a/tools/test/test_wavenet/test_full.cpp b/tools/test/test_wavenet/test_full.cpp
index d75ae1c..dcb1941 100644
--- a/tools/test/test_wavenet/test_full.cpp
+++ b/tools/test/test_wavenet/test_full.cpp
@@ -47,7 +47,8 @@ void test_wavenet_model()
   weights.push_back(1.0f); // Head rechannel
 
   weights.push_back(head_scale); // Head scale
 
-  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(layer_array_params, head_scale, with_head, weights, 48000.0);
+  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(
+    input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0);
 
   const int numFrames = 4;
   const int maxBufferSize = 64;
@@ -55,8 +56,10 @@ void test_wavenet_model()
 
   std::vector<NAM_SAMPLE> input(numFrames, 1.0f);
   std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
 
-  wavenet->process(input.data(), output.data(), numFrames);
+  wavenet->process(inputPtrs, outputPtrs, numFrames);
 
   // Verify output dimensions
   assert(output.size() == numFrames);
@@ -89,13 +92,13 @@ void test_wavenet_multiple_arrays()
   const int bottleneck = channels;
   const int groups_1x1 = 1;
   layer_array_params.push_back(nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, channels,
-                                                              bottleneck, kernel_size, std::move(dilations1), activation,
-                                                              gated, head_bias, groups, groups_1x1));
+                                                              bottleneck, kernel_size, std::move(dilations1),
+                                                              activation, gated, head_bias, groups, groups_1x1));
   // Second array (head_size of first must match channels of second)
   std::vector<int> dilations2{1};
   layer_array_params.push_back(nam::wavenet::LayerArrayParams(head_size, condition_size, head_size, channels,
-                                                              bottleneck, kernel_size, std::move(dilations2), activation,
-                                                              gated, head_bias, groups, groups_1x1));
+                                                              bottleneck, kernel_size, std::move(dilations2),
+                                                              activation, gated, head_bias, groups, groups_1x1));
 
   std::vector<float> weights;
   // Array 0: rechannel, layer, head_rechannel
@@ -104,7 +107,8 @@ void test_wavenet_multiple_arrays()
   weights.insert(weights.end(), {1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f});
   weights.push_back(head_scale);
 
-  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(layer_array_params, head_scale, with_head, weights, 48000.0);
+  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(
+    input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0);
 
   const int numFrames = 4;
   const int maxBufferSize = 64;
@@ -112,8 +116,10 @@ void test_wavenet_multiple_arrays()
 
   std::vector<NAM_SAMPLE> input(numFrames, 1.0f);
   std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
 
-  wavenet->process(input.data(), output.data(), numFrames);
+  wavenet->process(inputPtrs, outputPtrs, numFrames);
 
   assert(output.size() == numFrames);
   for (int i = 0; i < numFrames; i++)
@@ -147,15 +153,18 @@ void test_wavenet_zero_input()
 
   std::vector<float> weights{1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f, head_scale};
 
-  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(layer_array_params, head_scale, with_head, weights, 48000.0);
+  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(
+    input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0);
 
   const int numFrames = 4;
   wavenet->Reset(48000.0, numFrames);
 
   std::vector<NAM_SAMPLE> input(numFrames, 0.0f);
   std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
 
-  wavenet->process(input.data(), output.data(), numFrames);
+  wavenet->process(inputPtrs, outputPtrs, numFrames);
 
   // With zero input, output should be finite (may be zero or non-zero depending on bias)
   for (int i = 0; i < numFrames; i++)
@@ -189,18 +198,23 @@ void test_wavenet_different_buffer_sizes()
 
   std::vector<float> weights{1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f, head_scale};
 
-  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(layer_array_params, head_scale, with_head, weights, 48000.0);
+  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(
+    input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0);
 
   // Test with different buffer sizes
   wavenet->Reset(48000.0, 64);
   std::vector<NAM_SAMPLE> input1(32, 1.0f);
   std::vector<NAM_SAMPLE> output1(32, 0.0f);
-  wavenet->process(input1.data(), output1.data(), 32);
+  NAM_SAMPLE* inputPtrs1[] = {input1.data()};
+  NAM_SAMPLE* outputPtrs1[] = {output1.data()};
+  wavenet->process(inputPtrs1, outputPtrs1, 32);
 
   wavenet->Reset(48000.0, 128);
   std::vector<NAM_SAMPLE> input2(64, 1.0f);
   std::vector<NAM_SAMPLE> output2(64, 0.0f);
-  wavenet->process(input2.data(), output2.data(), 64);
+  NAM_SAMPLE* inputPtrs2[] = {input2.data()};
+  NAM_SAMPLE* outputPtrs2[] = {output2.data()};
+  wavenet->process(inputPtrs2, outputPtrs2, 64);
 
   // Both should work without errors
   assert(output1.size() == 32);
@@ -251,7 +265,8 @@ void test_wavenet_prewarm()
   weights.push_back(1.0f);
   weights.push_back(head_scale);
 
-  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(layer_array_params, head_scale, with_head, weights, 48000.0);
+  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(
+    input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0);
 
   // Test that prewarm can be called without errors
   wavenet->Reset(48000.0, 64);
@@ -261,7 +276,9 @@ void test_wavenet_prewarm()
   const int numFrames = 4;
   std::vector<NAM_SAMPLE> input(numFrames, 1.0f);
   std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
-  wavenet->process(input.data(), output.data(), numFrames);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
+  wavenet->process(inputPtrs, outputPtrs, numFrames);
 
   // Output should be finite
   for (int i = 0; i < numFrames; i++)
diff --git a/tools/test/test_wavenet/test_real_time_safe.cpp b/tools/test/test_wavenet/test_real_time_safe.cpp
index 91d8628..c35c97c 100644
--- a/tools/test/test_wavenet/test_real_time_safe.cpp
+++ b/tools/test/test_wavenet/test_real_time_safe.cpp
@@ -775,7 +775,7 @@ void test_process_realtime_safe()
   weights.insert(weights.end(), {1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f});
   weights.push_back(head_scale);
 
-  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(layer_array_params, head_scale, with_head, weights, 48000.0);
+  auto wavenet = std::make_unique<nam::wavenet::WaveNet>(input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0);
 
   const int maxBufferSize = 256;
   wavenet->Reset(48000.0, maxBufferSize);
@@ -794,7 +794,9 @@ void test_process_realtime_safe()
     nullptr, // No setup needed
     [&]() {
       // Call process() - this should not allocate or free
-      wavenet->process(input.data(), output.data(), buffer_size);
+      NAM_SAMPLE* inputPtrs[] = {input.data()};
+      NAM_SAMPLE* outputPtrs[] = {output.data()};
+      wavenet->process(inputPtrs, outputPtrs, buffer_size);
     },
     nullptr, // No teardown needed
     test_name.c_str());

From 6402f146eedef2cbbe2a198a1589f23a62fbc32f Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Thu, 15 Jan 2026 21:27:13 -0800
Subject: [PATCH 02/19] Formatting

---
 NAM/convnet.cpp                                 | 21 ++++++++-------
 NAM/convnet.h                                   |  5 ++--
 NAM/dsp.h                                       |  7 ++---
 NAM/lstm.cpp                                    |  9 ++++---
 NAM/lstm.h                                      |  4 +--
 NAM/wavenet.cpp                                 | 11 ++++----
 NAM/wavenet.h                                   |  5 ++--
 tools/benchmodel.cpp                            |  4 +--
 tools/test/test_wavenet/test_real_time_safe.cpp | 27 ++++++++++---------
 9 files changed, 50 insertions(+), 43 deletions(-)

diff --git a/NAM/convnet.cpp b/NAM/convnet.cpp
index f82d6c1..6c0f3b7 100644
--- a/NAM/convnet.cpp
+++ b/NAM/convnet.cpp
@@ -146,9 +146,9 @@ void nam::convnet::_Head::process_(const Eigen::MatrixXf& input, Eigen::VectorXf
   output(i) = this->_bias + input.col(j).dot(this->_weight);
 }
 
-nam::convnet::ConvNet::ConvNet(const int in_channels, const int out_channels, const int channels, const std::vector<int>& dilations, const bool batchnorm,
-                               const std::string activation, std::vector<float>& weights,
-                               const double expected_sample_rate, const int groups)
+nam::convnet::ConvNet::ConvNet(const int in_channels, const int out_channels, const int channels,
+                               const std::vector<int>& dilations, const bool batchnorm, const std::string activation,
+                               std::vector<float>& weights, const double expected_sample_rate, const int groups)
 : Buffer(in_channels, out_channels, *std::max_element(dilations.begin(), dilations.end()), expected_sample_rate)
 {
   this->_verify_weights(channels, dilations, batchnorm, weights.size());
@@ -156,12 +156,13 @@ nam::convnet::ConvNet::ConvNet(const int in_channels, const int out_channels, co
   std::vector<float>::iterator it = weights.begin();
   // First block takes in_channels input, subsequent blocks take channels input
   for (size_t i = 0; i < dilations.size(); i++)
-    this->_blocks[i].set_weights_(i == 0 ? in_channels : channels, channels, dilations[i], batchnorm, activation, groups, it);
+    this->_blocks[i].set_weights_(
+      i == 0 ? in_channels : channels, channels, dilations[i], batchnorm, activation, groups, it);
   // Only need _block_vals for the head (one entry)
   // Conv1D layers manage their own buffers now
   this->_block_vals.resize(1);
   this->_block_vals[0].setZero();
-  
+
   // Create heads for each output channel
   this->_heads.resize(out_channels);
   this->_head_outputs.resize(out_channels);
   for (int ch = 0; ch < out_channels; ch++)
   {
     this->_heads[ch] = _Head(channels, it);
   }
-  
+
   if (it != weights.end())
     throw std::runtime_error("Didn't touch all the weights when initializing ConvNet");
 
@@ -185,11 +186,11 @@ void nam::convnet::ConvNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, con
   this->_update_buffers_(input, num_frames);
   const int in_channels = NumInputChannels();
   const int out_channels = NumOutputChannels();
-  
+
   // Multi-channel handling: the input channels are stacked into one matrix and
   // mixed by the first conv layer; each output channel then gets its own head.
   // This can be extended later for more sophisticated cross-channel processing
-  
+
   // Convert input buffers to matrix for first layer (stack input channels)
   Eigen::MatrixXf input_matrix(in_channels, num_frames);
   for (int ch = 0; ch < in_channels; ch++)
@@ -229,7 +230,7 @@ void nam::convnet::ConvNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, con
   {
     this->_block_vals[0].resize(this->_blocks.back().get_out_channels(), max_buffer_size);
   }
-  
+
   // Copy last block output to _block_vals for head
   auto last_output = this->_blocks.back().GetOutput(num_frames);
   const long i_start = this->_input_buffer_offset[0]; // Use first channel's offset
@@ -240,7 +241,7 @@ void nam::convnet::ConvNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, con
   for (int ch = 0; ch < out_channels; ch++)
   {
     this->_heads[ch].process_(this->_block_vals[0], this->_head_outputs[ch], i_start, i_end);
-    
+
     // Copy to output array for this channel
     for (int s = 0; s < num_frames; s++)
       output[ch][s] = this->_head_outputs[ch](s);
   }
diff --git a/NAM/convnet.h b/NAM/convnet.h
index 5f16ad6..c14994f 100644
--- a/NAM/convnet.h
+++ b/NAM/convnet.h
@@ -77,8 +77,9 @@ class _Head
 class ConvNet : public Buffer
 {
 public:
-  ConvNet(const int in_channels, const int out_channels, const int channels, const std::vector<int>& dilations, const bool batchnorm, const std::string activation,
-          std::vector<float>& weights, const double expected_sample_rate = -1.0, const int groups = 1);
+  ConvNet(const int in_channels, const int out_channels, const int channels, const std::vector<int>& dilations,
+          const bool batchnorm, const std::string activation, std::vector<float>& weights,
-1.0, const int groups = 1); ~ConvNet() = default; void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override; diff --git a/NAM/dsp.h b/NAM/dsp.h index ef1eaf7..a15cbca 100644 --- a/NAM/dsp.h +++ b/NAM/dsp.h @@ -134,7 +134,8 @@ class DSP class Buffer : public DSP { public: - Buffer(const int in_channels, const int out_channels, const int receptive_field, const double expected_sample_rate = -1.0); + Buffer(const int in_channels, const int out_channels, const int receptive_field, + const double expected_sample_rate = -1.0); protected: int _receptive_field; @@ -157,8 +158,8 @@ class Buffer : public DSP class Linear : public Buffer { public: - Linear(const int in_channels, const int out_channels, const int receptive_field, const bool _bias, const std::vector& weights, - const double expected_sample_rate = -1.0); + Linear(const int in_channels, const int out_channels, const int receptive_field, const bool _bias, + const std::vector& weights, const double expected_sample_rate = -1.0); void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override; protected: diff --git a/NAM/lstm.cpp b/NAM/lstm.cpp index 72eb684..ada9580 100644 --- a/NAM/lstm.cpp +++ b/NAM/lstm.cpp @@ -65,8 +65,8 @@ void nam::lstm::LSTMCell::process_(const Eigen::VectorXf& x) } } -nam::lstm::LSTM::LSTM(const int in_channels, const int out_channels, const int num_layers, const int input_size, const int hidden_size, std::vector& weights, - const double expected_sample_rate) +nam::lstm::LSTM::LSTM(const int in_channels, const int out_channels, const int num_layers, const int input_size, + const int hidden_size, std::vector& weights, const double expected_sample_rate) : DSP(in_channels, out_channels, expected_sample_rate) { this->_input.resize(1); @@ -83,7 +83,7 @@ nam::lstm::LSTM::LSTM(const int in_channels, const int out_channels, const int n void nam::lstm::LSTM::process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) { const int out_channels = NumOutputChannels(); - + // For now, process first input channel and replicate to all output channels // Can be extended later for true multi-channel support for (int i = 0; i < num_frames; i++) @@ -123,7 +123,8 @@ std::unique_ptr nam::lstm::Factory(const nlohmann::json& config, std:: // Default to 1 channel in/out for backward compatibility const int in_channels = config.value("in_channels", input_size); const int out_channels = config.value("out_channels", 1); - return std::make_unique(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expectedSampleRate); + return std::make_unique( + in_channels, out_channels, num_layers, input_size, hidden_size, weights, expectedSampleRate); } // Register the factory diff --git a/NAM/lstm.h b/NAM/lstm.h index e2123dc..251e01b 100644 --- a/NAM/lstm.h +++ b/NAM/lstm.h @@ -51,8 +51,8 @@ class LSTMCell class LSTM : public DSP { public: - LSTM(const int in_channels, const int out_channels, const int num_layers, const int input_size, const int hidden_size, std::vector& weights, - const double expected_sample_rate = -1.0); + LSTM(const int in_channels, const int out_channels, const int num_layers, const int input_size, const int hidden_size, + std::vector& weights, const double expected_sample_rate = -1.0); ~LSTM() = default; protected: diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp index 3e4ff1c..0b07b79 100644 --- a/NAM/wavenet.cpp +++ b/NAM/wavenet.cpp @@ -192,7 +192,8 @@ long nam::wavenet::_LayerArray::_get_channels() const // WaveNet 
==================================================================== -nam::wavenet::WaveNet::WaveNet(const int in_channels, const int out_channels, const std::vector& layer_array_params, +nam::wavenet::WaveNet::WaveNet(const int in_channels, const int out_channels, + const std::vector& layer_array_params, const float head_scale, const bool with_head, std::vector weights, const double expected_sample_rate) : DSP(in_channels, out_channels, expected_sample_rate) @@ -265,7 +266,7 @@ void nam::wavenet::WaveNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, con { assert(num_frames <= mMaxBufferSize); const int out_channels = NumOutputChannels(); - + this->_set_condition_array(input, num_frames); // Main layer arrays: @@ -293,7 +294,7 @@ void nam::wavenet::WaveNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, con auto& final_head_outputs = this->_layer_arrays.back().GetHeadOutputs(); const int out_channels = NumOutputChannels(); assert(final_head_outputs.rows() == out_channels); - + for (int ch = 0; ch < out_channels; ch++) { for (int s = 0; s < num_frames; s++) @@ -323,11 +324,11 @@ std::unique_ptr nam::wavenet::Factory(const nlohmann::json& config, st } const bool with_head = !config["head"].is_null(); const float head_scale = config["head_scale"]; - + // Determine channels from first layer (input_size) and last layer (head_size) const int in_channels = config.value("in_channels", layer_array_params[0].input_size); const int out_channels = config.value("out_channels", layer_array_params.back().head_size); - + return std::make_unique( in_channels, out_channels, layer_array_params, head_scale, with_head, weights, expectedSampleRate); } diff --git a/NAM/wavenet.h b/NAM/wavenet.h index faffa74..4ca370f 100644 --- a/NAM/wavenet.h +++ b/NAM/wavenet.h @@ -174,8 +174,9 @@ class _LayerArray class WaveNet : public DSP { public: - WaveNet(const int in_channels, const int out_channels, const std::vector& layer_array_params, const float head_scale, const bool with_head, - std::vector weights, const double expected_sample_rate = -1.0); + WaveNet(const int in_channels, const int out_channels, const std::vector& layer_array_params, + const float head_scale, const bool with_head, std::vector weights, + const double expected_sample_rate = -1.0); ~WaveNet() = default; void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override; void set_weights_(std::vector& weights); diff --git a/tools/benchmodel.cpp b/tools/benchmodel.cpp index fb6bd09..d8a1690 100644 --- a/tools/benchmodel.cpp +++ b/tools/benchmodel.cpp @@ -43,12 +43,12 @@ int main(int argc, char* argv[]) // Allocate multi-channel buffers const int in_channels = model->NumInputChannels(); const int out_channels = model->NumOutputChannels(); - + std::vector> inputBuffers(in_channels); std::vector> outputBuffers(out_channels); std::vector inputPtrs(in_channels); std::vector outputPtrs(out_channels); - + for (int ch = 0; ch < in_channels; ch++) { inputBuffers[ch].resize(AUDIO_BUFFER_SIZE, 0.0); diff --git a/tools/test/test_wavenet/test_real_time_safe.cpp b/tools/test/test_wavenet/test_real_time_safe.cpp index c35c97c..e0c2261 100644 --- a/tools/test/test_wavenet/test_real_time_safe.cpp +++ b/tools/test/test_wavenet/test_real_time_safe.cpp @@ -437,8 +437,8 @@ void test_layer_process_realtime_safe() const int groups_input = 1; const int groups_1x1 = 1; - auto layer = nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gated, - groups_input, groups_1x1); + auto layer = nam::wavenet::_Layer( + 
condition_size, channels, bottleneck, kernel_size, dilation, activation, gated, groups_input, groups_1x1); // Set weights std::vector weights{1.0f, 0.0f, // Conv (weight, bias) @@ -492,8 +492,8 @@ void test_layer_bottleneck_process_realtime_safe() const int groups_input = 1; const int groups_1x1 = 1; - auto layer = nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gated, - groups_input, groups_1x1); + auto layer = nam::wavenet::_Layer( + condition_size, channels, bottleneck, kernel_size, dilation, activation, gated, groups_input, groups_1x1); // Set weights for bottleneck != channels // Conv: (channels, bottleneck, kernelSize=1) = (4, 2, 1) + bias @@ -544,8 +544,8 @@ void test_layer_bottleneck_process_realtime_safe() input.setConstant(0.5f); condition.setConstant(0.5f); - std::string test_name = "Layer Process (bottleneck=" + std::to_string(bottleneck) + ", channels=" + - std::to_string(channels) + ") - Buffer size " + std::to_string(buffer_size); + std::string test_name = "Layer Process (bottleneck=" + std::to_string(bottleneck) + ", channels=" + + std::to_string(channels) + ") - Buffer size " + std::to_string(buffer_size); run_allocation_test_no_allocations( nullptr, // No setup needed [&]() { @@ -577,8 +577,8 @@ void test_layer_grouped_process_realtime_safe() const int groups_input = 2; // groups_input > 1 const int groups_1x1 = 2; // 1x1 is also grouped - auto layer = nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gated, - groups_input, groups_1x1); + auto layer = nam::wavenet::_Layer( + condition_size, channels, bottleneck, kernel_size, dilation, activation, gated, groups_input, groups_1x1); // Set weights for grouped convolution // With groups_input=2, channels=4: each group has 2 in_channels and 2 out_channels @@ -757,13 +757,13 @@ void test_process_realtime_safe() const int bottleneck = channels; const int groups_1x1 = 1; layer_array_params.push_back(nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, channels, - bottleneck, kernel_size, std::move(dilations1), activation, - gated, head_bias, groups, groups_1x1)); + bottleneck, kernel_size, std::move(dilations1), + activation, gated, head_bias, groups, groups_1x1)); // Second layer array (head_size of first must match channels of second) std::vector dilations2{1}; layer_array_params.push_back(nam::wavenet::LayerArrayParams(head_size, condition_size, head_size, channels, - bottleneck, kernel_size, std::move(dilations2), activation, - gated, head_bias, groups, groups_1x1)); + bottleneck, kernel_size, std::move(dilations2), + activation, gated, head_bias, groups, groups_1x1)); // Weights: Array 0: rechannel(1), layer(conv:1+1, input_mixin:1, 1x1:1+1), head_rechannel(1) // Array 1: same structure @@ -775,7 +775,8 @@ void test_process_realtime_safe() weights.insert(weights.end(), {1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}); weights.push_back(head_scale); - auto wavenet = std::make_unique(input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0); + auto wavenet = std::make_unique( + input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0); const int maxBufferSize = 256; wavenet->Reset(48000.0, maxBufferSize); From 1cff233e18a758c025ab1be5a666bb3567b13e44 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Thu, 15 Jan 2026 22:44:33 -0800 Subject: [PATCH 03/19] [REFINE] Simplify DSP input/output level handling - Refactored DSP class to eliminate channel-specific input/output level 
methods, consolidating them into single methods for input and output levels. - Updated related methods and tests to reflect the new simplified interface, ensuring backward compatibility. - Adjusted buffer management in the Buffer class to accommodate the changes in DSP level handling, ensuring all channels use consistent buffer sizes. --- NAM/convnet.cpp | 95 +++++++++++++++++++++--------------- NAM/dsp.cpp | 103 ++++++++++++++-------------------------- NAM/get_dsp.cpp | 12 +---- NAM/wavenet.cpp | 41 ++++++++++++---- tools/test/test_dsp.cpp | 93 ++++++++++++++++++++++-------------- 5 files changed, 185 insertions(+), 159 deletions(-) diff --git a/NAM/convnet.cpp b/NAM/convnet.cpp index 6c0f3b7..3ee6fbe 100644 --- a/NAM/convnet.cpp +++ b/NAM/convnet.cpp @@ -129,21 +129,47 @@ long nam::convnet::ConvNetBlock::get_out_channels() const return this->conv.get_out_channels(); } -nam::convnet::_Head::_Head(const int channels, std::vector::iterator& weights) +nam::convnet::_Head::_Head(const int in_channels, const int out_channels, std::vector::iterator& weights) { - this->_weight.resize(channels); - for (int i = 0; i < channels; i++) - this->_weight[i] = *(weights++); - this->_bias = *(weights++); + // Weights are stored row-major: first row (output 0), then row 1 (output 1), etc. + // For each output channel: [w0, w1, ..., w_{in_channels-1}] + // Then biases: [bias0, bias1, ..., bias_{out_channels-1}] + this->_weight.resize(out_channels, in_channels); + for (int out_ch = 0; out_ch < out_channels; out_ch++) + { + for (int in_ch = 0; in_ch < in_channels; in_ch++) + { + this->_weight(out_ch, in_ch) = *(weights++); + } + } + + // Biases for each output channel + this->_bias.resize(out_channels); + for (int out_ch = 0; out_ch < out_channels; out_ch++) + { + this->_bias(out_ch) = *(weights++); + } } -void nam::convnet::_Head::process_(const Eigen::MatrixXf& input, Eigen::VectorXf& output, const long i_start, +void nam::convnet::_Head::process_(const Eigen::MatrixXf& input, Eigen::MatrixXf& output, const long i_start, const long i_end) const { const long length = i_end - i_start; - output.resize(length); - for (long i = 0, j = i_start; i < length; i++, j++) - output(i) = this->_bias + input.col(j).dot(this->_weight); + const long out_channels = this->_weight.rows(); + const long in_channels = this->_weight.cols(); + + // Resize output to (out_channels x length) + output.resize(out_channels, length); + + // Extract input slice: (in_channels x length) + Eigen::MatrixXf input_slice = input.middleCols(i_start, length); + + // Compute output = weight * input_slice: (out_channels x in_channels) * (in_channels x length) = (out_channels x length) + output.noalias() = this->_weight * input_slice; + + // Add bias to each column: output.colwise() += bias + // output is (out_channels x length), bias is (out_channels x 1), so colwise() += works + output.colwise() += this->_bias; } nam::convnet::ConvNet::ConvNet(const int in_channels, const int out_channels, const int channels, @@ -163,13 +189,8 @@ nam::convnet::ConvNet::ConvNet(const int in_channels, const int out_channels, co this->_block_vals.resize(1); this->_block_vals[0].setZero(); - // Create heads for each output channel - this->_heads.resize(out_channels); - this->_head_outputs.resize(out_channels); - for (int ch = 0; ch < out_channels; ch++) - { - this->_heads[ch] = _Head(channels, it); - } + // Create single head that outputs all channels + this->_head = _Head(channels, out_channels, it); if (it != weights.end()) throw std::runtime_error("Didn't touch all 
the weights when initializing ConvNet"); @@ -193,9 +214,9 @@ void nam::convnet::ConvNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, con // Convert input buffers to matrix for first layer (stack input channels) Eigen::MatrixXf input_matrix(in_channels, num_frames); + const long i_start = this->_input_buffer_offset; for (int ch = 0; ch < in_channels; ch++) { - const long i_start = this->_input_buffer_offset[ch]; for (int i = 0; i < num_frames; i++) input_matrix(ch, i) = this->_input_buffers[ch][i_start + i]; } @@ -222,29 +243,32 @@ void nam::convnet::ConvNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, con this->_blocks[i].Process(block_input, num_frames); } - // Process heads for each output channel + // Process head for all output channels at once // We need _block_vals[0] for the head interface - const long max_buffer_size = this->_input_buffers[0].size(); + const long buffer_size = (long)this->_input_buffers[0].size(); if (this->_block_vals[0].rows() != this->_blocks.back().get_out_channels() - || this->_block_vals[0].cols() != max_buffer_size) + || this->_block_vals[0].cols() != buffer_size) { - this->_block_vals[0].resize(this->_blocks.back().get_out_channels(), max_buffer_size); + this->_block_vals[0].resize(this->_blocks.back().get_out_channels(), buffer_size); } // Copy last block output to _block_vals for head auto last_output = this->_blocks.back().GetOutput(num_frames); - const long i_start = this->_input_buffer_offset[0]; // Use first channel's offset - const long i_end = i_start + num_frames; - this->_block_vals[0].middleCols(i_start, num_frames) = last_output; + const long buffer_offset = this->_input_buffer_offset; + const long buffer_i_end = buffer_offset + num_frames; + // last_output is (channels x num_frames), _block_vals[0] is (channels x buffer_size) + // Copy to the correct location in _block_vals + this->_block_vals[0].block(0, buffer_offset, last_output.rows(), num_frames) = last_output; + + // Process head - outputs all channels at once + // Head will resize _head_output internally + this->_head.process_(this->_block_vals[0], this->_head_output, buffer_offset, buffer_i_end); - // Process each output channel head + // Copy to output arrays for each channel for (int ch = 0; ch < out_channels; ch++) { - this->_heads[ch].process_(this->_block_vals[0], this->_head_outputs[ch], i_start, i_end); - - // Copy to output array for this channel for (int s = 0; s < num_frames; s++) - output[ch][s] = this->_head_outputs[ch](s); + output[ch][s] = this->_head_output(ch, s); } // Prepare for next call: @@ -272,20 +296,15 @@ void nam::convnet::ConvNet::_update_buffers_(NAM_SAMPLE** input, const int num_f { this->Buffer::_update_buffers_(input, num_frames); - // Find maximum buffer size across input channels - long max_buffer_size = 0; - for (const auto& buf : this->_input_buffers) - { - if ((long)buf.size() > max_buffer_size) - max_buffer_size = (long)buf.size(); - } + // All channels use the same buffer size + const long buffer_size = (long)this->_input_buffers[0].size(); // Only need _block_vals[0] for the head // Conv1D layers manage their own buffers now if (this->_block_vals[0].rows() != this->_blocks.back().get_out_channels() - || this->_block_vals[0].cols() != max_buffer_size) + || this->_block_vals[0].cols() != buffer_size) { - this->_block_vals[0].resize(this->_blocks.back().get_out_channels(), max_buffer_size); + this->_block_vals[0].resize(this->_blocks.back().get_out_channels(), buffer_size); this->_block_vals[0].setZero(); } } diff --git a/NAM/dsp.cpp 
b/NAM/dsp.cpp index f9b625e..d0a1c4c 100644 --- a/NAM/dsp.cpp +++ b/NAM/dsp.cpp @@ -19,8 +19,6 @@ nam::DSP::DSP(const int in_channels, const int out_channels, const double expect : mInChannels(in_channels) , mOutChannels(out_channels) , mExpectedSampleRate(expected_sample_rate) -, mInputLevels(in_channels) -, mOutputLevels(out_channels) { if (in_channels <= 0 || out_channels <= 0) { @@ -113,60 +111,36 @@ void nam::DSP::SetMaxBufferSize(const int maxBufferSize) mMaxBufferSize = maxBufferSize; } -double nam::DSP::GetInputLevel(const int channel) +double nam::DSP::GetInputLevel() { - if (channel < 0 || channel >= mInChannels) - { - throw std::runtime_error("Invalid input channel index"); - } - return mInputLevels[channel].level; + return mInputLevel.level; } -double nam::DSP::GetOutputLevel(const int channel) +double nam::DSP::GetOutputLevel() { - if (channel < 0 || channel >= mOutChannels) - { - throw std::runtime_error("Invalid output channel index"); - } - return mOutputLevels[channel].level; + return mOutputLevel.level; } -bool nam::DSP::HasInputLevel(const int channel) +bool nam::DSP::HasInputLevel() { - if (channel < 0 || channel >= mInChannels) - { - throw std::runtime_error("Invalid input channel index"); - } - return mInputLevels[channel].haveLevel; + return mInputLevel.haveLevel; } -bool nam::DSP::HasOutputLevel(const int channel) +bool nam::DSP::HasOutputLevel() { - if (channel < 0 || channel >= mOutChannels) - { - throw std::runtime_error("Invalid output channel index"); - } - return mOutputLevels[channel].haveLevel; + return mOutputLevel.haveLevel; } -void nam::DSP::SetInputLevel(const int channel, const double inputLevel) +void nam::DSP::SetInputLevel(const double inputLevel) { - if (channel < 0 || channel >= mInChannels) - { - throw std::runtime_error("Invalid input channel index"); - } - mInputLevels[channel].haveLevel = true; - mInputLevels[channel].level = inputLevel; + mInputLevel.haveLevel = true; + mInputLevel.level = inputLevel; } -void nam::DSP::SetOutputLevel(const int channel, const double outputLevel) +void nam::DSP::SetOutputLevel(const double outputLevel) { - if (channel < 0 || channel >= mOutChannels) - { - throw std::runtime_error("Invalid output channel index"); - } - mOutputLevels[channel].haveLevel = true; - mOutputLevels[channel].level = outputLevel; + mOutputLevel.haveLevel = true; + mOutputLevel.level = outputLevel; } // Buffer ===================================================================== @@ -191,7 +165,6 @@ void nam::Buffer::_set_receptive_field(const int new_receptive_field, const int // Resize buffers for all input channels _input_buffers.resize(in_channels); - _input_buffer_offset.resize(in_channels); for (int ch = 0; ch < in_channels; ch++) { _input_buffers[ch].resize(input_buffer_size); @@ -210,7 +183,7 @@ void nam::Buffer::_update_buffers_(NAM_SAMPLE** input, const int num_frames) const int out_channels = NumOutputChannels(); // Make sure that the buffers are big enough for the receptive field and the - // frames needed for each channel! + // frames needed. All channels use the same buffer size. 
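+  // Worked example (illustrative numbers only; the actual value of
+  // _INPUT_BUFFER_SAFETY_FACTOR is defined elsewhere in this file): with a
+  // receptive field of 8192 samples, num_frames = 512, and a safety factor
+  // of 32, the minimum buffer size is 8192 + 32 * 512 = 24576 samples, and
+  // every input channel uses that same size.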
const long minimum_input_buffer_size = (long)this->_receptive_field + _INPUT_BUFFER_SAFETY_FACTOR * num_frames; for (int ch = 0; ch < in_channels; ch++) @@ -223,14 +196,18 @@ void nam::Buffer::_update_buffers_(NAM_SAMPLE** input, const int num_frames) this->_input_buffers[ch].resize(new_buffer_size); std::fill(this->_input_buffers[ch].begin(), this->_input_buffers[ch].end(), 0.0f); } + } - // If we'd run off the end of the input buffer, then we need to move the data - // back to the start of the buffer and start again. - if (this->_input_buffer_offset[ch] + num_frames > (long)this->_input_buffers[ch].size()) - this->_rewind_buffers_(); + // If we'd run off the end of the input buffer, then we need to move the data + // back to the start of the buffer and start again. All channels move together. + const long buffer_size = (long)this->_input_buffers[0].size(); + if (this->_input_buffer_offset + num_frames > buffer_size) + this->_rewind_buffers_(); - // Put the new samples into the input buffer for this channel - for (long i = this->_input_buffer_offset[ch], j = 0; j < num_frames; i++, j++) + // Put the new samples into the input buffer for each channel + for (int ch = 0; ch < in_channels; ch++) + { + for (long i = this->_input_buffer_offset, j = 0; j < num_frames; i++, j++) this->_input_buffers[ch][i] = (float)input[ch][j]; } @@ -246,38 +223,30 @@ void nam::Buffer::_rewind_buffers_() { const int in_channels = NumInputChannels(); - // Rewind buffers for all input channels + // Rewind buffers for all input channels (they all move together) for (int ch = 0; ch < in_channels; ch++) { // Copy the input buffer back // RF-1 samples because we've got at least one new one inbound. - for (long i = 0, j = this->_input_buffer_offset[ch] - this->_receptive_field; i < this->_receptive_field; i++, j++) + for (long i = 0, j = this->_input_buffer_offset - this->_receptive_field; i < this->_receptive_field; i++, j++) this->_input_buffers[ch][i] = this->_input_buffers[ch][j]; - // And reset the offset. - // Even though we could be stingy about that one sample that we won't be using - // (because a new set is incoming) it's probably not worth the - // hyper-optimization and liable for bugs. And the code looks way tidier this - // way. - this->_input_buffer_offset[ch] = this->_receptive_field; } + // And reset the offset. + // Even though we could be stingy about that one sample that we won't be using + // (because a new set is incoming) it's probably not worth the + // hyper-optimization and liable for bugs. And the code looks way tidier this + // way. 
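+  // Concrete example (hypothetical sizes): with _receptive_field = 4 and the
+  // offset sitting at 60 in a 64-sample buffer, samples [56..59] are copied
+  // down to [0..3] in every channel, and the offset restarts at 4.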
+ this->_input_buffer_offset = this->_receptive_field; } void nam::Buffer::_reset_input_buffer() { - const int in_channels = NumInputChannels(); - for (int ch = 0; ch < in_channels; ch++) - { - this->_input_buffer_offset[ch] = this->_receptive_field; - } + this->_input_buffer_offset = this->_receptive_field; } void nam::Buffer::_advance_input_buffer_(const int num_frames) { - const int in_channels = NumInputChannels(); - for (int ch = 0; ch < in_channels; ch++) - { - this->_input_buffer_offset[ch] += num_frames; - } + this->_input_buffer_offset += num_frames; } // Linear ===================================================================== @@ -314,7 +283,7 @@ void nam::Linear::process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num { for (int i = 0; i < num_frames; i++) { - const long offset = this->_input_buffer_offset[ch] - this->_weight.size() + i + 1; + const long offset = this->_input_buffer_offset - this->_weight.size() + i + 1; auto input_vec = Eigen::Map(&this->_input_buffers[ch][offset], this->_receptive_field); output[ch][i] = this->_bias + this->_weight.dot(input_vec); } diff --git a/NAM/get_dsp.cpp b/NAM/get_dsp.cpp index af1ef68..99dd3a0 100644 --- a/NAM/get_dsp.cpp +++ b/NAM/get_dsp.cpp @@ -158,19 +158,11 @@ std::unique_ptr get_dsp(dspData& conf) } if (inputLevel.have) { - // Set the same level for all input channels (backward compatibility) - for (int ch = 0; ch < out->NumInputChannels(); ch++) - { - out->SetInputLevel(ch, inputLevel.value); - } + out->SetInputLevel(inputLevel.value); } if (outputLevel.have) { - // Set the same level for all output channels (backward compatibility) - for (int ch = 0; ch < out->NumOutputChannels(); ch++) - { - out->SetOutputLevel(ch, outputLevel.value); - } + out->SetOutputLevel(outputLevel.value); } // "pre-warm" the model to settle initial conditions diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp index 0b07b79..748a3c9 100644 --- a/NAM/wavenet.cpp +++ b/NAM/wavenet.cpp @@ -192,13 +192,18 @@ long nam::wavenet::_LayerArray::_get_channels() const // WaveNet ==================================================================== -nam::wavenet::WaveNet::WaveNet(const int in_channels, const int out_channels, +nam::wavenet::WaveNet::WaveNet(const int in_channels, const std::vector& layer_array_params, const float head_scale, const bool with_head, std::vector weights, const double expected_sample_rate) -: DSP(in_channels, out_channels, expected_sample_rate) +: DSP(in_channels, + layer_array_params.empty() ? 
throw std::runtime_error("WaveNet requires at least one layer array") + : layer_array_params.back().head_size, + expected_sample_rate) , _head_scale(head_scale) { + if (layer_array_params.empty()) + throw std::runtime_error("WaveNet requires at least one layer array"); if (with_head) throw std::runtime_error("Head not implemented!"); for (size_t i = 0; i < layer_array_params.size(); i++) @@ -254,11 +259,26 @@ void nam::wavenet::WaveNet::SetMaxBufferSize(const int maxBufferSize) void nam::wavenet::WaveNet::_set_condition_array(NAM_SAMPLE** input, const int num_frames) { - // For now, use first input channel for conditioning - // Can be extended later to support multi-channel conditioning - for (int j = 0; j < num_frames; j++) + const int in_channels = NumInputChannels(); + const int condition_dim = this->_get_condition_dim(); + + assert(in_channels <= condition_dim); + + // Fill condition array with input channels + for (int ch = 0; ch < in_channels; ch++) { - this->_condition(0, j) = input[0][j]; + for (int j = 0; j < num_frames; j++) + { + this->_condition(ch, j) = input[ch][j]; + } + } + // Zero-fill remaining condition channels if in_channels < condition_dim + for (int ch = in_channels; ch < condition_dim; ch++) + { + for (int j = 0; j < num_frames; j++) + { + this->_condition(ch, j) = 0.0f; + } } } @@ -292,7 +312,6 @@ void nam::wavenet::WaveNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, con // (Head not implemented) auto& final_head_outputs = this->_layer_arrays.back().GetHeadOutputs(); - const int out_channels = NumOutputChannels(); assert(final_head_outputs.rows() == out_channels); for (int ch = 0; ch < out_channels; ch++) @@ -325,12 +344,16 @@ std::unique_ptr nam::wavenet::Factory(const nlohmann::json& config, st const bool with_head = !config["head"].is_null(); const float head_scale = config["head_scale"]; - // Determine channels from first layer (input_size) and last layer (head_size) + if (layer_array_params.empty()) + throw std::runtime_error("WaveNet config requires at least one layer array"); + + // Determine input channels from config or first layer const int in_channels = config.value("in_channels", layer_array_params[0].input_size); const int out_channels = config.value("out_channels", layer_array_params.back().head_size); + // out_channels is determined from last layer array's head_size return std::make_unique( - in_channels, out_channels, layer_array_params, head_scale, with_head, weights, expectedSampleRate); + in_channels, layer_array_params, head_scale, with_head, weights, expectedSampleRate); } // Register the factory diff --git a/tools/test/test_dsp.cpp b/tools/test/test_dsp.cpp index d71bd18..d019a87 100644 --- a/tools/test/test_dsp.cpp +++ b/tools/test/test_dsp.cpp @@ -8,34 +8,42 @@ namespace test_dsp // Simplest test: can I construct something! 
 void test_construct()
 {
-  nam::DSP myDsp(1, 1, 48000.0);
+  const int in_channels = 1;
+  const int out_channels = 1;
+  nam::DSP myDsp(in_channels, out_channels, 48000.0);
 }
 
 void test_channels()
 {
-  nam::DSP myDsp(2, 3, 48000.0);
-  assert(myDsp.NumInputChannels() == 2);
-  assert(myDsp.NumOutputChannels() == 3);
+  const int in_channels = 2;
+  const int out_channels = 3;
+  nam::DSP myDsp(in_channels, out_channels, 48000.0);
+  assert(myDsp.NumInputChannels() == in_channels);
+  assert(myDsp.NumOutputChannels() == out_channels);
 }
 
 void test_get_input_level()
 {
-  nam::DSP myDsp(2, 1, 48000.0);
+  const int in_channels = 2;
+  const int out_channels = 1;
+  nam::DSP myDsp(in_channels, out_channels, 48000.0);
   const double expected = 19.0;
-  myDsp.SetInputLevel(0, expected);
-  assert(myDsp.HasInputLevel(0));
-  const double actual = myDsp.GetInputLevel(0);
+  myDsp.SetInputLevel(expected);
+  assert(myDsp.HasInputLevel());
+  const double actual = myDsp.GetInputLevel();
   assert(actual == expected);
 }
 
 void test_get_output_level()
 {
-  nam::DSP myDsp(1, 2, 48000.0);
+  const int in_channels = 1;
+  const int out_channels = 2;
+  nam::DSP myDsp(in_channels, out_channels, 48000.0);
   const double expected = 12.0;
-  myDsp.SetOutputLevel(1, expected);
-  assert(myDsp.HasOutputLevel(1));
-  const double actual = myDsp.GetOutputLevel(1);
+  myDsp.SetOutputLevel(expected);
+  assert(myDsp.HasOutputLevel());
+  const double actual = myDsp.GetOutputLevel();
   assert(actual == expected);
 }
@@ -43,51 +51,60 @@ void test_get_output_level()
 // Test correct function of DSP::HasInputLevel()
 void test_has_input_level()
 {
-  nam::DSP myDsp(2, 1, 48000.0);
-  myDsp.SetInputLevel(0, 19.0);
-  assert(myDsp.HasInputLevel(0));
-  assert(!myDsp.HasInputLevel(1));
+  const int in_channels = 2;
+  const int out_channels = 1;
+  nam::DSP myDsp(in_channels, out_channels, 48000.0);
+  assert(!myDsp.HasInputLevel());
+
+  const double level = 19.0;
+  myDsp.SetInputLevel(level);
+  assert(myDsp.HasInputLevel());
 }
 
 void test_has_output_level()
 {
-  nam::DSP myDsp(1, 2, 48000.0);
+  const int in_channels = 1;
+  const int out_channels = 2;
+  nam::DSP myDsp(in_channels, out_channels, 48000.0);
 
-  assert(!myDsp.HasOutputLevel(0));
-  assert(!myDsp.HasOutputLevel(1));
+  assert(!myDsp.HasOutputLevel());
 
-  myDsp.SetOutputLevel(1, 12.0);
-  assert(!myDsp.HasOutputLevel(0));
-  assert(myDsp.HasOutputLevel(1));
+  const double level = 12.0;
+  myDsp.SetOutputLevel(level);
+  assert(myDsp.HasOutputLevel());
 }
 
 // Test correct function of DSP::SetInputLevel()
 void test_set_input_level()
 {
-  nam::DSP myDsp(2, 1, 48000.0);
-  myDsp.SetInputLevel(0, 19.0);
-  myDsp.SetInputLevel(1, 20.0);
+  const int in_channels = 2;
+  const int out_channels = 1;
+  nam::DSP myDsp(in_channels, out_channels, 48000.0);
+  myDsp.SetInputLevel(19.0);
 }
 
 void test_set_output_level()
 {
-  nam::DSP myDsp(1, 2, 48000.0);
-  myDsp.SetOutputLevel(0, 19.0);
-  myDsp.SetOutputLevel(1, 20.0);
+  const int in_channels = 1;
+  const int out_channels = 2;
+  nam::DSP myDsp(in_channels, out_channels, 48000.0);
+  myDsp.SetOutputLevel(19.0);
 }
 
 void test_process_multi_channel()
 {
-  nam::DSP myDsp(2, 2, 48000.0);
+  const int in_channels = 2;
+  const int out_channels = 2;
+  nam::DSP myDsp(in_channels, out_channels, 48000.0);
   const int num_frames = 64;
 
   // Allocate buffers
-  std::vector<std::vector<NAM_SAMPLE>> inputBuffers(2);
-  std::vector<std::vector<NAM_SAMPLE>> outputBuffers(2);
-  std::vector<NAM_SAMPLE*> inputPtrs(2);
-  std::vector<NAM_SAMPLE*> outputPtrs(2);
+  std::vector<std::vector<NAM_SAMPLE>> inputBuffers(in_channels);
+  std::vector<std::vector<NAM_SAMPLE>> outputBuffers(out_channels);
+  std::vector<NAM_SAMPLE*> inputPtrs(in_channels);
+  std::vector<NAM_SAMPLE*>
outputPtrs(out_channels); - for (int ch = 0; ch < 2; ch++) + for (int ch = 0; ch < in_channels; ch++) { inputBuffers[ch].resize(num_frames); outputBuffers[ch].resize(num_frames); @@ -100,12 +117,18 @@ void test_process_multi_channel() inputBuffers[ch][i] = (ch + 1) * 0.5 + i * 0.01; } } + for (int ch = 0; ch < out_channels; ch++) + { + outputBuffers[ch].resize(num_frames); + outputPtrs[ch] = outputBuffers[ch].data(); + } // Process myDsp.process(inputPtrs.data(), outputPtrs.data(), num_frames); // Check that default implementation copied input to output - for (int ch = 0; ch < 2; ch++) + const int channelsToCheck = std::min(in_channels, out_channels); + for (int ch = 0; ch < channelsToCheck; ch++) { for (int i = 0; i < num_frames; i++) { From ed5bf8db4bd70a85e79d87fb7f77a144f28efab5 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Thu, 15 Jan 2026 22:45:31 -0800 Subject: [PATCH 04/19] Formatting --- NAM/convnet.cpp | 9 ++- NAM/convnet.h | 12 +-- NAM/dsp.h | 37 +++++---- NAM/lstm.cpp | 79 ++++++++++++++++--- NAM/lstm.h | 10 ++- NAM/wavenet.cpp | 4 +- NAM/wavenet.h | 5 +- tools/test/test_wavenet/test_full.cpp | 20 ++--- .../test/test_wavenet/test_real_time_safe.cpp | 4 +- 9 files changed, 120 insertions(+), 60 deletions(-) diff --git a/NAM/convnet.cpp b/NAM/convnet.cpp index 3ee6fbe..4e3b4fd 100644 --- a/NAM/convnet.cpp +++ b/NAM/convnet.cpp @@ -157,16 +157,17 @@ void nam::convnet::_Head::process_(const Eigen::MatrixXf& input, Eigen::MatrixXf const long length = i_end - i_start; const long out_channels = this->_weight.rows(); const long in_channels = this->_weight.cols(); - + // Resize output to (out_channels x length) output.resize(out_channels, length); // Extract input slice: (in_channels x length) Eigen::MatrixXf input_slice = input.middleCols(i_start, length); - - // Compute output = weight * input_slice: (out_channels x in_channels) * (in_channels x length) = (out_channels x length) + + // Compute output = weight * input_slice: (out_channels x in_channels) * (in_channels x length) = (out_channels x + // length) output.noalias() = this->_weight * input_slice; - + // Add bias to each column: output.colwise() += bias // output is (out_channels x length), bias is (out_channels x 1), so colwise() += works output.colwise() += this->_bias; diff --git a/NAM/convnet.h b/NAM/convnet.h index c14994f..d1e846c 100644 --- a/NAM/convnet.h +++ b/NAM/convnet.h @@ -66,12 +66,12 @@ class _Head { public: _Head() {}; - _Head(const int channels, std::vector::iterator& weights); - void process_(const Eigen::MatrixXf& input, Eigen::VectorXf& output, const long i_start, const long i_end) const; + _Head(const int in_channels, const int out_channels, std::vector::iterator& weights); + void process_(const Eigen::MatrixXf& input, Eigen::MatrixXf& output, const long i_start, const long i_end) const; private: - Eigen::VectorXf _weight; - float _bias = 0.0f; + Eigen::MatrixXf _weight; // (out_channels, in_channels) + Eigen::VectorXf _bias; // (out_channels,) }; class ConvNet : public Buffer @@ -88,8 +88,8 @@ class ConvNet : public Buffer protected: std::vector _blocks; std::vector _block_vals; - std::vector _head_outputs; - std::vector<_Head> _heads; + Eigen::MatrixXf _head_output; // (out_channels, num_frames) + _Head _head; void _verify_weights(const int channels, const std::vector& dilations, const bool batchnorm, const size_t actual_weights); void _update_buffers_(NAM_SAMPLE** input, const int num_frames) override; diff --git a/NAM/dsp.h b/NAM/dsp.h index a15cbca..5787212 100644 --- a/NAM/dsp.h +++ 
b/NAM/dsp.h @@ -64,23 +64,26 @@ class DSP int NumInputChannels() const { return mInChannels; }; // Number of output channels int NumOutputChannels() const { return mOutChannels; }; - // Input Level, in dBu, corresponding to 0 dBFS for a sine wave, for a specific channel + // Input Level, in dBu, corresponding to 0 dBFS for a sine wave // You should call HasInputLevel() first to be safe. - double GetInputLevel(const int channel); + // Note: input level is assumed global over all inputs. + double GetInputLevel(); // Get how loud this model is, in dB. // Throws a std::runtime_error if the model doesn't know how loud it is. + // Note: loudness is assumed global over all outputs. double GetLoudness() const; - // Output Level, in dBu, corresponding to 0 dBFS for a sine wave, for a specific channel + // Output Level, in dBu, corresponding to 0 dBFS for a sine wave // You should call HasOutputLevel() first to be safe. - double GetOutputLevel(const int channel); - // Does this model know its input level for a specific channel? - // If channel == -1, returns true if any channel has a level set. - bool HasInputLevel(const int channel = -1); + // Note: output level is assumed global over all outputs. + double GetOutputLevel(); + // Does this model know its input level? + // Note: input level is assumed global over all inputs. + bool HasInputLevel(); // Get whether the model knows how loud it is. bool HasLoudness() const { return mHasLoudness; }; - // Does this model know its output level for a specific channel? - // If channel == -1, returns true if any channel has a level set. - bool HasOutputLevel(const int channel = -1); + // Does this model know its output level? + // Note: output level is assumed global over all outputs. + bool HasOutputLevel(); // General function for resetting the DSP unit. // This doesn't call prewarm(). If you want to do that, then you might want to use ResetAndPrewarm(). // See https://github.com/sdatkinson/NeuralAmpModelerCore/issues/96 for the reasoning. @@ -91,12 +94,13 @@ class DSP Reset(sampleRate, maxBufferSize); prewarm(); } - void SetInputLevel(const int channel, const double inputLevel); + void SetInputLevel(const double inputLevel); // Set the loudness, in dB. // This is usually defined to be the loudness to a standardized input. The trainer has its own, but you can always // use this to define it a different way if you like yours better. + // Note: loudness is assumed global over all outputs. 
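+  // Usage sketch (values are illustrative): after loading a model, a host
+  // might call SetLoudness(-18.0) once; there is no per-channel variant, so
+  // the same loudness applies however many output channels the model has.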
void SetLoudness(const double loudness); - void SetOutputLevel(const int channel, const double outputLevel); + void SetOutputLevel(const double outputLevel); protected: bool mHasLoudness = false; @@ -124,8 +128,9 @@ class DSP bool haveLevel = false; float level = 0.0; }; - std::vector mInputLevels; - std::vector mOutputLevels; + // Note: input/output levels are assumed global over all inputs/outputs + Level mInputLevel; + Level mOutputLevel; }; // Class where an input buffer is kept so that long-time effects can be @@ -139,8 +144,8 @@ class Buffer : public DSP protected: int _receptive_field; - // First location where we add new samples from the input (per channel) - std::vector _input_buffer_offset; + // First location where we add new samples from the input (same for all channels) + long _input_buffer_offset; // Per-channel input buffers std::vector> _input_buffers; std::vector> _output_buffers; diff --git a/NAM/lstm.cpp b/NAM/lstm.cpp index ada9580..7104553 100644 --- a/NAM/lstm.cpp +++ b/NAM/lstm.cpp @@ -69,28 +69,59 @@ nam::lstm::LSTM::LSTM(const int in_channels, const int out_channels, const int n const int hidden_size, std::vector& weights, const double expected_sample_rate) : DSP(in_channels, out_channels, expected_sample_rate) { - this->_input.resize(1); + // Allocate input and output vectors + this->_input.resize(input_size); + this->_output.resize(out_channels); + + // Store input_size for first layer + this->_first_layer_input_size = input_size; + std::vector::iterator it = weights.begin(); for (int i = 0; i < num_layers; i++) this->_layers.push_back(LSTMCell(i == 0 ? input_size : hidden_size, hidden_size, it)); - this->_head_weight.resize(hidden_size); - for (int i = 0; i < hidden_size; i++) - this->_head_weight[i] = *(it++); - this->_head_bias = *(it++); + + // Load head weight as matrix (out_channels x hidden_size) + // Weights are stored row-major: first row (output 0), then row 1 (output 1), etc. + this->_head_weight.resize(out_channels, hidden_size); + for (int out_ch = 0; out_ch < out_channels; out_ch++) + { + for (int h = 0; h < hidden_size; h++) + { + this->_head_weight(out_ch, h) = *(it++); + } + } + + // Load head bias as vector (out_channels) + this->_head_bias.resize(out_channels); + for (int out_ch = 0; out_ch < out_channels; out_ch++) + { + this->_head_bias(out_ch) = *(it++); + } + assert(it == weights.end()); } void nam::lstm::LSTM::process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) { + const int in_channels = NumInputChannels(); const int out_channels = NumOutputChannels(); - // For now, process first input channel and replicate to all output channels - // Can be extended later for true multi-channel support for (int i = 0; i < num_frames; i++) { - const float sample = this->_process_sample(input[0][i]); + // Copy multi-channel input to _input vector + for (int ch = 0; ch < in_channels; ch++) + { + this->_input(ch) = input[ch][i]; + } + + // Process sample (stores result in _output) + this->_process_sample(); + + // Copy multi-channel output from _output to output arrays for (int ch = 0; ch < out_channels; ch++) - output[ch][i] = sample; + { + output[ch][i] = this->_output(ch); + } } } @@ -102,15 +133,37 @@ int nam::lstm::LSTM::PrewarmSamples() return result <= 0 ? 
1 : result; } -float nam::lstm::LSTM::_process_sample(const float x) +void nam::lstm::LSTM::_process_sample() { + const int in_channels = NumInputChannels(); + const int out_channels = NumOutputChannels(); + if (this->_layers.size() == 0) - return x; - this->_input(0) = x; + { + // No layers - pass input through to output (using first in_channels of output) + const int channels_to_copy = std::min(in_channels, out_channels); + for (int ch = 0; ch < channels_to_copy; ch++) + this->_output(ch) = this->_input(ch); + // Zero-fill remaining output channels if in_channels < out_channels + for (int ch = channels_to_copy; ch < out_channels; ch++) + this->_output(ch) = 0.0f; + return; + } + this->_layers[0].process_(this->_input); for (size_t i = 1; i < this->_layers.size(); i++) this->_layers[i].process_(this->_layers[i - 1].get_hidden_state()); - return this->_head_weight.dot(this->_layers[this->_layers.size() - 1].get_hidden_state()) + this->_head_bias; + + // Compute output using head weight matrix and bias vector + // _output = _head_weight * hidden_state + _head_bias + const Eigen::VectorXf& hidden_state = this->_layers[this->_layers.size() - 1].get_hidden_state(); + + // Compute matrix-vector product: (out_channels x hidden_size) * (hidden_size) = (out_channels) + // Store directly in _output (which is already sized correctly in constructor) + this->_output.noalias() = this->_head_weight * hidden_state; + + // Add bias: (out_channels) += (out_channels) + this->_output.noalias() += this->_head_bias; } // Factory to instantiate from nlohmann json diff --git a/NAM/lstm.h b/NAM/lstm.h index 251e01b..2eca8d4 100644 --- a/NAM/lstm.h +++ b/NAM/lstm.h @@ -59,16 +59,18 @@ class LSTM : public DSP // Hacky, but a half-second seems to work for most models. int PrewarmSamples() override; - Eigen::VectorXf _head_weight; - float _head_bias; + Eigen::MatrixXf _head_weight; // (out_channels x hidden_size) + Eigen::VectorXf _head_bias; // (out_channels) void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override; std::vector _layers; - float _process_sample(const float x); + void _process_sample(); // Input to the LSTM. 
- // Since this is assumed to not be a parametric model, its shape should be (1,) + // Since this is assumed to not be a parametric model, its shape should be (in_channels,) Eigen::VectorXf _input; + // Output from _process_sample - multi-channel output vector (size out_channels) + Eigen::VectorXf _output; }; // Factory to instantiate from nlohmann json diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp index 748a3c9..8405e27 100644 --- a/NAM/wavenet.cpp +++ b/NAM/wavenet.cpp @@ -261,9 +261,9 @@ void nam::wavenet::WaveNet::_set_condition_array(NAM_SAMPLE** input, const int n { const int in_channels = NumInputChannels(); const int condition_dim = this->_get_condition_dim(); - + assert(in_channels <= condition_dim); - + // Fill condition array with input channels for (int ch = 0; ch < in_channels; ch++) { diff --git a/NAM/wavenet.h b/NAM/wavenet.h index 4ca370f..c67b9a5 100644 --- a/NAM/wavenet.h +++ b/NAM/wavenet.h @@ -174,9 +174,8 @@ class _LayerArray class WaveNet : public DSP { public: - WaveNet(const int in_channels, const int out_channels, const std::vector& layer_array_params, - const float head_scale, const bool with_head, std::vector weights, - const double expected_sample_rate = -1.0); + WaveNet(const int in_channels, const std::vector& layer_array_params, const float head_scale, + const bool with_head, std::vector weights, const double expected_sample_rate = -1.0); ~WaveNet() = default; void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override; void set_weights_(std::vector& weights); diff --git a/tools/test/test_wavenet/test_full.cpp b/tools/test/test_wavenet/test_full.cpp index dcb1941..122ea0b 100644 --- a/tools/test/test_wavenet/test_full.cpp +++ b/tools/test/test_wavenet/test_full.cpp @@ -47,8 +47,8 @@ void test_wavenet_model() weights.push_back(1.0f); // Head rechannel weights.push_back(head_scale); // Head scale - auto wavenet = std::make_unique( - input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0); + auto wavenet = + std::make_unique(input_size, layer_array_params, head_scale, with_head, weights, 48000.0); const int numFrames = 4; const int maxBufferSize = 64; @@ -107,8 +107,8 @@ void test_wavenet_multiple_arrays() weights.insert(weights.end(), {1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}); weights.push_back(head_scale); - auto wavenet = std::make_unique( - input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0); + auto wavenet = + std::make_unique(input_size, layer_array_params, head_scale, with_head, weights, 48000.0); const int numFrames = 4; const int maxBufferSize = 64; @@ -153,8 +153,8 @@ void test_wavenet_zero_input() std::vector weights{1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f, head_scale}; - auto wavenet = std::make_unique( - input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0); + auto wavenet = + std::make_unique(input_size, layer_array_params, head_scale, with_head, weights, 48000.0); const int numFrames = 4; wavenet->Reset(48000.0, numFrames); @@ -198,8 +198,8 @@ void test_wavenet_different_buffer_sizes() std::vector weights{1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f, head_scale}; - auto wavenet = std::make_unique( - input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0); + auto wavenet = + std::make_unique(input_size, layer_array_params, head_scale, with_head, weights, 48000.0); // Test with different buffer sizes wavenet->Reset(48000.0, 64); @@ -265,8 +265,8 @@ void test_wavenet_prewarm() weights.push_back(1.0f); 
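  // Ordering note (per the weight-layout comments in these tests): the 1.0f
  // just above appears to be the head-rechannel weight for the final layer
  // array, and head_scale below is always the last entry in the weight vector.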
weights.push_back(head_scale); - auto wavenet = std::make_unique( - input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0); + auto wavenet = + std::make_unique(input_size, layer_array_params, head_scale, with_head, weights, 48000.0); // Test that prewarm can be called without errors wavenet->Reset(48000.0, 64); diff --git a/tools/test/test_wavenet/test_real_time_safe.cpp b/tools/test/test_wavenet/test_real_time_safe.cpp index e0c2261..fe0f6c3 100644 --- a/tools/test/test_wavenet/test_real_time_safe.cpp +++ b/tools/test/test_wavenet/test_real_time_safe.cpp @@ -775,8 +775,8 @@ void test_process_realtime_safe() weights.insert(weights.end(), {1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}); weights.push_back(head_scale); - auto wavenet = std::make_unique( - input_size, head_size, layer_array_params, head_scale, with_head, weights, 48000.0); + auto wavenet = + std::make_unique(input_size, layer_array_params, head_scale, with_head, weights, 48000.0); const int maxBufferSize = 256; wavenet->Reset(48000.0, maxBufferSize); From 58457877fe43768c9adbacecfc6b49e631899e68 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Thu, 15 Jan 2026 22:52:01 -0800 Subject: [PATCH 05/19] Tweak how .gitignore for build/ directory --- .gitignore | 2 ++ build/.gitignore | 4 ---- 2 files changed, 2 insertions(+), 4 deletions(-) delete mode 100644 build/.gitignore diff --git a/.gitignore b/.gitignore index 8604b38..b3663e3 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,5 @@ *.app .vscode/ + +build/ diff --git a/build/.gitignore b/build/.gitignore deleted file mode 100644 index 86d0cb2..0000000 --- a/build/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -# Ignore everything in this directory -* -# Except this file -!.gitignore \ No newline at end of file From abba2d046510ffdb0f4996d14d239faf26bbf53e Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Thu, 15 Jan 2026 23:00:52 -0800 Subject: [PATCH 06/19] Fix warnings --- NAM/convnet.cpp | 1 - NAM/lstm.cpp | 3 --- NAM/wavenet.cpp | 1 - 3 files changed, 5 deletions(-) diff --git a/NAM/convnet.cpp b/NAM/convnet.cpp index 4e3b4fd..8bbcded 100644 --- a/NAM/convnet.cpp +++ b/NAM/convnet.cpp @@ -156,7 +156,6 @@ void nam::convnet::_Head::process_(const Eigen::MatrixXf& input, Eigen::MatrixXf { const long length = i_end - i_start; const long out_channels = this->_weight.rows(); - const long in_channels = this->_weight.cols(); // Resize output to (out_channels x length) output.resize(out_channels, length); diff --git a/NAM/lstm.cpp b/NAM/lstm.cpp index 7104553..5cf6f18 100644 --- a/NAM/lstm.cpp +++ b/NAM/lstm.cpp @@ -73,9 +73,6 @@ nam::lstm::LSTM::LSTM(const int in_channels, const int out_channels, const int n this->_input.resize(input_size); this->_output.resize(out_channels); - // Store input_size for first layer - this->_first_layer_input_size = input_size; - std::vector::iterator it = weights.begin(); for (int i = 0; i < num_layers; i++) this->_layers.push_back(LSTMCell(i == 0 ? 
input_size : hidden_size, hidden_size, it)); diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp index 8405e27..5dfe0f8 100644 --- a/NAM/wavenet.cpp +++ b/NAM/wavenet.cpp @@ -349,7 +349,6 @@ std::unique_ptr nam::wavenet::Factory(const nlohmann::json& config, st // Determine input channels from config or first layer const int in_channels = config.value("in_channels", layer_array_params[0].input_size); - const int out_channels = config.value("out_channels", layer_array_params.back().head_size); // out_channels is determined from last layer array's head_size return std::make_unique( From bd4e5a5ce40f0a9b4aa8ec3c9a66a41f5c8968eb Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Thu, 15 Jan 2026 23:06:37 -0800 Subject: [PATCH 07/19] Refactor benchmark_compare.sh to simplify build directory cleanup - Updated the script to remove all contents from the build directory instead of only untracked files, streamlining the cleanup process before running benchmarks. --- tools/benchmark_compare.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/benchmark_compare.sh b/tools/benchmark_compare.sh index e742fd1..20448a4 100755 --- a/tools/benchmark_compare.sh +++ b/tools/benchmark_compare.sh @@ -33,10 +33,9 @@ run_benchmark() { echo -e "${YELLOW}Running benchmark on branch: ${branch_name}${NC}" - # Clean build directory - remove only untracked files, preserve tracked files like .gitignore + # Clean build directory - remove all contents since nothing is tracked if [ -d "$BUILD_DIR" ]; then - # Remove files/directories that aren't tracked by git (process depth-first) - find "$BUILD_DIR" -mindepth 1 -depth -exec sh -c 'if ! git ls-files --error-unmatch "$1" >/dev/null 2>&1; then rm -rf "$1"; fi' _ {} \; + rm -rf "$BUILD_DIR"/* fi mkdir -p "$BUILD_DIR" From 46d146352c80179276723c3cd17add2eb5b14368 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Thu, 15 Jan 2026 23:08:02 -0800 Subject: [PATCH 08/19] Revert "Tweak how .gitignore for build/ directory" This reverts commit 58457877fe43768c9adbacecfc6b49e631899e68. --- .gitignore | 2 -- build/.gitignore | 4 ++++ 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 build/.gitignore diff --git a/.gitignore b/.gitignore index b3663e3..8604b38 100644 --- a/.gitignore +++ b/.gitignore @@ -32,5 +32,3 @@ *.app .vscode/ - -build/ diff --git a/build/.gitignore b/build/.gitignore new file mode 100644 index 0000000..86d0cb2 --- /dev/null +++ b/build/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore \ No newline at end of file From 629aeea8f2f4833bfb102d8c5b18dc7e3a639ab3 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Thu, 15 Jan 2026 23:08:15 -0800 Subject: [PATCH 09/19] Revert "Refactor benchmark_compare.sh to simplify build directory cleanup" This reverts commit bd4e5a5ce40f0a9b4aa8ec3c9a66a41f5c8968eb. 
--- tools/benchmark_compare.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/benchmark_compare.sh b/tools/benchmark_compare.sh index 20448a4..e742fd1 100755 --- a/tools/benchmark_compare.sh +++ b/tools/benchmark_compare.sh @@ -33,9 +33,10 @@ run_benchmark() { echo -e "${YELLOW}Running benchmark on branch: ${branch_name}${NC}" - # Clean build directory - remove all contents since nothing is tracked + # Clean build directory - remove only untracked files, preserve tracked files like .gitignore if [ -d "$BUILD_DIR" ]; then - rm -rf "$BUILD_DIR"/* + # Remove files/directories that aren't tracked by git (process depth-first) + find "$BUILD_DIR" -mindepth 1 -depth -exec sh -c 'if ! git ls-files --error-unmatch "$1" >/dev/null 2>&1; then rm -rf "$1"; fi' _ {} \; fi mkdir -p "$BUILD_DIR" From 05ff3667e40adaacc1721358fc6f0ee88e80ea05 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Fri, 16 Jan 2026 15:11:52 -0800 Subject: [PATCH 10/19] Add LSTM tests to run_tests.cpp - Included multiple LSTM test cases to validate various functionalities, including basic operations, handling of different buffer sizes, and state evolution. - Ensured comprehensive coverage for LSTM layers by adding tests for multichannel inputs and large hidden sizes. --- tools/run_tests.cpp | 14 ++ tools/test/test_lstm.cpp | 451 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 465 insertions(+) create mode 100644 tools/test/test_lstm.cpp diff --git a/tools/run_tests.cpp b/tools/run_tests.cpp index 33c4d45..065d7d9 100644 --- a/tools/run_tests.cpp +++ b/tools/run_tests.cpp @@ -18,6 +18,7 @@ #include "test/test_wavenet_gating_compatibility.cpp" #include "test/test_blending_detailed.cpp" #include "test/test_input_buffer_verification.cpp" +#include "test/test_lstm.cpp" int main() { @@ -133,6 +134,19 @@ int main() test_convnet::test_convnet_prewarm(); test_convnet::test_convnet_multiple_calls(); + // LSTM tests + test_lstm::test_lstm_basic(); + test_lstm::test_lstm_multiple_layers(); + test_lstm::test_lstm_zero_input(); + test_lstm::test_lstm_different_buffer_sizes(); + test_lstm::test_lstm_prewarm(); + test_lstm::test_lstm_multiple_calls(); + test_lstm::test_lstm_multichannel(); + test_lstm::test_lstm_large_hidden_size(); + test_lstm::test_lstm_different_input_size(); + test_lstm::test_lstm_state_evolution(); + test_lstm::test_lstm_no_layers(); + // Gating activations tests test_gating_activations::TestGatingActivation::test_basic_functionality(); test_gating_activations::TestGatingActivation::test_with_custom_activations(); diff --git a/tools/test/test_lstm.cpp b/tools/test/test_lstm.cpp new file mode 100644 index 0000000..8c655b9 --- /dev/null +++ b/tools/test/test_lstm.cpp @@ -0,0 +1,451 @@ +// Tests for LSTM + +#include +#include +#include +#include +#include + +#include "NAM/lstm.h" + +namespace test_lstm +{ +// Helper function to calculate weights needed for LSTM +// For each LSTMCell: +// - Weight matrix: (4 * hidden_size) x (input_size + hidden_size) in row-major order +// - Bias: 4 * hidden_size +// - Initial hidden state: hidden_size (stored in second half of _xh) +// - Initial cell state: hidden_size +// For the LSTM: +// - Head weight matrix: out_channels x hidden_size in row-major order +// - Head bias: out_channels +std::vector create_lstm_weights(int num_layers, int input_size, int hidden_size, int out_channels) +{ + std::vector weights; + + for (int layer = 0; layer < num_layers; layer++) + { + int layer_input_size = (layer == 0) ? 
input_size : hidden_size; + int w_rows = 4 * hidden_size; + int w_cols = layer_input_size + hidden_size; + + // Weight matrix (row-major) + for (int i = 0; i < w_rows * w_cols; i++) + { + weights.push_back(0.1f); // Small weights for stability + } + + // Bias vector + for (int i = 0; i < 4 * hidden_size; i++) + { + weights.push_back(0.0f); + } + + // Initial hidden state (stored in _xh) + for (int i = 0; i < hidden_size; i++) + { + weights.push_back(0.0f); + } + + // Initial cell state + for (int i = 0; i < hidden_size; i++) + { + weights.push_back(0.0f); + } + } + + // Head weight matrix (row-major: out_channels x hidden_size) + for (int out_ch = 0; out_ch < out_channels; out_ch++) + { + for (int h = 0; h < hidden_size; h++) + { + weights.push_back(0.1f); + } + } + + // Head bias + for (int out_ch = 0; out_ch < out_channels; out_ch++) + { + weights.push_back(0.0f); + } + + return weights; +} + +// Test basic LSTM construction and processing +void test_lstm_basic() +{ + const int in_channels = 1; + const int out_channels = 1; + const int num_layers = 1; + const int input_size = 1; + const int hidden_size = 4; + const double expected_sample_rate = 48000.0; + + std::vector weights = create_lstm_weights(num_layers, input_size, hidden_size, out_channels); + + nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expected_sample_rate); + + const int numFrames = 4; + const int maxBufferSize = 64; + lstm.Reset(expected_sample_rate, maxBufferSize); + + std::vector input(numFrames, 1.0f); + std::vector output(numFrames, 0.0f); + NAM_SAMPLE* inputPtrs[] = {input.data()}; + NAM_SAMPLE* outputPtrs[] = {output.data()}; + + lstm.process(inputPtrs, outputPtrs, numFrames); + + // Verify output dimensions + assert(output.size() == numFrames); + // Output should be non-zero and finite + for (int i = 0; i < numFrames; i++) + { + assert(std::isfinite(output[i])); + } +} + +// Test LSTM with multiple layers +void test_lstm_multiple_layers() +{ + const int in_channels = 1; + const int out_channels = 1; + const int num_layers = 2; + const int input_size = 1; + const int hidden_size = 4; + const double expected_sample_rate = 48000.0; + + std::vector weights = create_lstm_weights(num_layers, input_size, hidden_size, out_channels); + + nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expected_sample_rate); + + const int numFrames = 8; + const int maxBufferSize = 64; + lstm.Reset(expected_sample_rate, maxBufferSize); + + std::vector input(numFrames, 0.5f); + std::vector output(numFrames, 0.0f); + NAM_SAMPLE* inputPtrs[] = {input.data()}; + NAM_SAMPLE* outputPtrs[] = {output.data()}; + + lstm.process(inputPtrs, outputPtrs, numFrames); + + assert(output.size() == numFrames); + for (int i = 0; i < numFrames; i++) + { + assert(std::isfinite(output[i])); + } +} + +// Test LSTM with zero input +void test_lstm_zero_input() +{ + const int in_channels = 1; + const int out_channels = 1; + const int num_layers = 1; + const int input_size = 1; + const int hidden_size = 4; + const double expected_sample_rate = 48000.0; + + std::vector weights = create_lstm_weights(num_layers, input_size, hidden_size, out_channels); + + nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expected_sample_rate); + + const int numFrames = 4; + lstm.Reset(expected_sample_rate, numFrames); + + std::vector input(numFrames, 0.0f); + std::vector output(numFrames, 0.0f); + NAM_SAMPLE* inputPtrs[] = {input.data()}; + NAM_SAMPLE* 
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
+
+  lstm.process(inputPtrs, outputPtrs, numFrames);
+
+  // With zero input, output should be finite (may be zero or non-zero depending on bias)
+  for (int i = 0; i < numFrames; i++)
+  {
+    assert(std::isfinite(output[i]));
+  }
+}
+
+// Test LSTM with different buffer sizes
+void test_lstm_different_buffer_sizes()
+{
+  const int in_channels = 1;
+  const int out_channels = 1;
+  const int num_layers = 1;
+  const int input_size = 1;
+  const int hidden_size = 4;
+  const double expected_sample_rate = 48000.0;
+
+  std::vector<float> weights = create_lstm_weights(num_layers, input_size, hidden_size, out_channels);
+
+  nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expected_sample_rate);
+
+  // Test with different buffer sizes
+  lstm.Reset(expected_sample_rate, 64);
+  std::vector<NAM_SAMPLE> input1(32, 1.0f);
+  std::vector<NAM_SAMPLE> output1(32, 0.0f);
+  NAM_SAMPLE* inputPtrs1[] = {input1.data()};
+  NAM_SAMPLE* outputPtrs1[] = {output1.data()};
+  lstm.process(inputPtrs1, outputPtrs1, 32);
+
+  lstm.Reset(expected_sample_rate, 128);
+  std::vector<NAM_SAMPLE> input2(64, 1.0f);
+  std::vector<NAM_SAMPLE> output2(64, 0.0f);
+  NAM_SAMPLE* inputPtrs2[] = {input2.data()};
+  NAM_SAMPLE* outputPtrs2[] = {output2.data()};
+  lstm.process(inputPtrs2, outputPtrs2, 64);
+
+  // Both should work without errors
+  assert(output1.size() == 32);
+  assert(output2.size() == 64);
+}
+
+// Test LSTM prewarm functionality
+void test_lstm_prewarm()
+{
+  const int in_channels = 1;
+  const int out_channels = 1;
+  const int num_layers = 1;
+  const int input_size = 1;
+  const int hidden_size = 4;
+  const double expected_sample_rate = 48000.0;
+
+  std::vector<float> weights = create_lstm_weights(num_layers, input_size, hidden_size, out_channels);
+
+  nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expected_sample_rate);
+
+  // Test that prewarm can be called without errors
+  lstm.Reset(expected_sample_rate, 64);
+  lstm.prewarm();
+
+  // After prewarm, processing should work
+  const int numFrames = 4;
+  std::vector<NAM_SAMPLE> input(numFrames, 1.0f);
+  std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
+  lstm.process(inputPtrs, outputPtrs, numFrames);
+
+  // Output should be finite
+  for (int i = 0; i < numFrames; i++)
+  {
+    assert(std::isfinite(output[i]));
+  }
+}
+
+// Test multiple process() calls (state persistence)
+void test_lstm_multiple_calls()
+{
+  const int in_channels = 1;
+  const int out_channels = 1;
+  const int num_layers = 1;
+  const int input_size = 1;
+  const int hidden_size = 4;
+  const double expected_sample_rate = 48000.0;
+
+  std::vector<float> weights = create_lstm_weights(num_layers, input_size, hidden_size, out_channels);
+
+  nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expected_sample_rate);
+
+  const int numFrames = 2;
+  lstm.Reset(expected_sample_rate, numFrames);
+
+  // Multiple calls should work correctly with state persistence
+  for (int i = 0; i < 5; i++)
+  {
+    std::vector<NAM_SAMPLE> input(numFrames, 1.0f);
+    std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+    NAM_SAMPLE* inputPtrs[] = {input.data()};
+    NAM_SAMPLE* outputPtrs[] = {output.data()};
+    lstm.process(inputPtrs, outputPtrs, numFrames);
+
+    // Output should be finite
+    for (int j = 0; j < numFrames; j++)
+    {
+      assert(std::isfinite(output[j]));
+    }
+  }
+}
+
+// Test LSTM with multi-channel input/output
+void test_lstm_multichannel()
+{
+  const int in_channels = 2;
+  const int out_channels = 2;
+  const int num_layers = 1;
+  const int input_size = 2;
+  const int hidden_size = 4;
+  const double expected_sample_rate = 48000.0;
+
+  std::vector<float> weights = create_lstm_weights(num_layers, input_size, hidden_size, out_channels);
+
+  nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expected_sample_rate);
+
+  const int numFrames = 4;
+  lstm.Reset(expected_sample_rate, 64);
+
+  std::vector<NAM_SAMPLE> input1(numFrames, 0.5f);
+  std::vector<NAM_SAMPLE> input2(numFrames, 0.3f);
+  std::vector<NAM_SAMPLE> output1(numFrames, 0.0f);
+  std::vector<NAM_SAMPLE> output2(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input1.data(), input2.data()};
+  NAM_SAMPLE* outputPtrs[] = {output1.data(), output2.data()};
+
+  lstm.process(inputPtrs, outputPtrs, numFrames);
+
+  // Verify both output channels are finite
+  for (int i = 0; i < numFrames; i++)
+  {
+    assert(std::isfinite(output1[i]));
+    assert(std::isfinite(output2[i]));
+  }
+}
+
+// Test LSTM with larger hidden size
+void test_lstm_large_hidden_size()
+{
+  const int in_channels = 1;
+  const int out_channels = 1;
+  const int num_layers = 1;
+  const int input_size = 1;
+  const int hidden_size = 16;
+  const double expected_sample_rate = 48000.0;
+
+  std::vector<float> weights = create_lstm_weights(num_layers, input_size, hidden_size, out_channels);
+
+  nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expected_sample_rate);
+
+  const int numFrames = 4;
+  lstm.Reset(expected_sample_rate, 64);
+
+  std::vector<NAM_SAMPLE> input(numFrames, 1.0f);
+  std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
+
+  lstm.process(inputPtrs, outputPtrs, numFrames);
+
+  for (int i = 0; i < numFrames; i++)
+  {
+    assert(std::isfinite(output[i]));
+  }
+}
+
+// Test LSTM with different input sizes
+void test_lstm_different_input_size()
+{
+  const int in_channels = 3;
+  const int out_channels = 1;
+  const int num_layers = 1;
+  const int input_size = 3;
+  const int hidden_size = 4;
+  const double expected_sample_rate = 48000.0;
+
+  std::vector<float> weights = create_lstm_weights(num_layers, input_size, hidden_size, out_channels);
+
+  nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expected_sample_rate);
+
+  const int numFrames = 4;
+  lstm.Reset(expected_sample_rate, 64);
+
+  std::vector<NAM_SAMPLE> input1(numFrames, 0.1f);
+  std::vector<NAM_SAMPLE> input2(numFrames, 0.2f);
+  std::vector<NAM_SAMPLE> input3(numFrames, 0.3f);
+  std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input1.data(), input2.data(), input3.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
+
+  lstm.process(inputPtrs, outputPtrs, numFrames);
+
+  for (int i = 0; i < numFrames; i++)
+  {
+    assert(std::isfinite(output[i]));
+  }
+}
+
+// Test LSTM state evolution over time
+void test_lstm_state_evolution()
+{
+  const int in_channels = 1;
+  const int out_channels = 1;
+  const int num_layers = 1;
+  const int input_size = 1;
+  const int hidden_size = 4;
+  const double expected_sample_rate = 48000.0;
+
+  std::vector<float> weights = create_lstm_weights(num_layers, input_size, hidden_size, out_channels);
+
+  nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expected_sample_rate);
+
+  const int numFrames = 10;
+  lstm.Reset(expected_sample_rate, 64);
+
+  // Create a sine wave input
+  std::vector<NAM_SAMPLE> input(numFrames);
+  for (int i = 0; i < numFrames; i++)
+  {
+    input[i] = 0.5f * std::sin(2.0f * M_PI * i / numFrames);
+  }
+
+  std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
+
+  lstm.process(inputPtrs, outputPtrs, numFrames);
+
+  // Output should be finite and may vary over time as the internal state evolves
+  for (int i = 0; i < numFrames; i++)
+  {
+    assert(std::isfinite(output[i]));
+  }
+}
+
+// Test LSTM with no layers (edge case)
+void test_lstm_no_layers()
+{
+  const int in_channels = 1;
+  const int out_channels = 1;
+  const int num_layers = 0;
+  const int input_size = 1;
+  const int hidden_size = 4;
+  const double expected_sample_rate = 48000.0;
+
+  // With no layers, we still need head weights
+  std::vector<float> weights;
+  // Head weight matrix (row-major: out_channels x hidden_size)
+  for (int out_ch = 0; out_ch < out_channels; out_ch++)
+  {
+    for (int h = 0; h < hidden_size; h++)
+    {
+      weights.push_back(0.0f); // Zero head weights give zero contribution from the (empty) state
+    }
+  }
+  // Head bias
+  for (int out_ch = 0; out_ch < out_channels; out_ch++)
+  {
+    weights.push_back(0.0f);
+  }
+
+  nam::lstm::LSTM lstm(in_channels, out_channels, num_layers, input_size, hidden_size, weights, expected_sample_rate);
+
+  const int numFrames = 4;
+  lstm.Reset(expected_sample_rate, 64);
+
+  std::vector<NAM_SAMPLE> input(numFrames, 1.0f);
+  std::vector<NAM_SAMPLE> output(numFrames, 0.0f);
+  NAM_SAMPLE* inputPtrs[] = {input.data()};
+  NAM_SAMPLE* outputPtrs[] = {output.data()};
+
+  lstm.process(inputPtrs, outputPtrs, numFrames);
+
+  // With no LSTM layers and zero head weights/bias, processing should still produce finite output
+  for (int i = 0; i < numFrames; i++)
+  {
+    assert(std::isfinite(output[i]));
+  }
+}
+
+} // namespace test_lstm

From 747231777de847de2da46eb49fca4d228f5ea1a0 Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Fri, 16 Jan 2026 15:12:22 -0800
Subject: [PATCH 11/19] Refactor ConvNet test cases to use defined input and
 output channel constants

- Updated multiple ConvNet test functions to replace hardcoded input and
  output channel values with defined constants for better readability and
  maintainability.
- Ensured consistency across tests by using the same channel definitions in
  all relevant instances; the resulting pattern is sketched below.
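A minimal illustration (identifiers as used in the updated tests; the weight
setup is elided, so this is a sketch rather than a complete test):

  const int in_channels = 1; // named constants instead of bare 1, 1 arguments
  const int out_channels = 1;
  nam::convnet::ConvNet convnet(
    in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);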
---
 tools/test/test_convnet.cpp | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/tools/test/test_convnet.cpp b/tools/test/test_convnet.cpp
index 8966967..2c0d428 100644
--- a/tools/test/test_convnet.cpp
+++ b/tools/test/test_convnet.cpp
@@ -13,6 +13,8 @@ namespace test_convnet
 // Test basic ConvNet construction and processing
 void test_convnet_basic()
 {
+  const int in_channels = 1;
+  const int out_channels = 1;
   const int channels = 2;
   const std::vector<int> dilations{1, 2};
   const bool batchnorm = false;
@@ -32,7 +34,7 @@ void test_convnet_basic()
   // Head weights (2 weights + 1 bias)
   weights.insert(weights.end(), {1.0f, 1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   const int numFrames = 4;
   const int maxBufferSize = 64;
@@ -57,6 +59,8 @@
 // Test ConvNet with batchnorm
 void test_convnet_batchnorm()
 {
+  const int in_channels = 1;
+  const int out_channels = 1;
   const int channels = 1;
   const std::vector<int> dilations{1};
   const bool batchnorm = true;
@@ -76,7 +80,7 @@
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   const int numFrames = 4;
   const int maxBufferSize = 64;
@@ -99,6 +103,8 @@
 // Test ConvNet with multiple blocks
 void test_convnet_multiple_blocks()
 {
+  const int in_channels = 1;
+  const int out_channels = 1;
   const int channels = 2;
   const std::vector<int> dilations{1, 2, 4};
   const bool batchnorm = false;
@@ -121,7 +127,7 @@
   // Head weights
   weights.insert(weights.end(), {1.0f, 1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   const int numFrames = 8;
   const int maxBufferSize = 64;
@@ -144,6 +150,8 @@
 // Test ConvNet with zero input
 void test_convnet_zero_input()
 {
+  const int in_channels = 1;
+  const int out_channels = 1;
   const int channels = 1;
   const std::vector<int> dilations{1};
   const bool batchnorm = false;
@@ -156,7 +164,7 @@
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   const int numFrames = 4;
   convnet.Reset(expected_sample_rate, numFrames);
@@ -178,6 +186,8 @@
 // Test ConvNet with different buffer sizes
 void test_convnet_different_buffer_sizes()
 {
+  const int in_channels = 1;
+  const int out_channels = 1;
   const int channels = 1;
   const std::vector<int> dilations{1};
   const bool batchnorm = false;
@@ -190,7 +200,7 @@
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   // Test with different buffer sizes
   convnet.Reset(expected_sample_rate, 64);
@@ -215,6 +225,8 @@
 // Test ConvNet prewarm functionality
 void test_convnet_prewarm()
 {
+  const int in_channels = 1;
+  const int out_channels = 1;
   const int channels = 2;
   const std::vector<int> dilations{1, 2, 4};
   const bool batchnorm = false;
@@ -231,7 +243,7 @@
   // Head weights (2 weights + 1 bias)
   weights.insert(weights.end(), {1.0f, 1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   // Test that prewarm can be called without errors
   convnet.Reset(expected_sample_rate, 64);
@@ -255,6 +267,8 @@
 // Test multiple process() calls (ring buffer functionality)
 void test_convnet_multiple_calls()
 {
+  const int in_channels = 1;
+  const int out_channels = 1;
   const int channels = 1;
   const std::vector<int> dilations{1};
   const bool batchnorm = false;
@@ -267,7 +281,7 @@
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(1, 1, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   const int numFrames = 2;
   convnet.Reset(expected_sample_rate, numFrames);

From 02403a635f714fca3832113d28dcf981a0b4a727 Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Fri, 16 Jan 2026 15:12:37 -0800
Subject: [PATCH 12/19] Formatting

---
 tools/test/test_convnet.cpp | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/tools/test/test_convnet.cpp b/tools/test/test_convnet.cpp
index 2c0d428..56bd5ec 100644
--- a/tools/test/test_convnet.cpp
+++ b/tools/test/test_convnet.cpp
@@ -34,7 +34,8 @@ void test_convnet_basic()
   // Head weights (2 weights + 1 bias)
   weights.insert(weights.end(), {1.0f, 1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(
+    in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   const int numFrames = 4;
   const int maxBufferSize = 64;
@@ -80,7 +81,8 @@
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(
+    in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   const int numFrames = 4;
   const int maxBufferSize = 64;
@@ -127,7 +129,8 @@
   // Head weights
   weights.insert(weights.end(), {1.0f, 1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(
+    in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   const int numFrames = 8;
   const int maxBufferSize = 64;
@@ -164,7 +167,8 @@
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(
+    in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   const int numFrames = 4;
   convnet.Reset(expected_sample_rate, numFrames);
@@ -200,7 +204,8 @@
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(
+    in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   // Test with different buffer sizes
   convnet.Reset(expected_sample_rate, 64);
@@ -243,7 +248,8 @@
   // Head weights (2 weights + 1 bias)
   weights.insert(weights.end(), {1.0f, 1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(
+    in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   // Test that prewarm can be called without errors
   convnet.Reset(expected_sample_rate, 64);
@@ -281,7 +287,8 @@
   // Head weights (1 weight + 1 bias)
   weights.insert(weights.end(), {1.0f, 0.0f});

-  nam::convnet::ConvNet convnet(in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);
+  nam::convnet::ConvNet convnet(
+    in_channels, out_channels, channels, dilations, batchnorm, activation, weights, expected_sample_rate);

   const int numFrames = 2;
   convnet.Reset(expected_sample_rate, numFrames);

From c9c32280dab7b0020e6cc0dda28ac4304deecbd8 Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Fri, 16 Jan 2026 15:28:50 -0800
Subject: [PATCH 13/19] LSTM: Default 1 input channel

---
 NAM/lstm.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NAM/lstm.cpp b/NAM/lstm.cpp
index 5cf6f18..d162d55 100644
--- a/NAM/lstm.cpp
+++ b/NAM/lstm.cpp
@@ -171,7 +171,7 @@ std::unique_ptr<DSP> nam::lstm::Factory(const nlohmann::json& config, std::vector<float>& weights,
   const int input_size = config["input_size"];
   const int hidden_size = config["hidden_size"];
   // Default to 1 channel in/out for backward compatibility
-  const int in_channels = config.value("in_channels", input_size);
+  const int in_channels = config.value("in_channels", 1);
   const int out_channels = config.value("out_channels", 1);
   return std::make_unique<LSTM>(
     in_channels, out_channels, num_layers, input_size, hidden_size, weights, expectedSampleRate);

From 7c5a97f108dd305ec28ee3faba3d59cb72fe970b Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Fri, 16 Jan 2026 15:30:43 -0800
Subject: [PATCH 14/19] Fix WaveNet::_set_condition_array()

---
 NAM/wavenet.cpp | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp
index 5dfe0f8..21fadfb 100644
--- a/NAM/wavenet.cpp
+++ b/NAM/wavenet.cpp
@@ -260,10 +260,6 @@ void nam::wavenet::WaveNet::SetMaxBufferSize(const int maxBufferSize)
 void nam::wavenet::WaveNet::_set_condition_array(NAM_SAMPLE** input, const int num_frames)
 {
   const int in_channels = NumInputChannels();
-  const int condition_dim = this->_get_condition_dim();
-
-  assert(in_channels <= condition_dim);
-
   // Fill condition array with input channels
   for (int ch = 0; ch < in_channels; ch++)
   {
@@ -272,14 +268,6 @@ void nam::wavenet::WaveNet::_set_condition_array(NAM_SAMPLE** input, const int n
       this->_condition(ch, j) = input[ch][j];
     }
   }
-  // Zero-fill remaining condition channels if in_channels < condition_dim
-  for (int ch = in_channels; ch < condition_dim; ch++)
-  {
-    for (int j = 0; j < num_frames; j++)
-    {
-      this->_condition(ch, j) = 0.0f;
-    }
-  }
 }

 void nam::wavenet::WaveNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames)

From 854842fc81d364334e9b806634be85a084b4f3c9 Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Fri, 16 Jan 2026 15:32:37 -0800
Subject: [PATCH 15/19] Fix WaveNet factory for backward compatibility

---
 NAM/wavenet.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp
index 21fadfb..6686f93 100644
--- a/NAM/wavenet.cpp
+++ b/NAM/wavenet.cpp
@@ -335,8 +335,8 @@ std::unique_ptr<DSP> nam::wavenet::Factory(const nlohmann::json& config, std::vector<float>& weights,
   if (layer_array_params.empty())
     throw std::runtime_error("WaveNet config requires at least one layer array");

-  // Determine input channels from config or first layer
-  const int in_channels = config.value("in_channels", layer_array_params[0].input_size);
+  // Backward compatibility: assume 1 input channel
+  const int in_channels = config.value("in_channels", 1);

   // out_channels is determined from last layer array's head_size
   return std::make_unique<WaveNet>(

From 1add97025138ab2b33566b47c46f94344fed7c7b Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Fri, 16 Jan 2026 15:41:39 -0800
Subject: [PATCH 16/19] Fix some issues

---
 NAM/dsp.cpp | 4 ++--
 NAM/lstm.h  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/NAM/dsp.cpp b/NAM/dsp.cpp
index d0a1c4c..023c42a 100644
--- a/NAM/dsp.cpp
+++ b/NAM/dsp.cpp
@@ -16,9 +16,9 @@ constexpr const long _INPUT_BUFFER_SAFETY_FACTOR = 32;

 nam::DSP::DSP(const int in_channels, const int out_channels, const double expected_sample_rate)
-: mInChannels(in_channels)
+: mExpectedSampleRate(expected_sample_rate)
+, mInChannels(in_channels)
 , mOutChannels(out_channels)
-, mExpectedSampleRate(expected_sample_rate)
 {
   if (in_channels <= 0 || out_channels <= 0)
   {

diff --git a/NAM/lstm.h b/NAM/lstm.h
index 2eca8d4..5c03853 100644
--- a/NAM/lstm.h
+++ b/NAM/lstm.h
@@ -54,6 +54,7 @@ class LSTM : public DSP
   LSTM(const int in_channels, const int out_channels, const int num_layers, const int input_size,
        const int hidden_size, std::vector<float>& weights, const double expected_sample_rate = -1.0);
   ~LSTM() = default;
+  void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override;

 protected:
   // Hacky, but a half-second seems to work for most models.
@@ -61,7 +62,6 @@ class LSTM : public DSP
   Eigen::MatrixXf _head_weight; // (out_channels x hidden_size)
   Eigen::VectorXf _head_bias; // (out_channels)

-  void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override;
   std::vector<LSTMCell> _layers;

   void _process_sample();

From be179024a291f48acd128af2ceaf630ee307259e Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Fri, 16 Jan 2026 15:42:26 -0800
Subject: [PATCH 17/19] Add test for WaveNet::process() with 3 input channels
 and 2 output channels

- Implemented a new test case to verify that the process method does not
  allocate or free memory when handling 3 input channels and 2 output
  channels.
- The test includes setup for weights and input/output buffers, ensuring the
  output is valid across various buffer sizes.
- This enhances the coverage of WaveNet functionality in real-time processing
  scenarios.
---
 .../test/test_wavenet/test_real_time_safe.cpp | 123 ++++++++++++++++++
 1 file changed, 123 insertions(+)

diff --git a/tools/test/test_wavenet/test_real_time_safe.cpp b/tools/test/test_wavenet/test_real_time_safe.cpp
index fe0f6c3..0a57539 100644
--- a/tools/test/test_wavenet/test_real_time_safe.cpp
+++ b/tools/test/test_wavenet/test_real_time_safe.cpp
@@ -809,4 +809,127 @@ void test_process_realtime_safe()
     }
   }
 }
+
+// Test that WaveNet::process() with 3 input channels and 2 output channels does not allocate or free memory
+void test_process_3in_2out_realtime_safe()
+{
+  // Setup: Create WaveNet with 3 input channels and 2 output channels
+  const int input_size = 3; // 3 input channels
+  const int condition_size = 3; // condition matches input channels
+  const int head_size = 2; // 2 output channels
+  const int channels = 4; // internal channels
+  const int bottleneck = 2; // bottleneck (will be used for head)
+  const int kernel_size = 1;
+  const std::string activation = "ReLU";
+  const bool gated = false;
+  const bool head_bias = false;
+  const float head_scale = 1.0f;
+  const bool with_head = false;
+  const int groups = 1;
+  const int groups_1x1 = 1;
+
+  std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
+  std::vector<int> dilations1{1};
+  layer_array_params.push_back(nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, channels,
+                                                              bottleneck, kernel_size, std::move(dilations1),
+                                                              activation, gated, head_bias, groups, groups_1x1));
+
+  // Calculate weights:
+  // _rechannel: Conv1x1(3, 4, bias=false) = 3*4 = 12 weights
+  // Layer:
+  //   _conv: Conv1D(4, 2, kernel_size=1, bias=true) = 1*(2*4) + 2 = 10 weights
+  //   _input_mixin: Conv1x1(3, 2, bias=false) = 3*2 = 6 weights
+  //   _1x1: Conv1x1(2, 4, bias=true) = 2*4 + 4 = 12 weights
+  // _head_rechannel: Conv1x1(2, 2, bias=false) = 2*2 = 4 weights
+  // Total: 12 + 10 + 6 + 12 + 4 = 44 weights
+  std::vector<float> weights;
+  // _rechannel weights (3->4): identity-like pattern
+  for (int out_ch = 0; out_ch < 4; out_ch++)
+  {
+    for (int in_ch = 0; in_ch < 3; in_ch++)
+    {
+      weights.push_back((out_ch < 3 && out_ch == in_ch) ? 1.0f : 0.0f);
+    }
+  }
+  // Layer: _conv weights (4->2, kernel_size=1, with bias)
+  // Weight layout: for each kernel position k, for each out_channel, for each in_channel
+  for (int out_ch = 0; out_ch < 2; out_ch++)
+  {
+    for (int in_ch = 0; in_ch < 4; in_ch++)
+    {
+      weights.push_back((out_ch == in_ch) ? 1.0f : 0.0f);
+    }
+  }
+  // _conv bias (2 values)
+  weights.insert(weights.end(), {0.0f, 0.0f});
+  // _input_mixin weights (3->2)
+  for (int out_ch = 0; out_ch < 2; out_ch++)
+  {
+    for (int in_ch = 0; in_ch < 3; in_ch++)
+    {
+      weights.push_back((out_ch == in_ch) ? 1.0f : 0.0f);
+    }
+  }
+  // _1x1 weights (2->4, with bias)
+  for (int out_ch = 0; out_ch < 4; out_ch++)
+  {
+    for (int in_ch = 0; in_ch < 2; in_ch++)
+    {
+      weights.push_back((out_ch < 2 && out_ch == in_ch) ? 1.0f : 0.0f);
+    }
+  }
+  // _1x1 bias (4 values)
+  weights.insert(weights.end(), {0.0f, 0.0f, 0.0f, 0.0f});
+  // _head_rechannel weights (2->2)
+  for (int out_ch = 0; out_ch < 2; out_ch++)
+  {
+    for (int in_ch = 0; in_ch < 2; in_ch++)
+    {
+      weights.push_back((out_ch == in_ch) ? 1.0f : 0.0f);
+    }
+  }
+  weights.push_back(head_scale);
+
+  const int in_channels = 3;
+  auto wavenet =
+    std::make_unique<nam::wavenet::WaveNet>(in_channels, layer_array_params, head_scale, with_head, weights, 48000.0);
+
+  const int maxBufferSize = 256;
+  wavenet->Reset(48000.0, maxBufferSize);
+
+  // Test with several different buffer sizes
+  std::vector<int> buffer_sizes{1, 8, 16, 32, 64, 128, 256};
+
+  for (int buffer_size : buffer_sizes)
+  {
+    // Prepare input/output buffers for 3 input channels and 2 output channels (allocate before tracking)
+    std::vector<std::vector<NAM_SAMPLE>> input(3, std::vector<NAM_SAMPLE>(buffer_size, 0.5f));
+    std::vector<std::vector<NAM_SAMPLE>> output(2, std::vector<NAM_SAMPLE>(buffer_size, 0.0f));
+    std::vector<NAM_SAMPLE*> inputPtrs(3);
+    std::vector<NAM_SAMPLE*> outputPtrs(2);
+    for (int ch = 0; ch < 3; ch++)
+      inputPtrs[ch] = input[ch].data();
+    for (int ch = 0; ch < 2; ch++)
+      outputPtrs[ch] = output[ch].data();
+
+    std::string test_name = "WaveNet process (3in, 2out) - Buffer size " + std::to_string(buffer_size);
+    run_allocation_test_no_allocations(
+      nullptr, // No setup needed
+      [&]() {
+        // Call process() - this should not allocate or free
+        wavenet->process(inputPtrs.data(), outputPtrs.data(), buffer_size);
+      },
+      nullptr, // No teardown needed
+      test_name.c_str());
+
+    // Verify output is valid
+    for (int ch = 0; ch < 2; ch++)
+    {
+      for (int i = 0; i < buffer_size; i++)
+      {
+        assert(std::isfinite(output[ch][i]));
+      }
+    }
+  }
+}
 } // namespace test_wavenet

From 3beaa07b49c88f470447caea0b4c076157657ef2 Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Fri, 16 Jan 2026 15:42:46 -0800
Subject: [PATCH 18/19] Add test to runner

---
 tools/run_tests.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/run_tests.cpp b/tools/run_tests.cpp
index 065d7d9..2a50c77 100644
--- a/tools/run_tests.cpp
+++ b/tools/run_tests.cpp
@@ -125,6 +125,7 @@ int main()
   test_wavenet::test_layer_grouped_process_realtime_safe();
   test_wavenet::test_layer_array_process_realtime_safe();
   test_wavenet::test_process_realtime_safe();
+  test_wavenet::test_process_3in_2out_realtime_safe();

   test_convnet::test_convnet_basic();
   test_convnet::test_convnet_batchnorm();

From aa1e00af7cd86d6870e132fadefbf0aa0323cefd Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Fri, 16 Jan 2026 16:09:45 -0800
Subject: [PATCH 19/19] Fix WaveNet::_get_condition_dim()

---
 NAM/wavenet.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/NAM/wavenet.h b/NAM/wavenet.h
index c67b9a5..2e99256 100644
--- a/NAM/wavenet.h
+++ b/NAM/wavenet.h
@@ -189,7 +189,7 @@ class WaveNet : public DSP
   virtual void _set_condition_array(NAM_SAMPLE** input, const int num_frames);
   // How many conditioning inputs are there.
-  // Just one--the audio.
-  virtual int _get_condition_dim() const { return 1; };
+  // One per input channel.
+  virtual int _get_condition_dim() const { return NumInputChannels(); };

 private:
   std::vector<_LayerArray> _layer_arrays;
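Note on the conditioning path: together with PATCH 14, this makes the
condition matrix carry exactly one row per input channel. A minimal sketch of
the invariant the two patches establish (member names taken from the diffs
above; illustrative only, not part of any patch):

  // With _get_condition_dim() == NumInputChannels(), _set_condition_array()
  // fills every row of _condition, so the zero-fill of trailing rows removed
  // in PATCH 14 is no longer needed.
  const int dim = _get_condition_dim(); // == NumInputChannels()
  for (int ch = 0; ch < dim; ch++)
    for (int j = 0; j < num_frames; j++)
      _condition(ch, j) = input[ch][j];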