From 3b73367d2d9b72388ea349a4673b22aa4547bb3c Mon Sep 17 00:00:00 2001
From: falkTX
Date: Wed, 16 Aug 2023 18:59:23 +0200
Subject: [PATCH] Add custom NAM code patch, forced optimizations specific to MOD

Signed-off-by: falkTX
---
 .../01_mono-buffered-optimizations.patch | 1077 +++++++++++++++++
 1 file changed, 1077 insertions(+)
 create mode 100644 plugins/package/neural-amp-modeler-lv2/01_mono-buffered-optimizations.patch

diff --git a/plugins/package/neural-amp-modeler-lv2/01_mono-buffered-optimizations.patch b/plugins/package/neural-amp-modeler-lv2/01_mono-buffered-optimizations.patch
new file mode 100644
index 00000000..10c90e06
--- /dev/null
+++ b/plugins/package/neural-amp-modeler-lv2/01_mono-buffered-optimizations.patch
@@ -0,0 +1,1077 @@
+Submodule deps/NeuralAmpModelerCore contains modified content
+diff --git a/deps/NeuralAmpModelerCore/NAM/convnet.cpp b/deps/NeuralAmpModelerCore/NAM/convnet.cpp
+index 20b2af9..de5c81c 100644
+--- a/deps/NeuralAmpModelerCore/NAM/convnet.cpp
++++ b/deps/NeuralAmpModelerCore/NAM/convnet.cpp
+@@ -113,15 +113,13 @@ convnet::ConvNet::ConvNet(const double loudness, const int channels, const std::
+ this->_head = _Head(channels, it);
+ if (it != params.end())
+ throw std::runtime_error("Didn't touch all the params when initializing wavenet");
+- this->_reset_anti_pop_();
+ }
+
+-void convnet::ConvNet::_process_core_()
++void convnet::ConvNet::_process_core_(NAM_SAMPLE* outputs, const int num_frames)
+ {
+- this->_update_buffers_();
++ this->_update_buffers_(outputs, num_frames);
+ // Main computation!
+ const long i_start = this->_input_buffer_offset;
+- const long num_frames = this->_input_post_gain.size();
+ const long i_end = i_start + num_frames;
+ // TODO one unnecessary copy :/ #speed
+ for (auto i = i_start; i < i_end; i++)
+@@ -132,9 +130,7 @@ void convnet::ConvNet::_process_core_()
+ this->_head.process_(this->_block_vals[this->_blocks.size()], this->_head_output, i_start, i_end);
+ // Copy to required output array (TODO tighten this up)
+ for (int s = 0; s < num_frames; s++)
+- this->_core_dsp_output[s] = this->_head_output(s);
+- // Apply anti-pop
+- this->_anti_pop_();
++ outputs[s] = this->_head_output(s);
+ }
+
+ void convnet::ConvNet::_verify_params(const int channels, const std::vector<int>& dilations, const bool batchnorm,
+@@ -143,9 +139,9 @@ void convnet::ConvNet::_verify_params(const int channels, const std::vector
+ // TODO
+ }
+
+-void convnet::ConvNet::_update_buffers_()
++void convnet::ConvNet::_update_buffers_(NAM_SAMPLE* outputs, const int num_frames)
+ {
+- this->Buffer::_update_buffers_();
++ this->Buffer::_update_buffers_(outputs, num_frames);
+ const size_t buffer_size = this->_input_buffer.size();
+ this->_block_vals[0].resize(1, buffer_size);
+ for (size_t i = 1; i < this->_block_vals.size(); i++)
+@@ -171,27 +167,3 @@ void convnet::ConvNet::_rewind_buffers_()
+ // Now we can do the rest of the rewind
+ this->Buffer::_rewind_buffers_();
+ }
+-
+-void convnet::ConvNet::_anti_pop_()
+-{
+- if (this->_anti_pop_countdown >= this->_anti_pop_ramp)
+- return;
+- const float slope = 1.0f / float(this->_anti_pop_ramp);
+- for (size_t i = 0; i < this->_core_dsp_output.size(); i++)
+- {
+- if (this->_anti_pop_countdown >= this->_anti_pop_ramp)
+- break;
+- const float gain = std::max(slope * float(this->_anti_pop_countdown), float(0.0));
+- this->_core_dsp_output[i] *= gain;
+- this->_anti_pop_countdown++;
+- }
+-}
+-
+-void convnet::ConvNet::_reset_anti_pop_()
+-{
+- // You need the "real" receptive field, not the buffers.
+- long receptive_field = 1; +- for (size_t i = 0; i < this->_blocks.size(); i++) +- receptive_field += this->_blocks[i].conv.get_dilation(); +- this->_anti_pop_countdown = -receptive_field; +-} +diff --git a/deps/NeuralAmpModelerCore/NAM/convnet.h b/deps/NeuralAmpModelerCore/NAM/convnet.h +index baad344..436a713 100644 +--- a/deps/NeuralAmpModelerCore/NAM/convnet.h ++++ b/deps/NeuralAmpModelerCore/NAM/convnet.h +@@ -77,18 +77,9 @@ protected: + _Head _head; + void _verify_params(const int channels, const std::vector& dilations, const bool batchnorm, + const size_t actual_params); +- void _update_buffers_() override; ++ void _update_buffers_(NAM_SAMPLE* outputs, const int num_frames) override; + void _rewind_buffers_() override; + +- void _process_core_() override; +- +- // The net starts with random parameters inside; we need to wait for a full +- // receptive field to pass through before we can count on the output being +- // ok. This implements a gentle "ramp-up" so that there's no "pop" at the +- // start. +- long _anti_pop_countdown; +- const long _anti_pop_ramp = 100; +- void _anti_pop_(); +- void _reset_anti_pop_(); ++ void _process_core_(NAM_SAMPLE* outputs, const int num_frames) override; + }; + }; // namespace convnet +diff --git a/deps/NeuralAmpModelerCore/NAM/dsp.cpp b/deps/NeuralAmpModelerCore/NAM/dsp.cpp +index a4057ef..bb7f3be 100644 +--- a/deps/NeuralAmpModelerCore/NAM/dsp.cpp ++++ b/deps/NeuralAmpModelerCore/NAM/dsp.cpp +@@ -32,15 +32,12 @@ DSP::DSP(const double loudness, const double expected_sample_rate) + { + } + +-void DSP::process(NAM_SAMPLE** inputs, NAM_SAMPLE** outputs, const int num_channels, const int num_frames, +- const double input_gain, const double output_gain, ++void DSP::process(NAM_SAMPLE* outputs, const int num_frames, + const std::unordered_map& params) + { + this->_get_params_(params); +- this->_apply_input_level_(inputs, num_channels, num_frames, input_gain); +- this->_ensure_core_dsp_output_ready_(); +- this->_process_core_(); +- this->_apply_output_level_(outputs, num_channels, num_frames, output_gain); ++ this->_process_core_(outputs, num_frames); ++ this->_apply_output_level_(outputs, num_frames); + } + + void DSP::finalize_(const int num_frames) {} +@@ -60,38 +57,15 @@ void DSP::_get_params_(const std::unordered_map& input_para + } + } + +-void DSP::_apply_input_level_(NAM_SAMPLE** inputs, const int num_channels, const int num_frames, const double gain) ++void DSP::_apply_output_level_(NAM_SAMPLE* outputs, const int num_frames) + { +- // Must match exactly; we're going to use the size of _input_post_gain later +- // for num_frames. 
+- if ((int)this->_input_post_gain.size() != num_frames) +- this->_input_post_gain.resize(num_frames); +- // MONO ONLY +- const int channel = 0; +- for (int i = 0; i < num_frames; i++) +- this->_input_post_gain[i] = float(gain * inputs[channel][i]); +-} +- +-void DSP::_ensure_core_dsp_output_ready_() +-{ +- if (this->_core_dsp_output.size() < this->_input_post_gain.size()) +- this->_core_dsp_output.resize(this->_input_post_gain.size()); +-} +- +-void DSP::_process_core_() +-{ +- // Default implementation is the null operation +- for (size_t i = 0; i < this->_input_post_gain.size(); i++) +- this->_core_dsp_output[i] = this->_input_post_gain[i]; +-} ++ if (this->mNormalizeOutputLoudness) ++ { ++ const NAM_SAMPLE loudnessGain = pow(10.0, -(this->mLoudness - TARGET_DSP_LOUDNESS) / 20.0); + +-void DSP::_apply_output_level_(NAM_SAMPLE** outputs, const int num_channels, const int num_frames, const double gain) +-{ +- const double loudnessGain = pow(10.0, -(this->mLoudness - TARGET_DSP_LOUDNESS) / 20.0); +- const double finalGain = this->mNormalizeOutputLoudness ? gain * loudnessGain : gain; +- for (int c = 0; c < num_channels; c++) + for (int s = 0; s < num_frames; s++) +- outputs[c][s] = (NAM_SAMPLE)(finalGain * this->_core_dsp_output[s]); ++ outputs[s] *= loudnessGain; ++ } + } + + // Buffer ===================================================================== +@@ -119,9 +93,8 @@ void Buffer::_set_receptive_field(const int new_receptive_field, const int input + this->_reset_input_buffer(); + } + +-void Buffer::_update_buffers_() ++void Buffer::_update_buffers_(NAM_SAMPLE* outputs, const int num_frames) + { +- const long int num_frames = this->_input_post_gain.size(); + // Make sure that the buffer is big enough for the receptive field and the + // frames needed! + { +@@ -141,7 +114,7 @@ void Buffer::_update_buffers_() + this->_rewind_buffers_(); + // Put the new samples into the input buffer + for (long i = this->_input_buffer_offset, j = 0; j < num_frames; i++, j++) +- this->_input_buffer[i] = this->_input_post_gain[j]; ++ this->_input_buffer[i] = outputs[j]; + // And resize the output buffer: + this->_output_buffer.resize(num_frames); + } +@@ -195,16 +168,16 @@ Linear::Linear(const double loudness, const int receptive_field, const bool _bia + this->_bias = _bias ? params[receptive_field] : (float)0.0; + } + +-void Linear::_process_core_() ++void Linear::_process_core_(NAM_SAMPLE* outputs, const int num_frames) + { +- this->Buffer::_update_buffers_(); ++ this->Buffer::_update_buffers_(outputs, num_frames); + + // Main computation! +- for (size_t i = 0; i < this->_input_post_gain.size(); i++) ++ for (int i = 0; i < num_frames; i++) + { + const size_t offset = this->_input_buffer_offset - this->_weight.size() + i + 1; + auto input = Eigen::Map(&this->_input_buffer[offset], this->_receptive_field); +- this->_core_dsp_output[i] = this->_bias + this->_weight.dot(input); ++ outputs[i] = this->_bias + this->_weight.dot(input); + } + } + +diff --git a/deps/NeuralAmpModelerCore/NAM/dsp.h b/deps/NeuralAmpModelerCore/NAM/dsp.h +index 776ce6a..35052ff 100644 +--- a/deps/NeuralAmpModelerCore/NAM/dsp.h ++++ b/deps/NeuralAmpModelerCore/NAM/dsp.h +@@ -57,8 +57,7 @@ public: + // 3. The core DSP algorithm is run (This is what should probably be + // overridden in subclasses). + // 4. The output level is applied and the result stored to `output`. 
+- virtual void process(NAM_SAMPLE** inputs, NAM_SAMPLE** outputs, const int num_channels, const int num_frames,
+- const double input_gain, const double output_gain,
++ virtual void process(NAM_SAMPLE* outputs, const int num_frames,
+ const std::unordered_map<std::string, double>& params);
+ // Anything to take care of before next buffer comes in.
+ // For example:
+@@ -82,10 +81,6 @@ protected:
+ std::unordered_map<std::string, double> _params;
+ // If the params have changed since the last buffer was processed:
+ bool _stale_params;
+- // Where to store the samples after applying input gain
+- std::vector<float> _input_post_gain;
+- // Location for the output of the core DSP algorithm.
+- std::vector<float> _core_dsp_output;
+
+ // Methods
+
+@@ -94,20 +89,11 @@ protected:
+ // (TODO use "listener" approach)
+ void _get_params_(const std::unordered_map<std::string, double>& input_params);
+
+- // Apply the input gain
+- // Result populates this->_input_post_gain
+- void _apply_input_level_(NAM_SAMPLE** inputs, const int num_channels, const int num_frames, const double gain);
+-
+- // i.e. ensure the size is correct.
+- void _ensure_core_dsp_output_ready_();
+-
+ // The core of your DSP algorithm.
+- // Access the inputs in this->_input_post_gain
+- // Place the outputs in this->_core_dsp_output
+- virtual void _process_core_();
++ virtual void _process_core_(NAM_SAMPLE* outputs, const int num_frames) = 0;
+
+- // Copy this->_core_dsp_output to output and apply the output volume
+- void _apply_output_level_(NAM_SAMPLE** outputs, const int num_channels, const int num_frames, const double gain);
++ // Apply the output volume
++ void _apply_output_level_(NAM_SAMPLE* outputs, const int num_frames);
+ };
+
+ // Class where an input buffer is kept so that long-time effects can be
+@@ -133,7 +119,7 @@ protected:
+ void _set_receptive_field(const int new_receptive_field);
+ void _reset_input_buffer();
+ // Use this->_input_post_gain
+- virtual void _update_buffers_();
++ virtual void _update_buffers_(NAM_SAMPLE* outputs, const int num_frames);
+ virtual void _rewind_buffers_();
+ };
+
+@@ -145,7 +131,7 @@ public:
+ const double expected_sample_rate = -1.0);
+ Linear(const double loudness, const int receptive_field, const bool _bias, const std::vector<float>& params,
+ const double expected_sample_rate = -1.0);
+- void _process_core_() override;
++ void _process_core_(NAM_SAMPLE* outputs, const int num_frames) override;
+
+ protected:
+ Eigen::VectorXf _weight;
+diff --git a/deps/NeuralAmpModelerCore/NAM/lstm.cpp b/deps/NeuralAmpModelerCore/NAM/lstm.cpp
+index 2327367..ca294ab 100644
+--- a/deps/NeuralAmpModelerCore/NAM/lstm.cpp
++++ b/deps/NeuralAmpModelerCore/NAM/lstm.cpp
+@@ -101,7 +101,7 @@ void lstm::LSTM::_init_parametric(nlohmann::json& parametric)
+ this->_input_and_params.resize(1 + parametric.size()); // TODO amp parameters
+ }
+
+-void lstm::LSTM::_process_core_()
++void lstm::LSTM::_process_core_(NAM_SAMPLE* outputs, const int num_frames)
+ {
+ // Get params into the input vector before starting
+ if (this->_stale_params)
+@@ -111,8 +111,8 @@ void lstm::LSTM::_process_core_()
+ this->_stale_params = false;
+ }
+ // Process samples, placing results in the required output location
+- for (size_t i = 0; i < this->_input_post_gain.size(); i++)
+- this->_core_dsp_output[i] = this->_process_sample(this->_input_post_gain[i]);
++ for (int i = 0; i < num_frames; i++)
++ outputs[i] = this->_process_sample(outputs[i]);
+ }
+
+ float lstm::LSTM::_process_sample(const float x)
+diff --git a/deps/NeuralAmpModelerCore/NAM/lstm.h b/deps/NeuralAmpModelerCore/NAM/lstm.h
+index e855f64..a4e30a8 100644
+--- a/deps/NeuralAmpModelerCore/NAM/lstm.h
++++ b/deps/NeuralAmpModelerCore/NAM/lstm.h
+@@ -58,7 +58,7 @@ public:
+ protected:
+ Eigen::VectorXf _head_weight;
+ float _head_bias;
+- void _process_core_() override;
++ void _process_core_(NAM_SAMPLE* outputs, const int num_frames) override;
+ std::vector _layers;
+
+ float _process_sample(const float x);
+diff --git a/deps/NeuralAmpModelerCore/NAM/wavenet.cpp b/deps/NeuralAmpModelerCore/NAM/wavenet.cpp
+index af754d5..00455c1 100644
+--- a/deps/NeuralAmpModelerCore/NAM/wavenet.cpp
++++ b/deps/NeuralAmpModelerCore/NAM/wavenet.cpp
+@@ -259,7 +259,6 @@ wavenet::WaveNet::WaveNet(const double loudness, const std::vector
+ this->_head_output.resize(1, 0); // Mono output!
+ this->set_params_(params);
+- this->_reset_anti_pop_();
+ }
+
+ void wavenet::WaveNet::finalize_(const int num_frames)
+@@ -309,9 +308,8 @@ void wavenet::WaveNet::_prepare_for_frames_(const long num_frames)
+ this->_layer_arrays[i].prepare_for_frames_(num_frames);
+ }
+
+-void wavenet::WaveNet::_process_core_()
++void wavenet::WaveNet::_process_core_(NAM_SAMPLE* outputs, const int num_frames)
+ {
+- const long num_frames = this->_input_post_gain.size();
+ this->_set_num_frames_(num_frames);
+ this->_prepare_for_frames_(num_frames);
+
+@@ -324,7 +322,7 @@ void wavenet::WaveNet::_process_core_()
+ // Clumsy...
+ for (int j = 0; j < num_frames; j++)
+ {
+- this->_condition(0, j) = this->_input_post_gain[j];
++ this->_condition(0, j) = outputs[j];
+ if (this->_stale_params) // Column-major assignment; good for Eigen. Let the
+ // compiler optimize this.
+ for (size_t i = 0; i < this->_param_names.size(); i++)
+@@ -351,13 +349,13 @@ void wavenet::WaveNet::_process_core_()
+ for (int s = 0; s < num_frames; s++)
+ {
+ float out = this->_head_scale * this->_head_arrays[final_head_array](0, s);
++#ifndef __MOD_DEVICES__
+ // This is the NaN check that we could fix with anti-popping the input
+ if (isnan(out))
+ out = 0.0;
+- this->_core_dsp_output[s] = out;
++#endif
++ outputs[s] = out;
+ }
+- // Apply anti-pop
+- this->_anti_pop_();
+ }
+
+ void wavenet::WaveNet::_set_num_frames_(const long num_frames)
+@@ -377,27 +375,3 @@ void wavenet::WaveNet::_set_num_frames_(const long num_frames)
+ // this->_head.set_num_frames_(num_frames);
+ this->_num_frames = num_frames;
+ }
+-
+-void wavenet::WaveNet::_anti_pop_()
+-{
+- if (this->_anti_pop_countdown >= this->_anti_pop_ramp)
+- return;
+- const float slope = 1.0f / float(this->_anti_pop_ramp);
+- for (size_t i = 0; i < this->_core_dsp_output.size(); i++)
+- {
+- if (this->_anti_pop_countdown >= this->_anti_pop_ramp)
+- break;
+- const float gain = std::max(slope * float(this->_anti_pop_countdown), 0.0f);
+- this->_core_dsp_output[i] *= gain;
+- this->_anti_pop_countdown++;
+- }
+-}
+-
+-void wavenet::WaveNet::_reset_anti_pop_()
+-{
+- // You need the "real" receptive field, not the buffers.
+- long receptive_field = 1; +- for (size_t i = 0; i < this->_layer_arrays.size(); i++) +- receptive_field += this->_layer_arrays[i].get_receptive_field(); +- this->_anti_pop_countdown = -receptive_field; +-} +diff --git a/deps/NeuralAmpModelerCore/NAM/wavenet.h b/deps/NeuralAmpModelerCore/NAM/wavenet.h +index ee7ebc0..b1806dc 100644 +--- a/deps/NeuralAmpModelerCore/NAM/wavenet.h ++++ b/deps/NeuralAmpModelerCore/NAM/wavenet.h +@@ -203,18 +203,9 @@ private: + void _init_parametric_(nlohmann::json& parametric); + void _prepare_for_frames_(const long num_frames); + // Reminder: From ._input_post_gain to ._core_dsp_output +- void _process_core_() override; ++ void _process_core_(NAM_SAMPLE* outputs, const int num_frames) override; + + // Ensure that all buffer arrays are the right size for this num_frames + void _set_num_frames_(const long num_frames); +- +- // The net starts with random parameters inside; we need to wait for a full +- // receptive field to pass through before we can count on the output being +- // ok. This implements a gentle "ramp-up" so that there's no "pop" at the +- // start. +- long _anti_pop_countdown; +- const long _anti_pop_ramp = 4000; +- void _anti_pop_(); +- void _reset_anti_pop_(); + }; + }; // namespace wavenet +diff --git a/deps/NeuralAmpModelerCore/dsp/RecursiveLinearFilter.cpp b/deps/NeuralAmpModelerCore/dsp/RecursiveLinearFilter.cpp +index 7eb69b8..91e9246 100644 +--- a/deps/NeuralAmpModelerCore/dsp/RecursiveLinearFilter.cpp ++++ b/deps/NeuralAmpModelerCore/dsp/RecursiveLinearFilter.cpp +@@ -22,10 +22,10 @@ mOutputStart(outputDegree) + this->mOutputCoefficients.resize(outputDegree); + } + +-DSP_SAMPLE** recursive_linear_filter::Base::Process(DSP_SAMPLE** inputs, const size_t numChannels, ++DSP_SAMPLE* recursive_linear_filter::Base::Process(DSP_SAMPLE* inputs, + const size_t numFrames) + { +- this->_PrepareBuffers(numChannels, numFrames); ++ this->_PrepareBuffers(numFrames); + long inputStart = 0; + long outputStart = 0; + // Degree = longest history +@@ -35,34 +35,35 @@ DSP_SAMPLE** recursive_linear_filter::Base::Process(DSP_SAMPLE** inputs, const s + // 0,2,3,... are fine. + const size_t inputDegree = this->_GetInputDegree(); + const size_t outputDegree = this->_GetOutputDegree(); +- for (auto c = 0; c < numChannels; c++) + { + inputStart = this->mInputStart; // Should be plenty fine + outputStart = this->mOutputStart; +- for (auto s = 0; s < numFrames; s++) ++ for (size_t s = 0; s < numFrames; s++) + { + DSP_SAMPLE out = 0.0; + // Compute input terms + inputStart -= 1; + if (inputStart < 0) + inputStart = inputDegree - 1; +- this->mInputHistory[c][inputStart] = inputs[c][s]; // Store current input +- for (auto i = 0; i < inputDegree; i++) +- out += this->mInputCoefficients[i] * this->mInputHistory[c][(inputStart + i) % inputDegree]; ++ this->mInputHistory[inputStart] = inputs[s]; // Store current input ++ for (size_t i = 0; i < inputDegree; i++) ++ out += this->mInputCoefficients[i] * this->mInputHistory[(inputStart + i) % inputDegree]; + + // Output terms + outputStart -= 1; + if (outputStart < 0) + outputStart = outputDegree - 1; +- for (auto i = 1; i < outputDegree; i++) +- out += this->mOutputCoefficients[i] * this->mOutputHistory[c][(outputStart + i) % outputDegree]; ++ for (size_t i = 1; i < outputDegree; i++) ++ out += this->mOutputCoefficients[i] * this->mOutputHistory[(outputStart + i) % outputDegree]; ++#ifndef __MOD_DEVICES__ + // Prevent a NaN from jamming the filter! + if (std::isnan(out)) + out = 0.0; ++#endif + // Store the output! 
+ if (outputDegree >= 1) +- this->mOutputHistory[c][outputStart] = out; +- this->mOutputs[c][s] = out; ++ this->mOutputHistory[outputStart] = out; ++ this->mOutputs[s] = out; + } + } + this->mInputStart = inputStart; +@@ -70,24 +71,21 @@ DSP_SAMPLE** recursive_linear_filter::Base::Process(DSP_SAMPLE** inputs, const s + return this->_GetPointers(); + } + +-void recursive_linear_filter::Base::_PrepareBuffers(const size_t numChannels, const size_t numFrames) ++void recursive_linear_filter::Base::_PrepareBuffers(const size_t numFrames) + { + // Check for new channel count *before* parent class ensures they match! +- const bool newChannels = this->_GetNumChannels() != numChannels; ++ const bool newChannels = this->_GetNumFrames() == 0; + // Parent implementation takes care of mOutputs and mOutputPointers +- this->dsp::DSP::_PrepareBuffers(numChannels, numFrames); ++ this->dsp::DSP::_PrepareBuffers(numFrames); + if (newChannels) + { +- this->mInputHistory.resize(numChannels); +- this->mOutputHistory.resize(numChannels); + const size_t inputDegree = this->_GetInputDegree(); + const size_t outputDegree = this->_GetOutputDegree(); +- for (auto c = 0; c < numChannels; c++) + { +- this->mInputHistory[c].resize(inputDegree); +- this->mOutputHistory[c].resize(outputDegree); +- std::fill(this->mInputHistory[c].begin(), this->mInputHistory[c].end(), 0.0); +- std::fill(this->mOutputHistory[c].begin(), this->mOutputHistory[c].end(), 0.0); ++ this->mInputHistory.resize(inputDegree); ++ this->mOutputHistory.resize(outputDegree); ++ std::fill(this->mInputHistory.begin(), this->mInputHistory.end(), 0.0); ++ std::fill(this->mOutputHistory.begin(), this->mOutputHistory.end(), 0.0); + } + } + } +diff --git a/deps/NeuralAmpModelerCore/dsp/RecursiveLinearFilter.h b/deps/NeuralAmpModelerCore/dsp/RecursiveLinearFilter.h +index 737d297..afacae1 100644 +--- a/deps/NeuralAmpModelerCore/dsp/RecursiveLinearFilter.h ++++ b/deps/NeuralAmpModelerCore/dsp/RecursiveLinearFilter.h +@@ -22,14 +22,15 @@ class Base : public dsp::DSP + { + public: + Base(const size_t inputDegree, const size_t outputDegree); +- DSP_SAMPLE** Process(DSP_SAMPLE** inputs, const size_t numChannels, const size_t numFrames) override; ++ DSP_SAMPLE* Process(DSP_SAMPLE* inputs, const size_t numFrames) override; ++ ++ // Additionally prepares mInputHistory and mOutputHistory. ++ void _PrepareBuffers(const size_t numFrames) override; + + protected: + // Methods + size_t _GetInputDegree() const { return this->mInputCoefficients.size(); }; + size_t _GetOutputDegree() const { return this->mOutputCoefficients.size(); }; +- // Additionally prepares mInputHistory and mOutputHistory. +- void _PrepareBuffers(const size_t numChannels, const size_t numFrames) override; + + // Coefficients for the DSP filter + // [0] is for the current sample +@@ -43,8 +44,8 @@ protected: + // First index is channel + // Second index, [0] is the current input/output, [1] is the previous, [2] is + // before that, etc. +- std::vector> mInputHistory; +- std::vector> mOutputHistory; ++ std::vector mInputHistory; ++ std::vector mOutputHistory; + // Indices for history. + // Designates which index is currently "0". Use modulus to wrap around. 
+ long mInputStart; +diff --git a/deps/NeuralAmpModelerCore/dsp/dsp.cpp b/deps/NeuralAmpModelerCore/dsp/dsp.cpp +index 6fc7bdd..6e4787e 100644 +--- a/deps/NeuralAmpModelerCore/dsp/dsp.cpp ++++ b/deps/NeuralAmpModelerCore/dsp/dsp.cpp +@@ -13,68 +13,25 @@ + // Implementation of Version 2 interface + + dsp::DSP::DSP() +-: mOutputPointers(nullptr) +-, mOutputPointersSize(0) + { + } + + dsp::DSP::~DSP() + { +- this->_DeallocateOutputPointers(); + }; + +-void dsp::DSP::_AllocateOutputPointers(const size_t numChannels) ++DSP_SAMPLE* dsp::DSP::_GetPointers() + { +- if (this->mOutputPointers != nullptr) +- throw std::runtime_error("Tried to re-allocate over non-null mOutputPointers"); +- this->mOutputPointers = new DSP_SAMPLE*[numChannels]; +- if (this->mOutputPointers == nullptr) +- throw std::runtime_error("Failed to allocate pointer to output buffer!\n"); +- this->mOutputPointersSize = numChannels; ++ return this->mOutputs.data(); + } + +-void dsp::DSP::_DeallocateOutputPointers() +-{ +- if (this->mOutputPointers != nullptr) +- { +- delete[] this->mOutputPointers; +- this->mOutputPointers = nullptr; +- } +- if (this->mOutputPointers != nullptr) +- throw std::runtime_error("Failed to deallocate output pointer!"); +- this->mOutputPointersSize = 0; +-} +- +-DSP_SAMPLE** dsp::DSP::_GetPointers() +-{ +- for (auto c = 0; c < this->_GetNumChannels(); c++) +- this->mOutputPointers[c] = this->mOutputs[c].data(); +- return this->mOutputPointers; +-} +- +-void dsp::DSP::_PrepareBuffers(const size_t numChannels, const size_t numFrames) ++void dsp::DSP::_PrepareBuffers(const size_t numFrames) + { + const size_t oldFrames = this->_GetNumFrames(); +- const size_t oldChannels = this->_GetNumChannels(); ++ const bool resizeFrames = oldFrames != numFrames; + +- const bool resizeChannels = oldChannels != numChannels; +- const bool resizeFrames = resizeChannels || (oldFrames != numFrames); +- if (resizeChannels) +- { +- this->mOutputs.resize(numChannels); +- this->_ResizePointers(numChannels); +- } + if (resizeFrames) +- for (auto c = 0; c < numChannels; c++) +- this->mOutputs[c].resize(numFrames); +-} +- +-void dsp::DSP::_ResizePointers(const size_t numChannels) +-{ +- if (this->mOutputPointersSize == numChannels) +- return; +- this->_DeallocateOutputPointers(); +- this->_AllocateOutputPointers(numChannels); ++ this->mOutputs.resize(numFrames); + } + + dsp::History::History() +diff --git a/deps/NeuralAmpModelerCore/dsp/dsp.h b/deps/NeuralAmpModelerCore/dsp/dsp.h +index faf06af..1cb8224 100644 +--- a/deps/NeuralAmpModelerCore/dsp/dsp.h ++++ b/deps/NeuralAmpModelerCore/dsp/dsp.h +@@ -32,7 +32,7 @@ public: + // The output shall be a pointer-to-pointers of matching size. + // This object instance will own the data referenced by the pointers and be + // responsible for its allocation and deallocation. +- virtual DSP_SAMPLE** Process(DSP_SAMPLE** inputs, const size_t numChannels, const size_t numFrames) = 0; ++ virtual DSP_SAMPLE* Process(DSP_SAMPLE* inputs, const size_t numFrames) = 0; + // Update the parameters of the DSP object according to the provided params. + // Not declaring a pure virtual bc there's no concrete definition that can + // use Params. +@@ -42,34 +42,20 @@ public: + protected: + // Methods + +- // Allocate mOutputPointers. +- // Assumes it's already null (Use _DeallocateOutputPointers()). +- void _AllocateOutputPointers(const size_t numChannels); +- // Ensure mOutputPointers is freed. 
+- void _DeallocateOutputPointers(); +- +- size_t _GetNumChannels() const { return this->mOutputs.size(); }; +- size_t _GetNumFrames() const { return this->_GetNumChannels() > 0 ? this->mOutputs[0].size() : 0; } ++ size_t _GetNumFrames() const { return this->mOutputs.size(); } + // Return a pointer-to-pointers for the DSP's output buffers (all channels) + // Assumes that ._PrepareBuffers() was called recently enough. +- DSP_SAMPLE** _GetPointers(); ++ DSP_SAMPLE* _GetPointers(); + // Resize mOutputs to (numChannels, numFrames) and ensure that the raw + // pointers are also keeping up. +- virtual void _PrepareBuffers(const size_t numChannels, const size_t numFrames); +- // Resize the pointer-to-pointers for the vector-of-vectors. +- void _ResizePointers(const size_t numChannels); ++ virtual void _PrepareBuffers(const size_t numFrames); + + // Attributes + + // The output array into which the DSP module's calculations will be written. + // Pointers to this member's data will be returned by .Process(), and std + // Will ensure proper allocation. +- std::vector> mOutputs; +- // A pointer to pointers of which copies will be given out as the output of +- // .Process(). This object will ensure proper allocation and deallocation of +- // the first level; The second level points to .data() from mOutputs. +- DSP_SAMPLE** mOutputPointers; +- size_t mOutputPointersSize; ++ std::vector mOutputs; + }; + + // A class where a longer buffer of history is needed to correctly calculate +diff --git a/src/BufferedDSP.hpp b/src/BufferedDSP.hpp +new file mode 100644 +index 0000000..ce5df3c +--- /dev/null ++++ b/src/BufferedDSP.hpp +@@ -0,0 +1,211 @@ ++/* ++ * Buffered DSP ++ * Copyright (C) 2022-2023 Filipe Coelho ++ * SPDX-License-Identifier: ISC ++ */ ++ ++#pragma once ++ ++#include "NAM/dsp.h" ++ ++#include ++#include ++#include ++#include ++ ++#if defined(__SSE2_MATH__) ++# include ++#endif ++ ++class BufferedDSP ++{ ++ std::unordered_map& namParams; ++ DSP* activedsp = nullptr; ++ float* bufferedInput = nullptr; ++ float* bufferedOutput = nullptr; ++ uint32_t bufferSize = 0; ++ sem_t semBgProcStart = {}; ++ sem_t semBgProcFinished = {}; ++ std::atomic active{ false }; ++ ++ pthread_mutex_t mutexI, mutexO; ++ pthread_t thread = {}; ++ volatile bool running = false; ++ ++public: ++ BufferedDSP(std::unordered_map& namParams_) ++ : namParams(namParams_) ++ { ++ sem_init(&semBgProcStart, 0, 0); ++ sem_init(&semBgProcFinished, 0, 0); ++ ++ pthread_mutexattr_t attr; ++ pthread_mutexattr_init(&attr); ++ pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT); ++ pthread_mutex_init(&mutexI, &attr); ++ pthread_mutex_init(&mutexO, &attr); ++ pthread_mutexattr_destroy(&attr); ++ } ++ ++ ~BufferedDSP() ++ { ++ stop(); ++ ++ pthread_mutex_destroy(&mutexI); ++ pthread_mutex_destroy(&mutexO); ++ sem_destroy(&semBgProcStart); ++ sem_destroy(&semBgProcFinished); ++ delete[] bufferedInput; ++ delete[] bufferedOutput; ++ } ++ ++ void setBufferSize(const uint32_t newBufferSize) ++ { ++ if (bufferSize == newBufferSize) ++ return; ++ ++ const bool wasRunning = running; ++ ++ if (wasRunning) ++ stop(); ++ ++ bufferSize = newBufferSize; ++ ++ delete[] bufferedInput; ++ delete[] bufferedOutput; ++ ++ bufferedInput = new float[newBufferSize]; ++ bufferedOutput = new float[newBufferSize]; ++ std::memset(bufferedOutput, 0, sizeof(float)*newBufferSize); ++ ++ if (wasRunning) ++ start(); ++ } ++ ++ void start() ++ { ++ running = true; ++ ++ struct sched_param sched_param = {}; ++ sched_param.sched_priority = 80; ++ ++ #ifdef 
__MOD_DEVICES__ ++ int rtprio; ++ const char* const srtprio = std::getenv("MOD_PLUGIN_THREAD_PRIORITY"); ++ if (srtprio != nullptr && (rtprio = std::atoi(srtprio)) > 0) ++ sched_param.sched_priority = rtprio - 1; ++ #endif ++ ++ pthread_attr_t attr; ++ pthread_attr_init(&attr); ++ pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); ++ pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); ++ pthread_attr_setschedpolicy(&attr, SCHED_FIFO); ++ pthread_attr_setschedparam(&attr, &sched_param); ++ ++ if (pthread_create(&thread, &attr, _run, this) != 0) ++ { ++ pthread_attr_destroy(&attr); ++ pthread_attr_init(&attr); ++ pthread_create(&thread, &attr, _run, this); ++ } ++ ++ pthread_attr_destroy(&attr); ++ } ++ ++ void stop() ++ { ++ if (!running) return; running = false; ++ sem_post(&semBgProcStart); ++ pthread_join(thread, nullptr); ++ thread = {}; ++ } ++ ++ void setDSP(DSP* const dsp) ++ { ++ activedsp = dsp; ++ ++ while (active.load()) ++ usleep(1000); ++ } ++ ++ void process(float* const output, const uint32_t len) ++ { ++ if (len > bufferSize) ++ return; ++ ++ sem_wait(&semBgProcFinished); ++ ++ pthread_mutex_lock(&mutexI); ++ std::memcpy(bufferedInput, output, sizeof(float)*len); ++ pthread_mutex_unlock(&mutexI); ++ ++ pthread_mutex_lock(&mutexO); ++ std::memcpy(output, bufferedOutput, sizeof(float)*len); ++ pthread_mutex_unlock(&mutexO); ++ ++ sem_post(&semBgProcStart); ++ } ++ ++ static void* _run(void* const arg) ++ { ++ static_cast(arg)->run(); ++ return nullptr; ++ } ++ ++ void run() ++ { ++ if (bufferSize == 0) ++ return; ++ ++ float* tmp = new float[bufferSize]; ++ std::memset(tmp, 0, sizeof(float)*bufferSize); ++ ++ // disable denormals and enable flush to zero ++ { ++ #if defined(__SSE2_MATH__) ++ _mm_setcsr(_mm_getcsr() | 0x8040); ++ #elif defined(__aarch64__) ++ uint64_t flags; ++ __asm__ __volatile__("mrs %0, fpcr" : "=r" (flags)); ++ __asm__ __volatile__("msr fpcr, %0" :: "r" (flags | 0x1000000)); ++ #elif defined(__arm__) && !defined(__SOFTFP__) ++ uint32_t flags; ++ __asm__ __volatile__("vmrs %0, fpscr" : "=r" (flags)); ++ __asm__ __volatile__("vmsr fpscr, %0" :: "r" (flags | 0x1000000)); ++ #endif ++ } ++ ++ // sem_post(&semBgProcFinished); ++ ++ while (running) ++ { ++ sem_post(&semBgProcFinished); ++ sem_wait(&semBgProcStart); ++ ++ if (!running) ++ break; ++ ++ pthread_mutex_lock(&mutexI); ++ std::memcpy(tmp, bufferedInput, sizeof(float)*bufferSize); ++ pthread_mutex_unlock(&mutexI); ++ ++ active.store(true); ++ ++ if (DSP* const dsp = activedsp) ++ { ++ dsp->process(tmp, bufferSize, namParams); ++ // dsp->process(&tmp, &tmp, 1, bufferSize, 1.0, 1.0, namParams); ++ dsp->finalize_(bufferSize); ++ } ++ ++ active.store(false); ++ ++ pthread_mutex_lock(&mutexO); ++ std::memcpy(bufferedOutput, tmp, sizeof(float)*bufferSize); ++ pthread_mutex_unlock(&mutexO); ++ } ++ ++ delete[] tmp; ++ } ++}; +diff --git a/src/nam_plugin.cpp b/src/nam_plugin.cpp +index b9274b3..6473523 100644 +--- a/src/nam_plugin.cpp ++++ b/src/nam_plugin.cpp +@@ -10,6 +10,7 @@ + + namespace NAM { + Plugin::Plugin() ++ : bufferedDSP(mNAMParams) + { + // prevent allocations on the audio thread + currentModelPath.reserve(MAX_FILE_NAME+1); +@@ -17,6 +18,7 @@ namespace NAM { + + Plugin::~Plugin() + { ++ bufferedDSP.stop(); + delete currentModel; + } + +@@ -76,8 +78,17 @@ namespace NAM { + uris.model_Path = map->map(map->handle, MODEL_URI); + + if (options != nullptr) ++ { + options_set(this, options); + ++ if (maxBufferSize != 0) ++ { ++ bufferedDSP.setBufferSize(maxBufferSize); ++ bufferedDSP.start(); 
++ mHighPass._PrepareBuffers(maxBufferSize);
++ }
++ }
++
+ return true;
+ }
+
+@@ -122,8 +133,13 @@ namespace NAM {
+ float* buffer = new float[numSamples];
+
+ std::unordered_map<std::string, double> params = {};
+- model->process(&buffer, &buffer, 1, numSamples, 1.0, 1.0, params);
+- model->finalize_(numSamples);
++ for (int32_t i=0; i<4096; i += numSamples)
++ {
++ std::memset(buffer, 0, sizeof(float)*numSamples);
++ model->process(buffer, numSamples, params);
++ // model->process(&buffer, &buffer, 1, numSamples, 1.0, 1.0, params);
++ model->finalize_(numSamples);
++ }
+
+ delete[] buffer;
+ }
+@@ -180,6 +196,8 @@ namespace NAM {
+ nam->currentModelPath = msg->path;
+ assert(nam->currentModelPath.capacity() >= MAX_FILE_NAME + 1);
+
++ nam->bufferedDSP.setDSP(msg->model);
++
+ // send reply
+ nam->schedule->schedule_work(nam->schedule->handle, sizeof(reply), &reply);
+
+@@ -226,60 +244,75 @@ namespace NAM {
+ }
+ }
+
++ float lvl;
++
+ // convert input level from db
+ float desiredInputLevel = powf(10, *(ports.input_level) * 0.05f);
+
+ if (fabs(desiredInputLevel - inputLevel) > SMOOTH_EPSILON)
+ {
++ lvl = inputLevel;
+ for (unsigned int i = 0; i < n_samples; i++)
+ {
+ // do very basic smoothing
+- inputLevel = (.99f * inputLevel) + (.01f * desiredInputLevel);
++ lvl = (.99f * lvl) + (.01f * desiredInputLevel);
+
+- ports.audio_out[i] = ports.audio_in[i] * inputLevel;
++ ports.audio_out[i] = ports.audio_in[i] * lvl;
+ }
++ inputLevel = lvl;
+ }
+ else
+ {
+- inputLevel = desiredInputLevel;
++ lvl = inputLevel = desiredInputLevel;
+
+ for (unsigned int i = 0; i < n_samples; i++)
+ {
+- ports.audio_out[i] = ports.audio_in[i] * inputLevel;
++ ports.audio_out[i] = ports.audio_in[i] * lvl;
+ }
+ }
+
+- float** outputPtrs = &ports.audio_out;
++ float* outputs = ports.audio_out;
+
+ if (currentModel != nullptr)
+ {
+- currentModel->process(&ports.audio_out, &ports.audio_out, 1, n_samples, 1.0, 1.0, mNAMParams);
+- currentModel->finalize_(n_samples);
++ if (*ports.buffered > 0.5f)
++ {
++ bufferedDSP.process(outputs, n_samples);
++ }
++ else
++ {
++ currentModel->process(outputs, n_samples, mNAMParams);
++ // currentModel->process(&outputs, &outputs, 1, n_samples, 1.0, 1.0, mNAMParams);
++ currentModel->finalize_(n_samples);
++ }
+
+ // Apply a high pass filter at 5Hz to eliminate any DC offset
+- outputPtrs = mHighPass.Process(outputPtrs, 1, n_samples);
++ outputs = mHighPass.Process(outputs, n_samples);
++ // outputs = *mHighPass.Process(&outputs, 1, n_samples);
+ }
+
+ // convert output level from db
+- float desiredOutputLevel = powf(10, *(ports.output_level) * 0.05f);
++ const float desiredOutputLevel = powf(10, *(ports.output_level) * 0.05f);
+
+ if (fabs(desiredOutputLevel - outputLevel) > SMOOTH_EPSILON)
+ {
++ lvl = outputLevel;
+ for (unsigned int i = 0; i < n_samples; i++)
+ {
+ // do very basic smoothing
+- outputLevel = (.99f * outputLevel) + (.01f * desiredOutputLevel);
++ lvl = (.99f * lvl) + (.01f * desiredOutputLevel);
+
+- ports.audio_out[i] = outputPtrs[0][i] * outputLevel;
++ ports.audio_out[i] = outputs[i] * outputLevel;
+ }
++ outputLevel = lvl;
+ }
+ else
+ {
+- outputLevel = desiredOutputLevel;
++ lvl = outputLevel = desiredOutputLevel;
+
+ for (unsigned int i = 0; i < n_samples; i++)
+ {
+- ports.audio_out[i] = outputPtrs[0][i] * outputLevel;
++ ports.audio_out[i] = outputs[i] * lvl;
+ }
+ }
+ }
+@@ -299,6 +332,7 @@ namespace NAM {
+ if (options[i].key == nam->uris.bufSize_maxBlockLength && options[i].type == nam->uris.atom_Int)
+ {
+ nam->maxBufferSize = *(const int32_t*)options[i].value;
++ nam->bufferedDSP.setBufferSize(nam->maxBufferSize);
+ break;
+ }
+ }
+diff --git a/src/nam_plugin.h b/src/nam_plugin.h
+index c1b386c..78df7d1 100644
+--- a/src/nam_plugin.h
++++ b/src/nam_plugin.h
+@@ -24,6 +24,8 @@
+ #include
+ #include
+
++#include "BufferedDSP.hpp"
++
+ #define PlUGIN_URI "http://github.com/mikeoliphant/neural-amp-modeler-lv2"
+ #define MODEL_URI PlUGIN_URI "#model"
+
+@@ -61,6 +63,7 @@ namespace NAM {
+ float* audio_out;
+ float* input_level;
+ float* output_level;
++ const float* buffered;
+ };
+
+ Ports ports = {};
+@@ -71,6 +74,7 @@ namespace NAM {
+ LV2_Log_Logger logger = {};
+ LV2_Worker_Schedule* schedule = nullptr;
+
++ BufferedDSP bufferedDSP;
+ ::DSP* currentModel = nullptr;
+ std::string currentModelPath;
+ recursive_linear_filter::HighPass mHighPass;