From a67d0e2122945998119a8643a4fb4e74fccc7f34 Mon Sep 17 00:00:00 2001 From: Jean M <132435771+jeanmon@users.noreply.github.com> Date: Fri, 25 Oct 2024 20:12:45 +0200 Subject: [PATCH] chore(avm): Allocate memory for unshifted polynomials according to their trace col size (#9345) Some measurements on the bulk test showed that resident memory during proving decreased from 33.1 GB to 28.4 GB. --- .../acir_format/avm_recursion_constraint.cpp | 7 ++- .../vm/avm/generated/circuit_builder.cpp | 56 ++++++++++++++++++- .../barretenberg/vm/avm/generated/flavor.hpp | 7 +++ .../templates/circuit_builder.cpp.hbs | 56 ++++++++++++++++++- .../bb-pil-backend/templates/flavor.hpp.hbs | 12 +++- 5 files changed, 129 insertions(+), 9 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/dsl/acir_format/avm_recursion_constraint.cpp b/barretenberg/cpp/src/barretenberg/dsl/acir_format/avm_recursion_constraint.cpp index bd5139fec50..e2fbd705a00 100644 --- a/barretenberg/cpp/src/barretenberg/dsl/acir_format/avm_recursion_constraint.cpp +++ b/barretenberg/cpp/src/barretenberg/dsl/acir_format/avm_recursion_constraint.cpp @@ -51,9 +51,10 @@ void create_dummy_vkey_and_proof(Builder& builder, // Derivation of circuit size based on the proof // Here, we should always get CONST_PROOF_SIZE_LOG_N. 
- auto log_circuit_size = (proof_size - Flavor::NUM_WITNESS_ENTITIES * Flavor::NUM_FRS_COM - - (Flavor::NUM_ALL_ENTITIES + 1) * Flavor::NUM_FRS_FR - Flavor::NUM_FRS_COM) / - (Flavor::NUM_FRS_COM + Flavor::NUM_FRS_FR * (Flavor::BATCHED_RELATION_PARTIAL_LENGTH + 1)); + const auto log_circuit_size = + (proof_size - Flavor::NUM_WITNESS_ENTITIES * Flavor::NUM_FRS_COM - + (Flavor::NUM_ALL_ENTITIES + 1) * Flavor::NUM_FRS_FR - Flavor::NUM_FRS_COM) / + (Flavor::NUM_FRS_COM + Flavor::NUM_FRS_FR * (Flavor::BATCHED_RELATION_PARTIAL_LENGTH + 1)); /*************************************************************************** * Construct Dummy Verification Key diff --git a/barretenberg/cpp/src/barretenberg/vm/avm/generated/circuit_builder.cpp b/barretenberg/cpp/src/barretenberg/vm/avm/generated/circuit_builder.cpp index 928732dc976..7ef2d5e928e 100644 --- a/barretenberg/cpp/src/barretenberg/vm/avm/generated/circuit_builder.cpp +++ b/barretenberg/cpp/src/barretenberg/vm/avm/generated/circuit_builder.cpp @@ -2,6 +2,8 @@ #include "barretenberg/vm/avm/generated/circuit_builder.hpp" #include +#include +#include #include "barretenberg/common/constexpr_utils.hpp" #include "barretenberg/common/thread.hpp" @@ -22,6 +24,23 @@ AvmCircuitBuilder::ProverPolynomials AvmCircuitBuilder::compute_polynomials() co ASSERT(num_rows <= circuit_subgroup_size); ProverPolynomials polys; + // We create a mapping between the polynomial index and the corresponding column index when row + // is expressed as a vector, i.e., column of the trace matrix. 
+ std::unordered_map names_to_col_idx; + const auto names = Row::names(); + for (size_t i = 0; i < names.size(); i++) { + names_to_col_idx[names[i]] = i; + } + + const auto labels = polys.get_unshifted_labels(); + const size_t num_unshifted = labels.size(); + + // Mapping + std::vector polys_to_cols_unshifted_idx(num_unshifted); + for (size_t i = 0; i < num_unshifted; i++) { + polys_to_cols_unshifted_idx[i] = names_to_col_idx.at(labels[i]); + } + // Allocate mem for each column AVM_TRACK_TIME("circuit_builder/init_polys_to_be_shifted", ({ for (auto& poly : polys.get_to_be_shifted()) { @@ -30,15 +49,48 @@ AvmCircuitBuilder::ProverPolynomials AvmCircuitBuilder::compute_polynomials() co /*make shiftable with offset*/ 1 }; } })); + // catch-all with fully formed polynomials AVM_TRACK_TIME( "circuit_builder/init_polys_unshifted", ({ auto unshifted = polys.get_unshifted(); - bb::parallel_for(unshifted.size(), [&](size_t i) { + + // An array which stores for each column of the trace the smallest size of the + // truncated column containing all non-zero elements. + // It is used to allocate the polynomials without memory overhead for the tail of zeros. + std::array col_nonzero_size{}; + + // Computation of size of columns. + // Non-parallel version takes 0.5 second for a trace size of 200k rows. + // A parallel version might be considered in the future. + for (size_t i = 0; i < num_rows; i++) { + const auto row = rows[i].as_vector(); + for (size_t col = 0; col < Row::SIZE; col++) { + if (!row[col].is_zero()) { + col_nonzero_size[col] = i + 1; + } + } + } + + // Set of the labels for derived/inverse polynomials. + const auto derived_labels = polys.get_derived_labels(); + std::set derived_labels_set(derived_labels.begin(), derived_labels.end()); + + bb::parallel_for(num_unshifted, [&](size_t i) { auto& poly = unshifted[i]; + const auto col_idx = polys_to_cols_unshifted_idx[i]; + size_t col_size = 0; + + // We fully allocate the inverse polynomials. 
We leave this potential memory optimization for later. + if (derived_labels_set.contains(labels[i])) { + col_size = num_rows; + } else { + col_size = col_nonzero_size[col_idx]; + } + if (poly.is_empty()) { // Not set above - poly = Polynomial{ /*memory size*/ num_rows, /*largest possible index*/ circuit_subgroup_size }; + poly = Polynomial{ /*memory size*/ col_size, /*largest possible index*/ circuit_subgroup_size }; } }); })); diff --git a/barretenberg/cpp/src/barretenberg/vm/avm/generated/flavor.hpp b/barretenberg/cpp/src/barretenberg/vm/avm/generated/flavor.hpp index 711a694ad24..fe548d4e0f9 100644 --- a/barretenberg/cpp/src/barretenberg/vm/avm/generated/flavor.hpp +++ b/barretenberg/cpp/src/barretenberg/vm/avm/generated/flavor.hpp @@ -298,6 +298,7 @@ class AvmFlavor { DEFINE_COMPOUND_GET_ALL(WireEntities, DerivedWitnessEntities) auto get_wires() { return WireEntities::get_all(); } auto get_derived() { return DerivedWitnessEntities::get_all(); } + auto get_derived_labels() { return DerivedWitnessEntities::get_labels(); } }; template @@ -311,6 +312,12 @@ class AvmFlavor { { return concatenate(PrecomputedEntities::get_all(), WitnessEntities::get_all()); } + + auto get_unshifted_labels() + { + return concatenate(PrecomputedEntities::get_labels(), WitnessEntities::get_labels()); + } + auto get_to_be_shifted() { return AvmFlavor::get_to_be_shifted(*this); } auto get_shifted() { return ShiftedEntities::get_all(); } auto get_precomputed() { return PrecomputedEntities::get_all(); } diff --git a/bb-pilcom/bb-pil-backend/templates/circuit_builder.cpp.hbs b/bb-pilcom/bb-pil-backend/templates/circuit_builder.cpp.hbs index d3628a4c0ae..5d79f5e0389 100644 --- a/bb-pilcom/bb-pil-backend/templates/circuit_builder.cpp.hbs +++ b/bb-pilcom/bb-pil-backend/templates/circuit_builder.cpp.hbs @@ -2,6 +2,8 @@ #include "barretenberg/vm/{{snakeCase name}}/generated/circuit_builder.hpp" #include +#include +#include #include "barretenberg/common/constexpr_utils.hpp" #include 
"barretenberg/common/thread.hpp" @@ -21,6 +23,23 @@ namespace bb { ASSERT(num_rows <= circuit_subgroup_size); ProverPolynomials polys; + // We create a mapping between the polynomial index and the corresponding column index when row + // is expressed as a vector, i.e., column of the trace matrix. + std::unordered_map names_to_col_idx; + const auto names = Row::names(); + for (size_t i = 0; i < names.size(); i++) { + names_to_col_idx[names[i]] = i; + } + + const auto labels = polys.get_unshifted_labels(); + const size_t num_unshifted = labels.size(); + + // Mapping + std::vector polys_to_cols_unshifted_idx(num_unshifted); + for (size_t i = 0; i < num_unshifted; i++) { + polys_to_cols_unshifted_idx[i] = names_to_col_idx.at(labels[i]); + } + // Allocate mem for each column AVM_TRACK_TIME("circuit_builder/init_polys_to_be_shifted", ({ for (auto& poly : polys.get_to_be_shifted()) { @@ -29,15 +48,48 @@ namespace bb { /*make shiftable with offset*/ 1 }; } })); + // catch-all with fully formed polynomials AVM_TRACK_TIME( "circuit_builder/init_polys_unshifted", ({ auto unshifted = polys.get_unshifted(); - bb::parallel_for(unshifted.size(), [&](size_t i) { + + // An array which stores for each column of the trace the smallest size of the + // truncated column containing all non-zero elements. + // It is used to allocate the polynomials without memory overhead for the tail of zeros. + std::array col_nonzero_size{}; + + // Computation of size of columns. + // Non-parallel version takes 0.5 second for a trace size of 200k rows. + // A parallel version might be considered in the future. + for (size_t i = 0; i < num_rows; i++) { + const auto row = rows[i].as_vector(); + for (size_t col = 0; col < Row::SIZE; col++) { + if (!row[col].is_zero()) { + col_nonzero_size[col] = i + 1; + } + } + } + + // Set of the labels for derived/inverse polynomials. 
+ const auto derived_labels = polys.get_derived_labels(); + std::set derived_labels_set(derived_labels.begin(), derived_labels.end()); + + bb::parallel_for(num_unshifted, [&](size_t i) { auto& poly = unshifted[i]; + const auto col_idx = polys_to_cols_unshifted_idx[i]; + size_t col_size = 0; + + // We fully allocate the inverse polynomials. We leave this potential memory optimization for later. + if (derived_labels_set.contains(labels[i])) { + col_size = num_rows; + } else { + col_size = col_nonzero_size[col_idx]; + } + if (poly.is_empty()) { // Not set above - poly = Polynomial{ /*memory size*/ num_rows, /*largest possible index*/ circuit_subgroup_size }; + poly = Polynomial{ /*memory size*/ col_size, /*largest possible index*/ circuit_subgroup_size }; } }); })); diff --git a/bb-pilcom/bb-pil-backend/templates/flavor.hpp.hbs b/bb-pilcom/bb-pil-backend/templates/flavor.hpp.hbs index 802118b38d0..a31cdd55fdd 100644 --- a/bb-pilcom/bb-pil-backend/templates/flavor.hpp.hbs +++ b/bb-pilcom/bb-pil-backend/templates/flavor.hpp.hbs @@ -169,6 +169,7 @@ class {{name}}Flavor { DEFINE_COMPOUND_GET_ALL(WireEntities, DerivedWitnessEntities) auto get_wires() { return WireEntities::get_all(); } auto get_derived() { return DerivedWitnessEntities::get_all(); } + auto get_derived_labels() { return DerivedWitnessEntities::get_labels(); } }; template @@ -178,9 +179,16 @@ class {{name}}Flavor { public: DEFINE_COMPOUND_GET_ALL(PrecomputedEntities, WitnessEntities, ShiftedEntities) - auto get_unshifted() { - return concatenate(PrecomputedEntities::get_all(), WitnessEntities::get_all()); + auto get_unshifted() + { + return concatenate(PrecomputedEntities::get_all(), WitnessEntities::get_all()); } + + auto get_unshifted_labels() + { + return concatenate(PrecomputedEntities::get_labels(), WitnessEntities::get_labels()); + } + auto get_to_be_shifted() { return {{name}}Flavor::get_to_be_shifted(*this); } auto get_shifted() { return ShiftedEntities::get_all(); } auto get_precomputed() { return 
PrecomputedEntities::get_all(); }