Skip to content

Commit

Permalink
chore(avm): Allocate memory for unshifted polynomials according to their trace col size (#9345)
Browse files Browse the repository at this point in the history

Some measurements on the bulk test showed that resident memory during
proving went from 33.1 GB to 28.4 GB.
  • Loading branch information
jeanmon authored Oct 25, 2024
1 parent cec6306 commit a67d0e2
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,10 @@ void create_dummy_vkey_and_proof(Builder& builder,

// Derivation of circuit size based on the proof
// Here, we should always get CONST_PROOF_SIZE_LOG_N.
auto log_circuit_size = (proof_size - Flavor::NUM_WITNESS_ENTITIES * Flavor::NUM_FRS_COM -
(Flavor::NUM_ALL_ENTITIES + 1) * Flavor::NUM_FRS_FR - Flavor::NUM_FRS_COM) /
(Flavor::NUM_FRS_COM + Flavor::NUM_FRS_FR * (Flavor::BATCHED_RELATION_PARTIAL_LENGTH + 1));
const auto log_circuit_size =
(proof_size - Flavor::NUM_WITNESS_ENTITIES * Flavor::NUM_FRS_COM -
(Flavor::NUM_ALL_ENTITIES + 1) * Flavor::NUM_FRS_FR - Flavor::NUM_FRS_COM) /
(Flavor::NUM_FRS_COM + Flavor::NUM_FRS_FR * (Flavor::BATCHED_RELATION_PARTIAL_LENGTH + 1));

/***************************************************************************
* Construct Dummy Verification Key
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
#include "barretenberg/vm/avm/generated/circuit_builder.hpp"

#include <mutex>
#include <set>
#include <unordered_map>

#include "barretenberg/common/constexpr_utils.hpp"
#include "barretenberg/common/thread.hpp"
Expand All @@ -22,6 +24,23 @@ AvmCircuitBuilder::ProverPolynomials AvmCircuitBuilder::compute_polynomials() co
ASSERT(num_rows <= circuit_subgroup_size);
ProverPolynomials polys;

// Map each column name in Row::names() to its position in that list. This position is used below as
// the index of the column in the vector form of a row, i.e., the column index in the trace matrix.
std::unordered_map<std::string, size_t> names_to_col_idx;
const auto names = Row::names();
for (size_t i = 0; i < names.size(); i++) {
names_to_col_idx[names[i]] = i;
}

const auto labels = polys.get_unshifted_labels();
const size_t num_unshifted = labels.size();

// Mapping from the index of an unshifted polynomial to the index of its trace column.
std::vector<size_t> polys_to_cols_unshifted_idx(num_unshifted);
for (size_t i = 0; i < num_unshifted; i++) {
polys_to_cols_unshifted_idx[i] = names_to_col_idx.at(labels[i]);
}

// Allocate mem for each column
AVM_TRACK_TIME("circuit_builder/init_polys_to_be_shifted", ({
for (auto& poly : polys.get_to_be_shifted()) {
Expand All @@ -30,15 +49,48 @@ AvmCircuitBuilder::ProverPolynomials AvmCircuitBuilder::compute_polynomials() co
/*make shiftable with offset*/ 1 };
}
}));

// catch-all with fully formed polynomials
AVM_TRACK_TIME(
"circuit_builder/init_polys_unshifted", ({
auto unshifted = polys.get_unshifted();
bb::parallel_for(unshifted.size(), [&](size_t i) {

// An array which stores for each column of the trace the smallest size of the
// truncated column containing all non-zero elements.
// It is used to allocate the polynomials without memory overhead for the tail of zeros.
std::array<size_t, Row::SIZE> col_nonzero_size{};

// Computation of size of columns.
// Non-parallel version takes 0.5 second for a trace size of 200k rows.
// A parallel version might be considered in the future.
for (size_t i = 0; i < num_rows; i++) {
const auto row = rows[i].as_vector();
for (size_t col = 0; col < Row::SIZE; col++) {
if (!row[col].is_zero()) {
col_nonzero_size[col] = i + 1;
}
}
}

// Set of the labels for derived/inverse polynomials.
const auto derived_labels = polys.get_derived_labels();
std::set<std::string> derived_labels_set(derived_labels.begin(), derived_labels.end());

bb::parallel_for(num_unshifted, [&](size_t i) {
auto& poly = unshifted[i];
const auto col_idx = polys_to_cols_unshifted_idx[i];
size_t col_size = 0;

// We fully allocate the inverse polynomials. We leave this potential memory optimization for later.
if (derived_labels_set.contains(labels[i])) {
col_size = num_rows;
} else {
col_size = col_nonzero_size[col_idx];
}

if (poly.is_empty()) {
// Not set above
poly = Polynomial{ /*memory size*/ num_rows, /*largest possible index*/ circuit_subgroup_size };
poly = Polynomial{ /*memory size*/ col_size, /*largest possible index*/ circuit_subgroup_size };
}
});
}));
Expand Down
7 changes: 7 additions & 0 deletions barretenberg/cpp/src/barretenberg/vm/avm/generated/flavor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ class AvmFlavor {
DEFINE_COMPOUND_GET_ALL(WireEntities<DataType>, DerivedWitnessEntities<DataType>)
// Accessor for the wire entities: forwards to WireEntities<DataType>::get_all().
auto get_wires() { return WireEntities<DataType>::get_all(); }
// Accessor for the derived witness entities: forwards to DerivedWitnessEntities<DataType>::get_all().
auto get_derived() { return DerivedWitnessEntities<DataType>::get_all(); }
// Labels of the derived witness entities: forwards to DerivedWitnessEntities<DataType>::get_labels().
// compute_polynomials() uses these labels to pick out the polynomials it allocates at full num_rows size.
auto get_derived_labels() { return DerivedWitnessEntities<DataType>::get_labels(); }
};

template <typename DataType>
Expand All @@ -311,6 +312,12 @@ class AvmFlavor {
{
return concatenate(PrecomputedEntities<DataType>::get_all(), WitnessEntities<DataType>::get_all());
}

// Labels for the unshifted entities: the precomputed labels followed by the witness labels,
// concatenated in the same order in which get_unshifted() concatenates the entities themselves.
// NOTE(review): presumably each get_labels() lists its entries in the same order as the matching
// get_all(), so that labels[i] names get_unshifted()[i] (compute_polynomials() indexes both with
// the same i) — confirm.
auto get_unshifted_labels()
{
return concatenate(PrecomputedEntities<DataType>::get_labels(), WitnessEntities<DataType>::get_labels());
}

// Delegates to the flavor-level AvmFlavor::get_to_be_shifted<DataType>, passing this object.
auto get_to_be_shifted() { return AvmFlavor::get_to_be_shifted<DataType>(*this); }
// Accessor for the shifted entities: forwards to ShiftedEntities<DataType>::get_all().
auto get_shifted() { return ShiftedEntities<DataType>::get_all(); }
// Accessor for the precomputed entities: forwards to PrecomputedEntities<DataType>::get_all().
auto get_precomputed() { return PrecomputedEntities<DataType>::get_all(); }
Expand Down
56 changes: 54 additions & 2 deletions bb-pilcom/bb-pil-backend/templates/circuit_builder.cpp.hbs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
#include "barretenberg/vm/{{snakeCase name}}/generated/circuit_builder.hpp"

#include <mutex>
#include <set>
#include <unordered_map>

#include "barretenberg/common/constexpr_utils.hpp"
#include "barretenberg/common/thread.hpp"
Expand All @@ -21,6 +23,23 @@ namespace bb {
ASSERT(num_rows <= circuit_subgroup_size);
ProverPolynomials polys;

// Map each column name in Row::names() to its position in that list. This position is used below as
// the index of the column in the vector form of a row, i.e., the column index in the trace matrix.
std::unordered_map<std::string, size_t> names_to_col_idx;
const auto names = Row::names();
for (size_t i = 0; i < names.size(); i++) {
names_to_col_idx[names[i]] = i;
}

const auto labels = polys.get_unshifted_labels();
const size_t num_unshifted = labels.size();

// Mapping from the index of an unshifted polynomial to the index of its trace column.
std::vector<size_t> polys_to_cols_unshifted_idx(num_unshifted);
for (size_t i = 0; i < num_unshifted; i++) {
polys_to_cols_unshifted_idx[i] = names_to_col_idx.at(labels[i]);
}

// Allocate mem for each column
AVM_TRACK_TIME("circuit_builder/init_polys_to_be_shifted", ({
for (auto& poly : polys.get_to_be_shifted()) {
Expand All @@ -29,15 +48,48 @@ namespace bb {
/*make shiftable with offset*/ 1 };
}
}));

// catch-all with fully formed polynomials
AVM_TRACK_TIME(
"circuit_builder/init_polys_unshifted", ({
auto unshifted = polys.get_unshifted();
bb::parallel_for(unshifted.size(), [&](size_t i) {

// An array which stores for each column of the trace the smallest size of the
// truncated column containing all non-zero elements.
// It is used to allocate the polynomials without memory overhead for the tail of zeros.
std::array<size_t, Row::SIZE> col_nonzero_size{};

// Computation of size of columns.
// Non-parallel version takes 0.5 second for a trace size of 200k rows.
// A parallel version might be considered in the future.
for (size_t i = 0; i < num_rows; i++) {
const auto row = rows[i].as_vector();
for (size_t col = 0; col < Row::SIZE; col++) {
if (!row[col].is_zero()) {
col_nonzero_size[col] = i + 1;
}
}
}

// Set of the labels for derived/inverse polynomials.
const auto derived_labels = polys.get_derived_labels();
std::set<std::string> derived_labels_set(derived_labels.begin(), derived_labels.end());

bb::parallel_for(num_unshifted, [&](size_t i) {
auto& poly = unshifted[i];
const auto col_idx = polys_to_cols_unshifted_idx[i];
size_t col_size = 0;

// We fully allocate the inverse polynomials. We leave this potential memory optimization for later.
if (derived_labels_set.contains(labels[i])) {
col_size = num_rows;
} else {
col_size = col_nonzero_size[col_idx];
}

if (poly.is_empty()) {
// Not set above
poly = Polynomial{ /*memory size*/ num_rows, /*largest possible index*/ circuit_subgroup_size };
poly = Polynomial{ /*memory size*/ col_size, /*largest possible index*/ circuit_subgroup_size };
}
});
}));
Expand Down
12 changes: 10 additions & 2 deletions bb-pilcom/bb-pil-backend/templates/flavor.hpp.hbs
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ class {{name}}Flavor {
DEFINE_COMPOUND_GET_ALL(WireEntities<DataType>, DerivedWitnessEntities<DataType>)
// Accessor for the wire entities: forwards to WireEntities<DataType>::get_all().
auto get_wires() { return WireEntities<DataType>::get_all(); }
// Accessor for the derived witness entities: forwards to DerivedWitnessEntities<DataType>::get_all().
auto get_derived() { return DerivedWitnessEntities<DataType>::get_all(); }
// Labels of the derived witness entities: forwards to DerivedWitnessEntities<DataType>::get_labels().
// The generated compute_polynomials() uses these labels to pick out the polynomials it allocates at
// full num_rows size.
auto get_derived_labels() { return DerivedWitnessEntities<DataType>::get_labels(); }
};

template <typename DataType>
Expand All @@ -178,9 +179,16 @@ class {{name}}Flavor {
public:
DEFINE_COMPOUND_GET_ALL(PrecomputedEntities<DataType>, WitnessEntities<DataType>, ShiftedEntities<DataType>)

auto get_unshifted() {
return concatenate(PrecomputedEntities<DataType>::get_all(), WitnessEntities<DataType>::get_all());
auto get_unshifted()
{
return concatenate(PrecomputedEntities<DataType>::get_all(), WitnessEntities<DataType>::get_all());
}

// Labels for the unshifted entities: the precomputed labels followed by the witness labels,
// concatenated in the same order in which get_unshifted() concatenates the entities themselves.
// NOTE(review): presumably each get_labels() lists its entries in the same order as the matching
// get_all(), so that labels[i] names get_unshifted()[i] (the generated compute_polynomials()
// indexes both with the same i) — confirm.
auto get_unshifted_labels()
{
return concatenate(PrecomputedEntities<DataType>::get_labels(), WitnessEntities<DataType>::get_labels());
}

// Delegates to the flavor-level {{name}}Flavor::get_to_be_shifted<DataType>, passing this object.
auto get_to_be_shifted() { return {{name}}Flavor::get_to_be_shifted<DataType>(*this); }
// Accessor for the shifted entities: forwards to ShiftedEntities<DataType>::get_all().
auto get_shifted() { return ShiftedEntities<DataType>::get_all(); }
// Accessor for the precomputed entities: forwards to PrecomputedEntities<DataType>::get_all().
auto get_precomputed() { return PrecomputedEntities<DataType>::get_all(); }
Expand Down

0 comments on commit a67d0e2

Please sign in to comment.