Skip to content

Commit

Permalink
chore: parallelise inverse polynomial construction for lookup relatio…
Browse files Browse the repository at this point in the history
…ns (#10413)

Benchmark were showing that oink is the second most expensive round in
PG after combiner. On top of that one component where we see
discrepancies when increasing the ambient trace size is logderivative
inverses construction. A step towards improving this is parallelising
the construction of inverse polynomials (which is linear). Also the
inverses can be committed to with `commit_sparse` which shows a slight
improvement as well.
BEFORE
```
CLIENT_IVC_BENCH_STRUCTURE(2^19)

ClientIVCBench/Full/6      29146 ms        27299 ms
ProtogalaxyProver::prove(t)            16265    58.29%
ProtogalaxyProver_::run_oink_prover_on_each_incomplete_key(t)    5624    34.58%


EXAMPLE_20(2^20)

ClientIVCBench/Full/6      37145 ms        34235 ms
ProtogalaxyProver::prove(t)            21283    60.75%
ProtogalaxyProver_::run_oink_prover_on_each_incomplete_key(t)    8818    41.43%
COMMIT::lookup_inverses(t)        406     9.82%
```

AFTER
```
CLIENT_IVC_BENCH_STRUCTURE(2^19)

ClientIVCBench/Full/6      27351 ms        25477 ms 
ProtogalaxyProver::prove(t)            14627    55.72%
ProtogalaxyProver_::run_oink_prover_on_each_incomplete_key(t)    4030    27.55%


EXAMPLE_20(2^20)
ClientIVCBench/Full/6      33852 ms        30893 ms   
ProtogalaxyProver::prove(t)            18250    56.97%
ProtogalaxyProver_::run_oink_prover_on_each_incomplete_key(t)    5526    30.28%
COMMIT::lookup_inverses(t)        301     7.43%
```
  • Loading branch information
maramihali authored Dec 5, 2024
1 parent 5b0b721 commit 427cf59
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <tuple>

#include "barretenberg/common/constexpr_utils.hpp"
#include "barretenberg/common/thread.hpp"
#include "barretenberg/polynomials/univariate.hpp"
#include "barretenberg/relations/relation_types.hpp"

Expand Down Expand Up @@ -231,32 +232,42 @@ template <typename FF_> class DatabusLookupRelationImpl {
const size_t circuit_size)
{
auto& inverse_polynomial = BusData<bus_idx, Polynomials>::inverses(polynomials);
bool is_read = false;
bool nonzero_read_count = false;
for (size_t i = 0; i < circuit_size; ++i) {
// Determine if the present row contains a databus operation
auto q_busread = polynomials.q_busread[i];
if constexpr (bus_idx == 0) { // calldata
is_read = q_busread == 1 && polynomials.q_l[i] == 1;
nonzero_read_count = polynomials.calldata_read_counts[i] > 0;
}
if constexpr (bus_idx == 1) { // secondary_calldata
is_read = q_busread == 1 && polynomials.q_r[i] == 1;
nonzero_read_count = polynomials.secondary_calldata_read_counts[i] > 0;
}
if constexpr (bus_idx == 2) { // return data
is_read = q_busread == 1 && polynomials.q_o[i] == 1;
nonzero_read_count = polynomials.return_data_read_counts[i] > 0;
}
// We only compute the inverse if this row contains a read gate or data that has been read
if (is_read || nonzero_read_count) {
// TODO(https://github.com/AztecProtocol/barretenberg/issues/940): avoid get_row if possible.
auto row = polynomials.get_row(i); // Note: this is a copy. use sparingly!
auto value = compute_read_term<FF>(row, relation_parameters) *
compute_write_term<FF, bus_idx>(row, relation_parameters);
inverse_polynomial.at(i) = value;

size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread
size_t num_threads = bb::calculate_num_threads_pow2(circuit_size, min_iterations_per_thread);
size_t iterations_per_thread = circuit_size / num_threads; // actual iterations per thread

parallel_for(num_threads, [&](size_t thread_idx) {
size_t start = thread_idx * iterations_per_thread;
size_t end = (thread_idx + 1) * iterations_per_thread;
bool is_read = false;
bool nonzero_read_count = false;
for (size_t i = start; i < end; ++i) {
// Determine if the present row contains a databus operation
auto q_busread = polynomials.q_busread[i];
if constexpr (bus_idx == 0) { // calldata
is_read = q_busread == 1 && polynomials.q_l[i] == 1;
nonzero_read_count = polynomials.calldata_read_counts[i] > 0;
}
if constexpr (bus_idx == 1) { // secondary_calldata
is_read = q_busread == 1 && polynomials.q_r[i] == 1;
nonzero_read_count = polynomials.secondary_calldata_read_counts[i] > 0;
}
if constexpr (bus_idx == 2) { // return data
is_read = q_busread == 1 && polynomials.q_o[i] == 1;
nonzero_read_count = polynomials.return_data_read_counts[i] > 0;
}
// We only compute the inverse if this row contains a read gate or data that has been read
if (is_read || nonzero_read_count) {
// TODO(https://github.com/AztecProtocol/barretenberg/issues/940): avoid get_row if possible.
auto row = polynomials.get_row(i); // Note: this is a copy. use sparingly!
auto value = compute_read_term<FF>(row, relation_parameters) *
compute_write_term<FF, bus_idx>(row, relation_parameters);
inverse_polynomial.at(i) = value;
}
}
}
});

// Compute inverse polynomial I in place by inverting the product at each row
// Note: zeroes are ignored as they are not used anyway
FF::batch_invert(inverse_polynomial.coeffs());
Expand Down Expand Up @@ -299,8 +310,8 @@ template <typename FF_> class DatabusLookupRelationImpl {
constexpr size_t subrel_idx_1 = 2 * bus_idx;
constexpr size_t subrel_idx_2 = 2 * bus_idx + 1;

// Establish the correctness of the polynomial of inverses I. Note: inverses is computed so that the value is 0
// if !inverse_exists. Degree 3 (5)
// Establish the correctness of the polynomial of inverses I. Note: inverses is computed so that the value
// is 0 if !inverse_exists. Degree 3 (5)
std::get<subrel_idx_1>(accumulator) += (read_term * write_term * inverses - inverse_exists) * scaling_factor;

// Establish validity of the read. Note: no scaling factor here since this constraint is enforced across the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <tuple>

#include "barretenberg/common/constexpr_utils.hpp"
#include "barretenberg/common/thread.hpp"
#include "barretenberg/honk/proof_system/logderivative_library.hpp"
#include "barretenberg/polynomials/univariate.hpp"
#include "barretenberg/relations/relation_types.hpp"
Expand Down Expand Up @@ -157,16 +158,25 @@ template <typename FF_> class LogDerivLookupRelationImpl {
{
auto& inverse_polynomial = get_inverse_polynomial(polynomials);

for (size_t i = 0; i < circuit_size; ++i) {
// We only compute the inverse if this row contains a lookup gate or data that has been looked up
if (polynomials.q_lookup.get(i) == 1 || polynomials.lookup_read_tags.get(i) == 1) {
// TODO(https://github.com/AztecProtocol/barretenberg/issues/940): avoid get_row if possible.
auto row = polynomials.get_row(i); // Note: this is a copy. use sparingly!
auto value = compute_read_term<FF, 0>(row, relation_parameters) *
compute_write_term<FF, 0>(row, relation_parameters);
inverse_polynomial.at(i) = value;
size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread
size_t num_threads = bb::calculate_num_threads_pow2(circuit_size, min_iterations_per_thread);
size_t iterations_per_thread = circuit_size / num_threads; // actual iterations per thread

parallel_for(num_threads, [&](size_t thread_idx) {
size_t start = thread_idx * iterations_per_thread;
size_t end = (thread_idx + 1) * iterations_per_thread;
for (size_t i = start; i < end; ++i) {
// We only compute the inverse if this row contains a lookup gate or data that has been looked up
if (polynomials.q_lookup.get(i) == 1 || polynomials.lookup_read_tags.get(i) == 1) {
// TODO(https://github.com/AztecProtocol/barretenberg/issues/940): avoid get_row if possible.
auto row = polynomials.get_row(i); // Note: this is a copy. use sparingly!
auto value = compute_read_term<FF, 0>(row, relation_parameters) *
compute_write_term<FF, 0>(row, relation_parameters);
inverse_polynomial.at(i) = value;
}
}
}
});

// Compute inverse polynomial I in place by inverting the product at each row
FF::batch_invert(inverse_polynomial.coeffs());
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ typename TraceToPolynomials<Flavor>::TraceData TraceToPolynomials<Flavor>::const
auto block_size = static_cast<uint32_t>(block.size());

// Save ranges over which the blocks are "active" for use in structured commitments
if constexpr (IsUltraFlavor<Flavor>) {
if constexpr (IsUltraFlavor<Flavor>) { // Mega and Ultra
if (block.size() > 0) {
proving_key.active_block_ranges.emplace_back(offset, offset + block.size());
}
Expand Down
4 changes: 2 additions & 2 deletions barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,8 @@ template <IsUltraFlavor Flavor> void OinkProver<Flavor>::execute_log_derivative_

{
PROFILE_THIS_NAME("COMMIT::lookup_inverses");
witness_commitments.lookup_inverses =
proving_key->proving_key.commitment_key->commit(proving_key->proving_key.polynomials.lookup_inverses);
witness_commitments.lookup_inverses = proving_key->proving_key.commitment_key->commit_sparse(
proving_key->proving_key.polynomials.lookup_inverses);
}
transcript->send_to_verifier(domain_separator + commitment_labels.lookup_inverses,
witness_commitments.lookup_inverses);
Expand Down

1 comment on commit 427cf59

@AztecBot
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Performance Alert ⚠️

Possible performance regression was detected for benchmark 'C++ Benchmark'.
Benchmark result of this commit is worse than the previous benchmark result exceeding threshold 1.05.

Benchmark suite Current: 427cf59 Previous: a1e5966 Ratio
wasmClientIVCBench/Full/6 91484.2179 ms/iter 86911.401326 ms/iter 1.05
wasmconstruct_proof_ultrahonk_power_of_2/20 16490.637163 ms/iter 15144.797168000001 ms/iter 1.09

This comment was automatically generated by workflow using github-action-benchmark.

CC: @ludamad @codygunton

Please sign in to comment.