From cbc6cddcab22614e655fe63d10aa662b854f2d26 Mon Sep 17 00:00:00 2001 From: Pierre Krieger Date: Wed, 11 Oct 2023 10:23:18 +0200 Subject: [PATCH 1/3] Split big storage proof requests into smaller ones --- light-base/src/sync_service.rs | 64 +++++++++++++++++++++++++++++----- wasm-node/CHANGELOG.md | 4 +++ 2 files changed, 59 insertions(+), 9 deletions(-) diff --git a/light-base/src/sync_service.rs b/light-base/src/sync_service.rs index ff910d7a2d..f186a202d2 100644 --- a/light-base/src/sync_service.rs +++ b/light-base/src/sync_service.rs @@ -29,7 +29,7 @@ use crate::{network_service, platform::PlatformRef, runtime_service}; use alloc::{borrow::ToOwned as _, boxed::Box, format, string::String, sync::Arc, vec::Vec}; -use core::{fmt, future::Future, mem, num::NonZeroU32, pin::Pin, time::Duration}; +use core::{cmp, fmt, future::Future, mem, num::NonZeroU32, pin::Pin, time::Duration}; use futures_channel::oneshot; use futures_lite::stream; use rand::seq::IteratorRandom as _; @@ -434,7 +434,6 @@ impl SyncService { _max_parallel: NonZeroU32, ) -> Result, StorageQueryError> { // TODO: this should probably be extracted to a state machine in `/lib`, with unit tests - // TODO: big requests should be split into multiple smaller ones // TODO: handle max_parallel enum RequestImpl { PrefixScan { @@ -484,6 +483,11 @@ impl SyncService { let mut final_results = Vec::::with_capacity(requests_remaining.len() * 4); + // Number of nodes that are possible in a response before exceeding the response size + // limit. Because the size of a trie node is unknown, this can only ever be a gross + // estimate. + let mut response_nodes_cap = (1024 * 1024) / 164; + let mut randomness = rand_chacha::ChaCha20Rng::from_seed({ let mut seed = [0; 32]; self.platform.fill_random_bytes(&mut seed); @@ -517,27 +521,50 @@ impl SyncService { // Build the list of keys to request. let keys_to_request = { + // Keep track of the number of nodes that might be found in the response. + // This is a generous overestimation of the actual number. + let mut max_reponse_nodes = 0; + let mut keys = hashbrown::HashSet::with_capacity_and_hasher( requests_remaining.len() * 4, fnv::FnvBuildHasher::default(), ); for request in &requests_remaining { + if max_reponse_nodes >= response_nodes_cap { + break; + } + match request { RequestImpl::PrefixScan { scan, .. } => { - keys.extend(scan.requested_keys().map(|nibbles| { - trie::nibbles_to_bytes_suffix_extend(nibbles).collect::>() - })); + for scan_key in scan.requested_keys() { + if max_reponse_nodes >= response_nodes_cap { + break; + } + + let scan_key = trie::nibbles_to_bytes_suffix_extend(scan_key) + .collect::>(); + let scan_key_len = scan_key.len(); + if keys.insert(scan_key) { + max_reponse_nodes += scan_key_len * 2; + } + } } RequestImpl::ValueOrHash { key, .. } => { - keys.insert(key.clone()); + if keys.insert(key.clone()) { + max_reponse_nodes += key.len() * 2; + } } RequestImpl::ClosestDescendantMerkleValue { key } => { // We query the parent of `key`. if key.is_empty() { - keys.insert(Vec::new()); + if keys.insert(Vec::new()) { + max_reponse_nodes += 1; + } } else { - keys.insert(key[..key.len() - 1].to_owned()); + if keys.insert(key[..key.len() - 1].to_owned()) { + max_reponse_nodes += key.len() * 2 - 1; + } } } } @@ -563,7 +590,26 @@ impl SyncService { let proof = match result { Ok(r) => r, Err(err) => { - outcome_errors.push(StorageQueryErrorDetail::Network(err)); + // In case of error that isn't a protocol error, we reduce the number of + // trie node items to request. + let reduce_max = match &err { + network_service::StorageProofRequestError::RequestTooLarge => true, + network_service::StorageProofRequestError::Request( + service::StorageProofRequestError::Request(err), + ) => !err.is_protocol_error(), + _ => false, + }; + if reduce_max { + response_nodes_cap = cmp::max(1, response_nodes_cap / 2); + } + + if !matches!( + err, + network_service::StorageProofRequestError::RequestTooLarge + ) { + outcome_errors.push(StorageQueryErrorDetail::Network(err)); + } + continue; } }; diff --git a/wasm-node/CHANGELOG.md b/wasm-node/CHANGELOG.md index 15e627a309..5ad09a90a9 100644 --- a/wasm-node/CHANGELOG.md +++ b/wasm-node/CHANGELOG.md @@ -2,6 +2,10 @@ ## Unreleased +### Changed + +- When asking for Merkle proofs from full nodes, smoldot will now automatically and dynamically split big requests into multiple smaller requests. This should avoid timeout errors in case of big requests. + ## 2.0.3 - 2023-09-28 ### Fixed From f6824a79f0a99ce9c708b7ce5feefa9cc7dcc747 Mon Sep 17 00:00:00 2001 From: Pierre Krieger Date: Wed, 11 Oct 2023 10:31:19 +0200 Subject: [PATCH 2/3] PR link --- wasm-node/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wasm-node/CHANGELOG.md b/wasm-node/CHANGELOG.md index 5ad09a90a9..83bfed25ba 100644 --- a/wasm-node/CHANGELOG.md +++ b/wasm-node/CHANGELOG.md @@ -4,7 +4,7 @@ ### Changed -- When asking for Merkle proofs from full nodes, smoldot will now automatically and dynamically split big requests into multiple smaller requests. This should avoid timeout errors in case of big requests. +- When asking for Merkle proofs from full nodes, smoldot will now automatically and dynamically split big requests into multiple smaller requests. This should avoid timeout errors in case of big requests. ([#1209](https://github.com/smol-dot/smoldot/pull/1209)) ## 2.0.3 - 2023-09-28 From d76c46a7c412df5250c77f5de383c9d6e1e9badf Mon Sep 17 00:00:00 2001 From: Pierre Krieger Date: Wed, 11 Oct 2023 10:32:24 +0200 Subject: [PATCH 3/3] Count timeouts as errors anyway if the cap is already at minimum --- light-base/src/sync_service.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/light-base/src/sync_service.rs b/light-base/src/sync_service.rs index f186a202d2..7bf06ecbd2 100644 --- a/light-base/src/sync_service.rs +++ b/light-base/src/sync_service.rs @@ -599,17 +599,19 @@ impl SyncService { ) => !err.is_protocol_error(), _ => false, }; - if reduce_max { - response_nodes_cap = cmp::max(1, response_nodes_cap / 2); - } if !matches!( err, network_service::StorageProofRequestError::RequestTooLarge - ) { + ) || response_nodes_cap == 1 + { outcome_errors.push(StorageQueryErrorDetail::Network(err)); } + if reduce_max { + response_nodes_cap = cmp::max(1, response_nodes_cap / 2); + } + continue; } };