add elastic scaling MVP guide #4663

Merged
merged 7 commits on Jul 17, 2024
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions cumulus/polkadot-parachain/Cargo.toml
@@ -24,6 +24,7 @@ hex-literal = { workspace = true, default-features = true }
log = { workspace = true, default-features = true }
serde = { features = ["derive"], workspace = true, default-features = true }
serde_json = { workspace = true, default-features = true }
docify = { workspace = true }

# Local
rococo-parachain-runtime = { workspace = true }
71 changes: 54 additions & 17 deletions cumulus/polkadot-parachain/src/service.rs
@@ -15,13 +15,16 @@
// along with Cumulus. If not, see <http://www.gnu.org/licenses/>.

use cumulus_client_cli::{CollatorOptions, ExportGenesisHeadCommand};
use cumulus_client_collator::service::CollatorService;
use cumulus_client_consensus_aura::collators::{
lookahead::{self as aura, Params as AuraParams},
slot_based::{self as slot_based, Params as SlotBasedParams},
use cumulus_client_collator::service::{
CollatorService, ServiceInterface as CollatorServiceInterface,
};
use cumulus_client_consensus_aura::collators::lookahead::{self as aura, Params as AuraParams};
#[docify::export(slot_based_colator_import)]
use cumulus_client_consensus_aura::collators::slot_based::{
self as slot_based, Params as SlotBasedParams,
};
use cumulus_client_consensus_common::ParachainBlockImport as TParachainBlockImport;
use cumulus_client_consensus_proposer::Proposer;
use cumulus_client_consensus_proposer::{Proposer, ProposerInterface};
use cumulus_client_consensus_relay_chain::Verifier as RelayChainVerifier;
#[allow(deprecated)]
use cumulus_client_service::old_consensus;
@@ -62,6 +65,7 @@ use sc_sysinfo::HwBench;
use sc_telemetry::{Telemetry, TelemetryHandle, TelemetryWorker, TelemetryWorkerHandle};
use sc_transaction_pool::FullPool;
use sp_api::ProvideRuntimeApi;
use sp_inherents::CreateInherentDataProviders;
use sp_keystore::KeystorePtr;
use sp_runtime::{app_crypto::AppCrypto, traits::Header as HeaderT};
use std::{marker::PhantomData, pin::Pin, sync::Arc, time::Duration};
@@ -623,6 +627,48 @@ pub(crate) struct StartSlotBasedAuraConsensus<RuntimeApi, AuraId>(
PhantomData<(RuntimeApi, AuraId)>,
);

impl<RuntimeApi, AuraId> StartSlotBasedAuraConsensus<RuntimeApi, AuraId>
where
RuntimeApi: ConstructNodeRuntimeApi<Block, ParachainClient<RuntimeApi>>,
RuntimeApi::RuntimeApi: AuraRuntimeApi<Block, AuraId>,
AuraId: AuraIdT + Sync,
{
#[docify::export_content]
fn launch_slot_based_collator<CIDP, CHP, Proposer, CS>(
params: SlotBasedParams<
ParachainBlockImport<RuntimeApi>,
CIDP,
ParachainClient<RuntimeApi>,
ParachainBackend,
Arc<dyn RelayChainInterface>,
CHP,
Proposer,
CS,
>,
task_manager: &TaskManager,
) where
CIDP: CreateInherentDataProviders<Block, ()> + 'static,
CIDP::InherentDataProviders: Send,
CHP: cumulus_client_consensus_common::ValidationCodeHashProvider<Hash> + Send + 'static,
Proposer: ProposerInterface<Block> + Send + Sync + 'static,
CS: CollatorServiceInterface<Block> + Send + Sync + Clone + 'static,
{
let (collation_future, block_builder_future) =
slot_based::run::<Block, <AuraId as AppCrypto>::Pair, _, _, _, _, _, _, _, _>(params);

task_manager.spawn_essential_handle().spawn(
"collation-task",
Some("parachain-block-authoring"),
collation_future,
);
task_manager.spawn_essential_handle().spawn(
"block-builder-task",
Some("parachain-block-authoring"),
block_builder_future,
);
}
}

impl<RuntimeApi, AuraId> StartConsensus<RuntimeApi>
for StartSlotBasedAuraConsensus<RuntimeApi, AuraId>
where
@@ -683,19 +729,10 @@ where
slot_drift: Duration::from_secs(1),
};

let (collation_future, block_builder_future) =
slot_based::run::<Block, <AuraId as AppCrypto>::Pair, _, _, _, _, _, _, _, _>(params);
// We have a separate function only to be able to use `docify::export` on this piece of
// code.
Self::launch_slot_based_collator(params, task_manager);

task_manager.spawn_essential_handle().spawn(
"collation-task",
Some("parachain-block-authoring"),
collation_future,
);
task_manager.spawn_essential_handle().spawn(
"block-builder-task",
Some("parachain-block-authoring"),
block_builder_future,
);
Ok(())
}
}
142 changes: 142 additions & 0 deletions docs/sdk/src/guides/enable_elastic_scaling_mvp.rs
@@ -0,0 +1,142 @@
//! # Enable elastic scaling MVP for a parachain
//!
//! <div class="warning">This guide assumes full familiarity with Asynchronous Backing and its
//! terminology, as defined in https://wiki.polkadot.network/docs/maintain-guides-async-backing.
//! Furthermore, the parachain should have already been upgraded according to the guide.</div>
//!
//! ## Quick introduction to elastic scaling
//!
//! [Elastic scaling](https://polkadot.network/blog/elastic-scaling-streamling-growth-on-polkadot)
//! is a feature that will enable parachains to seamlessly scale up/down the number of used cores.
//! This can be desirable in order to increase the compute or storage throughput of a parachain or
//! to lower the latency between a transaction being submitted and it getting built in a parachain
//! block.
//!
//! At present, with Asynchronous Backing enabled, a parachain can only include a block on the relay
//! chain every 6 seconds, regardless of how many cores the parachain acquires. Elastic scaling
//! builds further on the 10x throughput increase of Async Backing, enabling collators to submit up
//! to 3 parachain blocks per relay chain block, resulting in a further 3x throughput increase.
//!
//! ## Current limitations of the MVP
//!
//! The full implementation of elastic scaling spans across the entire relay/parachain stack and is
//! still [work in progress](https://github.com/paritytech/polkadot-sdk/issues/1829).
//! The MVP is still considered experimental software, so stability is not guaranteed.
//! If you encounter any problems,
//! [please open an issue](https://github.com/paritytech/polkadot-sdk/issues).
//! The current limitations of the MVP are described below:
//!
//! 1. **Limited core count**. Parachain block authoring is sequential, so the second block will
Contributor:

How do we know that these 3 para-blocks are still valid when imported in 3 parallel cores?

For example, there are 2 tx in each parablock. The collator proposes [t1, t2, t3, t4, t5, t6] and they are all valid. But the validity of t6 depends on the execution of t1. When imported in 3 cores, t1 and t6 are no longer present.

In general, I would assume all of this to be fixed in the cumulus block building code. My question is, does it?

Contributor:

These 3 blocks are expected to form a chain, the ones that don't will not be included.

Contributor Author:

> These 3 blocks are expected to form a chain, the ones that don't will not be included.

yes, also a candidate will not be included until all of its ancestors are included. if one ancestor is not included (times out availability) or is concluded invalid via a dispute, all of its descendants will also be evicted from the cores. So we only deal with candidate chains

Contributor:

Sorry, I still don't get this.

@sandreim if they form a chain, and part of the chain is executed in one core and part of it in another core, how does either of the cores check that the whole thing is a chain?

in my example, [t1, t2, t3, t4, t5, t6], [t1, t2, t3] goes into one core, [t4, t5, t6] into another. The whole [t1 -> t6] indeed forms a chain, and execution of t5 depends on the execution of t2.

Perhaps what you mean to say is that the transactions that go into different cores must in fact be independent of one another?

Contributor (@sandreim), Jul 22, 2024:

The transactions are not independent. We achieve parallel execution even in that case, and still check they form a chain by passing in the appropriate validation inputs (`PersistedValidationData`). We can validate t2 because we already have the parent head data of t1 from the collator of t2. So we can correctly construct the inputs, and the PoV contains the right data (t2 was built after t1 by the collator).
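
For reference, the validation inputs referred to above are the `PersistedValidationData` from `polkadot-primitives`. Roughly (fields reproduced for illustration; see the crate for the authoritative definition):

```rust
pub struct PersistedValidationData<H = Hash, N = BlockNumber> {
    /// The parent head-data. This is what allows a candidate to be checked against its
    /// parent even when the two are validated on different cores.
    pub parent_head: HeadData,
    /// The relay-chain block number this is in the context of.
    pub relay_parent_number: N,
    /// The relay-chain block storage root this is in the context of.
    pub relay_parent_storage_root: H,
    /// The maximum legal size of a PoV block, in bytes.
    pub max_pov_size: u32,
}
```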

Contributor:

Is this the answer?

[t1, t2, t3] goes into one core, [t4, t5, t6] into another, but the PoV of the latter contains the full execution of the former?

I think this is fine, but truthfully to scale up, I think different transactions going into different cores must be independent, or else the system can only scale as much as you can jack up one collator.

Member (@ordian), Jul 22, 2024:

> but the PoV of the latter contains the full execution of the former?

PoV of [t4, t5, t6] would refer to the state post [t1, t2, t3] execution.

> I think different transactions going into different cores must be independent, or else the system can only scale as much as you can jack up one collator

One way of achieving that without jacking up one collator would be to have a DAG instead of a blockchain (two blocks having the same parent state). But then you'd need to somehow ensure they are truly independent. This could be done by e.g. specifying dependencies in the transactions themselves (à la Solana or Ethereum access lists).

Another way would be to rely on multiple CPU cores of a collator and implement execution on the collator side differently, with optimistic concurrency control (à la Monad). This only requires modifications on the collator side and does not affect the transaction format.

Contributor:

Okay, thanks @ordian.

I totally agree with all of your directions as well. I am not sure if you have seen it or not, but my MSc thesis was on the same topic 🙈 https://github.com/kianenigma/SonicChain. I think what I have done here is similar to access lists, and it should be quite easy to add to FRAME and Substrate: each tx would declare, via its code author, what storage keys it "thinks" it will access. Then the collators can easily agree among themselves to collate non-conflicting transactions.

This is a problem that is best solved from the collator side, and only once there is a lot of demand. Polkadot is already doing what it should do, and should not do any "magic" to handle this.

Once there is more demand:

  1. Either collators just jack up, as they kinda are expected to do now. This won't scale a lot but it will for a bit.
  2. I think the access list stuff is super cool and will scale
  3. OCC is fancy but similarly doesn't scale, because there are only so many CPU cores, and you are still bound to one collator somehow filling up 8 Polkadot cores. Option 2 is much more powerful, because you can enable 8 collators to fill 8 blocks simultaneously.
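
A purely hypothetical sketch of the access-list idea described above (none of these types exist in FRAME today; all names are illustrative):

```rust
/// Hypothetical: each transaction declares the storage keys it expects to touch.
struct AccessList {
    reads: Vec<Vec<u8>>,
    writes: Vec<Vec<u8>>,
}

/// Two transactions could be collated into blocks destined for different cores only if
/// their declared accesses don't conflict (no write/write or read/write overlap).
fn conflicts(a: &AccessList, b: &AccessList) -> bool {
    let overlaps = |xs: &[Vec<u8>], ys: &[Vec<u8>]| xs.iter().any(|x| ys.contains(x));
    overlaps(&a.writes, &b.writes)
        || overlaps(&a.writes, &b.reads)
        || overlaps(&a.reads, &b.writes)
}
```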

Member:

> OCC is fancy but similarly doesn't scale, because there are only so many CPU cores, and you are still bound to one collator somehow filling up 8 Polkadot cores. Option 2 is much more powerful, because you can enable 8 collators to fill 8 blocks simultaneously.

I agree here only partially. First, you can't produce (para)blocks at a rate faster than collators/full-nodes can import them. Unless they are not checking everything themselves. But even if they are not checking, this assumes that the bottleneck will be CPU and not storage/IO, which is not currently the case. Even with NOMT and other future optimizations, you can't accept transactions faster than you can modify the state. You need to know the latest state in order to check transactions. Unless we're talking about sharding the parachain's state itself.

Another argument is that single-threaded performance is going to reach a plateau eventually (whether it's Moore's law or physics), and nowadays we see even smartphones have 8 cores, so why not utilize them all instead of doing everything single-threaded?

That being said, I think options 2 and 3 are composable, you can do both.

Contributor (@sandreim), Jul 22, 2024:

The current status quo is that we rely on 1 (beefy collators). 2 is for sure something that can scale well, but it seems to be complicated and is not really compatible with the relay chain, which expects chains, not a DAG. #4696 (comment) shows the limitations of what is possible with reference hardware and 5 collators.

We did a nice brainstorming session with @skunert and @eskimor on the subject some time ago. We think that the best way to go forward is to implement a transaction streaming mechanism. At the beginning of each slot, the block author sends transactions to the next block author as it pushes them into the current block. By the time it announces the block, the next author should already have all state changes applied, doesn't need to wait to import it, and can immediately start building its own block. And so on.

If that is not enough, the next block author can start to speculatively build its next block, updating the transactions and state as it learns what the current author is putting in their blocks.

//! start being built only after the previous block is imported. The current block production is
//! capped at 2 seconds of execution. Therefore, assuming the full 2 seconds are used, a
//! parachain can only utilise at most 3 cores in a relay chain slot of 6 seconds. If the full
//! execution time is not being used, higher core counts can be achieved.
//! 2. **Single collator requirement for consistently scaling beyond a core at full authorship
//! duration of 2 seconds per block.** Using the current implementation with multiple collators
//! adds additional latency to the block production pipeline. Assuming block execution takes
//! about the same as authorship, the additional overhead is equal to the duration of the authorship
//! plus the block announcement. Each collator must first import the previous block before
//! authoring a new one, so it is clear that the highest throughput can be achieved using a
//! single collator. Experiments show that the peak performance using more than one collator
//! (measured up to 10 collators) is utilising 2 cores with authorship time of 1.3 seconds per
//! block, which leaves 400ms for networking overhead. This would allow for 2.6 seconds of
//! execution, compared to the 2 seconds that async backing enables.
//! [More experiments](https://github.com/paritytech/polkadot-sdk/issues/4696) are being
//! conducted in this space.
//! 3. **Trusted collator set.** The collator set needs to be trusted until there’s a mitigation
//! that would prevent or deter multiple collators from submitting the same collation to multiple
//! backing groups. A solution is being discussed
//! [here](https://github.com/polkadot-fellows/RFCs/issues/92).
//! 4. **Fixed scaling.** For true elasticity, the parachain must be able to seamlessly acquire or
//! sell coretime as the user demand grows and shrinks over time, in an automated manner. This is
//! currently lacking - a parachain can only scale up or down by “manually” acquiring coretime.
//! This is not in the scope of the relay chain functionality. Parachains can already start
//! implementing such autoscaling, but we aim to provide a framework/examples for developing
//! autoscaling strategies.
//!
//! Another hard limitation that is not envisioned to ever be lifted is that parachains which create
//! forks will generally not be able to utilise the full number of cores they acquire.
//!
//! ## Using elastic scaling MVP
//!
//! ### Prerequisites
//!
//! - Ensure Asynchronous Backing is enabled on the network and you have enabled it on the parachain
//! using [`crate::guides::async_backing_guide`].
//! - Ensure the `AsyncBackingParams.max_candidate_depth` value is configured to at least double
//! the maximum targeted parachain velocity. For example, if the parachain will build at most 3
//! candidates per relay chain block, the `max_candidate_depth` should be at least 6 (see the
//! snippet after this list).
//! - Use a trusted single collator for maximum throughput.
//! - Ensure enough coretime is assigned to the parachain. For maximum throughput the upper bound is
//! 3 cores.
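//!
//! As a quick sanity check, the relationship between the targeted velocity and
//! `max_candidate_depth` can be expressed as follows (illustrative constants only;
//! `max_candidate_depth` is part of the relay chain's host configuration and is not something set
//! in the parachain runtime):
//! ```ignore
//! // Target at most 3 parachain blocks per relay chain block.
//! const MAX_PARACHAIN_VELOCITY: u32 = 3;
//! // The relay chain's `AsyncBackingParams.max_candidate_depth` must then be at least:
//! const REQUIRED_MAX_CANDIDATE_DEPTH: u32 = 2 * MAX_PARACHAIN_VELOCITY; // = 6
//! ```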
//!
//! <div class="warning">Phase 1 is not needed if using the `polkadot-parachain` binary built
//! from the latest polkadot-sdk release! Simply pass the `--experimental-use-slot-based` parameter
//! to the command line and jump to Phase 2.</div>
//!
//! The following steps assume using the cumulus parachain template.
//!
//! ### Phase 1 - (For custom parachain node) Update Parachain Node
//!
//! This assumes you are using
//! [the latest parachain template](https://github.com/paritytech/polkadot-sdk/tree/master/templates/parachain).
//!
//! This phase consists of plugging in the new slot-based collator.
//!
//! 1. In `node/src/service.rs`, import the slot-based collator instead of the lookahead collator.
#![doc = docify::embed!("../../cumulus/polkadot-parachain/src/service.rs", slot_based_colator_import)]
//!
//! 2. In `start_consensus()`
//! - Remove the `overseer_handle` param (also remove the
//! `OverseerHandle` type import if it’s not used elsewhere).
//! - Rename `AuraParams` to `SlotBasedParams`, remove the `overseer_handle` field and add a
//! `slot_drift` field with a value of `Duration::from_secs(1)` (a sketch of this change is shown
//! after the embedded snippet below).
//! - Replace the single future returned by `aura::run` with the two futures returned by
//! `slot_based::run` and spawn them as separate tasks:
#![doc = docify::embed!("../../cumulus/polkadot-parachain/src/service.rs", launch_slot_based_collator)]
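//!
//! For the `SlotBasedParams` change in the second bullet above, a minimal sketch (only the
//! touched fields are shown; carry everything else over unchanged from your existing `AuraParams`
//! construction, and note that field names may differ slightly between releases):
//! ```ignore
//! let params = SlotBasedParams {
//!     // ...all the fields previously passed to `AuraParams`, except `overseer_handle`,
//!     // which is removed...
//!     slot_drift: Duration::from_secs(1),
//! };
//! ```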
//!
//! 3. In `start_parachain_node()` remove the `overseer_handle` param passed to `start_consensus`.
//!
//! ### Phase 2 - Activate fixed factor scaling in the runtime
//!
//! This phase consists of a couple of changes that need to be made to the parachain’s runtime in
//! order to utilise fixed factor scaling.
//!
//! First of all, you need to decide the upper limit on how many parachain blocks you need to
//! produce per relay chain block (in direct correlation with the number of acquired cores). This
//! should be either 1 (no scaling), 2 or 3. This is called the parachain velocity.
//!
//! If you configure a velocity which is different from the number of assigned cores, the measured
Contributor:

I think we can just compute the rest of the values based on the minimum parachain block time (MIN_SLOT_DURATION)

Contributor Author:

yes they can. I specifically say how they all relate to each other and the formulas to derive them. I think the parachain teams can decide how to code them, this is just an example

Contributor Author:

I added the constant computations based on maximum velocity

Contributor:

I definitely have seen this velocity stuff in the async backing guides PR as well, I think it is best to first push that to completion in the best possible shape, then build this on top of it.

//! velocity in practice will be the minimum of these two.
//!
//! The chosen velocity will also be used to compute:
//! - The slot duration, by dividing the 6000 ms relay chain slot duration by the velocity.
//! - The unincluded segment capacity, by multiplying the velocity by 2 and adding 1.
//!
//! Let’s assume a desired maximum velocity of 3 parachain blocks per relay chain block. The needed
//! changes would all be done in `runtime/src/lib.rs`:
//!
//! 1. Rename `BLOCK_PROCESSING_VELOCITY` to `MAX_BLOCK_PROCESSING_VELOCITY` and increase it to the
//! desired value. In this example, 3.
//!
//! ```ignore
//! const MAX_BLOCK_PROCESSING_VELOCITY: u32 = 3;
//! ```
//!
//! 2. Set the `MILLISECS_PER_BLOCK` to the desired value.
//!
//! ```ignore
//! const MILLISECS_PER_BLOCK: u32 =
//! RELAY_CHAIN_SLOT_DURATION_MILLIS / MAX_BLOCK_PROCESSING_VELOCITY;
//! ```
//! Note: for a parachain which measures time in terms of its own block number, changing block
//! time may cause complications, requiring additional changes. See here for more information:
//! [`crate::guides::async_backing_guide#timing-by-block-number`].
//!
//! 3. Increase the `UNINCLUDED_SEGMENT_CAPACITY` to the desired value.
//!
//! ```ignore
//! const UNINCLUDED_SEGMENT_CAPACITY: u32 = 2 * MAX_BLOCK_PROCESSING_VELOCITY + 1;
//! ```
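//!
//! These constants are the ones wired into the parachain system's consensus hook. As a final
//! sanity check, the type alias set up in the async backing guide should now read roughly as
//! follows (assuming the parachain template's naming; adjust to your runtime):
//! ```ignore
//! type ConsensusHook = cumulus_pallet_aura_ext::FixedVelocityConsensusHook<
//!     Runtime,
//!     RELAY_CHAIN_SLOT_DURATION_MILLIS,
//!     MAX_BLOCK_PROCESSING_VELOCITY,
//!     UNINCLUDED_SEGMENT_CAPACITY,
//! >;
//! ```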
3 changes: 3 additions & 0 deletions docs/sdk/src/guides/mod.rs
@@ -41,3 +41,6 @@ pub mod async_backing_guide;

/// How to enable metadata hash verification in the runtime.
pub mod enable_metadata_hash;

/// How to enable elastic scaling MVP on a parachain.
pub mod enable_elastic_scaling_mvp;
14 changes: 14 additions & 0 deletions prdoc/pr_4663.prdoc
@@ -0,0 +1,14 @@
# Schema: Polkadot SDK PRDoc Schema (prdoc) v1.0.0
# See doc at https://raw.githubusercontent.com/paritytech/polkadot-sdk/master/prdoc/schema_user.json

title: Add elastic scaling MVP guide

doc:
- audience: Node Operator
description: |
Adds a guide for parachains that want to use the experimental elastic scaling MVP.
Will be viewable at: https://paritytech.github.io/polkadot-sdk/master/polkadot_sdk_docs/guides/enable_elastic_scaling_mvp/index.html

crates:
- name: polkadot-parachain-bin
bump: none