Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Proper handling for legacy parachain leases with gaps in coretime migration #426

Merged
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions relay/polkadot/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ pallet-balances = { workspace = true }
pallet-beefy = { workspace = true }
pallet-beefy-mmr = { workspace = true }
pallet-bounties = { workspace = true }
pallet-broker = { workspace = true }
tdimitrov marked this conversation as resolved.
Show resolved Hide resolved
pallet-child-bounties = { workspace = true }
pallet-transaction-payment = { workspace = true }
pallet-transaction-payment-rpc-runtime-api = { workspace = true }
Expand Down Expand Up @@ -103,6 +104,9 @@ xcm-runtime-apis = { workspace = true }

sp-debug-derive = { workspace = true }

# just for the coretime migration
polkadot-parachain-primitives = { workspace = true }

[dev-dependencies]
sp-keyring = { workspace = true }
sp-trie = { workspace = true }
Expand Down
319 changes: 319 additions & 0 deletions relay/polkadot/src/coretime_migration.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,319 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Polkadot.

// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.

//! Coretime migration for Polkadot runtime

#[cfg(feature = "try-runtime")]
use crate::scheduler::common::AssignmentProvider;
use crate::{
coretime::{Config, WeightInfo},
parachains_assigner_coretime,
parachains_assigner_coretime::PartsOf57600,
OriginKind,
};
use codec::{Decode, Encode};
use core::{iter, result};
#[cfg(feature = "try-runtime")]
use frame_support::ensure;
use frame_support::{
traits::{OnRuntimeUpgrade, PalletInfoAccess, StorageVersion},
weights::Weight,
};
use frame_system::pallet_prelude::BlockNumberFor;
use pallet_broker::{CoreAssignment, CoreMask, ScheduleItem};
use polkadot_parachain_primitives::primitives::IsSystem;
use polkadot_primitives::{Balance, BlockNumber, CoreIndex, Id as ParaId};
use runtime_parachains::configuration;

use sp_arithmetic::traits::SaturatedConversion;
use sp_core::Get;
use sp_runtime::BoundedVec;
use sp_std::{vec, vec::Vec};
use xcm::prelude::{send_xcm, Instruction, Junction, Location, SendError, WeightLimit, Xcm};

/// Return information about a legacy lease of a parachain.
pub trait GetLegacyLease<N> {
/// If parachain is a lease holding parachain, return the block at which the lease expires.
fn get_parachain_lease_in_blocks(para: ParaId) -> Option<N>;
/// All parachains holding a lease, no matter if there are gaps in the slots or not.
fn get_all_parachains_with_leases() -> Vec<ParaId>;
}

/// Calls of the `Broker` pallet that this migration dispatches on the coretime chain.
///
/// The `codec` indices must match the call indices of the broker pallet as deployed on
/// the coretime chain — NOTE(review): verify against the coretime runtime before release.
#[derive(Encode, Decode)]
enum CoretimeCalls {
// `Broker::reserve` — reserve a core schedule (used for system chains and pool cores).
#[codec(index = 1)]
Reserve(pallet_broker::Schedule),
// `Broker::set_lease` — register a legacy lease ending at the given timeslice.
#[codec(index = 3)]
SetLease(pallet_broker::TaskId, pallet_broker::Timeslice),
// `Broker::notify_core_count` — tell the broker how many relay cores exist.
#[codec(index = 19)]
NotifyCoreCount(u16),
// `Broker::notify_revenue` — not used by this migration, kept for index stability.
#[codec(index = 20)]
NotifyRevenue((BlockNumber, Balance)),
// `Broker::swap_leases` — not used by this migration, kept for index stability.
#[codec(index = 99)]
SwapLeases(ParaId, ParaId),
}

/// Minimal mirror of the coretime chain's runtime `Call` enum, just enough to
/// SCALE-encode `Broker` pallet calls for XCM `Transact`.
///
/// Index 50 is assumed to be the broker pallet's index in the coretime runtime —
/// NOTE(review): confirm against the deployed coretime chain.
#[derive(Encode, Decode)]
enum BrokerRuntimePallets {
#[codec(index = 50)]
Broker(CoretimeCalls),
}

/// Migrate a chain to use coretime.
///
/// This assumes that the `Coretime` and the `AssignerCoretime` pallets are added at the same
/// time to a runtime.
pub struct MigrateToCoretime<T, SendXcm, LegacyLease>(
// The migration is stateless; the `PhantomData` only pins the runtime, the XCM
// sender and the legacy-lease provider used by the `OnRuntimeUpgrade` impl.
core::marker::PhantomData<(T, SendXcm, LegacyLease)>,
);

impl<T: Config, SendXcm: xcm::v4::SendXcm, LegacyLease: GetLegacyLease<BlockNumberFor<T>>>
MigrateToCoretime<T, SendXcm, LegacyLease>
{
/// Returns `true` if this migration has already been executed.
///
/// Detection: scan the raw storage keys under the coretime assigner pallet's
/// prefix. Any key other than the pallet's `StorageVersion` key means the
/// migration has already written assignment data.
fn already_migrated() -> bool {
// We are using the assigner coretime because the coretime pallet doesn't have any
// storage data. But both pallets are introduced at the same time, so this is fine.
let name_hash = parachains_assigner_coretime::Pallet::<T>::name_hash();
let mut next_key = name_hash.to_vec();
let storage_version_key =
StorageVersion::storage_key::<parachains_assigner_coretime::Pallet<T>>();

loop {
match sp_io::storage::next_key(&next_key) {
// StorageVersion is initialized before, so we need to ignore it.
Some(key) if key == storage_version_key => {
next_key = key;
},
// If there is any other key with the prefix of the pallet,
// we have already executed the migration.
Some(key) if key.starts_with(&name_hash) => {
log::info!("`MigrateToCoretime` already executed!");
return true
},
// Any other key/no key means that we have not migrated yet.
None | Some(_) => return false,
}
}
}
}

impl<
        T: Config + runtime_parachains::dmp::Config,
        SendXcm: xcm::v4::SendXcm,
        LegacyLease: GetLegacyLease<BlockNumberFor<T>>,
    > OnRuntimeUpgrade for MigrateToCoretime<T, SendXcm, LegacyLease>
{
    fn on_runtime_upgrade() -> Weight {
        // Idempotency guard: the migration must not run twice.
        if Self::already_migrated() {
            return Weight::zero()
        }

        log::info!("Migrating existing parachains to coretime.");
        migrate_to_coretime::<T, SendXcm, LegacyLease>()
    }

    #[cfg(feature = "try-runtime")]
    fn pre_upgrade() -> Result<Vec<u8>, sp_runtime::DispatchError> {
        // An empty state signals `post_upgrade` that there was nothing to do.
        if Self::already_migrated() {
            return Ok(Vec::new())
        }

        // Count the same set of chains the migration itself assigns cores for:
        // all lease holders, including leases with gaps in their slots. Counting
        // `paras::Parachains` instead would make the `post_upgrade` core-count
        // check fail whenever a lease holder is currently in a gap.
        let legacy_paras = LegacyLease::get_all_parachains_with_leases();
        let config = configuration::ActiveConfig::<T>::get();
        // Expected post-migration core count: existing (pool) cores plus one
        // dedicated core per legacy lease holder.
        let total_core_count = config.scheduler_params.num_cores + legacy_paras.len() as u32;

        let dmp_queue_size =
            crate::dmp::Pallet::<T>::dmq_contents(T::BrokerId::get().into()).len() as u32;

        Ok((total_core_count, dmp_queue_size).encode())
    }

    #[cfg(feature = "try-runtime")]
    fn post_upgrade(state: Vec<u8>) -> Result<(), sp_runtime::DispatchError> {
        // Empty state: `pre_upgrade` saw an already-migrated chain — nothing to check.
        if state.is_empty() {
            return Ok(())
        }

        log::trace!("Running post_upgrade()");

        let (prev_core_count, prev_dmp_queue_size) = <(u32, u32)>::decode(&mut &state[..])
            .expect("state was encoded by `pre_upgrade` with the same `(u32, u32)` layout; qed");

        let dmp_queue_size =
            crate::dmp::Pallet::<T>::dmq_contents(T::BrokerId::get().into()).len() as u32;
        let new_core_count = parachains_assigner_coretime::Pallet::<T>::session_core_count();
        ensure!(new_core_count == prev_core_count, "Total number of cores must not change.");
        // The migration enqueues the XCM messages for the coretime chain via DMP.
        ensure!(
            dmp_queue_size > prev_dmp_queue_size,
            "There should have been enqueued at least one DMP message."
        );

        Ok(())
    }
}

// Migrate to Coretime.
//
// NOTE: Also migrates `num_cores` config value in configuration::ActiveConfig.
fn migrate_to_coretime<
T: Config,
SendXcm: xcm::v4::SendXcm,
LegacyLease: GetLegacyLease<BlockNumberFor<T>>,
>() -> Weight {
// let legacy_paras = paras::Parachains::<T>::get();
tdimitrov marked this conversation as resolved.
Show resolved Hide resolved
let legacy_paras = LegacyLease::get_all_parachains_with_leases();
let legacy_count = legacy_paras.len() as u32;
let now = frame_system::Pallet::<T>::block_number();
for (core, para_id) in legacy_paras.into_iter().enumerate() {
let r = parachains_assigner_coretime::Pallet::<T>::assign_core(
CoreIndex(core as u32),
now,
vec![(CoreAssignment::Task(para_id.into()), PartsOf57600::FULL)],
None,
);
if let Err(err) = r {
log::error!(
"Creating assignment for existing para failed: {:?}, error: {:?}",
para_id,
err
);
}
}

let config = configuration::ActiveConfig::<T>::get();
// num_cores was on_demand_cores until now:
for on_demand in 0..config.scheduler_params.num_cores {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On Polkadot this is not the path of this config entry, checking the chain state it's at config.coretime_cores. Is there a migration ahead of this one to change the configuration in storage?

Copy link
Contributor Author

@tdimitrov tdimitrov Aug 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

parachains_configuration::migration::v12::MigrateToV12<Runtime> does this change:
https://github.com/paritytech/polkadot-sdk/blob/ebf4f8d2d590f41817d5d38b2d9b5812a46f2342/polkadot/runtime/parachains/src/configuration/migration/v12.rs#L146-L158

It gets executed before the coretime migration.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Still the comment is actually not correct for Polkadot. It used to be coretime_cores not on_demand cores and there was no on-demand core before, thus nothing to migrate.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(This loop will do nothing, which is fine)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've removed the comment.

let core = CoreIndex(legacy_count.saturating_add(on_demand as _));
let r = parachains_assigner_coretime::Pallet::<T>::assign_core(
core,
now,
vec![(CoreAssignment::Pool, PartsOf57600::FULL)],
None,
);
if let Err(err) = r {
log::error!("Creating assignment for existing on-demand core, failed: {:?}", err);
}
}
let total_cores = config.scheduler_params.num_cores + legacy_count;
configuration::ActiveConfig::<T>::mutate(|c| {
c.scheduler_params.num_cores = total_cores;
});

if let Err(err) = migrate_send_assignments_to_coretime_chain::<T, SendXcm, LegacyLease>() {
log::error!("Sending legacy chain data to coretime chain failed: {:?}", err);
}

let single_weight = <T as Config>::WeightInfo::assign_core(1);
single_weight
.saturating_mul(u64::from(legacy_count.saturating_add(config.scheduler_params.num_cores)))
// Second read from sending assignments to the coretime chain.
.saturating_add(T::DbWeight::get().reads_writes(2, 1))
}

fn migrate_send_assignments_to_coretime_chain<
T: Config,
SendXcm: xcm::v4::SendXcm,
LegacyLease: GetLegacyLease<BlockNumberFor<T>>,
>() -> result::Result<(), SendError> {
let legacy_paras = LegacyLease::get_all_parachains_with_leases();
let legacy_paras_count = legacy_paras.len();
let (system_chains, lease_holding): (Vec<_>, Vec<_>) =
legacy_paras.into_iter().partition(IsSystem::is_system);

let reservations = system_chains.into_iter().map(|p| {
let schedule = BoundedVec::truncate_from(vec![ScheduleItem {
mask: CoreMask::complete(),
assignment: CoreAssignment::Task(p.into()),
}]);
mk_coretime_call::<T>(CoretimeCalls::Reserve(schedule))
});

let mut leases = lease_holding.into_iter().filter_map(|p| {
log::trace!(target: "coretime-migration", "Preparing sending of lease holding para {:?}", p);
let Some(valid_until) = LegacyLease::get_parachain_lease_in_blocks(p) else {
log::error!("Lease holding chain with no lease information?!");
return None
};
let valid_until: u32 = match valid_until.try_into() {
Ok(val) => val,
Err(_) => {
log::error!("Converting block number to u32 failed!");
return None
},
};
Comment on lines +239 to +245
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
let valid_until: u32 = match valid_until.try_into() {
Ok(val) => val,
Err(_) => {
log::error!("Converting block number to u32 failed!");
return None
},
};

block number is u32

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't compile due to expected u32 found associated type errors. Let's keep it as it is.

// We assume the coretime chain set this parameter to the recommended value in RFC-1:
tdimitrov marked this conversation as resolved.
Show resolved Hide resolved
const TIME_SLICE_PERIOD: u32 = 80;
tdimitrov marked this conversation as resolved.
Show resolved Hide resolved
let round_up = if valid_until % TIME_SLICE_PERIOD > 0 { 1 } else { 0 };
let time_slice = valid_until / TIME_SLICE_PERIOD + TIME_SLICE_PERIOD * round_up;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
let round_up = if valid_until % TIME_SLICE_PERIOD > 0 { 1 } else { 0 };
let time_slice = valid_until / TIME_SLICE_PERIOD + TIME_SLICE_PERIOD * round_up;
let time_slice = (valid_until + TIME_SLICE_PERID - 1) / TIME_SLICE_PERIOD;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we do a different kind of round up here. timeslice is not number of periods but a lifetime in blocks (divided by 80). Your formula works only for the 'number of periods' case.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm confused... why we multiply the round up with TIME_SLICE_PERIOD?

Shouldn't it be let time_slice = valid_until / TIME_SLICE_PERIOD + round_up;) which is equivalent to your formula?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

round_up is just 0 or 1. It leads to the same result as what I have written there.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't. Round up is multiplied by TIME_SLICE_PERIOD. Have a look: https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=a5ce70639f2ee8a169e5d618e707ed74

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Basti's version is right. Integer division is just the floor, adding TIMESLICE_PERIOD rounds up, subtracting 1 makes sure it doesn't round up when v%T == 0.
The original code is adding 80 timeslices to the end of the lease, rather than one.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

log::trace!(target: "coretime-migration", "Sending of lease holding para {:?}, valid_until: {:?}, time_slice: {:?}", p, valid_until, time_slice);
Some(mk_coretime_call::<T>(CoretimeCalls::SetLease(p.into(), time_slice)))
});

let core_count: u16 = configuration::ActiveConfig::<T>::get()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm. on another call in a minute, but please check that this core handling is actually correct. This core_count value should read 0 on Polkadot at the time of the migration.

Copy link
Contributor Author

@tdimitrov tdimitrov Aug 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not 0. We update it on line 211-214 in the same file:
https://github.com/polkadot-fellows/runtimes/pull/426/files#diff-6a311f9b6c4cd2a09f397e5e0c90ea9193ba103bf277bb88de6bd223148af52eR211-R214

The actual value is 52:

coretime-migration  TRACE: Set core count to 52. legacy paras count is  52

The reserved cores are 0 though because legacy_paras_count == core_count. I have got an assert for this in the test:
https://github.com/tdimitrov/polkadot-coretime-launch-test/blob/a8e7aba7d6a741b10bd5330ae3119ef15a018711/coretime-migration-check/main.js#L158-L160

Is this correct?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reserved cores are 0 though

What you mean by reserved cores? The system chains?

Generally your code is correct.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the system chains.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, wait. These are reserved for on demand, if I am not mistaken.
We reserve cores for system chains, then for lease cores and what's left is for on demand. Right @eskimor ?

Copy link
Contributor

@seadanda seadanda Aug 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pre-migration the system chains just have leases as if they're regular parachains. They're included in the 52, except for the coretime chain since it isn't live yet. It will be registered by the time this migration runs, so it'll detect 53 leases (maybe more depending on how the auctions go).
reservations: Asset Hub, Collectives, Bridge Hub, People Chain and Coretime Chain for a total of 5 (you'll see 4 at the minute in your tests)
leases: 48 (plus any new ones from auctions, minus any that end between now and when it runs).

legacy_paras_count isn't the same as leases though, I think the assertion you're expecting is system_chains.len() + leases.len() == core_count, since core_count is equal to legacy_paras_count by construction (since num_cores is zero atm)

.scheduler_params
.num_cores
.saturated_into();
let set_core_count =
iter::once(mk_coretime_call::<T>(CoretimeCalls::NotifyCoreCount(core_count)));
log::trace!(target: "coretime-migration", "Set core count to {:?}. legacy paras count is {:?}",core_count, legacy_paras_count);

let pool = (legacy_paras_count..core_count.into()).map(|_| {
let schedule = BoundedVec::truncate_from(vec![ScheduleItem {
mask: CoreMask::complete(),
assignment: CoreAssignment::Pool,
}]);
// Reserved cores will come before lease cores, so cores will change their assignments
// when coretime chain sends us their assign_core calls -> Good test.
mk_coretime_call::<T>(CoretimeCalls::Reserve(schedule))
});

let message_content = iter::once(Instruction::UnpaidExecution {
weight_limit: WeightLimit::Unlimited,
check_origin: None,
});

let reservation_content = message_content.clone().chain(reservations).collect();
let pool_content = message_content.clone().chain(pool).collect();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pool will be empty. Do we send an empty message here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm I missed that. I guess we send an empty message. I'll fix it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

let leases_content_1 = message_content
.clone()
.chain(leases.by_ref().take(legacy_paras_count / 2)) // split in two messages to avoid overweighted XCM
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
.chain(leases.by_ref().take(legacy_paras_count / 2)) // split in two messages to avoid overweighted XCM
.chain(leases.by_ref().take(leases.len() / 2)) // split in two messages to avoid overweighted XCM

legacy_paras_count includes system parachains
leases does not

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but leases is an iterator and it's a bit involved to get its length. Let's keep it that way. It's not a big deal that the messages aren't equally distributed as long as they are sent in two passes (it's just one ParaId which causes the overweight).

.collect();
let leases_content_2 = message_content.clone().chain(leases).collect();
let set_core_count_content = message_content.clone().chain(set_core_count).collect();

let messages = vec![
tdimitrov marked this conversation as resolved.
Show resolved Hide resolved
Xcm(reservation_content),
Xcm(pool_content),
Xcm(leases_content_1),
Xcm(leases_content_2),
Xcm(set_core_count_content),
];

for message in messages {
send_xcm::<SendXcm>(Location::new(0, Junction::Parachain(T::BrokerId::get())), message)?;
}

Ok(())
}

/// Wrap a broker-pallet call into an XCM `Transact` instruction, dispatched with
/// `Superuser` origin on the coretime chain.
fn mk_coretime_call<T: Config>(call: CoretimeCalls) -> Instruction<()> {
    // SCALE-encode the call as the coretime chain's runtime `Call` enum.
    let encoded_call = BrokerRuntimePallets::Broker(call).encode();
    Instruction::Transact {
        origin_kind: OriginKind::Superuser,
        require_weight_at_most: T::MaxXcmTransactWeight::get(),
        call: encoded_call.into(),
    }
}
15 changes: 13 additions & 2 deletions relay/polkadot/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ use governance::{
pub mod impls;
pub mod xcm_config;

mod coretime_migration;

pub const LOG_TARGET: &str = "runtime::polkadot";

impl_runtime_weights!(polkadot_runtime_constants);
Expand Down Expand Up @@ -1936,7 +1938,7 @@ pub mod migrations {
use super::*;

pub struct GetLegacyLeaseImpl;
impl coretime::migration::GetLegacyLease<BlockNumber> for GetLegacyLeaseImpl {
impl coretime_migration::GetLegacyLease<BlockNumber> for GetLegacyLeaseImpl {
fn get_parachain_lease_in_blocks(para: ParaId) -> Option<BlockNumber> {
let now = frame_system::Pallet::<Runtime>::block_number();
let lease = slots::Leases::<Runtime>::get(para);
Expand All @@ -1947,6 +1949,15 @@ pub mod migrations {
<slots::Pallet<Runtime> as Leaser<BlockNumber>>::lease_period_index(now)?;
Some(index.saturating_add(lease.len() as u32).saturating_mul(LeasePeriod::get()))
}

/// All paras that currently hold at least one lease slot, including ones whose
/// lease schedule has gaps.
fn get_all_parachains_with_leases() -> Vec<ParaId> {
    let mut leases = slots::Leases::<Runtime>::iter()
        .filter(|(_, lease)| !lease.is_empty())
        .map(|(para, _)| para)
        .collect::<Vec<_>>();
    // Storage iteration order is unspecified — sort so the migration assigns
    // cores deterministically. Keys are unique, so an unstable sort is safe
    // (and avoids the stable sort's allocation).
    leases.sort_unstable();
    leases
}
}

/// Cancel all ongoing auctions.
Expand Down Expand Up @@ -2001,7 +2012,7 @@ pub mod migrations {
>,
clear_judgement_proxies::Migration,
// Migrate from legacy lease to coretime. Needs to run after configuration v11
coretime::migration::MigrateToCoretime<
coretime_migration::MigrateToCoretime<
Runtime,
crate::xcm_config::XcmRouter,
GetLegacyLeaseImpl,
Expand Down
Loading