-
Notifications
You must be signed in to change notification settings - Fork 3
/
trace_protocol.rs
205 lines (176 loc) · 7.7 KB
/
trace_protocol.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
//! The trace protocol for sending proof information to a prover scheduler.
//!
//! Because parsing performance has a very negligible impact on overall proof
//! generation latency & throughput, the overall priority of this protocol is
//! ease of implementation for clients. The flexibility comes from giving
//! multiple ways to the client to provide the data for the protocol, where the
//! implementors can pick whichever way is the most convenient for them.
//!
//! It might not be obvious why we need traces for each txn in order to generate
//! proofs. While it's true that we could just run all the txns of a block in an
//! EVM to generate the traces ourselves, there are a few major downsides:
//! - The client is likely a full node and already has to run the txns in an EVM
//! anyways.
//! - We want this protocol to be as agnostic as possible to the underlying
//! chain that we're generating proofs for, and running our own EVM would
//! likely cause us to loose this genericness.
//!
//! While it's also true that we run our own zk-EVM (plonky2) to generate
//! proofs, it's critical that we are able to generate txn proofs in parallel.
//! Since generating proofs with plonky2 is very slow, this would force us to
//! sequentialize the entire proof generation process. So in the end, it's ideal
//! if we can get this information sent to us instead.
use std::collections::HashMap;
use eth_trie_utils::partial_trie::HashedPartialTrie;
use ethereum_types::{Address, U256};
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, FromInto, TryFromInto};
use crate::{
deserializers::ByteString,
types::{CodeHash, HashedAccountAddr, StorageAddr, StorageVal},
utils::hash,
};
/// Core payload needed to generate a proof for a block. Note that the scheduler
/// may need to request some additional data from the client along with this in
/// order to generate a proof.
#[derive(Debug, Deserialize, Serialize)]
pub struct BlockTrace {
/// The trie pre-images (state & storage) in multiple possible formats.
pub trie_pre_images: BlockTraceTriePreImages,
/// Traces and other info per txn. The index of the txn corresponds to the
/// slot in this vec.
pub txn_info: Vec<TxnInfo>,
}
/// Minimal hashed out tries needed by all txns in the block.
#[derive(Debug, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum BlockTraceTriePreImages {
Separate(SeparateTriePreImages),
Combined(CombinedPreImages),
}
/// State/Storage trie pre-images that are separate.
#[derive(Debug, Deserialize, Serialize)]
pub struct SeparateTriePreImages {
pub state: SeparateTriePreImage,
pub storage: SeparateStorageTriesPreImage,
}
/// A trie pre-image where state & storage are separate.
#[derive(Debug, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum SeparateTriePreImage {
Uncompressed(TrieUncompressed),
Direct(TrieDirect),
}
/// A trie pre-image where both state & storage are combined into one payload.
#[derive(Debug, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub struct CombinedPreImages {
pub compact: TrieCompact,
}
// TODO
/// Bulkier format that is quicker to process.
#[derive(Debug, Deserialize, Serialize)]
pub struct TrieUncompressed {}
// TODO
#[serde_as]
/// Compact representation of a trie (will likely be very close to <https://github.com/ledgerwatch/erigon/blob/devel/docs/programmers_guide/witness_formal_spec.md>)
#[derive(Debug, Deserialize, Serialize)]
pub struct TrieCompact(#[serde_as(as = "FromInto<ByteString>")] pub Vec<u8>);
// TODO
/// Trie format that is in exactly the same format of our internal trie format.
/// This is the fastest format for us to processes.
#[derive(Debug, Deserialize, Serialize)]
pub struct TrieDirect(pub HashedPartialTrie);
#[derive(Debug, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum SeparateStorageTriesPreImage {
/// A single hash map that contains all node hashes from all storage tries
/// involved in the block. We can reconstruct the individual storage tries
/// by the storage root hash in the state entries.
SingleTrie(TrieUncompressed),
/// Each storage trie is sent over in a hashmap with the hashed account
/// address as a key.
MultipleTries(HashMap<HashedAccountAddr, SeparateTriePreImage>),
}
/// Info specific to txns in the block.
#[derive(Debug, Deserialize, Serialize)]
pub struct TxnInfo {
/// Trace data for the txn. This is used by the protocol to:
/// - Mutate it's own trie state between txns to arrive at the correct trie
/// state for the start of each txn.
/// - Create minimal partial tries needed for proof gen based on what state
/// the txn accesses. (eg. What trie nodes are accessed).
pub traces: HashMap<Address, TxnTrace>,
/// Data that is specific to the txn as a whole.
pub meta: TxnMeta,
}
#[serde_as]
#[derive(Debug, Deserialize, Serialize)]
pub struct TxnMeta {
/// Txn byte code.
#[serde_as(as = "FromInto<ByteString>")]
pub byte_code: Vec<u8>,
/// Rlped bytes of the new txn value inserted into the txn trie by
/// this txn. Note that the key is not included and this is only the rlped
/// value of the node!
#[serde_as(as = "FromInto<ByteString>")]
pub new_txn_trie_node_byte: Vec<u8>,
/// Rlped bytes of the new receipt value inserted into the receipt trie by
/// this txn. Note that the key is not included and this is only the rlped
/// value of the node!
#[serde_as(as = "TryFromInto<ByteString>")]
pub new_receipt_trie_node_byte: Vec<u8>,
/// Gas used by this txn (Note: not cumulative gas used).
pub gas_used: u64,
}
/// A "trace" specific to an account for a txn.
///
/// Specifically, since we can not execute the txn before proof generation, we
/// rely on a separate EVM to run the txn and supply this data for us.
#[derive(Debug, Deserialize, Serialize)]
pub struct TxnTrace {
/// If the balance changed, then the new balance will appear here. Will be
/// `None` if no change.
#[serde(skip_serializing_if = "Option::is_none")]
pub balance: Option<U256>,
/// If the nonce changed, then the new nonce will appear here. Will be
/// `None` if no change.
#[serde(skip_serializing_if = "Option::is_none")]
pub nonce: Option<U256>,
/// Account addresses that were only read by the txn.
///
/// Note that if storage is written to, then it does not need to appear in
/// this list (but is also fine if it does).
#[serde(skip_serializing_if = "Option::is_none")]
pub storage_read: Option<Vec<StorageAddr>>,
/// Account storage addresses that were mutated by the txn along with their
/// new value.
#[serde(skip_serializing_if = "Option::is_none")]
pub storage_written: Option<HashMap<StorageAddr, StorageVal>>,
/// Contract code that this address accessed.
#[serde(skip_serializing_if = "Option::is_none")]
pub code_usage: Option<ContractCodeUsage>,
/// True if the account existed before this txn but self-destructed at the
/// end of this txn.
#[serde(skip_serializing_if = "Option::is_none")]
pub self_destructed: Option<bool>,
}
/// Contract code access type. Used by txn traces.
#[serde_as]
#[derive(Debug, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum ContractCodeUsage {
/// Contract was read.
Read(CodeHash),
/// Contract was created (and these are the bytes). Note that this new
/// contract code will not appear in the [`BlockTrace`] map.
Write(#[serde_as(as = "FromInto<ByteString>")] ByteString),
}
impl ContractCodeUsage {
pub(crate) fn get_code_hash(&self) -> CodeHash {
match self {
ContractCodeUsage::Read(hash) => *hash,
ContractCodeUsage::Write(bytes) => hash(bytes),
}
}
}