Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Statediff for full node #6

Merged
merged 9 commits into from
Feb 21, 2019
9 changes: 6 additions & 3 deletions cmd/geth/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,14 @@ func makeFullNode(ctx *cli.Context) *node.Node {
if ctx.GlobalIsSet(utils.ConstantinopleOverrideFlag.Name) {
cfg.Eth.ConstantinopleOverride = new(big.Int).SetUint64(ctx.GlobalUint64(utils.ConstantinopleOverrideFlag.Name))
}

utils.RegisterEthService(stack, &cfg.Eth)

if ctx.GlobalBool(utils.StateDiffFlag.Name) {
cfg.Eth.StateDiff = true
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would it make sense to combine this with the RegisterStateDiffService call, if both are based off of the call to ctx.GlobalBool(utils.StateDiffFlag.Name)?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, good call!

utils.RegisterStateDiffService(stack, ctx)
}

if ctx.GlobalBool(utils.DashboardEnabledFlag.Name) {
utils.RegisterDashboardService(stack, &cfg.Dashboard, gitCommit)
}
Expand All @@ -181,9 +187,6 @@ func makeFullNode(ctx *cli.Context) *node.Node {
utils.RegisterEthStatsService(stack, cfg.Ethstats.URL)
}

if ctx.GlobalBool(utils.StateDiffFlag.Name) {
utils.RegisterStateDiffService(stack, ctx)
}
return stack
}

Expand Down
73 changes: 53 additions & 20 deletions core/blockchain.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,11 @@ const (
// CacheConfig contains the configuration values for the trie caching/pruning
// that's resident in a blockchain.
type CacheConfig struct {
Disabled bool // Whether to disable trie write caching (archive node)
TrieCleanLimit int // Memory allowance (MB) to use for caching trie nodes in memory
TrieDirtyLimit int // Memory limit (MB) at which to start flushing dirty trie nodes to disk
TrieTimeLimit time.Duration // Time limit after which to flush the current in-memory trie to disk
Disabled bool // Whether to disable trie write caching (archive node)
TrieCleanLimit int // Memory allowance (MB) to use for caching trie nodes in memory
TrieDirtyLimit int // Memory limit (MB) at which to start flushing dirty trie nodes to disk
TrieTimeLimit time.Duration // Time limit after which to flush the current in-memory trie to disk
ProcessStateDiffs bool
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What exactly does this flag entail? Hint hint, there are comments on the rest of them ;)

}

// BlockChain represents the canonical chain given a database with a genesis
Expand Down Expand Up @@ -136,6 +137,8 @@ type BlockChain struct {

badBlocks *lru.Cache // Bad block cache
shouldPreserve func(*types.Block) bool // Function used to determine whether should preserve the given block.

stateDiffsProcessed map[common.Hash]int
}

// NewBlockChain returns a fully initialised block chain using information
Expand All @@ -155,24 +158,26 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par
blockCache, _ := lru.New(blockCacheLimit)
futureBlocks, _ := lru.New(maxFutureBlocks)
badBlocks, _ := lru.New(badBlockLimit)

stateDiffsProcessed := make(map[common.Hash]int)
bc := &BlockChain{
chainConfig: chainConfig,
cacheConfig: cacheConfig,
db: db,
triegc: prque.New(nil),
stateCache: state.NewDatabaseWithCache(db, cacheConfig.TrieCleanLimit),
quit: make(chan struct{}),
shouldPreserve: shouldPreserve,
bodyCache: bodyCache,
bodyRLPCache: bodyRLPCache,
receiptsCache: receiptsCache,
blockCache: blockCache,
futureBlocks: futureBlocks,
engine: engine,
vmConfig: vmConfig,
badBlocks: badBlocks,
chainConfig: chainConfig,
cacheConfig: cacheConfig,
db: db,
triegc: prque.New(nil),
stateCache: state.NewDatabaseWithCache(db, cacheConfig.TrieCleanLimit),
quit: make(chan struct{}),
shouldPreserve: shouldPreserve,
bodyCache: bodyCache,
bodyRLPCache: bodyRLPCache,
receiptsCache: receiptsCache,
blockCache: blockCache,
futureBlocks: futureBlocks,
engine: engine,
vmConfig: vmConfig,
badBlocks: badBlocks,
stateDiffsProcessed: stateDiffsProcessed,
}

bc.SetValidator(NewBlockValidator(chainConfig, bc, engine))
bc.SetProcessor(NewStateProcessor(chainConfig, bc, engine))

Expand Down Expand Up @@ -922,6 +927,17 @@ func (bc *BlockChain) WriteBlockWithoutState(block *types.Block, td *big.Int) (e
return nil
}

func (bc *BlockChain) AddToStateDiffProcessedCollection(hash common.Hash) {
count, ok := bc.stateDiffsProcessed[hash]

if ok {
count++
bc.stateDiffsProcessed[hash] = count
} else {
bc.stateDiffsProcessed[hash] = 1
}
Copy link

@m0ar m0ar Feb 19, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is equivalent:

count, _ := bc.stateDiffsProcessed[hash]
bc.stateDiffsProcessed[hash] = count + 1

If !ok, count will have the zero value of int: 0 anyway. I assume this is run very frequently, would save some computation.
https://play.golang.org/p/EAba1v8NRcw

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice catch!

}

// WriteBlockWithState writes the block and all associated state to the database.
func (bc *BlockChain) WriteBlockWithState(block *types.Block, receipts []*types.Receipt, state *state.StateDB) (status WriteStatus, err error) {
bc.wg.Add(1)
Expand Down Expand Up @@ -994,6 +1010,16 @@ func (bc *BlockChain) WriteBlockWithState(block *types.Block, receipts []*types.
bc.triegc.Push(root, number)
break
}

if bc.cacheConfig.ProcessStateDiffs {
if !bc.allowedRootToBeDereferenced(root.(common.Hash)) {
bc.triegc.Push(root, number)
break
} else {
delete(bc.stateDiffsProcessed, root.(common.Hash))
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this mean we clean up the map when we're finished with a certain diff? Was just about to ask about memory leaks here, but probably okay if that's the case!

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, if a root is okay to be dereferenced, we removed it from the stateDiffsProcessed collection just to make sure that that collection doesn't get huge. And then it will fall through to triedb.Dereference(root.(common.Hash)) which removes it from the in-memory trie database.

}
}

triedb.Dereference(root.(common.Hash))
}
}
Expand Down Expand Up @@ -1048,6 +1074,13 @@ func (bc *BlockChain) WriteBlockWithState(block *types.Block, receipts []*types.
return status, nil
}

//if we haven't processed the statediff for a given state root and it's child, don't dereference it yet
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why? πŸ™ƒ Is it to prevent branches from GC/pruning? Worth the explicit information, that was hard to figure out and harder to guess :)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Totally makes sense! Is this more clear?

// since we need the state tries of the current block and its parent in-memory
// in order to process statediffs, we should avoid dereferencing roots until
// its statediff and its child have been processed

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@elizabethengelman Stellarly so! 🌟

func (bc *BlockChain) allowedRootToBeDereferenced(root common.Hash) bool {
diffProcessedForSelfAndChildCount := 2
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Feels like this constant could be ejected and set somewhere else so we don't have to assign it to a local closure all the time.

count := bc.stateDiffsProcessed[root]
return count >= diffProcessedForSelfAndChildCount
}

// addFutureBlock checks if the block is within the max allowed window to get
// accepted for future processing, and returns an error if the block is too far
// ahead and was not added.
Expand Down
81 changes: 81 additions & 0 deletions core/blockchain_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1483,3 +1483,84 @@ func BenchmarkBlockChain_1x1000Executions(b *testing.B) {

benchmarkLargeNumberOfValueToNonexisting(b, numTxs, numBlocks, recipientFn, dataFn)
}

func TestProcessingStateDiffs(t *testing.T) {
defaultTrieCleanCache := 256
defaultTrieDirtyCache := 256
defaultTrieTimeout := 60 * time.Minute
cacheConfig := &CacheConfig{
Disabled: false,
TrieCleanLimit: defaultTrieCleanCache,
TrieDirtyLimit: defaultTrieDirtyCache,
TrieTimeLimit: defaultTrieTimeout,
ProcessStateDiffs: true,
}
db := ethdb.NewMemDatabase()
genesis := new(Genesis).MustCommit(db)
numberOfBlocks := triesInMemory
engine := ethash.NewFaker()
blockchain, _ := NewBlockChain(db, cacheConfig, params.AllEthashProtocolChanges, engine, vm.Config{}, nil)
blocks := makeBlockChain(genesis, numberOfBlocks+1, engine, db, canonicalSeed)
_, err := blockchain.InsertChain(blocks)
if err != nil {
t.Fatalf("failed to create pristine chain: %v", err)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

✨ pristine ✨

}
defer blockchain.Stop()

//when adding a root hash to the collection, it will increment the count
firstStateRoot := blocks[0].Root()
blockchain.AddToStateDiffProcessedCollection(firstStateRoot)
value, ok := blockchain.stateDiffsProcessed[firstStateRoot]
if !ok {
t.Error("state root not found in collection")
}
if value != 1 {
t.Error("state root count not correct", "want", 1, "got", value)
}

blockchain.AddToStateDiffProcessedCollection(firstStateRoot)
value, ok = blockchain.stateDiffsProcessed[firstStateRoot]
if !ok {
t.Error("state root not found in collection")
}
if value != 2 {
t.Error("state root count not correct", "want", 2, "got", value)
}

moreBlocks := makeBlockChain(blocks[len(blocks)-1], 1, engine, db, canonicalSeed)
_, err = blockchain.InsertChain(moreBlocks)

//a root hash can be dereferenced when it's state diff and it's child's state diff have been processed
//(i.e. it has a count of 2 in stateDiffsProcessed)
nodes := blockchain.stateCache.TrieDB().Nodes()
if containsRootHash(nodes, firstStateRoot) {
t.Errorf("stateRoot %s in nodes, want: %t, got: %t", firstStateRoot.Hex(), false, true)
}

//a root hash should still be in the in-mem db if it's child's state diff hasn't yet been processed
//(i.e. it has a count of 1 stateDiffsProcessed)
secondStateRoot := blocks[1].Root()
blockchain.AddToStateDiffProcessedCollection(secondStateRoot)
if !containsRootHash(nodes, secondStateRoot) {
t.Errorf("stateRoot %s in nodes, want: %t, got: %t", secondStateRoot.Hex(), true, false)
}

//the stateDiffsProcessed collection is cleaned up once a hash has been dereferenced
_, ok = blockchain.stateDiffsProcessed[firstStateRoot]
if ok {
t.Errorf("stateRoot %s in stateDiffsProcessed collection, want: %t, got: %t",
firstStateRoot.Hex(),
false,
ok,
)
}
}

func containsRootHash(collection []common.Hash, hash common.Hash) bool {
for _, n := range collection {
if n == hash {
return true
}
}
return false
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This diff shows how we should write our tests instead of that damn Ginkgo! Looks so clean, and probably runs way faster. 🐌 Why are we even using gomega assertions when we have good 'ol bool

Could probably be split up in smaller tests, but I figure the setup is a bit annoying.

9 changes: 5 additions & 4 deletions eth/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,10 +157,11 @@ func New(ctx *node.ServiceContext, config *Config) (*Ethereum, error) {
EVMInterpreter: config.EVMInterpreter,
}
cacheConfig = &core.CacheConfig{
Disabled: config.NoPruning,
TrieCleanLimit: config.TrieCleanCache,
TrieDirtyLimit: config.TrieDirtyCache,
TrieTimeLimit: config.TrieTimeout,
Disabled: config.NoPruning,
TrieCleanLimit: config.TrieCleanCache,
TrieDirtyLimit: config.TrieDirtyCache,
TrieTimeLimit: config.TrieTimeout,
ProcessStateDiffs: config.StateDiff,
}
)
eth.blockchain, err = core.NewBlockChain(chainDb, cacheConfig, eth.chainConfig, eth.engine, vmConfig, eth.shouldPreserve)
Expand Down
4 changes: 4 additions & 0 deletions eth/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ var DefaultConfig = Config{
Blocks: 20,
Percentile: 60,
},

StateDiff: false,
}

func init() {
Expand Down Expand Up @@ -135,6 +137,8 @@ type Config struct {

// Constantinople block override (TODO: remove after the fork)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

πŸ˜‚ 🍴

ConstantinopleOverride *big.Int

StateDiff bool
}

type configMarshaling struct {
Expand Down
Loading