From 6f9bc7b0e0c9c953320bd37b7479c198b0c03f6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Fri, 2 Sep 2022 15:26:01 +0300 Subject: [PATCH] eth/protocols/snap: throttle trie heal requests when peers DoS us --- eth/protocols/snap/sync.go | 107 ++++++++++++++++++++++++++++++++++--- 1 file changed, 100 insertions(+), 7 deletions(-) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 1455bacbcb88..522c74545f46 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -21,10 +21,12 @@ import ( "encoding/json" "errors" "fmt" + gomath "math" "math/big" "math/rand" "sort" "sync" + "sync/atomic" "time" "github.com/ethereum/go-ethereum/common" @@ -78,6 +80,29 @@ const ( // and waste round trip times. If it's too high, we're capping responses and // waste bandwidth. maxTrieRequestCount = maxRequestSize / 512 + + // trienodeHealRateMeasurementImpact is the impact a single measurement has on + // the local node's trienode processing capacity. A value closer to 0 reacts + // slower to sudden changes, but it is also more stable against temporary hiccups. + trienodeHealRateMeasurementImpact = 0.005 + + // minTrienodeHealThrottle is the minimum divisor for throttling trie node + // heal requests to avoid overloading the local node and exessively expanding + // the state trie bedth wise. + minTrienodeHealThrottle = 1 + + // maxTrienodeHealThrottle is the maximum divisor for throttling trie node + // heal requests to avoid overloading the local node and exessively expanding + // the state trie bedth wise. + maxTrienodeHealThrottle = maxTrieRequestCount + + // trienodeHealThrottleIncrease is the multiplier for the throttle when the + // rate of arriving data is higher than the rate of processing it. + trienodeHealThrottleIncrease = 1.33 + + // trienodeHealThrottleDecrease is the divisor for the throttle when the + // rate of arriving data is lower than the rate of processing it. + trienodeHealThrottleDecrease = 1.25 ) var ( @@ -431,6 +456,11 @@ type Syncer struct { trienodeHealReqs map[uint64]*trienodeHealRequest // Trie node requests currently running bytecodeHealReqs map[uint64]*bytecodeHealRequest // Bytecode requests currently running + trienodeHealRate float64 // Average heal rate for processing trie node data + trienodeHealPend uint64 // Number of trie nodes currently pending for processing + trienodeHealThrottle float64 // Divisor for throttling the amount of trienode heal data requested + trienodeHealThrottled time.Time // Timestamp the last time the throttle was updated + trienodeHealSynced uint64 // Number of state trie nodes downloaded trienodeHealBytes common.StorageSize // Number of state trie bytes persisted to disk trienodeHealDups uint64 // Number of state trie nodes already processed @@ -476,9 +506,10 @@ func NewSyncer(db ethdb.KeyValueStore) *Syncer { trienodeHealIdlers: make(map[string]struct{}), bytecodeHealIdlers: make(map[string]struct{}), - trienodeHealReqs: make(map[uint64]*trienodeHealRequest), - bytecodeHealReqs: make(map[uint64]*bytecodeHealRequest), - stateWriter: db.NewBatch(), + trienodeHealReqs: make(map[uint64]*trienodeHealRequest), + bytecodeHealReqs: make(map[uint64]*bytecodeHealRequest), + trienodeHealThrottle: maxTrienodeHealThrottle, // Tune downward instead of insta-filling with junk + stateWriter: db.NewBatch(), extProgress: new(SyncProgress), } @@ -1321,6 +1352,10 @@ func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fai if cap > maxTrieRequestCount { cap = maxTrieRequestCount } + cap = int(float64(cap) / s.trienodeHealThrottle) + if cap <= 0 { + cap = 1 + } var ( hashes = make([]common.Hash, 0, cap) paths = make([]string, 0, cap) @@ -2090,6 +2125,10 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { // processTrienodeHealResponse integrates an already validated trienode response // into the healer tasks. func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { + var ( + start = time.Now() + fills int + ) for i, hash := range res.hashes { node := res.nodes[i] @@ -2098,6 +2137,8 @@ func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { res.task.trieTasks[res.paths[i]] = res.hashes[i] continue } + fills++ + // Push the trie node into the state syncer s.trienodeHealSynced++ s.trienodeHealBytes += common.StorageSize(len(node)) @@ -2121,6 +2162,50 @@ func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { log.Crit("Failed to persist healing data", "err", err) } log.Debug("Persisted set of healing data", "type", "trienodes", "bytes", common.StorageSize(batch.ValueSize())) + + // Calculate the processing rate of one filled trie node + rate := float64(fills) / (float64(time.Since(start)) / float64(time.Second)) + + // Update the currently measured trienode queueing and processing throughput. + // + // The processing rate needs to be updated uniformly independent if we've + // processed 1x100 trie nodes or 100x1 to keep the rate consistent even in + // the face of varying network packets. As such, we cannot just measure the + // time it took to process N trie nodes and update once, we need one update + // per trie node. + // + // Naively, that would be: + // + // for i:=0; i time.Second { + // Periodically adjust the trie node throttler + if float64(pending) > 2*s.trienodeHealRate { + s.trienodeHealThrottle *= trienodeHealThrottleIncrease + } else { + s.trienodeHealThrottle /= trienodeHealThrottleDecrease + } + if s.trienodeHealThrottle > maxTrienodeHealThrottle { + s.trienodeHealThrottle = maxTrienodeHealThrottle + } else if s.trienodeHealThrottle < minTrienodeHealThrottle { + s.trienodeHealThrottle = minTrienodeHealThrottle + } + s.trienodeHealThrottled = time.Now() + + log.Warn("Updated trie node heal throttler", "rate", s.trienodeHealRate, "pending", pending, "throttle", s.trienodeHealThrottle) + } } // processBytecodeHealResponse integrates an already validated bytecode response @@ -2655,10 +2740,12 @@ func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error // Cross reference the requested trienodes with the response to find gaps // that the serving node is missing - hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState) - hash := make([]byte, 32) - - nodes := make([][]byte, len(req.hashes)) + var ( + hasher = sha3.NewLegacyKeccak256().(crypto.KeccakState) + hash = make([]byte, 32) + nodes = make([][]byte, len(req.hashes)) + fills uint64 + ) for i, j := 0, 0; i < len(trienodes); i++ { // Find the next hash that we've been served, leaving misses with nils hasher.Reset() @@ -2670,16 +2757,22 @@ func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error } if j < len(req.hashes) { nodes[j] = trienodes[i] + fills++ j++ continue } // We've either ran out of hashes, or got unrequested data logger.Warn("Unexpected healing trienodes", "count", len(trienodes)-i) + // Signal this request as failed, and ready for rescheduling s.scheduleRevertTrienodeHealRequest(req) return errors.New("unexpected healing trienode") } // Response validated, send it to the scheduler for filling + atomic.AddUint64(&s.trienodeHealPend, fills) + defer func() { + atomic.AddUint64(&s.trienodeHealPend, ^(fills - 1)) + }() response := &trienodeHealResponse{ paths: req.paths, task: req.task,