Skip to content

Commit

Permalink
Handle All Gaps within Geth
Browse files Browse the repository at this point in the history
Including an updated doc which keeps track of events in this PR.
  • Loading branch information
Abdul Rabbani committed Mar 25, 2022
1 parent 1a3a63d commit f6ff20e
Show file tree
Hide file tree
Showing 10 changed files with 313 additions and 33 deletions.
18 changes: 18 additions & 0 deletions statediff/docs/KnownGaps.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Overview

This document provides some insight into the `known_gaps` table, its use cases, and its implementation. Please refer to the [following PR](https://github.com/vulcanize/go-ethereum/pull/217) and the [following epic](https://github.com/vulcanize/ops/issues/143) to understand how it came about.

![known gaps](diagrams/KnownGapsProcess.png)

# Use Cases

The known gaps table is updated when the following events occur:

1. At start up we check the latest block from the `eth.header_cids` table. We compare the first block that we are processing with the latest block from the DB. If they are not one unit of `expectedDifference` away from each other, we add the gap between the two blocks.
2. If there is any error in processing a block (db connection, deadlock, etc), add that block to the `knownErrorBlocks` slice. When the next block is successfully written, write this slice into the DB.
3. If the last processed block is not one unit of `expectedDifference` away from the current block being processed, add the gap between the two blocks. This can happen due to any unknown or unhandled errors in geth.

# Glossary

1. `expectedDifference (number)` - This number indicates what the difference between two blocks should be. If we are capturing all events on a geth node then this number would be `1`. But once we scale nodes, the `expectedDifference` might be `2` or greater.
2. `processingKey (number)` - This number can be used to keep track of different geth nodes and their specific `expectedDifference`.
3 changes: 3 additions & 0 deletions statediff/docs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Overview

This folder keeps track of miscellaneous documents related to the `statediff` service.
Binary file added statediff/docs/diagrams/KnownGapsProcess.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions statediff/indexer/database/dump/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -500,3 +500,6 @@ func (sdi *StateDiffIndexer) Close() error {
// FindAndUpdateGaps matches the method of the same name on the
// StateDiffIndexer interface. This indexer performs no gap detection: all
// arguments are ignored and nil is always returned.
func (sdi *StateDiffIndexer) FindAndUpdateGaps(latestBlockOnChain *big.Int, expectedDifference *big.Int, processingKey int64) error {
return nil
}
// PushKnownGaps matches the method of the same name on the StateDiffIndexer
// interface. This indexer does not record known gaps: all arguments are
// ignored and nil is always returned.
func (sdi *StateDiffIndexer) PushKnownGaps(startingBlockNumber *big.Int, endingBlockNumber *big.Int, checkedOut bool, processingKey int64) error {
return nil
}
4 changes: 4 additions & 0 deletions statediff/indexer/database/file/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -482,3 +482,7 @@ func (sdi *StateDiffIndexer) Close() error {
// FindAndUpdateGaps matches the method of the same name on the
// StateDiffIndexer interface. This indexer performs no gap detection: all
// arguments are ignored and nil is always returned.
func (sdi *StateDiffIndexer) FindAndUpdateGaps(latestBlockOnChain *big.Int, expectedDifference *big.Int, processingKey int64) error {
return nil
}

// PushKnownGaps matches the method of the same name on the StateDiffIndexer
// interface. This indexer does not record known gaps: all arguments are
// ignored and nil is always returned.
func (sdi *StateDiffIndexer) PushKnownGaps(startingBlockNumber *big.Int, endingBlockNumber *big.Int, checkedOut bool, processingKey int64) error {
return nil
}
21 changes: 16 additions & 5 deletions statediff/indexer/database/sql/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -555,7 +555,10 @@ func (sdi *StateDiffIndexer) Close() error {
}

// Update the known gaps table with the gap information.
func (sdi *StateDiffIndexer) pushKnownGaps(startingBlockNumber *big.Int, endingBlockNumber *big.Int, checkedOut bool, processingKey int64) error {
func (sdi *StateDiffIndexer) PushKnownGaps(startingBlockNumber *big.Int, endingBlockNumber *big.Int, checkedOut bool, processingKey int64) error {
if startingBlockNumber.Cmp(endingBlockNumber) != -1 {
return fmt.Errorf("Starting Block %d, is greater than ending block %d", startingBlockNumber, endingBlockNumber)
}
knownGap := models.KnownGapsModel{
StartingBlockNumber: startingBlockNumber.String(),
EndingBlockNumber: endingBlockNumber.String(),
Expand All @@ -573,7 +576,7 @@ func (sdi *StateDiffIndexer) QueryDb(queryString string) (string, error) {
var ret string
err := sdi.dbWriter.db.QueryRow(context.Background(), queryString).Scan(&ret)
if err != nil {
log.Error("Can't properly query the DB for query: ", queryString)
log.Error(fmt.Sprint("Can't properly query the DB for query: ", queryString))
return "", err
}
return ret, nil
Expand All @@ -589,7 +592,7 @@ func (sdi *StateDiffIndexer) QueryDbToBigInt(queryString string) (*big.Int, erro
}
ret, ok := ret.SetString(res, 10)
if !ok {
log.Error("Can't turn the res ", res, "into a bigInt")
log.Error(fmt.Sprint("Can't turn the res ", res, "into a bigInt"))
return ret, fmt.Errorf("Can't turn %s into a bigInt", res)
}
return ret, nil
Expand All @@ -611,6 +614,9 @@ func isGap(latestBlockInDb *big.Int, latestBlockOnChain *big.Int, expectedDiffer
// This function will check for Gaps and update the DB if gaps are found.
// The processingKey will currently be set to 0, but as we start to leverage horizontal scaling
// It might be a useful parameter to update depending on the geth node.
// TODO:
// Remove the return value
// Write to file if err in writing to DB
func (sdi *StateDiffIndexer) FindAndUpdateGaps(latestBlockOnChain *big.Int, expectedDifference *big.Int, processingKey int64) error {
dbQueryString := "SELECT MAX(block_number) FROM eth.header_cids"
latestBlockInDb, err := sdi.QueryDbToBigInt(dbQueryString)
Expand All @@ -625,8 +631,13 @@ func (sdi *StateDiffIndexer) FindAndUpdateGaps(latestBlockOnChain *big.Int, expe
startBlock.Add(latestBlockInDb, expectedDifference)
endBlock.Sub(latestBlockOnChain, expectedDifference)

log.Warn("Found Gaps starting at, ", startBlock, " and ending at, ", endBlock)
sdi.pushKnownGaps(startBlock, endBlock, false, processingKey)
log.Warn(fmt.Sprint("Found Gaps starting at, ", startBlock, " and ending at, ", endBlock))
err := sdi.PushKnownGaps(startBlock, endBlock, false, processingKey)
if err != nil {
// Write to file SQL file instead!!!
// If write to SQL file fails, write to disk. Handle this within the write to SQL file function!
return err
}
}

return nil
Expand Down
28 changes: 0 additions & 28 deletions statediff/indexer/database/sql/mainnet_tests/indexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,31 +116,3 @@ func tearDown(t *testing.T) {
err = ind.Close()
require.NoError(t, err)
}

//func TestKnownGapsUpsert(t *testing.T) {
// var startBlockNumber int64 = 111
// var endBlockNumber int64 = 121
// ind, err := setupDb(t)
// if err != nil {
// t.Fatal(err)
// }
// require.NoError(t, err)
//
// testKnownGapsUpsert(t, startBlockNumber, endBlockNumber, ind)
// //str, err := ind.QueryDb("SELECT MAX(block_number) FROM eth.header_cids") // Figure out the string.
// queryString := fmt.Sprintf("SELECT starting_block_number from eth.known_gaps WHERE starting_block_number = %d AND ending_block_number = %d", startBlockNumber, endBlockNumber)
// _, queryErr := ind.QueryDb(queryString) // Figure out the string.
// require.NoError(t, queryErr)
//
//}
//func testKnownGapsUpsert(t *testing.T, startBlockNumber int64, endBlockNumber int64, ind interfaces.StateDiffIndexer) {
// startBlock := big.NewInt(startBlockNumber)
// endBlock := big.NewInt(endBlockNumber)
//
// processGapError := ind.PushKnownGaps(startBlock, endBlock, false, 1)
// if processGapError != nil {
// t.Fatal(processGapError)
// }
// require.NoError(t, processGapError)
//}
//
1 change: 1 addition & 0 deletions statediff/indexer/interfaces/interfaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ type StateDiffIndexer interface {
PushCodeAndCodeHash(tx Batch, codeAndCodeHash sdtypes.CodeAndCodeHash) error
ReportDBMetrics(delay time.Duration, quit <-chan bool)
FindAndUpdateGaps(latestBlockOnChain *big.Int, expectedDifference *big.Int, processingKey int64) error
PushKnownGaps(startingBlockNumber *big.Int, endingBlockNumber *big.Int, checkedOut bool, processingKey int64) error
io.Closer
}

Expand Down
125 changes: 125 additions & 0 deletions statediff/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package statediff

import (
"bytes"
"fmt"
"math/big"
"strconv"
"strings"
Expand Down Expand Up @@ -123,6 +124,8 @@ type Service struct {
BackendAPI ethapi.Backend
// Should the statediff service wait for geth to sync to head?
WaitForSync bool
// Used to signal if we should check for KnownGaps
KnownGaps KnownGaps
// Whether or not we have any subscribers; only if we do, do we processes state diffs
subscribers int32
// Interface for publishing statediffs as PG-IPLD objects
Expand All @@ -135,6 +138,91 @@ type Service struct {
maxRetry uint
}

// KnownGaps holds the state the statediff Service uses to detect blocks that
// were skipped or failed to process, so that they can be recorded as known gaps.
type KnownGaps struct {
// Should we check for gaps by looking at the DB and comparing the latest block with head
checkForGaps bool
// Arbitrary processingKey that can be used down the line to differentiate different geth nodes.
processingKey int64
// This number indicates the expected difference between blocks.
// Currently, this is 1 since the geth node processes each block. But down the road this can be used in
// tandem with the processingKey to differentiate block processing logic.
expectedDifference *big.Int
// Indicates if Geth is in an error state.
// This is used to indicate the right time to upsert.
errorState bool
// This slice keeps track of error blocks as they occur.
// When the errorState is false again, we can process these blocks.
// Do we need a list, or can we have knownStartErrorBlock and knownEndErrorBlock ints instead?
knownErrorBlocks []*big.Int
// The last block that was successfully processed.
// It's used to make sure we didn't skip over any block!
lastProcessedBlock *big.Int
}

// capturedMissedBlocks records, in the known gaps table, every block between
// lastProcessedBlock and currentBlock that was not successfully processed.
// It covers blocks that failed with an error (knownErrorBlocks) as well as
// blocks that geth skipped without reporting any error.
//
// Parameters:
//   - currentBlock: the block that has just been written successfully.
//   - knownErrorBlocks: blocks whose processing returned an error since the
//     last successful write. Only the first and last elements are used, as
//     the bounds of the error range, so the slice is assumed to be in
//     ascending order (TODO confirm this holds with concurrent workers).
//   - lastProcessedBlock: the previous block that was written successfully.
//
// Worked example with expectedDifference = 1, lastProcessedBlock = 110 and
// currentBlock = 125: the full gap is 111..124. If knownErrorBlocks spans
// 115..120, three ranges are pushed: 121..124, 111..114 and 115..120. If
// knownErrorBlocks is empty, the whole range 111..124 is pushed.
//
// Errors returned by PushKnownGaps are logged rather than returned, because
// the caller launches this function as a goroutine and has no way to
// receive them.
func (sds *Service) capturedMissedBlocks(currentBlock *big.Int, knownErrorBlocks []*big.Int, lastProcessedBlock *big.Int) {
	step := sds.KnownGaps.expectedDifference
	key := sds.KnownGaps.processingKey

	// pushGap writes a single range and surfaces a failure instead of
	// silently dropping it.
	// NOTE(review): the sql indexer's PushKnownGaps shown in this commit
	// rejects start >= end, so a single-block range will be reported here as
	// an error -- confirm whether that is intended.
	pushGap := func(start, end *big.Int) {
		if err := sds.indexer.PushKnownGaps(start, end, false, key); err != nil {
			log.Error("Unable to add block range to known_gaps table", "start", start.String(), "end", end.String(), "error", err.Error())
		}
	}

	// 110 + 1 == 111
	expectedStartErrorBlock := new(big.Int).Add(lastProcessedBlock, step)
	// 125 - 1 == 124
	expectedEndErrorBlock := new(big.Int).Sub(currentBlock, step)

	if len(knownErrorBlocks) == 0 {
		// Consecutive blocks give an inverted range (start > end): nothing
		// was missed, so there is nothing to record.
		// NOTE(review): the caller compares *big.Int pointers with !=, so
		// this function appears to be invoked even when no block was skipped.
		if expectedStartErrorBlock.Cmp(expectedEndErrorBlock) > 0 {
			return
		}
		log.Warn("We missed blocks without any errors.")
		log.Warn(fmt.Sprint("Missed blocks starting from: ", expectedStartErrorBlock))
		log.Warn(fmt.Sprint("Missed blocks ending at: ", expectedEndErrorBlock))
		pushGap(expectedStartErrorBlock, expectedEndErrorBlock)
		return
	}

	// 115
	startErrorBlock := new(big.Int).Set(knownErrorBlocks[0])
	// 120
	endErrorBlock := new(big.Int).Set(knownErrorBlocks[len(knownErrorBlocks)-1])

	// Compare by value: == on *big.Int compares pointer identity, which is
	// never equal for two separately allocated numbers.
	if expectedStartErrorBlock.Cmp(startErrorBlock) == 0 &&
		expectedEndErrorBlock.Cmp(endErrorBlock) == 0 {
		log.Info("All Gaps already captured in knownErrorBlocks")
	}

	// Blocks missed after the last known error block.
	if expectedEndErrorBlock.Cmp(endErrorBlock) > 0 {
		log.Warn(fmt.Sprint("There are gaps in the knownErrorBlocks list: ", knownErrorBlocks))
		log.Warn("But there are gaps that were also not added there.")
		log.Warn(fmt.Sprint("Last Block in knownErrorBlocks: ", endErrorBlock))
		log.Warn(fmt.Sprint("Last processed Block: ", lastProcessedBlock))
		log.Warn(fmt.Sprint("Current Block: ", currentBlock))
		// 120 + 1 == 121
		startBlock := new(big.Int).Add(endErrorBlock, step)
		// 121 to 124
		log.Warn(fmt.Sprintf("Adding the following block range to known_gaps table: %d - %d", startBlock, expectedEndErrorBlock))
		pushGap(startBlock, expectedEndErrorBlock)
	}

	// Blocks missed before the first known error block.
	if expectedStartErrorBlock.Cmp(startErrorBlock) < 0 {
		log.Warn(fmt.Sprint("There are gaps in the knownErrorBlocks list: ", knownErrorBlocks))
		log.Warn("But there are gaps that were also not added there.")
		log.Warn(fmt.Sprint("First Block in knownErrorBlocks: ", startErrorBlock))
		log.Warn(fmt.Sprint("Last processed Block: ", lastProcessedBlock))
		// 115 - 1 == 114
		endBlock := new(big.Int).Sub(startErrorBlock, step)
		// 111 to 114
		log.Warn(fmt.Sprintf("Adding the following block range to known_gaps table: %d - %d", expectedStartErrorBlock, endBlock))
		pushGap(expectedStartErrorBlock, endBlock)
	}

	// Finally record the range covered by the error blocks themselves.
	log.Warn(fmt.Sprint("The following Gaps were found: ", knownErrorBlocks))
	log.Warn(fmt.Sprint("Updating known Gaps table from ", startErrorBlock, " to ", endErrorBlock, " with processing key, ", key))
	pushGap(startErrorBlock, endErrorBlock)
}

// BlockCache caches the last block for safe access from different service loops
type BlockCache struct {
sync.Mutex
Expand Down Expand Up @@ -174,6 +262,14 @@ func New(stack *node.Node, ethServ *eth.Ethereum, cfg *ethconfig.Config, params
if workers == 0 {
workers = 1
}
// If we ever have multiple processingKeys we can update them here
// along with the expectedDifference
knownGaps := &KnownGaps{
checkForGaps: true,
processingKey: 0,
expectedDifference: big.NewInt(1),
errorState: false,
}
sds := &Service{
Mutex: sync.Mutex{},
BlockChain: blockChain,
Expand All @@ -184,6 +280,7 @@ func New(stack *node.Node, ethServ *eth.Ethereum, cfg *ethconfig.Config, params
BlockCache: NewBlockCache(workers),
BackendAPI: backend,
WaitForSync: params.WaitForSync,
KnownGaps: *knownGaps,
indexer: indexer,
enableWriteLoop: params.EnableWriteLoop,
numWorkers: workers,
Expand Down Expand Up @@ -308,12 +405,40 @@ func (sds *Service) writeLoopWorker(params workerParams) {
sds.writeGenesisStateDiff(parentBlock, params.id)
}

// If for any reason we need to check for gaps,
// Check and update the gaps table.
if sds.KnownGaps.checkForGaps && !sds.KnownGaps.errorState {
log.Info("Checking for Gaps at current block: ", currentBlock.Number())
go sds.indexer.FindAndUpdateGaps(currentBlock.Number(), sds.KnownGaps.expectedDifference, sds.KnownGaps.processingKey)
sds.KnownGaps.checkForGaps = false
}

log.Info("Writing state diff", "block height", currentBlock.Number().Uint64(), "worker", params.id)
err := sds.writeStateDiffWithRetry(currentBlock, parentBlock.Root(), writeLoopParams)
if err != nil {
log.Error("statediff.Service.WriteLoop: processing error", "block height", currentBlock.Number().Uint64(), "error", err.Error(), "worker", params.id)
sds.KnownGaps.errorState = true
sds.KnownGaps.knownErrorBlocks = append(sds.KnownGaps.knownErrorBlocks, currentBlock.Number())
log.Warn("Updating the following block to knownErrorBlocks to be inserted into knownGaps table: ", currentBlock.Number())
// Write object to startdiff
continue
}
sds.KnownGaps.errorState = false
// Understand what the last block that should have been processed is
previousExpectedBlock := big.NewInt(0).Sub(currentBlock.Number(), sds.KnownGaps.expectedDifference)
// If the last block which should have been processed is not
// the actual lastProcessedBlock, add the gap to the known gaps table.
if previousExpectedBlock != sds.KnownGaps.lastProcessedBlock && sds.KnownGaps.lastProcessedBlock != nil {
// We must pass in parameters by VALUE, not by reference.
// If we pass them in by reference, the references can change before the computation is complete!
staticKnownErrorBlocks := make([]*big.Int, len(sds.KnownGaps.knownErrorBlocks))
copy(staticKnownErrorBlocks, sds.KnownGaps.knownErrorBlocks)
staticLastProcessedBlock := new(big.Int).Set(sds.KnownGaps.lastProcessedBlock)
go sds.capturedMissedBlocks(currentBlock.Number(), staticKnownErrorBlocks, staticLastProcessedBlock)
sds.KnownGaps.knownErrorBlocks = nil
}
sds.KnownGaps.lastProcessedBlock = currentBlock.Number()

// TODO: how to handle with concurrent workers
statediffMetrics.lastStatediffHeight.Update(int64(currentBlock.Number().Uint64()))
case <-sds.QuitChan:
Expand Down
Loading

0 comments on commit f6ff20e

Please sign in to comment.