Skip to content

Commit

Permalink
Add idx_verify tool
Browse files Browse the repository at this point in the history
  • Loading branch information
wmitsuda committed Dec 15, 2024
1 parent feaa76e commit 928dc9d
Showing 1 changed file with 176 additions and 0 deletions.
176 changes: 176 additions & 0 deletions cmd/integration/commands/idx_verify.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
package commands

import (
	"bytes"
	"context"
	"io/fs"
	"log"
	"os"
	"path/filepath"
	"strings"

	"github.com/erigontech/erigon-lib/common"
	"github.com/erigontech/erigon-lib/common/hexutility"
	"github.com/erigontech/erigon-lib/config3"
	"github.com/erigontech/erigon-lib/recsplit"
	"github.com/erigontech/erigon-lib/recsplit/eliasfano32"
	"github.com/erigontech/erigon-lib/recsplit/multiencseq"
	"github.com/erigontech/erigon-lib/seg"
	"github.com/spf13/cobra"
)

// idxVerify is the "idx_verify" command. For every .ef file found under
// <sourcedir>/snapshots/idx it opens the file with the same name under
// <targetdir>/snapshots/idx plus its .efi accessor under
// <targetdir>/snapshots/accessor, and checks that keys, decoded values and
// accessor offsets agree. Any mismatch terminates the process via log.Fatal.
var idxVerify = &cobra.Command{
	Use:   "idx_verify",
	Short: "After a genesis sync + snapshot regen, deep compare original and optimized .ef files of 2 E3 instances",
	Run: func(cmd *cobra.Command, args []string) {
		ctx, cancel := common.RootContext()
		defer cancel()

		sourceIdxPath := filepath.Join(sourceDirCli, "snapshots", "idx")
		files, err := fs.ReadDir(os.DirFS(sourceIdxPath), ".")
		if err != nil {
			log.Fatalf("Failed to read directory contents: %v", err)
		}

		log.Println("Comparing idx files:")
		for _, file := range files {
			if file.IsDir() || !strings.HasSuffix(file.Name(), ".ef") {
				continue
			}
			if err := idxVerifyFile(ctx, file.Name()); err != nil {
				// The only error reported is context cancellation; stop
				// quietly, leaving the remaining files unchecked.
				return
			}
		}
	},
}

// idxVerifyFile deep-compares a single .ef file called name between the source
// and target data directories.
//
// It lives in its own function so that the deferred Close calls run as soon as
// one file has been processed instead of accumulating until the whole command
// returns. Verification failures are fatal (log.Fatal); the returned error is
// non-nil only when ctx was cancelled before the file was fully checked.
func idxVerifyFile(ctx context.Context, name string) error {
	log.Printf("Deep checking file %s...", name)

	efInfo, err := parseEFFilename(name)
	if err != nil {
		log.Fatalf("Failed to parse file info: %v", err)
	}
	baseTxNum := efInfo.startStep * config3.DefaultStepSize

	// Accessor (.efi) of the optimized file; used to check key -> offset lookups.
	targetEfi, err := recsplit.OpenIndex(filepath.Join(targetDirCli, "snapshots", "accessor", name+"i"))
	if err != nil {
		log.Fatalf("Failed to open index: %v", err)
	}
	defer targetEfi.Close()

	targetEfiReader := targetEfi.GetReaderFromPool()
	defer targetEfiReader.Close()

	// original .ef file
	sourceIdx, err := seg.NewDecompressor(filepath.Join(sourceDirCli, "snapshots", "idx", name))
	if err != nil {
		log.Fatalf("Failed to open decompressor: %v", err)
	}
	defer sourceIdx.Close()

	// reencoded optimized .ef file
	targetIdx, err := seg.NewDecompressor(filepath.Join(targetDirCli, "snapshots", "idx", name))
	if err != nil {
		log.Fatalf("Failed to open decompressor: %v", err)
	}
	defer targetIdx.Close()

	sourceReader := seg.NewReader(sourceIdx.MakeGetter(), seg.CompressNone)
	sourceReader.Reset(0)

	targetReader := seg.NewReader(targetIdx.MakeGetter(), seg.CompressNone)
	targetReader.Reset(0)

	// Both files are read as alternating key / value words. prevKeyOffset
	// tracks the offset reported after the previous value was read, which is
	// what the accessor is expected to return for the current key.
	prevKeyOffset := uint64(0)
	for sourceReader.HasNext() {
		if !targetReader.HasNext() {
			log.Fatal("target reader doesn't have next!")
		}

		sourceK, _ := sourceReader.Next(nil)
		targetK, _ := targetReader.Next(nil)
		if !bytes.Equal(sourceK, targetK) {
			log.Fatalf("key mismatch!")
		}

		if !sourceReader.HasNext() {
			log.Fatal("source reader doesn't have next!")
		}
		if !targetReader.HasNext() {
			log.Fatal("target reader doesn't have next!")
		}

		// source/target semantic value comparison
		sourceV, _ := sourceReader.Next(nil)
		targetV, nextKeyOffset := targetReader.Next(nil)
		if !compareSequences(sourceV, targetV, baseTxNum) {
			log.Fatalf("value mismatch!")
		}

		// checks new efi lookup points to the same value
		offset, found := targetEfiReader.TwoLayerLookup(targetK)
		if !found {
			log.Fatalf("key %v not found in efi", hexutility.Encode(targetK))
		}
		if offset != prevKeyOffset {
			log.Fatalf("offset mismatch: %d != %d", offset, prevKeyOffset)
		}
		prevKeyOffset = nextKeyOffset

		if err := ctx.Err(); err != nil {
			return err
		}
	}

	// The loop is driven by the source file only, so entries left over in the
	// target would otherwise go unnoticed.
	if targetReader.HasNext() {
		log.Fatal("target reader has more entries than source!")
	}
	return nil
}

// compareSequences decodes sourceV with eliasfano32.ReadEliasFano and targetV
// with multiencseq.ReadMultiEncSeq (using baseTxNum) and reports whether both
// hold the same number of elements and yield the same values in iteration
// order. The reason for a false result is written to the log; an iterator
// error terminates the process.
func compareSequences(sourceV, targetV []byte, baseTxNum uint64) bool {
	// The second result of ReadEliasFano is not needed for the comparison.
	original, _ := eliasfano32.ReadEliasFano(sourceV)
	optimized := multiencseq.ReadMultiEncSeq(baseTxNum, targetV)

	switch want, got := original.Count(), optimized.Count(); {
	case got > want:
		log.Print("Optimized eliasfano is longer")
		return false
	case want > got:
		log.Print("Optimized eliasfano is shorter")
		return false
	}

	// Counts are equal here, so walking the original sequence also exhausts
	// the optimized one.
	wantIt, gotIt := original.Iterator(), optimized.Iterator(0)
	for wantIt.HasNext() {
		want, err := wantIt.Next()
		if err != nil {
			log.Fatalf("Failed to read next: %v", err)
		}
		got, err := gotIt.Next()
		if err != nil {
			log.Fatalf("Failed to read next: %v", err)
		}
		if want == got {
			continue
		}

		log.Printf("values mismatch: source=%d target=%d", want, got)
		log.Printf("source=%v target=%v", hexutility.Encode(sourceV), hexutility.Encode(targetV))
		return false
	}

	return true
}

func init() {
idxVerify.Flags().StringVar(&sourceDirCli, "sourcedir", "", "data directory of original E3 instance")
must(idxVerify.MarkFlagRequired("sourcedir"))
must(idxVerify.MarkFlagDirname("sourcedir"))

idxVerify.Flags().StringVar(&targetDirCli, "targetdir", "", "data directory of optimized E3 instance")
must(idxVerify.MarkFlagRequired("targetdir"))
must(idxVerify.MarkFlagDirname("targetdir"))

rootCmd.AddCommand(idxVerify)
}

// sourceDirCli and targetDirCli are bound to the "sourcedir" and "targetdir"
// flags of idx_verify in this file's init.
var sourceDirCli, targetDirCli string

0 comments on commit 928dc9d

Please sign in to comment.