diff --git a/cmd/integration/commands/idx_verify.go b/cmd/integration/commands/idx_verify.go new file mode 100644 index 00000000000..4dc573aa6a6 --- /dev/null +++ b/cmd/integration/commands/idx_verify.go @@ -0,0 +1,176 @@ +package commands + +import ( + "bytes" + "io/fs" + "log" + "os" + "path/filepath" + "strings" + + "github.com/erigontech/erigon-lib/common" + "github.com/erigontech/erigon-lib/common/hexutility" + "github.com/erigontech/erigon-lib/config3" + "github.com/erigontech/erigon-lib/recsplit" + "github.com/erigontech/erigon-lib/recsplit/eliasfano32" + "github.com/erigontech/erigon-lib/recsplit/multiencseq" + "github.com/erigontech/erigon-lib/seg" + "github.com/spf13/cobra" +) + +var idxVerify = &cobra.Command{ + Use: "idx_verify", + Short: "After a genesis sync + snapshot regen, deep compare original and optimized .ef files of 2 E3 instances", + Run: func(cmd *cobra.Command, args []string) { + ctx, _ := common.RootContext() + + sourceIdxPath := filepath.Join(sourceDirCli, "snapshots", "idx") + sourceIdxDir := os.DirFS(sourceIdxPath) + + files, err := fs.ReadDir(sourceIdxDir, ".") + if err != nil { + log.Fatalf("Failed to read directory contents: %v", err) + } + + log.Println("Comparing idx files:") + for _, file := range files { + if file.IsDir() || !strings.HasSuffix(file.Name(), ".ef") { + continue + } + + log.Printf("Deep checking file %s...", file.Name()) + + efInfo, err := parseEFFilename(file.Name()) + if err != nil { + log.Fatalf("Failed to parse file info: %v", err) + } + baseTxNum := efInfo.startStep * config3.DefaultStepSize + + targetEfi, err := recsplit.OpenIndex(targetDirCli + "/snapshots/accessor/" + file.Name() + "i") + if err != nil { + log.Fatalf("Failed to open index: %v", err) + } + defer targetEfi.Close() + + targetEfiReader := targetEfi.GetReaderFromPool() + defer targetEfiReader.Close() + + // original .ef file + sourceIdx, err := seg.NewDecompressor(sourceDirCli + "/snapshots/idx/" + file.Name()) + if err != nil { + log.Fatalf("Failed to open decompressor: %v", err) + } + defer sourceIdx.Close() + + // reencoded optimized .ef file + targetIdx, err := seg.NewDecompressor(targetDirCli + "/snapshots/idx/" + file.Name()) + if err != nil { + log.Fatalf("Failed to open decompressor: %v", err) + } + defer targetIdx.Close() + + g := sourceIdx.MakeGetter() + sourceReader := seg.NewReader(g, seg.CompressNone) + sourceReader.Reset(0) + + g = targetIdx.MakeGetter() + targetReader := seg.NewReader(g, seg.CompressNone) + targetReader.Reset(0) + + prevKeyOffset := uint64(0) + for sourceReader.HasNext() { + if !targetReader.HasNext() { + log.Fatal("target reader doesn't have next!") + } + + sourceK, _ := sourceReader.Next(nil) + targetK, _ := targetReader.Next(nil) + if !bytes.Equal(sourceK, targetK) { + log.Fatalf("key mismatch!") + } + + if !sourceReader.HasNext() { + log.Fatal("source reader doesn't have next!") + } + if !targetReader.HasNext() { + log.Fatal("target reader doesn't have next!") + } + + // source/target semantic value comparison + sourceV, _ := sourceReader.Next(nil) + targetV, nextKeyOffset := targetReader.Next(nil) + if !compareSequences(sourceV, targetV, baseTxNum) { + log.Fatalf("value mismatch!") + } + + // checks new efi lookup points to the same value + offset, found := targetEfiReader.TwoLayerLookup(targetK) + if !found { + log.Fatalf("key %v not found in efi", hexutility.Encode(targetK)) + } + if offset != prevKeyOffset { + log.Fatalf("offset mismatch: %d != %d", offset, prevKeyOffset) + } + prevKeyOffset = nextKeyOffset + + select { + case <-ctx.Done(): + return + default: + } + } + sourceIdx.Close() + targetIdx.Close() + targetEfiReader.Close() + targetEfi.Close() + } + }, +} + +func compareSequences(sourceV, targetV []byte, baseTxNum uint64) bool { + sourceEf, _ := eliasfano32.ReadEliasFano(sourceV) + targetSeq := multiencseq.ReadMultiEncSeq(baseTxNum, targetV) + + if targetSeq.Count() > sourceEf.Count() { + log.Print("Optimized eliasfano is longer") + return false + } + if sourceEf.Count() > targetSeq.Count() { + log.Print("Optimized eliasfano is shorter") + return false + } + + sourceIt := sourceEf.Iterator() + targetIt := targetSeq.Iterator(0) + for sourceIt.HasNext() { + sourceN, err := sourceIt.Next() + if err != nil { + log.Fatalf("Failed to read next: %v", err) + } + targetN, err := targetIt.Next() + if err != nil { + log.Fatalf("Failed to read next: %v", err) + } + if sourceN != targetN { + log.Printf("values mismatch: source=%d target=%d", sourceN, targetN) + log.Printf("source=%v target=%v", hexutility.Encode(sourceV), hexutility.Encode(targetV)) + return false + } + } + + return true +} + +func init() { + idxVerify.Flags().StringVar(&sourceDirCli, "sourcedir", "", "data directory of original E3 instance") + must(idxVerify.MarkFlagRequired("sourcedir")) + must(idxVerify.MarkFlagDirname("sourcedir")) + + idxVerify.Flags().StringVar(&targetDirCli, "targetdir", "", "data directory of optimized E3 instance") + must(idxVerify.MarkFlagRequired("targetdir")) + must(idxVerify.MarkFlagDirname("targetdir")) + + rootCmd.AddCommand(idxVerify) +} + +var sourceDirCli, targetDirCli string