Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: Make CacheKV store interleaved iterator and insertion not O(n^2) (backport #10026) #10114

Merged
merged 4 commits into from
Sep 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ Ref: https://keepachangelog.com/en/1.0.0/

## Unreleased

### Improvements

* (store) [\#10026](https://github.com/cosmos/cosmos-sdk/pull/10026) Improve CacheKVStore datastructures / algorithms, to no longer take O(N^2) time when interleaving iterators and insertions.

### Bug Fixes

* [\#9969](https://github.com/cosmos/cosmos-sdk/pull/9969) fix: use keyring in config for add-genesis-account cmd.
Expand Down
101 changes: 22 additions & 79 deletions store/cachekv/memiterator.go
Original file line number Diff line number Diff line change
@@ -1,107 +1,50 @@
package cachekv

import (
"errors"

dbm "github.com/tendermint/tm-db"

"github.com/cosmos/cosmos-sdk/types/kv"
"github.com/cosmos/cosmos-sdk/store/types"
)

// Iterates over iterKVCache items.
// if key is nil, means it was deleted.
// Implements Iterator.
type memIterator struct {
start, end []byte
items []*kv.Pair
ascending bool
}

func newMemIterator(start, end []byte, items *kv.List, ascending bool) *memIterator {
itemsInDomain := make([]*kv.Pair, 0, items.Len())

var entered bool

for e := items.Front(); e != nil; e = e.Next() {
item := e.Value
if !dbm.IsKeyInDomain(item.Key, start, end) {
if entered {
break
}

continue
}

itemsInDomain = append(itemsInDomain, item)
entered = true
}
types.Iterator

return &memIterator{
start: start,
end: end,
items: itemsInDomain,
ascending: ascending,
}
deleted map[string]struct{}
}

func (mi *memIterator) Domain() ([]byte, []byte) {
return mi.start, mi.end
}
func newMemIterator(start, end []byte, items *dbm.MemDB, deleted map[string]struct{}, ascending bool) *memIterator {
var iter types.Iterator
var err error

func (mi *memIterator) Valid() bool {
return len(mi.items) > 0
}
if ascending {
iter, err = items.Iterator(start, end)
} else {
iter, err = items.ReverseIterator(start, end)
}

func (mi *memIterator) assertValid() {
if err := mi.Error(); err != nil {
if err != nil {
panic(err)
}
}

func (mi *memIterator) Next() {
mi.assertValid()

if mi.ascending {
mi.items = mi.items[1:]
} else {
mi.items = mi.items[:len(mi.items)-1]
newDeleted := make(map[string]struct{})
for k, v := range deleted {
newDeleted[k] = v
}
}

func (mi *memIterator) Key() []byte {
mi.assertValid()
return &memIterator{
Iterator: iter,

if mi.ascending {
return mi.items[0].Key
deleted: newDeleted,
}

return mi.items[len(mi.items)-1].Key
}

func (mi *memIterator) Value() []byte {
mi.assertValid()

if mi.ascending {
return mi.items[0].Value
}

return mi.items[len(mi.items)-1].Value
}

func (mi *memIterator) Close() error {
mi.start = nil
mi.end = nil
mi.items = nil

return nil
}

// Error returns an error if the memIterator is invalid defined by the Valid
// method.
func (mi *memIterator) Error() error {
if !mi.Valid() {
return errors.New("invalid memIterator")
key := mi.Iterator.Key()
if _, ok := mi.deleted[string(key)]; ok {
return nil
}

return nil
return mi.Iterator.Value()
}
94 changes: 56 additions & 38 deletions store/cachekv/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,17 @@ import (
// If value is nil but deleted is false, it means the parent doesn't have the
// key. (No need to delete upon Write())
type cValue struct {
value []byte
deleted bool
dirty bool
value []byte
dirty bool
}

// Store wraps an in-memory cache around an underlying types.KVStore.
type Store struct {
mtx sync.Mutex
cache map[string]*cValue
deleted map[string]struct{}
unsortedCache map[string]struct{}
sortedCache *kv.List // always ascending sorted
sortedCache *dbm.MemDB // always ascending sorted
parent types.KVStore
}

Expand All @@ -40,8 +40,9 @@ var _ types.CacheKVStore = (*Store)(nil)
func NewStore(parent types.KVStore) *Store {
return &Store{
cache: make(map[string]*cValue),
deleted: make(map[string]struct{}),
unsortedCache: make(map[string]struct{}),
sortedCache: kv.NewList(),
sortedCache: dbm.NewMemDB(),
parent: parent,
}
}
Expand Down Expand Up @@ -120,7 +121,7 @@ func (store *Store) Write() {
cacheValue := store.cache[key]

switch {
case cacheValue.deleted:
case store.isDeleted(key):
store.parent.Delete([]byte(key))
case cacheValue.value == nil:
// Skip, it already doesn't exist in parent.
Expand All @@ -131,8 +132,9 @@ func (store *Store) Write() {

// Clear the cache
store.cache = make(map[string]*cValue)
store.deleted = make(map[string]struct{})
store.unsortedCache = make(map[string]struct{})
store.sortedCache = kv.NewList()
store.sortedCache = dbm.NewMemDB()
}

// CacheWrap implements CacheWrapper.
Expand Down Expand Up @@ -171,7 +173,7 @@ func (store *Store) iterator(start, end []byte, ascending bool) types.Iterator {
}

store.dirtyItems(start, end)
cache = newMemIterator(start, end, store.sortedCache, ascending)
cache = newMemIterator(start, end, store.sortedCache, store.deleted, ascending)

return newCacheMergeIterator(parent, cache, ascending)
}
Expand All @@ -180,7 +182,7 @@ func (store *Store) iterator(start, end []byte, ascending bool) types.Iterator {
// from string -> []byte to speed up operations, it is not meant
// to be used generally, but for a specific pattern to check for available
// keys within a domain.
func strToByte(s string) []byte {
func strToBytes(s string) []byte {
var b []byte
hdr := (*reflect.SliceHeader)(unsafe.Pointer(&b))
hdr.Cap = len(s)
Expand All @@ -200,16 +202,33 @@ func byteSliceToStr(b []byte) string {

// Constructs a slice of dirty items, to use w/ memIterator.
func (store *Store) dirtyItems(start, end []byte) {
unsorted := make([]*kv.Pair, 0)

n := len(store.unsortedCache)
for key := range store.unsortedCache {
if dbm.IsKeyInDomain(strToByte(key), start, end) {
unsorted := make([]*kv.Pair, 0)
// If the unsortedCache is too big, its costs too much to determine
// whats in the subset we are concerned about.
// If you are interleaving iterator calls with writes, this can easily become an
// O(N^2) overhead.
// Even without that, too many range checks eventually becomes more expensive
// than just not having the cache.
if n >= 1024 {
for key := range store.unsortedCache {
cacheValue := store.cache[key]
unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
}
} else {
// else do a linear scan to determine if the unsorted pairs are in the pool.
for key := range store.unsortedCache {
if dbm.IsKeyInDomain(strToBytes(key), start, end) {
cacheValue := store.cache[key]
unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
}
}
}
store.clearUnsortedCacheSubset(unsorted)
}

func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair) {
n := len(store.unsortedCache)
if len(unsorted) == n { // This pattern allows the Go compiler to emit the map clearing idiom for the entire map.
for key := range store.unsortedCache {
delete(store.unsortedCache, key)
Expand All @@ -219,32 +238,21 @@ func (store *Store) dirtyItems(start, end []byte) {
delete(store.unsortedCache, byteSliceToStr(kv.Key))
}
}

sort.Slice(unsorted, func(i, j int) bool {
return bytes.Compare(unsorted[i].Key, unsorted[j].Key) < 0
})

for e := store.sortedCache.Front(); e != nil && len(unsorted) != 0; {
uitem := unsorted[0]
sitem := e.Value
comp := bytes.Compare(uitem.Key, sitem.Key)

switch comp {
case -1:
unsorted = unsorted[1:]

store.sortedCache.InsertBefore(uitem, e)
case 1:
e = e.Next()
case 0:
unsorted = unsorted[1:]
e.Value = uitem
e = e.Next()
for _, item := range unsorted {
if item.Value == nil {
// deleted element, tracked by store.deleted
// setting arbitrary value
store.sortedCache.Set(item.Key, []byte{})
continue
}
err := store.sortedCache.Set(item.Key, item.Value)
if err != nil {
panic(err)
}
}

for _, kvp := range unsorted {
store.sortedCache.PushBack(kvp)
}
}

Expand All @@ -253,12 +261,22 @@ func (store *Store) dirtyItems(start, end []byte) {

// Only entrypoint to mutate store.cache.
func (store *Store) setCacheValue(key, value []byte, deleted bool, dirty bool) {
store.cache[string(key)] = &cValue{
value: value,
deleted: deleted,
dirty: dirty,
keyStr := byteSliceToStr(key)
store.cache[keyStr] = &cValue{
value: value,
dirty: dirty,
}
if deleted {
store.deleted[keyStr] = struct{}{}
} else {
delete(store.deleted, keyStr)
}
if dirty {
store.unsortedCache[string(key)] = struct{}{}
}
}

func (store *Store) isDeleted(key string) bool {
_, ok := store.deleted[key]
return ok
}