From 28bf2c124b3e3ddd8aed1999e6798b5d4d649940 Mon Sep 17 00:00:00 2001 From: Dev Ojha Date: Fri, 10 Sep 2021 15:42:56 -0400 Subject: [PATCH] perf: Make CacheKV store interleaved iterator and insertion not O(n^2) (#10026) --- CHANGELOG.md | 1 + store/cachekv/memiterator.go | 101 ++++++++--------------------------- store/cachekv/store.go | 89 +++++++++++++++++------------- 3 files changed, 76 insertions(+), 115 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 306caec94aba..26c7aefd2aa0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -87,6 +87,7 @@ Ref: https://keepachangelog.com/en/1.0.0/ ### Improvements +* (store) [\#10026](https://github.com/cosmos/cosmos-sdk/pull/10026) Improve CacheKVStore datastructures / algorithms, to no longer take O(N^2) time when interleaving iterators and insertions. * (types) [\#10076](https://github.com/cosmos/cosmos-sdk/pull/10076) Significantly speedup and lower allocations for `Coins.String()`. * (x/bank) [\#10022](https://github.com/cosmos/cosmos-sdk/pull/10022) `BankKeeper.SendCoins` now takes less execution time. * (deps) [\#9956](https://github.com/cosmos/cosmos-sdk/pull/9956) Bump Tendermint to [v0.34.12](https://github.com/tendermint/tendermint/releases/tag/v0.34.12). diff --git a/store/cachekv/memiterator.go b/store/cachekv/memiterator.go index b197ac141660..0a4bc57a6406 100644 --- a/store/cachekv/memiterator.go +++ b/store/cachekv/memiterator.go @@ -1,107 +1,50 @@ package cachekv import ( - "errors" - dbm "github.com/tendermint/tm-db" - "github.com/cosmos/cosmos-sdk/types/kv" + "github.com/cosmos/cosmos-sdk/store/types" ) // Iterates over iterKVCache items. // if key is nil, means it was deleted. // Implements Iterator. type memIterator struct { - start, end []byte - items []*kv.Pair - ascending bool -} - -func newMemIterator(start, end []byte, items *kv.List, ascending bool) *memIterator { - itemsInDomain := make([]*kv.Pair, 0, items.Len()) - - var entered bool - - for e := items.Front(); e != nil; e = e.Next() { - item := e.Value - if !dbm.IsKeyInDomain(item.Key, start, end) { - if entered { - break - } - - continue - } - - itemsInDomain = append(itemsInDomain, item) - entered = true - } + types.Iterator - return &memIterator{ - start: start, - end: end, - items: itemsInDomain, - ascending: ascending, - } + deleted map[string]struct{} } -func (mi *memIterator) Domain() ([]byte, []byte) { - return mi.start, mi.end -} +func newMemIterator(start, end []byte, items *dbm.MemDB, deleted map[string]struct{}, ascending bool) *memIterator { + var iter types.Iterator + var err error -func (mi *memIterator) Valid() bool { - return len(mi.items) > 0 -} + if ascending { + iter, err = items.Iterator(start, end) + } else { + iter, err = items.ReverseIterator(start, end) + } -func (mi *memIterator) assertValid() { - if err := mi.Error(); err != nil { + if err != nil { panic(err) } -} -func (mi *memIterator) Next() { - mi.assertValid() - - if mi.ascending { - mi.items = mi.items[1:] - } else { - mi.items = mi.items[:len(mi.items)-1] + newDeleted := make(map[string]struct{}) + for k, v := range deleted { + newDeleted[k] = v } -} -func (mi *memIterator) Key() []byte { - mi.assertValid() + return &memIterator{ + Iterator: iter, - if mi.ascending { - return mi.items[0].Key + deleted: newDeleted, } - - return mi.items[len(mi.items)-1].Key } func (mi *memIterator) Value() []byte { - mi.assertValid() - - if mi.ascending { - return mi.items[0].Value - } - - return mi.items[len(mi.items)-1].Value -} - -func (mi *memIterator) Close() error { - mi.start = nil - mi.end = nil - mi.items = nil - - return nil -} - -// Error returns an error if the memIterator is invalid defined by the Valid -// method. -func (mi *memIterator) Error() error { - if !mi.Valid() { - return errors.New("invalid memIterator") + key := mi.Iterator.Key() + if _, ok := mi.deleted[string(key)]; ok { + return nil } - - return nil + return mi.Iterator.Value() } diff --git a/store/cachekv/store.go b/store/cachekv/store.go index 101e526f133e..799805c2ae1f 100644 --- a/store/cachekv/store.go +++ b/store/cachekv/store.go @@ -20,17 +20,17 @@ import ( // If value is nil but deleted is false, it means the parent doesn't have the // key. (No need to delete upon Write()) type cValue struct { - value []byte - deleted bool - dirty bool + value []byte + dirty bool } // Store wraps an in-memory cache around an underlying types.KVStore. type Store struct { mtx sync.Mutex cache map[string]*cValue + deleted map[string]struct{} unsortedCache map[string]struct{} - sortedCache *kv.List // always ascending sorted + sortedCache *dbm.MemDB // always ascending sorted parent types.KVStore } @@ -40,8 +40,9 @@ var _ types.CacheKVStore = (*Store)(nil) func NewStore(parent types.KVStore) *Store { return &Store{ cache: make(map[string]*cValue), + deleted: make(map[string]struct{}), unsortedCache: make(map[string]struct{}), - sortedCache: kv.NewList(), + sortedCache: dbm.NewMemDB(), parent: parent, } } @@ -120,7 +121,7 @@ func (store *Store) Write() { cacheValue := store.cache[key] switch { - case cacheValue.deleted: + case store.isDeleted(key): store.parent.Delete([]byte(key)) case cacheValue.value == nil: // Skip, it already doesn't exist in parent. @@ -131,8 +132,9 @@ func (store *Store) Write() { // Clear the cache store.cache = make(map[string]*cValue) + store.deleted = make(map[string]struct{}) store.unsortedCache = make(map[string]struct{}) - store.sortedCache = kv.NewList() + store.sortedCache = dbm.NewMemDB() } // CacheWrap implements CacheWrapper. @@ -176,23 +178,40 @@ func (store *Store) iterator(start, end []byte, ascending bool) types.Iterator { } store.dirtyItems(start, end) - cache = newMemIterator(start, end, store.sortedCache, ascending) + cache = newMemIterator(start, end, store.sortedCache, store.deleted, ascending) return newCacheMergeIterator(parent, cache, ascending) } // Constructs a slice of dirty items, to use w/ memIterator. func (store *Store) dirtyItems(start, end []byte) { - unsorted := make([]*kv.Pair, 0) - n := len(store.unsortedCache) - for key := range store.unsortedCache { - if dbm.IsKeyInDomain(conv.UnsafeStrToBytes(key), start, end) { + unsorted := make([]*kv.Pair, 0) + // If the unsortedCache is too big, its costs too much to determine + // whats in the subset we are concerned about. + // If you are interleaving iterator calls with writes, this can easily become an + // O(N^2) overhead. + // Even without that, too many range checks eventually becomes more expensive + // than just not having the cache. + if n >= 1024 { + for key := range store.unsortedCache { cacheValue := store.cache[key] unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value}) } + } else { + // else do a linear scan to determine if the unsorted pairs are in the pool. + for key := range store.unsortedCache { + if dbm.IsKeyInDomain(conv.UnsafeStrToBytes(key), start, end) { + cacheValue := store.cache[key] + unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value}) + } + } } + store.clearUnsortedCacheSubset(unsorted) +} +func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair) { + n := len(store.unsortedCache) if len(unsorted) == n { // This pattern allows the Go compiler to emit the map clearing idiom for the entire map. for key := range store.unsortedCache { delete(store.unsortedCache, key) @@ -202,32 +221,21 @@ func (store *Store) dirtyItems(start, end []byte) { delete(store.unsortedCache, conv.UnsafeBytesToStr(kv.Key)) } } - sort.Slice(unsorted, func(i, j int) bool { return bytes.Compare(unsorted[i].Key, unsorted[j].Key) < 0 }) - for e := store.sortedCache.Front(); e != nil && len(unsorted) != 0; { - uitem := unsorted[0] - sitem := e.Value - comp := bytes.Compare(uitem.Key, sitem.Key) - - switch comp { - case -1: - unsorted = unsorted[1:] - - store.sortedCache.InsertBefore(uitem, e) - case 1: - e = e.Next() - case 0: - unsorted = unsorted[1:] - e.Value = uitem - e = e.Next() + for _, item := range unsorted { + if item.Value == nil { + // deleted element, tracked by store.deleted + // setting arbitrary value + store.sortedCache.Set(item.Key, []byte{}) + continue + } + err := store.sortedCache.Set(item.Key, item.Value) + if err != nil { + panic(err) } - } - - for _, kvp := range unsorted { - store.sortedCache.PushBack(kvp) } } @@ -238,11 +246,20 @@ func (store *Store) dirtyItems(start, end []byte) { func (store *Store) setCacheValue(key, value []byte, deleted bool, dirty bool) { keyStr := conv.UnsafeBytesToStr(key) store.cache[keyStr] = &cValue{ - value: value, - deleted: deleted, - dirty: dirty, + value: value, + dirty: dirty, + } + if deleted { + store.deleted[keyStr] = struct{}{} + } else { + delete(store.deleted, keyStr) } if dirty { store.unsortedCache[keyStr] = struct{}{} } } + +func (store *Store) isDeleted(key string) bool { + _, ok := store.deleted[key] + return ok +}