diff --git a/CHANGELOG.md b/CHANGELOG.md index a24cc57f3035..72c497c989b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,6 +48,7 @@ Ref: https://keepachangelog.com/en/1.0.0/ * [\#9837](https://github.com/cosmos/cosmos-sdk/issues/9837) `--generate-only` flag will accept the keyname now. * [\#10045](https://github.com/cosmos/cosmos-sdk/pull/10045) Revert [#8549](https://github.com/cosmos/cosmos-sdk/pull/8549). Do not route grpc queries through Tendermint. * [\#10326](https://github.com/cosmos/cosmos-sdk/pull/10326) `x/authz` add query all grants by granter query. +* [\#10024](https://github.com/cosmos/cosmos-sdk/pull/10024) `store/cachekv` performance improvement by reduced growth factor for iterator ranging by using binary searches to find dirty items when unsorted key count >= 1024 ### API Breaking Changes diff --git a/store/cachekv/search_test.go b/store/cachekv/search_test.go new file mode 100644 index 000000000000..41321c076eae --- /dev/null +++ b/store/cachekv/search_test.go @@ -0,0 +1,141 @@ +package cachekv + +import "testing" + +func TestFindStartIndex(t *testing.T) { + tests := []struct { + name string + sortedL []string + query string + want int + }{ + { + name: "non-existent value", + sortedL: []string{"a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"}, + query: "o", + want: 8, + }, + { + name: "dupes start at index 0", + sortedL: []string{"a", "a", "a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"}, + query: "a", + want: 0, + }, + { + name: "dupes start at non-index 0", + sortedL: []string{"a", "c", "c", "c", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"}, + query: "c", + want: 1, + }, + { + name: "at end", + sortedL: []string{"a", "e", "u", "v", "w", "x", "y", "z"}, + query: "z", + want: 7, + }, + { + name: "dupes at end", + sortedL: []string{"a", "e", "u", "v", "w", "x", "y", "z", "z", "z", "z"}, + query: "z", + want: 7, + }, + { + name: "entirely dupes", + sortedL: []string{"z", "z", "z", "z", "z"}, + query: "z", + want: 0, + }, + { + name: "non-existent but within >=start", + sortedL: []string{"z", "z", "z", "z", "z"}, + query: "p", + want: 0, + }, + { + name: "non-existent and out of range", + sortedL: []string{"d", "e", "f", "g", "h"}, + query: "z", + want: -1, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + body := tt.sortedL + got := findStartIndex(body, tt.query) + if got != tt.want { + t.Fatalf("Got: %d, want: %d", got, tt.want) + } + }) + } +} + +func TestFindEndIndex(t *testing.T) { + tests := []struct { + name string + sortedL []string + query string + want int + }{ + { + name: "non-existent value", + sortedL: []string{"a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"}, + query: "o", + want: 7, + }, + { + name: "dupes start at index 0", + sortedL: []string{"a", "a", "a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"}, + query: "a", + want: 0, + }, + { + name: "dupes start at non-index 0", + sortedL: []string{"a", "c", "c", "c", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"}, + query: "c", + want: 1, + }, + { + name: "at end", + sortedL: []string{"a", "e", "u", "v", "w", "x", "y", "z"}, + query: "z", + want: 7, + }, + { + name: "dupes at end", + sortedL: []string{"a", "e", "u", "v", "w", "x", "y", "z", "z", "z", "z"}, + query: "z", + want: 7, + }, + { + name: "entirely dupes", + sortedL: []string{"z", "z", "z", "z", "z"}, + query: "z", + want: 0, + }, + { + name: "non-existent and out of range", + sortedL: []string{"z", "z", "z", "z", "z"}, + query: "p", + want: -1, + }, + { + name: "non-existent and out of range", + sortedL: []string{"d", "e", "f", "g", "h"}, + query: "z", + want: 4, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + body := tt.sortedL + got := findEndIndex(body, tt.query) + if got != tt.want { + t.Fatalf("Got: %d, want: %d", got, tt.want) + } + }) + } +} diff --git a/store/cachekv/store.go b/store/cachekv/store.go index 221a96e679a4..471c2387cd99 100644 --- a/store/cachekv/store.go +++ b/store/cachekv/store.go @@ -179,8 +179,94 @@ func (store *Store) iterator(start, end []byte, ascending bool) types.Iterator { return newCacheMergeIterator(parent, cache, ascending) } +func findStartIndex(strL []string, startQ string) int { + // Modified binary search to find the very first element in >=startQ. + if len(strL) == 0 { + return -1 + } + + var left, right, mid int + right = len(strL) - 1 + for left <= right { + mid = (left + right) >> 1 + midStr := strL[mid] + if midStr == startQ { + // Handle condition where there might be multiple values equal to startQ. + // We are looking for the very first value < midStL, that i+1 will be the first + // element >= midStr. + for i := mid - 1; i >= 0; i-- { + if strL[i] != midStr { + return i + 1 + } + } + return 0 + } + if midStr < startQ { + left = mid + 1 + } else { // midStrL > startQ + right = mid - 1 + } + } + if left >= 0 && left < len(strL) && strL[left] >= startQ { + return left + } + return -1 +} + +func findEndIndex(strL []string, endQ string) int { + if len(strL) == 0 { + return -1 + } + + // Modified binary search to find the very first element > 1 + midStr := strL[mid] + if midStr == endQ { + // Handle condition where there might be multiple values equal to startQ. + // We are looking for the very first value < midStL, that i+1 will be the first + // element >= midStr. + for i := mid - 1; i >= 0; i-- { + if strL[i] < midStr { + return i + 1 + } + } + return 0 + } + if midStr < endQ { + left = mid + 1 + } else { // midStrL > startQ + right = mid - 1 + } + } + + // Binary search failed, now let's find a value less than endQ. + for i := right; i >= 0; i-- { + if strL[i] < endQ { + return i + } + } + + return -1 +} + +type sortState int + +const ( + stateUnsorted sortState = iota + stateAlreadySorted +) + // Constructs a slice of dirty items, to use w/ memIterator. func (store *Store) dirtyItems(start, end []byte) { + startStr, endStr := conv.UnsafeBytesToStr(start), conv.UnsafeBytesToStr(end) + if startStr > endStr { + // Nothing to do here. + return + } + n := len(store.unsortedCache) unsorted := make([]*kv.Pair, 0) // If the unsortedCache is too big, its costs too much to determine @@ -189,24 +275,49 @@ func (store *Store) dirtyItems(start, end []byte) { // O(N^2) overhead. // Even without that, too many range checks eventually becomes more expensive // than just not having the cache. - if n >= 1024 { - for key := range store.unsortedCache { - cacheValue := store.cache[key] - unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value}) - } - } else { - // else do a linear scan to determine if the unsorted pairs are in the pool. + if n < 1024 { for key := range store.unsortedCache { if dbm.IsKeyInDomain(conv.UnsafeStrToBytes(key), start, end) { cacheValue := store.cache[key] unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value}) } } + store.clearUnsortedCacheSubset(unsorted, stateUnsorted) + return + } + + // Otherwise it is large so perform a modified binary search to find + // the target ranges for the keys that we should be looking for. + strL := make([]string, 0, n) + for key := range store.unsortedCache { + strL = append(strL, key) } - store.clearUnsortedCacheSubset(unsorted) + sort.Strings(strL) + + // Now find the values within the domain + // [start, end) + startIndex := findStartIndex(strL, startStr) + endIndex := findEndIndex(strL, endStr) + + if endIndex < 0 { + endIndex = len(strL) - 1 + } + if startIndex < 0 { + startIndex = 0 + } + + kvL := make([]*kv.Pair, 0) + for i := startIndex; i <= endIndex; i++ { + key := strL[i] + cacheValue := store.cache[key] + kvL = append(kvL, &kv.Pair{Key: []byte(key), Value: cacheValue.value}) + } + + // kvL was already sorted so pass it in as is. + store.clearUnsortedCacheSubset(kvL, stateAlreadySorted) } -func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair) { +func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair, sortState sortState) { n := len(store.unsortedCache) if len(unsorted) == n { // This pattern allows the Go compiler to emit the map clearing idiom for the entire map. for key := range store.unsortedCache { @@ -217,9 +328,12 @@ func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair) { delete(store.unsortedCache, conv.UnsafeBytesToStr(kv.Key)) } } - sort.Slice(unsorted, func(i, j int) bool { - return bytes.Compare(unsorted[i].Key, unsorted[j].Key) < 0 - }) + + if sortState == stateUnsorted { + sort.Slice(unsorted, func(i, j int) bool { + return bytes.Compare(unsorted[i].Key, unsorted[j].Key) < 0 + }) + } for _, item := range unsorted { if item.Value == nil {