fix!: store/cachekv: reduce growth factor for iterator ranging using binary searches #10024

Merged 1 commit on Oct 14, 2021
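
The core idea of the change, as a minimal standalone sketch (the helper name keysInRange and its inputs are illustrative, and it uses the standard library's sort.SearchStrings instead of the hand-rolled findStartIndex/findEndIndex helpers added in this PR): once the number of unsorted cache keys is large, sort the keys once and binary-search for both ends of the requested [start, end) range instead of testing every key against the range.

package main

import (
	"fmt"
	"sort"
)

// keysInRange returns the cache keys that fall in the half-open range
// [start, end): sort the keys once, then locate both boundaries with
// binary searches rather than checking every key linearly.
func keysInRange(cache map[string][]byte, start, end string) []string {
	if start > end {
		return nil // nothing to do for an inverted range
	}

	keys := make([]string, 0, len(cache))
	for k := range cache {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	lo := sort.SearchStrings(keys, start) // first index with keys[i] >= start
	hi := sort.SearchStrings(keys, end)   // first index with keys[i] >= end
	return keys[lo:hi]
}

func main() {
	cache := map[string][]byte{
		"apple": nil, "banana": nil, "cherry": nil, "melon": nil, "zebra": nil,
	}
	fmt.Println(keysInRange(cache, "b", "n")) // [banana cherry melon]
}
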
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -48,6 +48,7 @@ Ref: https://keepachangelog.com/en/1.0.0/
* [\#9837](https://github.com/cosmos/cosmos-sdk/issues/9837) `--generate-only` flag will accept the keyname now.
* [\#10045](https://github.com/cosmos/cosmos-sdk/pull/10045) Revert [#8549](https://github.com/cosmos/cosmos-sdk/pull/8549). Do not route grpc queries through Tendermint.
* [\#10326](https://github.com/cosmos/cosmos-sdk/pull/10326) `x/authz` add query all grants by granter query.
* [\#10024](https://github.com/cosmos/cosmos-sdk/pull/10024) `store/cachekv` performance improvement: reduce the growth factor for iterator ranging by using binary searches to find dirty items when the unsorted key count is >= 1024

### API Breaking Changes

141 changes: 141 additions & 0 deletions store/cachekv/search_test.go
@@ -0,0 +1,141 @@
package cachekv

import "testing"

func TestFindStartIndex(t *testing.T) {
tests := []struct {
name string
sortedL []string
query string
want int
}{
{
name: "non-existent value",
sortedL: []string{"a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},
query: "o",
want: 8,
},
{
name: "dupes start at index 0",
sortedL: []string{"a", "a", "a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},
query: "a",
want: 0,
},
{
name: "dupes start at non-index 0",
sortedL: []string{"a", "c", "c", "c", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},
query: "c",
want: 1,
},
{
name: "at end",
sortedL: []string{"a", "e", "u", "v", "w", "x", "y", "z"},
query: "z",
want: 7,
},
{
name: "dupes at end",
sortedL: []string{"a", "e", "u", "v", "w", "x", "y", "z", "z", "z", "z"},
query: "z",
want: 7,
},
{
name: "entirely dupes",
sortedL: []string{"z", "z", "z", "z", "z"},
query: "z",
want: 0,
},
{
name: "non-existent but within >=start",
sortedL: []string{"z", "z", "z", "z", "z"},
query: "p",
want: 0,
},
{
name: "non-existent and out of range",
sortedL: []string{"d", "e", "f", "g", "h"},
query: "z",
want: -1,
},
}

for _, tt := range tests {
tt := tt
t.Run(tt.name, func(t *testing.T) {
body := tt.sortedL
got := findStartIndex(body, tt.query)
if got != tt.want {
t.Fatalf("Got: %d, want: %d", got, tt.want)
}
})
}
}

func TestFindEndIndex(t *testing.T) {
tests := []struct {
name string
sortedL []string
query string
want int
}{
{
name: "non-existent value",
sortedL: []string{"a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},
query: "o",
want: 7,
},
{
name: "dupes start at index 0",
sortedL: []string{"a", "a", "a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},
query: "a",
want: 0,
},
{
name: "dupes start at non-index 0",
sortedL: []string{"a", "c", "c", "c", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},
query: "c",
want: 1,
},
{
name: "at end",
sortedL: []string{"a", "e", "u", "v", "w", "x", "y", "z"},
query: "z",
want: 7,
},
{
name: "dupes at end",
sortedL: []string{"a", "e", "u", "v", "w", "x", "y", "z", "z", "z", "z"},
query: "z",
want: 7,
},
{
name: "entirely dupes",
sortedL: []string{"z", "z", "z", "z", "z"},
query: "z",
want: 0,
},
{
name: "non-existent and out of range",
sortedL: []string{"z", "z", "z", "z", "z"},
query: "p",
want: -1,
},
{
name: "non-existent and out of range",
sortedL: []string{"d", "e", "f", "g", "h"},
query: "z",
want: 4,
},
}

for _, tt := range tests {
tt := tt
t.Run(tt.name, func(t *testing.T) {
body := tt.sortedL
got := findEndIndex(body, tt.query)
if got != tt.want {
t.Fatalf("Got: %d, want: %d", got, tt.want)
}
})
}
}
138 changes: 126 additions & 12 deletions store/cachekv/store.go
@@ -179,8 +179,94 @@ func (store *Store) iterator(start, end []byte, ascending bool) types.Iterator {
return newCacheMergeIterator(parent, cache, ascending)
}

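// findStartIndex returns the index of the first element in the sorted
// slice strL that is >= startQ, or -1 if every element is < startQ.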
func findStartIndex(strL []string, startQ string) int {
// Modified binary search to find the very first element >= startQ.
if len(strL) == 0 {
return -1
}

var left, right, mid int
right = len(strL) - 1
for left <= right {
mid = (left + right) >> 1
midStr := strL[mid]
if midStr == startQ {
// Handle the condition where there might be multiple values equal to startQ.
// We are looking for the very last value < startQ, so that i+1 will be the
// index of the first element >= startQ.
for i := mid - 1; i >= 0; i-- {
if strL[i] != midStr {
return i + 1
}
}
return 0
}
if midStr < startQ {
left = mid + 1
} else { // midStr > startQ
right = mid - 1
}
}
if left >= 0 && left < len(strL) && strL[left] >= startQ {
return left
}
return -1
}

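// findEndIndex returns the index of the first element equal to endQ if
// endQ occurs in the sorted slice strL; otherwise it returns the index
// of the last element < endQ, or -1 if no such element exists.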
func findEndIndex(strL []string, endQ string) int {
if len(strL) == 0 {
return -1
}

// Modified binary search to find the first element equal to endQ if it
// exists, otherwise the last element < endQ.
var left, right, mid int
right = len(strL) - 1
for left <= right {
mid = (left + right) >> 1
midStr := strL[mid]
if midStr == endQ {
// Handle the condition where there might be multiple values equal to endQ.
// We are looking for the very last value < midStr, so that i+1 will be the
// index of the first element equal to endQ.
for i := mid - 1; i >= 0; i-- {
if strL[i] < midStr {
return i + 1
}
}
return 0
}
if midStr < endQ {
left = mid + 1
} else { // midStr > endQ
right = mid - 1
}
}

// Binary search failed, so find the largest value less than endQ.
for i := right; i >= 0; i-- {
if strL[i] < endQ {
return i
}
}

return -1
}

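// sortState records whether the pairs handed to clearUnsortedCacheSubset
// still need sorting (stateUnsorted, the linear-scan path) or were built
// from an already-sorted key slice (stateAlreadySorted, the binary-search
// path), so that the redundant sort.Slice call can be skipped.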
type sortState int

const (
stateUnsorted sortState = iota
stateAlreadySorted
)

// Constructs a slice of dirty items, to use w/ memIterator.
func (store *Store) dirtyItems(start, end []byte) {
startStr, endStr := conv.UnsafeBytesToStr(start), conv.UnsafeBytesToStr(end)
if startStr > endStr {
// Nothing to do here.
return
}

n := len(store.unsortedCache)
unsorted := make([]*kv.Pair, 0)
// If the unsortedCache is too big, it costs too much to determine
@@ -189,24 +275,49 @@ func (store *Store) dirtyItems(start, end []byte) {
// O(N^2) overhead.
// Even without that, too many range checks eventually becomes more expensive
// than just not having the cache.
if n >= 1024 {
for key := range store.unsortedCache {
cacheValue := store.cache[key]
unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
}
} else {
// else do a linear scan to determine if the unsorted pairs are in the pool.
if n < 1024 {
for key := range store.unsortedCache {
if dbm.IsKeyInDomain(conv.UnsafeStrToBytes(key), start, end) {
cacheValue := store.cache[key]
unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
}
}
store.clearUnsortedCacheSubset(unsorted, stateUnsorted)
return
}

// Otherwise the cache is large, so perform modified binary searches to find
// the target range of keys that we should be looking for.
strL := make([]string, 0, n)
for key := range store.unsortedCache {
strL = append(strL, key)
}
store.clearUnsortedCacheSubset(unsorted)
sort.Strings(strL)

// Now find the values within the domain
// [start, end)
startIndex := findStartIndex(strL, startStr)
endIndex := findEndIndex(strL, endStr)

if endIndex < 0 {
endIndex = len(strL) - 1
}
if startIndex < 0 {
startIndex = 0
}

kvL := make([]*kv.Pair, 0)
for i := startIndex; i <= endIndex; i++ {
key := strL[i]
cacheValue := store.cache[key]
kvL = append(kvL, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
}

// kvL was already sorted so pass it in as is.
store.clearUnsortedCacheSubset(kvL, stateAlreadySorted)
}

func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair) {
func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair, sortState sortState) {
n := len(store.unsortedCache)
if len(unsorted) == n { // This pattern allows the Go compiler to emit the map clearing idiom for the entire map.
for key := range store.unsortedCache {
Expand All @@ -217,9 +328,12 @@ func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair) {
delete(store.unsortedCache, conv.UnsafeBytesToStr(kv.Key))
}
}
sort.Slice(unsorted, func(i, j int) bool {
return bytes.Compare(unsorted[i].Key, unsorted[j].Key) < 0
})

if sortState == stateUnsorted {
sort.Slice(unsorted, func(i, j int) bool {
return bytes.Compare(unsorted[i].Key, unsorted[j].Key) < 0
})
}

for _, item := range unsorted {
if item.Value == nil {
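
A rough sketch of how findStartIndex could be exercised at the >= 1024-key scale this change targets (a hypothetical benchmark, assumed to live in package cachekv alongside search_test.go above; it is not part of the PR):

package cachekv

import (
	"fmt"
	"testing"
)

// BenchmarkFindStartIndex runs the binary-search helper against a large,
// already-sorted key slice, the regime in which dirtyItems switches away
// from the linear scan.
func BenchmarkFindStartIndex(b *testing.B) {
	keys := make([]string, 0, 4096)
	for i := 0; i < 4096; i++ {
		// Zero-padded keys sort lexicographically in numeric order.
		keys = append(keys, fmt.Sprintf("key-%08d", i))
	}

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if got := findStartIndex(keys, "key-00002048"); got != 2048 {
			b.Fatalf("unexpected index: %d", got)
		}
	}
}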