planner: prepare AccessPath.PartialAlternativeIndexPaths to handle MV indexes (#58397)

ref #58361
pingcap · Dec 21, 2024 · d0ea9e5 · d0ea9e5
1 parent e53ec59
commit d0ea9e5
Show file tree

Hide file tree

Showing 8 changed files with 340 additions and 102 deletions.
diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go
@@ -918,11 +918,17 @@ func matchPropForIndexMergeAlternatives(ds *logicalop.DataSource, path *util.Acc
 	//  path2: {pk}
 	// if we choose pk in the first path, then path2 has no choice but pk, this will result in all single index failure.
 	// so we should collect all match prop paths down, stored as matchIdxes here.
-	for pathIdx, oneItemAlternatives := range path.PartialAlternativeIndexPaths {
+	for pathIdx, oneORBranch := range path.PartialAlternativeIndexPaths {
 		matchIdxes := make([]int, 0, 1)
-		for i, oneIndexAlternativePath := range oneItemAlternatives {
+		for i, oneAlternative := range oneORBranch {
 			// if there is some sort items and this path doesn't match this prop, continue.
-			if !noSortItem && !isMatchProp(ds, oneIndexAlternativePath, prop) {
+			match := true
+			for _, oneAccessPath := range oneAlternative {
+				if !noSortItem && !isMatchProp(ds, oneAccessPath, prop) {
+					match = false
+				}
+			}
+			if !match {
 				continue
 			}
 			// two possibility here:
@@ -937,26 +943,18 @@ func matchPropForIndexMergeAlternatives(ds *logicalop.DataSource, path *util.Acc
 		}
 		if len(matchIdxes) > 1 {
 			// if matchIdxes greater than 1, we should sort this match alternative path by its CountAfterAccess.
-			tmpOneItemAlternatives := oneItemAlternatives
+			alternatives := oneORBranch
 			slices.SortStableFunc(matchIdxes, func(a, b int) int {
-				lhsCountAfter := tmpOneItemAlternatives[a].CountAfterAccess
-				if len(tmpOneItemAlternatives[a].IndexFilters) > 0 {
-					lhsCountAfter = tmpOneItemAlternatives[a].CountAfterIndex
-				}
-				rhsCountAfter := tmpOneItemAlternatives[b].CountAfterAccess
-				if len(tmpOneItemAlternatives[b].IndexFilters) > 0 {
-					rhsCountAfter = tmpOneItemAlternatives[b].CountAfterIndex
-				}
-				res := cmp.Compare(lhsCountAfter, rhsCountAfter)
+				res := cmpAlternativesByRowCount(alternatives[a], alternatives[b])
 				if res != 0 {
 					return res
 				}
 				// If CountAfterAccess is same, any path is global index should be the first one.
 				var lIsGlobalIndex, rIsGlobalIndex int
-				if !tmpOneItemAlternatives[a].IsTablePath() && tmpOneItemAlternatives[a].Index.Global {
+				if !alternatives[a][0].IsTablePath() && alternatives[a][0].Index.Global {
 					lIsGlobalIndex = 1
 				}
-				if !tmpOneItemAlternatives[b].IsTablePath() && tmpOneItemAlternatives[b].Index.Global {
+				if !alternatives[b][0].IsTablePath() && alternatives[b][0].Index.Global {
 					rIsGlobalIndex = 1
 				}
 				return -cmp.Compare(lIsGlobalIndex, rIsGlobalIndex)
@@ -983,14 +981,14 @@ func matchPropForIndexMergeAlternatives(ds *logicalop.DataSource, path *util.Acc
 		// By this way, a distinguished one is better.
 		for _, oneIdx := range matchIdxes.matchIdx {
 			var indexID int64
-			if alternatives[oneIdx].IsTablePath() {
+			if alternatives[oneIdx][0].IsTablePath() {
 				indexID = -1
 			} else {
-				indexID = alternatives[oneIdx].Index.ID
+				indexID = alternatives[oneIdx][0].Index.ID
 			}
 			if _, ok := usedIndexMap[indexID]; !ok {
 				// try to avoid all index partial paths are all about a single index.
-				determinedIndexPartialPaths = append(determinedIndexPartialPaths, alternatives[oneIdx].Clone())
+				determinedIndexPartialPaths = append(determinedIndexPartialPaths, alternatives[oneIdx][0].Clone())
 				usedIndexMap[indexID] = struct{}{}
 				found = true
 				break
@@ -999,7 +997,8 @@ func matchPropForIndexMergeAlternatives(ds *logicalop.DataSource, path *util.Acc
 		if !found {
 			// just pick the same name index (just using the first one is ok), in case that there may be some other
 			// picked distinctive index path for other partial paths latter.
-			determinedIndexPartialPaths = append(determinedIndexPartialPaths, alternatives[matchIdxes.matchIdx[0]].Clone())
+			determinedIndexPartialPaths = append(determinedIndexPartialPaths,
+				alternatives[matchIdxes.matchIdx[0]][0].Clone())
 			// uedIndexMap[oneItemAlternatives[oneIdx].Index.ID] = struct{}{} must already be colored.
 		}
 	}

diff --git a/pkg/planner/core/indexmerge_path.go b/pkg/planner/core/indexmerge_path.go
@@ -438,7 +438,14 @@ func buildIndexMergeOrPath(
 	current int,
 	shouldKeepCurrentFilter bool,
 ) *util.AccessPath {
-	indexMergePath := &util.AccessPath{PartialAlternativeIndexPaths: partialAlternativePaths}
+	tmp := make([][][]*util.AccessPath, len(partialAlternativePaths))
+	for i, orBranch := range partialAlternativePaths {
+		tmp[i] = make([][]*util.AccessPath, len(orBranch))
+		for j, alternative := range orBranch {
+			tmp[i][j] = []*util.AccessPath{alternative}
+		}
+	}
+	indexMergePath := &util.AccessPath{PartialAlternativeIndexPaths: tmp}
 	indexMergePath.TableFilters = append(indexMergePath.TableFilters, filters[:current]...)
 	indexMergePath.TableFilters = append(indexMergePath.TableFilters, filters[current+1:]...)
 	// since shouldKeepCurrentFilter may be changed in alternative paths converging, kept the filer expression anyway here.

diff --git a/pkg/planner/core/indexmerge_test.go b/pkg/planner/core/indexmerge_test.go
@@ -16,6 +16,7 @@ package core
 
 import (
 	"context"
+	"strings"
 	"testing"
 
 	"github.com/pingcap/errors"
@@ -37,38 +38,37 @@ func getIndexMergePathDigest(ctx expression.EvalContext, paths []*util.AccessPat
 	if len(paths) == startIndex {
 		return "[]"
 	}
-	idxMergeDigest := "["
-	for i := startIndex; i < len(paths); i++ {
-		if i != startIndex {
-			idxMergeDigest += ","
-		}
-		path := paths[i]
-		idxMergeDigest += "{Idxs:["
-		for j := 0; j < len(path.PartialAlternativeIndexPaths); j++ {
-			if j > 0 {
-				idxMergeDigest += ","
-			}
-			idxMergeDigest += "{"
+	resultStrs := make([]string, 0, len(paths)-startIndex)
+	for _, path := range paths[startIndex:] {
+		partialPathsStrs := make([]string, 0, len(path.PartialIndexPaths))
+		for _, partial := range path.PartialAlternativeIndexPaths {
 			// for every ONE index partial alternatives, output a set.
-			for k, one := range path.PartialAlternativeIndexPaths[j] {
-				if k != 0 {
-					idxMergeDigest += ","
+			oneAlternativeStrs := make([]string, 0, len(partial))
+			for _, oneAlternative := range partial {
+				if len(oneAlternative) == 1 {
+					oneAlternativeStrs = append(oneAlternativeStrs, oneAlternative[0].Index.Name.L)
+					continue
+				}
+				pathStrs := make([]string, 0, len(oneAlternative))
+				for _, singlePath := range oneAlternative {
+					pathStrs = append(pathStrs, singlePath.Index.Name.L)
 				}
-				idxMergeDigest += one.Index.Name.L
+				oneAlternativeStrs = append(oneAlternativeStrs, "{"+strings.Join(pathStrs, ",")+"}")
 			}
-			idxMergeDigest += "}"
+			partialPathsStrs = append(partialPathsStrs, "{"+strings.Join(oneAlternativeStrs, ",")+"}")
 		}
-		idxMergeDigest += "],TbFilters:["
-		for j := 0; j < len(path.TableFilters); j++ {
-			if j > 0 {
-				idxMergeDigest += ","
-			}
-			idxMergeDigest += path.TableFilters[j].StringWithCtx(ctx, errors.RedactLogDisable)
+
+		filterStrs := make([]string, 0, len(path.TableFilters))
+		for _, filter := range path.TableFilters {
+			filterStrs = append(filterStrs, filter.StringWithCtx(ctx, errors.RedactLogDisable))
 		}
-		idxMergeDigest += "]}"
+		resultStrs = append(resultStrs, "{Idxs:["+
+			strings.Join(partialPathsStrs, ",")+
+			"],TbFilters:["+
+			strings.Join(filterStrs, ",")+
+			"]}")
 	}
-	idxMergeDigest += "]"
-	return idxMergeDigest
+	return "[" + strings.Join(resultStrs, ",") + "]"
 }
 
 func TestIndexMergePathGeneration(t *testing.T) {

diff --git a/pkg/planner/core/indexmerge_unfinished_path.go b/pkg/planner/core/indexmerge_unfinished_path.go
@@ -15,7 +15,7 @@
 package core
 
 import (
-	"math"
+	"cmp"
 	"slices"
 
 	"github.com/pingcap/tidb/pkg/expression"
@@ -135,24 +135,15 @@ func initUnfinishedPathsFromExpr(
 		ret[i].index = path.Index
 		// case 1: try to use the previous logic to handle non-mv index
 		if !isMVIndexPath(path) {
-			// generateNormalIndexPartialPaths4DNF is introduced for handle a slice of DNF items and a slice of
-			// candidate AccessPaths before, now we reuse it to handle single filter and single candidate AccessPath,
-			// so we need to wrap them in a slice here.
 			partialPath, needSelection := generateNormalIndexPartialPath(
 				ds,
 				expr,
 				path,
 			)
 			if partialPath != nil {
 				ret[i].initedWithValidRange = true
-				ret[i].usableFilters = partialPath.AccessConds
 				ret[i].needKeepFilter = needSelection
-				// Here is a special case, if this expr is always false and this path is a dual path, it will run to
-				// this point, and paths[0].AccessConds and paths[0].Ranges will be nil.
-				// In this case, we set the accessFilters to the original expr.
-				if len(ret[i].usableFilters) <= 0 {
-					ret[i].usableFilters = []expression.Expression{expr}
-				}
+				ret[i].usableFilters = []expression.Expression{expr}
 				continue
 			}
 		}
@@ -249,7 +240,6 @@ func handleTopLevelANDList(
 /*
 Example (consistent with the one in genUnfinishedPathFromORList()):
 
-	idx1: (a, j->'$.a' unsigned array)  idx2: (j->'$.b' unsigned array, a
 	idx1: (a, j->'$.a' unsigned array)  idx2: (j->'$.b' unsigned array, a)
 	Input:
 	  indexMergePath:
@@ -316,16 +306,16 @@ func buildIntoAccessPath(
 	// produce several partial paths).
 	partialPaths := make([]*util.AccessPath, 0, len(indexMergePath.orBranches))
 
-	// for each partial path
-	for _, unfinishedPathList := range indexMergePath.orBranches {
-		var (
-			bestPaths            []*util.AccessPath
-			bestCountAfterAccess float64
-			bestNeedSelection    bool
-		)
+	// for each OR branch
+	for _, orBranch := range indexMergePath.orBranches {
+		type alternative struct {
+			paths         []*util.AccessPath
+			needSelection bool
+		}
+		var alternativesForORBranch []alternative
 
-		// for each possible access path of this partial path
-		for i, unfinishedPath := range unfinishedPathList {
+		// for each alternative of this OR branch
+		for i, unfinishedPath := range orBranch {
 			if unfinishedPath == nil {
 				continue
 			}
@@ -381,28 +371,23 @@ func buildIntoAccessPath(
 				paths = []*util.AccessPath{path}
 			}
 			needSelection = needSelection || unfinishedPath.needKeepFilter
-			// If there are several partial paths, we use the max CountAfterAccess for comparison.
-			maxCountAfterAccess := -1.0
-			for _, p := range paths {
-				maxCountAfterAccess = math.Max(maxCountAfterAccess, p.CountAfterAccess)
-			}
-			// Choose the best partial path for this partial path.
-			if len(bestPaths) == 0 {
-				bestPaths = paths
-				bestCountAfterAccess = maxCountAfterAccess
-				bestNeedSelection = needSelection
-			} else if bestCountAfterAccess > maxCountAfterAccess {
-				bestPaths = paths
-				bestCountAfterAccess = maxCountAfterAccess
-				bestNeedSelection = needSelection
-			}
+
+			alternativesForORBranch = append(alternativesForORBranch, alternative{paths, needSelection})
 		}
-		if len(bestPaths) == 0 {
+		if len(alternativesForORBranch) == 0 {
+			return nil
+		}
+
+		bestAlternative := slices.MinFunc(
+			alternativesForORBranch, func(a, b alternative) int {
+				return cmpAlternativesByRowCount(a.paths, b.paths)
+			})
+		if len(bestAlternative.paths) == 0 {
 			return nil
 		}
 		// Succeeded to get valid path(s) for this partial path.
-		partialPaths = append(partialPaths, bestPaths...)
-		needSelectionGlobal = needSelectionGlobal || bestNeedSelection
+		partialPaths = append(partialPaths, bestAlternative.paths...)
+		needSelectionGlobal = needSelectionGlobal || bestAlternative.needSelection
 	}
 
 	// 2. Collect the final table filter
@@ -417,3 +402,21 @@ func buildIntoAccessPath(
 	ret := buildPartialPathUp4MVIndex(partialPaths, false, tableFilter, ds.TableStats.HistColl)
 	return ret
 }
+
+func cmpAlternativesByRowCount(a, b []*util.AccessPath) int {
+	// Compares two alternatives by row count. If one alternative consists of multiple AccessPaths, the maximum row count among them is used for the comparison.
+	getMaxRowCountFromPaths := func(paths []*util.AccessPath) float64 {
+		maxRowCount := 0.0 // an alternative with no paths therefore counts as 0 rows
+		for _, path := range paths {
+			rowCount := path.CountAfterAccess
+			if len(path.IndexFilters) > 0 { // when the path has index filters, CountAfterIndex is used instead
+				rowCount = path.CountAfterIndex
+			}
+			maxRowCount = max(maxRowCount, rowCount)
+		}
+		return maxRowCount
+	}
+	lhsRowCount := getMaxRowCountFromPaths(a)
+	rhsRowCount := getMaxRowCountFromPaths(b)
+	return cmp.Compare(lhsRowCount, rhsRowCount) // negative when a has the smaller row count
+}
diff --git a/pkg/planner/util/misc.go b/pkg/planner/util/misc.go
@@ -34,6 +34,19 @@ import (
 	"github.com/pingcap/tidb/pkg/util/ranger"
 )
 
+// SliceDeepClone returns a new slice holding item.Clone() for every element of s, in order; a nil s yields nil.
+// The element type T must implement func (T) Clone() T.
+func SliceDeepClone[T interface{ Clone() T }](s []T) []T {
+	if s == nil {
+		return nil
+	}
+	cloned := make([]T, 0, len(s))
+	for _, item := range s {
+		cloned = append(cloned, item.Clone())
+	}
+	return cloned
+}
+
 // CloneFieldNames uses types.FieldName.Clone to clone a slice of types.FieldName.
 func CloneFieldNames(names []*types.FieldName) []*types.FieldName {
 	if names == nil {

diff --git a/pkg/planner/util/path.go b/pkg/planner/util/path.go
@@ -52,17 +52,41 @@ type AccessPath struct {
 	// If there are extra filters, store them in TableFilters.
 	PartialIndexPaths []*AccessPath
 
-	// ************************************************** special field below *********************************************************
-	// For every dnf/cnf item, there maybe several matched partial index paths to be determined later in property detecting and cost model.
-	// when PartialAlternativeIndexPaths is not empty, it means a special state for index merge path, and it can't have PartialIndexPaths
-	// at same time. Normal single index or table path also doesn't use this field.
-	PartialAlternativeIndexPaths [][]*AccessPath
-	// KeepIndexMergeORSourceFilter and IndexMergeORSourceFilter are only used with PartialAlternativeIndexPaths, which means for
-	// the new state/type of access path. (undetermined index merge path)
+	// The 3 fields below are for another case for building IndexMerge path besides AccessPath.PartialIndexPaths.
+	// Currently, it only applies to OR type IndexMerge.
+	// For every item in the OR list, there might be multiple candidate paths that satisfy the filters.
+	// The AccessPath.PartialIndexPaths case decides on one of them when building AccessPath. But here, we keep all the
+	// alternatives and make the decision later in findBestTask (see matchPropForIndexMergeAlternatives()).
+	// It's because we only know the required Sort property at that time. Delaying the decision to findBestTask can make
+	// us able to consider and try to satisfy the required Sort property.
+	/* For example:
+		create table t (a int, b int, c int, key a(a), key b(b), key ac(a, c), key bc(b, c));
+		explain format='verbose' select * from t where a=1 or b=1 order by c;
+	For a=1, it has two partial alternative paths: [a, ac]
+	For b=1, it has two partial alternative paths: [b, bc]
+	Then we build such an AccessPath:
+		AccessPath {
+			PartialAlternativeIndexPaths: [[[a], [ac]], [[b], [bc]]]
+			IndexMergeORSourceFilter: a = 1 or b = 1
+		}
+	*/
+
+	// PartialAlternativeIndexPaths stores all the alternative paths for each OR branch.
+	// meaning of the 3 dimensions:
+	// each OR branch -> each alternative for this OR branch -> each access path of this alternative (One JSON filter on
+	// MV index may build into multiple partial paths. For example, json_overlaps(a, '[1, 2, 3]') builds into 3 partial
+	// paths in the final plan. For non-MV index, each alternative only has one AccessPath.)
+	PartialAlternativeIndexPaths [][][]*AccessPath
+	// KeepIndexMergeORSourceFilter indicates if we need to keep IndexMergeORSourceFilter in the final Selection of the
+	// IndexMerge plan.
+	// It has 2 cases:
+	// 1. The AccessPath.PartialAlternativeIndexPaths is set.
+	// If this field is true, the final plan should keep the filter.
+	// 2. It's a child of AccessPath.PartialAlternativeIndexPaths.
+	// If the final plan contains this alternative, it should keep the filter.
 	KeepIndexMergeORSourceFilter bool
-	// IndexMergeORSourceFilter indicates that there are some expression inside this dnf that couldn't be pushed down, and we should keep the entire dnf above.
+	// IndexMergeORSourceFilter is the original OR list for building the IndexMerge path.
 	IndexMergeORSourceFilter expression.Expression
-	// ********************************************************************************************************************************
 
 	// IndexMergeIsIntersection means whether it's intersection type or union type IndexMerge path.
 	// It's only valid for a IndexMerge path.
@@ -131,15 +155,15 @@ func (path *AccessPath) Clone() *AccessPath {
 	if path.IndexMergeORSourceFilter != nil {
 		ret.IndexMergeORSourceFilter = path.IndexMergeORSourceFilter.Clone()
 	}
-	for _, partialPath := range path.PartialIndexPaths {
-		ret.PartialIndexPaths = append(ret.PartialIndexPaths, partialPath.Clone())
-	}
-	for _, onePartialAlternative := range path.PartialAlternativeIndexPaths {
-		tmp := make([]*AccessPath, 0, len(onePartialAlternative))
-		for _, oneAlternative := range onePartialAlternative {
-			tmp = append(tmp, oneAlternative.Clone())
+	ret.PartialIndexPaths = SliceDeepClone(path.PartialIndexPaths)
+	ret.PartialAlternativeIndexPaths = make([][][]*AccessPath, 0, len(path.PartialAlternativeIndexPaths))
+	for _, oneORBranch := range path.PartialAlternativeIndexPaths {
+		clonedORBranch := make([][]*AccessPath, 0, len(oneORBranch))
+		for _, oneAlternative := range oneORBranch {
+			clonedOneAlternative := SliceDeepClone(oneAlternative)
+			clonedORBranch = append(clonedORBranch, clonedOneAlternative)
 		}
-		ret.PartialAlternativeIndexPaths = append(ret.PartialAlternativeIndexPaths, tmp)
+		ret.PartialAlternativeIndexPaths = append(ret.PartialAlternativeIndexPaths, clonedORBranch)
 	}
 	return ret
 }