Skip to content

Commit

Permalink
planner: enhance stats derive suitable for memo derive and traditiona…
Browse files Browse the repository at this point in the history
…l logical tree derive (#58252)

ref #51664
  • Loading branch information
AilinKid authored Dec 24, 2024
1 parent a21c95b commit 8ecdb54
Show file tree
Hide file tree
Showing 32 changed files with 143 additions and 127 deletions.
25 changes: 12 additions & 13 deletions pkg/planner/cascades/memo/group_expr.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,19 +167,18 @@ func (e *GroupExpression) DeriveLogicalProp() (err error) {
// todo: functional dependency
tmpSchema := e.LogicalPlan.Schema()
tmpStats := e.LogicalPlan.StatsInfo()
// only for those logical ops newly created by XForm, we should rebuild their stats;
// in the memo init phase, all logical ops have already maintained their stats, so just use them.
if tmpStats == nil {
skipDeriveStats := false
failpoint.Inject("MockPlanSkipMemoDeriveStats", func(val failpoint.Value) {
skipDeriveStats = val.(bool)
})
if !skipDeriveStats {
// here can only derive the basic stats from bottom up, we can't pass any colGroups required by parents.
tmpStats, err = e.LogicalPlan.DeriveStats(childStats, tmpSchema, childSchema, nil)
if err != nil {
return err
}
// the leaf nodes may already have their stats from the join reorder estimation phase, while
// their group ndv signal is passed in CollectPredicateColumnsPoint, which is applied
// after the join reorder rule, so we should build their group ndv again (implied in DeriveStats).
skipDeriveStats := false
failpoint.Inject("MockPlanSkipMemoDeriveStats", func(val failpoint.Value) {
skipDeriveStats = val.(bool)
})
if !skipDeriveStats {
// here can only derive the basic stats from bottom up, we can't pass any colGroups required by parents.
tmpStats, err = e.LogicalPlan.DeriveStats(childStats, tmpSchema, childSchema)
if err != nil {
return err
}
}
e.GetGroup().GetLogicalProperty().Schema = tmpSchema
Expand Down
2 changes: 1 addition & 1 deletion pkg/planner/cascades/old/optimize.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ func (opt *Optimizer) fillGroupStats(g *memo.Group) (err error) {
childSchema[i] = childGroup.Prop.Schema
}
planNode := expr.ExprNode
g.Prop.Stats, err = planNode.DeriveStats(childStats, g.Prop.Schema, childSchema, nil)
g.Prop.Stats, err = planNode.DeriveStats(childStats, g.Prop.Schema, childSchema)
return err
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/planner/core/base/plan_base.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ type LogicalPlan interface {
// DeriveStats derives statistic info for current plan node given child stats.
// We need selfSchema and childSchema here because they allow this method to be used in
// the cascades planner, where a LogicalPlan might not record its children or schema.
DeriveStats(childStats []*property.StatsInfo, selfSchema *expression.Schema, childSchema []*expression.Schema, colGroups [][]*expression.Column) (*property.StatsInfo, error)
DeriveStats(childStats []*property.StatsInfo, selfSchema *expression.Schema, childSchema []*expression.Schema) (*property.StatsInfo, error)

// ExtractColGroups extracts column groups from child operator whose NDVs are required by the current operator.
// For example, if current operator is LogicalAggregation of `Group By a, b`, we indicate the child operators to maintain
Expand Down
1 change: 1 addition & 0 deletions pkg/planner/core/casetest/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ go_test(
"//pkg/planner/core/base",
"//pkg/planner/core/operator/logicalop",
"//pkg/planner/core/resolve",
"//pkg/planner/core/rule",
"//pkg/planner/property",
"//pkg/statistics/handle/ddl/testutil",
"//pkg/testkit",
Expand Down
1 change: 1 addition & 0 deletions pkg/planner/core/casetest/cascades/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ go_test(
"//pkg/planner/core",
"//pkg/planner/core/base",
"//pkg/planner/core/resolve",
"//pkg/planner/core/rule",
"//pkg/testkit",
"//pkg/testkit/testdata",
"//pkg/testkit/testmain",
Expand Down
15 changes: 7 additions & 8 deletions pkg/planner/core/casetest/cascades/memo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"bytes"
"context"
"fmt"
"strconv"
"testing"

"github.com/pingcap/tidb/pkg/parser"
Expand All @@ -26,6 +27,7 @@ import (
plannercore "github.com/pingcap/tidb/pkg/planner/core"
"github.com/pingcap/tidb/pkg/planner/core/base"
"github.com/pingcap/tidb/pkg/planner/core/resolve"
"github.com/pingcap/tidb/pkg/planner/core/rule"
"github.com/pingcap/tidb/pkg/testkit"
"github.com/pingcap/tidb/pkg/testkit/testdata"
"github.com/pingcap/tidb/pkg/util/hint"
Expand Down Expand Up @@ -63,12 +65,11 @@ func TestDeriveStats(t *testing.T) {
tk.Session().GetSessionVars().PlanColumnID.Store(0)
builder, _ := plannercore.NewPlanBuilder().Init(tk.Session().GetPlanCtx(), ret.InfoSchema, hint.NewQBHintHandler(nil))
p, err := builder.Build(ctx, nodeW)
p.SCtx().GetSessionVars().StmtCtx.OriginalSQL = tt
require.NoError(t, err, tt)
p, err = plannercore.LogicalOptimizeTest(ctx, builder.GetOptFlag(), p.(base.LogicalPlan))
p, err = plannercore.LogicalOptimizeTest(ctx, builder.GetOptFlag()|rule.FlagCollectPredicateColumnsPoint, p.(base.LogicalPlan))
require.NoError(t, err, tt)
lp := p.(base.LogicalPlan)
_, err = plannercore.RecursiveDeriveStats4Test(lp)
require.NoError(t, err, tt)
// once stats derivation is done, the top-down propagation of group ndv is done as well, so in the bottom-up
// building phase of the memo, we no longer need the upper operator's group cols to be passed down.
mm := memo.NewMemo()
Expand Down Expand Up @@ -117,7 +118,7 @@ func TestDeriveStats(t *testing.T) {
output[i].SQL = tt
output[i].Str = strs
})
require.Equal(t, output[i].Str, strs, "case i "+tt)
require.Equal(t, output[i].Str, strs, "case i:"+strconv.Itoa(i)+" "+tt)
}
}

Expand Down Expand Up @@ -157,11 +158,9 @@ func TestGroupNDVCols(t *testing.T) {
builder, _ := plannercore.NewPlanBuilder().Init(tk.Session().GetPlanCtx(), ret.InfoSchema, hint.NewQBHintHandler(nil))
p, err := builder.Build(ctx, nodeW)
require.NoError(t, err, tt)
p, err = plannercore.LogicalOptimizeTest(ctx, builder.GetOptFlag(), p.(base.LogicalPlan))
p, err = plannercore.LogicalOptimizeTest(ctx, builder.GetOptFlag()|rule.FlagCollectPredicateColumnsPoint, p.(base.LogicalPlan))
require.NoError(t, err, tt)
lp := p.(base.LogicalPlan)
_, err = plannercore.RecursiveDeriveStats4Test(lp)
require.NoError(t, err, tt)
// once stats derivation is done, the top-down propagation of group ndv is done as well, so in the bottom-up
// building phase of the memo, we no longer need the upper operator's group cols to be passed down.
mm := memo.NewMemo()
Expand Down Expand Up @@ -209,6 +208,6 @@ func TestGroupNDVCols(t *testing.T) {
output[i].SQL = tt
output[i].Str = strs
})
require.Equal(t, output[i].Str, strs, "case i "+tt)
require.Equal(t, output[i].Str, strs, "case i:"+strconv.Itoa(i)+" "+tt)
}
}
Loading

0 comments on commit 8ecdb54

Please sign in to comment.