From c607437270c88a842e1f238cca6ec9acb461bce2 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Fri, 12 Jul 2024 12:53:05 +0200 Subject: [PATCH] Improve `CommonSubexprEliminate` rule with surely and conditionally evaluated stats (#11357) * Improve `CommonSubexprEliminate` rule with surely and conditionally evaluated stats * remove expression tree hashing as no longer needed * address review comments * add negative tests --- datafusion/expr/src/expr.rs | 39 ++- .../optimizer/src/common_subexpr_eliminate.rs | 256 +++++++++++------- .../optimizer/src/optimize_projections/mod.rs | 10 +- datafusion/sqllogictest/test_files/cse.slt | 88 +++++- datafusion/sqllogictest/test_files/select.slt | 20 +- .../sqllogictest/test_files/tpch/q14.slt.part | 33 +-- 6 files changed, 298 insertions(+), 148 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index ecece6dbfce7f..a344e621ddb12 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -17,7 +17,7 @@ //! Logical Expressions: [`Expr`] -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use std::fmt::{self, Display, Formatter, Write}; use std::hash::{Hash, Hasher}; use std::mem; @@ -1380,7 +1380,7 @@ impl Expr { /// // refs contains "a" and "b" /// assert_eq!(refs.len(), 2); /// assert!(refs.contains(&Column::new_unqualified("a"))); - /// assert!(refs.contains(&Column::new_unqualified("b"))); + /// assert!(refs.contains(&Column::new_unqualified("b"))); /// ``` pub fn column_refs(&self) -> HashSet<&Column> { let mut using_columns = HashSet::new(); @@ -1401,6 +1401,41 @@ impl Expr { .expect("traversal is infallable"); } + /// Return all references to columns and their occurrence counts in the expression. + /// + /// # Example + /// ``` + /// # use std::collections::HashMap; + /// # use datafusion_common::Column; + /// # use datafusion_expr::col; + /// // For an expression `a + (b * a)` + /// let expr = col("a") + (col("b") * col("a")); + /// let mut refs = expr.column_refs_counts(); + /// // refs contains "a" and "b" + /// assert_eq!(refs.len(), 2); + /// assert_eq!(*refs.get(&Column::new_unqualified("a")).unwrap(), 2); + /// assert_eq!(*refs.get(&Column::new_unqualified("b")).unwrap(), 1); + /// ``` + pub fn column_refs_counts(&self) -> HashMap<&Column, usize> { + let mut map = HashMap::new(); + self.add_column_ref_counts(&mut map); + map + } + + /// Adds references to all columns and their occurrence counts in the expression to + /// the map. + /// + /// See [`Self::column_refs_counts`] for details + pub fn add_column_ref_counts<'a>(&'a self, map: &mut HashMap<&'a Column, usize>) { + self.apply(|expr| { + if let Expr::Column(col) = expr { + *map.entry(col).or_default() += 1; + } + Ok(TreeNodeRecursion::Continue) + }) + .expect("traversal is infallable"); + } + /// Returns true if there are any column references in this Expr pub fn any_column_refs(&self) -> bool { self.exists(|expr| Ok(matches!(expr, Expr::Column(_)))) diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 721987b917d4c..e4b36652974d7 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -33,12 +33,12 @@ use datafusion_common::tree_node::{ use datafusion_common::{ internal_datafusion_err, qualified_name, Column, DFSchema, DFSchemaRef, Result, }; -use datafusion_expr::expr::Alias; +use datafusion_expr::expr::{Alias, ScalarFunction}; use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::logical_plan::{ Aggregate, Filter, LogicalPlan, Projection, Sort, Window, }; -use datafusion_expr::{col, Expr, ExprSchemable}; +use datafusion_expr::{col, BinaryExpr, Case, Expr, ExprSchemable, Operator}; use indexmap::IndexMap; const CSE_PREFIX: &str = "__common_expr"; @@ -56,13 +56,9 @@ struct Identifier<'n> { } impl<'n> Identifier<'n> { - fn new(expr: &'n Expr, is_tree: bool, random_state: &RandomState) -> Self { + fn new(expr: &'n Expr, random_state: &RandomState) -> Self { let mut hasher = random_state.build_hasher(); - if is_tree { - expr.hash(&mut hasher); - } else { - expr.hash_node(&mut hasher); - } + expr.hash_node(&mut hasher); let hash = hasher.finish(); Self { hash, expr } } @@ -110,8 +106,9 @@ impl Hash for Identifier<'_> { /// ``` type IdArray<'n> = Vec<(usize, Option>)>; -/// A map that contains the number of occurrences of expressions by their identifiers. -type ExprStats<'n> = HashMap, usize>; +/// A map that contains the number of normal and conditional occurrences of expressions by +/// their identifiers. +type ExprStats<'n> = HashMap, (usize, usize)>; /// A map that contains the common expressions and their alias extracted during the /// second, rewriting traversal. @@ -200,6 +197,7 @@ impl CommonSubexprEliminate { expr_mask, random_state: &self.random_state, found_common: false, + conditional: false, }; expr.visit(&mut visitor)?; @@ -901,15 +899,17 @@ struct ExprIdentifierVisitor<'a, 'n> { random_state: &'a RandomState, // a flag to indicate that common expression found found_common: bool, + // if we are in a conditional branch. A conditional branch means that the expression + // might not be executed depending on the runtime values of other expressions, and + // thus can not be extracted as a common expression. + conditional: bool, } /// Record item that used when traversing an expression tree. enum VisitRecord<'n> { /// Marks the beginning of expression. It contains: /// - The post-order index assigned during the first, visiting traversal. - /// - A boolean flag if the record marks an expression subtree (not just a single - /// node). - EnterMark(usize, bool), + EnterMark(usize), /// Marks an accumulated subexpression tree. It contains: /// - The accumulated identifier of a subexpression. @@ -924,10 +924,6 @@ impl<'n> ExprIdentifierVisitor<'_, 'n> { /// Find the first `EnterMark` in the stack, and accumulates every `ExprItem` before /// it. Returns a tuple that contains: /// - The pre-order index of the expression we marked. - /// - A boolean flag if we marked an expression subtree (not just a single node). - /// If true we didn't recurse into the node's children, so we need to calculate the - /// hash of the marked expression tree (not just the node) and we need to validate - /// the expression tree (not just the node). /// - The accumulated identifier of the children of the marked expression. /// - An accumulated boolean flag from the children of the marked expression if all /// children are valid for subexpression elimination (i.e. it is safe to extract the @@ -937,14 +933,14 @@ impl<'n> ExprIdentifierVisitor<'_, 'n> { /// information up from children to parents via `visit_stack` during the first, /// visiting traversal and no need to test the expression's validity beforehand with /// an extra traversal). - fn pop_enter_mark(&mut self) -> (usize, bool, Option>, bool) { + fn pop_enter_mark(&mut self) -> (usize, Option>, bool) { let mut expr_id = None; let mut is_valid = true; while let Some(item) = self.visit_stack.pop() { match item { - VisitRecord::EnterMark(down_index, is_tree) => { - return (down_index, is_tree, expr_id, is_valid); + VisitRecord::EnterMark(down_index) => { + return (down_index, expr_id, is_valid); } VisitRecord::ExprItem(sub_expr_id, sub_expr_is_valid) => { expr_id = Some(sub_expr_id.combine(expr_id)); @@ -954,53 +950,112 @@ impl<'n> ExprIdentifierVisitor<'_, 'n> { } unreachable!("Enter mark should paired with node number"); } + + /// Save the current `conditional` status and run `f` with `conditional` set to true. + fn conditionally Result<()>>( + &mut self, + mut f: F, + ) -> Result<()> { + let conditional = self.conditional; + self.conditional = true; + f(self)?; + self.conditional = conditional; + + Ok(()) + } } impl<'n> TreeNodeVisitor<'n> for ExprIdentifierVisitor<'_, 'n> { type Node = Expr; fn f_down(&mut self, expr: &'n Expr) -> Result { - // If an expression can short circuit its children then don't consider its - // children for CSE (https://github.com/apache/arrow-datafusion/issues/8814). - // This means that we don't recurse into its children, but handle the expression - // as a subtree when we calculate its identifier. - // TODO: consider surely executed children of "short circuited"s for CSE - let is_tree = expr.short_circuits(); - let tnr = if is_tree { - TreeNodeRecursion::Jump - } else { - TreeNodeRecursion::Continue - }; - self.id_array.push((0, None)); self.visit_stack - .push(VisitRecord::EnterMark(self.down_index, is_tree)); + .push(VisitRecord::EnterMark(self.down_index)); self.down_index += 1; - Ok(tnr) + // If an expression can short-circuit then some of its children might not be + // executed so count the occurrence of subexpressions as conditional in all + // children. + Ok(match expr { + // If we are already in a conditionally evaluated subtree then continue + // traversal. + _ if self.conditional => TreeNodeRecursion::Continue, + + // In case of `ScalarFunction`s we don't know which children are surely + // executed so start visiting all children conditionally and stop the + // recursion with `TreeNodeRecursion::Jump`. + Expr::ScalarFunction(ScalarFunction { func, args }) + if func.short_circuits() => + { + self.conditionally(|visitor| { + args.iter().try_for_each(|e| e.visit(visitor).map(|_| ())) + })?; + + TreeNodeRecursion::Jump + } + + // In case of `And` and `Or` the first child is surely executed, but we + // account subexpressions as conditional in the second. + Expr::BinaryExpr(BinaryExpr { + left, + op: Operator::And | Operator::Or, + right, + }) => { + left.visit(self)?; + self.conditionally(|visitor| right.visit(visitor).map(|_| ()))?; + + TreeNodeRecursion::Jump + } + + // In case of `Case` the optional base expression and the first when + // expressions are surely executed, but we account subexpressions as + // conditional in the others. + Expr::Case(Case { + expr, + when_then_expr, + else_expr, + }) => { + expr.iter().try_for_each(|e| e.visit(self).map(|_| ()))?; + when_then_expr.iter().take(1).try_for_each(|(when, then)| { + when.visit(self)?; + self.conditionally(|visitor| then.visit(visitor).map(|_| ())) + })?; + self.conditionally(|visitor| { + when_then_expr.iter().skip(1).try_for_each(|(when, then)| { + when.visit(visitor)?; + then.visit(visitor).map(|_| ()) + })?; + else_expr + .iter() + .try_for_each(|e| e.visit(visitor).map(|_| ())) + })?; + + TreeNodeRecursion::Jump + } + + // In case of non-short-circuit expressions continue the traversal. + _ => TreeNodeRecursion::Continue, + }) } fn f_up(&mut self, expr: &'n Expr) -> Result { - let (down_index, is_tree, sub_expr_id, sub_expr_is_valid) = self.pop_enter_mark(); + let (down_index, sub_expr_id, sub_expr_is_valid) = self.pop_enter_mark(); - let (expr_id, is_valid) = if is_tree { - ( - Identifier::new(expr, true, self.random_state), - !expr.is_volatile()?, - ) - } else { - ( - Identifier::new(expr, false, self.random_state).combine(sub_expr_id), - !expr.is_volatile_node() && sub_expr_is_valid, - ) - }; + let expr_id = Identifier::new(expr, self.random_state).combine(sub_expr_id); + let is_valid = !expr.is_volatile_node() && sub_expr_is_valid; self.id_array[down_index].0 = self.up_index; if is_valid && !self.expr_mask.ignores(expr) { self.id_array[down_index].1 = Some(expr_id); - let count = self.expr_stats.entry(expr_id).or_insert(0); - *count += 1; - if *count > 1 { + let (count, conditional_count) = + self.expr_stats.entry(expr_id).or_insert((0, 0)); + if self.conditional { + *conditional_count += 1; + } else { + *count += 1; + } + if *count > 1 || (*count == 1 && *conditional_count > 0) { self.found_common = true; } } @@ -1039,51 +1094,40 @@ impl TreeNodeRewriter for CommonSubexprRewriter<'_, '_> { self.alias_counter += 1; } - // The `CommonSubexprRewriter` relies on `ExprIdentifierVisitor` to generate the - // `id_array`, which records the expr's identifier used to rewrite expr. So if we - // skip an expr in `ExprIdentifierVisitor`, we should skip it here, too. - let is_tree = expr.short_circuits(); - let tnr = if is_tree { - TreeNodeRecursion::Jump - } else { - TreeNodeRecursion::Continue - }; - let (up_index, expr_id) = self.id_array[self.down_index]; self.down_index += 1; - // skip `Expr`s without identifier (empty identifier). - let Some(expr_id) = expr_id else { - return Ok(Transformed::new(expr, false, tnr)); - }; - - let count = self.expr_stats.get(&expr_id).unwrap(); - if *count > 1 { - // step index to skip all sub-node (which has smaller series number). - while self.down_index < self.id_array.len() - && self.id_array[self.down_index].0 < up_index - { - self.down_index += 1; - } + // Handle `Expr`s with identifiers only + if let Some(expr_id) = expr_id { + let (count, conditional_count) = self.expr_stats.get(&expr_id).unwrap(); + if *count > 1 || *count == 1 && *conditional_count > 0 { + // step index to skip all sub-node (which has smaller series number). + while self.down_index < self.id_array.len() + && self.id_array[self.down_index].0 < up_index + { + self.down_index += 1; + } - let expr_name = expr.display_name()?; - let (_, expr_alias) = self.common_exprs.entry(expr_id).or_insert_with(|| { - let expr_alias = self.alias_generator.next(CSE_PREFIX); - (expr, expr_alias) - }); + let expr_name = expr.display_name()?; + let (_, expr_alias) = + self.common_exprs.entry(expr_id).or_insert_with(|| { + let expr_alias = self.alias_generator.next(CSE_PREFIX); + (expr, expr_alias) + }); - // alias the expressions without an `Alias` ancestor node - let rewritten = if self.alias_counter > 0 { - col(expr_alias.clone()) - } else { - self.alias_counter += 1; - col(expr_alias.clone()).alias(expr_name) - }; + // alias the expressions without an `Alias` ancestor node + let rewritten = if self.alias_counter > 0 { + col(expr_alias.clone()) + } else { + self.alias_counter += 1; + col(expr_alias.clone()).alias(expr_name) + }; - Ok(Transformed::new(rewritten, true, TreeNodeRecursion::Jump)) - } else { - Ok(Transformed::new(expr, false, tnr)) + return Ok(Transformed::new(rewritten, true, TreeNodeRecursion::Jump)); + } } + + Ok(Transformed::no(expr)) } fn f_up(&mut self, expr: Expr) -> Result> { @@ -1685,7 +1729,7 @@ mod test { .unwrap(); let rule = CommonSubexprEliminate::new(); let optimized_plan = rule.rewrite(plan, &OptimizerContext::new()).unwrap(); - assert!(!optimized_plan.transformed); + assert!(optimized_plan.transformed); let optimized_plan = optimized_plan.data; let schema = optimized_plan.schema(); @@ -1837,22 +1881,29 @@ mod test { let table_scan = test_table_scan()?; let extracted_short_circuit = col("a").eq(lit(0)).or(col("b").eq(lit(0))); - let not_extracted_short_circuit_leg_1 = (col("a") + col("b")).eq(lit(0)); + let extracted_short_circuit_leg_1 = (col("a") + col("b")).eq(lit(0)); let not_extracted_short_circuit_leg_2 = (col("a") - col("b")).eq(lit(0)); + let extracted_short_circuit_leg_3 = (col("a") * col("b")).eq(lit(0)); let plan = LogicalPlanBuilder::from(table_scan.clone()) .project(vec![ extracted_short_circuit.clone().alias("c1"), extracted_short_circuit.alias("c2"), - not_extracted_short_circuit_leg_1.clone().alias("c3"), - not_extracted_short_circuit_leg_2.clone().alias("c4"), - not_extracted_short_circuit_leg_1 - .or(not_extracted_short_circuit_leg_2) + extracted_short_circuit_leg_1 + .clone() + .or(not_extracted_short_circuit_leg_2.clone()) + .alias("c3"), + extracted_short_circuit_leg_1 + .and(not_extracted_short_circuit_leg_2) + .alias("c4"), + extracted_short_circuit_leg_3 + .clone() + .or(extracted_short_circuit_leg_3.clone()) .alias("c5"), ])? .build()?; - let expected = "Projection: __common_expr_1 AS c1, __common_expr_1 AS c2, test.a + test.b = Int32(0) AS c3, test.a - test.b = Int32(0) AS c4, test.a + test.b = Int32(0) OR test.a - test.b = Int32(0) AS c5\ - \n Projection: test.a = Int32(0) OR test.b = Int32(0) AS __common_expr_1, test.a, test.b, test.c\ + let expected = "Projection: __common_expr_1 AS c1, __common_expr_1 AS c2, __common_expr_2 OR test.a - test.b = Int32(0) AS c3, __common_expr_2 AND test.a - test.b = Int32(0) AS c4, __common_expr_3 OR __common_expr_3 AS c5\ + \n Projection: test.a = Int32(0) OR test.b = Int32(0) AS __common_expr_1, test.a + test.b = Int32(0) AS __common_expr_2, test.a * test.b = Int32(0) AS __common_expr_3, test.a, test.b, test.c\ \n TableScan: test"; assert_optimized_plan_eq(expected, plan, None); @@ -1888,10 +1939,12 @@ mod test { let table_scan = test_table_scan()?; let rand = rand_func().call(vec![]); - let not_extracted_volatile_short_circuit_2 = - rand.clone().eq(lit(0)).or(col("b").eq(lit(0))); + let extracted_short_circuit_leg_1 = col("a").eq(lit(0)); let not_extracted_volatile_short_circuit_1 = - col("a").eq(lit(0)).or(rand.eq(lit(0))); + extracted_short_circuit_leg_1.or(rand.clone().eq(lit(0))); + let not_extracted_short_circuit_leg_2 = col("b").eq(lit(0)); + let not_extracted_volatile_short_circuit_2 = + rand.eq(lit(0)).or(not_extracted_short_circuit_leg_2); let plan = LogicalPlanBuilder::from(table_scan.clone()) .project(vec![ not_extracted_volatile_short_circuit_1.clone().alias("c1"), @@ -1901,10 +1954,11 @@ mod test { ])? .build()?; - let expected = "Projection: test.a = Int32(0) OR random() = Int32(0) AS c1, test.a = Int32(0) OR random() = Int32(0) AS c2, random() = Int32(0) OR test.b = Int32(0) AS c3, random() = Int32(0) OR test.b = Int32(0) AS c4\ - \n TableScan: test"; + let expected = "Projection: __common_expr_1 OR random() = Int32(0) AS c1, __common_expr_1 OR random() = Int32(0) AS c2, random() = Int32(0) OR test.b = Int32(0) AS c3, random() = Int32(0) OR test.b = Int32(0) AS c4\ + \n Projection: test.a = Int32(0) AS __common_expr_1, test.a, test.b, test.c\ + \n TableScan: test"; - assert_non_optimized_plan_eq(expected, plan, None); + assert_optimized_plan_eq(expected, plan, None); Ok(()) } diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index cae2a7b2cad2f..58c1ae297b02e 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -19,7 +19,7 @@ mod required_indices; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use std::sync::Arc; use crate::optimizer::ApplyOrder; @@ -42,7 +42,6 @@ use datafusion_common::tree_node::{ Transformed, TreeNode, TreeNodeIterator, TreeNodeRecursion, }; use datafusion_expr::logical_plan::tree_node::unwrap_arc; -use hashbrown::HashMap; /// Optimizer rule to prune unnecessary columns from intermediate schemas /// inside the [`LogicalPlan`]. This rule: @@ -472,11 +471,8 @@ fn merge_consecutive_projections(proj: Projection) -> Result::new(); - for columns in expr.iter().map(|expr| expr.column_refs()) { - for col in columns.into_iter() { - *column_referral_map.entry(col).or_default() += 1; - } - } + expr.iter() + .for_each(|expr| expr.add_column_ref_counts(&mut column_referral_map)); // If an expression is non-trivial and appears more than once, do not merge // them as consecutive projections will benefit from a compute-once approach. diff --git a/datafusion/sqllogictest/test_files/cse.slt b/datafusion/sqllogictest/test_files/cse.slt index 3579c1c1635cb..19b47fa50e410 100644 --- a/datafusion/sqllogictest/test_files/cse.slt +++ b/datafusion/sqllogictest/test_files/cse.slt @@ -93,15 +93,16 @@ FROM t1 ---- logical_plan 01)Projection: __common_expr_1 AS c1, __common_expr_1 AS c2, __common_expr_2 AS c3, __common_expr_2 AS c4, __common_expr_3 AS c5, __common_expr_3 AS c6 -02)--Projection: t1.a = Float64(0) AND t1.b = Float64(0) AS __common_expr_1, t1.a = Float64(0) OR t1.b = Float64(0) AS __common_expr_2, CASE WHEN t1.a = Float64(0) THEN Int64(0) ELSE Int64(1) END AS __common_expr_3 -03)----TableScan: t1 projection=[a, b] +02)--Projection: __common_expr_4 AND t1.b = Float64(0) AS __common_expr_1, __common_expr_4 OR t1.b = Float64(0) AS __common_expr_2, CASE WHEN __common_expr_4 THEN Int64(0) ELSE Int64(1) END AS __common_expr_3 +03)----Projection: t1.a = Float64(0) AS __common_expr_4, t1.b +04)------TableScan: t1 projection=[a, b] physical_plan 01)ProjectionExec: expr=[__common_expr_1@0 as c1, __common_expr_1@0 as c2, __common_expr_2@1 as c3, __common_expr_2@1 as c4, __common_expr_3@2 as c5, __common_expr_3@2 as c6] -02)--ProjectionExec: expr=[a@0 = 0 AND b@1 = 0 as __common_expr_1, a@0 = 0 OR b@1 = 0 as __common_expr_2, CASE WHEN a@0 = 0 THEN 0 ELSE 1 END as __common_expr_3] -03)----MemoryExec: partitions=1, partition_sizes=[0] +02)--ProjectionExec: expr=[__common_expr_4@0 AND b@1 = 0 as __common_expr_1, __common_expr_4@0 OR b@1 = 0 as __common_expr_2, CASE WHEN __common_expr_4@0 THEN 0 ELSE 1 END as __common_expr_3] +03)----ProjectionExec: expr=[a@0 = 0 as __common_expr_4, b@1 as b] +04)------MemoryExec: partitions=1, partition_sizes=[0] # Common children of short-circuit expression -# TODO: consider surely executed children of "short circuited"s for CSE. i.e. `a = 0`, `a = 2`, `a = 4` should be extracted query TT EXPLAIN SELECT a = 0 AND b = 0 AS c1, @@ -121,14 +122,15 @@ EXPLAIN SELECT FROM t1 ---- logical_plan -01)Projection: t1.a = Float64(0) AND t1.b = Float64(0) AS c1, t1.a = Float64(0) AND t1.b = Float64(1) AS c2, t1.b = Float64(2) AND t1.a = Float64(1) AS c3, t1.b = Float64(3) AND t1.a = Float64(1) AS c4, t1.a = Float64(2) OR t1.b = Float64(4) AS c5, t1.a = Float64(2) OR t1.b = Float64(5) AS c6, t1.b = Float64(6) OR t1.a = Float64(3) AS c7, t1.b = Float64(7) OR t1.a = Float64(3) AS c8, CASE WHEN t1.a = Float64(4) THEN Int64(0) ELSE Int64(1) END AS c9, CASE WHEN t1.a = Float64(4) THEN Int64(0) ELSE Int64(2) END AS c10, CASE WHEN t1.b = Float64(8) THEN t1.a + Float64(1) ELSE Float64(0) END AS c11, CASE WHEN t1.b = Float64(9) THEN t1.a + Float64(1) ELSE Float64(0) END AS c12, CASE WHEN t1.b = Float64(10) THEN Float64(0) ELSE t1.a + Float64(2) END AS c13, CASE WHEN t1.b = Float64(11) THEN Float64(0) ELSE t1.a + Float64(2) END AS c14 -02)--TableScan: t1 projection=[a, b] +01)Projection: __common_expr_1 AND t1.b = Float64(0) AS c1, __common_expr_1 AND t1.b = Float64(1) AS c2, t1.b = Float64(2) AND t1.a = Float64(1) AS c3, t1.b = Float64(3) AND t1.a = Float64(1) AS c4, __common_expr_2 OR t1.b = Float64(4) AS c5, __common_expr_2 OR t1.b = Float64(5) AS c6, t1.b = Float64(6) OR t1.a = Float64(3) AS c7, t1.b = Float64(7) OR t1.a = Float64(3) AS c8, CASE WHEN __common_expr_3 THEN Int64(0) ELSE Int64(1) END AS c9, CASE WHEN __common_expr_3 THEN Int64(0) ELSE Int64(2) END AS c10, CASE WHEN t1.b = Float64(8) THEN t1.a + Float64(1) ELSE Float64(0) END AS c11, CASE WHEN t1.b = Float64(9) THEN t1.a + Float64(1) ELSE Float64(0) END AS c12, CASE WHEN t1.b = Float64(10) THEN Float64(0) ELSE t1.a + Float64(2) END AS c13, CASE WHEN t1.b = Float64(11) THEN Float64(0) ELSE t1.a + Float64(2) END AS c14 +02)--Projection: t1.a = Float64(0) AS __common_expr_1, t1.a = Float64(2) AS __common_expr_2, t1.a = Float64(4) AS __common_expr_3, t1.a, t1.b +03)----TableScan: t1 projection=[a, b] physical_plan -01)ProjectionExec: expr=[a@0 = 0 AND b@1 = 0 as c1, a@0 = 0 AND b@1 = 1 as c2, b@1 = 2 AND a@0 = 1 as c3, b@1 = 3 AND a@0 = 1 as c4, a@0 = 2 OR b@1 = 4 as c5, a@0 = 2 OR b@1 = 5 as c6, b@1 = 6 OR a@0 = 3 as c7, b@1 = 7 OR a@0 = 3 as c8, CASE WHEN a@0 = 4 THEN 0 ELSE 1 END as c9, CASE WHEN a@0 = 4 THEN 0 ELSE 2 END as c10, CASE WHEN b@1 = 8 THEN a@0 + 1 ELSE 0 END as c11, CASE WHEN b@1 = 9 THEN a@0 + 1 ELSE 0 END as c12, CASE WHEN b@1 = 10 THEN 0 ELSE a@0 + 2 END as c13, CASE WHEN b@1 = 11 THEN 0 ELSE a@0 + 2 END as c14] -02)--MemoryExec: partitions=1, partition_sizes=[0] +01)ProjectionExec: expr=[__common_expr_1@0 AND b@4 = 0 as c1, __common_expr_1@0 AND b@4 = 1 as c2, b@4 = 2 AND a@3 = 1 as c3, b@4 = 3 AND a@3 = 1 as c4, __common_expr_2@1 OR b@4 = 4 as c5, __common_expr_2@1 OR b@4 = 5 as c6, b@4 = 6 OR a@3 = 3 as c7, b@4 = 7 OR a@3 = 3 as c8, CASE WHEN __common_expr_3@2 THEN 0 ELSE 1 END as c9, CASE WHEN __common_expr_3@2 THEN 0 ELSE 2 END as c10, CASE WHEN b@4 = 8 THEN a@3 + 1 ELSE 0 END as c11, CASE WHEN b@4 = 9 THEN a@3 + 1 ELSE 0 END as c12, CASE WHEN b@4 = 10 THEN 0 ELSE a@3 + 2 END as c13, CASE WHEN b@4 = 11 THEN 0 ELSE a@3 + 2 END as c14] +02)--ProjectionExec: expr=[a@0 = 0 as __common_expr_1, a@0 = 2 as __common_expr_2, a@0 = 4 as __common_expr_3, a@0 as a, b@1 as b] +03)----MemoryExec: partitions=1, partition_sizes=[0] # Common children of volatile, short-circuit expression -# TODO: consider surely executed children of "short circuited"s for CSE. i.e. `a = 0`, `a = 2`, `a = 4` should be extracted query TT EXPLAIN SELECT a = 0 AND b = random() AS c1, @@ -148,11 +150,13 @@ EXPLAIN SELECT FROM t1 ---- logical_plan -01)Projection: t1.a = Float64(0) AND t1.b = random() AS c1, t1.a = Float64(0) AND t1.b = Float64(1) + random() AS c2, t1.b = Float64(2) + random() AND t1.a = Float64(1) AS c3, t1.b = Float64(3) + random() AND t1.a = Float64(1) AS c4, t1.a = Float64(2) OR t1.b = Float64(4) + random() AS c5, t1.a = Float64(2) OR t1.b = Float64(5) + random() AS c6, t1.b = Float64(6) + random() OR t1.a = Float64(3) AS c7, t1.b = Float64(7) + random() OR t1.a = Float64(3) AS c8, CASE WHEN t1.a = Float64(4) THEN random() ELSE Float64(1) END AS c9, CASE WHEN t1.a = Float64(4) THEN random() ELSE Float64(2) END AS c10, CASE WHEN t1.b = Float64(8) + random() THEN t1.a + Float64(1) ELSE Float64(0) END AS c11, CASE WHEN t1.b = Float64(9) + random() THEN t1.a + Float64(1) ELSE Float64(0) END AS c12, CASE WHEN t1.b = Float64(10) + random() THEN Float64(0) ELSE t1.a + Float64(2) END AS c13, CASE WHEN t1.b = Float64(11) + random() THEN Float64(0) ELSE t1.a + Float64(2) END AS c14 -02)--TableScan: t1 projection=[a, b] +01)Projection: __common_expr_1 AND t1.b = random() AS c1, __common_expr_1 AND t1.b = Float64(1) + random() AS c2, t1.b = Float64(2) + random() AND t1.a = Float64(1) AS c3, t1.b = Float64(3) + random() AND t1.a = Float64(1) AS c4, __common_expr_2 OR t1.b = Float64(4) + random() AS c5, __common_expr_2 OR t1.b = Float64(5) + random() AS c6, t1.b = Float64(6) + random() OR t1.a = Float64(3) AS c7, t1.b = Float64(7) + random() OR t1.a = Float64(3) AS c8, CASE WHEN __common_expr_3 THEN random() ELSE Float64(1) END AS c9, CASE WHEN __common_expr_3 THEN random() ELSE Float64(2) END AS c10, CASE WHEN t1.b = Float64(8) + random() THEN t1.a + Float64(1) ELSE Float64(0) END AS c11, CASE WHEN t1.b = Float64(9) + random() THEN t1.a + Float64(1) ELSE Float64(0) END AS c12, CASE WHEN t1.b = Float64(10) + random() THEN Float64(0) ELSE t1.a + Float64(2) END AS c13, CASE WHEN t1.b = Float64(11) + random() THEN Float64(0) ELSE t1.a + Float64(2) END AS c14 +02)--Projection: t1.a = Float64(0) AS __common_expr_1, t1.a = Float64(2) AS __common_expr_2, t1.a = Float64(4) AS __common_expr_3, t1.a, t1.b +03)----TableScan: t1 projection=[a, b] physical_plan -01)ProjectionExec: expr=[a@0 = 0 AND b@1 = random() as c1, a@0 = 0 AND b@1 = 1 + random() as c2, b@1 = 2 + random() AND a@0 = 1 as c3, b@1 = 3 + random() AND a@0 = 1 as c4, a@0 = 2 OR b@1 = 4 + random() as c5, a@0 = 2 OR b@1 = 5 + random() as c6, b@1 = 6 + random() OR a@0 = 3 as c7, b@1 = 7 + random() OR a@0 = 3 as c8, CASE WHEN a@0 = 4 THEN random() ELSE 1 END as c9, CASE WHEN a@0 = 4 THEN random() ELSE 2 END as c10, CASE WHEN b@1 = 8 + random() THEN a@0 + 1 ELSE 0 END as c11, CASE WHEN b@1 = 9 + random() THEN a@0 + 1 ELSE 0 END as c12, CASE WHEN b@1 = 10 + random() THEN 0 ELSE a@0 + 2 END as c13, CASE WHEN b@1 = 11 + random() THEN 0 ELSE a@0 + 2 END as c14] -02)--MemoryExec: partitions=1, partition_sizes=[0] +01)ProjectionExec: expr=[__common_expr_1@0 AND b@4 = random() as c1, __common_expr_1@0 AND b@4 = 1 + random() as c2, b@4 = 2 + random() AND a@3 = 1 as c3, b@4 = 3 + random() AND a@3 = 1 as c4, __common_expr_2@1 OR b@4 = 4 + random() as c5, __common_expr_2@1 OR b@4 = 5 + random() as c6, b@4 = 6 + random() OR a@3 = 3 as c7, b@4 = 7 + random() OR a@3 = 3 as c8, CASE WHEN __common_expr_3@2 THEN random() ELSE 1 END as c9, CASE WHEN __common_expr_3@2 THEN random() ELSE 2 END as c10, CASE WHEN b@4 = 8 + random() THEN a@3 + 1 ELSE 0 END as c11, CASE WHEN b@4 = 9 + random() THEN a@3 + 1 ELSE 0 END as c12, CASE WHEN b@4 = 10 + random() THEN 0 ELSE a@3 + 2 END as c13, CASE WHEN b@4 = 11 + random() THEN 0 ELSE a@3 + 2 END as c14] +02)--ProjectionExec: expr=[a@0 = 0 as __common_expr_1, a@0 = 2 as __common_expr_2, a@0 = 4 as __common_expr_3, a@0 as a, b@1 as b] +03)----MemoryExec: partitions=1, partition_sizes=[0] # Common volatile children of short-circuit expression query TT @@ -171,3 +175,59 @@ logical_plan physical_plan 01)ProjectionExec: expr=[a@0 = random() AND b@1 = 0 as c1, a@0 = random() AND b@1 = 1 as c2, a@0 = 2 + random() OR b@1 = 4 as c3, a@0 = 2 + random() OR b@1 = 5 as c4, CASE WHEN a@0 = 4 + random() THEN 0 ELSE 1 END as c5, CASE WHEN a@0 = 4 + random() THEN 0 ELSE 2 END as c6] 02)--MemoryExec: partitions=1, partition_sizes=[0] + +# Surely only once but also conditionally evaluated expressions +query TT +EXPLAIN SELECT + (a = 1 OR random() = 0) AND a = 1 AS c1, + (a = 2 AND random() = 0) OR a = 2 AS c2, + CASE WHEN a + 3 = 0 THEN a + 3 ELSE 0 END AS c3, + CASE WHEN a + 4 = 0 THEN 0 WHEN a + 4 THEN 0 ELSE 0 END AS c4, + CASE WHEN a + 5 = 0 THEN 0 WHEN random() = 0 THEN a + 5 ELSE 0 END AS c5, + CASE WHEN a + 6 = 0 THEN 0 ELSE a + 6 END AS c6 +FROM t1 +---- +logical_plan +01)Projection: (__common_expr_1 OR random() = Float64(0)) AND __common_expr_1 AS c1, __common_expr_2 AND random() = Float64(0) OR __common_expr_2 AS c2, CASE WHEN __common_expr_3 = Float64(0) THEN __common_expr_3 ELSE Float64(0) END AS c3, CASE WHEN __common_expr_4 = Float64(0) THEN Int64(0) WHEN CAST(__common_expr_4 AS Boolean) THEN Int64(0) ELSE Int64(0) END AS c4, CASE WHEN __common_expr_5 = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN __common_expr_5 ELSE Float64(0) END AS c5, CASE WHEN __common_expr_6 = Float64(0) THEN Float64(0) ELSE __common_expr_6 END AS c6 +02)--Projection: t1.a = Float64(1) AS __common_expr_1, t1.a = Float64(2) AS __common_expr_2, t1.a + Float64(3) AS __common_expr_3, t1.a + Float64(4) AS __common_expr_4, t1.a + Float64(5) AS __common_expr_5, t1.a + Float64(6) AS __common_expr_6 +03)----TableScan: t1 projection=[a] +physical_plan +01)ProjectionExec: expr=[(__common_expr_1@0 OR random() = 0) AND __common_expr_1@0 as c1, __common_expr_2@1 AND random() = 0 OR __common_expr_2@1 as c2, CASE WHEN __common_expr_3@2 = 0 THEN __common_expr_3@2 ELSE 0 END as c3, CASE WHEN __common_expr_4@3 = 0 THEN 0 WHEN CAST(__common_expr_4@3 AS Boolean) THEN 0 ELSE 0 END as c4, CASE WHEN __common_expr_5@4 = 0 THEN 0 WHEN random() = 0 THEN __common_expr_5@4 ELSE 0 END as c5, CASE WHEN __common_expr_6@5 = 0 THEN 0 ELSE __common_expr_6@5 END as c6] +02)--ProjectionExec: expr=[a@0 = 1 as __common_expr_1, a@0 = 2 as __common_expr_2, a@0 + 3 as __common_expr_3, a@0 + 4 as __common_expr_4, a@0 + 5 as __common_expr_5, a@0 + 6 as __common_expr_6] +03)----MemoryExec: partitions=1, partition_sizes=[0] + +# Surely only once but also conditionally evaluated subexpressions +query TT +EXPLAIN SELECT + (a = 1 OR random() = 0) AND (a = 1 OR random() = 1) AS c1, + (a = 2 AND random() = 0) OR (a = 2 AND random() = 1) AS c2, + CASE WHEN a + 3 = 0 THEN a + 3 + random() ELSE 0 END AS c3, + CASE WHEN a + 4 = 0 THEN 0 ELSE a + 4 + random() END AS c4 +FROM t1 +---- +logical_plan +01)Projection: (__common_expr_1 OR random() = Float64(0)) AND (__common_expr_1 OR random() = Float64(1)) AS c1, __common_expr_2 AND random() = Float64(0) OR __common_expr_2 AND random() = Float64(1) AS c2, CASE WHEN __common_expr_3 = Float64(0) THEN __common_expr_3 + random() ELSE Float64(0) END AS c3, CASE WHEN __common_expr_4 = Float64(0) THEN Float64(0) ELSE __common_expr_4 + random() END AS c4 +02)--Projection: t1.a = Float64(1) AS __common_expr_1, t1.a = Float64(2) AS __common_expr_2, t1.a + Float64(3) AS __common_expr_3, t1.a + Float64(4) AS __common_expr_4 +03)----TableScan: t1 projection=[a] +physical_plan +01)ProjectionExec: expr=[(__common_expr_1@0 OR random() = 0) AND (__common_expr_1@0 OR random() = 1) as c1, __common_expr_2@1 AND random() = 0 OR __common_expr_2@1 AND random() = 1 as c2, CASE WHEN __common_expr_3@2 = 0 THEN __common_expr_3@2 + random() ELSE 0 END as c3, CASE WHEN __common_expr_4@3 = 0 THEN 0 ELSE __common_expr_4@3 + random() END as c4] +02)--ProjectionExec: expr=[a@0 = 1 as __common_expr_1, a@0 = 2 as __common_expr_2, a@0 + 3 as __common_expr_3, a@0 + 4 as __common_expr_4] +03)----MemoryExec: partitions=1, partition_sizes=[0] + +# Only conditionally evaluated expressions +query TT +EXPLAIN SELECT + (random() = 0 OR a = 1) AND a = 1 AS c1, + (random() = 0 AND a = 2) OR a = 2 AS c2, + CASE WHEN random() = 0 THEN a + 3 ELSE a + 3 END AS c3, + CASE WHEN random() = 0 THEN 0 WHEN a + 4 = 0 THEN a + 4 ELSE 0 END AS c4, + CASE WHEN random() = 0 THEN 0 WHEN a + 5 = 0 THEN 0 ELSE a + 5 END AS c5, + CASE WHEN random() = 0 THEN 0 WHEN random() = 0 THEN a + 6 ELSE a + 6 END AS c6 +FROM t1 +---- +logical_plan +01)Projection: (random() = Float64(0) OR t1.a = Float64(1)) AND t1.a = Float64(1) AS c1, random() = Float64(0) AND t1.a = Float64(2) OR t1.a = Float64(2) AS c2, CASE WHEN random() = Float64(0) THEN t1.a + Float64(3) ELSE t1.a + Float64(3) END AS c3, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(4) = Float64(0) THEN t1.a + Float64(4) ELSE Float64(0) END AS c4, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(5) = Float64(0) THEN Float64(0) ELSE t1.a + Float64(5) END AS c5, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN t1.a + Float64(6) ELSE t1.a + Float64(6) END AS c6 +02)--TableScan: t1 projection=[a] +physical_plan +01)ProjectionExec: expr=[(random() = 0 OR a@0 = 1) AND a@0 = 1 as c1, random() = 0 AND a@0 = 2 OR a@0 = 2 as c2, CASE WHEN random() = 0 THEN a@0 + 3 ELSE a@0 + 3 END as c3, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 4 = 0 THEN a@0 + 4 ELSE 0 END as c4, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 5 = 0 THEN 0 ELSE a@0 + 5 END as c5, CASE WHEN random() = 0 THEN 0 WHEN random() = 0 THEN a@0 + 6 ELSE a@0 + 6 END as c6] +02)--MemoryExec: partitions=1, partition_sizes=[0] diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index f9baf8db69d5b..95f67245a981e 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -1504,21 +1504,25 @@ query TT EXPLAIN SELECT y > 0 and 1 / y < 1, x > 0 and y > 0 and 1 / y < 1 / x from t; ---- logical_plan -01)Projection: t.y > Int32(0) AND Int64(1) / CAST(t.y AS Int64) < Int64(1) AS t.y > Int64(0) AND Int64(1) / t.y < Int64(1), t.x > Int32(0) AND t.y > Int32(0) AND Int64(1) / CAST(t.y AS Int64) < Int64(1) / CAST(t.x AS Int64) AS t.x > Int64(0) AND t.y > Int64(0) AND Int64(1) / t.y < Int64(1) / t.x -02)--TableScan: t projection=[x, y] +01)Projection: __common_expr_1 AND Int64(1) / CAST(t.y AS Int64) < Int64(1) AS t.y > Int64(0) AND Int64(1) / t.y < Int64(1), t.x > Int32(0) AND __common_expr_1 AND Int64(1) / CAST(t.y AS Int64) < Int64(1) / CAST(t.x AS Int64) AS t.x > Int64(0) AND t.y > Int64(0) AND Int64(1) / t.y < Int64(1) / t.x +02)--Projection: t.y > Int32(0) AS __common_expr_1, t.x, t.y +03)----TableScan: t projection=[x, y] physical_plan -01)ProjectionExec: expr=[y@1 > 0 AND 1 / CAST(y@1 AS Int64) < 1 as t.y > Int64(0) AND Int64(1) / t.y < Int64(1), x@0 > 0 AND y@1 > 0 AND 1 / CAST(y@1 AS Int64) < 1 / CAST(x@0 AS Int64) as t.x > Int64(0) AND t.y > Int64(0) AND Int64(1) / t.y < Int64(1) / t.x] -02)--MemoryExec: partitions=1, partition_sizes=[1] +01)ProjectionExec: expr=[__common_expr_1@0 AND 1 / CAST(y@2 AS Int64) < 1 as t.y > Int64(0) AND Int64(1) / t.y < Int64(1), x@1 > 0 AND __common_expr_1@0 AND 1 / CAST(y@2 AS Int64) < 1 / CAST(x@1 AS Int64) as t.x > Int64(0) AND t.y > Int64(0) AND Int64(1) / t.y < Int64(1) / t.x] +02)--ProjectionExec: expr=[y@1 > 0 as __common_expr_1, x@0 as x, y@1 as y] +03)----MemoryExec: partitions=1, partition_sizes=[1] query TT EXPLAIN SELECT y = 0 or 1 / y < 1, x = 0 or y = 0 or 1 / y < 1 / x from t; ---- logical_plan -01)Projection: t.y = Int32(0) OR Int64(1) / CAST(t.y AS Int64) < Int64(1) AS t.y = Int64(0) OR Int64(1) / t.y < Int64(1), t.x = Int32(0) OR t.y = Int32(0) OR Int64(1) / CAST(t.y AS Int64) < Int64(1) / CAST(t.x AS Int64) AS t.x = Int64(0) OR t.y = Int64(0) OR Int64(1) / t.y < Int64(1) / t.x -02)--TableScan: t projection=[x, y] +01)Projection: __common_expr_1 OR Int64(1) / CAST(t.y AS Int64) < Int64(1) AS t.y = Int64(0) OR Int64(1) / t.y < Int64(1), t.x = Int32(0) OR __common_expr_1 OR Int64(1) / CAST(t.y AS Int64) < Int64(1) / CAST(t.x AS Int64) AS t.x = Int64(0) OR t.y = Int64(0) OR Int64(1) / t.y < Int64(1) / t.x +02)--Projection: t.y = Int32(0) AS __common_expr_1, t.x, t.y +03)----TableScan: t projection=[x, y] physical_plan -01)ProjectionExec: expr=[y@1 = 0 OR 1 / CAST(y@1 AS Int64) < 1 as t.y = Int64(0) OR Int64(1) / t.y < Int64(1), x@0 = 0 OR y@1 = 0 OR 1 / CAST(y@1 AS Int64) < 1 / CAST(x@0 AS Int64) as t.x = Int64(0) OR t.y = Int64(0) OR Int64(1) / t.y < Int64(1) / t.x] -02)--MemoryExec: partitions=1, partition_sizes=[1] +01)ProjectionExec: expr=[__common_expr_1@0 OR 1 / CAST(y@2 AS Int64) < 1 as t.y = Int64(0) OR Int64(1) / t.y < Int64(1), x@1 = 0 OR __common_expr_1@0 OR 1 / CAST(y@2 AS Int64) < 1 / CAST(x@1 AS Int64) as t.x = Int64(0) OR t.y = Int64(0) OR Int64(1) / t.y < Int64(1) / t.x] +02)--ProjectionExec: expr=[y@1 = 0 as __common_expr_1, x@0 as x, y@1 as y] +03)----MemoryExec: partitions=1, partition_sizes=[1] # due to the reason describe in https://github.com/apache/datafusion/issues/8927, # the following queries will fail diff --git a/datafusion/sqllogictest/test_files/tpch/q14.slt.part b/datafusion/sqllogictest/test_files/tpch/q14.slt.part index e56e463a617d7..3743c201ff2e5 100644 --- a/datafusion/sqllogictest/test_files/tpch/q14.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q14.slt.part @@ -32,9 +32,9 @@ where and l_shipdate < date '1995-10-01'; ---- logical_plan -01)Projection: Float64(100) * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END) AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS Float64) AS promo_revenue -02)--Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) ELSE Decimal128(Some(0),38,4) END) AS sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] -03)----Projection: lineitem.l_extendedprice, lineitem.l_discount, part.p_type +01)Projection: Float64(100) * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END) AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS Float64) AS promo_revenue +02)--Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN __common_expr_1 ELSE Decimal128(Some(0),38,4) END) AS sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(__common_expr_1) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] +03)----Projection: lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS __common_expr_1, part.p_type 04)------Inner Join: lineitem.l_partkey = part.p_partkey 05)--------Projection: lineitem.l_partkey, lineitem.l_extendedprice, lineitem.l_discount 06)----------Filter: lineitem.l_shipdate >= Date32("1995-09-01") AND lineitem.l_shipdate < Date32("1995-10-01") @@ -44,19 +44,20 @@ physical_plan 01)ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64) as promo_revenue] 02)--AggregateExec: mode=Final, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] 03)----CoalescePartitionsExec -04)------AggregateExec: mode=Partial, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] -05)--------CoalesceBatchesExec: target_batch_size=8192 -06)----------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_partkey@0, p_partkey@0)], projection=[l_extendedprice@1, l_discount@2, p_type@4] -07)------------CoalesceBatchesExec: target_batch_size=8192 -08)--------------RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 -09)----------------ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] -10)------------------CoalesceBatchesExec: target_batch_size=8192 -11)--------------------FilterExec: l_shipdate@3 >= 1995-09-01 AND l_shipdate@3 < 1995-10-01 -12)----------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], has_header=false -13)------------CoalesceBatchesExec: target_batch_size=8192 -14)--------------RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 -15)----------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -16)------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/part.tbl]]}, projection=[p_partkey, p_type], has_header=false +04)------AggregateExec: mode=Partial, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +05)--------ProjectionExec: expr=[l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as __common_expr_1, p_type@2 as p_type] +06)----------CoalesceBatchesExec: target_batch_size=8192 +07)------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_partkey@0, p_partkey@0)], projection=[l_extendedprice@1, l_discount@2, p_type@4] +08)--------------CoalesceBatchesExec: target_batch_size=8192 +09)----------------RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 +10)------------------ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] +11)--------------------CoalesceBatchesExec: target_batch_size=8192 +12)----------------------FilterExec: l_shipdate@3 >= 1995-09-01 AND l_shipdate@3 < 1995-10-01 +13)------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], has_header=false +14)--------------CoalesceBatchesExec: target_batch_size=8192 +15)----------------RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 +16)------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +17)--------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/part.tbl]]}, projection=[p_partkey, p_type], has_header=false