From eb6e21dd68a75130c466923168456f84a6386658 Mon Sep 17 00:00:00 2001 From: wiedld Date: Thu, 29 Aug 2024 11:31:36 -0700 Subject: [PATCH 1/6] chore(12119): add expand_views_at_output optimizer config option, default to false --- datafusion/common/src/config.rs | 5 +++++ datafusion/sqllogictest/test_files/information_schema.slt | 2 ++ 2 files changed, 7 insertions(+) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 37d26c6f00c4..2ec982ea15b9 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -630,6 +630,11 @@ config_namespace! { /// When set to true, the optimizer will not attempt to convert Union to Interleave pub prefer_existing_union: bool, default = false + + /// When set to true, if the returned type is a view type + /// then the output will be coerced to a non-view. + /// Coerces `Utf8View` to `LargeUtf8`, and `BinaryView` to `LargeBinary`. + pub expand_views_at_output: bool, default = false } } diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index efd017a90bc4..a159a7795f08 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -226,6 +226,7 @@ datafusion.optimizer.default_filter_selectivity 20 datafusion.optimizer.enable_distinct_aggregation_soft_limit true datafusion.optimizer.enable_round_robin_repartition true datafusion.optimizer.enable_topk_aggregation true +datafusion.optimizer.expand_views_at_output false datafusion.optimizer.filter_null_join_keys false datafusion.optimizer.hash_join_single_partition_threshold 1048576 datafusion.optimizer.hash_join_single_partition_threshold_rows 131072 @@ -316,6 +317,7 @@ datafusion.optimizer.default_filter_selectivity 20 The default filter selectivit datafusion.optimizer.enable_distinct_aggregation_soft_limit true When set to true, the optimizer will push a limit operation into grouped aggregations which have no aggregate expressions, as a soft limit, emitting groups once the limit is reached, before all rows in the group are read. datafusion.optimizer.enable_round_robin_repartition true When set to true, the physical plan optimizer will try to add round robin repartitioning to increase parallelism to leverage more CPU cores datafusion.optimizer.enable_topk_aggregation true When set to true, the optimizer will attempt to perform limit operations during aggregations, if possible +datafusion.optimizer.expand_views_at_output false When set to true, if the returned type is a view type then the output will be coerced to a non-view. Coerces `Utf8View` to `LargeUtf8`, and `BinaryView` to `LargeBinary`. datafusion.optimizer.filter_null_join_keys false When set to true, the optimizer will insert filters before a join between a nullable and non-nullable column to filter out nulls on the nullable side. This filter can add additional overhead when the file format does not fully support predicate push down. datafusion.optimizer.hash_join_single_partition_threshold 1048576 The maximum estimated size in bytes for one input side of a HashJoin will be collected into a single partition datafusion.optimizer.hash_join_single_partition_threshold_rows 131072 The maximum estimated size in rows for one input side of a HashJoin will be collected into a single partition From 081eb9f65d56e74acc9d7d7bf36e4a0adc160444 Mon Sep 17 00:00:00 2001 From: wiedld Date: Thu, 29 Aug 2024 11:32:33 -0700 Subject: [PATCH 2/6] feat(12119): coerce output based upon expand_views_at_output --- datafusion/expr/src/logical_plan/plan.rs | 2 +- .../optimizer/src/analyzer/type_coercion.rs | 79 ++++++++++++++++++- 2 files changed, 77 insertions(+), 4 deletions(-) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 359de2d30a57..9a328d04773d 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -1289,7 +1289,7 @@ impl LogicalPlan { /// referenced expressions into columns. /// /// See also: [`crate::utils::columnize_expr`] - pub(crate) fn columnized_output_exprs(&self) -> Result> { + pub fn columnized_output_exprs(&self) -> Result> { match self { LogicalPlan::Aggregate(aggregate) => Ok(aggregate .output_expressions()? diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index a6b9bad6c5d9..a7691987550e 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -21,7 +21,7 @@ use std::sync::Arc; use itertools::izip; -use arrow::datatypes::{DataType, Field, IntervalUnit}; +use arrow::datatypes::{DataType, Field, IntervalUnit, Schema}; use crate::analyzer::AnalyzerRule; use crate::utils::NamePreserver; @@ -66,19 +66,39 @@ impl TypeCoercion { } } +/// Coerce output schema based upon optimizer config. +fn coerce_output(plan: LogicalPlan, config: &ConfigOptions) -> Result { + if !config.optimizer.expand_views_at_output { + return Ok(plan); + } + + let outer_refs = plan.expressions(); + if outer_refs.is_empty() { + return Ok(plan); + } + + if let Some(dfschema) = transform_schema_to_nonview(plan.schema()) { + coerce_plan_expr_for_schema(plan, &dfschema?) + } else { + Ok(plan) + } +} + impl AnalyzerRule for TypeCoercion { fn name(&self) -> &str { "type_coercion" } - fn analyze(&self, plan: LogicalPlan, _: &ConfigOptions) -> Result { + fn analyze(&self, plan: LogicalPlan, config: &ConfigOptions) -> Result { let empty_schema = DFSchema::empty(); + // recurse let transformed_plan = plan .transform_up_with_subqueries(|plan| analyze_internal(&empty_schema, plan))? .data; - Ok(transformed_plan) + // finish + coerce_output(transformed_plan, config) } } @@ -515,6 +535,59 @@ impl<'a> TreeNodeRewriter for TypeCoercionRewriter<'a> { } } +/// Transform a schema to use non-view types for Utf8View and BinaryView +fn transform_schema_to_nonview(dfschema: &DFSchemaRef) -> Option> { + let metadata = dfschema.as_arrow().metadata.clone(); + let len = dfschema.fields().len(); + let mut transformed = false; + + let (qualifiers, transformed_fields) = dfschema + .iter() + .map(|(qualifier, field)| match field.data_type() { + DataType::Utf8View => { + transformed = true; + ( + qualifier, + Arc::new(Field::new( + field.name(), + DataType::LargeUtf8, + field.is_nullable(), + )), + ) + } + DataType::BinaryView => { + transformed = true; + ( + qualifier, + Arc::new(Field::new( + field.name(), + DataType::LargeBinary, + field.is_nullable(), + )), + ) + } + _ => (qualifier, Arc::clone(field)), + }) + .fold( + (Vec::with_capacity(len), Vec::with_capacity(len)), + |(mut qs, mut fs), (q, f)| { + qs.push(q.cloned()); + fs.push(f); + (qs, fs) + }, + ); + + if !transformed { + return None; + } + + let schema = Schema::new_with_metadata(transformed_fields, metadata); + Some(DFSchema::from_field_specific_qualified_schema( + qualifiers, + &Arc::new(schema), + )) +} + /// Casts the given `value` to `target_type`. Note that this function /// only considers `Null` or `Utf8` values. fn coerce_scalar(target_type: &DataType, value: &ScalarValue) -> Result { From 31b7805a5a23202f71676952316c0763c67340fa Mon Sep 17 00:00:00 2001 From: wiedld Date: Sat, 31 Aug 2024 00:25:38 -0700 Subject: [PATCH 3/6] test(12119): test coercion of view types, to non-view types, on output. --- .../optimizer/src/analyzer/type_coercion.rs | 127 +++++++++++++++++- datafusion/optimizer/src/test/mod.rs | 11 ++ 2 files changed, 136 insertions(+), 2 deletions(-) diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index a7691987550e..3ca174a0b711 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -1009,10 +1009,11 @@ mod test { use arrow::datatypes::DataType::Utf8; use arrow::datatypes::{DataType, Field, TimeUnit}; + use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{TransformedResult, TreeNode}; use datafusion_common::{DFSchema, DFSchemaRef, Result, ScalarValue}; use datafusion_expr::expr::{self, InSubquery, Like, ScalarFunction}; - use datafusion_expr::logical_plan::{EmptyRelation, Projection}; + use datafusion_expr::logical_plan::{EmptyRelation, Projection, Sort}; use datafusion_expr::test::function_stub::avg_udaf; use datafusion_expr::{ cast, col, create_udaf, is_true, lit, AccumulatorFactoryFunction, AggregateUDF, @@ -1025,7 +1026,7 @@ mod test { use crate::analyzer::type_coercion::{ coerce_case_expression, TypeCoercion, TypeCoercionRewriter, }; - use crate::test::assert_analyzed_plan_eq; + use crate::test::{assert_analyzed_plan_eq, assert_analyzed_plan_with_config_eq}; fn empty() -> Arc { Arc::new(LogicalPlan::EmptyRelation(EmptyRelation { @@ -1056,6 +1057,128 @@ mod test { assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan, expected) } + #[test] + fn coerce_utf8view_output() -> Result<()> { + let expr = col("a"); + let empty = empty_with_type(DataType::Utf8View); + let plan = LogicalPlan::Projection(Projection::try_new( + vec![expr.clone()], + Arc::clone(&empty), + )?); + + // baseline + let expected = "Projection: a\n EmptyRelation"; + assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan.clone(), expected)?; + + // coerce: Utf8View => LargeUtf8 + let expect_cast = "Projection: CAST(a AS LargeUtf8)\n EmptyRelation"; + let mut options = ConfigOptions::default(); + options.optimizer.expand_views_at_output = true; + assert_analyzed_plan_with_config_eq( + options, + Arc::new(TypeCoercion::new()), + plan.clone(), + expect_cast, + )?; + + // request coerce --> but output in bool, not Utf8View + let bool_expr = col("a").lt(lit("foo")); + let plan_no_cast = LogicalPlan::Projection(Projection::try_new( + vec![bool_expr], + Arc::clone(&empty), + )?); + let expect_no_cast = + "Projection: a < CAST(Utf8(\"foo\") AS Utf8View)\n EmptyRelation"; + let mut options = ConfigOptions::default(); + options.optimizer.expand_views_at_output = true; + assert_analyzed_plan_with_config_eq( + options, + Arc::new(TypeCoercion::new()), + plan_no_cast.clone(), + expect_no_cast, + )?; + + // coerce start with a non-projection root logical plan node + let sort_expr = expr.sort(true, true); + let sort_plan = LogicalPlan::Sort(Sort { + expr: vec![sort_expr], + input: Arc::new(plan), + fetch: None, + }); + let expect_cast = "Projection: CAST(a AS LargeUtf8)\n Sort: a ASC NULLS FIRST\n Projection: a\n EmptyRelation"; + let mut options = ConfigOptions::default(); + options.optimizer.expand_views_at_output = true; + assert_analyzed_plan_with_config_eq( + options, + Arc::new(TypeCoercion::new()), + sort_plan.clone(), + expect_cast, + )?; + + Ok(()) + } + + #[test] + fn coerce_binaryview_output() -> Result<()> { + let expr = col("a"); + let empty = empty_with_type(DataType::BinaryView); + let plan = LogicalPlan::Projection(Projection::try_new( + vec![expr.clone()], + Arc::clone(&empty), + )?); + + // baseline + let expected = "Projection: a\n EmptyRelation"; + assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan.clone(), expected)?; + + // coerce: BinaryView => LargeBinary + let expect_cast = "Projection: CAST(a AS LargeBinary)\n EmptyRelation"; + let mut options = ConfigOptions::default(); + options.optimizer.expand_views_at_output = true; + assert_analyzed_plan_with_config_eq( + options, + Arc::new(TypeCoercion::new()), + plan.clone(), + expect_cast, + )?; + + // request coerce --> but output in bool, not BinaryView + let bool_expr = col("a").lt(lit(vec![8, 1, 8, 1])); + let plan_no_cast = LogicalPlan::Projection(Projection::try_new( + vec![bool_expr], + Arc::clone(&empty), + )?); + let expect_no_cast = + "Projection: a < CAST(Binary(\"8,1,8,1\") AS BinaryView)\n EmptyRelation"; + let mut options = ConfigOptions::default(); + options.optimizer.expand_views_at_output = true; + assert_analyzed_plan_with_config_eq( + options, + Arc::new(TypeCoercion::new()), + plan_no_cast.clone(), + expect_no_cast, + )?; + + // coerce start with a non-projection root logical plan node + let sort_expr = expr.sort(true, true); + let sort_plan = LogicalPlan::Sort(Sort { + expr: vec![sort_expr], + input: Arc::new(plan), + fetch: None, + }); + let expect_cast = "Projection: CAST(a AS LargeBinary)\n Sort: a ASC NULLS FIRST\n Projection: a\n EmptyRelation"; + let mut options = ConfigOptions::default(); + options.optimizer.expand_views_at_output = true; + assert_analyzed_plan_with_config_eq( + options, + Arc::new(TypeCoercion::new()), + sort_plan.clone(), + expect_cast, + )?; + + Ok(()) + } + #[test] fn nested_case() -> Result<()> { let expr = col("a").lt(lit(2_u32)); diff --git a/datafusion/optimizer/src/test/mod.rs b/datafusion/optimizer/src/test/mod.rs index 1266b548ab05..cabeafd8e7de 100644 --- a/datafusion/optimizer/src/test/mod.rs +++ b/datafusion/optimizer/src/test/mod.rs @@ -114,6 +114,17 @@ pub fn assert_analyzed_plan_eq( expected: &str, ) -> Result<()> { let options = ConfigOptions::default(); + assert_analyzed_plan_with_config_eq(options, rule, plan, expected)?; + + Ok(()) +} + +pub fn assert_analyzed_plan_with_config_eq( + options: ConfigOptions, + rule: Arc, + plan: LogicalPlan, + expected: &str, +) -> Result<()> { let analyzed_plan = Analyzer::with_rules(vec![rule]).execute_and_check(plan, &options, |_, _| {})?; let formatted_plan = format!("{analyzed_plan}"); From 4d0ed73a331b0a30dab506d4f24fd75fb4f0a65c Mon Sep 17 00:00:00 2001 From: wiedld Date: Sat, 31 Aug 2024 00:47:06 -0700 Subject: [PATCH 4/6] chore(12119): config.md for new config option --- docs/source/user-guide/configs.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 4255307781b6..0e1237f7be81 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -112,6 +112,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.optimizer.hash_join_single_partition_threshold_rows | 131072 | The maximum estimated size in rows for one input side of a HashJoin will be collected into a single partition | | datafusion.optimizer.default_filter_selectivity | 20 | The default filter selectivity used by Filter Statistics when an exact selectivity cannot be determined. Valid values are between 0 (no selectivity) and 100 (all rows are selected). | | datafusion.optimizer.prefer_existing_union | false | When set to true, the optimizer will not attempt to convert Union to Interleave | +| datafusion.optimizer.expand_views_at_output | false | When set to true, if the returned type is a view type then the output will be coerced to a non-view. Coerces `Utf8View` to `LargeUtf8`, and `BinaryView` to `LargeBinary`. | | datafusion.explain.logical_plan_only | false | When set to true, the explain statement will only print logical plans | | datafusion.explain.physical_plan_only | false | When set to true, the explain statement will only print physical plans | | datafusion.explain.show_statistics | false | When set to true, the explain statement will print operator statistics for physical plans | From 90505d28e4e352ada30f562037f7ef63741c2b00 Mon Sep 17 00:00:00 2001 From: wiedld Date: Thu, 5 Sep 2024 15:58:09 -0700 Subject: [PATCH 5/6] refactor(12119): code cleanup --- .../optimizer/src/analyzer/type_coercion.rs | 76 +++++++++---------- 1 file changed, 36 insertions(+), 40 deletions(-) diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 767e947741ae..6f700ac3c786 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -29,7 +29,7 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; use datafusion_common::{ exec_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, Column, - DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, + DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, TableReference, }; use datafusion_expr::expr::{ self, Alias, Between, BinaryExpr, Case, Exists, InList, InSubquery, Like, @@ -537,44 +537,40 @@ impl<'a> TreeNodeRewriter for TypeCoercionRewriter<'a> { /// Transform a schema to use non-view types for Utf8View and BinaryView fn transform_schema_to_nonview(dfschema: &DFSchemaRef) -> Option> { let metadata = dfschema.as_arrow().metadata.clone(); - let len = dfschema.fields().len(); let mut transformed = false; - let (qualifiers, transformed_fields) = dfschema - .iter() - .map(|(qualifier, field)| match field.data_type() { - DataType::Utf8View => { - transformed = true; - ( - qualifier, - Arc::new(Field::new( - field.name(), - DataType::LargeUtf8, - field.is_nullable(), - )), - ) - } - DataType::BinaryView => { - transformed = true; - ( - qualifier, - Arc::new(Field::new( - field.name(), - DataType::LargeBinary, - field.is_nullable(), - )), - ) - } - _ => (qualifier, Arc::clone(field)), - }) - .fold( - (Vec::with_capacity(len), Vec::with_capacity(len)), - |(mut qs, mut fs), (q, f)| { - qs.push(q.cloned()); - fs.push(f); - (qs, fs) - }, - ); + let (qualifiers, transformed_fields): (Vec>, Vec>) = + dfschema + .iter() + .map(|(qualifier, field)| match field.data_type() { + DataType::Utf8View => { + transformed = true; + ( + qualifier.cloned() as Option, + Arc::new(Field::new( + field.name(), + DataType::LargeUtf8, + field.is_nullable(), + )), + ) + } + DataType::BinaryView => { + transformed = true; + ( + qualifier.cloned() as Option, + Arc::new(Field::new( + field.name(), + DataType::LargeBinary, + field.is_nullable(), + )), + ) + } + _ => ( + qualifier.cloned() as Option, + Arc::clone(field), + ), + }) + .unzip(); if !transformed { return None; @@ -1086,15 +1082,15 @@ mod test { vec![bool_expr], Arc::clone(&empty), )?); - let expect_no_cast = - "Projection: a < CAST(Utf8(\"foo\") AS Utf8View)\n EmptyRelation"; + let expect_no_additional_cast = + "Projection: a < CAST(Utf8(\"foo\") AS Utf8View)\n EmptyRelation"; // outer bool not re-casted let mut options = ConfigOptions::default(); options.optimizer.expand_views_at_output = true; assert_analyzed_plan_with_config_eq( options, Arc::new(TypeCoercion::new()), plan_no_cast.clone(), - expect_no_cast, + expect_no_additional_cast, )?; // coerce start with a non-projection root logical plan node From caae730e63f3f3df6caae0b6f3f6fb2eb99bf757 Mon Sep 17 00:00:00 2001 From: wiedld Date: Thu, 5 Sep 2024 16:45:24 -0700 Subject: [PATCH 6/6] test(12119): more clearly define scenarios in viewtype-coercion test coverage, and expand with PlanD scenario --- .../optimizer/src/analyzer/type_coercion.rs | 179 ++++++++++-------- 1 file changed, 103 insertions(+), 76 deletions(-) diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 6f700ac3c786..722ee604e7d0 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -1052,124 +1052,151 @@ mod test { assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan, expected) } + fn coerce_on_output_if_viewtype(plan: LogicalPlan, expected: &str) -> Result<()> { + let mut options = ConfigOptions::default(); + options.optimizer.expand_views_at_output = true; + + assert_analyzed_plan_with_config_eq( + options, + Arc::new(TypeCoercion::new()), + plan.clone(), + expected, + ) + } + + fn do_not_coerce_on_output(plan: LogicalPlan, expected: &str) -> Result<()> { + assert_analyzed_plan_with_config_eq( + ConfigOptions::default(), + Arc::new(TypeCoercion::new()), + plan.clone(), + expected, + ) + } + #[test] fn coerce_utf8view_output() -> Result<()> { + // Plan A + // scenario: outermost utf8view projection let expr = col("a"); let empty = empty_with_type(DataType::Utf8View); let plan = LogicalPlan::Projection(Projection::try_new( vec![expr.clone()], Arc::clone(&empty), )?); - - // baseline - let expected = "Projection: a\n EmptyRelation"; - assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan.clone(), expected)?; - - // coerce: Utf8View => LargeUtf8 - let expect_cast = "Projection: CAST(a AS LargeUtf8)\n EmptyRelation"; - let mut options = ConfigOptions::default(); - options.optimizer.expand_views_at_output = true; - assert_analyzed_plan_with_config_eq( - options, - Arc::new(TypeCoercion::new()), - plan.clone(), - expect_cast, - )?; - - // request coerce --> but output in bool, not Utf8View + // Plan A: no coerce + let if_not_coerced = "Projection: a\n EmptyRelation"; + do_not_coerce_on_output(plan.clone(), if_not_coerced)?; + // Plan A: coerce requested: Utf8View => LargeUtf8 + let if_coerced = "Projection: CAST(a AS LargeUtf8)\n EmptyRelation"; + coerce_on_output_if_viewtype(plan.clone(), if_coerced)?; + + // Plan B + // scenario: outermost bool projection let bool_expr = col("a").lt(lit("foo")); - let plan_no_cast = LogicalPlan::Projection(Projection::try_new( + let bool_plan = LogicalPlan::Projection(Projection::try_new( vec![bool_expr], Arc::clone(&empty), )?); - let expect_no_additional_cast = - "Projection: a < CAST(Utf8(\"foo\") AS Utf8View)\n EmptyRelation"; // outer bool not re-casted - let mut options = ConfigOptions::default(); - options.optimizer.expand_views_at_output = true; - assert_analyzed_plan_with_config_eq( - options, - Arc::new(TypeCoercion::new()), - plan_no_cast.clone(), - expect_no_additional_cast, - )?; - - // coerce start with a non-projection root logical plan node + // Plan B: no coerce + let if_not_coerced = + "Projection: a < CAST(Utf8(\"foo\") AS Utf8View)\n EmptyRelation"; + do_not_coerce_on_output(bool_plan.clone(), if_not_coerced)?; + // Plan B: coerce requested: no coercion applied + let if_coerced = if_not_coerced; + coerce_on_output_if_viewtype(bool_plan, if_coerced)?; + + // Plan C + // scenario: with a non-projection root logical plan node let sort_expr = expr.sort(true, true); let sort_plan = LogicalPlan::Sort(Sort { expr: vec![sort_expr], input: Arc::new(plan), fetch: None, }); - let expect_cast = "Projection: CAST(a AS LargeUtf8)\n Sort: a ASC NULLS FIRST\n Projection: a\n EmptyRelation"; - let mut options = ConfigOptions::default(); - options.optimizer.expand_views_at_output = true; - assert_analyzed_plan_with_config_eq( - options, - Arc::new(TypeCoercion::new()), - sort_plan.clone(), - expect_cast, - )?; + // Plan C: no coerce + let if_not_coerced = + "Sort: a ASC NULLS FIRST\n Projection: a\n EmptyRelation"; + do_not_coerce_on_output(sort_plan.clone(), if_not_coerced)?; + // Plan C: coerce requested: Utf8View => LargeUtf8 + let if_coerced = "Projection: CAST(a AS LargeUtf8)\n Sort: a ASC NULLS FIRST\n Projection: a\n EmptyRelation"; + coerce_on_output_if_viewtype(sort_plan.clone(), if_coerced)?; + + // Plan D + // scenario: two layers of projections with view types + let plan = LogicalPlan::Projection(Projection::try_new( + vec![col("a")], + Arc::new(sort_plan), + )?); + // Plan D: no coerce + let if_not_coerced = "Projection: a\n Sort: a ASC NULLS FIRST\n Projection: a\n EmptyRelation"; + do_not_coerce_on_output(plan.clone(), if_not_coerced)?; + // Plan B: coerce requested: Utf8View => LargeUtf8 only on outermost + let if_coerced = "Projection: CAST(a AS LargeUtf8)\n Sort: a ASC NULLS FIRST\n Projection: a\n EmptyRelation"; + coerce_on_output_if_viewtype(plan.clone(), if_coerced)?; Ok(()) } #[test] fn coerce_binaryview_output() -> Result<()> { + // Plan A + // scenario: outermost binaryview projection let expr = col("a"); let empty = empty_with_type(DataType::BinaryView); let plan = LogicalPlan::Projection(Projection::try_new( vec![expr.clone()], Arc::clone(&empty), )?); - - // baseline - let expected = "Projection: a\n EmptyRelation"; - assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan.clone(), expected)?; - - // coerce: BinaryView => LargeBinary - let expect_cast = "Projection: CAST(a AS LargeBinary)\n EmptyRelation"; - let mut options = ConfigOptions::default(); - options.optimizer.expand_views_at_output = true; - assert_analyzed_plan_with_config_eq( - options, - Arc::new(TypeCoercion::new()), - plan.clone(), - expect_cast, - )?; - - // request coerce --> but output in bool, not BinaryView + // Plan A: no coerce + let if_not_coerced = "Projection: a\n EmptyRelation"; + do_not_coerce_on_output(plan.clone(), if_not_coerced)?; + // Plan A: coerce requested: BinaryView => LargeBinary + let if_coerced = "Projection: CAST(a AS LargeBinary)\n EmptyRelation"; + coerce_on_output_if_viewtype(plan.clone(), if_coerced)?; + + // Plan B + // scenario: outermost bool projection let bool_expr = col("a").lt(lit(vec![8, 1, 8, 1])); - let plan_no_cast = LogicalPlan::Projection(Projection::try_new( + let bool_plan = LogicalPlan::Projection(Projection::try_new( vec![bool_expr], Arc::clone(&empty), )?); - let expect_no_cast = + // Plan B: no coerce + let if_not_coerced = "Projection: a < CAST(Binary(\"8,1,8,1\") AS BinaryView)\n EmptyRelation"; - let mut options = ConfigOptions::default(); - options.optimizer.expand_views_at_output = true; - assert_analyzed_plan_with_config_eq( - options, - Arc::new(TypeCoercion::new()), - plan_no_cast.clone(), - expect_no_cast, - )?; + do_not_coerce_on_output(bool_plan.clone(), if_not_coerced)?; + // Plan B: coerce requested: no coercion applied + let if_coerced = if_not_coerced; + coerce_on_output_if_viewtype(bool_plan, if_coerced)?; - // coerce start with a non-projection root logical plan node + // Plan C + // scenario: with a non-projection root logical plan node let sort_expr = expr.sort(true, true); let sort_plan = LogicalPlan::Sort(Sort { expr: vec![sort_expr], input: Arc::new(plan), fetch: None, }); - let expect_cast = "Projection: CAST(a AS LargeBinary)\n Sort: a ASC NULLS FIRST\n Projection: a\n EmptyRelation"; - let mut options = ConfigOptions::default(); - options.optimizer.expand_views_at_output = true; - assert_analyzed_plan_with_config_eq( - options, - Arc::new(TypeCoercion::new()), - sort_plan.clone(), - expect_cast, - )?; + // Plan C: no coerce + let if_not_coerced = + "Sort: a ASC NULLS FIRST\n Projection: a\n EmptyRelation"; + do_not_coerce_on_output(sort_plan.clone(), if_not_coerced)?; + // Plan C: coerce requested: BinaryView => LargeBinary + let if_coerced = "Projection: CAST(a AS LargeBinary)\n Sort: a ASC NULLS FIRST\n Projection: a\n EmptyRelation"; + coerce_on_output_if_viewtype(sort_plan.clone(), if_coerced)?; + + // Plan D + // scenario: two layers of projections with view types + let plan = LogicalPlan::Projection(Projection::try_new( + vec![col("a")], + Arc::new(sort_plan), + )?); + // Plan D: no coerce + let if_not_coerced = "Projection: a\n Sort: a ASC NULLS FIRST\n Projection: a\n EmptyRelation"; + do_not_coerce_on_output(plan.clone(), if_not_coerced)?; + // Plan B: coerce requested: BinaryView => LargeBinary only on outermost + let if_coerced = "Projection: CAST(a AS LargeBinary)\n Sort: a ASC NULLS FIRST\n Projection: a\n EmptyRelation"; + coerce_on_output_if_viewtype(plan.clone(), if_coerced)?; Ok(()) }