From 70a2c1bccae67793abdb99b21ed1b3aed731a49f Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Sat, 3 Aug 2024 20:32:49 +0800 Subject: [PATCH] chore: leave tests in core module --- .../aggregate_statistics_test.rs | 382 ++ .../enforce_distribution_test.rs | 3319 +++++++++++++++++ .../enforce_sorting_test.rs | 1850 +++++++++ .../limited_distinct_aggregation_test.rs | 443 +++ datafusion/core/src/physical_optimizer/mod.rs | 7 +- .../core/src/physical_optimizer/optimizer.rs | 8 +- ...ace_with_order_preserving_variants_test.rs | 1264 +++++++ datafusion/physical-optimizer/Cargo.toml | 4 +- .../src/aggregate_statistics.rs | 359 -- .../src}/enforce_distribution.rs | 45 +- .../physical-optimizer/src/enforce_sorting.rs | 1808 +-------- datafusion/physical-optimizer/src/lib.rs | 1 + .../src/limited_distinct_aggregation.rs | 421 --- .../replace_with_order_preserving_variants.rs | 1245 +------ .../physical-optimizer/src/sort_pushdown.rs | 3 +- 15 files changed, 7305 insertions(+), 3854 deletions(-) create mode 100644 datafusion/core/src/physical_optimizer/aggregate_statistics_test.rs create mode 100644 datafusion/core/src/physical_optimizer/enforce_distribution_test.rs create mode 100644 datafusion/core/src/physical_optimizer/enforce_sorting_test.rs create mode 100644 datafusion/core/src/physical_optimizer/limited_distinct_aggregation_test.rs create mode 100644 datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants_test.rs rename datafusion/{core/src/physical_optimizer => physical-optimizer/src}/enforce_distribution.rs (99%) diff --git a/datafusion/core/src/physical_optimizer/aggregate_statistics_test.rs b/datafusion/core/src/physical_optimizer/aggregate_statistics_test.rs new file mode 100644 index 0000000000000..8319e042d7d5a --- /dev/null +++ b/datafusion/core/src/physical_optimizer/aggregate_statistics_test.rs @@ -0,0 +1,382 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
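+// Tests for the `AggregateStatistics` optimizer rule. Per the checks below, the
+// rule rewrites a COUNT aggregation over a source with exact statistics into a
+// precomputed value, and a resulting `ProjectionExec` is treated as the sign
+// that the rewrite fired (see `assert_count_optim_success`).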
+
+#[cfg(test)]
+pub(crate) mod tests {
+
+    use crate::error::Result;
+    use std::sync::Arc;
+
+    use crate::prelude::SessionContext;
+    use datafusion_common::config::ConfigOptions;
+    use datafusion_expr::utils::COUNT_STAR_EXPANSION;
+    use datafusion_expr::Operator;
+    use datafusion_physical_optimizer::aggregate_statistics::AggregateStatistics;
+    use datafusion_physical_optimizer::PhysicalOptimizerRule;
+    use datafusion_physical_plan::aggregates::{AggregateExec, PhysicalGroupBy};
+    use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec;
+    use datafusion_physical_plan::filter::FilterExec;
+    use datafusion_physical_plan::memory::MemoryExec;
+    use datafusion_physical_plan::projection::ProjectionExec;
+    use datafusion_physical_plan::{common, ExecutionPlan};
+
+    use datafusion_common::arrow::array::Int32Array;
+    use datafusion_common::arrow::datatypes::{DataType, Field, Schema};
+    use datafusion_common::arrow::record_batch::RecordBatch;
+    use datafusion_common::cast::as_int64_array;
+    use datafusion_functions_aggregate::count::count_udaf;
+    use datafusion_physical_expr::expressions::{self, cast};
+    use datafusion_physical_expr::{AggregateExpr, PhysicalExpr};
+    use datafusion_physical_expr_common::aggregate::AggregateExprBuilder;
+    use datafusion_physical_plan::aggregates::AggregateMode;
+
+    /// Mock data using a MemoryExec which has an exact count statistic
+    fn mock_data() -> Result<Arc<MemoryExec>> {
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("a", DataType::Int32, true),
+            Field::new("b", DataType::Int32, true),
+        ]));
+
+        let batch = RecordBatch::try_new(
+            Arc::clone(&schema),
+            vec![
+                Arc::new(Int32Array::from(vec![Some(1), Some(2), None])),
+                Arc::new(Int32Array::from(vec![Some(4), None, Some(6)])),
+            ],
+        )?;
+
+        Ok(Arc::new(MemoryExec::try_new(
+            &[vec![batch]],
+            Arc::clone(&schema),
+            None,
+        )?))
+    }
+
+    /// Checks that the count optimization was applied and we still get the right result
+    async fn assert_count_optim_success(
+        plan: AggregateExec,
+        agg: TestAggregate,
+    ) -> Result<()> {
+        let session_ctx = SessionContext::new();
+        let state = session_ctx.state();
+        let plan: Arc<dyn ExecutionPlan> = Arc::new(plan);
+
+        let optimized = AggregateStatistics::new()
+            .optimize(Arc::clone(&plan), state.config_options())?;
+
+        // A ProjectionExec is a sign that the count optimization was applied
+        assert!(optimized.as_any().is::<ProjectionExec>());
+
+        // run both the optimized and non-optimized plans
+        let optimized_result =
+            common::collect(optimized.execute(0, session_ctx.task_ctx())?).await?;
+        let nonoptimized_result =
+            common::collect(plan.execute(0, session_ctx.task_ctx())?).await?;
+        assert_eq!(optimized_result.len(), nonoptimized_result.len());
+
+        // and validate that the results are as expected
+        assert_eq!(optimized_result.len(), 1);
+        check_batch(optimized_result.into_iter().next().unwrap(), &agg);
+        // check the non-optimized result too, to ensure types and names remain the same
+        assert_eq!(nonoptimized_result.len(), 1);
+        check_batch(nonoptimized_result.into_iter().next().unwrap(), &agg);
+
+        Ok(())
+    }
+
+    fn check_batch(batch: RecordBatch, agg: &TestAggregate) {
+        let schema = batch.schema();
+        let fields = schema.fields();
+        assert_eq!(fields.len(), 1);
+
+        let field = &fields[0];
+        assert_eq!(field.name(), agg.column_name());
+        assert_eq!(field.data_type(), &DataType::Int64);
+        // note that nullability differs
+
+        assert_eq!(
+            as_int64_array(batch.column(0)).unwrap().values(),
+            &[agg.expected_count()]
+        );
+    }
+
+    /// Describe the type of aggregate being tested
+    pub(crate) enum TestAggregate {
+        /// Testing COUNT(*) type aggregates
+        CountStar,
+
+        /// Testing for COUNT(column) aggregate
+        ColumnA(Arc<Schema>),
+    }
+
+    impl TestAggregate {
+        pub(crate) fn new_count_star() -> Self {
+            Self::CountStar
+        }
+
+        fn new_count_column(schema: &Arc<Schema>) -> Self {
+            Self::ColumnA(schema.clone())
+        }
+
+        // Return the appropriate COUNT expression, depending on whether it is for a column or for the table (*)
+        pub(crate) fn count_expr(&self, schema: &Schema) -> Arc<dyn AggregateExpr> {
+            AggregateExprBuilder::new(count_udaf(), vec![self.column()])
+                .schema(Arc::new(schema.clone()))
+                .name(self.column_name())
+                .build()
+                .unwrap()
+        }
+
+        /// What argument would this aggregate need in the plan?
+        fn column(&self) -> Arc<dyn PhysicalExpr> {
+            match self {
+                Self::CountStar => expressions::lit(COUNT_STAR_EXPANSION),
+                Self::ColumnA(s) => expressions::col("a", s).unwrap(),
+            }
+        }
+
+        /// What name would this aggregate produce in a plan?
+        fn column_name(&self) -> &'static str {
+            match self {
+                Self::CountStar => "COUNT(*)",
+                Self::ColumnA(_) => "COUNT(a)",
+            }
+        }
+
+        /// What is the expected count?
+        fn expected_count(&self) -> i64 {
+            match self {
+                TestAggregate::CountStar => 3,
+                TestAggregate::ColumnA(_) => 2,
+            }
+        }
+    }
+
+    #[tokio::test]
+    async fn test_count_partial_direct_child() -> Result<()> {
+        // basic test case with the aggregation applied on a source with exact statistics
+        let source = mock_data()?;
+        let schema = source.schema();
+        let agg = TestAggregate::new_count_star();
+
+        let partial_agg = AggregateExec::try_new(
+            AggregateMode::Partial,
+            PhysicalGroupBy::default(),
+            vec![agg.count_expr(&schema)],
+            vec![None],
+            source,
+            Arc::clone(&schema),
+        )?;
+
+        let final_agg = AggregateExec::try_new(
+            AggregateMode::Final,
+            PhysicalGroupBy::default(),
+            vec![agg.count_expr(&schema)],
+            vec![None],
+            Arc::new(partial_agg),
+            Arc::clone(&schema),
+        )?;
+
+        assert_count_optim_success(final_agg, agg).await?;
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_count_partial_with_nulls_direct_child() -> Result<()> {
+        // basic test case with the aggregation applied on a source with exact statistics
+        let source = mock_data()?;
+        let schema = source.schema();
+        let agg = TestAggregate::new_count_column(&schema);
+
+        let partial_agg = AggregateExec::try_new(
+            AggregateMode::Partial,
+            PhysicalGroupBy::default(),
+            vec![agg.count_expr(&schema)],
+            vec![None],
+            source,
+            Arc::clone(&schema),
+        )?;
+
+        let final_agg = AggregateExec::try_new(
+            AggregateMode::Final,
+            PhysicalGroupBy::default(),
+            vec![agg.count_expr(&schema)],
+            vec![None],
+            Arc::new(partial_agg),
+            Arc::clone(&schema),
+        )?;
+
+        assert_count_optim_success(final_agg, agg).await?;
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_count_partial_indirect_child() -> Result<()> {
+        let source = mock_data()?;
+        let schema = source.schema();
+        let agg = TestAggregate::new_count_star();
+
+        let partial_agg = AggregateExec::try_new(
+            AggregateMode::Partial,
+            PhysicalGroupBy::default(),
+            vec![agg.count_expr(&schema)],
+            vec![None],
+            source,
+            Arc::clone(&schema),
+        )?;
+
+        // We introduce an intermediate optimization step between the partial and final aggregator
+        let coalesce = CoalescePartitionsExec::new(Arc::new(partial_agg));
+
+        let final_agg = AggregateExec::try_new(
+            AggregateMode::Final,
+            PhysicalGroupBy::default(),
+            vec![agg.count_expr(&schema)],
+            vec![None],
+            Arc::new(coalesce),
+            Arc::clone(&schema),
+        )?;
+
+        assert_count_optim_success(final_agg, agg).await?;
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_count_partial_with_nulls_indirect_child() -> Result<()> {
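+        // Same as the previous test, but for COUNT(column): an intermediate
+        // CoalescePartitionsExec sits between the partial and final aggregates.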
+        let source = mock_data()?;
+        let schema = source.schema();
+        let agg = TestAggregate::new_count_column(&schema);
+
+        let partial_agg = AggregateExec::try_new(
+            AggregateMode::Partial,
+            PhysicalGroupBy::default(),
+            vec![agg.count_expr(&schema)],
+            vec![None],
+            source,
+            Arc::clone(&schema),
+        )?;
+
+        // We introduce an intermediate optimization step between the partial and final aggregator
+        let coalesce = CoalescePartitionsExec::new(Arc::new(partial_agg));
+
+        let final_agg = AggregateExec::try_new(
+            AggregateMode::Final,
+            PhysicalGroupBy::default(),
+            vec![agg.count_expr(&schema)],
+            vec![None],
+            Arc::new(coalesce),
+            Arc::clone(&schema),
+        )?;
+
+        assert_count_optim_success(final_agg, agg).await?;
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_count_inexact_stat() -> Result<()> {
+        let source = mock_data()?;
+        let schema = source.schema();
+        let agg = TestAggregate::new_count_star();
+
+        // adding a filter makes the statistics inexact
+        let filter = Arc::new(FilterExec::try_new(
+            expressions::binary(
+                expressions::col("a", &schema)?,
+                Operator::Gt,
+                cast(expressions::lit(1u32), &schema, DataType::Int32)?,
+                &schema,
+            )?,
+            source,
+        )?);
+
+        let partial_agg = AggregateExec::try_new(
+            AggregateMode::Partial,
+            PhysicalGroupBy::default(),
+            vec![agg.count_expr(&schema)],
+            vec![None],
+            filter,
+            Arc::clone(&schema),
+        )?;
+
+        let final_agg = AggregateExec::try_new(
+            AggregateMode::Final,
+            PhysicalGroupBy::default(),
+            vec![agg.count_expr(&schema)],
+            vec![None],
+            Arc::new(partial_agg),
+            Arc::clone(&schema),
+        )?;
+
+        let conf = ConfigOptions::new();
+        let optimized =
+            AggregateStatistics::new().optimize(Arc::new(final_agg), &conf)?;
+
+        // check that the original ExecutionPlan was not replaced
+        assert!(optimized.as_any().is::<AggregateExec>());
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_count_with_nulls_inexact_stat() -> Result<()> {
+        let source = mock_data()?;
+        let schema = source.schema();
+        let agg = TestAggregate::new_count_column(&schema);
+
+        // adding a filter makes the statistics inexact
+        let filter = Arc::new(FilterExec::try_new(
+            expressions::binary(
+                expressions::col("a", &schema)?,
+                Operator::Gt,
+                cast(expressions::lit(1u32), &schema, DataType::Int32)?,
+                &schema,
+            )?,
+            source,
+        )?);
+
+        let partial_agg = AggregateExec::try_new(
+            AggregateMode::Partial,
+            PhysicalGroupBy::default(),
+            vec![agg.count_expr(&schema)],
+            vec![None],
+            filter,
+            Arc::clone(&schema),
+        )?;
+
+        let final_agg = AggregateExec::try_new(
+            AggregateMode::Final,
+            PhysicalGroupBy::default(),
+            vec![agg.count_expr(&schema)],
+            vec![None],
+            Arc::new(partial_agg),
+            Arc::clone(&schema),
+        )?;
+
+        let conf = ConfigOptions::new();
+        let optimized =
+            AggregateStatistics::new().optimize(Arc::new(final_agg), &conf)?;
+
+        // check that the original ExecutionPlan was not replaced
+        assert!(optimized.as_any().is::<AggregateExec>());
+
+        Ok(())
+    }
+}
diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution_test.rs b/datafusion/core/src/physical_optimizer/enforce_distribution_test.rs
new file mode 100644
index 0000000000000..da92c164254cc
--- /dev/null
+++ b/datafusion/core/src/physical_optimizer/enforce_distribution_test.rs
@@ -0,0 +1,3319 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/// Since almost all of these tests explicitly use `ParquetExec`, they only run with the parquet feature flag on
+#[cfg(feature = "parquet")]
+#[cfg(test)]
+pub(crate) mod tests {
+    use datafusion_physical_optimizer::enforce_distribution::EnforceDistribution;
+    use std::ops::Deref;
+
+    use std::fmt::Debug;
+    use std::sync::Arc;
+
+    use datafusion_common::config::ConfigOptions;
+    use datafusion_common::Result;
+    use datafusion_physical_optimizer::enforce_distribution::adjust_input_keys_ordering;
+    use datafusion_physical_plan::aggregates::{
+        AggregateExec, AggregateMode, PhysicalGroupBy,
+    };
+    use datafusion_physical_plan::joins::{
+        HashJoinExec, PartitionMode, SortMergeJoinExec,
+    };
+    use datafusion_physical_plan::projection::ProjectionExec;
+    use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec;
+
+    use datafusion_physical_plan::union::UnionExec;
+    use datafusion_physical_plan::ExecutionPlan;
+
+    use datafusion_common::arrow::compute::SortOptions;
+    use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
+    use datafusion_expr::logical_plan::JoinType;
+    use datafusion_physical_expr::expressions::Column;
+
+    use datafusion_physical_expr::PhysicalExpr;
+
+    use datafusion_physical_plan::ExecutionPlanProperties;
+
+    use datafusion_physical_optimizer::PhysicalOptimizerRule;
+
+    use crate::datasource::file_format::file_compression_type::FileCompressionType;
+    use crate::datasource::listing::PartitionedFile;
+    use crate::datasource::object_store::ObjectStoreUrl;
+    use crate::datasource::physical_plan::{CsvExec, FileScanConfig, ParquetExec};
+    use crate::physical_optimizer::test_utils::{
+        check_integrity, coalesce_partitions_exec, repartition_exec,
+    };
+    use crate::physical_plan::coalesce_batches::CoalesceBatchesExec;
+    use crate::physical_plan::expressions::col;
+    use crate::physical_plan::filter::FilterExec;
+    use crate::physical_plan::joins::utils::JoinOn;
+    use crate::physical_plan::limit::{GlobalLimitExec, LocalLimitExec};
+    use crate::physical_plan::sorts::sort::SortExec;
+    use crate::physical_plan::{displayable, DisplayAs, DisplayFormatType, Statistics};
+    use datafusion_physical_optimizer::enforce_sorting::EnforceSorting;
+    use datafusion_physical_optimizer::output_requirements::OutputRequirements;
+
+    use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
+    use datafusion_common::ScalarValue;
+    use datafusion_expr::Operator;
+    use datafusion_physical_expr::expressions::{BinaryExpr, Literal};
+    use datafusion_physical_expr::{
+        expressions, expressions::binary, expressions::lit, LexOrdering,
+        PhysicalSortExpr, PhysicalSortRequirement,
+    };
+    use datafusion_physical_plan::tree_node::PlanContext;
+    use datafusion_physical_plan::PlanProperties;
+
+    use datafusion_physical_optimizer::enforce_distribution::ensure_distribution;
+    use datafusion_physical_optimizer::enforce_distribution::reorder_join_keys_to_inputs;
+
+    // TODO: remove these type aliases
once https://github.com/apache/datafusion/issues/11502 is resolved + /// Keeps track of parent required key orderings. + type PlanWithKeyRequirements = PlanContext>>; + + /// Keeps track of distribution changing operators (like `RepartitionExec`, + /// `SortPreservingMergeExec`, `CoalescePartitionsExec`) and their ancestors. + /// Using this information, we can optimize distribution of the plan if/when + /// necessary. + type DistributionContext = PlanContext; + + /// Models operators like BoundedWindowExec that require an input + /// ordering but is easy to construct + #[derive(Debug)] + struct SortRequiredExec { + input: Arc, + expr: LexOrdering, + cache: PlanProperties, + } + + impl SortRequiredExec { + fn new_with_requirement( + input: Arc, + requirement: Vec, + ) -> Self { + let cache = Self::compute_properties(&input); + Self { + input, + expr: requirement, + cache, + } + } + + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. + fn compute_properties(input: &Arc) -> PlanProperties { + PlanProperties::new( + input.equivalence_properties().clone(), // Equivalence Properties + input.output_partitioning().clone(), // Output Partitioning + input.execution_mode(), // Execution Mode + ) + } + } + + impl DisplayAs for SortRequiredExec { + fn fmt_as( + &self, + _t: DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + write!( + f, + "SortRequiredExec: [{}]", + PhysicalSortExpr::format_list(&self.expr) + ) + } + } + + impl ExecutionPlan for SortRequiredExec { + fn name(&self) -> &'static str { + "SortRequiredExec" + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn properties(&self) -> &PlanProperties { + &self.cache + } + + fn benefits_from_input_partitioning(&self) -> Vec { + vec![false] + } + + fn children(&self) -> Vec<&Arc> { + vec![&self.input] + } + + // model that it requires the output ordering of its input + fn required_input_ordering(&self) -> Vec>> { + if self.expr.is_empty() { + vec![None] + } else { + vec![Some(PhysicalSortRequirement::from_sort_exprs(&self.expr))] + } + } + + fn with_new_children( + self: Arc, + mut children: Vec>, + ) -> Result> { + assert_eq!(children.len(), 1); + let child = children.pop().unwrap(); + Ok(Arc::new(Self::new_with_requirement( + child, + self.expr.clone(), + ))) + } + + fn execute( + &self, + _partition: usize, + _context: Arc, + ) -> Result { + unreachable!(); + } + + fn statistics(&self) -> Result { + self.input.statistics() + } + } + + pub(crate) fn schema() -> SchemaRef { + Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int64, true), + Field::new("b", DataType::Int64, true), + Field::new("c", DataType::Int64, true), + Field::new("d", DataType::Int32, true), + Field::new("e", DataType::Boolean, true), + ])) + } + + fn parquet_exec() -> Arc { + parquet_exec_with_sort(vec![]) + } + + /// create a single parquet file that is sorted + pub(crate) fn parquet_exec_with_sort( + output_ordering: Vec>, + ) -> Arc { + ParquetExec::builder( + FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema()) + .with_file(PartitionedFile::new("x".to_string(), 100)) + .with_output_ordering(output_ordering), + ) + .build_arc() + } + + fn parquet_exec_multiple() -> Arc { + parquet_exec_multiple_sorted(vec![]) + } + + /// Created a sorted parquet exec with multiple files + fn parquet_exec_multiple_sorted( + output_ordering: Vec>, + ) -> Arc { + ParquetExec::builder( + 
+            FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema())
+                .with_file_groups(vec![
+                    vec![PartitionedFile::new("x".to_string(), 100)],
+                    vec![PartitionedFile::new("y".to_string(), 100)],
+                ])
+                .with_output_ordering(output_ordering),
+        )
+        .build_arc()
+    }
+
+    fn csv_exec() -> Arc<CsvExec> {
+        csv_exec_with_sort(vec![])
+    }
+
+    fn csv_exec_with_sort(output_ordering: Vec<Vec<PhysicalSortExpr>>) -> Arc<CsvExec> {
+        Arc::new(
+            CsvExec::builder(
+                FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema())
+                    .with_file(PartitionedFile::new("x".to_string(), 100))
+                    .with_output_ordering(output_ordering),
+            )
+            .with_has_header(false)
+            .with_delimeter(b',')
+            .with_quote(b'"')
+            .with_escape(None)
+            .with_comment(None)
+            .with_newlines_in_values(false)
+            .with_file_compression_type(FileCompressionType::UNCOMPRESSED)
+            .build(),
+        )
+    }
+
+    fn csv_exec_multiple() -> Arc<CsvExec> {
+        csv_exec_multiple_sorted(vec![])
+    }
+
+    // Create a sorted CSV exec with multiple files
+    fn csv_exec_multiple_sorted(
+        output_ordering: Vec<Vec<PhysicalSortExpr>>,
+    ) -> Arc<CsvExec> {
+        Arc::new(
+            CsvExec::builder(
+                FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema())
+                    .with_file_groups(vec![
+                        vec![PartitionedFile::new("x".to_string(), 100)],
+                        vec![PartitionedFile::new("y".to_string(), 100)],
+                    ])
+                    .with_output_ordering(output_ordering),
+            )
+            .with_has_header(false)
+            .with_delimeter(b',')
+            .with_quote(b'"')
+            .with_escape(None)
+            .with_comment(None)
+            .with_newlines_in_values(false)
+            .with_file_compression_type(FileCompressionType::UNCOMPRESSED)
+            .build(),
+        )
+    }
+
+    fn projection_exec_with_alias(
+        input: Arc<dyn ExecutionPlan>,
+        alias_pairs: Vec<(String, String)>,
+    ) -> Arc<dyn ExecutionPlan> {
+        let mut exprs = vec![];
+        for (column, alias) in alias_pairs.iter() {
+            exprs.push((col(column, &input.schema()).unwrap(), alias.to_string()));
+        }
+        Arc::new(ProjectionExec::try_new(exprs, input).unwrap())
+    }
+
+    fn aggregate_exec_with_alias(
+        input: Arc<dyn ExecutionPlan>,
+        alias_pairs: Vec<(String, String)>,
+    ) -> Arc<dyn ExecutionPlan> {
+        let schema = schema();
+        let mut group_by_expr: Vec<(Arc<dyn PhysicalExpr>, String)> = vec![];
+        for (column, alias) in alias_pairs.iter() {
+            group_by_expr
+                .push((col(column, &input.schema()).unwrap(), alias.to_string()));
+        }
+        let group_by = PhysicalGroupBy::new_single(group_by_expr.clone());
+
+        let final_group_by_expr = group_by_expr
+            .iter()
+            .enumerate()
+            .map(|(index, (_col, name))| {
+                (
+                    Arc::new(expressions::Column::new(name, index))
+                        as Arc<dyn PhysicalExpr>,
+                    name.clone(),
+                )
+            })
+            .collect::<Vec<_>>();
+        let final_grouping = PhysicalGroupBy::new_single(final_group_by_expr);
+
+        Arc::new(
+            AggregateExec::try_new(
+                AggregateMode::FinalPartitioned,
+                final_grouping,
+                vec![],
+                vec![],
+                Arc::new(
+                    AggregateExec::try_new(
+                        AggregateMode::Partial,
+                        group_by,
+                        vec![],
+                        vec![],
+                        input,
+                        schema.clone(),
+                    )
+                    .unwrap(),
+                ),
+                schema,
+            )
+            .unwrap(),
+        )
+    }
+
+    fn hash_join_exec(
+        left: Arc<dyn ExecutionPlan>,
+        right: Arc<dyn ExecutionPlan>,
+        join_on: &JoinOn,
+        join_type: &JoinType,
+    ) -> Arc<dyn ExecutionPlan> {
+        Arc::new(
+            HashJoinExec::try_new(
+                left,
+                right,
+                join_on.clone(),
+                None,
+                join_type,
+                None,
+                PartitionMode::Partitioned,
+                false,
+            )
+            .unwrap(),
+        )
+    }
+
+    fn sort_merge_join_exec(
+        left: Arc<dyn ExecutionPlan>,
+        right: Arc<dyn ExecutionPlan>,
+        join_on: &JoinOn,
+        join_type: &JoinType,
+    ) -> Arc<dyn ExecutionPlan> {
+        Arc::new(
+            SortMergeJoinExec::try_new(
+                left,
+                right,
+                join_on.clone(),
+                None,
+                *join_type,
+                vec![SortOptions::default(); join_on.len()],
+                false,
+            )
+            .unwrap(),
+        )
+    }
+
+    fn filter_exec(input: Arc<dyn ExecutionPlan>) -> Arc<dyn ExecutionPlan> {
+        let predicate = Arc::new(BinaryExpr::new(
+            col("c", &schema()).unwrap(),
+            Operator::Eq,
Arc::new(Literal::new(ScalarValue::Int64(Some(0)))), + )); + Arc::new(FilterExec::try_new(predicate, input).unwrap()) + } + + fn sort_exec( + sort_exprs: Vec, + input: Arc, + preserve_partitioning: bool, + ) -> Arc { + let new_sort = SortExec::new(sort_exprs, input) + .with_preserve_partitioning(preserve_partitioning); + Arc::new(new_sort) + } + + fn sort_preserving_merge_exec( + sort_exprs: Vec, + input: Arc, + ) -> Arc { + Arc::new(SortPreservingMergeExec::new(sort_exprs, input)) + } + + fn limit_exec(input: Arc) -> Arc { + Arc::new(GlobalLimitExec::new( + Arc::new(LocalLimitExec::new(input, 100)), + 0, + Some(100), + )) + } + + fn union_exec(input: Vec>) -> Arc { + Arc::new(UnionExec::new(input)) + } + + fn sort_required_exec_with_req( + input: Arc, + sort_exprs: LexOrdering, + ) -> Arc { + Arc::new(SortRequiredExec::new_with_requirement(input, sort_exprs)) + } + + pub(crate) fn trim_plan_display(plan: &str) -> Vec<&str> { + plan.split('\n') + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .collect() + } + + fn ensure_distribution_helper( + plan: Arc, + target_partitions: usize, + prefer_existing_sort: bool, + ) -> Result> { + let distribution_context = DistributionContext::new_default(plan); + let mut config = ConfigOptions::new(); + config.execution.target_partitions = target_partitions; + config.optimizer.enable_round_robin_repartition = true; + config.optimizer.repartition_file_scans = false; + config.optimizer.repartition_file_min_size = 1024; + config.optimizer.prefer_existing_sort = prefer_existing_sort; + ensure_distribution(distribution_context, &config).map(|item| item.data.plan) + } + + /// Test whether plan matches with expected plan + macro_rules! plans_matches_expected { + ($EXPECTED_LINES: expr, $PLAN: expr) => { + let physical_plan = $PLAN; + let formatted = displayable(physical_plan.as_ref()).indent(true).to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + + let expected_plan_lines: Vec<&str> = $EXPECTED_LINES + .iter().map(|s| *s).collect(); + + assert_eq!( + expected_plan_lines, actual, + "\n**Original Plan Mismatch\n\nexpected:\n\n{expected_plan_lines:#?}\nactual:\n\n{actual:#?}\n\n" + ); + } + } + + /// Runs the repartition optimizer and asserts the plan against the expected + /// Arguments + /// * `EXPECTED_LINES` - Expected output plan + /// * `PLAN` - Input plan + /// * `FIRST_ENFORCE_DIST` - + /// true: (EnforceDistribution, EnforceDistribution, EnforceSorting) + /// false: else runs (EnforceSorting, EnforceDistribution, EnforceDistribution) + /// * `PREFER_EXISTING_SORT` (optional) - if true, will not repartition / resort data if it is already sorted + /// * `TARGET_PARTITIONS` (optional) - number of partitions to repartition to + /// * `REPARTITION_FILE_SCANS` (optional) - if true, will repartition file scans + /// * `REPARTITION_FILE_MIN_SIZE` (optional) - minimum file size to repartition + /// * `PREFER_EXISTING_UNION` (optional) - if true, will not attempt to convert Union to Interleave + macro_rules! 
assert_optimized { + ($EXPECTED_LINES: expr, $PLAN: expr, $FIRST_ENFORCE_DIST: expr) => { + assert_optimized!($EXPECTED_LINES, $PLAN, $FIRST_ENFORCE_DIST, false, 10, false, 1024, false); + }; + + ($EXPECTED_LINES: expr, $PLAN: expr, $FIRST_ENFORCE_DIST: expr, $PREFER_EXISTING_SORT: expr) => { + assert_optimized!($EXPECTED_LINES, $PLAN, $FIRST_ENFORCE_DIST, $PREFER_EXISTING_SORT, 10, false, 1024, false); + }; + + ($EXPECTED_LINES: expr, $PLAN: expr, $FIRST_ENFORCE_DIST: expr, $PREFER_EXISTING_SORT: expr, $PREFER_EXISTING_UNION: expr) => { + assert_optimized!($EXPECTED_LINES, $PLAN, $FIRST_ENFORCE_DIST, $PREFER_EXISTING_SORT, 10, false, 1024, $PREFER_EXISTING_UNION); + }; + + ($EXPECTED_LINES: expr, $PLAN: expr, $FIRST_ENFORCE_DIST: expr, $PREFER_EXISTING_SORT: expr, $TARGET_PARTITIONS: expr, $REPARTITION_FILE_SCANS: expr, $REPARTITION_FILE_MIN_SIZE: expr) => { + assert_optimized!($EXPECTED_LINES, $PLAN, $FIRST_ENFORCE_DIST, $PREFER_EXISTING_SORT, $TARGET_PARTITIONS, $REPARTITION_FILE_SCANS, $REPARTITION_FILE_MIN_SIZE, false); + }; + + ($EXPECTED_LINES: expr, $PLAN: expr, $FIRST_ENFORCE_DIST: expr, $PREFER_EXISTING_SORT: expr, $TARGET_PARTITIONS: expr, $REPARTITION_FILE_SCANS: expr, $REPARTITION_FILE_MIN_SIZE: expr, $PREFER_EXISTING_UNION: expr) => { + let expected_lines: Vec<&str> = $EXPECTED_LINES.iter().map(|s| *s).collect(); + + let mut config = ConfigOptions::new(); + config.execution.target_partitions = $TARGET_PARTITIONS; + config.optimizer.repartition_file_scans = $REPARTITION_FILE_SCANS; + config.optimizer.repartition_file_min_size = $REPARTITION_FILE_MIN_SIZE; + config.optimizer.prefer_existing_sort = $PREFER_EXISTING_SORT; + config.optimizer.prefer_existing_union = $PREFER_EXISTING_UNION; + + // NOTE: These tests verify the joint `EnforceDistribution` + `EnforceSorting` cascade + // because they were written prior to the separation of `BasicEnforcement` into + // `EnforceSorting` and `EnforceDistribution`. + // TODO: Orthogonalize the tests here just to verify `EnforceDistribution` and create + // new tests for the cascade. + + // Add the ancillary output requirements operator at the start: + let optimizer = OutputRequirements::new_add_mode(); + let optimized = optimizer.optimize($PLAN.clone(), &config)?; + + // This file has 2 rules that use tree node, apply these rules to original plan consecutively + // After these operations tree nodes should be in a consistent state. + // This code block makes sure that these rules doesn't violate tree node integrity. + { + let adjusted = if config.optimizer.top_down_join_key_reordering { + // Run adjust_input_keys_ordering rule + let plan_requirements = + PlanWithKeyRequirements::new_default($PLAN.clone()); + let adjusted = plan_requirements + .transform_down(adjust_input_keys_ordering) + .data() + .and_then(check_integrity)?; + // TODO: End state payloads will be checked here. + adjusted.plan + } else { + // Run reorder_join_keys_to_inputs rule + $PLAN.clone().transform_up(|plan| { + Ok(Transformed::yes(reorder_join_keys_to_inputs(plan)?)) + }) + .data()? + }; + + // Then run ensure_distribution rule + DistributionContext::new_default(adjusted) + .transform_up(|distribution_context| { + ensure_distribution(distribution_context, &config) + }) + .data() + .and_then(check_integrity)?; + // TODO: End state payloads will be checked here. 
+ } + + let optimized = if $FIRST_ENFORCE_DIST { + // Run enforce distribution rule first: + let optimizer = EnforceDistribution::new(); + let optimized = optimizer.optimize(optimized, &config)?; + // The rule should be idempotent. + // Re-running this rule shouldn't introduce unnecessary operators. + let optimizer = EnforceDistribution::new(); + let optimized = optimizer.optimize(optimized, &config)?; + // Run the enforce sorting rule: + let optimizer = EnforceSorting::new(); + let optimized = optimizer.optimize(optimized, &config)?; + optimized + } else { + // Run the enforce sorting rule first: + let optimizer = EnforceSorting::new(); + let optimized = optimizer.optimize(optimized, &config)?; + // Run enforce distribution rule: + let optimizer = EnforceDistribution::new(); + let optimized = optimizer.optimize(optimized, &config)?; + // The rule should be idempotent. + // Re-running this rule shouldn't introduce unnecessary operators. + let optimizer = EnforceDistribution::new(); + let optimized = optimizer.optimize(optimized, &config)?; + optimized + }; + + // Remove the ancillary output requirements operator when done: + let optimizer = OutputRequirements::new_remove_mode(); + let optimized = optimizer.optimize(optimized, &config)?; + + // Now format correctly + let plan = displayable(optimized.as_ref()).indent(true).to_string(); + let actual_lines = trim_plan_display(&plan); + + assert_eq!( + &expected_lines, &actual_lines, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected_lines, actual_lines + ); + }; + } + + macro_rules! assert_plan_txt { + ($EXPECTED_LINES: expr, $PLAN: expr) => { + let expected_lines: Vec<&str> = $EXPECTED_LINES.iter().map(|s| *s).collect(); + // Now format correctly + let plan = displayable($PLAN.as_ref()).indent(true).to_string(); + let actual_lines = trim_plan_display(&plan); + + assert_eq!( + &expected_lines, &actual_lines, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected_lines, actual_lines + ); + }; + } + + #[test] + fn multi_hash_joins() -> Result<()> { + let left = parquet_exec(); + let alias_pairs: Vec<(String, String)> = vec![ + ("a".to_string(), "a1".to_string()), + ("b".to_string(), "b1".to_string()), + ("c".to_string(), "c1".to_string()), + ("d".to_string(), "d1".to_string()), + ("e".to_string(), "e1".to_string()), + ]; + let right = projection_exec_with_alias(parquet_exec(), alias_pairs); + let join_types = vec![ + JoinType::Inner, + JoinType::Left, + JoinType::Right, + JoinType::Full, + JoinType::LeftSemi, + JoinType::LeftAnti, + JoinType::RightSemi, + JoinType::RightAnti, + ]; + + // Join on (a == b1) + let join_on = vec![( + Arc::new(Column::new_with_schema("a", &schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("b1", &right.schema()).unwrap()) as _, + )]; + + for join_type in join_types { + let join = hash_join_exec(left.clone(), right.clone(), &join_on, &join_type); + let join_plan = format!( + "HashJoinExec: mode=Partitioned, join_type={join_type}, on=[(a@0, b1@1)]" + ); + + match join_type { + JoinType::Inner + | JoinType::Left + | JoinType::Right + | JoinType::Full + | JoinType::LeftSemi + | JoinType::LeftAnti => { + // Join on (a == c) + let top_join_on = vec![( + Arc::new(Column::new_with_schema("a", &join.schema()).unwrap()) + as _, + Arc::new(Column::new_with_schema("c", &schema()).unwrap()) as _, + )]; + let top_join = hash_join_exec( + join.clone(), + parquet_exec(), + &top_join_on, + &join_type, + ); + let top_join_plan = + format!("HashJoinExec: mode=Partitioned, join_type={join_type}, on=[(a@0, 
c@2)]"); + + let expected = match join_type { + // Should include 3 RepartitionExecs + JoinType::Inner | JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => vec![ + top_join_plan.as_str(), + join_plan.as_str(), + "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ], + // Should include 4 RepartitionExecs + _ => vec![ + top_join_plan.as_str(), + "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", + join_plan.as_str(), + "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ], + }; + assert_optimized!(expected, top_join.clone(), true); + assert_optimized!(expected, top_join, false); + } + JoinType::RightSemi | JoinType::RightAnti => {} + } + + match join_type { + JoinType::Inner + | JoinType::Left + | JoinType::Right + | JoinType::Full + | JoinType::RightSemi + | JoinType::RightAnti => { + // This time we use (b1 == c) for top join + // Join on (b1 == c) + let top_join_on = vec![( + Arc::new(Column::new_with_schema("b1", &join.schema()).unwrap()) + as _, + Arc::new(Column::new_with_schema("c", &schema()).unwrap()) as _, + )]; + + let top_join = + hash_join_exec(join, parquet_exec(), &top_join_on, &join_type); + let top_join_plan = match join_type { + JoinType::RightSemi | JoinType::RightAnti => + format!("HashJoinExec: mode=Partitioned, join_type={join_type}, on=[(b1@1, c@2)]"), + _ => + format!("HashJoinExec: mode=Partitioned, join_type={join_type}, on=[(b1@6, c@2)]"), + }; + + let expected = match join_type { + // Should include 3 RepartitionExecs + JoinType::Inner | JoinType::Right | JoinType::RightSemi | JoinType::RightAnti => + vec![ + top_join_plan.as_str(), + join_plan.as_str(), + "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, 
e]", + "RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ], + // Should include 4 RepartitionExecs + _ => + vec![ + top_join_plan.as_str(), + "RepartitionExec: partitioning=Hash([b1@6], 10), input_partitions=10", + join_plan.as_str(), + "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ], + }; + assert_optimized!(expected, top_join.clone(), true); + assert_optimized!(expected, top_join, false); + } + JoinType::LeftSemi | JoinType::LeftAnti => {} + } + } + + Ok(()) + } + + #[test] + fn multi_joins_after_alias() -> Result<()> { + let left = parquet_exec(); + let right = parquet_exec(); + + // Join on (a == b) + let join_on = vec![( + Arc::new(Column::new_with_schema("a", &schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("b", &schema()).unwrap()) as _, + )]; + let join = hash_join_exec(left, right.clone(), &join_on, &JoinType::Inner); + + // Projection(a as a1, a as a2) + let alias_pairs: Vec<(String, String)> = vec![ + ("a".to_string(), "a1".to_string()), + ("a".to_string(), "a2".to_string()), + ]; + let projection = projection_exec_with_alias(join, alias_pairs); + + // Join on (a1 == c) + let top_join_on = vec![( + Arc::new(Column::new_with_schema("a1", &projection.schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("c", &schema()).unwrap()) as _, + )]; + + let top_join = hash_join_exec( + projection.clone(), + right.clone(), + &top_join_on, + &JoinType::Inner, + ); + + // Output partition need to respect the Alias and should not introduce additional RepartitionExec + let expected = &[ + "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a1@0, c@2)]", + "ProjectionExec: expr=[a@0 as a1, a@0 as a2]", + "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@1)]", + "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([b@1], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + assert_optimized!(expected, top_join.clone(), true); + assert_optimized!(expected, top_join, false); + + // Join on (a2 == c) + let top_join_on = vec![( + Arc::new(Column::new_with_schema("a2", &projection.schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("c", 
&schema()).unwrap()) as _, + )]; + + let top_join = hash_join_exec(projection, right, &top_join_on, &JoinType::Inner); + + // Output partition need to respect the Alias and should not introduce additional RepartitionExec + let expected = &[ + "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a2@1, c@2)]", + "ProjectionExec: expr=[a@0 as a1, a@0 as a2]", + "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@1)]", + "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([b@1], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + assert_optimized!(expected, top_join.clone(), true); + assert_optimized!(expected, top_join, false); + + Ok(()) + } + + #[test] + fn multi_joins_after_multi_alias() -> Result<()> { + let left = parquet_exec(); + let right = parquet_exec(); + + // Join on (a == b) + let join_on = vec![( + Arc::new(Column::new_with_schema("a", &schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("b", &schema()).unwrap()) as _, + )]; + + let join = hash_join_exec(left, right.clone(), &join_on, &JoinType::Inner); + + // Projection(c as c1) + let alias_pairs: Vec<(String, String)> = + vec![("c".to_string(), "c1".to_string())]; + let projection = projection_exec_with_alias(join, alias_pairs); + + // Projection(c1 as a) + let alias_pairs: Vec<(String, String)> = + vec![("c1".to_string(), "a".to_string())]; + let projection2 = projection_exec_with_alias(projection, alias_pairs); + + // Join on (a == c) + let top_join_on = vec![( + Arc::new(Column::new_with_schema("a", &projection2.schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("c", &schema()).unwrap()) as _, + )]; + + let top_join = hash_join_exec(projection2, right, &top_join_on, &JoinType::Inner); + + // The Column 'a' has different meaning now after the two Projections + // The original Output partition can not satisfy the Join requirements and need to add an additional RepartitionExec + let expected = &[ + "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, c@2)]", + "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", + "ProjectionExec: expr=[c1@0 as a]", + "ProjectionExec: expr=[c@2 as c1]", + "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@1)]", + "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([b@1], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + + assert_optimized!(expected, top_join.clone(), true); + assert_optimized!(expected, top_join, false); + + 
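+        // Asserting with both `FIRST_ENFORCE_DIST` orders checks that running
+        // `EnforceDistribution` before or after `EnforceSorting` yields the same plan.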
Ok(()) + } + + #[test] + fn join_after_agg_alias() -> Result<()> { + // group by (a as a1) + let left = aggregate_exec_with_alias( + parquet_exec(), + vec![("a".to_string(), "a1".to_string())], + ); + // group by (a as a2) + let right = aggregate_exec_with_alias( + parquet_exec(), + vec![("a".to_string(), "a2".to_string())], + ); + + // Join on (a1 == a2) + let join_on = vec![( + Arc::new(Column::new_with_schema("a1", &left.schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("a2", &right.schema()).unwrap()) as _, + )]; + let join = hash_join_exec(left, right.clone(), &join_on, &JoinType::Inner); + + // Only two RepartitionExecs added + let expected = &[ + "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a1@0, a2@0)]", + "AggregateExec: mode=FinalPartitioned, gby=[a1@0 as a1], aggr=[]", + "RepartitionExec: partitioning=Hash([a1@0], 10), input_partitions=10", + "AggregateExec: mode=Partial, gby=[a@0 as a1], aggr=[]", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "AggregateExec: mode=FinalPartitioned, gby=[a2@0 as a2], aggr=[]", + "RepartitionExec: partitioning=Hash([a2@0], 10), input_partitions=10", + "AggregateExec: mode=Partial, gby=[a@0 as a2], aggr=[]", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + assert_optimized!(expected, join.clone(), true); + assert_optimized!(expected, join, false); + + Ok(()) + } + + #[test] + fn hash_join_key_ordering() -> Result<()> { + // group by (a as a1, b as b1) + let left = aggregate_exec_with_alias( + parquet_exec(), + vec![ + ("a".to_string(), "a1".to_string()), + ("b".to_string(), "b1".to_string()), + ], + ); + // group by (b, a) + let right = aggregate_exec_with_alias( + parquet_exec(), + vec![ + ("b".to_string(), "b".to_string()), + ("a".to_string(), "a".to_string()), + ], + ); + + // Join on (b1 == b && a1 == a) + let join_on = vec![ + ( + Arc::new(Column::new_with_schema("b1", &left.schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("b", &right.schema()).unwrap()) as _, + ), + ( + Arc::new(Column::new_with_schema("a1", &left.schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("a", &right.schema()).unwrap()) as _, + ), + ]; + let join = hash_join_exec(left, right.clone(), &join_on, &JoinType::Inner); + + // Only two RepartitionExecs added + let expected = &[ + "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(b1@1, b@0), (a1@0, a@1)]", + "ProjectionExec: expr=[a1@1 as a1, b1@0 as b1]", + "AggregateExec: mode=FinalPartitioned, gby=[b1@0 as b1, a1@1 as a1], aggr=[]", + "RepartitionExec: partitioning=Hash([b1@0, a1@1], 10), input_partitions=10", + "AggregateExec: mode=Partial, gby=[b@1 as b1, a@0 as a1], aggr=[]", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[]", + "RepartitionExec: partitioning=Hash([b@0, a@1], 10), input_partitions=10", + "AggregateExec: mode=Partial, gby=[b@1 as b, a@0 as a], aggr=[]", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + assert_optimized!(expected, join.clone(), true); + assert_optimized!(expected, join, false); + + Ok(()) + } + + #[test] + fn multi_hash_join_key_ordering() -> Result<()> { 
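+        // Plan shape under test (schematic):
+        //
+        //   Filter (c > 1)
+        //     HashJoin on=[(B, b1), (C, c), (AA, a1)]
+        //       Projection [a as A, a as AA, b as B, c as C]
+        //         HashJoin on=[(a, a1), (b, b1), (c, c1)]
+        //       HashJoin on=[(c, c1), (b, b1), (a, a1)]
+        //
+        // The bottom joins' key orders should be aligned with the top join's keys,
+        // with no additional RepartitionExec introduced (see `expected` below).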
+ let left = parquet_exec(); + let alias_pairs: Vec<(String, String)> = vec![ + ("a".to_string(), "a1".to_string()), + ("b".to_string(), "b1".to_string()), + ("c".to_string(), "c1".to_string()), + ]; + let right = projection_exec_with_alias(parquet_exec(), alias_pairs); + + // Join on (a == a1 and b == b1 and c == c1) + let join_on = vec![ + ( + Arc::new(Column::new_with_schema("a", &schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("a1", &right.schema()).unwrap()) as _, + ), + ( + Arc::new(Column::new_with_schema("b", &schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("b1", &right.schema()).unwrap()) as _, + ), + ( + Arc::new(Column::new_with_schema("c", &schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("c1", &right.schema()).unwrap()) as _, + ), + ]; + let bottom_left_join = + hash_join_exec(left.clone(), right.clone(), &join_on, &JoinType::Inner); + + // Projection(a as A, a as AA, b as B, c as C) + let alias_pairs: Vec<(String, String)> = vec![ + ("a".to_string(), "A".to_string()), + ("a".to_string(), "AA".to_string()), + ("b".to_string(), "B".to_string()), + ("c".to_string(), "C".to_string()), + ]; + let bottom_left_projection = + projection_exec_with_alias(bottom_left_join, alias_pairs); + + // Join on (c == c1 and b == b1 and a == a1) + let join_on = vec![ + ( + Arc::new(Column::new_with_schema("c", &schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("c1", &right.schema()).unwrap()) as _, + ), + ( + Arc::new(Column::new_with_schema("b", &schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("b1", &right.schema()).unwrap()) as _, + ), + ( + Arc::new(Column::new_with_schema("a", &schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("a1", &right.schema()).unwrap()) as _, + ), + ]; + let bottom_right_join = + hash_join_exec(left, right.clone(), &join_on, &JoinType::Inner); + + // Join on (B == b1 and C == c and AA = a1) + let top_join_on = vec![ + ( + Arc::new( + Column::new_with_schema("B", &bottom_left_projection.schema()) + .unwrap(), + ) as _, + Arc::new( + Column::new_with_schema("b1", &bottom_right_join.schema()).unwrap(), + ) as _, + ), + ( + Arc::new( + Column::new_with_schema("C", &bottom_left_projection.schema()) + .unwrap(), + ) as _, + Arc::new( + Column::new_with_schema("c", &bottom_right_join.schema()).unwrap(), + ) as _, + ), + ( + Arc::new( + Column::new_with_schema("AA", &bottom_left_projection.schema()) + .unwrap(), + ) as _, + Arc::new( + Column::new_with_schema("a1", &bottom_right_join.schema()).unwrap(), + ) as _, + ), + ]; + + let top_join = hash_join_exec( + bottom_left_projection.clone(), + bottom_right_join, + &top_join_on, + &JoinType::Inner, + ); + + let predicate: Arc = binary( + col("c", top_join.schema().deref())?, + Operator::Gt, + lit(1i64), + top_join.schema().deref(), + )?; + + let filter_top_join: Arc = + Arc::new(FilterExec::try_new(predicate, top_join)?); + + // The bottom joins' join key ordering is adjusted based on the top join. 
And the top join should not introduce additional RepartitionExec + let expected = &[ + "FilterExec: c@6 > 1", + "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(B@2, b1@6), (C@3, c@2), (AA@1, a1@5)]", + "ProjectionExec: expr=[a@0 as A, a@0 as AA, b@1 as B, c@2 as C]", + "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(b@1, b1@1), (c@2, c1@2), (a@0, a1@0)]", + "RepartitionExec: partitioning=Hash([b@1, c@2, a@0], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([b1@1, c1@2, a1@0], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(b@1, b1@1), (c@2, c1@2), (a@0, a1@0)]", + "RepartitionExec: partitioning=Hash([b@1, c@2, a@0], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([b1@1, c1@2, a1@0], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + assert_optimized!(expected, filter_top_join.clone(), true); + assert_optimized!(expected, filter_top_join, false); + + Ok(()) + } + + #[test] + fn reorder_join_keys_to_left_input() -> Result<()> { + let left = parquet_exec(); + let alias_pairs: Vec<(String, String)> = vec![ + ("a".to_string(), "a1".to_string()), + ("b".to_string(), "b1".to_string()), + ("c".to_string(), "c1".to_string()), + ]; + let right = projection_exec_with_alias(parquet_exec(), alias_pairs); + + // Join on (a == a1 and b == b1 and c == c1) + let join_on = vec![ + ( + Arc::new(Column::new_with_schema("a", &schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("a1", &right.schema()).unwrap()) as _, + ), + ( + Arc::new(Column::new_with_schema("b", &schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("b1", &right.schema()).unwrap()) as _, + ), + ( + Arc::new(Column::new_with_schema("c", &schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("c1", &right.schema()).unwrap()) as _, + ), + ]; + + let bottom_left_join = ensure_distribution_helper( + hash_join_exec(left.clone(), right.clone(), &join_on, &JoinType::Inner), + 10, + true, + )?; + + // Projection(a as A, a as AA, b as B, c as C) + let alias_pairs: Vec<(String, String)> = vec![ + ("a".to_string(), "A".to_string()), + ("a".to_string(), "AA".to_string()), + ("b".to_string(), "B".to_string()), + ("c".to_string(), "C".to_string()), + ]; + let bottom_left_projection = + projection_exec_with_alias(bottom_left_join, alias_pairs); + + // Join on (c == c1 and b == b1 and a == a1) + let join_on = vec![ + ( + Arc::new(Column::new_with_schema("c", &schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("c1", &right.schema()).unwrap()) as _, + ), + ( + Arc::new(Column::new_with_schema("b", &schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("b1", &right.schema()).unwrap()) as _, + ), + ( + Arc::new(Column::new_with_schema("a", &schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("a1", &right.schema()).unwrap()) as _, + ), + ]; + 
let bottom_right_join = ensure_distribution_helper( + hash_join_exec(left, right.clone(), &join_on, &JoinType::Inner), + 10, + true, + )?; + + // Join on (B == b1 and C == c and AA = a1) + let top_join_on = vec![ + ( + Arc::new( + Column::new_with_schema("B", &bottom_left_projection.schema()) + .unwrap(), + ) as _, + Arc::new( + Column::new_with_schema("b1", &bottom_right_join.schema()).unwrap(), + ) as _, + ), + ( + Arc::new( + Column::new_with_schema("C", &bottom_left_projection.schema()) + .unwrap(), + ) as _, + Arc::new( + Column::new_with_schema("c", &bottom_right_join.schema()).unwrap(), + ) as _, + ), + ( + Arc::new( + Column::new_with_schema("AA", &bottom_left_projection.schema()) + .unwrap(), + ) as _, + Arc::new( + Column::new_with_schema("a1", &bottom_right_join.schema()).unwrap(), + ) as _, + ), + ]; + + let join_types = vec![ + JoinType::Inner, + JoinType::Left, + JoinType::Right, + JoinType::Full, + JoinType::LeftSemi, + JoinType::LeftAnti, + JoinType::RightSemi, + JoinType::RightAnti, + ]; + + for join_type in join_types { + let top_join = hash_join_exec( + bottom_left_projection.clone(), + bottom_right_join.clone(), + &top_join_on, + &join_type, + ); + let top_join_plan = + format!("HashJoinExec: mode=Partitioned, join_type={:?}, on=[(AA@1, a1@5), (B@2, b1@6), (C@3, c@2)]", &join_type); + + let reordered = reorder_join_keys_to_inputs(top_join)?; + + // The top joins' join key ordering is adjusted based on the children inputs. + let expected = &[ + top_join_plan.as_str(), + "ProjectionExec: expr=[a@0 as A, a@0 as AA, b@1 as B, c@2 as C]", + "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, a1@0), (b@1, b1@1), (c@2, c1@2)]", + "RepartitionExec: partitioning=Hash([a@0, b@1, c@2], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([a1@0, b1@1, c1@2], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@2, c1@2), (b@1, b1@1), (a@0, a1@0)]", + "RepartitionExec: partitioning=Hash([c@2, b@1, a@0], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([c1@2, b1@1, a1@0], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + + assert_plan_txt!(expected, reordered); + } + + Ok(()) + } + + #[test] + fn reorder_join_keys_to_right_input() -> Result<()> { + let left = parquet_exec(); + let alias_pairs: Vec<(String, String)> = vec![ + ("a".to_string(), "a1".to_string()), + ("b".to_string(), "b1".to_string()), + ("c".to_string(), "c1".to_string()), + ]; + let right = projection_exec_with_alias(parquet_exec(), alias_pairs); + + // Join on (a == a1 and b == b1) + let join_on = vec![ + ( + Arc::new(Column::new_with_schema("a", &schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("a1", &right.schema()).unwrap()) as _, + ), + ( + Arc::new(Column::new_with_schema("b", &schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("b1", 
&right.schema()).unwrap()) as _, + ), + ]; + let bottom_left_join = ensure_distribution_helper( + hash_join_exec(left.clone(), right.clone(), &join_on, &JoinType::Inner), + 10, + true, + )?; + + // Projection(a as A, a as AA, b as B, c as C) + let alias_pairs: Vec<(String, String)> = vec![ + ("a".to_string(), "A".to_string()), + ("a".to_string(), "AA".to_string()), + ("b".to_string(), "B".to_string()), + ("c".to_string(), "C".to_string()), + ]; + let bottom_left_projection = + projection_exec_with_alias(bottom_left_join, alias_pairs); + + // Join on (c == c1 and b == b1 and a == a1) + let join_on = vec![ + ( + Arc::new(Column::new_with_schema("c", &schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("c1", &right.schema()).unwrap()) as _, + ), + ( + Arc::new(Column::new_with_schema("b", &schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("b1", &right.schema()).unwrap()) as _, + ), + ( + Arc::new(Column::new_with_schema("a", &schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("a1", &right.schema()).unwrap()) as _, + ), + ]; + let bottom_right_join = ensure_distribution_helper( + hash_join_exec(left, right.clone(), &join_on, &JoinType::Inner), + 10, + true, + )?; + + // Join on (B == b1 and C == c and AA = a1) + let top_join_on = vec![ + ( + Arc::new( + Column::new_with_schema("B", &bottom_left_projection.schema()) + .unwrap(), + ) as _, + Arc::new( + Column::new_with_schema("b1", &bottom_right_join.schema()).unwrap(), + ) as _, + ), + ( + Arc::new( + Column::new_with_schema("C", &bottom_left_projection.schema()) + .unwrap(), + ) as _, + Arc::new( + Column::new_with_schema("c", &bottom_right_join.schema()).unwrap(), + ) as _, + ), + ( + Arc::new( + Column::new_with_schema("AA", &bottom_left_projection.schema()) + .unwrap(), + ) as _, + Arc::new( + Column::new_with_schema("a1", &bottom_right_join.schema()).unwrap(), + ) as _, + ), + ]; + + let join_types = vec![ + JoinType::Inner, + JoinType::Left, + JoinType::Right, + JoinType::Full, + JoinType::LeftSemi, + JoinType::LeftAnti, + JoinType::RightSemi, + JoinType::RightAnti, + ]; + + for join_type in join_types { + let top_join = hash_join_exec( + bottom_left_projection.clone(), + bottom_right_join.clone(), + &top_join_on, + &join_type, + ); + let top_join_plan = + format!("HashJoinExec: mode=Partitioned, join_type={:?}, on=[(C@3, c@2), (B@2, b1@6), (AA@1, a1@5)]", &join_type); + + let reordered = reorder_join_keys_to_inputs(top_join)?; + + // The top joins' join key ordering is adjusted based on the children inputs. 
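+ // Note: as this test reads, `reorder_join_keys_to_inputs` only rewrites the
+ // top join's `on` pairs so that they line up with the hash partitioning the
+ // children already produce; e.g. the key list (B, b1), (C, c), (AA, a1) is
+ // expected to come back as (C, c), (B, b1), (AA, a1), matching the right
+ // child's (c1, b1, a1) partitioning, with no new RepartitionExec added.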
+ let expected = &[ + top_join_plan.as_str(), + "ProjectionExec: expr=[a@0 as A, a@0 as AA, b@1 as B, c@2 as C]", + "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, a1@0), (b@1, b1@1)]", + "RepartitionExec: partitioning=Hash([a@0, b@1], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([a1@0, b1@1], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@2, c1@2), (b@1, b1@1), (a@0, a1@0)]", + "RepartitionExec: partitioning=Hash([c@2, b@1, a@0], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([c1@2, b1@1, a1@0], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + + assert_plan_txt!(expected, reordered); + } + + Ok(()) + } + + #[test] + fn multi_smj_joins() -> Result<()> { + let left = parquet_exec(); + let alias_pairs: Vec<(String, String)> = vec![ + ("a".to_string(), "a1".to_string()), + ("b".to_string(), "b1".to_string()), + ("c".to_string(), "c1".to_string()), + ("d".to_string(), "d1".to_string()), + ("e".to_string(), "e1".to_string()), + ]; + let right = projection_exec_with_alias(parquet_exec(), alias_pairs); + + // SortMergeJoin does not support RightSemi and RightAnti join now + let join_types = vec![ + JoinType::Inner, + JoinType::Left, + JoinType::Right, + JoinType::Full, + JoinType::LeftSemi, + JoinType::LeftAnti, + ]; + + // Join on (a == b1) + let join_on = vec![( + Arc::new(Column::new_with_schema("a", &schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("b1", &right.schema()).unwrap()) as _, + )]; + + for join_type in join_types { + let join = + sort_merge_join_exec(left.clone(), right.clone(), &join_on, &join_type); + let join_plan = + format!("SortMergeJoin: join_type={join_type}, on=[(a@0, b1@1)]"); + + // Top join on (a == c) + let top_join_on = vec![( + Arc::new(Column::new_with_schema("a", &join.schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("c", &schema()).unwrap()) as _, + )]; + let top_join = sort_merge_join_exec( + join.clone(), + parquet_exec(), + &top_join_on, + &join_type, + ); + let top_join_plan = + format!("SortMergeJoin: join_type={join_type}, on=[(a@0, c@2)]"); + + let expected = match join_type { + // Should include 6 RepartitionExecs (3 hash, 3 round-robin), 3 SortExecs + JoinType::Inner | JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => + vec![ + top_join_plan.as_str(), + join_plan.as_str(), + "SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", + "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "SortExec: expr=[b1@1 ASC], preserve_partitioning=[true]", + "RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), 
input_partitions=1", + "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "SortExec: expr=[c@2 ASC], preserve_partitioning=[true]", + "RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ], + // Should include 7 RepartitionExecs (4 hash, 3 round-robin), 4 SortExecs + // Since ordering of the left child is not preserved after SortMergeJoin + // when mode is Right, RgihtSemi, RightAnti, Full + // - We need to add one additional SortExec after SortMergeJoin in contrast the test cases + // when mode is Inner, Left, LeftSemi, LeftAnti + // Similarly, since partitioning of the left side is not preserved + // when mode is Right, RgihtSemi, RightAnti, Full + // - We need to add one additional Hash Repartition after SortMergeJoin in contrast the test + // cases when mode is Inner, Left, LeftSemi, LeftAnti + _ => vec![ + top_join_plan.as_str(), + // Below 2 operators are differences introduced, when join mode is changed + "SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", + "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", + join_plan.as_str(), + "SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", + "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "SortExec: expr=[b1@1 ASC], preserve_partitioning=[true]", + "RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "SortExec: expr=[c@2 ASC], preserve_partitioning=[true]", + "RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ], + }; + assert_optimized!(expected, top_join.clone(), true, true); + + let expected_first_sort_enforcement = match join_type { + // Should include 6 RepartitionExecs (3 hash, 3 round-robin), 3 SortExecs + JoinType::Inner | JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => + vec![ + top_join_plan.as_str(), + join_plan.as_str(), + "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10, preserve_order=true, sort_exprs=b1@1 ASC", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "SortExec: expr=[b1@1 ASC], preserve_partitioning=[false]", + "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=c@2 ASC", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + 
"SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ], + // Should include 8 RepartitionExecs (4 hash, 8 round-robin), 4 SortExecs + // Since ordering of the left child is not preserved after SortMergeJoin + // when mode is Right, RgihtSemi, RightAnti, Full + // - We need to add one additional SortExec after SortMergeJoin in contrast the test cases + // when mode is Inner, Left, LeftSemi, LeftAnti + // Similarly, since partitioning of the left side is not preserved + // when mode is Right, RgihtSemi, RightAnti, Full + // - We need to add one additional Hash Repartition and Roundrobin repartition after + // SortMergeJoin in contrast the test cases when mode is Inner, Left, LeftSemi, LeftAnti + _ => vec![ + top_join_plan.as_str(), + // Below 4 operators are differences introduced, when join mode is changed + "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", + "CoalescePartitionsExec", + join_plan.as_str(), + "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10, preserve_order=true, sort_exprs=b1@1 ASC", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "SortExec: expr=[b1@1 ASC], preserve_partitioning=[false]", + "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=c@2 ASC", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ], + }; + assert_optimized!(expected_first_sort_enforcement, top_join, false, true); + + match join_type { + JoinType::Inner | JoinType::Left | JoinType::Right | JoinType::Full => { + // This time we use (b1 == c) for top join + // Join on (b1 == c) + let top_join_on = vec![( + Arc::new(Column::new_with_schema("b1", &join.schema()).unwrap()) + as _, + Arc::new(Column::new_with_schema("c", &schema()).unwrap()) as _, + )]; + let top_join = sort_merge_join_exec( + join, + parquet_exec(), + &top_join_on, + &join_type, + ); + let top_join_plan = + format!("SortMergeJoin: join_type={join_type}, on=[(b1@6, c@2)]"); + + let expected = match join_type { + // Should include 6 RepartitionExecs(3 hash, 3 round-robin) and 3 SortExecs + JoinType::Inner | JoinType::Right => vec![ + top_join_plan.as_str(), + join_plan.as_str(), + "SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", + "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "SortExec: expr=[b1@1 ASC], preserve_partitioning=[true]", + "RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), 
input_partitions=1", + "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "SortExec: expr=[c@2 ASC], preserve_partitioning=[true]", + "RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ], + // Should include 7 RepartitionExecs (4 hash, 3 round-robin) and 4 SortExecs + JoinType::Left | JoinType::Full => vec![ + top_join_plan.as_str(), + "SortExec: expr=[b1@6 ASC], preserve_partitioning=[true]", + "RepartitionExec: partitioning=Hash([b1@6], 10), input_partitions=10", + join_plan.as_str(), + "SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", + "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "SortExec: expr=[b1@1 ASC], preserve_partitioning=[true]", + "RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "SortExec: expr=[c@2 ASC], preserve_partitioning=[true]", + "RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ], + // this match arm cannot be reached + _ => unreachable!() + }; + assert_optimized!(expected, top_join.clone(), true, true); + + let expected_first_sort_enforcement = match join_type { + // Should include 6 RepartitionExecs (3 of them preserves order) and 3 SortExecs + JoinType::Inner | JoinType::Right => vec![ + top_join_plan.as_str(), + join_plan.as_str(), + "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10, preserve_order=true, sort_exprs=b1@1 ASC", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "SortExec: expr=[b1@1 ASC], preserve_partitioning=[false]", + "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=c@2 ASC", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ], + // Should include 8 RepartitionExecs (4 of them preserves order) and 4 SortExecs + JoinType::Left | JoinType::Full => vec![ + top_join_plan.as_str(), + "RepartitionExec: partitioning=Hash([b1@6], 10), input_partitions=10, preserve_order=true, sort_exprs=b1@6 ASC", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "SortExec: expr=[b1@6 ASC], preserve_partitioning=[false]", + 
"CoalescePartitionsExec", + join_plan.as_str(), + "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10, preserve_order=true, sort_exprs=b1@1 ASC", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "SortExec: expr=[b1@1 ASC], preserve_partitioning=[false]", + "ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=c@2 ASC", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ], + // this match arm cannot be reached + _ => unreachable!() + }; + assert_optimized!( + expected_first_sort_enforcement, + top_join, + false, + true + ); + } + _ => {} + } + } + + Ok(()) + } + + #[test] + fn smj_join_key_ordering() -> Result<()> { + // group by (a as a1, b as b1) + let left = aggregate_exec_with_alias( + parquet_exec(), + vec![ + ("a".to_string(), "a1".to_string()), + ("b".to_string(), "b1".to_string()), + ], + ); + //Projection(a1 as a3, b1 as b3) + let alias_pairs: Vec<(String, String)> = vec![ + ("a1".to_string(), "a3".to_string()), + ("b1".to_string(), "b3".to_string()), + ]; + let left = projection_exec_with_alias(left, alias_pairs); + + // group by (b, a) + let right = aggregate_exec_with_alias( + parquet_exec(), + vec![ + ("b".to_string(), "b".to_string()), + ("a".to_string(), "a".to_string()), + ], + ); + + //Projection(a as a2, b as b2) + let alias_pairs: Vec<(String, String)> = vec![ + ("a".to_string(), "a2".to_string()), + ("b".to_string(), "b2".to_string()), + ]; + let right = projection_exec_with_alias(right, alias_pairs); + + // Join on (b3 == b2 && a3 == a2) + let join_on = vec![ + ( + Arc::new(Column::new_with_schema("b3", &left.schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("b2", &right.schema()).unwrap()) as _, + ), + ( + Arc::new(Column::new_with_schema("a3", &left.schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("a2", &right.schema()).unwrap()) as _, + ), + ]; + let join = sort_merge_join_exec(left, right.clone(), &join_on, &JoinType::Inner); + + // Only two RepartitionExecs added + let expected = &[ + "SortMergeJoin: join_type=Inner, on=[(b3@1, b2@1), (a3@0, a2@0)]", + "SortExec: expr=[b3@1 ASC,a3@0 ASC], preserve_partitioning=[true]", + "ProjectionExec: expr=[a1@0 as a3, b1@1 as b3]", + "ProjectionExec: expr=[a1@1 as a1, b1@0 as b1]", + "AggregateExec: mode=FinalPartitioned, gby=[b1@0 as b1, a1@1 as a1], aggr=[]", + "RepartitionExec: partitioning=Hash([b1@0, a1@1], 10), input_partitions=10", + "AggregateExec: mode=Partial, gby=[b@1 as b1, a@0 as a1], aggr=[]", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "SortExec: expr=[b2@1 ASC,a2@0 ASC], preserve_partitioning=[true]", + "ProjectionExec: expr=[a@1 as a2, b@0 as b2]", + "AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[]", + "RepartitionExec: 
partitioning=Hash([b@0, a@1], 10), input_partitions=10",
+ "AggregateExec: mode=Partial, gby=[b@1 as b, a@0 as a], aggr=[]",
+ "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+ "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]",
+ ];
+ assert_optimized!(expected, join.clone(), true, true);
+
+ let expected_first_sort_enforcement = &[
+ "SortMergeJoin: join_type=Inner, on=[(b3@1, b2@1), (a3@0, a2@0)]",
+ "RepartitionExec: partitioning=Hash([b3@1, a3@0], 10), input_partitions=10, preserve_order=true, sort_exprs=b3@1 ASC,a3@0 ASC",
+ "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+ "SortExec: expr=[b3@1 ASC,a3@0 ASC], preserve_partitioning=[false]",
+ "CoalescePartitionsExec",
+ "ProjectionExec: expr=[a1@0 as a3, b1@1 as b3]",
+ "ProjectionExec: expr=[a1@1 as a1, b1@0 as b1]",
+ "AggregateExec: mode=FinalPartitioned, gby=[b1@0 as b1, a1@1 as a1], aggr=[]",
+ "RepartitionExec: partitioning=Hash([b1@0, a1@1], 10), input_partitions=10",
+ "AggregateExec: mode=Partial, gby=[b@1 as b1, a@0 as a1], aggr=[]",
+ "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+ "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]",
+ "RepartitionExec: partitioning=Hash([b2@1, a2@0], 10), input_partitions=10, preserve_order=true, sort_exprs=b2@1 ASC,a2@0 ASC",
+ "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+ "SortExec: expr=[b2@1 ASC,a2@0 ASC], preserve_partitioning=[false]",
+ "CoalescePartitionsExec",
+ "ProjectionExec: expr=[a@1 as a2, b@0 as b2]",
+ "AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[]",
+ "RepartitionExec: partitioning=Hash([b@0, a@1], 10), input_partitions=10",
+ "AggregateExec: mode=Partial, gby=[b@1 as b, a@0 as a], aggr=[]",
+ "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+ "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]",
+ ];
+ assert_optimized!(expected_first_sort_enforcement, join, false, true);
+
+ Ok(())
+ }
+
+ #[test]
+ fn merge_does_not_need_sort() -> Result<()> {
+ // see https://github.com/apache/datafusion/issues/4331
+ let schema = schema();
+ let sort_key = vec![PhysicalSortExpr {
+ expr: col("a", &schema).unwrap(),
+ options: SortOptions::default(),
+ }];
+
+ // Scan some sorted parquet files
+ let exec = parquet_exec_multiple_sorted(vec![sort_key.clone()]);
+
+ // CoalesceBatchesExec to mimic behavior after a filter
+ let exec = Arc::new(CoalesceBatchesExec::new(exec, 4096));
+
+ // Merge from multiple parquet files and keep the data sorted
+ let exec: Arc<dyn ExecutionPlan> =
+ Arc::new(SortPreservingMergeExec::new(sort_key, exec));
+
+ // The optimizer should not add an additional SortExec as the
+ // data is already sorted
+ let expected = &[
+ "SortPreservingMergeExec: [a@0 ASC]",
+ "CoalesceBatchesExec: target_batch_size=4096",
+ "ParquetExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC]",
+ ];
+ assert_optimized!(expected, exec, true);
+
+ // In this case preserving ordering through order preserving operators is not desirable
+ // (according to flag: PREFER_EXISTING_SORT),
+ // hence the ordering is lost during CoalescePartitionsExec and is re-introduced by the
+ // SortExec at the top.
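+ // CoalescePartitionsExec merges its input partitions in whatever order
+ // batches arrive, so any per-partition ordering is gone afterwards; the plan
+ // below therefore has to re-establish the ordering with a SortExec on top
+ // rather than use an order-preserving merge.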
+ let expected = &[
+ "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]",
+ "CoalescePartitionsExec",
+ "CoalesceBatchesExec: target_batch_size=4096",
+ "ParquetExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC]",
+ ];
+ assert_optimized!(expected, exec, false);
+
+ Ok(())
+ }
+
+ #[test]
+ fn union_to_interleave() -> Result<()> {
+ // group by (a as a1)
+ let left = aggregate_exec_with_alias(
+ parquet_exec(),
+ vec![("a".to_string(), "a1".to_string())],
+ );
+ // group by (a as a1)
+ let right = aggregate_exec_with_alias(
+ parquet_exec(),
+ vec![("a".to_string(), "a1".to_string())],
+ );
+
+ // Union
+ let plan = Arc::new(UnionExec::new(vec![left, right]));
+
+ // final agg
+ let plan =
+ aggregate_exec_with_alias(plan, vec![("a1".to_string(), "a2".to_string())]);
+
+ // Only two RepartitionExecs added, no final RepartitionExec required
+ let expected = &[
+ "AggregateExec: mode=FinalPartitioned, gby=[a2@0 as a2], aggr=[]",
+ "AggregateExec: mode=Partial, gby=[a1@0 as a2], aggr=[]",
+ "InterleaveExec",
+ "AggregateExec: mode=FinalPartitioned, gby=[a1@0 as a1], aggr=[]",
+ "RepartitionExec: partitioning=Hash([a1@0], 10), input_partitions=10",
+ "AggregateExec: mode=Partial, gby=[a@0 as a1], aggr=[]",
+ "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+ "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]",
+ "AggregateExec: mode=FinalPartitioned, gby=[a1@0 as a1], aggr=[]",
+ "RepartitionExec: partitioning=Hash([a1@0], 10), input_partitions=10",
+ "AggregateExec: mode=Partial, gby=[a@0 as a1], aggr=[]",
+ "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+ "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]",
+ ];
+ assert_optimized!(expected, plan.clone(), true);
+ assert_optimized!(expected, plan.clone(), false);
+
+ Ok(())
+ }
+
+ #[test]
+ fn union_not_to_interleave() -> Result<()> {
+ // group by (a as a1)
+ let left = aggregate_exec_with_alias(
+ parquet_exec(),
+ vec![("a".to_string(), "a1".to_string())],
+ );
+ // group by (a as a1)
+ let right = aggregate_exec_with_alias(
+ parquet_exec(),
+ vec![("a".to_string(), "a1".to_string())],
+ );
+
+ // Union
+ let plan = Arc::new(UnionExec::new(vec![left, right]));
+
+ // final agg
+ let plan =
+ aggregate_exec_with_alias(plan, vec![("a1".to_string(), "a2".to_string())]);
+
+ // The union is kept as a UnionExec (not converted to an InterleaveExec),
+ // so a final RepartitionExec is required on top of it
+ let expected = &[
+ "AggregateExec: mode=FinalPartitioned, gby=[a2@0 as a2], aggr=[]",
+ "RepartitionExec: partitioning=Hash([a2@0], 10), input_partitions=20",
+ "AggregateExec: mode=Partial, gby=[a1@0 as a2], aggr=[]",
+ "UnionExec",
+ "AggregateExec: mode=FinalPartitioned, gby=[a1@0 as a1], aggr=[]",
+ "RepartitionExec: partitioning=Hash([a1@0], 10), input_partitions=10",
+ "AggregateExec: mode=Partial, gby=[a@0 as a1], aggr=[]",
+ "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+ "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]",
+ "AggregateExec: mode=FinalPartitioned, gby=[a1@0 as a1], aggr=[]",
+ "RepartitionExec: partitioning=Hash([a1@0], 10), input_partitions=10",
+ "AggregateExec: mode=Partial, gby=[a@0 as a1], aggr=[]",
+ "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+ "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]",
+ ];
+ // no sort in the plan, but since the macro needs the parameter, default it to false
+ let prefer_existing_sort = false;
+ let
first_enforce_distribution = true;
+ let prefer_existing_union = true;
+
+ assert_optimized!(
+ expected,
+ plan.clone(),
+ first_enforce_distribution,
+ prefer_existing_sort,
+ prefer_existing_union
+ );
+ assert_optimized!(
+ expected,
+ plan,
+ !first_enforce_distribution,
+ prefer_existing_sort,
+ prefer_existing_union
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn added_repartition_to_single_partition() -> Result<()> {
+ let alias = vec![("a".to_string(), "a".to_string())];
+ let plan = aggregate_exec_with_alias(parquet_exec(), alias);
+
+ let expected = [
+ "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]",
+ "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10",
+ "AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]",
+ "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+ "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]",
+ ];
+ assert_optimized!(expected, plan.clone(), true);
+ assert_optimized!(expected, plan, false);
+
+ Ok(())
+ }
+
+ #[test]
+ fn repartition_deepest_node() -> Result<()> {
+ let alias = vec![("a".to_string(), "a".to_string())];
+ let plan = aggregate_exec_with_alias(filter_exec(parquet_exec()), alias);
+
+ let expected = &[
+ "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]",
+ "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10",
+ "AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]",
+ "FilterExec: c@2 = 0",
+ "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+ "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]",
+ ];
+ assert_optimized!(expected, plan.clone(), true);
+ assert_optimized!(expected, plan, false);
+
+ Ok(())
+ }
+
+ #[test]
+ fn repartition_unsorted_limit() -> Result<()> {
+ let plan = limit_exec(filter_exec(parquet_exec()));
+
+ let expected = &[
+ "GlobalLimitExec: skip=0, fetch=100",
+ "CoalescePartitionsExec",
+ "LocalLimitExec: fetch=100",
+ "FilterExec: c@2 = 0",
+ // nothing sorts the data, so the local limit doesn't require sorted data either
+ "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+ "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]",
+ ];
+
+ assert_optimized!(expected, plan.clone(), true);
+ assert_optimized!(expected, plan, false);
+
+ Ok(())
+ }
+
+ #[test]
+ fn repartition_sorted_limit() -> Result<()> {
+ let schema = schema();
+ let sort_key = vec![PhysicalSortExpr {
+ expr: col("c", &schema).unwrap(),
+ options: SortOptions::default(),
+ }];
+ let plan = limit_exec(sort_exec(sort_key, parquet_exec(), false));
+
+ let expected = &[
+ "GlobalLimitExec: skip=0, fetch=100",
+ "LocalLimitExec: fetch=100",
+ // data is sorted so can't repartition here
+ "SortExec: expr=[c@2 ASC], preserve_partitioning=[false]",
+ "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]",
+ ];
+ assert_optimized!(expected, plan.clone(), true);
+ assert_optimized!(expected, plan, false);
+
+ Ok(())
+ }
+
+ #[test]
+ fn repartition_sorted_limit_with_filter() -> Result<()> {
+ let schema = schema();
+ let sort_key = vec![PhysicalSortExpr {
+ expr: col("c", &schema).unwrap(),
+ options: SortOptions::default(),
+ }];
+ let plan = sort_required_exec_with_req(
+ filter_exec(sort_exec(sort_key.clone(), parquet_exec(), false)),
+ sort_key,
+ );
+
+ let expected = &[
+ "SortRequiredExec: [c@2 ASC]",
+ "FilterExec: c@2 = 0",
+ // We can still use repartition here; the ordering requirement of
+ // SortRequiredExec remains satisfied.
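+ // (A round-robin split of a single sorted partition hands each output
+ // partition a subsequence of the sorted stream, so every partition stays
+ // sorted on c@2 and the requirement above still holds.)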
+ "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + + assert_optimized!(expected, plan.clone(), true); + assert_optimized!(expected, plan, false); + + Ok(()) + } + + #[test] + fn repartition_ignores_limit() -> Result<()> { + let alias = vec![("a".to_string(), "a".to_string())]; + let plan = aggregate_exec_with_alias( + limit_exec(filter_exec(limit_exec(parquet_exec()))), + alias, + ); + + let expected = &[ + "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", + "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", + "AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "GlobalLimitExec: skip=0, fetch=100", + "CoalescePartitionsExec", + "LocalLimitExec: fetch=100", + "FilterExec: c@2 = 0", + // repartition should happen prior to the filter to maximize parallelism + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "GlobalLimitExec: skip=0, fetch=100", + "LocalLimitExec: fetch=100", + // Expect no repartition to happen for local limit + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + assert_optimized!(expected, plan.clone(), true); + assert_optimized!(expected, plan, false); + + Ok(()) + } + + #[test] + fn repartition_ignores_union() -> Result<()> { + let plan = union_exec(vec![parquet_exec(); 5]); + + let expected = &[ + "UnionExec", + // Expect no repartition of ParquetExec + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + + assert_optimized!(expected, plan.clone(), true); + assert_optimized!(expected, plan, false); + + Ok(()) + } + + #[test] + fn repartition_through_sort_preserving_merge() -> Result<()> { + // sort preserving merge with non-sorted input + let schema = schema(); + let sort_key = vec![PhysicalSortExpr { + expr: col("c", &schema).unwrap(), + options: SortOptions::default(), + }]; + let plan = sort_preserving_merge_exec(sort_key, parquet_exec()); + + // need resort as the data was not sorted correctly + let expected = &[ + "SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + assert_optimized!(expected, plan.clone(), true); + assert_optimized!(expected, plan, false); + + Ok(()) + } + + #[test] + fn repartition_ignores_sort_preserving_merge() -> Result<()> { + // sort preserving merge already sorted input, + let schema = schema(); + let sort_key = vec![PhysicalSortExpr { + expr: col("c", &schema).unwrap(), + options: SortOptions::default(), + }]; + let plan = sort_preserving_merge_exec( + sort_key.clone(), + parquet_exec_multiple_sorted(vec![sort_key]), + ); + + // should not sort (as the data was already sorted) + // should not repartition, since increased parallelism is not beneficial for SortPReservingMerge + let expected = &[ + "SortPreservingMergeExec: [c@2 ASC]", + "ParquetExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", + ]; + + assert_optimized!(expected, plan.clone(), true); + + let 
expected = &[ + "SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", + "CoalescePartitionsExec", + "ParquetExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", + ]; + assert_optimized!(expected, plan, false); + + Ok(()) + } + + #[test] + fn repartition_ignores_sort_preserving_merge_with_union() -> Result<()> { + // 2 sorted parquet files unioned (partitions are concatenated, sort is preserved) + let schema = schema(); + let sort_key = vec![PhysicalSortExpr { + expr: col("c", &schema).unwrap(), + options: SortOptions::default(), + }]; + let input = union_exec(vec![parquet_exec_with_sort(vec![sort_key.clone()]); 2]); + let plan = sort_preserving_merge_exec(sort_key, input); + + // should not repartition / sort (as the data was already sorted) + let expected = &[ + "SortPreservingMergeExec: [c@2 ASC]", + "UnionExec", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", + ]; + + assert_optimized!(expected, plan.clone(), true); + + let expected = &[ + "SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", + "CoalescePartitionsExec", + "UnionExec", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", + ]; + assert_optimized!(expected, plan, false); + + Ok(()) + } + + #[test] + fn repartition_does_not_destroy_sort() -> Result<()> { + // SortRequired + // Parquet(sorted) + let schema = schema(); + let sort_key = vec![PhysicalSortExpr { + expr: col("d", &schema).unwrap(), + options: SortOptions::default(), + }]; + let plan = sort_required_exec_with_req( + filter_exec(parquet_exec_with_sort(vec![sort_key.clone()])), + sort_key, + ); + + // during repartitioning ordering is preserved + let expected = &[ + "SortRequiredExec: [d@3 ASC]", + "FilterExec: c@2 = 0", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[d@3 ASC]", + ]; + + assert_optimized!(expected, plan.clone(), true, true); + assert_optimized!(expected, plan, false, true); + + Ok(()) + } + + #[test] + fn repartition_does_not_destroy_sort_more_complex() -> Result<()> { + // model a more complicated scenario where one child of a union can be repartitioned for performance + // but the other can not be + // + // Union + // SortRequired + // Parquet(sorted) + // Filter + // Parquet(unsorted) + + let schema = schema(); + let sort_key = vec![PhysicalSortExpr { + expr: col("c", &schema).unwrap(), + options: SortOptions::default(), + }]; + let input1 = sort_required_exec_with_req( + parquet_exec_with_sort(vec![sort_key.clone()]), + sort_key, + ); + let input2 = filter_exec(parquet_exec()); + let plan = union_exec(vec![input1, input2]); + + // should not repartition below the SortRequired as that + // branch doesn't benefit from increased parallelism + let expected = &[ + "UnionExec", + // union input 1: no repartitioning + "SortRequiredExec: [c@2 ASC]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", + // union input 2: should repartition + "FilterExec: c@2 = 0", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + 
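+ // The asymmetry above is the point of this test: the SortRequiredExec branch
+ // must keep the file-provided c@2 ordering and is left alone, while the plain
+ // FilterExec branch has no ordering requirement and can absorb a round-robin
+ // repartition for parallelism.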
assert_optimized!(expected, plan.clone(), true);
+ assert_optimized!(expected, plan, false);
+
+ Ok(())
+ }
+
+ #[test]
+ fn repartition_transitively_with_projection() -> Result<()> {
+ let schema = schema();
+ let proj_exprs = vec![(
+ Arc::new(BinaryExpr::new(
+ col("a", &schema).unwrap(),
+ Operator::Plus,
+ col("b", &schema).unwrap(),
+ )) as Arc<dyn PhysicalExpr>,
+ "sum".to_string(),
+ )];
+ // non-sorted input
+ let proj = Arc::new(ProjectionExec::try_new(proj_exprs, parquet_exec())?);
+ let sort_key = vec![PhysicalSortExpr {
+ expr: col("sum", &proj.schema()).unwrap(),
+ options: SortOptions::default(),
+ }];
+ let plan = sort_preserving_merge_exec(sort_key, proj);
+
+ let expected = &[
+ "SortPreservingMergeExec: [sum@0 ASC]",
+ "SortExec: expr=[sum@0 ASC], preserve_partitioning=[true]",
+ // Since this projection is not trivial, increasing parallelism is beneficial
+ "ProjectionExec: expr=[a@0 + b@1 as sum]",
+ "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+ "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]",
+ ];
+
+ assert_optimized!(expected, plan.clone(), true);
+
+ let expected_first_sort_enforcement = &[
+ "SortExec: expr=[sum@0 ASC], preserve_partitioning=[false]",
+ "CoalescePartitionsExec",
+ // Since this projection is not trivial, increasing parallelism is beneficial
+ "ProjectionExec: expr=[a@0 + b@1 as sum]",
+ "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+ "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]",
+ ];
+ assert_optimized!(expected_first_sort_enforcement, plan, false);
+
+ Ok(())
+ }
+
+ #[test]
+ fn repartition_ignores_transitively_with_projection() -> Result<()> {
+ let schema = schema();
+ let sort_key = vec![PhysicalSortExpr {
+ expr: col("c", &schema).unwrap(),
+ options: SortOptions::default(),
+ }];
+ let alias = vec![
+ ("a".to_string(), "a".to_string()),
+ ("b".to_string(), "b".to_string()),
+ ("c".to_string(), "c".to_string()),
+ ];
+ // sorted input
+ let plan = sort_required_exec_with_req(
+ projection_exec_with_alias(
+ parquet_exec_multiple_sorted(vec![sort_key.clone()]),
+ alias,
+ ),
+ sort_key,
+ );
+
+ let expected = &[
+ "SortRequiredExec: [c@2 ASC]",
+ // Since this projection is trivial, increasing parallelism is not beneficial
+ "ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c]",
+ "ParquetExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]",
+ ];
+ assert_optimized!(expected, plan.clone(), true);
+ assert_optimized!(expected, plan, false);
+
+ Ok(())
+ }
+
+ #[test]
+ fn repartition_transitively_past_sort_with_projection() -> Result<()> {
+ let schema = schema();
+ let sort_key = vec![PhysicalSortExpr {
+ expr: col("c", &schema).unwrap(),
+ options: SortOptions::default(),
+ }];
+ let alias = vec![
+ ("a".to_string(), "a".to_string()),
+ ("b".to_string(), "b".to_string()),
+ ("c".to_string(), "c".to_string()),
+ ];
+ let plan = sort_preserving_merge_exec(
+ sort_key.clone(),
+ sort_exec(
+ sort_key,
+ projection_exec_with_alias(parquet_exec(), alias),
+ true,
+ ),
+ );
+
+ let expected = &[
+ "SortExec: expr=[c@2 ASC], preserve_partitioning=[true]",
+ // Since this projection is trivial, increasing parallelism is not beneficial
+ "ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c]",
+ "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]",
+ ];
+ assert_optimized!(expected, plan.clone(), true);
+ assert_optimized!(expected, plan, false);
+
+ Ok(())
+ }
+
+ #[test]
+ fn
repartition_transitively_past_sort_with_filter() -> Result<()> { + let schema = schema(); + let sort_key = vec![PhysicalSortExpr { + expr: col("a", &schema).unwrap(), + options: SortOptions::default(), + }]; + let plan = sort_exec(sort_key, filter_exec(parquet_exec()), false); + + let expected = &[ + "SortPreservingMergeExec: [a@0 ASC]", + "SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", + // Expect repartition on the input to the sort (as it can benefit from additional parallelism) + "FilterExec: c@2 = 0", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + + assert_optimized!(expected, plan.clone(), true); + + let expected_first_sort_enforcement = &[ + "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", + "CoalescePartitionsExec", + "FilterExec: c@2 = 0", + // Expect repartition on the input of the filter (as it can benefit from additional parallelism) + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + assert_optimized!(expected_first_sort_enforcement, plan, false); + + Ok(()) + } + + #[test] + #[cfg(feature = "parquet")] + fn repartition_transitively_past_sort_with_projection_and_filter() -> Result<()> { + let schema = schema(); + let sort_key = vec![PhysicalSortExpr { + expr: col("a", &schema).unwrap(), + options: SortOptions::default(), + }]; + let plan = sort_exec( + sort_key, + projection_exec_with_alias( + filter_exec(parquet_exec()), + vec![ + ("a".to_string(), "a".to_string()), + ("b".to_string(), "b".to_string()), + ("c".to_string(), "c".to_string()), + ], + ), + false, + ); + + let expected = &[ + "SortPreservingMergeExec: [a@0 ASC]", + // Expect repartition on the input to the sort (as it can benefit from additional parallelism) + "SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", + "ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c]", + "FilterExec: c@2 = 0", + // repartition is lowest down + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + + assert_optimized!(expected, plan.clone(), true); + + let expected_first_sort_enforcement = &[ + "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", + "CoalescePartitionsExec", + "ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c]", + "FilterExec: c@2 = 0", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + assert_optimized!(expected_first_sort_enforcement, plan, false); + + Ok(()) + } + + #[test] + fn parallelization_single_partition() -> Result<()> { + let alias = vec![("a".to_string(), "a".to_string())]; + let plan_parquet = aggregate_exec_with_alias(parquet_exec(), alias.clone()); + let plan_csv = aggregate_exec_with_alias(csv_exec(), alias); + + let expected_parquet = [ + "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", + "RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2", + "AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", + "ParquetExec: file_groups={2 groups: [[x:0..50], [x:50..100]]}, projection=[a, b, c, d, e]", + ]; + let expected_csv = [ + "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", + "RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2", + "AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", + "CsvExec: 
file_groups={2 groups: [[x:0..50], [x:50..100]]}, projection=[a, b, c, d, e], has_header=false",
+ ];
+ assert_optimized!(expected_parquet, plan_parquet, true, false, 2, true, 10);
+ assert_optimized!(expected_csv, plan_csv, true, false, 2, true, 10);
+
+ Ok(())
+ }
+
+ #[test]
+ fn parallelization_multiple_files() -> Result<()> {
+ let schema = schema();
+ let sort_key = vec![PhysicalSortExpr {
+ expr: col("a", &schema).unwrap(),
+ options: SortOptions::default(),
+ }];
+
+ let plan = filter_exec(parquet_exec_multiple_sorted(vec![sort_key.clone()]));
+ let plan = sort_required_exec_with_req(plan, sort_key);
+
+ // The groups must have only contiguous ranges of rows from the same file;
+ // if any group has rows from multiple files, the sort order is destroyed
+ // https://github.com/apache/datafusion/issues/8451
+ let expected = [
+ "SortRequiredExec: [a@0 ASC]",
+ "FilterExec: c@2 = 0",
+ "ParquetExec: file_groups={3 groups: [[x:0..50], [y:0..100], [x:50..100]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC]",
+ ];
+ let target_partitions = 3;
+ let repartition_size = 1;
+ assert_optimized!(
+ expected,
+ plan,
+ true,
+ true,
+ target_partitions,
+ true,
+ repartition_size,
+ false
+ );
+
+ let expected = [
+ "SortRequiredExec: [a@0 ASC]",
+ "FilterExec: c@2 = 0",
+ "ParquetExec: file_groups={8 groups: [[x:0..25], [y:0..25], [x:25..50], [y:25..50], [x:50..75], [y:50..75], [x:75..100], [y:75..100]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC]",
+ ];
+ let target_partitions = 8;
+ let repartition_size = 1;
+ assert_optimized!(
+ expected,
+ plan,
+ true,
+ true,
+ target_partitions,
+ true,
+ repartition_size,
+ false
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ /// CsvExec on a compressed csv file will not be partitioned
+ /// (it is not possible to decompress a chunked csv file)
+ fn parallelization_compressed_csv() -> Result<()> {
+ let compression_types = [
+ FileCompressionType::GZIP,
+ FileCompressionType::BZIP2,
+ FileCompressionType::XZ,
+ FileCompressionType::ZSTD,
+ FileCompressionType::UNCOMPRESSED,
+ ];
+
+ let expected_not_partitioned = [
+ "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]",
+ "RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2",
+ "AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]",
+ "RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1",
+ "CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false",
+ ];
+
+ let expected_partitioned = [
+ "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]",
+ "RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2",
+ "AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]",
+ "CsvExec: file_groups={2 groups: [[x:0..50], [x:50..100]]}, projection=[a, b, c, d, e], has_header=false",
+ ];
+
+ for compression_type in compression_types {
+ let expected = if compression_type.is_compressed() {
+ &expected_not_partitioned[..]
+ } else {
+ &expected_partitioned[..]
+ };
+
+ let plan = aggregate_exec_with_alias(
+ Arc::new(
+ CsvExec::builder(
+ FileScanConfig::new(
+ ObjectStoreUrl::parse("test:///").unwrap(),
+ schema(),
+ )
+ .with_file(PartitionedFile::new("x".to_string(), 100)),
+ )
+ .with_has_header(false)
+ .with_delimeter(b',')
+ .with_quote(b'"')
+ .with_escape(None)
+ .with_comment(None)
+ .with_newlines_in_values(false)
+ .with_file_compression_type(compression_type)
+ .build(),
+ ),
+ vec![("a".to_string(), "a".to_string())],
+ );
+ assert_optimized!(expected, plan, true, false, 2, true, 10, false);
+ }
+ Ok(())
+ }
+
+ #[test]
+ fn parallelization_two_partitions() -> Result<()> {
+ let alias = vec![("a".to_string(), "a".to_string())];
+ let plan_parquet =
+ aggregate_exec_with_alias(parquet_exec_multiple(), alias.clone());
+ let plan_csv = aggregate_exec_with_alias(csv_exec_multiple(), alias.clone());
+
+ let expected_parquet = [
+ "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]",
+ "RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2",
+ "AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]",
+ // Plan already has two partitions
+ "ParquetExec: file_groups={2 groups: [[x:0..100], [y:0..100]]}, projection=[a, b, c, d, e]",
+ ];
+ let expected_csv = [
+ "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]",
+ "RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2",
+ "AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]",
+ // Plan already has two partitions
+ "CsvExec: file_groups={2 groups: [[x:0..100], [y:0..100]]}, projection=[a, b, c, d, e], has_header=false",
+ ];
+ assert_optimized!(expected_parquet, plan_parquet, true, false, 2, true, 10);
+ assert_optimized!(expected_csv, plan_csv, true, false, 2, true, 10);
+ Ok(())
+ }
+
+ #[test]
+ fn parallelization_two_partitions_into_four() -> Result<()> {
+ let alias = vec![("a".to_string(), "a".to_string())];
+ let plan_parquet =
+ aggregate_exec_with_alias(parquet_exec_multiple(), alias.clone());
+ let plan_csv = aggregate_exec_with_alias(csv_exec_multiple(), alias.clone());
+
+ let expected_parquet = [
+ "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]",
+ "RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4",
+ "AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]",
+ // Multiple source files split across partitions
+ "ParquetExec: file_groups={4 groups: [[x:0..50], [x:50..100], [y:0..50], [y:50..100]]}, projection=[a, b, c, d, e]",
+ ];
+ let expected_csv = [
+ "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]",
+ "RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4",
+ "AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]",
+ // Multiple source files split across partitions
+ "CsvExec: file_groups={4 groups: [[x:0..50], [x:50..100], [y:0..50], [y:50..100]]}, projection=[a, b, c, d, e], has_header=false",
+ ];
+ assert_optimized!(expected_parquet, plan_parquet, true, false, 4, true, 10);
+ assert_optimized!(expected_csv, plan_csv, true, false, 4, true, 10);
+
+ Ok(())
+ }
+
+ #[test]
+ fn parallelization_sorted_limit() -> Result<()> {
+ let schema = schema();
+ let sort_key = vec![PhysicalSortExpr {
+ expr: col("c", &schema).unwrap(),
+ options: SortOptions::default(),
+ }];
+ let plan_parquet = limit_exec(sort_exec(sort_key.clone(), parquet_exec(), false));
+ let plan_csv = limit_exec(sort_exec(sort_key.clone(), csv_exec(), false));
+
+ let expected_parquet = &[
+ "GlobalLimitExec: skip=0, fetch=100",
+ "LocalLimitExec: fetch=100",
+ // data is sorted so can't repartition here
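+ // (as we read this plan: a repartition between the SortExec and the limit
+ // would interleave batches and destroy the just-produced total order, so the
+ // plan stays single-partition at this point)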
+ "SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", + // Doesn't parallelize for SortExec without preserve_partitioning + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + let expected_csv = &[ + "GlobalLimitExec: skip=0, fetch=100", + "LocalLimitExec: fetch=100", + // data is sorted so can't repartition here + "SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", + // Doesn't parallelize for SortExec without preserve_partitioning + "CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + ]; + assert_optimized!(expected_parquet, plan_parquet, true); + assert_optimized!(expected_csv, plan_csv, true); + + Ok(()) + } + + #[test] + fn parallelization_limit_with_filter() -> Result<()> { + let schema = schema(); + let sort_key = vec![PhysicalSortExpr { + expr: col("c", &schema).unwrap(), + options: SortOptions::default(), + }]; + let plan_parquet = limit_exec(filter_exec(sort_exec( + sort_key.clone(), + parquet_exec(), + false, + ))); + let plan_csv = + limit_exec(filter_exec(sort_exec(sort_key.clone(), csv_exec(), false))); + + let expected_parquet = &[ + "GlobalLimitExec: skip=0, fetch=100", + "CoalescePartitionsExec", + "LocalLimitExec: fetch=100", + "FilterExec: c@2 = 0", + // even though data is sorted, we can use repartition here. Since + // ordering is not used in subsequent stages anyway. + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", + // SortExec doesn't benefit from input partitioning + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + let expected_csv = &[ + "GlobalLimitExec: skip=0, fetch=100", + "CoalescePartitionsExec", + "LocalLimitExec: fetch=100", + "FilterExec: c@2 = 0", + // even though data is sorted, we can use repartition here. Since + // ordering is not used in subsequent stages anyway. 
+ "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", + // SortExec doesn't benefit from input partitioning + "CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + ]; + assert_optimized!(expected_parquet, plan_parquet, true); + assert_optimized!(expected_csv, plan_csv, true); + + Ok(()) + } + + #[test] + fn parallelization_ignores_limit() -> Result<()> { + let alias = vec![("a".to_string(), "a".to_string())]; + let plan_parquet = aggregate_exec_with_alias( + limit_exec(filter_exec(limit_exec(parquet_exec()))), + alias.clone(), + ); + let plan_csv = aggregate_exec_with_alias( + limit_exec(filter_exec(limit_exec(csv_exec()))), + alias.clone(), + ); + + let expected_parquet = &[ + "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", + "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", + "AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "GlobalLimitExec: skip=0, fetch=100", + "CoalescePartitionsExec", + "LocalLimitExec: fetch=100", + "FilterExec: c@2 = 0", + // repartition should happen prior to the filter to maximize parallelism + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "GlobalLimitExec: skip=0, fetch=100", + // Limit doesn't benefit from input partitioning - no parallelism + "LocalLimitExec: fetch=100", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + let expected_csv = &[ + "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", + "RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", + "AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "GlobalLimitExec: skip=0, fetch=100", + "CoalescePartitionsExec", + "LocalLimitExec: fetch=100", + "FilterExec: c@2 = 0", + // repartition should happen prior to the filter to maximize parallelism + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "GlobalLimitExec: skip=0, fetch=100", + // Limit doesn't benefit from input partitioning - no parallelism + "LocalLimitExec: fetch=100", + "CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + ]; + assert_optimized!(expected_parquet, plan_parquet, true); + assert_optimized!(expected_csv, plan_csv, true); + + Ok(()) + } + + #[test] + fn parallelization_union_inputs() -> Result<()> { + let plan_parquet = union_exec(vec![parquet_exec(); 5]); + let plan_csv = union_exec(vec![csv_exec(); 5]); + + let expected_parquet = &[ + "UnionExec", + // Union doesn't benefit from input partitioning - no parallelism + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + let expected_csv = &[ + "UnionExec", + // Union doesn't benefit from input partitioning - no parallelism + "CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + "CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + "CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + "CsvExec: 
file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + "CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + ]; + assert_optimized!(expected_parquet, plan_parquet, true); + assert_optimized!(expected_csv, plan_csv, true); + + Ok(()) + } + + #[test] + fn parallelization_prior_to_sort_preserving_merge() -> Result<()> { + let schema = schema(); + let sort_key = vec![PhysicalSortExpr { + expr: col("c", &schema).unwrap(), + options: SortOptions::default(), + }]; + // sort preserving merge already sorted input, + let plan_parquet = sort_preserving_merge_exec( + sort_key.clone(), + parquet_exec_with_sort(vec![sort_key.clone()]), + ); + let plan_csv = sort_preserving_merge_exec( + sort_key.clone(), + csv_exec_with_sort(vec![sort_key.clone()]), + ); + + // parallelization is not beneficial for SortPreservingMerge + let expected_parquet = &[ + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", + ]; + let expected_csv = &[ + "CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], has_header=false", + ]; + assert_optimized!(expected_parquet, plan_parquet, true); + assert_optimized!(expected_csv, plan_csv, true); + + Ok(()) + } + + #[test] + fn parallelization_sort_preserving_merge_with_union() -> Result<()> { + let schema = schema(); + let sort_key = vec![PhysicalSortExpr { + expr: col("c", &schema).unwrap(), + options: SortOptions::default(), + }]; + // 2 sorted parquet files unioned (partitions are concatenated, sort is preserved) + let input_parquet = + union_exec(vec![parquet_exec_with_sort(vec![sort_key.clone()]); 2]); + let input_csv = union_exec(vec![csv_exec_with_sort(vec![sort_key.clone()]); 2]); + let plan_parquet = sort_preserving_merge_exec(sort_key.clone(), input_parquet); + let plan_csv = sort_preserving_merge_exec(sort_key.clone(), input_csv); + + // should not repartition (union doesn't benefit from increased parallelism) + // should not sort (as the data was already sorted) + let expected_parquet = &[ + "SortPreservingMergeExec: [c@2 ASC]", + "UnionExec", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", + ]; + let expected_csv = &[ + "SortPreservingMergeExec: [c@2 ASC]", + "UnionExec", + "CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], has_header=false", + "CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], has_header=false", + ]; + assert_optimized!(expected_parquet, plan_parquet, true); + assert_optimized!(expected_csv, plan_csv, true); + + Ok(()) + } + + #[test] + fn parallelization_does_not_benefit() -> Result<()> { + let schema = schema(); + let sort_key = vec![PhysicalSortExpr { + expr: col("c", &schema).unwrap(), + options: SortOptions::default(), + }]; + // SortRequired + // Parquet(sorted) + let plan_parquet = sort_required_exec_with_req( + parquet_exec_with_sort(vec![sort_key.clone()]), + sort_key.clone(), + ); + let plan_csv = sort_required_exec_with_req( + csv_exec_with_sort(vec![sort_key.clone()]), + sort_key, + ); + + // no parallelization, because SortRequiredExec doesn't benefit from increased parallelism + let expected_parquet = &[ + "SortRequiredExec: [c@2 ASC]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", + ]; + let 
expected_csv = &[
+ "SortRequiredExec: [c@2 ASC]",
+ "CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], has_header=false",
+ ];
+ assert_optimized!(expected_parquet, plan_parquet, true);
+ assert_optimized!(expected_csv, plan_csv, true);
+
+ Ok(())
+ }
+
+ #[test]
+ fn parallelization_ignores_transitively_with_projection_parquet() -> Result<()> {
+ // sorted input
+ let schema = schema();
+ let sort_key = vec![PhysicalSortExpr {
+ expr: col("c", &schema).unwrap(),
+ options: SortOptions::default(),
+ }];
+
+ // Projection(a as a2, c as c2)
+ let alias_pairs: Vec<(String, String)> = vec![
+ ("a".to_string(), "a2".to_string()),
+ ("c".to_string(), "c2".to_string()),
+ ];
+ let proj_parquet = projection_exec_with_alias(
+ parquet_exec_with_sort(vec![sort_key.clone()]),
+ alias_pairs.clone(),
+ );
+ let sort_key_after_projection = vec![PhysicalSortExpr {
+ expr: col("c2", &proj_parquet.schema()).unwrap(),
+ options: SortOptions::default(),
+ }];
+ let plan_parquet =
+ sort_preserving_merge_exec(sort_key_after_projection, proj_parquet);
+ let expected = &[
+ "SortPreservingMergeExec: [c2@1 ASC]",
+ " ProjectionExec: expr=[a@0 as a2, c@2 as c2]",
+ " ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]",
+ ];
+ plans_matches_expected!(expected, &plan_parquet);
+
+ // data should not be repartitioned / resorted
+ let expected_parquet = &[
+ "ProjectionExec: expr=[a@0 as a2, c@2 as c2]",
+ "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]",
+ ];
+ assert_optimized!(expected_parquet, plan_parquet, true);
+
+ Ok(())
+ }
+
+ #[test]
+ fn parallelization_ignores_transitively_with_projection_csv() -> Result<()> {
+ // sorted input
+ let schema = schema();
+ let sort_key = vec![PhysicalSortExpr {
+ expr: col("c", &schema).unwrap(),
+ options: SortOptions::default(),
+ }];
+
+ // Projection(a as a2, c as c2)
+ let alias_pairs: Vec<(String, String)> = vec![
+ ("a".to_string(), "a2".to_string()),
+ ("c".to_string(), "c2".to_string()),
+ ];
+
+ let proj_csv =
+ projection_exec_with_alias(csv_exec_with_sort(vec![sort_key]), alias_pairs);
+ let sort_key_after_projection = vec![PhysicalSortExpr {
+ expr: col("c2", &proj_csv.schema()).unwrap(),
+ options: SortOptions::default(),
+ }];
+ let plan_csv = sort_preserving_merge_exec(sort_key_after_projection, proj_csv);
+ let expected = &[
+ "SortPreservingMergeExec: [c2@1 ASC]",
+ " ProjectionExec: expr=[a@0 as a2, c@2 as c2]",
+ " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], has_header=false",
+ ];
+ plans_matches_expected!(expected, &plan_csv);
+
+ // data should not be repartitioned / resorted
+ let expected_csv = &[
+ "ProjectionExec: expr=[a@0 as a2, c@2 as c2]",
+ "CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], has_header=false",
+ ];
+ assert_optimized!(expected_csv, plan_csv, true);
+
+ Ok(())
+ }
+
+ #[test]
+ fn remove_redundant_roundrobins() -> Result<()> {
+ let input = parquet_exec();
+ let repartition = repartition_exec(repartition_exec(input));
+ let physical_plan = repartition_exec(filter_exec(repartition));
+ let expected = &[
+ "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10",
+ " FilterExec: c@2 = 0",
+ " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10",
+ " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+ " ParquetExec: file_groups={1 group: [[x]]}, 
projection=[a, b, c, d, e]", + ]; + plans_matches_expected!(expected, &physical_plan); + + let expected = &[ + "FilterExec: c@2 = 0", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + assert_optimized!(expected, physical_plan.clone(), true); + assert_optimized!(expected, physical_plan, false); + + Ok(()) + } + + #[test] + fn remove_unnecessary_spm_after_filter() -> Result<()> { + let schema = schema(); + let sort_key = vec![PhysicalSortExpr { + expr: col("c", &schema).unwrap(), + options: SortOptions::default(), + }]; + let input = parquet_exec_multiple_sorted(vec![sort_key.clone()]); + let physical_plan = sort_preserving_merge_exec(sort_key, filter_exec(input)); + + // Original plan expects its output to be ordered by c@2 ASC. + // This is still satisfied since, after filter that column is constant. + let expected = &[ + "CoalescePartitionsExec", + "FilterExec: c@2 = 0", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2, preserve_order=true, sort_exprs=c@2 ASC", + "ParquetExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", + ]; + // last flag sets config.optimizer.PREFER_EXISTING_SORT + assert_optimized!(expected, physical_plan.clone(), true, true); + assert_optimized!(expected, physical_plan, false, true); + + Ok(()) + } + + #[test] + fn preserve_ordering_through_repartition() -> Result<()> { + let schema = schema(); + let sort_key = vec![PhysicalSortExpr { + expr: col("d", &schema).unwrap(), + options: SortOptions::default(), + }]; + let input = parquet_exec_multiple_sorted(vec![sort_key.clone()]); + let physical_plan = sort_preserving_merge_exec(sort_key, filter_exec(input)); + + let expected = &[ + "SortPreservingMergeExec: [d@3 ASC]", + "FilterExec: c@2 = 0", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2, preserve_order=true, sort_exprs=d@3 ASC", + "ParquetExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[d@3 ASC]", + ]; + // last flag sets config.optimizer.PREFER_EXISTING_SORT + assert_optimized!(expected, physical_plan.clone(), true, true); + assert_optimized!(expected, physical_plan, false, true); + + Ok(()) + } + + #[test] + fn do_not_preserve_ordering_through_repartition() -> Result<()> { + let schema = schema(); + let sort_key = vec![PhysicalSortExpr { + expr: col("a", &schema).unwrap(), + options: SortOptions::default(), + }]; + let input = parquet_exec_multiple_sorted(vec![sort_key.clone()]); + let physical_plan = sort_preserving_merge_exec(sort_key, filter_exec(input)); + + let expected = &[ + "SortPreservingMergeExec: [a@0 ASC]", + "SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", + "FilterExec: c@2 = 0", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", + "ParquetExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC]", + ]; + + assert_optimized!(expected, physical_plan.clone(), true); + + let expected = &[ + "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", + "CoalescePartitionsExec", + "FilterExec: c@2 = 0", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", + "ParquetExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC]", + ]; + assert_optimized!(expected, physical_plan, false); + + Ok(()) + } + + #[test] + fn no_need_for_sort_after_filter() -> Result<()> { + let schema = schema(); + let 
sort_key = vec![PhysicalSortExpr { + expr: col("c", &schema).unwrap(), + options: SortOptions::default(), + }]; + let input = parquet_exec_multiple_sorted(vec![sort_key.clone()]); + let physical_plan = sort_preserving_merge_exec(sort_key, filter_exec(input)); + + let expected = &[ + // After CoalescePartitionsExec c is still constant. Hence c@2 ASC ordering is already satisfied. + "CoalescePartitionsExec", + // Since after this stage c is constant. c@2 ASC ordering is already satisfied. + "FilterExec: c@2 = 0", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", + "ParquetExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", + ]; + assert_optimized!(expected, physical_plan.clone(), true); + assert_optimized!(expected, physical_plan, false); + + Ok(()) + } + + #[test] + fn do_not_preserve_ordering_through_repartition2() -> Result<()> { + let schema = schema(); + let sort_key = vec![PhysicalSortExpr { + expr: col("c", &schema).unwrap(), + options: SortOptions::default(), + }]; + let input = parquet_exec_multiple_sorted(vec![sort_key]); + + let sort_req = vec![PhysicalSortExpr { + expr: col("a", &schema).unwrap(), + options: SortOptions::default(), + }]; + let physical_plan = sort_preserving_merge_exec(sort_req, filter_exec(input)); + + let expected = &[ + "SortPreservingMergeExec: [a@0 ASC]", + "SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", + "FilterExec: c@2 = 0", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", + "ParquetExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", + ]; + + assert_optimized!(expected, physical_plan.clone(), true); + + let expected = &[ + "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", + "CoalescePartitionsExec", + "SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", + "FilterExec: c@2 = 0", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", + "ParquetExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", + ]; + assert_optimized!(expected, physical_plan, false); + + Ok(()) + } + + #[test] + fn do_not_preserve_ordering_through_repartition3() -> Result<()> { + let schema = schema(); + let sort_key = vec![PhysicalSortExpr { + expr: col("c", &schema).unwrap(), + options: SortOptions::default(), + }]; + let input = parquet_exec_multiple_sorted(vec![sort_key]); + let physical_plan = filter_exec(input); + + let expected = &[ + "FilterExec: c@2 = 0", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", + "ParquetExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", + ]; + assert_optimized!(expected, physical_plan.clone(), true); + assert_optimized!(expected, physical_plan, false); + + Ok(()) + } + + #[test] + fn do_not_put_sort_when_input_is_invalid() -> Result<()> { + let schema = schema(); + let sort_key = vec![PhysicalSortExpr { + expr: col("a", &schema).unwrap(), + options: SortOptions::default(), + }]; + let input = parquet_exec(); + let physical_plan = sort_required_exec_with_req(filter_exec(input), sort_key); + let expected = &[ + // Ordering requirement of sort required exec is NOT satisfied + // by existing ordering at the source. 
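+ // (The plain `parquet_exec()` source used here reports no output ordering.)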
+ "SortRequiredExec: [a@0 ASC]", + "FilterExec: c@2 = 0", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + assert_plan_txt!(expected, physical_plan); + + let expected = &[ + "SortRequiredExec: [a@0 ASC]", + // Since at the start of the rule ordering requirement is not satisfied + // EnforceDistribution rule doesn't satisfy this requirement either. + "FilterExec: c@2 = 0", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]", + ]; + + let mut config = ConfigOptions::new(); + config.execution.target_partitions = 10; + config.optimizer.enable_round_robin_repartition = true; + config.optimizer.prefer_existing_sort = false; + let dist_plan = EnforceDistribution::new().optimize(physical_plan, &config)?; + assert_plan_txt!(expected, dist_plan); + + Ok(()) + } + + #[test] + fn put_sort_when_input_is_valid() -> Result<()> { + let schema = schema(); + let sort_key = vec![PhysicalSortExpr { + expr: col("a", &schema).unwrap(), + options: SortOptions::default(), + }]; + let input = parquet_exec_multiple_sorted(vec![sort_key.clone()]); + let physical_plan = sort_required_exec_with_req(filter_exec(input), sort_key); + + let expected = &[ + // Ordering requirement of sort required exec is satisfied + // by existing ordering at the source. + "SortRequiredExec: [a@0 ASC]", + "FilterExec: c@2 = 0", + "ParquetExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC]", + ]; + assert_plan_txt!(expected, physical_plan); + + let expected = &[ + // Since at the start of the rule ordering requirement is satisfied + // EnforceDistribution rule satisfy this requirement also. + "SortRequiredExec: [a@0 ASC]", + "FilterExec: c@2 = 0", + "ParquetExec: file_groups={10 groups: [[x:0..20], [y:0..20], [x:20..40], [y:20..40], [x:40..60], [y:40..60], [x:60..80], [y:60..80], [x:80..100], [y:80..100]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC]", + ]; + + let mut config = ConfigOptions::new(); + config.execution.target_partitions = 10; + config.optimizer.enable_round_robin_repartition = true; + config.optimizer.prefer_existing_sort = false; + let dist_plan = EnforceDistribution::new().optimize(physical_plan, &config)?; + assert_plan_txt!(expected, dist_plan); + + Ok(()) + } + + #[test] + fn do_not_add_unnecessary_hash() -> Result<()> { + let schema = schema(); + let sort_key = vec![PhysicalSortExpr { + expr: col("c", &schema).unwrap(), + options: SortOptions::default(), + }]; + let alias = vec![("a".to_string(), "a".to_string())]; + let input = parquet_exec_with_sort(vec![sort_key]); + let physical_plan = aggregate_exec_with_alias(input, alias.clone()); + + let expected = &[ + "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", + "AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", + "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", + ]; + // Make sure target partition number is 1. 
+ assert_optimized!(expected, physical_plan.clone(), true, false, 1, false, 1024);
+ assert_optimized!(expected, physical_plan, false, false, 1, false, 1024);
+
+ Ok(())
+ }
+
+ #[test]
+ fn do_not_add_unnecessary_hash2() -> Result<()> {
+ let schema = schema();
+ let sort_key = vec![PhysicalSortExpr {
+ expr: col("c", &schema).unwrap(),
+ options: SortOptions::default(),
+ }];
+ let alias = vec![("a".to_string(), "a".to_string())];
+ let input = parquet_exec_multiple_sorted(vec![sort_key]);
+ let aggregate = aggregate_exec_with_alias(input, alias.clone());
+ let physical_plan = aggregate_exec_with_alias(aggregate, alias.clone());
+
+ let expected = &[
+ "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]",
+ // Since the hash requirement of this operator is already satisfied,
+ // there shouldn't be a hash repartition here
+ "AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]",
+ "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]",
+ "RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4",
+ "AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]",
+ "RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2",
+ "ParquetExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]",
+ ];
+ // Make sure the target partition number is larger than 2 (e.g., the partition number at the source).
+ assert_optimized!(expected, physical_plan.clone(), true, false, 4, false, 1024);
+ assert_optimized!(expected, physical_plan, false, false, 4, false, 1024);
+
+ Ok(())
+ }
+
+ #[test]
+ fn optimize_away_unnecessary_repartition() -> Result<()> {
+ let physical_plan = coalesce_partitions_exec(repartition_exec(parquet_exec()));
+ let expected = &[
+ "CoalescePartitionsExec",
+ " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+ " ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]",
+ ];
+ plans_matches_expected!(expected, physical_plan.clone());
+
+ let expected =
+ &["ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]"];
+ assert_optimized!(expected, physical_plan.clone(), true);
+ assert_optimized!(expected, physical_plan, false);
+
+ Ok(())
+ }
+
+ #[test]
+ fn optimize_away_unnecessary_repartition2() -> Result<()> {
+ let physical_plan = filter_exec(repartition_exec(coalesce_partitions_exec(
+ filter_exec(repartition_exec(parquet_exec())),
+ )));
+ let expected = &[
+ "FilterExec: c@2 = 0",
+ " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+ " CoalescePartitionsExec",
+ " FilterExec: c@2 = 0",
+ " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+ " ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]",
+ ];
+ plans_matches_expected!(expected, physical_plan.clone());
+
+ let expected = &[
+ "FilterExec: c@2 = 0",
+ "FilterExec: c@2 = 0",
+ "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+ "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e]",
+ ];
+ assert_optimized!(expected, physical_plan.clone(), true);
+ assert_optimized!(expected, physical_plan, false);
+
+ Ok(())
+ }
+}
diff --git a/datafusion/core/src/physical_optimizer/enforce_sorting_test.rs b/datafusion/core/src/physical_optimizer/enforce_sorting_test.rs
new file mode 100644
index 0000000000000..3f100668d558f
--- /dev/null
+++ b/datafusion/core/src/physical_optimizer/enforce_sorting_test.rs
@@ -0,0 +1,1850 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#[cfg(test)] +mod tests { + use datafusion_common::tree_node::TransformedResult; + use datafusion_common::tree_node::TreeNode; + use datafusion_physical_optimizer::enforce_sorting::ensure_sorting; + use datafusion_physical_optimizer::enforce_sorting::parallelize_sorts; + use datafusion_physical_optimizer::enforce_sorting::EnforceSorting; + use datafusion_physical_optimizer::replace_with_order_preserving_variants::replace_with_order_preserving_variants; + use datafusion_physical_optimizer::sort_pushdown::assign_initial_requirements; + use datafusion_physical_optimizer::sort_pushdown::pushdown_sorts; + + use datafusion_physical_optimizer::sort_pushdown::SortPushDown; + use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; + use datafusion_physical_plan::repartition::RepartitionExec; + use datafusion_physical_plan::sorts::sort::SortExec; + use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; + use std::sync::Arc; + + use datafusion_physical_optimizer::replace_with_order_preserving_variants::OrderPreservationContext; + + use crate::physical_optimizer::test_utils::{ + aggregate_exec, bounded_window_exec, check_integrity, coalesce_batches_exec, + coalesce_partitions_exec, filter_exec, global_limit_exec, hash_join_exec, + limit_exec, local_limit_exec, memory_exec, parquet_exec, parquet_exec_sorted, + repartition_exec, sort_exec, sort_expr, sort_expr_options, sort_merge_join_exec, + sort_preserving_merge_exec, spr_repartition_exec, union_exec, + RequirementsTestExec, + }; + use crate::prelude::{SessionConfig, SessionContext}; + use crate::test::{csv_exec_ordered, csv_exec_sorted, stream_exec_ordered}; + use datafusion_physical_optimizer::enforce_distribution::EnforceDistribution; + use datafusion_physical_plan::{ + displayable, get_plan_string, ExecutionPlan, Partitioning, + }; + + use datafusion_common::arrow::compute::SortOptions; + use datafusion_common::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; + use datafusion_common::Result; + use datafusion_expr::JoinType; + use datafusion_physical_expr::expressions::{col, Column, NotExpr}; + + use datafusion_physical_optimizer::PhysicalOptimizerRule; + use rstest::rstest; + + // TODO: remove these two type aliases when tests move to the datafusion_physical_optimizer module + use datafusion_physical_plan::tree_node::PlanContext; + /// This object is used within the [`EnforceSorting`] rule to track the closest + /// [`SortExec`] descendant(s) for every child of a plan. The data attribute + /// stores whether the plan is a `SortExec` or is connected to a `SortExec` + /// via its children. 
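+ /// In these tests that data is a plain `bool` payload on each node.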
+ type PlanWithCorrespondingSort = PlanContext<bool>;
+
+ /// This object is used within the [`EnforceSorting`] rule to track the closest
+ /// [`CoalescePartitionsExec`] descendant(s) for every child of a plan. The data
+ /// attribute stores whether the plan is a `CoalescePartitionsExec` or is
+ /// connected to a `CoalescePartitionsExec` via its children.
+ type PlanWithCorrespondingCoalescePartitions = PlanContext<bool>;
+
+ fn create_test_schema() -> Result<SchemaRef> {
+ let nullable_column = Field::new("nullable_col", DataType::Int32, true);
+ let non_nullable_column = Field::new("non_nullable_col", DataType::Int32, false);
+ let schema = Arc::new(Schema::new(vec![nullable_column, non_nullable_column]));
+ Ok(schema)
+ }
+
+ fn create_test_schema2() -> Result<SchemaRef> {
+ let col_a = Field::new("col_a", DataType::Int32, true);
+ let col_b = Field::new("col_b", DataType::Int32, true);
+ let schema = Arc::new(Schema::new(vec![col_a, col_b]));
+ Ok(schema)
+ }
+
+ // Generate a schema which consists of 5 columns (a, b, c, d, e)
+ fn create_test_schema3() -> Result<SchemaRef> {
+ let a = Field::new("a", DataType::Int32, true);
+ let b = Field::new("b", DataType::Int32, false);
+ let c = Field::new("c", DataType::Int32, true);
+ let d = Field::new("d", DataType::Int32, false);
+ let e = Field::new("e", DataType::Int32, false);
+ let schema = Arc::new(Schema::new(vec![a, b, c, d, e]));
+ Ok(schema)
+ }
+
+ /// Runs the sort enforcement optimizer and asserts the plan
+ /// against the original and expected plans
+ ///
+ /// `$EXPECTED_PLAN_LINES`: input plan
+ /// `$EXPECTED_OPTIMIZED_PLAN_LINES`: optimized plan
+ /// `$PLAN`: the plan to optimize
+ /// `$REPARTITION_SORTS`: flag to set the `config.options.optimizer.repartition_sorts` option.
+ ///
+ macro_rules! assert_optimized {
+ ($EXPECTED_PLAN_LINES: expr, $EXPECTED_OPTIMIZED_PLAN_LINES: expr, $PLAN: expr, $REPARTITION_SORTS: expr) => {
+ let config = SessionConfig::new().with_repartition_sorts($REPARTITION_SORTS);
+ let session_ctx = SessionContext::new_with_config(config);
+ let state = session_ctx.state();
+
+ // This file exercises the 4 rules that use tree nodes; apply these rules as in the
+ // EnforceSorting::optimize implementation.
+ // After these operations, tree nodes should be in a consistent state.
+ // This code block makes sure that these rules don't violate tree node integrity.
+ {
+ let plan_requirements = PlanWithCorrespondingSort::new_default($PLAN.clone());
+ let adjusted = plan_requirements
+ .transform_up(ensure_sorting)
+ .data()
+ .and_then(check_integrity)?;
+ // TODO: End state payloads will be checked here.
+
+ let new_plan = if state.config_options().optimizer.repartition_sorts {
+ let plan_with_coalesce_partitions =
+ PlanWithCorrespondingCoalescePartitions::new_default(adjusted.plan);
+ let parallel = plan_with_coalesce_partitions
+ .transform_up(parallelize_sorts)
+ .data()
+ .and_then(check_integrity)?;
+ // TODO: End state payloads will be checked here.
+ parallel.plan
+ } else {
+ adjusted.plan
+ };
+
+ let plan_with_pipeline_fixer = OrderPreservationContext::new_default(new_plan);
+ let updated_plan = plan_with_pipeline_fixer
+ .transform_up(|plan_with_pipeline_fixer| {
+ replace_with_order_preserving_variants(
+ plan_with_pipeline_fixer,
+ false,
+ true,
+ state.config_options(),
+ )
+ })
+ .data()
+ .and_then(check_integrity)?;
+ // TODO: End state payloads will be checked here.
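+ // At this point three of the four rules have run; the sort pushdown below is the last one.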
+ + let mut sort_pushdown = SortPushDown::new_default(updated_plan.plan); + assign_initial_requirements(&mut sort_pushdown); + sort_pushdown + .transform_down(pushdown_sorts) + .data() + .and_then(check_integrity)?; + // TODO: End state payloads will be checked here. + } + + let physical_plan = $PLAN; + let formatted = displayable(physical_plan.as_ref()).indent(true).to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + + let expected_plan_lines: Vec<&str> = $EXPECTED_PLAN_LINES + .iter().map(|s| *s).collect(); + + assert_eq!( + expected_plan_lines, actual, + "\n**Original Plan Mismatch\n\nexpected:\n\n{expected_plan_lines:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + let expected_optimized_lines: Vec<&str> = $EXPECTED_OPTIMIZED_PLAN_LINES + .iter().map(|s| *s).collect(); + + // Run the actual optimizer + let optimized_physical_plan = + EnforceSorting::new().optimize(physical_plan, state.config_options())?; + + // Get string representation of the plan + let actual = get_plan_string(&optimized_physical_plan); + assert_eq!( + expected_optimized_lines, actual, + "\n**Optimized Plan Mismatch\n\nexpected:\n\n{expected_optimized_lines:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + }; + } + + #[tokio::test] + async fn test_remove_unnecessary_sort() -> Result<()> { + let schema = create_test_schema()?; + let source = memory_exec(&schema); + let input = sort_exec(vec![sort_expr("non_nullable_col", &schema)], source); + let physical_plan = sort_exec(vec![sort_expr("nullable_col", &schema)], input); + + let expected_input = [ + "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + let expected_optimized = [ + "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_remove_unnecessary_sort_window_multilayer() -> Result<()> { + let schema = create_test_schema()?; + let source = memory_exec(&schema); + + let sort_exprs = vec![sort_expr_options( + "non_nullable_col", + &source.schema(), + SortOptions { + descending: true, + nulls_first: true, + }, + )]; + let sort = sort_exec(sort_exprs.clone(), source); + // Add dummy layer propagating Sort above, to test whether sort can be removed from multi layer before + let coalesce_batches = coalesce_batches_exec(sort); + + let window_agg = + bounded_window_exec("non_nullable_col", sort_exprs, coalesce_batches); + + let sort_exprs = vec![sort_expr_options( + "non_nullable_col", + &window_agg.schema(), + SortOptions { + descending: false, + nulls_first: false, + }, + )]; + + let sort = sort_exec(sort_exprs.clone(), window_agg); + + // Add dummy layer propagating Sort above, to test whether sort can be removed from multi layer before + let filter = filter_exec( + Arc::new(NotExpr::new( + col("non_nullable_col", schema.as_ref()).unwrap(), + )), + sort, + ); + + let physical_plan = bounded_window_exec("non_nullable_col", sort_exprs, filter); + + let expected_input = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " FilterExec: NOT non_nullable_col@1", + " SortExec: 
expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false]", + " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " CoalesceBatchesExec: target_batch_size=128", + " SortExec: expr=[non_nullable_col@1 DESC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]"]; + + let expected_optimized = ["WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(NULL), is_causal: false }]", + " FilterExec: NOT non_nullable_col@1", + " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " CoalesceBatchesExec: target_batch_size=128", + " SortExec: expr=[non_nullable_col@1 DESC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]"]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_add_required_sort() -> Result<()> { + let schema = create_test_schema()?; + let source = memory_exec(&schema); + + let sort_exprs = vec![sort_expr("nullable_col", &schema)]; + + let physical_plan = sort_preserving_merge_exec(sort_exprs, source); + + let expected_input = [ + "SortPreservingMergeExec: [nullable_col@0 ASC]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + let expected_optimized = [ + "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_remove_unnecessary_sort1() -> Result<()> { + let schema = create_test_schema()?; + let source = memory_exec(&schema); + let sort_exprs = vec![sort_expr("nullable_col", &schema)]; + let sort = sort_exec(sort_exprs.clone(), source); + let spm = sort_preserving_merge_exec(sort_exprs, sort); + + let sort_exprs = vec![sort_expr("nullable_col", &schema)]; + let sort = sort_exec(sort_exprs.clone(), spm); + let physical_plan = sort_preserving_merge_exec(sort_exprs, sort); + let expected_input = [ + "SortPreservingMergeExec: [nullable_col@0 ASC]", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " SortPreservingMergeExec: [nullable_col@0 ASC]", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + let expected_optimized = [ + "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_remove_unnecessary_sort2() -> Result<()> { + let schema = create_test_schema()?; + let source = memory_exec(&schema); + let sort_exprs = vec![sort_expr("non_nullable_col", &schema)]; + let sort = sort_exec(sort_exprs.clone(), source); + let spm = sort_preserving_merge_exec(sort_exprs, sort); + + let sort_exprs = vec![ + sort_expr("nullable_col", &schema), + 
sort_expr("non_nullable_col", &schema), + ]; + let sort2 = sort_exec(sort_exprs.clone(), spm); + let spm2 = sort_preserving_merge_exec(sort_exprs, sort2); + + let sort_exprs = vec![sort_expr("nullable_col", &schema)]; + let sort3 = sort_exec(sort_exprs, spm2); + let physical_plan = repartition_exec(repartition_exec(sort3)); + + let expected_input = [ + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " SortPreservingMergeExec: [non_nullable_col@1 ASC]", + " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + + let expected_optimized = [ + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_remove_unnecessary_sort3() -> Result<()> { + let schema = create_test_schema()?; + let source = memory_exec(&schema); + let sort_exprs = vec![sort_expr("non_nullable_col", &schema)]; + let sort = sort_exec(sort_exprs.clone(), source); + let spm = sort_preserving_merge_exec(sort_exprs, sort); + + let sort_exprs = vec![ + sort_expr("nullable_col", &schema), + sort_expr("non_nullable_col", &schema), + ]; + let repartition_exec = repartition_exec(spm); + let sort2 = Arc::new( + SortExec::new(sort_exprs.clone(), repartition_exec) + .with_preserve_partitioning(true), + ) as _; + let spm2 = sort_preserving_merge_exec(sort_exprs, sort2); + + let physical_plan = aggregate_exec(spm2); + + // When removing a `SortPreservingMergeExec`, make sure that partitioning + // requirements are not violated. In some cases, we may need to replace + // it with a `CoalescePartitionsExec` instead of directly removing it. 
+ let expected_input = [ + "AggregateExec: mode=Final, gby=[], aggr=[]", + " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[true]", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " SortPreservingMergeExec: [non_nullable_col@1 ASC]", + " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + + let expected_optimized = [ + "AggregateExec: mode=Final, gby=[], aggr=[]", + " CoalescePartitionsExec", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_remove_unnecessary_sort4() -> Result<()> { + let schema = create_test_schema()?; + let source1 = repartition_exec(memory_exec(&schema)); + + let source2 = repartition_exec(memory_exec(&schema)); + let union = union_exec(vec![source1, source2]); + + let sort_exprs = vec![sort_expr("non_nullable_col", &schema)]; + // let sort = sort_exec(sort_exprs.clone(), union); + let sort = Arc::new( + SortExec::new(sort_exprs.clone(), union).with_preserve_partitioning(true), + ) as _; + let spm = sort_preserving_merge_exec(sort_exprs, sort); + + let filter = filter_exec( + Arc::new(NotExpr::new( + col("non_nullable_col", schema.as_ref()).unwrap(), + )), + spm, + ); + + let sort_exprs = vec![ + sort_expr("nullable_col", &schema), + sort_expr("non_nullable_col", &schema), + ]; + let physical_plan = sort_exec(sort_exprs, filter); + + // When removing a `SortPreservingMergeExec`, make sure that partitioning + // requirements are not violated. In some cases, we may need to replace + // it with a `CoalescePartitionsExec` instead of directly removing it. + let expected_input = ["SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " FilterExec: NOT non_nullable_col@1", + " SortPreservingMergeExec: [non_nullable_col@1 ASC]", + " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[true]", + " UnionExec", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " MemoryExec: partitions=1, partition_sizes=[0]", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " MemoryExec: partitions=1, partition_sizes=[0]"]; + + let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[true]", + " FilterExec: NOT non_nullable_col@1", + " UnionExec", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " MemoryExec: partitions=1, partition_sizes=[0]", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " MemoryExec: partitions=1, partition_sizes=[0]"]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_remove_unnecessary_sort5() -> Result<()> { + let left_schema = create_test_schema2()?; + let right_schema = create_test_schema3()?; + let left_input = memory_exec(&left_schema); + let parquet_sort_exprs = vec![sort_expr("a", &right_schema)]; + let right_input = parquet_exec_sorted(&right_schema, parquet_sort_exprs); + + let on = vec![( + Arc::new(Column::new_with_schema("col_a", &left_schema)?) 
as _, + Arc::new(Column::new_with_schema("c", &right_schema)?) as _, + )]; + let join = hash_join_exec(left_input, right_input, on, None, &JoinType::Inner)?; + let physical_plan = sort_exec(vec![sort_expr("a", &join.schema())], join); + + let expected_input = ["SortExec: expr=[a@2 ASC], preserve_partitioning=[false]", + " HashJoinExec: mode=Partitioned, join_type=Inner, on=[(col_a@0, c@2)]", + " MemoryExec: partitions=1, partition_sizes=[0]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC]"]; + + let expected_optimized = ["HashJoinExec: mode=Partitioned, join_type=Inner, on=[(col_a@0, c@2)]", + " MemoryExec: partitions=1, partition_sizes=[0]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC]"]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_remove_unnecessary_spm1() -> Result<()> { + let schema = create_test_schema()?; + let source = memory_exec(&schema); + let input = sort_preserving_merge_exec( + vec![sort_expr("non_nullable_col", &schema)], + source, + ); + let input2 = sort_preserving_merge_exec( + vec![sort_expr("non_nullable_col", &schema)], + input, + ); + let physical_plan = + sort_preserving_merge_exec(vec![sort_expr("nullable_col", &schema)], input2); + + let expected_input = [ + "SortPreservingMergeExec: [nullable_col@0 ASC]", + " SortPreservingMergeExec: [non_nullable_col@1 ASC]", + " SortPreservingMergeExec: [non_nullable_col@1 ASC]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + let expected_optimized = [ + "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_do_not_remove_sort_with_limit() -> Result<()> { + let schema = create_test_schema()?; + + let source1 = parquet_exec(&schema); + let sort_exprs = vec![ + sort_expr("nullable_col", &schema), + sort_expr("non_nullable_col", &schema), + ]; + let sort = sort_exec(sort_exprs.clone(), source1); + let limit = limit_exec(sort); + + let parquet_sort_exprs = vec![sort_expr("nullable_col", &schema)]; + let source2 = parquet_exec_sorted(&schema, parquet_sort_exprs); + + let union = union_exec(vec![source2, limit]); + let repartition = repartition_exec(union); + let physical_plan = sort_preserving_merge_exec(sort_exprs, repartition); + + let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", + " UnionExec", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", + " GlobalLimitExec: skip=0, fetch=100", + " LocalLimitExec: fetch=100", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; + + // We should keep the bottom `SortExec`. 
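+ // Removing that sort would change which rows the limit keeps, since the
+ // limit just passes through the first rows of its (sorted) input.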
+ let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[true]", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", + " UnionExec", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", + " GlobalLimitExec: skip=0, fetch=100", + " LocalLimitExec: fetch=100", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_change_wrong_sorting() -> Result<()> { + let schema = create_test_schema()?; + let source = memory_exec(&schema); + let sort_exprs = vec![ + sort_expr("nullable_col", &schema), + sort_expr("non_nullable_col", &schema), + ]; + let sort = sort_exec(vec![sort_exprs[0].clone()], source); + let physical_plan = sort_preserving_merge_exec(sort_exprs, sort); + let expected_input = [ + "SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + let expected_optimized = [ + "SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_change_wrong_sorting2() -> Result<()> { + let schema = create_test_schema()?; + let source = memory_exec(&schema); + let sort_exprs = vec![ + sort_expr("nullable_col", &schema), + sort_expr("non_nullable_col", &schema), + ]; + let spm1 = sort_preserving_merge_exec(sort_exprs.clone(), source); + let sort2 = sort_exec(vec![sort_exprs[0].clone()], spm1); + let physical_plan = + sort_preserving_merge_exec(vec![sort_exprs[1].clone()], sort2); + + let expected_input = [ + "SortPreservingMergeExec: [non_nullable_col@1 ASC]", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + let expected_optimized = [ + "SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_union_inputs_sorted() -> Result<()> { + let schema = create_test_schema()?; + + let source1 = parquet_exec(&schema); + let sort_exprs = vec![sort_expr("nullable_col", &schema)]; + let sort = sort_exec(sort_exprs.clone(), source1); + + let source2 = parquet_exec_sorted(&schema, sort_exprs.clone()); + + let union = union_exec(vec![source2, sort]); + let physical_plan = sort_preserving_merge_exec(sort_exprs, union); + + // one input to the union is already sorted, one is not. 
+ let expected_input = vec![ + "SortPreservingMergeExec: [nullable_col@0 ASC]", + " UnionExec", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + ]; + // should not add a sort at the output of the union, input plan should not be changed + let expected_optimized = expected_input.clone(); + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_union_inputs_different_sorted() -> Result<()> { + let schema = create_test_schema()?; + + let source1 = parquet_exec(&schema); + let sort_exprs = vec![sort_expr("nullable_col", &schema)]; + let sort = sort_exec(sort_exprs.clone(), source1); + + let parquet_sort_exprs = vec![ + sort_expr("nullable_col", &schema), + sort_expr("non_nullable_col", &schema), + ]; + let source2 = parquet_exec_sorted(&schema, parquet_sort_exprs); + + let union = union_exec(vec![source2, sort]); + let physical_plan = sort_preserving_merge_exec(sort_exprs, union); + + // one input to the union is already sorted, one is not. + let expected_input = vec![ + "SortPreservingMergeExec: [nullable_col@0 ASC]", + " UnionExec", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC]", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + ]; + // should not add a sort at the output of the union, input plan should not be changed + let expected_optimized = expected_input.clone(); + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_union_inputs_different_sorted2() -> Result<()> { + let schema = create_test_schema()?; + + let source1 = parquet_exec(&schema); + let sort_exprs = vec![ + sort_expr("nullable_col", &schema), + sort_expr("non_nullable_col", &schema), + ]; + let sort = sort_exec(sort_exprs.clone(), source1); + + let parquet_sort_exprs = vec![sort_expr("nullable_col", &schema)]; + let source2 = parquet_exec_sorted(&schema, parquet_sort_exprs); + + let union = union_exec(vec![source2, sort]); + let physical_plan = sort_preserving_merge_exec(sort_exprs, union); + + // Input is an invalid plan. In this case rule should add required sorting in appropriate places. + // First ParquetExec has output ordering(nullable_col@0 ASC). However, it doesn't satisfy the + // required ordering of SortPreservingMergeExec. 
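+ // A prefix of the required ordering is not enough here, so the optimizer
+ // adds a `SortExec` on top of the partially sorted branch as well.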
+ let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + " UnionExec", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; + + let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + " UnionExec", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_union_inputs_different_sorted3() -> Result<()> { + let schema = create_test_schema()?; + + let source1 = parquet_exec(&schema); + let sort_exprs1 = vec![ + sort_expr("nullable_col", &schema), + sort_expr("non_nullable_col", &schema), + ]; + let sort1 = sort_exec(sort_exprs1, source1.clone()); + let sort_exprs2 = vec![sort_expr("nullable_col", &schema)]; + let sort2 = sort_exec(sort_exprs2, source1); + + let parquet_sort_exprs = vec![sort_expr("nullable_col", &schema)]; + let source2 = parquet_exec_sorted(&schema, parquet_sort_exprs.clone()); + + let union = union_exec(vec![sort1, source2, sort2]); + let physical_plan = sort_preserving_merge_exec(parquet_sort_exprs, union); + + // First input to the union is not Sorted (SortExec is finer than required ordering by the SortPreservingMergeExec above). + // Second input to the union is already Sorted (matches with the required ordering by the SortPreservingMergeExec above). + // Third input to the union is not Sorted (SortExec is matches required ordering by the SortPreservingMergeExec above). 
+ let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", + " UnionExec", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; + // should adjust sorting in the first input of the union such that it is not unnecessarily fine + let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC]", + " UnionExec", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_union_inputs_different_sorted4() -> Result<()> { + let schema = create_test_schema()?; + + let source1 = parquet_exec(&schema); + let sort_exprs1 = vec![ + sort_expr("nullable_col", &schema), + sort_expr("non_nullable_col", &schema), + ]; + let sort_exprs2 = vec![sort_expr("nullable_col", &schema)]; + let sort1 = sort_exec(sort_exprs2.clone(), source1.clone()); + let sort2 = sort_exec(sort_exprs2.clone(), source1); + + let source2 = parquet_exec_sorted(&schema, sort_exprs2); + + let union = union_exec(vec![sort1, source2, sort2]); + let physical_plan = sort_preserving_merge_exec(sort_exprs1, union); + + // Ordering requirement of the `SortPreservingMergeExec` is not met. + // Should modify the plan to ensure that all three inputs to the + // `UnionExec` satisfy the ordering, OR add a single sort after + // the `UnionExec` (both of which are equally good for this example). 
+ let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + " UnionExec", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; + let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + " UnionExec", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_union_inputs_different_sorted5() -> Result<()> { + let schema = create_test_schema()?; + + let source1 = parquet_exec(&schema); + let sort_exprs1 = vec![ + sort_expr("nullable_col", &schema), + sort_expr("non_nullable_col", &schema), + ]; + let sort_exprs2 = vec![ + sort_expr("nullable_col", &schema), + sort_expr_options( + "non_nullable_col", + &schema, + SortOptions { + descending: true, + nulls_first: false, + }, + ), + ]; + let sort_exprs3 = vec![sort_expr("nullable_col", &schema)]; + let sort1 = sort_exec(sort_exprs1, source1.clone()); + let sort2 = sort_exec(sort_exprs2, source1); + + let union = union_exec(vec![sort1, sort2]); + let physical_plan = sort_preserving_merge_exec(sort_exprs3, union); + + // The `UnionExec` doesn't preserve any of the inputs ordering in the + // example below. However, we should be able to change the unnecessarily + // fine `SortExec`s below with required `SortExec`s that are absolutely necessary. 
+ let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", + " UnionExec", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; + let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC]", + " UnionExec", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_union_inputs_different_sorted6() -> Result<()> { + let schema = create_test_schema()?; + + let source1 = parquet_exec(&schema); + let sort_exprs1 = vec![sort_expr("nullable_col", &schema)]; + let sort1 = sort_exec(sort_exprs1, source1.clone()); + let sort_exprs2 = vec![ + sort_expr("nullable_col", &schema), + sort_expr("non_nullable_col", &schema), + ]; + let repartition = repartition_exec(source1); + let spm = sort_preserving_merge_exec(sort_exprs2, repartition); + + let parquet_sort_exprs = vec![sort_expr("nullable_col", &schema)]; + let source2 = parquet_exec_sorted(&schema, parquet_sort_exprs.clone()); + + let union = union_exec(vec![sort1, source2, spm]); + let physical_plan = sort_preserving_merge_exec(parquet_sort_exprs, union); + + // The plan is not valid as it is -- the input ordering requirement + // of the `SortPreservingMergeExec` under the third child of the + // `UnionExec` is not met. We should add a `SortExec` below it. + // At the same time, this ordering requirement is unnecessarily fine. + // The final plan should be valid AND the ordering of the third child + // shouldn't be finer than necessary. + let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", + " UnionExec", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", + " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; + // Should adjust the requirement in the third input of the union so + // that it is not unnecessarily fine. 
+ let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC]", + " UnionExec", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[true]", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_union_inputs_different_sorted7() -> Result<()> { + let schema = create_test_schema()?; + + let source1 = parquet_exec(&schema); + let sort_exprs1 = vec![ + sort_expr("nullable_col", &schema), + sort_expr("non_nullable_col", &schema), + ]; + let sort_exprs3 = vec![sort_expr("nullable_col", &schema)]; + let sort1 = sort_exec(sort_exprs1.clone(), source1.clone()); + let sort2 = sort_exec(sort_exprs1, source1); + + let union = union_exec(vec![sort1, sort2]); + let physical_plan = sort_preserving_merge_exec(sort_exprs3, union); + + // Union has unnecessarily fine ordering below it. We should be able to replace them with absolutely necessary ordering. + let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", + " UnionExec", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; + // Union preserves the inputs ordering and we should not change any of the SortExecs under UnionExec + let expected_output = ["SortPreservingMergeExec: [nullable_col@0 ASC]", + " UnionExec", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; + assert_optimized!(expected_input, expected_output, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_union_inputs_different_sorted8() -> Result<()> { + let schema = create_test_schema()?; + + let source1 = parquet_exec(&schema); + let sort_exprs1 = vec![ + sort_expr("nullable_col", &schema), + sort_expr("non_nullable_col", &schema), + ]; + let sort_exprs2 = vec![ + sort_expr_options( + "nullable_col", + &schema, + SortOptions { + descending: true, + nulls_first: false, + }, + ), + sort_expr_options( + "non_nullable_col", + &schema, + SortOptions { + descending: true, + nulls_first: false, + }, + ), + ]; + let sort1 = sort_exec(sort_exprs1, source1.clone()); + let sort2 = sort_exec(sort_exprs2, source1); + + let physical_plan = union_exec(vec![sort1, sort2]); + + // The `UnionExec` doesn't preserve any of the inputs ordering in the + // example below. 
+ let expected_input = ["UnionExec", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + " SortExec: expr=[nullable_col@0 DESC NULLS LAST,non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; + // Since `UnionExec` doesn't preserve ordering in the plan above. + // We shouldn't keep SortExecs in the plan. + let expected_optimized = ["UnionExec", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_window_multi_path_sort() -> Result<()> { + let schema = create_test_schema()?; + + let sort_exprs1 = vec![ + sort_expr("nullable_col", &schema), + sort_expr("non_nullable_col", &schema), + ]; + let sort_exprs2 = vec![sort_expr("nullable_col", &schema)]; + // reverse sorting of sort_exprs2 + let sort_exprs3 = vec![sort_expr_options( + "nullable_col", + &schema, + SortOptions { + descending: true, + nulls_first: false, + }, + )]; + let source1 = parquet_exec_sorted(&schema, sort_exprs1); + let source2 = parquet_exec_sorted(&schema, sort_exprs2); + let sort1 = sort_exec(sort_exprs3.clone(), source1); + let sort2 = sort_exec(sort_exprs3.clone(), source2); + + let union = union_exec(vec![sort1, sort2]); + let spm = sort_preserving_merge_exec(sort_exprs3.clone(), union); + let physical_plan = bounded_window_exec("nullable_col", sort_exprs3, spm); + + // The `WindowAggExec` gets its sorting from multiple children jointly. + // During the removal of `SortExec`s, it should be able to remove the + // corresponding SortExecs together. Also, the inputs of these `SortExec`s + // are not necessarily the same to be able to remove them. 
+ let expected_input = [ + "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " SortPreservingMergeExec: [nullable_col@0 DESC NULLS LAST]", + " UnionExec", + " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC]", + " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]"]; + let expected_optimized = [ + "WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(NULL), is_causal: false }]", + " SortPreservingMergeExec: [nullable_col@0 ASC]", + " UnionExec", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]"]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_window_multi_path_sort2() -> Result<()> { + let schema = create_test_schema()?; + + let sort_exprs1 = vec![ + sort_expr("nullable_col", &schema), + sort_expr("non_nullable_col", &schema), + ]; + let sort_exprs2 = vec![sort_expr("nullable_col", &schema)]; + let source1 = parquet_exec_sorted(&schema, sort_exprs2.clone()); + let source2 = parquet_exec_sorted(&schema, sort_exprs2.clone()); + let sort1 = sort_exec(sort_exprs1.clone(), source1); + let sort2 = sort_exec(sort_exprs1.clone(), source2); + + let union = union_exec(vec![sort1, sort2]); + let spm = Arc::new(SortPreservingMergeExec::new(sort_exprs1, union)) as _; + let physical_plan = bounded_window_exec("nullable_col", sort_exprs2, spm); + + // The `WindowAggExec` can get its required sorting from the leaf nodes directly. 
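+        // (Each source already provides `nullable_col@0 ASC`, which is all the
+        // ordering the window operator needs here.)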
+ // The unnecessary SortExecs should be removed + let expected_input = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + " UnionExec", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]"]; + let expected_optimized = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " SortPreservingMergeExec: [nullable_col@0 ASC]", + " UnionExec", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]"]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_union_inputs_different_sorted_with_limit() -> Result<()> { + let schema = create_test_schema()?; + + let source1 = parquet_exec(&schema); + let sort_exprs1 = vec![ + sort_expr("nullable_col", &schema), + sort_expr("non_nullable_col", &schema), + ]; + let sort_exprs2 = vec![ + sort_expr("nullable_col", &schema), + sort_expr_options( + "non_nullable_col", + &schema, + SortOptions { + descending: true, + nulls_first: false, + }, + ), + ]; + let sort_exprs3 = vec![sort_expr("nullable_col", &schema)]; + let sort1 = sort_exec(sort_exprs1, source1.clone()); + + let sort2 = sort_exec(sort_exprs2, source1); + let limit = local_limit_exec(sort2); + let limit = global_limit_exec(limit); + + let union = union_exec(vec![sort1, limit]); + let physical_plan = sort_preserving_merge_exec(sort_exprs3, union); + + // Should not change the unnecessarily fine `SortExec`s because there is `LimitExec` + let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", + " UnionExec", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + " GlobalLimitExec: skip=0, fetch=100", + " LocalLimitExec: fetch=100", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; + let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC]", + " UnionExec", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + " GlobalLimitExec: skip=0, fetch=100", + " LocalLimitExec: fetch=100", + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", + " 
ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_sort_merge_join_order_by_left() -> Result<()> { + let left_schema = create_test_schema()?; + let right_schema = create_test_schema2()?; + + let left = parquet_exec(&left_schema); + let right = parquet_exec(&right_schema); + + // Join on (nullable_col == col_a) + let join_on = vec![( + Arc::new(Column::new_with_schema("nullable_col", &left.schema()).unwrap()) + as _, + Arc::new(Column::new_with_schema("col_a", &right.schema()).unwrap()) as _, + )]; + + let join_types = vec![ + JoinType::Inner, + JoinType::Left, + JoinType::Right, + JoinType::Full, + JoinType::LeftSemi, + JoinType::LeftAnti, + ]; + for join_type in join_types { + let join = + sort_merge_join_exec(left.clone(), right.clone(), &join_on, &join_type); + let sort_exprs = vec![ + sort_expr("nullable_col", &join.schema()), + sort_expr("non_nullable_col", &join.schema()), + ]; + let physical_plan = sort_preserving_merge_exec(sort_exprs.clone(), join); + + let join_plan = format!( + "SortMergeJoin: join_type={join_type}, on=[(nullable_col@0, col_a@0)]" + ); + let join_plan2 = format!( + " SortMergeJoin: join_type={join_type}, on=[(nullable_col@0, col_a@0)]" + ); + let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", + join_plan2.as_str(), + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]"]; + let expected_optimized = match join_type { + JoinType::Inner + | JoinType::Left + | JoinType::LeftSemi + | JoinType::LeftAnti => { + // can push down the sort requirements and save 1 SortExec + vec![ + join_plan.as_str(), + " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + " SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]", + ] + } + _ => { + // can not push down the sort requirements + vec![ + "SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", + join_plan2.as_str(), + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + " SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]", + ] + } + }; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + } + Ok(()) + } + + #[tokio::test] + async fn test_sort_merge_join_order_by_right() -> Result<()> { + let left_schema = create_test_schema()?; + let right_schema = create_test_schema2()?; + + let left = parquet_exec(&left_schema); + let right = parquet_exec(&right_schema); + + // Join on (nullable_col == col_a) + let join_on = vec![( + Arc::new(Column::new_with_schema("nullable_col", &left.schema()).unwrap()) + as _, + Arc::new(Column::new_with_schema("col_a", &right.schema()).unwrap()) as _, + )]; + + let join_types = vec![ + JoinType::Inner, + JoinType::Left, + JoinType::Right, + JoinType::Full, + JoinType::RightAnti, + ]; + for join_type in join_types { + let join = + sort_merge_join_exec(left.clone(), right.clone(), &join_on, &join_type); + let sort_exprs = 
vec![ + sort_expr("col_a", &join.schema()), + sort_expr("col_b", &join.schema()), + ]; + let physical_plan = sort_preserving_merge_exec(sort_exprs, join); + + let join_plan = format!( + "SortMergeJoin: join_type={join_type}, on=[(nullable_col@0, col_a@0)]" + ); + let spm_plan = match join_type { + JoinType::RightAnti => { + "SortPreservingMergeExec: [col_a@0 ASC,col_b@1 ASC]" + } + _ => "SortPreservingMergeExec: [col_a@2 ASC,col_b@3 ASC]", + }; + let join_plan2 = format!( + " SortMergeJoin: join_type={join_type}, on=[(nullable_col@0, col_a@0)]" + ); + let expected_input = [spm_plan, + join_plan2.as_str(), + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]"]; + let expected_optimized = match join_type { + JoinType::Inner | JoinType::Right | JoinType::RightAnti => { + // can push down the sort requirements and save 1 SortExec + vec![ + join_plan.as_str(), + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + " SortExec: expr=[col_a@0 ASC,col_b@1 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]", + ] + } + _ => { + // can not push down the sort requirements for Left and Full join. + vec![ + "SortExec: expr=[col_a@2 ASC,col_b@3 ASC], preserve_partitioning=[false]", + join_plan2.as_str(), + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + " SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]", + ] + } + }; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + } + Ok(()) + } + + #[tokio::test] + async fn test_sort_merge_join_complex_order_by() -> Result<()> { + let left_schema = create_test_schema()?; + let right_schema = create_test_schema2()?; + + let left = parquet_exec(&left_schema); + let right = parquet_exec(&right_schema); + + // Join on (nullable_col == col_a) + let join_on = vec![( + Arc::new(Column::new_with_schema("nullable_col", &left.schema()).unwrap()) + as _, + Arc::new(Column::new_with_schema("col_a", &right.schema()).unwrap()) as _, + )]; + + let join = sort_merge_join_exec(left, right, &join_on, &JoinType::Inner); + + // order by (col_b, col_a) + let sort_exprs1 = vec![ + sort_expr("col_b", &join.schema()), + sort_expr("col_a", &join.schema()), + ]; + let physical_plan = sort_preserving_merge_exec(sort_exprs1, join.clone()); + + let expected_input = ["SortPreservingMergeExec: [col_b@3 ASC,col_a@2 ASC]", + " SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]"]; + + // can not push down the sort requirements, need to add SortExec + let expected_optimized = ["SortExec: expr=[col_b@3 ASC,col_a@2 ASC], preserve_partitioning=[false]", + " SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", + " SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]", + " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, 
col_b]"];
+        assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+
+        // order by (nullable_col, col_b, col_a)
+        let sort_exprs2 = vec![
+            sort_expr("nullable_col", &join.schema()),
+            sort_expr("col_b", &join.schema()),
+            sort_expr("col_a", &join.schema()),
+        ];
+        let physical_plan = sort_preserving_merge_exec(sort_exprs2, join);
+
+        let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,col_b@3 ASC,col_a@2 ASC]",
+            "  SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]",
+            "    ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]",
+            "    ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]"];
+
+        // can not push down the sort requirements, need to add SortExec
+        let expected_optimized = ["SortExec: expr=[nullable_col@0 ASC,col_b@3 ASC,col_a@2 ASC], preserve_partitioning=[false]",
+            "  SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]",
+            "    SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]",
+            "      ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]",
+            "    SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]",
+            "      ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]"];
+        assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_multiple_sort_window_exec() -> Result<()> {
+        let schema = create_test_schema()?;
+        let source = memory_exec(&schema);
+
+        let sort_exprs1 = vec![sort_expr("nullable_col", &schema)];
+        let sort_exprs2 = vec![
+            sort_expr("nullable_col", &schema),
+            sort_expr("non_nullable_col", &schema),
+        ];
+
+        let sort1 = sort_exec(sort_exprs1.clone(), source);
+        let window_agg1 =
+            bounded_window_exec("non_nullable_col", sort_exprs1.clone(), sort1);
+        let window_agg2 =
+            bounded_window_exec("non_nullable_col", sort_exprs2, window_agg1);
+        let physical_plan =
+            bounded_window_exec("non_nullable_col", sort_exprs1, window_agg2);
+
+        let expected_input = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]",
+            "  BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]",
+            "    BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]",
+            "      SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]",
+            "        MemoryExec: partitions=1, partition_sizes=[0]"];
+
+        let expected_optimized = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]",
+            "  BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]",
+            "    BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]",
+            "      SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]",
+            "        MemoryExec: partitions=1, partition_sizes=[0]"];
+        assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_multilayer_coalesce_partitions() -> Result<()> {
+        let schema = create_test_schema()?;
+
+        let source1 = parquet_exec(&schema);
+        let repartition = repartition_exec(source1);
+        let coalesce = Arc::new(CoalescePartitionsExec::new(repartition)) as _;
+        // Add a dummy layer that propagates the sort requirement above, to test
+        // whether the sort can be removed through multiple intermediate layers.
+        let filter = filter_exec(
+            Arc::new(NotExpr::new(
+                col("non_nullable_col", schema.as_ref()).unwrap(),
+            )),
+            coalesce,
+        );
+        let sort_exprs = vec![sort_expr("nullable_col", &schema)];
+        let physical_plan = sort_exec(sort_exprs, filter);
+
+        // CoalescePartitionsExec and SortExec are not directly consecutive. In
+        // this case, we should still be able to parallelize the sort (given that
+        // the executors in between don't require a single partition).
+        let expected_input = ["SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]",
+            "  FilterExec: NOT non_nullable_col@1",
+            "    CoalescePartitionsExec",
+            "      RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+            "        ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"];
+        let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC]",
+            "  SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[true]",
+            "    FilterExec: NOT non_nullable_col@1",
+            "      RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+            "        ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"];
+        assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    // With the new changes in sort enforcement, the pipeline
+    // EnforceSorting -> EnforceDistribution -> EnforceSorting should produce the
+    // same result as EnforceDistribution -> EnforceSorting. This enables us to
+    // run EnforceSorting before EnforceDistribution if desired, given that
+    // EnforceSorting is invoked at least once after the last
+    // EnforceDistribution (which may invalidate the ordering invariant).
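+    // Concretely, the test below applies [EnforceDistribution, EnforceSorting]
+    // and [EnforceSorting, EnforceDistribution, EnforceSorting] to the same
+    // input plan and checks that the resulting physical plans are identical.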
+    async fn test_commutativity() -> Result<()> {
+        let schema = create_test_schema()?;
+
+        let session_ctx = SessionContext::new();
+        let state = session_ctx.state();
+
+        let memory_exec = memory_exec(&schema);
+        let sort_exprs = vec![sort_expr("nullable_col", &schema)];
+        let window = bounded_window_exec("nullable_col", sort_exprs.clone(), memory_exec);
+        let repartition = repartition_exec(window);
+
+        let orig_plan =
+            Arc::new(SortExec::new(sort_exprs, repartition)) as Arc<dyn ExecutionPlan>;
+        let actual = get_plan_string(&orig_plan);
+        let expected_input = vec![
+            "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]",
+            "  RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+            "    BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]",
+            "      MemoryExec: partitions=1, partition_sizes=[0]",
+        ];
+        assert_eq!(
+            expected_input, actual,
+            "\n**Original Plan Mismatch\n\nexpected:\n\n{expected_input:#?}\nactual:\n\n{actual:#?}\n\n"
+        );
+
+        let mut plan = orig_plan.clone();
+        let rules = vec![
+            Arc::new(EnforceDistribution::new()) as Arc<dyn PhysicalOptimizerRule>,
+            Arc::new(EnforceSorting::new()) as Arc<dyn PhysicalOptimizerRule>,
+        ];
+        for rule in rules {
+            plan = rule.optimize(plan, state.config_options())?;
+        }
+        let first_plan = plan.clone();
+
+        let mut plan = orig_plan.clone();
+        let rules = vec![
+            Arc::new(EnforceSorting::new()) as Arc<dyn PhysicalOptimizerRule>,
+            Arc::new(EnforceDistribution::new()) as Arc<dyn PhysicalOptimizerRule>,
+            Arc::new(EnforceSorting::new()) as Arc<dyn PhysicalOptimizerRule>,
+        ];
+        for rule in rules {
+            plan = rule.optimize(plan, state.config_options())?;
+        }
+        let second_plan = plan.clone();
+
+        assert_eq!(get_plan_string(&first_plan), get_plan_string(&second_plan));
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_coalesce_propagate() -> Result<()> {
+        let schema = create_test_schema()?;
+        let source = memory_exec(&schema);
+        let repartition = repartition_exec(source);
+        let coalesce_partitions = Arc::new(CoalescePartitionsExec::new(repartition));
+        let repartition = repartition_exec(coalesce_partitions);
+        let sort_exprs = vec![sort_expr("nullable_col", &schema)];
+        // Add local sort
+        let sort = Arc::new(
+            SortExec::new(sort_exprs.clone(), repartition)
+                .with_preserve_partitioning(true),
+        ) as _;
+        let spm = sort_preserving_merge_exec(sort_exprs.clone(), sort);
+        let sort = sort_exec(sort_exprs, spm);
+
+        let physical_plan = sort.clone();
+        // The sort parallelization rule should break the Coalesce + Sort linkage
+        // when the sort is a global sort. Also, the input plan is not valid as it
+        // is; we need to add a SortExec before the SortPreservingMergeExec.
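+        // (In the expected optimized plan below, the redundant global `SortExec`
+        // disappears, and the `CoalescePartitionsExec` plus one of the
+        // `RepartitionExec`s cancel out, leaving a single partitioned sort under
+        // the merge.)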
+ let expected_input = ["SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " SortPreservingMergeExec: [nullable_col@0 ASC]", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[true]", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " CoalescePartitionsExec", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " MemoryExec: partitions=1, partition_sizes=[0]"]; + let expected_optimized = [ + "SortPreservingMergeExec: [nullable_col@0 ASC]", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[true]", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " MemoryExec: partitions=1, partition_sizes=[0]", + ]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[tokio::test] + async fn test_with_lost_ordering_bounded() -> Result<()> { + let schema = create_test_schema3()?; + let sort_exprs = vec![sort_expr("a", &schema)]; + let source = csv_exec_sorted(&schema, sort_exprs); + let repartition_rr = repartition_exec(source); + let repartition_hash = Arc::new(RepartitionExec::try_new( + repartition_rr, + Partitioning::Hash(vec![col("c", &schema).unwrap()], 10), + )?) as _; + let coalesce_partitions = coalesce_partitions_exec(repartition_hash); + let physical_plan = sort_exec(vec![sort_expr("a", &schema)], coalesce_partitions); + + let expected_input = ["SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", + " CoalescePartitionsExec", + " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], has_header=false"]; + let expected_optimized = ["SortPreservingMergeExec: [a@0 ASC]", + " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", + " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], has_header=false"]; + assert_optimized!(expected_input, expected_optimized, physical_plan, true); + + Ok(()) + } + + #[rstest] + #[tokio::test] + async fn test_with_lost_ordering_unbounded_bounded( + #[values(false, true)] source_unbounded: bool, + ) -> Result<()> { + let schema = create_test_schema3()?; + let sort_exprs = vec![sort_expr("a", &schema)]; + // create either bounded or unbounded source + let source = if source_unbounded { + stream_exec_ordered(&schema, sort_exprs) + } else { + csv_exec_ordered(&schema, sort_exprs) + }; + let repartition_rr = repartition_exec(source); + let repartition_hash = Arc::new(RepartitionExec::try_new( + repartition_rr, + Partitioning::Hash(vec![col("c", &schema).unwrap()], 10), + )?) 
as _; + let coalesce_partitions = coalesce_partitions_exec(repartition_hash); + let physical_plan = sort_exec(vec![sort_expr("a", &schema)], coalesce_partitions); + + // Expected inputs unbounded and bounded + let expected_input_unbounded = vec![ + "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", + " CoalescePartitionsExec", + " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]", + ]; + let expected_input_bounded = vec![ + "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", + " CoalescePartitionsExec", + " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], has_header=true", + ]; + + // Expected unbounded result (same for with and without flag) + let expected_optimized_unbounded = vec![ + "SortPreservingMergeExec: [a@0 ASC]", + " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]", + ]; + + // Expected bounded results with and without flag + let expected_optimized_bounded = vec![ + "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", + " CoalescePartitionsExec", + " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], has_header=true", + ]; + let expected_optimized_bounded_parallelize_sort = vec![ + "SortPreservingMergeExec: [a@0 ASC]", + " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", + " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], has_header=true", + ]; + let (expected_input, expected_optimized, expected_optimized_sort_parallelize) = + if source_unbounded { + ( + expected_input_unbounded, + expected_optimized_unbounded.clone(), + expected_optimized_unbounded, + ) + } else { + ( + expected_input_bounded, + expected_optimized_bounded, + expected_optimized_bounded_parallelize_sort, + ) + }; + assert_optimized!( + expected_input, + expected_optimized, + physical_plan.clone(), + false + ); + assert_optimized!( + expected_input, + expected_optimized_sort_parallelize, + physical_plan, + true + ); + + Ok(()) + } + + #[tokio::test] + async fn test_do_not_pushdown_through_spm() -> Result<()> { + let schema = create_test_schema3()?; + let sort_exprs = vec![sort_expr("a", &schema), sort_expr("b", &schema)]; + let source = csv_exec_sorted(&schema, sort_exprs.clone()); + let repartition_rr = repartition_exec(source); + let spm = sort_preserving_merge_exec(sort_exprs, repartition_rr); + let physical_plan = sort_exec(vec![sort_expr("b", &schema)], spm); + + let expected_input = ["SortExec: expr=[b@1 ASC], preserve_partitioning=[false]", + " SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", + " RepartitionExec: 
partitioning=RoundRobinBatch(10), input_partitions=1", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], has_header=false",]; + let expected_optimized = ["SortExec: expr=[b@1 ASC], preserve_partitioning=[false]", + " SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], has_header=false",]; + assert_optimized!(expected_input, expected_optimized, physical_plan, false); + + Ok(()) + } + + #[tokio::test] + async fn test_pushdown_through_spm() -> Result<()> { + let schema = create_test_schema3()?; + let sort_exprs = vec![sort_expr("a", &schema), sort_expr("b", &schema)]; + let source = csv_exec_sorted(&schema, sort_exprs.clone()); + let repartition_rr = repartition_exec(source); + let spm = sort_preserving_merge_exec(sort_exprs, repartition_rr); + let physical_plan = sort_exec( + vec![ + sort_expr("a", &schema), + sort_expr("b", &schema), + sort_expr("c", &schema), + ], + spm, + ); + + let expected_input = ["SortExec: expr=[a@0 ASC,b@1 ASC,c@2 ASC], preserve_partitioning=[false]", + " SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], has_header=false",]; + let expected_optimized = ["SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", + " SortExec: expr=[a@0 ASC,b@1 ASC,c@2 ASC], preserve_partitioning=[true]", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], has_header=false",]; + assert_optimized!(expected_input, expected_optimized, physical_plan, false); + + Ok(()) + } + + #[tokio::test] + async fn test_window_multi_layer_requirement() -> Result<()> { + let schema = create_test_schema3()?; + let sort_exprs = vec![sort_expr("a", &schema), sort_expr("b", &schema)]; + let source = csv_exec_sorted(&schema, vec![]); + let sort = sort_exec(sort_exprs.clone(), source); + let repartition = repartition_exec(sort); + let repartition = spr_repartition_exec(repartition); + let spm = sort_preserving_merge_exec(sort_exprs.clone(), repartition); + + let physical_plan = bounded_window_exec("a", sort_exprs, spm); + + let expected_input = [ + "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC,b@1 ASC", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", + " SortExec: expr=[a@0 ASC,b@1 ASC], preserve_partitioning=[false]", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + ]; + let expected_optimized = [ + "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + " SortExec: expr=[a@0 ASC,b@1 ASC], 
preserve_partitioning=[false]",
+            "    CoalescePartitionsExec",
+            "      RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10",
+            "        RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+            "          CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false",
+        ];
+        assert_optimized!(expected_input, expected_optimized, physical_plan, false);
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_replace_with_partial_sort() -> Result<()> {
+        let schema = create_test_schema3()?;
+        let input_sort_exprs = vec![sort_expr("a", &schema)];
+        let unbounded_input = stream_exec_ordered(&schema, input_sort_exprs);
+
+        let physical_plan = sort_exec(
+            vec![sort_expr("a", &schema), sort_expr("c", &schema)],
+            unbounded_input,
+        );
+
+        let expected_input = [
+            "SortExec: expr=[a@0 ASC,c@2 ASC], preserve_partitioning=[false]",
+            "  StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]"
+        ];
+        let expected_optimized = [
+            "PartialSortExec: expr=[a@0 ASC,c@2 ASC], common_prefix_length=[1]",
+            "  StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]",
+        ];
+        assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_replace_with_partial_sort2() -> Result<()> {
+        let schema = create_test_schema3()?;
+        let input_sort_exprs = vec![sort_expr("a", &schema), sort_expr("c", &schema)];
+        let unbounded_input = stream_exec_ordered(&schema, input_sort_exprs);
+
+        let physical_plan = sort_exec(
+            vec![
+                sort_expr("a", &schema),
+                sort_expr("c", &schema),
+                sort_expr("d", &schema),
+            ],
+            unbounded_input,
+        );
+
+        let expected_input = [
+            "SortExec: expr=[a@0 ASC,c@2 ASC,d@3 ASC], preserve_partitioning=[false]",
+            "  StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC, c@2 ASC]"
+        ];
+        let expected_optimized = [
+            "PartialSortExec: expr=[a@0 ASC,c@2 ASC,d@3 ASC], common_prefix_length=[2]",
+            "  StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC, c@2 ASC]",
+        ];
+        assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_not_replaced_with_partial_sort_for_bounded_input() -> Result<()> {
+        let schema = create_test_schema3()?;
+        let input_sort_exprs = vec![sort_expr("b", &schema), sort_expr("c", &schema)];
+        let parquet_input = parquet_exec_sorted(&schema, input_sort_exprs);
+
+        let physical_plan = sort_exec(
+            vec![
+                sort_expr("a", &schema),
+                sort_expr("b", &schema),
+                sort_expr("c", &schema),
+            ],
+            parquet_input,
+        );
+        let expected_input = [
+            "SortExec: expr=[a@0 ASC,b@1 ASC,c@2 ASC], preserve_partitioning=[false]",
+            "  ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[b@1 ASC, c@2 ASC]"
+        ];
+        let expected_no_change = expected_input;
+        assert_optimized!(expected_input, expected_no_change, physical_plan, false);
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_not_replaced_with_partial_sort_for_unbounded_input() -> Result<()> {
+        let schema = create_test_schema3()?;
+        let input_sort_exprs = vec![sort_expr("b", &schema), sort_expr("c", &schema)];
+        let unbounded_input = stream_exec_ordered(&schema, input_sort_exprs);
+
+        let physical_plan = sort_exec(
+            vec![
+                sort_expr("a", &schema),
+                sort_expr("b", &schema),
+                sort_expr("c", &schema),
+            ],
+            unbounded_input,
+        );
+        let expected_input = [
+            "SortExec: expr=[a@0 ASC,b@1 ASC,c@2 ASC], preserve_partitioning=[false]",
+            "  StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]"
+        ];
+        let expected_no_change = expected_input;
+        assert_optimized!(expected_input, expected_no_change, physical_plan, true);
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_push_with_required_input_ordering_prohibited() -> Result<()> {
+        // SortExec: expr=[b]            <-- can't push this down
+        //   RequiredInputOrder expr=[a] <-- this requires input sorted by a, and preserves the input order
+        //     SortExec: expr=[a]
+        //       MemoryExec
+        let schema = create_test_schema3()?;
+        let sort_exprs_a = vec![sort_expr("a", &schema)];
+        let sort_exprs_b = vec![sort_expr("b", &schema)];
+        let plan = memory_exec(&schema);
+        let plan = sort_exec(sort_exprs_a.clone(), plan);
+        let plan = RequirementsTestExec::new(plan)
+            .with_required_input_ordering(sort_exprs_a)
+            .with_maintains_input_order(true)
+            .into_arc();
+        let plan = sort_exec(sort_exprs_b, plan);
+
+        let expected_input = [
+            "SortExec: expr=[b@1 ASC], preserve_partitioning=[false]",
+            "  RequiredInputOrderingExec",
+            "    SortExec: expr=[a@0 ASC], preserve_partitioning=[false]",
+            "      MemoryExec: partitions=1, partition_sizes=[0]",
+        ];
+        // should not be able to push the sort down
+        let expected_no_change = expected_input;
+        assert_optimized!(expected_input, expected_no_change, plan, true);
+        Ok(())
+    }
+
+    // test when the required input ordering is satisfied, so the sort can be
+    // pushed through
+    #[tokio::test]
+    async fn test_push_with_required_input_ordering_allowed() -> Result<()> {
+        // SortExec: expr=[a,b]          <-- can push this down (as it is compatible with the required input ordering)
+        //   RequiredInputOrder expr=[a] <-- this requires input sorted by a, and preserves the input order
+        //     SortExec: expr=[a]
+        //       MemoryExec
+        let schema = create_test_schema3()?;
+        let sort_exprs_a = vec![sort_expr("a", &schema)];
+        let sort_exprs_ab = vec![sort_expr("a", &schema), sort_expr("b", &schema)];
+        let plan = memory_exec(&schema);
+        let plan = sort_exec(sort_exprs_a.clone(), plan);
+        let plan = RequirementsTestExec::new(plan)
+            .with_required_input_ordering(sort_exprs_a)
+            .with_maintains_input_order(true)
+            .into_arc();
+        let plan = sort_exec(sort_exprs_ab, plan);
+
+        let expected_input = [
+            "SortExec: expr=[a@0 ASC,b@1 ASC], preserve_partitioning=[false]",
+            "  RequiredInputOrderingExec",
+            "    SortExec: expr=[a@0 ASC], preserve_partitioning=[false]",
+            "      MemoryExec: partitions=1, partition_sizes=[0]",
+        ];
+        // should be able to push the sort down
+        let expected = [
+            "RequiredInputOrderingExec",
+            "  SortExec: expr=[a@0 ASC,b@1 ASC], preserve_partitioning=[false]",
+            "    MemoryExec: partitions=1, partition_sizes=[0]",
+        ];
+        assert_optimized!(expected_input, expected, plan, true);
+        Ok(())
+    }
+}
diff --git a/datafusion/core/src/physical_optimizer/limited_distinct_aggregation_test.rs b/datafusion/core/src/physical_optimizer/limited_distinct_aggregation_test.rs
new file mode 100644
index 0000000000000..018f9b30631e2
--- /dev/null
+++ b/datafusion/core/src/physical_optimizer/limited_distinct_aggregation_test.rs
@@ -0,0 +1,443 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. 
The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#[cfg(test)]
+mod tests {
+
+    use std::sync::Arc;
+
+    use crate::error::Result;
+    use datafusion_physical_optimizer::limited_distinct_aggregation::LimitedDistinctAggregation;
+    use datafusion_physical_optimizer::PhysicalOptimizerRule;
+    use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec};
+
+    use crate::physical_optimizer::aggregate_statistics_test::tests::TestAggregate;
+    use crate::physical_optimizer::enforce_distribution_test::tests::{
+        parquet_exec_with_sort, schema, trim_plan_display,
+    };
+    use crate::prelude::SessionContext;
+    use datafusion_physical_plan::aggregates::{AggregateExec, PhysicalGroupBy};
+    use datafusion_physical_plan::memory::MemoryExec;
+    use datafusion_physical_plan::{collect, ExecutionPlan};
+
+    use arrow_schema::SchemaRef;
+    use datafusion_common::arrow::array::Int32Array;
+    use datafusion_common::arrow::compute::SortOptions;
+    use datafusion_common::arrow::datatypes::{DataType, Field, Schema};
+    use datafusion_common::arrow::record_batch::RecordBatch;
+    use datafusion_common::arrow::util::pretty::pretty_format_batches;
+    use datafusion_execution::config::SessionConfig;
+    use datafusion_expr::Operator;
+    use datafusion_physical_expr::expressions::{cast, col};
+    use datafusion_physical_expr::{expressions, PhysicalExpr, PhysicalSortExpr};
+    use datafusion_physical_plan::aggregates::AggregateMode;
+    use datafusion_physical_plan::displayable;
+
+    fn mock_data() -> Result<Arc<dyn ExecutionPlan>> {
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("a", DataType::Int32, true),
+            Field::new("b", DataType::Int32, true),
+        ]));
+
+        let batch = RecordBatch::try_new(
+            Arc::clone(&schema),
+            vec![
+                Arc::new(Int32Array::from(vec![
+                    Some(1),
+                    Some(2),
+                    None,
+                    Some(1),
+                    Some(4),
+                    Some(5),
+                ])),
+                Arc::new(Int32Array::from(vec![
+                    Some(1),
+                    None,
+                    Some(6),
+                    Some(2),
+                    Some(8),
+                    Some(9),
+                ])),
+            ],
+        )?;
+
+        Ok(Arc::new(MemoryExec::try_new(
+            &[vec![batch]],
+            Arc::clone(&schema),
+            None,
+        )?))
+    }
+
+    fn assert_plan_matches_expected(
+        plan: &Arc<dyn ExecutionPlan>,
+        expected: &[&str],
+    ) -> Result<()> {
+        let expected_lines: Vec<&str> = expected.to_vec();
+        let session_ctx = SessionContext::new();
+        let state = session_ctx.state();
+
+        let optimized = LimitedDistinctAggregation::new()
+            .optimize(Arc::clone(plan), state.config_options())?;
+
+        let optimized_result = displayable(optimized.as_ref()).indent(true).to_string();
+        let actual_lines = trim_plan_display(&optimized_result);
+
+        assert_eq!(
+            &expected_lines, &actual_lines,
+            "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
+            expected_lines, actual_lines
+        );
+
+        Ok(())
+    }
+
+    async fn assert_results_match_expected(
+        plan: Arc<dyn ExecutionPlan>,
+        expected: &str,
+    ) -> Result<()> {
+        let cfg = SessionConfig::new().with_target_partitions(1);
+        let ctx = SessionContext::new_with_config(cfg);
+        let batches = collect(plan, ctx.task_ctx()).await?;
+        let actual = format!("{}", pretty_format_batches(&batches)?);
+        assert_eq!(actual, expected);
+        Ok(())
+    }
+
+    pub fn build_group_by(
+        input_schema: &SchemaRef,
+        columns: Vec<String>,
+    ) -> PhysicalGroupBy {
+        let mut group_by_expr: Vec<(Arc<dyn PhysicalExpr>, String)> = vec![];
+        for column in columns.iter() {
+            group_by_expr.push((col(column, input_schema).unwrap(), column.to_string()));
+        }
+        PhysicalGroupBy::new_single(group_by_expr.clone())
+    }
+
+    #[tokio::test]
+    async fn test_partial_final() -> Result<()> {
+        let source = mock_data()?;
+        let schema = source.schema();
+
+        // `SELECT a FROM MemoryExec GROUP BY a LIMIT 4;`, Partial/Final AggregateExec
+        let partial_agg = AggregateExec::try_new(
+            AggregateMode::Partial,
+            build_group_by(&schema.clone(), vec!["a".to_string()]),
+            vec![],         /* aggr_expr */
+            vec![],         /* filter_expr */
+            source,         /* input */
+            schema.clone(), /* input_schema */
+        )?;
+        let final_agg = AggregateExec::try_new(
+            AggregateMode::Final,
+            build_group_by(&schema.clone(), vec!["a".to_string()]),
+            vec![],                /* aggr_expr */
+            vec![],                /* filter_expr */
+            Arc::new(partial_agg), /* input */
+            schema.clone(),        /* input_schema */
+        )?;
+        let limit_exec = LocalLimitExec::new(
+            Arc::new(final_agg),
+            4, // fetch
+        );
+        // expected to push the limit to the Partial and Final AggregateExecs
+        let expected = [
+            "LocalLimitExec: fetch=4",
+            "AggregateExec: mode=Final, gby=[a@0 as a], aggr=[], lim=[4]",
+            "AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[], lim=[4]",
+            "MemoryExec: partitions=1, partition_sizes=[1]",
+        ];
+        let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
+        assert_plan_matches_expected(&plan, &expected)?;
+        let expected = r#"
++---+
+| a |
++---+
+| 1 |
+| 2 |
+|   |
+| 4 |
++---+
+"#
+        .trim();
+        assert_results_match_expected(plan, expected).await?;
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_single_local() -> Result<()> {
+        let source = mock_data()?;
+        let schema = source.schema();
+
+        // `SELECT a FROM MemoryExec GROUP BY a LIMIT 4;`, Single AggregateExec
+        let single_agg = AggregateExec::try_new(
+            AggregateMode::Single,
+            build_group_by(&schema.clone(), vec!["a".to_string()]),
+            vec![],         /* aggr_expr */
+            vec![],         /* filter_expr */
+            source,         /* input */
+            schema.clone(), /* input_schema */
+        )?;
+        let limit_exec = LocalLimitExec::new(
+            Arc::new(single_agg),
+            4, // fetch
+        );
+        // expected to push the limit to the AggregateExec
+        let expected = [
+            "LocalLimitExec: fetch=4",
+            "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], lim=[4]",
+            "MemoryExec: partitions=1, partition_sizes=[1]",
+        ];
+        let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
+        assert_plan_matches_expected(&plan, &expected)?;
+        let expected = r#"
++---+
+| a |
++---+
+| 1 |
+| 2 |
+|   |
+| 4 |
++---+
+"#
+        .trim();
+        assert_results_match_expected(plan, expected).await?;
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_single_global() -> Result<()> {
+        let source = mock_data()?;
+        let schema = source.schema();
+
+        // `SELECT a FROM MemoryExec GROUP BY a LIMIT 4;`, Single AggregateExec
+        let single_agg = AggregateExec::try_new(
+            AggregateMode::Single,
+            build_group_by(&schema.clone(), vec!["a".to_string()]),
+            vec![],         /* aggr_expr */
+            vec![],         /* filter_expr */
+            source,         /* input */
+            schema.clone(), /* input_schema */
+        )?;
+        let limit_exec = GlobalLimitExec::new(
+            Arc::new(single_agg),
+            1,       // skip
+            Some(3), // fetch
+        );
+        // expected to push the skip+fetch limit to the AggregateExec
+        let expected = [
+            "GlobalLimitExec: skip=1, fetch=3",
+            "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], lim=[4]",
+            "MemoryExec: partitions=1, partition_sizes=[1]",
+        ];
+        let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
+        assert_plan_matches_expected(&plan, &expected)?;
+        let expected = r#"
++---+
+| a |
++---+
+| 2 |
+|   |
+| 4 |
++---+
+"#
+        .trim();
+        assert_results_match_expected(plan, expected).await?;
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_distinct_cols_different_than_group_by_cols() -> Result<()> {
+        let source = mock_data()?;
+        let schema = source.schema();
+
+        // `SELECT DISTINCT a FROM MemoryExec GROUP BY a, b LIMIT 4;`, Single/Single AggregateExec
+        let group_by_agg = AggregateExec::try_new(
+            AggregateMode::Single,
+            build_group_by(&schema.clone(), vec!["a".to_string(), "b".to_string()]),
+            vec![],         /* aggr_expr */
+            vec![],         /* filter_expr */
+            source,         /* input */
+            schema.clone(), /* input_schema */
+        )?;
+        let distinct_agg = AggregateExec::try_new(
+            AggregateMode::Single,
+            build_group_by(&schema.clone(), vec!["a".to_string()]),
+            vec![],                 /* aggr_expr */
+            vec![],                 /* filter_expr */
+            Arc::new(group_by_agg), /* input */
+            schema.clone(),         /* input_schema */
+        )?;
+        let limit_exec = LocalLimitExec::new(
+            Arc::new(distinct_agg),
+            4, // fetch
+        );
+        // expected to push the limit to the outer AggregateExec only
+        let expected = [
+            "LocalLimitExec: fetch=4",
+            "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], lim=[4]",
+            "AggregateExec: mode=Single, gby=[a@0 as a, b@1 as b], aggr=[]",
+            "MemoryExec: partitions=1, partition_sizes=[1]",
+        ];
+        let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
+        assert_plan_matches_expected(&plan, &expected)?;
+        let expected = r#"
++---+
+| a |
++---+
+| 1 |
+| 2 |
+|   |
+| 4 |
++---+
+"#
+        .trim();
+        assert_results_match_expected(plan, expected).await?;
+        Ok(())
+    }
+
+    #[test]
+    fn test_no_group_by() -> Result<()> {
+        let source = mock_data()?;
+        let schema = source.schema();
+
+        // `SELECT ... FROM MemoryExec LIMIT 10;`, Single AggregateExec with no GROUP BY
+        let single_agg = AggregateExec::try_new(
+            AggregateMode::Single,
+            build_group_by(&schema.clone(), vec![]),
+            vec![],         /* aggr_expr */
+            vec![],         /* filter_expr */
+            source,         /* input */
+            schema.clone(), /* input_schema */
+        )?;
+        let limit_exec = LocalLimitExec::new(
+            Arc::new(single_agg),
+            10, // fetch
+        );
+        // expected not to push the limit to the AggregateExec
+        let expected = [
+            "LocalLimitExec: fetch=10",
+            "AggregateExec: mode=Single, gby=[], aggr=[]",
+            "MemoryExec: partitions=1, partition_sizes=[1]",
+        ];
+        let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
+        assert_plan_matches_expected(&plan, &expected)?;
+        Ok(())
+    }
+
+    #[test]
+    fn test_has_aggregate_expression() -> Result<()> {
+        let source = mock_data()?;
+        let schema = source.schema();
+        let agg = TestAggregate::new_count_star();
+
+        // `SELECT a, COUNT(*) FROM MemoryExec GROUP BY a LIMIT 10;`, Single AggregateExec
+        let single_agg = AggregateExec::try_new(
+            AggregateMode::Single,
+            build_group_by(&schema.clone(), vec!["a".to_string()]),
+            vec![agg.count_expr(&schema)], /* aggr_expr */
+            vec![None],                    /* filter_expr */
+            source,                        /* input */
+            schema.clone(),                /* input_schema */
+        )?;
+        let limit_exec = LocalLimitExec::new(
+            Arc::new(single_agg),
+            10, // fetch
+        );
+        // expected not to push the limit to the AggregateExec
+        let expected = [
+            "LocalLimitExec: fetch=10",
+            "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[COUNT(*)]",
+            "MemoryExec: partitions=1, partition_sizes=[1]",
+        ];
+        let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
+        assert_plan_matches_expected(&plan, &expected)?;
+        Ok(())
+    }
+
+    #[test]
+    fn test_has_filter() -> Result<()> {
+        let source = mock_data()?;
+        let schema = source.schema();
+
+        // `SELECT a FROM MemoryExec WHERE a > 1 GROUP BY a LIMIT 10;`, Single AggregateExec
+        // the `a > 1` filter is applied in the AggregateExec
+        let filter_expr = Some(expressions::binary(
+            expressions::col("a", &schema)?,
+            Operator::Gt,
+            cast(expressions::lit(1u32), &schema, DataType::Int32)?,
+            &schema,
+        )?);
+        let agg = TestAggregate::new_count_star();
+        let single_agg = AggregateExec::try_new(
+            AggregateMode::Single,
+            build_group_by(&schema.clone(), vec!["a".to_string()]),
+            vec![agg.count_expr(&schema)], /* aggr_expr */
+            vec![filter_expr],             /* filter_expr */
+            source,                        /* input */
+            schema.clone(),                /* input_schema */
+        )?;
+        let limit_exec = LocalLimitExec::new(
+            Arc::new(single_agg),
+            10, // fetch
+        );
+        // expected not to push the limit to the AggregateExec
+        // TODO(msirek): open an issue for `filter_expr` of `AggregateExec` not printing out
+        let expected = [
+            "LocalLimitExec: fetch=10",
+            "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[COUNT(*)]",
+            "MemoryExec: partitions=1, partition_sizes=[1]",
+        ];
+        let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
+        assert_plan_matches_expected(&plan, &expected)?;
+        Ok(())
+    }
+
+    #[test]
+    fn test_has_order_by() -> Result<()> {
+        let sort_key = vec![PhysicalSortExpr {
+            expr: expressions::col("a", &schema()).unwrap(),
+            options: SortOptions::default(),
+        }];
+        let source = parquet_exec_with_sort(vec![sort_key]);
+        let schema = source.schema();
+
+        // `SELECT a FROM ParquetExec GROUP BY a LIMIT 10;`, Single AggregateExec
+        // over a sorted input
+        let single_agg = AggregateExec::try_new(
+            AggregateMode::Single,
+            build_group_by(&schema.clone(), vec!["a".to_string()]),
+            vec![],         /* aggr_expr */
+            vec![],         /* filter_expr */
+            source,         /* input */
+            schema.clone(), /* input_schema */
+        )?;
+        let limit_exec = LocalLimitExec::new(
+            Arc::new(single_agg),
+            10, // fetch
+        );
+        // expected not to push the limit to the AggregateExec
+        let expected = [
+            "LocalLimitExec: fetch=10",
+            "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], ordering_mode=Sorted",
+            "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC]",
+        ];
+        let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
+        assert_plan_matches_expected(&plan, &expected)?;
+        Ok(())
+    }
+}
diff --git a/datafusion/core/src/physical_optimizer/mod.rs b/datafusion/core/src/physical_optimizer/mod.rs
index 9c1242fdf77dd..4fbc3ad2f5cd6 100644
--- a/datafusion/core/src/physical_optimizer/mod.rs
+++ b/datafusion/core/src/physical_optimizer/mod.rs
@@ -21,14 +21,19 @@
 //! "Repartition" or "Sortedness"
 //!
 //! 
[`ExecutionPlan`]: crate::physical_plan::ExecutionPlan + +mod aggregate_statistics_test; pub mod coalesce_batches; pub mod combine_partial_final_agg; -pub mod enforce_distribution; +mod enforce_distribution_test; +mod enforce_sorting_test; pub mod join_selection; pub mod limit_pushdown; +mod limited_distinct_aggregation_test; pub mod optimizer; pub mod projection_pushdown; pub mod pruning; +mod replace_with_order_preserving_variants_test; pub mod sanity_checker; #[cfg(test)] pub mod test_utils; diff --git a/datafusion/core/src/physical_optimizer/optimizer.rs b/datafusion/core/src/physical_optimizer/optimizer.rs index e09d7b28bf5f2..1b5914a3c59e1 100644 --- a/datafusion/core/src/physical_optimizer/optimizer.rs +++ b/datafusion/core/src/physical_optimizer/optimizer.rs @@ -22,17 +22,17 @@ use std::sync::Arc; use super::projection_pushdown::ProjectionPushdown; use super::update_aggr_exprs::OptimizeAggregateOrder; -use crate::physical_optimizer::aggregate_statistics::AggregateStatistics; use crate::physical_optimizer::coalesce_batches::CoalesceBatches; use crate::physical_optimizer::combine_partial_final_agg::CombinePartialFinalAggregate; -use crate::physical_optimizer::enforce_distribution::EnforceDistribution; -use crate::physical_optimizer::enforce_sorting::EnforceSorting; use crate::physical_optimizer::join_selection::JoinSelection; use crate::physical_optimizer::limit_pushdown::LimitPushdown; -use crate::physical_optimizer::limited_distinct_aggregation::LimitedDistinctAggregation; use crate::physical_optimizer::output_requirements::OutputRequirements; use crate::physical_optimizer::sanity_checker::SanityCheckPlan; use crate::physical_optimizer::topk_aggregation::TopKAggregation; +use datafusion_physical_optimizer::aggregate_statistics::AggregateStatistics; +use datafusion_physical_optimizer::enforce_distribution::EnforceDistribution; +use datafusion_physical_optimizer::enforce_sorting::EnforceSorting; +use datafusion_physical_optimizer::limited_distinct_aggregation::LimitedDistinctAggregation; /// A rule-based physical optimizer. #[derive(Clone)] diff --git a/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants_test.rs b/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants_test.rs new file mode 100644 index 0000000000000..0ee7102e137b3 --- /dev/null +++ b/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants_test.rs @@ -0,0 +1,1264 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
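+
+//! Tests for the `replace_with_order_preserving_variants` physical optimizer
+//! sub-rule.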
+ +#[cfg(test)] +mod tests { + use datafusion_physical_optimizer::replace_with_order_preserving_variants::replace_with_order_preserving_variants; + use datafusion_physical_plan::repartition::RepartitionExec; + use std::sync::Arc; + + use datafusion_physical_optimizer::replace_with_order_preserving_variants::OrderPreservationContext; + use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; + use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; + + use crate::datasource::file_format::file_compression_type::FileCompressionType; + use crate::datasource::listing::PartitionedFile; + use crate::datasource::physical_plan::{CsvExec, FileScanConfig}; + use crate::physical_optimizer::test_utils::check_integrity; + use crate::prelude::SessionConfig; + use crate::test::TestStreamPartition; + use datafusion_physical_plan::coalesce_batches::CoalesceBatchesExec; + use datafusion_physical_plan::filter::FilterExec; + use datafusion_physical_plan::joins::{HashJoinExec, PartitionMode}; + use datafusion_physical_plan::sorts::sort::SortExec; + use datafusion_physical_plan::{ + displayable, get_plan_string, ExecutionPlan, Partitioning, + }; + + use datafusion_common::arrow::compute::SortOptions; + use datafusion_common::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; + use datafusion_common::tree_node::{TransformedResult, TreeNode}; + use datafusion_common::Result; + use datafusion_execution::object_store::ObjectStoreUrl; + use datafusion_expr::{JoinType, Operator}; + use datafusion_physical_expr::expressions::{self, col, Column}; + use datafusion_physical_expr::PhysicalSortExpr; + use datafusion_physical_plan::streaming::StreamingTableExec; + + use rstest::rstest; + + /// Runs the `replace_with_order_preserving_variants` sub-rule and asserts + /// the plan against the original and expected plans for both bounded and + /// unbounded cases. + /// + /// # Parameters + /// + /// * `EXPECTED_UNBOUNDED_PLAN_LINES`: Expected input unbounded plan. + /// * `EXPECTED_BOUNDED_PLAN_LINES`: Expected input bounded plan. + /// * `EXPECTED_UNBOUNDED_OPTIMIZED_PLAN_LINES`: Optimized plan, which is + /// the same regardless of the value of the `prefer_existing_sort` flag. + /// * `EXPECTED_BOUNDED_OPTIMIZED_PLAN_LINES`: Optimized plan when the flag + /// `prefer_existing_sort` is `false` for bounded cases. + /// * `EXPECTED_BOUNDED_PREFER_SORT_ON_OPTIMIZED_PLAN_LINES`: Optimized plan + /// when the flag `prefer_existing_sort` is `true` for bounded cases. + /// * `$PLAN`: The plan to optimize. + /// * `$SOURCE_UNBOUNDED`: Whether the given plan contains an unbounded source. + macro_rules! assert_optimized_in_all_boundedness_situations { + ($EXPECTED_UNBOUNDED_PLAN_LINES: expr, $EXPECTED_BOUNDED_PLAN_LINES: expr, $EXPECTED_UNBOUNDED_OPTIMIZED_PLAN_LINES: expr, $EXPECTED_BOUNDED_OPTIMIZED_PLAN_LINES: expr, $EXPECTED_BOUNDED_PREFER_SORT_ON_OPTIMIZED_PLAN_LINES: expr, $PLAN: expr, $SOURCE_UNBOUNDED: expr) => { + if $SOURCE_UNBOUNDED { + assert_optimized_prefer_sort_on_off!( + $EXPECTED_UNBOUNDED_PLAN_LINES, + $EXPECTED_UNBOUNDED_OPTIMIZED_PLAN_LINES, + $EXPECTED_UNBOUNDED_OPTIMIZED_PLAN_LINES, + $PLAN + ); + } else { + assert_optimized_prefer_sort_on_off!( + $EXPECTED_BOUNDED_PLAN_LINES, + $EXPECTED_BOUNDED_OPTIMIZED_PLAN_LINES, + $EXPECTED_BOUNDED_PREFER_SORT_ON_OPTIMIZED_PLAN_LINES, + $PLAN + ); + } + }; + } + + /// Runs the `replace_with_order_preserving_variants` sub-rule and asserts + /// the plan against the original and expected plans. 
+ /// + /// # Parameters + /// + /// * `$EXPECTED_PLAN_LINES`: Expected input plan. + /// * `EXPECTED_OPTIMIZED_PLAN_LINES`: Optimized plan when the flag + /// `prefer_existing_sort` is `false`. + /// * `EXPECTED_PREFER_SORT_ON_OPTIMIZED_PLAN_LINES`: Optimized plan when + /// the flag `prefer_existing_sort` is `true`. + /// * `$PLAN`: The plan to optimize. + macro_rules! assert_optimized_prefer_sort_on_off { + ($EXPECTED_PLAN_LINES: expr, $EXPECTED_OPTIMIZED_PLAN_LINES: expr, $EXPECTED_PREFER_SORT_ON_OPTIMIZED_PLAN_LINES: expr, $PLAN: expr) => { + assert_optimized!( + $EXPECTED_PLAN_LINES, + $EXPECTED_OPTIMIZED_PLAN_LINES, + $PLAN.clone(), + false + ); + assert_optimized!( + $EXPECTED_PLAN_LINES, + $EXPECTED_PREFER_SORT_ON_OPTIMIZED_PLAN_LINES, + $PLAN, + true + ); + }; + } + + /// Runs the `replace_with_order_preserving_variants` sub-rule and asserts + /// the plan against the original and expected plans. + /// + /// # Parameters + /// + /// * `$EXPECTED_PLAN_LINES`: Expected input plan. + /// * `$EXPECTED_OPTIMIZED_PLAN_LINES`: Expected optimized plan. + /// * `$PLAN`: The plan to optimize. + /// * `$PREFER_EXISTING_SORT`: Value of the `prefer_existing_sort` flag. + macro_rules! assert_optimized { + ($EXPECTED_PLAN_LINES: expr, $EXPECTED_OPTIMIZED_PLAN_LINES: expr, $PLAN: expr, $PREFER_EXISTING_SORT: expr) => { + let physical_plan = $PLAN; + let formatted = displayable(physical_plan.as_ref()).indent(true).to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + + let expected_plan_lines: Vec<&str> = $EXPECTED_PLAN_LINES + .iter().map(|s| *s).collect(); + + assert_eq!( + expected_plan_lines, actual, + "\n**Original Plan Mismatch\n\nexpected:\n\n{expected_plan_lines:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + let expected_optimized_lines: Vec<&str> = $EXPECTED_OPTIMIZED_PLAN_LINES.iter().map(|s| *s).collect(); + + // Run the rule top-down + let config = SessionConfig::new().with_prefer_existing_sort($PREFER_EXISTING_SORT); + let plan_with_pipeline_fixer = OrderPreservationContext::new_default(physical_plan); + let parallel = plan_with_pipeline_fixer.transform_up(|plan_with_pipeline_fixer| replace_with_order_preserving_variants(plan_with_pipeline_fixer, false, false, config.options())).data().and_then(check_integrity)?; + let optimized_physical_plan = parallel.plan; + + // Get string representation of the plan + let actual = get_plan_string(&optimized_physical_plan); + assert_eq!( + expected_optimized_lines, actual, + "\n**Optimized Plan Mismatch\n\nexpected:\n\n{expected_optimized_lines:#?}\nactual:\n\n{actual:#?}\n\n" + ); + }; + } + + #[rstest] + #[tokio::test] + // Searches for a simple sort and a repartition just after it, the second repartition with 1 input partition should not be affected + async fn test_replace_multiple_input_repartition_1( + #[values(false, true)] source_unbounded: bool, + ) -> Result<()> { + let schema = create_test_schema()?; + let sort_exprs = vec![sort_expr("a", &schema)]; + let source = if source_unbounded { + stream_exec_ordered(&schema, sort_exprs) + } else { + csv_exec_sorted(&schema, sort_exprs) + }; + let repartition = repartition_exec_hash(repartition_exec_round_robin(source)); + let sort = sort_exec(vec![sort_expr("a", &schema)], repartition, true); + + let physical_plan = + sort_preserving_merge_exec(vec![sort_expr("a", &schema)], sort); + + // Expected inputs unbounded and bounded + let expected_input_unbounded = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " SortExec: expr=[a@0 ASC NULLS LAST], 
preserve_partitioning=[true]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", + ]; + let expected_input_bounded = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + + // Expected unbounded result (same for with and without flag) + let expected_optimized_unbounded = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", + ]; + + // Expected bounded results with and without flag + let expected_optimized_bounded = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + let expected_optimized_bounded_sort_preserve = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + assert_optimized_in_all_boundedness_situations!( + expected_input_unbounded, + expected_input_bounded, + expected_optimized_unbounded, + expected_optimized_bounded, + expected_optimized_bounded_sort_preserve, + physical_plan, + source_unbounded + ); + Ok(()) + } + + #[rstest] + #[tokio::test] + async fn test_with_inter_children_change_only( + #[values(false, true)] source_unbounded: bool, + ) -> Result<()> { + let schema = create_test_schema()?; + let sort_exprs = vec![sort_expr_default("a", &schema)]; + let source = if source_unbounded { + stream_exec_ordered(&schema, sort_exprs) + } else { + csv_exec_sorted(&schema, sort_exprs) + }; + let repartition_rr = repartition_exec_round_robin(source); + let repartition_hash = repartition_exec_hash(repartition_rr); + let coalesce_partitions = coalesce_partitions_exec(repartition_hash); + let sort = sort_exec( + vec![sort_expr_default("a", &coalesce_partitions.schema())], + coalesce_partitions, + false, + ); + let repartition_rr2 = repartition_exec_round_robin(sort); + let repartition_hash2 = repartition_exec_hash(repartition_rr2); + let filter = filter_exec(repartition_hash2); + let sort2 = + sort_exec(vec![sort_expr_default("a", &filter.schema())], filter, true); + + let physical_plan = sort_preserving_merge_exec( + vec![sort_expr_default("a", &sort2.schema())], + sort2, + ); + + // Expected inputs unbounded and bounded + let 
expected_input_unbounded = [ + "SortPreservingMergeExec: [a@0 ASC]", + " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", + " CoalescePartitionsExec", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC]", + ]; + let expected_input_bounded = [ + "SortPreservingMergeExec: [a@0 ASC]", + " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", + " CoalescePartitionsExec", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC], has_header=true", + ]; + + // Expected unbounded result (same for with and without flag) + let expected_optimized_unbounded = [ + "SortPreservingMergeExec: [a@0 ASC]", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " SortPreservingMergeExec: [a@0 ASC]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC]", + ]; + + // Expected bounded results with and without flag + let expected_optimized_bounded = [ + "SortPreservingMergeExec: [a@0 ASC]", + " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", + " CoalescePartitionsExec", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC], has_header=true", + ]; + let expected_optimized_bounded_sort_preserve = [ + "SortPreservingMergeExec: [a@0 ASC]", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " SortPreservingMergeExec: [a@0 ASC]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC], has_header=true", + ]; + assert_optimized_in_all_boundedness_situations!( + expected_input_unbounded, + expected_input_bounded, + expected_optimized_unbounded, + 
expected_optimized_bounded, + expected_optimized_bounded_sort_preserve, + physical_plan, + source_unbounded + ); + Ok(()) + } + + #[rstest] + #[tokio::test] + async fn test_replace_multiple_input_repartition_2( + #[values(false, true)] source_unbounded: bool, + ) -> Result<()> { + let schema = create_test_schema()?; + let sort_exprs = vec![sort_expr("a", &schema)]; + let source = if source_unbounded { + stream_exec_ordered(&schema, sort_exprs) + } else { + csv_exec_sorted(&schema, sort_exprs) + }; + let repartition_rr = repartition_exec_round_robin(source); + let filter = filter_exec(repartition_rr); + let repartition_hash = repartition_exec_hash(filter); + let sort = sort_exec(vec![sort_expr("a", &schema)], repartition_hash, true); + + let physical_plan = + sort_preserving_merge_exec(vec![sort_expr("a", &schema)], sort); + + // Expected inputs unbounded and bounded + let expected_input_unbounded = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", + ]; + let expected_input_bounded = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + + // Expected unbounded result (same for with and without flag) + let expected_optimized_unbounded = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", + ]; + + // Expected bounded results with and without flag + let expected_optimized_bounded = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + let expected_optimized_bounded_sort_preserve = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + assert_optimized_in_all_boundedness_situations!( + expected_input_unbounded, + expected_input_bounded, + expected_optimized_unbounded, + expected_optimized_bounded, + expected_optimized_bounded_sort_preserve, + physical_plan, + source_unbounded + ); + Ok(()) 
+ } + + #[rstest] + #[tokio::test] + async fn test_replace_multiple_input_repartition_with_extra_steps( + #[values(false, true)] source_unbounded: bool, + ) -> Result<()> { + let schema = create_test_schema()?; + let sort_exprs = vec![sort_expr("a", &schema)]; + let source = if source_unbounded { + stream_exec_ordered(&schema, sort_exprs) + } else { + csv_exec_sorted(&schema, sort_exprs) + }; + let repartition_rr = repartition_exec_round_robin(source); + let repartition_hash = repartition_exec_hash(repartition_rr); + let filter = filter_exec(repartition_hash); + let coalesce_batches_exec: Arc = coalesce_batches_exec(filter); + let sort = sort_exec(vec![sort_expr("a", &schema)], coalesce_batches_exec, true); + + let physical_plan = + sort_preserving_merge_exec(vec![sort_expr("a", &schema)], sort); + + // Expected inputs unbounded and bounded + let expected_input_unbounded = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", + ]; + let expected_input_bounded = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + + // Expected unbounded result (same for with and without flag) + let expected_optimized_unbounded = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", + ]; + + // Expected bounded results with and without flag + let expected_optimized_bounded = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + let expected_optimized_bounded_sort_preserve = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + 
assert_optimized_in_all_boundedness_situations!( + expected_input_unbounded, + expected_input_bounded, + expected_optimized_unbounded, + expected_optimized_bounded, + expected_optimized_bounded_sort_preserve, + physical_plan, + source_unbounded + ); + Ok(()) + } + + #[rstest] + #[tokio::test] + async fn test_replace_multiple_input_repartition_with_extra_steps_2( + #[values(false, true)] source_unbounded: bool, + ) -> Result<()> { + let schema = create_test_schema()?; + let sort_exprs = vec![sort_expr("a", &schema)]; + let source = if source_unbounded { + stream_exec_ordered(&schema, sort_exprs) + } else { + csv_exec_sorted(&schema, sort_exprs) + }; + let repartition_rr = repartition_exec_round_robin(source); + let coalesce_batches_exec_1 = coalesce_batches_exec(repartition_rr); + let repartition_hash = repartition_exec_hash(coalesce_batches_exec_1); + let filter = filter_exec(repartition_hash); + let coalesce_batches_exec_2 = coalesce_batches_exec(filter); + let sort = + sort_exec(vec![sort_expr("a", &schema)], coalesce_batches_exec_2, true); + + let physical_plan = + sort_preserving_merge_exec(vec![sort_expr("a", &schema)], sort); + + // Expected inputs unbounded and bounded + let expected_input_unbounded = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " CoalesceBatchesExec: target_batch_size=8192", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", + ]; + let expected_input_bounded = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " CoalesceBatchesExec: target_batch_size=8192", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + + // Expected unbounded result (same for with and without flag) + let expected_optimized_unbounded = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST", + " CoalesceBatchesExec: target_batch_size=8192", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", + ]; + + // Expected bounded results with and without flag + let expected_optimized_bounded = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " CoalesceBatchesExec: target_batch_size=8192", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + let 
expected_optimized_bounded_sort_preserve = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST", + " CoalesceBatchesExec: target_batch_size=8192", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + assert_optimized_in_all_boundedness_situations!( + expected_input_unbounded, + expected_input_bounded, + expected_optimized_unbounded, + expected_optimized_bounded, + expected_optimized_bounded_sort_preserve, + physical_plan, + source_unbounded + ); + Ok(()) + } + + #[rstest] + #[tokio::test] + async fn test_not_replacing_when_no_need_to_preserve_sorting( + #[values(false, true)] source_unbounded: bool, + ) -> Result<()> { + let schema = create_test_schema()?; + let sort_exprs = vec![sort_expr("a", &schema)]; + let source = if source_unbounded { + stream_exec_ordered(&schema, sort_exprs) + } else { + csv_exec_sorted(&schema, sort_exprs) + }; + let repartition_rr = repartition_exec_round_robin(source); + let repartition_hash = repartition_exec_hash(repartition_rr); + let filter = filter_exec(repartition_hash); + let coalesce_batches_exec: Arc = coalesce_batches_exec(filter); + + let physical_plan: Arc = + coalesce_partitions_exec(coalesce_batches_exec); + + // Expected inputs unbounded and bounded + let expected_input_unbounded = [ + "CoalescePartitionsExec", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", + ]; + let expected_input_bounded = [ + "CoalescePartitionsExec", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + + // Expected unbounded result (same for with and without flag) + let expected_optimized_unbounded = [ + "CoalescePartitionsExec", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", + ]; + + // Expected bounded results same with and without flag, because there is no executor with ordering requirement + let expected_optimized_bounded = [ + "CoalescePartitionsExec", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + let expected_optimized_bounded_sort_preserve = expected_optimized_bounded; + + assert_optimized_in_all_boundedness_situations!( + 
expected_input_unbounded, + expected_input_bounded, + expected_optimized_unbounded, + expected_optimized_bounded, + expected_optimized_bounded_sort_preserve, + physical_plan, + source_unbounded + ); + Ok(()) + } + + #[rstest] + #[tokio::test] + async fn test_with_multiple_replacable_repartitions( + #[values(false, true)] source_unbounded: bool, + ) -> Result<()> { + let schema = create_test_schema()?; + let sort_exprs = vec![sort_expr("a", &schema)]; + let source = if source_unbounded { + stream_exec_ordered(&schema, sort_exprs) + } else { + csv_exec_sorted(&schema, sort_exprs) + }; + let repartition_rr = repartition_exec_round_robin(source); + let repartition_hash = repartition_exec_hash(repartition_rr); + let filter = filter_exec(repartition_hash); + let coalesce_batches = coalesce_batches_exec(filter); + let repartition_hash_2 = repartition_exec_hash(coalesce_batches); + let sort = sort_exec(vec![sort_expr("a", &schema)], repartition_hash_2, true); + + let physical_plan = + sort_preserving_merge_exec(vec![sort_expr("a", &schema)], sort); + + // Expected inputs unbounded and bounded + let expected_input_unbounded = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", + ]; + let expected_input_bounded = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + + // Expected unbounded result (same for with and without flag) + let expected_optimized_unbounded = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", + ]; + + // Expected bounded results with and without flag + let expected_optimized_bounded = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 
ASC NULLS LAST], has_header=true", + ]; + let expected_optimized_bounded_sort_preserve = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + assert_optimized_in_all_boundedness_situations!( + expected_input_unbounded, + expected_input_bounded, + expected_optimized_unbounded, + expected_optimized_bounded, + expected_optimized_bounded_sort_preserve, + physical_plan, + source_unbounded + ); + Ok(()) + } + + #[rstest] + #[tokio::test] + async fn test_not_replace_with_different_orderings( + #[values(false, true)] source_unbounded: bool, + ) -> Result<()> { + let schema = create_test_schema()?; + let sort_exprs = vec![sort_expr("a", &schema)]; + let source = if source_unbounded { + stream_exec_ordered(&schema, sort_exprs) + } else { + csv_exec_sorted(&schema, sort_exprs) + }; + let repartition_rr = repartition_exec_round_robin(source); + let repartition_hash = repartition_exec_hash(repartition_rr); + let sort = sort_exec( + vec![sort_expr_default("c", &repartition_hash.schema())], + repartition_hash, + true, + ); + + let physical_plan = sort_preserving_merge_exec( + vec![sort_expr_default("c", &sort.schema())], + sort, + ); + + // Expected inputs unbounded and bounded + let expected_input_unbounded = [ + "SortPreservingMergeExec: [c@1 ASC]", + " SortExec: expr=[c@1 ASC], preserve_partitioning=[true]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", + ]; + let expected_input_bounded = [ + "SortPreservingMergeExec: [c@1 ASC]", + " SortExec: expr=[c@1 ASC], preserve_partitioning=[true]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + + // Expected unbounded result (same for with and without flag) + let expected_optimized_unbounded = [ + "SortPreservingMergeExec: [c@1 ASC]", + " SortExec: expr=[c@1 ASC], preserve_partitioning=[true]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", + ]; + + // Expected bounded results same with and without flag, because ordering requirement of the executor is different than the existing ordering. 
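+        // Concretely, the source is ordered on `a` while the sort and the merge
+        // require an ordering on `c`; an order-preserving `RepartitionExec` can only
+        // propagate an ordering its input already satisfies, so the substitution
+        // would not make the `SortExec` removable and the rule leaves the plan alone.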
+ let expected_optimized_bounded = [ + "SortPreservingMergeExec: [c@1 ASC]", + " SortExec: expr=[c@1 ASC], preserve_partitioning=[true]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + let expected_optimized_bounded_sort_preserve = expected_optimized_bounded; + + assert_optimized_in_all_boundedness_situations!( + expected_input_unbounded, + expected_input_bounded, + expected_optimized_unbounded, + expected_optimized_bounded, + expected_optimized_bounded_sort_preserve, + physical_plan, + source_unbounded + ); + Ok(()) + } + + #[rstest] + #[tokio::test] + async fn test_with_lost_ordering( + #[values(false, true)] source_unbounded: bool, + ) -> Result<()> { + let schema = create_test_schema()?; + let sort_exprs = vec![sort_expr("a", &schema)]; + let source = if source_unbounded { + stream_exec_ordered(&schema, sort_exprs) + } else { + csv_exec_sorted(&schema, sort_exprs) + }; + let repartition_rr = repartition_exec_round_robin(source); + let repartition_hash = repartition_exec_hash(repartition_rr); + let coalesce_partitions = coalesce_partitions_exec(repartition_hash); + let physical_plan = + sort_exec(vec![sort_expr("a", &schema)], coalesce_partitions, false); + + // Expected inputs unbounded and bounded + let expected_input_unbounded = [ + "SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[false]", + " CoalescePartitionsExec", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", + ]; + let expected_input_bounded = [ + "SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[false]", + " CoalescePartitionsExec", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + + // Expected unbounded result (same for with and without flag) + let expected_optimized_unbounded = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", + ]; + + // Expected bounded results with and without flag + let expected_optimized_bounded = [ + "SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[false]", + " CoalescePartitionsExec", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + let expected_optimized_bounded_sort_preserve = [ + "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: 
file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + assert_optimized_in_all_boundedness_situations!( + expected_input_unbounded, + expected_input_bounded, + expected_optimized_unbounded, + expected_optimized_bounded, + expected_optimized_bounded_sort_preserve, + physical_plan, + source_unbounded + ); + Ok(()) + } + + #[rstest] + #[tokio::test] + async fn test_with_lost_and_kept_ordering( + #[values(false, true)] source_unbounded: bool, + ) -> Result<()> { + let schema = create_test_schema()?; + let sort_exprs = vec![sort_expr("a", &schema)]; + let source = if source_unbounded { + stream_exec_ordered(&schema, sort_exprs) + } else { + csv_exec_sorted(&schema, sort_exprs) + }; + let repartition_rr = repartition_exec_round_robin(source); + let repartition_hash = repartition_exec_hash(repartition_rr); + let coalesce_partitions = coalesce_partitions_exec(repartition_hash); + let sort = sort_exec( + vec![sort_expr_default("c", &coalesce_partitions.schema())], + coalesce_partitions, + false, + ); + let repartition_rr2 = repartition_exec_round_robin(sort); + let repartition_hash2 = repartition_exec_hash(repartition_rr2); + let filter = filter_exec(repartition_hash2); + let sort2 = + sort_exec(vec![sort_expr_default("c", &filter.schema())], filter, true); + + let physical_plan = sort_preserving_merge_exec( + vec![sort_expr_default("c", &sort2.schema())], + sort2, + ); + + // Expected inputs unbounded and bounded + let expected_input_unbounded = [ + "SortPreservingMergeExec: [c@1 ASC]", + " SortExec: expr=[c@1 ASC], preserve_partitioning=[true]", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " SortExec: expr=[c@1 ASC], preserve_partitioning=[false]", + " CoalescePartitionsExec", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", + ]; + let expected_input_bounded = [ + "SortPreservingMergeExec: [c@1 ASC]", + " SortExec: expr=[c@1 ASC], preserve_partitioning=[true]", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " SortExec: expr=[c@1 ASC], preserve_partitioning=[false]", + " CoalescePartitionsExec", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + + // Expected unbounded result (same for with and without flag) + let expected_optimized_unbounded = [ + "SortPreservingMergeExec: [c@1 ASC]", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=c@1 ASC", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " SortExec: expr=[c@1 ASC], preserve_partitioning=[false]", + " CoalescePartitionsExec", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 
ASC NULLS LAST]", + ]; + + // Expected bounded results with and without flag + let expected_optimized_bounded = [ + "SortPreservingMergeExec: [c@1 ASC]", + " SortExec: expr=[c@1 ASC], preserve_partitioning=[true]", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " SortExec: expr=[c@1 ASC], preserve_partitioning=[false]", + " CoalescePartitionsExec", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + let expected_optimized_bounded_sort_preserve = [ + "SortPreservingMergeExec: [c@1 ASC]", + " FilterExec: c@1 > 3", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=c@1 ASC", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " SortExec: expr=[c@1 ASC], preserve_partitioning=[false]", + " CoalescePartitionsExec", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + assert_optimized_in_all_boundedness_situations!( + expected_input_unbounded, + expected_input_bounded, + expected_optimized_unbounded, + expected_optimized_bounded, + expected_optimized_bounded_sort_preserve, + physical_plan, + source_unbounded + ); + Ok(()) + } + + #[rstest] + #[tokio::test] + async fn test_with_multiple_child_trees( + #[values(false, true)] source_unbounded: bool, + ) -> Result<()> { + let schema = create_test_schema()?; + + let left_sort_exprs = vec![sort_expr("a", &schema)]; + let left_source = if source_unbounded { + stream_exec_ordered(&schema, left_sort_exprs) + } else { + csv_exec_sorted(&schema, left_sort_exprs) + }; + let left_repartition_rr = repartition_exec_round_robin(left_source); + let left_repartition_hash = repartition_exec_hash(left_repartition_rr); + let left_coalesce_partitions = + Arc::new(CoalesceBatchesExec::new(left_repartition_hash, 4096)); + + let right_sort_exprs = vec![sort_expr("a", &schema)]; + let right_source = if source_unbounded { + stream_exec_ordered(&schema, right_sort_exprs) + } else { + csv_exec_sorted(&schema, right_sort_exprs) + }; + let right_repartition_rr = repartition_exec_round_robin(right_source); + let right_repartition_hash = repartition_exec_hash(right_repartition_rr); + let right_coalesce_partitions = + Arc::new(CoalesceBatchesExec::new(right_repartition_hash, 4096)); + + let hash_join_exec = + hash_join_exec(left_coalesce_partitions, right_coalesce_partitions); + let sort = sort_exec( + vec![sort_expr_default("a", &hash_join_exec.schema())], + hash_join_exec, + true, + ); + + let physical_plan = sort_preserving_merge_exec( + vec![sort_expr_default("a", &sort.schema())], + sort, + ); + + // Expected inputs unbounded and bounded + let expected_input_unbounded = [ + "SortPreservingMergeExec: [a@0 ASC]", + " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", + " HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@1, c@1)]", + " CoalesceBatchesExec: target_batch_size=4096", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), 
input_partitions=1",
+            "            StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]",
+            "      CoalesceBatchesExec: target_batch_size=4096",
+            "        RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
+            "          RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
+            "            StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]",
+        ];
+        let expected_input_bounded = [
+            "SortPreservingMergeExec: [a@0 ASC]",
+            "  SortExec: expr=[a@0 ASC], preserve_partitioning=[true]",
+            "    HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@1, c@1)]",
+            "      CoalesceBatchesExec: target_batch_size=4096",
+            "        RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
+            "          RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
+            "            CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
+            "      CoalesceBatchesExec: target_batch_size=4096",
+            "        RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
+            "          RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
+            "            CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
+        ];
+
+        // Expected unbounded result (same for with and without flag)
+        let expected_optimized_unbounded = [
+            "SortPreservingMergeExec: [a@0 ASC]",
+            "  SortExec: expr=[a@0 ASC], preserve_partitioning=[true]",
+            "    HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@1, c@1)]",
+            "      CoalesceBatchesExec: target_batch_size=4096",
+            "        RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
+            "          RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
+            "            StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]",
+            "      CoalesceBatchesExec: target_batch_size=4096",
+            "        RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
+            "          RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
+            "            StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]",
+        ];
+
+        // Expected bounded results are the same with and without the flag, because the intermediate
+        // hash join loses the ordering anyway; hence there is no need to preserve the existing ordering.
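+        // Concretely, the partitioned `HashJoinExec` does not carry the
+        // `a@0 ASC NULLS LAST` orderings of its inputs through to its output, so
+        // making the repartitions order-preserving could not eliminate the
+        // `SortExec` above the join. A sketch of that property (assuming the
+        // join's `PlanProperties::output_ordering` reports no ordering here):
+        //
+        //     assert!(hash_join_exec.properties().output_ordering().is_none());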
+ let expected_optimized_bounded = [ + "SortPreservingMergeExec: [a@0 ASC]", + " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", + " HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@1, c@1)]", + " CoalesceBatchesExec: target_batch_size=4096", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + " CoalesceBatchesExec: target_batch_size=4096", + " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", + ]; + let expected_optimized_bounded_sort_preserve = expected_optimized_bounded; + + assert_optimized_in_all_boundedness_situations!( + expected_input_unbounded, + expected_input_bounded, + expected_optimized_unbounded, + expected_optimized_bounded, + expected_optimized_bounded_sort_preserve, + physical_plan, + source_unbounded + ); + Ok(()) + } + + // End test cases + // Start test helpers + + fn sort_expr(name: &str, schema: &Schema) -> PhysicalSortExpr { + let sort_opts = SortOptions { + nulls_first: false, + descending: false, + }; + sort_expr_options(name, schema, sort_opts) + } + + fn sort_expr_default(name: &str, schema: &Schema) -> PhysicalSortExpr { + let sort_opts = SortOptions::default(); + sort_expr_options(name, schema, sort_opts) + } + + fn sort_expr_options( + name: &str, + schema: &Schema, + options: SortOptions, + ) -> PhysicalSortExpr { + PhysicalSortExpr { + expr: col(name, schema).unwrap(), + options, + } + } + + fn sort_exec( + sort_exprs: impl IntoIterator, + input: Arc, + preserve_partitioning: bool, + ) -> Arc { + let sort_exprs = sort_exprs.into_iter().collect(); + Arc::new( + SortExec::new(sort_exprs, input) + .with_preserve_partitioning(preserve_partitioning), + ) + } + + fn sort_preserving_merge_exec( + sort_exprs: impl IntoIterator, + input: Arc, + ) -> Arc { + let sort_exprs = sort_exprs.into_iter().collect(); + Arc::new(SortPreservingMergeExec::new(sort_exprs, input)) + } + + fn repartition_exec_round_robin( + input: Arc, + ) -> Arc { + Arc::new( + RepartitionExec::try_new(input, Partitioning::RoundRobinBatch(8)).unwrap(), + ) + } + + fn repartition_exec_hash(input: Arc) -> Arc { + let input_schema = input.schema(); + Arc::new( + RepartitionExec::try_new( + input, + Partitioning::Hash(vec![col("c", &input_schema).unwrap()], 8), + ) + .unwrap(), + ) + } + + fn filter_exec(input: Arc) -> Arc { + let input_schema = input.schema(); + let predicate = expressions::binary( + col("c", &input_schema).unwrap(), + Operator::Gt, + expressions::lit(3i32), + &input_schema, + ) + .unwrap(); + Arc::new(FilterExec::try_new(predicate, input).unwrap()) + } + + fn coalesce_batches_exec(input: Arc) -> Arc { + Arc::new(CoalesceBatchesExec::new(input, 8192)) + } + + fn coalesce_partitions_exec(input: Arc) -> Arc { + Arc::new(CoalescePartitionsExec::new(input)) + } + + fn hash_join_exec( + left: Arc, + right: Arc, + ) -> Arc { + let left_on = col("c", &left.schema()).unwrap(); + let right_on = col("c", &right.schema()).unwrap(); + let left_col = left_on.as_any().downcast_ref::().unwrap(); + let right_col = right_on.as_any().downcast_ref::().unwrap(); + Arc::new( + HashJoinExec::try_new( + left, + right, + vec![(Arc::new(left_col.clone()), 
Arc::new(right_col.clone()))], + None, + &JoinType::Inner, + None, + PartitionMode::Partitioned, + false, + ) + .unwrap(), + ) + } + + fn create_test_schema() -> Result { + let column_a = Field::new("a", DataType::Int32, false); + let column_b = Field::new("b", DataType::Int32, false); + let column_c = Field::new("c", DataType::Int32, false); + let column_d = Field::new("d", DataType::Int32, false); + let schema = Arc::new(Schema::new(vec![column_a, column_b, column_c, column_d])); + + Ok(schema) + } + + // creates a stream exec source for the test purposes + fn stream_exec_ordered( + schema: &SchemaRef, + sort_exprs: impl IntoIterator, + ) -> Arc { + let sort_exprs = sort_exprs.into_iter().collect(); + let projection: Vec = vec![0, 2, 3]; + + Arc::new( + StreamingTableExec::try_new( + schema.clone(), + vec![Arc::new(TestStreamPartition { + schema: schema.clone(), + }) as _], + Some(&projection), + vec![sort_exprs], + true, + None, + ) + .unwrap(), + ) + } + + // creates a csv exec source for the test purposes + // projection and has_header parameters are given static due to testing needs + fn csv_exec_sorted( + schema: &SchemaRef, + sort_exprs: impl IntoIterator, + ) -> Arc { + let sort_exprs = sort_exprs.into_iter().collect(); + let projection: Vec = vec![0, 2, 3]; + + Arc::new( + CsvExec::builder( + FileScanConfig::new( + ObjectStoreUrl::parse("test:///").unwrap(), + schema.clone(), + ) + .with_file(PartitionedFile::new("file_path".to_string(), 100)) + .with_projection(Some(projection)) + .with_output_ordering(vec![sort_exprs]), + ) + .with_has_header(true) + .with_delimeter(0) + .with_quote(b'"') + .with_escape(None) + .with_comment(None) + .with_newlines_in_values(false) + .with_file_compression_type(FileCompressionType::UNCOMPRESSED) + .build(), + ) + } +} diff --git a/datafusion/physical-optimizer/Cargo.toml b/datafusion/physical-optimizer/Cargo.toml index 7cacbff0a1027..7e5a3973ad61a 100644 --- a/datafusion/physical-optimizer/Cargo.toml +++ b/datafusion/physical-optimizer/Cargo.toml @@ -32,6 +32,7 @@ rust-version = { workspace = true } workspace = true [dependencies] +async-trait = { workspace = true } datafusion-common = { workspace = true, default-features = true } datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } @@ -42,5 +43,6 @@ datafusion-physical-plan = { workspace = true } itertools = { workspace = true } [dev-dependencies] -datafusion = { workspace = true } +arrow-schema = { workspace = true } +rstest = { workspace = true } tokio = { workspace = true } diff --git a/datafusion/physical-optimizer/src/aggregate_statistics.rs b/datafusion/physical-optimizer/src/aggregate_statistics.rs index 7edd52e468ab9..14d5571645a40 100644 --- a/datafusion/physical-optimizer/src/aggregate_statistics.rs +++ b/datafusion/physical-optimizer/src/aggregate_statistics.rs @@ -307,362 +307,3 @@ fn is_max(agg_expr: &dyn AggregateExpr) -> bool { false } - -#[cfg(test)] -pub(crate) mod tests { - use super::*; - - use datafusion::prelude::SessionContext; - use datafusion_expr::Operator; - use datafusion_physical_plan::aggregates::PhysicalGroupBy; - use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; - use datafusion_physical_plan::common; - use datafusion_physical_plan::filter::FilterExec; - use datafusion_physical_plan::memory::MemoryExec; - - use datafusion_common::arrow::array::Int32Array; - use datafusion_common::arrow::datatypes::{DataType, Field, Schema}; - use datafusion_common::arrow::record_batch::RecordBatch; - use 
datafusion_common::cast::as_int64_array; - use datafusion_functions_aggregate::count::count_udaf; - use datafusion_physical_expr::expressions::cast; - use datafusion_physical_expr::PhysicalExpr; - use datafusion_physical_expr_common::aggregate::AggregateExprBuilder; - use datafusion_physical_plan::aggregates::AggregateMode; - - /// Mock data using a MemoryExec which has an exact count statistic - fn mock_data() -> Result> { - let schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Int32, true), - ])); - - let batch = RecordBatch::try_new( - Arc::clone(&schema), - vec![ - Arc::new(Int32Array::from(vec![Some(1), Some(2), None])), - Arc::new(Int32Array::from(vec![Some(4), None, Some(6)])), - ], - )?; - - Ok(Arc::new(MemoryExec::try_new( - &[vec![batch]], - Arc::clone(&schema), - None, - )?)) - } - - /// Checks that the count optimization was applied and we still get the right result - async fn assert_count_optim_success( - plan: AggregateExec, - agg: TestAggregate, - ) -> Result<()> { - let session_ctx = SessionContext::new(); - let state = session_ctx.state(); - let plan: Arc = Arc::new(plan); - - let optimized = AggregateStatistics::new() - .optimize(Arc::clone(&plan), state.config_options())?; - - // A ProjectionExec is a sign that the count optimization was applied - assert!(optimized.as_any().is::()); - - // run both the optimized and nonoptimized plan - let optimized_result = - common::collect(optimized.execute(0, session_ctx.task_ctx())?).await?; - let nonoptimized_result = - common::collect(plan.execute(0, session_ctx.task_ctx())?).await?; - assert_eq!(optimized_result.len(), nonoptimized_result.len()); - - // and validate the results are the same and expected - assert_eq!(optimized_result.len(), 1); - check_batch(optimized_result.into_iter().next().unwrap(), &agg); - // check the non optimized one too to ensure types and names remain the same - assert_eq!(nonoptimized_result.len(), 1); - check_batch(nonoptimized_result.into_iter().next().unwrap(), &agg); - - Ok(()) - } - - fn check_batch(batch: RecordBatch, agg: &TestAggregate) { - let schema = batch.schema(); - let fields = schema.fields(); - assert_eq!(fields.len(), 1); - - let field = &fields[0]; - assert_eq!(field.name(), agg.column_name()); - assert_eq!(field.data_type(), &DataType::Int64); - // note that nullabiolity differs - - assert_eq!( - as_int64_array(batch.column(0)).unwrap().values(), - &[agg.expected_count()] - ); - } - - /// Describe the type of aggregate being tested - pub(crate) enum TestAggregate { - /// Testing COUNT(*) type aggregates - CountStar, - - /// Testing for COUNT(column) aggregate - ColumnA(Arc), - } - - impl TestAggregate { - pub(crate) fn new_count_star() -> Self { - Self::CountStar - } - - fn new_count_column(schema: &Arc) -> Self { - Self::ColumnA(schema.clone()) - } - - // Return appropriate expr depending if COUNT is for col or table (*) - pub(crate) fn count_expr(&self, schema: &Schema) -> Arc { - AggregateExprBuilder::new(count_udaf(), vec![self.column()]) - .schema(Arc::new(schema.clone())) - .name(self.column_name()) - .build() - .unwrap() - } - - /// what argument would this aggregate need in the plan? - fn column(&self) -> Arc { - match self { - Self::CountStar => expressions::lit(COUNT_STAR_EXPANSION), - Self::ColumnA(s) => expressions::col("a", s).unwrap(), - } - } - - /// What name would this aggregate produce in a plan? 
- fn column_name(&self) -> &'static str { - match self { - Self::CountStar => "COUNT(*)", - Self::ColumnA(_) => "COUNT(a)", - } - } - - /// What is the expected count? - fn expected_count(&self) -> i64 { - match self { - TestAggregate::CountStar => 3, - TestAggregate::ColumnA(_) => 2, - } - } - } - - #[tokio::test] - async fn test_count_partial_direct_child() -> Result<()> { - // basic test case with the aggregation applied on a source with exact statistics - let source = mock_data()?; - let schema = source.schema(); - let agg = TestAggregate::new_count_star(); - - let partial_agg = AggregateExec::try_new( - AggregateMode::Partial, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - source, - Arc::clone(&schema), - )?; - - let final_agg = AggregateExec::try_new( - AggregateMode::Final, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - Arc::new(partial_agg), - Arc::clone(&schema), - )?; - - assert_count_optim_success(final_agg, agg).await?; - - Ok(()) - } - - #[tokio::test] - async fn test_count_partial_with_nulls_direct_child() -> Result<()> { - // basic test case with the aggregation applied on a source with exact statistics - let source = mock_data()?; - let schema = source.schema(); - let agg = TestAggregate::new_count_column(&schema); - - let partial_agg = AggregateExec::try_new( - AggregateMode::Partial, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - source, - Arc::clone(&schema), - )?; - - let final_agg = AggregateExec::try_new( - AggregateMode::Final, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - Arc::new(partial_agg), - Arc::clone(&schema), - )?; - - assert_count_optim_success(final_agg, agg).await?; - - Ok(()) - } - - #[tokio::test] - async fn test_count_partial_indirect_child() -> Result<()> { - let source = mock_data()?; - let schema = source.schema(); - let agg = TestAggregate::new_count_star(); - - let partial_agg = AggregateExec::try_new( - AggregateMode::Partial, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - source, - Arc::clone(&schema), - )?; - - // We introduce an intermediate optimization step between the partial and final aggregator - let coalesce = CoalescePartitionsExec::new(Arc::new(partial_agg)); - - let final_agg = AggregateExec::try_new( - AggregateMode::Final, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - Arc::new(coalesce), - Arc::clone(&schema), - )?; - - assert_count_optim_success(final_agg, agg).await?; - - Ok(()) - } - - #[tokio::test] - async fn test_count_partial_with_nulls_indirect_child() -> Result<()> { - let source = mock_data()?; - let schema = source.schema(); - let agg = TestAggregate::new_count_column(&schema); - - let partial_agg = AggregateExec::try_new( - AggregateMode::Partial, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - source, - Arc::clone(&schema), - )?; - - // We introduce an intermediate optimization step between the partial and final aggregator - let coalesce = CoalescePartitionsExec::new(Arc::new(partial_agg)); - - let final_agg = AggregateExec::try_new( - AggregateMode::Final, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - Arc::new(coalesce), - Arc::clone(&schema), - )?; - - assert_count_optim_success(final_agg, agg).await?; - - Ok(()) - } - - #[tokio::test] - async fn test_count_inexact_stat() -> Result<()> { - let source = mock_data()?; - let schema = source.schema(); - let agg =
TestAggregate::new_count_star(); - - // adding a filter makes the statistics inexact - let filter = Arc::new(FilterExec::try_new( - expressions::binary( - expressions::col("a", &schema)?, - Operator::Gt, - cast(expressions::lit(1u32), &schema, DataType::Int32)?, - &schema, - )?, - source, - )?); - - let partial_agg = AggregateExec::try_new( - AggregateMode::Partial, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - filter, - Arc::clone(&schema), - )?; - - let final_agg = AggregateExec::try_new( - AggregateMode::Final, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - Arc::new(partial_agg), - Arc::clone(&schema), - )?; - - let conf = ConfigOptions::new(); - let optimized = - AggregateStatistics::new().optimize(Arc::new(final_agg), &conf)?; - - // check that the original ExecutionPlan was not replaced - assert!(optimized.as_any().is::()); - - Ok(()) - } - - #[tokio::test] - async fn test_count_with_nulls_inexact_stat() -> Result<()> { - let source = mock_data()?; - let schema = source.schema(); - let agg = TestAggregate::new_count_column(&schema); - - // adding a filter makes the statistics inexact - let filter = Arc::new(FilterExec::try_new( - expressions::binary( - expressions::col("a", &schema)?, - Operator::Gt, - cast(expressions::lit(1u32), &schema, DataType::Int32)?, - &schema, - )?, - source, - )?); - - let partial_agg = AggregateExec::try_new( - AggregateMode::Partial, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - filter, - Arc::clone(&schema), - )?; - - let final_agg = AggregateExec::try_new( - AggregateMode::Final, - PhysicalGroupBy::default(), - vec![agg.count_expr(&schema)], - vec![None], - Arc::new(partial_agg), - Arc::clone(&schema), - )?; - - let conf = ConfigOptions::new(); - let optimized = - AggregateStatistics::new().optimize(Arc::new(final_agg), &conf)?; - - // check that the original ExecutionPlan was not replaced - assert!(optimized.as_any().is::()); - - Ok(()) - } -} diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/physical-optimizer/src/enforce_distribution.rs similarity index 99% rename from datafusion/core/src/physical_optimizer/enforce_distribution.rs rename to datafusion/physical-optimizer/src/enforce_distribution.rs index 1f076e448e600..643e28b4ea5e5 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/physical-optimizer/src/enforce_distribution.rs @@ -24,26 +24,28 @@ use std::fmt::Debug; use std::sync::Arc; -use crate::config::ConfigOptions; -use crate::error::Result; -use crate::physical_optimizer::utils::{ +use crate::utils::{ add_sort_above_with_check, is_coalesce_partitions, is_repartition, is_sort_preserving_merge, }; -use crate::physical_plan::aggregates::{AggregateExec, AggregateMode, PhysicalGroupBy}; -use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec; -use crate::physical_plan::joins::{ +use datafusion_common::config::ConfigOptions; +use datafusion_common::Result; +use datafusion_physical_plan::aggregates::{ + AggregateExec, AggregateMode, PhysicalGroupBy, +}; +use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; +use datafusion_physical_plan::joins::{ CrossJoinExec, HashJoinExec, PartitionMode, SortMergeJoinExec, }; -use crate::physical_plan::projection::ProjectionExec; -use crate::physical_plan::repartition::RepartitionExec; -use crate::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; -use 
crate::physical_plan::tree_node::PlanContext; -use crate::physical_plan::union::{can_interleave, InterleaveExec, UnionExec}; -use crate::physical_plan::windows::WindowAggExec; -use crate::physical_plan::{Distribution, ExecutionPlan, Partitioning}; - -use arrow::compute::SortOptions; +use datafusion_physical_plan::projection::ProjectionExec; +use datafusion_physical_plan::repartition::RepartitionExec; +use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; +use datafusion_physical_plan::tree_node::PlanContext; +use datafusion_physical_plan::union::{can_interleave, InterleaveExec, UnionExec}; +use datafusion_physical_plan::windows::WindowAggExec; +use datafusion_physical_plan::{Distribution, ExecutionPlan, Partitioning}; + +use datafusion_common::arrow::compute::SortOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_expr::logical_plan::JoinType; use datafusion_physical_expr::expressions::{Column, NoOp}; @@ -54,8 +56,8 @@ use datafusion_physical_expr::{ use datafusion_physical_plan::windows::{get_best_fitting_window, BoundedWindowAggExec}; use datafusion_physical_plan::ExecutionPlanProperties; -use datafusion_physical_optimizer::output_requirements::OutputRequirementExec; -use datafusion_physical_optimizer::PhysicalOptimizerRule; +use crate::output_requirements::OutputRequirementExec; +use crate::PhysicalOptimizerRule; use itertools::izip; /// The `EnforceDistribution` rule ensures that distribution requirements are @@ -269,9 +271,10 @@ impl PhysicalOptimizerRule for EnforceDistribution { /// 4) If the current plan is Projection, transform the requirements to the columns before the Projection and push down requirements /// 5) For other types of operators, by default, push down the parent requirements to children. /// -fn adjust_input_keys_ordering( +pub fn adjust_input_keys_ordering( mut requirements: PlanWithKeyRequirements, ) -> Result> { + // TODO: make this function private when https://github.com/apache/datafusion/issues/11502 is resolved let plan = requirements.plan.clone(); if let Some(HashJoinExec { @@ -590,9 +593,10 @@ fn shift_right_required( /// The Bottom-Up approach will be useful in the future if we plan to support storage partition-wise Joins. /// In that case, the datasources/tables might be pre-partitioned and we can't adjust the key ordering of the datasources /// and then can't apply the Top-Down reordering process. -pub(crate) fn reorder_join_keys_to_inputs( +pub fn reorder_join_keys_to_inputs( plan: Arc, ) -> Result> { + // TODO: make this function pub(crate) once https://github.com/apache/datafusion/issues/11502 is resolved let plan_any = plan.as_any(); if let Some(HashJoinExec { left, @@ -1035,10 +1039,11 @@ fn replace_order_preserving_variants( /// operators to satisfy distribution requirements. Since this function /// takes care of such requirements, we should avoid manually adding data /// exchange operators in other places.
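For context on the doc comment above: "adding data exchange operators" concretely means wrapping a child in a `RepartitionExec`. A minimal sketch of how a hash-distribution requirement gets satisfied, assuming an illustrative helper name and an arbitrary key column and partition count (none of which are part of this patch):

```rust
use std::sync::Arc;

use datafusion_common::Result;
use datafusion_physical_expr::expressions::col;
use datafusion_physical_plan::repartition::RepartitionExec;
use datafusion_physical_plan::{ExecutionPlan, Partitioning};

/// Hypothetical helper: satisfy a hash-distribution requirement on column
/// "a" by inserting the exchange operator that `ensure_distribution` manages.
fn hash_repartition_on_a(
    input: Arc<dyn ExecutionPlan>,
    target_partitions: usize,
) -> Result<Arc<dyn ExecutionPlan>> {
    let key = col("a", &input.schema())?;
    Ok(Arc::new(RepartitionExec::try_new(
        input,
        Partitioning::Hash(vec![key], target_partitions),
    )?))
}
```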
-fn ensure_distribution( +pub fn ensure_distribution( dist_context: DistributionContext, config: &ConfigOptions, ) -> Result> { + // TODO: make this function private once https://github.com/apache/datafusion/issues/11502 is resolved let dist_context = update_children(dist_context)?; if dist_context.plan.children().is_empty() { diff --git a/datafusion/physical-optimizer/src/enforce_sorting.rs b/datafusion/physical-optimizer/src/enforce_sorting.rs index 0707f77cd754c..f10c40047ba60 100644 --- a/datafusion/physical-optimizer/src/enforce_sorting.rs +++ b/datafusion/physical-optimizer/src/enforce_sorting.rs @@ -254,9 +254,10 @@ fn replace_with_partial_sort( /// ``` /// by following connections from [`CoalescePartitionsExec`]s to [`SortExec`]s. /// By performing sorting in parallel, we can increase performance in some scenarios. -fn parallelize_sorts( +pub fn parallelize_sorts( mut requirements: PlanWithCorrespondingCoalescePartitions, ) -> Result> { + // TODO: make this function private once https://github.com/apache/datafusion/issues/11502 is resolved update_coalesce_ctx_children(&mut requirements); if requirements.children.is_empty() || !requirements.children[0].data { @@ -315,9 +316,10 @@ fn parallelize_sorts( /// This function enforces sorting requirements and makes optimizations without /// violating these requirements whenever possible. -fn ensure_sorting( +pub fn ensure_sorting( mut requirements: PlanWithCorrespondingSort, ) -> Result> { + // TODO: make this function private once https://github.com/apache/datafusion/issues/11502 is resolved requirements = update_sort_ctx_children(requirements, false)?; // Perform naive analysis at the beginning -- remove already-satisfied sorts: @@ -607,1805 +609,3 @@ fn get_sort_exprs( plan_err!("Given ExecutionPlan is not a SortExec or a SortPreservingMergeExec") } } - -#[cfg(test)] -mod tests { - - use super::*; - use crate::physical_optimizer::enforce_distribution::EnforceDistribution; - use crate::physical_optimizer::test_utils::{ - aggregate_exec, bounded_window_exec, check_integrity, coalesce_batches_exec, - coalesce_partitions_exec, filter_exec, global_limit_exec, hash_join_exec, - limit_exec, local_limit_exec, memory_exec, parquet_exec, parquet_exec_sorted, - repartition_exec, sort_exec, sort_expr, sort_expr_options, sort_merge_join_exec, - sort_preserving_merge_exec, spr_repartition_exec, union_exec, - RequirementsTestExec, - }; - use crate::physical_plan::{displayable, get_plan_string, Partitioning}; - use crate::prelude::{SessionConfig, SessionContext}; - use crate::test::{csv_exec_ordered, csv_exec_sorted, stream_exec_ordered}; - - use arrow::compute::SortOptions; - use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; - use datafusion_common::Result; - use datafusion_expr::JoinType; - use datafusion_physical_expr::expressions::{col, Column, NotExpr}; - - use datafusion_physical_optimizer::PhysicalOptimizerRule; - use rstest::rstest; - - fn create_test_schema() -> Result { - let nullable_column = Field::new("nullable_col", DataType::Int32, true); - let non_nullable_column = Field::new("non_nullable_col", DataType::Int32, false); - let schema = Arc::new(Schema::new(vec![nullable_column, non_nullable_column])); - Ok(schema) - } - - fn create_test_schema2() -> Result { - let col_a = Field::new("col_a", DataType::Int32, true); - let col_b = Field::new("col_b", DataType::Int32, true); - let schema = Arc::new(Schema::new(vec![col_a, col_b])); - Ok(schema) - } - - // Generate a schema which consists of 5 columns (a, b, c, d, e) -
fn create_test_schema3() -> Result { - let a = Field::new("a", DataType::Int32, true); - let b = Field::new("b", DataType::Int32, false); - let c = Field::new("c", DataType::Int32, true); - let d = Field::new("d", DataType::Int32, false); - let e = Field::new("e", DataType::Int32, false); - let schema = Arc::new(Schema::new(vec![a, b, c, d, e])); - Ok(schema) - } - - /// Runs the sort enforcement optimizer and asserts the plan - /// against the original and expected plans - /// - /// `$EXPECTED_PLAN_LINES`: input plan - /// `$EXPECTED_OPTIMIZED_PLAN_LINES`: optimized plan - /// `$PLAN`: the plan to optimize - /// `REPARTITION_SORTS`: Flag to set `config.options.optimizer.repartition_sorts` option. - /// - macro_rules! assert_optimized { - ($EXPECTED_PLAN_LINES: expr, $EXPECTED_OPTIMIZED_PLAN_LINES: expr, $PLAN: expr, $REPARTITION_SORTS: expr) => { - let config = SessionConfig::new().with_repartition_sorts($REPARTITION_SORTS); - let session_ctx = SessionContext::new_with_config(config); - let state = session_ctx.state(); - - // This file has 4 rules that use tree nodes; apply these rules as in the - // EnforceSorting::optimize implementation. - // After these operations, tree nodes should be in a consistent state. - // This code block makes sure that these rules don't violate tree node integrity. - { - let plan_requirements = PlanWithCorrespondingSort::new_default($PLAN.clone()); - let adjusted = plan_requirements - .transform_up(ensure_sorting) - .data() - .and_then(check_integrity)?; - // TODO: End state payloads will be checked here. - - let new_plan = if state.config_options().optimizer.repartition_sorts { - let plan_with_coalesce_partitions = - PlanWithCorrespondingCoalescePartitions::new_default(adjusted.plan); - let parallel = plan_with_coalesce_partitions - .transform_up(parallelize_sorts) - .data() - .and_then(check_integrity)?; - // TODO: End state payloads will be checked here. - parallel.plan - } else { - adjusted.plan - }; - - let plan_with_pipeline_fixer = OrderPreservationContext::new_default(new_plan); - let updated_plan = plan_with_pipeline_fixer - .transform_up(|plan_with_pipeline_fixer| { - replace_with_order_preserving_variants( - plan_with_pipeline_fixer, - false, - true, - state.config_options(), - ) - }) - .data() - .and_then(check_integrity)?; - // TODO: End state payloads will be checked here. - - let mut sort_pushdown = SortPushDown::new_default(updated_plan.plan); - assign_initial_requirements(&mut sort_pushdown); - sort_pushdown - .transform_down(pushdown_sorts) - .data() - .and_then(check_integrity)?; - // TODO: End state payloads will be checked here.
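Each of the four passes in this block follows the same `TreeNode` rewrite shape: `transform_up` (or `transform_down`) visits every node of the plan and the closure reports, via `Transformed`, whether it changed anything. A minimal sketch of that pattern, using an illustrative no-op visitor rather than one of the real rules:

```rust
use std::sync::Arc;

use datafusion_common::tree_node::{Transformed, TreeNode};
use datafusion_common::Result;
use datafusion_physical_plan::ExecutionPlan;

/// Hypothetical visitor: walks the plan bottom-up without rewriting it,
/// just counting nodes, to show the `transform_up` + `Transformed` shape.
fn count_plan_nodes(plan: Arc<dyn ExecutionPlan>) -> Result<usize> {
    let mut nodes = 0;
    plan.transform_up(|node| {
        nodes += 1;
        // `Transformed::no` signals that this node was left unchanged.
        Ok(Transformed::no(node))
    })?;
    Ok(nodes)
}
```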
- } - - let physical_plan = $PLAN; - let formatted = displayable(physical_plan.as_ref()).indent(true).to_string(); - let actual: Vec<&str> = formatted.trim().lines().collect(); - - let expected_plan_lines: Vec<&str> = $EXPECTED_PLAN_LINES - .iter().map(|s| *s).collect(); - - assert_eq!( - expected_plan_lines, actual, - "\n**Original Plan Mismatch\n\nexpected:\n\n{expected_plan_lines:#?}\nactual:\n\n{actual:#?}\n\n" - ); - - let expected_optimized_lines: Vec<&str> = $EXPECTED_OPTIMIZED_PLAN_LINES - .iter().map(|s| *s).collect(); - - // Run the actual optimizer - let optimized_physical_plan = - EnforceSorting::new().optimize(physical_plan, state.config_options())?; - - // Get string representation of the plan - let actual = get_plan_string(&optimized_physical_plan); - assert_eq!( - expected_optimized_lines, actual, - "\n**Optimized Plan Mismatch\n\nexpected:\n\n{expected_optimized_lines:#?}\nactual:\n\n{actual:#?}\n\n" - ); - - }; - } - - #[tokio::test] - async fn test_remove_unnecessary_sort() -> Result<()> { - let schema = create_test_schema()?; - let source = memory_exec(&schema); - let input = sort_exec(vec![sort_expr("non_nullable_col", &schema)], source); - let physical_plan = sort_exec(vec![sort_expr("nullable_col", &schema)], input); - - let expected_input = [ - "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", - " MemoryExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " MemoryExec: partitions=1, partition_sizes=[0]", - ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_remove_unnecessary_sort_window_multilayer() -> Result<()> { - let schema = create_test_schema()?; - let source = memory_exec(&schema); - - let sort_exprs = vec![sort_expr_options( - "non_nullable_col", - &source.schema(), - SortOptions { - descending: true, - nulls_first: true, - }, - )]; - let sort = sort_exec(sort_exprs.clone(), source); - // Add dummy layer propagating Sort above, to test whether sort can be removed from multi layer before - let coalesce_batches = coalesce_batches_exec(sort); - - let window_agg = - bounded_window_exec("non_nullable_col", sort_exprs, coalesce_batches); - - let sort_exprs = vec![sort_expr_options( - "non_nullable_col", - &window_agg.schema(), - SortOptions { - descending: false, - nulls_first: false, - }, - )]; - - let sort = sort_exec(sort_exprs.clone(), window_agg); - - // Add dummy layer propagating Sort above, to test whether sort can be removed from multi layer before - let filter = filter_exec( - Arc::new(NotExpr::new( - col("non_nullable_col", schema.as_ref()).unwrap(), - )), - sort, - ); - - let physical_plan = bounded_window_exec("non_nullable_col", sort_exprs, filter); - - let expected_input = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " FilterExec: NOT non_nullable_col@1", - " SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: 
Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " CoalesceBatchesExec: target_batch_size=128", - " SortExec: expr=[non_nullable_col@1 DESC], preserve_partitioning=[false]", - " MemoryExec: partitions=1, partition_sizes=[0]"]; - - let expected_optimized = ["WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(NULL), is_causal: false }]", - " FilterExec: NOT non_nullable_col@1", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " CoalesceBatchesExec: target_batch_size=128", - " SortExec: expr=[non_nullable_col@1 DESC], preserve_partitioning=[false]", - " MemoryExec: partitions=1, partition_sizes=[0]"]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_add_required_sort() -> Result<()> { - let schema = create_test_schema()?; - let source = memory_exec(&schema); - - let sort_exprs = vec![sort_expr("nullable_col", &schema)]; - - let physical_plan = sort_preserving_merge_exec(sort_exprs, source); - - let expected_input = [ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " MemoryExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " MemoryExec: partitions=1, partition_sizes=[0]", - ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_remove_unnecessary_sort1() -> Result<()> { - let schema = create_test_schema()?; - let source = memory_exec(&schema); - let sort_exprs = vec![sort_expr("nullable_col", &schema)]; - let sort = sort_exec(sort_exprs.clone(), source); - let spm = sort_preserving_merge_exec(sort_exprs, sort); - - let sort_exprs = vec![sort_expr("nullable_col", &schema)]; - let sort = sort_exec(sort_exprs.clone(), spm); - let physical_plan = sort_preserving_merge_exec(sort_exprs, sort); - let expected_input = [ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " MemoryExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " MemoryExec: partitions=1, partition_sizes=[0]", - ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_remove_unnecessary_sort2() -> Result<()> { - let schema = create_test_schema()?; - let source = memory_exec(&schema); - let sort_exprs = vec![sort_expr("non_nullable_col", &schema)]; - let sort = sort_exec(sort_exprs.clone(), source); - let spm = sort_preserving_merge_exec(sort_exprs, sort); - - let sort_exprs = vec![ - sort_expr("nullable_col", &schema), - sort_expr("non_nullable_col", &schema), - ]; - let sort2 = sort_exec(sort_exprs.clone(), spm); - let spm2 = sort_preserving_merge_exec(sort_exprs, sort2); - - let sort_exprs = vec![sort_expr("nullable_col", &schema)]; - let sort3 = sort_exec(sort_exprs, spm2); - let 
physical_plan = repartition_exec(repartition_exec(sort3)); - - let expected_input = [ - "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [non_nullable_col@1 ASC]", - " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", - " MemoryExec: partitions=1, partition_sizes=[0]", - ]; - - let expected_optimized = [ - "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " MemoryExec: partitions=1, partition_sizes=[0]", - ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_remove_unnecessary_sort3() -> Result<()> { - let schema = create_test_schema()?; - let source = memory_exec(&schema); - let sort_exprs = vec![sort_expr("non_nullable_col", &schema)]; - let sort = sort_exec(sort_exprs.clone(), source); - let spm = sort_preserving_merge_exec(sort_exprs, sort); - - let sort_exprs = vec![ - sort_expr("nullable_col", &schema), - sort_expr("non_nullable_col", &schema), - ]; - let repartition_exec = repartition_exec(spm); - let sort2 = Arc::new( - SortExec::new(sort_exprs.clone(), repartition_exec) - .with_preserve_partitioning(true), - ) as _; - let spm2 = sort_preserving_merge_exec(sort_exprs, sort2); - - let physical_plan = aggregate_exec(spm2); - - // When removing a `SortPreservingMergeExec`, make sure that partitioning - // requirements are not violated. In some cases, we may need to replace - // it with a `CoalescePartitionsExec` instead of directly removing it. 
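The replacement described in the comment above can be pictured as follows: when the merge's ordering is not actually needed but its single output partition still is, the `SortPreservingMergeExec` gives way to a plain `CoalescePartitionsExec`. A sketch with an illustrative helper name, not the rule's actual code path:

```rust
use std::sync::Arc;

use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec;
use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec;
use datafusion_physical_plan::ExecutionPlan;

/// Hypothetical rewrite: keep the "merge down to one partition" effect of
/// an SPM while dropping its (unneeded) ordering guarantee.
fn merge_without_ordering(spm: &SortPreservingMergeExec) -> Arc<dyn ExecutionPlan> {
    Arc::new(CoalescePartitionsExec::new(Arc::clone(spm.input())))
}
```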
- let expected_input = [ - "AggregateExec: mode=Final, gby=[], aggr=[]", - " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortPreservingMergeExec: [non_nullable_col@1 ASC]", - " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", - " MemoryExec: partitions=1, partition_sizes=[0]", - ]; - - let expected_optimized = [ - "AggregateExec: mode=Final, gby=[], aggr=[]", - " CoalescePartitionsExec", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " MemoryExec: partitions=1, partition_sizes=[0]", - ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_remove_unnecessary_sort4() -> Result<()> { - let schema = create_test_schema()?; - let source1 = repartition_exec(memory_exec(&schema)); - - let source2 = repartition_exec(memory_exec(&schema)); - let union = union_exec(vec![source1, source2]); - - let sort_exprs = vec![sort_expr("non_nullable_col", &schema)]; - // let sort = sort_exec(sort_exprs.clone(), union); - let sort = Arc::new( - SortExec::new(sort_exprs.clone(), union).with_preserve_partitioning(true), - ) as _; - let spm = sort_preserving_merge_exec(sort_exprs, sort); - - let filter = filter_exec( - Arc::new(NotExpr::new( - col("non_nullable_col", schema.as_ref()).unwrap(), - )), - spm, - ); - - let sort_exprs = vec![ - sort_expr("nullable_col", &schema), - sort_expr("non_nullable_col", &schema), - ]; - let physical_plan = sort_exec(sort_exprs, filter); - - // When removing a `SortPreservingMergeExec`, make sure that partitioning - // requirements are not violated. In some cases, we may need to replace - // it with a `CoalescePartitionsExec` instead of directly removing it. - let expected_input = ["SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", - " FilterExec: NOT non_nullable_col@1", - " SortPreservingMergeExec: [non_nullable_col@1 ASC]", - " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[true]", - " UnionExec", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " MemoryExec: partitions=1, partition_sizes=[0]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " MemoryExec: partitions=1, partition_sizes=[0]"]; - - let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[true]", - " FilterExec: NOT non_nullable_col@1", - " UnionExec", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " MemoryExec: partitions=1, partition_sizes=[0]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " MemoryExec: partitions=1, partition_sizes=[0]"]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_remove_unnecessary_sort5() -> Result<()> { - let left_schema = create_test_schema2()?; - let right_schema = create_test_schema3()?; - let left_input = memory_exec(&left_schema); - let parquet_sort_exprs = vec![sort_expr("a", &right_schema)]; - let right_input = parquet_exec_sorted(&right_schema, parquet_sort_exprs); - - let on = vec![( - Arc::new(Column::new_with_schema("col_a", &left_schema)?) 
as _, - Arc::new(Column::new_with_schema("c", &right_schema)?) as _, - )]; - let join = hash_join_exec(left_input, right_input, on, None, &JoinType::Inner)?; - let physical_plan = sort_exec(vec![sort_expr("a", &join.schema())], join); - - let expected_input = ["SortExec: expr=[a@2 ASC], preserve_partitioning=[false]", - " HashJoinExec: mode=Partitioned, join_type=Inner, on=[(col_a@0, c@2)]", - " MemoryExec: partitions=1, partition_sizes=[0]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC]"]; - - let expected_optimized = ["HashJoinExec: mode=Partitioned, join_type=Inner, on=[(col_a@0, c@2)]", - " MemoryExec: partitions=1, partition_sizes=[0]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC]"]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_remove_unnecessary_spm1() -> Result<()> { - let schema = create_test_schema()?; - let source = memory_exec(&schema); - let input = sort_preserving_merge_exec( - vec![sort_expr("non_nullable_col", &schema)], - source, - ); - let input2 = sort_preserving_merge_exec( - vec![sort_expr("non_nullable_col", &schema)], - input, - ); - let physical_plan = - sort_preserving_merge_exec(vec![sort_expr("nullable_col", &schema)], input2); - - let expected_input = [ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " SortPreservingMergeExec: [non_nullable_col@1 ASC]", - " SortPreservingMergeExec: [non_nullable_col@1 ASC]", - " MemoryExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " MemoryExec: partitions=1, partition_sizes=[0]", - ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_do_not_remove_sort_with_limit() -> Result<()> { - let schema = create_test_schema()?; - - let source1 = parquet_exec(&schema); - let sort_exprs = vec![ - sort_expr("nullable_col", &schema), - sort_expr("non_nullable_col", &schema), - ]; - let sort = sort_exec(sort_exprs.clone(), source1); - let limit = limit_exec(sort); - - let parquet_sort_exprs = vec![sort_expr("nullable_col", &schema)]; - let source2 = parquet_exec_sorted(&schema, parquet_sort_exprs); - - let union = union_exec(vec![source2, limit]); - let repartition = repartition_exec(union); - let physical_plan = sort_preserving_merge_exec(sort_exprs, repartition); - - let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", - " UnionExec", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " GlobalLimitExec: skip=0, fetch=100", - " LocalLimitExec: fetch=100", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; - - // We should keep the bottom `SortExec`. 
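The limit stack in this test is what pins the sort in place: a `LocalLimitExec` truncates each partition and a `GlobalLimitExec` caps the total, so which rows survive depends on the order in which they arrive. A sketch of that stack (the helper name is illustrative):

```rust
use std::sync::Arc;

use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec};
use datafusion_physical_plan::ExecutionPlan;

/// Hypothetical helper mirroring the test's limit stack: fetch at most 100
/// rows per partition, then at most 100 rows overall (skip = 0).
fn limit_100(input: Arc<dyn ExecutionPlan>) -> Arc<dyn ExecutionPlan> {
    let per_partition = Arc::new(LocalLimitExec::new(input, 100));
    Arc::new(GlobalLimitExec::new(per_partition, 0, Some(100)))
}
```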
- let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", - " UnionExec", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " GlobalLimitExec: skip=0, fetch=100", - " LocalLimitExec: fetch=100", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_change_wrong_sorting() -> Result<()> { - let schema = create_test_schema()?; - let source = memory_exec(&schema); - let sort_exprs = vec![ - sort_expr("nullable_col", &schema), - sort_expr("non_nullable_col", &schema), - ]; - let sort = sort_exec(vec![sort_exprs[0].clone()], source); - let physical_plan = sort_preserving_merge_exec(sort_exprs, sort); - let expected_input = [ - "SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " MemoryExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", - " MemoryExec: partitions=1, partition_sizes=[0]", - ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_change_wrong_sorting2() -> Result<()> { - let schema = create_test_schema()?; - let source = memory_exec(&schema); - let sort_exprs = vec![ - sort_expr("nullable_col", &schema), - sort_expr("non_nullable_col", &schema), - ]; - let spm1 = sort_preserving_merge_exec(sort_exprs.clone(), source); - let sort2 = sort_exec(vec![sort_exprs[0].clone()], spm1); - let physical_plan = - sort_preserving_merge_exec(vec![sort_exprs[1].clone()], sort2); - - let expected_input = [ - "SortPreservingMergeExec: [non_nullable_col@1 ASC]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " MemoryExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", - " MemoryExec: partitions=1, partition_sizes=[0]", - ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_union_inputs_sorted() -> Result<()> { - let schema = create_test_schema()?; - - let source1 = parquet_exec(&schema); - let sort_exprs = vec![sort_expr("nullable_col", &schema)]; - let sort = sort_exec(sort_exprs.clone(), source1); - - let source2 = parquet_exec_sorted(&schema, sort_exprs.clone()); - - let union = union_exec(vec![source2, sort]); - let physical_plan = sort_preserving_merge_exec(sort_exprs, union); - - // one input to the union is already sorted, one is not. 
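The `sort_expr` helper these tests lean on presumably wraps a column reference and sort options into a `PhysicalSortExpr`; a minimal sketch of that shape (the real helper may differ in its defaults):

```rust
use datafusion_common::arrow::compute::SortOptions;
use datafusion_common::arrow::datatypes::Schema;
use datafusion_common::Result;
use datafusion_physical_expr::expressions::col;
use datafusion_physical_expr::PhysicalSortExpr;

/// Hypothetical stand-in for the `sort_expr` test helper: an ascending
/// sort key over the named column with arrow's default sort options.
fn sort_expr_asc(name: &str, schema: &Schema) -> Result<PhysicalSortExpr> {
    Ok(PhysicalSortExpr {
        expr: col(name, schema)?,
        options: SortOptions::default(),
    })
}
```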
- let expected_input = vec![ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - ]; - // should not add a sort at the output of the union, input plan should not be changed - let expected_optimized = expected_input.clone(); - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_union_inputs_different_sorted() -> Result<()> { - let schema = create_test_schema()?; - - let source1 = parquet_exec(&schema); - let sort_exprs = vec![sort_expr("nullable_col", &schema)]; - let sort = sort_exec(sort_exprs.clone(), source1); - - let parquet_sort_exprs = vec![ - sort_expr("nullable_col", &schema), - sort_expr("non_nullable_col", &schema), - ]; - let source2 = parquet_exec_sorted(&schema, parquet_sort_exprs); - - let union = union_exec(vec![source2, sort]); - let physical_plan = sort_preserving_merge_exec(sort_exprs, union); - - // one input to the union is already sorted, one is not. - let expected_input = vec![ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - ]; - // should not add a sort at the output of the union, input plan should not be changed - let expected_optimized = expected_input.clone(); - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_union_inputs_different_sorted2() -> Result<()> { - let schema = create_test_schema()?; - - let source1 = parquet_exec(&schema); - let sort_exprs = vec![ - sort_expr("nullable_col", &schema), - sort_expr("non_nullable_col", &schema), - ]; - let sort = sort_exec(sort_exprs.clone(), source1); - - let parquet_sort_exprs = vec![sort_expr("nullable_col", &schema)]; - let source2 = parquet_exec_sorted(&schema, parquet_sort_exprs); - - let union = union_exec(vec![source2, sort]); - let physical_plan = sort_preserving_merge_exec(sort_exprs, union); - - // Input is an invalid plan. In this case rule should add required sorting in appropriate places. - // First ParquetExec has output ordering(nullable_col@0 ASC). However, it doesn't satisfy the - // required ordering of SortPreservingMergeExec. 
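The "doesn't satisfy" reasoning above is a lexicographic-prefix check: an output ordering of `[nullable_col ASC]` cannot satisfy a requirement of `[nullable_col ASC, non_nullable_col ASC]`, although the converse would hold. One way such a check can be phrased against plan properties (a sketch, not the rule's exact code):

```rust
use datafusion_physical_expr::PhysicalSortExpr;
use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties};

/// Does `plan` already deliver the required lexicographic ordering?
/// Equivalence properties account for prefixes, constants, and aliases.
fn ordering_is_satisfied(
    plan: &dyn ExecutionPlan,
    required: &[PhysicalSortExpr],
) -> bool {
    plan.equivalence_properties().ordering_satisfy(required)
}
```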
- let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " UnionExec", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; - - let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_union_inputs_different_sorted3() -> Result<()> { - let schema = create_test_schema()?; - - let source1 = parquet_exec(&schema); - let sort_exprs1 = vec![ - sort_expr("nullable_col", &schema), - sort_expr("non_nullable_col", &schema), - ]; - let sort1 = sort_exec(sort_exprs1, source1.clone()); - let sort_exprs2 = vec![sort_expr("nullable_col", &schema)]; - let sort2 = sort_exec(sort_exprs2, source1); - - let parquet_sort_exprs = vec![sort_expr("nullable_col", &schema)]; - let source2 = parquet_exec_sorted(&schema, parquet_sort_exprs.clone()); - - let union = union_exec(vec![sort1, source2, sort2]); - let physical_plan = sort_preserving_merge_exec(parquet_sort_exprs, union); - - // First input to the union is not Sorted (SortExec is finer than required ordering by the SortPreservingMergeExec above). - // Second input to the union is already Sorted (matches with the required ordering by the SortPreservingMergeExec above). - // Third input to the union is not Sorted (SortExec is matches required ordering by the SortPreservingMergeExec above). 
- let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; - // should adjust sorting in the first input of the union such that it is not unnecessarily fine - let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_union_inputs_different_sorted4() -> Result<()> { - let schema = create_test_schema()?; - - let source1 = parquet_exec(&schema); - let sort_exprs1 = vec![ - sort_expr("nullable_col", &schema), - sort_expr("non_nullable_col", &schema), - ]; - let sort_exprs2 = vec![sort_expr("nullable_col", &schema)]; - let sort1 = sort_exec(sort_exprs2.clone(), source1.clone()); - let sort2 = sort_exec(sort_exprs2.clone(), source1); - - let source2 = parquet_exec_sorted(&schema, sort_exprs2); - - let union = union_exec(vec![sort1, source2, sort2]); - let physical_plan = sort_preserving_merge_exec(sort_exprs1, union); - - // Ordering requirement of the `SortPreservingMergeExec` is not met. - // Should modify the plan to ensure that all three inputs to the - // `UnionExec` satisfy the ordering, OR add a single sort after - // the `UnionExec` (both of which are equally good for this example). 
- let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; - let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_union_inputs_different_sorted5() -> Result<()> { - let schema = create_test_schema()?; - - let source1 = parquet_exec(&schema); - let sort_exprs1 = vec![ - sort_expr("nullable_col", &schema), - sort_expr("non_nullable_col", &schema), - ]; - let sort_exprs2 = vec![ - sort_expr("nullable_col", &schema), - sort_expr_options( - "non_nullable_col", - &schema, - SortOptions { - descending: true, - nulls_first: false, - }, - ), - ]; - let sort_exprs3 = vec![sort_expr("nullable_col", &schema)]; - let sort1 = sort_exec(sort_exprs1, source1.clone()); - let sort2 = sort_exec(sort_exprs2, source1); - - let union = union_exec(vec![sort1, sort2]); - let physical_plan = sort_preserving_merge_exec(sort_exprs3, union); - - // The `UnionExec` doesn't preserve any of the inputs ordering in the - // example below. However, we should be able to change the unnecessarily - // fine `SortExec`s below with required `SortExec`s that are absolutely necessary. 
- let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; - let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_union_inputs_different_sorted6() -> Result<()> { - let schema = create_test_schema()?; - - let source1 = parquet_exec(&schema); - let sort_exprs1 = vec![sort_expr("nullable_col", &schema)]; - let sort1 = sort_exec(sort_exprs1, source1.clone()); - let sort_exprs2 = vec![ - sort_expr("nullable_col", &schema), - sort_expr("non_nullable_col", &schema), - ]; - let repartition = repartition_exec(source1); - let spm = sort_preserving_merge_exec(sort_exprs2, repartition); - - let parquet_sort_exprs = vec![sort_expr("nullable_col", &schema)]; - let source2 = parquet_exec_sorted(&schema, parquet_sort_exprs.clone()); - - let union = union_exec(vec![sort1, source2, spm]); - let physical_plan = sort_preserving_merge_exec(parquet_sort_exprs, union); - - // The plan is not valid as it is -- the input ordering requirement - // of the `SortPreservingMergeExec` under the third child of the - // `UnionExec` is not met. We should add a `SortExec` below it. - // At the same time, this ordering requirement is unnecessarily fine. - // The final plan should be valid AND the ordering of the third child - // shouldn't be finer than necessary. - let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; - // Should adjust the requirement in the third input of the union so - // that it is not unnecessarily fine. 
- let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_union_inputs_different_sorted7() -> Result<()> { - let schema = create_test_schema()?; - - let source1 = parquet_exec(&schema); - let sort_exprs1 = vec![ - sort_expr("nullable_col", &schema), - sort_expr("non_nullable_col", &schema), - ]; - let sort_exprs3 = vec![sort_expr("nullable_col", &schema)]; - let sort1 = sort_exec(sort_exprs1.clone(), source1.clone()); - let sort2 = sort_exec(sort_exprs1, source1); - - let union = union_exec(vec![sort1, sort2]); - let physical_plan = sort_preserving_merge_exec(sort_exprs3, union); - - // Union has unnecessarily fine ordering below it. We should be able to replace them with absolutely necessary ordering. - let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; - // Union preserves the inputs ordering and we should not change any of the SortExecs under UnionExec - let expected_output = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; - assert_optimized!(expected_input, expected_output, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_union_inputs_different_sorted8() -> Result<()> { - let schema = create_test_schema()?; - - let source1 = parquet_exec(&schema); - let sort_exprs1 = vec![ - sort_expr("nullable_col", &schema), - sort_expr("non_nullable_col", &schema), - ]; - let sort_exprs2 = vec![ - sort_expr_options( - "nullable_col", - &schema, - SortOptions { - descending: true, - nulls_first: false, - }, - ), - sort_expr_options( - "non_nullable_col", - &schema, - SortOptions { - descending: true, - nulls_first: false, - }, - ), - ]; - let sort1 = sort_exec(sort_exprs1, source1.clone()); - let sort2 = sort_exec(sort_exprs2, source1); - - let physical_plan = union_exec(vec![sort1, sort2]); - - // The `UnionExec` doesn't preserve any of the inputs ordering in the - // example below. 
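A union only exposes an ordering that all of its inputs share; with one input sorted `ASC` and the other `DESC NULLS LAST`, no common ordering survives, so the sorts feed no downstream consumer. A generic way to observe this through plan properties (a sketch, not the exact check the rule performs):

```rust
use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties};

/// Does the operator advertise any output ordering at all? If not, a
/// `SortExec` directly beneath it does work that nothing above can use.
fn exposes_output_ordering(plan: &dyn ExecutionPlan) -> bool {
    plan.output_ordering().is_some()
}
```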
- let expected_input = ["UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " SortExec: expr=[nullable_col@0 DESC NULLS LAST,non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; - // Since `UnionExec` doesn't preserve ordering in the plan above. - // We shouldn't keep SortExecs in the plan. - let expected_optimized = ["UnionExec", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_window_multi_path_sort() -> Result<()> { - let schema = create_test_schema()?; - - let sort_exprs1 = vec![ - sort_expr("nullable_col", &schema), - sort_expr("non_nullable_col", &schema), - ]; - let sort_exprs2 = vec![sort_expr("nullable_col", &schema)]; - // reverse sorting of sort_exprs2 - let sort_exprs3 = vec![sort_expr_options( - "nullable_col", - &schema, - SortOptions { - descending: true, - nulls_first: false, - }, - )]; - let source1 = parquet_exec_sorted(&schema, sort_exprs1); - let source2 = parquet_exec_sorted(&schema, sort_exprs2); - let sort1 = sort_exec(sort_exprs3.clone(), source1); - let sort2 = sort_exec(sort_exprs3.clone(), source2); - - let union = union_exec(vec![sort1, sort2]); - let spm = sort_preserving_merge_exec(sort_exprs3.clone(), union); - let physical_plan = bounded_window_exec("nullable_col", sort_exprs3, spm); - - // The `WindowAggExec` gets its sorting from multiple children jointly. - // During the removal of `SortExec`s, it should be able to remove the - // corresponding SortExecs together. Also, the inputs of these `SortExec`s - // are not necessarily the same to be able to remove them. 
- let expected_input = [ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " SortPreservingMergeExec: [nullable_col@0 DESC NULLS LAST]", - " UnionExec", - " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC]", - " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]"]; - let expected_optimized = [ - "WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(NULL), is_causal: false }]", - " SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]"]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_window_multi_path_sort2() -> Result<()> { - let schema = create_test_schema()?; - - let sort_exprs1 = vec![ - sort_expr("nullable_col", &schema), - sort_expr("non_nullable_col", &schema), - ]; - let sort_exprs2 = vec![sort_expr("nullable_col", &schema)]; - let source1 = parquet_exec_sorted(&schema, sort_exprs2.clone()); - let source2 = parquet_exec_sorted(&schema, sort_exprs2.clone()); - let sort1 = sort_exec(sort_exprs1.clone(), source1); - let sort2 = sort_exec(sort_exprs1.clone(), source2); - - let union = union_exec(vec![sort1, sort2]); - let spm = Arc::new(SortPreservingMergeExec::new(sort_exprs1, union)) as _; - let physical_plan = bounded_window_exec("nullable_col", sort_exprs2, spm); - - // The `WindowAggExec` can get its required sorting from the leaf nodes directly. 
- // The unnecessary SortExecs should be removed - let expected_input = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]"]; - let expected_optimized = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]"]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_union_inputs_different_sorted_with_limit() -> Result<()> { - let schema = create_test_schema()?; - - let source1 = parquet_exec(&schema); - let sort_exprs1 = vec![ - sort_expr("nullable_col", &schema), - sort_expr("non_nullable_col", &schema), - ]; - let sort_exprs2 = vec![ - sort_expr("nullable_col", &schema), - sort_expr_options( - "non_nullable_col", - &schema, - SortOptions { - descending: true, - nulls_first: false, - }, - ), - ]; - let sort_exprs3 = vec![sort_expr("nullable_col", &schema)]; - let sort1 = sort_exec(sort_exprs1, source1.clone()); - - let sort2 = sort_exec(sort_exprs2, source1); - let limit = local_limit_exec(sort2); - let limit = global_limit_exec(limit); - - let union = union_exec(vec![sort1, limit]); - let physical_plan = sort_preserving_merge_exec(sort_exprs3, union); - - // Should not change the unnecessarily fine `SortExec`s because there is `LimitExec` - let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " GlobalLimitExec: skip=0, fetch=100", - " LocalLimitExec: fetch=100", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; - let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " GlobalLimitExec: skip=0, fetch=100", - " LocalLimitExec: fetch=100", - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", - " 
ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_sort_merge_join_order_by_left() -> Result<()> { - let left_schema = create_test_schema()?; - let right_schema = create_test_schema2()?; - - let left = parquet_exec(&left_schema); - let right = parquet_exec(&right_schema); - - // Join on (nullable_col == col_a) - let join_on = vec![( - Arc::new(Column::new_with_schema("nullable_col", &left.schema()).unwrap()) - as _, - Arc::new(Column::new_with_schema("col_a", &right.schema()).unwrap()) as _, - )]; - - let join_types = vec![ - JoinType::Inner, - JoinType::Left, - JoinType::Right, - JoinType::Full, - JoinType::LeftSemi, - JoinType::LeftAnti, - ]; - for join_type in join_types { - let join = - sort_merge_join_exec(left.clone(), right.clone(), &join_on, &join_type); - let sort_exprs = vec![ - sort_expr("nullable_col", &join.schema()), - sort_expr("non_nullable_col", &join.schema()), - ]; - let physical_plan = sort_preserving_merge_exec(sort_exprs.clone(), join); - - let join_plan = format!( - "SortMergeJoin: join_type={join_type}, on=[(nullable_col@0, col_a@0)]" - ); - let join_plan2 = format!( - " SortMergeJoin: join_type={join_type}, on=[(nullable_col@0, col_a@0)]" - ); - let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - join_plan2.as_str(), - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]"]; - let expected_optimized = match join_type { - JoinType::Inner - | JoinType::Left - | JoinType::LeftSemi - | JoinType::LeftAnti => { - // can push down the sort requirements and save 1 SortExec - vec![ - join_plan.as_str(), - " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]", - ] - } - _ => { - // can not push down the sort requirements - vec![ - "SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]", - join_plan2.as_str(), - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]", - ] - } - }; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - } - Ok(()) - } - - #[tokio::test] - async fn test_sort_merge_join_order_by_right() -> Result<()> { - let left_schema = create_test_schema()?; - let right_schema = create_test_schema2()?; - - let left = parquet_exec(&left_schema); - let right = parquet_exec(&right_schema); - - // Join on (nullable_col == col_a) - let join_on = vec![( - Arc::new(Column::new_with_schema("nullable_col", &left.schema()).unwrap()) - as _, - Arc::new(Column::new_with_schema("col_a", &right.schema()).unwrap()) as _, - )]; - - let join_types = vec![ - JoinType::Inner, - JoinType::Left, - JoinType::Right, - JoinType::Full, - JoinType::RightAnti, - ]; - for join_type in join_types { - let join = - sort_merge_join_exec(left.clone(), right.clone(), &join_on, &join_type); - let sort_exprs = 
vec![ - sort_expr("col_a", &join.schema()), - sort_expr("col_b", &join.schema()), - ]; - let physical_plan = sort_preserving_merge_exec(sort_exprs, join); - - let join_plan = format!( - "SortMergeJoin: join_type={join_type}, on=[(nullable_col@0, col_a@0)]" - ); - let spm_plan = match join_type { - JoinType::RightAnti => { - "SortPreservingMergeExec: [col_a@0 ASC,col_b@1 ASC]" - } - _ => "SortPreservingMergeExec: [col_a@2 ASC,col_b@3 ASC]", - }; - let join_plan2 = format!( - " SortMergeJoin: join_type={join_type}, on=[(nullable_col@0, col_a@0)]" - ); - let expected_input = [spm_plan, - join_plan2.as_str(), - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]"]; - let expected_optimized = match join_type { - JoinType::Inner | JoinType::Right | JoinType::RightAnti => { - // can push down the sort requirements and save 1 SortExec - vec![ - join_plan.as_str(), - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " SortExec: expr=[col_a@0 ASC,col_b@1 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]", - ] - } - _ => { - // can not push down the sort requirements for Left and Full join. - vec![ - "SortExec: expr=[col_a@2 ASC,col_b@3 ASC], preserve_partitioning=[false]", - join_plan2.as_str(), - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]", - ] - } - }; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - } - Ok(()) - } - - #[tokio::test] - async fn test_sort_merge_join_complex_order_by() -> Result<()> { - let left_schema = create_test_schema()?; - let right_schema = create_test_schema2()?; - - let left = parquet_exec(&left_schema); - let right = parquet_exec(&right_schema); - - // Join on (nullable_col == col_a) - let join_on = vec![( - Arc::new(Column::new_with_schema("nullable_col", &left.schema()).unwrap()) - as _, - Arc::new(Column::new_with_schema("col_a", &right.schema()).unwrap()) as _, - )]; - - let join = sort_merge_join_exec(left, right, &join_on, &JoinType::Inner); - - // order by (col_b, col_a) - let sort_exprs1 = vec![ - sort_expr("col_b", &join.schema()), - sort_expr("col_a", &join.schema()), - ]; - let physical_plan = sort_preserving_merge_exec(sort_exprs1, join.clone()); - - let expected_input = ["SortPreservingMergeExec: [col_b@3 ASC,col_a@2 ASC]", - " SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]"]; - - // can not push down the sort requirements, need to add SortExec - let expected_optimized = ["SortExec: expr=[col_b@3 ASC,col_a@2 ASC], preserve_partitioning=[false]", - " SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, 
col_b]"]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - // order by (nullable_col, col_b, col_a) - let sort_exprs2 = vec![ - sort_expr("nullable_col", &join.schema()), - sort_expr("col_b", &join.schema()), - sort_expr("col_a", &join.schema()), - ]; - let physical_plan = sort_preserving_merge_exec(sort_exprs2, join); - - let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,col_b@3 ASC,col_a@2 ASC]", - " SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]"]; - - // can not push down the sort requirements, need to add SortExec - let expected_optimized = ["SortExec: expr=[nullable_col@0 ASC,col_b@3 ASC,col_a@2 ASC], preserve_partitioning=[false]", - " SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", - " SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b]"]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_multiple_sort_window_exec() -> Result<()> { - let schema = create_test_schema()?; - let source = memory_exec(&schema); - - let sort_exprs1 = vec![sort_expr("nullable_col", &schema)]; - let sort_exprs2 = vec![ - sort_expr("nullable_col", &schema), - sort_expr("non_nullable_col", &schema), - ]; - - let sort1 = sort_exec(sort_exprs1.clone(), source); - let window_agg1 = - bounded_window_exec("non_nullable_col", sort_exprs1.clone(), sort1); - let window_agg2 = - bounded_window_exec("non_nullable_col", sort_exprs2, window_agg1); - // let filter_exec = sort_exec; - let physical_plan = - bounded_window_exec("non_nullable_col", sort_exprs1, window_agg2); - - let expected_input = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " MemoryExec: partitions=1, partition_sizes=[0]"]; - - let expected_optimized = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), 
end_bound: CurrentRow, is_causal: false }], mode=[Sorted]",
- " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]",
- " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC], preserve_partitioning=[false]",
- " MemoryExec: partitions=1, partition_sizes=[0]"];
- assert_optimized!(expected_input, expected_optimized, physical_plan, true);
-
- Ok(())
- }
-
- #[tokio::test]
- async fn test_multilayer_coalesce_partitions() -> Result<()> {
- let schema = create_test_schema()?;
-
- let source1 = parquet_exec(&schema);
- let repartition = repartition_exec(source1);
- let coalesce = Arc::new(CoalescePartitionsExec::new(repartition)) as _;
- // Add a dummy operator that propagates the sort requirement, to test whether the
- // sort can still be removed when multiple layers sit between it and the coalesce
- let filter = filter_exec(
- Arc::new(NotExpr::new(
- col("non_nullable_col", schema.as_ref()).unwrap(),
- )),
- coalesce,
- );
- let sort_exprs = vec![sort_expr("nullable_col", &schema)];
- let physical_plan = sort_exec(sort_exprs, filter);
-
- // CoalescePartitionsExec and SortExec are not directly consecutive. In this case,
- // we should still be able to parallelize the sort, given that the executors in
- // between do not require a single partition.
- let expected_input = ["SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]",
- " FilterExec: NOT non_nullable_col@1",
- " CoalescePartitionsExec",
- " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
- " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"];
- let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC]",
- " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[true]",
- " FilterExec: NOT non_nullable_col@1",
- " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
- " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"];
- assert_optimized!(expected_input, expected_optimized, physical_plan, true);
-
- Ok(())
- }
-
- #[tokio::test]
- // After the changes to sort enforcement, running EnforceSorting -> EnforceDistribution -> EnforceSorting
- // should produce the same result as running EnforceDistribution -> EnforceSorting.
- // This lets us run EnforceSorting before EnforceDistribution if desired, given that
- // EnforceSorting also runs at least once after the last EnforceDistribution, since
- // EnforceDistribution may invalidate the ordering invariant.
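- // Stated compactly, the invariant checked below is (a sketch; `run_rules` is a
- // hypothetical helper that applies the listed rules in order):
- //   run_rules([EnforceDistribution, EnforceSorting], plan)
- //     == run_rules([EnforceSorting, EnforceDistribution, EnforceSorting], plan)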
- async fn test_commutativity() -> Result<()> {
- let schema = create_test_schema()?;
-
- let session_ctx = SessionContext::new();
- let state = session_ctx.state();
-
- let memory_exec = memory_exec(&schema);
- let sort_exprs = vec![sort_expr("nullable_col", &schema)];
- let window = bounded_window_exec("nullable_col", sort_exprs.clone(), memory_exec);
- let repartition = repartition_exec(window);
-
- let orig_plan =
- Arc::new(SortExec::new(sort_exprs, repartition)) as Arc<dyn ExecutionPlan>;
- let actual = get_plan_string(&orig_plan);
- let expected_input = vec![
- "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]",
- " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
- " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]",
- " MemoryExec: partitions=1, partition_sizes=[0]",
- ];
- assert_eq!(
- expected_input, actual,
- "\n**Original Plan Mismatch\n\nexpected:\n\n{expected_input:#?}\nactual:\n\n{actual:#?}\n\n"
- );
-
- let mut plan = orig_plan.clone();
- let rules = vec![
- Arc::new(EnforceDistribution::new()) as Arc<dyn PhysicalOptimizerRule>,
- Arc::new(EnforceSorting::new()) as Arc<dyn PhysicalOptimizerRule>,
- ];
- for rule in rules {
- plan = rule.optimize(plan, state.config_options())?;
- }
- let first_plan = plan.clone();
-
- let mut plan = orig_plan.clone();
- let rules = vec![
- Arc::new(EnforceSorting::new()) as Arc<dyn PhysicalOptimizerRule>,
- Arc::new(EnforceDistribution::new()) as Arc<dyn PhysicalOptimizerRule>,
- Arc::new(EnforceSorting::new()) as Arc<dyn PhysicalOptimizerRule>,
- ];
- for rule in rules {
- plan = rule.optimize(plan, state.config_options())?;
- }
- let second_plan = plan.clone();
-
- assert_eq!(get_plan_string(&first_plan), get_plan_string(&second_plan));
- Ok(())
- }
-
- #[tokio::test]
- async fn test_coalesce_propagate() -> Result<()> {
- let schema = create_test_schema()?;
- let source = memory_exec(&schema);
- let repartition = repartition_exec(source);
- let coalesce_partitions = Arc::new(CoalescePartitionsExec::new(repartition));
- let repartition = repartition_exec(coalesce_partitions);
- let sort_exprs = vec![sort_expr("nullable_col", &schema)];
- // Add a local sort
- let sort = Arc::new(
- SortExec::new(sort_exprs.clone(), repartition)
- .with_preserve_partitioning(true),
- ) as _;
- let spm = sort_preserving_merge_exec(sort_exprs.clone(), sort);
- let sort = sort_exec(sort_exprs, spm);
-
- let physical_plan = sort.clone();
- // The sort-parallelization rule should break the CoalescePartitionsExec + SortExec
- // linkage when the SortExec is a global sort. Note that the input plan is not valid
- // as-is; that is why a SortExec is added before the SortPreservingMergeExec.
- let expected_input = ["SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " CoalescePartitionsExec", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " MemoryExec: partitions=1, partition_sizes=[0]"]; - let expected_optimized = [ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " MemoryExec: partitions=1, partition_sizes=[0]", - ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[tokio::test] - async fn test_with_lost_ordering_bounded() -> Result<()> { - let schema = create_test_schema3()?; - let sort_exprs = vec![sort_expr("a", &schema)]; - let source = csv_exec_sorted(&schema, sort_exprs); - let repartition_rr = repartition_exec(source); - let repartition_hash = Arc::new(RepartitionExec::try_new( - repartition_rr, - Partitioning::Hash(vec![col("c", &schema).unwrap()], 10), - )?) as _; - let coalesce_partitions = coalesce_partitions_exec(repartition_hash); - let physical_plan = sort_exec(vec![sort_expr("a", &schema)], coalesce_partitions); - - let expected_input = ["SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], has_header=false"]; - let expected_optimized = ["SortPreservingMergeExec: [a@0 ASC]", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], has_header=false"]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - - Ok(()) - } - - #[rstest] - #[tokio::test] - async fn test_with_lost_ordering_unbounded_bounded( - #[values(false, true)] source_unbounded: bool, - ) -> Result<()> { - let schema = create_test_schema3()?; - let sort_exprs = vec![sort_expr("a", &schema)]; - // create either bounded or unbounded source - let source = if source_unbounded { - stream_exec_ordered(&schema, sort_exprs) - } else { - csv_exec_ordered(&schema, sort_exprs) - }; - let repartition_rr = repartition_exec(source); - let repartition_hash = Arc::new(RepartitionExec::try_new( - repartition_rr, - Partitioning::Hash(vec![col("c", &schema).unwrap()], 10), - )?) 
as _; - let coalesce_partitions = coalesce_partitions_exec(repartition_hash); - let physical_plan = sort_exec(vec![sort_expr("a", &schema)], coalesce_partitions); - - // Expected inputs unbounded and bounded - let expected_input_unbounded = vec![ - "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]", - ]; - let expected_input_bounded = vec![ - "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], has_header=true", - ]; - - // Expected unbounded result (same for with and without flag) - let expected_optimized_unbounded = vec![ - "SortPreservingMergeExec: [a@0 ASC]", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]", - ]; - - // Expected bounded results with and without flag - let expected_optimized_bounded = vec![ - "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], has_header=true", - ]; - let expected_optimized_bounded_parallelize_sort = vec![ - "SortPreservingMergeExec: [a@0 ASC]", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], has_header=true", - ]; - let (expected_input, expected_optimized, expected_optimized_sort_parallelize) = - if source_unbounded { - ( - expected_input_unbounded, - expected_optimized_unbounded.clone(), - expected_optimized_unbounded, - ) - } else { - ( - expected_input_bounded, - expected_optimized_bounded, - expected_optimized_bounded_parallelize_sort, - ) - }; - assert_optimized!( - expected_input, - expected_optimized, - physical_plan.clone(), - false - ); - assert_optimized!( - expected_input, - expected_optimized_sort_parallelize, - physical_plan, - true - ); - - Ok(()) - } - - #[tokio::test] - async fn test_do_not_pushdown_through_spm() -> Result<()> { - let schema = create_test_schema3()?; - let sort_exprs = vec![sort_expr("a", &schema), sort_expr("b", &schema)]; - let source = csv_exec_sorted(&schema, sort_exprs.clone()); - let repartition_rr = repartition_exec(source); - let spm = sort_preserving_merge_exec(sort_exprs, repartition_rr); - let physical_plan = sort_exec(vec![sort_expr("b", &schema)], spm); - - let expected_input = ["SortExec: expr=[b@1 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", - " RepartitionExec: 
partitioning=RoundRobinBatch(10), input_partitions=1", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], has_header=false",]; - let expected_optimized = ["SortExec: expr=[b@1 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], has_header=false",]; - assert_optimized!(expected_input, expected_optimized, physical_plan, false); - - Ok(()) - } - - #[tokio::test] - async fn test_pushdown_through_spm() -> Result<()> { - let schema = create_test_schema3()?; - let sort_exprs = vec![sort_expr("a", &schema), sort_expr("b", &schema)]; - let source = csv_exec_sorted(&schema, sort_exprs.clone()); - let repartition_rr = repartition_exec(source); - let spm = sort_preserving_merge_exec(sort_exprs, repartition_rr); - let physical_plan = sort_exec( - vec![ - sort_expr("a", &schema), - sort_expr("b", &schema), - sort_expr("c", &schema), - ], - spm, - ); - - let expected_input = ["SortExec: expr=[a@0 ASC,b@1 ASC,c@2 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], has_header=false",]; - let expected_optimized = ["SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", - " SortExec: expr=[a@0 ASC,b@1 ASC,c@2 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], has_header=false",]; - assert_optimized!(expected_input, expected_optimized, physical_plan, false); - - Ok(()) - } - - #[tokio::test] - async fn test_window_multi_layer_requirement() -> Result<()> { - let schema = create_test_schema3()?; - let sort_exprs = vec![sort_expr("a", &schema), sort_expr("b", &schema)]; - let source = csv_exec_sorted(&schema, vec![]); - let sort = sort_exec(sort_exprs.clone(), source); - let repartition = repartition_exec(sort); - let repartition = spr_repartition_exec(repartition); - let spm = sort_preserving_merge_exec(sort_exprs.clone(), repartition); - - let physical_plan = bounded_window_exec("a", sort_exprs, spm); - - let expected_input = [ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC,b@1 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[a@0 ASC,b@1 ASC], preserve_partitioning=[false]", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", - ]; - let expected_optimized = [ - "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - " SortExec: expr=[a@0 ASC,b@1 ASC], 
preserve_partitioning=[false]", - " CoalescePartitionsExec", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", - ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, false); - - Ok(()) - } - - #[tokio::test] - async fn test_replace_with_partial_sort() -> Result<()> { - let schema = create_test_schema3()?; - let input_sort_exprs = vec![sort_expr("a", &schema)]; - let unbounded_input = stream_exec_ordered(&schema, input_sort_exprs); - - let physical_plan = sort_exec( - vec![sort_expr("a", &schema), sort_expr("c", &schema)], - unbounded_input, - ); - - let expected_input = [ - "SortExec: expr=[a@0 ASC,c@2 ASC], preserve_partitioning=[false]", - " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]" - ]; - let expected_optimized = [ - "PartialSortExec: expr=[a@0 ASC,c@2 ASC], common_prefix_length=[1]", - " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]", - ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - Ok(()) - } - - #[tokio::test] - async fn test_replace_with_partial_sort2() -> Result<()> { - let schema = create_test_schema3()?; - let input_sort_exprs = vec![sort_expr("a", &schema), sort_expr("c", &schema)]; - let unbounded_input = stream_exec_ordered(&schema, input_sort_exprs); - - let physical_plan = sort_exec( - vec![ - sort_expr("a", &schema), - sort_expr("c", &schema), - sort_expr("d", &schema), - ], - unbounded_input, - ); - - let expected_input = [ - "SortExec: expr=[a@0 ASC,c@2 ASC,d@3 ASC], preserve_partitioning=[false]", - " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC, c@2 ASC]" - ]; - // let optimized - let expected_optimized = [ - "PartialSortExec: expr=[a@0 ASC,c@2 ASC,d@3 ASC], common_prefix_length=[2]", - " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC, c@2 ASC]", - ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - Ok(()) - } - - #[tokio::test] - async fn test_not_replaced_with_partial_sort_for_bounded_input() -> Result<()> { - let schema = create_test_schema3()?; - let input_sort_exprs = vec![sort_expr("b", &schema), sort_expr("c", &schema)]; - let parquet_input = parquet_exec_sorted(&schema, input_sort_exprs); - - let physical_plan = sort_exec( - vec![ - sort_expr("a", &schema), - sort_expr("b", &schema), - sort_expr("c", &schema), - ], - parquet_input, - ); - let expected_input = [ - "SortExec: expr=[a@0 ASC,b@1 ASC,c@2 ASC], preserve_partitioning=[false]", - " ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[b@1 ASC, c@2 ASC]" - ]; - let expected_no_change = expected_input; - assert_optimized!(expected_input, expected_no_change, physical_plan, false); - Ok(()) - } - - #[tokio::test] - async fn test_not_replaced_with_partial_sort_for_unbounded_input() -> Result<()> { - let schema = create_test_schema3()?; - let input_sort_exprs = vec![sort_expr("b", &schema), sort_expr("c", &schema)]; - let unbounded_input = stream_exec_ordered(&schema, input_sort_exprs); - - let physical_plan = sort_exec( - vec![ - sort_expr("a", &schema), - sort_expr("b", &schema), - sort_expr("c", &schema), - ], - 
unbounded_input,
- );
- let expected_input = [
- "SortExec: expr=[a@0 ASC,b@1 ASC,c@2 ASC], preserve_partitioning=[false]",
- " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]"
- ];
- let expected_no_change = expected_input;
- assert_optimized!(expected_input, expected_no_change, physical_plan, true);
- Ok(())
- }
-
- #[tokio::test]
- async fn test_push_with_required_input_ordering_prohibited() -> Result<()> {
- // SortExec: expr=[b] <-- can't push this down
- // RequiredInputOrder expr=[a] <-- this requires input sorted by a, and preserves the input order
- // SortExec: expr=[a]
- // MemoryExec
- let schema = create_test_schema3()?;
- let sort_exprs_a = vec![sort_expr("a", &schema)];
- let sort_exprs_b = vec![sort_expr("b", &schema)];
- let plan = memory_exec(&schema);
- let plan = sort_exec(sort_exprs_a.clone(), plan);
- let plan = RequirementsTestExec::new(plan)
- .with_required_input_ordering(sort_exprs_a)
- .with_maintains_input_order(true)
- .into_arc();
- let plan = sort_exec(sort_exprs_b, plan);
-
- let expected_input = [
- "SortExec: expr=[b@1 ASC], preserve_partitioning=[false]",
- " RequiredInputOrderingExec",
- " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]",
- " MemoryExec: partitions=1, partition_sizes=[0]",
- ];
- // should not be able to push down the sort
- let expected_no_change = expected_input;
- assert_optimized!(expected_input, expected_no_change, plan, true);
- Ok(())
- }
-
- // Test that the sort can be pushed through when the required input ordering is satisfied
- #[tokio::test]
- async fn test_push_with_required_input_ordering_allowed() -> Result<()> {
- // SortExec: expr=[a,b] <-- can push this down (as it is compatible with the required input ordering)
- // RequiredInputOrder expr=[a] <-- this requires input sorted by a, and preserves the input order
- // SortExec: expr=[a]
- // MemoryExec
- let schema = create_test_schema3()?;
- let sort_exprs_a = vec![sort_expr("a", &schema)];
- let sort_exprs_ab = vec![sort_expr("a", &schema), sort_expr("b", &schema)];
- let plan = memory_exec(&schema);
- let plan = sort_exec(sort_exprs_a.clone(), plan);
- let plan = RequirementsTestExec::new(plan)
- .with_required_input_ordering(sort_exprs_a)
- .with_maintains_input_order(true)
- .into_arc();
- let plan = sort_exec(sort_exprs_ab, plan);
-
- let expected_input = [
- "SortExec: expr=[a@0 ASC,b@1 ASC], preserve_partitioning=[false]",
- " RequiredInputOrderingExec",
- " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]",
- " MemoryExec: partitions=1, partition_sizes=[0]",
- ];
- // should be able to push down the sort
- let expected = [
- "RequiredInputOrderingExec",
- " SortExec: expr=[a@0 ASC,b@1 ASC], preserve_partitioning=[false]",
- " MemoryExec: partitions=1, partition_sizes=[0]",
- ];
- assert_optimized!(expected_input, expected, plan, true);
- Ok(())
- }
-}
diff --git a/datafusion/physical-optimizer/src/lib.rs b/datafusion/physical-optimizer/src/lib.rs
index 3329a00931099..159e41f908770 100644
--- a/datafusion/physical-optimizer/src/lib.rs
+++ b/datafusion/physical-optimizer/src/lib.rs
@@ -18,6 +18,7 @@
 #![deny(clippy::clone_on_ref_ptr)]
 pub mod aggregate_statistics;
+pub mod enforce_distribution;
 pub mod enforce_sorting;
 pub mod limited_distinct_aggregation;
 mod optimizer;
diff --git a/datafusion/physical-optimizer/src/limited_distinct_aggregation.rs b/datafusion/physical-optimizer/src/limited_distinct_aggregation.rs
index ada0f3410123d..f8c33136736cf 100644
--- a/datafusion/physical-optimizer/src/limited_distinct_aggregation.rs
+++ b/datafusion/physical-optimizer/src/limited_distinct_aggregation.rs
@@ -188,424 +188,3 @@ impl PhysicalOptimizerRule for LimitedDistinctAggregation {
 true
 }
 }
-
-#[cfg(test)]
-mod tests {
-
- use super::*;
- use crate::physical_optimizer::enforce_distribution::tests::{
- parquet_exec_with_sort, schema, trim_plan_display,
- };
- use crate::physical_plan::aggregates::PhysicalGroupBy;
- use crate::physical_plan::collect;
- use crate::physical_plan::memory::MemoryExec;
- use crate::prelude::SessionContext;
- use datafusion_physical_optimizer::aggregate_statistics::tests::TestAggregate;
-
- use arrow::array::Int32Array;
- use arrow::compute::SortOptions;
- use arrow::datatypes::{DataType, Field, Schema};
- use arrow::record_batch::RecordBatch;
- use arrow::util::pretty::pretty_format_batches;
- use arrow_schema::SchemaRef;
- use datafusion_execution::config::SessionConfig;
- use datafusion_expr::Operator;
- use datafusion_physical_expr::expressions::{cast, col};
- use datafusion_physical_expr::{expressions, PhysicalExpr, PhysicalSortExpr};
- use datafusion_physical_plan::aggregates::AggregateMode;
- use datafusion_physical_plan::displayable;
-
- fn mock_data() -> Result<Arc<dyn ExecutionPlan>> {
- let schema = Arc::new(Schema::new(vec![
- Field::new("a", DataType::Int32, true),
- Field::new("b", DataType::Int32, true),
- ]));
-
- let batch = RecordBatch::try_new(
- Arc::clone(&schema),
- vec![
- Arc::new(Int32Array::from(vec![
- Some(1),
- Some(2),
- None,
- Some(1),
- Some(4),
- Some(5),
- ])),
- Arc::new(Int32Array::from(vec![
- Some(1),
- None,
- Some(6),
- Some(2),
- Some(8),
- Some(9),
- ])),
- ],
- )?;
-
- Ok(Arc::new(MemoryExec::try_new(
- &[vec![batch]],
- Arc::clone(&schema),
- None,
- )?))
- }
-
- fn assert_plan_matches_expected(
- plan: &Arc<dyn ExecutionPlan>,
- expected: &[&str],
- ) -> Result<()> {
- let expected_lines: Vec<&str> = expected.to_vec();
- let session_ctx = SessionContext::new();
- let state = session_ctx.state();
-
- let optimized = LimitedDistinctAggregation::new()
- .optimize(Arc::clone(plan), state.config_options())?;
-
- let optimized_result = displayable(optimized.as_ref()).indent(true).to_string();
- let actual_lines = trim_plan_display(&optimized_result);
-
- assert_eq!(
- &expected_lines, &actual_lines,
- "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
- expected_lines, actual_lines
- );
-
- Ok(())
- }
-
- async fn assert_results_match_expected(
- plan: Arc<dyn ExecutionPlan>,
- expected: &str,
- ) -> Result<()> {
- let cfg = SessionConfig::new().with_target_partitions(1);
- let ctx = SessionContext::new_with_config(cfg);
- let batches = collect(plan, ctx.task_ctx()).await?;
- let actual = format!("{}", pretty_format_batches(&batches)?);
- assert_eq!(actual, expected);
- Ok(())
- }
-
- pub fn build_group_by(
- input_schema: &SchemaRef,
- columns: Vec<String>,
- ) -> PhysicalGroupBy {
- let mut group_by_expr: Vec<(Arc<dyn PhysicalExpr>, String)> = vec![];
- for column in columns.iter() {
- group_by_expr.push((col(column, input_schema).unwrap(), column.to_string()));
- }
- PhysicalGroupBy::new_single(group_by_expr.clone())
- }
-
- #[tokio::test]
- async fn test_partial_final() -> Result<()> {
- let source = mock_data()?;
- let schema = source.schema();
-
- // `SELECT a FROM MemoryExec GROUP BY a LIMIT 4;`, Partial/Final AggregateExec
- let partial_agg = AggregateExec::try_new(
- AggregateMode::Partial,
- build_group_by(&schema.clone(), vec!["a".to_string()]),
- vec![], /* aggr_expr */
- vec![], /* filter_expr */
- source, /* input */
- schema.clone(), /* input_schema */
- )?;
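- // A sketch of the two-phase plan this test hand-builds for the SQL above:
- //   LocalLimitExec(4) <- AggregateExec(Final, gby=a) <- AggregateExec(Partial, gby=a) <- MemoryExec
- // The rule under test is then expected to attach `lim=[4]` to both aggregate phases.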
- let final_agg = AggregateExec::try_new(
- AggregateMode::Final,
- build_group_by(&schema.clone(), vec!["a".to_string()]),
- vec![], /* aggr_expr */
- vec![], /* filter_expr */
- Arc::new(partial_agg), /* input */
- schema.clone(), /* input_schema */
- )?;
- let limit_exec = LocalLimitExec::new(
- Arc::new(final_agg),
- 4, // fetch
- );
- // expected to push the limit to the Partial and Final AggregateExecs
- let expected = [
- "LocalLimitExec: fetch=4",
- "AggregateExec: mode=Final, gby=[a@0 as a], aggr=[], lim=[4]",
- "AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[], lim=[4]",
- "MemoryExec: partitions=1, partition_sizes=[1]",
- ];
- let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
- assert_plan_matches_expected(&plan, &expected)?;
- let expected = r#"
-+---+
-| a |
-+---+
-| 1 |
-| 2 |
-|   |
-| 4 |
-+---+
-"#
- .trim();
- assert_results_match_expected(plan, expected).await?;
- Ok(())
- }
-
- #[tokio::test]
- async fn test_single_local() -> Result<()> {
- let source = mock_data()?;
- let schema = source.schema();
-
- // `SELECT a FROM MemoryExec GROUP BY a LIMIT 4;`, Single AggregateExec
- let single_agg = AggregateExec::try_new(
- AggregateMode::Single,
- build_group_by(&schema.clone(), vec!["a".to_string()]),
- vec![], /* aggr_expr */
- vec![], /* filter_expr */
- source, /* input */
- schema.clone(), /* input_schema */
- )?;
- let limit_exec = LocalLimitExec::new(
- Arc::new(single_agg),
- 4, // fetch
- );
- // expected to push the limit to the AggregateExec
- let expected = [
- "LocalLimitExec: fetch=4",
- "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], lim=[4]",
- "MemoryExec: partitions=1, partition_sizes=[1]",
- ];
- let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
- assert_plan_matches_expected(&plan, &expected)?;
- let expected = r#"
-+---+
-| a |
-+---+
-| 1 |
-| 2 |
-|   |
-| 4 |
-+---+
-"#
- .trim();
- assert_results_match_expected(plan, expected).await?;
- Ok(())
- }
-
- #[tokio::test]
- async fn test_single_global() -> Result<()> {
- let source = mock_data()?;
- let schema = source.schema();
-
- // `SELECT a FROM MemoryExec GROUP BY a LIMIT 4;`, Single AggregateExec
- let single_agg = AggregateExec::try_new(
- AggregateMode::Single,
- build_group_by(&schema.clone(), vec!["a".to_string()]),
- vec![], /* aggr_expr */
- vec![], /* filter_expr */
- source, /* input */
- schema.clone(), /* input_schema */
- )?;
- let limit_exec = GlobalLimitExec::new(
- Arc::new(single_agg),
- 1, // skip
- Some(3), // fetch
- );
- // expected to push the skip+fetch limit to the AggregateExec
- let expected = [
- "GlobalLimitExec: skip=1, fetch=3",
- "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], lim=[4]",
- "MemoryExec: partitions=1, partition_sizes=[1]",
- ];
- let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
- assert_plan_matches_expected(&plan, &expected)?;
- let expected = r#"
-+---+
-| a |
-+---+
-| 2 |
-|   |
-| 4 |
-+---+
-"#
- .trim();
- assert_results_match_expected(plan, expected).await?;
- Ok(())
- }
-
- #[tokio::test]
- async fn test_distinct_cols_different_than_group_by_cols() -> Result<()> {
- let source = mock_data()?;
- let schema = source.schema();
-
- // `SELECT distinct a FROM MemoryExec GROUP BY a, b LIMIT 4;`, Single/Single AggregateExec
- let group_by_agg = AggregateExec::try_new(
- AggregateMode::Single,
- build_group_by(&schema.clone(), vec!["a".to_string(), "b".to_string()]),
- vec![], /* aggr_expr */
- vec![], /* filter_expr */
- source, /* input */
- schema.clone(), /* input_schema */
- )?;
- let distinct_agg = AggregateExec::try_new(
- AggregateMode::Single,
- build_group_by(&schema.clone(), vec!["a".to_string()]),
- vec![], /* aggr_expr */
- vec![], /* filter_expr */
- Arc::new(group_by_agg), /* input */
- schema.clone(), /* input_schema */
- )?;
- let limit_exec = LocalLimitExec::new(
- Arc::new(distinct_agg),
- 4, // fetch
- );
- // expected to push the limit to the outer AggregateExec only
- let expected = [
- "LocalLimitExec: fetch=4",
- "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], lim=[4]",
- "AggregateExec: mode=Single, gby=[a@0 as a, b@1 as b], aggr=[]",
- "MemoryExec: partitions=1, partition_sizes=[1]",
- ];
- let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
- assert_plan_matches_expected(&plan, &expected)?;
- let expected = r#"
-+---+
-| a |
-+---+
-| 1 |
-| 2 |
-|   |
-| 4 |
-+---+
-"#
- .trim();
- assert_results_match_expected(plan, expected).await?;
- Ok(())
- }
-
- #[test]
- fn test_no_group_by() -> Result<()> {
- let source = mock_data()?;
- let schema = source.schema();
-
- // `SELECT FROM MemoryExec LIMIT 10;`, Single AggregateExec
- let single_agg = AggregateExec::try_new(
- AggregateMode::Single,
- build_group_by(&schema.clone(), vec![]),
- vec![], /* aggr_expr */
- vec![], /* filter_expr */
- source, /* input */
- schema.clone(), /* input_schema */
- )?;
- let limit_exec = LocalLimitExec::new(
- Arc::new(single_agg),
- 10, // fetch
- );
- // expected not to push the limit to the AggregateExec
- let expected = [
- "LocalLimitExec: fetch=10",
- "AggregateExec: mode=Single, gby=[], aggr=[]",
- "MemoryExec: partitions=1, partition_sizes=[1]",
- ];
- let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
- assert_plan_matches_expected(&plan, &expected)?;
- Ok(())
- }
-
- #[test]
- fn test_has_aggregate_expression() -> Result<()> {
- let source = mock_data()?;
- let schema = source.schema();
- let agg = TestAggregate::new_count_star();
-
- // `SELECT a, COUNT(*) FROM MemoryExec GROUP BY a LIMIT 10;`, Single AggregateExec
- let single_agg = AggregateExec::try_new(
- AggregateMode::Single,
- build_group_by(&schema.clone(), vec!["a".to_string()]),
- vec![agg.count_expr(&schema)], /* aggr_expr */
- vec![None], /* filter_expr */
- source, /* input */
- schema.clone(), /* input_schema */
- )?;
- let limit_exec = LocalLimitExec::new(
- Arc::new(single_agg),
- 10, // fetch
- );
- // expected not to push the limit to the AggregateExec
- let expected = [
- "LocalLimitExec: fetch=10",
- "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[COUNT(*)]",
- "MemoryExec: partitions=1, partition_sizes=[1]",
- ];
- let plan: Arc<dyn ExecutionPlan> = Arc::new(limit_exec);
- assert_plan_matches_expected(&plan, &expected)?;
- Ok(())
- }
-
- #[test]
- fn test_has_filter() -> Result<()> {
- let source = mock_data()?;
- let schema = source.schema();
-
- // `SELECT a FROM MemoryExec WHERE a > 1 GROUP BY a LIMIT 10;`, Single AggregateExec
- // the `a > 1` filter is applied in the AggregateExec
- let filter_expr = Some(expressions::binary(
- expressions::col("a", &schema)?,
- Operator::Gt,
- cast(expressions::lit(1u32), &schema, DataType::Int32)?,
- &schema,
- )?);
- let agg = TestAggregate::new_count_star();
- let single_agg = AggregateExec::try_new(
- AggregateMode::Single,
- build_group_by(&schema.clone(), vec!["a".to_string()]),
- vec![agg.count_expr(&schema)], /* aggr_expr */
- vec![filter_expr], /* filter_expr */
- source, /* input */
- schema.clone(), /* input_schema */
- )?;
- let limit_exec = LocalLimitExec::new(
- Arc::new(single_agg),
- 10, // fetch
- );
- // expected not to push the limit to the AggregateExec
- // TODO(msirek): open an issue for `filter_expr` of `AggregateExec` not printing out
- let expected = [
[ - "LocalLimitExec: fetch=10", - "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[COUNT(*)]", - "MemoryExec: partitions=1, partition_sizes=[1]", - ]; - let plan: Arc = Arc::new(limit_exec); - assert_plan_matches_expected(&plan, &expected)?; - Ok(()) - } - - #[test] - fn test_has_order_by() -> Result<()> { - let sort_key = vec![PhysicalSortExpr { - expr: expressions::col("a", &schema()).unwrap(), - options: SortOptions::default(), - }]; - let source = parquet_exec_with_sort(vec![sort_key]); - let schema = source.schema(); - - // `SELECT a FROM MemoryExec WHERE a > 1 GROUP BY a LIMIT 10;`, Single AggregateExec - // the `a > 1` filter is applied in the AggregateExec - let single_agg = AggregateExec::try_new( - AggregateMode::Single, - build_group_by(&schema.clone(), vec!["a".to_string()]), - vec![], /* aggr_expr */ - vec![], /* filter_expr */ - source, /* input */ - schema.clone(), /* input_schema */ - )?; - let limit_exec = LocalLimitExec::new( - Arc::new(single_agg), - 10, // fetch - ); - // expected not to push the limit to the AggregateExec - let expected = [ - "LocalLimitExec: fetch=10", - "AggregateExec: mode=Single, gby=[a@0 as a], aggr=[], ordering_mode=Sorted", - "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC]", - ]; - let plan: Arc = Arc::new(limit_exec); - assert_plan_matches_expected(&plan, &expected)?; - Ok(()) - } -} diff --git a/datafusion/physical-optimizer/src/replace_with_order_preserving_variants.rs b/datafusion/physical-optimizer/src/replace_with_order_preserving_variants.rs index 492509bbde900..42049071d4aa0 100644 --- a/datafusion/physical-optimizer/src/replace_with_order_preserving_variants.rs +++ b/datafusion/physical-optimizer/src/replace_with_order_preserving_variants.rs @@ -220,7 +220,8 @@ fn plan_with_order_breaking_variants( /// use updated plan. Otherwise, use the original plan. /// 5. Continue the bottom-up traversal until another `SortExec` is seen, or the /// traversal is complete. -pub(crate) fn replace_with_order_preserving_variants( +pub fn replace_with_order_preserving_variants( + // TODO: make this function to pub crate when https://github.com/apache/datafusion/issues/11502 is resolved mut requirements: OrderPreservationContext, // A flag indicating that replacing `RepartitionExec`s with sort-preserving // variants is desirable when it helps to remove a `SortExec` from the plan. 
@@ -269,1245 +270,3 @@ pub(crate) fn replace_with_order_preserving_variants( Ok(Transformed::yes(requirements)) } } - -#[cfg(test)] -mod tests { - use super::*; - - use crate::datasource::file_format::file_compression_type::FileCompressionType; - use crate::datasource::listing::PartitionedFile; - use crate::datasource::physical_plan::{CsvExec, FileScanConfig}; - use crate::physical_optimizer::test_utils::check_integrity; - use crate::physical_plan::coalesce_batches::CoalesceBatchesExec; - use crate::physical_plan::filter::FilterExec; - use crate::physical_plan::joins::{HashJoinExec, PartitionMode}; - use crate::physical_plan::sorts::sort::SortExec; - use crate::physical_plan::{ - displayable, get_plan_string, ExecutionPlan, Partitioning, - }; - use crate::prelude::SessionConfig; - use crate::test::TestStreamPartition; - - use arrow::compute::SortOptions; - use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; - use datafusion_common::tree_node::{TransformedResult, TreeNode}; - use datafusion_common::Result; - use datafusion_execution::object_store::ObjectStoreUrl; - use datafusion_expr::{JoinType, Operator}; - use datafusion_physical_expr::expressions::{self, col, Column}; - use datafusion_physical_expr::PhysicalSortExpr; - use datafusion_physical_plan::streaming::StreamingTableExec; - - use rstest::rstest; - - /// Runs the `replace_with_order_preserving_variants` sub-rule and asserts - /// the plan against the original and expected plans for both bounded and - /// unbounded cases. - /// - /// # Parameters - /// - /// * `EXPECTED_UNBOUNDED_PLAN_LINES`: Expected input unbounded plan. - /// * `EXPECTED_BOUNDED_PLAN_LINES`: Expected input bounded plan. - /// * `EXPECTED_UNBOUNDED_OPTIMIZED_PLAN_LINES`: Optimized plan, which is - /// the same regardless of the value of the `prefer_existing_sort` flag. - /// * `EXPECTED_BOUNDED_OPTIMIZED_PLAN_LINES`: Optimized plan when the flag - /// `prefer_existing_sort` is `false` for bounded cases. - /// * `EXPECTED_BOUNDED_PREFER_SORT_ON_OPTIMIZED_PLAN_LINES`: Optimized plan - /// when the flag `prefer_existing_sort` is `true` for bounded cases. - /// * `$PLAN`: The plan to optimize. - /// * `$SOURCE_UNBOUNDED`: Whether the given plan contains an unbounded source. - macro_rules! assert_optimized_in_all_boundedness_situations { - ($EXPECTED_UNBOUNDED_PLAN_LINES: expr, $EXPECTED_BOUNDED_PLAN_LINES: expr, $EXPECTED_UNBOUNDED_OPTIMIZED_PLAN_LINES: expr, $EXPECTED_BOUNDED_OPTIMIZED_PLAN_LINES: expr, $EXPECTED_BOUNDED_PREFER_SORT_ON_OPTIMIZED_PLAN_LINES: expr, $PLAN: expr, $SOURCE_UNBOUNDED: expr) => { - if $SOURCE_UNBOUNDED { - assert_optimized_prefer_sort_on_off!( - $EXPECTED_UNBOUNDED_PLAN_LINES, - $EXPECTED_UNBOUNDED_OPTIMIZED_PLAN_LINES, - $EXPECTED_UNBOUNDED_OPTIMIZED_PLAN_LINES, - $PLAN - ); - } else { - assert_optimized_prefer_sort_on_off!( - $EXPECTED_BOUNDED_PLAN_LINES, - $EXPECTED_BOUNDED_OPTIMIZED_PLAN_LINES, - $EXPECTED_BOUNDED_PREFER_SORT_ON_OPTIMIZED_PLAN_LINES, - $PLAN - ); - } - }; - } - - /// Runs the `replace_with_order_preserving_variants` sub-rule and asserts - /// the plan against the original and expected plans. - /// - /// # Parameters - /// - /// * `$EXPECTED_PLAN_LINES`: Expected input plan. - /// * `EXPECTED_OPTIMIZED_PLAN_LINES`: Optimized plan when the flag - /// `prefer_existing_sort` is `false`. - /// * `EXPECTED_PREFER_SORT_ON_OPTIMIZED_PLAN_LINES`: Optimized plan when - /// the flag `prefer_existing_sort` is `true`. - /// * `$PLAN`: The plan to optimize. - macro_rules! 
assert_optimized_prefer_sort_on_off { - ($EXPECTED_PLAN_LINES: expr, $EXPECTED_OPTIMIZED_PLAN_LINES: expr, $EXPECTED_PREFER_SORT_ON_OPTIMIZED_PLAN_LINES: expr, $PLAN: expr) => { - assert_optimized!( - $EXPECTED_PLAN_LINES, - $EXPECTED_OPTIMIZED_PLAN_LINES, - $PLAN.clone(), - false - ); - assert_optimized!( - $EXPECTED_PLAN_LINES, - $EXPECTED_PREFER_SORT_ON_OPTIMIZED_PLAN_LINES, - $PLAN, - true - ); - }; - } - - /// Runs the `replace_with_order_preserving_variants` sub-rule and asserts - /// the plan against the original and expected plans. - /// - /// # Parameters - /// - /// * `$EXPECTED_PLAN_LINES`: Expected input plan. - /// * `$EXPECTED_OPTIMIZED_PLAN_LINES`: Expected optimized plan. - /// * `$PLAN`: The plan to optimize. - /// * `$PREFER_EXISTING_SORT`: Value of the `prefer_existing_sort` flag. - macro_rules! assert_optimized { - ($EXPECTED_PLAN_LINES: expr, $EXPECTED_OPTIMIZED_PLAN_LINES: expr, $PLAN: expr, $PREFER_EXISTING_SORT: expr) => { - let physical_plan = $PLAN; - let formatted = displayable(physical_plan.as_ref()).indent(true).to_string(); - let actual: Vec<&str> = formatted.trim().lines().collect(); - - let expected_plan_lines: Vec<&str> = $EXPECTED_PLAN_LINES - .iter().map(|s| *s).collect(); - - assert_eq!( - expected_plan_lines, actual, - "\n**Original Plan Mismatch\n\nexpected:\n\n{expected_plan_lines:#?}\nactual:\n\n{actual:#?}\n\n" - ); - - let expected_optimized_lines: Vec<&str> = $EXPECTED_OPTIMIZED_PLAN_LINES.iter().map(|s| *s).collect(); - - // Run the rule top-down - let config = SessionConfig::new().with_prefer_existing_sort($PREFER_EXISTING_SORT); - let plan_with_pipeline_fixer = OrderPreservationContext::new_default(physical_plan); - let parallel = plan_with_pipeline_fixer.transform_up(|plan_with_pipeline_fixer| replace_with_order_preserving_variants(plan_with_pipeline_fixer, false, false, config.options())).data().and_then(check_integrity)?; - let optimized_physical_plan = parallel.plan; - - // Get string representation of the plan - let actual = get_plan_string(&optimized_physical_plan); - assert_eq!( - expected_optimized_lines, actual, - "\n**Optimized Plan Mismatch\n\nexpected:\n\n{expected_optimized_lines:#?}\nactual:\n\n{actual:#?}\n\n" - ); - }; - } - - #[rstest] - #[tokio::test] - // Searches for a simple sort and a repartition just after it, the second repartition with 1 input partition should not be affected - async fn test_replace_multiple_input_repartition_1( - #[values(false, true)] source_unbounded: bool, - ) -> Result<()> { - let schema = create_test_schema()?; - let sort_exprs = vec![sort_expr("a", &schema)]; - let source = if source_unbounded { - stream_exec_ordered(&schema, sort_exprs) - } else { - csv_exec_sorted(&schema, sort_exprs) - }; - let repartition = repartition_exec_hash(repartition_exec_round_robin(source)); - let sort = sort_exec(vec![sort_expr("a", &schema)], repartition, true); - - let physical_plan = - sort_preserving_merge_exec(vec![sort_expr("a", &schema)], sort); - - // Expected inputs unbounded and bounded - let expected_input_unbounded = [ - "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", - " SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", - ]; - let expected_input_bounded = [ - "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", - " 
SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", - ]; - - // Expected unbounded result (same for with and without flag) - let expected_optimized_unbounded = [ - "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", - ]; - - // Expected bounded results with and without flag - let expected_optimized_bounded = [ - "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", - " SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", - ]; - let expected_optimized_bounded_sort_preserve = [ - "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", - ]; - assert_optimized_in_all_boundedness_situations!( - expected_input_unbounded, - expected_input_bounded, - expected_optimized_unbounded, - expected_optimized_bounded, - expected_optimized_bounded_sort_preserve, - physical_plan, - source_unbounded - ); - Ok(()) - } - - #[rstest] - #[tokio::test] - async fn test_with_inter_children_change_only( - #[values(false, true)] source_unbounded: bool, - ) -> Result<()> { - let schema = create_test_schema()?; - let sort_exprs = vec![sort_expr_default("a", &schema)]; - let source = if source_unbounded { - stream_exec_ordered(&schema, sort_exprs) - } else { - csv_exec_sorted(&schema, sort_exprs) - }; - let repartition_rr = repartition_exec_round_robin(source); - let repartition_hash = repartition_exec_hash(repartition_rr); - let coalesce_partitions = coalesce_partitions_exec(repartition_hash); - let sort = sort_exec( - vec![sort_expr_default("a", &coalesce_partitions.schema())], - coalesce_partitions, - false, - ); - let repartition_rr2 = repartition_exec_round_robin(sort); - let repartition_hash2 = repartition_exec_hash(repartition_rr2); - let filter = filter_exec(repartition_hash2); - let sort2 = - sort_exec(vec![sort_expr_default("a", &filter.schema())], filter, true); - - let physical_plan = sort_preserving_merge_exec( - vec![sort_expr_default("a", &sort2.schema())], - sort2, - ); - - // Expected inputs unbounded and bounded - let expected_input_unbounded = [ - "SortPreservingMergeExec: [a@0 ASC]", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", - " FilterExec: c@1 > 3", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " 
RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC]", - ]; - let expected_input_bounded = [ - "SortPreservingMergeExec: [a@0 ASC]", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", - " FilterExec: c@1 > 3", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC], has_header=true", - ]; - - // Expected unbounded result (same for with and without flag) - let expected_optimized_unbounded = [ - "SortPreservingMergeExec: [a@0 ASC]", - " FilterExec: c@1 > 3", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " SortPreservingMergeExec: [a@0 ASC]", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC]", - ]; - - // Expected bounded results with and without flag - let expected_optimized_bounded = [ - "SortPreservingMergeExec: [a@0 ASC]", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", - " FilterExec: c@1 > 3", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC], has_header=true", - ]; - let expected_optimized_bounded_sort_preserve = [ - "SortPreservingMergeExec: [a@0 ASC]", - " FilterExec: c@1 > 3", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " SortPreservingMergeExec: [a@0 ASC]", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC], has_header=true", - ]; - assert_optimized_in_all_boundedness_situations!( - expected_input_unbounded, - expected_input_bounded, - expected_optimized_unbounded, - expected_optimized_bounded, - expected_optimized_bounded_sort_preserve, - physical_plan, - source_unbounded - ); - Ok(()) - } - - #[rstest] - #[tokio::test] - async fn test_replace_multiple_input_repartition_2( - #[values(false, true)] source_unbounded: bool, - ) -> Result<()> { - let schema = create_test_schema()?; - let sort_exprs = vec![sort_expr("a", &schema)]; - let source = if source_unbounded 
-            stream_exec_ordered(&schema, sort_exprs)
-        } else {
-            csv_exec_sorted(&schema, sort_exprs)
-        };
-        let repartition_rr = repartition_exec_round_robin(source);
-        let filter = filter_exec(repartition_rr);
-        let repartition_hash = repartition_exec_hash(filter);
-        let sort = sort_exec(vec![sort_expr("a", &schema)], repartition_hash, true);
-
-        let physical_plan =
-            sort_preserving_merge_exec(vec![sort_expr("a", &schema)], sort);
-
-        // Expected inputs unbounded and bounded
-        let expected_input_unbounded = [
-            "SortPreservingMergeExec: [a@0 ASC NULLS LAST]",
-            "  SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]",
-            "    RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "      FilterExec: c@1 > 3",
-            "        RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "          StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]",
-        ];
-        let expected_input_bounded = [
-            "SortPreservingMergeExec: [a@0 ASC NULLS LAST]",
-            "  SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]",
-            "    RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "      FilterExec: c@1 > 3",
-            "        RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "          CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
-        ];
-
-        // Expected unbounded result (same for with and without flag)
-        let expected_optimized_unbounded = [
-            "SortPreservingMergeExec: [a@0 ASC NULLS LAST]",
-            "  RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST",
-            "    FilterExec: c@1 > 3",
-            "      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "        StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]",
-        ];
-
-        // Expected bounded results with and without flag
-        let expected_optimized_bounded = [
-            "SortPreservingMergeExec: [a@0 ASC NULLS LAST]",
-            "  SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]",
-            "    RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "      FilterExec: c@1 > 3",
-            "        RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "          CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
-        ];
-        let expected_optimized_bounded_sort_preserve = [
-            "SortPreservingMergeExec: [a@0 ASC NULLS LAST]",
-            "  RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST",
-            "    FilterExec: c@1 > 3",
-            "      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "        CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
-        ];
-        assert_optimized_in_all_boundedness_situations!(
-            expected_input_unbounded,
-            expected_input_bounded,
-            expected_optimized_unbounded,
-            expected_optimized_bounded,
-            expected_optimized_bounded_sort_preserve,
-            physical_plan,
-            source_unbounded
-        );
-        Ok(())
-    }
-
-    #[rstest]
-    #[tokio::test]
-    async fn test_replace_multiple_input_repartition_with_extra_steps(
-        #[values(false, true)] source_unbounded: bool,
-    ) -> Result<()> {
-        let schema = create_test_schema()?;
-        let sort_exprs = vec![sort_expr("a", &schema)];
-        let source = if source_unbounded {
-            stream_exec_ordered(&schema, sort_exprs)
-        } else {
-            csv_exec_sorted(&schema, sort_exprs)
-        };
-        let repartition_rr = repartition_exec_round_robin(source);
-        let repartition_hash = repartition_exec_hash(repartition_rr);
-        let filter = filter_exec(repartition_hash);
-        let coalesce_batches_exec: Arc<dyn ExecutionPlan> = coalesce_batches_exec(filter);
-        let sort = sort_exec(vec![sort_expr("a", &schema)], coalesce_batches_exec, true);
-
-        let physical_plan =
-            sort_preserving_merge_exec(vec![sort_expr("a", &schema)], sort);
-
-        // Expected inputs unbounded and bounded
-        let expected_input_unbounded = [
-            "SortPreservingMergeExec: [a@0 ASC NULLS LAST]",
-            "  SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]",
-            "    CoalesceBatchesExec: target_batch_size=8192",
-            "      FilterExec: c@1 > 3",
-            "        RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "          RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "            StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]",
-        ];
-        let expected_input_bounded = [
-            "SortPreservingMergeExec: [a@0 ASC NULLS LAST]",
-            "  SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]",
-            "    CoalesceBatchesExec: target_batch_size=8192",
-            "      FilterExec: c@1 > 3",
-            "        RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "          RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "            CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
-        ];
-
-        // Expected unbounded result (same for with and without flag)
-        let expected_optimized_unbounded = [
-            "SortPreservingMergeExec: [a@0 ASC NULLS LAST]",
-            "  CoalesceBatchesExec: target_batch_size=8192",
-            "    FilterExec: c@1 > 3",
-            "      RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST",
-            "        RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "          StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]",
-        ];
-
-        // Expected bounded results with and without flag
-        let expected_optimized_bounded = [
-            "SortPreservingMergeExec: [a@0 ASC NULLS LAST]",
-            "  SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]",
-            "    CoalesceBatchesExec: target_batch_size=8192",
-            "      FilterExec: c@1 > 3",
-            "        RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "          RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "            CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
-        ];
-        let expected_optimized_bounded_sort_preserve = [
-            "SortPreservingMergeExec: [a@0 ASC NULLS LAST]",
-            "  CoalesceBatchesExec: target_batch_size=8192",
-            "    FilterExec: c@1 > 3",
-            "      RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST",
-            "        RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "          CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
-        ];
-        assert_optimized_in_all_boundedness_situations!(
-            expected_input_unbounded,
-            expected_input_bounded,
-            expected_optimized_unbounded,
-            expected_optimized_bounded,
-            expected_optimized_bounded_sort_preserve,
-            physical_plan,
-            source_unbounded
-        );
-        Ok(())
-    }
-
-    #[rstest]
-    #[tokio::test]
-    async fn test_replace_multiple_input_repartition_with_extra_steps_2(
-        #[values(false, true)] source_unbounded: bool,
-    ) -> Result<()> {
-        let schema = create_test_schema()?;
-        let sort_exprs = vec![sort_expr("a", &schema)];
-        let source = if source_unbounded {
-            stream_exec_ordered(&schema, sort_exprs)
-        } else {
-            csv_exec_sorted(&schema, sort_exprs)
-        };
-        let repartition_rr = repartition_exec_round_robin(source);
-        let coalesce_batches_exec_1 = coalesce_batches_exec(repartition_rr);
-        let repartition_hash = repartition_exec_hash(coalesce_batches_exec_1);
-        let filter = filter_exec(repartition_hash);
-        let coalesce_batches_exec_2 = coalesce_batches_exec(filter);
-        let sort =
-            sort_exec(vec![sort_expr("a", &schema)], coalesce_batches_exec_2, true);
-
-        let physical_plan =
-            sort_preserving_merge_exec(vec![sort_expr("a", &schema)], sort);
-
-        // Expected inputs unbounded and bounded
-        let expected_input_unbounded = [
-            "SortPreservingMergeExec: [a@0 ASC NULLS LAST]",
-            "  SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]",
-            "    CoalesceBatchesExec: target_batch_size=8192",
-            "      FilterExec: c@1 > 3",
-            "        RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "          CoalesceBatchesExec: target_batch_size=8192",
-            "            RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "              StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]",
-        ];
-        let expected_input_bounded = [
-            "SortPreservingMergeExec: [a@0 ASC NULLS LAST]",
-            "  SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]",
-            "    CoalesceBatchesExec: target_batch_size=8192",
-            "      FilterExec: c@1 > 3",
-            "        RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "          CoalesceBatchesExec: target_batch_size=8192",
-            "            RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "              CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
-        ];
-
-        // Expected unbounded result (same for with and without flag)
-        let expected_optimized_unbounded = [
-            "SortPreservingMergeExec: [a@0 ASC NULLS LAST]",
-            "  CoalesceBatchesExec: target_batch_size=8192",
-            "    FilterExec: c@1 > 3",
-            "      RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST",
-            "        CoalesceBatchesExec: target_batch_size=8192",
-            "          RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "            StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]",
-        ];
-
-        // Expected bounded results with and without flag
-        let expected_optimized_bounded = [
-            "SortPreservingMergeExec: [a@0 ASC NULLS LAST]",
-            "  SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]",
-            "    CoalesceBatchesExec: target_batch_size=8192",
-            "      FilterExec: c@1 > 3",
-            "        RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "          CoalesceBatchesExec: target_batch_size=8192",
-            "            RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "              CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
-        ];
-        let expected_optimized_bounded_sort_preserve = [
-            "SortPreservingMergeExec: [a@0 ASC NULLS LAST]",
-            "  CoalesceBatchesExec: target_batch_size=8192",
-            "    FilterExec: c@1 > 3",
-            "      RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST",
-            "        CoalesceBatchesExec: target_batch_size=8192",
-            "          RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "            CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
-        ];
-        assert_optimized_in_all_boundedness_situations!(
-            expected_input_unbounded,
-            expected_input_bounded,
-            expected_optimized_unbounded,
-            expected_optimized_bounded,
-            expected_optimized_bounded_sort_preserve,
-            physical_plan,
-            source_unbounded
-        );
-        Ok(())
-    }
-
-    #[rstest]
-    #[tokio::test]
-    async fn test_not_replacing_when_no_need_to_preserve_sorting(
-        #[values(false, true)] source_unbounded: bool,
-    ) -> Result<()> {
-        let schema = create_test_schema()?;
-        let sort_exprs = vec![sort_expr("a", &schema)];
-        let source = if source_unbounded {
-            stream_exec_ordered(&schema, sort_exprs)
-        } else {
-            csv_exec_sorted(&schema, sort_exprs)
-        };
-        let repartition_rr = repartition_exec_round_robin(source);
-        let repartition_hash = repartition_exec_hash(repartition_rr);
-        let filter = filter_exec(repartition_hash);
-        let coalesce_batches_exec: Arc<dyn ExecutionPlan> = coalesce_batches_exec(filter);
-
-        let physical_plan: Arc<dyn ExecutionPlan> =
-            coalesce_partitions_exec(coalesce_batches_exec);
-
-        // Expected inputs unbounded and bounded
-        let expected_input_unbounded = [
-            "CoalescePartitionsExec",
-            "  CoalesceBatchesExec: target_batch_size=8192",
-            "    FilterExec: c@1 > 3",
-            "      RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "        RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "          StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]",
-        ];
-        let expected_input_bounded = [
-            "CoalescePartitionsExec",
-            "  CoalesceBatchesExec: target_batch_size=8192",
-            "    FilterExec: c@1 > 3",
-            "      RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "        RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "          CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
-        ];
-
-        // Expected unbounded result (same for with and without flag)
-        let expected_optimized_unbounded = [
-            "CoalescePartitionsExec",
-            "  CoalesceBatchesExec: target_batch_size=8192",
-            "    FilterExec: c@1 > 3",
-            "      RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "        RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "          StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]",
-        ];
-
-        // Expected bounded results same with and without flag, because there is no executor with an ordering requirement
-        let expected_optimized_bounded = [
-            "CoalescePartitionsExec",
-            "  CoalesceBatchesExec: target_batch_size=8192",
-            "    FilterExec: c@1 > 3",
-            "      RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "        RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "          CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
-        ];
-        let expected_optimized_bounded_sort_preserve = expected_optimized_bounded;
-
-        assert_optimized_in_all_boundedness_situations!(
-            expected_input_unbounded,
-            expected_input_bounded,
-            expected_optimized_unbounded,
-            expected_optimized_bounded,
-            expected_optimized_bounded_sort_preserve,
-            physical_plan,
-            source_unbounded
-        );
-        Ok(())
-    }
-
-    #[rstest]
-    #[tokio::test]
-    async fn test_with_multiple_replacable_repartitions(
-        #[values(false, true)] source_unbounded: bool,
-    ) -> Result<()> {
-        let schema = create_test_schema()?;
vec![sort_expr("a", &schema)]; - let source = if source_unbounded { - stream_exec_ordered(&schema, sort_exprs) - } else { - csv_exec_sorted(&schema, sort_exprs) - }; - let repartition_rr = repartition_exec_round_robin(source); - let repartition_hash = repartition_exec_hash(repartition_rr); - let filter = filter_exec(repartition_hash); - let coalesce_batches = coalesce_batches_exec(filter); - let repartition_hash_2 = repartition_exec_hash(coalesce_batches); - let sort = sort_exec(vec![sort_expr("a", &schema)], repartition_hash_2, true); - - let physical_plan = - sort_preserving_merge_exec(vec![sort_expr("a", &schema)], sort); - - // Expected inputs unbounded and bounded - let expected_input_unbounded = [ - "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", - " SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " CoalesceBatchesExec: target_batch_size=8192", - " FilterExec: c@1 > 3", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", - ]; - let expected_input_bounded = [ - "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", - " SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " CoalesceBatchesExec: target_batch_size=8192", - " FilterExec: c@1 > 3", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", - ]; - - // Expected unbounded result (same for with and without flag) - let expected_optimized_unbounded = [ - "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST", - " CoalesceBatchesExec: target_batch_size=8192", - " FilterExec: c@1 > 3", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", - ]; - - // Expected bounded results with and without flag - let expected_optimized_bounded = [ - "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", - " SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " CoalesceBatchesExec: target_batch_size=8192", - " FilterExec: c@1 > 3", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", - ]; - let expected_optimized_bounded_sort_preserve = [ - "SortPreservingMergeExec: [a@0 ASC NULLS LAST]", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST", - " CoalesceBatchesExec: target_batch_size=8192", - " FilterExec: c@1 > 3", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, 
-            "          RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "            CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
-        ];
-        assert_optimized_in_all_boundedness_situations!(
-            expected_input_unbounded,
-            expected_input_bounded,
-            expected_optimized_unbounded,
-            expected_optimized_bounded,
-            expected_optimized_bounded_sort_preserve,
-            physical_plan,
-            source_unbounded
-        );
-        Ok(())
-    }
-
-    #[rstest]
-    #[tokio::test]
-    async fn test_not_replace_with_different_orderings(
-        #[values(false, true)] source_unbounded: bool,
-    ) -> Result<()> {
-        let schema = create_test_schema()?;
-        let sort_exprs = vec![sort_expr("a", &schema)];
-        let source = if source_unbounded {
-            stream_exec_ordered(&schema, sort_exprs)
-        } else {
-            csv_exec_sorted(&schema, sort_exprs)
-        };
-        let repartition_rr = repartition_exec_round_robin(source);
-        let repartition_hash = repartition_exec_hash(repartition_rr);
-        let sort = sort_exec(
-            vec![sort_expr_default("c", &repartition_hash.schema())],
-            repartition_hash,
-            true,
-        );
-
-        let physical_plan = sort_preserving_merge_exec(
-            vec![sort_expr_default("c", &sort.schema())],
-            sort,
-        );
-
-        // Expected inputs unbounded and bounded
-        let expected_input_unbounded = [
-            "SortPreservingMergeExec: [c@1 ASC]",
-            "  SortExec: expr=[c@1 ASC], preserve_partitioning=[true]",
-            "    RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "        StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]",
-        ];
-        let expected_input_bounded = [
-            "SortPreservingMergeExec: [c@1 ASC]",
-            "  SortExec: expr=[c@1 ASC], preserve_partitioning=[true]",
-            "    RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "        CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
-        ];
-
-        // Expected unbounded result (same for with and without flag)
-        let expected_optimized_unbounded = [
-            "SortPreservingMergeExec: [c@1 ASC]",
-            "  SortExec: expr=[c@1 ASC], preserve_partitioning=[true]",
-            "    RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "        StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]",
-        ];
-
-        // Expected bounded results same with and without flag, because the ordering requirement of the executor differs from the existing ordering.
-        let expected_optimized_bounded = [
-            "SortPreservingMergeExec: [c@1 ASC]",
-            "  SortExec: expr=[c@1 ASC], preserve_partitioning=[true]",
-            "    RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "        CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
-        ];
-        let expected_optimized_bounded_sort_preserve = expected_optimized_bounded;
-
-        assert_optimized_in_all_boundedness_situations!(
-            expected_input_unbounded,
-            expected_input_bounded,
-            expected_optimized_unbounded,
-            expected_optimized_bounded,
-            expected_optimized_bounded_sort_preserve,
-            physical_plan,
-            source_unbounded
-        );
-        Ok(())
-    }
-
-    #[rstest]
-    #[tokio::test]
-    async fn test_with_lost_ordering(
-        #[values(false, true)] source_unbounded: bool,
-    ) -> Result<()> {
-        let schema = create_test_schema()?;
-        let sort_exprs = vec![sort_expr("a", &schema)];
-        let source = if source_unbounded {
-            stream_exec_ordered(&schema, sort_exprs)
-        } else {
-            csv_exec_sorted(&schema, sort_exprs)
-        };
-        let repartition_rr = repartition_exec_round_robin(source);
-        let repartition_hash = repartition_exec_hash(repartition_rr);
-        let coalesce_partitions = coalesce_partitions_exec(repartition_hash);
-        let physical_plan =
-            sort_exec(vec![sort_expr("a", &schema)], coalesce_partitions, false);
-
-        // Expected inputs unbounded and bounded
-        let expected_input_unbounded = [
-            "SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[false]",
-            "  CoalescePartitionsExec",
-            "    RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "        StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]",
-        ];
-        let expected_input_bounded = [
-            "SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[false]",
-            "  CoalescePartitionsExec",
-            "    RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "        CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
-        ];
-
-        // Expected unbounded result (same for with and without flag)
-        let expected_optimized_unbounded = [
-            "SortPreservingMergeExec: [a@0 ASC NULLS LAST]",
-            "  RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST",
-            "    RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "      StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]",
-        ];
-
-        // Expected bounded results with and without flag
-        let expected_optimized_bounded = [
-            "SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[false]",
-            "  CoalescePartitionsExec",
-            "    RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "        CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
-        ];
-        let expected_optimized_bounded_sort_preserve = [
-            "SortPreservingMergeExec: [a@0 ASC NULLS LAST]",
-            "  RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a@0 ASC NULLS LAST",
-            "    RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "      CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
-        ];
-        assert_optimized_in_all_boundedness_situations!(
-            expected_input_unbounded,
-            expected_input_bounded,
-            expected_optimized_unbounded,
-            expected_optimized_bounded,
-            expected_optimized_bounded_sort_preserve,
-            physical_plan,
-            source_unbounded
-        );
-        Ok(())
-    }
-
-    #[rstest]
-    #[tokio::test]
-    async fn test_with_lost_and_kept_ordering(
-        #[values(false, true)] source_unbounded: bool,
-    ) -> Result<()> {
-        let schema = create_test_schema()?;
-        let sort_exprs = vec![sort_expr("a", &schema)];
-        let source = if source_unbounded {
-            stream_exec_ordered(&schema, sort_exprs)
-        } else {
-            csv_exec_sorted(&schema, sort_exprs)
-        };
-        let repartition_rr = repartition_exec_round_robin(source);
-        let repartition_hash = repartition_exec_hash(repartition_rr);
-        let coalesce_partitions = coalesce_partitions_exec(repartition_hash);
-        let sort = sort_exec(
-            vec![sort_expr_default("c", &coalesce_partitions.schema())],
-            coalesce_partitions,
-            false,
-        );
-        let repartition_rr2 = repartition_exec_round_robin(sort);
-        let repartition_hash2 = repartition_exec_hash(repartition_rr2);
-        let filter = filter_exec(repartition_hash2);
-        let sort2 =
-            sort_exec(vec![sort_expr_default("c", &filter.schema())], filter, true);
-
-        let physical_plan = sort_preserving_merge_exec(
-            vec![sort_expr_default("c", &sort2.schema())],
-            sort2,
-        );
-
-        // Expected inputs unbounded and bounded
-        let expected_input_unbounded = [
-            "SortPreservingMergeExec: [c@1 ASC]",
-            "  SortExec: expr=[c@1 ASC], preserve_partitioning=[true]",
-            "    FilterExec: c@1 > 3",
-            "      RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "        RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "          SortExec: expr=[c@1 ASC], preserve_partitioning=[false]",
-            "            CoalescePartitionsExec",
-            "              RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "                RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "                  StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]",
-        ];
-        let expected_input_bounded = [
-            "SortPreservingMergeExec: [c@1 ASC]",
-            "  SortExec: expr=[c@1 ASC], preserve_partitioning=[true]",
-            "    FilterExec: c@1 > 3",
-            "      RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "        RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "          SortExec: expr=[c@1 ASC], preserve_partitioning=[false]",
-            "            CoalescePartitionsExec",
-            "              RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "                RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "                  CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
-        ];
-
-        // Expected unbounded result (same for with and without flag)
-        let expected_optimized_unbounded = [
-            "SortPreservingMergeExec: [c@1 ASC]",
-            "  FilterExec: c@1 > 3",
-            "    RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=c@1 ASC",
-            "      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "        SortExec: expr=[c@1 ASC], preserve_partitioning=[false]",
-            "          CoalescePartitionsExec",
-            "            RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "              RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "                StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]",
ASC NULLS LAST]", - ]; - - // Expected bounded results with and without flag - let expected_optimized_bounded = [ - "SortPreservingMergeExec: [c@1 ASC]", - " SortExec: expr=[c@1 ASC], preserve_partitioning=[true]", - " FilterExec: c@1 > 3", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " SortExec: expr=[c@1 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", - ]; - let expected_optimized_bounded_sort_preserve = [ - "SortPreservingMergeExec: [c@1 ASC]", - " FilterExec: c@1 > 3", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8, preserve_order=true, sort_exprs=c@1 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " SortExec: expr=[c@1 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", - ]; - assert_optimized_in_all_boundedness_situations!( - expected_input_unbounded, - expected_input_bounded, - expected_optimized_unbounded, - expected_optimized_bounded, - expected_optimized_bounded_sort_preserve, - physical_plan, - source_unbounded - ); - Ok(()) - } - - #[rstest] - #[tokio::test] - async fn test_with_multiple_child_trees( - #[values(false, true)] source_unbounded: bool, - ) -> Result<()> { - let schema = create_test_schema()?; - - let left_sort_exprs = vec![sort_expr("a", &schema)]; - let left_source = if source_unbounded { - stream_exec_ordered(&schema, left_sort_exprs) - } else { - csv_exec_sorted(&schema, left_sort_exprs) - }; - let left_repartition_rr = repartition_exec_round_robin(left_source); - let left_repartition_hash = repartition_exec_hash(left_repartition_rr); - let left_coalesce_partitions = - Arc::new(CoalesceBatchesExec::new(left_repartition_hash, 4096)); - - let right_sort_exprs = vec![sort_expr("a", &schema)]; - let right_source = if source_unbounded { - stream_exec_ordered(&schema, right_sort_exprs) - } else { - csv_exec_sorted(&schema, right_sort_exprs) - }; - let right_repartition_rr = repartition_exec_round_robin(right_source); - let right_repartition_hash = repartition_exec_hash(right_repartition_rr); - let right_coalesce_partitions = - Arc::new(CoalesceBatchesExec::new(right_repartition_hash, 4096)); - - let hash_join_exec = - hash_join_exec(left_coalesce_partitions, right_coalesce_partitions); - let sort = sort_exec( - vec![sort_expr_default("a", &hash_join_exec.schema())], - hash_join_exec, - true, - ); - - let physical_plan = sort_preserving_merge_exec( - vec![sort_expr_default("a", &sort.schema())], - sort, - ); - - // Expected inputs unbounded and bounded - let expected_input_unbounded = [ - "SortPreservingMergeExec: [a@0 ASC]", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", - " HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@1, c@1)]", - " CoalesceBatchesExec: target_batch_size=4096", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " RepartitionExec: partitioning=RoundRobinBatch(8), 
input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", - " CoalesceBatchesExec: target_batch_size=4096", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", - ]; - let expected_input_bounded = [ - "SortPreservingMergeExec: [a@0 ASC]", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", - " HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@1, c@1)]", - " CoalesceBatchesExec: target_batch_size=4096", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", - " CoalesceBatchesExec: target_batch_size=4096", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true", - ]; - - // Expected unbounded result (same for with and without flag) - let expected_optimized_unbounded = [ - "SortPreservingMergeExec: [a@0 ASC]", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", - " HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@1, c@1)]", - " CoalesceBatchesExec: target_batch_size=4096", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", - " CoalesceBatchesExec: target_batch_size=4096", - " RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST]", - ]; - - // Expected bounded results same with and without flag, because ordering get lost during intermediate executor anyway. Hence no need to preserve - // existing ordering. 
-        let expected_optimized_bounded = [
-            "SortPreservingMergeExec: [a@0 ASC]",
-            "  SortExec: expr=[a@0 ASC], preserve_partitioning=[true]",
-            "    HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@1, c@1)]",
-            "      CoalesceBatchesExec: target_batch_size=4096",
-            "        RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "          RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "            CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
-            "      CoalesceBatchesExec: target_batch_size=4096",
-            "        RepartitionExec: partitioning=Hash([c@1], 8), input_partitions=8",
-            "          RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
-            "            CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true",
-        ];
-        let expected_optimized_bounded_sort_preserve = expected_optimized_bounded;
-
-        assert_optimized_in_all_boundedness_situations!(
-            expected_input_unbounded,
-            expected_input_bounded,
-            expected_optimized_unbounded,
-            expected_optimized_bounded,
-            expected_optimized_bounded_sort_preserve,
-            physical_plan,
-            source_unbounded
-        );
-        Ok(())
-    }
-
-    // End test cases
-    // Start test helpers
-
-    fn sort_expr(name: &str, schema: &Schema) -> PhysicalSortExpr {
-        let sort_opts = SortOptions {
-            nulls_first: false,
-            descending: false,
-        };
-        sort_expr_options(name, schema, sort_opts)
-    }
-
-    fn sort_expr_default(name: &str, schema: &Schema) -> PhysicalSortExpr {
-        let sort_opts = SortOptions::default();
-        sort_expr_options(name, schema, sort_opts)
-    }
-
-    fn sort_expr_options(
-        name: &str,
-        schema: &Schema,
-        options: SortOptions,
-    ) -> PhysicalSortExpr {
-        PhysicalSortExpr {
-            expr: col(name, schema).unwrap(),
-            options,
-        }
-    }
-
-    fn sort_exec(
-        sort_exprs: impl IntoIterator<Item = PhysicalSortExpr>,
-        input: Arc<dyn ExecutionPlan>,
-        preserve_partitioning: bool,
-    ) -> Arc<dyn ExecutionPlan> {
-        let sort_exprs = sort_exprs.into_iter().collect();
-        Arc::new(
-            SortExec::new(sort_exprs, input)
-                .with_preserve_partitioning(preserve_partitioning),
-        )
-    }
-
-    fn sort_preserving_merge_exec(
-        sort_exprs: impl IntoIterator<Item = PhysicalSortExpr>,
-        input: Arc<dyn ExecutionPlan>,
-    ) -> Arc<dyn ExecutionPlan> {
-        let sort_exprs = sort_exprs.into_iter().collect();
-        Arc::new(SortPreservingMergeExec::new(sort_exprs, input))
-    }
-
-    fn repartition_exec_round_robin(
-        input: Arc<dyn ExecutionPlan>,
-    ) -> Arc<dyn ExecutionPlan> {
-        Arc::new(
-            RepartitionExec::try_new(input, Partitioning::RoundRobinBatch(8)).unwrap(),
-        )
-    }
-
-    fn repartition_exec_hash(input: Arc<dyn ExecutionPlan>) -> Arc<dyn ExecutionPlan> {
-        let input_schema = input.schema();
-        Arc::new(
-            RepartitionExec::try_new(
-                input,
-                Partitioning::Hash(vec![col("c", &input_schema).unwrap()], 8),
-            )
-            .unwrap(),
-        )
-    }
-
-    fn filter_exec(input: Arc<dyn ExecutionPlan>) -> Arc<dyn ExecutionPlan> {
-        let input_schema = input.schema();
-        let predicate = expressions::binary(
-            col("c", &input_schema).unwrap(),
-            Operator::Gt,
-            expressions::lit(3i32),
-            &input_schema,
-        )
-        .unwrap();
-        Arc::new(FilterExec::try_new(predicate, input).unwrap())
-    }
-
-    fn coalesce_batches_exec(input: Arc<dyn ExecutionPlan>) -> Arc<dyn ExecutionPlan> {
-        Arc::new(CoalesceBatchesExec::new(input, 8192))
-    }
-
-    fn coalesce_partitions_exec(input: Arc<dyn ExecutionPlan>) -> Arc<dyn ExecutionPlan> {
-        Arc::new(CoalescePartitionsExec::new(input))
-    }
-
-    fn hash_join_exec(
-        left: Arc<dyn ExecutionPlan>,
-        right: Arc<dyn ExecutionPlan>,
-    ) -> Arc<dyn ExecutionPlan> {
-        let left_on = col("c", &left.schema()).unwrap();
-        let right_on = col("c", &right.schema()).unwrap();
-        let left_col = left_on.as_any().downcast_ref::<Column>().unwrap();
-        let right_col = right_on.as_any().downcast_ref::<Column>().unwrap();
-        Arc::new(
-            HashJoinExec::try_new(
-                left,
-                right,
-                vec![(Arc::new(left_col.clone()), Arc::new(right_col.clone()))],
-                None,
-                &JoinType::Inner,
-                None,
-                PartitionMode::Partitioned,
-                false,
-            )
-            .unwrap(),
-        )
-    }
-
-    fn create_test_schema() -> Result<SchemaRef> {
-        let column_a = Field::new("a", DataType::Int32, false);
-        let column_b = Field::new("b", DataType::Int32, false);
-        let column_c = Field::new("c", DataType::Int32, false);
-        let column_d = Field::new("d", DataType::Int32, false);
-        let schema = Arc::new(Schema::new(vec![column_a, column_b, column_c, column_d]));
-
-        Ok(schema)
-    }
-
-    // creates a stream exec source for test purposes
-    fn stream_exec_ordered(
-        schema: &SchemaRef,
-        sort_exprs: impl IntoIterator<Item = PhysicalSortExpr>,
-    ) -> Arc<dyn ExecutionPlan> {
-        let sort_exprs = sort_exprs.into_iter().collect();
-        let projection: Vec<usize> = vec![0, 2, 3];
-
-        Arc::new(
-            StreamingTableExec::try_new(
-                schema.clone(),
-                vec![Arc::new(TestStreamPartition {
-                    schema: schema.clone(),
-                }) as _],
-                Some(&projection),
-                vec![sort_exprs],
-                true,
-                None,
-            )
-            .unwrap(),
-        )
-    }
-
-    // creates a csv exec source for test purposes;
-    // the projection and has_header parameters are fixed to match the tests' needs
-    fn csv_exec_sorted(
-        schema: &SchemaRef,
-        sort_exprs: impl IntoIterator<Item = PhysicalSortExpr>,
-    ) -> Arc<dyn ExecutionPlan> {
-        let sort_exprs = sort_exprs.into_iter().collect();
-        let projection: Vec<usize> = vec![0, 2, 3];
-
-        Arc::new(
-            CsvExec::builder(
-                FileScanConfig::new(
-                    ObjectStoreUrl::parse("test:///").unwrap(),
-                    schema.clone(),
-                )
-                .with_file(PartitionedFile::new("file_path".to_string(), 100))
-                .with_projection(Some(projection))
-                .with_output_ordering(vec![sort_exprs]),
-            )
-            .with_has_header(true)
-            .with_delimeter(0)
-            .with_quote(b'"')
-            .with_escape(None)
-            .with_comment(None)
-            .with_newlines_in_values(false)
-            .with_file_compression_type(FileCompressionType::UNCOMPRESSED)
-            .build(),
-        )
-    }
-}
diff --git a/datafusion/physical-optimizer/src/sort_pushdown.rs b/datafusion/physical-optimizer/src/sort_pushdown.rs
index 4fc8fd1d28de3..a1319cdd5e193 100644
--- a/datafusion/physical-optimizer/src/sort_pushdown.rs
+++ b/datafusion/physical-optimizer/src/sort_pushdown.rs
@@ -52,7 +52,8 @@ pub fn assign_initial_requirements(node: &mut SortPushDown) {
     }
 }
 
-pub(crate) fn pushdown_sorts(
+// TODO: make this function pub(crate) once https://github.com/apache/datafusion/issues/11502 is resolved
+pub fn pushdown_sorts(
     mut requirements: SortPushDown,
 ) -> Result<Transformed<SortPushDown>> {
     let plan = &requirements.plan;
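
Note for reviewers: with `pushdown_sorts` exported as `pub`, a caller outside the
physical-optimizer crate (e.g. the `EnforceSorting` rule in core) can drive the
pushdown over a whole plan tree. The snippet below is a minimal sketch only; it
assumes the `SortPushDown::new_default` constructor and the `TreeNode::transform_down`
combinator from the surrounding codebase, neither of which is introduced by this patch.

    // Sketch (assumed usage): seed the traversal context for the root plan,
    // then let pushdown_sorts walk the tree top-down.
    // `plan` is the root Arc<dyn ExecutionPlan>, assumed to be in scope.
    let mut requirements = SortPushDown::new_default(plan);
    assign_initial_requirements(&mut requirements);
    let adjusted = requirements.transform_down(pushdown_sorts)?;
    let optimized_plan = adjusted.data.plan;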