diff --git a/.github/workflows/pr_comment.yml b/.github/workflows/pr_comment.yml deleted file mode 100644 index 8b6df1c75687..000000000000 --- a/.github/workflows/pr_comment.yml +++ /dev/null @@ -1,53 +0,0 @@ -# Downloads any `message` artifacts created by other jobs -# and posts them as comments to the PR -name: PR Comment - -on: - workflow_run: - workflows: ["Benchmarks"] - types: - - completed - -jobs: - comment: - name: PR Comment - runs-on: ubuntu-latest - if: github.event.workflow_run.conclusion == 'success' - steps: - - name: Dump GitHub context - env: - GITHUB_CONTEXT: ${{ toJSON(github) }} - run: echo "$GITHUB_CONTEXT" - - - name: Download comment message - uses: actions/download-artifact@v4 - with: - name: message - run-id: ${{ github.event.workflow_run.id }} - github-token: ${{ secrets.GITHUB_TOKEN }} - - - name: Download pr number - uses: actions/download-artifact@v4 - with: - name: pr - run-id: ${{ github.event.workflow_run.id }} - github-token: ${{ secrets.GITHUB_TOKEN }} - - - name: Print message and pr number - run: | - cat pr - echo "PR_NUMBER=$(cat pr)" >> "$GITHUB_ENV" - cat message.md - - - name: Post the comment - uses: actions/github-script@v7 - with: - script: | - const fs = require('fs'); - const content = fs.readFileSync('message.md', 'utf8'); - github.rest.issues.createComment({ - issue_number: process.env.PR_NUMBER, - owner: context.repo.owner, - repo: context.repo.repo, - body: content, - }) diff --git a/Cargo.toml b/Cargo.toml index ae344a46a1bd..50d234c39576 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ # under the License. [workspace] +# datafusion-cli is excluded because of its Cargo.lock. See datafusion-cli/README.md. exclude = ["datafusion-cli", "dev/depcheck"] members = [ "datafusion/common", @@ -33,7 +34,6 @@ members = [ "datafusion/optimizer", "datafusion/physical-expr", "datafusion/physical-expr-common", - "datafusion/physical-expr-functions-aggregate", "datafusion/physical-optimizer", "datafusion/physical-plan", "datafusion/proto", @@ -45,7 +45,6 @@ members = [ "datafusion/substrait", "datafusion/wasmtest", "datafusion-examples", - "docs", "test-utils", "benchmarks", ] @@ -107,7 +106,6 @@ datafusion-functions-window = { path = "datafusion/functions-window", version = datafusion-optimizer = { path = "datafusion/optimizer", version = "41.0.0", default-features = false } datafusion-physical-expr = { path = "datafusion/physical-expr", version = "41.0.0", default-features = false } datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "41.0.0", default-features = false } -datafusion-physical-expr-functions-aggregate = { path = "datafusion/physical-expr-functions-aggregate", version = "41.0.0" } datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "41.0.0" } datafusion-physical-plan = { path = "datafusion/physical-plan", version = "41.0.0" } datafusion-proto = { path = "datafusion/proto", version = "41.0.0" } @@ -121,7 +119,7 @@ futures = "0.3" half = { version = "2.2.1", default-features = false } hashbrown = { version = "0.14.5", features = ["raw"] } indexmap = "2.0.0" -itertools = "0.12" +itertools = "0.13" log = "^0.4" num_cpus = "1.13.0" object_store = { version = "0.10.2", default-features = false } diff --git a/benchmarks/queries/clickbench/README.md b/benchmarks/queries/clickbench/README.md index 560b54181d5f..c6bd8fe9d6f5 100644 --- a/benchmarks/queries/clickbench/README.md +++ b/benchmarks/queries/clickbench/README.md @@ -58,6 +58,21 @@ LIMIT 10; ``` +### Q3: What is the 
income distribution for users in specific regions + +**Question**: "What regions and social networks have the highest variance of parameter price + +**Important Query Properties**: STDDEV and VAR aggregation functions, GROUP BY multiple small ints + +```sql +SELECT "SocialSourceNetworkID", "RegionID", COUNT(*), AVG("Age"), AVG("ParamPrice"), STDDEV("ParamPrice") as s, VAR("ParamPrice") +FROM 'hits.parquet' +GROUP BY "SocialSourceNetworkID", "RegionID" +HAVING s IS NOT NULL +ORDER BY s DESC +LIMIT 10; +``` + ## Data Notes Here are some interesting statistics about the data used in the queries diff --git a/benchmarks/queries/clickbench/extended.sql b/benchmarks/queries/clickbench/extended.sql index 0a2999fceb49..2f814ad8450a 100644 --- a/benchmarks/queries/clickbench/extended.sql +++ b/benchmarks/queries/clickbench/extended.sql @@ -1,3 +1,4 @@ SELECT COUNT(DISTINCT "SearchPhrase"), COUNT(DISTINCT "MobilePhone"), COUNT(DISTINCT "MobilePhoneModel") FROM hits; SELECT COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserCountry"), COUNT(DISTINCT "BrowserLanguage") FROM hits; -SELECT "BrowserCountry", COUNT(DISTINCT "SocialNetwork"), COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserLanguage"), COUNT(DISTINCT "SocialAction") FROM hits GROUP BY 1 ORDER BY 2 DESC LIMIT 10; \ No newline at end of file +SELECT "BrowserCountry", COUNT(DISTINCT "SocialNetwork"), COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserLanguage"), COUNT(DISTINCT "SocialAction") FROM hits GROUP BY 1 ORDER BY 2 DESC LIMIT 10; +SELECT "SocialSourceNetworkID", "RegionID", COUNT(*), AVG("Age"), AVG("ParamPrice"), STDDEV("ParamPrice") as s, VAR("ParamPrice") FROM hits GROUP BY "SocialSourceNetworkID", "RegionID" HAVING s IS NOT NULL ORDER BY s DESC LIMIT 10; diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index a164b74c55a5..1e89bb3af87e 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -17,6 +17,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + [[package]] name = "adler32" version = "1.2.0" @@ -167,9 +173,9 @@ checksum = "9d151e35f61089500b617991b791fc8bfd237ae50cd5950803758a179b41e67a" [[package]] name = "arrayvec" -version = "0.7.4" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" @@ -430,7 +436,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -765,7 +771,7 @@ dependencies = [ "cc", "cfg-if", "libc", - "miniz_oxide", + "miniz_oxide 0.7.4", "object", "rustc-demangle", ] @@ -815,9 +821,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.3" +version = "1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9ec96fe9a81b5e365f9db71fe00edc4fe4ca2cc7dcb7861f0603012a7caa210" +checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7" dependencies = [ "arrayref", "arrayvec", @@ -999,7 +1005,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.74", + "syn 
2.0.75", ] [[package]] @@ -1155,7 +1161,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f" dependencies = [ "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -1206,7 +1212,6 @@ dependencies = [ "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", - "datafusion-physical-expr-functions-aggregate", "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-sql", @@ -1216,7 +1221,7 @@ dependencies = [ "half", "hashbrown", "indexmap", - "itertools 0.12.1", + "itertools", "log", "num-traits", "num_cpus", @@ -1295,12 +1300,14 @@ dependencies = [ "parquet", "paste", "sqlparser", + "tokio", ] [[package]] name = "datafusion-common-runtime" version = "41.0.0" dependencies = [ + "log", "tokio", ] @@ -1367,7 +1374,7 @@ dependencies = [ "datafusion-expr", "hashbrown", "hex", - "itertools 0.12.1", + "itertools", "log", "md-5", "rand", @@ -1422,7 +1429,8 @@ dependencies = [ "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", - "itertools 0.12.1", + "datafusion-physical-expr-common", + "itertools", "log", "paste", "rand", @@ -1450,7 +1458,7 @@ dependencies = [ "datafusion-physical-expr", "hashbrown", "indexmap", - "itertools 0.12.1", + "itertools", "log", "paste", "regex-syntax", @@ -1479,7 +1487,7 @@ dependencies = [ "hashbrown", "hex", "indexmap", - "itertools 0.12.1", + "itertools", "log", "paste", "petgraph", @@ -1498,20 +1506,6 @@ dependencies = [ "rand", ] -[[package]] -name = "datafusion-physical-expr-functions-aggregate" -version = "41.0.0" -dependencies = [ - "ahash", - "arrow", - "datafusion-common", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-physical-expr-common", - "rand", -] - [[package]] name = "datafusion-physical-optimizer" version = "41.0.0" @@ -1520,7 +1514,7 @@ dependencies = [ "datafusion-execution", "datafusion-physical-expr", "datafusion-physical-plan", - "itertools 0.12.1", + "itertools", ] [[package]] @@ -1543,12 +1537,11 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-common", - "datafusion-physical-expr-functions-aggregate", "futures", "half", "hashbrown", "indexmap", - "itertools 0.12.1", + "itertools", "log", "once_cell", "parking_lot", @@ -1740,12 +1733,12 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.31" +version = "1.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f211bbe8e69bbd0cfdea405084f128ae8b4aaa6b0b522fc8f2b009084797920" +checksum = "9c0596c1eac1f9e04ed902702e9878208b336edc9d6fddc8a48387349bab3666" dependencies = [ "crc32fast", - "miniz_oxide", + "miniz_oxide 0.8.0", ] [[package]] @@ -1828,7 +1821,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -1921,9 +1914,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab" +checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" dependencies = [ "atomic-waker", "bytes", @@ -2108,7 +2101,7 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "h2 0.4.5", + "h2 0.4.6", "http 1.1.0", "http-body 1.0.1", "httparse", @@ -2145,7 +2138,7 @@ 
dependencies = [ "hyper 1.4.1", "hyper-util", "rustls 0.23.12", - "rustls-native-certs 0.7.1", + "rustls-native-certs 0.7.2", "rustls-pki-types", "tokio", "tokio-rustls 0.26.0", @@ -2245,15 +2238,6 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.13.0" @@ -2359,9 +2343,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.156" +version = "0.2.158" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5f43f184355eefb8d17fc948dbecf6c13be3c141f20d834ae842193a448c72a" +checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" [[package]] name = "libflate" @@ -2495,6 +2479,15 @@ dependencies = [ "adler", ] +[[package]] +name = "miniz_oxide" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +dependencies = [ + "adler2", +] + [[package]] name = "mio" version = "1.0.2" @@ -2645,7 +2638,7 @@ dependencies = [ "futures", "humantime", "hyper 1.4.1", - "itertools 0.13.0", + "itertools", "md-5", "parking_lot", "percent-encoding", @@ -2835,7 +2828,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -3034,9 +3027,9 @@ dependencies = [ [[package]] name = "redox_users" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ "getrandom", "libredox", @@ -3080,15 +3073,15 @@ checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "reqwest" -version = "0.12.5" +version = "0.12.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7d6d2a27d57148378eb5e111173f4276ad26340ecc5c49a4a2152167a2d6a37" +checksum = "f8f4955649ef5c38cc7f9e8aa41761d48fb9677197daea9984dc54f56aad5e63" dependencies = [ "base64 0.22.1", "bytes", "futures-core", "futures-util", - "h2 0.4.5", + "h2 0.4.6", "http 1.1.0", "http-body 1.0.1", "http-body-util", @@ -3104,7 +3097,7 @@ dependencies = [ "pin-project-lite", "quinn", "rustls 0.23.12", - "rustls-native-certs 0.7.1", + "rustls-native-certs 0.7.2", "rustls-pemfile 2.1.3", "rustls-pki-types", "serde", @@ -3120,7 +3113,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "winreg", + "windows-registry", ] [[package]] @@ -3259,9 +3252,9 @@ dependencies = [ [[package]] name = "rustls-native-certs" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a88d6d420651b496bdd98684116959239430022a115c1240e6c3993be0b15fba" +checksum = "04182dffc9091a404e0fc069ea5cd60e5b866c3adf881eff99a32d048242dffa" dependencies = [ "openssl-probe", "rustls-pemfile 2.1.3", @@ -3427,7 +3420,7 @@ checksum = "24008e81ff7613ed8e5ba0cfaf24e2c2f1e5b8a0495711e44fcd4882fca62bcf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] 
@@ -3569,7 +3562,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -3615,7 +3608,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -3628,7 +3621,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -3650,9 +3643,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.74" +version = "2.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fceb41e3d546d0bd83421d3409b1460cc7444cd389341a4c880fe7a042cb3d7" +checksum = "f6af063034fc1935ede7be0122941bafa9bacb949334d090b77ca98b5817c7d9" dependencies = [ "proc-macro2", "quote", @@ -3664,6 +3657,9 @@ name = "sync_wrapper" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +dependencies = [ + "futures-core", +] [[package]] name = "tempfile" @@ -3710,7 +3706,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -3780,9 +3776,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.39.2" +version = "1.39.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daa4fb1bc778bd6f04cbfc4bb2d06a7396a8f299dc33ea1900cedaa316f467b1" +checksum = "9babc99b9923bfa4804bd74722ff02c0381021eafa4db9949217e3be8e84fff5" dependencies = [ "backtrace", "bytes", @@ -3804,7 +3800,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -3901,7 +3897,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -3946,7 +3942,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -4101,7 +4097,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", "wasm-bindgen-shared", ] @@ -4135,7 +4131,7 @@ checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4219,6 +4215,36 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-registry" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" +dependencies = [ + "windows-result", + "windows-strings", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result", + "windows-targets 0.52.6", +] + [[package]] name = 
"windows-sys" version = "0.48.0" @@ -4367,16 +4393,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" -[[package]] -name = "winreg" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" -dependencies = [ - "cfg-if", - "windows-sys 0.48.0", -] - [[package]] name = "xmlparser" version = "0.13.6" @@ -4410,7 +4426,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] diff --git a/datafusion-examples/examples/advanced_parquet_index.rs b/datafusion-examples/examples/advanced_parquet_index.rs index 903defafe3ab..f6860bb5b87a 100644 --- a/datafusion-examples/examples/advanced_parquet_index.rs +++ b/datafusion-examples/examples/advanced_parquet_index.rs @@ -300,7 +300,7 @@ impl IndexTableProvider { // analyze the predicate. In a real system, using // `PruningPredicate::prune` would likely be easier to do. let pruning_predicate = - PruningPredicate::try_new(Arc::clone(predicate), self.schema().clone())?; + PruningPredicate::try_new(Arc::clone(predicate), self.schema())?; // The PruningPredicate's guarantees must all be satisfied in order for // the predicate to possibly evaluate to true. diff --git a/datafusion-examples/examples/custom_file_format.rs b/datafusion-examples/examples/custom_file_format.rs index 8612a1cc4430..1d9b587f15b9 100644 --- a/datafusion-examples/examples/custom_file_format.rs +++ b/datafusion-examples/examples/custom_file_format.rs @@ -23,6 +23,7 @@ use arrow::{ }; use arrow_schema::{DataType, Field, Schema, SchemaRef}; use datafusion::execution::session_state::SessionStateBuilder; +use datafusion::physical_expr::LexRequirement; use datafusion::{ datasource::{ file_format::{ @@ -38,7 +39,7 @@ use datafusion::{ prelude::SessionContext, }; use datafusion_common::{GetExt, Statistics}; -use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; +use datafusion_physical_expr::PhysicalExpr; use object_store::{ObjectMeta, ObjectStore}; use tempfile::tempdir; @@ -123,7 +124,7 @@ impl FileFormat for TSVFileFormat { input: Arc, state: &SessionState, conf: FileSinkConfig, - order_requirements: Option>, + order_requirements: Option, ) -> Result> { self.csv_file_format .create_writer_physical_plan(input, state, conf, order_requirements) diff --git a/datafusion-examples/examples/file_stream_provider.rs b/datafusion-examples/examples/file_stream_provider.rs index b8549bd6b6e6..4db7e0200f53 100644 --- a/datafusion-examples/examples/file_stream_provider.rs +++ b/datafusion-examples/examples/file_stream_provider.rs @@ -100,7 +100,7 @@ mod non_windows { ) { // Timeout for a long period of BrokenPipe error let broken_pipe_timeout = Duration::from_secs(10); - let sa = file_path.clone(); + let sa = file_path; // Spawn a new thread to write to the FIFO file #[allow(clippy::disallowed_methods)] // spawn allowed only in tests tasks.spawn_blocking(move || { diff --git a/datafusion/catalog/src/table.rs b/datafusion/catalog/src/table.rs index 792315642a00..69fa81faf8e2 100644 --- a/datafusion/catalog/src/table.rs +++ b/datafusion/catalog/src/table.rs @@ -16,6 +16,7 @@ // under the License. 
use std::any::Any; +use std::borrow::Cow; use std::sync::Arc; use crate::session::Session; @@ -56,8 +57,8 @@ pub trait TableProvider: Sync + Send { None } - /// Get the [`LogicalPlan`] of this table, if available - fn get_logical_plan(&self) -> Option<&LogicalPlan> { + /// Get the [`LogicalPlan`] of this table, if available. + fn get_logical_plan(&self) -> Option> { None } diff --git a/datafusion/common-runtime/Cargo.toml b/datafusion/common-runtime/Cargo.toml index c10436087675..a21c72cd9f83 100644 --- a/datafusion/common-runtime/Cargo.toml +++ b/datafusion/common-runtime/Cargo.toml @@ -36,4 +36,8 @@ name = "datafusion_common_runtime" path = "src/lib.rs" [dependencies] +log = { workspace = true } tokio = { workspace = true } + +[dev-dependencies] +tokio = { version = "1.36", features = ["rt", "rt-multi-thread", "time"] } diff --git a/datafusion/common-runtime/src/common.rs b/datafusion/common-runtime/src/common.rs index 2f7ddb972f42..30f7526bc0b2 100644 --- a/datafusion/common-runtime/src/common.rs +++ b/datafusion/common-runtime/src/common.rs @@ -60,8 +60,8 @@ impl SpawnedTask { } /// Joins the task and unwinds the panic if it happens. - pub async fn join_unwind(self) -> R { - self.join().await.unwrap_or_else(|e| { + pub async fn join_unwind(self) -> Result { + self.join().await.map_err(|e| { // `JoinError` can be caused either by panic or cancellation. We have to handle panics: if e.is_panic() { std::panic::resume_unwind(e.into_panic()); @@ -69,9 +69,53 @@ impl SpawnedTask { // Cancellation may be caused by two reasons: // 1. Abort is called, but since we consumed `self`, it's not our case (`JoinHandle` not accessible outside). // 2. The runtime is shutting down. - // So we consider this branch as unreachable. - unreachable!("SpawnedTask was cancelled unexpectedly"); + log::warn!("SpawnedTask was polled during shutdown"); + e } }) } } + +#[cfg(test)] +mod tests { + use super::*; + + use std::future::{pending, Pending}; + + use tokio::runtime::Runtime; + + #[tokio::test] + async fn runtime_shutdown() { + let rt = Runtime::new().unwrap(); + let task = rt + .spawn(async { + SpawnedTask::spawn(async { + let fut: Pending<()> = pending(); + fut.await; + unreachable!("should never return"); + }) + }) + .await + .unwrap(); + + // caller shutdown their DF runtime (e.g. 
timeout, error in caller, etc) + rt.shutdown_background(); + + // race condition + // poll occurs during shutdown (buffered stream poll calls, etc) + assert!(matches!( + task.join_unwind().await, + Err(e) if e.is_cancelled() + )); + } + + #[tokio::test] + #[should_panic(expected = "foo")] + async fn panic_resume() { + // this should panic w/o an `unwrap` + SpawnedTask::spawn(async { panic!("foo") }) + .join_unwind() + .await + .ok(); + } +} diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 8435d0632576..79e20ba1215c 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -63,6 +63,7 @@ parquet = { workspace = true, optional = true, default-features = true } paste = "1.0.15" pyo3 = { version = "0.21.0", optional = true } sqlparser = { workspace = true } +tokio = { workspace = true } [target.'cfg(target_family = "wasm")'.dependencies] instant = { version = "0.1", features = ["wasm-bindgen"] } diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index f0eecd2ffeb1..095f4c510194 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -1242,10 +1242,9 @@ mod tests { #[test] fn into() { // Demonstrate how to convert back and forth between Schema, SchemaRef, DFSchema, and DFSchemaRef - let metadata = test_metadata(); let arrow_schema = Schema::new_with_metadata( vec![Field::new("c0", DataType::Int64, true)], - metadata.clone(), + test_metadata(), ); let arrow_schema_ref = Arc::new(arrow_schema.clone()); diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index 27a25d0c9dd5..05988d6c6da4 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -34,6 +34,7 @@ use arrow::error::ArrowError; #[cfg(feature = "parquet")] use parquet::errors::ParquetError; use sqlparser::parser::ParserError; +use tokio::task::JoinError; /// Result type for operations that could result in an [DataFusionError] pub type Result<T, E = DataFusionError> = result::Result<T, E>; @@ -112,6 +113,10 @@ pub enum DataFusionError { /// SQL method, opened a CSV file that is broken, or tried to divide an /// integer by zero. Execution(String), + /// [`JoinError`] during execution of the query. + /// + /// This error can occur for unjoined tasks, such as execution shutdown. + ExecutionJoin(JoinError), /// Error when resources (such as memory or scratch disk space) are exhausted. 
/// /// This error is thrown when a consumer cannot acquire additional memory @@ -306,6 +311,7 @@ impl Error for DataFusionError { DataFusionError::Plan(_) => None, DataFusionError::SchemaError(e, _) => Some(e), DataFusionError::Execution(_) => None, + DataFusionError::ExecutionJoin(e) => Some(e), DataFusionError::ResourcesExhausted(_) => None, DataFusionError::External(e) => Some(e.as_ref()), DataFusionError::Context(_, e) => Some(e.as_ref()), @@ -418,6 +424,7 @@ impl DataFusionError { DataFusionError::Configuration(_) => "Invalid or Unsupported Configuration: ", DataFusionError::SchemaError(_, _) => "Schema error: ", DataFusionError::Execution(_) => "Execution error: ", + DataFusionError::ExecutionJoin(_) => "ExecutionJoin error: ", DataFusionError::ResourcesExhausted(_) => "Resources exhausted: ", DataFusionError::External(_) => "External error: ", DataFusionError::Context(_, _) => "", @@ -453,6 +460,7 @@ impl DataFusionError { Cow::Owned(format!("{desc}{backtrace}")) } DataFusionError::Execution(ref desc) => Cow::Owned(desc.to_string()), + DataFusionError::ExecutionJoin(ref desc) => Cow::Owned(desc.to_string()), DataFusionError::ResourcesExhausted(ref desc) => Cow::Owned(desc.to_string()), DataFusionError::External(ref desc) => Cow::Owned(desc.to_string()), #[cfg(feature = "object_store")] diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index 4a229fe01b54..e42fb96ed6a5 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -565,7 +565,7 @@ mod tests { column_options_with_non_defaults(&parquet_options), )] .into(), - key_value_metadata: [(key.clone(), value.clone())].into(), + key_value_metadata: [(key, value)].into(), }; let writer_props = WriterPropertiesBuilder::try_from(&table_parquet_opts) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 677685b2c65b..5acc2b6f188e 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -4905,7 +4905,7 @@ mod tests { let data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); - assert_eq!(non_null_list_scalar.data_type(), data_type.clone()); + assert_eq!(non_null_list_scalar.data_type(), data_type); assert_eq!(null_list_scalar.data_type(), data_type); } @@ -5582,13 +5582,13 @@ mod tests { // Define list-of-structs scalars - let nl0_array = ScalarValue::iter_to_array(vec![s0.clone(), s1.clone()]).unwrap(); + let nl0_array = ScalarValue::iter_to_array(vec![s0, s1.clone()]).unwrap(); let nl0 = ScalarValue::List(Arc::new(array_into_list_array_nullable(nl0_array))); - let nl1_array = ScalarValue::iter_to_array(vec![s2.clone()]).unwrap(); + let nl1_array = ScalarValue::iter_to_array(vec![s2]).unwrap(); let nl1 = ScalarValue::List(Arc::new(array_into_list_array_nullable(nl1_array))); - let nl2_array = ScalarValue::iter_to_array(vec![s1.clone()]).unwrap(); + let nl2_array = ScalarValue::iter_to_array(vec![s1]).unwrap(); let nl2 = ScalarValue::List(Arc::new(array_into_list_array_nullable(nl2_array))); // iter_to_array for list-of-struct diff --git a/datafusion/common/src/stats.rs b/datafusion/common/src/stats.rs index 50ae4e3ca71f..d8e62b3045f9 100644 --- a/datafusion/common/src/stats.rs +++ b/datafusion/common/src/stats.rs @@ -557,6 +557,7 @@ mod tests { let precision: Precision = Precision::Exact(ScalarValue::Int64(Some(42))); // Clippy would complain about this if it were Copy + 
#[allow(clippy::redundant_clone)] let p2 = precision.clone(); assert_eq!(precision, p2); } diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index d7059e882e55..839f890bf077 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -20,6 +20,7 @@ pub mod expr; pub mod memory; pub mod proxy; +pub mod string_utils; use crate::error::{_internal_datafusion_err, _internal_err}; use crate::{arrow_datafusion_err, DataFusionError, Result, ScalarValue}; diff --git a/datafusion/physical-expr-functions-aggregate/src/lib.rs b/datafusion/common/src/utils/string_utils.rs similarity index 59% rename from datafusion/physical-expr-functions-aggregate/src/lib.rs rename to datafusion/common/src/utils/string_utils.rs index 2ff7ff5777ec..a2231e6786a7 100644 --- a/datafusion/physical-expr-functions-aggregate/src/lib.rs +++ b/datafusion/common/src/utils/string_utils.rs @@ -15,6 +15,17 @@ // specific language governing permissions and limitations // under the License. -//! Technically, all aggregate functions that depend on `expr` crate should be included here. +//! Utilities for working with strings -pub mod aggregate; +use arrow::{array::AsArray, datatypes::DataType}; +use arrow_array::Array; + +/// Convenient function to convert an Arrow string array to a vector of strings +pub fn string_array_to_vec(array: &dyn Array) -> Vec> { + match array.data_type() { + DataType::Utf8 => array.as_string::().iter().collect(), + DataType::LargeUtf8 => array.as_string::().iter().collect(), + DataType::Utf8View => array.as_string_view().iter().collect(), + _ => unreachable!(), + } +} diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index adbba3eb31d6..de228e058096 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -110,7 +110,6 @@ datafusion-functions-window = { workspace = true } datafusion-optimizer = { workspace = true } datafusion-physical-expr = { workspace = true } datafusion-physical-expr-common = { workspace = true } -datafusion-physical-expr-functions-aggregate = { workspace = true } datafusion-physical-optimizer = { workspace = true } datafusion-physical-plan = { workspace = true } datafusion-sql = { workspace = true } diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 42203e5fe84e..c516c7985d54 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -21,6 +21,7 @@ mod parquet; use std::any::Any; +use std::borrow::Cow; use std::collections::HashMap; use std::sync::Arc; @@ -1648,8 +1649,8 @@ impl TableProvider for DataFrameTableProvider { self } - fn get_logical_plan(&self) -> Option<&LogicalPlan> { - Some(&self.plan) + fn get_logical_plan(&self) -> Option> { + Some(Cow::Borrowed(&self.plan)) } fn supports_filters_pushdown( @@ -1707,7 +1708,7 @@ mod tests { use crate::test_util::{register_aggregate_csv, test_table, test_table_with_name}; use arrow::array::{self, Int32Array}; - use datafusion_common::{Constraint, Constraints, ScalarValue}; + use datafusion_common::{assert_batches_eq, Constraint, Constraints, ScalarValue}; use datafusion_common_runtime::SpawnedTask; use datafusion_expr::expr::WindowFunction; use datafusion_expr::{ @@ -3699,4 +3700,32 @@ mod tests { assert!(result.is_err()); Ok(()) } + + // Test issue: https://github.com/apache/datafusion/issues/12065 + #[tokio::test] + async fn filtered_aggr_with_param_values() -> Result<()> { + let cfg = SessionConfig::new() + .set("datafusion.sql_parser.dialect", 
"PostgreSQL".into()); + let ctx = SessionContext::new_with_config(cfg); + register_aggregate_csv(&ctx, "table1").await?; + + let df = ctx + .sql("select count (c2) filter (where c3 > $1) from table1") + .await? + .with_param_values(ParamValues::List(vec![ScalarValue::from(10u64)])); + + let df_results = df?.collect().await?; + assert_batches_eq!( + &[ + "+------------------------------------------------+", + "| count(table1.c2) FILTER (WHERE table1.c3 > $1) |", + "+------------------------------------------------+", + "| 54 |", + "+------------------------------------------------+", + ], + &df_results + ); + + Ok(()) + } } diff --git a/datafusion/core/src/datasource/cte_worktable.rs b/datafusion/core/src/datasource/cte_worktable.rs index d7d224828dda..d2da15c64f52 100644 --- a/datafusion/core/src/datasource/cte_worktable.rs +++ b/datafusion/core/src/datasource/cte_worktable.rs @@ -17,8 +17,8 @@ //! CteWorkTable implementation used for recursive queries -use std::any::Any; use std::sync::Arc; +use std::{any::Any, borrow::Cow}; use arrow::datatypes::SchemaRef; use async_trait::async_trait; @@ -63,7 +63,7 @@ impl TableProvider for CteWorkTable { self } - fn get_logical_plan(&self) -> Option<&LogicalPlan> { + fn get_logical_plan(&self) -> Option> { None } diff --git a/datafusion/core/src/datasource/default_table_source.rs b/datafusion/core/src/datasource/default_table_source.rs index 977e681d6641..b4a5a76fc9ff 100644 --- a/datafusion/core/src/datasource/default_table_source.rs +++ b/datafusion/core/src/datasource/default_table_source.rs @@ -17,8 +17,8 @@ //! Default TableSource implementation used in DataFusion physical plans -use std::any::Any; use std::sync::Arc; +use std::{any::Any, borrow::Cow}; use crate::datasource::TableProvider; @@ -70,7 +70,7 @@ impl TableSource for DefaultTableSource { self.table_provider.supports_filters_pushdown(filter) } - fn get_logical_plan(&self) -> Option<&datafusion_expr::LogicalPlan> { + fn get_logical_plan(&self) -> Option> { self.table_provider.get_logical_plan() } diff --git a/datafusion/core/src/datasource/file_format/arrow.rs b/datafusion/core/src/datasource/file_format/arrow.rs index 8b6a8800119d..6ee4280956e8 100644 --- a/datafusion/core/src/datasource/file_format/arrow.rs +++ b/datafusion/core/src/datasource/file_format/arrow.rs @@ -47,12 +47,13 @@ use datafusion_common::{ not_impl_err, DataFusionError, GetExt, Statistics, DEFAULT_ARROW_EXTENSION, }; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; -use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; +use datafusion_physical_expr::PhysicalExpr; use datafusion_physical_plan::insert::{DataSink, DataSinkExec}; use datafusion_physical_plan::metrics::MetricsSet; use async_trait::async_trait; use bytes::Bytes; +use datafusion_physical_expr_common::sort_expr::LexRequirement; use futures::stream::BoxStream; use futures::StreamExt; use object_store::{GetResultPayload, ObjectMeta, ObjectStore}; @@ -178,7 +179,7 @@ impl FileFormat for ArrowFormat { input: Arc, _state: &SessionState, conf: FileSinkConfig, - order_requirements: Option>, + order_requirements: Option, ) -> Result> { if conf.overwrite { return not_impl_err!("Overwrites are not implemented yet for Arrow format"); @@ -341,7 +342,10 @@ impl DataSink for ArrowFileSink { } } - demux_task.join_unwind().await?; + demux_task + .join_unwind() + .await + .map_err(DataFusionError::ExecutionJoin)??; Ok(row_count as u64) } } diff --git a/datafusion/core/src/datasource/file_format/csv.rs 
b/datafusion/core/src/datasource/file_format/csv.rs index 24d55ea54068..e43f6ab29abc 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -46,11 +46,12 @@ use datafusion_common::{ exec_err, not_impl_err, DataFusionError, GetExt, DEFAULT_CSV_EXTENSION, }; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; +use datafusion_physical_expr::PhysicalExpr; use datafusion_physical_plan::metrics::MetricsSet; use async_trait::async_trait; use bytes::{Buf, Bytes}; +use datafusion_physical_expr_common::sort_expr::LexRequirement; use futures::stream::BoxStream; use futures::{pin_mut, Stream, StreamExt, TryStreamExt}; use object_store::{delimited::newline_delimited_stream, ObjectMeta, ObjectStore}; @@ -371,7 +372,7 @@ impl FileFormat for CsvFormat { input: Arc, state: &SessionState, conf: FileSinkConfig, - order_requirements: Option>, + order_requirements: Option, ) -> Result> { if conf.overwrite { return not_impl_err!("Overwrites are not implemented yet for CSV"); @@ -679,7 +680,7 @@ mod tests { use datafusion_common::cast::as_string_array; use datafusion_common::internal_err; use datafusion_common::stats::Precision; - use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; + use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_expr::{col, lit}; use crate::execution::session_state::SessionStateBuilder; @@ -862,7 +863,7 @@ mod tests { async fn query_compress_data( file_compression_type: FileCompressionType, ) -> Result<()> { - let runtime = Arc::new(RuntimeEnv::new(RuntimeConfig::new()).unwrap()); + let runtime = Arc::new(RuntimeEnvBuilder::new().build()?); let mut cfg = SessionConfig::new(); cfg.options_mut().catalog.has_header = true; let session_state = SessionStateBuilder::new() diff --git a/datafusion/core/src/datasource/file_format/json.rs b/datafusion/core/src/datasource/file_format/json.rs index 7c579e890c8c..4471d7d6cb31 100644 --- a/datafusion/core/src/datasource/file_format/json.rs +++ b/datafusion/core/src/datasource/file_format/json.rs @@ -46,12 +46,13 @@ use datafusion_common::config::{ConfigField, ConfigFileType, JsonOptions}; use datafusion_common::file_options::json_writer::JsonWriterOptions; use datafusion_common::{not_impl_err, GetExt, DEFAULT_JSON_EXTENSION}; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; +use datafusion_physical_expr::PhysicalExpr; use datafusion_physical_plan::metrics::MetricsSet; use datafusion_physical_plan::ExecutionPlan; use async_trait::async_trait; use bytes::{Buf, Bytes}; +use datafusion_physical_expr_common::sort_expr::LexRequirement; use object_store::{GetResultPayload, ObjectMeta, ObjectStore}; #[derive(Default)] @@ -249,7 +250,7 @@ impl FileFormat for JsonFormat { input: Arc, _state: &SessionState, conf: FileSinkConfig, - order_requirements: Option>, + order_requirements: Option, ) -> Result> { if conf.overwrite { return not_impl_err!("Overwrites are not implemented yet for Json"); diff --git a/datafusion/core/src/datasource/file_format/mod.rs b/datafusion/core/src/datasource/file_format/mod.rs index a324a4578424..d21464b74b53 100644 --- a/datafusion/core/src/datasource/file_format/mod.rs +++ b/datafusion/core/src/datasource/file_format/mod.rs @@ -45,12 +45,14 @@ use crate::physical_plan::{ExecutionPlan, Statistics}; use arrow_schema::{DataType, Field, Schema}; use datafusion_common::file_options::file_type::FileType; use 
datafusion_common::{internal_err, not_impl_err, GetExt}; -use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; +use datafusion_physical_expr::PhysicalExpr; use async_trait::async_trait; +use datafusion_physical_expr_common::sort_expr::LexRequirement; use file_compression_type::FileCompressionType; use object_store::{ObjectMeta, ObjectStore}; use std::fmt::Debug; + /// Factory for creating [`FileFormat`] instances based on session and command level options /// /// Users can provide their own `FileFormatFactory` to support arbitrary file formats @@ -132,7 +134,7 @@ pub trait FileFormat: Send + Sync + fmt::Debug { _input: Arc, _state: &SessionState, _conf: FileSinkConfig, - _order_requirements: Option>, + _order_requirements: Option, ) -> Result> { not_impl_err!("Writer not implemented for this format") } diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index f233f3842c8c..23e765f0f2cd 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -51,7 +51,7 @@ use datafusion_common_runtime::SpawnedTask; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryPool, MemoryReservation}; use datafusion_execution::TaskContext; use datafusion_functions_aggregate::min_max::{MaxAccumulator, MinAccumulator}; -use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; +use datafusion_physical_expr::PhysicalExpr; use datafusion_physical_plan::metrics::MetricsSet; use async_trait::async_trait; @@ -76,6 +76,7 @@ use tokio::sync::mpsc::{self, Receiver, Sender}; use tokio::task::JoinSet; use crate::datasource::physical_plan::parquet::ParquetExecBuilder; +use datafusion_physical_expr_common::sort_expr::LexRequirement; use futures::{StreamExt, TryStreamExt}; use object_store::path::Path; use object_store::{ObjectMeta, ObjectStore}; @@ -376,7 +377,7 @@ impl FileFormat for ParquetFormat { input: Arc, _state: &SessionState, conf: FileSinkConfig, - order_requirements: Option>, + order_requirements: Option, ) -> Result> { if conf.overwrite { return not_impl_err!("Overwrites are not implemented yet for Parquet"); @@ -836,7 +837,10 @@ impl DataSink for ParquetSink { } } - demux_task.join_unwind().await?; + demux_task + .join_unwind() + .await + .map_err(DataFusionError::ExecutionJoin)??; Ok(row_count as u64) } @@ -942,7 +946,10 @@ fn spawn_rg_join_and_finalize_task( let num_cols = column_writer_tasks.len(); let mut finalized_rg = Vec::with_capacity(num_cols); for task in column_writer_tasks.into_iter() { - let (writer, _col_reservation) = task.join_unwind().await?; + let (writer, _col_reservation) = task + .join_unwind() + .await + .map_err(DataFusionError::ExecutionJoin)??; let encoded_size = writer.get_estimated_total_bytes(); rg_reservation.grow(encoded_size); finalized_rg.push(writer.close()?); @@ -1070,7 +1077,8 @@ async fn concatenate_parallel_row_groups( while let Some(task) = serialize_rx.recv().await { let result = task.join_unwind().await; let mut rg_out = parquet_writer.next_row_group()?; - let (serialized_columns, mut rg_reservation, _cnt) = result?; + let (serialized_columns, mut rg_reservation, _cnt) = + result.map_err(DataFusionError::ExecutionJoin)??; for chunk in serialized_columns { chunk.append_to_row_group(&mut rg_out)?; rg_reservation.free(); @@ -1134,7 +1142,10 @@ async fn output_single_parquet_file_parallelized( ) .await?; - launch_serialization_task.join_unwind().await?; + launch_serialization_task + .join_unwind() + 
.await + .map_err(DataFusionError::ExecutionJoin)??; Ok(file_metadata) } diff --git a/datafusion/core/src/datasource/file_format/write/orchestration.rs b/datafusion/core/src/datasource/file_format/write/orchestration.rs index 1d32063ee9f3..6f27e6f3889f 100644 --- a/datafusion/core/src/datasource/file_format/write/orchestration.rs +++ b/datafusion/core/src/datasource/file_format/write/orchestration.rs @@ -298,8 +298,8 @@ pub(crate) async fn stateless_multipart_put( write_coordinator_task.join_unwind(), demux_task.join_unwind() ); - r1?; - r2?; + r1.map_err(DataFusionError::ExecutionJoin)??; + r2.map_err(DataFusionError::ExecutionJoin)??; let total_count = rx_row_cnt.await.map_err(|_| { internal_datafusion_err!("Did not receive row count from write coordinator") diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs index b5dd2dd12e10..f6e938b72dab 100644 --- a/datafusion/core/src/datasource/listing/helpers.rs +++ b/datafusion/core/src/datasource/listing/helpers.rs @@ -282,7 +282,7 @@ async fn prune_partitions( Default::default(), )?; - let batch = RecordBatch::try_new(schema.clone(), arrays)?; + let batch = RecordBatch::try_new(schema, arrays)?; // TODO: Plumb this down let props = ExecutionProps::new(); diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 89066d8234ac..a0345a38e40c 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -1016,7 +1016,7 @@ impl ListingTable { .collected_statistics .get_with_extra(&part_file.object_meta.location, &part_file.object_meta) { - Some(statistics) => Ok(statistics.clone()), + Some(statistics) => Ok(statistics), None => { let statistics = self .options diff --git a/datafusion/core/src/datasource/physical_plan/file_groups.rs b/datafusion/core/src/datasource/physical_plan/file_groups.rs index 6456bd5c7276..28f975ae193d 100644 --- a/datafusion/core/src/datasource/physical_plan/file_groups.rs +++ b/datafusion/core/src/datasource/physical_plan/file_groups.rs @@ -256,7 +256,7 @@ impl FileGroupPartitioner { }, ) .flatten() - .group_by(|(partition_idx, _)| *partition_idx) + .chunk_by(|(partition_idx, _)| *partition_idx) .into_iter() .map(|(_, group)| group.map(|(_, vals)| vals).collect_vec()) .collect_vec(); @@ -394,7 +394,7 @@ mod test { #[test] fn repartition_empty_file_only() { let partitioned_file_empty = pfile("empty", 0); - let file_group = vec![vec![partitioned_file_empty.clone()]]; + let file_group = vec![vec![partitioned_file_empty]]; let partitioned_files = FileGroupPartitioner::new() .with_target_partitions(4) @@ -817,10 +817,7 @@ mod test { .with_preserve_order_within_groups(true) .repartition_file_groups(&file_groups); - assert_partitioned_files( - repartitioned.clone(), - repartitioned_preserving_sort.clone(), - ); + assert_partitioned_files(repartitioned.clone(), repartitioned_preserving_sort); repartitioned } } diff --git a/datafusion/core/src/datasource/physical_plan/file_scan_config.rs b/datafusion/core/src/datasource/physical_plan/file_scan_config.rs index 34fb6226c1a2..bfa5488e5b5e 100644 --- a/datafusion/core/src/datasource/physical_plan/file_scan_config.rs +++ b/datafusion/core/src/datasource/physical_plan/file_scan_config.rs @@ -908,7 +908,7 @@ mod tests { schema.clone(), Some(vec![0, 3, 5, schema.fields().len()]), Statistics::new_unknown(&schema), - to_partition_cols(partition_cols.clone()), + to_partition_cols(partition_cols), ) 
.projected_file_schema(); @@ -941,7 +941,7 @@ mod tests { schema.clone(), None, Statistics::new_unknown(&schema), - to_partition_cols(partition_cols.clone()), + to_partition_cols(partition_cols), ) .projected_file_schema(); diff --git a/datafusion/core/src/datasource/schema_adapter.rs b/datafusion/core/src/datasource/schema_adapter.rs index 40cb40a83af2..5d2d0ff91b15 100644 --- a/datafusion/core/src/datasource/schema_adapter.rs +++ b/datafusion/core/src/datasource/schema_adapter.rs @@ -369,7 +369,7 @@ mod tests { let f1 = Field::new("id", DataType::Int32, true); let f2 = Field::new("extra_column", DataType::Utf8, true); - let schema = Arc::new(Schema::new(vec![f1.clone(), f2.clone()])); + let schema = Arc::new(Schema::new(vec![f1, f2])); let extra_column = Arc::new(StringArray::from(vec!["foo"])); let mut new_columns = batch.columns().to_vec(); diff --git a/datafusion/core/src/datasource/stream.rs b/datafusion/core/src/datasource/stream.rs index 682565aea909..b53fe8663178 100644 --- a/datafusion/core/src/datasource/stream.rs +++ b/datafusion/core/src/datasource/stream.rs @@ -438,6 +438,9 @@ impl DataSink for StreamWrite { } } drop(sender); - write_task.join_unwind().await + write_task + .join_unwind() + .await + .map_err(DataFusionError::ExecutionJoin)? } } diff --git a/datafusion/core/src/datasource/view.rs b/datafusion/core/src/datasource/view.rs index a81942bf769e..947714c1e4f9 100644 --- a/datafusion/core/src/datasource/view.rs +++ b/datafusion/core/src/datasource/view.rs @@ -17,7 +17,7 @@ //! View data source which uses a LogicalPlan as it's input. -use std::{any::Any, sync::Arc}; +use std::{any::Any, borrow::Cow, sync::Arc}; use crate::{ error::Result, @@ -90,8 +90,8 @@ impl TableProvider for ViewTable { self } - fn get_logical_plan(&self) -> Option<&LogicalPlan> { - Some(&self.logical_plan) + fn get_logical_plan(&self) -> Option> { + Some(Cow::Borrowed(&self.logical_plan)) } fn schema(&self) -> SchemaRef { diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 972a6f643733..c67424c0fa53 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -128,6 +128,9 @@ where /// the state of the connection between a user and an instance of the /// DataFusion engine. /// +/// See examples below for how to use the `SessionContext` to execute queries +/// and how to configure the session. 
+/// /// # Overview /// /// [`SessionContext`] provides the following functionality: @@ -200,7 +203,38 @@ where /// # } /// ``` /// -/// # `SessionContext`, `SessionState`, and `TaskContext` +/// # Example: Configuring `SessionContext` +/// +/// The `SessionContext` can be configured by creating a [`SessionState`] using +/// [`SessionStateBuilder`]: +/// +/// ``` +/// # use std::sync::Arc; +/// # use datafusion::prelude::*; +/// # use datafusion::execution::SessionStateBuilder; +/// # use datafusion_execution::runtime_env::RuntimeEnvBuilder; +/// // Configure a 4k batch size +/// let config = SessionConfig::new() .with_batch_size(4 * 1024); +/// +/// // configure a memory limit of 1GB with 20% slop +/// let runtime_env = RuntimeEnvBuilder::new() +/// .with_memory_limit(1024 * 1024 * 1024, 0.80) +/// .build() +/// .unwrap(); +/// +/// // Create a SessionState using the config and runtime_env +/// let state = SessionStateBuilder::new() +/// .with_config(config) +/// .with_runtime_env(Arc::new(runtime_env)) +/// // include support for built in functions and configurations +/// .with_default_features() +/// .build(); +/// +/// // Create a SessionContext +/// let ctx = SessionContext::from(state); +/// ``` +/// +/// # Relationship between `SessionContext`, `SessionState`, and `TaskContext` /// /// The state required to optimize, and evaluate queries is /// broken into three levels to allow tailoring @@ -654,7 +688,7 @@ impl SessionContext { column_defaults, } = cmd; - let input = Arc::try_unwrap(input).unwrap_or_else(|e| e.as_ref().clone()); + let input = Arc::unwrap_or_clone(input); let input = self.state().optimize(&input)?; let table = self.table(name.clone()).await; match (if_not_exists, or_replace, table) { @@ -1131,7 +1165,7 @@ impl SessionContext { // check schema uniqueness let mut batches = batches.into_iter().peekable(); let schema = if let Some(batch) = batches.peek() { - batch.schema().clone() + batch.schema() } else { Arc::new(Schema::empty()) }; @@ -1427,6 +1461,12 @@ impl From<&SessionContext> for TaskContext { } } +impl From for SessionContext { + fn from(state: SessionState) -> Self { + Self::new_with_state(state) + } +} + /// A planner used to add extensions to DataFusion logical and physical plans. 
#[async_trait] pub trait QueryPlanner { @@ -1583,7 +1623,7 @@ mod tests { use super::{super::options::CsvReadOptions, *}; use crate::assert_batches_eq; use crate::execution::memory_pool::MemoryConsumer; - use crate::execution::runtime_env::RuntimeConfig; + use crate::execution::runtime_env::RuntimeEnvBuilder; use crate::test; use crate::test_util::{plan_and_collect, populate_csv_partitions}; @@ -1718,8 +1758,7 @@ mod tests { let path = path.join("tests/tpch-csv"); let url = format!("file://{}", path.display()); - let rt_cfg = RuntimeConfig::new(); - let runtime = Arc::new(RuntimeEnv::new(rt_cfg).unwrap()); + let runtime = Arc::new(RuntimeEnvBuilder::new().build()?); let cfg = SessionConfig::new() .set_str("datafusion.catalog.location", url.as_str()) .set_str("datafusion.catalog.format", "CSV") diff --git a/datafusion/core/src/execution/mod.rs b/datafusion/core/src/execution/mod.rs index a1b3eab25f33..10aa16ffe47a 100644 --- a/datafusion/core/src/execution/mod.rs +++ b/datafusion/core/src/execution/mod.rs @@ -19,6 +19,8 @@ pub mod context; pub mod session_state; +pub use session_state::{SessionState, SessionStateBuilder}; + mod session_state_defaults; pub use session_state_defaults::SessionStateDefaults; diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index daeb21db9d05..67f3cb01c0a4 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -490,7 +490,6 @@ //! [`PhysicalOptimizerRule`]: datafusion::physical_optimizer::optimizer::PhysicalOptimizerRule //! [`Schema`]: arrow::datatypes::Schema //! [`PhysicalExpr`]: physical_plan::PhysicalExpr -//! [`AggregateExpr`]: physical_plan::AggregateExpr //! [`RecordBatch`]: arrow::record_batch::RecordBatch //! [`RecordBatchReader`]: arrow::record_batch::RecordBatchReader //! 
[`Array`]: arrow::array::Array @@ -556,11 +555,6 @@ pub mod physical_expr_common { pub use datafusion_physical_expr_common::*; } -/// re-export of [`datafusion_physical_expr_functions_aggregate`] crate -pub mod physical_expr_functions_aggregate { - pub use datafusion_physical_expr_functions_aggregate::*; -} - /// re-export of [`datafusion_physical_expr`] crate pub mod physical_expr { pub use datafusion_physical_expr::*; @@ -678,6 +672,12 @@ doc_comment::doctest!( library_user_guide_sql_api ); +#[cfg(doctest)] +doc_comment::doctest!( + "../../../docs/source/library-user-guide/building-logical-plans.md", + library_user_guide_logical_plans +); + #[cfg(doctest)] doc_comment::doctest!( "../../../docs/source/library-user-guide/using-the-dataframe-api.md", diff --git a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs index f65a4c837a60..1a12fc7de888 100644 --- a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs +++ b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs @@ -26,7 +26,8 @@ use crate::physical_plan::ExecutionPlan; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_physical_expr::{physical_exprs_equal, AggregateExpr, PhysicalExpr}; +use datafusion_physical_expr::aggregate::AggregateFunctionExpr; +use datafusion_physical_expr::{physical_exprs_equal, PhysicalExpr}; use datafusion_physical_optimizer::PhysicalOptimizerRule; /// CombinePartialFinalAggregate optimizer rule combines the adjacent Partial and Final AggregateExecs @@ -51,62 +52,57 @@ impl PhysicalOptimizerRule for CombinePartialFinalAggregate { _config: &ConfigOptions, ) -> Result> { plan.transform_down(|plan| { - let transformed = - plan.as_any() - .downcast_ref::() - .and_then(|agg_exec| { - if matches!( - agg_exec.mode(), - AggregateMode::Final | AggregateMode::FinalPartitioned - ) { - agg_exec - .input() - .as_any() - .downcast_ref::() - .and_then(|input_agg_exec| { - if matches!( - input_agg_exec.mode(), - AggregateMode::Partial - ) && can_combine( - ( - agg_exec.group_expr(), - agg_exec.aggr_expr(), - agg_exec.filter_expr(), - ), - ( - input_agg_exec.group_expr(), - input_agg_exec.aggr_expr(), - input_agg_exec.filter_expr(), - ), - ) { - let mode = - if agg_exec.mode() == &AggregateMode::Final { - AggregateMode::Single - } else { - AggregateMode::SinglePartitioned - }; - AggregateExec::try_new( - mode, - input_agg_exec.group_expr().clone(), - input_agg_exec.aggr_expr().to_vec(), - input_agg_exec.filter_expr().to_vec(), - input_agg_exec.input().clone(), - input_agg_exec.input_schema(), - ) - .map(|combined_agg| { - combined_agg.with_limit(agg_exec.limit()) - }) - .ok() - .map(Arc::new) - } else { - None - } - }) - } else { - None - } - }); - + // Check if the plan is AggregateExec + let Some(agg_exec) = plan.as_any().downcast_ref::() else { + return Ok(Transformed::no(plan)); + }; + + if !matches!( + agg_exec.mode(), + AggregateMode::Final | AggregateMode::FinalPartitioned + ) { + return Ok(Transformed::no(plan)); + } + + // Check if the input is AggregateExec + let Some(input_agg_exec) = + agg_exec.input().as_any().downcast_ref::() + else { + return Ok(Transformed::no(plan)); + }; + + let transformed = if matches!(input_agg_exec.mode(), AggregateMode::Partial) + && can_combine( + ( + agg_exec.group_expr(), + agg_exec.aggr_expr(), + agg_exec.filter_expr(), + ), + ( + input_agg_exec.group_expr(), + 
input_agg_exec.aggr_expr(), + input_agg_exec.filter_expr(), + ), + ) { + let mode = if agg_exec.mode() == &AggregateMode::Final { + AggregateMode::Single + } else { + AggregateMode::SinglePartitioned + }; + AggregateExec::try_new( + mode, + input_agg_exec.group_expr().clone(), + input_agg_exec.aggr_expr().to_vec(), + input_agg_exec.filter_expr().to_vec(), + input_agg_exec.input().clone(), + input_agg_exec.input_schema(), + ) + .map(|combined_agg| combined_agg.with_limit(agg_exec.limit())) + .ok() + .map(Arc::new) + } else { + None + }; Ok(if let Some(transformed) = transformed { Transformed::yes(transformed) } else { @@ -127,7 +123,7 @@ impl PhysicalOptimizerRule for CombinePartialFinalAggregate { type GroupExprsRef<'a> = ( &'a PhysicalGroupBy, - &'a [Arc], + &'a [Arc], &'a [Option>], ); @@ -176,8 +172,8 @@ mod tests { use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion_functions_aggregate::count::count_udaf; use datafusion_functions_aggregate::sum::sum_udaf; + use datafusion_physical_expr::aggregate::AggregateExprBuilder; use datafusion_physical_expr::expressions::col; - use datafusion_physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; /// Runs the CombinePartialFinalAggregate optimizer and asserts the plan against the expected macro_rules! assert_optimized { @@ -229,7 +225,7 @@ mod tests { fn partial_aggregate_exec( input: Arc, group_by: PhysicalGroupBy, - aggr_expr: Vec>, + aggr_expr: Vec>, ) -> Arc { let schema = input.schema(); let n_aggr = aggr_expr.len(); @@ -249,7 +245,7 @@ mod tests { fn final_aggregate_exec( input: Arc, group_by: PhysicalGroupBy, - aggr_expr: Vec>, + aggr_expr: Vec>, ) -> Arc { let schema = input.schema(); let n_aggr = aggr_expr.len(); @@ -277,7 +273,7 @@ mod tests { expr: Arc, name: &str, schema: &Schema, - ) -> Arc { + ) -> Arc { AggregateExprBuilder::new(count_udaf(), vec![expr]) .schema(Arc::new(schema.clone())) .alias(name) diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index 2ee5624c83dd..ba6f7d0439c2 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -1419,6 +1419,7 @@ pub(crate) mod tests { expressions, expressions::binary, expressions::lit, LexOrdering, PhysicalSortExpr, PhysicalSortRequirement, }; + use datafusion_physical_expr_common::sort_expr::LexRequirement; use datafusion_physical_plan::PlanProperties; /// Models operators like BoundedWindowExec that require an input @@ -1489,7 +1490,7 @@ pub(crate) mod tests { } // model that it requires the output ordering of its input - fn required_input_ordering(&self) -> Vec>> { + fn required_input_ordering(&self) -> Vec> { if self.expr.is_empty() { vec![None] } else { @@ -3907,7 +3908,7 @@ pub(crate) mod tests { let alias = vec![("a".to_string(), "a".to_string())]; let plan_parquet = aggregate_exec_with_alias(parquet_exec_multiple(), alias.clone()); - let plan_csv = aggregate_exec_with_alias(csv_exec_multiple(), alias.clone()); + let plan_csv = aggregate_exec_with_alias(csv_exec_multiple(), alias); let expected_parquet = [ "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", @@ -3933,7 +3934,7 @@ pub(crate) mod tests { let alias = vec![("a".to_string(), "a".to_string())]; let plan_parquet = aggregate_exec_with_alias(parquet_exec_multiple(), alias.clone()); - let plan_csv = aggregate_exec_with_alias(csv_exec_multiple(), alias.clone()); + let plan_csv = 
aggregate_exec_with_alias(csv_exec_multiple(), alias); let expected_parquet = [ "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", @@ -3963,7 +3964,7 @@ pub(crate) mod tests { options: SortOptions::default(), }]; let plan_parquet = limit_exec(sort_exec(sort_key.clone(), parquet_exec(), false)); - let plan_csv = limit_exec(sort_exec(sort_key.clone(), csv_exec(), false)); + let plan_csv = limit_exec(sort_exec(sort_key, csv_exec(), false)); let expected_parquet = &[ "GlobalLimitExec: skip=0, fetch=100", @@ -3999,8 +4000,7 @@ pub(crate) mod tests { parquet_exec(), false, ))); - let plan_csv = - limit_exec(filter_exec(sort_exec(sort_key.clone(), csv_exec(), false))); + let plan_csv = limit_exec(filter_exec(sort_exec(sort_key, csv_exec(), false))); let expected_parquet = &[ "GlobalLimitExec: skip=0, fetch=100", @@ -4041,7 +4041,7 @@ pub(crate) mod tests { ); let plan_csv = aggregate_exec_with_alias( limit_exec(filter_exec(limit_exec(csv_exec()))), - alias.clone(), + alias, ); let expected_parquet = &[ @@ -4125,7 +4125,7 @@ pub(crate) mod tests { ); let plan_csv = sort_preserving_merge_exec( sort_key.clone(), - csv_exec_with_sort(vec![sort_key.clone()]), + csv_exec_with_sort(vec![sort_key]), ); // parallelization is not beneficial for SortPreservingMerge @@ -4153,7 +4153,7 @@ pub(crate) mod tests { union_exec(vec![parquet_exec_with_sort(vec![sort_key.clone()]); 2]); let input_csv = union_exec(vec![csv_exec_with_sort(vec![sort_key.clone()]); 2]); let plan_parquet = sort_preserving_merge_exec(sort_key.clone(), input_parquet); - let plan_csv = sort_preserving_merge_exec(sort_key.clone(), input_csv); + let plan_csv = sort_preserving_merge_exec(sort_key, input_csv); // should not repartition (union doesn't benefit from increased parallelism) // should not sort (as the data was already sorted) @@ -4223,8 +4223,8 @@ pub(crate) mod tests { ("c".to_string(), "c2".to_string()), ]; let proj_parquet = projection_exec_with_alias( - parquet_exec_with_sort(vec![sort_key.clone()]), - alias_pairs.clone(), + parquet_exec_with_sort(vec![sort_key]), + alias_pairs, ); let sort_key_after_projection = vec![PhysicalSortExpr { expr: col("c2", &proj_parquet.schema()).unwrap(), @@ -4559,7 +4559,7 @@ pub(crate) mod tests { }]; let alias = vec![("a".to_string(), "a".to_string())]; let input = parquet_exec_with_sort(vec![sort_key]); - let physical_plan = aggregate_exec_with_alias(input, alias.clone()); + let physical_plan = aggregate_exec_with_alias(input, alias); let expected = &[ "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", @@ -4583,7 +4583,7 @@ pub(crate) mod tests { let alias = vec![("a".to_string(), "a".to_string())]; let input = parquet_exec_multiple_sorted(vec![sort_key]); let aggregate = aggregate_exec_with_alias(input, alias.clone()); - let physical_plan = aggregate_exec_with_alias(aggregate, alias.clone()); + let physical_plan = aggregate_exec_with_alias(aggregate, alias); let expected = &[ "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", diff --git a/datafusion/core/src/physical_optimizer/join_selection.rs b/datafusion/core/src/physical_optimizer/join_selection.rs index b849df88e4aa..2643ade8f481 100644 --- a/datafusion/core/src/physical_optimizer/join_selection.rs +++ b/datafusion/core/src/physical_optimizer/join_selection.rs @@ -908,7 +908,7 @@ mod tests_statistical { ); let optimized_join = JoinSelection::new() - .optimize(join.clone(), &ConfigOptions::new()) + .optimize(join, &ConfigOptions::new()) .unwrap(); let swapping_projection = optimized_join @@ -964,7 
+964,7 @@ mod tests_statistical { ); let optimized_join = JoinSelection::new() - .optimize(join.clone(), &ConfigOptions::new()) + .optimize(join, &ConfigOptions::new()) .unwrap(); let swapped_join = optimized_join @@ -1140,7 +1140,7 @@ mod tests_statistical { ); let optimized_join = JoinSelection::new() - .optimize(join.clone(), &ConfigOptions::new()) + .optimize(join, &ConfigOptions::new()) .unwrap(); let swapped_join = optimized_join @@ -1180,7 +1180,7 @@ mod tests_statistical { ); let optimized_join = JoinSelection::new() - .optimize(join.clone(), &ConfigOptions::new()) + .optimize(join, &ConfigOptions::new()) .unwrap(); let swapping_projection = optimized_join @@ -1356,7 +1356,7 @@ mod tests_statistical { Arc::new(Column::new_with_schema("small_col", &small.schema()).unwrap()) as _, )]; check_join_partition_mode( - big.clone(), + big, small.clone(), join_on, true, @@ -1380,8 +1380,8 @@ mod tests_statistical { Arc::new(Column::new_with_schema("small_col", &small.schema()).unwrap()) as _, )]; check_join_partition_mode( - empty.clone(), - small.clone(), + empty, + small, join_on, true, PartitionMode::CollectLeft, @@ -1424,7 +1424,7 @@ mod tests_statistical { Arc::new(Column::new_with_schema("big_col", &big.schema()).unwrap()) as _, )]; check_join_partition_mode( - bigger.clone(), + bigger, big.clone(), join_on, true, @@ -1472,7 +1472,7 @@ mod tests_statistical { ); let optimized_join = JoinSelection::new() - .optimize(join.clone(), &ConfigOptions::new()) + .optimize(join, &ConfigOptions::new()) .unwrap(); if !is_swapped { @@ -1913,8 +1913,7 @@ mod hash_join_tests { false, )?); - let optimized_join_plan = - hash_join_swap_subrule(join.clone(), &ConfigOptions::new())?; + let optimized_join_plan = hash_join_swap_subrule(join, &ConfigOptions::new())?; // If swap did happen let projection_added = optimized_join_plan.as_any().is::(); diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs index 9c545c17da3c..b3f3f90154d0 100644 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs @@ -1692,12 +1692,9 @@ mod tests { ])); Arc::new( CsvExec::builder( - FileScanConfig::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema.clone(), - ) - .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_projection(Some(vec![0, 1, 2, 3, 4])), + FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema) + .with_file(PartitionedFile::new("x".to_string(), 100)) + .with_projection(Some(vec![0, 1, 2, 3, 4])), ) .with_has_header(false) .with_delimeter(0) @@ -1719,12 +1716,9 @@ mod tests { ])); Arc::new( CsvExec::builder( - FileScanConfig::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema.clone(), - ) - .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_projection(Some(vec![3, 2, 1])), + FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema) + .with_file(PartitionedFile::new("x".to_string(), 100)) + .with_projection(Some(vec![3, 2, 1])), ) .with_has_header(false) .with_delimeter(0) diff --git a/datafusion/core/src/physical_optimizer/pruning.rs b/datafusion/core/src/physical_optimizer/pruning.rs index 0ef390fff45c..a16abc607ee6 100644 --- a/datafusion/core/src/physical_optimizer/pruning.rs +++ b/datafusion/core/src/physical_optimizer/pruning.rs @@ -1369,7 +1369,6 @@ fn build_predicate_expression( let change_expr = in_list .list() .iter() - .cloned() .map(|e| { 
Arc::new(phys_expr::BinaryExpr::new( in_list.expr().clone(), diff --git a/datafusion/core/src/physical_optimizer/sort_pushdown.rs b/datafusion/core/src/physical_optimizer/sort_pushdown.rs index 17d63a06a6f8..9ab6802d18f1 100644 --- a/datafusion/core/src/physical_optimizer/sort_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/sort_pushdown.rs @@ -36,6 +36,7 @@ use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::{ LexRequirementRef, PhysicalSortExpr, PhysicalSortRequirement, }; +use datafusion_physical_expr_common::sort_expr::LexRequirement; /// This is a "data class" we use within the [`EnforceSorting`] rule to push /// down [`SortExec`] in the plan. In some cases, we can reduce the total @@ -46,7 +47,7 @@ use datafusion_physical_expr::{ /// [`EnforceSorting`]: crate::physical_optimizer::enforce_sorting::EnforceSorting #[derive(Default, Clone)] pub struct ParentRequirements { - ordering_requirement: Option>, + ordering_requirement: Option, fetch: Option, } @@ -159,7 +160,7 @@ fn pushdown_sorts_helper( fn pushdown_requirement_to_children( plan: &Arc, parent_required: LexRequirementRef, -) -> Result>>>> { +) -> Result>>> { let maintains_input_order = plan.maintains_input_order(); if is_window(plan) { let required_input_ordering = plan.required_input_ordering(); @@ -345,7 +346,7 @@ fn try_pushdown_requirements_to_join( parent_required: LexRequirementRef, sort_expr: Vec, push_side: JoinSide, -) -> Result>>>> { +) -> Result>>> { let left_eq_properties = smj.left().equivalence_properties(); let right_eq_properties = smj.right().equivalence_properties(); let mut smj_required_orderings = smj.required_input_ordering(); @@ -460,7 +461,7 @@ fn expr_source_side( fn shift_right_required( parent_required: LexRequirementRef, left_columns_len: usize, -) -> Result> { +) -> Result { let new_right_required = parent_required .iter() .filter_map(|r| { @@ -486,7 +487,7 @@ enum RequirementsCompatibility { /// Requirements satisfy Satisfy, /// Requirements compatible - Compatible(Option>), + Compatible(Option), /// Requirements not compatible NonCompatible, } diff --git a/datafusion/core/src/physical_optimizer/test_utils.rs b/datafusion/core/src/physical_optimizer/test_utils.rs index 90853c347672..98f1a7c21a39 100644 --- a/datafusion/core/src/physical_optimizer/test_utils.rs +++ b/datafusion/core/src/physical_optimizer/test_utils.rs @@ -56,7 +56,9 @@ use datafusion_physical_plan::{ use async_trait::async_trait; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; -use datafusion_physical_expr_common::sort_expr::PhysicalSortRequirement; +use datafusion_physical_expr_common::sort_expr::{ + LexRequirement, PhysicalSortRequirement, +}; async fn register_current_csv( ctx: &SessionContext, @@ -416,7 +418,7 @@ impl ExecutionPlan for RequirementsTestExec { self.input.properties() } - fn required_input_ordering(&self) -> Vec>> { + fn required_input_ordering(&self) -> Vec> { let requirement = PhysicalSortRequirement::from_sort_exprs(&self.required_input_ordering); vec![Some(requirement)] diff --git a/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs b/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs index f8edf73e3d2a..a2726d62e9f6 100644 --- a/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs +++ b/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs @@ -23,8 +23,9 @@ use std::sync::Arc; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use 
datafusion_common::{plan_datafusion_err, Result}; +use datafusion_physical_expr::aggregate::AggregateFunctionExpr; use datafusion_physical_expr::{ - reverse_order_bys, AggregateExpr, EquivalenceProperties, PhysicalSortRequirement, + reverse_order_bys, EquivalenceProperties, PhysicalSortRequirement, }; use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::aggregates::concat_slices; @@ -117,7 +118,7 @@ impl PhysicalOptimizerRule for OptimizeAggregateOrder { /// /// # Parameters /// -/// * `aggr_exprs` - A vector of `Arc` representing the +/// * `aggr_exprs` - A vector of `Arc` representing the /// aggregate expressions to be optimized. /// * `prefix_requirement` - An array slice representing the ordering /// requirements preceding the aggregate expressions. @@ -130,10 +131,10 @@ impl PhysicalOptimizerRule for OptimizeAggregateOrder { /// successfully. Any errors occurring during the conversion process are /// passed through. fn try_convert_aggregate_if_better( - aggr_exprs: Vec>, + aggr_exprs: Vec>, prefix_requirement: &[PhysicalSortRequirement], eq_properties: &EquivalenceProperties, -) -> Result>> { +) -> Result>> { aggr_exprs .into_iter() .map(|aggr_expr| { diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 6536f9a01439..fe8d79846630 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -58,8 +58,8 @@ use crate::physical_plan::unnest::UnnestExec; use crate::physical_plan::values::ValuesExec; use crate::physical_plan::windows::{BoundedWindowAggExec, WindowAggExec}; use crate::physical_plan::{ - displayable, windows, AggregateExpr, ExecutionPlan, ExecutionPlanProperties, - InputOrderMode, Partitioning, PhysicalExpr, WindowExpr, + displayable, windows, ExecutionPlan, ExecutionPlanProperties, InputOrderMode, + Partitioning, PhysicalExpr, WindowExpr, }; use arrow::compute::SortOptions; @@ -73,8 +73,7 @@ use datafusion_common::{ }; use datafusion_expr::dml::CopyTo; use datafusion_expr::expr::{ - self, create_function_physical_name, physical_name, AggregateFunction, Alias, - GroupingSet, WindowFunction, + self, physical_name, AggregateFunction, Alias, GroupingSet, WindowFunction, }; use datafusion_expr::expr_rewriter::unnormalize_cols; use datafusion_expr::logical_plan::builder::wrap_projection_for_join_if_necessary; @@ -82,9 +81,9 @@ use datafusion_expr::{ DescribeTable, DmlStatement, Extension, Filter, RecursiveQuery, StringifiedPlan, WindowFrame, WindowFrameBound, WriteOp, }; +use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr}; use datafusion_physical_expr::expressions::Literal; use datafusion_physical_expr::LexOrdering; -use datafusion_physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; use datafusion_sql::utils::window_expr_common_partition_keys; @@ -702,7 +701,7 @@ impl DefaultPhysicalPlanner { let initial_aggr = Arc::new(AggregateExec::try_new( AggregateMode::Partial, groups.clone(), - aggregates.clone(), + aggregates, filters.clone(), input_exec, physical_input_schema.clone(), @@ -720,7 +719,7 @@ impl DefaultPhysicalPlanner { // optimization purposes. For example, a FIRST_VALUE may turn // into a LAST_VALUE with the reverse ordering requirement. // To reflect such changes to subsequent stages, use the updated - // `AggregateExpr`/`PhysicalSortExpr` objects. + // `AggregateFunctionExpr`/`PhysicalSortExpr` objects. 
let updated_aggregates = initial_aggr.aggr_expr().to_vec(); let next_partition_mode = if can_repartition { @@ -1542,7 +1541,7 @@ pub fn create_window_expr( } type AggregateExprWithOptionalArgs = ( - Arc, + Arc, // The filter clause, if any Option>, // Ordering requirements, if any @@ -1569,12 +1568,7 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( let name = if let Some(name) = name { name } else { - create_function_physical_name( - func.name(), - *distinct, - args, - order_by.as_ref(), - )? + physical_name(e)? }; let physical_args = @@ -1588,8 +1582,7 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( None => None, }; - let ignore_nulls = null_treatment - .unwrap_or(sqlparser::ast::NullTreatment::RespectNulls) + let ignore_nulls = null_treatment.unwrap_or(NullTreatment::RespectNulls) == NullTreatment::IgnoreNulls; let (agg_expr, filter, order_by) = { @@ -2576,7 +2569,7 @@ mod tests { impl NoOpExecutionPlan { fn new(schema: SchemaRef) -> Self { - let cache = Self::compute_properties(schema.clone()); + let cache = Self::compute_properties(schema); Self { cache } } diff --git a/datafusion/core/src/test_util/mod.rs b/datafusion/core/src/test_util/mod.rs index ca8376fdec0a..faa9378535fd 100644 --- a/datafusion/core/src/test_util/mod.rs +++ b/datafusion/core/src/test_util/mod.rs @@ -48,13 +48,11 @@ use datafusion_common::TableReference; use datafusion_expr::utils::COUNT_STAR_EXPANSION; use datafusion_expr::{CreateExternalTable, Expr, TableType}; use datafusion_functions_aggregate::count::count_udaf; -use datafusion_physical_expr::{ - expressions, AggregateExpr, EquivalenceProperties, PhysicalExpr, -}; +use datafusion_physical_expr::{expressions, EquivalenceProperties, PhysicalExpr}; use async_trait::async_trait; use datafusion_catalog::Session; -use datafusion_physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; +use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr}; use futures::Stream; use tempfile::TempDir; // backwards compatibility @@ -429,7 +427,7 @@ impl TestAggregate { } /// Return appropriate expr depending if COUNT is for col or table (*) - pub fn count_expr(&self, schema: &Schema) -> Arc { + pub fn count_expr(&self, schema: &Schema) -> Arc { AggregateExprBuilder::new(count_udaf(), vec![self.column()]) .schema(Arc::new(schema.clone())) .alias(self.column_name()) diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs index 138e5bda7f39..62e9be63983c 100644 --- a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs @@ -25,7 +25,7 @@ use arrow::util::pretty::pretty_format_batches; use arrow_array::types::Int64Type; use datafusion::common::Result; use datafusion::datasource::MemTable; -use datafusion::physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; +use datafusion::physical_expr::aggregate::AggregateExprBuilder; use datafusion::physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, }; diff --git a/datafusion/core/tests/fuzz_cases/join_fuzz.rs b/datafusion/core/tests/fuzz_cases/join_fuzz.rs index f1cca66712d7..1c2d8ece2f36 100644 --- a/datafusion/core/tests/fuzz_cases/join_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/join_fuzz.rs @@ -350,12 +350,10 @@ impl JoinFuzzTestCase { fn left_right(&self) -> (Arc, Arc) { let schema1 = self.input1[0].schema(); let schema2 = self.input2[0].schema(); - let left = Arc::new( - MemoryExec::try_new(&[self.input1.clone()], 
schema1.clone(), None).unwrap(), - ); - let right = Arc::new( - MemoryExec::try_new(&[self.input2.clone()], schema2.clone(), None).unwrap(), - ); + let left = + Arc::new(MemoryExec::try_new(&[self.input1.clone()], schema1, None).unwrap()); + let right = + Arc::new(MemoryExec::try_new(&[self.input2.clone()], schema2, None).unwrap()); (left, right) } diff --git a/datafusion/core/tests/fuzz_cases/sort_fuzz.rs b/datafusion/core/tests/fuzz_cases/sort_fuzz.rs index f4b4f16aa160..1980589491a5 100644 --- a/datafusion/core/tests/fuzz_cases/sort_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/sort_fuzz.rs @@ -22,7 +22,7 @@ use arrow::{ compute::SortOptions, record_batch::RecordBatch, }; -use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv}; +use datafusion::execution::runtime_env::RuntimeEnvBuilder; use datafusion::physical_plan::expressions::PhysicalSortExpr; use datafusion::physical_plan::memory::MemoryExec; use datafusion::physical_plan::sorts::sort::SortExec; @@ -136,9 +136,12 @@ impl SortTest { .sort_spill_reservation_bytes, ); - let runtime_config = RuntimeConfig::new() - .with_memory_pool(Arc::new(GreedyMemoryPool::new(pool_size))); - let runtime = Arc::new(RuntimeEnv::new(runtime_config).unwrap()); + let runtime = Arc::new( + RuntimeEnvBuilder::new() + .with_memory_pool(Arc::new(GreedyMemoryPool::new(pool_size))) + .build() + .unwrap(), + ); SessionContext::new_with_config_rt(session_config, runtime) } else { SessionContext::new_with_config(session_config) diff --git a/datafusion/core/tests/memory_limit/mod.rs b/datafusion/core/tests/memory_limit/mod.rs index dbd5592e8020..592c25dedc50 100644 --- a/datafusion/core/tests/memory_limit/mod.rs +++ b/datafusion/core/tests/memory_limit/mod.rs @@ -40,7 +40,7 @@ use tokio::fs::File; use datafusion::datasource::streaming::StreamingTable; use datafusion::datasource::{MemTable, TableProvider}; use datafusion::execution::disk_manager::DiskManagerConfig; -use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv}; +use datafusion::execution::runtime_env::RuntimeEnvBuilder; use datafusion::execution::session_state::SessionStateBuilder; use datafusion::physical_optimizer::join_selection::JoinSelection; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; @@ -509,17 +509,17 @@ impl TestCase { let table = scenario.table(); - let mut rt_config = RuntimeConfig::new() + let rt_config = RuntimeEnvBuilder::new() // disk manager setting controls the spilling .with_disk_manager(disk_manager_config) .with_memory_limit(memory_limit, MEMORY_FRACTION); - if let Some(pool) = memory_pool { - rt_config = rt_config.with_memory_pool(pool); + let runtime = if let Some(pool) = memory_pool { + rt_config.with_memory_pool(pool).build().unwrap() + } else { + rt_config.build().unwrap() }; - let runtime = RuntimeEnv::new(rt_config).unwrap(); - // Configure execution let builder = SessionStateBuilder::new() .with_config(config) diff --git a/datafusion/core/tests/parquet/file_statistics.rs b/datafusion/core/tests/parquet/file_statistics.rs index bf25b36f48e8..bd251f1a6669 100644 --- a/datafusion/core/tests/parquet/file_statistics.rs +++ b/datafusion/core/tests/parquet/file_statistics.rs @@ -33,7 +33,7 @@ use datafusion_execution::cache::cache_unit::{ DefaultFileStatisticsCache, DefaultListFilesCache, }; use datafusion_execution::config::SessionConfig; -use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; +use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion::execution::session_state::SessionStateBuilder; use 
tempfile::tempdir; @@ -198,7 +198,10 @@ fn get_cache_runtime_state() -> ( .with_list_files_cache(Some(list_file_cache.clone())); let rt = Arc::new( - RuntimeEnv::new(RuntimeConfig::new().with_cache_manager(cache_config)).unwrap(), + RuntimeEnvBuilder::new() + .with_cache_manager(cache_config) + .build() + .expect("could not build runtime environment"), ); let state = SessionContext::new_with_config_rt(SessionConfig::default(), rt).state(); diff --git a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs index b051feb5750e..750544ecdec1 100644 --- a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs +++ b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs @@ -54,7 +54,7 @@ impl PartitionStream for DummyStreamPartition { fn transforms_streaming_table_exec_into_fetching_version_when_skip_is_zero( ) -> datafusion_common::Result<()> { let schema = create_schema(); - let streaming_table = streaming_table_exec(schema.clone())?; + let streaming_table = streaming_table_exec(schema)?; let global_limit = global_limit_exec(streaming_table, 0, Some(5)); let initial = get_plan_string(&global_limit); @@ -79,7 +79,7 @@ fn transforms_streaming_table_exec_into_fetching_version_when_skip_is_zero( fn transforms_streaming_table_exec_into_fetching_version_and_keeps_the_global_limit_when_skip_is_nonzero( ) -> datafusion_common::Result<()> { let schema = create_schema(); - let streaming_table = streaming_table_exec(schema.clone())?; + let streaming_table = streaming_table_exec(schema)?; let global_limit = global_limit_exec(streaming_table, 2, Some(5)); let initial = get_plan_string(&global_limit); @@ -107,7 +107,7 @@ fn transforms_coalesce_batches_exec_into_fetching_version_and_removes_local_limi let schema = create_schema(); let streaming_table = streaming_table_exec(schema.clone())?; let repartition = repartition_exec(streaming_table)?; - let filter = filter_exec(schema.clone(), repartition)?; + let filter = filter_exec(schema, repartition)?; let coalesce_batches = coalesce_batches_exec(filter); let local_limit = local_limit_exec(coalesce_batches, 5); let coalesce_partitions = coalesce_partitions_exec(local_limit); @@ -146,7 +146,7 @@ fn pushes_global_limit_exec_through_projection_exec() -> datafusion_common::Resu let schema = create_schema(); let streaming_table = streaming_table_exec(schema.clone())?; let filter = filter_exec(schema.clone(), streaming_table)?; - let projection = projection_exec(schema.clone(), filter)?; + let projection = projection_exec(schema, filter)?; let global_limit = global_limit_exec(projection, 0, Some(5)); let initial = get_plan_string(&global_limit); @@ -178,7 +178,7 @@ fn pushes_global_limit_exec_through_projection_exec_and_transforms_coalesce_batc let schema = create_schema(); let streaming_table = streaming_table_exec(schema.clone()).unwrap(); let coalesce_batches = coalesce_batches_exec(streaming_table); - let projection = projection_exec(schema.clone(), coalesce_batches)?; + let projection = projection_exec(schema, coalesce_batches)?; let global_limit = global_limit_exec(projection, 0, Some(5)); let initial = get_plan_string(&global_limit); @@ -256,7 +256,7 @@ fn keeps_pushed_local_limit_exec_when_there_are_multiple_input_partitions( let schema = create_schema(); let streaming_table = streaming_table_exec(schema.clone())?; let repartition = repartition_exec(streaming_table)?; - let filter = filter_exec(schema.clone(), repartition)?; + let filter = filter_exec(schema, repartition)?; let 
coalesce_partitions = coalesce_partitions_exec(filter); let global_limit = global_limit_exec(coalesce_partitions, 0, Some(5)); @@ -398,9 +398,7 @@ fn streaming_table_exec( ) -> datafusion_common::Result> { Ok(Arc::new(StreamingTableExec::try_new( schema.clone(), - vec![Arc::new(DummyStreamPartition { - schema: schema.clone(), - }) as _], + vec![Arc::new(DummyStreamPartition { schema }) as _], None, None, true, diff --git a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs index 48389b0304f6..042f6d622565 100644 --- a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs +++ b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs @@ -316,11 +316,11 @@ fn test_no_group_by() -> Result<()> { // `SELECT FROM MemoryExec LIMIT 10;`, Single AggregateExec let single_agg = AggregateExec::try_new( AggregateMode::Single, - build_group_by(&schema.clone(), vec![]), - vec![], /* aggr_expr */ - vec![], /* filter_expr */ - source, /* input */ - schema.clone(), /* input_schema */ + build_group_by(&schema, vec![]), + vec![], /* aggr_expr */ + vec![], /* filter_expr */ + source, /* input */ + schema, /* input_schema */ )?; let limit_exec = LocalLimitExec::new( Arc::new(single_agg), @@ -346,7 +346,7 @@ fn test_has_aggregate_expression() -> Result<()> { // `SELECT FROM MemoryExec LIMIT 10;`, Single AggregateExec let single_agg = AggregateExec::try_new( AggregateMode::Single, - build_group_by(&schema.clone(), vec!["a".to_string()]), + build_group_by(&schema, vec!["a".to_string()]), vec![agg.count_expr(&schema)], /* aggr_expr */ vec![None], /* filter_expr */ source, /* input */ @@ -418,11 +418,11 @@ fn test_has_order_by() -> Result<()> { // the `a > 1` filter is applied in the AggregateExec let single_agg = AggregateExec::try_new( AggregateMode::Single, - build_group_by(&schema.clone(), vec!["a".to_string()]), - vec![], /* aggr_expr */ - vec![], /* filter_expr */ - source, /* input */ - schema.clone(), /* input_schema */ + build_group_by(&schema, vec!["a".to_string()]), + vec![], /* aggr_expr */ + vec![], /* filter_expr */ + source, /* input */ + schema, /* input_schema */ )?; let limit_exec = LocalLimitExec::new( Arc::new(single_agg), diff --git a/datafusion/core/tests/user_defined/user_defined_aggregates.rs b/datafusion/core/tests/user_defined/user_defined_aggregates.rs index 93550d38021a..1e0d3d9d514e 100644 --- a/datafusion/core/tests/user_defined/user_defined_aggregates.rs +++ b/datafusion/core/tests/user_defined/user_defined_aggregates.rs @@ -272,7 +272,7 @@ async fn deregister_udaf() -> Result<()> { Arc::new(vec![DataType::UInt64, DataType::Float64]), ); - ctx.register_udaf(my_avg.clone()); + ctx.register_udaf(my_avg); assert!(ctx.state().aggregate_functions().contains_key("my_avg")); diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index 259cce74f2e5..0f1c3b8e53c4 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -594,7 +594,7 @@ async fn deregister_udf() -> Result<()> { let cast2i64 = ScalarUDF::from(CastToI64UDF::new()); let ctx = SessionContext::new(); - ctx.register_udf(cast2i64.clone()); + ctx.register_udf(cast2i64); assert!(ctx.udfs().contains("cast_to_i64")); diff --git a/datafusion/execution/src/memory_pool/pool.rs 
b/datafusion/execution/src/memory_pool/pool.rs index d3cd93979baf..e169c1f319cc 100644 --- a/datafusion/execution/src/memory_pool/pool.rs +++ b/datafusion/execution/src/memory_pool/pool.rs @@ -366,7 +366,7 @@ impl MemoryPool for TrackConsumersPool { // wrap OOM message in top consumers DataFusionError::ResourcesExhausted( provide_top_memory_consumers_to_error_msg( - e.to_owned(), + e, self.report_top(self.top.into()), ), ) @@ -540,7 +540,7 @@ mod tests { // Test: will be the same per Top Consumers reported. r0.grow(10); // make r0=10, pool available=90 let new_consumer_same_name = MemoryConsumer::new(same_name); - let mut r1 = new_consumer_same_name.clone().register(&pool); + let mut r1 = new_consumer_same_name.register(&pool); // TODO: the insufficient_capacity_err() message is per reservation, not per consumer. // a followup PR will clarify this message "0 bytes already allocated for this reservation" let expected = "Additional allocation failed with top memory consumers (across reservations) as: foo consumed 10 bytes. Error: Failed to allocate additional 150 bytes for foo with 0 bytes already allocated for this reservation - 90 bytes remain available for the total pool"; diff --git a/datafusion/execution/src/runtime_env.rs b/datafusion/execution/src/runtime_env.rs index 420246595558..e7b48be95cff 100644 --- a/datafusion/execution/src/runtime_env.rs +++ b/datafusion/execution/src/runtime_env.rs @@ -41,7 +41,7 @@ use url::Url; /// Execution runtime environment that manages system resources such /// as memory, disk, cache and storage. /// -/// A [`RuntimeEnv`] is created from a [`RuntimeConfig`] and has the +/// A [`RuntimeEnv`] is created from a [`RuntimeEnvBuilder`] and has the /// following resource management functionality: /// /// * [`MemoryPool`]: Manage memory @@ -147,13 +147,17 @@ impl RuntimeEnv { impl Default for RuntimeEnv { fn default() -> Self { - RuntimeEnv::new(RuntimeConfig::new()).unwrap() + RuntimeEnvBuilder::new().build().unwrap() } } +/// Please see: +/// This is a type alias for backwards compatibility.
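For readers tracking the `RuntimeConfig` to `RuntimeEnvBuilder` migration in this file, here is a minimal usage sketch. It is illustrative only, not part of this patch, and uses only builder methods that appear elsewhere in the diff:

```rust
use datafusion_execution::runtime_env::{RuntimeEnv, RuntimeEnvBuilder};

fn main() {
    // Old style: RuntimeEnv::new(RuntimeConfig::new()).unwrap()
    // New style: construct the environment through the builder.
    let _runtime: RuntimeEnv = RuntimeEnvBuilder::new()
        .build()
        .expect("default runtime environment");
}
```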
+pub type RuntimeConfig = RuntimeEnvBuilder; + #[derive(Clone)] /// Execution runtime configuration -pub struct RuntimeConfig { +pub struct RuntimeEnvBuilder { /// DiskManager to manage temporary disk file usage pub disk_manager: DiskManagerConfig, /// [`MemoryPool`] from which to allocate memory @@ -166,13 +170,13 @@ pub struct RuntimeConfig { pub object_store_registry: Arc, } -impl Default for RuntimeConfig { +impl Default for RuntimeEnvBuilder { fn default() -> Self { Self::new() } } -impl RuntimeConfig { +impl RuntimeEnvBuilder { /// New with default values pub fn new() -> Self { Self { @@ -228,4 +232,18 @@ impl RuntimeConfig { pub fn with_temp_file_path(self, path: impl Into) -> Self { self.with_disk_manager(DiskManagerConfig::new_specified(vec![path.into()])) } + + /// Build a RuntimeEnv + pub fn build(self) -> Result { + let memory_pool = self + .memory_pool + .unwrap_or_else(|| Arc::new(UnboundedMemoryPool::default())); + + Ok(RuntimeEnv { + memory_pool, + disk_manager: DiskManager::try_new(self.disk_manager)?, + cache_manager: CacheManager::try_new(&self.cache_manager)?, + object_store_registry: self.object_store_registry, + }) + } } diff --git a/datafusion/execution/src/task.rs b/datafusion/execution/src/task.rs index 21a644284c42..35689b8e08df 100644 --- a/datafusion/execution/src/task.rs +++ b/datafusion/execution/src/task.rs @@ -24,7 +24,7 @@ use crate::{ config::SessionConfig, memory_pool::MemoryPool, registry::FunctionRegistry, - runtime_env::{RuntimeConfig, RuntimeEnv}, + runtime_env::{RuntimeEnv, RuntimeEnvBuilder}, }; use datafusion_common::{plan_datafusion_err, DataFusionError, Result}; use datafusion_expr::planner::ExprPlanner; @@ -57,7 +57,8 @@ pub struct TaskContext { impl Default for TaskContext { fn default() -> Self { - let runtime = RuntimeEnv::new(RuntimeConfig::new()) + let runtime = RuntimeEnvBuilder::new() + .build() .expect("default runtime created successfully"); // Create a default task context, mostly useful for testing diff --git a/datafusion/expr-common/src/interval_arithmetic.rs b/datafusion/expr-common/src/interval_arithmetic.rs index e3ff412e785b..6424888c896a 100644 --- a/datafusion/expr-common/src/interval_arithmetic.rs +++ b/datafusion/expr-common/src/interval_arithmetic.rs @@ -1877,11 +1877,7 @@ mod tests { .sub(value.clone()) .unwrap() .lt(&eps)); - assert!(value - .clone() - .sub(prev_value(value.clone())) - .unwrap() - .lt(&eps)); + assert!(value.sub(prev_value(value.clone())).unwrap().lt(&eps)); assert_ne!(next_value(value.clone()), value); assert_ne!(prev_value(value.clone()), value); }); @@ -1913,11 +1909,11 @@ mod tests { min_max.into_iter().zip(inf).for_each(|((min, max), inf)| { assert_eq!(next_value(max.clone()), inf); assert_ne!(prev_value(max.clone()), max); - assert_ne!(prev_value(max.clone()), inf); + assert_ne!(prev_value(max), inf); assert_eq!(prev_value(min.clone()), inf); assert_ne!(next_value(min.clone()), min); - assert_ne!(next_value(min.clone()), inf); + assert_ne!(next_value(min), inf); assert_eq!(next_value(inf.clone()), inf); assert_eq!(prev_value(inf.clone()), inf); diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 6d2fb660f669..3617f56905a9 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -481,16 +481,22 @@ fn type_union_resolution_coercion( } } -/// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of a comparison operation -/// Unlike `coerced_from`, 
usually the coerced type is for comparison only. -/// For example, compare with Dictionary and Dictionary, only value type is what we care about +/// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of a +/// comparison operation +/// +/// Example comparison operations are `lhs = rhs` and `lhs > rhs` +/// +/// Binary comparison kernels require the two arguments to be the (exact) same +/// data type. However, users can write queries where the two arguments are +/// different data types. In such cases, the data types are automatically cast +/// (coerced) to a single data type to pass to the kernels. pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { if lhs_type == rhs_type { // same type => equality is possible return Some(lhs_type.clone()); } binary_numeric_coercion(lhs_type, rhs_type) - .or_else(|| dictionary_coercion(lhs_type, rhs_type, true)) + .or_else(|| dictionary_comparison_coercion(lhs_type, rhs_type, true)) .or_else(|| temporal_coercion_nonstrict_timezone(lhs_type, rhs_type)) .or_else(|| string_coercion(lhs_type, rhs_type)) .or_else(|| list_coercion(lhs_type, rhs_type)) @@ -501,7 +507,11 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Option { if lhs_type == rhs_type { // same type => equality is possible @@ -883,7 +893,7 @@ fn both_numeric_or_null_and_numeric(lhs_type: &DataType, rhs_type: &DataType) -> /// /// Not all operators support dictionaries, if `preserve_dictionaries` is true /// dictionaries will be preserved if possible -fn dictionary_coercion( +fn dictionary_comparison_coercion( lhs_type: &DataType, rhs_type: &DataType, preserve_dictionaries: bool, @@ -912,26 +922,25 @@ fn dictionary_coercion( /// Coercion rules for string concat. /// This is a union of string coercion rules and specified rules: -/// 1. At lease one side of lhs and rhs should be string type (Utf8 / LargeUtf8) +/// 1. At least one side of lhs and rhs should be string type (Utf8 / LargeUtf8) /// 2. Data type of the other side should be able to cast to string type fn string_concat_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { use arrow::datatypes::DataType::*; - match (lhs_type, rhs_type) { - // If Utf8View is in any side, we coerce to Utf8. - // Ref: https://github.com/apache/datafusion/pull/11796 - (Utf8View, Utf8View | Utf8 | LargeUtf8) | (Utf8 | LargeUtf8, Utf8View) => { - Some(Utf8) + string_coercion(lhs_type, rhs_type).or(match (lhs_type, rhs_type) { + (Utf8View, from_type) | (from_type, Utf8View) => { + string_concat_internal_coercion(from_type, &Utf8View) } - _ => string_coercion(lhs_type, rhs_type).or(match (lhs_type, rhs_type) { - (Utf8, from_type) | (from_type, Utf8) => { - string_concat_internal_coercion(from_type, &Utf8) - } - (LargeUtf8, from_type) | (from_type, LargeUtf8) => { - string_concat_internal_coercion(from_type, &LargeUtf8) - } - _ => None, - }), - } + (Utf8, from_type) | (from_type, Utf8) => { + string_concat_internal_coercion(from_type, &Utf8) + } + (LargeUtf8, from_type) | (from_type, LargeUtf8) => { + string_concat_internal_coercion(from_type, &LargeUtf8) + } + (Dictionary(_, lhs_value_type), Dictionary(_, rhs_value_type)) => { + string_coercion(lhs_value_type, rhs_value_type).or(None) + } + _ => None, + }) } fn array_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { @@ -942,6 +951,8 @@ fn array_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option } } +/// If `from_type` can be casted to `to_type`, return `to_type`, otherwise +/// return `None`. 
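To make the comparison coercion described above concrete, a small sketch follows. It is illustrative only (the module path is assumed from this file's location in `datafusion/expr-common`) and is not part of this patch:

```rust
use arrow::datatypes::DataType;
use datafusion_expr_common::type_coercion::binary::comparison_coercion;

fn main() {
    // Comparing an Int32 with an Int64 requires a single common type, so both
    // sides are coerced to Int64 before reaching the comparison kernels.
    assert_eq!(
        comparison_coercion(&DataType::Int32, &DataType::Int64),
        Some(DataType::Int64)
    );
}
```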
fn string_concat_internal_coercion( from_type: &DataType, to_type: &DataType, @@ -967,6 +978,7 @@ fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option } // Then, if LargeUtf8 is in any side, we coerce to LargeUtf8. (LargeUtf8, Utf8 | LargeUtf8) | (Utf8, LargeUtf8) => Some(LargeUtf8), + // Utf8 coerces to Utf8 (Utf8, Utf8) => Some(Utf8), _ => None, } @@ -1044,7 +1056,7 @@ pub fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Option Option { string_coercion(lhs_type, rhs_type) - .or_else(|| dictionary_coercion(lhs_type, rhs_type, false)) + .or_else(|| dictionary_comparison_coercion(lhs_type, rhs_type, false)) .or_else(|| regex_null_coercion(lhs_type, rhs_type)) } @@ -1324,38 +1336,50 @@ mod tests { let lhs_type = Dictionary(Box::new(Int8), Box::new(Int32)); let rhs_type = Dictionary(Box::new(Int8), Box::new(Int16)); - assert_eq!(dictionary_coercion(&lhs_type, &rhs_type, true), Some(Int32)); assert_eq!( - dictionary_coercion(&lhs_type, &rhs_type, false), + dictionary_comparison_coercion(&lhs_type, &rhs_type, true), + Some(Int32) + ); + assert_eq!( + dictionary_comparison_coercion(&lhs_type, &rhs_type, false), Some(Int32) ); // Since we can coerce values of Int16 to Utf8 can support this let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8)); let rhs_type = Dictionary(Box::new(Int8), Box::new(Int16)); - assert_eq!(dictionary_coercion(&lhs_type, &rhs_type, true), Some(Utf8)); + assert_eq!( + dictionary_comparison_coercion(&lhs_type, &rhs_type, true), + Some(Utf8) + ); // Since we can coerce values of Utf8 to Binary can support this let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8)); let rhs_type = Dictionary(Box::new(Int8), Box::new(Binary)); assert_eq!( - dictionary_coercion(&lhs_type, &rhs_type, true), + dictionary_comparison_coercion(&lhs_type, &rhs_type, true), Some(Binary) ); let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8)); let rhs_type = Utf8; - assert_eq!(dictionary_coercion(&lhs_type, &rhs_type, false), Some(Utf8)); assert_eq!( - dictionary_coercion(&lhs_type, &rhs_type, true), + dictionary_comparison_coercion(&lhs_type, &rhs_type, false), + Some(Utf8) + ); + assert_eq!( + dictionary_comparison_coercion(&lhs_type, &rhs_type, true), Some(lhs_type.clone()) ); let lhs_type = Utf8; let rhs_type = Dictionary(Box::new(Int8), Box::new(Utf8)); - assert_eq!(dictionary_coercion(&lhs_type, &rhs_type, false), Some(Utf8)); assert_eq!( - dictionary_coercion(&lhs_type, &rhs_type, true), + dictionary_comparison_coercion(&lhs_type, &rhs_type, false), + Some(Utf8) + ); + assert_eq!( + dictionary_comparison_coercion(&lhs_type, &rhs_type, true), Some(rhs_type.clone()) ); } @@ -1857,7 +1881,7 @@ mod tests { ); test_coercion_binary_rule!( DataType::Timestamp(TimeUnit::Second, Some("Europe/Brussels".into())), - DataType::Timestamp(TimeUnit::Second, utc.clone()), + DataType::Timestamp(TimeUnit::Second, utc), Operator::Eq, DataType::Timestamp(TimeUnit::Second, Some("Europe/Brussels".into())) ); diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 88939ccf41b8..85ba80396c8e 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -38,8 +38,7 @@ use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeRecursion, }; use datafusion_common::{ - internal_err, not_impl_err, plan_err, Column, DFSchema, Result, ScalarValue, - TableReference, + plan_err, Column, DFSchema, Result, ScalarValue, TableReference, }; use sqlparser::ast::{ display_comma_separated, ExceptSelectItem, 
ExcludeSelectItem, IlikeSelectItem, @@ -1082,7 +1081,7 @@ impl Expr { /// For example, for a projection (e.g. `SELECT `) the resulting arrow /// [`Schema`] will have a field with this name. /// - /// Note that the resulting string is subtlety different than the `Display` + /// Note that the resulting string is subtlety different from the `Display` /// representation for certain `Expr`. Some differences: /// /// 1. [`Expr::Alias`], which shows only the alias itself @@ -1104,6 +1103,7 @@ impl Expr { } /// Returns a full and complete string representation of this expression. + #[deprecated(note = "use format! instead")] pub fn canonical_name(&self) -> String { format!("{self}") } @@ -2386,263 +2386,13 @@ fn fmt_function( write!(f, "{}({}{})", fun, distinct_str, args.join(", ")) } -pub fn create_function_physical_name( - fun: &str, - distinct: bool, - args: &[Expr], - order_by: Option<&Vec>, -) -> Result { - let names: Vec = args - .iter() - .map(|e| create_physical_name(e, false)) - .collect::>()?; - - let distinct_str = match distinct { - true => "DISTINCT ", - false => "", - }; - - let phys_name = format!("{}({}{})", fun, distinct_str, names.join(",")); - - Ok(order_by - .map(|order_by| format!("{} ORDER BY [{}]", phys_name, expr_vec_fmt!(order_by))) - .unwrap_or(phys_name)) -} - -pub fn physical_name(e: &Expr) -> Result { - create_physical_name(e, true) -} - -fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { - match e { - Expr::Unnest(_) => { - internal_err!( - "Expr::Unnest should have been converted to LogicalPlan::Unnest" - ) - } - Expr::Column(c) => { - if is_first_expr { - Ok(c.name.clone()) - } else { - Ok(c.flat_name()) - } - } - Expr::Alias(Alias { name, .. }) => Ok(name.clone()), - Expr::ScalarVariable(_, variable_names) => Ok(variable_names.join(".")), - Expr::Literal(value) => Ok(format!("{value:?}")), - Expr::BinaryExpr(BinaryExpr { left, op, right }) => { - let left = create_physical_name(left, false)?; - let right = create_physical_name(right, false)?; - Ok(format!("{left} {op} {right}")) - } - Expr::Case(case) => { - let mut name = "CASE ".to_string(); - if let Some(e) = &case.expr { - let _ = write!(name, "{} ", create_physical_name(e, false)?); - } - for (w, t) in &case.when_then_expr { - let _ = write!( - name, - "WHEN {} THEN {} ", - create_physical_name(w, false)?, - create_physical_name(t, false)? - ); - } - if let Some(e) = &case.else_expr { - let _ = write!(name, "ELSE {} ", create_physical_name(e, false)?); - } - name += "END"; - Ok(name) - } - Expr::Cast(Cast { expr, .. }) => { - // CAST does not change the expression name - create_physical_name(expr, false) - } - Expr::TryCast(TryCast { expr, .. 
}) => { - // CAST does not change the expression name - create_physical_name(expr, false) - } - Expr::Not(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("NOT {expr}")) - } - Expr::Negative(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("(- {expr})")) - } - Expr::IsNull(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS NULL")) - } - Expr::IsNotNull(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS NOT NULL")) - } - Expr::IsTrue(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS TRUE")) - } - Expr::IsFalse(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS FALSE")) - } - Expr::IsUnknown(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS UNKNOWN")) - } - Expr::IsNotTrue(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS NOT TRUE")) - } - Expr::IsNotFalse(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS NOT FALSE")) - } - Expr::IsNotUnknown(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS NOT UNKNOWN")) - } - Expr::ScalarFunction(fun) => fun.func.schema_name(&fun.args), - Expr::WindowFunction(WindowFunction { - fun, - args, - order_by, - .. - }) => { - create_function_physical_name(&fun.to_string(), false, args, Some(order_by)) - } - Expr::AggregateFunction(AggregateFunction { - func, - distinct, - args, - filter: _, - order_by, - null_treatment: _, - }) => { - create_function_physical_name(func.name(), *distinct, args, order_by.as_ref()) - } - Expr::GroupingSet(grouping_set) => match grouping_set { - GroupingSet::Rollup(exprs) => Ok(format!( - "ROLLUP ({})", - exprs - .iter() - .map(|e| create_physical_name(e, false)) - .collect::>>()? - .join(", ") - )), - GroupingSet::Cube(exprs) => Ok(format!( - "CUBE ({})", - exprs - .iter() - .map(|e| create_physical_name(e, false)) - .collect::>>()? - .join(", ") - )), - GroupingSet::GroupingSets(lists_of_exprs) => { - let mut strings = vec![]; - for exprs in lists_of_exprs { - let exprs_str = exprs - .iter() - .map(|e| create_physical_name(e, false)) - .collect::>>()? - .join(", "); - strings.push(format!("({exprs_str})")); - } - Ok(format!("GROUPING SETS ({})", strings.join(", "))) - } - }, - - Expr::InList(InList { - expr, - list, - negated, - }) => { - let expr = create_physical_name(expr, false)?; - let list = list.iter().map(|expr| create_physical_name(expr, false)); - if *negated { - Ok(format!("{expr} NOT IN ({list:?})")) - } else { - Ok(format!("{expr} IN ({list:?})")) - } - } - Expr::Exists { .. 
} => { - not_impl_err!("EXISTS is not yet supported in the physical plan") - } - Expr::InSubquery(_) => { - not_impl_err!("IN subquery is not yet supported in the physical plan") - } - Expr::ScalarSubquery(_) => { - not_impl_err!("Scalar subqueries are not yet supported in the physical plan") - } - Expr::Between(Between { - expr, - negated, - low, - high, - }) => { - let expr = create_physical_name(expr, false)?; - let low = create_physical_name(low, false)?; - let high = create_physical_name(high, false)?; - if *negated { - Ok(format!("{expr} NOT BETWEEN {low} AND {high}")) - } else { - Ok(format!("{expr} BETWEEN {low} AND {high}")) - } - } - Expr::Like(Like { - negated, - expr, - pattern, - escape_char, - case_insensitive, - }) => { - let expr = create_physical_name(expr, false)?; - let pattern = create_physical_name(pattern, false)?; - let op_name = if *case_insensitive { "ILIKE" } else { "LIKE" }; - let escape = if let Some(char) = escape_char { - format!("CHAR '{char}'") - } else { - "".to_string() - }; - if *negated { - Ok(format!("{expr} NOT {op_name} {pattern}{escape}")) - } else { - Ok(format!("{expr} {op_name} {pattern}{escape}")) - } - } - Expr::SimilarTo(Like { - negated, - expr, - pattern, - escape_char, - case_insensitive: _, - }) => { - let expr = create_physical_name(expr, false)?; - let pattern = create_physical_name(pattern, false)?; - let escape = if let Some(char) = escape_char { - format!("CHAR '{char}'") - } else { - "".to_string() - }; - if *negated { - Ok(format!("{expr} NOT SIMILAR TO {pattern}{escape}")) - } else { - Ok(format!("{expr} SIMILAR TO {pattern}{escape}")) - } - } - Expr::Sort { .. } => { - internal_err!("Create physical name does not support sort expression") - } - Expr::Wildcard { qualifier, options } => match qualifier { - Some(qualifier) => Ok(format!("{}.*{}", qualifier, options)), - None => Ok(format!("*{}", options)), - }, - Expr::Placeholder(_) => { - internal_err!("Create physical name does not support placeholder") - } - Expr::OuterReferenceColumn(_, _) => { - internal_err!("Create physical name does not support OuterReferenceColumn") - } +/// The name of the column (field) that this `Expr` will produce in the physical plan. +/// The difference from [Expr::schema_name] is that top-level columns are unqualified. 
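A hedged sketch of how the new, much smaller `physical_name` behaves (assuming the usual `datafusion_expr` re-exports; not part of this patch):

```rust
use datafusion_common::Result;
use datafusion_expr::col;
use datafusion_expr::expr::physical_name;

fn demo() -> Result<()> {
    // A qualified top-level column keeps only its bare field name...
    assert_eq!(physical_name(&col("t.a"))?, "a");
    // ...while any other expression falls back to `Expr::schema_name()`.
    let name = physical_name(&(col("t.a") + col("t.b")))?;
    println!("{name}"); // typically prints `t.a + t.b`
    Ok(())
}
```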
+pub fn physical_name(expr: &Expr) -> Result { + if let Expr::Column(col) = expr { + Ok(col.name.clone()) + } else { + Ok(expr.schema_name().to_string()) } } @@ -2658,6 +2408,7 @@ mod test { use std::any::Any; #[test] + #[allow(deprecated)] fn format_case_when() -> Result<()> { let expr = case(col("a")) .when(lit(1), lit(true)) @@ -2670,6 +2421,7 @@ mod test { } #[test] + #[allow(deprecated)] fn format_cast() -> Result<()> { let expr = Expr::Cast(Cast { expr: Box::new(Expr::Literal(ScalarValue::Float32(Some(1.23)))), diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 4e6022399653..1e0b601146dd 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -394,7 +394,7 @@ pub fn create_udf( volatility: Volatility, fun: ScalarFunctionImplementation, ) -> ScalarUDF { - let return_type = Arc::try_unwrap(return_type).unwrap_or_else(|t| t.as_ref().clone()); + let return_type = Arc::unwrap_or_clone(return_type); ScalarUDF::from(SimpleScalarUDF::new( name, input_types, @@ -476,8 +476,8 @@ pub fn create_udaf( accumulator: AccumulatorFactoryFunction, state_type: Arc>, ) -> AggregateUDF { - let return_type = Arc::try_unwrap(return_type).unwrap_or_else(|t| t.as_ref().clone()); - let state_type = Arc::try_unwrap(state_type).unwrap_or_else(|t| t.as_ref().clone()); + let return_type = Arc::unwrap_or_clone(return_type); + let state_type = Arc::unwrap_or_clone(state_type); let state_fields = state_type .into_iter() .enumerate() @@ -594,7 +594,7 @@ pub fn create_udwf( volatility: Volatility, partition_evaluator_factory: PartitionEvaluatorFactory, ) -> WindowUDF { - let return_type = Arc::try_unwrap(return_type).unwrap_or_else(|t| t.as_ref().clone()); + let return_type = Arc::unwrap_or_clone(return_type); WindowUDF::from(SimpleWindowUDF::new( name, input_type, diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index c26970cb053a..768c4aabc840 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -279,6 +279,57 @@ where expr.alias_if_changed(original_name) } +/// Handles ensuring the name of rewritten expressions is not changed. +/// +/// For example, if an expression `1 + 2` is rewritten to `3`, the name of the +/// expression should be preserved: `3 as "1 + 2"` +/// +/// See for details +pub struct NamePreserver { + use_alias: bool, +} + +/// If the name of an expression is remembered, it will be preserved when +/// rewriting the expression +pub struct SavedName(Option); + +impl NamePreserver { + /// Create a new NamePreserver for rewriting the `expr` that is part of the specified plan + pub fn new(plan: &LogicalPlan) -> Self { + Self { + use_alias: !matches!(plan, LogicalPlan::Filter(_) | LogicalPlan::Join(_)), + } + } + + /// Create a new NamePreserver for rewriting the `expr`s in `Projection` + /// + /// This will use aliases + pub fn new_for_projection() -> Self { + Self { use_alias: true } + } + + pub fn save(&self, expr: &Expr) -> Result { + let original_name = if self.use_alias { + Some(expr.name_for_alias()?) 
+ } else { + None + }; + + Ok(SavedName(original_name)) + } +} + +impl SavedName { + /// Ensures the name of the rewritten expression is preserved + pub fn restore(self, expr: Expr) -> Result { + let Self(original_name) = self; + match original_name { + Some(name) => expr.alias_if_changed(name), + None => Ok(expr), + } + } +} + #[cfg(test)] mod test { use std::ops::Add; diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 10ec10e61239..3920a1a3517c 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -184,7 +184,7 @@ impl ExprSchemable for Expr { err, utils::generate_signature_error_msg( fun.name(), - fun.signature().clone(), + fun.signature(), &data_types ) ) @@ -199,7 +199,7 @@ impl ExprSchemable for Expr { err, utils::generate_signature_error_msg( fun.name(), - fun.signature().clone(), + fun.signature(), &data_types ) ) diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index a96caa03d611..2c2300b123c2 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -41,9 +41,8 @@ use crate::utils::{ find_valid_equijoin_key_pair, group_window_expr_by_sort_keys, }; use crate::{ - and, binary_expr, logical_plan::tree_node::unwrap_arc, DmlStatement, Expr, - ExprSchemable, Operator, RecursiveQuery, TableProviderFilterPushDown, TableSource, - WriteOp, + and, binary_expr, DmlStatement, Expr, ExprSchemable, Operator, RecursiveQuery, + TableProviderFilterPushDown, TableSource, WriteOp, }; use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef}; @@ -60,6 +59,7 @@ pub const UNNAMED_TABLE: &str = "?table?"; /// Builder for logical plans /// +/// # Example building a simple plan /// ``` /// # use datafusion_expr::{lit, col, LogicalPlanBuilder, logical_plan::table_scan}; /// # use datafusion_common::Result; @@ -88,17 +88,27 @@ pub const UNNAMED_TABLE: &str = "?table?"; /// .project(vec![col("last_name")])? /// .build()?; /// +/// // Convert from plan back to builder +/// let builder = LogicalPlanBuilder::from(plan); +/// /// # Ok(()) /// # } /// ``` #[derive(Debug, Clone)] pub struct LogicalPlanBuilder { - plan: LogicalPlan, + plan: Arc, } impl LogicalPlanBuilder { /// Create a builder from an existing plan - pub fn from(plan: LogicalPlan) -> Self { + pub fn new(plan: LogicalPlan) -> Self { + Self { + plan: Arc::new(plan), + } + } + + /// Create a builder from an existing plan + pub fn new_from_arc(plan: Arc) -> Self { Self { plan } } @@ -116,7 +126,7 @@ impl LogicalPlanBuilder { /// /// `produce_one_row` set to true means this empty node needs to produce a placeholder row. pub fn empty(produce_one_row: bool) -> Self { - Self::from(LogicalPlan::EmptyRelation(EmptyRelation { + Self::new(LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row, schema: DFSchemaRef::new(DFSchema::empty()), })) @@ -125,7 +135,7 @@ impl LogicalPlanBuilder { /// Convert a regular plan into a recursive query. /// `is_distinct` indicates whether the recursive term should be de-duplicated (`UNION`) after each iteration or not (`UNION ALL`). 
pub fn to_recursive_query( - &self, + self, name: String, recursive_term: LogicalPlan, is_distinct: bool, @@ -150,7 +160,7 @@ impl LogicalPlanBuilder { coerce_plan_expr_for_schema(recursive_term, self.plan.schema())?; Ok(Self::from(LogicalPlan::RecursiveQuery(RecursiveQuery { name, - static_term: Arc::new(self.plan.clone()), + static_term: self.plan, recursive_term: Arc::new(coerced_recursive_term), is_distinct, }))) @@ -201,7 +211,7 @@ impl LogicalPlanBuilder { }; common_type = Some(new_type); } else { - common_type = Some(data_type.clone()); + common_type = Some(data_type); } } field_types.push(common_type.unwrap_or(DataType::Utf8)); @@ -210,7 +220,7 @@ impl LogicalPlanBuilder { for row in &mut values { for (j, field_type) in field_types.iter().enumerate() { if let Expr::Literal(ScalarValue::Null) = row[j] { - row[j] = Expr::Literal(ScalarValue::try_from(field_type.clone())?); + row[j] = Expr::Literal(ScalarValue::try_from(field_type)?); } else { row[j] = std::mem::take(&mut row[j]).cast_to(field_type, &empty_schema)?; @@ -228,7 +238,7 @@ impl LogicalPlanBuilder { .collect::>(); let dfschema = DFSchema::from_unqualified_fields(fields.into(), HashMap::new())?; let schema = DFSchemaRef::new(dfschema); - Ok(Self::from(LogicalPlan::Values(Values { schema, values }))) + Ok(Self::new(LogicalPlan::Values(Values { schema, values }))) } /// Convert a table provider into a builder with a TableScan @@ -279,7 +289,7 @@ impl LogicalPlanBuilder { options: HashMap, partition_by: Vec, ) -> Result { - Ok(Self::from(LogicalPlan::Copy(CopyTo { + Ok(Self::new(LogicalPlan::Copy(CopyTo { input: Arc::new(input), output_url, partition_by, @@ -303,7 +313,7 @@ impl LogicalPlanBuilder { WriteOp::InsertInto }; - Ok(Self::from(LogicalPlan::Dml(DmlStatement::new( + Ok(Self::new(LogicalPlan::Dml(DmlStatement::new( table_name.into(), table_schema, op, @@ -320,7 +330,7 @@ impl LogicalPlanBuilder { ) -> Result { TableScan::try_new(table_name, table_source, projection, filters, None) .map(LogicalPlan::TableScan) - .map(Self::from) + .map(Self::new) } /// Wrap a plan in a window @@ -365,7 +375,7 @@ impl LogicalPlanBuilder { self, expr: impl IntoIterator>, ) -> Result { - project(self.plan, expr).map(Self::from) + project(Arc::unwrap_or_clone(self.plan), expr).map(Self::new) } /// Select the given column indices @@ -380,17 +390,25 @@ impl LogicalPlanBuilder { /// Apply a filter pub fn filter(self, expr: impl Into) -> Result { let expr = normalize_col(expr.into(), &self.plan)?; - Filter::try_new(expr, Arc::new(self.plan)) + Filter::try_new(expr, self.plan) + .map(LogicalPlan::Filter) + .map(Self::new) + } + + /// Apply a filter which is used for a having clause + pub fn having(self, expr: impl Into) -> Result { + let expr = normalize_col(expr.into(), &self.plan)?; + Filter::try_new_with_having(expr, self.plan) .map(LogicalPlan::Filter) .map(Self::from) } /// Make a builder for a prepare logical plan from the builder's plan pub fn prepare(self, name: String, data_types: Vec) -> Result { - Ok(Self::from(LogicalPlan::Prepare(Prepare { + Ok(Self::new(LogicalPlan::Prepare(Prepare { name, data_types, - input: Arc::new(self.plan), + input: self.plan, }))) } @@ -401,16 +419,16 @@ impl LogicalPlanBuilder { /// `fetch` - Maximum number of rows to fetch, after skipping `skip` rows, /// if specified. 
pub fn limit(self, skip: usize, fetch: Option) -> Result { - Ok(Self::from(LogicalPlan::Limit(Limit { + Ok(Self::new(LogicalPlan::Limit(Limit { skip, fetch, - input: Arc::new(self.plan), + input: self.plan, }))) } /// Apply an alias pub fn alias(self, alias: impl Into) -> Result { - subquery_alias(self.plan, alias).map(Self::from) + subquery_alias(Arc::unwrap_or_clone(self.plan), alias).map(Self::new) } /// Add missing sort columns to all downstream projection @@ -465,7 +483,7 @@ impl LogicalPlanBuilder { Self::ambiguous_distinct_check(&missing_exprs, missing_cols, &expr)?; } expr.extend(missing_exprs); - project((*input).clone(), expr) + project(Arc::unwrap_or_clone(input), expr) } _ => { let is_distinct = @@ -534,25 +552,22 @@ impl LogicalPlanBuilder { // Collect sort columns that are missing in the input plan's schema let mut missing_cols: Vec = vec![]; - exprs - .clone() - .into_iter() - .try_for_each::<_, Result<()>>(|expr| { - let columns = expr.column_refs(); + exprs.iter().try_for_each::<_, Result<()>>(|expr| { + let columns = expr.column_refs(); - columns.into_iter().for_each(|c| { - if !schema.has_column(c) { - missing_cols.push(c.clone()); - } - }); + columns.into_iter().for_each(|c| { + if !schema.has_column(c) { + missing_cols.push(c.clone()); + } + }); - Ok(()) - })?; + Ok(()) + })?; if missing_cols.is_empty() { - return Ok(Self::from(LogicalPlan::Sort(Sort { + return Ok(Self::new(LogicalPlan::Sort(Sort { expr: normalize_cols(exprs, &self.plan)?, - input: Arc::new(self.plan), + input: self.plan, fetch: None, }))); } @@ -561,7 +576,11 @@ impl LogicalPlanBuilder { let new_expr = schema.columns().into_iter().map(Expr::Column).collect(); let is_distinct = false; - let plan = Self::add_missing_columns(self.plan, &missing_cols, is_distinct)?; + let plan = Self::add_missing_columns( + Arc::unwrap_or_clone(self.plan), + &missing_cols, + is_distinct, + )?; let sort_plan = LogicalPlan::Sort(Sort { expr: normalize_cols(exprs, &plan)?, input: Arc::new(plan), @@ -570,29 +589,27 @@ impl LogicalPlanBuilder { Projection::try_new(new_expr, Arc::new(sort_plan)) .map(LogicalPlan::Projection) - .map(Self::from) + .map(Self::new) } /// Apply a union, preserving duplicate rows pub fn union(self, plan: LogicalPlan) -> Result { - union(self.plan, plan).map(Self::from) + union(Arc::unwrap_or_clone(self.plan), plan).map(Self::new) } /// Apply a union, removing duplicate rows pub fn union_distinct(self, plan: LogicalPlan) -> Result { - let left_plan: LogicalPlan = self.plan; + let left_plan: LogicalPlan = Arc::unwrap_or_clone(self.plan); let right_plan: LogicalPlan = plan; - Ok(Self::from(LogicalPlan::Distinct(Distinct::All(Arc::new( + Ok(Self::new(LogicalPlan::Distinct(Distinct::All(Arc::new( union(left_plan, right_plan)?, ))))) } /// Apply deduplication: Only distinct (different) values are returned) pub fn distinct(self) -> Result { - Ok(Self::from(LogicalPlan::Distinct(Distinct::All(Arc::new( - self.plan, - ))))) + Ok(Self::new(LogicalPlan::Distinct(Distinct::All(self.plan)))) } /// Project first values of the specified expression list according to the provided @@ -603,8 +620,8 @@ impl LogicalPlanBuilder { select_expr: Vec, sort_expr: Option>, ) -> Result { - Ok(Self::from(LogicalPlan::Distinct(Distinct::On( - DistinctOn::try_new(on_expr, select_expr, sort_expr, Arc::new(self.plan))?, + Ok(Self::new(LogicalPlan::Distinct(Distinct::On( + DistinctOn::try_new(on_expr, select_expr, sort_expr, self.plan)?, )))) } @@ -690,7 +707,7 @@ impl LogicalPlanBuilder { pub(crate) fn normalize( plan: &LogicalPlan, 
- column: impl Into + Clone, + column: impl Into, ) -> Result { let schema = plan.schema(); let fallback_schemas = plan.fallback_normalize_schemas(); @@ -819,8 +836,8 @@ impl LogicalPlanBuilder { let join_schema = build_join_schema(self.plan.schema(), right.schema(), &join_type)?; - Ok(Self::from(LogicalPlan::Join(Join { - left: Arc::new(self.plan), + Ok(Self::new(LogicalPlan::Join(Join { + left: self.plan, right: Arc::new(right), on, filter, @@ -883,8 +900,8 @@ impl LogicalPlanBuilder { DataFusionError::Internal("filters should not be None here".to_string()) })?) } else { - Ok(Self::from(LogicalPlan::Join(Join { - left: Arc::new(self.plan), + Ok(Self::new(LogicalPlan::Join(Join { + left: self.plan, right: Arc::new(right), on: join_on, filter: filters, @@ -900,8 +917,8 @@ impl LogicalPlanBuilder { pub fn cross_join(self, right: LogicalPlan) -> Result { let join_schema = build_join_schema(self.plan.schema(), right.schema(), &JoinType::Inner)?; - Ok(Self::from(LogicalPlan::CrossJoin(CrossJoin { - left: Arc::new(self.plan), + Ok(Self::new(LogicalPlan::CrossJoin(CrossJoin { + left: self.plan, right: Arc::new(right), schema: DFSchemaRef::new(join_schema), }))) @@ -909,8 +926,8 @@ impl LogicalPlanBuilder { /// Repartition pub fn repartition(self, partitioning_scheme: Partitioning) -> Result { - Ok(Self::from(LogicalPlan::Repartition(Repartition { - input: Arc::new(self.plan), + Ok(Self::new(LogicalPlan::Repartition(Repartition { + input: self.plan, partitioning_scheme, }))) } @@ -922,9 +939,9 @@ impl LogicalPlanBuilder { ) -> Result { let window_expr = normalize_cols(window_expr, &self.plan)?; validate_unique_names("Windows", &window_expr)?; - Ok(Self::from(LogicalPlan::Window(Window::try_new( + Ok(Self::new(LogicalPlan::Window(Window::try_new( window_expr, - Arc::new(self.plan), + self.plan, )?))) } @@ -941,9 +958,9 @@ impl LogicalPlanBuilder { let group_expr = add_group_by_exprs_from_dependencies(group_expr, self.plan.schema())?; - Aggregate::try_new(Arc::new(self.plan), group_expr, aggr_expr) + Aggregate::try_new(self.plan, group_expr, aggr_expr) .map(LogicalPlan::Aggregate) - .map(Self::from) + .map(Self::new) } /// Create an expression to represent the explanation of the plan @@ -957,18 +974,18 @@ impl LogicalPlanBuilder { let schema = schema.to_dfschema_ref()?; if analyze { - Ok(Self::from(LogicalPlan::Analyze(Analyze { + Ok(Self::new(LogicalPlan::Analyze(Analyze { verbose, - input: Arc::new(self.plan), + input: self.plan, schema, }))) } else { let stringified_plans = vec![self.plan.to_stringified(PlanType::InitialLogicalPlan)]; - Ok(Self::from(LogicalPlan::Explain(Explain { + Ok(Self::new(LogicalPlan::Explain(Explain { verbose, - plan: Arc::new(self.plan), + plan: self.plan, stringified_plans, schema, logical_optimization_succeeded: false, @@ -1046,7 +1063,7 @@ impl LogicalPlanBuilder { /// Build the plan pub fn build(self) -> Result { - Ok(self.plan) + Ok(Arc::unwrap_or_clone(self.plan)) } /// Apply a join with the expression on constraint. @@ -1106,8 +1123,8 @@ impl LogicalPlanBuilder { let join_schema = build_join_schema(self.plan.schema(), right.schema(), &join_type)?; - Ok(Self::from(LogicalPlan::Join(Join { - left: Arc::new(self.plan), + Ok(Self::new(LogicalPlan::Join(Join { + left: self.plan, right: Arc::new(right), on: join_key_pairs, filter, @@ -1120,7 +1137,7 @@ impl LogicalPlanBuilder { /// Unnest the given column. 
pub fn unnest_column(self, column: impl Into) -> Result { - Ok(Self::from(unnest(self.plan, vec![column.into()])?)) + unnest(Arc::unwrap_or_clone(self.plan), vec![column.into()]).map(Self::new) } /// Unnest the given column given [`UnnestOptions`] @@ -1129,11 +1146,12 @@ impl LogicalPlanBuilder { column: impl Into, options: UnnestOptions, ) -> Result { - Ok(Self::from(unnest_with_options( - self.plan, + unnest_with_options( + Arc::unwrap_or_clone(self.plan), vec![column.into()], options, - )?)) + ) + .map(Self::new) } /// Unnest the given columns with the given [`UnnestOptions`] @@ -1142,45 +1160,20 @@ impl LogicalPlanBuilder { columns: Vec, options: UnnestOptions, ) -> Result { - Ok(Self::from(unnest_with_options( - self.plan, columns, options, - )?)) + unnest_with_options(Arc::unwrap_or_clone(self.plan), columns, options) + .map(Self::new) } } -/// Converts a `Arc` into `LogicalPlanBuilder` -/// ``` -/// # use datafusion_expr::{Expr, expr, col, LogicalPlanBuilder, logical_plan::table_scan}; -/// # use datafusion_common::Result; -/// # use arrow::datatypes::{Schema, DataType, Field}; -/// # fn main() -> Result<()> { -/// # -/// # fn employee_schema() -> Schema { -/// # Schema::new(vec![ -/// # Field::new("id", DataType::Int32, false), -/// # Field::new("first_name", DataType::Utf8, false), -/// # Field::new("last_name", DataType::Utf8, false), -/// # Field::new("state", DataType::Utf8, false), -/// # Field::new("salary", DataType::Int32, false), -/// # ]) -/// # } -/// # -/// // Create the plan -/// let plan = table_scan(Some("employee_csv"), &employee_schema(), Some(vec![3, 4]))? -/// .sort(vec![ -/// Expr::Sort(expr::Sort::new(Box::new(col("state")), true, true)), -/// Expr::Sort(expr::Sort::new(Box::new(col("salary")), false, false)), -/// ])? -/// .build()?; -/// // Convert LogicalPlan into LogicalPlanBuilder -/// let plan_builder: LogicalPlanBuilder = std::sync::Arc::new(plan).into(); -/// # Ok(()) -/// # } -/// ``` +impl From for LogicalPlanBuilder { + fn from(plan: LogicalPlan) -> Self { + LogicalPlanBuilder::new(plan) + } +} impl From> for LogicalPlanBuilder { fn from(plan: Arc) -> Self { - LogicalPlanBuilder::from(unwrap_arc(plan)) + LogicalPlanBuilder::new_from_arc(plan) } } @@ -1342,7 +1335,17 @@ pub fn validate_unique_names<'a>( }) } -/// Union two logical plans. +/// Union two [`LogicalPlan`]s. +/// +/// Constructs the UNION plan, but does not perform type-coercion. Therefore the +/// subtree expressions will not be properly typed until the optimizer pass. +/// +/// If a properly typed UNION plan is needed, refer to [`TypeCoercionRewriter::coerce_union`] +/// or alternatively, merge the union input schema using [`coerce_union_schema`] and +/// apply the expression rewrite with [`coerce_plan_expr_for_schema`]. +/// +/// [`TypeCoercionRewriter::coerce_union`]: https://docs.rs/datafusion-optimizer/latest/datafusion_optimizer/analyzer/type_coercion/struct.TypeCoercionRewriter.html#method.coerce_union +/// [`coerce_union_schema`]: https://docs.rs/datafusion-optimizer/latest/datafusion_optimizer/analyzer/type_coercion/fn.coerce_union_schema.html pub fn union(left_plan: LogicalPlan, right_plan: LogicalPlan) -> Result { // Temporarily use the schema from the left input and later rely on the analyzer to // coerce the two schemas into a common one. 
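// A minimal usage sketch of the builder `union` documented above, written as a
// standalone Rust snippet; the table names ("t1", "t2") and the single-column
// schema are assumed purely for illustration. As the new docs note, `union`
// does not perform type coercion, so a later analyzer pass (for example
// TypeCoercionRewriter::coerce_union) is still needed before the plan's
// expressions are fully typed.
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::Result;
use datafusion_expr::logical_plan::{table_scan, LogicalPlanBuilder};

fn union_example() -> Result<()> {
    let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
    let left = table_scan(Some("t1"), &schema, None)?.build()?;
    let right = table_scan(Some("t2"), &schema, None)?.build()?;
    // `LogicalPlanBuilder::union` delegates to the free `union` function below,
    // so the resulting plan's schema is taken from the left input for now.
    let plan = LogicalPlanBuilder::from(left).union(right)?.build()?;
    println!("{}", plan.display_indent());
    Ok(())
}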
@@ -1530,7 +1533,7 @@ pub fn get_unnested_columns( | DataType::FixedSizeList(field, _) | DataType::LargeList(field) => { let new_field = Arc::new(Field::new( - col_name.clone(), + col_name, field.data_type().clone(), // Unnesting may produce NULLs even if the list is not null. // For example: unnset([1], []) -> 1, null @@ -2069,7 +2072,7 @@ mod tests { let schema = Schema::new(vec![ Field::new("scalar", DataType::UInt32, false), Field::new_list("strings", string_field, false), - Field::new_list("structs", struct_field_in_list.clone(), false), + Field::new_list("structs", struct_field_in_list, false), Field::new( "struct_singular", DataType::Struct(Fields::from(vec![ diff --git a/datafusion/expr/src/logical_plan/display.rs b/datafusion/expr/src/logical_plan/display.rs index 343eda056ffe..5a881deb54e1 100644 --- a/datafusion/expr/src/logical_plan/display.rs +++ b/datafusion/expr/src/logical_plan/display.rs @@ -387,19 +387,16 @@ impl<'a, 'b> PgJsonVisitor<'a, 'b> { } if !full_filter.is_empty() { - object["Full Filters"] = serde_json::Value::String( - expr_vec_fmt!(full_filter).to_string(), - ); + object["Full Filters"] = + serde_json::Value::String(expr_vec_fmt!(full_filter)); }; if !partial_filter.is_empty() { - object["Partial Filters"] = serde_json::Value::String( - expr_vec_fmt!(partial_filter).to_string(), - ); + object["Partial Filters"] = + serde_json::Value::String(expr_vec_fmt!(partial_filter)); } if !unsupported_filters.is_empty() { - object["Unsupported Filters"] = serde_json::Value::String( - expr_vec_fmt!(unsupported_filters).to_string(), - ); + object["Unsupported Filters"] = + serde_json::Value::String(expr_vec_fmt!(unsupported_filters)); } } @@ -595,9 +592,8 @@ impl<'a, 'b> PgJsonVisitor<'a, 'b> { "Select": expr_vec_fmt!(select_expr), }); if let Some(sort_expr) = sort_expr { - object["Sort"] = serde_json::Value::String( - expr_vec_fmt!(sort_expr).to_string(), - ); + object["Sort"] = + serde_json::Value::String(expr_vec_fmt!(sort_expr)); } object diff --git a/datafusion/expr/src/logical_plan/mod.rs b/datafusion/expr/src/logical_plan/mod.rs index b58208591920..5b5a842fa4cf 100644 --- a/datafusion/expr/src/logical_plan/mod.rs +++ b/datafusion/expr/src/logical_plan/mod.rs @@ -26,7 +26,7 @@ pub mod tree_node; pub use builder::{ build_join_schema, table_scan, union, wrap_projection_for_join_if_necessary, - LogicalPlanBuilder, UNNAMED_TABLE, + LogicalPlanBuilder, LogicalTableSource, UNNAMED_TABLE, }; pub use ddl::{ CreateCatalog, CreateCatalogSchema, CreateExternalTable, CreateFunction, diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index f93b7c0fedd0..359de2d30a57 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -26,7 +26,7 @@ use super::dml::CopyTo; use super::DdlStatement; use crate::builder::{change_redundant_column, unnest_with_options}; use crate::expr::{Placeholder, Sort as SortExpr, WindowFunction}; -use crate::expr_rewriter::{create_col_from_scalar_expr, normalize_cols}; +use crate::expr_rewriter::{create_col_from_scalar_expr, normalize_cols, NamePreserver}; use crate::logical_plan::display::{GraphvizVisitor, IndentVisitor}; use crate::logical_plan::extension::UserDefinedLogicalNode; use crate::logical_plan::{DmlStatement, Statement}; @@ -51,7 +51,6 @@ use datafusion_common::{ // backwards compatibility use crate::display::PgJsonVisitor; -use crate::logical_plan::tree_node::unwrap_arc; pub use datafusion_common::display::{PlanType, StringifiedPlan, ToStringifiedPlan}; pub 
use datafusion_common::{JoinConstraint, JoinType}; @@ -643,9 +642,12 @@ impl LogicalPlan { // todo it isn't clear why the schema is not recomputed here Ok(LogicalPlan::Values(Values { schema, values })) } - LogicalPlan::Filter(Filter { predicate, input }) => { - Filter::try_new(predicate, input).map(LogicalPlan::Filter) - } + LogicalPlan::Filter(Filter { + predicate, + input, + having, + }) => Filter::try_new_internal(predicate, input, having) + .map(LogicalPlan::Filter), LogicalPlan::Repartition(_) => Ok(self), LogicalPlan::Window(Window { input, @@ -767,7 +769,7 @@ impl LogicalPlan { .. }) => { // Update schema with unnested column type. - unnest_with_options(unwrap_arc(input), exec_columns, options) + unnest_with_options(Arc::unwrap_or_clone(input), exec_columns, options) } } } @@ -1157,10 +1159,7 @@ impl LogicalPlan { Ok(if let LogicalPlan::Prepare(prepare_lp) = plan_with_values { param_values.verify(&prepare_lp.data_types)?; // try and take ownership of the input if is not shared, clone otherwise - match Arc::try_unwrap(prepare_lp.input) { - Ok(input) => input, - Err(arc_input) => arc_input.as_ref().clone(), - } + Arc::unwrap_or_clone(prepare_lp.input) } else { plan_with_values }) @@ -1336,15 +1335,20 @@ impl LogicalPlan { ) -> Result { self.transform_up_with_subqueries(|plan| { let schema = Arc::clone(plan.schema()); + let name_preserver = NamePreserver::new(&plan); plan.map_expressions(|e| { - e.infer_placeholder_types(&schema)?.transform_up(|e| { - if let Expr::Placeholder(Placeholder { id, .. }) = e { - let value = param_values.get_placeholders_with_values(&id)?; - Ok(Transformed::yes(Expr::Literal(value))) - } else { - Ok(Transformed::no(e)) - } - }) + let original_name = name_preserver.save(&e)?; + let transformed_expr = + e.infer_placeholder_types(&schema)?.transform_up(|e| { + if let Expr::Placeholder(Placeholder { id, .. }) = e { + let value = param_values.get_placeholders_with_values(&id)?; + Ok(Transformed::yes(Expr::Literal(value))) + } else { + Ok(Transformed::no(e)) + } + })?; + // Preserve name to avoid breaking column references to this expression + transformed_expr.map_data(|expr| original_name.restore(expr)) }) }) .map(|res| res.data) @@ -2080,6 +2084,8 @@ pub struct Filter { pub predicate: Expr, /// The incoming logical plan pub input: Arc, + /// The flag to indicate if the filter is a having clause + pub having: bool, } impl Filter { @@ -2088,6 +2094,20 @@ impl Filter { /// Notes: as Aliases have no effect on the output of a filter operator, /// they are removed from the predicate expression. pub fn try_new(predicate: Expr, input: Arc) -> Result { + Self::try_new_internal(predicate, input, false) + } + + /// Create a new filter operator for a having clause. + /// This is similar to a filter, but its having flag is set to true. + pub fn try_new_with_having(predicate: Expr, input: Arc) -> Result { + Self::try_new_internal(predicate, input, true) + } + + fn try_new_internal( + predicate: Expr, + input: Arc, + having: bool, + ) -> Result { // Filter predicates must return a boolean value so we try and validate that here. // Note that it is not always possible to resolve the predicate expression during plan // construction (such as with correlated subqueries) so we make a best effort here and @@ -2104,6 +2124,7 @@ impl Filter { Ok(Self { predicate: predicate.unalias_nested().data, input, + having, }) } @@ -2907,7 +2928,11 @@ impl Debug for Subquery { } } -/// Logical partitioning schemes supported by the repartition operator. 
+/// Logical partitioning schemes supported by [`LogicalPlan::Repartition`] +/// +/// See [`Partitioning`] for more details on partitioning +/// +/// [`Partitioning`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/enum.Partitioning.html# #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Partitioning { /// Allocate batches using a round-robin algorithm and the specified number of partitions diff --git a/datafusion/expr/src/logical_plan/tree_node.rs b/datafusion/expr/src/logical_plan/tree_node.rs index dbe43128fd38..273404c8df31 100644 --- a/datafusion/expr/src/logical_plan/tree_node.rs +++ b/datafusion/expr/src/logical_plan/tree_node.rs @@ -87,8 +87,17 @@ impl TreeNode for LogicalPlan { schema, }) }), - LogicalPlan::Filter(Filter { predicate, input }) => rewrite_arc(input, f)? - .update_data(|input| LogicalPlan::Filter(Filter { predicate, input })), + LogicalPlan::Filter(Filter { + predicate, + input, + having, + }) => rewrite_arc(input, f)?.update_data(|input| { + LogicalPlan::Filter(Filter { + predicate, + input, + having, + }) + }), LogicalPlan::Repartition(Repartition { input, partitioning_scheme, @@ -370,21 +379,12 @@ impl TreeNode for LogicalPlan { } } -/// Converts a `Arc` without copying, if possible. Copies the plan -/// if there is a shared reference -pub fn unwrap_arc(plan: Arc) -> LogicalPlan { - Arc::try_unwrap(plan) - // if None is returned, there is another reference to this - // LogicalPlan, so we can not own it, and must clone instead - .unwrap_or_else(|node| node.as_ref().clone()) -} - /// Applies `f` to rewrite a `Arc` without copying, if possible fn rewrite_arc Result>>( plan: Arc, mut f: F, ) -> Result>> { - f(unwrap_arc(plan))?.map_data(|new_plan| Ok(Arc::new(new_plan))) + f(Arc::unwrap_or_clone(plan))?.map_data(|new_plan| Ok(Arc::new(new_plan))) } /// rewrite a `Vec` of `Arc` without copying, if possible @@ -561,10 +561,17 @@ impl LogicalPlan { value.into_iter().map_until_stop_and_collect(&mut f) })? .update_data(|values| LogicalPlan::Values(Values { schema, values })), - LogicalPlan::Filter(Filter { predicate, input }) => f(predicate)? - .update_data(|predicate| { - LogicalPlan::Filter(Filter { predicate, input }) - }), + LogicalPlan::Filter(Filter { + predicate, + input, + having, + }) => f(predicate)?.update_data(|predicate| { + LogicalPlan::Filter(Filter { + predicate, + input, + having, + }) + }), LogicalPlan::Repartition(Repartition { input, partitioning_scheme, diff --git a/datafusion/expr/src/table_source.rs b/datafusion/expr/src/table_source.rs index 2de3cc923315..8b8d2dfcf2df 100644 --- a/datafusion/expr/src/table_source.rs +++ b/datafusion/expr/src/table_source.rs @@ -22,7 +22,7 @@ use crate::{Expr, LogicalPlan}; use arrow::datatypes::SchemaRef; use datafusion_common::{Constraints, Result}; -use std::any::Any; +use std::{any::Any, borrow::Cow}; /// Indicates how a filter expression is handled by /// [`TableProvider::scan`]. @@ -122,7 +122,7 @@ pub trait TableSource: Sync + Send { } /// Get the Logical plan of this table provider, if available. 
- fn get_logical_plan(&self) -> Option<&LogicalPlan> { + fn get_logical_plan(&self) -> Option> { None } diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index cb278c767974..7b4b3bb95c46 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -337,6 +337,9 @@ where /// let expr = geometric_mean.call(vec![col("a")]); /// ``` pub trait AggregateUDFImpl: Debug + Send + Sync { + // Note: When adding any methods (with default implementations), remember to add them also + // into the AliasedAggregateUDFImpl below! + /// Returns this object as an [`Any`] trait object fn as_any(&self) -> &dyn Any; @@ -635,6 +638,60 @@ impl AggregateUDFImpl for AliasedAggregateUDFImpl { &self.aliases } + fn state_fields(&self, args: StateFieldsArgs) -> Result> { + self.inner.state_fields(args) + } + + fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool { + self.inner.groups_accumulator_supported(args) + } + + fn create_groups_accumulator( + &self, + args: AccumulatorArgs, + ) -> Result> { + self.inner.create_groups_accumulator(args) + } + + fn create_sliding_accumulator( + &self, + args: AccumulatorArgs, + ) -> Result> { + self.inner.accumulator(args) + } + + fn with_beneficial_ordering( + self: Arc, + beneficial_ordering: bool, + ) -> Result>> { + Arc::clone(&self.inner) + .with_beneficial_ordering(beneficial_ordering) + .map(|udf| { + udf.map(|udf| { + Arc::new(AliasedAggregateUDFImpl { + inner: udf, + aliases: self.aliases.clone(), + }) as Arc + }) + }) + } + + fn order_sensitivity(&self) -> AggregateOrderSensitivity { + self.inner.order_sensitivity() + } + + fn simplify(&self) -> Option { + self.inner.simplify() + } + + fn reverse_expr(&self) -> ReversedUDAF { + self.inner.reverse_expr() + } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + self.inner.coerce_types(arg_types) + } + fn equals(&self, other: &dyn AggregateUDFImpl) -> bool { if let Some(other) = other.as_any().downcast_ref::() { self.inner.equals(other.inner.as_ref()) && self.aliases == other.aliases @@ -649,6 +706,10 @@ impl AggregateUDFImpl for AliasedAggregateUDFImpl { self.aliases.hash(hasher); hasher.finish() } + + fn is_descending(&self) -> Option { + self.inner.is_descending() + } } /// Implementation of [`AggregateUDFImpl`] that wraps the function style pointers diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index a4584038e48b..be3f811dbe51 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -346,6 +346,9 @@ where /// let expr = add_one.call(vec![col("a")]); /// ``` pub trait ScalarUDFImpl: Debug + Send + Sync { + // Note: When adding any methods (with default implementations), remember to add them also + // into the AliasedScalarUDFImpl below! 
+ /// Returns this object as an [`Any`] trait object fn as_any(&self) -> &dyn Any; @@ -632,6 +635,14 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl { self.inner.name() } + fn display_name(&self, args: &[Expr]) -> Result { + self.inner.display_name(args) + } + + fn schema_name(&self, args: &[Expr]) -> Result { + self.inner.schema_name(args) + } + fn signature(&self) -> &Signature { self.inner.signature() } @@ -640,12 +651,57 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl { self.inner.return_type(arg_types) } + fn aliases(&self) -> &[String] { + &self.aliases + } + + fn return_type_from_exprs( + &self, + args: &[Expr], + schema: &dyn ExprSchema, + arg_types: &[DataType], + ) -> Result { + self.inner.return_type_from_exprs(args, schema, arg_types) + } + fn invoke(&self, args: &[ColumnarValue]) -> Result { self.inner.invoke(args) } - fn aliases(&self) -> &[String] { - &self.aliases + fn invoke_no_args(&self, number_rows: usize) -> Result { + self.inner.invoke_no_args(number_rows) + } + + fn simplify( + &self, + args: Vec, + info: &dyn SimplifyInfo, + ) -> Result { + self.inner.simplify(args, info) + } + + fn short_circuits(&self) -> bool { + self.inner.short_circuits() + } + + fn evaluate_bounds(&self, input: &[&Interval]) -> Result { + self.inner.evaluate_bounds(input) + } + + fn propagate_constraints( + &self, + interval: &Interval, + inputs: &[&Interval], + ) -> Result>> { + self.inner.propagate_constraints(interval, inputs) + } + + fn output_ordering(&self, inputs: &[ExprProperties]) -> Result { + self.inner.output_ordering(inputs) + } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + self.inner.coerce_types(arg_types) } fn equals(&self, other: &dyn ScalarUDFImpl) -> bool { diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 88b3d613cb43..e5fdaaceb439 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -266,6 +266,9 @@ where /// .unwrap(); /// ``` pub trait WindowUDFImpl: Debug + Send + Sync { + // Note: When adding any methods (with default implementations), remember to add them also + // into the AliasedWindowUDFImpl below! 
+ /// Returns this object as an [`Any`] trait object fn as_any(&self) -> &dyn Any; @@ -428,6 +431,10 @@ impl WindowUDFImpl for AliasedWindowUDFImpl { &self.aliases } + fn simplify(&self) -> Option { + self.inner.simplify() + } + fn equals(&self, other: &dyn WindowUDFImpl) -> bool { if let Some(other) = other.as_any().downcast_ref::() { self.inner.equals(other.inner.as_ref()) && self.aliases == other.aliases @@ -442,6 +449,18 @@ impl WindowUDFImpl for AliasedWindowUDFImpl { self.aliases.hash(hasher); hasher.finish() } + + fn nullable(&self) -> bool { + self.inner.nullable() + } + + fn sort_options(&self) -> Option { + self.inner.sort_options() + } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + self.inner.coerce_types(arg_types) + } } /// Implementation of [`WindowUDFImpl`] that wraps the function style pointers diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 5f5c468fa2f5..a01d5ef8973a 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -463,7 +463,7 @@ pub fn expand_qualified_wildcard( /// if bool is true SortExpr comes from `PARTITION BY` column, if false comes from `ORDER BY` column type WindowSortKey = Vec<(Expr, bool)>; -/// Generate a sort key for a given window expr's partition_by and order_bu expr +/// Generate a sort key for a given window expr's partition_by and order_by expr pub fn generate_sort_key( partition_by: &[Expr], order_by: &[Expr], @@ -569,7 +569,7 @@ pub fn compare_sort_expr( } Ordering::Equal } - _ => Ordering::Equal, + _ => panic!("Sort expressions must be of type Sort"), } } @@ -804,6 +804,15 @@ pub fn find_base_plan(input: &LogicalPlan) -> &LogicalPlan { match input { LogicalPlan::Window(window) => find_base_plan(&window.input), LogicalPlan::Aggregate(agg) => find_base_plan(&agg.input), + LogicalPlan::Filter(filter) => { + if filter.having { + // If a filter is used for a having clause, its input plan is an aggregation. + // We should expand the wildcard expression based on the aggregation's input plan. + find_base_plan(&filter.input) + } else { + input + } + } _ => input, } } diff --git a/datafusion/functions-aggregate-common/src/aggregate.rs b/datafusion/functions-aggregate-common/src/aggregate.rs index 698d1350cb61..c9cbaa8396fc 100644 --- a/datafusion/functions-aggregate-common/src/aggregate.rs +++ b/datafusion/functions-aggregate-common/src/aggregate.rs @@ -15,172 +15,5 @@ // specific language governing permissions and limitations // under the License. -//! [`AggregateExpr`] which defines the interface all aggregate expressions -//! (built-in and custom) need to satisfy. - -use crate::order::AggregateOrderSensitivity; -use arrow::datatypes::{DataType, Field}; -use datafusion_common::{exec_err, not_impl_err, Result, ScalarValue}; -use datafusion_expr_common::accumulator::Accumulator; -use datafusion_expr_common::groups_accumulator::GroupsAccumulator; -use datafusion_physical_expr_common::physical_expr::PhysicalExpr; -use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; -use std::fmt::Debug; -use std::{any::Any, sync::Arc}; - pub mod count_distinct; pub mod groups_accumulator; - -/// An aggregate expression that: -/// * knows its resulting field -/// * knows how to create its accumulator -/// * knows its accumulator's state's field -/// * knows the expressions from whose its accumulator will receive values -/// -/// Any implementation of this trait also needs to implement the -/// `PartialEq` to allows comparing equality between the -/// trait objects. 
-pub trait AggregateExpr: Send + Sync + Debug + PartialEq { - /// Returns the aggregate expression as [`Any`] so that it can be - /// downcast to a specific implementation. - fn as_any(&self) -> &dyn Any; - - /// the field of the final result of this aggregation. - fn field(&self) -> Result; - - /// the accumulator used to accumulate values from the expressions. - /// the accumulator expects the same number of arguments as `expressions` and must - /// return states with the same description as `state_fields` - fn create_accumulator(&self) -> Result>; - - /// the fields that encapsulate the Accumulator's state - /// the number of fields here equals the number of states that the accumulator contains - fn state_fields(&self) -> Result>; - - /// expressions that are passed to the Accumulator. - /// Single-column aggregations such as `sum` return a single value, others (e.g. `cov`) return many. - fn expressions(&self) -> Vec>; - - /// Order by requirements for the aggregate function - /// By default it is `None` (there is no requirement) - /// Order-sensitive aggregators, such as `FIRST_VALUE(x ORDER BY y)` should implement this - fn order_bys(&self) -> Option<&[PhysicalSortExpr]> { - None - } - - /// Indicates whether aggregator can produce the correct result with any - /// arbitrary input ordering. By default, we assume that aggregate expressions - /// are order insensitive. - fn order_sensitivity(&self) -> AggregateOrderSensitivity { - AggregateOrderSensitivity::Insensitive - } - - /// Sets the indicator whether ordering requirements of the aggregator is - /// satisfied by its input. If this is not the case, aggregators with order - /// sensitivity `AggregateOrderSensitivity::Beneficial` can still produce - /// the correct result with possibly more work internally. - /// - /// # Returns - /// - /// Returns `Ok(Some(updated_expr))` if the process completes successfully. - /// If the expression can benefit from existing input ordering, but does - /// not implement the method, returns an error. Order insensitive and hard - /// requirement aggregators return `Ok(None)`. - fn with_beneficial_ordering( - self: Arc, - _requirement_satisfied: bool, - ) -> Result>> { - if self.order_bys().is_some() && self.order_sensitivity().is_beneficial() { - return exec_err!( - "Should implement with satisfied for aggregator :{:?}", - self.name() - ); - } - Ok(None) - } - - /// Human readable name such as `"MIN(c2)"`. The default - /// implementation returns placeholder text. - fn name(&self) -> &str { - "AggregateExpr: default name" - } - - /// If the aggregate expression has a specialized - /// [`GroupsAccumulator`] implementation. If this returns true, - /// `[Self::create_groups_accumulator`] will be called. - fn groups_accumulator_supported(&self) -> bool { - false - } - - /// Return a specialized [`GroupsAccumulator`] that manages state - /// for all groups. - /// - /// For maximum performance, a [`GroupsAccumulator`] should be - /// implemented in addition to [`Accumulator`]. - fn create_groups_accumulator(&self) -> Result> { - not_impl_err!("GroupsAccumulator hasn't been implemented for {self:?} yet") - } - - /// Construct an expression that calculates the aggregate in reverse. - /// Typically the "reverse" expression is itself (e.g. SUM, COUNT). - /// For aggregates that do not support calculation in reverse, - /// returns None (which is the default value). 
- fn reverse_expr(&self) -> Option> { - None - } - - /// Creates accumulator implementation that supports retract - fn create_sliding_accumulator(&self) -> Result> { - not_impl_err!("Retractable Accumulator hasn't been implemented for {self:?} yet") - } - - /// Returns all expressions used in the [`AggregateExpr`]. - /// These expressions are (1)function arguments, (2) order by expressions. - fn all_expressions(&self) -> AggregatePhysicalExpressions { - let args = self.expressions(); - let order_bys = self.order_bys().unwrap_or(&[]); - let order_by_exprs = order_bys - .iter() - .map(|sort_expr| Arc::clone(&sort_expr.expr)) - .collect::>(); - AggregatePhysicalExpressions { - args, - order_by_exprs, - } - } - - /// Rewrites [`AggregateExpr`], with new expressions given. The argument should be consistent - /// with the return value of the [`AggregateExpr::all_expressions`] method. - /// Returns `Some(Arc)` if re-write is supported, otherwise returns `None`. - fn with_new_expressions( - &self, - _args: Vec>, - _order_by_exprs: Vec>, - ) -> Option> { - None - } - - /// If this function is max, return (output_field, true) - /// if the function is min, return (output_field, false) - /// otherwise return None (the default) - /// - /// output_field is the name of the column produced by this aggregate - /// - /// Note: this is used to use special aggregate implementations in certain conditions - fn get_minmax_desc(&self) -> Option<(Field, bool)> { - None - } - - /// Returns default value of the function given the input is Null - /// Most of the aggregate function return Null if input is Null, - /// while `count` returns 0 if input is Null - fn default_value(&self, data_type: &DataType) -> Result; -} - -/// Stores the physical expressions used inside the `AggregateExpr`. 
-pub struct AggregatePhysicalExpressions { - /// Aggregate function arguments - pub args: Vec>, - /// Order by expressions - pub order_by_exprs: Vec>, -} diff --git a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs index 3984b02c5fbb..1c97d22ec79c 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs @@ -24,7 +24,7 @@ pub mod nulls; pub mod prim_op; use arrow::{ - array::{ArrayRef, AsArray, BooleanArray, PrimitiveArray, UInt32Builder}, + array::{ArrayRef, AsArray, BooleanArray, PrimitiveArray}, compute, datatypes::UInt32Type, }; @@ -170,7 +170,7 @@ impl GroupsAccumulatorAdapter { let mut groups_with_rows = vec![]; // batch_indices holds indices into values, each group is contiguous - let mut batch_indices = UInt32Builder::with_capacity(0); + let mut batch_indices = vec![]; // offsets[i] is index into batch_indices where the rows for // group_index i starts @@ -184,11 +184,11 @@ impl GroupsAccumulatorAdapter { } groups_with_rows.push(group_index); - batch_indices.append_slice(indices); + batch_indices.extend_from_slice(indices); offset_so_far += indices.len(); offsets.push(offset_so_far); } - let batch_indices = batch_indices.finish(); + let batch_indices = batch_indices.into(); // reorder the values and opt_filter by batch_indices so that // all values for each group are contiguous, then invoke the diff --git a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/accumulate.rs index 455fc5fec450..a0475fe8e446 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/accumulate.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/accumulate.rs @@ -91,36 +91,9 @@ impl NullState { /// * `opt_filter`: if present, only rows for which is Some(true) are included /// * `value_fn`: function invoked for (group_index, value) where value is non null /// - /// # Example + /// See [`accumulate`], for more details on how value_fn is called /// - /// ```text - /// ┌─────────┐ ┌─────────┐ ┌ ─ ─ ─ ─ ┐ - /// │ ┌─────┐ │ │ ┌─────┐ │ ┌─────┐ - /// │ │ 2 │ │ │ │ 200 │ │ │ │ t │ │ - /// │ ├─────┤ │ │ ├─────┤ │ ├─────┤ - /// │ │ 2 │ │ │ │ 100 │ │ │ │ f │ │ - /// │ ├─────┤ │ │ ├─────┤ │ ├─────┤ - /// │ │ 0 │ │ │ │ 200 │ │ │ │ t │ │ - /// │ ├─────┤ │ │ ├─────┤ │ ├─────┤ - /// │ │ 1 │ │ │ │ 200 │ │ │ │NULL │ │ - /// │ ├─────┤ │ │ ├─────┤ │ ├─────┤ - /// │ │ 0 │ │ │ │ 300 │ │ │ │ t │ │ - /// │ └─────┘ │ │ └─────┘ │ └─────┘ - /// └─────────┘ └─────────┘ └ ─ ─ ─ ─ ┘ - /// - /// group_indices values opt_filter - /// ``` - /// - /// In the example above, `value_fn` is invoked for each (group_index, - /// value) pair where `opt_filter[i]` is true and values is non null - /// - /// ```text - /// value_fn(2, 200) - /// value_fn(0, 200) - /// value_fn(0, 300) - /// ``` - /// - /// It also sets + /// When value_fn is called it also sets /// /// 1. 
`self.seen_values[group_index]` to true for all rows that had a non null vale pub fn accumulate( @@ -134,105 +107,14 @@ impl NullState { T: ArrowPrimitiveType + Send, F: FnMut(usize, T::Native) + Send, { - let data: &[T::Native] = values.values(); - assert_eq!(data.len(), group_indices.len()); - // ensure the seen_values is big enough (start everything at // "not seen" valid) let seen_values = initialize_builder(&mut self.seen_values, total_num_groups, false); - - match (values.null_count() > 0, opt_filter) { - // no nulls, no filter, - (false, None) => { - let iter = group_indices.iter().zip(data.iter()); - for (&group_index, &new_value) in iter { - seen_values.set_bit(group_index, true); - value_fn(group_index, new_value); - } - } - // nulls, no filter - (true, None) => { - let nulls = values.nulls().unwrap(); - // This is based on (ahem, COPY/PASTE) arrow::compute::aggregate::sum - // iterate over in chunks of 64 bits for more efficient null checking - let group_indices_chunks = group_indices.chunks_exact(64); - let data_chunks = data.chunks_exact(64); - let bit_chunks = nulls.inner().bit_chunks(); - - let group_indices_remainder = group_indices_chunks.remainder(); - let data_remainder = data_chunks.remainder(); - - group_indices_chunks - .zip(data_chunks) - .zip(bit_chunks.iter()) - .for_each(|((group_index_chunk, data_chunk), mask)| { - // index_mask has value 1 << i in the loop - let mut index_mask = 1; - group_index_chunk.iter().zip(data_chunk.iter()).for_each( - |(&group_index, &new_value)| { - // valid bit was set, real value - let is_valid = (mask & index_mask) != 0; - if is_valid { - seen_values.set_bit(group_index, true); - value_fn(group_index, new_value); - } - index_mask <<= 1; - }, - ) - }); - - // handle any remaining bits (after the initial 64) - let remainder_bits = bit_chunks.remainder_bits(); - group_indices_remainder - .iter() - .zip(data_remainder.iter()) - .enumerate() - .for_each(|(i, (&group_index, &new_value))| { - let is_valid = remainder_bits & (1 << i) != 0; - if is_valid { - seen_values.set_bit(group_index, true); - value_fn(group_index, new_value); - } - }); - } - // no nulls, but a filter - (false, Some(filter)) => { - assert_eq!(filter.len(), group_indices.len()); - // The performance with a filter could be improved by - // iterating over the filter in chunks, rather than a single - // iterator. TODO file a ticket - group_indices - .iter() - .zip(data.iter()) - .zip(filter.iter()) - .for_each(|((&group_index, &new_value), filter_value)| { - if let Some(true) = filter_value { - seen_values.set_bit(group_index, true); - value_fn(group_index, new_value); - } - }) - } - // both null values and filters - (true, Some(filter)) => { - assert_eq!(filter.len(), group_indices.len()); - // The performance with a filter could be improved by - // iterating over the filter in chunks, rather than using - // iterators. 
TODO file a ticket - filter - .iter() - .zip(group_indices.iter()) - .zip(values.iter()) - .for_each(|((filter_value, &group_index), new_value)| { - if let Some(true) = filter_value { - if let Some(new_value) = new_value { - seen_values.set_bit(group_index, true); - value_fn(group_index, new_value) - } - } - }) - } - } + accumulate(group_indices, values, opt_filter, |group_index, value| { + seen_values.set_bit(group_index, true); + value_fn(group_index, value); + }); } /// Invokes `value_fn(group_index, value)` for each non null, non @@ -351,6 +233,144 @@ impl NullState { } } +/// Invokes `value_fn(group_index, value)` for each non null, non +/// filtered value of `value`, +/// +/// # Arguments: +/// +/// * `group_indices`: To which groups do the rows in `values` belong, (aka group_index) +/// * `values`: the input arguments to the accumulator +/// * `opt_filter`: if present, only rows for which is Some(true) are included +/// * `value_fn`: function invoked for (group_index, value) where value is non null +/// +/// # Example +/// +/// ```text +/// ┌─────────┐ ┌─────────┐ ┌ ─ ─ ─ ─ ┐ +/// │ ┌─────┐ │ │ ┌─────┐ │ ┌─────┐ +/// │ │ 2 │ │ │ │ 200 │ │ │ │ t │ │ +/// │ ├─────┤ │ │ ├─────┤ │ ├─────┤ +/// │ │ 2 │ │ │ │ 100 │ │ │ │ f │ │ +/// │ ├─────┤ │ │ ├─────┤ │ ├─────┤ +/// │ │ 0 │ │ │ │ 200 │ │ │ │ t │ │ +/// │ ├─────┤ │ │ ├─────┤ │ ├─────┤ +/// │ │ 1 │ │ │ │ 200 │ │ │ │NULL │ │ +/// │ ├─────┤ │ │ ├─────┤ │ ├─────┤ +/// │ │ 0 │ │ │ │ 300 │ │ │ │ t │ │ +/// │ └─────┘ │ │ └─────┘ │ └─────┘ +/// └─────────┘ └─────────┘ └ ─ ─ ─ ─ ┘ +/// +/// group_indices values opt_filter +/// ``` +/// +/// In the example above, `value_fn` is invoked for each (group_index, +/// value) pair where `opt_filter[i]` is true and values is non null +/// +/// ```text +/// value_fn(2, 200) +/// value_fn(0, 200) +/// value_fn(0, 300) +/// ``` +pub fn accumulate( + group_indices: &[usize], + values: &PrimitiveArray, + opt_filter: Option<&BooleanArray>, + mut value_fn: F, +) where + T: ArrowPrimitiveType + Send, + F: FnMut(usize, T::Native) + Send, +{ + let data: &[T::Native] = values.values(); + assert_eq!(data.len(), group_indices.len()); + + match (values.null_count() > 0, opt_filter) { + // no nulls, no filter, + (false, None) => { + let iter = group_indices.iter().zip(data.iter()); + for (&group_index, &new_value) in iter { + value_fn(group_index, new_value); + } + } + // nulls, no filter + (true, None) => { + let nulls = values.nulls().unwrap(); + // This is based on (ahem, COPY/PASTE) arrow::compute::aggregate::sum + // iterate over in chunks of 64 bits for more efficient null checking + let group_indices_chunks = group_indices.chunks_exact(64); + let data_chunks = data.chunks_exact(64); + let bit_chunks = nulls.inner().bit_chunks(); + + let group_indices_remainder = group_indices_chunks.remainder(); + let data_remainder = data_chunks.remainder(); + + group_indices_chunks + .zip(data_chunks) + .zip(bit_chunks.iter()) + .for_each(|((group_index_chunk, data_chunk), mask)| { + // index_mask has value 1 << i in the loop + let mut index_mask = 1; + group_index_chunk.iter().zip(data_chunk.iter()).for_each( + |(&group_index, &new_value)| { + // valid bit was set, real value + let is_valid = (mask & index_mask) != 0; + if is_valid { + value_fn(group_index, new_value); + } + index_mask <<= 1; + }, + ) + }); + + // handle any remaining bits (after the initial 64) + let remainder_bits = bit_chunks.remainder_bits(); + group_indices_remainder + .iter() + .zip(data_remainder.iter()) + .enumerate() + .for_each(|(i, (&group_index, 
&new_value))| { + let is_valid = remainder_bits & (1 << i) != 0; + if is_valid { + value_fn(group_index, new_value); + } + }); + } + // no nulls, but a filter + (false, Some(filter)) => { + assert_eq!(filter.len(), group_indices.len()); + // The performance with a filter could be improved by + // iterating over the filter in chunks, rather than a single + // iterator. TODO file a ticket + group_indices + .iter() + .zip(data.iter()) + .zip(filter.iter()) + .for_each(|((&group_index, &new_value), filter_value)| { + if let Some(true) = filter_value { + value_fn(group_index, new_value); + } + }) + } + // both null values and filters + (true, Some(filter)) => { + assert_eq!(filter.len(), group_indices.len()); + // The performance with a filter could be improved by + // iterating over the filter in chunks, rather than using + // iterators. TODO file a ticket + filter + .iter() + .zip(group_indices.iter()) + .zip(values.iter()) + .for_each(|((filter_value, &group_index), new_value)| { + if let Some(true) = filter_value { + if let Some(new_value) = new_value { + value_fn(group_index, new_value) + } + } + }) + } + } +} + /// This function is called to update the accumulator state per row /// when the value is not needed (e.g. COUNT) /// diff --git a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/prim_op.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/prim_op.rs index b5c6171af37c..8bbcf756c37c 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/prim_op.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/prim_op.rs @@ -167,7 +167,7 @@ where // Rebuilding input values with a new nulls mask, which is equal to // the union of original nulls and filter mask - let (dt, values_buf, original_nulls) = values.clone().into_parts(); + let (dt, values_buf, original_nulls) = values.into_parts(); let nulls_buf = NullBuffer::union(original_nulls.as_ref(), Some(&filter_nulls)); PrimitiveArray::::new(values_buf, nulls_buf).with_data_type(dt) diff --git a/datafusion/functions-aggregate-common/src/utils.rs b/datafusion/functions-aggregate-common/src/utils.rs index 7b8ce0397af8..4fba772d8ddc 100644 --- a/datafusion/functions-aggregate-common/src/utils.rs +++ b/datafusion/functions-aggregate-common/src/utils.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use std::{any::Any, sync::Arc}; +use std::sync::Arc; use arrow::array::{ArrayRef, AsArray}; use arrow::datatypes::ArrowNativeType; @@ -32,25 +32,6 @@ use datafusion_common::{exec_err, DataFusionError, Result}; use datafusion_expr_common::accumulator::Accumulator; use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; -use crate::aggregate::AggregateExpr; - -/// Downcast a `Box` or `Arc` -/// and return the inner trait object as [`Any`] so -/// that it can be downcast to a specific implementation. -/// -/// This method is used when implementing the `PartialEq` -/// for [`AggregateExpr`] aggregation expressions and allows comparing the equality -/// between the trait objects. -pub fn down_cast_any_ref(any: &dyn Any) -> &dyn Any { - if let Some(obj) = any.downcast_ref::>() { - obj.as_any() - } else if let Some(obj) = any.downcast_ref::>() { - obj.as_any() - } else { - any - } -} - /// Convert scalar values from an accumulator into arrays. 
pub fn get_accum_scalar_values_as_arrays( accum: &mut dyn Accumulator, diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont.rs b/datafusion/functions-aggregate/src/approx_percentile_cont.rs index 89d827e86859..5578aebbf403 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont.rs @@ -31,8 +31,8 @@ use arrow::{ use arrow_schema::{Field, Schema}; use datafusion_common::{ - downcast_value, internal_err, not_impl_err, plan_err, DataFusionError, Result, - ScalarValue, + downcast_value, internal_err, not_impl_datafusion_err, not_impl_err, plan_err, + DataFusionError, Result, ScalarValue, }; use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; use datafusion_expr::type_coercion::aggregates::{INTEGERS, NUMERICS}; @@ -126,9 +126,9 @@ impl ApproxPercentileCont { | DataType::Float32 | DataType::Float64) => { if let Some(max_size) = tdigest_max_size { - ApproxPercentileAccumulator::new_with_max_size(percentile, t.clone(), max_size) + ApproxPercentileAccumulator::new_with_max_size(percentile, t, max_size) }else{ - ApproxPercentileAccumulator::new(percentile, t.clone()) + ApproxPercentileAccumulator::new(percentile, t) } } @@ -154,7 +154,8 @@ fn get_scalar_value(expr: &Arc) -> Result { } fn validate_input_percentile_expr(expr: &Arc) -> Result { - let percentile = match get_scalar_value(expr)? { + let percentile = match get_scalar_value(expr) + .map_err(|_| not_impl_datafusion_err!("Percentile value for 'APPROX_PERCENTILE_CONT' must be a literal, got: {expr}"))? { ScalarValue::Float32(Some(value)) => { value as f64 } @@ -179,7 +180,8 @@ fn validate_input_percentile_expr(expr: &Arc) -> Result { } fn validate_input_max_size_expr(expr: &Arc) -> Result { - let max_size = match get_scalar_value(expr)? { + let max_size = match get_scalar_value(expr) + .map_err(|_| not_impl_datafusion_err!("Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be a literal, got: {expr}"))? 
{ ScalarValue::UInt8(Some(q)) => q as usize, ScalarValue::UInt16(Some(q)) => q as usize, ScalarValue::UInt32(Some(q)) => q as usize, @@ -193,7 +195,7 @@ fn validate_input_max_size_expr(expr: &Arc) -> Result { "Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be UInt > 0 literal (got data type {}).", sv.data_type() ) - } + }, }; Ok(max_size) @@ -458,10 +460,6 @@ impl Accumulator for ApproxPercentileAccumulator { + self.return_type.size() - std::mem::size_of_val(&self.return_type) } - - fn supports_retract_batch(&self) -> bool { - true - } } #[cfg(test)] diff --git a/datafusion/functions-aggregate/src/array_agg.rs b/datafusion/functions-aggregate/src/array_agg.rs index b641d388a7c5..15146fc4a2d8 100644 --- a/datafusion/functions-aggregate/src/array_agg.rs +++ b/datafusion/functions-aggregate/src/array_agg.rs @@ -501,11 +501,8 @@ impl OrderSensitiveArrayAggAccumulator { column_wise_ordering_values.push(array); } - let ordering_array = StructArray::try_new( - struct_field.clone(), - column_wise_ordering_values, - None, - )?; + let ordering_array = + StructArray::try_new(struct_field, column_wise_ordering_values, None)?; Ok(ScalarValue::List(Arc::new(array_into_list_array_nullable( Arc::new(ordering_array), )))) diff --git a/datafusion/functions-aggregate/src/nth_value.rs b/datafusion/functions-aggregate/src/nth_value.rs index cb1ddd4738c4..7425bdfa18e7 100644 --- a/datafusion/functions-aggregate/src/nth_value.rs +++ b/datafusion/functions-aggregate/src/nth_value.rs @@ -374,11 +374,8 @@ impl NthValueAccumulator { column_wise_ordering_values.push(array); } - let ordering_array = StructArray::try_new( - struct_field.clone(), - column_wise_ordering_values, - None, - )?; + let ordering_array = + StructArray::try_new(struct_field, column_wise_ordering_values, None)?; Ok(ScalarValue::List(Arc::new(array_into_list_array_nullable( Arc::new(ordering_array), diff --git a/datafusion/functions-aggregate/src/stddev.rs b/datafusion/functions-aggregate/src/stddev.rs index 180f4ad3cf37..3534fb5b4d26 100644 --- a/datafusion/functions-aggregate/src/stddev.rs +++ b/datafusion/functions-aggregate/src/stddev.rs @@ -19,17 +19,21 @@ use std::any::Any; use std::fmt::{Debug, Formatter}; +use std::sync::Arc; +use arrow::array::Float64Array; use arrow::{array::ArrayRef, datatypes::DataType, datatypes::Field}; use datafusion_common::{internal_err, not_impl_err, Result}; use datafusion_common::{plan_err, ScalarValue}; use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; use datafusion_expr::utils::format_state_name; -use datafusion_expr::{Accumulator, AggregateUDFImpl, Signature, Volatility}; +use datafusion_expr::{ + Accumulator, AggregateUDFImpl, GroupsAccumulator, Signature, Volatility, +}; use datafusion_functions_aggregate_common::stats::StatsType; -use crate::variance::VarianceAccumulator; +use crate::variance::{VarianceAccumulator, VarianceGroupsAccumulator}; make_udaf_expr_and_func!( Stddev, @@ -118,6 +122,17 @@ impl AggregateUDFImpl for Stddev { fn aliases(&self) -> &[String] { &self.alias } + + fn groups_accumulator_supported(&self, acc_args: AccumulatorArgs) -> bool { + !acc_args.is_distinct + } + + fn create_groups_accumulator( + &self, + _args: AccumulatorArgs, + ) -> Result> { + Ok(Box::new(StddevGroupsAccumulator::new(StatsType::Sample))) + } } make_udaf_expr_and_func!( @@ -201,6 +216,19 @@ impl AggregateUDFImpl for StddevPop { Ok(DataType::Float64) } + + fn groups_accumulator_supported(&self, acc_args: AccumulatorArgs) -> bool { + !acc_args.is_distinct + } + + fn 
create_groups_accumulator( + &self, + _args: AccumulatorArgs, + ) -> Result> { + Ok(Box::new(StddevGroupsAccumulator::new( + StatsType::Population, + ))) + } } /// An accumulator to compute the average @@ -267,6 +295,57 @@ impl Accumulator for StddevAccumulator { } } +#[derive(Debug)] +pub struct StddevGroupsAccumulator { + variance: VarianceGroupsAccumulator, +} + +impl StddevGroupsAccumulator { + pub fn new(s_type: StatsType) -> Self { + Self { + variance: VarianceGroupsAccumulator::new(s_type), + } + } +} + +impl GroupsAccumulator for StddevGroupsAccumulator { + fn update_batch( + &mut self, + values: &[ArrayRef], + group_indices: &[usize], + opt_filter: Option<&arrow::array::BooleanArray>, + total_num_groups: usize, + ) -> Result<()> { + self.variance + .update_batch(values, group_indices, opt_filter, total_num_groups) + } + + fn merge_batch( + &mut self, + values: &[ArrayRef], + group_indices: &[usize], + opt_filter: Option<&arrow::array::BooleanArray>, + total_num_groups: usize, + ) -> Result<()> { + self.variance + .merge_batch(values, group_indices, opt_filter, total_num_groups) + } + + fn evaluate(&mut self, emit_to: datafusion_expr::EmitTo) -> Result { + let (mut variances, nulls) = self.variance.variance(emit_to); + variances.iter_mut().for_each(|v| *v = v.sqrt()); + Ok(Arc::new(Float64Array::new(variances.into(), Some(nulls)))) + } + + fn state(&mut self, emit_to: datafusion_expr::EmitTo) -> Result> { + self.variance.state(emit_to) + } + + fn size(&self) -> usize { + self.variance.size() + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/datafusion/functions-aggregate/src/variance.rs b/datafusion/functions-aggregate/src/variance.rs index 4c78a42ea494..f5f2d06e3837 100644 --- a/datafusion/functions-aggregate/src/variance.rs +++ b/datafusion/functions-aggregate/src/variance.rs @@ -18,10 +18,11 @@ //! [`VarianceSample`]: variance sample aggregations. //! [`VariancePopulation`]: variance population aggregations. 
-use std::fmt::Debug; +use std::{fmt::Debug, sync::Arc}; use arrow::{ - array::{ArrayRef, Float64Array, UInt64Array}, + array::{Array, ArrayRef, BooleanArray, Float64Array, UInt64Array}, + buffer::NullBuffer, compute::kernels::cast, datatypes::{DataType, Field}, }; @@ -32,9 +33,11 @@ use datafusion_common::{ use datafusion_expr::{ function::{AccumulatorArgs, StateFieldsArgs}, utils::format_state_name, - Accumulator, AggregateUDFImpl, Signature, Volatility, + Accumulator, AggregateUDFImpl, GroupsAccumulator, Signature, Volatility, +}; +use datafusion_functions_aggregate_common::{ + aggregate::groups_accumulator::accumulate::accumulate, stats::StatsType, }; -use datafusion_functions_aggregate_common::stats::StatsType; make_udaf_expr_and_func!( VarianceSample, @@ -122,6 +125,17 @@ impl AggregateUDFImpl for VarianceSample { fn aliases(&self) -> &[String] { &self.aliases } + + fn groups_accumulator_supported(&self, acc_args: AccumulatorArgs) -> bool { + !acc_args.is_distinct + } + + fn create_groups_accumulator( + &self, + _args: AccumulatorArgs, + ) -> Result> { + Ok(Box::new(VarianceGroupsAccumulator::new(StatsType::Sample))) + } } pub struct VariancePopulation { @@ -196,6 +210,19 @@ impl AggregateUDFImpl for VariancePopulation { fn aliases(&self) -> &[String] { &self.aliases } + + fn groups_accumulator_supported(&self, acc_args: AccumulatorArgs) -> bool { + !acc_args.is_distinct + } + + fn create_groups_accumulator( + &self, + _args: AccumulatorArgs, + ) -> Result> { + Ok(Box::new(VarianceGroupsAccumulator::new( + StatsType::Population, + ))) + } } /// An accumulator to compute variance @@ -239,6 +266,36 @@ impl VarianceAccumulator { } } +#[inline] +fn merge( + count: u64, + mean: f64, + m2: f64, + count2: u64, + mean2: f64, + m22: f64, +) -> (u64, f64, f64) { + let new_count = count + count2; + let new_mean = + mean * count as f64 / new_count as f64 + mean2 * count2 as f64 / new_count as f64; + let delta = mean - mean2; + let new_m2 = + m2 + m22 + delta * delta * count as f64 * count2 as f64 / new_count as f64; + + (new_count, new_mean, new_m2) +} + +#[inline] +fn update(count: u64, mean: f64, m2: f64, value: f64) -> (u64, f64, f64) { + let new_count = count + 1; + let delta1 = value - mean; + let new_mean = delta1 / new_count as f64 + mean; + let delta2 = value - new_mean; + let new_m2 = m2 + delta1 * delta2; + + (new_count, new_mean, new_m2) +} + impl Accumulator for VarianceAccumulator { fn state(&mut self) -> Result> { Ok(vec![ @@ -253,15 +310,8 @@ impl Accumulator for VarianceAccumulator { let arr = downcast_value!(values, Float64Array).iter().flatten(); for value in arr { - let new_count = self.count + 1; - let delta1 = value - self.mean; - let new_mean = delta1 / new_count as f64 + self.mean; - let delta2 = value - new_mean; - let new_m2 = self.m2 + delta1 * delta2; - - self.count += 1; - self.mean = new_mean; - self.m2 = new_m2; + (self.count, self.mean, self.m2) = + update(self.count, self.mean, self.m2, value) } Ok(()) @@ -296,17 +346,14 @@ impl Accumulator for VarianceAccumulator { if c == 0_u64 { continue; } - let new_count = self.count + c; - let new_mean = self.mean * self.count as f64 / new_count as f64 - + means.value(i) * c as f64 / new_count as f64; - let delta = self.mean - means.value(i); - let new_m2 = self.m2 - + m2s.value(i) - + delta * delta * self.count as f64 * c as f64 / new_count as f64; - - self.count = new_count; - self.mean = new_mean; - self.m2 = new_m2; + (self.count, self.mean, self.m2) = merge( + self.count, + self.mean, + self.m2, + c, + means.value(i), + 
m2s.value(i), + ) } Ok(()) } @@ -344,3 +391,183 @@ impl Accumulator for VarianceAccumulator { true } } + +#[derive(Debug)] +pub struct VarianceGroupsAccumulator { + m2s: Vec, + means: Vec, + counts: Vec, + stats_type: StatsType, +} + +impl VarianceGroupsAccumulator { + pub fn new(s_type: StatsType) -> Self { + Self { + m2s: Vec::new(), + means: Vec::new(), + counts: Vec::new(), + stats_type: s_type, + } + } + + fn resize(&mut self, total_num_groups: usize) { + self.m2s.resize(total_num_groups, 0.0); + self.means.resize(total_num_groups, 0.0); + self.counts.resize(total_num_groups, 0); + } + + fn merge( + group_indices: &[usize], + counts: &UInt64Array, + means: &Float64Array, + m2s: &Float64Array, + opt_filter: Option<&BooleanArray>, + mut value_fn: F, + ) where + F: FnMut(usize, u64, f64, f64) + Send, + { + assert_eq!(counts.null_count(), 0); + assert_eq!(means.null_count(), 0); + assert_eq!(m2s.null_count(), 0); + + match opt_filter { + None => { + group_indices + .iter() + .zip(counts.values().iter()) + .zip(means.values().iter()) + .zip(m2s.values().iter()) + .for_each(|(((&group_index, &count), &mean), &m2)| { + value_fn(group_index, count, mean, m2); + }); + } + Some(filter) => { + group_indices + .iter() + .zip(counts.values().iter()) + .zip(means.values().iter()) + .zip(m2s.values().iter()) + .zip(filter.iter()) + .for_each( + |((((&group_index, &count), &mean), &m2), filter_value)| { + if let Some(true) = filter_value { + value_fn(group_index, count, mean, m2); + } + }, + ); + } + } + } + + pub fn variance( + &mut self, + emit_to: datafusion_expr::EmitTo, + ) -> (Vec, NullBuffer) { + let mut counts = emit_to.take_needed(&mut self.counts); + // means are only needed for updating m2s and are not needed for the final result. + // But we still need to take them to ensure the internal state is consistent. 
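+ // The final value below is M2 / count: for StatsType::Sample the count is first decremented by one (Bessel's correction), and groups whose adjusted count is zero are emitted as NULL via the null buffer.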
+ let _ = emit_to.take_needed(&mut self.means); + let m2s = emit_to.take_needed(&mut self.m2s); + + if let StatsType::Sample = self.stats_type { + counts.iter_mut().for_each(|count| { + *count -= 1; + }); + } + let nulls = NullBuffer::from_iter(counts.iter().map(|&count| count != 0)); + let variance = m2s + .iter() + .zip(counts) + .map(|(m2, count)| m2 / count as f64) + .collect(); + (variance, nulls) + } +} + +impl GroupsAccumulator for VarianceGroupsAccumulator { + fn update_batch( + &mut self, + values: &[ArrayRef], + group_indices: &[usize], + opt_filter: Option<&arrow::array::BooleanArray>, + total_num_groups: usize, + ) -> Result<()> { + assert_eq!(values.len(), 1, "single argument to update_batch"); + let values = &cast(&values[0], &DataType::Float64)?; + let values = downcast_value!(values, Float64Array); + + self.resize(total_num_groups); + accumulate(group_indices, values, opt_filter, |group_index, value| { + let (new_count, new_mean, new_m2) = update( + self.counts[group_index], + self.means[group_index], + self.m2s[group_index], + value, + ); + self.counts[group_index] = new_count; + self.means[group_index] = new_mean; + self.m2s[group_index] = new_m2; + }); + Ok(()) + } + + fn merge_batch( + &mut self, + values: &[ArrayRef], + group_indices: &[usize], + opt_filter: Option<&arrow::array::BooleanArray>, + total_num_groups: usize, + ) -> Result<()> { + assert_eq!(values.len(), 3, "two arguments to merge_batch"); + // first batch is counts, second is partial means, third is partial m2s + let partial_counts = downcast_value!(values[0], UInt64Array); + let partial_means = downcast_value!(values[1], Float64Array); + let partial_m2s = downcast_value!(values[2], Float64Array); + + self.resize(total_num_groups); + Self::merge( + group_indices, + partial_counts, + partial_means, + partial_m2s, + opt_filter, + |group_index, partial_count, partial_mean, partial_m2| { + let (new_count, new_mean, new_m2) = merge( + self.counts[group_index], + self.means[group_index], + self.m2s[group_index], + partial_count, + partial_mean, + partial_m2, + ); + self.counts[group_index] = new_count; + self.means[group_index] = new_mean; + self.m2s[group_index] = new_m2; + }, + ); + Ok(()) + } + + fn evaluate(&mut self, emit_to: datafusion_expr::EmitTo) -> Result { + let (variances, nulls) = self.variance(emit_to); + Ok(Arc::new(Float64Array::new(variances.into(), Some(nulls)))) + } + + fn state(&mut self, emit_to: datafusion_expr::EmitTo) -> Result> { + let counts = emit_to.take_needed(&mut self.counts); + let means = emit_to.take_needed(&mut self.means); + let m2s = emit_to.take_needed(&mut self.m2s); + + Ok(vec![ + Arc::new(UInt64Array::new(counts.into(), None)), + Arc::new(Float64Array::new(means.into(), None)), + Arc::new(Float64Array::new(m2s.into(), None)), + ]) + } + + fn size(&self) -> usize { + self.m2s.capacity() * std::mem::size_of::() + + self.means.capacity() * std::mem::size_of::() + + self.counts.capacity() * std::mem::size_of::() + } +} diff --git a/datafusion/functions-nested/Cargo.toml b/datafusion/functions-nested/Cargo.toml index 6a1973ecfed1..bdfb07031b8c 100644 --- a/datafusion/functions-nested/Cargo.toml +++ b/datafusion/functions-nested/Cargo.toml @@ -50,7 +50,8 @@ datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } datafusion-functions = { workspace = true } datafusion-functions-aggregate = { workspace = true } -itertools = { version = "0.12", features = ["use_std"] } +datafusion-physical-expr-common = { workspace = true } +itertools = { workspace = 
true, features = ["use_std"] } log = { workspace = true } paste = "1.0.14" rand = "0.8.5" diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index fe1df2579932..9b4357d0d14f 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -19,14 +19,17 @@ use arrow::array::{Array, ArrayRef, BooleanArray, OffsetSizeTrait}; use arrow::datatypes::DataType; -use arrow::row::{RowConverter, SortField}; +use arrow::row::{RowConverter, Rows, SortField}; +use arrow_array::{Datum, GenericListArray, Scalar}; use datafusion_common::cast::as_generic_list_array; -use datafusion_common::{exec_err, Result}; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use datafusion_common::utils::string_utils::string_array_to_vec; +use datafusion_common::{exec_err, Result, ScalarValue}; +use datafusion_expr::{ColumnarValue, Operator, ScalarUDFImpl, Signature, Volatility}; +use datafusion_physical_expr_common::datum::compare_op_for_nested; use itertools::Itertools; -use crate::utils::check_datatypes; +use crate::utils::make_scalar_function; use std::any::Any; use std::sync::Arc; @@ -93,28 +96,44 @@ impl ScalarUDFImpl for ArrayHas { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - let args = ColumnarValue::values_to_arrays(args)?; - - if args.len() != 2 { - return exec_err!("array_has needs two arguments"); + // Always return null if the second argument is null + // i.e. array_has(array, null) -> null + if let ColumnarValue::Scalar(s) = &args[1] { + if s.is_null() { + return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None))); + } } - let array_type = args[0].data_type(); - - match array_type { - DataType::List(_) => general_array_has_dispatch::( - &args[0], - &args[1], - ComparisonType::Single, - ) - .map(ColumnarValue::Array), - DataType::LargeList(_) => general_array_has_dispatch::( - &args[0], - &args[1], - ComparisonType::Single, - ) - .map(ColumnarValue::Array), - _ => exec_err!("array_has does not support type '{array_type:?}'."), + // first, identify if any of the arguments is an Array. If yes, store its `len`, + // as any scalar will need to be converted to an array of len `len`. 
+ let len = args + .iter() + .fold(Option::::None, |acc, arg| match arg { + ColumnarValue::Scalar(_) => acc, + ColumnarValue::Array(a) => Some(a.len()), + }); + + let is_scalar = len.is_none(); + + let result = match args[1] { + ColumnarValue::Array(_) => { + let args = ColumnarValue::values_to_arrays(args)?; + array_has_inner_for_array(&args[0], &args[1]) + } + ColumnarValue::Scalar(_) => { + let haystack = args[0].to_owned().into_array(1)?; + let needle = args[1].to_owned().into_array(1)?; + let needle = Scalar::new(needle); + array_has_inner_for_scalar(&haystack, &needle) + } + }; + + if is_scalar { + // If all inputs are scalar, keeps output as scalar + let result = result.and_then(|arr| ScalarValue::try_from_array(&arr, 0)); + result.map(ColumnarValue::Scalar) + } else { + result.map(ColumnarValue::Array) } } @@ -123,6 +142,116 @@ impl ScalarUDFImpl for ArrayHas { } } +fn array_has_inner_for_scalar( + haystack: &ArrayRef, + needle: &dyn Datum, +) -> Result { + match haystack.data_type() { + DataType::List(_) => array_has_dispatch_for_scalar::(haystack, needle), + DataType::LargeList(_) => array_has_dispatch_for_scalar::(haystack, needle), + _ => exec_err!( + "array_has does not support type '{:?}'.", + haystack.data_type() + ), + } +} + +fn array_has_inner_for_array(haystack: &ArrayRef, needle: &ArrayRef) -> Result { + match haystack.data_type() { + DataType::List(_) => array_has_dispatch_for_array::(haystack, needle), + DataType::LargeList(_) => array_has_dispatch_for_array::(haystack, needle), + _ => exec_err!( + "array_has does not support type '{:?}'.", + haystack.data_type() + ), + } +} + +fn array_has_dispatch_for_array( + haystack: &ArrayRef, + needle: &ArrayRef, +) -> Result { + let haystack = as_generic_list_array::(haystack)?; + let mut boolean_builder = BooleanArray::builder(haystack.len()); + + for (i, arr) in haystack.iter().enumerate() { + if arr.is_none() || needle.is_null(i) { + boolean_builder.append_null(); + continue; + } + let arr = arr.unwrap(); + let needle_row = Scalar::new(needle.slice(i, 1)); + let eq_array = compare_op_for_nested(Operator::Eq, &arr, &needle_row)?; + let is_contained = eq_array.true_count() > 0; + boolean_builder.append_value(is_contained) + } + + Ok(Arc::new(boolean_builder.finish())) +} + +fn array_has_dispatch_for_scalar( + haystack: &ArrayRef, + needle: &dyn Datum, +) -> Result { + let haystack = as_generic_list_array::(haystack)?; + let values = haystack.values(); + let offsets = haystack.value_offsets(); + // If first argument is empty list (second argument is non-null), return false + // i.e. 
array_has([], non-null element) -> false + if values.len() == 0 { + return Ok(Arc::new(BooleanArray::from(vec![Some(false)]))); + } + let eq_array = compare_op_for_nested(Operator::Eq, values, needle)?; + let mut final_contained = vec![None; haystack.len()]; + for (i, offset) in offsets.windows(2).enumerate() { + let start = offset[0].to_usize().unwrap(); + let end = offset[1].to_usize().unwrap(); + let length = end - start; + // For non-nested list, length is 0 for null + if length == 0 { + continue; + } + let sliced_array = eq_array.slice(start, length); + // For nested list, check number of nulls + if sliced_array.null_count() == length { + continue; + } + final_contained[i] = Some(sliced_array.true_count() > 0); + } + + Ok(Arc::new(BooleanArray::from(final_contained))) +} + +fn array_has_all_inner(args: &[ArrayRef]) -> Result { + match args[0].data_type() { + DataType::List(_) => { + array_has_all_and_any_dispatch::(&args[0], &args[1], ComparisonType::All) + } + DataType::LargeList(_) => { + array_has_all_and_any_dispatch::(&args[0], &args[1], ComparisonType::All) + } + _ => exec_err!( + "array_has does not support type '{:?}'.", + args[0].data_type() + ), + } +} + +fn array_has_any_inner(args: &[ArrayRef]) -> Result { + match args[0].data_type() { + DataType::List(_) => { + array_has_all_and_any_dispatch::(&args[0], &args[1], ComparisonType::Any) + } + DataType::LargeList(_) => { + array_has_all_and_any_dispatch::(&args[0], &args[1], ComparisonType::Any) + } + _ => exec_err!( + "array_has does not support type '{:?}'.", + args[0].data_type() + ), + } +} + #[derive(Debug)] pub struct ArrayHasAll { signature: Signature, @@ -161,24 +290,7 @@ impl ScalarUDFImpl for ArrayHasAll { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - let args = ColumnarValue::values_to_arrays(args)?; - if args.len() != 2 { - return exec_err!("array_has_all needs two arguments"); - } - - let array_type = args[0].data_type(); - - match array_type { - DataType::List(_) => { - general_array_has_dispatch::(&args[0], &args[1], ComparisonType::All) - .map(ColumnarValue::Array) - } - DataType::LargeList(_) => { - general_array_has_dispatch::(&args[0], &args[1], ComparisonType::All) - .map(ColumnarValue::Array) - } - _ => exec_err!("array_has_all does not support type '{array_type:?}'."), - } + make_scalar_function(array_has_all_inner)(args) } fn aliases(&self) -> &[String] { @@ -224,25 +336,7 @@ impl ScalarUDFImpl for ArrayHasAny { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - let args = ColumnarValue::values_to_arrays(args)?; - - if args.len() != 2 { - return exec_err!("array_has_any needs two arguments"); - } - - let array_type = args[0].data_type(); - - match array_type { - DataType::List(_) => { - general_array_has_dispatch::(&args[0], &args[1], ComparisonType::Any) - .map(ColumnarValue::Array) - } - DataType::LargeList(_) => { - general_array_has_dispatch::(&args[0], &args[1], ComparisonType::Any) - .map(ColumnarValue::Array) - } - _ => exec_err!("array_has_any does not support type '{array_type:?}'."), - } + make_scalar_function(array_has_any_inner)(args) } fn aliases(&self) -> &[String] { @@ -251,75 +345,114 @@ impl ScalarUDFImpl for ArrayHasAny { } /// Represents the type of comparison for array_has. 
-#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone, Copy)] enum ComparisonType { // array_has_all All, // array_has_any Any, - // array_has - Single, } -fn general_array_has_dispatch( +fn array_has_all_and_any_dispatch( haystack: &ArrayRef, needle: &ArrayRef, comparison_type: ComparisonType, ) -> Result { - let array = if comparison_type == ComparisonType::Single { - let arr = as_generic_list_array::(haystack)?; - check_datatypes("array_has", &[arr.values(), needle])?; - arr - } else { - check_datatypes("array_has", &[haystack, needle])?; - as_generic_list_array::(haystack)? - }; + let haystack = as_generic_list_array::(haystack)?; + let needle = as_generic_list_array::(needle)?; + match needle.data_type() { + DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => { + array_has_all_and_any_string_internal::(haystack, needle, comparison_type) + } + _ => general_array_has_for_all_and_any::(haystack, needle, comparison_type), + } +} +// String comparison for array_has_all and array_has_any +fn array_has_all_and_any_string_internal( + array: &GenericListArray, + needle: &GenericListArray, + comparison_type: ComparisonType, +) -> Result { let mut boolean_builder = BooleanArray::builder(array.len()); - - let converter = RowConverter::new(vec![SortField::new(array.value_type())])?; - - let element = Arc::clone(needle); - let sub_array = if comparison_type != ComparisonType::Single { - as_generic_list_array::(needle)? - } else { - array - }; - for (row_idx, (arr, sub_arr)) in array.iter().zip(sub_array.iter()).enumerate() { + for (arr, sub_arr) in array.iter().zip(needle.iter()) { match (arr, sub_arr) { (Some(arr), Some(sub_arr)) => { - let arr_values = converter.convert_columns(&[arr])?; - let sub_arr_values = if comparison_type != ComparisonType::Single { - converter.convert_columns(&[sub_arr])? - } else { - converter.convert_columns(&[Arc::clone(&element)])? 
- }; - - let mut res = match comparison_type { - ComparisonType::All => sub_arr_values - .iter() - .dedup() - .all(|elem| arr_values.iter().dedup().any(|x| x == elem)), - ComparisonType::Any => sub_arr_values - .iter() - .dedup() - .any(|elem| arr_values.iter().dedup().any(|x| x == elem)), - ComparisonType::Single => arr_values - .iter() - .dedup() - .any(|x| x == sub_arr_values.row(row_idx)), - }; - - if comparison_type == ComparisonType::Any { - res |= res; - } - boolean_builder.append_value(res); + let haystack_array = string_array_to_vec(&arr); + let needle_array = string_array_to_vec(&sub_arr); + boolean_builder.append_value(array_has_string_kernel( + haystack_array, + needle_array, + comparison_type, + )); } - // respect null input (_, _) => { boolean_builder.append_null(); } } } + Ok(Arc::new(boolean_builder.finish())) } + +fn array_has_string_kernel( + haystack: Vec>, + needle: Vec>, + comparison_type: ComparisonType, +) -> bool { + match comparison_type { + ComparisonType::All => needle + .iter() + .dedup() + .all(|x| haystack.iter().dedup().any(|y| y == x)), + ComparisonType::Any => needle + .iter() + .dedup() + .any(|x| haystack.iter().dedup().any(|y| y == x)), + } +} + +// General row comparison for array_has_all and array_has_any +fn general_array_has_for_all_and_any( + haystack: &GenericListArray, + needle: &GenericListArray, + comparison_type: ComparisonType, +) -> Result { + let mut boolean_builder = BooleanArray::builder(haystack.len()); + let converter = RowConverter::new(vec![SortField::new(haystack.value_type())])?; + + for (arr, sub_arr) in haystack.iter().zip(needle.iter()) { + if let (Some(arr), Some(sub_arr)) = (arr, sub_arr) { + let arr_values = converter.convert_columns(&[arr])?; + let sub_arr_values = converter.convert_columns(&[sub_arr])?; + boolean_builder.append_value(general_array_has_all_and_any_kernel( + arr_values, + sub_arr_values, + comparison_type, + )); + } else { + boolean_builder.append_null(); + } + } + + Ok(Arc::new(boolean_builder.finish())) +} + +fn general_array_has_all_and_any_kernel( + haystack_rows: Rows, + needle_rows: Rows, + comparison_type: ComparisonType, +) -> bool { + match comparison_type { + ComparisonType::All => needle_rows.iter().all(|needle_row| { + haystack_rows + .iter() + .any(|haystack_row| haystack_row == needle_row) + }), + ComparisonType::Any => needle_rows.iter().any(|needle_row| { + haystack_rows + .iter() + .any(|haystack_row| haystack_row == needle_row) + }), + } +} diff --git a/datafusion/functions-nested/src/flatten.rs b/datafusion/functions-nested/src/flatten.rs index 2b383af3d456..b04c35667226 100644 --- a/datafusion/functions-nested/src/flatten.rs +++ b/datafusion/functions-nested/src/flatten.rs @@ -147,7 +147,7 @@ fn flatten_internal( let list_arr = GenericListArray::::new(field, offsets, values, None); Ok(list_arr) } else { - Ok(list_arr.clone()) + Ok(list_arr) } } } diff --git a/datafusion/functions-nested/src/utils.rs b/datafusion/functions-nested/src/utils.rs index 688e1633e5cf..3d5b261618d5 100644 --- a/datafusion/functions-nested/src/utils.rs +++ b/datafusion/functions-nested/src/utils.rs @@ -296,8 +296,7 @@ mod tests { let array3d_1 = Arc::new(array_into_list_array_nullable(array2d_1)) as ArrayRef; let array3d_2 = array_into_list_array_nullable(array2d_2.to_owned()); let res = - align_array_dimensions::(vec![array1d_1, Arc::new(array3d_2.clone())]) - .unwrap(); + align_array_dimensions::(vec![array1d_1, Arc::new(array3d_2)]).unwrap(); let expected = as_list_array(&array3d_1).unwrap(); let expected_dim = 
datafusion_common::utils::list_ndims(array3d_1.data_type()); diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index aea3d4a59e02..43d2796ad7dc 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -52,13 +52,13 @@ pub fn row_number_udwf() -> std::sync::Arc { /// row_number expression #[derive(Debug)] -struct RowNumber { +pub struct RowNumber { signature: Signature, } impl RowNumber { /// Create a new `row_number` function - fn new() -> Self { + pub fn new() -> Self { Self { signature: Signature::any(0, Volatility::Immutable), } diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index 9ef020b772f0..337379a74670 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -161,3 +161,8 @@ required-features = ["string_expressions"] harness = false name = "random" required-features = ["math_expressions"] + +[[bench]] +harness = false +name = "substr" +required-features = ["unicode_expressions"] diff --git a/datafusion/functions/benches/lower.rs b/datafusion/functions/benches/lower.rs index fa963f174e46..934c1c6bd189 100644 --- a/datafusion/functions/benches/lower.rs +++ b/datafusion/functions/benches/lower.rs @@ -17,8 +17,10 @@ extern crate criterion; -use arrow::array::{ArrayRef, StringArray}; -use arrow::util::bench_util::create_string_array_with_len; +use arrow::array::{ArrayRef, StringArray, StringViewBuilder}; +use arrow::util::bench_util::{ + create_string_array_with_len, create_string_view_array_with_len, +}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use datafusion_expr::ColumnarValue; use datafusion_functions::string; @@ -65,6 +67,58 @@ fn create_args3(size: usize) -> Vec { vec![ColumnarValue::Array(array)] } +/// Create an array of args containing StringViews, where all the values in the +/// StringViews are ASCII. +/// * `size` - the length of the StringViews, and +/// * `str_len` - the length of the strings within the array. +/// * `null_density` - the density of null values in the array. +/// * `mixed` - whether the array is mixed between inlined and referenced strings. +fn create_args4( + size: usize, + str_len: usize, + null_density: f32, + mixed: bool, +) -> Vec { + let array = Arc::new(create_string_view_array_with_len( + size, + null_density, + str_len, + mixed, + )); + + vec![ColumnarValue::Array(array)] +} + +/// Create an array of args containing a StringViewArray, where some of the values in the +/// array are non-ASCII. +/// * `size` - the length of the StringArray, and +/// * `non_ascii_density` - the density of non-ASCII values in the array. +/// * `null_density` - the density of null values in the array. 
+fn create_args5( + size: usize, + non_ascii_density: f32, + null_density: f32, +) -> Vec { + let mut string_view_builder = StringViewBuilder::with_capacity(size); + for _ in 0..size { + // sample null_density to determine if the value should be null + if rand::random::() < null_density { + string_view_builder.append_null(); + continue; + } + + // sample non_ascii_density to determine if the value should be non-ASCII + if rand::random::() < non_ascii_density { + string_view_builder.append_value("农历新年农历新年农历新年农历新年农历新年"); + } else { + string_view_builder.append_value("DATAFUSIONDATAFUSIONDATAFUSION"); + } + } + + let array = Arc::new(string_view_builder.finish()) as ArrayRef; + vec![ColumnarValue::Array(array)] +} + fn criterion_benchmark(c: &mut Criterion) { let lower = string::lower(); for size in [1024, 4096, 8192] { @@ -85,6 +139,40 @@ fn criterion_benchmark(c: &mut Criterion) { |b| b.iter(|| black_box(lower.invoke(&args))), ); } + + let sizes = [4096, 8192]; + let str_lens = [10, 64, 128]; + let mixes = [true, false]; + let null_densities = [0.0f32, 0.1f32]; + + for null_density in &null_densities { + for &mixed in &mixes { + for &str_len in &str_lens { + for &size in &sizes { + let args = create_args4(size, str_len, *null_density, mixed); + c.bench_function( + &format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}", + size, str_len, null_density, mixed), + |b| b.iter(|| black_box(lower.invoke(&args))), + ); + + let args = create_args4(size, str_len, *null_density, mixed); + c.bench_function( + &format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}", + size, str_len, null_density, mixed), + |b| b.iter(|| black_box(lower.invoke(&args))), + ); + + let args = create_args5(size, 0.1, *null_density); + c.bench_function( + &format!("lower_some_values_are_nonascii_string_views: size: {}, str_len: {}, non_ascii_density: {}, null_density: {}, mixed: {}", + size, str_len, 0.1, null_density, mixed), + |b| b.iter(|| black_box(lower.invoke(&args))), + ); + } + } + } + } } criterion_group!(benches, criterion_benchmark); diff --git a/datafusion/functions/benches/pad.rs b/datafusion/functions/benches/pad.rs index 5ff1e2fb860d..0c496bc63347 100644 --- a/datafusion/functions/benches/pad.rs +++ b/datafusion/functions/benches/pad.rs @@ -127,11 +127,12 @@ fn criterion_benchmark(c: &mut Criterion) { group.bench_function(BenchmarkId::new("largeutf8 type", size), |b| { b.iter(|| criterion::black_box(rpad().invoke(&args).unwrap())) }); - // - // let args = create_args::(size, 32, true); - // group.bench_function(BenchmarkId::new("stringview type", size), |b| { - // b.iter(|| criterion::black_box(rpad().invoke(&args).unwrap())) - // }); + + // rpad for stringview type + let args = create_args::(size, 32, true); + group.bench_function(BenchmarkId::new("stringview type", size), |b| { + b.iter(|| criterion::black_box(rpad().invoke(&args).unwrap())) + }); group.finish(); } diff --git a/datafusion/functions/benches/substr.rs b/datafusion/functions/benches/substr.rs new file mode 100644 index 000000000000..14a3389da380 --- /dev/null +++ b/datafusion/functions/benches/substr.rs @@ -0,0 +1,202 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +extern crate criterion; + +use arrow::array::{ArrayRef, Int64Array, OffsetSizeTrait}; +use arrow::util::bench_util::{ + create_string_array_with_len, create_string_view_array_with_len, +}; +use criterion::{black_box, criterion_group, criterion_main, Criterion, SamplingMode}; +use datafusion_expr::ColumnarValue; +use datafusion_functions::unicode; +use std::sync::Arc; + +fn create_args_without_count( + size: usize, + str_len: usize, + start_half_way: bool, + use_string_view: bool, +) -> Vec { + let start_array = Arc::new(Int64Array::from( + (0..size) + .map(|_| { + if start_half_way { + (str_len / 2) as i64 + } else { + 1i64 + } + }) + .collect::>(), + )); + + if use_string_view { + let string_array = + Arc::new(create_string_view_array_with_len(size, 0.1, str_len, false)); + vec![ + ColumnarValue::Array(string_array), + ColumnarValue::Array(start_array), + ] + } else { + let string_array = + Arc::new(create_string_array_with_len::(size, 0.1, str_len)); + + vec![ + ColumnarValue::Array(string_array), + ColumnarValue::Array(Arc::clone(&start_array) as ArrayRef), + ] + } +} + +fn create_args_with_count( + size: usize, + str_len: usize, + count_max: usize, + use_string_view: bool, +) -> Vec { + let start_array = + Arc::new(Int64Array::from((0..size).map(|_| 1).collect::>())); + let count = count_max.min(str_len) as i64; + let count_array = Arc::new(Int64Array::from( + (0..size).map(|_| count).collect::>(), + )); + + if use_string_view { + let string_array = + Arc::new(create_string_view_array_with_len(size, 0.1, str_len, false)); + vec![ + ColumnarValue::Array(string_array), + ColumnarValue::Array(start_array), + ColumnarValue::Array(count_array), + ] + } else { + let string_array = + Arc::new(create_string_array_with_len::(size, 0.1, str_len)); + + vec![ + ColumnarValue::Array(string_array), + ColumnarValue::Array(Arc::clone(&start_array) as ArrayRef), + ColumnarValue::Array(Arc::clone(&count_array) as ArrayRef), + ] + } +} + +fn criterion_benchmark(c: &mut Criterion) { + let substr = unicode::substr(); + for size in [1024, 4096] { + // string_len = 12, substring_len=6 (see `create_args_without_count`) + let len = 12; + let mut group = c.benchmark_group("SHORTER THAN 12"); + group.sampling_mode(SamplingMode::Flat); + group.sample_size(10); + + let args = create_args_without_count::(size, len, true, true); + group.bench_function( + &format!("substr_string_view [size={}, strlen={}]", size, len), + |b| b.iter(|| black_box(substr.invoke(&args))), + ); + + let args = create_args_without_count::(size, len, false, false); + group.bench_function( + &format!("substr_string [size={}, strlen={}]", size, len), + |b| b.iter(|| black_box(substr.invoke(&args))), + ); + + let args = create_args_without_count::(size, len, true, false); + group.bench_function( + &format!("substr_large_string [size={}, strlen={}]", size, len), + |b| b.iter(|| black_box(substr.invoke(&args))), + ); + + group.finish(); + + // string_len = 128, 
start=1, count=64, substring_len=64 + let len = 128; + let count = 64; + let mut group = c.benchmark_group("LONGER THAN 12"); + group.sampling_mode(SamplingMode::Flat); + group.sample_size(10); + + let args = create_args_with_count::(size, len, count, true); + group.bench_function( + &format!( + "substr_string_view [size={}, count={}, strlen={}]", + size, count, len, + ), + |b| b.iter(|| black_box(substr.invoke(&args))), + ); + + let args = create_args_with_count::(size, len, count, false); + group.bench_function( + &format!( + "substr_string [size={}, count={}, strlen={}]", + size, count, len, + ), + |b| b.iter(|| black_box(substr.invoke(&args))), + ); + + let args = create_args_with_count::(size, len, count, false); + group.bench_function( + &format!( + "substr_large_string [size={}, count={}, strlen={}]", + size, count, len, + ), + |b| b.iter(|| black_box(substr.invoke(&args))), + ); + + group.finish(); + + // string_len = 128, start=1, count=6, substring_len=6 + let len = 128; + let count = 6; + let mut group = c.benchmark_group("SRC_LEN > 12, SUB_LEN < 12"); + group.sampling_mode(SamplingMode::Flat); + group.sample_size(10); + + let args = create_args_with_count::(size, len, count, true); + group.bench_function( + &format!( + "substr_string_view [size={}, count={}, strlen={}]", + size, count, len, + ), + |b| b.iter(|| black_box(substr.invoke(&args))), + ); + + let args = create_args_with_count::(size, len, count, false); + group.bench_function( + &format!( + "substr_string [size={}, count={}, strlen={}]", + size, count, len, + ), + |b| b.iter(|| black_box(substr.invoke(&args))), + ); + + let args = create_args_with_count::(size, len, count, false); + group.bench_function( + &format!( + "substr_large_string [size={}, count={}, strlen={}]", + size, count, len, + ), + |b| b.iter(|| black_box(substr.invoke(&args))), + ); + + group.finish(); + } +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/functions/src/string/common.rs b/datafusion/functions/src/string/common.rs index 54aebb039046..a5dc22b4d9e4 100644 --- a/datafusion/functions/src/string/common.rs +++ b/datafusion/functions/src/string/common.rs @@ -15,13 +15,15 @@ // specific language governing permissions and limitations // under the License. +//! 
Common utilities for implementing string functions + use std::fmt::{Display, Formatter}; use std::sync::Arc; use arrow::array::{ new_null_array, Array, ArrayAccessor, ArrayDataBuilder, ArrayIter, ArrayRef, GenericStringArray, GenericStringBuilder, OffsetSizeTrait, StringArray, - StringViewArray, + StringBuilder, StringViewArray, }; use arrow::buffer::{Buffer, MutableBuffer, NullBuffer}; use arrow::datatypes::DataType; @@ -212,6 +214,23 @@ where i64, _, >(array, op)?)), + DataType::Utf8View => { + let string_array = as_string_view_array(array)?; + let mut string_builder = StringBuilder::with_capacity( + string_array.len(), + string_array.get_array_memory_size(), + ); + + for str in string_array.iter() { + if let Some(str) = str { + string_builder.append_value(op(str)); + } else { + string_builder.append_null(); + } + } + + Ok(ColumnarValue::Array(Arc::new(string_builder.finish()))) + } other => exec_err!("Unsupported data type {other:?} for function {name}"), }, ColumnarValue::Scalar(scalar) => match scalar { @@ -223,6 +242,10 @@ where let result = a.as_ref().map(|x| op(x)); Ok(ColumnarValue::Scalar(ScalarValue::LargeUtf8(result))) } + ScalarValue::Utf8View(a) => { + let result = a.as_ref().map(|x| op(x)); + Ok(ColumnarValue::Scalar(ScalarValue::Utf8(result))) + } other => exec_err!("Unsupported data type {other:?} for function {name}"), }, } @@ -252,7 +275,69 @@ impl<'a> ColumnarValueRef<'a> { } } +/// Abstracts iteration over different types of string arrays. +/// +/// The [`StringArrayType`] trait helps write generic code for string functions that can work with +/// different types of string arrays. +/// +/// Currently three types are supported: +/// - [`StringArray`] +/// - [`LargeStringArray`] +/// - [`StringViewArray`] +/// +/// It is inspired / copied from [arrow-rs]. +/// +/// [arrow-rs]: https://github.com/apache/arrow-rs/blob/bf0ea9129e617e4a3cf915a900b747cc5485315f/arrow-string/src/like.rs#L151-L157 +/// +/// # Examples +/// Generic function that works for [`StringArray`], [`LargeStringArray`] +/// and [`StringViewArray`]: +/// ``` +/// # use arrow::array::{StringArray, LargeStringArray, StringViewArray}; +/// # use datafusion_functions::string::common::StringArrayType; +/// +/// /// Combines string values for any StringArrayType type. 
It can be invoked on +/// /// and combination of `StringArray`, `LargeStringArray` or `StringViewArray` +/// fn combine_values<'a, S1, S2>(array1: S1, array2: S2) -> Vec +/// where S1: StringArrayType<'a>, S2: StringArrayType<'a> +/// { +/// // iterate over the elements of the 2 arrays in parallel +/// array1 +/// .iter() +/// .zip(array2.iter()) +/// .map(|(s1, s2)| { +/// // if both values are non null, combine them +/// if let (Some(s1), Some(s2)) = (s1, s2) { +/// format!("{s1}{s2}") +/// } else { +/// "None".to_string() +/// } +/// }) +/// .collect() +/// } +/// +/// let string_array = StringArray::from(vec!["foo", "bar"]); +/// let large_string_array = LargeStringArray::from(vec!["foo2", "bar2"]); +/// let string_view_array = StringViewArray::from(vec!["foo3", "bar3"]); +/// +/// // can invoke this function a string array and large string array +/// assert_eq!( +/// combine_values(&string_array, &large_string_array), +/// vec![String::from("foofoo2"), String::from("barbar2")] +/// ); +/// +/// // Can call the same function with string array and string view array +/// assert_eq!( +/// combine_values(&string_array, &string_view_array), +/// vec![String::from("foofoo3"), String::from("barbar3")] +/// ); +/// ``` +/// +/// [`LargeStringArray`]: arrow::array::LargeStringArray pub trait StringArrayType<'a>: ArrayAccessor + Sized { + /// Return an [`ArrayIter`] over the values of the array. + /// + /// This iterator iterates returns `Option<&str>` for each item in the array. fn iter(&self) -> ArrayIter; } diff --git a/datafusion/functions/src/string/lower.rs b/datafusion/functions/src/string/lower.rs index 29ca682c380b..ca324e69c0d2 100644 --- a/datafusion/functions/src/string/lower.rs +++ b/datafusion/functions/src/string/lower.rs @@ -43,7 +43,7 @@ impl LowerFunc { Self { signature: Signature::uniform( 1, - vec![Utf8, LargeUtf8], + vec![Utf8, LargeUtf8, Utf8View], Volatility::Immutable, ), } diff --git a/datafusion/functions/src/string/split_part.rs b/datafusion/functions/src/string/split_part.rs index 19721f0fad28..8d292315a35a 100644 --- a/datafusion/functions/src/string/split_part.rs +++ b/datafusion/functions/src/string/split_part.rs @@ -15,21 +15,23 @@ // specific language governing permissions and limitations // under the License. 
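The split_part.rs changes that follow replace the per-type macro with one generic kernel over Utf8, LargeUtf8, and Utf8View inputs. A standalone, hedged sketch of the field-selection rule it applies (positions are 1-based per the existing doc, and out-of-range positions yield an empty string; the negative/zero position handling shown here is an illustrative assumption, not taken from this diff):

```rust
// Hedged sketch of split_part's field selection; not the DataFusion kernel.
fn split_part(s: &str, delimiter: &str, n: i64) -> String {
    let parts: Vec<&str> = s.split(delimiter).collect();
    let len = parts.len() as i64;
    // Assumed rule: positive n counts from the front (1-based), negative n
    // counts from the end; anything out of range yields an empty string.
    let index = if n > 0 { n - 1 } else { n + len };
    if index >= 0 && index < len {
        parts[index as usize].to_string()
    } else {
        String::new()
    }
}

fn main() {
    // Example carried over from the removed doc comment:
    assert_eq!(split_part("abc~@~def~@~ghi", "~@~", 2), "def");
    assert_eq!(split_part("abc~@~def~@~ghi", "~@~", 9), "");
}
```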
-use std::any::Any; -use std::sync::Arc; - -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; -use arrow::datatypes::DataType; - -use datafusion_common::cast::{ - as_generic_string_array, as_int64_array, as_string_view_array, +use arrow::array::{ + ArrayRef, GenericStringArray, Int64Array, OffsetSizeTrait, StringViewArray, }; -use datafusion_common::{exec_err, Result}; +use arrow::array::{AsArray, GenericStringBuilder}; +use arrow::datatypes::DataType; +use datafusion_common::cast::as_int64_array; +use datafusion_common::ScalarValue; +use datafusion_common::{exec_err, DataFusionError, Result}; use datafusion_expr::TypeSignature::*; use datafusion_expr::{ColumnarValue, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; +use std::any::Any; +use std::sync::Arc; + +use crate::utils::utf8_to_str_type; -use crate::utils::{make_scalar_function, utf8_to_str_type}; +use super::common::StringArrayType; #[derive(Debug)] pub struct SplitPartFunc { @@ -82,36 +84,121 @@ impl ScalarUDFImpl for SplitPartFunc { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - match (args[0].data_type(), args[1].data_type()) { - ( - DataType::Utf8 | DataType::Utf8View, - DataType::Utf8 | DataType::Utf8View, - ) => make_scalar_function(split_part::, vec![])(args), + // First, determine if any of the arguments is an Array + let len = args.iter().find_map(|arg| match arg { + ColumnarValue::Array(a) => Some(a.len()), + _ => None, + }); + + let inferred_length = len.unwrap_or(1); + let is_scalar = len.is_none(); + + // Convert all ColumnarValues to ArrayRefs + let args = args + .iter() + .map(|arg| match arg { + ColumnarValue::Scalar(scalar) => scalar.to_array_of_size(inferred_length), + ColumnarValue::Array(array) => Ok(Arc::clone(array)), + }) + .collect::>>()?; + + // Unpack the ArrayRefs from the arguments + let n_array = as_int64_array(&args[2])?; + let result = match (args[0].data_type(), args[1].data_type()) { + (DataType::Utf8View, DataType::Utf8View) => { + split_part_impl::<&StringViewArray, &StringViewArray, i32>( + args[0].as_string_view(), + args[1].as_string_view(), + n_array, + ) + } + (DataType::Utf8View, DataType::Utf8) => { + split_part_impl::<&StringViewArray, &GenericStringArray, i32>( + args[0].as_string_view(), + args[1].as_string::(), + n_array, + ) + } + (DataType::Utf8View, DataType::LargeUtf8) => { + split_part_impl::<&StringViewArray, &GenericStringArray, i32>( + args[0].as_string_view(), + args[1].as_string::(), + n_array, + ) + } + (DataType::Utf8, DataType::Utf8View) => { + split_part_impl::<&GenericStringArray, &StringViewArray, i32>( + args[0].as_string::(), + args[1].as_string_view(), + n_array, + ) + } + (DataType::LargeUtf8, DataType::Utf8View) => { + split_part_impl::<&GenericStringArray, &StringViewArray, i64>( + args[0].as_string::(), + args[1].as_string_view(), + n_array, + ) + } + (DataType::Utf8, DataType::Utf8) => { + split_part_impl::<&GenericStringArray, &GenericStringArray, i32>( + args[0].as_string::(), + args[1].as_string::(), + n_array, + ) + } (DataType::LargeUtf8, DataType::LargeUtf8) => { - make_scalar_function(split_part::, vec![])(args) + split_part_impl::<&GenericStringArray, &GenericStringArray, i64>( + args[0].as_string::(), + args[1].as_string::(), + n_array, + ) } - (_, DataType::LargeUtf8) => { - make_scalar_function(split_part::, vec![])(args) + (DataType::Utf8, DataType::LargeUtf8) => { + split_part_impl::<&GenericStringArray, &GenericStringArray, i32>( + args[0].as_string::(), + args[1].as_string::(), + n_array, + ) } - 
(DataType::LargeUtf8, _) => { - make_scalar_function(split_part::, vec![])(args) + (DataType::LargeUtf8, DataType::Utf8) => { + split_part_impl::<&GenericStringArray, &GenericStringArray, i64>( + args[0].as_string::(), + args[1].as_string::(), + n_array, + ) } - (first_type, second_type) => exec_err!( - "unsupported first type {} and second type {} for split_part function", - first_type, - second_type - ), + _ => exec_err!("Unsupported combination of argument types for split_part"), + }; + if is_scalar { + // If all inputs are scalar, keep the output as scalar + let result = result.and_then(|arr| ScalarValue::try_from_array(&arr, 0)); + result.map(ColumnarValue::Scalar) + } else { + result.map(ColumnarValue::Array) } } } -macro_rules! process_split_part { - ($string_array: expr, $delimiter_array: expr, $n_array: expr) => {{ - let result = $string_array - .iter() - .zip($delimiter_array.iter()) - .zip($n_array.iter()) - .map(|((string, delimiter), n)| match (string, delimiter, n) { +/// impl +pub fn split_part_impl<'a, StringArrType, DelimiterArrType, StringArrayLen>( + string_array: StringArrType, + delimiter_array: DelimiterArrType, + n_array: &Int64Array, +) -> Result +where + StringArrType: StringArrayType<'a>, + DelimiterArrType: StringArrayType<'a>, + StringArrayLen: OffsetSizeTrait, +{ + let mut builder: GenericStringBuilder = GenericStringBuilder::new(); + + string_array + .iter() + .zip(delimiter_array.iter()) + .zip(n_array.iter()) + .try_for_each(|((string, delimiter), n)| -> Result<(), DataFusionError> { + match (string, delimiter, n) { (Some(string), Some(delimiter), Some(n)) => { let split_string: Vec<&str> = string.split(delimiter).collect(); let len = split_string.len(); @@ -125,58 +212,17 @@ macro_rules! process_split_part { } as usize; if index < len { - Ok(Some(split_string[index])) + builder.append_value(split_string[index]); } else { - Ok(Some("")) + builder.append_value(""); } } - _ => Ok(None), - }) - .collect::>>()?; - Ok(Arc::new(result) as ArrayRef) - }}; -} - -/// Splits string at occurrences of delimiter and returns the n'th field (counting from one). 
-/// split_part('abc~@~def~@~ghi', '~@~', 2) = 'def' -fn split_part( - args: &[ArrayRef], -) -> Result { - let n_array = as_int64_array(&args[2])?; - match (args[0].data_type(), args[1].data_type()) { - (DataType::Utf8View, _) => { - let string_array = as_string_view_array(&args[0])?; - match args[1].data_type() { - DataType::Utf8View => { - let delimiter_array = as_string_view_array(&args[1])?; - process_split_part!(string_array, delimiter_array, n_array) - } - _ => { - let delimiter_array = - as_generic_string_array::(&args[1])?; - process_split_part!(string_array, delimiter_array, n_array) - } - } - } - (_, DataType::Utf8View) => { - let delimiter_array = as_string_view_array(&args[1])?; - match args[0].data_type() { - DataType::Utf8View => { - let string_array = as_string_view_array(&args[0])?; - process_split_part!(string_array, delimiter_array, n_array) - } - _ => { - let string_array = as_generic_string_array::(&args[0])?; - process_split_part!(string_array, delimiter_array, n_array) - } + _ => builder.append_null(), } - } - (_, _) => { - let string_array = as_generic_string_array::(&args[0])?; - let delimiter_array = as_generic_string_array::(&args[1])?; - process_split_part!(string_array, delimiter_array, n_array) - } - } + Ok(()) + })?; + + Ok(Arc::new(builder.finish()) as ArrayRef) } #[cfg(test)] diff --git a/datafusion/functions/src/string/upper.rs b/datafusion/functions/src/string/upper.rs index da31948fbcfa..593e33ab6bb4 100644 --- a/datafusion/functions/src/string/upper.rs +++ b/datafusion/functions/src/string/upper.rs @@ -40,7 +40,7 @@ impl UpperFunc { Self { signature: Signature::uniform( 1, - vec![Utf8, LargeUtf8], + vec![Utf8, LargeUtf8, Utf8View], Volatility::Immutable, ), } diff --git a/datafusion/functions/src/unicode/rpad.rs b/datafusion/functions/src/unicode/rpad.rs index 4bcf102c8793..c1d6f327928f 100644 --- a/datafusion/functions/src/unicode/rpad.rs +++ b/datafusion/functions/src/unicode/rpad.rs @@ -15,20 +15,23 @@ // specific language governing permissions and limitations // under the License. 
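The rpad.rs rewrite below folds the two- and three-argument macro bodies into a single generic `rpad_impl` kernel. A hedged, standalone sketch of the padding rule it implements (the real kernel measures length in grapheme clusters via unicode-segmentation and writes into a GenericStringBuilder; this sketch counts chars for brevity):

```rust
// Hedged sketch of the rpad rule; not the DataFusion kernel.
fn rpad(s: &str, length: usize, fill: &str) -> String {
    let chars: Vec<char> = s.chars().collect();
    if length <= chars.len() {
        // Already long enough: truncate to `length`.
        return chars[..length].iter().collect();
    }
    if fill.is_empty() {
        // Nothing to pad with: return the input unchanged.
        return s.to_string();
    }
    // Append the fill pattern cyclically until the target length is reached.
    let mut out = s.to_string();
    out.extend(fill.chars().cycle().take(length - chars.len()));
    out
}

fn main() {
    assert_eq!(rpad("hi", 5, "xy"), "hixyx"); // example from the doc comment below
    assert_eq!(rpad("alphabet", 3, "xy"), "alp");
}
```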
-use std::any::Any; -use std::sync::Arc; - -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; -use arrow::datatypes::DataType; -use datafusion_common::cast::{ - as_generic_string_array, as_int64_array, as_string_view_array, -}; -use unicode_segmentation::UnicodeSegmentation; - +use crate::string::common::StringArrayType; use crate::utils::{make_scalar_function, utf8_to_str_type}; +use arrow::array::{ + ArrayRef, AsArray, GenericStringArray, GenericStringBuilder, Int64Array, + OffsetSizeTrait, StringViewArray, +}; +use arrow::datatypes::DataType; +use datafusion_common::cast::as_int64_array; +use datafusion_common::DataFusionError; use datafusion_common::{exec_err, Result}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use std::any::Any; +use std::fmt::Write; +use std::sync::Arc; +use unicode_segmentation::UnicodeSegmentation; +use DataType::{LargeUtf8, Utf8, Utf8View}; #[derive(Debug)] pub struct RPadFunc { @@ -84,170 +87,182 @@ impl ScalarUDFImpl for RPadFunc { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - match args.len() { - 2 => match args[0].data_type() { - DataType::Utf8 | DataType::Utf8View => { - make_scalar_function(rpad::, vec![])(args) - } - DataType::LargeUtf8 => { - make_scalar_function(rpad::, vec![])(args) - } - other => exec_err!("Unsupported data type {other:?} for function rpad"), - }, - 3 => match (args[0].data_type(), args[2].data_type()) { - ( - DataType::Utf8 | DataType::Utf8View, - DataType::Utf8 | DataType::Utf8View, - ) => make_scalar_function(rpad::, vec![])(args), - (DataType::LargeUtf8, DataType::LargeUtf8) => { - make_scalar_function(rpad::, vec![])(args) - } - (DataType::LargeUtf8, DataType::Utf8View | DataType::Utf8) => { - make_scalar_function(rpad::, vec![])(args) - } - (DataType::Utf8View | DataType::Utf8, DataType::LargeUtf8) => { - make_scalar_function(rpad::, vec![])(args) - } - (first_type, last_type) => { - exec_err!("unsupported arguments type for rpad, first argument type is {}, last argument type is {}", first_type, last_type) - } - }, - number => { - exec_err!("unsupported arguments number {} for rpad", number) + match ( + args.len(), + args[0].data_type(), + args.get(2).map(|arg| arg.data_type()), + ) { + (2, Utf8 | Utf8View, _) => { + make_scalar_function(rpad::, vec![])(args) + } + (2, LargeUtf8, _) => make_scalar_function(rpad::, vec![])(args), + (3, Utf8 | Utf8View, Some(Utf8 | Utf8View)) => { + make_scalar_function(rpad::, vec![])(args) + } + (3, LargeUtf8, Some(LargeUtf8)) => { + make_scalar_function(rpad::, vec![])(args) + } + (3, Utf8 | Utf8View, Some(LargeUtf8)) => { + make_scalar_function(rpad::, vec![])(args) + } + (3, LargeUtf8, Some(Utf8 | Utf8View)) => { + make_scalar_function(rpad::, vec![])(args) + } + (_, _, _) => { + exec_err!("Unsupported combination of data types for function rpad") } } } } -macro_rules! 
process_rpad { - // For the two-argument case - ($string_array:expr, $length_array:expr) => {{ - $string_array - .iter() - .zip($length_array.iter()) - .map(|(string, length)| match (string, length) { - (Some(string), Some(length)) => { - if length > i32::MAX as i64 { - return exec_err!("rpad requested length {} too large", length); - } - - let length = if length < 0 { 0 } else { length as usize }; - if length == 0 { - Ok(Some("".to_string())) - } else { - let graphemes = string.graphemes(true).collect::>(); - if length < graphemes.len() { - Ok(Some(graphemes[..length].concat())) - } else { - let mut s = string.to_string(); - s.push_str(" ".repeat(length - graphemes.len()).as_str()); - Ok(Some(s)) - } - } - } - _ => Ok(None), - }) - .collect::>>() - }}; - - // For the three-argument case - ($string_array:expr, $length_array:expr, $fill_array:expr) => {{ - $string_array - .iter() - .zip($length_array.iter()) - .zip($fill_array.iter()) - .map(|((string, length), fill)| match (string, length, fill) { - (Some(string), Some(length), Some(fill)) => { - if length > i32::MAX as i64 { - return exec_err!("rpad requested length {} too large", length); - } - - let length = if length < 0 { 0 } else { length as usize }; - let graphemes = string.graphemes(true).collect::>(); - let fill_chars = fill.chars().collect::>(); +pub fn rpad( + args: &[ArrayRef], +) -> Result { + if args.len() < 2 || args.len() > 3 { + return exec_err!( + "rpad was called with {} arguments. It requires 2 or 3 arguments.", + args.len() + ); + } - if length < graphemes.len() { - Ok(Some(graphemes[..length].concat())) - } else if fill_chars.is_empty() { - Ok(Some(string.to_string())) - } else { - let mut s = string.to_string(); - let char_vector: Vec = (0..length - graphemes.len()) - .map(|l| fill_chars[l % fill_chars.len()]) - .collect(); - s.push_str(&char_vector.iter().collect::()); - Ok(Some(s)) - } - } - _ => Ok(None), - }) - .collect::>>() - }}; + let length_array = as_int64_array(&args[1])?; + match ( + args.len(), + args[0].data_type(), + args.get(2).map(|arg| arg.data_type()), + ) { + (2, Utf8View, _) => { + rpad_impl::<&StringViewArray, &StringViewArray, StringArrayLen>( + args[0].as_string_view(), + length_array, + None, + ) + } + (3, Utf8View, Some(Utf8View)) => { + rpad_impl::<&StringViewArray, &StringViewArray, StringArrayLen>( + args[0].as_string_view(), + length_array, + Some(args[2].as_string_view()), + ) + } + (3, Utf8View, Some(Utf8 | LargeUtf8)) => { + rpad_impl::<&StringViewArray, &GenericStringArray, StringArrayLen>( + args[0].as_string_view(), + length_array, + Some(args[2].as_string::()), + ) + } + (3, Utf8 | LargeUtf8, Some(Utf8View)) => rpad_impl::< + &GenericStringArray, + &StringViewArray, + StringArrayLen, + >( + args[0].as_string::(), + length_array, + Some(args[2].as_string_view()), + ), + (_, _, _) => rpad_impl::< + &GenericStringArray, + &GenericStringArray, + StringArrayLen, + >( + args[0].as_string::(), + length_array, + args.get(2).map(|arg| arg.as_string::()), + ), + } } /// Extends the string to length 'length' by appending the characters fill (a space by default). If the string is already longer than length then it is truncated. 
/// rpad('hi', 5, 'xy') = 'hixyx' -pub fn rpad( - args: &[ArrayRef], -) -> Result { - match (args.len(), args[0].data_type()) { - (2, DataType::Utf8View) => { - let string_array = as_string_view_array(&args[0])?; - let length_array = as_int64_array(&args[1])?; +pub fn rpad_impl<'a, StringArrType, FillArrType, StringArrayLen>( + string_array: StringArrType, + length_array: &Int64Array, + fill_array: Option, +) -> Result +where + StringArrType: StringArrayType<'a>, + FillArrType: StringArrayType<'a>, + StringArrayLen: OffsetSizeTrait, +{ + let mut builder: GenericStringBuilder = GenericStringBuilder::new(); - let result = process_rpad!(string_array, length_array)?; - Ok(Arc::new(result) as ArrayRef) + match fill_array { + None => { + string_array.iter().zip(length_array.iter()).try_for_each( + |(string, length)| -> Result<(), DataFusionError> { + match (string, length) { + (Some(string), Some(length)) => { + if length > i32::MAX as i64 { + return exec_err!( + "rpad requested length {} too large", + length + ); + } + let length = if length < 0 { 0 } else { length as usize }; + if length == 0 { + builder.append_value(""); + } else { + let graphemes = + string.graphemes(true).collect::>(); + if length < graphemes.len() { + builder.append_value(graphemes[..length].concat()); + } else { + builder.write_str(string)?; + builder.write_str( + &" ".repeat(length - graphemes.len()), + )?; + builder.append_value(""); + } + } + } + _ => builder.append_null(), + } + Ok(()) + }, + )?; } - (2, _) => { - let string_array = as_generic_string_array::(&args[0])?; - let length_array = as_int64_array(&args[1])?; + Some(fill_array) => { + string_array + .iter() + .zip(length_array.iter()) + .zip(fill_array.iter()) + .try_for_each( + |((string, length), fill)| -> Result<(), DataFusionError> { + match (string, length, fill) { + (Some(string), Some(length), Some(fill)) => { + if length > i32::MAX as i64 { + return exec_err!( + "rpad requested length {} too large", + length + ); + } + let length = if length < 0 { 0 } else { length as usize }; + let graphemes = + string.graphemes(true).collect::>(); - let result = process_rpad!(string_array, length_array)?; - Ok(Arc::new(result) as ArrayRef) - } - (3, DataType::Utf8View) => { - let string_array = as_string_view_array(&args[0])?; - let length_array = as_int64_array(&args[1])?; - match args[2].data_type() { - DataType::Utf8View => { - let fill_array = as_string_view_array(&args[2])?; - let result = process_rpad!(string_array, length_array, fill_array)?; - Ok(Arc::new(result) as ArrayRef) - } - DataType::Utf8 | DataType::LargeUtf8 => { - let fill_array = as_generic_string_array::(&args[2])?; - let result = process_rpad!(string_array, length_array, fill_array)?; - Ok(Arc::new(result) as ArrayRef) - } - other_type => { - exec_err!("unsupported type for rpad's third operator: {}", other_type) - } - } - } - (3, _) => { - let string_array = as_generic_string_array::(&args[0])?; - let length_array = as_int64_array(&args[1])?; - match args[2].data_type() { - DataType::Utf8View => { - let fill_array = as_string_view_array(&args[2])?; - let result = process_rpad!(string_array, length_array, fill_array)?; - Ok(Arc::new(result) as ArrayRef) - } - DataType::Utf8 | DataType::LargeUtf8 => { - let fill_array = as_generic_string_array::(&args[2])?; - let result = process_rpad!(string_array, length_array, fill_array)?; - Ok(Arc::new(result) as ArrayRef) - } - other_type => { - exec_err!("unsupported type for rpad's third operator: {}", other_type) - } - } + if length < graphemes.len() { + 
builder.append_value(graphemes[..length].concat()); + } else if fill.is_empty() { + builder.append_value(string); + } else { + builder.write_str(string)?; + fill.chars() + .cycle() + .take(length - graphemes.len()) + .for_each(|ch| builder.write_char(ch).unwrap()); + builder.append_value(""); + } + } + _ => builder.append_null(), + } + Ok(()) + }, + )?; } - (other, other_type) => exec_err!( - "rpad requires 2 or 3 arguments with corresponding types, but got {}. number of arguments with {}", - other, other_type - ), } + + Ok(Arc::new(builder.finish()) as ArrayRef) } #[cfg(test)] diff --git a/datafusion/functions/src/unicode/strpos.rs b/datafusion/functions/src/unicode/strpos.rs index 702baf6e8fa7..cf10b18ae338 100644 --- a/datafusion/functions/src/unicode/strpos.rs +++ b/datafusion/functions/src/unicode/strpos.rs @@ -19,11 +19,10 @@ use std::any::Any; use std::sync::Arc; use arrow::array::{ - ArrayRef, ArrowPrimitiveType, GenericStringArray, OffsetSizeTrait, PrimitiveArray, + ArrayAccessor, ArrayIter, ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray, }; use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type}; -use datafusion_common::cast::as_generic_string_array; use datafusion_common::{exec_err, Result}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; @@ -52,6 +51,9 @@ impl StrposFunc { Exact(vec![Utf8, LargeUtf8]), Exact(vec![LargeUtf8, Utf8]), Exact(vec![LargeUtf8, LargeUtf8]), + Exact(vec![Utf8View, Utf8View]), + Exact(vec![Utf8View, Utf8]), + Exact(vec![Utf8View, LargeUtf8]), ], Volatility::Immutable, ), @@ -78,21 +80,7 @@ impl ScalarUDFImpl for StrposFunc { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - match (args[0].data_type(), args[1].data_type()) { - (DataType::Utf8, DataType::Utf8) => { - make_scalar_function(strpos::, vec![])(args) - } - (DataType::Utf8, DataType::LargeUtf8) => { - make_scalar_function(strpos::, vec![])(args) - } - (DataType::LargeUtf8, DataType::Utf8) => { - make_scalar_function(strpos::, vec![])(args) - } - (DataType::LargeUtf8, DataType::LargeUtf8) => { - make_scalar_function(strpos::, vec![])(args) - } - other => exec_err!("Unsupported data type {other:?} for function strpos"), - } + make_scalar_function(strpos, vec![])(args) } fn aliases(&self) -> &[String] { @@ -100,30 +88,71 @@ impl ScalarUDFImpl for StrposFunc { } } +fn strpos(args: &[ArrayRef]) -> Result { + match (args[0].data_type(), args[1].data_type()) { + (DataType::Utf8, DataType::Utf8) => { + let string_array = args[0].as_string::(); + let substring_array = args[1].as_string::(); + calculate_strpos::<_, _, Int32Type>(string_array, substring_array) + } + (DataType::Utf8, DataType::LargeUtf8) => { + let string_array = args[0].as_string::(); + let substring_array = args[1].as_string::(); + calculate_strpos::<_, _, Int32Type>(string_array, substring_array) + } + (DataType::LargeUtf8, DataType::Utf8) => { + let string_array = args[0].as_string::(); + let substring_array = args[1].as_string::(); + calculate_strpos::<_, _, Int64Type>(string_array, substring_array) + } + (DataType::LargeUtf8, DataType::LargeUtf8) => { + let string_array = args[0].as_string::(); + let substring_array = args[1].as_string::(); + calculate_strpos::<_, _, Int64Type>(string_array, substring_array) + } + (DataType::Utf8View, DataType::Utf8View) => { + let string_array = args[0].as_string_view(); + let substring_array = args[1].as_string_view(); + calculate_strpos::<_, _, Int32Type>(string_array, substring_array) + } + 
(DataType::Utf8View, DataType::Utf8) => { + let string_array = args[0].as_string_view(); + let substring_array = args[1].as_string::(); + calculate_strpos::<_, _, Int32Type>(string_array, substring_array) + } + (DataType::Utf8View, DataType::LargeUtf8) => { + let string_array = args[0].as_string_view(); + let substring_array = args[1].as_string::(); + calculate_strpos::<_, _, Int32Type>(string_array, substring_array) + } + + other => { + exec_err!("Unsupported data type combination {other:?} for function strpos") + } + } +} + /// Returns starting index of specified substring within string, or zero if it's not present. (Same as position(substring in string), but note the reversed argument order.) /// strpos('high', 'ig') = 2 /// The implementation uses UTF-8 code points as characters -fn strpos( - args: &[ArrayRef], +fn calculate_strpos<'a, V1, V2, T: ArrowPrimitiveType>( + string_array: V1, + substring_array: V2, ) -> Result where - T0::Native: OffsetSizeTrait, - T1::Native: OffsetSizeTrait, + V1: ArrayAccessor, + V2: ArrayAccessor, { - let string_array: &GenericStringArray = - as_generic_string_array::(&args[0])?; - - let substring_array: &GenericStringArray = - as_generic_string_array::(&args[1])?; + let string_iter = ArrayIter::new(string_array); + let substring_iter = ArrayIter::new(substring_array); - let result = string_array - .iter() - .zip(substring_array.iter()) + let result = string_iter + .zip(substring_iter) .map(|(string, substring)| match (string, substring) { (Some(string), Some(substring)) => { - // the find method returns the byte index of the substring - // Next, we count the number of the chars until that byte - T0::Native::from_usize( + // The `find` method returns the byte index of the substring. + // We count the number of chars up to that byte index. + T::Native::from_usize( string .find(substring) .map(|x| string[..x].chars().count() + 1) @@ -132,20 +161,21 @@ where } _ => None, }) - .collect::>(); + .collect::>(); Ok(Arc::new(result) as ArrayRef) } #[cfg(test)] -mod test { - use super::*; +mod tests { + use arrow::array::{Array, Int32Array, Int64Array}; + use arrow::datatypes::DataType::{Int32, Int64}; + + use datafusion_common::{Result, ScalarValue}; + use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; + + use crate::unicode::strpos::StrposFunc; use crate::utils::test::test_function; - use arrow::{ - array::{Array as _, Int32Array, Int64Array}, - datatypes::DataType::{Int32, Int64}, - }; - use datafusion_common::ScalarValue; macro_rules! 
test_strpos { ($lhs:literal, $rhs:literal -> $result:literal; $t1:ident $t2:ident $t3:ident $t4:ident $t5:ident) => { @@ -164,21 +194,54 @@ mod test { } #[test] - fn strpos() { - test_strpos!("foo", "bar" -> 0; Utf8 Utf8 i32 Int32 Int32Array); - test_strpos!("foobar", "foo" -> 1; Utf8 Utf8 i32 Int32 Int32Array); - test_strpos!("foobar", "bar" -> 4; Utf8 Utf8 i32 Int32 Int32Array); - - test_strpos!("foo", "bar" -> 0; LargeUtf8 LargeUtf8 i64 Int64 Int64Array); - test_strpos!("foobar", "foo" -> 1; LargeUtf8 LargeUtf8 i64 Int64 Int64Array); - test_strpos!("foobar", "bar" -> 4; LargeUtf8 LargeUtf8 i64 Int64 Int64Array); - - test_strpos!("foo", "bar" -> 0; Utf8 LargeUtf8 i32 Int32 Int32Array); - test_strpos!("foobar", "foo" -> 1; Utf8 LargeUtf8 i32 Int32 Int32Array); - test_strpos!("foobar", "bar" -> 4; Utf8 LargeUtf8 i32 Int32 Int32Array); - - test_strpos!("foo", "bar" -> 0; LargeUtf8 Utf8 i64 Int64 Int64Array); - test_strpos!("foobar", "foo" -> 1; LargeUtf8 Utf8 i64 Int64 Int64Array); - test_strpos!("foobar", "bar" -> 4; LargeUtf8 Utf8 i64 Int64 Int64Array); + fn test_strpos_functions() { + // Utf8 and Utf8 combinations + test_strpos!("alphabet", "ph" -> 3; Utf8 Utf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "a" -> 1; Utf8 Utf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "z" -> 0; Utf8 Utf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "" -> 1; Utf8 Utf8 i32 Int32 Int32Array); + test_strpos!("", "a" -> 0; Utf8 Utf8 i32 Int32 Int32Array); + + // LargeUtf8 and LargeUtf8 combinations + test_strpos!("alphabet", "ph" -> 3; LargeUtf8 LargeUtf8 i64 Int64 Int64Array); + test_strpos!("alphabet", "a" -> 1; LargeUtf8 LargeUtf8 i64 Int64 Int64Array); + test_strpos!("alphabet", "z" -> 0; LargeUtf8 LargeUtf8 i64 Int64 Int64Array); + test_strpos!("alphabet", "" -> 1; LargeUtf8 LargeUtf8 i64 Int64 Int64Array); + test_strpos!("", "a" -> 0; LargeUtf8 LargeUtf8 i64 Int64 Int64Array); + + // Utf8 and LargeUtf8 combinations + test_strpos!("alphabet", "ph" -> 3; Utf8 LargeUtf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "a" -> 1; Utf8 LargeUtf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "z" -> 0; Utf8 LargeUtf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "" -> 1; Utf8 LargeUtf8 i32 Int32 Int32Array); + test_strpos!("", "a" -> 0; Utf8 LargeUtf8 i32 Int32 Int32Array); + + // LargeUtf8 and Utf8 combinations + test_strpos!("alphabet", "ph" -> 3; LargeUtf8 Utf8 i64 Int64 Int64Array); + test_strpos!("alphabet", "a" -> 1; LargeUtf8 Utf8 i64 Int64 Int64Array); + test_strpos!("alphabet", "z" -> 0; LargeUtf8 Utf8 i64 Int64 Int64Array); + test_strpos!("alphabet", "" -> 1; LargeUtf8 Utf8 i64 Int64 Int64Array); + test_strpos!("", "a" -> 0; LargeUtf8 Utf8 i64 Int64 Int64Array); + + // Utf8View and Utf8View combinations + test_strpos!("alphabet", "ph" -> 3; Utf8View Utf8View i32 Int32 Int32Array); + test_strpos!("alphabet", "a" -> 1; Utf8View Utf8View i32 Int32 Int32Array); + test_strpos!("alphabet", "z" -> 0; Utf8View Utf8View i32 Int32 Int32Array); + test_strpos!("alphabet", "" -> 1; Utf8View Utf8View i32 Int32 Int32Array); + test_strpos!("", "a" -> 0; Utf8View Utf8View i32 Int32 Int32Array); + + // Utf8View and Utf8 combinations + test_strpos!("alphabet", "ph" -> 3; Utf8View Utf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "a" -> 1; Utf8View Utf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "z" -> 0; Utf8View Utf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "" -> 1; Utf8View Utf8 i32 Int32 Int32Array); + test_strpos!("", "a" -> 0; Utf8View Utf8 i32 Int32 Int32Array); + + // 
Utf8View and LargeUtf8 combinations + test_strpos!("alphabet", "ph" -> 3; Utf8View LargeUtf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "a" -> 1; Utf8View LargeUtf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "z" -> 0; Utf8View LargeUtf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "" -> 1; Utf8View LargeUtf8 i32 Int32 Int32Array); + test_strpos!("", "a" -> 0; Utf8View LargeUtf8 i32 Int32 Int32Array); } } diff --git a/datafusion/functions/src/unicode/substr.rs b/datafusion/functions/src/unicode/substr.rs index 9fd8c75eab23..db218f9127ae 100644 --- a/datafusion/functions/src/unicode/substr.rs +++ b/datafusion/functions/src/unicode/substr.rs @@ -25,7 +25,7 @@ use arrow::array::{ use arrow::datatypes::DataType; use datafusion_common::cast::as_int64_array; -use datafusion_common::{exec_err, Result}; +use datafusion_common::{exec_datafusion_err, exec_err, Result}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; @@ -144,19 +144,23 @@ where let result = iter .zip(start_array.iter()) .zip(count_array.iter()) - .map(|((string, start), count)| match (string, start, count) { - (Some(string), Some(start), Some(count)) => { - if count < 0 { - exec_err!( + .map(|((string, start), count)| { + match (string, start, count) { + (Some(string), Some(start), Some(count)) => { + if count < 0 { + exec_err!( "negative substring length not allowed: substr(, {start}, {count})" ) - } else { - let skip = max(0, start - 1); - let count = max(0, count + (if start < 1 {start - 1} else {0})); - Ok(Some(string.chars().skip(skip as usize).take(count as usize).collect::())) + } else { + let skip = max(0, start.checked_sub(1).ok_or_else( + || exec_datafusion_err!("negative overflow when calculating skip value") + )?); + let count = max(0, count + (if start < 1 { start - 1 } else { 0 })); + Ok(Some(string.chars().skip(skip as usize).take(count as usize).collect::())) + } } + _ => Ok(None), } - _ => Ok(None), }) .collect::>>()?; @@ -482,6 +486,29 @@ mod tests { Utf8, StringArray ); + test_function!( + SubstrFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::from("abc")), + ColumnarValue::Scalar(ScalarValue::from(-9223372036854775808i64)), + ], + Ok(Some("abc")), + &str, + Utf8, + StringArray + ); + test_function!( + SubstrFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::from("overflow")), + ColumnarValue::Scalar(ScalarValue::from(-9223372036854775808i64)), + ColumnarValue::Scalar(ScalarValue::from(1i64)), + ], + exec_err!("negative overflow when calculating skip value"), + &str, + Utf8, + StringArray + ); Ok(()) } diff --git a/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs b/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs index 53ba3042f522..dd422f7aab95 100644 --- a/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs +++ b/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs @@ -160,14 +160,13 @@ fn replace_columns( mod tests { use arrow::datatypes::{DataType, Field, Schema}; + use crate::test::{assert_analyzed_plan_eq_display_indent, test_table_scan}; + use crate::Analyzer; use datafusion_common::{JoinType, TableReference}; use datafusion_expr::{ col, in_subquery, qualified_wildcard, table_scan, wildcard, LogicalPlanBuilder, }; - use crate::test::{assert_analyzed_plan_eq_display_indent, test_table_scan}; - use crate::Analyzer; - use super::*; fn assert_plan_eq(plan: LogicalPlan, expected: &str) -> Result<()> { diff --git a/datafusion/optimizer/src/analyzer/inline_table_scan.rs 
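The strpos dispatch above only changes how the arrays are accessed (a generic `ArrayAccessor` over `Utf8`, `LargeUtf8`, and `Utf8View`); the per-row logic in `calculate_strpos` is unchanged. A minimal sketch of that logic, with a hypothetical `strpos_str` helper:

```rust
/// Hypothetical helper showing the per-row strpos logic used above:
/// 1-based character index of the first match, or 0 if absent.
fn strpos_str(string: &str, substring: &str) -> usize {
    string
        .find(substring)
        // `find` returns a byte offset; convert it to a 1-based char index.
        .map(|byte_idx| string[..byte_idx].chars().count() + 1)
        .unwrap_or(0)
}

fn main() {
    assert_eq!(strpos_str("high", "ig"), 2);
    assert_eq!(strpos_str("alphabet", "ph"), 3);
    assert_eq!(strpos_str("alphabet", "z"), 0);
    assert_eq!(strpos_str("alphabet", ""), 1);
}
```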
b/datafusion/optimizer/src/analyzer/inline_table_scan.rs index b69b8410da49..d5b3648725b9 100644 --- a/datafusion/optimizer/src/analyzer/inline_table_scan.rs +++ b/datafusion/optimizer/src/analyzer/inline_table_scan.rs @@ -24,7 +24,7 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::{Column, Result}; use datafusion_expr::expr::WildcardOptions; -use datafusion_expr::{logical_plan::LogicalPlan, Expr, LogicalPlanBuilder, TableScan}; +use datafusion_expr::{logical_plan::LogicalPlan, Expr, LogicalPlanBuilder}; /// Analyzed rule that inlines TableScan that provide a [`LogicalPlan`] /// (DataFrame / ViewTable) @@ -56,24 +56,23 @@ fn analyze_internal(plan: LogicalPlan) -> Result> { match plan { // Match only on scans without filter / projection / fetch // Views and DataFrames won't have those added - // during the early stage of planning - LogicalPlan::TableScan(TableScan { - table_name, - source, - projection, - filters, - .. - }) if filters.is_empty() && source.get_logical_plan().is_some() => { - let sub_plan = source.get_logical_plan().unwrap(); - let projection_exprs = generate_projection_expr(&projection, sub_plan)?; - LogicalPlanBuilder::from(sub_plan.clone()) - .project(projection_exprs)? - // Ensures that the reference to the inlined table remains the - // same, meaning we don't have to change any of the parent nodes - // that reference this table. - .alias(table_name)? - .build() - .map(Transformed::yes) + // during the early stage of planning. + LogicalPlan::TableScan(table_scan) if table_scan.filters.is_empty() => { + if let Some(sub_plan) = table_scan.source.get_logical_plan() { + let sub_plan = sub_plan.into_owned(); + let projection_exprs = + generate_projection_expr(&table_scan.projection, &sub_plan)?; + LogicalPlanBuilder::from(sub_plan) + .project(projection_exprs)? + // Ensures that the reference to the inlined table remains the + // same, meaning we don't have to change any of the parent nodes + // that reference this table. + .alias(table_scan.table_name)? 
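The `substr` hunk above guards the `start - 1` computation: with `start = i64::MIN` a plain subtraction overflows, so `checked_sub` turns it into an explicit error instead. A small sketch of that guard, using a hypothetical `skip_from_start` helper:

```rust
use std::cmp::max;

/// Hypothetical helper mirroring the guarded skip computation above.
/// Returns how many leading characters to skip for a 1-based `start`,
/// or an error message when `start - 1` would overflow (e.g. i64::MIN).
fn skip_from_start(start: i64) -> Result<i64, String> {
    let skip = start
        .checked_sub(1)
        .ok_or_else(|| "negative overflow when calculating skip value".to_string())?;
    Ok(max(0, skip))
}

fn main() {
    assert_eq!(skip_from_start(3), Ok(2)); // substr(s, 3) skips 2 chars
    assert_eq!(skip_from_start(-5), Ok(0)); // negative start clamps to 0
    assert!(skip_from_start(i64::MIN).is_err()); // would overflow
}
```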
+ .build() + .map(Transformed::yes) + } else { + Ok(Transformed::no(LogicalPlan::TableScan(table_scan))) + } } _ => Ok(Transformed::no(plan)), } @@ -104,7 +103,7 @@ fn generate_projection_expr( #[cfg(test)] mod tests { - use std::{sync::Arc, vec}; + use std::{borrow::Cow, sync::Arc, vec}; use crate::analyzer::inline_table_scan::InlineTableScan; use crate::test::assert_analyzed_plan_eq; @@ -167,8 +166,8 @@ mod tests { Arc::new(Schema::new(vec![Field::new("a", DataType::Int64, false)])) } - fn get_logical_plan(&self) -> Option<&LogicalPlan> { - Some(&self.plan) + fn get_logical_plan(&self) -> Option> { + Some(Cow::Borrowed(&self.plan)) } } diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 68ab2e13005f..a6b9bad6c5d9 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -37,7 +37,6 @@ use datafusion_expr::expr::{ }; use datafusion_expr::expr_rewriter::coerce_plan_expr_for_schema; use datafusion_expr::expr_schema::cast_subquery; -use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::logical_plan::Subquery; use datafusion_expr::type_coercion::binary::{ comparison_coercion, get_input_types, like_coercion, @@ -56,6 +55,8 @@ use datafusion_expr::{ Projection, ScalarUDF, Union, WindowFrame, WindowFrameBound, WindowFrameUnits, }; +/// Performs type coercion by determining the schema +/// and performing the expression rewrites. #[derive(Default)] pub struct TypeCoercion {} @@ -128,16 +129,23 @@ fn analyze_internal( .map_data(|plan| plan.recompute_schema()) } -pub(crate) struct TypeCoercionRewriter<'a> { +/// Rewrite expressions to apply type coercion. +pub struct TypeCoercionRewriter<'a> { pub(crate) schema: &'a DFSchema, } impl<'a> TypeCoercionRewriter<'a> { + /// Create a new [`TypeCoercionRewriter`] with a provided schema + /// representing both the inputs and output of the [`LogicalPlan`] node. fn new(schema: &'a DFSchema) -> Self { Self { schema } } - fn coerce_plan(&mut self, plan: LogicalPlan) -> Result { + /// Coerce the [`LogicalPlan`]. + /// + /// Refer to [`TypeCoercionRewriter::coerce_join`] and [`TypeCoercionRewriter::coerce_union`] + /// for type-coercion approach. + pub fn coerce_plan(&mut self, plan: LogicalPlan) -> Result { match plan { LogicalPlan::Join(join) => self.coerce_join(join), LogicalPlan::Union(union) => Self::coerce_union(union), @@ -153,7 +161,7 @@ impl<'a> TypeCoercionRewriter<'a> { /// /// For example, on_exprs like `t1.a = t2.b AND t1.x = t2.y` will be stored /// as a list of `(t1.a, t2.b), (t1.x, t2.y)` - fn coerce_join(&mut self, mut join: Join) -> Result { + pub fn coerce_join(&mut self, mut join: Join) -> Result { join.on = join .on .into_iter() @@ -176,7 +184,7 @@ impl<'a> TypeCoercionRewriter<'a> { /// Coerce the union’s inputs to a common schema compatible with all inputs. /// This occurs after wildcard expansion and the coercion of the input expressions. 
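The `get_logical_plan` change above returns `Option<Cow<LogicalPlan>>`, so a table source can lend a borrowed plan or hand over an owned one, and the analyzer calls `into_owned()` only when it actually needs ownership. A minimal sketch of the pattern with a hypothetical `PlanHolder` type standing in for the table source:

```rust
use std::borrow::Cow;

/// Hypothetical stand-in for a table source that owns a plan description.
struct PlanHolder {
    plan: String,
}

impl PlanHolder {
    /// Lend the plan without cloning, mirroring `Some(Cow::Borrowed(&self.plan))`.
    fn get_logical_plan(&self) -> Option<Cow<'_, String>> {
        Some(Cow::Borrowed(&self.plan))
    }
}

fn main() {
    let holder = PlanHolder { plan: "TableScan: t".to_string() };

    if let Some(plan) = holder.get_logical_plan() {
        // `into_owned` clones only because the value was borrowed; an
        // implementation returning `Cow::Owned` would hand it over for free.
        let owned: String = plan.into_owned();
        println!("inlining {owned}");
    }
}
```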
- fn coerce_union(union_plan: Union) -> Result { + pub fn coerce_union(union_plan: Union) -> Result { let union_schema = Arc::new(coerce_union_schema(&union_plan.inputs)?); let new_inputs = union_plan .inputs @@ -241,15 +249,19 @@ impl<'a> TreeNodeRewriter for TypeCoercionRewriter<'a> { subquery, outer_ref_columns, }) => { - let new_plan = analyze_internal(self.schema, unwrap_arc(subquery))?.data; + let new_plan = + analyze_internal(self.schema, Arc::unwrap_or_clone(subquery))?.data; Ok(Transformed::yes(Expr::ScalarSubquery(Subquery { subquery: Arc::new(new_plan), outer_ref_columns, }))) } Expr::Exists(Exists { subquery, negated }) => { - let new_plan = - analyze_internal(self.schema, unwrap_arc(subquery.subquery))?.data; + let new_plan = analyze_internal( + self.schema, + Arc::unwrap_or_clone(subquery.subquery), + )? + .data; Ok(Transformed::yes(Expr::Exists(Exists { subquery: Subquery { subquery: Arc::new(new_plan), @@ -263,8 +275,11 @@ impl<'a> TreeNodeRewriter for TypeCoercionRewriter<'a> { subquery, negated, }) => { - let new_plan = - analyze_internal(self.schema, unwrap_arc(subquery.subquery))?.data; + let new_plan = analyze_internal( + self.schema, + Arc::unwrap_or_clone(subquery.subquery), + )? + .data; let expr_type = expr.get_type(self.schema)?; let subquery_type = new_plan.schema().field(0).data_type(); let common_type = comparison_coercion(&expr_type, subquery_type).ok_or(plan_datafusion_err!( @@ -809,7 +824,10 @@ fn coerce_case_expression(case: Case, schema: &DFSchema) -> Result { } /// Get a common schema that is compatible with all inputs of UNION. -fn coerce_union_schema(inputs: &[Arc]) -> Result { +/// +/// This method presumes that the wildcard expansion is unneeded, or has already +/// been applied. +pub fn coerce_union_schema(inputs: &[Arc]) -> Result { let base_schema = inputs[0].schema(); let mut union_datatypes = base_schema .fields() diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index feccf5679efb..3a2b190359d4 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -33,7 +33,6 @@ use datafusion_common::tree_node::{ }; use datafusion_common::{qualified_name, Column, DFSchema, DFSchemaRef, Result}; use datafusion_expr::expr::{Alias, ScalarFunction}; -use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::logical_plan::{ Aggregate, Filter, LogicalPlan, Projection, Sort, Window, }; @@ -314,7 +313,7 @@ impl CommonSubexprEliminate { schema, .. } = projection; - let input = unwrap_arc(input); + let input = Arc::unwrap_or_clone(input); self.try_unary_plan(expr, input, config)? .map_data(|(new_expr, new_input)| { Projection::try_new_with_schema(new_expr, Arc::new(new_input), schema) @@ -327,7 +326,7 @@ impl CommonSubexprEliminate { config: &dyn OptimizerConfig, ) -> Result> { let Sort { expr, input, fetch } = sort; - let input = unwrap_arc(input); + let input = Arc::unwrap_or_clone(input); let new_sort = self.try_unary_plan(expr, input, config)?.update_data( |(new_expr, new_input)| { LogicalPlan::Sort(Sort { @@ -348,7 +347,7 @@ impl CommonSubexprEliminate { let Filter { predicate, input, .. } = filter; - let input = unwrap_arc(input); + let input = Arc::unwrap_or_clone(input); let expr = vec![predicate]; self.try_unary_plan(expr, input, config)? .map_data(|(mut new_expr, new_input)| { @@ -458,7 +457,7 @@ impl CommonSubexprEliminate { schema, .. 
} = aggregate; - let input = unwrap_arc(input); + let input = Arc::unwrap_or_clone(input); // Extract common sub-expressions from the aggregate and grouping expressions. self.find_common_exprs(vec![group_expr, aggr_expr], config, ExprMask::Normal)? .map_data(|common| { @@ -729,7 +728,7 @@ fn get_consecutive_window_exprs( window_expr_list.push(window_expr); window_schemas.push(schema); - plan = unwrap_arc(input); + plan = Arc::unwrap_or_clone(input); } (window_expr_list, window_schemas, plan) } @@ -1432,7 +1431,7 @@ mod test { fn nested_aliases() -> Result<()> { let table_scan = test_table_scan()?; - let plan = LogicalPlanBuilder::from(table_scan.clone()) + let plan = LogicalPlanBuilder::from(table_scan) .project(vec![ (col("a") + col("b") - col("c")).alias("alias1") * (col("a") + col("b")), col("a") + col("b"), @@ -1843,7 +1842,7 @@ mod test { let config = &OptimizerContext::new(); let _common_expr_1 = config.alias_generator().next(CSE_PREFIX); let common_expr_2 = config.alias_generator().next(CSE_PREFIX); - let plan = LogicalPlanBuilder::from(table_scan.clone()) + let plan = LogicalPlanBuilder::from(table_scan) .project(vec![ (col("a") + col("b")).alias(common_expr_2.clone()), col("c"), @@ -1887,7 +1886,7 @@ mod test { let extracted_short_circuit_leg_1 = (col("a") + col("b")).eq(lit(0)); let not_extracted_short_circuit_leg_2 = (col("a") - col("b")).eq(lit(0)); let extracted_short_circuit_leg_3 = (col("a") * col("b")).eq(lit(0)); - let plan = LogicalPlanBuilder::from(table_scan.clone()) + let plan = LogicalPlanBuilder::from(table_scan) .project(vec![ extracted_short_circuit.clone().alias("c1"), extracted_short_circuit.alias("c2"), @@ -1900,7 +1899,7 @@ mod test { .alias("c4"), extracted_short_circuit_leg_3 .clone() - .or(extracted_short_circuit_leg_3.clone()) + .or(extracted_short_circuit_leg_3) .alias("c5"), ])? .build()?; @@ -1921,7 +1920,7 @@ mod test { let extracted_child = col("a") + col("b"); let rand = rand_func().call(vec![]); let not_extracted_volatile = extracted_child + rand; - let plan = LogicalPlanBuilder::from(table_scan.clone()) + let plan = LogicalPlanBuilder::from(table_scan) .project(vec![ not_extracted_volatile.clone().alias("c1"), not_extracted_volatile.alias("c2"), @@ -1948,7 +1947,7 @@ mod test { let not_extracted_short_circuit_leg_2 = col("b").eq(lit(0)); let not_extracted_volatile_short_circuit_2 = rand.eq(lit(0)).or(not_extracted_short_circuit_leg_2); - let plan = LogicalPlanBuilder::from(table_scan.clone()) + let plan = LogicalPlanBuilder::from(table_scan) .project(vec![ not_extracted_volatile_short_circuit_1.clone().alias("c1"), not_extracted_volatile_short_circuit_1.alias("c2"), diff --git a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs index b6d49490d437..f1cae1099a4d 100644 --- a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs +++ b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs @@ -37,7 +37,6 @@ use datafusion_expr::{ LogicalPlan, LogicalPlanBuilder, Operator, }; -use datafusion_expr::logical_plan::tree_node::unwrap_arc; use log::debug; /// Optimizer rule for rewriting predicate(IN/EXISTS) subquery to left semi/anti joins @@ -55,8 +54,10 @@ impl DecorrelatePredicateSubquery { mut subquery: Subquery, config: &dyn OptimizerConfig, ) -> Result { - subquery.subquery = - Arc::new(self.rewrite(unwrap_arc(subquery.subquery), config)?.data); + subquery.subquery = Arc::new( + self.rewrite(Arc::unwrap_or_clone(subquery.subquery), config)? 
+ .data, + ); Ok(subquery) } @@ -164,7 +165,7 @@ impl OptimizerRule for DecorrelatePredicateSubquery { } // iterate through all exists clauses in predicate, turning each into a join - let mut cur_input = unwrap_arc(input); + let mut cur_input = Arc::unwrap_or_clone(input); for subquery in subqueries { if let Some(plan) = build_join(&subquery, &cur_input, config.alias_generator())? diff --git a/datafusion/optimizer/src/eliminate_cross_join.rs b/datafusion/optimizer/src/eliminate_cross_join.rs index fc4eaef80903..20e6641e4d62 100644 --- a/datafusion/optimizer/src/eliminate_cross_join.rs +++ b/datafusion/optimizer/src/eliminate_cross_join.rs @@ -24,7 +24,6 @@ use crate::join_key_set::JoinKeySet; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{internal_err, Result}; use datafusion_expr::expr::{BinaryExpr, Expr}; -use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::logical_plan::{ CrossJoin, Filter, Join, JoinConstraint, JoinType, LogicalPlan, Projection, }; @@ -114,7 +113,7 @@ impl OptimizerRule for EliminateCrossJoin { input, predicate, .. } = filter; flatten_join_inputs( - unwrap_arc(input), + Arc::unwrap_or_clone(input), &mut possible_join_keys, &mut all_inputs, )?; @@ -217,12 +216,28 @@ fn flatten_join_inputs( ); } possible_join_keys.insert_all_owned(join.on); - flatten_join_inputs(unwrap_arc(join.left), possible_join_keys, all_inputs)?; - flatten_join_inputs(unwrap_arc(join.right), possible_join_keys, all_inputs)?; + flatten_join_inputs( + Arc::unwrap_or_clone(join.left), + possible_join_keys, + all_inputs, + )?; + flatten_join_inputs( + Arc::unwrap_or_clone(join.right), + possible_join_keys, + all_inputs, + )?; } LogicalPlan::CrossJoin(join) => { - flatten_join_inputs(unwrap_arc(join.left), possible_join_keys, all_inputs)?; - flatten_join_inputs(unwrap_arc(join.right), possible_join_keys, all_inputs)?; + flatten_join_inputs( + Arc::unwrap_or_clone(join.left), + possible_join_keys, + all_inputs, + )?; + flatten_join_inputs( + Arc::unwrap_or_clone(join.right), + possible_join_keys, + all_inputs, + )?; } _ => { all_inputs.push(plan); diff --git a/datafusion/optimizer/src/eliminate_filter.rs b/datafusion/optimizer/src/eliminate_filter.rs index 84bb8e782142..bb2b4547e9c2 100644 --- a/datafusion/optimizer/src/eliminate_filter.rs +++ b/datafusion/optimizer/src/eliminate_filter.rs @@ -19,7 +19,6 @@ use datafusion_common::tree_node::Transformed; use datafusion_common::{Result, ScalarValue}; -use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::{EmptyRelation, Expr, Filter, LogicalPlan}; use std::sync::Arc; @@ -65,7 +64,7 @@ impl OptimizerRule for EliminateFilter { input, .. 
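The recurring change across these optimizer rules replaces the internal `unwrap_arc` helper with the standard library's `Arc::unwrap_or_clone` (stabilized in Rust 1.76), which has the same behavior: move the value out when the `Arc` is uniquely held, otherwise clone it. A minimal illustration:

```rust
use std::sync::Arc;

fn main() {
    // Uniquely held: the inner value is moved out, no clone happens.
    let unique = Arc::new(String::from("plan"));
    let owned = Arc::unwrap_or_clone(unique);
    assert_eq!(owned, "plan");

    // Shared: the inner value is cloned and the other handle stays valid.
    let shared = Arc::new(String::from("shared plan"));
    let keep_alive = Arc::clone(&shared);
    let owned = Arc::unwrap_or_clone(shared);
    assert_eq!(owned, *keep_alive);
}
```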
}) => match v { - Some(true) => Ok(Transformed::yes(unwrap_arc(input))), + Some(true) => Ok(Transformed::yes(Arc::unwrap_or_clone(input))), Some(false) | None => Ok(Transformed::yes(LogicalPlan::EmptyRelation( EmptyRelation { produce_one_row: false, diff --git a/datafusion/optimizer/src/eliminate_limit.rs b/datafusion/optimizer/src/eliminate_limit.rs index a42fe6a6f95b..e48f37a77cd3 100644 --- a/datafusion/optimizer/src/eliminate_limit.rs +++ b/datafusion/optimizer/src/eliminate_limit.rs @@ -20,7 +20,7 @@ use crate::optimizer::ApplyOrder; use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::tree_node::Transformed; use datafusion_common::Result; -use datafusion_expr::logical_plan::{tree_node::unwrap_arc, EmptyRelation, LogicalPlan}; +use datafusion_expr::logical_plan::{EmptyRelation, LogicalPlan}; use std::sync::Arc; /// Optimizer rule to replace `LIMIT 0` or `LIMIT` whose ancestor LIMIT's skip is @@ -74,7 +74,9 @@ impl OptimizerRule for EliminateLimit { } } else if limit.skip == 0 { // input also can be Limit, so we should apply again. - return Ok(self.rewrite(unwrap_arc(limit.input), _config).unwrap()); + return Ok(self + .rewrite(Arc::unwrap_or_clone(limit.input), _config) + .unwrap()); } Ok(Transformed::no(LogicalPlan::Limit(limit))) } diff --git a/datafusion/optimizer/src/eliminate_nested_union.rs b/datafusion/optimizer/src/eliminate_nested_union.rs index 5d7895bba4d8..965771326854 100644 --- a/datafusion/optimizer/src/eliminate_nested_union.rs +++ b/datafusion/optimizer/src/eliminate_nested_union.rs @@ -21,7 +21,6 @@ use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::tree_node::Transformed; use datafusion_common::Result; use datafusion_expr::expr_rewriter::coerce_plan_expr_for_schema; -use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::{Distinct, LogicalPlan, Union}; use itertools::Itertools; use std::sync::Arc; @@ -69,7 +68,7 @@ impl OptimizerRule for EliminateNestedUnion { }))) } LogicalPlan::Distinct(Distinct::All(nested_plan)) => { - match unwrap_arc(nested_plan) { + match Arc::unwrap_or_clone(nested_plan) { LogicalPlan::Union(Union { inputs, schema }) => { let inputs = inputs .into_iter() @@ -96,16 +95,17 @@ impl OptimizerRule for EliminateNestedUnion { } fn extract_plans_from_union(plan: Arc) -> Vec { - match unwrap_arc(plan) { - LogicalPlan::Union(Union { inputs, .. }) => { - inputs.into_iter().map(unwrap_arc).collect::>() - } + match Arc::unwrap_or_clone(plan) { + LogicalPlan::Union(Union { inputs, .. }) => inputs + .into_iter() + .map(Arc::unwrap_or_clone) + .collect::>(), plan => vec![plan], } } fn extract_plan_from_distinct(plan: Arc) -> Arc { - match unwrap_arc(plan) { + match Arc::unwrap_or_clone(plan) { LogicalPlan::Distinct(Distinct::All(plan)) => plan, plan => Arc::new(plan), } @@ -144,10 +144,7 @@ mod tests { fn eliminate_nothing() -> Result<()> { let plan_builder = table_scan(Some("table"), &schema(), None)?; - let plan = plan_builder - .clone() - .union(plan_builder.clone().build()?)? - .build()?; + let plan = plan_builder.clone().union(plan_builder.build()?)?.build()?; let expected = "\ Union\ @@ -162,7 +159,7 @@ mod tests { let plan = plan_builder .clone() - .union_distinct(plan_builder.clone().build()?)? + .union_distinct(plan_builder.build()?)? .build()?; let expected = "Distinct:\ @@ -180,7 +177,7 @@ mod tests { .clone() .union(plan_builder.clone().build()?)? .union(plan_builder.clone().build()?)? - .union(plan_builder.clone().build()?)? + .union(plan_builder.build()?)? 
.build()?; let expected = "\ @@ -200,7 +197,7 @@ mod tests { .clone() .union_distinct(plan_builder.clone().build()?)? .union(plan_builder.clone().build()?)? - .union(plan_builder.clone().build()?)? + .union(plan_builder.build()?)? .build()?; let expected = "Union\ @@ -222,7 +219,7 @@ mod tests { .union(plan_builder.clone().build()?)? .union_distinct(plan_builder.clone().build()?)? .union(plan_builder.clone().build()?)? - .union_distinct(plan_builder.clone().build()?)? + .union_distinct(plan_builder.build()?)? .build()?; let expected = "Distinct:\ @@ -243,7 +240,7 @@ mod tests { .clone() .union_distinct(plan_builder.clone().distinct()?.build()?)? .union(plan_builder.clone().distinct()?.build()?)? - .union_distinct(plan_builder.clone().build()?)? + .union_distinct(plan_builder.build()?)? .build()?; let expected = "Distinct:\ @@ -271,7 +268,6 @@ mod tests { )? .union( plan_builder - .clone() .project(vec![col("id").alias("_id"), col("key"), col("value")])? .build()?, )? @@ -300,7 +296,6 @@ mod tests { )? .union_distinct( plan_builder - .clone() .project(vec![col("id").alias("_id"), col("key"), col("value")])? .build()?, )? diff --git a/datafusion/optimizer/src/eliminate_one_union.rs b/datafusion/optimizer/src/eliminate_one_union.rs index 43024107c4f8..7a1c4e118e05 100644 --- a/datafusion/optimizer/src/eliminate_one_union.rs +++ b/datafusion/optimizer/src/eliminate_one_union.rs @@ -16,9 +16,11 @@ // under the License. //! [`EliminateOneUnion`] eliminates single element `Union` + use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::{tree_node::Transformed, Result}; -use datafusion_expr::logical_plan::{tree_node::unwrap_arc, LogicalPlan, Union}; +use datafusion_expr::logical_plan::{LogicalPlan, Union}; +use std::sync::Arc; use crate::optimizer::ApplyOrder; @@ -48,9 +50,9 @@ impl OptimizerRule for EliminateOneUnion { _config: &dyn OptimizerConfig, ) -> Result> { match plan { - LogicalPlan::Union(Union { mut inputs, .. }) if inputs.len() == 1 => { - Ok(Transformed::yes(unwrap_arc(inputs.pop().unwrap()))) - } + LogicalPlan::Union(Union { mut inputs, .. }) if inputs.len() == 1 => Ok( + Transformed::yes(Arc::unwrap_or_clone(inputs.pop().unwrap())), + ), _ => Ok(Transformed::no(plan)), } } @@ -92,10 +94,7 @@ mod tests { fn eliminate_nothing() -> Result<()> { let plan_builder = table_scan(Some("table"), &schema(), None)?; - let plan = plan_builder - .clone() - .union(plan_builder.clone().build()?)? - .build()?; + let plan = plan_builder.clone().union(plan_builder.build()?)?.build()?; let expected = "\ Union\ diff --git a/datafusion/optimizer/src/eliminate_outer_join.rs b/datafusion/optimizer/src/eliminate_outer_join.rs index 12534e058152..e7c88df55122 100644 --- a/datafusion/optimizer/src/eliminate_outer_join.rs +++ b/datafusion/optimizer/src/eliminate_outer_join.rs @@ -18,7 +18,6 @@ //! 
[`EliminateOuterJoin`] converts `LEFT/RIGHT/FULL` joins to `INNER` joins use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::{Column, DFSchema, Result}; -use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::logical_plan::{Join, JoinType, LogicalPlan}; use datafusion_expr::{Expr, Filter, Operator}; @@ -79,7 +78,7 @@ impl OptimizerRule for EliminateOuterJoin { _config: &dyn OptimizerConfig, ) -> Result> { match plan { - LogicalPlan::Filter(mut filter) => match unwrap_arc(filter.input) { + LogicalPlan::Filter(mut filter) => match Arc::unwrap_or_clone(filter.input) { LogicalPlan::Join(join) => { let mut non_nullable_cols: Vec = vec![]; diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index ac4ed87a4a1a..35b0d07751ff 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -41,7 +41,6 @@ use crate::utils::NamePreserver; use datafusion_common::tree_node::{ Transformed, TreeNode, TreeNodeIterator, TreeNodeRecursion, }; -use datafusion_expr::logical_plan::tree_node::unwrap_arc; /// Optimizer rule to prune unnecessary columns from intermediate schemas /// inside the [`LogicalPlan`]. This rule: @@ -181,7 +180,7 @@ fn optimize_projections( let necessary_exprs = necessary_indices.get_required_exprs(schema); return optimize_projections( - unwrap_arc(aggregate.input), + Arc::unwrap_or_clone(aggregate.input), config, necessary_indices, )? @@ -221,7 +220,7 @@ fn optimize_projections( child_reqs.with_exprs(&input_schema, &new_window_expr)?; return optimize_projections( - unwrap_arc(window.input), + Arc::unwrap_or_clone(window.input), config, required_indices.clone(), )? @@ -488,7 +487,7 @@ fn merge_consecutive_projections(proj: Projection) -> Result { let parents_predicates = split_conjunction_owned(filter.predicate); @@ -820,7 +819,7 @@ impl OptimizerRule for PushDownFilter { .map(|e| Ok(Column::from_qualified_name(e.schema_name().to_string()))) .collect::>>()?; - let predicates = split_conjunction_owned(filter.predicate.clone()); + let predicates = split_conjunction_owned(filter.predicate); let mut keep_predicates = vec![]; let mut push_predicates = vec![]; @@ -1139,19 +1138,19 @@ fn convert_to_cross_join_if_beneficial( match plan { // Can be converted back to cross join LogicalPlan::Join(join) if join.on.is_empty() && join.filter.is_none() => { - LogicalPlanBuilder::from(unwrap_arc(join.left)) - .cross_join(unwrap_arc(join.right))? + LogicalPlanBuilder::from(Arc::unwrap_or_clone(join.left)) + .cross_join(Arc::unwrap_or_clone(join.right))? .build() .map(Transformed::yes) } - LogicalPlan::Filter(filter) => convert_to_cross_join_if_beneficial(unwrap_arc( - filter.input, - ))? - .transform_data(|child_plan| { - Filter::try_new(filter.predicate, Arc::new(child_plan)) - .map(LogicalPlan::Filter) - .map(Transformed::yes) - }), + LogicalPlan::Filter(filter) => { + convert_to_cross_join_if_beneficial(Arc::unwrap_or_clone(filter.input))? 
+ .transform_data(|child_plan| { + Filter::try_new(filter.predicate, Arc::new(child_plan)) + .map(LogicalPlan::Filter) + .map(Transformed::yes) + }) + } plan => Ok(Transformed::no(plan)), } } diff --git a/datafusion/optimizer/src/push_down_limit.rs b/datafusion/optimizer/src/push_down_limit.rs index 290b893577b8..dff0b61c6b22 100644 --- a/datafusion/optimizer/src/push_down_limit.rs +++ b/datafusion/optimizer/src/push_down_limit.rs @@ -26,7 +26,6 @@ use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::tree_node::Transformed; use datafusion_common::utils::combine_limit; use datafusion_common::Result; -use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::logical_plan::{Join, JoinType, Limit, LogicalPlan}; /// Optimization rule that tries to push down `LIMIT`. @@ -83,7 +82,7 @@ impl OptimizerRule for PushDownLimit { }))); }; - match unwrap_arc(input) { + match Arc::unwrap_or_clone(input) { LogicalPlan::TableScan(mut scan) => { let rows_needed = if fetch != 0 { fetch + skip } else { 0 }; let new_fetch = scan diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index c45df74a564d..7129ceb0fea1 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -3407,32 +3407,32 @@ mod tests { let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and( in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], false), ); - assert_eq!(simplify(expr.clone()), lit(false)); + assert_eq!(simplify(expr), lit(false)); // 2. c1 IN (1,2,3,4) AND c1 IN (4,5,6,7) -> c1 = 4 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and( in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], false), ); - assert_eq!(simplify(expr.clone()), col("c1").eq(lit(4))); + assert_eq!(simplify(expr), col("c1").eq(lit(4))); // 3. c1 NOT IN (1, 2, 3, 4) OR c1 NOT IN (5, 6, 7, 8) -> true let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or( in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], true), ); - assert_eq!(simplify(expr.clone()), lit(true)); + assert_eq!(simplify(expr), lit(true)); // 3.5 c1 NOT IN (1, 2, 3, 4) OR c1 NOT IN (4, 5, 6, 7) -> c1 != 4 (4 overlaps) let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or( in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], true), ); - assert_eq!(simplify(expr.clone()), col("c1").not_eq(lit(4))); + assert_eq!(simplify(expr), col("c1").not_eq(lit(4))); // 4. c1 NOT IN (1,2,3,4) AND c1 NOT IN (4,5,6,7) -> c1 NOT IN (1,2,3,4,5,6,7) let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and( in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], true), ); assert_eq!( - simplify(expr.clone()), + simplify(expr), in_list( col("c1"), vec![lit(1), lit(2), lit(3), lit(4), lit(5), lit(6), lit(7)], @@ -3445,7 +3445,7 @@ mod tests { in_list(col("c1"), vec![lit(2), lit(3), lit(4), lit(5)], false), ); assert_eq!( - simplify(expr.clone()), + simplify(expr), in_list( col("c1"), vec![lit(1), lit(2), lit(3), lit(4), lit(5)], @@ -3459,7 +3459,7 @@ mod tests { vec![lit(1), lit(2), lit(3), lit(4), lit(5)], true, )); - assert_eq!(simplify(expr.clone()), lit(false)); + assert_eq!(simplify(expr), lit(false)); // 7. 
c1 NOT IN (1,2,3,4) AND c1 IN (1,2,3,4,5) -> c1 = 5 let expr = @@ -3468,14 +3468,14 @@ mod tests { vec![lit(1), lit(2), lit(3), lit(4), lit(5)], false, )); - assert_eq!(simplify(expr.clone()), col("c1").eq(lit(5))); + assert_eq!(simplify(expr), col("c1").eq(lit(5))); // 8. c1 IN (1,2,3,4) AND c1 NOT IN (5,6,7,8) -> c1 IN (1,2,3,4) let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and( in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], true), ); assert_eq!( - simplify(expr.clone()), + simplify(expr), in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false) ); @@ -3493,7 +3493,7 @@ mod tests { )) .and(in_list(col("c1"), vec![lit(3), lit(6)], false)); assert_eq!( - simplify(expr.clone()), + simplify(expr), col("c1").eq(lit(3)).or(col("c1").eq(lit(6))) ); @@ -3507,7 +3507,7 @@ mod tests { )) .and(in_list(col("c1"), vec![lit(8), lit(9), lit(10)], false)), ); - assert_eq!(simplify(expr.clone()), col("c1").eq(lit(8))); + assert_eq!(simplify(expr), col("c1").eq(lit(8))); // Contains non-InList expression // c1 NOT IN (1,2,3,4) OR c1 != 5 OR c1 NOT IN (6,7,8,9) -> c1 NOT IN (1,2,3,4) OR c1 != 5 OR c1 NOT IN (6,7,8,9) @@ -3622,7 +3622,7 @@ mod tests { let expr_x = col("c3").gt(lit(3_i64)); let expr_y = (col("c4") + lit(2_u32)).lt(lit(10_u32)); let expr_z = col("c1").in_list(vec![lit("a"), lit("b")], true); - let expr = expr_x.clone().and(expr_y.clone().or(expr_z)); + let expr = expr_x.clone().and(expr_y.or(expr_z)); // All guaranteed null let guarantees = vec![ @@ -3698,7 +3698,7 @@ mod tests { col("c4"), NullableInterval::from(ScalarValue::UInt32(Some(3))), )]; - let output = simplify_with_guarantee(expr.clone(), guarantees); + let output = simplify_with_guarantee(expr, guarantees); assert_eq!(&output, &expr_x); } diff --git a/datafusion/optimizer/src/simplify_expressions/guarantees.rs b/datafusion/optimizer/src/simplify_expressions/guarantees.rs index 09fdd7685a9c..afcbe528083b 100644 --- a/datafusion/optimizer/src/simplify_expressions/guarantees.rs +++ b/datafusion/optimizer/src/simplify_expressions/guarantees.rs @@ -225,12 +225,12 @@ mod tests { // x IS NULL => guaranteed false let expr = col("x").is_null(); - let output = expr.clone().rewrite(&mut rewriter).data().unwrap(); + let output = expr.rewrite(&mut rewriter).data().unwrap(); assert_eq!(output, lit(false)); // x IS NOT NULL => guaranteed true let expr = col("x").is_not_null(); - let output = expr.clone().rewrite(&mut rewriter).data().unwrap(); + let output = expr.rewrite(&mut rewriter).data().unwrap(); assert_eq!(output, lit(true)); } diff --git a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs index e0f50a470d43..b17d69437cbe 100644 --- a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs +++ b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs @@ -627,7 +627,7 @@ mod tests { Box::new(DataType::Int32), Box::new(ScalarValue::LargeUtf8(Some("value".to_owned()))), ); - let expr_input = cast(col("largestr"), dict.data_type()).eq(lit(dict.clone())); + let expr_input = cast(col("largestr"), dict.data_type()).eq(lit(dict)); let expected = col("largestr").eq(lit(ScalarValue::LargeUtf8(Some("value".to_owned())))); assert_eq!(optimize_test(expr_input, &schema), expected); diff --git a/datafusion/optimizer/src/utils.rs b/datafusion/optimizer/src/utils.rs index 05b1744d90c5..45cef55bf272 100644 --- a/datafusion/optimizer/src/utils.rs +++ b/datafusion/optimizer/src/utils.rs @@ -28,6 +28,10 @@ use datafusion_expr::{logical_plan::LogicalPlan, 
Expr, Operator}; use log::{debug, trace}; +/// Re-export of `NamesPreserver` for backwards compatibility, +/// as it was initially placed here and then moved elsewhere. +pub use datafusion_expr::expr_rewriter::NamePreserver; + /// Convenience rule for writing optimizers: recursively invoke /// optimize on plan's children and then return a node of the same /// type. Useful for optimizer rules which want to leave the type @@ -294,54 +298,3 @@ pub fn only_or_err(slice: &[T]) -> Result<&T> { pub fn merge_schema(inputs: Vec<&LogicalPlan>) -> DFSchema { expr_utils::merge_schema(inputs) } - -/// Handles ensuring the name of rewritten expressions is not changed. -/// -/// For example, if an expression `1 + 2` is rewritten to `3`, the name of the -/// expression should be preserved: `3 as "1 + 2"` -/// -/// See for details -pub struct NamePreserver { - use_alias: bool, -} - -/// If the name of an expression is remembered, it will be preserved when -/// rewriting the expression -pub struct SavedName(Option); - -impl NamePreserver { - /// Create a new NamePreserver for rewriting the `expr` that is part of the specified plan - pub fn new(plan: &LogicalPlan) -> Self { - Self { - use_alias: !matches!(plan, LogicalPlan::Filter(_) | LogicalPlan::Join(_)), - } - } - - /// Create a new NamePreserver for rewriting the `expr`s in `Projection` - /// - /// This will use aliases - pub fn new_for_projection() -> Self { - Self { use_alias: true } - } - - pub fn save(&self, expr: &Expr) -> Result { - let original_name = if self.use_alias { - Some(expr.name_for_alias()?) - } else { - None - }; - - Ok(SavedName(original_name)) - } -} - -impl SavedName { - /// Ensures the name of the rewritten expression is preserved - pub fn restore(self, expr: Expr) -> Result { - let Self(original_name) = self; - match original_name { - Some(name) => expr.alias_if_changed(name), - None => Ok(expr), - } - } -} diff --git a/datafusion/physical-expr-common/src/sort_expr.rs b/datafusion/physical-expr-common/src/sort_expr.rs index 9dc54d2eb2d0..745ec543c31a 100644 --- a/datafusion/physical-expr-common/src/sort_expr.rs +++ b/datafusion/physical-expr-common/src/sort_expr.rs @@ -221,7 +221,7 @@ impl PhysicalSortRequirement { /// [`ExecutionPlan::required_input_ordering`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.ExecutionPlan.html#method.required_input_ordering pub fn from_sort_exprs<'a>( ordering: impl IntoIterator, - ) -> Vec { + ) -> LexRequirement { ordering .into_iter() .cloned() diff --git a/datafusion/physical-expr-functions-aggregate/Cargo.toml b/datafusion/physical-expr-functions-aggregate/Cargo.toml deleted file mode 100644 index 6eed89614c53..000000000000 --- a/datafusion/physical-expr-functions-aggregate/Cargo.toml +++ /dev/null @@ -1,48 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
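The `NamePreserver` removed above now lives in `datafusion_expr::expr_rewriter` and is re-exported from the old path for backwards compatibility. The sketch below shows the save/restore flow; it assumes the relocated type keeps the exact API of the removed implementation shown here (`save` and `restore` returning `Result`), which may differ slightly in the released crate:

```rust
// Sketch only: assumes the relocated NamePreserver keeps the save/restore
// API shown in the removed implementation above.
use datafusion_common::Result;
use datafusion_expr::expr_rewriter::NamePreserver;
use datafusion_expr::{col, lit, LogicalPlanBuilder};

fn main() -> Result<()> {
    let plan = LogicalPlanBuilder::empty(false).build()?;
    let name_preserver = NamePreserver::new(&plan);

    // Remember the name of `a + 0`, rewrite the expression, then restore
    // the original name so downstream schemas are unchanged.
    let original = col("a") + lit(0);
    let saved = name_preserver.save(&original)?;
    let rewritten = saved.restore(col("a"))?;
    // The restored expression carries the original name as an alias.
    println!("{rewritten}");
    Ok(())
}
```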
See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "datafusion-physical-expr-functions-aggregate" -description = "Logical plan and expression representation for DataFusion query engine" -keywords = ["datafusion", "logical", "plan", "expressions"] -readme = "README.md" -version = { workspace = true } -edition = { workspace = true } -homepage = { workspace = true } -repository = { workspace = true } -license = { workspace = true } -authors = { workspace = true } -rust-version = { workspace = true } - -[lints] -workspace = true - -[lib] -name = "datafusion_physical_expr_functions_aggregate" -path = "src/lib.rs" - -[features] - -[dependencies] -ahash = { workspace = true } -arrow = { workspace = true } -datafusion-common = { workspace = true } -datafusion-expr = { workspace = true } -datafusion-expr-common = { workspace = true } -datafusion-functions-aggregate-common = { workspace = true } -datafusion-physical-expr-common = { workspace = true } -rand = { workspace = true } diff --git a/datafusion/physical-expr-functions-aggregate/src/aggregate.rs b/datafusion/physical-expr/src/aggregate.rs similarity index 69% rename from datafusion/physical-expr-functions-aggregate/src/aggregate.rs rename to datafusion/physical-expr/src/aggregate.rs index aa1d1999a339..5c1216f2a386 100644 --- a/datafusion/physical-expr-functions-aggregate/src/aggregate.rs +++ b/datafusion/physical-expr/src/aggregate.rs @@ -15,30 +15,46 @@ // specific language governing permissions and limitations // under the License. +pub(crate) mod groups_accumulator { + #[allow(unused_imports)] + pub(crate) mod accumulate { + pub use datafusion_functions_aggregate_common::aggregate::groups_accumulator::accumulate::NullState; + } + pub use datafusion_functions_aggregate_common::aggregate::groups_accumulator::{ + accumulate::NullState, GroupsAccumulatorAdapter, + }; +} +pub(crate) mod stats { + pub use datafusion_functions_aggregate_common::stats::StatsType; +} +pub mod utils { + pub use datafusion_functions_aggregate_common::utils::{ + adjust_output_array, get_accum_scalar_values_as_arrays, get_sort_options, + ordering_fields, DecimalAverager, Hashable, + }; +} + use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion_common::ScalarValue; use datafusion_common::{internal_err, not_impl_err, Result}; -use datafusion_expr::expr::create_function_physical_name; use datafusion_expr::AggregateUDF; use datafusion_expr::ReversedUDAF; use datafusion_expr_common::accumulator::Accumulator; -use datafusion_expr_common::groups_accumulator::GroupsAccumulator; use datafusion_expr_common::type_coercion::aggregates::check_arg_count; use datafusion_functions_aggregate_common::accumulator::AccumulatorArgs; use datafusion_functions_aggregate_common::accumulator::StateFieldsArgs; -use datafusion_functions_aggregate_common::aggregate::AggregateExpr; use datafusion_functions_aggregate_common::order::AggregateOrderSensitivity; -use datafusion_functions_aggregate_common::utils::{self, down_cast_any_ref}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use datafusion_physical_expr_common::utils::reverse_order_bys; +use datafusion_expr_common::groups_accumulator::GroupsAccumulator; use std::fmt::Debug; -use std::{any::Any, sync::Arc}; +use std::sync::Arc; -/// Builder for physical [`AggregateExpr`] +/// Builder for physical [`AggregateFunctionExpr`] /// -/// `AggregateExpr` 
contains the information necessary to call +/// `AggregateFunctionExpr` contains the information necessary to call /// an aggregate expression. #[derive(Debug, Clone)] pub struct AggregateExprBuilder { @@ -72,7 +88,7 @@ impl AggregateExprBuilder { } } - pub fn build(self) -> Result> { + pub fn build(self) -> Result> { let Self { fun, args, @@ -112,8 +128,7 @@ impl AggregateExprBuilder { let data_type = fun.return_type(&input_exprs_types)?; let is_nullable = fun.is_nullable(); let name = match alias { - // TODO: Ideally, we should build the name from physical expressions - None => create_function_physical_name(fun.name(), is_distinct, &[], None)?, + None => return internal_err!("alias should be provided"), Some(alias) => alias, }; @@ -206,6 +221,17 @@ impl AggregateFunctionExpr { &self.fun } + /// expressions that are passed to the Accumulator. + /// Single-column aggregations such as `sum` return a single value, others (e.g. `cov`) return many. + pub fn expressions(&self) -> Vec> { + self.args.clone() + } + + /// Human readable name such as `"MIN(c2)"`. + pub fn name(&self) -> &str { + &self.name + } + /// Return if the aggregation is distinct pub fn is_distinct(&self) -> bool { self.is_distinct @@ -221,34 +247,13 @@ impl AggregateFunctionExpr { self.is_reversed } + /// Return if the aggregation is nullable pub fn is_nullable(&self) -> bool { self.is_nullable } -} -impl AggregateExpr for AggregateFunctionExpr { - /// Return a reference to Any that can be used for downcasting - fn as_any(&self) -> &dyn Any { - self - } - - fn expressions(&self) -> Vec> { - self.args.clone() - } - - fn state_fields(&self) -> Result> { - let args = StateFieldsArgs { - name: &self.name, - input_types: &self.input_types, - return_type: &self.data_type, - ordering_fields: &self.ordering_fields, - is_distinct: self.is_distinct, - }; - - self.fun.state_fields(args) - } - - fn field(&self) -> Result { + /// the field of the final result of this aggregation. + pub fn field(&self) -> Result { Ok(Field::new( &self.name, self.data_type.clone(), @@ -256,7 +261,10 @@ impl AggregateExpr for AggregateFunctionExpr { )) } - fn create_accumulator(&self) -> Result> { + /// the accumulator used to accumulate values from the expressions. + /// the accumulator expects the same number of arguments as `expressions` and must + /// return states with the same description as `state_fields` + pub fn create_accumulator(&self) -> Result> { let acc_args = AccumulatorArgs { return_type: &self.data_type, schema: &self.schema, @@ -271,7 +279,83 @@ impl AggregateExpr for AggregateFunctionExpr { self.fun.accumulator(acc_args) } - fn create_sliding_accumulator(&self) -> Result> { + /// the field of the final result of this aggregation. + pub fn state_fields(&self) -> Result> { + let args = StateFieldsArgs { + name: &self.name, + input_types: &self.input_types, + return_type: &self.data_type, + ordering_fields: &self.ordering_fields, + is_distinct: self.is_distinct, + }; + + self.fun.state_fields(args) + } + + /// Order by requirements for the aggregate function + /// By default it is `None` (there is no requirement) + /// Order-sensitive aggregators, such as `FIRST_VALUE(x ORDER BY y)` should implement this + pub fn order_bys(&self) -> Option<&[PhysicalSortExpr]> { + if self.ordering_req.is_empty() { + return None; + } + + if !self.order_sensitivity().is_insensitive() { + return Some(&self.ordering_req); + } + + None + } + + /// Indicates whether aggregator can produce the correct result with any + /// arbitrary input ordering. 
By default, we assume that aggregate expressions + /// are order insensitive. + pub fn order_sensitivity(&self) -> AggregateOrderSensitivity { + if !self.ordering_req.is_empty() { + // If there is requirement, use the sensitivity of the implementation + self.fun.order_sensitivity() + } else { + // If no requirement, aggregator is order insensitive + AggregateOrderSensitivity::Insensitive + } + } + + /// Sets the indicator whether ordering requirements of the aggregator is + /// satisfied by its input. If this is not the case, aggregators with order + /// sensitivity `AggregateOrderSensitivity::Beneficial` can still produce + /// the correct result with possibly more work internally. + /// + /// # Returns + /// + /// Returns `Ok(Some(updated_expr))` if the process completes successfully. + /// If the expression can benefit from existing input ordering, but does + /// not implement the method, returns an error. Order insensitive and hard + /// requirement aggregators return `Ok(None)`. + pub fn with_beneficial_ordering( + self: Arc, + beneficial_ordering: bool, + ) -> Result>> { + let Some(updated_fn) = self + .fun + .clone() + .with_beneficial_ordering(beneficial_ordering)? + else { + return Ok(None); + }; + + AggregateExprBuilder::new(Arc::new(updated_fn), self.args.to_vec()) + .order_by(self.ordering_req.to_vec()) + .schema(Arc::new(self.schema.clone())) + .alias(self.name().to_string()) + .with_ignore_nulls(self.ignore_nulls) + .with_distinct(self.is_distinct) + .with_reversed(self.is_reversed) + .build() + .map(Some) + } + + /// Creates accumulator implementation that supports retract + pub fn create_sliding_accumulator(&self) -> Result> { let args = AccumulatorArgs { return_type: &self.data_type, schema: &self.schema, @@ -337,11 +421,10 @@ impl AggregateExpr for AggregateFunctionExpr { Ok(accumulator) } - fn name(&self) -> &str { - &self.name - } - - fn groups_accumulator_supported(&self) -> bool { + /// If the aggregate expression has a specialized + /// [`GroupsAccumulator`] implementation. If this returns true, + /// `[Self::create_groups_accumulator`] will be called. + pub fn groups_accumulator_supported(&self) -> bool { let args = AccumulatorArgs { return_type: &self.data_type, schema: &self.schema, @@ -355,7 +438,12 @@ impl AggregateExpr for AggregateFunctionExpr { self.fun.groups_accumulator_supported(args) } - fn create_groups_accumulator(&self) -> Result> { + /// Return a specialized [`GroupsAccumulator`] that manages state + /// for all groups. + /// + /// For maximum performance, a [`GroupsAccumulator`] should be + /// implemented in addition to [`Accumulator`]. 
+ pub fn create_groups_accumulator(&self) -> Result> { let args = AccumulatorArgs { return_type: &self.data_type, schema: &self.schema, @@ -369,52 +457,11 @@ impl AggregateExpr for AggregateFunctionExpr { self.fun.create_groups_accumulator(args) } - fn order_bys(&self) -> Option<&[PhysicalSortExpr]> { - if self.ordering_req.is_empty() { - return None; - } - - if !self.order_sensitivity().is_insensitive() { - return Some(&self.ordering_req); - } - - None - } - - fn order_sensitivity(&self) -> AggregateOrderSensitivity { - if !self.ordering_req.is_empty() { - // If there is requirement, use the sensitivity of the implementation - self.fun.order_sensitivity() - } else { - // If no requirement, aggregator is order insensitive - AggregateOrderSensitivity::Insensitive - } - } - - fn with_beneficial_ordering( - self: Arc, - beneficial_ordering: bool, - ) -> Result>> { - let Some(updated_fn) = self - .fun - .clone() - .with_beneficial_ordering(beneficial_ordering)? - else { - return Ok(None); - }; - - AggregateExprBuilder::new(Arc::new(updated_fn), self.args.to_vec()) - .order_by(self.ordering_req.to_vec()) - .schema(Arc::new(self.schema.clone())) - .alias(self.name().to_string()) - .with_ignore_nulls(self.ignore_nulls) - .with_distinct(self.is_distinct) - .with_reversed(self.is_reversed) - .build() - .map(Some) - } - - fn reverse_expr(&self) -> Option> { + /// Construct an expression that calculates the aggregate in reverse. + /// Typically the "reverse" expression is itself (e.g. SUM, COUNT). + /// For aggregates that do not support calculation in reverse, + /// returns None (which is the default value). + pub fn reverse_expr(&self) -> Option> { match self.fun.reverse_udf() { ReversedUDAF::NotSupported => None, ReversedUDAF::Identical => Some(Arc::new(self.clone())), @@ -442,33 +489,72 @@ impl AggregateExpr for AggregateFunctionExpr { } } - fn get_minmax_desc(&self) -> Option<(Field, bool)> { + /// Returns all expressions used in the [`AggregateFunctionExpr`]. + /// These expressions are (1)function arguments, (2) order by expressions. + pub fn all_expressions(&self) -> AggregatePhysicalExpressions { + let args = self.expressions(); + let order_bys = self.order_bys().unwrap_or(&[]); + let order_by_exprs = order_bys + .iter() + .map(|sort_expr| Arc::clone(&sort_expr.expr)) + .collect::>(); + AggregatePhysicalExpressions { + args, + order_by_exprs, + } + } + + /// Rewrites [`AggregateFunctionExpr`], with new expressions given. The argument should be consistent + /// with the return value of the [`AggregateFunctionExpr::all_expressions`] method. + /// Returns `Some(Arc)` if re-write is supported, otherwise returns `None`. 
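With the move into `datafusion-physical-expr`, `AggregateExprBuilder::build()` above now errors unless an explicit alias is provided, instead of deriving a physical name from logical expressions. A sketch of building an `AggregateFunctionExpr`; the `sum_udaf()` helper, the column name `x`, and the `datafusion_physical_expr::aggregate` module path are assumptions based on the file move shown in this diff:

```rust
// Sketch only: `sum_udaf()` and the column name are illustrative.
use std::sync::Arc;

use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::Result;
use datafusion_functions_aggregate::sum::sum_udaf;
use datafusion_physical_expr::aggregate::AggregateExprBuilder;
use datafusion_physical_expr::expressions::col;

fn main() -> Result<()> {
    let schema = Schema::new(vec![Field::new("x", DataType::Int64, true)]);
    let agg = AggregateExprBuilder::new(sum_udaf(), vec![col("x", &schema)?])
        // `build()` now requires an explicit alias; omitting it is an error.
        .alias("sum_x".to_string())
        .schema(Arc::new(schema))
        .build()?;
    println!("{}", agg.name()); // prints the alias: sum_x
    Ok(())
}
```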
+ pub fn with_new_expressions( + &self, + _args: Vec>, + _order_by_exprs: Vec>, + ) -> Option> { + None + } + + /// If this function is max, return (output_field, true) + /// if the function is min, return (output_field, false) + /// otherwise return None (the default) + /// + /// output_field is the name of the column produced by this aggregate + /// + /// Note: this is used to use special aggregate implementations in certain conditions + pub fn get_minmax_desc(&self) -> Option<(Field, bool)> { self.fun .is_descending() .and_then(|flag| self.field().ok().map(|f| (f, flag))) } - fn default_value(&self, data_type: &DataType) -> Result { + /// Returns default value of the function given the input is Null + /// Most of the aggregate function return Null if input is Null, + /// while `count` returns 0 if input is Null + pub fn default_value(&self, data_type: &DataType) -> Result { self.fun.default_value(data_type) } } -impl PartialEq for AggregateFunctionExpr { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| { - self.name == x.name - && self.data_type == x.data_type - && self.fun == x.fun - && self.args.len() == x.args.len() - && self - .args - .iter() - .zip(x.args.iter()) - .all(|(this_arg, other_arg)| this_arg.eq(other_arg)) - }) - .unwrap_or(false) +/// Stores the physical expressions used inside the `AggregateExpr`. +pub struct AggregatePhysicalExpressions { + /// Aggregate function arguments + pub args: Vec>, + /// Order by expressions + pub order_by_exprs: Vec>, +} + +impl PartialEq for AggregateFunctionExpr { + fn eq(&self, other: &Self) -> bool { + self.name == other.name + && self.data_type == other.data_type + && self.fun == other.fun + && self.args.len() == other.args.len() + && self + .args + .iter() + .zip(other.args.iter()) + .all(|(this_arg, other_arg)| this_arg.eq(other_arg)) } } diff --git a/datafusion/physical-expr/src/equivalence/mod.rs b/datafusion/physical-expr/src/equivalence/mod.rs index b9228282b081..d862eda5018e 100644 --- a/datafusion/physical-expr/src/equivalence/mod.rs +++ b/datafusion/physical-expr/src/equivalence/mod.rs @@ -239,7 +239,7 @@ mod tests { // Convert each tuple to PhysicalSortRequirement pub fn convert_to_sort_reqs( in_data: &[(&Arc, Option)], - ) -> Vec { + ) -> LexRequirement { in_data .iter() .map(|(expr, options)| { diff --git a/datafusion/physical-expr/src/equivalence/ordering.rs b/datafusion/physical-expr/src/equivalence/ordering.rs index c4b8a5c46563..49a0de7252ab 100644 --- a/datafusion/physical-expr/src/equivalence/ordering.rs +++ b/datafusion/physical-expr/src/equivalence/ordering.rs @@ -272,7 +272,7 @@ mod tests { // Crude ordering doesn't satisfy finer ordering. 
should return false let mut eq_properties_crude = EquivalenceProperties::new(Arc::clone(&input_schema)); - eq_properties_crude.oeq_class.push(crude.clone()); + eq_properties_crude.oeq_class.push(crude); assert!(!eq_properties_crude.ordering_satisfy(&finer)); Ok(()) } diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 26885ae1350c..2680a7930ff1 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -41,6 +41,7 @@ use datafusion_expr::type_coercion::binary::get_result_type; use datafusion_expr::{ColumnarValue, Operator}; use datafusion_physical_expr_common::datum::{apply, apply_cmp, apply_cmp_for_nested}; +use crate::expressions::binary::kernels::concat_elements_utf8view; use kernels::{ bitwise_and_dyn, bitwise_and_dyn_scalar, bitwise_or_dyn, bitwise_or_dyn_scalar, bitwise_shift_left_dyn, bitwise_shift_left_dyn_scalar, bitwise_shift_right_dyn, @@ -131,34 +132,6 @@ impl std::fmt::Display for BinaryExpr { } } -/// Invoke a compute kernel on a pair of binary data arrays -macro_rules! compute_utf8_op { - ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{ - let ll = $LEFT - .as_any() - .downcast_ref::<$DT>() - .expect("compute_op failed to downcast left side array"); - let rr = $RIGHT - .as_any() - .downcast_ref::<$DT>() - .expect("compute_op failed to downcast right side array"); - Ok(Arc::new(paste::expr! {[<$OP _utf8>]}(&ll, &rr)?)) - }}; -} - -macro_rules! binary_string_array_op { - ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{ - match $LEFT.data_type() { - DataType::Utf8 => compute_utf8_op!($LEFT, $RIGHT, $OP, StringArray), - DataType::LargeUtf8 => compute_utf8_op!($LEFT, $RIGHT, $OP, LargeStringArray), - other => internal_err!( - "Data type {:?} not supported for binary operation '{}' on string arrays", - other, stringify!($OP) - ), - } - }}; -} - /// Invoke a boolean kernel on a pair of arrays macro_rules! boolean_op { ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{ @@ -662,7 +635,7 @@ impl BinaryExpr { BitwiseXor => bitwise_xor_dyn(left, right), BitwiseShiftRight => bitwise_shift_right_dyn(left, right), BitwiseShiftLeft => bitwise_shift_left_dyn(left, right), - StringConcat => binary_string_array_op!(left, right, concat_elements), + StringConcat => concat_elements(left, right), AtArrow | ArrowAt => { unreachable!("ArrowAt and AtArrow should be rewritten to function") } @@ -670,6 +643,28 @@ impl BinaryExpr { } } +fn concat_elements(left: Arc, right: Arc) -> Result { + Ok(match left.data_type() { + DataType::Utf8 => Arc::new(concat_elements_utf8( + left.as_string::(), + right.as_string::(), + )?), + DataType::LargeUtf8 => Arc::new(concat_elements_utf8( + left.as_string::(), + right.as_string::(), + )?), + DataType::Utf8View => Arc::new(concat_elements_utf8view( + left.as_string_view(), + right.as_string_view(), + )?), + other => { + return internal_err!( + "Data type {other:?} not supported for binary operation 'concat_elements' on string arrays" + ); + } + }) +} + /// Create a binary expression whose arguments are correctly coerced. /// This function errors if it is not possible to coerce the arguments /// to computational types supported by the operator. 
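As a quick, standalone sanity check of the Utf8View concatenation path added above, the following sketch (not part of this diff; the test module, its name, and the sample values are illustrative assumptions, written as if it lived next to the kernel in kernels.rs) builds two StringViewArrays with the same StringViewBuilder API used above and verifies that a null on either side produces a null output row:

#[cfg(test)]
mod concat_utf8view_sketch {
    use super::concat_elements_utf8view; // assumed path: alongside the kernel
    use arrow_array::builder::StringViewBuilder;
    use arrow_array::Array;

    #[test]
    fn concat_propagates_nulls() {
        let mut left = StringViewBuilder::with_capacity(2);
        left.append_value("hello, ");
        left.append_null();
        let left = left.finish();

        let mut right = StringViewBuilder::with_capacity(2);
        right.append_value("a string longer than twelve bytes");
        right.append_value("ignored");
        let right = right.finish();

        let out = concat_elements_utf8view(&left, &right).unwrap();
        // element-wise concatenation of the non-null pair
        assert_eq!(out.value(0), "hello, a string longer than twelve bytes");
        // a null on either side yields a null output row
        assert!(out.is_null(1));
    }
}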
@@ -2587,7 +2582,7 @@ mod tests { &a, &b, Operator::RegexIMatch, - regex_expected.clone(), + regex_expected, )?; apply_logic_op( &Arc::new(schema.clone()), @@ -2601,7 +2596,7 @@ mod tests { &a, &b, Operator::RegexNotIMatch, - regex_not_expected.clone(), + regex_not_expected, )?; Ok(()) diff --git a/datafusion/physical-expr/src/expressions/binary/kernels.rs b/datafusion/physical-expr/src/expressions/binary/kernels.rs index b0736e140fec..1f9cfed1a44f 100644 --- a/datafusion/physical-expr/src/expressions/binary/kernels.rs +++ b/datafusion/physical-expr/src/expressions/binary/kernels.rs @@ -27,6 +27,7 @@ use arrow::datatypes::DataType; use datafusion_common::internal_err; use datafusion_common::{Result, ScalarValue}; +use arrow_schema::ArrowError; use std::sync::Arc; /// Downcasts $LEFT and $RIGHT to $ARRAY_TYPE and then calls $KERNEL($LEFT, $RIGHT) @@ -131,3 +132,35 @@ create_dyn_scalar_kernel!(bitwise_or_dyn_scalar, bitwise_or_scalar); create_dyn_scalar_kernel!(bitwise_xor_dyn_scalar, bitwise_xor_scalar); create_dyn_scalar_kernel!(bitwise_shift_right_dyn_scalar, bitwise_shift_right_scalar); create_dyn_scalar_kernel!(bitwise_shift_left_dyn_scalar, bitwise_shift_left_scalar); + +pub fn concat_elements_utf8view( + left: &StringViewArray, + right: &StringViewArray, +) -> std::result::Result { + let capacity = left + .data_buffers() + .iter() + .zip(right.data_buffers().iter()) + .map(|(b1, b2)| b1.len() + b2.len()) + .sum(); + let mut result = StringViewBuilder::with_capacity(capacity); + + // Avoid reallocations by writing to a reused buffer (note we + // could be even more efficient r by creating the view directly + // here and avoid the buffer but that would be more complex) + let mut buffer = String::new(); + + for (left, right) in left.iter().zip(right.iter()) { + if let (Some(left), Some(right)) = (left, right) { + use std::fmt::Write; + buffer.clear(); + write!(&mut buffer, "{left}{right}") + .expect("writing into string buffer failed"); + result.append_value(&buffer); + } else { + // at least one of the values is null, so the output is also null + result.append_null() + } + } + Ok(result.finish()) +} diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs index c6afb5c05985..712175c9afbe 100644 --- a/datafusion/physical-expr/src/expressions/case.rs +++ b/datafusion/physical-expr/src/expressions/case.rs @@ -380,7 +380,7 @@ impl CaseExpr { // keep `else_expr`'s data type and return type consistent let e = self.else_expr.as_ref().unwrap(); - let expr = try_cast(Arc::clone(e), &batch.schema(), return_type.clone()) + let expr = try_cast(Arc::clone(e), &batch.schema(), return_type) .unwrap_or_else(|_| Arc::clone(e)); let else_ = Scalar::new(expr.evaluate(batch)?.into_array(1)?); diff --git a/datafusion/physical-expr/src/lib.rs b/datafusion/physical-expr/src/lib.rs index c4255172d680..7db7188b85d3 100644 --- a/datafusion/physical-expr/src/lib.rs +++ b/datafusion/physical-expr/src/lib.rs @@ -19,27 +19,7 @@ #![deny(clippy::clone_on_ref_ptr)] // Backward compatibility -pub mod aggregate { - pub(crate) mod groups_accumulator { - #[allow(unused_imports)] - pub(crate) mod accumulate { - pub use datafusion_functions_aggregate_common::aggregate::groups_accumulator::accumulate::NullState; - } - pub use datafusion_functions_aggregate_common::aggregate::groups_accumulator::{ - accumulate::NullState, GroupsAccumulatorAdapter, - }; - } - pub(crate) mod stats { - pub use datafusion_functions_aggregate_common::stats::StatsType; - } - pub mod utils { - 
pub use datafusion_functions_aggregate_common::utils::{ - adjust_output_array, down_cast_any_ref, get_accum_scalar_values_as_arrays, - get_sort_options, ordering_fields, DecimalAverager, Hashable, - }; - } - pub use datafusion_functions_aggregate_common::aggregate::AggregateExpr; -} +pub mod aggregate; pub mod analysis; pub mod binary_map { pub use datafusion_physical_expr_common::binary_map::{ArrowBytesSet, OutputType}; @@ -67,9 +47,6 @@ pub mod execution_props { pub use aggregate::groups_accumulator::{GroupsAccumulatorAdapter, NullState}; pub use analysis::{analyze, AnalysisContext, ExprBoundaries}; -pub use datafusion_functions_aggregate_common::aggregate::{ - AggregateExpr, AggregatePhysicalExpressions, -}; pub use equivalence::{calculate_union, ConstExpr, EquivalenceProperties}; pub use partitioning::{Distribution, Partitioning}; pub use physical_expr::{ diff --git a/datafusion/physical-expr/src/partitioning.rs b/datafusion/physical-expr/src/partitioning.rs index 6472dd47489c..45beeb7b81af 100644 --- a/datafusion/physical-expr/src/partitioning.rs +++ b/datafusion/physical-expr/src/partitioning.rs @@ -24,8 +24,8 @@ use crate::{physical_exprs_equal, EquivalenceProperties, PhysicalExpr}; /// Output partitioning supported by [`ExecutionPlan`]s. /// -/// When `executed`, `ExecutionPlan`s produce one or more independent stream of -/// data batches in parallel, referred to as partitions. The streams are Rust +/// Calling [`ExecutionPlan::execute`] produce one or more independent streams of +/// [`RecordBatch`]es in parallel, referred to as partitions. The streams are Rust /// `async` [`Stream`]s (a special kind of future). The number of output /// partitions varies based on the input and the operation performed. /// @@ -102,6 +102,8 @@ use crate::{physical_exprs_equal, EquivalenceProperties, PhysicalExpr}; /// Plans such as `FilterExec` produce the same number of output streams /// (partitions) as input streams (partitions). /// +/// [`RecordBatch`]: arrow::record_batch::RecordBatch +/// [`ExecutionPlan::execute`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.ExecutionPlan.html#tymethod.execute /// [`ExecutionPlan`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.ExecutionPlan.html /// [`Stream`]: https://docs.rs/futures/latest/futures/stream/trait.Stream.html #[derive(Debug, Clone)] diff --git a/datafusion/physical-expr/src/window/aggregate.rs b/datafusion/physical-expr/src/window/aggregate.rs index 52015f425217..5439e140502a 100644 --- a/datafusion/physical-expr/src/window/aggregate.rs +++ b/datafusion/physical-expr/src/window/aggregate.rs @@ -29,20 +29,19 @@ use datafusion_common::ScalarValue; use datafusion_common::{DataFusionError, Result}; use datafusion_expr::{Accumulator, WindowFrame}; +use crate::aggregate::AggregateFunctionExpr; use crate::window::window_expr::AggregateWindowExpr; use crate::window::{ PartitionBatches, PartitionWindowAggStates, SlidingAggregateWindowExpr, WindowExpr, }; -use crate::{ - expressions::PhysicalSortExpr, reverse_order_bys, AggregateExpr, PhysicalExpr, -}; +use crate::{expressions::PhysicalSortExpr, reverse_order_bys, PhysicalExpr}; /// A window expr that takes the form of an aggregate function. /// /// See comments on [`WindowExpr`] for more details. 
#[derive(Debug)] pub struct PlainAggregateWindowExpr { - aggregate: Arc, + aggregate: Arc, partition_by: Vec>, order_by: Vec, window_frame: Arc, @@ -51,7 +50,7 @@ pub struct PlainAggregateWindowExpr { impl PlainAggregateWindowExpr { /// Create a new aggregate window function expression pub fn new( - aggregate: Arc, + aggregate: Arc, partition_by: &[Arc], order_by: &[PhysicalSortExpr], window_frame: Arc, @@ -65,7 +64,7 @@ impl PlainAggregateWindowExpr { } /// Get aggregate expr of AggregateWindowExpr - pub fn get_aggregate_expr(&self) -> &Arc { + pub fn get_aggregate_expr(&self) -> &Arc { &self.aggregate } } diff --git a/datafusion/physical-expr/src/window/sliding_aggregate.rs b/datafusion/physical-expr/src/window/sliding_aggregate.rs index afa799e86953..ac3a4f4c09ec 100644 --- a/datafusion/physical-expr/src/window/sliding_aggregate.rs +++ b/datafusion/physical-expr/src/window/sliding_aggregate.rs @@ -28,13 +28,12 @@ use arrow::record_batch::RecordBatch; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{Accumulator, WindowFrame}; +use crate::aggregate::AggregateFunctionExpr; use crate::window::window_expr::AggregateWindowExpr; use crate::window::{ PartitionBatches, PartitionWindowAggStates, PlainAggregateWindowExpr, WindowExpr, }; -use crate::{ - expressions::PhysicalSortExpr, reverse_order_bys, AggregateExpr, PhysicalExpr, -}; +use crate::{expressions::PhysicalSortExpr, reverse_order_bys, PhysicalExpr}; /// A window expr that takes the form of an aggregate function that /// can be incrementally computed over sliding windows. @@ -42,7 +41,7 @@ use crate::{ /// See comments on [`WindowExpr`] for more details. #[derive(Debug)] pub struct SlidingAggregateWindowExpr { - aggregate: Arc, + aggregate: Arc, partition_by: Vec>, order_by: Vec, window_frame: Arc, @@ -51,7 +50,7 @@ pub struct SlidingAggregateWindowExpr { impl SlidingAggregateWindowExpr { /// Create a new (sliding) aggregate window function expression. pub fn new( - aggregate: Arc, + aggregate: Arc, partition_by: &[Arc], order_by: &[PhysicalSortExpr], window_frame: Arc, @@ -64,8 +63,8 @@ impl SlidingAggregateWindowExpr { } } - /// Get the [AggregateExpr] of this object. - pub fn get_aggregate_expr(&self) -> &Arc { + /// Get the [AggregateFunctionExpr] of this object. 
+ pub fn get_aggregate_expr(&self) -> &Arc { &self.aggregate } } diff --git a/datafusion/physical-optimizer/src/aggregate_statistics.rs b/datafusion/physical-optimizer/src/aggregate_statistics.rs index 66b250c5063b..2b8725b5bac7 100644 --- a/datafusion/physical-optimizer/src/aggregate_statistics.rs +++ b/datafusion/physical-optimizer/src/aggregate_statistics.rs @@ -23,7 +23,7 @@ use datafusion_common::scalar::ScalarValue; use datafusion_common::Result; use datafusion_physical_plan::aggregates::AggregateExec; use datafusion_physical_plan::projection::ProjectionExec; -use datafusion_physical_plan::{expressions, AggregateExpr, ExecutionPlan, Statistics}; +use datafusion_physical_plan::{expressions, ExecutionPlan, Statistics}; use crate::PhysicalOptimizerRule; use datafusion_common::stats::Precision; @@ -58,12 +58,12 @@ impl PhysicalOptimizerRule for AggregateStatistics { let mut projections = vec![]; for expr in partial_agg_exec.aggr_expr() { if let Some((non_null_rows, name)) = - take_optimizable_column_and_table_count(&**expr, &stats) + take_optimizable_column_and_table_count(expr, &stats) { projections.push((expressions::lit(non_null_rows), name.to_owned())); - } else if let Some((min, name)) = take_optimizable_min(&**expr, &stats) { + } else if let Some((min, name)) = take_optimizable_min(expr, &stats) { projections.push((expressions::lit(min), name.to_owned())); - } else if let Some((max, name)) = take_optimizable_max(&**expr, &stats) { + } else if let Some((max, name)) = take_optimizable_max(expr, &stats) { projections.push((expressions::lit(max), name.to_owned())); } else { // TODO: we need all aggr_expr to be resolved (cf TODO fullres) @@ -137,7 +137,7 @@ fn take_optimizable(node: &dyn ExecutionPlan) -> Option> /// If this agg_expr is a count that can be exactly derived from the statistics, return it. fn take_optimizable_column_and_table_count( - agg_expr: &dyn AggregateExpr, + agg_expr: &AggregateFunctionExpr, stats: &Statistics, ) -> Option<(ScalarValue, String)> { let col_stats = &stats.column_statistics; @@ -174,7 +174,7 @@ fn take_optimizable_column_and_table_count( /// If this agg_expr is a min that is exactly defined in the statistics, return it. fn take_optimizable_min( - agg_expr: &dyn AggregateExpr, + agg_expr: &AggregateFunctionExpr, stats: &Statistics, ) -> Option<(ScalarValue, String)> { if let Precision::Exact(num_rows) = &stats.num_rows { @@ -220,7 +220,7 @@ fn take_optimizable_min( /// If this agg_expr is a max that is exactly defined in the statistics, return it. 
fn take_optimizable_max( - agg_expr: &dyn AggregateExpr, + agg_expr: &AggregateFunctionExpr, stats: &Statistics, ) -> Option<(ScalarValue, String)> { if let Precision::Exact(num_rows) = &stats.num_rows { @@ -266,33 +266,27 @@ fn take_optimizable_max( // TODO: Move this check into AggregateUDFImpl // https://github.com/apache/datafusion/issues/11153 -fn is_non_distinct_count(agg_expr: &dyn AggregateExpr) -> bool { - if let Some(agg_expr) = agg_expr.as_any().downcast_ref::() { - if agg_expr.fun().name() == "count" && !agg_expr.is_distinct() { - return true; - } +fn is_non_distinct_count(agg_expr: &AggregateFunctionExpr) -> bool { + if agg_expr.fun().name() == "count" && !agg_expr.is_distinct() { + return true; } false } // TODO: Move this check into AggregateUDFImpl // https://github.com/apache/datafusion/issues/11153 -fn is_min(agg_expr: &dyn AggregateExpr) -> bool { - if let Some(agg_expr) = agg_expr.as_any().downcast_ref::() { - if agg_expr.fun().name().to_lowercase() == "min" { - return true; - } +fn is_min(agg_expr: &AggregateFunctionExpr) -> bool { + if agg_expr.fun().name().to_lowercase() == "min" { + return true; } false } // TODO: Move this check into AggregateUDFImpl // https://github.com/apache/datafusion/issues/11153 -fn is_max(agg_expr: &dyn AggregateExpr) -> bool { - if let Some(agg_expr) = agg_expr.as_any().downcast_ref::() { - if agg_expr.fun().name().to_lowercase() == "max" { - return true; - } +fn is_max(agg_expr: &AggregateFunctionExpr) -> bool { + if agg_expr.fun().name().to_lowercase() == "max" { + return true; } false } diff --git a/datafusion/physical-optimizer/src/limited_distinct_aggregation.rs b/datafusion/physical-optimizer/src/limited_distinct_aggregation.rs index e18e530072db..8653ad19da77 100644 --- a/datafusion/physical-optimizer/src/limited_distinct_aggregation.rs +++ b/datafusion/physical-optimizer/src/limited_distinct_aggregation.rs @@ -33,8 +33,8 @@ use itertools::Itertools; /// An optimizer rule that passes a `limit` hint into grouped aggregations which don't require all /// rows in the group to be processed for correctness. 
Example queries fitting this description are: -/// `SELECT distinct l_orderkey FROM lineitem LIMIT 10;` -/// `SELECT l_orderkey FROM lineitem GROUP BY l_orderkey LIMIT 10;` +/// - `SELECT distinct l_orderkey FROM lineitem LIMIT 10;` +/// - `SELECT l_orderkey FROM lineitem GROUP BY l_orderkey LIMIT 10;` pub struct LimitedDistinctAggregation {} impl LimitedDistinctAggregation { diff --git a/datafusion/physical-optimizer/src/output_requirements.rs b/datafusion/physical-optimizer/src/output_requirements.rs index fdfdd349e36e..4f6f91a2348f 100644 --- a/datafusion/physical-optimizer/src/output_requirements.rs +++ b/datafusion/physical-optimizer/src/output_requirements.rs @@ -165,7 +165,7 @@ impl ExecutionPlan for OutputRequirementExec { vec![&self.input] } - fn required_input_ordering(&self) -> Vec>> { + fn required_input_ordering(&self) -> Vec> { vec![self.order_requirement.clone()] } diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index 78da4dc9c53f..24387c5f15ee 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -55,7 +55,6 @@ datafusion-functions-aggregate = { workspace = true } datafusion-functions-aggregate-common = { workspace = true } datafusion-physical-expr = { workspace = true, default-features = true } datafusion-physical-expr-common = { workspace = true } -datafusion-physical-expr-functions-aggregate = { workspace = true } futures = { workspace = true } half = { workspace = true } hashbrown = { workspace = true } diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 89d4c452cca6..0f33a9d7b992 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -42,10 +42,11 @@ use datafusion_expr::Accumulator; use datafusion_physical_expr::{ equivalence::{collapse_lex_req, ProjectionMapping}, expressions::{Column, UnKnownColumn}, - physical_exprs_contains, AggregateExpr, EquivalenceProperties, LexOrdering, - LexRequirement, PhysicalExpr, PhysicalSortRequirement, + physical_exprs_contains, EquivalenceProperties, LexOrdering, LexRequirement, + PhysicalExpr, PhysicalSortRequirement, }; +use datafusion_physical_expr::aggregate::AggregateFunctionExpr; use itertools::Itertools; pub mod group_values; @@ -110,9 +111,9 @@ impl AggregateMode { /// Represents `GROUP BY` clause in the plan (including the more general GROUPING SET) /// In the case of a simple `GROUP BY a, b` clause, this will contain the expression [a, b] /// and a single group [false, false]. -/// In the case of `GROUP BY GROUPING SET/CUBE/ROLLUP` the planner will expand the expression +/// In the case of `GROUP BY GROUPING SETS/CUBE/ROLLUP` the planner will expand the expression /// into multiple groups, using null expressions to align each group. -/// For example, with a group by clause `GROUP BY GROUPING SET ((a,b),(a),(b))` the planner should +/// For example, with a group by clause `GROUP BY GROUPING SETS ((a,b),(a),(b))` the planner should /// create a `PhysicalGroupBy` like /// ```text /// PhysicalGroupBy { @@ -133,7 +134,7 @@ pub struct PhysicalGroupBy { null_expr: Vec<(Arc, String)>, /// Null mask for each group in this grouping set. Each group is /// composed of either one of the group expressions in expr or a null - /// expression in null_expr. If `groups[i][j]` is true, then the the + /// expression in null_expr. If `groups[i][j]` is true, then the /// j-th expression in the i-th group is NULL, otherwise it is `expr[j]`. 
groups: Vec>, } @@ -253,7 +254,7 @@ pub struct AggregateExec { /// Group by expressions group_by: PhysicalGroupBy, /// Aggregate expressions - aggr_expr: Vec>, + aggr_expr: Vec>, /// FILTER (WHERE clause) expression for each aggregate expression filter_expr: Vec>>, /// Set if the output of this aggregation is truncated by a upstream sort/limit clause @@ -280,7 +281,10 @@ impl AggregateExec { /// Function used in `ConvertFirstLast` optimizer rule, /// where we need parts of the new value, others cloned from the old one /// Rewrites aggregate exec with new aggregate expressions. - pub fn with_new_aggr_exprs(&self, aggr_expr: Vec>) -> Self { + pub fn with_new_aggr_exprs( + &self, + aggr_expr: Vec>, + ) -> Self { Self { aggr_expr, // clone the rest of the fields @@ -306,7 +310,7 @@ impl AggregateExec { pub fn try_new( mode: AggregateMode, group_by: PhysicalGroupBy, - aggr_expr: Vec>, + aggr_expr: Vec>, filter_expr: Vec>>, input: Arc, input_schema: SchemaRef, @@ -343,7 +347,7 @@ impl AggregateExec { fn try_new_with_schema( mode: AggregateMode, group_by: PhysicalGroupBy, - mut aggr_expr: Vec>, + mut aggr_expr: Vec>, filter_expr: Vec>>, input: Arc, input_schema: SchemaRef, @@ -451,7 +455,7 @@ impl AggregateExec { } /// Aggregate expressions - pub fn aggr_expr(&self) -> &[Arc] { + pub fn aggr_expr(&self) -> &[Arc] { &self.aggr_expr } @@ -788,7 +792,7 @@ impl ExecutionPlan for AggregateExec { fn create_schema( input_schema: &Schema, group_expr: &[(Arc, String)], - aggr_expr: &[Arc], + aggr_expr: &[Arc], contains_null_expr: bool, mode: AggregateMode, ) -> Result { @@ -834,7 +838,7 @@ fn group_schema(schema: &Schema, group_count: usize) -> SchemaRef { /// /// # Parameters /// -/// - `aggr_expr`: A reference to an `Arc` representing the +/// - `aggr_expr`: A reference to an `Arc` representing the /// aggregate expression. /// - `group_by`: A reference to a `PhysicalGroupBy` instance representing the /// physical GROUP BY expression. @@ -846,7 +850,7 @@ fn group_schema(schema: &Schema, group_count: usize) -> SchemaRef { /// A `LexOrdering` instance indicating the lexical ordering requirement for /// the aggregate expression. fn get_aggregate_expr_req( - aggr_expr: &Arc, + aggr_expr: &Arc, group_by: &PhysicalGroupBy, agg_mode: &AggregateMode, ) -> LexOrdering { @@ -894,7 +898,7 @@ fn get_aggregate_expr_req( /// the aggregator requirement is incompatible. fn finer_ordering( existing_req: &LexOrdering, - aggr_expr: &Arc, + aggr_expr: &Arc, group_by: &PhysicalGroupBy, eq_properties: &EquivalenceProperties, agg_mode: &AggregateMode, @@ -912,7 +916,7 @@ pub fn concat_slices(lhs: &[T], rhs: &[T]) -> Vec { /// /// # Parameters /// -/// - `aggr_exprs`: A slice of `Arc` containing all the +/// - `aggr_exprs`: A slice of `Arc` containing all the /// aggregate expressions. /// - `group_by`: A reference to a `PhysicalGroupBy` instance representing the /// physical GROUP BY expression. @@ -926,7 +930,7 @@ pub fn concat_slices(lhs: &[T], rhs: &[T]) -> Vec { /// A `LexRequirement` instance, which is the requirement that satisfies all the /// aggregate requirements. Returns an error in case of conflicting requirements. 
pub fn get_finer_aggregate_exprs_requirement( - aggr_exprs: &mut [Arc], + aggr_exprs: &mut [Arc], group_by: &PhysicalGroupBy, eq_properties: &EquivalenceProperties, agg_mode: &AggregateMode, @@ -996,10 +1000,10 @@ pub fn get_finer_aggregate_exprs_requirement( /// returns physical expressions for arguments to evaluate against a batch /// The expressions are different depending on `mode`: -/// * Partial: AggregateExpr::expressions -/// * Final: columns of `AggregateExpr::state_fields()` +/// * Partial: AggregateFunctionExpr::expressions +/// * Final: columns of `AggregateFunctionExpr::state_fields()` pub fn aggregate_expressions( - aggr_expr: &[Arc], + aggr_expr: &[Arc], mode: &AggregateMode, col_idx_base: usize, ) -> Result>>> { @@ -1035,12 +1039,12 @@ pub fn aggregate_expressions( } /// uses `state_fields` to build a vec of physical column expressions required to merge the -/// AggregateExpr' accumulator's state. +/// AggregateFunctionExpr' accumulator's state. /// /// `index_base` is the starting physical column index for the next expanded state field. fn merge_expressions( index_base: usize, - expr: &Arc, + expr: &Arc, ) -> Result>> { expr.state_fields().map(|fields| { fields @@ -1054,7 +1058,7 @@ fn merge_expressions( pub type AccumulatorItem = Box; pub fn create_accumulators( - aggr_expr: &[Arc], + aggr_expr: &[Arc], ) -> Result> { aggr_expr .iter() @@ -1208,7 +1212,7 @@ mod tests { }; use datafusion_execution::config::SessionConfig; use datafusion_execution::memory_pool::FairSpillPool; - use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; + use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_functions_aggregate::array_agg::array_agg_udaf; use datafusion_functions_aggregate::average::avg_udaf; use datafusion_functions_aggregate::count::count_udaf; @@ -1218,8 +1222,8 @@ mod tests { use datafusion_physical_expr::PhysicalSortExpr; use crate::common::collect; + use datafusion_physical_expr::aggregate::AggregateExprBuilder; use datafusion_physical_expr::expressions::Literal; - use datafusion_physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; use futures::{FutureExt, Stream}; // Generate a schema which consists of 5 columns (a, b, c, d, e) @@ -1320,11 +1324,10 @@ mod tests { fn new_spill_ctx(batch_size: usize, max_memory: usize) -> Arc { let session_config = SessionConfig::new().with_batch_size(batch_size); let runtime = Arc::new( - RuntimeEnv::new( - RuntimeConfig::default() - .with_memory_pool(Arc::new(FairSpillPool::new(max_memory))), - ) - .unwrap(), + RuntimeEnvBuilder::default() + .with_memory_pool(Arc::new(FairSpillPool::new(max_memory))) + .build() + .unwrap(), ); let task_ctx = TaskContext::default() .with_session_config(session_config) @@ -1496,13 +1499,12 @@ mod tests { groups: vec![vec![false]], }; - let aggregates: Vec> = - vec![ - AggregateExprBuilder::new(avg_udaf(), vec![col("b", &input_schema)?]) - .schema(Arc::clone(&input_schema)) - .alias("AVG(b)") - .build()?, - ]; + let aggregates: Vec> = vec![ + AggregateExprBuilder::new(avg_udaf(), vec![col("b", &input_schema)?]) + .schema(Arc::clone(&input_schema)) + .alias("AVG(b)") + .build()?, + ]; let task_ctx = if spill { // set to an appropriate value to trigger spill @@ -1793,7 +1795,7 @@ mod tests { } // Median(a) - fn test_median_agg_expr(schema: SchemaRef) -> Result> { + fn test_median_agg_expr(schema: SchemaRef) -> Result> { AggregateExprBuilder::new(median_udaf(), vec![col("a", &schema)?]) .schema(schema) .alias("MEDIAN(a)") @@ -1806,7 +1808,9 @@ mod tests { let 
input_schema = input.schema(); let runtime = Arc::new( - RuntimeEnv::new(RuntimeConfig::default().with_memory_limit(1, 1.0)).unwrap(), + RuntimeEnvBuilder::default() + .with_memory_limit(1, 1.0) + .build()?, ); let task_ctx = TaskContext::default().with_runtime(runtime); let task_ctx = Arc::new(task_ctx); @@ -1819,17 +1823,16 @@ mod tests { }; // something that allocates within the aggregator - let aggregates_v0: Vec> = + let aggregates_v0: Vec> = vec![test_median_agg_expr(Arc::clone(&input_schema))?]; // use fast-path in `row_hash.rs`. - let aggregates_v2: Vec> = - vec![ - AggregateExprBuilder::new(avg_udaf(), vec![col("b", &input_schema)?]) - .schema(Arc::clone(&input_schema)) - .alias("AVG(b)") - .build()?, - ]; + let aggregates_v2: Vec> = vec![ + AggregateExprBuilder::new(avg_udaf(), vec![col("b", &input_schema)?]) + .schema(Arc::clone(&input_schema)) + .alias("AVG(b)") + .build()?, + ]; for (version, groups, aggregates) in [ (0, groups_none, aggregates_v0), @@ -1883,13 +1886,12 @@ mod tests { let groups = PhysicalGroupBy::default(); - let aggregates: Vec> = - vec![ - AggregateExprBuilder::new(avg_udaf(), vec![col("a", &schema)?]) - .schema(Arc::clone(&schema)) - .alias("AVG(a)") - .build()?, - ]; + let aggregates: Vec> = vec![ + AggregateExprBuilder::new(avg_udaf(), vec![col("a", &schema)?]) + .schema(Arc::clone(&schema)) + .alias("AVG(a)") + .build()?, + ]; let blocking_exec = Arc::new(BlockingExec::new(Arc::clone(&schema), 1)); let refs = blocking_exec.refs(); @@ -1923,13 +1925,12 @@ mod tests { let groups = PhysicalGroupBy::new_single(vec![(col("a", &schema)?, "a".to_string())]); - let aggregates: Vec> = - vec![ - AggregateExprBuilder::new(avg_udaf(), vec![col("b", &schema)?]) - .schema(Arc::clone(&schema)) - .alias("AVG(b)") - .build()?, - ]; + let aggregates: Vec> = vec![ + AggregateExprBuilder::new(avg_udaf(), vec![col("b", &schema)?]) + .schema(Arc::clone(&schema)) + .alias("AVG(b)") + .build()?, + ]; let blocking_exec = Arc::new(BlockingExec::new(Arc::clone(&schema), 1)); let refs = blocking_exec.refs(); @@ -1974,7 +1975,7 @@ mod tests { fn test_first_value_agg_expr( schema: &Schema, sort_options: SortOptions, - ) -> Result> { + ) -> Result> { let ordering_req = [PhysicalSortExpr { expr: col("b", schema)?, options: sort_options, @@ -1992,7 +1993,7 @@ mod tests { fn test_last_value_agg_expr( schema: &Schema, sort_options: SortOptions, - ) -> Result> { + ) -> Result> { let ordering_req = [PhysicalSortExpr { expr: col("b", schema)?, options: sort_options, @@ -2047,7 +2048,7 @@ mod tests { descending: false, nulls_first: false, }; - let aggregates: Vec> = if is_first_acc { + let aggregates: Vec> = if is_first_acc { vec![test_first_value_agg_expr(&schema, sort_options)?] } else { vec![test_last_value_agg_expr(&schema, sort_options)?] 
@@ -2179,6 +2180,7 @@ mod tests { .map(|order_by_expr| { let ordering_req = order_by_expr.unwrap_or_default(); AggregateExprBuilder::new(array_agg_udaf(), vec![Arc::clone(col_a)]) + .alias("a") .order_by(ordering_req.to_vec()) .schema(Arc::clone(&test_schema)) .build() @@ -2211,7 +2213,7 @@ mod tests { }; let groups = PhysicalGroupBy::new_single(vec![(col_a, "a".to_string())]); - let aggregates: Vec> = vec![ + let aggregates: Vec> = vec![ test_first_value_agg_expr(&schema, option_desc)?, test_last_value_agg_expr(&schema, option_desc)?, ]; @@ -2219,7 +2221,7 @@ mod tests { let aggregate_exec = Arc::new(AggregateExec::try_new( AggregateMode::Partial, groups, - aggregates.clone(), + aggregates, vec![None, None], Arc::clone(&blocking_exec) as Arc, schema, @@ -2269,7 +2271,7 @@ mod tests { ], ); - let aggregates: Vec> = + let aggregates: Vec> = vec![AggregateExprBuilder::new(count_udaf(), vec![lit(1)]) .schema(Arc::clone(&schema)) .alias("1") diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index b3221752d034..d022bb007d9b 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -47,10 +47,9 @@ use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_execution::TaskContext; use datafusion_expr::{EmitTo, GroupsAccumulator}; use datafusion_physical_expr::expressions::Column; -use datafusion_physical_expr::{ - AggregateExpr, GroupsAccumulatorAdapter, PhysicalSortExpr, -}; +use datafusion_physical_expr::{GroupsAccumulatorAdapter, PhysicalSortExpr}; +use datafusion_physical_expr::aggregate::AggregateFunctionExpr; use futures::ready; use futures::stream::{Stream, StreamExt}; use log::debug; @@ -76,35 +75,45 @@ use super::AggregateExec; /// This encapsulates the spilling state struct SpillState { - /// If data has previously been spilled, the locations of the - /// spill files (in Arrow IPC format) - spills: Vec, - + // ======================================================================== + // PROPERTIES: + // These fields are initialized at the start and remain constant throughout + // the execution. + // ======================================================================== /// Sorting expression for spilling batches spill_expr: Vec, /// Schema for spilling batches spill_schema: SchemaRef, - /// true when streaming merge is in progress - is_stream_merging: bool, - /// aggregate_arguments for merging spilled data merging_aggregate_arguments: Vec>>, /// GROUP BY expressions for merging spilled data merging_group_by: PhysicalGroupBy, + + // ======================================================================== + // STATES: + // Fields changes during execution. 
Can be buffer, or state flags that + // influence the exeuction in parent `GroupedHashAggregateStream` + // ======================================================================== + /// If data has previously been spilled, the locations of the + /// spill files (in Arrow IPC format) + spills: Vec, + + /// true when streaming merge is in progress + is_stream_merging: bool, } /// Tracks if the aggregate should skip partial aggregations /// /// See "partial aggregation" discussion on [`GroupedHashAggregateStream`] struct SkipAggregationProbe { - /// Number of processed input rows (updated during probing) - input_rows: usize, - /// Number of total group values for `input_rows` (updated during probing) - num_groups: usize, - + // ======================================================================== + // PROPERTIES: + // These fields are initialized at the start and remain constant throughout + // the execution. + // ======================================================================== /// Aggregation ratio check performed when the number of input rows exceeds /// this threshold (from `SessionConfig`) probe_rows_threshold: usize, @@ -113,6 +122,16 @@ struct SkipAggregationProbe { /// is skipped and input rows are directly converted to output probe_ratio_threshold: f64, + // ======================================================================== + // STATES: + // Fields changes during execution. Can be buffer, or state flags that + // influence the exeuction in parent `GroupedHashAggregateStream` + // ======================================================================== + /// Number of processed input rows (updated during probing) + input_rows: usize, + /// Number of total group values for `input_rows` (updated during probing) + num_groups: usize, + /// Flag indicating further data aggregation may be skipped (decision made /// when probing complete) should_skip: bool, @@ -316,17 +335,15 @@ impl SkipAggregationProbe { /// └─────────────────┘ └─────────────────┘ /// ``` pub(crate) struct GroupedHashAggregateStream { + // ======================================================================== + // PROPERTIES: + // These fields are initialized at the start and remain constant throughout + // the execution. + // ======================================================================== schema: SchemaRef, input: SendableRecordBatchStream, mode: AggregateMode, - /// Accumulators, one for each `AggregateExpr` in the query - /// - /// For example, if the query has aggregates, `SUM(x)`, - /// `COUNT(y)`, there will be two accumulators, each one - /// specialized for that particular aggregate and its input types - accumulators: Vec>, - /// Arguments to pass to each accumulator. /// /// The arguments in `accumulator[i]` is passed `aggregate_arguments[i]` @@ -347,9 +364,30 @@ pub(crate) struct GroupedHashAggregateStream { /// GROUP BY expressions group_by: PhysicalGroupBy, - /// The memory reservation for this grouping - reservation: MemoryReservation, + /// max rows in output RecordBatches + batch_size: usize, + + /// Optional soft limit on the number of `group_values` in a batch + /// If the number of `group_values` in a single batch exceeds this value, + /// the `GroupedHashAggregateStream` operation immediately switches to + /// output mode and emits all groups. + group_values_soft_limit: Option, + // ======================================================================== + // STATE FLAGS: + // These fields will be updated during the execution. And control the flow of + // the execution. 
+ // ======================================================================== + /// Tracks if this stream is generating input or output + exec_state: ExecutionState, + + /// Have we seen the end of the input + input_done: bool, + + // ======================================================================== + // STATE BUFFERS: + // These fields will accumulate intermediate results during the execution. + // ======================================================================== /// An interning store of group keys group_values: Box, @@ -357,38 +395,41 @@ pub(crate) struct GroupedHashAggregateStream { /// processed. Reused across batches here to avoid reallocations current_group_indices: Vec, - /// Tracks if this stream is generating input or output - exec_state: ExecutionState, - - /// Execution metrics - baseline_metrics: BaselineMetrics, - - /// max rows in output RecordBatches - batch_size: usize, + /// Accumulators, one for each `AggregateFunctionExpr` in the query + /// + /// For example, if the query has aggregates, `SUM(x)`, + /// `COUNT(y)`, there will be two accumulators, each one + /// specialized for that particular aggregate and its input types + accumulators: Vec>, + // ======================================================================== + // TASK-SPECIFIC STATES: + // Inner states groups together properties, states for a specific task. + // ======================================================================== /// Optional ordering information, that might allow groups to be /// emitted from the hash table prior to seeing the end of the /// input group_ordering: GroupOrdering, - /// Have we seen the end of the input - input_done: bool, - - /// The [`RuntimeEnv`] associated with the [`TaskContext`] argument - runtime: Arc, - /// The spill state object spill_state: SpillState, - /// Optional soft limit on the number of `group_values` in a batch - /// If the number of `group_values` in a single batch exceeds this value, - /// the `GroupedHashAggregateStream` operation immediately switches to - /// output mode and emits all groups. - group_values_soft_limit: Option, - /// Optional probe for skipping data aggregation, if supported by /// current stream. skip_aggregation_probe: Option, + + // ======================================================================== + // EXECUTION RESOURCES: + // Fields related to managing execution resources and monitoring performance. + // ======================================================================== + /// The memory reservation for this grouping + reservation: MemoryReservation, + + /// Execution metrics + baseline_metrics: BaselineMetrics, + + /// The [`RuntimeEnv`] associated with the [`TaskContext`] argument + runtime: Arc, } impl GroupedHashAggregateStream { @@ -537,7 +578,7 @@ impl GroupedHashAggregateStream { /// that is supported by the aggregate, or a /// [`GroupsAccumulatorAdapter`] if not. pub(crate) fn create_group_accumulator( - agg_expr: &Arc, + agg_expr: &Arc, ) -> Result> { if agg_expr.groups_accumulator_supported() { agg_expr.create_groups_accumulator() diff --git a/datafusion/physical-plan/src/coalesce/mod.rs b/datafusion/physical-plan/src/coalesce/mod.rs new file mode 100644 index 000000000000..5befa5ecda99 --- /dev/null +++ b/datafusion/physical-plan/src/coalesce/mod.rs @@ -0,0 +1,588 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::compute::concat_batches; +use arrow_array::builder::StringViewBuilder; +use arrow_array::cast::AsArray; +use arrow_array::{Array, ArrayRef, RecordBatch}; +use arrow_schema::SchemaRef; +use std::sync::Arc; + +/// Concatenate multiple [`RecordBatch`]es +/// +/// `BatchCoalescer` concatenates multiple small [`RecordBatch`]es, produced by +/// operations such as `FilterExec` and `RepartitionExec`, into larger ones for +/// more efficient processing by subsequent operations. +/// +/// # Background +/// +/// Generally speaking, larger [`RecordBatch`]es are more efficient to process +/// than smaller record batches (until the CPU cache is exceeded) because there +/// is fixed processing overhead per batch. DataFusion tries to operate on +/// batches of `target_batch_size` rows to amortize this overhead +/// +/// ```text +/// ┌────────────────────┐ +/// │ RecordBatch │ +/// │ num_rows = 23 │ +/// └────────────────────┘ ┌────────────────────┐ +/// │ │ +/// ┌────────────────────┐ Coalesce │ │ +/// │ │ Batches │ │ +/// │ RecordBatch │ │ │ +/// │ num_rows = 50 │ ─ ─ ─ ─ ─ ─ ▶ │ │ +/// │ │ │ RecordBatch │ +/// │ │ │ num_rows = 106 │ +/// └────────────────────┘ │ │ +/// │ │ +/// ┌────────────────────┐ │ │ +/// │ │ │ │ +/// │ RecordBatch │ │ │ +/// │ num_rows = 33 │ └────────────────────┘ +/// │ │ +/// └────────────────────┘ +/// ``` +/// +/// # Notes: +/// +/// 1. Output rows are produced in the same order as the input rows +/// +/// 2. The output is a sequence of batches, with all but the last being at least +/// `target_batch_size` rows. +/// +/// 3. Eventually this may also be able to handle other optimizations such as a +/// combined filter/coalesce operation. +/// +#[derive(Debug)] +pub struct BatchCoalescer { + /// The input schema + schema: SchemaRef, + /// Minimum number of rows for coalesces batches + target_batch_size: usize, + /// Total number of rows returned so far + total_rows: usize, + /// Buffered batches + buffer: Vec, + /// Buffered row count + buffered_rows: usize, + /// Limit: maximum number of rows to fetch, `None` means fetch all rows + fetch: Option, +} + +impl BatchCoalescer { + /// Create a new `BatchCoalescer` + /// + /// # Arguments + /// - `schema` - the schema of the output batches + /// - `target_batch_size` - the minimum number of rows for each + /// output batch (until limit reached) + /// - `fetch` - the maximum number of rows to fetch, `None` means fetch all rows + pub fn new( + schema: SchemaRef, + target_batch_size: usize, + fetch: Option, + ) -> Self { + Self { + schema, + target_batch_size, + total_rows: 0, + buffer: vec![], + buffered_rows: 0, + fetch, + } + } + + /// Return the schema of the output batches + pub fn schema(&self) -> SchemaRef { + Arc::clone(&self.schema) + } + + /// Push next batch, and returns [`CoalescerState`] indicating the current + /// state of the buffer. 
+ pub fn push_batch(&mut self, batch: RecordBatch) -> CoalescerState { + let batch = gc_string_view_batch(&batch); + if self.limit_reached(&batch) { + CoalescerState::LimitReached + } else if self.target_reached(batch) { + CoalescerState::TargetReached + } else { + CoalescerState::Continue + } + } + + /// Return true if there is no data buffered + pub fn is_empty(&self) -> bool { + self.buffer.is_empty() + } + + /// Checks if the buffer will reach the specified limit after getting + /// `batch`. + /// + /// If fetch would be exceeded, slices the received batch, updates the + /// buffer with it, and returns `true`. + /// + /// Otherwise: does nothing and returns `false`. + fn limit_reached(&mut self, batch: &RecordBatch) -> bool { + match self.fetch { + Some(fetch) if self.total_rows + batch.num_rows() >= fetch => { + // Limit is reached + let remaining_rows = fetch - self.total_rows; + debug_assert!(remaining_rows > 0); + + let batch = batch.slice(0, remaining_rows); + self.buffered_rows += batch.num_rows(); + self.total_rows = fetch; + self.buffer.push(batch); + true + } + _ => false, + } + } + + /// Updates the buffer with the given batch. + /// + /// If the target batch size is reached, returns `true`. Otherwise, returns + /// `false`. + fn target_reached(&mut self, batch: RecordBatch) -> bool { + if batch.num_rows() == 0 { + false + } else { + self.total_rows += batch.num_rows(); + self.buffered_rows += batch.num_rows(); + self.buffer.push(batch); + self.buffered_rows >= self.target_batch_size + } + } + + /// Concatenates and returns all buffered batches, and clears the buffer. + pub fn finish_batch(&mut self) -> datafusion_common::Result { + let batch = concat_batches(&self.schema, &self.buffer)?; + self.buffer.clear(); + self.buffered_rows = 0; + Ok(batch) + } +} + +/// Indicates the state of the [`BatchCoalescer`] buffer after the +/// [`BatchCoalescer::push_batch()`] operation. +/// +/// The caller should take different actions, depending on the variant returned. +pub enum CoalescerState { + /// Neither the limit nor the target batch size is reached. + /// + /// Action: continue pushing batches. + Continue, + /// The limit has been reached. + /// + /// Action: call [`BatchCoalescer::finish_batch()`] to get the final + /// buffered results as a batch and finish the query. + LimitReached, + /// The specified minimum number of rows a batch should have is reached. + /// + /// Action: call [`BatchCoalescer::finish_batch()`] to get the current + /// buffered results as a batch and then continue pushing batches. + TargetReached, +} + +/// Heuristically compact `StringViewArray`s to reduce memory usage, if needed +/// +/// Decides when to consolidate the StringView into a new buffer to reduce +/// memory usage and improve string locality for better performance. +/// +/// This differs from `StringViewArray::gc` because: +/// 1. It may not compact the array depending on a heuristic. +/// 2. It uses a precise block size to reduce the number of buffers to track. +/// +/// # Heuristic +/// +/// If the average size of each view is larger than 32 bytes, we compact the array. +/// +/// `StringViewArray` includes pointers to buffers that hold the underlying data. +/// One of the great benefits of `StringViewArray` is that many operations +/// (e.g., `filter`) can be done without copying the underlying data.
+/// +/// However, after a while (e.g., after `FilterExec` or `HashJoinExec`) the +/// `StringViewArray` may only refer to a small portion of the buffer, +/// significantly increasing memory usage. +fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch { + let new_columns: Vec = batch + .columns() + .iter() + .map(|c| { + // Try to re-create the `StringViewArray` to prevent holding the underlying buffer too long. + let Some(s) = c.as_string_view_opt() else { + return Arc::clone(c); + }; + let ideal_buffer_size: usize = s + .views() + .iter() + .map(|v| { + let len = (*v as u32) as usize; + if len > 12 { + len + } else { + 0 + } + }) + .sum(); + let actual_buffer_size = s.get_buffer_memory_size(); + + // Re-creating the array copies data and can be time consuming. + // We only do it if the array is sparse + if actual_buffer_size > (ideal_buffer_size * 2) { + // We set the block size to `ideal_buffer_size` so that the new StringViewArray only has one buffer, which accelerate later concat_batches. + // See https://github.com/apache/arrow-rs/issues/6094 for more details. + let mut builder = StringViewBuilder::with_capacity(s.len()); + if ideal_buffer_size > 0 { + builder = builder.with_block_size(ideal_buffer_size as u32); + } + + for v in s.iter() { + builder.append_option(v); + } + + let gc_string = builder.finish(); + + debug_assert!(gc_string.data_buffers().len() <= 1); // buffer count can be 0 if the `ideal_buffer_size` is 0 + + Arc::new(gc_string) + } else { + Arc::clone(c) + } + }) + .collect(); + RecordBatch::try_new(batch.schema(), new_columns) + .expect("Failed to re-create the gc'ed record batch") +} + +#[cfg(test)] +mod tests { + use std::ops::Range; + + use super::*; + + use arrow::datatypes::{DataType, Field, Schema}; + use arrow_array::builder::ArrayBuilder; + use arrow_array::{StringViewArray, UInt32Array}; + + #[test] + fn test_coalesce() { + let batch = uint32_batch(0..8); + Test::new() + .with_batches(std::iter::repeat(batch).take(10)) + // expected output is batches of at least 20 rows (except for the final batch) + .with_target_batch_size(21) + .with_expected_output_sizes(vec![24, 24, 24, 8]) + .run() + } + + #[test] + fn test_coalesce_with_fetch_larger_than_input_size() { + let batch = uint32_batch(0..8); + Test::new() + .with_batches(std::iter::repeat(batch).take(10)) + // input is 10 batches x 8 rows (80 rows) with fetch limit of 100 + // expected to behave the same as `test_concat_batches` + .with_target_batch_size(21) + .with_fetch(Some(100)) + .with_expected_output_sizes(vec![24, 24, 24, 8]) + .run(); + } + + #[test] + fn test_coalesce_with_fetch_less_than_input_size() { + let batch = uint32_batch(0..8); + Test::new() + .with_batches(std::iter::repeat(batch).take(10)) + // input is 10 batches x 8 rows (80 rows) with fetch limit of 50 + .with_target_batch_size(21) + .with_fetch(Some(50)) + .with_expected_output_sizes(vec![24, 24, 2]) + .run(); + } + + #[test] + fn test_coalesce_with_fetch_less_than_target_and_no_remaining_rows() { + let batch = uint32_batch(0..8); + Test::new() + .with_batches(std::iter::repeat(batch).take(10)) + // input is 10 batches x 8 rows (80 rows) with fetch limit of 48 + .with_target_batch_size(21) + .with_fetch(Some(48)) + .with_expected_output_sizes(vec![24, 24]) + .run(); + } + + #[test] + fn test_coalesce_with_fetch_less_target_batch_size() { + let batch = uint32_batch(0..8); + Test::new() + .with_batches(std::iter::repeat(batch).take(10)) + // input is 10 batches x 8 rows (80 rows) with fetch limit of 10 + 
.with_target_batch_size(21) + .with_fetch(Some(10)) + .with_expected_output_sizes(vec![10]) + .run(); + } + + #[test] + fn test_coalesce_single_large_batch_over_fetch() { + let large_batch = uint32_batch(0..100); + Test::new() + .with_batch(large_batch) + .with_target_batch_size(20) + .with_fetch(Some(7)) + .with_expected_output_sizes(vec![7]) + .run() + } + + /// Test for [`BatchCoalescer`] + /// + /// Pushes the input batches to the coalescer and verifies that the resulting + /// batches have the expected number of rows and contents. + #[derive(Debug, Clone, Default)] + struct Test { + /// Batches to feed to the coalescer. Tests must have at least one + /// schema + input_batches: Vec, + /// Expected output sizes of the resulting batches + expected_output_sizes: Vec, + /// target batch size + target_batch_size: usize, + /// Fetch (limit) + fetch: Option, + } + + impl Test { + fn new() -> Self { + Self::default() + } + + /// Set the target batch size + fn with_target_batch_size(mut self, target_batch_size: usize) -> Self { + self.target_batch_size = target_batch_size; + self + } + + /// Set the fetch (limit) + fn with_fetch(mut self, fetch: Option) -> Self { + self.fetch = fetch; + self + } + + /// Extend the input batches with `batch` + fn with_batch(mut self, batch: RecordBatch) -> Self { + self.input_batches.push(batch); + self + } + + /// Extends the input batches with `batches` + fn with_batches( + mut self, + batches: impl IntoIterator, + ) -> Self { + self.input_batches.extend(batches); + self + } + + /// Extends `sizes` to expected output sizes + fn with_expected_output_sizes( + mut self, + sizes: impl IntoIterator, + ) -> Self { + self.expected_output_sizes.extend(sizes); + self + } + + /// Runs the test -- see documentation on [`Test`] for details + fn run(self) { + let Self { + input_batches, + target_batch_size, + fetch, + expected_output_sizes, + } = self; + + let schema = input_batches[0].schema(); + + // create a single large input batch for output comparison + let single_input_batch = concat_batches(&schema, &input_batches).unwrap(); + + let mut coalescer = + BatchCoalescer::new(Arc::clone(&schema), target_batch_size, fetch); + + let mut output_batches = vec![]; + for batch in input_batches { + match coalescer.push_batch(batch) { + CoalescerState::Continue => {} + CoalescerState::LimitReached => { + output_batches.push(coalescer.finish_batch().unwrap()); + break; + } + CoalescerState::TargetReached => { + coalescer.buffered_rows = 0; + output_batches.push(coalescer.finish_batch().unwrap()); + } + } + } + if coalescer.buffered_rows != 0 { + output_batches.extend(coalescer.buffer); + } + + // make sure we got the expected number of output batches and content + let mut starting_idx = 0; + assert_eq!(expected_output_sizes.len(), output_batches.len()); + for (i, (expected_size, batch)) in + expected_output_sizes.iter().zip(output_batches).enumerate() + { + assert_eq!( + *expected_size, + batch.num_rows(), + "Unexpected number of rows in Batch {i}" + ); + + // compare the contents of the batch (using `==` compares the + // underlying memory layout too) + let expected_batch = + single_input_batch.slice(starting_idx, *expected_size); + let batch_strings = batch_to_pretty_strings(&batch); + let expected_batch_strings = batch_to_pretty_strings(&expected_batch); + let batch_strings = batch_strings.lines().collect::>(); + let expected_batch_strings = + expected_batch_strings.lines().collect::>(); + assert_eq!( + expected_batch_strings, batch_strings, + "Unexpected content in Batch 
{i}:\ + \n\nExpected:\n{expected_batch_strings:#?}\n\nActual:\n{batch_strings:#?}" + ); + starting_idx += *expected_size; + } + } + } + + /// Return a batch of UInt32 with the specified range + fn uint32_batch(range: Range) -> RecordBatch { + let schema = + Arc::new(Schema::new(vec![Field::new("c0", DataType::UInt32, false)])); + + RecordBatch::try_new( + Arc::clone(&schema), + vec![Arc::new(UInt32Array::from_iter_values(range))], + ) + .unwrap() + } + + #[test] + fn test_gc_string_view_batch_small_no_compact() { + // view with only short strings (no buffers) --> no need to compact + let array = StringViewTest { + rows: 1000, + strings: vec![Some("a"), Some("b"), Some("c")], + } + .build(); + + let gc_array = do_gc(array.clone()); + compare_string_array_values(&array, &gc_array); + assert_eq!(array.data_buffers().len(), 0); + assert_eq!(array.data_buffers().len(), gc_array.data_buffers().len()); // no compaction + } + + #[test] + fn test_gc_string_view_batch_large_no_compact() { + // view with large strings (has buffers) but full --> no need to compact + let array = StringViewTest { + rows: 1000, + strings: vec![Some("This string is longer than 12 bytes")], + } + .build(); + + let gc_array = do_gc(array.clone()); + compare_string_array_values(&array, &gc_array); + assert_eq!(array.data_buffers().len(), 5); + assert_eq!(array.data_buffers().len(), gc_array.data_buffers().len()); // no compaction + } + + #[test] + fn test_gc_string_view_batch_large_slice_compact() { + // view with large strings (has buffers) and only partially used --> should compact + let array = StringViewTest { + rows: 1000, + strings: vec![Some("this string is longer than 12 bytes")], + } + .build(); + + // slice only 11 rows, so most of the buffer is not used + let array = array.slice(11, 22); + + let gc_array = do_gc(array.clone()); + compare_string_array_values(&array, &gc_array); + assert_eq!(array.data_buffers().len(), 5); + assert_eq!(gc_array.data_buffers().len(), 1); // compacted into a single buffer + } + + /// Compares the values of two string view arrays + fn compare_string_array_values(arr1: &StringViewArray, arr2: &StringViewArray) { + assert_eq!(arr1.len(), arr2.len()); + for (s1, s2) in arr1.iter().zip(arr2.iter()) { + assert_eq!(s1, s2); + } + } + + /// Runs garbage collection on a string view array + /// and ensures the number of rows is the same + fn do_gc(array: StringViewArray) -> StringViewArray { + let batch = + RecordBatch::try_from_iter(vec![("a", Arc::new(array) as ArrayRef)]).unwrap(); + let gc_batch = gc_string_view_batch(&batch); + assert_eq!(batch.num_rows(), gc_batch.num_rows()); + assert_eq!(batch.schema(), gc_batch.schema()); + gc_batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap() + .clone() + } + + /// Describes parameters for creating a `StringViewArray` + struct StringViewTest { + /// The number of rows in the array + rows: usize, + /// The strings to use in the array (repeated over and over) + strings: Vec>, + } + + impl StringViewTest { + /// Create a `StringViewArray` with the parameters specified in this struct + fn build(self) -> StringViewArray { + let mut builder = StringViewBuilder::with_capacity(100).with_block_size(8192); + loop { + for &v in self.strings.iter() { + builder.append_option(v); + if builder.len() >= self.rows { + return builder.finish(); + } + } + } + } + } + fn batch_to_pretty_strings(batch: &RecordBatch) -> String { + arrow::util::pretty::pretty_format_batches(&[batch.clone()]) + .unwrap() + .to_string() + } +} diff --git
a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index 5589027694fe..7caf5b8ab65a 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -28,19 +28,17 @@ use crate::{ DisplayFormatType, ExecutionPlan, RecordBatchStream, SendableRecordBatchStream, }; -use arrow::array::{AsArray, StringViewBuilder}; -use arrow::compute::concat_batches; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; -use arrow_array::{Array, ArrayRef}; use datafusion_common::Result; use datafusion_execution::TaskContext; +use crate::coalesce::{BatchCoalescer, CoalescerState}; use futures::ready; use futures::stream::{Stream, StreamExt}; /// `CoalesceBatchesExec` combines small batches into larger batches for more -/// efficient use of vectorized processing by later operators. +/// efficient vectorized processing by later operators. /// /// The operator buffers batches until it collects `target_batch_size` rows and /// then emits a single concatenated batch. When only a limited number of rows @@ -48,35 +46,7 @@ use futures::stream::{Stream, StreamExt}; /// buffering and returns the final batch once the number of collected rows /// reaches the `fetch` value. /// -/// # Background -/// -/// Generally speaking, larger RecordBatches are more efficient to process than -/// smaller record batches (until the CPU cache is exceeded) because there is -/// fixed processing overhead per batch. This code concatenates multiple small -/// record batches into larger ones to amortize this overhead. -/// -/// ```text -/// ┌────────────────────┐ -/// │ RecordBatch │ -/// │ num_rows = 23 │ -/// └────────────────────┘ ┌────────────────────┐ -/// │ │ -/// ┌────────────────────┐ Coalesce │ │ -/// │ │ Batches │ │ -/// │ RecordBatch │ │ │ -/// │ num_rows = 50 │ ─ ─ ─ ─ ─ ─ ▶ │ │ -/// │ │ │ RecordBatch │ -/// │ │ │ num_rows = 106 │ -/// └────────────────────┘ │ │ -/// │ │ -/// ┌────────────────────┐ │ │ -/// │ │ │ │ -/// │ RecordBatch │ │ │ -/// │ num_rows = 33 │ └────────────────────┘ -/// │ │ -/// └────────────────────┘ -/// ``` - +/// See [`BatchCoalescer`] for more information #[derive(Debug)] pub struct CoalesceBatchesExec { /// The input plan @@ -346,7 +316,7 @@ impl CoalesceBatchesStream { } CoalesceBatchesStreamState::Exhausted => { // Handle the end of the input stream. - return if self.coalescer.buffer.is_empty() { + return if self.coalescer.is_empty() { // If buffer is empty, return None indicating the stream is fully consumed. Poll::Ready(None) } else { @@ -365,511 +335,3 @@ impl RecordBatchStream for CoalesceBatchesStream { self.coalescer.schema() } } - -/// Concatenate multiple record batches into larger batches -/// -/// See [`CoalesceBatchesExec`] for more details. -/// -/// Notes: -/// -/// 1. The output rows is the same order as the input rows -/// -/// 2. The output is a sequence of batches, with all but the last being at least -/// `target_batch_size` rows. -/// -/// 3. Eventually this may also be able to handle other optimizations such as a -/// combined filter/coalesce operation. 
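Taken together with the test harness above, the intended driving pattern for the coalescer is a small loop over `push_batch` results. Below is a minimal sketch, assuming the `crate::coalesce` module interface referenced earlier in this diff (`push_batch`, `finish_batch`, `is_empty`); the `coalesce_all` helper itself is illustrative and not part of the change:

```rust
use arrow::datatypes::SchemaRef;
use arrow::record_batch::RecordBatch;
use datafusion_common::Result;
// In this PR the coalescer lives in `crate::coalesce`
// (see `use crate::coalesce::{BatchCoalescer, CoalescerState}` above).
use crate::coalesce::{BatchCoalescer, CoalescerState};

/// Illustrative helper (not part of the change): coalesce `input` into batches of
/// at least `target_batch_size` rows, stopping once `fetch` rows have been produced.
fn coalesce_all(
    schema: SchemaRef,
    target_batch_size: usize,
    fetch: Option<usize>,
    input: Vec<RecordBatch>,
) -> Result<Vec<RecordBatch>> {
    let mut coalescer = BatchCoalescer::new(schema, target_batch_size, fetch);
    let mut output = vec![];
    for batch in input {
        match coalescer.push_batch(batch) {
            // Not enough buffered rows yet: keep pushing input.
            CoalescerState::Continue => {}
            // The fetch limit was hit: `push_batch` has already sliced the final
            // batch down to the remaining rows, so emit and stop consuming input.
            CoalescerState::LimitReached => {
                output.push(coalescer.finish_batch()?);
                return Ok(output);
            }
            // At least `target_batch_size` rows are buffered: emit one batch.
            CoalescerState::TargetReached => {
                output.push(coalescer.finish_batch()?);
            }
        }
    }
    // Flush any remaining rows smaller than the target size.
    if !coalescer.is_empty() {
        output.push(coalescer.finish_batch()?);
    }
    Ok(output)
}
```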
-#[derive(Debug)]
-struct BatchCoalescer {
-    /// The input schema
-    schema: SchemaRef,
-    /// Minimum number of rows for coalesces batches
-    target_batch_size: usize,
-    /// Total number of rows returned so far
-    total_rows: usize,
-    /// Buffered batches
-    buffer: Vec<RecordBatch>,
-    /// Buffered row count
-    buffered_rows: usize,
-    /// Maximum number of rows to fetch, `None` means fetching all rows
-    fetch: Option<usize>,
-}
-
-impl BatchCoalescer {
-    /// Create a new `BatchCoalescer`
-    ///
-    /// # Arguments
-    /// - `schema` - the schema of the output batches
-    /// - `target_batch_size` - the minimum number of rows for each
-    ///   output batch (until limit reached)
-    /// - `fetch` - the maximum number of rows to fetch, `None` means fetch all rows
-    fn new(schema: SchemaRef, target_batch_size: usize, fetch: Option<usize>) -> Self {
-        Self {
-            schema,
-            target_batch_size,
-            total_rows: 0,
-            buffer: vec![],
-            buffered_rows: 0,
-            fetch,
-        }
-    }
-
-    /// Return the schema of the output batches
-    fn schema(&self) -> SchemaRef {
-        Arc::clone(&self.schema)
-    }
-
-    /// Given a batch, it updates the buffer of [`BatchCoalescer`]. It returns
-    /// a variant of [`CoalescerState`] indicating the final state of the buffer.
-    fn push_batch(&mut self, batch: RecordBatch) -> CoalescerState {
-        let batch = gc_string_view_batch(&batch);
-        if self.limit_reached(&batch) {
-            CoalescerState::LimitReached
-        } else if self.target_reached(batch) {
-            CoalescerState::TargetReached
-        } else {
-            CoalescerState::Continue
-        }
-    }
-
-    /// The function checks if the buffer can reach the specified limit after getting `batch`.
-    /// If it does, it slices the received batch as needed, updates the buffer with it, and
-    /// finally returns `true`. Otherwise; the function does nothing and returns `false`.
-    fn limit_reached(&mut self, batch: &RecordBatch) -> bool {
-        match self.fetch {
-            Some(fetch) if self.total_rows + batch.num_rows() >= fetch => {
-                // Limit is reached
-                let remaining_rows = fetch - self.total_rows;
-                debug_assert!(remaining_rows > 0);
-
-                let batch = batch.slice(0, remaining_rows);
-                self.buffered_rows += batch.num_rows();
-                self.total_rows = fetch;
-                self.buffer.push(batch);
-                true
-            }
-            _ => false,
-        }
-    }
-
-    /// Updates the buffer with the given batch. If the target batch size is reached,
-    /// the function returns `true`. Otherwise, it returns `false`.
-    fn target_reached(&mut self, batch: RecordBatch) -> bool {
-        if batch.num_rows() == 0 {
-            false
-        } else {
-            self.total_rows += batch.num_rows();
-            self.buffered_rows += batch.num_rows();
-            self.buffer.push(batch);
-            self.buffered_rows >= self.target_batch_size
-        }
-    }
-
-    /// Concatenates and returns all buffered batches, and clears the buffer.
-    fn finish_batch(&mut self) -> Result<RecordBatch> {
-        let batch = concat_batches(&self.schema, &self.buffer)?;
-        self.buffer.clear();
-        self.buffered_rows = 0;
-        Ok(batch)
-    }
-}
-
-/// This enumeration acts as a status indicator for the [`BatchCoalescer`] after a
-/// [`BatchCoalescer::push_batch()`] operation.
-enum CoalescerState {
-    /// Neither the limit nor the target batch size is reached.
-    Continue,
-    /// The sufficient row count to produce a complete query result is reached.
-    LimitReached,
-    /// The specified minimum number of rows a batch should have is reached.
-    TargetReached,
-}
-
-/// Heuristically compact `StringViewArray`s to reduce memory usage, if needed
-///
-/// This function decides when to consolidate the StringView into a new buffer
-/// to reduce memory usage and improve string locality for better performance.
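In rough terms, the decision this heuristic makes (spelled out in the remainder of the comment and in `gc_string_view_batch` itself) compares the bytes the views actually need against the bytes the buffers currently occupy, and only compacts when the buffers are more than twice the ideal size. A standalone sketch of that check; the helper name is illustrative only:

```rust
use arrow::array::{Array, StringViewArray};

/// Illustrative only: mirrors the sizing comparison used by `gc_string_view_batch`.
/// Strings of 12 bytes or fewer are stored inline in the view and need no buffer space.
fn should_compact(array: &StringViewArray) -> bool {
    // Sum of the lengths of all views that actually point into data buffers.
    let ideal_buffer_size: usize = array
        .views()
        .iter()
        .map(|v| {
            // The low 32 bits of a view encode the string length.
            let len = (*v as u32) as usize;
            if len > 12 {
                len
            } else {
                0
            }
        })
        .sum();
    let actual_buffer_size = array.get_buffer_memory_size();
    // Copying is only worthwhile when the buffers are badly under-utilized.
    actual_buffer_size > ideal_buffer_size * 2
}
```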
-/// -/// This differs from `StringViewArray::gc` because: -/// 1. It may not compact the array depending on a heuristic. -/// 2. It uses a precise block size to reduce the number of buffers to track. -/// -/// # Heuristic -/// -/// If the average size of each view is larger than 32 bytes, we compact the array. -/// -/// `StringViewArray` include pointers to buffer that hold the underlying data. -/// One of the great benefits of `StringViewArray` is that many operations -/// (e.g., `filter`) can be done without copying the underlying data. -/// -/// However, after a while (e.g., after `FilterExec` or `HashJoinExec`) the -/// `StringViewArray` may only refer to a small portion of the buffer, -/// significantly increasing memory usage. -fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch { - let new_columns: Vec = batch - .columns() - .iter() - .map(|c| { - // Try to re-create the `StringViewArray` to prevent holding the underlying buffer too long. - let Some(s) = c.as_string_view_opt() else { - return Arc::clone(c); - }; - let ideal_buffer_size: usize = s - .views() - .iter() - .map(|v| { - let len = (*v as u32) as usize; - if len > 12 { - len - } else { - 0 - } - }) - .sum(); - let actual_buffer_size = s.get_buffer_memory_size(); - - // Re-creating the array copies data and can be time consuming. - // We only do it if the array is sparse - if actual_buffer_size > (ideal_buffer_size * 2) { - // We set the block size to `ideal_buffer_size` so that the new StringViewArray only has one buffer, which accelerate later concat_batches. - // See https://github.com/apache/arrow-rs/issues/6094 for more details. - let mut builder = StringViewBuilder::with_capacity(s.len()); - if ideal_buffer_size > 0 { - builder = builder.with_block_size(ideal_buffer_size as u32); - } - - for v in s.iter() { - builder.append_option(v); - } - - let gc_string = builder.finish(); - - debug_assert!(gc_string.data_buffers().len() <= 1); // buffer count can be 0 if the `ideal_buffer_size` is 0 - - Arc::new(gc_string) - } else { - Arc::clone(c) - } - }) - .collect(); - RecordBatch::try_new(batch.schema(), new_columns) - .expect("Failed to re-create the gc'ed record batch") -} - -#[cfg(test)] -mod tests { - use std::ops::Range; - - use super::*; - - use arrow::datatypes::{DataType, Field, Schema}; - use arrow_array::builder::ArrayBuilder; - use arrow_array::{StringViewArray, UInt32Array}; - - #[test] - fn test_coalesce() { - let batch = uint32_batch(0..8); - Test::new() - .with_batches(std::iter::repeat(batch).take(10)) - // expected output is batches of at least 20 rows (except for the final batch) - .with_target_batch_size(21) - .with_expected_output_sizes(vec![24, 24, 24, 8]) - .run() - } - - #[test] - fn test_coalesce_with_fetch_larger_than_input_size() { - let batch = uint32_batch(0..8); - Test::new() - .with_batches(std::iter::repeat(batch).take(10)) - // input is 10 batches x 8 rows (80 rows) with fetch limit of 100 - // expected to behave the same as `test_concat_batches` - .with_target_batch_size(21) - .with_fetch(Some(100)) - .with_expected_output_sizes(vec![24, 24, 24, 8]) - .run(); - } - - #[test] - fn test_coalesce_with_fetch_less_than_input_size() { - let batch = uint32_batch(0..8); - Test::new() - .with_batches(std::iter::repeat(batch).take(10)) - // input is 10 batches x 8 rows (80 rows) with fetch limit of 50 - .with_target_batch_size(21) - .with_fetch(Some(50)) - .with_expected_output_sizes(vec![24, 24, 2]) - .run(); - } - - #[test] - fn 
test_coalesce_with_fetch_less_than_target_and_no_remaining_rows() { - let batch = uint32_batch(0..8); - Test::new() - .with_batches(std::iter::repeat(batch).take(10)) - // input is 10 batches x 8 rows (80 rows) with fetch limit of 48 - .with_target_batch_size(21) - .with_fetch(Some(48)) - .with_expected_output_sizes(vec![24, 24]) - .run(); - } - - #[test] - fn test_coalesce_with_fetch_less_target_batch_size() { - let batch = uint32_batch(0..8); - Test::new() - .with_batches(std::iter::repeat(batch).take(10)) - // input is 10 batches x 8 rows (80 rows) with fetch limit of 10 - .with_target_batch_size(21) - .with_fetch(Some(10)) - .with_expected_output_sizes(vec![10]) - .run(); - } - - #[test] - fn test_coalesce_single_large_batch_over_fetch() { - let large_batch = uint32_batch(0..100); - Test::new() - .with_batch(large_batch) - .with_target_batch_size(20) - .with_fetch(Some(7)) - .with_expected_output_sizes(vec![7]) - .run() - } - - /// Test for [`BatchCoalescer`] - /// - /// Pushes the input batches to the coalescer and verifies that the resulting - /// batches have the expected number of rows and contents. - #[derive(Debug, Clone, Default)] - struct Test { - /// Batches to feed to the coalescer. Tests must have at least one - /// schema - input_batches: Vec, - /// Expected output sizes of the resulting batches - expected_output_sizes: Vec, - /// target batch size - target_batch_size: usize, - /// Fetch (limit) - fetch: Option, - } - - impl Test { - fn new() -> Self { - Self::default() - } - - /// Set the target batch size - fn with_target_batch_size(mut self, target_batch_size: usize) -> Self { - self.target_batch_size = target_batch_size; - self - } - - /// Set the fetch (limit) - fn with_fetch(mut self, fetch: Option) -> Self { - self.fetch = fetch; - self - } - - /// Extend the input batches with `batch` - fn with_batch(mut self, batch: RecordBatch) -> Self { - self.input_batches.push(batch); - self - } - - /// Extends the input batches with `batches` - fn with_batches( - mut self, - batches: impl IntoIterator, - ) -> Self { - self.input_batches.extend(batches); - self - } - - /// Extends `sizes` to expected output sizes - fn with_expected_output_sizes( - mut self, - sizes: impl IntoIterator, - ) -> Self { - self.expected_output_sizes.extend(sizes); - self - } - - /// Runs the test -- see documentation on [`Test`] for details - fn run(self) { - let Self { - input_batches, - target_batch_size, - fetch, - expected_output_sizes, - } = self; - - let schema = input_batches[0].schema(); - - // create a single large input batch for output comparison - let single_input_batch = concat_batches(&schema, &input_batches).unwrap(); - - let mut coalescer = - BatchCoalescer::new(Arc::clone(&schema), target_batch_size, fetch); - - let mut output_batches = vec![]; - for batch in input_batches { - match coalescer.push_batch(batch) { - CoalescerState::Continue => {} - CoalescerState::LimitReached => { - output_batches.push(coalescer.finish_batch().unwrap()); - break; - } - CoalescerState::TargetReached => { - coalescer.buffered_rows = 0; - output_batches.push(coalescer.finish_batch().unwrap()); - } - } - } - if coalescer.buffered_rows != 0 { - output_batches.extend(coalescer.buffer); - } - - // make sure we got the expected number of output batches and content - let mut starting_idx = 0; - assert_eq!(expected_output_sizes.len(), output_batches.len()); - for (i, (expected_size, batch)) in - expected_output_sizes.iter().zip(output_batches).enumerate() - { - assert_eq!( - *expected_size, - batch.num_rows(), 
- "Unexpected number of rows in Batch {i}" - ); - - // compare the contents of the batch (using `==` compares the - // underlying memory layout too) - let expected_batch = - single_input_batch.slice(starting_idx, *expected_size); - let batch_strings = batch_to_pretty_strings(&batch); - let expected_batch_strings = batch_to_pretty_strings(&expected_batch); - let batch_strings = batch_strings.lines().collect::>(); - let expected_batch_strings = - expected_batch_strings.lines().collect::>(); - assert_eq!( - expected_batch_strings, batch_strings, - "Unexpected content in Batch {i}:\ - \n\nExpected:\n{expected_batch_strings:#?}\n\nActual:\n{batch_strings:#?}" - ); - starting_idx += *expected_size; - } - } - } - - /// Return a batch of UInt32 with the specified range - fn uint32_batch(range: Range) -> RecordBatch { - let schema = - Arc::new(Schema::new(vec![Field::new("c0", DataType::UInt32, false)])); - - RecordBatch::try_new( - Arc::clone(&schema), - vec![Arc::new(UInt32Array::from_iter_values(range))], - ) - .unwrap() - } - - #[test] - fn test_gc_string_view_batch_small_no_compact() { - // view with only short strings (no buffers) --> no need to compact - let array = StringViewTest { - rows: 1000, - strings: vec![Some("a"), Some("b"), Some("c")], - } - .build(); - - let gc_array = do_gc(array.clone()); - compare_string_array_values(&array, &gc_array); - assert_eq!(array.data_buffers().len(), 0); - assert_eq!(array.data_buffers().len(), gc_array.data_buffers().len()); // no compaction - } - - #[test] - fn test_gc_string_view_batch_large_no_compact() { - // view with large strings (has buffers) but full --> no need to compact - let array = StringViewTest { - rows: 1000, - strings: vec![Some("This string is longer than 12 bytes")], - } - .build(); - - let gc_array = do_gc(array.clone()); - compare_string_array_values(&array, &gc_array); - assert_eq!(array.data_buffers().len(), 5); - assert_eq!(array.data_buffers().len(), gc_array.data_buffers().len()); // no compaction - } - - #[test] - fn test_gc_string_view_batch_large_slice_compact() { - // view with large strings (has buffers) and only partially used --> no need to compact - let array = StringViewTest { - rows: 1000, - strings: vec![Some("this string is longer than 12 bytes")], - } - .build(); - - // slice only 11 rows, so most of the buffer is not used - let array = array.slice(11, 22); - - let gc_array = do_gc(array.clone()); - compare_string_array_values(&array, &gc_array); - assert_eq!(array.data_buffers().len(), 5); - assert_eq!(gc_array.data_buffers().len(), 1); // compacted into a single buffer - } - - /// Compares the values of two string view arrays - fn compare_string_array_values(arr1: &StringViewArray, arr2: &StringViewArray) { - assert_eq!(arr1.len(), arr2.len()); - for (s1, s2) in arr1.iter().zip(arr2.iter()) { - assert_eq!(s1, s2); - } - } - - /// runs garbage collection on string view array - /// and ensures the number of rows are the same - fn do_gc(array: StringViewArray) -> StringViewArray { - let batch = - RecordBatch::try_from_iter(vec![("a", Arc::new(array) as ArrayRef)]).unwrap(); - let gc_batch = gc_string_view_batch(&batch); - assert_eq!(batch.num_rows(), gc_batch.num_rows()); - assert_eq!(batch.schema(), gc_batch.schema()); - gc_batch - .column(0) - .as_any() - .downcast_ref::() - .unwrap() - .clone() - } - - /// Describes parameters for creating a `StringViewArray` - struct StringViewTest { - /// The number of rows in the array - rows: usize, - /// The strings to use in the array (repeated over and over - strings: 
Vec>, - } - - impl StringViewTest { - /// Create a `StringViewArray` with the parameters specified in this struct - fn build(self) -> StringViewArray { - let mut builder = StringViewBuilder::with_capacity(100).with_block_size(8192); - loop { - for &v in self.strings.iter() { - builder.append_option(v); - if builder.len() >= self.rows { - return builder.finish(); - } - } - } - } - } - fn batch_to_pretty_strings(batch: &RecordBatch) -> String { - arrow::util::pretty::pretty_format_batches(&[batch.clone()]) - .unwrap() - .to_string() - } -} diff --git a/datafusion/physical-plan/src/execution_plan.rs b/datafusion/physical-plan/src/execution_plan.rs index a6a15e46860c..c1c66f6d3923 100644 --- a/datafusion/physical-plan/src/execution_plan.rs +++ b/datafusion/physical-plan/src/execution_plan.rs @@ -34,11 +34,10 @@ pub use datafusion_execution::{RecordBatchStream, SendableRecordBatchStream}; pub use datafusion_expr::{Accumulator, ColumnarValue}; pub use datafusion_physical_expr::window::WindowExpr; pub use datafusion_physical_expr::{ - expressions, functions, udf, AggregateExpr, Distribution, Partitioning, PhysicalExpr, -}; -use datafusion_physical_expr::{ - EquivalenceProperties, LexOrdering, PhysicalSortExpr, PhysicalSortRequirement, + expressions, functions, udf, Distribution, Partitioning, PhysicalExpr, }; +use datafusion_physical_expr::{EquivalenceProperties, LexOrdering, PhysicalSortExpr}; +use datafusion_physical_expr_common::sort_expr::LexRequirement; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::display::DisplayableExecutionPlan; @@ -125,7 +124,7 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { /// NOTE that checking `!is_empty()` does **not** check for a /// required input ordering. Instead, the correct check is that at /// least one entry must be `Some` - fn required_input_ordering(&self) -> Vec>> { + fn required_input_ordering(&self) -> Vec> { vec![None; self.children().len()] } diff --git a/datafusion/physical-plan/src/insert.rs b/datafusion/physical-plan/src/insert.rs index 5cd864125e29..5dc27bc239d2 100644 --- a/datafusion/physical-plan/src/insert.rs +++ b/datafusion/physical-plan/src/insert.rs @@ -35,11 +35,10 @@ use arrow_array::{ArrayRef, UInt64Array}; use arrow_schema::{DataType, Field, Schema}; use datafusion_common::{internal_err, Result}; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{ - Distribution, EquivalenceProperties, PhysicalSortRequirement, -}; +use datafusion_physical_expr::{Distribution, EquivalenceProperties}; use async_trait::async_trait; +use datafusion_physical_expr_common::sort_expr::LexRequirement; use futures::StreamExt; /// `DataSink` implements writing streams of [`RecordBatch`]es to @@ -90,7 +89,7 @@ pub struct DataSinkExec { /// Schema describing the structure of the output data. count_schema: SchemaRef, /// Optional required sort order for output data. 
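The type changes in this and the following files all swap explicit `Vec<PhysicalSortRequirement>` spellings for the `LexRequirement` name imported from `datafusion_physical_expr_common::sort_expr`. The sketch below assumes `LexRequirement` is a plain type alias, which is consistent with it being a drop-in replacement everywhere in this diff:

```rust
use datafusion_physical_expr_common::sort_expr::PhysicalSortRequirement;

// Assumed definition (not shown in this diff): a lexicographic ordering
// requirement is just a vector of per-column sort requirements.
pub type LexRequirement = Vec<PhysicalSortRequirement>;

// So a signature such as
//   fn required_input_ordering(&self) -> Vec<Option<Vec<PhysicalSortRequirement>>>
// becomes
//   fn required_input_ordering(&self) -> Vec<Option<LexRequirement>>
// with no behavioral change.
```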
- sort_order: Option>, + sort_order: Option, cache: PlanProperties, } @@ -106,7 +105,7 @@ impl DataSinkExec { input: Arc, sink: Arc, sink_schema: SchemaRef, - sort_order: Option>, + sort_order: Option, ) -> Self { let count_schema = make_count_schema(); let cache = Self::create_schema(&input, count_schema); @@ -131,7 +130,7 @@ impl DataSinkExec { } /// Optional sort order for output data - pub fn sort_order(&self) -> &Option> { + pub fn sort_order(&self) -> &Option { &self.sort_order } @@ -189,7 +188,7 @@ impl ExecutionPlan for DataSinkExec { vec![Distribution::SinglePartition; self.children().len()] } - fn required_input_ordering(&self) -> Vec>> { + fn required_input_ordering(&self) -> Vec> { // The required input ordering is set externally (e.g. by a `ListingTable`). // Otherwise, there is no specific requirement (i.e. `sort_expr` is `None`). vec![self.sort_order.as_ref().cloned()] diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 0868ee721665..b99d4f17c42a 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -488,7 +488,7 @@ mod tests { use crate::test::build_table_scan_i32; use datafusion_common::{assert_batches_sorted_eq, assert_contains}; - use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; + use datafusion_execution::runtime_env::RuntimeEnvBuilder; async fn join_collect( left: Arc, @@ -673,8 +673,11 @@ mod tests { #[tokio::test] async fn test_overallocation() -> Result<()> { - let runtime_config = RuntimeConfig::new().with_memory_limit(100, 1.0); - let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + let runtime = Arc::new( + RuntimeEnvBuilder::new() + .with_memory_limit(100, 1.0) + .build()?, + ); let task_ctx = TaskContext::default().with_runtime(runtime); let task_ctx = Arc::new(task_ctx); diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index e40a07cf6220..f20d00e1a298 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -49,8 +49,7 @@ use crate::{ }; use arrow::array::{ - Array, ArrayRef, BooleanArray, BooleanBufferBuilder, PrimitiveArray, UInt32Array, - UInt64Array, + Array, ArrayRef, BooleanArray, BooleanBufferBuilder, UInt32Array, UInt64Array, }; use arrow::compute::kernels::cmp::{eq, not_distinct}; use arrow::compute::{and, concat_batches, take, FilterBuilder}; @@ -1204,13 +1203,11 @@ fn lookup_join_hashmap( }) .collect::>>()?; - let (mut probe_builder, mut build_builder, next_offset) = build_hashmap + let (probe_indices, build_indices, next_offset) = build_hashmap .get_matched_indices_with_limit_offset(hashes_buffer, None, limit, offset); - let build_indices: UInt64Array = - PrimitiveArray::new(build_builder.finish().into(), None); - let probe_indices: UInt32Array = - PrimitiveArray::new(probe_builder.finish().into(), None); + let build_indices: UInt64Array = build_indices.into(); + let probe_indices: UInt32Array = probe_indices.into(); let (build_indices, probe_indices) = equal_rows_arr( &build_indices, @@ -1566,7 +1563,7 @@ mod tests { test::build_table_i32, test::exec::MockExec, }; - use arrow::array::{Date32Array, Int32Array, UInt32Builder, UInt64Builder}; + use arrow::array::{Date32Array, Int32Array}; use arrow::datatypes::{DataType, Field}; use arrow_array::StructArray; use arrow_buffer::NullBuffer; @@ -1575,7 +1572,7 @@ mod tests { ScalarValue, }; use 
datafusion_execution::config::SessionConfig; - use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; + use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_expr::Operator; use datafusion_physical_expr::expressions::{BinaryExpr, Literal}; @@ -3169,17 +3166,13 @@ mod tests { (0, None), )?; - let mut left_ids = UInt64Builder::with_capacity(0); - left_ids.append_value(0); - left_ids.append_value(1); + let left_ids: UInt64Array = vec![0, 1].into(); - let mut right_ids = UInt32Builder::with_capacity(0); - right_ids.append_value(0); - right_ids.append_value(1); + let right_ids: UInt32Array = vec![0, 1].into(); - assert_eq!(left_ids.finish(), l); + assert_eq!(left_ids, l); - assert_eq!(right_ids.finish(), r); + assert_eq!(right_ids, r); Ok(()) } @@ -3805,8 +3798,11 @@ mod tests { ]; for join_type in join_types { - let runtime_config = RuntimeConfig::new().with_memory_limit(100, 1.0); - let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + let runtime = Arc::new( + RuntimeEnvBuilder::new() + .with_memory_limit(100, 1.0) + .build()?, + ); let task_ctx = TaskContext::default().with_runtime(runtime); let task_ctx = Arc::new(task_ctx); @@ -3878,8 +3874,11 @@ mod tests { ]; for join_type in join_types { - let runtime_config = RuntimeConfig::new().with_memory_limit(100, 1.0); - let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + let runtime = Arc::new( + RuntimeEnvBuilder::new() + .with_memory_limit(100, 1.0) + .build()?, + ); let session_config = SessionConfig::default().with_batch_size(50); let task_ctx = TaskContext::default() .with_session_config(session_config) diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index 04a025c93288..3cd373544157 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -40,13 +40,12 @@ use crate::{ RecordBatchStream, SendableRecordBatchStream, }; -use arrow::array::{ - BooleanBufferBuilder, UInt32Array, UInt32Builder, UInt64Array, UInt64Builder, -}; +use arrow::array::{BooleanBufferBuilder, UInt32Array, UInt64Array}; use arrow::compute::concat_batches; -use arrow::datatypes::{Schema, SchemaRef}; +use arrow::datatypes::{Schema, SchemaRef, UInt64Type}; use arrow::record_batch::RecordBatch; use arrow::util::bit_util; +use arrow_array::PrimitiveArray; use datafusion_common::{exec_datafusion_err, JoinSide, Result, Statistics}; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::TaskContext; @@ -573,23 +572,21 @@ fn join_left_and_right_batch( ) })?; - let mut left_indices_builder = UInt64Builder::new(); - let mut right_indices_builder = UInt32Builder::new(); + let mut left_indices_builder: Vec = vec![]; + let mut right_indices_builder: Vec = vec![]; for (left_side, right_side) in indices { - left_indices_builder - .append_values(left_side.values(), &vec![true; left_side.len()]); - right_indices_builder - .append_values(right_side.values(), &vec![true; right_side.len()]); + left_indices_builder.extend(left_side.values()); + right_indices_builder.extend(right_side.values()); } - let left_side = left_indices_builder.finish(); - let right_side = right_indices_builder.finish(); + let left_side: PrimitiveArray = left_indices_builder.into(); + let right_side = right_indices_builder.into(); // set the left bitmap // and only full join need the left bitmap if need_produce_result_in_final(join_type) { let mut bitmap = 
visited_left_side.lock(); - left_side.iter().flatten().for_each(|x| { - bitmap.set_bit(x as usize, true); + left_side.values().iter().for_each(|x| { + bitmap.set_bit(*x as usize, true); }); } // adjust the two side indices base on the join type @@ -647,7 +644,7 @@ mod tests { use arrow::datatypes::{DataType, Field}; use datafusion_common::{assert_batches_sorted_eq, assert_contains, ScalarValue}; - use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; + use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_expr::Operator; use datafusion_physical_expr::expressions::{BinaryExpr, Literal}; use datafusion_physical_expr::{Partitioning, PhysicalExpr}; @@ -1022,8 +1019,11 @@ mod tests { ]; for join_type in join_types { - let runtime_config = RuntimeConfig::new().with_memory_limit(100, 1.0); - let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + let runtime = Arc::new( + RuntimeEnvBuilder::new() + .with_memory_limit(100, 1.0) + .build()?, + ); let task_ctx = TaskContext::default().with_runtime(runtime); let task_ctx = Arc::new(task_ctx); diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index 96d5ba728a30..09fe5d9ebc54 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -22,7 +22,7 @@ use std::any::Any; use std::cmp::Ordering; -use std::collections::VecDeque; +use std::collections::{HashMap, VecDeque}; use std::fmt::Formatter; use std::fs::File; use std::io::BufReader; @@ -51,6 +51,7 @@ use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_execution::TaskContext; use datafusion_physical_expr::equivalence::join_equivalence_properties; use datafusion_physical_expr::{PhysicalExprRef, PhysicalSortRequirement}; +use datafusion_physical_expr_common::sort_expr::LexRequirement; use crate::expressions::PhysicalSortExpr; use crate::joins::utils::{ @@ -288,7 +289,7 @@ impl ExecutionPlan for SortMergeJoinExec { ] } - fn required_input_ordering(&self) -> Vec>> { + fn required_input_ordering(&self) -> Vec> { vec![ Some(PhysicalSortRequirement::from_sort_exprs( &self.left_sort_exprs, @@ -595,8 +596,10 @@ struct BufferedBatch { /// Size estimation used for reserving / releasing memory pub size_estimation: usize, /// The indices of buffered batch that failed the join filter. + /// This is a map between buffered row index and a boolean value indicating whether all joined row + /// of the buffered row failed the join filter. /// When dequeuing the buffered batch, we need to produce null joined rows for these indices. - pub join_filter_failed_idxs: HashSet, + pub join_filter_failed_map: HashMap, /// Current buffered batch number of rows. 
Equal to batch.num_rows() /// but if batch is spilled to disk this property is preferable /// and less expensive @@ -637,7 +640,7 @@ impl BufferedBatch { join_arrays, null_joined: vec![], size_estimation, - join_filter_failed_idxs: HashSet::new(), + join_filter_failed_map: HashMap::new(), num_rows, spill_file: None, } @@ -1229,11 +1232,19 @@ impl SMJStream { } buffered_batch.null_joined.clear(); - // For buffered rows which are joined with streamed side but doesn't satisfy the join filter + // For buffered row which is joined with streamed side rows but all joined rows + // don't satisfy the join filter if output_not_matched_filter { + let not_matched_buffered_indices = buffered_batch + .join_filter_failed_map + .iter() + .filter_map(|(idx, failed)| if *failed { Some(*idx) } else { None }) + .collect::>(); + let buffered_indices = UInt64Array::from_iter_values( - buffered_batch.join_filter_failed_idxs.iter().copied(), + not_matched_buffered_indices.iter().copied(), ); + if let Some(record_batch) = produce_buffered_null_batch( &self.schema, &self.streamed_schema, @@ -1242,7 +1253,7 @@ impl SMJStream { )? { self.output_record_batches.push(record_batch); } - buffered_batch.join_filter_failed_idxs.clear(); + buffered_batch.join_filter_failed_map.clear(); } } Ok(()) @@ -1459,24 +1470,26 @@ impl SMJStream { // If it is joined with streamed side, but doesn't match the join filter, // we need to output it with nulls as streamed side. if matches!(self.join_type, JoinType::Full) { + let buffered_batch = &mut self.buffered_data.batches + [chunk.buffered_batch_idx.unwrap()]; + for i in 0..pre_mask.len() { - let buffered_batch = &mut self.buffered_data.batches - [chunk.buffered_batch_idx.unwrap()]; + // If the buffered row is not joined with streamed side, + // skip it. 
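The map update a few lines below is easiest to read in isolation: each buffered row starts out as `true` ("every joined row has failed the filter so far") and is AND-ed with the negation of the filter mask for each streamed row it joins against, so it stays `true` only if no joined row ever passed. A self-contained sketch of that rule, using plain `u64` indices and booleans rather than the arrow arrays:

```rust
use std::collections::HashMap;

/// Illustrative only: per buffered-row index, track whether *all* of its joined
/// rows failed the join filter (the rule applied to `join_filter_failed_map`).
fn record_filter_results(
    map: &mut HashMap<u64, bool>,
    joined: &[(u64, bool)], // (buffered row index, did this joined row pass the filter?)
) {
    for &(buffered_index, passed) in joined {
        let all_failed_so_far = *map.get(&buffered_index).unwrap_or(&true);
        map.insert(buffered_index, all_failed_so_far && !passed);
    }
}

// Example: buffered row 7 joins three streamed rows and only the second passes.
// record_filter_results leaves map[&7] == false, so row 7 is *not* emitted as a
// null-joined row for the FULL join when the buffered batch is drained.
```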
+ if buffered_indices.is_null(i) { + continue; + } + let buffered_index = buffered_indices.value(i); - if !pre_mask.value(i) { - // For a buffered row that is joined with streamed side but doesn't satisfy the join filter, - buffered_batch - .join_filter_failed_idxs - .insert(buffered_index); - } else if buffered_batch - .join_filter_failed_idxs - .contains(&buffered_index) - { - buffered_batch - .join_filter_failed_idxs - .remove(&buffered_index); - } + buffered_batch.join_filter_failed_map.insert( + buffered_index, + *buffered_batch + .join_filter_failed_map + .get(&buffered_index) + .unwrap_or(&true) + && !pre_mask.value(i), + ); } } } @@ -1965,7 +1978,7 @@ mod tests { }; use datafusion_execution::config::SessionConfig; use datafusion_execution::disk_manager::DiskManagerConfig; - use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; + use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_execution::TaskContext; use crate::expressions::Column; @@ -2887,10 +2900,12 @@ mod tests { ]; // Disable DiskManager to prevent spilling - let runtime_config = RuntimeConfig::new() - .with_memory_limit(100, 1.0) - .with_disk_manager(DiskManagerConfig::Disabled); - let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + let runtime = Arc::new( + RuntimeEnvBuilder::new() + .with_memory_limit(100, 1.0) + .with_disk_manager(DiskManagerConfig::Disabled) + .build()?, + ); let session_config = SessionConfig::default().with_batch_size(50); for join_type in join_types { @@ -2972,10 +2987,12 @@ mod tests { ]; // Disable DiskManager to prevent spilling - let runtime_config = RuntimeConfig::new() - .with_memory_limit(100, 1.0) - .with_disk_manager(DiskManagerConfig::Disabled); - let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + let runtime = Arc::new( + RuntimeEnvBuilder::new() + .with_memory_limit(100, 1.0) + .with_disk_manager(DiskManagerConfig::Disabled) + .build()?, + ); let session_config = SessionConfig::default().with_batch_size(50); for join_type in join_types { @@ -3035,10 +3052,12 @@ mod tests { ]; // Enable DiskManager to allow spilling - let runtime_config = RuntimeConfig::new() - .with_memory_limit(100, 1.0) - .with_disk_manager(DiskManagerConfig::NewOs); - let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + let runtime = Arc::new( + RuntimeEnvBuilder::new() + .with_memory_limit(100, 1.0) + .with_disk_manager(DiskManagerConfig::NewOs) + .build()?, + ); for batch_size in [1, 50] { let session_config = SessionConfig::default().with_batch_size(batch_size); @@ -3143,10 +3162,13 @@ mod tests { ]; // Enable DiskManager to allow spilling - let runtime_config = RuntimeConfig::new() - .with_memory_limit(500, 1.0) - .with_disk_manager(DiskManagerConfig::NewOs); - let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + let runtime = Arc::new( + RuntimeEnvBuilder::new() + .with_memory_limit(500, 1.0) + .with_disk_manager(DiskManagerConfig::NewOs) + .build()?, + ); + for batch_size in [1, 50] { let session_config = SessionConfig::default().with_batch_size(batch_size); diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 1bf2ef2fd5f7..ac718a95e9f4 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -72,6 +72,7 @@ use datafusion_physical_expr::intervals::cp_solver::ExprIntervalGraph; use datafusion_physical_expr::{PhysicalExprRef, PhysicalSortRequirement}; use ahash::RandomState; +use 
datafusion_physical_expr_common::sort_expr::LexRequirement; use futures::{ready, Stream, StreamExt}; use hashbrown::HashSet; use parking_lot::Mutex; @@ -410,7 +411,7 @@ impl ExecutionPlan for SymmetricHashJoinExec { } } - fn required_input_ordering(&self) -> Vec>> { + fn required_input_ordering(&self) -> Vec> { vec![ self.left_sort_exprs .as_ref() @@ -929,13 +930,11 @@ fn lookup_join_hashmap( let (mut matched_probe, mut matched_build) = build_hashmap .get_matched_indices(hash_values.iter().enumerate().rev(), deleted_offset); - matched_probe.as_slice_mut().reverse(); - matched_build.as_slice_mut().reverse(); + matched_probe.reverse(); + matched_build.reverse(); - let build_indices: UInt64Array = - PrimitiveArray::new(matched_build.finish().into(), None); - let probe_indices: UInt32Array = - PrimitiveArray::new(matched_probe.finish().into(), None); + let build_indices: UInt64Array = matched_build.into(); + let probe_indices: UInt32Array = matched_probe.into(); let (build_indices, probe_indices) = equal_rows_arr( &build_indices, diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index 80d8815bdebc..89f3feaf07be 100644 --- a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -31,7 +31,7 @@ use crate::{ use arrow::array::{ downcast_array, new_null_array, Array, BooleanBufferBuilder, UInt32Array, - UInt32BufferBuilder, UInt32Builder, UInt64Array, UInt64BufferBuilder, + UInt32Builder, UInt64Array, }; use arrow::compute; use arrow::datatypes::{Field, Schema, SchemaBuilder, UInt32Type, UInt64Type}; @@ -163,8 +163,8 @@ macro_rules! chain_traverse { } else { i }; - $match_indices.append(match_row_idx); - $input_indices.append($input_idx as u32); + $match_indices.push(match_row_idx); + $input_indices.push($input_idx as u32); $remaining_output -= 1; // Follow the chain to get the next index value let next = $next_chain[match_row_idx as usize]; @@ -238,9 +238,9 @@ pub trait JoinHashMapType { &self, iter: impl Iterator, deleted_offset: Option, - ) -> (UInt32BufferBuilder, UInt64BufferBuilder) { - let mut input_indices = UInt32BufferBuilder::new(0); - let mut match_indices = UInt64BufferBuilder::new(0); + ) -> (Vec, Vec) { + let mut input_indices = vec![]; + let mut match_indices = vec![]; let hash_map = self.get_map(); let next_chain = self.get_list(); @@ -261,8 +261,8 @@ pub trait JoinHashMapType { } else { i }; - match_indices.append(match_row_idx); - input_indices.append(row_idx as u32); + match_indices.push(match_row_idx); + input_indices.push(row_idx as u32); // Follow the chain to get the next index value let next = next_chain[match_row_idx as usize]; if next == 0 { @@ -289,13 +289,9 @@ pub trait JoinHashMapType { deleted_offset: Option, limit: usize, offset: JoinHashMapOffset, - ) -> ( - UInt32BufferBuilder, - UInt64BufferBuilder, - Option, - ) { - let mut input_indices = UInt32BufferBuilder::new(0); - let mut match_indices = UInt64BufferBuilder::new(0); + ) -> (Vec, Vec, Option) { + let mut input_indices = vec![]; + let mut match_indices = vec![]; let mut remaining_output = limit; @@ -2447,7 +2443,7 @@ mod tests { Statistics { num_rows: Absent, total_byte_size: Absent, - column_statistics: dummy_column_stats.clone(), + column_statistics: dummy_column_stats, }, &join_on, ); diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index 59c5da6b6fb2..026798c5798b 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -31,7 +31,7 @@ 
pub use datafusion_expr::{Accumulator, ColumnarValue}; pub use datafusion_physical_expr::window::WindowExpr; use datafusion_physical_expr::PhysicalSortExpr; pub use datafusion_physical_expr::{ - expressions, functions, udf, AggregateExpr, Distribution, Partitioning, PhysicalExpr, + expressions, functions, udf, Distribution, Partitioning, PhysicalExpr, }; pub use crate::display::{DefaultDisplay, DisplayAs, DisplayFormatType, VerboseDisplay}; @@ -82,8 +82,9 @@ pub mod windows; pub mod work_table; pub mod udaf { - pub use datafusion_physical_expr_functions_aggregate::aggregate::AggregateFunctionExpr; + pub use datafusion_physical_expr::aggregate::AggregateFunctionExpr; } +pub mod coalesce; #[cfg(test)] pub mod test; diff --git a/datafusion/physical-plan/src/recursive_query.rs b/datafusion/physical-plan/src/recursive_query.rs index bd9303f97db0..e9ea9d4f5032 100644 --- a/datafusion/physical-plan/src/recursive_query.rs +++ b/datafusion/physical-plan/src/recursive_query.rs @@ -394,7 +394,7 @@ impl Stream for RecursiveQueryStream { self.recursive_stream = None; self.poll_next_iteration(cx) } - Some(Ok(batch)) => self.push_batch(batch.clone()), + Some(Ok(batch)) => self.push_batch(batch), _ => Poll::Ready(batch_result), } } else { diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index 656d82215bbe..650006a9d02d 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -38,9 +38,10 @@ use crate::sorts::streaming_merge; use crate::stream::RecordBatchStreamAdapter; use crate::{DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, Statistics}; -use arrow::array::{ArrayRef, UInt64Builder}; -use arrow::datatypes::SchemaRef; +use arrow::array::ArrayRef; +use arrow::datatypes::{SchemaRef, UInt64Type}; use arrow::record_batch::RecordBatch; +use arrow_array::PrimitiveArray; use datafusion_common::utils::transpose; use datafusion_common::{arrow_datafusion_err, not_impl_err, DataFusionError, Result}; use datafusion_common_runtime::SpawnedTask; @@ -275,12 +276,11 @@ impl BatchPartitioner { create_hashes(&arrays, random_state, hash_buffer)?; let mut indices: Vec<_> = (0..*partitions) - .map(|_| UInt64Builder::with_capacity(batch.num_rows())) + .map(|_| Vec::with_capacity(batch.num_rows())) .collect(); for (index, hash) in hash_buffer.iter().enumerate() { - indices[(*hash % *partitions as u64) as usize] - .append_value(index as u64); + indices[(*hash % *partitions as u64) as usize].push(index as u64); } // Finished building index-arrays for output partitions @@ -291,8 +291,8 @@ impl BatchPartitioner { let it = indices .into_iter() .enumerate() - .filter_map(|(partition, mut indices)| { - let indices = indices.finish(); + .filter_map(|(partition, indices)| { + let indices: PrimitiveArray = indices.into(); (!indices.is_empty()).then_some((partition, indices)) }) .map(move |(partition, indices)| { @@ -1025,7 +1025,7 @@ mod tests { use arrow::datatypes::{DataType, Field, Schema}; use datafusion_common::cast::as_string_array; use datafusion_common::{assert_batches_sorted_eq, exec_err}; - use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; + use datafusion_execution::runtime_env::RuntimeEnvBuilder; use tokio::task::JoinSet; @@ -1507,7 +1507,9 @@ mod tests { // setup up context let runtime = Arc::new( - RuntimeEnv::new(RuntimeConfig::default().with_memory_limit(1, 1.0)).unwrap(), + RuntimeEnvBuilder::default() + .with_memory_limit(1, 1.0) + .build()?, ); let task_ctx = 
TaskContext::default().with_runtime(runtime); diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index e7e1c5481f80..e92a57493141 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -499,6 +499,12 @@ impl ExternalSorter { metrics: BaselineMetrics, ) -> Result { assert_ne!(self.in_mem_batches.len(), 0); + + // The elapsed compute timer is updated when the value is dropped. + // There is no need for an explicit call to drop. + let elapsed_compute = metrics.elapsed_compute().clone(); + let _timer = elapsed_compute.timer(); + if self.in_mem_batches.len() == 1 { let batch = self.in_mem_batches.remove(0); let reservation = self.reservation.take(); @@ -552,7 +558,9 @@ impl ExternalSorter { let fetch = self.fetch; let expressions = Arc::clone(&self.expr); let stream = futures::stream::once(futures::future::lazy(move |_| { + let timer = metrics.elapsed_compute().timer(); let sorted = sort_batch(&batch, &expressions, fetch)?; + timer.done(); metrics.record_output(sorted.num_rows()); drop(batch); drop(reservation); @@ -958,7 +966,7 @@ mod tests { use arrow::datatypes::*; use datafusion_common::cast::as_primitive_array; use datafusion_execution::config::SessionConfig; - use datafusion_execution::runtime_env::RuntimeConfig; + use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_common::ScalarValue; use datafusion_physical_expr::expressions::Literal; @@ -1001,9 +1009,11 @@ mod tests { .options() .execution .sort_spill_reservation_bytes; - let rt_config = RuntimeConfig::new() - .with_memory_limit(sort_spill_reservation_bytes + 12288, 1.0); - let runtime = Arc::new(RuntimeEnv::new(rt_config)?); + let runtime = Arc::new( + RuntimeEnvBuilder::new() + .with_memory_limit(sort_spill_reservation_bytes + 12288, 1.0) + .build()?, + ); let task_ctx = Arc::new( TaskContext::default() .with_session_config(session_config) @@ -1077,11 +1087,14 @@ mod tests { .execution .sort_spill_reservation_bytes; - let rt_config = RuntimeConfig::new().with_memory_limit( - sort_spill_reservation_bytes + avg_batch_size * (partitions - 1), - 1.0, + let runtime = Arc::new( + RuntimeEnvBuilder::new() + .with_memory_limit( + sort_spill_reservation_bytes + avg_batch_size * (partitions - 1), + 1.0, + ) + .build()?, ); - let runtime = Arc::new(RuntimeEnv::new(rt_config)?); let task_ctx = Arc::new( TaskContext::default() .with_runtime(runtime) diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index 7ba1d77aea4e..131fa71217cc 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -22,6 +22,7 @@ use std::sync::Arc; use crate::common::spawn_buffered; use crate::expressions::PhysicalSortExpr; +use crate::limit::LimitStream; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::sorts::streaming_merge; use crate::{ @@ -34,6 +35,7 @@ use datafusion_execution::memory_pool::MemoryConsumer; use datafusion_execution::TaskContext; use datafusion_physical_expr::PhysicalSortRequirement; +use datafusion_physical_expr_common::sort_expr::LexRequirement; use log::{debug, trace}; /// Sort preserving merge execution plan @@ -186,7 +188,7 @@ impl ExecutionPlan for SortPreservingMergeExec { vec![false] } - fn required_input_ordering(&self) -> Vec>> { + fn required_input_ordering(&self) -> Vec> { 
vec![Some(PhysicalSortRequirement::from_sort_exprs(&self.expr))] } @@ -238,12 +240,23 @@ impl ExecutionPlan for SortPreservingMergeExec { 0 => internal_err!( "SortPreservingMergeExec requires at least one input partition" ), - 1 => { - // bypass if there is only one partition to merge (no metrics in this case either) - let result = self.input.execute(0, context); - debug!("Done getting stream for SortPreservingMergeExec::execute with 1 input"); - result - } + 1 => match self.fetch { + Some(fetch) => { + let stream = self.input.execute(0, context)?; + debug!("Done getting stream for SortPreservingMergeExec::execute with 1 input with {fetch}"); + Ok(Box::pin(LimitStream::new( + stream, + 0, + Some(fetch), + BaselineMetrics::new(&self.metrics, partition), + ))) + } + None => { + let stream = self.input.execute(0, context); + debug!("Done getting stream for SortPreservingMergeExec::execute with 1 input without fetch"); + stream + } + }, _ => { let receivers = (0..input_partitions) .map(|partition| { @@ -817,6 +830,79 @@ mod tests { ); } + #[tokio::test] + async fn test_sort_merge_single_partition_with_fetch() { + let task_ctx = Arc::new(TaskContext::default()); + let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 7, 9, 3])); + let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c", "d", "e"])); + let batch = RecordBatch::try_from_iter(vec![("a", a), ("b", b)]).unwrap(); + let schema = batch.schema(); + + let sort = vec![PhysicalSortExpr { + expr: col("b", &schema).unwrap(), + options: SortOptions { + descending: false, + nulls_first: true, + }, + }]; + let exec = MemoryExec::try_new(&[vec![batch]], schema, None).unwrap(); + let merge = Arc::new( + SortPreservingMergeExec::new(sort, Arc::new(exec)).with_fetch(Some(2)), + ); + + let collected = collect(merge, task_ctx).await.unwrap(); + assert_eq!(collected.len(), 1); + + assert_batches_eq!( + &[ + "+---+---+", + "| a | b |", + "+---+---+", + "| 1 | a |", + "| 2 | b |", + "+---+---+", + ], + collected.as_slice() + ); + } + + #[tokio::test] + async fn test_sort_merge_single_partition_without_fetch() { + let task_ctx = Arc::new(TaskContext::default()); + let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 7, 9, 3])); + let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c", "d", "e"])); + let batch = RecordBatch::try_from_iter(vec![("a", a), ("b", b)]).unwrap(); + let schema = batch.schema(); + + let sort = vec![PhysicalSortExpr { + expr: col("b", &schema).unwrap(), + options: SortOptions { + descending: false, + nulls_first: true, + }, + }]; + let exec = MemoryExec::try_new(&[vec![batch]], schema, None).unwrap(); + let merge = Arc::new(SortPreservingMergeExec::new(sort, Arc::new(exec))); + + let collected = collect(merge, task_ctx).await.unwrap(); + assert_eq!(collected.len(), 1); + + assert_batches_eq!( + &[ + "+---+---+", + "| a | b |", + "+---+---+", + "| 1 | a |", + "| 2 | b |", + "| 7 | c |", + "| 9 | d |", + "| 3 | e |", + "+---+---+", + ], + collected.as_slice() + ); + } + #[tokio::test] async fn test_async() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs index efb5dea1ec6e..c1bcd83a6fd2 100644 --- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs @@ -59,7 +59,8 @@ use datafusion_expr::ColumnarValue; use datafusion_physical_expr::window::{ PartitionBatches, PartitionKey, 
PartitionWindowAggStates, WindowState, }; -use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; +use datafusion_physical_expr::PhysicalExpr; +use datafusion_physical_expr_common::sort_expr::LexRequirement; use futures::stream::Stream; use futures::{ready, StreamExt}; use hashbrown::raw::RawTable; @@ -253,7 +254,7 @@ impl ExecutionPlan for BoundedWindowAggExec { vec![&self.input] } - fn required_input_ordering(&self) -> Vec>> { + fn required_input_ordering(&self) -> Vec> { let partition_bys = self.window_expr()[0].partition_by(); let order_keys = self.window_expr()[0].order_by(); if self.input_order_mode != InputOrderMode::Sorted @@ -1311,7 +1312,7 @@ mod tests { &args, &partitionby_exprs, &orderby_exprs, - Arc::new(window_frame.clone()), + Arc::new(window_frame), &input.schema(), false, )?], @@ -1484,7 +1485,7 @@ mod tests { let partitions = vec![ Arc::new(TestStreamPartition { schema: Arc::clone(&schema), - batches: batches.clone(), + batches, idx: 0, state: PolingState::BatchReturn, sleep_duration: per_batch_wait_duration, diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index f938f4410a99..56823e6dec2d 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -30,19 +30,20 @@ use crate::{ use arrow::datatypes::Schema; use arrow_schema::{DataType, Field, SchemaRef}; -use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{ + exec_datafusion_err, exec_err, DataFusionError, Result, ScalarValue, +}; use datafusion_expr::{ BuiltInWindowFunction, PartitionEvaluator, WindowFrame, WindowFunctionDefinition, WindowUDF, }; +use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr}; use datafusion_physical_expr::equivalence::collapse_lex_req; use datafusion_physical_expr::{ reverse_order_bys, window::{BuiltInWindowFunctionExpr, SlidingAggregateWindowExpr}, - AggregateExpr, ConstExpr, EquivalenceProperties, LexOrdering, - PhysicalSortRequirement, + ConstExpr, EquivalenceProperties, LexOrdering, PhysicalSortRequirement, }; -use datafusion_physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; use itertools::Itertools; mod bounded_window_agg_exec; @@ -54,6 +55,7 @@ use datafusion_physical_expr::expressions::Column; pub use datafusion_physical_expr::window::{ BuiltInWindowExpr, PlainAggregateWindowExpr, WindowExpr, }; +use datafusion_physical_expr_common::sort_expr::LexRequirement; pub use window_agg_exec::WindowAggExec; /// Build field from window function and add it into schema @@ -139,7 +141,7 @@ fn window_expr_from_aggregate_expr( partition_by: &[Arc], order_by: &[PhysicalSortExpr], window_frame: Arc, - aggregate: Arc, + aggregate: Arc, ) -> Arc { // Is there a potentially unlimited sized window frame? let unbounded_window = window_frame.start_bound.is_unbounded(); @@ -284,7 +286,9 @@ fn create_built_in_window_expr( args[1] .as_any() .downcast_ref::() - .unwrap() + .ok_or_else(|| { + exec_datafusion_err!("Expected a signed integer literal for the second argument of nth_value, got {}", args[1]) + })? 
.value() .clone(), )?; @@ -397,7 +401,7 @@ pub(crate) fn calc_requirements< >( partition_by_exprs: impl IntoIterator, orderby_sort_exprs: impl IntoIterator, -) -> Option> { +) -> Option { let mut sort_reqs = partition_by_exprs .into_iter() .map(|partition_by| { @@ -567,7 +571,7 @@ pub fn get_window_mode( input: &Arc, ) -> Option<(bool, InputOrderMode)> { let input_eqs = input.equivalence_properties().clone(); - let mut partition_by_reqs: Vec = vec![]; + let mut partition_by_reqs: LexRequirement = vec![]; let (_, indices) = input_eqs.find_longest_permutation(partitionby_exprs); partition_by_reqs.extend(indices.iter().map(|&idx| PhysicalSortRequirement { expr: Arc::clone(&partitionby_exprs[idx]), @@ -724,7 +728,7 @@ mod tests { orderbys.push(PhysicalSortExpr { expr, options }); } - let mut expected: Option> = None; + let mut expected: Option = None; for (col_name, reqs) in expected_params { let options = reqs.map(|(descending, nulls_first)| SortOptions { descending, diff --git a/datafusion/physical-plan/src/windows/window_agg_exec.rs b/datafusion/physical-plan/src/windows/window_agg_exec.rs index d2f7090fca17..afe9700ed08c 100644 --- a/datafusion/physical-plan/src/windows/window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/window_agg_exec.rs @@ -43,7 +43,7 @@ use datafusion_common::stats::Precision; use datafusion_common::utils::{evaluate_partition_ranges, transpose}; use datafusion_common::{internal_err, Result}; use datafusion_execution::TaskContext; -use datafusion_physical_expr::PhysicalSortRequirement; +use datafusion_physical_expr_common::sort_expr::LexRequirement; use futures::{ready, Stream, StreamExt}; /// Window execution plan @@ -191,7 +191,7 @@ impl ExecutionPlan for WindowAggExec { vec![true] } - fn required_input_ordering(&self) -> Vec>> { + fn required_input_ordering(&self) -> Vec> { let partition_bys = self.window_expr()[0].partition_by(); let order_keys = self.window_expr()[0].order_by(); if self.ordered_partition_by_indices.len() < partition_bys.len() { diff --git a/datafusion/proto/gen/src/main.rs b/datafusion/proto/gen/src/main.rs index d3b3c92f6065..be61ff58fa8d 100644 --- a/datafusion/proto/gen/src/main.rs +++ b/datafusion/proto/gen/src/main.rs @@ -55,7 +55,7 @@ fn main() -> Result<(), String> { let common_path = proto_dir.join("src/datafusion_common.rs"); println!( "Copying {} to {}", - prost.clone().display(), + prost.display(), proto_dir.join("src/generated/prost.rs").display() ); std::fs::copy(prost, proto_dir.join("src/generated/prost.rs")).unwrap(); diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index acf540d44465..826992e132ba 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -1140,6 +1140,7 @@ message NestedLoopJoinExecNode { message CoalesceBatchesExecNode { PhysicalPlanNode input = 1; uint32 target_batch_size = 2; + optional uint32 fetch = 3; } message CoalescePartitionsExecNode { diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 489b6c67534f..b4d63798f080 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -2000,6 +2000,9 @@ impl serde::Serialize for CoalesceBatchesExecNode { if self.target_batch_size != 0 { len += 1; } + if self.fetch.is_some() { + len += 1; + } let mut struct_ser = serializer.serialize_struct("datafusion.CoalesceBatchesExecNode", len)?; if let Some(v) = self.input.as_ref() { struct_ser.serialize_field("input", v)?; @@ -2007,6 
+2010,9 @@ impl serde::Serialize for CoalesceBatchesExecNode { if self.target_batch_size != 0 { struct_ser.serialize_field("targetBatchSize", &self.target_batch_size)?; } + if let Some(v) = self.fetch.as_ref() { + struct_ser.serialize_field("fetch", v)?; + } struct_ser.end() } } @@ -2020,12 +2026,14 @@ impl<'de> serde::Deserialize<'de> for CoalesceBatchesExecNode { "input", "target_batch_size", "targetBatchSize", + "fetch", ]; #[allow(clippy::enum_variant_names)] enum GeneratedField { Input, TargetBatchSize, + Fetch, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -2049,6 +2057,7 @@ impl<'de> serde::Deserialize<'de> for CoalesceBatchesExecNode { match value { "input" => Ok(GeneratedField::Input), "targetBatchSize" | "target_batch_size" => Ok(GeneratedField::TargetBatchSize), + "fetch" => Ok(GeneratedField::Fetch), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -2070,6 +2079,7 @@ impl<'de> serde::Deserialize<'de> for CoalesceBatchesExecNode { { let mut input__ = None; let mut target_batch_size__ = None; + let mut fetch__ = None; while let Some(k) = map_.next_key()? { match k { GeneratedField::Input => { @@ -2086,11 +2096,20 @@ impl<'de> serde::Deserialize<'de> for CoalesceBatchesExecNode { Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0) ; } + GeneratedField::Fetch => { + if fetch__.is_some() { + return Err(serde::de::Error::duplicate_field("fetch")); + } + fetch__ = + map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| x.0) + ; + } } } Ok(CoalesceBatchesExecNode { input: input__, target_batch_size: target_batch_size__.unwrap_or_default(), + fetch: fetch__, }) } } diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index c98c950d35f9..875d2af75dd7 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -1813,6 +1813,8 @@ pub struct CoalesceBatchesExecNode { pub input: ::core::option::Option<::prost::alloc::boxed::Box>, #[prost(uint32, tag = "2")] pub target_batch_size: u32, + #[prost(uint32, optional, tag = "3")] + pub fetch: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index b74237b5281b..acda1298dd80 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -586,7 +586,10 @@ pub fn parse_expr( parse_exprs(&pb.args, registry, codec)?, pb.distinct, parse_optional_expr(pb.filter.as_deref(), registry, codec)?.map(Box::new), - parse_vec_expr(&pb.order_by, registry, codec)?, + match pb.order_by.len() { + 0 => None, + _ => Some(parse_exprs(&pb.order_by, registry, codec)?), + }, None, ))) } @@ -676,16 +679,6 @@ pub fn from_proto_binary_op(op: &str) -> Result { } } -fn parse_vec_expr( - p: &[protobuf::LogicalExprNode], - registry: &dyn FunctionRegistry, - codec: &dyn LogicalExtensionCodec, -) -> Result>, Error> { - let res = parse_exprs(p, registry, codec)?; - // Convert empty vector to None. 
- Ok((!res.is_empty()).then_some(res)) -} - fn parse_optional_expr( p: Option<&protobuf::LogicalExprNode>, registry: &dyn FunctionRegistry, diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index bc019725f36c..67977b1795a6 100644 --- a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -29,7 +29,7 @@ use crate::{ }, }; -use crate::protobuf::{proto_error, FromProtoError, ToProtoError}; +use crate::protobuf::{proto_error, ToProtoError}; use arrow::datatypes::{DataType, Schema, SchemaRef}; #[cfg(feature = "parquet")] use datafusion::datasource::file_format::parquet::ParquetFormat; @@ -66,11 +66,10 @@ use datafusion_expr::{ }; use datafusion_expr::{AggregateUDF, Unnest}; +use self::to_proto::{serialize_expr, serialize_exprs}; use prost::bytes::BufMut; use prost::Message; -use self::to_proto::serialize_expr; - pub mod file_formats; pub mod from_proto; pub mod to_proto; @@ -273,13 +272,7 @@ impl AsLogicalPlan for LogicalPlanNode { values .values_list .chunks_exact(n_cols) - .map(|r| { - r.iter() - .map(|expr| { - from_proto::parse_expr(expr, ctx, extension_codec) - }) - .collect::, FromProtoError>>() - }) + .map(|r| from_proto::parse_exprs(r, ctx, extension_codec)) .collect::, _>>() .map_err(|e| e.into()) }?; @@ -288,11 +281,8 @@ impl AsLogicalPlan for LogicalPlanNode { LogicalPlanType::Projection(projection) => { let input: LogicalPlan = into_logical_plan!(projection.input, ctx, extension_codec)?; - let expr: Vec = projection - .expr - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; + let expr: Vec = + from_proto::parse_exprs(&projection.expr, ctx, extension_codec)?; let new_proj = project(input, expr)?; match projection.optional_alias.as_ref() { @@ -324,26 +314,17 @@ impl AsLogicalPlan for LogicalPlanNode { LogicalPlanType::Window(window) => { let input: LogicalPlan = into_logical_plan!(window.input, ctx, extension_codec)?; - let window_expr = window - .window_expr - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; + let window_expr = + from_proto::parse_exprs(&window.window_expr, ctx, extension_codec)?; LogicalPlanBuilder::from(input).window(window_expr)?.build() } LogicalPlanType::Aggregate(aggregate) => { let input: LogicalPlan = into_logical_plan!(aggregate.input, ctx, extension_codec)?; - let group_expr = aggregate - .group_expr - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; - let aggr_expr = aggregate - .aggr_expr - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; + let group_expr = + from_proto::parse_exprs(&aggregate.group_expr, ctx, extension_codec)?; + let aggr_expr = + from_proto::parse_exprs(&aggregate.aggr_expr, ctx, extension_codec)?; LogicalPlanBuilder::from(input) .aggregate(group_expr, aggr_expr)? 
.build() @@ -361,20 +342,16 @@ impl AsLogicalPlan for LogicalPlanNode { projection = Some(column_indices); } - let filters = scan - .filters - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; + let filters = + from_proto::parse_exprs(&scan.filters, ctx, extension_codec)?; let mut all_sort_orders = vec![]; for order in &scan.file_sort_order { - let file_sort_order = order - .logical_expr_nodes - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; - all_sort_orders.push(file_sort_order) + all_sort_orders.push(from_proto::parse_exprs( + &order.logical_expr_nodes, + ctx, + extension_codec, + )?) } let file_format: Arc = @@ -475,11 +452,8 @@ impl AsLogicalPlan for LogicalPlanNode { projection = Some(column_indices); } - let filters = scan - .filters - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; + let filters = + from_proto::parse_exprs(&scan.filters, ctx, extension_codec)?; let table_name = from_table_reference(scan.table_name.as_ref(), "CustomScan")?; @@ -502,11 +476,8 @@ impl AsLogicalPlan for LogicalPlanNode { LogicalPlanType::Sort(sort) => { let input: LogicalPlan = into_logical_plan!(sort.input, ctx, extension_codec)?; - let sort_expr: Vec = sort - .expr - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; + let sort_expr: Vec = + from_proto::parse_exprs(&sort.expr, ctx, extension_codec)?; LogicalPlanBuilder::from(input).sort(sort_expr)?.build() } LogicalPlanType::Repartition(repartition) => { @@ -525,12 +496,7 @@ impl AsLogicalPlan for LogicalPlanNode { hash_expr: pb_hash_expr, partition_count, }) => Partitioning::Hash( - pb_hash_expr - .iter() - .map(|expr| { - from_proto::parse_expr(expr, ctx, extension_codec) - }) - .collect::, _>>()?, + from_proto::parse_exprs(pb_hash_expr, ctx, extension_codec)?, *partition_count as usize, ), PartitionMethod::RoundRobin(partition_count) => { @@ -570,12 +536,11 @@ impl AsLogicalPlan for LogicalPlanNode { let mut order_exprs = vec![]; for expr in &create_extern_table.order_exprs { - let order_expr = expr - .logical_expr_nodes - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; - order_exprs.push(order_expr) + order_exprs.push(from_proto::parse_exprs( + &expr.logical_expr_nodes, + ctx, + extension_codec, + )?); } let mut column_defaults = @@ -693,16 +658,10 @@ impl AsLogicalPlan for LogicalPlanNode { LogicalPlanBuilder::from(input).limit(skip, fetch)?.build() } LogicalPlanType::Join(join) => { - let left_keys: Vec = join - .left_join_key - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; - let right_keys: Vec = join - .right_join_key - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; + let left_keys: Vec = + from_proto::parse_exprs(&join.left_join_key, ctx, extension_codec)?; + let right_keys: Vec = + from_proto::parse_exprs(&join.right_join_key, ctx, extension_codec)?; let join_type = protobuf::JoinType::try_from(join.join_type).map_err(|_| { proto_error(format!( @@ -804,27 +763,20 @@ impl AsLogicalPlan for LogicalPlanNode { LogicalPlanType::DistinctOn(distinct_on) => { let input: LogicalPlan = into_logical_plan!(distinct_on.input, ctx, extension_codec)?; - let on_expr = distinct_on - .on_expr - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; - let select_expr = distinct_on - 
.select_expr - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; + let on_expr = + from_proto::parse_exprs(&distinct_on.on_expr, ctx, extension_codec)?; + let select_expr = from_proto::parse_exprs( + &distinct_on.select_expr, + ctx, + extension_codec, + )?; let sort_expr = match distinct_on.sort_expr.len() { 0 => None, - _ => Some( - distinct_on - .sort_expr - .iter() - .map(|expr| { - from_proto::parse_expr(expr, ctx, extension_codec) - }) - .collect::, _>>()?, - ), + _ => Some(from_proto::parse_exprs( + &distinct_on.sort_expr, + ctx, + extension_codec, + )?), }; LogicalPlanBuilder::from(input) .distinct_on(on_expr, select_expr, sort_expr)? @@ -943,11 +895,8 @@ impl AsLogicalPlan for LogicalPlanNode { } else { values[0].len() } as u64; - let values_list = values - .iter() - .flatten() - .map(|v| serialize_expr(v, extension_codec)) - .collect::, _>>()?; + let values_list = + serialize_exprs(values.iter().flatten(), extension_codec)?; Ok(protobuf::LogicalPlanNode { logical_plan_type: Some(LogicalPlanType::Values( protobuf::ValuesNode { @@ -982,10 +931,8 @@ impl AsLogicalPlan for LogicalPlanNode { }; let schema: protobuf::Schema = schema.as_ref().try_into()?; - let filters: Vec = filters - .iter() - .map(|filter| serialize_expr(filter, extension_codec)) - .collect::, _>>()?; + let filters: Vec = + serialize_exprs(filters, extension_codec)?; if let Some(listing_table) = source.downcast_ref::() { let any = listing_table.options().format.as_any(); @@ -1037,10 +984,7 @@ impl AsLogicalPlan for LogicalPlanNode { let mut exprs_vec: Vec = vec![]; for order in &options.file_sort_order { let expr_vec = LogicalExprNodeCollection { - logical_expr_nodes: order - .iter() - .map(|expr| serialize_expr(expr, extension_codec)) - .collect::, ToProtoError>>()?, + logical_expr_nodes: serialize_exprs(order, extension_codec)?, }; exprs_vec.push(expr_vec); } @@ -1118,10 +1062,7 @@ impl AsLogicalPlan for LogicalPlanNode { extension_codec, )?, )), - expr: expr - .iter() - .map(|expr| serialize_expr(expr, extension_codec)) - .collect::, ToProtoError>>()?, + expr: serialize_exprs(expr, extension_codec)?, optional_alias: None, }, ))), @@ -1173,22 +1114,13 @@ impl AsLogicalPlan for LogicalPlanNode { )?; let sort_expr = match sort_expr { None => vec![], - Some(sort_expr) => sort_expr - .iter() - .map(|expr| serialize_expr(expr, extension_codec)) - .collect::, _>>()?, + Some(sort_expr) => serialize_exprs(sort_expr, extension_codec)?, }; Ok(protobuf::LogicalPlanNode { logical_plan_type: Some(LogicalPlanType::DistinctOn(Box::new( protobuf::DistinctOnNode { - on_expr: on_expr - .iter() - .map(|expr| serialize_expr(expr, extension_codec)) - .collect::, _>>()?, - select_expr: select_expr - .iter() - .map(|expr| serialize_expr(expr, extension_codec)) - .collect::, _>>()?, + on_expr: serialize_exprs(on_expr, extension_codec)?, + select_expr: serialize_exprs(select_expr, extension_codec)?, sort_expr, input: Some(Box::new(input)), }, @@ -1207,10 +1139,7 @@ impl AsLogicalPlan for LogicalPlanNode { logical_plan_type: Some(LogicalPlanType::Window(Box::new( protobuf::WindowNode { input: Some(Box::new(input)), - window_expr: window_expr - .iter() - .map(|expr| serialize_expr(expr, extension_codec)) - .collect::, _>>()?, + window_expr: serialize_exprs(window_expr, extension_codec)?, }, ))), }) @@ -1230,14 +1159,8 @@ impl AsLogicalPlan for LogicalPlanNode { logical_plan_type: Some(LogicalPlanType::Aggregate(Box::new( protobuf::AggregateNode { input: Some(Box::new(input)), - group_expr: 
group_expr - .iter() - .map(|expr| serialize_expr(expr, extension_codec)) - .collect::, _>>()?, - aggr_expr: aggr_expr - .iter() - .map(|expr| serialize_expr(expr, extension_codec)) - .collect::, _>>()?, + group_expr: serialize_exprs(group_expr, extension_codec)?, + aggr_expr: serialize_exprs(aggr_expr, extension_codec)?, }, ))), }) @@ -1335,10 +1258,8 @@ impl AsLogicalPlan for LogicalPlanNode { input.as_ref(), extension_codec, )?; - let selection_expr: Vec = expr - .iter() - .map(|expr| serialize_expr(expr, extension_codec)) - .collect::, ToProtoError>>()?; + let selection_expr: Vec = + serialize_exprs(expr, extension_codec)?; Ok(protobuf::LogicalPlanNode { logical_plan_type: Some(LogicalPlanType::Sort(Box::new( protobuf::SortNode { @@ -1367,10 +1288,7 @@ impl AsLogicalPlan for LogicalPlanNode { let pb_partition_method = match partitioning_scheme { Partitioning::Hash(exprs, partition_count) => { PartitionMethod::Hash(protobuf::HashRepartition { - hash_expr: exprs - .iter() - .map(|expr| serialize_expr(expr, extension_codec)) - .collect::, ToProtoError>>()?, + hash_expr: serialize_exprs(exprs, extension_codec)?, partition_count: *partition_count as u64, }) } @@ -1419,10 +1337,7 @@ impl AsLogicalPlan for LogicalPlanNode { let mut converted_order_exprs: Vec = vec![]; for order in order_exprs { let temp = LogicalExprNodeCollection { - logical_expr_nodes: order - .iter() - .map(|expr| serialize_expr(expr, extension_codec)) - .collect::, ToProtoError>>()?, + logical_expr_nodes: serialize_exprs(order, extension_codec)?, }; converted_order_exprs.push(temp); } diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index 0f6722dd375b..78f370c714cc 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -18,7 +18,7 @@ use std::fmt::Debug; use std::sync::Arc; -use datafusion::physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; +use datafusion::physical_expr::aggregate::AggregateExprBuilder; use prost::bytes::BufMut; use prost::Message; @@ -34,6 +34,7 @@ use datafusion::datasource::physical_plan::ParquetExec; use datafusion::datasource::physical_plan::{AvroExec, CsvExec}; use datafusion::execution::runtime_env::RuntimeEnv; use datafusion::execution::FunctionRegistry; +use datafusion::physical_expr::aggregate::AggregateFunctionExpr; use datafusion::physical_expr::{PhysicalExprRef, PhysicalSortRequirement}; use datafusion::physical_plan::aggregates::AggregateMode; use datafusion::physical_plan::aggregates::{AggregateExec, PhysicalGroupBy}; @@ -59,7 +60,7 @@ use datafusion::physical_plan::sorts::sort_preserving_merge::SortPreservingMerge use datafusion::physical_plan::union::{InterleaveExec, UnionExec}; use datafusion::physical_plan::windows::{BoundedWindowAggExec, WindowAggExec}; use datafusion::physical_plan::{ - AggregateExpr, ExecutionPlan, InputOrderMode, PhysicalExpr, WindowExpr, + ExecutionPlan, InputOrderMode, PhysicalExpr, WindowExpr, }; use datafusion_common::{internal_err, not_impl_err, DataFusionError, Result}; use datafusion_expr::{AggregateUDF, ScalarUDF}; @@ -259,10 +260,13 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { runtime, extension_codec, )?; - Ok(Arc::new(CoalesceBatchesExec::new( - input, - coalesce_batches.target_batch_size as usize, - ))) + Ok(Arc::new( + CoalesceBatchesExec::new( + input, + coalesce_batches.target_batch_size as usize, + ) + .with_fetch(coalesce_batches.fetch.map(|f| f as usize)), + )) } PhysicalPlanType::Merge(merge) => { let input: Arc = @@ 
-464,7 +468,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { }) .collect::, _>>()?; - let physical_aggr_expr: Vec> = hash_agg + let physical_aggr_expr: Vec> = hash_agg .aggr_expr .iter() .zip(hash_agg.aggr_expr_name.iter()) @@ -1536,6 +1540,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { protobuf::CoalesceBatchesExecNode { input: Some(Box::new(input)), target_batch_size: coalesce_batches.target_batch_size() as u32, + fetch: coalesce_batches.fetch().map(|n| n as u32), }, ))), }); diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index 7949a457f40f..555ad22a9bc1 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -29,7 +29,7 @@ use datafusion::physical_plan::expressions::{ }; use datafusion::physical_plan::udaf::AggregateFunctionExpr; use datafusion::physical_plan::windows::{BuiltInWindowExpr, PlainAggregateWindowExpr}; -use datafusion::physical_plan::{AggregateExpr, Partitioning, PhysicalExpr, WindowExpr}; +use datafusion::physical_plan::{Partitioning, PhysicalExpr, WindowExpr}; use datafusion::{ datasource::{ file_format::{csv::CsvSink, json::JsonSink}, @@ -49,58 +49,50 @@ use crate::protobuf::{ use super::PhysicalExtensionCodec; pub fn serialize_physical_aggr_expr( - aggr_expr: Arc, + aggr_expr: Arc, codec: &dyn PhysicalExtensionCodec, ) -> Result { let expressions = serialize_physical_exprs(aggr_expr.expressions(), codec)?; let ordering_req = aggr_expr.order_bys().unwrap_or(&[]).to_vec(); let ordering_req = serialize_physical_sort_exprs(ordering_req, codec)?; - if let Some(a) = aggr_expr.as_any().downcast_ref::() { - let name = a.fun().name().to_string(); - let mut buf = Vec::new(); - codec.try_encode_udaf(a.fun(), &mut buf)?; - Ok(protobuf::PhysicalExprNode { - expr_type: Some(protobuf::physical_expr_node::ExprType::AggregateExpr( - protobuf::PhysicalAggregateExprNode { - aggregate_function: Some(physical_aggregate_expr_node::AggregateFunction::UserDefinedAggrFunction(name)), - expr: expressions, - ordering_req, - distinct: a.is_distinct(), - ignore_nulls: a.ignore_nulls(), - fun_definition: (!buf.is_empty()).then_some(buf) - }, - )), - }) - } else { - unreachable!("No other types exists besides AggergationFunctionExpr"); - } + let name = aggr_expr.fun().name().to_string(); + let mut buf = Vec::new(); + codec.try_encode_udaf(aggr_expr.fun(), &mut buf)?; + Ok(protobuf::PhysicalExprNode { + expr_type: Some(protobuf::physical_expr_node::ExprType::AggregateExpr( + protobuf::PhysicalAggregateExprNode { + aggregate_function: Some(physical_aggregate_expr_node::AggregateFunction::UserDefinedAggrFunction(name)), + expr: expressions, + ordering_req, + distinct: aggr_expr.is_distinct(), + ignore_nulls: aggr_expr.ignore_nulls(), + fun_definition: (!buf.is_empty()).then_some(buf) + }, + )), + }) } fn serialize_physical_window_aggr_expr( - aggr_expr: &dyn AggregateExpr, + aggr_expr: &AggregateFunctionExpr, _window_frame: &WindowFrame, codec: &dyn PhysicalExtensionCodec, ) -> Result<(physical_window_expr_node::WindowFunction, Option>)> { - if let Some(a) = aggr_expr.as_any().downcast_ref::() { - if a.is_distinct() || a.ignore_nulls() { - // TODO - return not_impl_err!( - "Distinct aggregate functions not supported in window expressions" - ); - } - - let mut buf = Vec::new(); - codec.try_encode_udaf(a.fun(), &mut buf)?; - Ok(( - physical_window_expr_node::WindowFunction::UserDefinedAggrFunction( - a.fun().name().to_string(), - ), - 
(!buf.is_empty()).then_some(buf), - )) - } else { - unreachable!("No other types exists besides AggergationFunctionExpr"); + if aggr_expr.is_distinct() || aggr_expr.ignore_nulls() { + // TODO + return not_impl_err!( + "Distinct aggregate functions not supported in window expressions" + ); } + + let mut buf = Vec::new(); + codec.try_encode_udaf(aggr_expr.fun(), &mut buf)?; + Ok(( + physical_window_expr_node::WindowFunction::UserDefinedAggrFunction( + aggr_expr.fun().name().to_string(), + ), + (!buf.is_empty()).then_some(buf), + )) } pub fn serialize_physical_window_expr( diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 09c5f0f8bd3d..94ac913e1968 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -104,10 +104,8 @@ fn roundtrip_json_test(_proto: &protobuf::LogicalExprNode) {} fn roundtrip_expr_test(initial_struct: Expr, ctx: SessionContext) { let extension_codec = DefaultLogicalExtensionCodec {}; let proto: protobuf::LogicalExprNode = - match serialize_expr(&initial_struct, &extension_codec) { - Ok(p) => p, - Err(e) => panic!("Error serializing expression: {:?}", e), - }; + serialize_expr(&initial_struct, &extension_codec) + .unwrap_or_else(|e| panic!("Error serializing expression: {:?}", e)); let round_trip: Expr = from_proto::parse_expr(&proto, &ctx, &extension_codec).unwrap(); @@ -2436,7 +2434,7 @@ fn roundtrip_window() { WindowFunctionDefinition::AggregateUDF(avg_udaf()), vec![col("col1")], )) - .window_frame(row_number_frame.clone()) + .window_frame(row_number_frame) .build() .unwrap(); diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index 6766468ef443..3e49dc24fd5a 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -24,7 +24,8 @@ use std::vec; use arrow::array::RecordBatch; use arrow::csv::WriterBuilder; -use datafusion::physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; +use datafusion::physical_expr::aggregate::AggregateExprBuilder; +use datafusion::physical_plan::coalesce_batches::CoalesceBatchesExec; use datafusion_functions_aggregate::approx_percentile_cont::approx_percentile_cont_udaf; use datafusion_functions_aggregate::array_agg::array_agg_udaf; use datafusion_functions_aggregate::min_max::max_udaf; @@ -46,7 +47,6 @@ use datafusion::datasource::physical_plan::{ use datafusion::execution::FunctionRegistry; use datafusion::functions_aggregate::sum::sum_udaf; use datafusion::logical_expr::{create_udf, JoinType, Operator, Volatility}; -use datafusion::physical_expr::aggregate::utils::down_cast_any_ref; use datafusion::physical_expr::expressions::Literal; use datafusion::physical_expr::window::SlidingAggregateWindowExpr; use datafusion::physical_expr::{PhysicalSortRequirement, ScalarFunctionExpr}; @@ -69,13 +69,12 @@ use datafusion::physical_plan::placeholder_row::PlaceholderRowExec; use datafusion::physical_plan::projection::ProjectionExec; use datafusion::physical_plan::repartition::RepartitionExec; use datafusion::physical_plan::sorts::sort::SortExec; +use datafusion::physical_plan::udaf::AggregateFunctionExpr; use datafusion::physical_plan::union::{InterleaveExec, UnionExec}; use datafusion::physical_plan::windows::{ BuiltInWindowExpr, PlainAggregateWindowExpr, WindowAggExec, }; -use datafusion::physical_plan::{ - AggregateExpr, ExecutionPlan, 
Partitioning, PhysicalExpr, Statistics, -}; +use datafusion::physical_plan::{ExecutionPlan, Partitioning, PhysicalExpr, Statistics}; use datafusion::prelude::SessionContext; use datafusion::scalar::ScalarValue; use datafusion_common::config::TableParquetOptions; @@ -361,7 +360,7 @@ fn rountrip_aggregate() -> Result<()> { .alias("NTH_VALUE(b, 1)") .build()?; - let test_cases: Vec>> = vec![ + let test_cases: Vec>> = vec![ // AVG vec![avg_expr], // NTH_VALUE @@ -394,7 +393,7 @@ fn rountrip_aggregate_with_limit() -> Result<()> { let groups: Vec<(Arc, String)> = vec![(col("a", &schema)?, "unused".to_string())]; - let aggregates: Vec> = + let aggregates: Vec> = vec![ AggregateExprBuilder::new(avg_udaf(), vec![col("b", &schema)?]) .schema(Arc::clone(&schema)) @@ -405,7 +404,7 @@ fn rountrip_aggregate_with_limit() -> Result<()> { let agg = AggregateExec::try_new( AggregateMode::Final, PhysicalGroupBy::new_single(groups.clone()), - aggregates.clone(), + aggregates, vec![None], Arc::new(EmptyExec::new(schema.clone())), schema, @@ -423,7 +422,7 @@ fn rountrip_aggregate_with_approx_pencentile_cont() -> Result<()> { let groups: Vec<(Arc, String)> = vec![(col("a", &schema)?, "unused".to_string())]; - let aggregates: Vec> = vec![AggregateExprBuilder::new( + let aggregates: Vec> = vec![AggregateExprBuilder::new( approx_percentile_cont_udaf(), vec![col("b", &schema)?, lit(0.5)], ) @@ -434,7 +433,7 @@ fn rountrip_aggregate_with_approx_pencentile_cont() -> Result<()> { let agg = AggregateExec::try_new( AggregateMode::Final, PhysicalGroupBy::new_single(groups.clone()), - aggregates.clone(), + aggregates, vec![None], Arc::new(EmptyExec::new(schema.clone())), schema, @@ -458,7 +457,7 @@ fn rountrip_aggregate_with_sort() -> Result<()> { }, }]; - let aggregates: Vec> = + let aggregates: Vec> = vec![ AggregateExprBuilder::new(array_agg_udaf(), vec![col("b", &schema)?]) .schema(Arc::clone(&schema)) @@ -470,7 +469,7 @@ fn rountrip_aggregate_with_sort() -> Result<()> { let agg = AggregateExec::try_new( AggregateMode::Final, PhysicalGroupBy::new_single(groups.clone()), - aggregates.clone(), + aggregates, vec![None], Arc::new(EmptyExec::new(schema.clone())), schema, @@ -525,7 +524,7 @@ fn roundtrip_aggregate_udaf() -> Result<()> { let groups: Vec<(Arc, String)> = vec![(col("a", &schema)?, "unused".to_string())]; - let aggregates: Vec> = + let aggregates: Vec> = vec![ AggregateExprBuilder::new(Arc::new(udaf), vec![col("b", &schema)?]) .schema(Arc::clone(&schema)) @@ -537,7 +536,7 @@ fn roundtrip_aggregate_udaf() -> Result<()> { Arc::new(AggregateExec::try_new( AggregateMode::Final, PhysicalGroupBy::new_single(groups.clone()), - aggregates.clone(), + aggregates, vec![None], Arc::new(EmptyExec::new(schema.clone())), schema, @@ -629,6 +628,23 @@ fn roundtrip_sort_preserve_partitioning() -> Result<()> { )) } +#[test] +fn roundtrip_coalesce_with_fetch() -> Result<()> { + let field_a = Field::new("a", DataType::Boolean, false); + let field_b = Field::new("b", DataType::Int64, false); + let schema = Arc::new(Schema::new(vec![field_a, field_b])); + + roundtrip_test(Arc::new(CoalesceBatchesExec::new( + Arc::new(EmptyExec::new(schema.clone())), + 8096, + )))?; + + roundtrip_test(Arc::new( + CoalesceBatchesExec::new(Arc::new(EmptyExec::new(schema)), 8096) + .with_fetch(Some(10)), + )) +} + #[test] fn roundtrip_parquet_exec_with_pruning_predicate() -> Result<()> { let scan_config = FileScanConfig { @@ -730,7 +746,7 @@ fn roundtrip_parquet_exec_with_custom_predicate_expr() -> Result<()> { } impl PartialEq for CustomPredicateExpr { 
fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) + other .downcast_ref::() .map(|x| self.inner.eq(&x.inner)) .unwrap_or(false) @@ -975,18 +991,16 @@ fn roundtrip_scalar_udf_extension_codec() -> Result<()> { )), input, )?); - let aggr_expr = AggregateExprBuilder::new( - max_udaf(), - vec![udf_expr.clone() as Arc], - ) - .schema(schema.clone()) - .alias("max") - .build()?; + let aggr_expr = + AggregateExprBuilder::new(max_udaf(), vec![udf_expr as Arc]) + .schema(schema.clone()) + .alias("max") + .build()?; let window = Arc::new(WindowAggExec::try_new( vec![Arc::new(PlainAggregateWindowExpr::new( aggr_expr.clone(), - &[col("author", &schema.clone())?], + &[col("author", &schema)?], &[], Arc::new(WindowFrame::new(None)), ))], @@ -997,10 +1011,10 @@ fn roundtrip_scalar_udf_extension_codec() -> Result<()> { let aggregate = Arc::new(AggregateExec::try_new( AggregateMode::Final, PhysicalGroupBy::new(vec![], vec![], vec![]), - vec![aggr_expr.clone()], + vec![aggr_expr], vec![None], window, - schema.clone(), + schema, )?); let ctx = SessionContext::new(); @@ -1038,7 +1052,7 @@ fn roundtrip_aggregate_udf_extension_codec() -> Result<()> { Arc::new(BinaryExpr::new( col("published", &schema)?, Operator::And, - Arc::new(BinaryExpr::new(udf_expr.clone(), Operator::Gt, lit(0))), + Arc::new(BinaryExpr::new(udf_expr, Operator::Gt, lit(0))), )), input, )?); @@ -1067,7 +1081,7 @@ fn roundtrip_aggregate_udf_extension_codec() -> Result<()> { vec![aggr_expr], vec![None], window, - schema.clone(), + schema, )?); let ctx = SessionContext::new(); @@ -1142,7 +1156,7 @@ fn roundtrip_json_sink() -> Result<()> { roundtrip_test(Arc::new(DataSinkExec::new( input, data_sink, - schema.clone(), + schema, Some(sort_order), ))) } @@ -1181,7 +1195,7 @@ fn roundtrip_csv_sink() -> Result<()> { Arc::new(DataSinkExec::new( input, data_sink, - schema.clone(), + schema, Some(sort_order), )), &ctx, @@ -1237,7 +1251,7 @@ fn roundtrip_parquet_sink() -> Result<()> { roundtrip_test(Arc::new(DataSinkExec::new( input, data_sink, - schema.clone(), + schema, Some(sort_order), ))) } @@ -1326,7 +1340,7 @@ fn roundtrip_interleave() -> Result<()> { )?; let right = RepartitionExec::try_new( Arc::new(EmptyExec::new(Arc::new(schema_right))), - partition.clone(), + partition, )?; let inputs: Vec> = vec![Arc::new(left), Arc::new(right)]; let interleave = InterleaveExec::try_new(inputs)?; diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index b95414a8cafd..71e40c20b80a 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -245,8 +245,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // Build Unnest expression if name.eq("unnest") { - let mut exprs = - self.function_args_to_expr(args.clone(), schema, planner_context)?; + let mut exprs = self.function_args_to_expr(args, schema, planner_context)?; if exprs.len() != 1 { return plan_err!("unnest() requires exactly one argument"); } @@ -295,8 +294,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Some(false) }; } + Some(false) + } else { + panic!("order_by expression must be of type Sort"); } - Some(false) }); let window_frame = window diff --git a/datafusion/sql/src/relation/mod.rs b/datafusion/sql/src/relation/mod.rs index 86e49780724b..f8ebb04f3810 100644 --- a/datafusion/sql/src/relation/mod.rs +++ b/datafusion/sql/src/relation/mod.rs @@ -214,7 +214,7 @@ fn optimize_subquery_sort(plan: LogicalPlan) -> Result> // 2. RANK / ROW_NUMBER ... => Handled by a `WindowAggr` and its requirements. // 3. 
LIMIT => Handled by a `Sort`, so we need to search for it. let mut has_limit = false; - let new_plan = plan.clone().transform_down(|c| { + let new_plan = plan.transform_down(|c| { if let LogicalPlan::Limit(_) = c { has_limit = true; return Ok(Transformed::no(c)); diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 4e0ce33f1334..384893bfa94c 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -33,7 +33,6 @@ use datafusion_expr::expr::{Alias, PlannedReplaceSelectItem, WildcardOptions}; use datafusion_expr::expr_rewriter::{ normalize_col, normalize_col_with_schemas_and_ambiguity_check, normalize_cols, }; -use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::utils::{ expr_as_column_expr, expr_to_columns, find_aggregate_exprs, find_window_exprs, }; @@ -215,7 +214,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let plan = if let Some(having_expr_post_aggr) = having_expr_post_aggr { LogicalPlanBuilder::from(plan) - .filter(having_expr_post_aggr)? + .having(having_expr_post_aggr)? .build()? } else { plan @@ -361,9 +360,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { .build() } LogicalPlan::Filter(mut filter) => { - filter.input = Arc::new( - self.try_process_aggregate_unnest(unwrap_arc(filter.input))?, - ); + filter.input = + Arc::new(self.try_process_aggregate_unnest(Arc::unwrap_or_clone( + filter.input, + ))?); Ok(LogicalPlan::Filter(filter)) } _ => Ok(input), @@ -401,7 +401,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // Projection: tab.array_col AS unnest(tab.array_col) // TableScan: tab // ``` - let mut intermediate_plan = unwrap_arc(input); + let mut intermediate_plan = Arc::unwrap_or_clone(input); let mut intermediate_select_exprs = group_expr; loop { diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 9ce627aecc76..0dbcba162bc0 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -1864,7 +1864,7 @@ mod tests { r#"EXISTS (SELECT * FROM t WHERE (t.a = 1))"#, ), ( - not_exists(Arc::new(dummy_logical_plan.clone())), + not_exists(Arc::new(dummy_logical_plan)), r#"NOT EXISTS (SELECT * FROM t WHERE (t.a = 1))"#, ), ( diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 8b5a5b0942b8..106705c322fc 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -528,24 +528,10 @@ impl Unparser<'_> { fn sort_to_sql(&self, sort_exprs: Vec) -> Result> { sort_exprs .iter() - .map(|expr: &Expr| match expr { - Expr::Sort(sort_expr) => { - let col = self.expr_to_sql(&sort_expr.expr)?; - - let nulls_first = if self.dialect.supports_nulls_first_in_sort() { - Some(sort_expr.nulls_first) - } else { - None - }; - - Ok(ast::OrderByExpr { - asc: Some(sort_expr.asc), - expr: col, - nulls_first, - with_fill: None, - }) - } - _ => plan_err!("Expecting Sort expr"), + .map(|expr: &Expr| { + self.expr_to_unparsed(expr)? 
+ .into_order_by_expr() + .or(plan_err!("Expecting Sort expr")) }) .collect::>>() } diff --git a/datafusion/sql/src/unparser/rewrite.rs b/datafusion/sql/src/unparser/rewrite.rs index f6725485f920..9e1adcf4df31 100644 --- a/datafusion/sql/src/unparser/rewrite.rs +++ b/datafusion/sql/src/unparser/rewrite.rs @@ -59,10 +59,7 @@ pub(super) fn normalize_union_schema(plan: &LogicalPlan) -> Result let transformed_plan = plan.transform_up(|plan| match plan { LogicalPlan::Union(mut union) => { - let schema = match Arc::try_unwrap(union.schema) { - Ok(inner) => inner, - Err(schema) => (*schema).clone(), - }; + let schema = Arc::unwrap_or_clone(union.schema); let schema = schema.strip_qualifiers(); union.schema = Arc::new(schema); @@ -164,6 +161,8 @@ pub(super) fn rewrite_plan_for_sort_on_non_projected_fields( for expr in &sort.expr { if let Expr::Sort(s) = expr { collects.push(s.expr.as_ref().clone()); + } else { + panic!("sort expression must be of type Sort"); } } diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index af161bba45c1..c32acecaae5f 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -17,8 +17,6 @@ //! SQL Utility Functions -use std::collections::HashMap; - use arrow_schema::{ DataType, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE, }; @@ -33,6 +31,7 @@ use datafusion_expr::expr::{Alias, GroupingSet, Unnest, WindowFunction}; use datafusion_expr::utils::{expr_as_column_expr, find_column_exprs}; use datafusion_expr::{expr_vec_fmt, Expr, ExprSchemable, LogicalPlan}; use sqlparser::ast::{Ident, Value}; +use std::collections::HashMap; /// Make a best-effort attempt at resolving all columns in the expression tree pub(crate) fn resolve_columns(expr: &Expr, plan: &LogicalPlan) -> Result { diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index ed23fada0cfb..cdc7bef06afd 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -203,7 +203,7 @@ fn roundtrip_crossjoin() -> Result<()> { println!("plan {}", plan.display_indent()); let plan_roundtrip = sql_to_rel - .sql_statement_to_plan(roundtrip_statement.clone()) + .sql_statement_to_plan(roundtrip_statement) .unwrap(); let expected = "Projection: j1.j1_id, j2.j2_string\ diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 5685e09c9c9f..5a203703e967 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -3813,7 +3813,7 @@ fn test_prepare_statement_to_plan_params_as_constants() { /////////////////// // replace params with values let param_values = vec![ScalarValue::Int32(Some(10))]; - let expected_plan = "Projection: Int32(10)\n EmptyRelation"; + let expected_plan = "Projection: Int32(10) AS $1\n EmptyRelation"; prepare_stmt_replace_params_quick_test(plan, param_values, expected_plan); @@ -3829,7 +3829,8 @@ fn test_prepare_statement_to_plan_params_as_constants() { /////////////////// // replace params with values let param_values = vec![ScalarValue::Int32(Some(10))]; - let expected_plan = "Projection: Int64(1) + Int32(10)\n EmptyRelation"; + let expected_plan = + "Projection: Int64(1) + Int32(10) AS Int64(1) + $1\n EmptyRelation"; prepare_stmt_replace_params_quick_test(plan, param_values, expected_plan); @@ -3848,7 +3849,9 @@ fn test_prepare_statement_to_plan_params_as_constants() { ScalarValue::Int32(Some(10)), ScalarValue::Float64(Some(10.0)), ]; - let expected_plan = "Projection: 
Int64(1) + Int32(10) + Float64(10)\n EmptyRelation"; + let expected_plan = + "Projection: Int64(1) + Int32(10) + Float64(10) AS Int64(1) + $1 + $2\ + \n EmptyRelation"; prepare_stmt_replace_params_quick_test(plan, param_values, expected_plan); } @@ -4063,7 +4066,7 @@ fn test_prepare_statement_insert_infer() { \n Projection: column1 AS id, column2 AS first_name, column3 AS last_name, \ CAST(NULL AS Int32) AS age, CAST(NULL AS Utf8) AS state, CAST(NULL AS Float64) AS salary, \ CAST(NULL AS Timestamp(Nanosecond, None)) AS birth_date, CAST(NULL AS Int32) AS 😀\ - \n Values: (UInt32(1), Utf8(\"Alan\"), Utf8(\"Turing\"))"; + \n Values: (UInt32(1) AS $1, Utf8(\"Alan\") AS $2, Utf8(\"Turing\") AS $3)"; let plan = plan.replace_params_with_values(¶m_values).unwrap(); prepare_stmt_replace_params_quick_test(plan, param_values, expected_plan); @@ -4144,7 +4147,7 @@ fn test_prepare_statement_to_plan_multi_params() { ScalarValue::from("xyz"), ]; let expected_plan = - "Projection: person.id, person.age, Utf8(\"xyz\")\ + "Projection: person.id, person.age, Utf8(\"xyz\") AS $6\ \n Filter: person.age IN ([Int32(10), Int32(20)]) AND person.salary > Float64(100) AND person.salary < Float64(200) OR person.first_name < Utf8(\"abc\")\ \n TableScan: person"; @@ -4213,7 +4216,7 @@ fn test_prepare_statement_to_plan_value_list() { let expected_plan = "Projection: *\ \n SubqueryAlias: t\ \n Projection: column1 AS num, column2 AS letter\ - \n Values: (Int64(1), Utf8(\"a\")), (Int64(2), Utf8(\"b\"))"; + \n Values: (Int64(1), Utf8(\"a\") AS $1), (Int64(2), Utf8(\"b\") AS $2)"; prepare_stmt_replace_params_quick_test(plan, param_values, expected_plan); } diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index b8b93b28aff6..45cb4d4615d7 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -98,6 +98,17 @@ SELECT approx_percentile_cont(c3, 0.95, 111.1) FROM aggregate_test_100 statement error DataFusion error: Error during planning: Error during planning: Coercion from \[Float64, Float64, Float64\] to the signature OneOf(.*) failed(.|\n)* SELECT approx_percentile_cont(c12, 0.95, 111.1) FROM aggregate_test_100 +statement error DataFusion error: This feature is not implemented: Percentile value for 'APPROX_PERCENTILE_CONT' must be a literal +SELECT approx_percentile_cont(c12, c12) FROM aggregate_test_100 + +statement error DataFusion error: This feature is not implemented: Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be a literal +SELECT approx_percentile_cont(c12, 0.95, c5) FROM aggregate_test_100 + +# Not supported over sliding windows +query error This feature is not implemented: Aggregate can not be used as a sliding accumulator because `retract_batch` is not implemented +SELECT approx_percentile_cont(c3, 0.5) OVER (ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) +FROM aggregate_test_100 + # array agg can use order by query ? 
SELECT array_agg(c13 ORDER BY c13) @@ -500,6 +511,85 @@ select stddev(sq.column1) from (values (1.1), (2.0), (3.0)) as sq ---- 0.950438495292 +# csv_query_stddev_7 +query IR +SELECT c2, stddev_samp(c12) FROM aggregate_test_100 GROUP BY c2 ORDER BY c2 +---- +1 0.303641032262 +2 0.284581967411 +3 0.296002660506 +4 0.284324609109 +5 0.331034486752 + +# csv_query_stddev_8 +query IR +SELECT c2, stddev_pop(c12) FROM aggregate_test_100 GROUP BY c2 ORDER BY c2 +---- +1 0.296659845456 +2 0.278038978602 +3 0.288107833475 +4 0.278074953424 +5 0.318992813225 + +# csv_query_stddev_9 +query IR +SELECT c2, var_pop(c12) FROM aggregate_test_100 GROUP BY c2 ORDER BY c2 +---- +1 0.088007063906 +2 0.077305673622 +3 0.083006123709 +4 0.077325679722 +5 0.101756414889 + +# csv_query_stddev_10 +query IR +SELECT c2, var_samp(c12) FROM aggregate_test_100 GROUP BY c2 ORDER BY c2 +---- +1 0.092197876473 +2 0.080986896176 +3 0.087617575027 +4 0.080840483345 +5 0.109583831419 + +# csv_query_stddev_11 +query IR +SELECT c2, var_samp(c12) FROM aggregate_test_100 WHERE c12 > 0.90 GROUP BY c2 ORDER BY c2 +---- +1 0.000889240174 +2 0.000785878272 +3 NULL +4 NULL +5 0.000269544643 + +# Use PostgresSQL dialect +statement ok +set datafusion.sql_parser.dialect = 'Postgres'; + +# csv_query_stddev_12 +query IR +SELECT c2, var_samp(c12) FILTER (WHERE c12 > 0.90) FROM aggregate_test_100 GROUP BY c2 ORDER BY c2 +---- +1 0.000889240174 +2 0.000785878272 +3 NULL +4 NULL +5 0.000269544643 + +# Restore the default dialect +statement ok +set datafusion.sql_parser.dialect = 'Generic'; + +# csv_query_stddev_13 +query IR +SELECT c2, var_samp(CASE WHEN c12 > 0.90 THEN c12 ELSE null END) FROM aggregate_test_100 GROUP BY c2 ORDER BY c2 +---- +1 0.000889240174 +2 0.000785878272 +3 NULL +4 NULL +5 0.000269544643 + + # csv_query_approx_median_1 query I SELECT approx_median(c2) FROM aggregate_test_100 @@ -5387,6 +5477,18 @@ physical_plan statement ok DROP TABLE empty; +# verify count aggregate function should not be nullable +statement ok +create table empty; + +query I +select distinct count() from empty; +---- +0 + +statement ok +DROP TABLE empty; + statement ok CREATE TABLE t(col0 INTEGER) as VALUES(2); @@ -5643,6 +5745,97 @@ select count(null), min(null), max(null), bit_and(NULL), bit_or(NULL), bit_xor(N ---- 0 NULL NULL NULL NULL NULL NULL NULL +statement ok +create table having_test(v1 int, v2 int) + +statement ok +create table join_table(v1 int, v2 int) + +statement ok +insert into having_test values (1, 2), (2, 3), (3, 4) + +statement ok +insert into join_table values (1, 2), (2, 3), (3, 4) + + +query II +select * from having_test group by v1, v2 having max(v1) = 3 +---- +3 4 + +query TT +EXPLAIN select * from having_test group by v1, v2 having max(v1) = 3 +---- +logical_plan +01)Projection: having_test.v1, having_test.v2 +02)--Filter: max(having_test.v1) = Int32(3) +03)----Aggregate: groupBy=[[having_test.v1, having_test.v2]], aggr=[[max(having_test.v1)]] +04)------TableScan: having_test projection=[v1, v2] +physical_plan +01)ProjectionExec: expr=[v1@0 as v1, v2@1 as v2] +02)--CoalesceBatchesExec: target_batch_size=8192 +03)----FilterExec: max(having_test.v1)@2 = 3 +04)------AggregateExec: mode=FinalPartitioned, gby=[v1@0 as v1, v2@1 as v2], aggr=[max(having_test.v1)] +05)--------CoalesceBatchesExec: target_batch_size=8192 +06)----------RepartitionExec: partitioning=Hash([v1@0, v2@1], 4), input_partitions=4 +07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +08)--------------AggregateExec: mode=Partial, 
gby=[v1@0 as v1, v2@1 as v2], aggr=[max(having_test.v1)] +09)----------------MemoryExec: partitions=1, partition_sizes=[1] + + +query error +select * from having_test having max(v1) = 3 + +query I +select max(v1) from having_test having max(v1) = 3 +---- +3 + +query I +select max(v1), * exclude (v1, v2) from having_test having max(v1) = 3 +---- +3 + +# because v1, v2 is not in the group by clause, the sql is invalid +query III +select max(v1), * replace ('v1' as v3) from having_test group by v1, v2 having max(v1) = 3 +---- +3 3 4 + +query III +select max(v1), t.* from having_test t group by v1, v2 having max(v1) = 3 +---- +3 3 4 + +# j.* should also be included in the group-by clause +query error +select max(t.v1), j.* from having_test t join join_table j on t.v1 = j.v1 group by t.v1, t.v2 having max(t.v1) = 3 + +query III +select max(t.v1), j.* from having_test t join join_table j on t.v1 = j.v1 group by j.v1, j.v2 having max(t.v1) = 3 +---- +3 3 4 + +# If the select items only contain scalar expressions, the having clause is valid. +query P +select now() from having_test having max(v1) = 4 +---- + +# If the select items only contain scalar expressions, the having clause is valid. +query I +select 0 from having_test having max(v1) = 4 +---- + +# v2 should also be included in group-by clause +query error +select * from having_test group by v1 having max(v1) = 3 + +statement ok +drop table having_test + +statement ok +drop table join_table + # test min/max Float16 without group expression query RRTT WITH data AS ( diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 249241a51aea..c80fd7e92417 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -4972,11 +4972,19 @@ NULL 1 1 ## array_has/array_has_all/array_has_any -query BB +# If lhs is empty, return false +query B +select array_has([], 1); +---- +false + +# If rhs is Null, we return Null +query BBB select array_has([], null), - array_has([1, 2, 3], null); + array_has([1, 2, 3], null), + array_has([null, 1], null); ---- -false false +NULL NULL NULL #TODO: array_has_all and array_has_any cannot handle NULL #query BBBB diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index b7a0a74913b0..270e4beccc52 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -376,11 +376,10 @@ SELECT MAP {'a':1, 'b':2, 'c':3 }['a'] FROM t; 1 1 -# TODO(https://github.com/sqlparser-rs/sqlparser-rs/pull/1361): support parsing an empty map. Enable this after upgrading sqlparser-rs. -# query ? -# SELECT MAP {}; -# ---- -# {} +query ? +SELECT MAP {}; +---- +{} # values contain null query ?
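Editor's note (not part of the patch): the map.slt hunk above exercises the empty `MAP {}` literal that the sqlparser-rs upgrade enables. A minimal, hedged sketch of the same behaviour through the DataFusion Rust API, assuming default crate features (which include the nested/map functions), a tokio runtime, and the `SessionContext` SQL surface of this release:

```rust
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    // In-memory session; no tables are needed for literal expressions.
    let ctx = SessionContext::new();

    // An empty MAP literal should now parse and plan; a populated MAP literal
    // with element access worked before and still does.
    let df = ctx.sql("SELECT MAP {}, MAP {'a': 1, 'b': 2}['a']").await?;
    df.show().await?;
    Ok(())
}
```

This goes through the same SQL frontend as the sqllogictest above, so it is only an illustration of the test's intent, not an additional code path introduced by the patch.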
diff --git a/datafusion/sqllogictest/test_files/sort_merge_join.slt b/datafusion/sqllogictest/test_files/sort_merge_join.slt index ea3088e69674..2c28a5feadba 100644 --- a/datafusion/sqllogictest/test_files/sort_merge_join.slt +++ b/datafusion/sqllogictest/test_files/sort_merge_join.slt @@ -134,7 +134,6 @@ Alice 100 NULL NULL Alice 50 Alice 2 Bob 1 NULL NULL NULL NULL Alice 1 -NULL NULL Alice 2 query TITI rowsort SELECT * FROM t1 FULL JOIN t2 ON t1.a = t2.a AND t1.b > t2.b + 50 @@ -624,6 +623,32 @@ NULL NULL 7 9 NULL NULL 8 10 NULL NULL 9 11 +query IIII +select * from ( +with t as ( + select id_a id_a_1, id_a % 5 id_a_2 from (select unnest(make_array(5, 6, 7, 8, 9, 0, 1, 2, 3, 4)) id_a) +), t1 as ( + select id_b % 10 id_b_1, id_b + 2 id_b_2 from (select unnest(make_array(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)) id_b) +) +select * from t full join t1 on t.id_a_2 = t1.id_b_1 and t.id_a_1 > t1.id_b_2 +) order by 1, 2, 3, 4 +---- +0 0 NULL NULL +1 1 NULL NULL +2 2 NULL NULL +3 3 NULL NULL +4 4 NULL NULL +5 0 0 2 +6 1 1 3 +7 2 2 4 +8 3 3 5 +9 4 4 6 +NULL NULL 5 7 +NULL NULL 6 8 +NULL NULL 7 9 +NULL NULL 8 10 +NULL NULL 9 11 + # return sql params back to default values statement ok set datafusion.optimizer.prefer_hash_join = true; diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 0b441bcbeb8f..83c75b8df38c 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -460,8 +460,6 @@ Xiangpeng Raphael NULL - - ### Initcap query TT @@ -501,7 +499,7 @@ SELECT INITCAP(column1_large_utf8_lower) as c3 FROM test_lowercase; ---- -Andrew Andrew Andrew +Andrew Andrew Andrew Xiangpeng Xiangpeng Xiangpeng Raphael Raphael Raphael NULL NULL NULL @@ -828,16 +826,42 @@ logical_plan 02)--TableScan: test projection=[column1_utf8view, column2_utf8view] ## Ensure no casts for LOWER -## TODO https://github.com/apache/datafusion/issues/11855 query TT EXPLAIN SELECT LOWER(column1_utf8view) as c1 FROM test; ---- logical_plan -01)Projection: lower(CAST(test.column1_utf8view AS Utf8)) AS c1 +01)Projection: lower(test.column1_utf8view) AS c1 02)--TableScan: test projection=[column1_utf8view] +query T +SELECT LOWER(column1_utf8view) as c1 +FROM test; +---- +andrew +xiangpeng +raphael +NULL + +## Ensure no casts for UPPER +query TT +EXPLAIN SELECT + UPPER(column1_utf8view) as c1 +FROM test; +---- +logical_plan +01)Projection: upper(test.column1_utf8view) AS c1 +02)--TableScan: test projection=[column1_utf8view] + +query T +SELECT UPPER(column1_utf8view) as c1 +FROM test; +---- +ANDREW +XIANGPENG +RAPHAEL +NULL ## Ensure no casts for LPAD query TT @@ -1066,9 +1090,8 @@ EXPLAIN SELECT FROM test; ---- logical_plan -01)Projection: strpos(__common_expr_1, Utf8("f")) AS c, strpos(__common_expr_1, CAST(test.column2_utf8view AS Utf8)) AS c2 -02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view -03)----TableScan: test projection=[column1_utf8view, column2_utf8view] +01)Projection: strpos(test.column1_utf8view, Utf8("f")) AS c, strpos(test.column1_utf8view, test.column2_utf8view) AS c2 +02)--TableScan: test projection=[column1_utf8view, column2_utf8view] ## Ensure no casts for SUBSTR ## TODO file ticket @@ -1145,6 +1168,63 @@ FROM test; 0 NULL +# || mixed types +# expect all results to be the same for each row as they all have the same values +query TTTTTTTT +SELECT + column1_utf8view || column2_utf8view, + column1_utf8 || column2_utf8view, + column1_large_utf8 || 
column2_utf8view, + column1_dict || column2_utf8view, + -- reverse argument order + column2_utf8view || column1_utf8view, + column2_utf8view || column1_utf8, + column2_utf8view || column1_large_utf8, + column2_utf8view || column1_dict +FROM test; +---- +AndrewX AndrewX AndrewX AndrewX XAndrew XAndrew XAndrew XAndrew +XiangpengXiangpeng XiangpengXiangpeng XiangpengXiangpeng XiangpengXiangpeng XiangpengXiangpeng XiangpengXiangpeng XiangpengXiangpeng XiangpengXiangpeng +RaphaelR RaphaelR RaphaelR RaphaelR RRaphael RRaphael RRaphael RRaphael +NULL NULL NULL NULL NULL NULL NULL NULL + +# || constants +# expect all results to be the same for each row as they all have the same values +query TTTTTTTT +SELECT + column1_utf8view || 'foo', + column1_utf8 || 'foo', + column1_large_utf8 || 'foo', + column1_dict || 'foo', + -- reverse argument order + 'foo' || column1_utf8view, + 'foo' || column1_utf8, + 'foo' || column1_large_utf8, + 'foo' || column1_dict +FROM test; +---- +Andrewfoo Andrewfoo Andrewfoo Andrewfoo fooAndrew fooAndrew fooAndrew fooAndrew +Xiangpengfoo Xiangpengfoo Xiangpengfoo Xiangpengfoo fooXiangpeng fooXiangpeng fooXiangpeng fooXiangpeng +Raphaelfoo Raphaelfoo Raphaelfoo Raphaelfoo fooRaphael fooRaphael fooRaphael fooRaphael +NULL NULL NULL NULL NULL NULL NULL NULL + +# || same type (column1 has null, so also tests NULL || NULL) +# expect all results to be the same for each row as they all have the same values +query TTT +SELECT + column1_utf8view || column1_utf8view, + column1_utf8 || column1_utf8, + column1_large_utf8 || column1_large_utf8 + -- Dictionary/Dictionary coercion doesn't work + -- https://github.com/apache/datafusion/issues/12101 + --column1_dict || column1_dict +FROM test; +---- +AndrewAndrew AndrewAndrew AndrewAndrew +XiangpengXiangpeng XiangpengXiangpeng XiangpengXiangpeng +RaphaelRaphael RaphaelRaphael RaphaelRaphael +NULL NULL NULL + statement ok drop table test; @@ -1168,18 +1248,25 @@ select t.dt from dates t where arrow_cast('2024-01-01', 'Utf8View') < t.dt; statement ok drop table dates; +### Tests for `||` with Utf8View specifically + statement ok create table temp as values ('value1', arrow_cast('rust', 'Utf8View'), arrow_cast('fast', 'Utf8View')), ('value2', arrow_cast('datafusion', 'Utf8View'), arrow_cast('cool', 'Utf8View')); +query TTT +select arrow_typeof(column1), arrow_typeof(column2), arrow_typeof(column3) from temp; +---- +Utf8 Utf8View Utf8View +Utf8 Utf8View Utf8View + query T select column2||' is fast' from temp; ---- rust is fast datafusion is fast - query T select column2 || ' is ' || column3 from temp; ---- @@ -1190,15 +1277,15 @@ query TT explain select column2 || 'is' || column3 from temp; ---- logical_plan -01)Projection: CAST(temp.column2 AS Utf8) || Utf8("is") || CAST(temp.column3 AS Utf8) +01)Projection: temp.column2 || Utf8View("is") || temp.column3 AS temp.column2 || Utf8("is") || temp.column3 02)--TableScan: temp projection=[column2, column3] - +# should not cast the column2 to utf8 query TT explain select column2||' is fast' from temp; ---- logical_plan -01)Projection: CAST(temp.column2 AS Utf8) || Utf8(" is fast") +01)Projection: temp.column2 || Utf8View(" is fast") AS temp.column2 || Utf8(" is fast") 02)--TableScan: temp projection=[column2] @@ -1212,7 +1299,7 @@ query TT explain select column2||column3 from temp; ---- logical_plan -01)Projection: CAST(temp.column2 AS Utf8) || CAST(temp.column3 AS Utf8) +01)Projection: temp.column2 || temp.column3 02)--TableScan: temp projection=[column2, column3] query T diff --git 
a/datafusion/sqllogictest/test_files/type_coercion.slt b/datafusion/sqllogictest/test_files/type_coercion.slt index e420c0cc7155..0f9399cede2e 100644 --- a/datafusion/sqllogictest/test_files/type_coercion.slt +++ b/datafusion/sqllogictest/test_files/type_coercion.slt @@ -50,6 +50,35 @@ select interval '1 month' - '2023-05-01'::date; query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid types SELECT interval '1 month' - '2023-05-01 12:30:00'::timestamp; +# dictionary(int32, utf8) -> utf8 +query T +select arrow_cast('foo', 'Dictionary(Int32, Utf8)') || arrow_cast('bar', 'Dictionary(Int32, Utf8)'); +---- +foobar + +# dictionary(int32, largeUtf8) -> largeUtf8 +query T +select arrow_cast('foo', 'Dictionary(Int32, LargeUtf8)') || arrow_cast('bar', 'Dictionary(Int32, LargeUtf8)'); +---- +foobar + +#################################### +## Concat column dictionary test ## +#################################### +statement ok +create table t as values (arrow_cast('foo', 'Dictionary(Int32, Utf8)'), arrow_cast('bar', 'Dictionary(Int32, Utf8)')); + +query T +select column1 || column2 from t; +---- +foobar + +statement ok +DROP TABLE t + +####################################### +## Concat column dictionary test end ## +####################################### #################################### ## Test type coercion with UNIONs ## diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 78055f8c1c11..5bf5cf83284f 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -4861,3 +4861,16 @@ select a, row_number(a) over (order by b) as rn from t; statement ok drop table t; + +statement ok +DROP TABLE t1; + +# https://github.com/apache/datafusion/issues/12073 +statement ok +CREATE TABLE t1(v1 BIGINT); + +query error DataFusion error: Execution error: Expected a signed integer literal for the second argument of nth_value, got v1@0 +SELECT NTH_VALUE('+Inf'::Double, v1) OVER (PARTITION BY v1) FROM t1; + +statement ok +DROP TABLE t1; \ No newline at end of file diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml index 9e7ef9632ad3..ff02ef8c7ef6 100644 --- a/datafusion/substrait/Cargo.toml +++ b/datafusion/substrait/Cargo.toml @@ -38,9 +38,9 @@ chrono = { workspace = true } datafusion = { workspace = true, default-features = true } itertools = { workspace = true } object_store = { workspace = true } -pbjson-types = "0.6" -prost = "0.12" -substrait = { version = "0.36.0", features = ["serde"] } +pbjson-types = "0.7" +prost = "0.13" +substrait = { version = "0.41", features = ["serde"] } url = { workspace = true } [dev-dependencies] diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index f2756bb06d1e..b1b510f1792d 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -42,14 +42,14 @@ use crate::variation_const::{ DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF, DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME, LARGE_CONTAINER_TYPE_VARIATION_REF, - TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF, - TIMESTAMP_NANO_TYPE_VARIATION_REF, TIMESTAMP_SECOND_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF, }; #[allow(deprecated)] use crate::variation_const::{ 
INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_REF, - INTERVAL_YEAR_MONTH_TYPE_REF, + INTERVAL_YEAR_MONTH_TYPE_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF, + TIMESTAMP_MILLI_TYPE_VARIATION_REF, TIMESTAMP_NANO_TYPE_VARIATION_REF, + TIMESTAMP_SECOND_TYPE_VARIATION_REF, }; use datafusion::arrow::array::{new_empty_array, AsArray}; use datafusion::common::scalar::ScalarStructBuilder; @@ -69,6 +69,7 @@ use datafusion::{ use std::collections::{HashMap, HashSet}; use std::sync::Arc; use substrait::proto::exchange_rel::ExchangeKind; +use substrait::proto::expression::literal::interval_day_to_second::PrecisionMode; use substrait::proto::expression::literal::user_defined::Val; use substrait::proto::expression::literal::{ IntervalDayToSecond, IntervalYearToMonth, UserDefined, @@ -95,6 +96,13 @@ use substrait::proto::{ }; use substrait::proto::{FunctionArgument, SortField}; +// Substrait PrecisionTimestampTz indicates that the timestamp is relative to UTC, which +// is the same as the expectation for any non-empty timezone in DF, so any non-empty timezone +// results in correct points on the timeline, and we pick UTC as a reasonable default. +// However, DF uses the timezone also for some arithmetic and display purposes (see e.g. +// https://github.com/apache/arrow-rs/blob/ee5694078c86c8201549654246900a4232d531a9/arrow-cast/src/cast/mod.rs#L1749). +const DEFAULT_TIMEZONE: &str = "UTC"; + pub fn name_to_op(name: &str) -> Option { match name { "equal" => Some(Operator::Eq), @@ -877,8 +885,8 @@ fn from_substrait_jointype(join_type: i32) -> Result { join_rel::JoinType::Left => Ok(JoinType::Left), join_rel::JoinType::Right => Ok(JoinType::Right), join_rel::JoinType::Outer => Ok(JoinType::Full), - join_rel::JoinType::Anti => Ok(JoinType::LeftAnti), - join_rel::JoinType::Semi => Ok(JoinType::LeftSemi), + join_rel::JoinType::LeftAnti => Ok(JoinType::LeftAnti), + join_rel::JoinType::LeftSemi => Ok(JoinType::LeftSemi), _ => plan_err!("unsupported join type {substrait_join_type:?}"), } } else { @@ -1369,23 +1377,51 @@ fn from_substrait_type( }, r#type::Kind::Fp32(_) => Ok(DataType::Float32), r#type::Kind::Fp64(_) => Ok(DataType::Float64), - r#type::Kind::Timestamp(ts) => match ts.type_variation_reference { - TIMESTAMP_SECOND_TYPE_VARIATION_REF => { - Ok(DataType::Timestamp(TimeUnit::Second, None)) - } - TIMESTAMP_MILLI_TYPE_VARIATION_REF => { - Ok(DataType::Timestamp(TimeUnit::Millisecond, None)) - } - TIMESTAMP_MICRO_TYPE_VARIATION_REF => { - Ok(DataType::Timestamp(TimeUnit::Microsecond, None)) - } - TIMESTAMP_NANO_TYPE_VARIATION_REF => { - Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)) + r#type::Kind::Timestamp(ts) => { + // Kept for backwards compatibility, new plans should use PrecisionTimestamp(Tz) instead + #[allow(deprecated)] + match ts.type_variation_reference { + TIMESTAMP_SECOND_TYPE_VARIATION_REF => { + Ok(DataType::Timestamp(TimeUnit::Second, None)) + } + TIMESTAMP_MILLI_TYPE_VARIATION_REF => { + Ok(DataType::Timestamp(TimeUnit::Millisecond, None)) + } + TIMESTAMP_MICRO_TYPE_VARIATION_REF => { + Ok(DataType::Timestamp(TimeUnit::Microsecond, None)) + } + TIMESTAMP_NANO_TYPE_VARIATION_REF => { + Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)) + } + v => not_impl_err!( + "Unsupported Substrait type variation {v} of type {s_kind:?}" + ), } - v => not_impl_err!( - "Unsupported Substrait type variation {v} of type {s_kind:?}" - ), - }, + } + r#type::Kind::PrecisionTimestamp(pts) => { + let unit = match pts.precision { + 0 => Ok(TimeUnit::Second), + 3 => Ok(TimeUnit::Millisecond), + 6 => 
Ok(TimeUnit::Microsecond), + 9 => Ok(TimeUnit::Nanosecond), + p => not_impl_err!( + "Unsupported Substrait precision {p} for PrecisionTimestamp" + ), + }?; + Ok(DataType::Timestamp(unit, None)) + } + r#type::Kind::PrecisionTimestampTz(pts) => { + let unit = match pts.precision { + 0 => Ok(TimeUnit::Second), + 3 => Ok(TimeUnit::Millisecond), + 6 => Ok(TimeUnit::Microsecond), + 9 => Ok(TimeUnit::Nanosecond), + p => not_impl_err!( + "Unsupported Substrait precision {p} for PrecisionTimestampTz" + ), + }?; + Ok(DataType::Timestamp(unit, Some(DEFAULT_TIMEZONE.into()))) + } r#type::Kind::Date(date) => match date.type_variation_reference { DATE_32_TYPE_VARIATION_REF => Ok(DataType::Date32), DATE_64_TYPE_VARIATION_REF => Ok(DataType::Date64), @@ -1465,22 +1501,10 @@ fn from_substrait_type( "Unsupported Substrait type variation {v} of type {s_kind:?}" ), }, - r#type::Kind::IntervalYear(i) => match i.type_variation_reference { - DEFAULT_TYPE_VARIATION_REF => { - Ok(DataType::Interval(IntervalUnit::YearMonth)) - } - v => not_impl_err!( - "Unsupported Substrait type variation {v} of type {s_kind:?}" - ), - }, - r#type::Kind::IntervalDay(i) => match i.type_variation_reference { - DEFAULT_TYPE_VARIATION_REF => { - Ok(DataType::Interval(IntervalUnit::DayTime)) - } - v => not_impl_err!( - "Unsupported Substrait type variation {v} of type {s_kind:?}" - ), - }, + r#type::Kind::IntervalYear(_) => { + Ok(DataType::Interval(IntervalUnit::YearMonth)) + } + r#type::Kind::IntervalDay(_) => Ok(DataType::Interval(IntervalUnit::DayTime)), r#type::Kind::UserDefined(u) => { if let Some(name) = extensions.types.get(&u.type_reference) { match name.as_ref() { @@ -1676,21 +1700,59 @@ fn from_substrait_literal( }, Some(LiteralType::Fp32(f)) => ScalarValue::Float32(Some(*f)), Some(LiteralType::Fp64(f)) => ScalarValue::Float64(Some(*f)), - Some(LiteralType::Timestamp(t)) => match lit.type_variation_reference { - TIMESTAMP_SECOND_TYPE_VARIATION_REF => { - ScalarValue::TimestampSecond(Some(*t), None) - } - TIMESTAMP_MILLI_TYPE_VARIATION_REF => { - ScalarValue::TimestampMillisecond(Some(*t), None) - } - TIMESTAMP_MICRO_TYPE_VARIATION_REF => { - ScalarValue::TimestampMicrosecond(Some(*t), None) + Some(LiteralType::Timestamp(t)) => { + // Kept for backwards compatibility, new plans should use PrecisionTimestamp(Tz) instead + #[allow(deprecated)] + match lit.type_variation_reference { + TIMESTAMP_SECOND_TYPE_VARIATION_REF => { + ScalarValue::TimestampSecond(Some(*t), None) + } + TIMESTAMP_MILLI_TYPE_VARIATION_REF => { + ScalarValue::TimestampMillisecond(Some(*t), None) + } + TIMESTAMP_MICRO_TYPE_VARIATION_REF => { + ScalarValue::TimestampMicrosecond(Some(*t), None) + } + TIMESTAMP_NANO_TYPE_VARIATION_REF => { + ScalarValue::TimestampNanosecond(Some(*t), None) + } + others => { + return substrait_err!("Unknown type variation reference {others}"); + } } - TIMESTAMP_NANO_TYPE_VARIATION_REF => { - ScalarValue::TimestampNanosecond(Some(*t), None) + } + Some(LiteralType::PrecisionTimestamp(pt)) => match pt.precision { + 0 => ScalarValue::TimestampSecond(Some(pt.value), None), + 3 => ScalarValue::TimestampMillisecond(Some(pt.value), None), + 6 => ScalarValue::TimestampMicrosecond(Some(pt.value), None), + 9 => ScalarValue::TimestampNanosecond(Some(pt.value), None), + p => { + return not_impl_err!( + "Unsupported Substrait precision {p} for PrecisionTimestamp" + ); } - others => { - return substrait_err!("Unknown type variation reference {others}"); + }, + Some(LiteralType::PrecisionTimestampTz(pt)) => match pt.precision { + 0 => 
ScalarValue::TimestampSecond( + Some(pt.value), + Some(DEFAULT_TIMEZONE.into()), + ), + 3 => ScalarValue::TimestampMillisecond( + Some(pt.value), + Some(DEFAULT_TIMEZONE.into()), + ), + 6 => ScalarValue::TimestampMicrosecond( + Some(pt.value), + Some(DEFAULT_TIMEZONE.into()), + ), + 9 => ScalarValue::TimestampNanosecond( + Some(pt.value), + Some(DEFAULT_TIMEZONE.into()), + ), + p => { + return not_impl_err!( + "Unsupported Substrait precision {p} for PrecisionTimestamp" + ); } }, Some(LiteralType::Date(d)) => ScalarValue::Date32(Some(*d)), @@ -1881,10 +1943,24 @@ fn from_substrait_literal( Some(LiteralType::IntervalDayToSecond(IntervalDayToSecond { days, seconds, - microseconds, + subseconds, + precision_mode, })) => { - // DF only supports millisecond precision, so we lose the micros here - ScalarValue::new_interval_dt(*days, (seconds * 1000) + (microseconds / 1000)) + // DF only supports millisecond precision, so for any more granular type we lose precision + let milliseconds = match precision_mode { + Some(PrecisionMode::Microseconds(ms)) => ms / 1000, + Some(PrecisionMode::Precision(0)) => *subseconds as i32 * 1000, + Some(PrecisionMode::Precision(3)) => *subseconds as i32, + Some(PrecisionMode::Precision(6)) => (subseconds / 1000) as i32, + Some(PrecisionMode::Precision(9)) => (subseconds / 1000 / 1000) as i32, + _ => { + return not_impl_err!( + "Unsupported Substrait interval day to second precision mode" + ) + } + }; + + ScalarValue::new_interval_dt(*days, (seconds * 1000) + milliseconds) } Some(LiteralType::IntervalYearToMonth(IntervalYearToMonth { years, months })) => { ScalarValue::new_interval_ym(*years, *months) @@ -2026,21 +2102,55 @@ fn from_substrait_null( }, r#type::Kind::Fp32(_) => Ok(ScalarValue::Float32(None)), r#type::Kind::Fp64(_) => Ok(ScalarValue::Float64(None)), - r#type::Kind::Timestamp(ts) => match ts.type_variation_reference { - TIMESTAMP_SECOND_TYPE_VARIATION_REF => { - Ok(ScalarValue::TimestampSecond(None, None)) - } - TIMESTAMP_MILLI_TYPE_VARIATION_REF => { - Ok(ScalarValue::TimestampMillisecond(None, None)) - } - TIMESTAMP_MICRO_TYPE_VARIATION_REF => { - Ok(ScalarValue::TimestampMicrosecond(None, None)) - } - TIMESTAMP_NANO_TYPE_VARIATION_REF => { - Ok(ScalarValue::TimestampNanosecond(None, None)) + r#type::Kind::Timestamp(ts) => { + // Kept for backwards compatibility, new plans should use PrecisionTimestamp(Tz) instead + #[allow(deprecated)] + match ts.type_variation_reference { + TIMESTAMP_SECOND_TYPE_VARIATION_REF => { + Ok(ScalarValue::TimestampSecond(None, None)) + } + TIMESTAMP_MILLI_TYPE_VARIATION_REF => { + Ok(ScalarValue::TimestampMillisecond(None, None)) + } + TIMESTAMP_MICRO_TYPE_VARIATION_REF => { + Ok(ScalarValue::TimestampMicrosecond(None, None)) + } + TIMESTAMP_NANO_TYPE_VARIATION_REF => { + Ok(ScalarValue::TimestampNanosecond(None, None)) + } + v => not_impl_err!( + "Unsupported Substrait type variation {v} of type {kind:?}" + ), } - v => not_impl_err!( - "Unsupported Substrait type variation {v} of type {kind:?}" + } + r#type::Kind::PrecisionTimestamp(pts) => match pts.precision { + 0 => Ok(ScalarValue::TimestampSecond(None, None)), + 3 => Ok(ScalarValue::TimestampMillisecond(None, None)), + 6 => Ok(ScalarValue::TimestampMicrosecond(None, None)), + 9 => Ok(ScalarValue::TimestampNanosecond(None, None)), + p => not_impl_err!( + "Unsupported Substrait precision {p} for PrecisionTimestamp" + ), + }, + r#type::Kind::PrecisionTimestampTz(pts) => match pts.precision { + 0 => Ok(ScalarValue::TimestampSecond( + None, + Some(DEFAULT_TIMEZONE.into()), 
+ )), + 3 => Ok(ScalarValue::TimestampMillisecond( + None, + Some(DEFAULT_TIMEZONE.into()), + )), + 6 => Ok(ScalarValue::TimestampMicrosecond( + None, + Some(DEFAULT_TIMEZONE.into()), + )), + 9 => Ok(ScalarValue::TimestampNanosecond( + None, + Some(DEFAULT_TIMEZONE.into()), + )), + p => not_impl_err!( + "Unsupported Substrait precision {p} for PrecisionTimestamp" ), }, r#type::Kind::Date(date) => match date.type_variation_reference { diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index ee04749f5e6b..72b6760be29c 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -38,8 +38,6 @@ use crate::variation_const::{ DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF, DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME, LARGE_CONTAINER_TYPE_VARIATION_REF, - TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF, - TIMESTAMP_NANO_TYPE_VARIATION_REF, TIMESTAMP_SECOND_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF, }; use datafusion::arrow::array::{Array, GenericListArray, OffsetSizeTrait}; @@ -55,10 +53,11 @@ use datafusion::logical_expr::{expr, Between, JoinConstraint, LogicalPlan, Opera use datafusion::prelude::Expr; use pbjson_types::Any as ProtoAny; use substrait::proto::exchange_rel::{ExchangeKind, RoundRobin, ScatterFields}; +use substrait::proto::expression::literal::interval_day_to_second::PrecisionMode; use substrait::proto::expression::literal::map::KeyValue; use substrait::proto::expression::literal::{ - user_defined, IntervalDayToSecond, IntervalYearToMonth, List, Map, Struct, - UserDefined, + user_defined, IntervalDayToSecond, IntervalYearToMonth, List, Map, + PrecisionTimestamp, Struct, UserDefined, }; use substrait::proto::expression::subquery::InPredicate; use substrait::proto::expression::window_function::BoundsType; @@ -658,8 +657,8 @@ fn to_substrait_jointype(join_type: JoinType) -> join_rel::JoinType { JoinType::Left => join_rel::JoinType::Left, JoinType::Right => join_rel::JoinType::Right, JoinType::Full => join_rel::JoinType::Outer, - JoinType::LeftAnti => join_rel::JoinType::Anti, - JoinType::LeftSemi => join_rel::JoinType::Semi, + JoinType::LeftAnti => join_rel::JoinType::LeftAnti, + JoinType::LeftSemi => join_rel::JoinType::LeftSemi, JoinType::RightAnti | JoinType::RightSemi => unimplemented!(), } } @@ -1376,20 +1375,31 @@ fn to_substrait_type( nullability, })), }), - // Timezone is ignored. - DataType::Timestamp(unit, _) => { - let type_variation_reference = match unit { - TimeUnit::Second => TIMESTAMP_SECOND_TYPE_VARIATION_REF, - TimeUnit::Millisecond => TIMESTAMP_MILLI_TYPE_VARIATION_REF, - TimeUnit::Microsecond => TIMESTAMP_MICRO_TYPE_VARIATION_REF, - TimeUnit::Nanosecond => TIMESTAMP_NANO_TYPE_VARIATION_REF, + DataType::Timestamp(unit, tz) => { + let precision = match unit { + TimeUnit::Second => 0, + TimeUnit::Millisecond => 3, + TimeUnit::Microsecond => 6, + TimeUnit::Nanosecond => 9, }; - Ok(substrait::proto::Type { - kind: Some(r#type::Kind::Timestamp(r#type::Timestamp { - type_variation_reference, + let kind = match tz { + None => r#type::Kind::PrecisionTimestamp(r#type::PrecisionTimestamp { + type_variation_reference: DEFAULT_TYPE_VARIATION_REF, nullability, - })), - }) + precision, + }), + Some(_) => { + // If timezone is present, no matter what the actual tz value is, it indicates the + // value of the timestamp is tied to UTC epoch. 
That's all that Substrait cares about. + // As the timezone is lost, this conversion may be lossy for downstream use of the value. + r#type::Kind::PrecisionTimestampTz(r#type::PrecisionTimestampTz { + type_variation_reference: DEFAULT_TYPE_VARIATION_REF, + nullability, + precision, + }) + } + }; + Ok(substrait::proto::Type { kind: Some(kind) }) } DataType::Date32 => Ok(substrait::proto::Type { kind: Some(r#type::Kind::Date(r#type::Date { @@ -1415,6 +1425,7 @@ fn to_substrait_type( kind: Some(r#type::Kind::IntervalDay(r#type::IntervalDay { type_variation_reference: DEFAULT_TYPE_VARIATION_REF, nullability, + precision: Some(3), // DayTime precision is always milliseconds })), }), IntervalUnit::MonthDayNano => { @@ -1798,21 +1809,64 @@ fn to_substrait_literal( ScalarValue::Float64(Some(f)) => { (LiteralType::Fp64(*f), DEFAULT_TYPE_VARIATION_REF) } - ScalarValue::TimestampSecond(Some(t), _) => ( - LiteralType::Timestamp(*t), - TIMESTAMP_SECOND_TYPE_VARIATION_REF, + ScalarValue::TimestampSecond(Some(t), None) => ( + LiteralType::PrecisionTimestamp(PrecisionTimestamp { + precision: 0, + value: *t, + }), + DEFAULT_TYPE_VARIATION_REF, ), - ScalarValue::TimestampMillisecond(Some(t), _) => ( - LiteralType::Timestamp(*t), - TIMESTAMP_MILLI_TYPE_VARIATION_REF, + ScalarValue::TimestampMillisecond(Some(t), None) => ( + LiteralType::PrecisionTimestamp(PrecisionTimestamp { + precision: 3, + value: *t, + }), + DEFAULT_TYPE_VARIATION_REF, ), - ScalarValue::TimestampMicrosecond(Some(t), _) => ( - LiteralType::Timestamp(*t), - TIMESTAMP_MICRO_TYPE_VARIATION_REF, + ScalarValue::TimestampMicrosecond(Some(t), None) => ( + LiteralType::PrecisionTimestamp(PrecisionTimestamp { + precision: 6, + value: *t, + }), + DEFAULT_TYPE_VARIATION_REF, ), - ScalarValue::TimestampNanosecond(Some(t), _) => ( - LiteralType::Timestamp(*t), - TIMESTAMP_NANO_TYPE_VARIATION_REF, + ScalarValue::TimestampNanosecond(Some(t), None) => ( + LiteralType::PrecisionTimestamp(PrecisionTimestamp { + precision: 9, + value: *t, + }), + DEFAULT_TYPE_VARIATION_REF, + ), + // If timezone is present, no matter what the actual tz value is, it indicates the + // value of the timestamp is tied to UTC epoch. That's all that Substrait cares about. + // As the timezone is lost, this conversion may be lossy for downstream use of the value. 
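+            // On the consumer side, DataFusion reads PrecisionTimestampTz back using the
+            // DEFAULT_TIMEZONE ("UTC"), so the instant on the timeline survives a round
+            // trip even though the original timezone string does not.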
+ ScalarValue::TimestampSecond(Some(t), Some(_)) => ( + LiteralType::PrecisionTimestampTz(PrecisionTimestamp { + precision: 0, + value: *t, + }), + DEFAULT_TYPE_VARIATION_REF, + ), + ScalarValue::TimestampMillisecond(Some(t), Some(_)) => ( + LiteralType::PrecisionTimestampTz(PrecisionTimestamp { + precision: 3, + value: *t, + }), + DEFAULT_TYPE_VARIATION_REF, + ), + ScalarValue::TimestampMicrosecond(Some(t), Some(_)) => ( + LiteralType::PrecisionTimestampTz(PrecisionTimestamp { + precision: 6, + value: *t, + }), + DEFAULT_TYPE_VARIATION_REF, + ), + ScalarValue::TimestampNanosecond(Some(t), Some(_)) => ( + LiteralType::PrecisionTimestampTz(PrecisionTimestamp { + precision: 9, + value: *t, + }), + DEFAULT_TYPE_VARIATION_REF, ), ScalarValue::Date32(Some(d)) => { (LiteralType::Date(*d), DATE_32_TYPE_VARIATION_REF) @@ -1847,7 +1901,8 @@ fn to_substrait_literal( LiteralType::IntervalDayToSecond(IntervalDayToSecond { days: i.days, seconds: i.milliseconds / 1000, - microseconds: (i.milliseconds % 1000) * 1000, + subseconds: (i.milliseconds % 1000) as i64, + precision_mode: Some(PrecisionMode::Precision(3)), // 3 for milliseconds }), DEFAULT_TYPE_VARIATION_REF, ), @@ -2142,6 +2197,18 @@ mod test { round_trip_literal(ScalarValue::UInt64(Some(u64::MIN)))?; round_trip_literal(ScalarValue::UInt64(Some(u64::MAX)))?; + for (ts, tz) in [ + (Some(12345), None), + (None, None), + (Some(12345), Some("UTC".into())), + (None, Some("UTC".into())), + ] { + round_trip_literal(ScalarValue::TimestampSecond(ts, tz.clone()))?; + round_trip_literal(ScalarValue::TimestampMillisecond(ts, tz.clone()))?; + round_trip_literal(ScalarValue::TimestampMicrosecond(ts, tz.clone()))?; + round_trip_literal(ScalarValue::TimestampNanosecond(ts, tz))?; + } + round_trip_literal(ScalarValue::List(ScalarValue::new_list_nullable( &[ScalarValue::Float32(Some(1.0))], &DataType::Float32, @@ -2271,10 +2338,14 @@ mod test { round_trip_type(DataType::UInt64)?; round_trip_type(DataType::Float32)?; round_trip_type(DataType::Float64)?; - round_trip_type(DataType::Timestamp(TimeUnit::Second, None))?; - round_trip_type(DataType::Timestamp(TimeUnit::Millisecond, None))?; - round_trip_type(DataType::Timestamp(TimeUnit::Microsecond, None))?; - round_trip_type(DataType::Timestamp(TimeUnit::Nanosecond, None))?; + + for tz in [None, Some("UTC".into())] { + round_trip_type(DataType::Timestamp(TimeUnit::Second, tz.clone()))?; + round_trip_type(DataType::Timestamp(TimeUnit::Millisecond, tz.clone()))?; + round_trip_type(DataType::Timestamp(TimeUnit::Microsecond, tz.clone()))?; + round_trip_type(DataType::Timestamp(TimeUnit::Nanosecond, tz))?; + } + round_trip_type(DataType::Date32)?; round_trip_type(DataType::Date64)?; round_trip_type(DataType::Binary)?; diff --git a/datafusion/substrait/src/variation_const.rs b/datafusion/substrait/src/variation_const.rs index c94ad2d669fd..1525da764509 100644 --- a/datafusion/substrait/src/variation_const.rs +++ b/datafusion/substrait/src/variation_const.rs @@ -38,10 +38,16 @@ /// The "system-preferred" variation (i.e., no variation). 
pub const DEFAULT_TYPE_VARIATION_REF: u32 = 0; pub const UNSIGNED_INTEGER_TYPE_VARIATION_REF: u32 = 1; + +#[deprecated(since = "42.0.0", note = "Use `PrecisionTimestamp(Tz)` type instead")] pub const TIMESTAMP_SECOND_TYPE_VARIATION_REF: u32 = 0; +#[deprecated(since = "42.0.0", note = "Use `PrecisionTimestamp(Tz)` type instead")] pub const TIMESTAMP_MILLI_TYPE_VARIATION_REF: u32 = 1; +#[deprecated(since = "42.0.0", note = "Use `PrecisionTimestamp(Tz)` type instead")] pub const TIMESTAMP_MICRO_TYPE_VARIATION_REF: u32 = 2; +#[deprecated(since = "42.0.0", note = "Use `PrecisionTimestamp(Tz)` type instead")] pub const TIMESTAMP_NANO_TYPE_VARIATION_REF: u32 = 3; + pub const DATE_32_TYPE_VARIATION_REF: u32 = 0; pub const DATE_64_TYPE_VARIATION_REF: u32 = 1; pub const DEFAULT_CONTAINER_TYPE_VARIATION_REF: u32 = 0; diff --git a/datafusion/wasmtest/src/lib.rs b/datafusion/wasmtest/src/lib.rs index a74cce72ac64..50325d262d1d 100644 --- a/datafusion/wasmtest/src/lib.rs +++ b/datafusion/wasmtest/src/lib.rs @@ -78,9 +78,8 @@ mod test { use super::*; use datafusion::execution::context::SessionContext; use datafusion_execution::{ - config::SessionConfig, - disk_manager::DiskManagerConfig, - runtime_env::{RuntimeConfig, RuntimeEnv}, + config::SessionConfig, disk_manager::DiskManagerConfig, + runtime_env::RuntimeEnvBuilder, }; use datafusion_physical_plan::collect; use datafusion_sql::parser::DFParser; @@ -100,10 +99,10 @@ mod test { // Execute SQL (using datafusion) let rt = Arc::new( - RuntimeEnv::new( - RuntimeConfig::new().with_disk_manager(DiskManagerConfig::Disabled), - ) - .unwrap(), + RuntimeEnvBuilder::new() + .with_disk_manager(DiskManagerConfig::Disabled) + .build() + .unwrap(), ); let session_config = SessionConfig::new().with_target_partitions(1); let session_context = diff --git a/docs/Cargo.toml b/docs/Cargo.toml deleted file mode 100644 index 14398c841579..000000000000 --- a/docs/Cargo.toml +++ /dev/null @@ -1,35 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "datafusion-docs-tests" -description = "DataFusion Documentation Tests" -publish = false -version = { workspace = true } -edition = { workspace = true } -readme = { workspace = true } -homepage = { workspace = true } -repository = { workspace = true } -license = { workspace = true } -authors = { workspace = true } -rust-version = { workspace = true } - -[lints] -workspace = true - -[dependencies] -datafusion = { workspace = true } diff --git a/docs/source/index.rst b/docs/source/index.rst index 9c8c886d2502..bb5ea430a321 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -89,6 +89,7 @@ To get started, see user-guide/expressions user-guide/sql/index user-guide/configs + user-guide/explain-usage user-guide/faq .. 
_toc.library-user-guide: diff --git a/docs/source/library-user-guide/building-logical-plans.md b/docs/source/library-user-guide/building-logical-plans.md index fe922d8eaeb1..556deb02e980 100644 --- a/docs/source/library-user-guide/building-logical-plans.md +++ b/docs/source/library-user-guide/building-logical-plans.md @@ -31,44 +31,52 @@ explained in more detail in the [Query Planning and Execution Overview] section DataFusion's [LogicalPlan] is an enum containing variants representing all the supported operators, and also contains an `Extension` variant that allows projects building on DataFusion to add custom logical operators. -It is possible to create logical plans by directly creating instances of the [LogicalPlan] enum as follows, but is is +It is possible to create logical plans by directly creating instances of the [LogicalPlan] enum as shown, but it is much easier to use the [LogicalPlanBuilder], which is described in the next section. Here is an example of building a logical plan directly: - - ```rust -// create a logical table source -let schema = Schema::new(vec![ - Field::new("id", DataType::Int32, true), - Field::new("name", DataType::Utf8, true), -]); -let table_source = LogicalTableSource::new(SchemaRef::new(schema)); - -// create a TableScan plan -let projection = None; // optional projection -let filters = vec![]; // optional filters to push down -let fetch = None; // optional LIMIT -let table_scan = LogicalPlan::TableScan(TableScan::try_new( - "person", - Arc::new(table_source), - projection, - filters, - fetch, -)?); - -// create a Filter plan that evaluates `id > 500` that wraps the TableScan -let filter_expr = col("id").gt(lit(500)); -let plan = LogicalPlan::Filter(Filter::try_new(filter_expr, Arc::new(table_scan))?); - -// print the plan -println!("{}", plan.display_indent_schema()); +use datafusion::common::DataFusionError; +use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use datafusion::logical_expr::{Filter, LogicalPlan, TableScan, LogicalTableSource}; +use datafusion::prelude::*; +use std::sync::Arc; + +fn main() -> Result<(), DataFusionError> { + // create a logical table source + let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("name", DataType::Utf8, true), + ]); + let table_source = LogicalTableSource::new(SchemaRef::new(schema)); + + // create a TableScan plan + let projection = None; // optional projection + let filters = vec![]; // optional filters to push down + let fetch = None; // optional LIMIT + let table_scan = LogicalPlan::TableScan(TableScan::try_new( + "person", + Arc::new(table_source), + projection, + filters, + fetch, + )? + ); + + // create a Filter plan that evaluates `id > 500` that wraps the TableScan + let filter_expr = col("id").gt(lit(500)); + let plan = LogicalPlan::Filter(Filter::try_new(filter_expr, Arc::new(table_scan)) ? ); + + // print the plan + println!("{}", plan.display_indent_schema()); + Ok(()) +} ``` This example produces the following plan: -``` +```text Filter: person.id > Int32(500) [id:Int32;N, name:Utf8;N] TableScan: person [id:Int32;N, name:Utf8;N] ``` @@ -78,7 +86,7 @@ Filter: person.id > Int32(500) [id:Int32;N, name:Utf8;N] DataFusion logical plans can be created using the [LogicalPlanBuilder] struct. There is also a [DataFrame] API which is a higher-level API that delegates to [LogicalPlanBuilder]. 
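For instance, a minimal sketch of the same `id > 500` filter expressed through the DataFrame API might look as follows, assuming an empty in-memory `person` table registered via `MemTable`:

```rust
use std::sync::Arc;
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::datasource::MemTable;
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    // Register an empty in-memory "person" table so the DataFrame API has a table to scan
    let schema = Arc::new(Schema::new(vec![
        Field::new("id", DataType::Int32, true),
        Field::new("name", DataType::Utf8, true),
    ]));
    let ctx = SessionContext::new();
    ctx.register_table("person", Arc::new(MemTable::try_new(schema, vec![])?))?;

    // The DataFrame API delegates to LogicalPlanBuilder internally
    let df = ctx.table("person").await?.filter(col("id").gt(lit(500)))?;

    // Inspect the LogicalPlan that the DataFrame wraps
    println!("{}", df.logical_plan().display_indent_schema());
    Ok(())
}
```

Printing the plan should show essentially the same `Filter` over `TableScan` structure as the plans shown elsewhere on this page.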
-The following associated functions can be used to create a new builder: +There are several functions that can can be used to create a new builder, such as - `empty` - create an empty plan with no fields - `values` - create a plan from a set of literal values @@ -102,41 +110,107 @@ The following example demonstrates building the same simple query plan as the pr ```rust -// create a logical table source -let schema = Schema::new(vec![ - Field::new("id", DataType::Int32, true), - Field::new("name", DataType::Utf8, true), -]); -let table_source = LogicalTableSource::new(SchemaRef::new(schema)); - -// optional projection -let projection = None; - -// create a LogicalPlanBuilder for a table scan -let builder = LogicalPlanBuilder::scan("person", Arc::new(table_source), projection)?; - -// perform a filter operation and build the plan -let plan = builder - .filter(col("id").gt(lit(500)))? // WHERE id > 500 - .build()?; - -// print the plan -println!("{}", plan.display_indent_schema()); +use datafusion::common::DataFusionError; +use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use datafusion::logical_expr::{LogicalPlanBuilder, LogicalTableSource}; +use datafusion::prelude::*; +use std::sync::Arc; + +fn main() -> Result<(), DataFusionError> { + // create a logical table source + let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("name", DataType::Utf8, true), + ]); + let table_source = LogicalTableSource::new(SchemaRef::new(schema)); + + // optional projection + let projection = None; + + // create a LogicalPlanBuilder for a table scan + let builder = LogicalPlanBuilder::scan("person", Arc::new(table_source), projection)?; + + // perform a filter operation and build the plan + let plan = builder + .filter(col("id").gt(lit(500)))? // WHERE id > 500 + .build()?; + + // print the plan + println!("{}", plan.display_indent_schema()); + Ok(()) +} ``` This example produces the following plan: -``` +```text Filter: person.id > Int32(500) [id:Int32;N, name:Utf8;N] TableScan: person [id:Int32;N, name:Utf8;N] ``` +## Translating Logical Plan to Physical Plan + +Logical plans can not be directly executed. They must be "compiled" into an +[`ExecutionPlan`], which is often referred to as a "physical plan". + +Compared to `LogicalPlan`s `ExecutionPlans` have many more details such as +specific algorithms and detailed optimizations compared to. Given a +`LogicalPlan` the easiest way to create an `ExecutionPlan` is using +[`SessionState::create_physical_plan`] as shown below + +```rust +use datafusion::datasource::{provider_as_source, MemTable}; +use datafusion::common::DataFusionError; +use datafusion::physical_plan::display::DisplayableExecutionPlan; +use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use datafusion::logical_expr::{LogicalPlanBuilder, LogicalTableSource}; +use datafusion::prelude::*; +use std::sync::Arc; + +// Creating physical plans may access remote catalogs and data sources +// thus it must be run with an async runtime. +#[tokio::main] +async fn main() -> Result<(), DataFusionError> { + + // create a default table source + let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("name", DataType::Utf8, true), + ]); + // To create an ExecutionPlan we must provide an actual + // TableProvider. 
For this example, we don't provide any data + // but in production code, this would have `RecordBatch`es with + // in memory data + let table_provider = Arc::new(MemTable::try_new(Arc::new(schema), vec![])?); + // Use the provider_as_source function to convert the TableProvider to a table source + let table_source = provider_as_source(table_provider); + + // create a LogicalPlanBuilder for a table scan without projection or filters + let logical_plan = LogicalPlanBuilder::scan("person", table_source, None)?.build()?; + + // Now create the physical plan by calling `create_physical_plan` + let ctx = SessionContext::new(); + let physical_plan = ctx.state().create_physical_plan(&logical_plan).await?; + + // print the plan + println!("{}", DisplayableExecutionPlan::new(physical_plan.as_ref()).indent(true)); + Ok(()) +} +``` + +This example produces the following physical plan: + +```text +MemoryExec: partitions=0, partition_sizes=[] +``` + ## Table Sources -The previous example used a [LogicalTableSource], which is used for tests and documentation in DataFusion, and is also -suitable if you are using DataFusion to build logical plans but do not use DataFusion's physical planner. However, if you -want to use a [TableSource] that can be executed in DataFusion then you will need to use [DefaultTableSource], which is a -wrapper for a [TableProvider]. +The previous examples use a [LogicalTableSource], which is used for tests and documentation in DataFusion, and is also +suitable if you are using DataFusion to build logical plans but do not use DataFusion's physical planner. + +However, it is more common to use a [TableProvider]. To get a [TableSource] from a +[TableProvider], use [provider_as_source] or [DefaultTableSource]. [query planning and execution overview]: https://docs.rs/datafusion/latest/datafusion/index.html#query-planning-and-execution-overview [architecture guide]: https://docs.rs/datafusion/latest/datafusion/index.html#architecture @@ -145,5 +219,8 @@ wrapper for a [TableProvider]. [dataframe]: using-the-dataframe-api.md [logicaltablesource]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalTableSource.html [defaulttablesource]: https://docs.rs/datafusion/latest/datafusion/datasource/default_table_source/struct.DefaultTableSource.html +[provider_as_source]: https://docs.rs/datafusion/latest/datafusion/datasource/default_table_source/fn.provider_as_source.html [tableprovider]: https://docs.rs/datafusion/latest/datafusion/datasource/provider/trait.TableProvider.html [tablesource]: https://docs.rs/datafusion-expr/latest/datafusion_expr/trait.TableSource.html +[`executionplan`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.ExecutionPlan.html +[`sessionstate::create_physical_plan`]: https://docs.rs/datafusion/latest/datafusion/execution/session_state/struct.SessionState.html#method.create_physical_plan diff --git a/docs/source/user-guide/explain-usage.md b/docs/source/user-guide/explain-usage.md new file mode 100644 index 000000000000..a65fad92d104 --- /dev/null +++ b/docs/source/user-guide/explain-usage.md @@ -0,0 +1,365 @@ + + +# Reading Explain Plans + +## Introduction + +This section describes of how to read a DataFusion query plan. While fully +comprehending all details of these plans requires significant expertise in the +DataFusion engine, this guide will help you get started with the basics. + +Datafusion executes queries using a `query plan`. 
To see the plan without +running the query, add the keyword `EXPLAIN` to your SQL query or call the +[DataFrame::explain] method + +[dataframe::explain]: https://docs.rs/datafusion/latest/datafusion/dataframe/struct.DataFrame.html#method.explain + +## Example: Select and filter + +In this section, we run example queries against the `hits.parquet` file. See +[below](#data-in-this-example)) for information on how to get this file. + +Let's see how DataFusion runs a query that selects the top 5 watch lists for the +site `http://domcheloveplanet.ru/`: + +```sql +EXPLAIN SELECT "WatchID" AS wid, "hits.parquet"."ClientIP" AS ip +FROM 'hits.parquet' +WHERE starts_with("URL", 'http://domcheloveplanet.ru/') +ORDER BY wid ASC, ip DESC +LIMIT 5; +``` + +The output will look like + +``` ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: wid ASC NULLS LAST, ip DESC NULLS FIRST, fetch=5 | +| | Projection: hits.parquet.WatchID AS wid, hits.parquet.ClientIP AS ip | +| | Filter: starts_with(hits.parquet.URL, Utf8("http://domcheloveplanet.ru/")) | +| | TableScan: hits.parquet projection=[WatchID, ClientIP, URL], partial_filters=[starts_with(hits.parquet.URL, Utf8("http://domcheloveplanet.ru/"))] | +| physical_plan | SortPreservingMergeExec: [wid@0 ASC NULLS LAST,ip@1 DESC], fetch=5 | +| | SortExec: TopK(fetch=5), expr=[wid@0 ASC NULLS LAST,ip@1 DESC], preserve_partitioning=[true] | +| | ProjectionExec: expr=[WatchID@0 as wid, ClientIP@1 as ip] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: starts_with(URL@2, http://domcheloveplanet.ru/) | +| | ParquetExec: file_groups={16 groups: [[hits.parquet:0..923748528], ...]}, projection=[WatchID, ClientIP, URL], predicate=starts_with(URL@13, http://domcheloveplanet.ru/) | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +2 row(s) fetched. +Elapsed 0.060 seconds. +``` + +There are two sections: logical plan and physical plan + +- **Logical Plan:** is a plan generated for a specific SQL query, DataFrame, or other language without the + knowledge of the underlying data organization. +- **Physical Plan:** is a plan generated from a logical plan along with consideration of the hardware + configuration (e.g number of CPUs) and the underlying data organization (e.g number of files). + This physical plan is specific to your hardware configuration and your data. If you load the same + data to different hardware with different configurations, the same query may generate different query plans. + +Understanding a query plan can help to you understand its performance. For example, when the plan shows your query reads +many files, it signals you to either add more filter in the query to read less data or to modify your file +design to make fewer but larger files. This document focuses on how to read a query plan. How to make a +query run faster depends on the reason it is slow and beyond the scope of this document. + +## Query plans are trees + +A query plan is an upside down tree, and we always read from bottom up. 
The +physical plan in Figure 1 in tree format will look like + +``` + ▲ + │ + │ +┌─────────────────────────────────────────────────┐ +│ SortPreservingMergeExec │ +│ [wid@0 ASC NULLS LAST,ip@1 DESC] │ +│ fetch=5 │ +└─────────────────────────────────────────────────┘ + ▲ + │ +┌─────────────────────────────────────────────────┐ +│ SortExec TopK(fetch=5), │ +│ expr=[wid@0 ASC NULLS LAST,ip@1 DESC], │ +│ preserve_partitioning=[true] │ +└─────────────────────────────────────────────────┘ + ▲ + │ +┌─────────────────────────────────────────────────┐ +│ ProjectionExec │ +│ expr=[WatchID@0 as wid, ClientIP@1 as ip] │ +└─────────────────────────────────────────────────┘ + ▲ + │ +┌─────────────────────────────────────────────────┐ +│ CoalesceBatchesExec │ +└─────────────────────────────────────────────────┘ + ▲ + │ +┌─────────────────────────────────────────────────┐ +│ FilterExec │ +│ starts_with(URL@2, http://domcheloveplanet.ru/) │ +└─────────────────────────────────────────────────┘ + ▲ + │ +┌────────────────────────────────────────────────┐ +│ ParquetExec │ +│ hits.parquet (filter = ...) │ +└────────────────────────────────────────────────┘ +``` + +Each node in the tree/plan ends with `Exec` and is sometimes also called an `operator` or `ExecutionPlan` where data is +processed, transformed and sent up. + +1. First, data in parquet the `hits.parquet` file us read in parallel using 16 cores in 16 "partitions" (more on this later) from `ParquetExec`, which applies a first pass at filtering during the scan. +2. Next, the output is filtered using `FilterExec` to ensure only rows where `starts_with(URL, 'http://domcheloveplanet.ru/')` evaluates to true are passed on +3. The `CoalesceBatchesExec` then ensures that the data is grouped into larger batches for processing +4. The `ProjectionExec` then projects the data to rename the `WatchID` and `ClientIP` columns to `wid` and `ip` respectively. +5. The `SortExec` then sorts the data by `wid ASC, ip DESC`. The `Topk(fetch=5)` indicates that a special implementation is used that only tracks and emits the top 5 values in each partition. +6. Finally the `SortPreservingMergeExec` merges the sorted data from all partitions and returns the top 5 rows overall. + +## Understanding large query plans + +A large query plan may look intimidating, but you can quickly understand what it does by following these steps + +1. As always, read from bottom up, one operator at a time. +2. Understand the job of this operator by reading + the [Physical Plan documentation](https://docs.rs/datafusion/latest/datafusion/physical_plan/index.html). +3. Understand the input data of the operator and how large/small it may be. +4. Understand how much data that operator produces and what it would look like. + +If you can answer those questions, you will be able to estimate how much work +that plan has to do and thus how long it will take. However, the `EXPLAIN` just +shows you the plan without executing it. + +If you want to know more about how much work each operator in query plan does, +you can use the `EXPLAIN ANALYZE` to get the explain with runtime added (see +next section) + +## More Debugging Information: `EXPLAIN VERBOSE` + +If the plan has to read too many files, not all of them will be shown in the +`EXPLAIN`. To see them, use `EXPLAIN VEBOSE`. Like `EXPLAIN`, `EXPLAIN VERBOSE` +does not run the query. 
Instead it shows the full explain plan, with information +that is omitted from the default explain, as well as all intermediate physical +plans DataFusion generates before returning. This mode can be very helpful for +debugging to see why and when DataFusion added and removed operators from a plan. + +## Execution Counters: `EXPLAIN ANALYZE` + +During execution, DataFusion operators collect detailed metrics. You can access +them programmatically via [`ExecutionPlan::metrics`] as well as with the +`EXPLAIN ANALYZE` command. For example here is the same query query as +above but with `EXPLAIN ANALYZE` (note the output is edited for clarity) + +[`executionplan::metrics`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.ExecutionPlan.html#method.metrics + +``` +> EXPLAIN ANALYZE SELECT "WatchID" AS wid, "hits.parquet"."ClientIP" AS ip +FROM 'hits.parquet' +WHERE starts_with("URL", 'http://domcheloveplanet.ru/') +ORDER BY wid ASC, ip DESC +LIMIT 5; ++-------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++-------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Plan with Metrics | SortPreservingMergeExec: [wid@0 ASC NULLS LAST,ip@1 DESC], fetch=5, metrics=[output_rows=5, elapsed_compute=2.375µs] | +| | SortExec: TopK(fetch=5), expr=[wid@0 ASC NULLS LAST,ip@1 DESC], preserve_partitioning=[true], metrics=[output_rows=75, elapsed_compute=7.243038ms, row_replacements=482] | +| | ProjectionExec: expr=[WatchID@0 as wid, ClientIP@1 as ip], metrics=[output_rows=811821, elapsed_compute=66.25µs] | +| | FilterExec: starts_with(URL@2, http://domcheloveplanet.ru/), metrics=[output_rows=811821, elapsed_compute=1.36923816s] | +| | ParquetExec: file_groups={16 groups: [[hits.parquet:0..923748528], ...]}, projection=[WatchID, ClientIP, URL], predicate=starts_with(URL@13, http://domcheloveplanet.ru/), metrics=[output_rows=99997497, elapsed_compute=16ns, ... bytes_scanned=3703192723, ... time_elapsed_opening=308.203002ms, time_elapsed_scanning_total=8.350342183s, ...] | ++-------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +1 row(s) fetched. +Elapsed 0.720 seconds. +``` + +In this case, DataFusion actually ran the query, but discarded any results, and +instead returned an annotated plan with a new field, `metrics=[...]` + +Most operators have the common metrics `output_rows` and `elapsed_compute` and +some have operator specific metrics such as `ParquetExec` which has +`bytes_scanned=3703192723`. 
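For example, a minimal sketch of reading those same counters programmatically after executing a query (the `hits.parquet` path and the query are only illustrative):

```rust
use std::sync::Arc;
use datafusion::error::Result;
use datafusion::physical_plan::{collect, ExecutionPlan};
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    ctx.register_parquet("hits", "hits.parquet", ParquetReadOptions::default())
        .await?;
    let df = ctx.sql("SELECT COUNT(*) FROM hits").await?;

    // Build and execute the physical plan, keeping a handle to it for its metrics
    let physical_plan: Arc<dyn ExecutionPlan> = df.create_physical_plan().await?;
    let _batches = collect(physical_plan.clone(), ctx.task_ctx()).await?;

    // The same counters EXPLAIN ANALYZE prints; metrics() covers this operator only,
    // so walk the plan's children to see every operator
    if let Some(metrics) = physical_plan.metrics() {
        println!("output_rows     = {:?}", metrics.output_rows());
        println!("elapsed_compute = {:?}", metrics.elapsed_compute());
    }
    Ok(())
}
```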
Note that times and counters are reported across all
+cores, so if you have 16 cores, the time reported is the sum of the time taken
+by all 16 cores.
+
+Again, reading from bottom up:
+
+- `ParquetExec`
+  - `output_rows=99997497`: A total of 99.9M rows were produced
+  - `bytes_scanned=3703192723`: Of the 14GB file, 3.7GB were actually read (due to projection pushdown)
+  - `time_elapsed_opening=308.203002ms`: It took 300ms to open the file and prepare to read it
+  - `time_elapsed_scanning_total=8.350342183s`: It took 8.3 seconds of CPU time (across 16 cores) to actually decode the parquet data
+- `FilterExec`
+  - `output_rows=811821`: Of the 99.9M rows at its input, only 811K rows passed the filter and were produced at the output
+  - `elapsed_compute=1.36923816s`: In total, 1.36s of CPU time (across 16 cores) was spent evaluating the filter
+- `CoalesceBatchesExec`
+  - `output_rows=811821`, `elapsed_compute=12.873379ms`: Produced 811K rows in 13ms
+- `ProjectionExec`
+  - `output_rows=811821`, `elapsed_compute=66.25µs`: Produced 811K rows in 66µs (microseconds). This projection is almost instantaneous as it does not manipulate any data
+- `SortExec`
+  - `output_rows=75`: Produced 75 rows in total. Each of 16 cores could produce up to 5 rows, but in this case not all cores did.
+  - `elapsed_compute=7.243038ms`: 7ms was used to determine the top 5 rows
+  - `row_replacements=482`: Internally, the TopK operator updated its top list 482 times
+- `SortPreservingMergeExec`
+  - `output_rows=5`, `elapsed_compute=2.375µs`: Produced the final 5 rows in 2.375µs (microseconds)
+
+## Partitions and Execution
+
+DataFusion determines the optimal number of cores to use as part of query
+planning. Roughly speaking, each "partition" in the plan is run independently using
+a separate core. Data crosses between cores only within certain operators such as
+`RepartitionExec`, `CoalescePartitionsExec` and `SortPreservingMergeExec`.
+
+You can read more about this in the [Partitioning Docs].
+
+[partitioning docs]: https://docs.rs/datafusion/latest/datafusion/physical_expr/enum.Partitioning.html
+
+## Example of an Aggregate Query
+
+Let us delve into an example query that aggregates data from the `hits.parquet`
+file.
For example, this query from ClickBench finds the top 10 users by their +number of hits: + +```sql +SELECT "UserID", COUNT(*) +FROM 'hits.parquet' +GROUP BY "UserID" +ORDER BY COUNT(*) DESC +LIMIT 10; +``` + +We can again see the query plan by using `EXPLAIN`: + +``` +> EXPLAIN SELECT "UserID", COUNT(*) FROM 'hits.parquet' GROUP BY "UserID" ORDER BY COUNT(*) DESC LIMIT 10; ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Limit: skip=0, fetch=10 | +| | Sort: count(*) DESC NULLS FIRST, fetch=10 | +| | Aggregate: groupBy=[[hits.parquet.UserID]], aggr=[[count(Int64(1)) AS count(*)]] | +| | TableScan: hits.parquet projection=[UserID] | +| physical_plan | GlobalLimitExec: skip=0, fetch=10 | +| | SortPreservingMergeExec: [count(*)@1 DESC], fetch=10 | +| | SortExec: TopK(fetch=10), expr=[count(*)@1 DESC], preserve_partitioning=[true] | +| | AggregateExec: mode=FinalPartitioned, gby=[UserID@0 as UserID], aggr=[count(*)] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([UserID@0], 10), input_partitions=10 | +| | AggregateExec: mode=Partial, gby=[UserID@0 as UserID], aggr=[count(*)] | +| | ParquetExec: file_groups={10 groups: [[hits.parquet:0..1477997645], [hits.parquet:1477997645..2955995290], [hits.parquet:2955995290..4433992935], [hits.parquet:4433992935..5911990580], [hits.parquet:5911990580..7389988225], ...]}, projection=[UserID] | +| | | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +``` + +For this query, let's again read the plan from the bottom to the top: + +**Logical plan operators** + +- `TableScan` + - `hits.parquet`: Scans data from the file `hits.parquet`. + - `projection=[UserID]`: Reads only the `UserID` column +- `Aggregate` + - `groupBy=[[hits.parquet.UserID]]`: Groups by `UserID` column. + - `aggr=[[count(Int64(1)) AS count(*)]]`: Applies the `COUNT` aggregate on each distinct group. +- `Sort` + - `count(*) DESC NULLS FIRST`: Sorts the data in descending count order. 
+  - `fetch=10`: Returns only the first 10 rows.
+- `Limit`
+  - `skip=0`: Does not skip any data for the results.
+  - `fetch=10`: Limits the results to 10 values.
+
+**Physical plan operators**
+
+- `ParquetExec`
+  - `file_groups={10 groups: [...]}`: Reads 10 groups in parallel from the `hits.parquet` file. (The example above was run on a machine with 10 cores.)
+  - `projection=[UserID]`: Pushes down projection of the `UserID` column. The parquet format is columnar and the DataFusion reader only decodes the columns required.
+- `AggregateExec`
+  - `mode=Partial`: Runs a [partial aggregation] in parallel across each of the 10 partitions from the `ParquetExec` immediately after reading.
+  - `gby=[UserID@0 as UserID]`: Represents `GROUP BY` in the [physical plan] and groups together the same values of `UserID`.
+  - `aggr=[count(*)]`: Applies the `COUNT` aggregate on all rows for each group.
+- `RepartitionExec`
+  - `partitioning=Hash([UserID@0], 10)`: Divides the input into 10 (new) output partitions based on the value of `hash(UserID)`. You can read more about this in the [partitioning] documentation.
+  - `input_partitions=10`: Number of input partitions.
+- `CoalesceBatchesExec`
+  - `target_batch_size=8192`: Combines smaller batches into larger batches. In this case approximately 8192 rows in each batch.
+- `AggregateExec`
+  - `mode=FinalPartitioned`: Performs the final aggregation on each group. See the [documentation on multi phase grouping] for more information.
+  - `gby=[UserID@0 as UserID]`: Groups by `UserID`.
+  - `aggr=[count(*)]`: Applies the `COUNT` aggregate on all rows for each group.
+- `SortExec`
+  - `TopK(fetch=10)`: Uses a special "TopK" sort that keeps only the largest 10 values in memory at a time. You can read more about this in the [TopK] documentation.
+  - `expr=[count(*)@1 DESC]`: Sorts all rows in descending order. Note this represents the `ORDER BY` in the physical plan.
+  - `preserve_partitioning=[true]`: The sort is done in parallel on each partition. In this case the top 10 values are found for each of the 10 partitions, in parallel.
+- `SortPreservingMergeExec`
+  - `[count(*)@1 DESC]`: This operator merges the 10 distinct streams into a single stream using this expression.
+  - `fetch=10`: Returns only the first 10 rows.
+- `GlobalLimitExec`
+  - `skip=0`: Does not skip any rows.
+  - `fetch=10`: Returns only the first 10 rows, denoted by `LIMIT 10` in the query.
+
+[partial aggregation]: https://docs.rs/datafusion/latest/datafusion/physical_plan/aggregates/enum.AggregateMode.html#variant.Partial
+[physical plan]: https://docs.rs/datafusion/latest/datafusion/physical_plan/aggregates/struct.PhysicalGroupBy.html
+[partitioning]: https://docs.rs/datafusion/latest/datafusion/physical_plan/repartition/struct.RepartitionExec.html
+[topk]: https://docs.rs/datafusion/latest/datafusion/physical_plan/struct.TopK.html
+[documentation on multi phase grouping]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.Accumulator.html#tymethod.state
+
+### Data in this Example
+
+The examples in this section use data from [ClickBench], a benchmark for data
+analytics.
The examples are in terms of the 14GB [`hits.parquet`] file and can be +downloaded from the website or using the following commands: + +```shell +cd benchmarks +./bench.sh data clickbench_1 +*************************** +DataFusion Benchmark Runner and Data Generator +COMMAND: data +BENCHMARK: clickbench_1 +DATA_DIR: /Users/andrewlamb/Software/datafusion2/benchmarks/data +CARGO_COMMAND: cargo run --release +PREFER_HASH_JOIN: true +*************************** +Checking hits.parquet...... found 14779976446 bytes ... Done +``` + +Then you can run `datafusion-cli` to get plans: + +```shell +cd datafusion/benchmarks/data +datafusion-cli + +DataFusion CLI v41.0.0 +> select count(*) from 'hits.parquet'; ++----------+ +| count(*) | ++----------+ +| 99997497 | ++----------+ +1 row(s) fetched. +Elapsed 0.062 seconds. +> +``` + +[clickbench]: https://benchmark.clickhouse.com/ +[`hits.parquet`]: https://datasets.clickhouse.com/hits_compatible/hits.parquet diff --git a/docs/source/user-guide/sql/explain.md b/docs/source/user-guide/sql/explain.md index 22f73e3d76d7..45bb3a57aa7c 100644 --- a/docs/source/user-guide/sql/explain.md +++ b/docs/source/user-guide/sql/explain.md @@ -21,6 +21,8 @@ The `EXPLAIN` command shows the logical and physical execution plan for the specified SQL statement. +See the [Reading Explain Plans](../explain-usage.md) page for more information on how to interpret these plans. +
 EXPLAIN [ANALYZE] [VERBOSE] statement
 
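The same plans can also be produced programmatically rather than by writing the `EXPLAIN` statement by hand; a rough sketch using `DataFrame::explain` (the `hits.parquet` table registration is only illustrative):

```rust
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    // Register some table to query; the path here is only an example
    ctx.register_parquet("hits", "hits.parquet", ParquetReadOptions::default())
        .await?;

    // explain(verbose, analyze) corresponds to the EXPLAIN [ANALYZE] [VERBOSE] statement above
    let df = ctx.sql("SELECT COUNT(*) FROM hits").await?;
    df.explain(false, false)?.show().await?;
    Ok(())
}
```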
diff --git a/docs/src/lib.rs b/docs/src/lib.rs deleted file mode 100644 index f73132468ec9..000000000000 --- a/docs/src/lib.rs +++ /dev/null @@ -1,19 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#[cfg(test)] -mod library_logical_plan; diff --git a/docs/src/library_logical_plan.rs b/docs/src/library_logical_plan.rs deleted file mode 100644 index 355003941570..000000000000 --- a/docs/src/library_logical_plan.rs +++ /dev/null @@ -1,78 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -use datafusion::error::Result; -use datafusion::logical_expr::builder::LogicalTableSource; -use datafusion::logical_expr::{Filter, LogicalPlan, LogicalPlanBuilder, TableScan}; -use datafusion::prelude::*; -use std::sync::Arc; - -#[test] -fn plan_1() -> Result<()> { - // create a logical table source - let schema = Schema::new(vec![ - Field::new("id", DataType::Int32, true), - Field::new("name", DataType::Utf8, true), - ]); - let table_source = LogicalTableSource::new(SchemaRef::new(schema)); - - // create a TableScan plan - let projection = None; // optional projection - let filters = vec![]; // optional filters to push down - let fetch = None; // optional LIMIT - let table_scan = LogicalPlan::TableScan(TableScan::try_new( - "person", - Arc::new(table_source), - projection, - filters, - fetch, - )?); - - // create a Filter plan that evaluates `id > 500` and wraps the TableScan - let filter_expr = col("id").gt(lit(500)); - let plan = LogicalPlan::Filter(Filter::try_new(filter_expr, Arc::new(table_scan))?); - - // print the plan - println!("{}", plan.display_indent_schema()); - - Ok(()) -} - -#[test] -fn plan_builder_1() -> Result<()> { - // create a logical table source - let schema = Schema::new(vec![ - Field::new("id", DataType::Int32, true), - Field::new("name", DataType::Utf8, true), - ]); - let table_source = LogicalTableSource::new(SchemaRef::new(schema)); - - // optional projection - let projection = None; - - // create a LogicalPlanBuilder for a table scan - let builder = LogicalPlanBuilder::scan("person", Arc::new(table_source), projection)?; - - // perform a filter that evaluates `id > 500`, and build the plan - let plan = builder.filter(col("id").gt(lit(500)))?.build()?; - - // print the plan - println!("{}", plan.display_indent_schema()); - - Ok(()) -}