diff --git a/.github/workflows/pr_comment.yml b/.github/workflows/pr_comment.yml deleted file mode 100644 index 8b6df1c75687..000000000000 --- a/.github/workflows/pr_comment.yml +++ /dev/null @@ -1,53 +0,0 @@ -# Downloads any `message` artifacts created by other jobs -# and posts them as comments to the PR -name: PR Comment - -on: - workflow_run: - workflows: ["Benchmarks"] - types: - - completed - -jobs: - comment: - name: PR Comment - runs-on: ubuntu-latest - if: github.event.workflow_run.conclusion == 'success' - steps: - - name: Dump GitHub context - env: - GITHUB_CONTEXT: ${{ toJSON(github) }} - run: echo "$GITHUB_CONTEXT" - - - name: Download comment message - uses: actions/download-artifact@v4 - with: - name: message - run-id: ${{ github.event.workflow_run.id }} - github-token: ${{ secrets.GITHUB_TOKEN }} - - - name: Download pr number - uses: actions/download-artifact@v4 - with: - name: pr - run-id: ${{ github.event.workflow_run.id }} - github-token: ${{ secrets.GITHUB_TOKEN }} - - - name: Print message and pr number - run: | - cat pr - echo "PR_NUMBER=$(cat pr)" >> "$GITHUB_ENV" - cat message.md - - - name: Post the comment - uses: actions/github-script@v7 - with: - script: | - const fs = require('fs'); - const content = fs.readFileSync('message.md', 'utf8'); - github.rest.issues.createComment({ - issue_number: process.env.PR_NUMBER, - owner: context.repo.owner, - repo: context.repo.repo, - body: content, - }) diff --git a/Cargo.toml b/Cargo.toml index ae344a46a1bd..50d234c39576 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ # under the License. [workspace] +# datafusion-cli is excluded because of its Cargo.lock. See datafusion-cli/README.md. exclude = ["datafusion-cli", "dev/depcheck"] members = [ "datafusion/common", @@ -33,7 +34,6 @@ members = [ "datafusion/optimizer", "datafusion/physical-expr", "datafusion/physical-expr-common", - "datafusion/physical-expr-functions-aggregate", "datafusion/physical-optimizer", "datafusion/physical-plan", "datafusion/proto", @@ -45,7 +45,6 @@ members = [ "datafusion/substrait", "datafusion/wasmtest", "datafusion-examples", - "docs", "test-utils", "benchmarks", ] @@ -107,7 +106,6 @@ datafusion-functions-window = { path = "datafusion/functions-window", version = datafusion-optimizer = { path = "datafusion/optimizer", version = "41.0.0", default-features = false } datafusion-physical-expr = { path = "datafusion/physical-expr", version = "41.0.0", default-features = false } datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "41.0.0", default-features = false } -datafusion-physical-expr-functions-aggregate = { path = "datafusion/physical-expr-functions-aggregate", version = "41.0.0" } datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "41.0.0" } datafusion-physical-plan = { path = "datafusion/physical-plan", version = "41.0.0" } datafusion-proto = { path = "datafusion/proto", version = "41.0.0" } @@ -121,7 +119,7 @@ futures = "0.3" half = { version = "2.2.1", default-features = false } hashbrown = { version = "0.14.5", features = ["raw"] } indexmap = "2.0.0" -itertools = "0.12" +itertools = "0.13" log = "^0.4" num_cpus = "1.13.0" object_store = { version = "0.10.2", default-features = false } diff --git a/benchmarks/queries/clickbench/README.md b/benchmarks/queries/clickbench/README.md index 560b54181d5f..c6bd8fe9d6f5 100644 --- a/benchmarks/queries/clickbench/README.md +++ b/benchmarks/queries/clickbench/README.md @@ -58,6 +58,21 @@ LIMIT 10; ``` +### Q3: What is the 
income distribution for users in specific regions + +**Question**: "What regions and social networks have the highest variance of parameter price + +**Important Query Properties**: STDDEV and VAR aggregation functions, GROUP BY multiple small ints + +```sql +SELECT "SocialSourceNetworkID", "RegionID", COUNT(*), AVG("Age"), AVG("ParamPrice"), STDDEV("ParamPrice") as s, VAR("ParamPrice") +FROM 'hits.parquet' +GROUP BY "SocialSourceNetworkID", "RegionID" +HAVING s IS NOT NULL +ORDER BY s DESC +LIMIT 10; +``` + ## Data Notes Here are some interesting statistics about the data used in the queries diff --git a/benchmarks/queries/clickbench/extended.sql b/benchmarks/queries/clickbench/extended.sql index 0a2999fceb49..2f814ad8450a 100644 --- a/benchmarks/queries/clickbench/extended.sql +++ b/benchmarks/queries/clickbench/extended.sql @@ -1,3 +1,4 @@ SELECT COUNT(DISTINCT "SearchPhrase"), COUNT(DISTINCT "MobilePhone"), COUNT(DISTINCT "MobilePhoneModel") FROM hits; SELECT COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserCountry"), COUNT(DISTINCT "BrowserLanguage") FROM hits; -SELECT "BrowserCountry", COUNT(DISTINCT "SocialNetwork"), COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserLanguage"), COUNT(DISTINCT "SocialAction") FROM hits GROUP BY 1 ORDER BY 2 DESC LIMIT 10; \ No newline at end of file +SELECT "BrowserCountry", COUNT(DISTINCT "SocialNetwork"), COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserLanguage"), COUNT(DISTINCT "SocialAction") FROM hits GROUP BY 1 ORDER BY 2 DESC LIMIT 10; +SELECT "SocialSourceNetworkID", "RegionID", COUNT(*), AVG("Age"), AVG("ParamPrice"), STDDEV("ParamPrice") as s, VAR("ParamPrice") FROM hits GROUP BY "SocialSourceNetworkID", "RegionID" HAVING s IS NOT NULL ORDER BY s DESC LIMIT 10; diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index a164b74c55a5..1e89bb3af87e 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -17,6 +17,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + [[package]] name = "adler32" version = "1.2.0" @@ -167,9 +173,9 @@ checksum = "9d151e35f61089500b617991b791fc8bfd237ae50cd5950803758a179b41e67a" [[package]] name = "arrayvec" -version = "0.7.4" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" @@ -430,7 +436,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -765,7 +771,7 @@ dependencies = [ "cc", "cfg-if", "libc", - "miniz_oxide", + "miniz_oxide 0.7.4", "object", "rustc-demangle", ] @@ -815,9 +821,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.3" +version = "1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9ec96fe9a81b5e365f9db71fe00edc4fe4ca2cc7dcb7861f0603012a7caa210" +checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7" dependencies = [ "arrayref", "arrayvec", @@ -999,7 +1005,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.74", + "syn 
2.0.75", ] [[package]] @@ -1155,7 +1161,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f" dependencies = [ "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -1206,7 +1212,6 @@ dependencies = [ "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", - "datafusion-physical-expr-functions-aggregate", "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-sql", @@ -1216,7 +1221,7 @@ dependencies = [ "half", "hashbrown", "indexmap", - "itertools 0.12.1", + "itertools", "log", "num-traits", "num_cpus", @@ -1295,12 +1300,14 @@ dependencies = [ "parquet", "paste", "sqlparser", + "tokio", ] [[package]] name = "datafusion-common-runtime" version = "41.0.0" dependencies = [ + "log", "tokio", ] @@ -1367,7 +1374,7 @@ dependencies = [ "datafusion-expr", "hashbrown", "hex", - "itertools 0.12.1", + "itertools", "log", "md-5", "rand", @@ -1422,7 +1429,8 @@ dependencies = [ "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", - "itertools 0.12.1", + "datafusion-physical-expr-common", + "itertools", "log", "paste", "rand", @@ -1450,7 +1458,7 @@ dependencies = [ "datafusion-physical-expr", "hashbrown", "indexmap", - "itertools 0.12.1", + "itertools", "log", "paste", "regex-syntax", @@ -1479,7 +1487,7 @@ dependencies = [ "hashbrown", "hex", "indexmap", - "itertools 0.12.1", + "itertools", "log", "paste", "petgraph", @@ -1498,20 +1506,6 @@ dependencies = [ "rand", ] -[[package]] -name = "datafusion-physical-expr-functions-aggregate" -version = "41.0.0" -dependencies = [ - "ahash", - "arrow", - "datafusion-common", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-physical-expr-common", - "rand", -] - [[package]] name = "datafusion-physical-optimizer" version = "41.0.0" @@ -1520,7 +1514,7 @@ dependencies = [ "datafusion-execution", "datafusion-physical-expr", "datafusion-physical-plan", - "itertools 0.12.1", + "itertools", ] [[package]] @@ -1543,12 +1537,11 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-common", - "datafusion-physical-expr-functions-aggregate", "futures", "half", "hashbrown", "indexmap", - "itertools 0.12.1", + "itertools", "log", "once_cell", "parking_lot", @@ -1740,12 +1733,12 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.31" +version = "1.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f211bbe8e69bbd0cfdea405084f128ae8b4aaa6b0b522fc8f2b009084797920" +checksum = "9c0596c1eac1f9e04ed902702e9878208b336edc9d6fddc8a48387349bab3666" dependencies = [ "crc32fast", - "miniz_oxide", + "miniz_oxide 0.8.0", ] [[package]] @@ -1828,7 +1821,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -1921,9 +1914,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab" +checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" dependencies = [ "atomic-waker", "bytes", @@ -2108,7 +2101,7 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "h2 0.4.5", + "h2 0.4.6", "http 1.1.0", "http-body 1.0.1", "httparse", @@ -2145,7 +2138,7 @@ 
dependencies = [ "hyper 1.4.1", "hyper-util", "rustls 0.23.12", - "rustls-native-certs 0.7.1", + "rustls-native-certs 0.7.2", "rustls-pki-types", "tokio", "tokio-rustls 0.26.0", @@ -2245,15 +2238,6 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.13.0" @@ -2359,9 +2343,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.156" +version = "0.2.158" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5f43f184355eefb8d17fc948dbecf6c13be3c141f20d834ae842193a448c72a" +checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" [[package]] name = "libflate" @@ -2495,6 +2479,15 @@ dependencies = [ "adler", ] +[[package]] +name = "miniz_oxide" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +dependencies = [ + "adler2", +] + [[package]] name = "mio" version = "1.0.2" @@ -2645,7 +2638,7 @@ dependencies = [ "futures", "humantime", "hyper 1.4.1", - "itertools 0.13.0", + "itertools", "md-5", "parking_lot", "percent-encoding", @@ -2835,7 +2828,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -3034,9 +3027,9 @@ dependencies = [ [[package]] name = "redox_users" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ "getrandom", "libredox", @@ -3080,15 +3073,15 @@ checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "reqwest" -version = "0.12.5" +version = "0.12.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7d6d2a27d57148378eb5e111173f4276ad26340ecc5c49a4a2152167a2d6a37" +checksum = "f8f4955649ef5c38cc7f9e8aa41761d48fb9677197daea9984dc54f56aad5e63" dependencies = [ "base64 0.22.1", "bytes", "futures-core", "futures-util", - "h2 0.4.5", + "h2 0.4.6", "http 1.1.0", "http-body 1.0.1", "http-body-util", @@ -3104,7 +3097,7 @@ dependencies = [ "pin-project-lite", "quinn", "rustls 0.23.12", - "rustls-native-certs 0.7.1", + "rustls-native-certs 0.7.2", "rustls-pemfile 2.1.3", "rustls-pki-types", "serde", @@ -3120,7 +3113,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "winreg", + "windows-registry", ] [[package]] @@ -3259,9 +3252,9 @@ dependencies = [ [[package]] name = "rustls-native-certs" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a88d6d420651b496bdd98684116959239430022a115c1240e6c3993be0b15fba" +checksum = "04182dffc9091a404e0fc069ea5cd60e5b866c3adf881eff99a32d048242dffa" dependencies = [ "openssl-probe", "rustls-pemfile 2.1.3", @@ -3427,7 +3420,7 @@ checksum = "24008e81ff7613ed8e5ba0cfaf24e2c2f1e5b8a0495711e44fcd4882fca62bcf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] 
@@ -3569,7 +3562,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -3615,7 +3608,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -3628,7 +3621,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -3650,9 +3643,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.74" +version = "2.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fceb41e3d546d0bd83421d3409b1460cc7444cd389341a4c880fe7a042cb3d7" +checksum = "f6af063034fc1935ede7be0122941bafa9bacb949334d090b77ca98b5817c7d9" dependencies = [ "proc-macro2", "quote", @@ -3664,6 +3657,9 @@ name = "sync_wrapper" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +dependencies = [ + "futures-core", +] [[package]] name = "tempfile" @@ -3710,7 +3706,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -3780,9 +3776,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.39.2" +version = "1.39.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daa4fb1bc778bd6f04cbfc4bb2d06a7396a8f299dc33ea1900cedaa316f467b1" +checksum = "9babc99b9923bfa4804bd74722ff02c0381021eafa4db9949217e3be8e84fff5" dependencies = [ "backtrace", "bytes", @@ -3804,7 +3800,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -3901,7 +3897,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -3946,7 +3942,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -4101,7 +4097,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", "wasm-bindgen-shared", ] @@ -4135,7 +4131,7 @@ checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4219,6 +4215,36 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-registry" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" +dependencies = [ + "windows-result", + "windows-strings", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result", + "windows-targets 0.52.6", +] + [[package]] name = 
"windows-sys" version = "0.48.0" @@ -4367,16 +4393,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" -[[package]] -name = "winreg" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" -dependencies = [ - "cfg-if", - "windows-sys 0.48.0", -] - [[package]] name = "xmlparser" version = "0.13.6" @@ -4410,7 +4426,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] diff --git a/datafusion-examples/examples/advanced_parquet_index.rs b/datafusion-examples/examples/advanced_parquet_index.rs index 903defafe3ab..f6860bb5b87a 100644 --- a/datafusion-examples/examples/advanced_parquet_index.rs +++ b/datafusion-examples/examples/advanced_parquet_index.rs @@ -300,7 +300,7 @@ impl IndexTableProvider { // analyze the predicate. In a real system, using // `PruningPredicate::prune` would likely be easier to do. let pruning_predicate = - PruningPredicate::try_new(Arc::clone(predicate), self.schema().clone())?; + PruningPredicate::try_new(Arc::clone(predicate), self.schema())?; // The PruningPredicate's guarantees must all be satisfied in order for // the predicate to possibly evaluate to true. diff --git a/datafusion-examples/examples/custom_file_format.rs b/datafusion-examples/examples/custom_file_format.rs index 8612a1cc4430..1d9b587f15b9 100644 --- a/datafusion-examples/examples/custom_file_format.rs +++ b/datafusion-examples/examples/custom_file_format.rs @@ -23,6 +23,7 @@ use arrow::{ }; use arrow_schema::{DataType, Field, Schema, SchemaRef}; use datafusion::execution::session_state::SessionStateBuilder; +use datafusion::physical_expr::LexRequirement; use datafusion::{ datasource::{ file_format::{ @@ -38,7 +39,7 @@ use datafusion::{ prelude::SessionContext, }; use datafusion_common::{GetExt, Statistics}; -use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; +use datafusion_physical_expr::PhysicalExpr; use object_store::{ObjectMeta, ObjectStore}; use tempfile::tempdir; @@ -123,7 +124,7 @@ impl FileFormat for TSVFileFormat { input: Arc, state: &SessionState, conf: FileSinkConfig, - order_requirements: Option>, + order_requirements: Option, ) -> Result> { self.csv_file_format .create_writer_physical_plan(input, state, conf, order_requirements) diff --git a/datafusion-examples/examples/file_stream_provider.rs b/datafusion-examples/examples/file_stream_provider.rs index b8549bd6b6e6..4db7e0200f53 100644 --- a/datafusion-examples/examples/file_stream_provider.rs +++ b/datafusion-examples/examples/file_stream_provider.rs @@ -100,7 +100,7 @@ mod non_windows { ) { // Timeout for a long period of BrokenPipe error let broken_pipe_timeout = Duration::from_secs(10); - let sa = file_path.clone(); + let sa = file_path; // Spawn a new thread to write to the FIFO file #[allow(clippy::disallowed_methods)] // spawn allowed only in tests tasks.spawn_blocking(move || { diff --git a/datafusion/catalog/src/table.rs b/datafusion/catalog/src/table.rs index 792315642a00..69fa81faf8e2 100644 --- a/datafusion/catalog/src/table.rs +++ b/datafusion/catalog/src/table.rs @@ -16,6 +16,7 @@ // under the License. 
use std::any::Any; +use std::borrow::Cow; use std::sync::Arc; use crate::session::Session; @@ -56,8 +57,8 @@ pub trait TableProvider: Sync + Send { None } - /// Get the [`LogicalPlan`] of this table, if available - fn get_logical_plan(&self) -> Option<&LogicalPlan> { + /// Get the [`LogicalPlan`] of this table, if available. + fn get_logical_plan(&self) -> Option> { None } diff --git a/datafusion/common-runtime/Cargo.toml b/datafusion/common-runtime/Cargo.toml index c10436087675..a21c72cd9f83 100644 --- a/datafusion/common-runtime/Cargo.toml +++ b/datafusion/common-runtime/Cargo.toml @@ -36,4 +36,8 @@ name = "datafusion_common_runtime" path = "src/lib.rs" [dependencies] +log = { workspace = true } tokio = { workspace = true } + +[dev-dependencies] +tokio = { version = "1.36", features = ["rt", "rt-multi-thread", "time"] } diff --git a/datafusion/common-runtime/src/common.rs b/datafusion/common-runtime/src/common.rs index 2f7ddb972f42..30f7526bc0b2 100644 --- a/datafusion/common-runtime/src/common.rs +++ b/datafusion/common-runtime/src/common.rs @@ -60,8 +60,8 @@ impl SpawnedTask { } /// Joins the task and unwinds the panic if it happens. - pub async fn join_unwind(self) -> R { - self.join().await.unwrap_or_else(|e| { + pub async fn join_unwind(self) -> Result { + self.join().await.map_err(|e| { // `JoinError` can be caused either by panic or cancellation. We have to handle panics: if e.is_panic() { std::panic::resume_unwind(e.into_panic()); @@ -69,9 +69,53 @@ impl SpawnedTask { // Cancellation may be caused by two reasons: // 1. Abort is called, but since we consumed `self`, it's not our case (`JoinHandle` not accessible outside). // 2. The runtime is shutting down. - // So we consider this branch as unreachable. - unreachable!("SpawnedTask was cancelled unexpectedly"); + log::warn!("SpawnedTask was polled during shutdown"); + e } }) } } + +#[cfg(test)] +mod tests { + use super::*; + + use std::future::{pending, Pending}; + + use tokio::runtime::Runtime; + + #[tokio::test] + async fn runtime_shutdown() { + let rt = Runtime::new().unwrap(); + let task = rt + .spawn(async { + SpawnedTask::spawn(async { + let fut: Pending<()> = pending(); + fut.await; + unreachable!("should never return"); + }) + }) + .await + .unwrap(); + + // caller shutdown their DF runtime (e.g. 
timeout, error in caller, etc) + rt.shutdown_background(); + + // race condition + // poll occurs during shutdown (buffered stream poll calls, etc) + assert!(matches!( + task.join_unwind().await, + Err(e) if e.is_cancelled() + )); + } + + #[tokio::test] + #[should_panic(expected = "foo")] + async fn panic_resume() { + // this should panic w/o an `unwrap` + SpawnedTask::spawn(async { panic!("foo") }) + .join_unwind() + .await + .ok(); + } +} diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 8435d0632576..79e20ba1215c 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -63,6 +63,7 @@ parquet = { workspace = true, optional = true, default-features = true } paste = "1.0.15" pyo3 = { version = "0.21.0", optional = true } sqlparser = { workspace = true } +tokio = { workspace = true } [target.'cfg(target_family = "wasm")'.dependencies] instant = { version = "0.1", features = ["wasm-bindgen"] } diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index f0eecd2ffeb1..095f4c510194 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -1242,10 +1242,9 @@ mod tests { #[test] fn into() { // Demonstrate how to convert back and forth between Schema, SchemaRef, DFSchema, and DFSchemaRef - let metadata = test_metadata(); let arrow_schema = Schema::new_with_metadata( vec![Field::new("c0", DataType::Int64, true)], - metadata.clone(), + test_metadata(), ); let arrow_schema_ref = Arc::new(arrow_schema.clone()); diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index 27a25d0c9dd5..05988d6c6da4 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -34,6 +34,7 @@ use arrow::error::ArrowError; #[cfg(feature = "parquet")] use parquet::errors::ParquetError; use sqlparser::parser::ParserError; +use tokio::task::JoinError; /// Result type for operations that could result in an [DataFusionError] pub type Result<T, E = DataFusionError> = result::Result<T, E>; @@ -112,6 +113,10 @@ pub enum DataFusionError { /// SQL method, opened a CSV file that is broken, or tried to divide an /// integer by zero. Execution(String), + /// [`JoinError`] during execution of the query. + /// + /// This error can occur for unjoined tasks, such as execution shutdown. + ExecutionJoin(JoinError), /// Error when resources (such as memory or scratch disk space) are exhausted. 
/// /// This error is thrown when a consumer cannot acquire additional memory @@ -306,6 +311,7 @@ impl Error for DataFusionError { DataFusionError::Plan(_) => None, DataFusionError::SchemaError(e, _) => Some(e), DataFusionError::Execution(_) => None, + DataFusionError::ExecutionJoin(e) => Some(e), DataFusionError::ResourcesExhausted(_) => None, DataFusionError::External(e) => Some(e.as_ref()), DataFusionError::Context(_, e) => Some(e.as_ref()), @@ -418,6 +424,7 @@ impl DataFusionError { DataFusionError::Configuration(_) => "Invalid or Unsupported Configuration: ", DataFusionError::SchemaError(_, _) => "Schema error: ", DataFusionError::Execution(_) => "Execution error: ", + DataFusionError::ExecutionJoin(_) => "ExecutionJoin error: ", DataFusionError::ResourcesExhausted(_) => "Resources exhausted: ", DataFusionError::External(_) => "External error: ", DataFusionError::Context(_, _) => "", @@ -453,6 +460,7 @@ impl DataFusionError { Cow::Owned(format!("{desc}{backtrace}")) } DataFusionError::Execution(ref desc) => Cow::Owned(desc.to_string()), + DataFusionError::ExecutionJoin(ref desc) => Cow::Owned(desc.to_string()), DataFusionError::ResourcesExhausted(ref desc) => Cow::Owned(desc.to_string()), DataFusionError::External(ref desc) => Cow::Owned(desc.to_string()), #[cfg(feature = "object_store")] diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index 4a229fe01b54..e42fb96ed6a5 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -565,7 +565,7 @@ mod tests { column_options_with_non_defaults(&parquet_options), )] .into(), - key_value_metadata: [(key.clone(), value.clone())].into(), + key_value_metadata: [(key, value)].into(), }; let writer_props = WriterPropertiesBuilder::try_from(&table_parquet_opts) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 677685b2c65b..5acc2b6f188e 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -4905,7 +4905,7 @@ mod tests { let data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); - assert_eq!(non_null_list_scalar.data_type(), data_type.clone()); + assert_eq!(non_null_list_scalar.data_type(), data_type); assert_eq!(null_list_scalar.data_type(), data_type); } @@ -5582,13 +5582,13 @@ mod tests { // Define list-of-structs scalars - let nl0_array = ScalarValue::iter_to_array(vec![s0.clone(), s1.clone()]).unwrap(); + let nl0_array = ScalarValue::iter_to_array(vec![s0, s1.clone()]).unwrap(); let nl0 = ScalarValue::List(Arc::new(array_into_list_array_nullable(nl0_array))); - let nl1_array = ScalarValue::iter_to_array(vec![s2.clone()]).unwrap(); + let nl1_array = ScalarValue::iter_to_array(vec![s2]).unwrap(); let nl1 = ScalarValue::List(Arc::new(array_into_list_array_nullable(nl1_array))); - let nl2_array = ScalarValue::iter_to_array(vec![s1.clone()]).unwrap(); + let nl2_array = ScalarValue::iter_to_array(vec![s1]).unwrap(); let nl2 = ScalarValue::List(Arc::new(array_into_list_array_nullable(nl2_array))); // iter_to_array for list-of-struct diff --git a/datafusion/common/src/stats.rs b/datafusion/common/src/stats.rs index 50ae4e3ca71f..d8e62b3045f9 100644 --- a/datafusion/common/src/stats.rs +++ b/datafusion/common/src/stats.rs @@ -557,6 +557,7 @@ mod tests { let precision: Precision = Precision::Exact(ScalarValue::Int64(Some(42))); // Clippy would complain about this if it were Copy + 
#[allow(clippy::redundant_clone)] let p2 = precision.clone(); assert_eq!(precision, p2); } diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index d7059e882e55..839f890bf077 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -20,6 +20,7 @@ pub mod expr; pub mod memory; pub mod proxy; +pub mod string_utils; use crate::error::{_internal_datafusion_err, _internal_err}; use crate::{arrow_datafusion_err, DataFusionError, Result, ScalarValue}; diff --git a/datafusion/physical-expr-functions-aggregate/src/lib.rs b/datafusion/common/src/utils/string_utils.rs similarity index 59% rename from datafusion/physical-expr-functions-aggregate/src/lib.rs rename to datafusion/common/src/utils/string_utils.rs index 2ff7ff5777ec..a2231e6786a7 100644 --- a/datafusion/physical-expr-functions-aggregate/src/lib.rs +++ b/datafusion/common/src/utils/string_utils.rs @@ -15,6 +15,17 @@ // specific language governing permissions and limitations // under the License. -//! Technically, all aggregate functions that depend on `expr` crate should be included here. +//! Utilities for working with strings -pub mod aggregate; +use arrow::{array::AsArray, datatypes::DataType}; +use arrow_array::Array; + +/// Convenient function to convert an Arrow string array to a vector of strings +pub fn string_array_to_vec(array: &dyn Array) -> Vec> { + match array.data_type() { + DataType::Utf8 => array.as_string::().iter().collect(), + DataType::LargeUtf8 => array.as_string::().iter().collect(), + DataType::Utf8View => array.as_string_view().iter().collect(), + _ => unreachable!(), + } +} diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index adbba3eb31d6..de228e058096 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -110,7 +110,6 @@ datafusion-functions-window = { workspace = true } datafusion-optimizer = { workspace = true } datafusion-physical-expr = { workspace = true } datafusion-physical-expr-common = { workspace = true } -datafusion-physical-expr-functions-aggregate = { workspace = true } datafusion-physical-optimizer = { workspace = true } datafusion-physical-plan = { workspace = true } datafusion-sql = { workspace = true } diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 42203e5fe84e..c516c7985d54 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -21,6 +21,7 @@ mod parquet; use std::any::Any; +use std::borrow::Cow; use std::collections::HashMap; use std::sync::Arc; @@ -1648,8 +1649,8 @@ impl TableProvider for DataFrameTableProvider { self } - fn get_logical_plan(&self) -> Option<&LogicalPlan> { - Some(&self.plan) + fn get_logical_plan(&self) -> Option> { + Some(Cow::Borrowed(&self.plan)) } fn supports_filters_pushdown( @@ -1707,7 +1708,7 @@ mod tests { use crate::test_util::{register_aggregate_csv, test_table, test_table_with_name}; use arrow::array::{self, Int32Array}; - use datafusion_common::{Constraint, Constraints, ScalarValue}; + use datafusion_common::{assert_batches_eq, Constraint, Constraints, ScalarValue}; use datafusion_common_runtime::SpawnedTask; use datafusion_expr::expr::WindowFunction; use datafusion_expr::{ @@ -3699,4 +3700,32 @@ mod tests { assert!(result.is_err()); Ok(()) } + + // Test issue: https://github.com/apache/datafusion/issues/12065 + #[tokio::test] + async fn filtered_aggr_with_param_values() -> Result<()> { + let cfg = SessionConfig::new() + .set("datafusion.sql_parser.dialect", 
"PostgreSQL".into()); + let ctx = SessionContext::new_with_config(cfg); + register_aggregate_csv(&ctx, "table1").await?; + + let df = ctx + .sql("select count (c2) filter (where c3 > $1) from table1") + .await? + .with_param_values(ParamValues::List(vec![ScalarValue::from(10u64)])); + + let df_results = df?.collect().await?; + assert_batches_eq!( + &[ + "+------------------------------------------------+", + "| count(table1.c2) FILTER (WHERE table1.c3 > $1) |", + "+------------------------------------------------+", + "| 54 |", + "+------------------------------------------------+", + ], + &df_results + ); + + Ok(()) + } } diff --git a/datafusion/core/src/datasource/cte_worktable.rs b/datafusion/core/src/datasource/cte_worktable.rs index d7d224828dda..d2da15c64f52 100644 --- a/datafusion/core/src/datasource/cte_worktable.rs +++ b/datafusion/core/src/datasource/cte_worktable.rs @@ -17,8 +17,8 @@ //! CteWorkTable implementation used for recursive queries -use std::any::Any; use std::sync::Arc; +use std::{any::Any, borrow::Cow}; use arrow::datatypes::SchemaRef; use async_trait::async_trait; @@ -63,7 +63,7 @@ impl TableProvider for CteWorkTable { self } - fn get_logical_plan(&self) -> Option<&LogicalPlan> { + fn get_logical_plan(&self) -> Option> { None } diff --git a/datafusion/core/src/datasource/default_table_source.rs b/datafusion/core/src/datasource/default_table_source.rs index 977e681d6641..b4a5a76fc9ff 100644 --- a/datafusion/core/src/datasource/default_table_source.rs +++ b/datafusion/core/src/datasource/default_table_source.rs @@ -17,8 +17,8 @@ //! Default TableSource implementation used in DataFusion physical plans -use std::any::Any; use std::sync::Arc; +use std::{any::Any, borrow::Cow}; use crate::datasource::TableProvider; @@ -70,7 +70,7 @@ impl TableSource for DefaultTableSource { self.table_provider.supports_filters_pushdown(filter) } - fn get_logical_plan(&self) -> Option<&datafusion_expr::LogicalPlan> { + fn get_logical_plan(&self) -> Option> { self.table_provider.get_logical_plan() } diff --git a/datafusion/core/src/datasource/file_format/arrow.rs b/datafusion/core/src/datasource/file_format/arrow.rs index 8b6a8800119d..6ee4280956e8 100644 --- a/datafusion/core/src/datasource/file_format/arrow.rs +++ b/datafusion/core/src/datasource/file_format/arrow.rs @@ -47,12 +47,13 @@ use datafusion_common::{ not_impl_err, DataFusionError, GetExt, Statistics, DEFAULT_ARROW_EXTENSION, }; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; -use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; +use datafusion_physical_expr::PhysicalExpr; use datafusion_physical_plan::insert::{DataSink, DataSinkExec}; use datafusion_physical_plan::metrics::MetricsSet; use async_trait::async_trait; use bytes::Bytes; +use datafusion_physical_expr_common::sort_expr::LexRequirement; use futures::stream::BoxStream; use futures::StreamExt; use object_store::{GetResultPayload, ObjectMeta, ObjectStore}; @@ -178,7 +179,7 @@ impl FileFormat for ArrowFormat { input: Arc, _state: &SessionState, conf: FileSinkConfig, - order_requirements: Option>, + order_requirements: Option, ) -> Result> { if conf.overwrite { return not_impl_err!("Overwrites are not implemented yet for Arrow format"); @@ -341,7 +342,10 @@ impl DataSink for ArrowFileSink { } } - demux_task.join_unwind().await?; + demux_task + .join_unwind() + .await + .map_err(DataFusionError::ExecutionJoin)??; Ok(row_count as u64) } } diff --git a/datafusion/core/src/datasource/file_format/csv.rs 
b/datafusion/core/src/datasource/file_format/csv.rs index 24d55ea54068..e43f6ab29abc 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -46,11 +46,12 @@ use datafusion_common::{ exec_err, not_impl_err, DataFusionError, GetExt, DEFAULT_CSV_EXTENSION, }; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; +use datafusion_physical_expr::PhysicalExpr; use datafusion_physical_plan::metrics::MetricsSet; use async_trait::async_trait; use bytes::{Buf, Bytes}; +use datafusion_physical_expr_common::sort_expr::LexRequirement; use futures::stream::BoxStream; use futures::{pin_mut, Stream, StreamExt, TryStreamExt}; use object_store::{delimited::newline_delimited_stream, ObjectMeta, ObjectStore}; @@ -371,7 +372,7 @@ impl FileFormat for CsvFormat { input: Arc, state: &SessionState, conf: FileSinkConfig, - order_requirements: Option>, + order_requirements: Option, ) -> Result> { if conf.overwrite { return not_impl_err!("Overwrites are not implemented yet for CSV"); @@ -679,7 +680,7 @@ mod tests { use datafusion_common::cast::as_string_array; use datafusion_common::internal_err; use datafusion_common::stats::Precision; - use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; + use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_expr::{col, lit}; use crate::execution::session_state::SessionStateBuilder; @@ -862,7 +863,7 @@ mod tests { async fn query_compress_data( file_compression_type: FileCompressionType, ) -> Result<()> { - let runtime = Arc::new(RuntimeEnv::new(RuntimeConfig::new()).unwrap()); + let runtime = Arc::new(RuntimeEnvBuilder::new().build()?); let mut cfg = SessionConfig::new(); cfg.options_mut().catalog.has_header = true; let session_state = SessionStateBuilder::new() diff --git a/datafusion/core/src/datasource/file_format/json.rs b/datafusion/core/src/datasource/file_format/json.rs index 7c579e890c8c..4471d7d6cb31 100644 --- a/datafusion/core/src/datasource/file_format/json.rs +++ b/datafusion/core/src/datasource/file_format/json.rs @@ -46,12 +46,13 @@ use datafusion_common::config::{ConfigField, ConfigFileType, JsonOptions}; use datafusion_common::file_options::json_writer::JsonWriterOptions; use datafusion_common::{not_impl_err, GetExt, DEFAULT_JSON_EXTENSION}; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; +use datafusion_physical_expr::PhysicalExpr; use datafusion_physical_plan::metrics::MetricsSet; use datafusion_physical_plan::ExecutionPlan; use async_trait::async_trait; use bytes::{Buf, Bytes}; +use datafusion_physical_expr_common::sort_expr::LexRequirement; use object_store::{GetResultPayload, ObjectMeta, ObjectStore}; #[derive(Default)] @@ -249,7 +250,7 @@ impl FileFormat for JsonFormat { input: Arc, _state: &SessionState, conf: FileSinkConfig, - order_requirements: Option>, + order_requirements: Option, ) -> Result> { if conf.overwrite { return not_impl_err!("Overwrites are not implemented yet for Json"); diff --git a/datafusion/core/src/datasource/file_format/mod.rs b/datafusion/core/src/datasource/file_format/mod.rs index a324a4578424..d21464b74b53 100644 --- a/datafusion/core/src/datasource/file_format/mod.rs +++ b/datafusion/core/src/datasource/file_format/mod.rs @@ -45,12 +45,14 @@ use crate::physical_plan::{ExecutionPlan, Statistics}; use arrow_schema::{DataType, Field, Schema}; use datafusion_common::file_options::file_type::FileType; use 
datafusion_common::{internal_err, not_impl_err, GetExt}; -use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; +use datafusion_physical_expr::PhysicalExpr; use async_trait::async_trait; +use datafusion_physical_expr_common::sort_expr::LexRequirement; use file_compression_type::FileCompressionType; use object_store::{ObjectMeta, ObjectStore}; use std::fmt::Debug; + /// Factory for creating [`FileFormat`] instances based on session and command level options /// /// Users can provide their own `FileFormatFactory` to support arbitrary file formats @@ -132,7 +134,7 @@ pub trait FileFormat: Send + Sync + fmt::Debug { _input: Arc, _state: &SessionState, _conf: FileSinkConfig, - _order_requirements: Option>, + _order_requirements: Option, ) -> Result> { not_impl_err!("Writer not implemented for this format") } diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index f233f3842c8c..23e765f0f2cd 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -51,7 +51,7 @@ use datafusion_common_runtime::SpawnedTask; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryPool, MemoryReservation}; use datafusion_execution::TaskContext; use datafusion_functions_aggregate::min_max::{MaxAccumulator, MinAccumulator}; -use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; +use datafusion_physical_expr::PhysicalExpr; use datafusion_physical_plan::metrics::MetricsSet; use async_trait::async_trait; @@ -76,6 +76,7 @@ use tokio::sync::mpsc::{self, Receiver, Sender}; use tokio::task::JoinSet; use crate::datasource::physical_plan::parquet::ParquetExecBuilder; +use datafusion_physical_expr_common::sort_expr::LexRequirement; use futures::{StreamExt, TryStreamExt}; use object_store::path::Path; use object_store::{ObjectMeta, ObjectStore}; @@ -376,7 +377,7 @@ impl FileFormat for ParquetFormat { input: Arc, _state: &SessionState, conf: FileSinkConfig, - order_requirements: Option>, + order_requirements: Option, ) -> Result> { if conf.overwrite { return not_impl_err!("Overwrites are not implemented yet for Parquet"); @@ -836,7 +837,10 @@ impl DataSink for ParquetSink { } } - demux_task.join_unwind().await?; + demux_task + .join_unwind() + .await + .map_err(DataFusionError::ExecutionJoin)??; Ok(row_count as u64) } @@ -942,7 +946,10 @@ fn spawn_rg_join_and_finalize_task( let num_cols = column_writer_tasks.len(); let mut finalized_rg = Vec::with_capacity(num_cols); for task in column_writer_tasks.into_iter() { - let (writer, _col_reservation) = task.join_unwind().await?; + let (writer, _col_reservation) = task + .join_unwind() + .await + .map_err(DataFusionError::ExecutionJoin)??; let encoded_size = writer.get_estimated_total_bytes(); rg_reservation.grow(encoded_size); finalized_rg.push(writer.close()?); @@ -1070,7 +1077,8 @@ async fn concatenate_parallel_row_groups( while let Some(task) = serialize_rx.recv().await { let result = task.join_unwind().await; let mut rg_out = parquet_writer.next_row_group()?; - let (serialized_columns, mut rg_reservation, _cnt) = result?; + let (serialized_columns, mut rg_reservation, _cnt) = + result.map_err(DataFusionError::ExecutionJoin)??; for chunk in serialized_columns { chunk.append_to_row_group(&mut rg_out)?; rg_reservation.free(); @@ -1134,7 +1142,10 @@ async fn output_single_parquet_file_parallelized( ) .await?; - launch_serialization_task.join_unwind().await?; + launch_serialization_task + .join_unwind() + 
.await + .map_err(DataFusionError::ExecutionJoin)??; Ok(file_metadata) } diff --git a/datafusion/core/src/datasource/file_format/write/orchestration.rs b/datafusion/core/src/datasource/file_format/write/orchestration.rs index 1d32063ee9f3..6f27e6f3889f 100644 --- a/datafusion/core/src/datasource/file_format/write/orchestration.rs +++ b/datafusion/core/src/datasource/file_format/write/orchestration.rs @@ -298,8 +298,8 @@ pub(crate) async fn stateless_multipart_put( write_coordinator_task.join_unwind(), demux_task.join_unwind() ); - r1?; - r2?; + r1.map_err(DataFusionError::ExecutionJoin)??; + r2.map_err(DataFusionError::ExecutionJoin)??; let total_count = rx_row_cnt.await.map_err(|_| { internal_datafusion_err!("Did not receive row count from write coordinator") diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs index b5dd2dd12e10..f6e938b72dab 100644 --- a/datafusion/core/src/datasource/listing/helpers.rs +++ b/datafusion/core/src/datasource/listing/helpers.rs @@ -282,7 +282,7 @@ async fn prune_partitions( Default::default(), )?; - let batch = RecordBatch::try_new(schema.clone(), arrays)?; + let batch = RecordBatch::try_new(schema, arrays)?; // TODO: Plumb this down let props = ExecutionProps::new(); diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 89066d8234ac..a0345a38e40c 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -1016,7 +1016,7 @@ impl ListingTable { .collected_statistics .get_with_extra(&part_file.object_meta.location, &part_file.object_meta) { - Some(statistics) => Ok(statistics.clone()), + Some(statistics) => Ok(statistics), None => { let statistics = self .options diff --git a/datafusion/core/src/datasource/physical_plan/file_groups.rs b/datafusion/core/src/datasource/physical_plan/file_groups.rs index 6456bd5c7276..28f975ae193d 100644 --- a/datafusion/core/src/datasource/physical_plan/file_groups.rs +++ b/datafusion/core/src/datasource/physical_plan/file_groups.rs @@ -256,7 +256,7 @@ impl FileGroupPartitioner { }, ) .flatten() - .group_by(|(partition_idx, _)| *partition_idx) + .chunk_by(|(partition_idx, _)| *partition_idx) .into_iter() .map(|(_, group)| group.map(|(_, vals)| vals).collect_vec()) .collect_vec(); @@ -394,7 +394,7 @@ mod test { #[test] fn repartition_empty_file_only() { let partitioned_file_empty = pfile("empty", 0); - let file_group = vec![vec![partitioned_file_empty.clone()]]; + let file_group = vec![vec![partitioned_file_empty]]; let partitioned_files = FileGroupPartitioner::new() .with_target_partitions(4) @@ -817,10 +817,7 @@ mod test { .with_preserve_order_within_groups(true) .repartition_file_groups(&file_groups); - assert_partitioned_files( - repartitioned.clone(), - repartitioned_preserving_sort.clone(), - ); + assert_partitioned_files(repartitioned.clone(), repartitioned_preserving_sort); repartitioned } } diff --git a/datafusion/core/src/datasource/physical_plan/file_scan_config.rs b/datafusion/core/src/datasource/physical_plan/file_scan_config.rs index 34fb6226c1a2..bfa5488e5b5e 100644 --- a/datafusion/core/src/datasource/physical_plan/file_scan_config.rs +++ b/datafusion/core/src/datasource/physical_plan/file_scan_config.rs @@ -908,7 +908,7 @@ mod tests { schema.clone(), Some(vec![0, 3, 5, schema.fields().len()]), Statistics::new_unknown(&schema), - to_partition_cols(partition_cols.clone()), + to_partition_cols(partition_cols), ) 
.projected_file_schema(); @@ -941,7 +941,7 @@ mod tests { schema.clone(), None, Statistics::new_unknown(&schema), - to_partition_cols(partition_cols.clone()), + to_partition_cols(partition_cols), ) .projected_file_schema(); diff --git a/datafusion/core/src/datasource/schema_adapter.rs b/datafusion/core/src/datasource/schema_adapter.rs index 40cb40a83af2..5d2d0ff91b15 100644 --- a/datafusion/core/src/datasource/schema_adapter.rs +++ b/datafusion/core/src/datasource/schema_adapter.rs @@ -369,7 +369,7 @@ mod tests { let f1 = Field::new("id", DataType::Int32, true); let f2 = Field::new("extra_column", DataType::Utf8, true); - let schema = Arc::new(Schema::new(vec![f1.clone(), f2.clone()])); + let schema = Arc::new(Schema::new(vec![f1, f2])); let extra_column = Arc::new(StringArray::from(vec!["foo"])); let mut new_columns = batch.columns().to_vec(); diff --git a/datafusion/core/src/datasource/stream.rs b/datafusion/core/src/datasource/stream.rs index 682565aea909..b53fe8663178 100644 --- a/datafusion/core/src/datasource/stream.rs +++ b/datafusion/core/src/datasource/stream.rs @@ -438,6 +438,9 @@ impl DataSink for StreamWrite { } } drop(sender); - write_task.join_unwind().await + write_task + .join_unwind() + .await + .map_err(DataFusionError::ExecutionJoin)? } } diff --git a/datafusion/core/src/datasource/view.rs b/datafusion/core/src/datasource/view.rs index a81942bf769e..947714c1e4f9 100644 --- a/datafusion/core/src/datasource/view.rs +++ b/datafusion/core/src/datasource/view.rs @@ -17,7 +17,7 @@ //! View data source which uses a LogicalPlan as it's input. -use std::{any::Any, sync::Arc}; +use std::{any::Any, borrow::Cow, sync::Arc}; use crate::{ error::Result, @@ -90,8 +90,8 @@ impl TableProvider for ViewTable { self } - fn get_logical_plan(&self) -> Option<&LogicalPlan> { - Some(&self.logical_plan) + fn get_logical_plan(&self) -> Option> { + Some(Cow::Borrowed(&self.logical_plan)) } fn schema(&self) -> SchemaRef { diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 972a6f643733..c67424c0fa53 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -128,6 +128,9 @@ where /// the state of the connection between a user and an instance of the /// DataFusion engine. /// +/// See examples below for how to use the `SessionContext` to execute queries +/// and how to configure the session. 
+/// /// # Overview /// /// [`SessionContext`] provides the following functionality: @@ -200,7 +203,38 @@ where /// # } /// ``` /// -/// # `SessionContext`, `SessionState`, and `TaskContext` +/// # Example: Configuring `SessionContext` +/// +/// The `SessionContext` can be configured by creating a [`SessionState`] using +/// [`SessionStateBuilder`]: +/// +/// ``` +/// # use std::sync::Arc; +/// # use datafusion::prelude::*; +/// # use datafusion::execution::SessionStateBuilder; +/// # use datafusion_execution::runtime_env::RuntimeEnvBuilder; +/// // Configure a 4k batch size +/// let config = SessionConfig::new() .with_batch_size(4 * 1024); +/// +/// // configure a memory limit of 1GB with 20% slop +/// let runtime_env = RuntimeEnvBuilder::new() +/// .with_memory_limit(1024 * 1024 * 1024, 0.80) +/// .build() +/// .unwrap(); +/// +/// // Create a SessionState using the config and runtime_env +/// let state = SessionStateBuilder::new() +/// .with_config(config) +/// .with_runtime_env(Arc::new(runtime_env)) +/// // include support for built in functions and configurations +/// .with_default_features() +/// .build(); +/// +/// // Create a SessionContext +/// let ctx = SessionContext::from(state); +/// ``` +/// +/// # Relationship between `SessionContext`, `SessionState`, and `TaskContext` /// /// The state required to optimize, and evaluate queries is /// broken into three levels to allow tailoring @@ -654,7 +688,7 @@ impl SessionContext { column_defaults, } = cmd; - let input = Arc::try_unwrap(input).unwrap_or_else(|e| e.as_ref().clone()); + let input = Arc::unwrap_or_clone(input); let input = self.state().optimize(&input)?; let table = self.table(name.clone()).await; match (if_not_exists, or_replace, table) { @@ -1131,7 +1165,7 @@ impl SessionContext { // check schema uniqueness let mut batches = batches.into_iter().peekable(); let schema = if let Some(batch) = batches.peek() { - batch.schema().clone() + batch.schema() } else { Arc::new(Schema::empty()) }; @@ -1427,6 +1461,12 @@ impl From<&SessionContext> for TaskContext { } } +impl From for SessionContext { + fn from(state: SessionState) -> Self { + Self::new_with_state(state) + } +} + /// A planner used to add extensions to DataFusion logical and physical plans. 
#[async_trait] pub trait QueryPlanner { @@ -1583,7 +1623,7 @@ mod tests { use super::{super::options::CsvReadOptions, *}; use crate::assert_batches_eq; use crate::execution::memory_pool::MemoryConsumer; - use crate::execution::runtime_env::RuntimeConfig; + use crate::execution::runtime_env::RuntimeEnvBuilder; use crate::test; use crate::test_util::{plan_and_collect, populate_csv_partitions}; @@ -1718,8 +1758,7 @@ mod tests { let path = path.join("tests/tpch-csv"); let url = format!("file://{}", path.display()); - let rt_cfg = RuntimeConfig::new(); - let runtime = Arc::new(RuntimeEnv::new(rt_cfg).unwrap()); + let runtime = Arc::new(RuntimeEnvBuilder::new().build()?); let cfg = SessionConfig::new() .set_str("datafusion.catalog.location", url.as_str()) .set_str("datafusion.catalog.format", "CSV") diff --git a/datafusion/core/src/execution/mod.rs b/datafusion/core/src/execution/mod.rs index a1b3eab25f33..10aa16ffe47a 100644 --- a/datafusion/core/src/execution/mod.rs +++ b/datafusion/core/src/execution/mod.rs @@ -19,6 +19,8 @@ pub mod context; pub mod session_state; +pub use session_state::{SessionState, SessionStateBuilder}; + mod session_state_defaults; pub use session_state_defaults::SessionStateDefaults; diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index daeb21db9d05..67f3cb01c0a4 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -490,7 +490,6 @@ //! [`PhysicalOptimizerRule`]: datafusion::physical_optimizer::optimizer::PhysicalOptimizerRule //! [`Schema`]: arrow::datatypes::Schema //! [`PhysicalExpr`]: physical_plan::PhysicalExpr -//! [`AggregateExpr`]: physical_plan::AggregateExpr //! [`RecordBatch`]: arrow::record_batch::RecordBatch //! [`RecordBatchReader`]: arrow::record_batch::RecordBatchReader //! 
[`Array`]: arrow::array::Array @@ -556,11 +555,6 @@ pub mod physical_expr_common { pub use datafusion_physical_expr_common::*; } -/// re-export of [`datafusion_physical_expr_functions_aggregate`] crate -pub mod physical_expr_functions_aggregate { - pub use datafusion_physical_expr_functions_aggregate::*; -} - /// re-export of [`datafusion_physical_expr`] crate pub mod physical_expr { pub use datafusion_physical_expr::*; @@ -678,6 +672,12 @@ doc_comment::doctest!( library_user_guide_sql_api ); +#[cfg(doctest)] +doc_comment::doctest!( + "../../../docs/source/library-user-guide/building-logical-plans.md", + library_user_guide_logical_plans +); + #[cfg(doctest)] doc_comment::doctest!( "../../../docs/source/library-user-guide/using-the-dataframe-api.md", diff --git a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs index f65a4c837a60..1a12fc7de888 100644 --- a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs +++ b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs @@ -26,7 +26,8 @@ use crate::physical_plan::ExecutionPlan; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_physical_expr::{physical_exprs_equal, AggregateExpr, PhysicalExpr}; +use datafusion_physical_expr::aggregate::AggregateFunctionExpr; +use datafusion_physical_expr::{physical_exprs_equal, PhysicalExpr}; use datafusion_physical_optimizer::PhysicalOptimizerRule; /// CombinePartialFinalAggregate optimizer rule combines the adjacent Partial and Final AggregateExecs @@ -51,62 +52,57 @@ impl PhysicalOptimizerRule for CombinePartialFinalAggregate { _config: &ConfigOptions, ) -> Result> { plan.transform_down(|plan| { - let transformed = - plan.as_any() - .downcast_ref::() - .and_then(|agg_exec| { - if matches!( - agg_exec.mode(), - AggregateMode::Final | AggregateMode::FinalPartitioned - ) { - agg_exec - .input() - .as_any() - .downcast_ref::() - .and_then(|input_agg_exec| { - if matches!( - input_agg_exec.mode(), - AggregateMode::Partial - ) && can_combine( - ( - agg_exec.group_expr(), - agg_exec.aggr_expr(), - agg_exec.filter_expr(), - ), - ( - input_agg_exec.group_expr(), - input_agg_exec.aggr_expr(), - input_agg_exec.filter_expr(), - ), - ) { - let mode = - if agg_exec.mode() == &AggregateMode::Final { - AggregateMode::Single - } else { - AggregateMode::SinglePartitioned - }; - AggregateExec::try_new( - mode, - input_agg_exec.group_expr().clone(), - input_agg_exec.aggr_expr().to_vec(), - input_agg_exec.filter_expr().to_vec(), - input_agg_exec.input().clone(), - input_agg_exec.input_schema(), - ) - .map(|combined_agg| { - combined_agg.with_limit(agg_exec.limit()) - }) - .ok() - .map(Arc::new) - } else { - None - } - }) - } else { - None - } - }); - + // Check if the plan is AggregateExec + let Some(agg_exec) = plan.as_any().downcast_ref::() else { + return Ok(Transformed::no(plan)); + }; + + if !matches!( + agg_exec.mode(), + AggregateMode::Final | AggregateMode::FinalPartitioned + ) { + return Ok(Transformed::no(plan)); + } + + // Check if the input is AggregateExec + let Some(input_agg_exec) = + agg_exec.input().as_any().downcast_ref::() + else { + return Ok(Transformed::no(plan)); + }; + + let transformed = if matches!(input_agg_exec.mode(), AggregateMode::Partial) + && can_combine( + ( + agg_exec.group_expr(), + agg_exec.aggr_expr(), + agg_exec.filter_expr(), + ), + ( + input_agg_exec.group_expr(), + 
input_agg_exec.aggr_expr(), + input_agg_exec.filter_expr(), + ), + ) { + let mode = if agg_exec.mode() == &AggregateMode::Final { + AggregateMode::Single + } else { + AggregateMode::SinglePartitioned + }; + AggregateExec::try_new( + mode, + input_agg_exec.group_expr().clone(), + input_agg_exec.aggr_expr().to_vec(), + input_agg_exec.filter_expr().to_vec(), + input_agg_exec.input().clone(), + input_agg_exec.input_schema(), + ) + .map(|combined_agg| combined_agg.with_limit(agg_exec.limit())) + .ok() + .map(Arc::new) + } else { + None + }; Ok(if let Some(transformed) = transformed { Transformed::yes(transformed) } else { @@ -127,7 +123,7 @@ impl PhysicalOptimizerRule for CombinePartialFinalAggregate { type GroupExprsRef<'a> = ( &'a PhysicalGroupBy, - &'a [Arc], + &'a [Arc], &'a [Option>], ); @@ -176,8 +172,8 @@ mod tests { use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion_functions_aggregate::count::count_udaf; use datafusion_functions_aggregate::sum::sum_udaf; + use datafusion_physical_expr::aggregate::AggregateExprBuilder; use datafusion_physical_expr::expressions::col; - use datafusion_physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; /// Runs the CombinePartialFinalAggregate optimizer and asserts the plan against the expected macro_rules! assert_optimized { @@ -229,7 +225,7 @@ mod tests { fn partial_aggregate_exec( input: Arc, group_by: PhysicalGroupBy, - aggr_expr: Vec>, + aggr_expr: Vec>, ) -> Arc { let schema = input.schema(); let n_aggr = aggr_expr.len(); @@ -249,7 +245,7 @@ mod tests { fn final_aggregate_exec( input: Arc, group_by: PhysicalGroupBy, - aggr_expr: Vec>, + aggr_expr: Vec>, ) -> Arc { let schema = input.schema(); let n_aggr = aggr_expr.len(); @@ -277,7 +273,7 @@ mod tests { expr: Arc, name: &str, schema: &Schema, - ) -> Arc { + ) -> Arc { AggregateExprBuilder::new(count_udaf(), vec![expr]) .schema(Arc::new(schema.clone())) .alias(name) diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index 2ee5624c83dd..ba6f7d0439c2 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -1419,6 +1419,7 @@ pub(crate) mod tests { expressions, expressions::binary, expressions::lit, LexOrdering, PhysicalSortExpr, PhysicalSortRequirement, }; + use datafusion_physical_expr_common::sort_expr::LexRequirement; use datafusion_physical_plan::PlanProperties; /// Models operators like BoundedWindowExec that require an input @@ -1489,7 +1490,7 @@ pub(crate) mod tests { } // model that it requires the output ordering of its input - fn required_input_ordering(&self) -> Vec>> { + fn required_input_ordering(&self) -> Vec> { if self.expr.is_empty() { vec![None] } else { @@ -3907,7 +3908,7 @@ pub(crate) mod tests { let alias = vec![("a".to_string(), "a".to_string())]; let plan_parquet = aggregate_exec_with_alias(parquet_exec_multiple(), alias.clone()); - let plan_csv = aggregate_exec_with_alias(csv_exec_multiple(), alias.clone()); + let plan_csv = aggregate_exec_with_alias(csv_exec_multiple(), alias); let expected_parquet = [ "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", @@ -3933,7 +3934,7 @@ pub(crate) mod tests { let alias = vec![("a".to_string(), "a".to_string())]; let plan_parquet = aggregate_exec_with_alias(parquet_exec_multiple(), alias.clone()); - let plan_csv = aggregate_exec_with_alias(csv_exec_multiple(), alias.clone()); + let plan_csv = 
aggregate_exec_with_alias(csv_exec_multiple(), alias); let expected_parquet = [ "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", @@ -3963,7 +3964,7 @@ pub(crate) mod tests { options: SortOptions::default(), }]; let plan_parquet = limit_exec(sort_exec(sort_key.clone(), parquet_exec(), false)); - let plan_csv = limit_exec(sort_exec(sort_key.clone(), csv_exec(), false)); + let plan_csv = limit_exec(sort_exec(sort_key, csv_exec(), false)); let expected_parquet = &[ "GlobalLimitExec: skip=0, fetch=100", @@ -3999,8 +4000,7 @@ pub(crate) mod tests { parquet_exec(), false, ))); - let plan_csv = - limit_exec(filter_exec(sort_exec(sort_key.clone(), csv_exec(), false))); + let plan_csv = limit_exec(filter_exec(sort_exec(sort_key, csv_exec(), false))); let expected_parquet = &[ "GlobalLimitExec: skip=0, fetch=100", @@ -4041,7 +4041,7 @@ pub(crate) mod tests { ); let plan_csv = aggregate_exec_with_alias( limit_exec(filter_exec(limit_exec(csv_exec()))), - alias.clone(), + alias, ); let expected_parquet = &[ @@ -4125,7 +4125,7 @@ pub(crate) mod tests { ); let plan_csv = sort_preserving_merge_exec( sort_key.clone(), - csv_exec_with_sort(vec![sort_key.clone()]), + csv_exec_with_sort(vec![sort_key]), ); // parallelization is not beneficial for SortPreservingMerge @@ -4153,7 +4153,7 @@ pub(crate) mod tests { union_exec(vec![parquet_exec_with_sort(vec![sort_key.clone()]); 2]); let input_csv = union_exec(vec![csv_exec_with_sort(vec![sort_key.clone()]); 2]); let plan_parquet = sort_preserving_merge_exec(sort_key.clone(), input_parquet); - let plan_csv = sort_preserving_merge_exec(sort_key.clone(), input_csv); + let plan_csv = sort_preserving_merge_exec(sort_key, input_csv); // should not repartition (union doesn't benefit from increased parallelism) // should not sort (as the data was already sorted) @@ -4223,8 +4223,8 @@ pub(crate) mod tests { ("c".to_string(), "c2".to_string()), ]; let proj_parquet = projection_exec_with_alias( - parquet_exec_with_sort(vec![sort_key.clone()]), - alias_pairs.clone(), + parquet_exec_with_sort(vec![sort_key]), + alias_pairs, ); let sort_key_after_projection = vec![PhysicalSortExpr { expr: col("c2", &proj_parquet.schema()).unwrap(), @@ -4559,7 +4559,7 @@ pub(crate) mod tests { }]; let alias = vec![("a".to_string(), "a".to_string())]; let input = parquet_exec_with_sort(vec![sort_key]); - let physical_plan = aggregate_exec_with_alias(input, alias.clone()); + let physical_plan = aggregate_exec_with_alias(input, alias); let expected = &[ "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", @@ -4583,7 +4583,7 @@ pub(crate) mod tests { let alias = vec![("a".to_string(), "a".to_string())]; let input = parquet_exec_multiple_sorted(vec![sort_key]); let aggregate = aggregate_exec_with_alias(input, alias.clone()); - let physical_plan = aggregate_exec_with_alias(aggregate, alias.clone()); + let physical_plan = aggregate_exec_with_alias(aggregate, alias); let expected = &[ "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", diff --git a/datafusion/core/src/physical_optimizer/join_selection.rs b/datafusion/core/src/physical_optimizer/join_selection.rs index b849df88e4aa..2643ade8f481 100644 --- a/datafusion/core/src/physical_optimizer/join_selection.rs +++ b/datafusion/core/src/physical_optimizer/join_selection.rs @@ -908,7 +908,7 @@ mod tests_statistical { ); let optimized_join = JoinSelection::new() - .optimize(join.clone(), &ConfigOptions::new()) + .optimize(join, &ConfigOptions::new()) .unwrap(); let swapping_projection = optimized_join @@ -964,7 
+964,7 @@ mod tests_statistical { ); let optimized_join = JoinSelection::new() - .optimize(join.clone(), &ConfigOptions::new()) + .optimize(join, &ConfigOptions::new()) .unwrap(); let swapped_join = optimized_join @@ -1140,7 +1140,7 @@ mod tests_statistical { ); let optimized_join = JoinSelection::new() - .optimize(join.clone(), &ConfigOptions::new()) + .optimize(join, &ConfigOptions::new()) .unwrap(); let swapped_join = optimized_join @@ -1180,7 +1180,7 @@ mod tests_statistical { ); let optimized_join = JoinSelection::new() - .optimize(join.clone(), &ConfigOptions::new()) + .optimize(join, &ConfigOptions::new()) .unwrap(); let swapping_projection = optimized_join @@ -1356,7 +1356,7 @@ mod tests_statistical { Arc::new(Column::new_with_schema("small_col", &small.schema()).unwrap()) as _, )]; check_join_partition_mode( - big.clone(), + big, small.clone(), join_on, true, @@ -1380,8 +1380,8 @@ mod tests_statistical { Arc::new(Column::new_with_schema("small_col", &small.schema()).unwrap()) as _, )]; check_join_partition_mode( - empty.clone(), - small.clone(), + empty, + small, join_on, true, PartitionMode::CollectLeft, @@ -1424,7 +1424,7 @@ mod tests_statistical { Arc::new(Column::new_with_schema("big_col", &big.schema()).unwrap()) as _, )]; check_join_partition_mode( - bigger.clone(), + bigger, big.clone(), join_on, true, @@ -1472,7 +1472,7 @@ mod tests_statistical { ); let optimized_join = JoinSelection::new() - .optimize(join.clone(), &ConfigOptions::new()) + .optimize(join, &ConfigOptions::new()) .unwrap(); if !is_swapped { @@ -1913,8 +1913,7 @@ mod hash_join_tests { false, )?); - let optimized_join_plan = - hash_join_swap_subrule(join.clone(), &ConfigOptions::new())?; + let optimized_join_plan = hash_join_swap_subrule(join, &ConfigOptions::new())?; // If swap did happen let projection_added = optimized_join_plan.as_any().is::(); diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs index 9c545c17da3c..b3f3f90154d0 100644 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs @@ -1692,12 +1692,9 @@ mod tests { ])); Arc::new( CsvExec::builder( - FileScanConfig::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema.clone(), - ) - .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_projection(Some(vec![0, 1, 2, 3, 4])), + FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema) + .with_file(PartitionedFile::new("x".to_string(), 100)) + .with_projection(Some(vec![0, 1, 2, 3, 4])), ) .with_has_header(false) .with_delimeter(0) @@ -1719,12 +1716,9 @@ mod tests { ])); Arc::new( CsvExec::builder( - FileScanConfig::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema.clone(), - ) - .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_projection(Some(vec![3, 2, 1])), + FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema) + .with_file(PartitionedFile::new("x".to_string(), 100)) + .with_projection(Some(vec![3, 2, 1])), ) .with_has_header(false) .with_delimeter(0) diff --git a/datafusion/core/src/physical_optimizer/pruning.rs b/datafusion/core/src/physical_optimizer/pruning.rs index 0ef390fff45c..a16abc607ee6 100644 --- a/datafusion/core/src/physical_optimizer/pruning.rs +++ b/datafusion/core/src/physical_optimizer/pruning.rs @@ -1369,7 +1369,6 @@ fn build_predicate_expression( let change_expr = in_list .list() .iter() - .cloned() .map(|e| { 
Arc::new(phys_expr::BinaryExpr::new( in_list.expr().clone(), diff --git a/datafusion/core/src/physical_optimizer/sort_pushdown.rs b/datafusion/core/src/physical_optimizer/sort_pushdown.rs index 17d63a06a6f8..9ab6802d18f1 100644 --- a/datafusion/core/src/physical_optimizer/sort_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/sort_pushdown.rs @@ -36,6 +36,7 @@ use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::{ LexRequirementRef, PhysicalSortExpr, PhysicalSortRequirement, }; +use datafusion_physical_expr_common::sort_expr::LexRequirement; /// This is a "data class" we use within the [`EnforceSorting`] rule to push /// down [`SortExec`] in the plan. In some cases, we can reduce the total @@ -46,7 +47,7 @@ use datafusion_physical_expr::{ /// [`EnforceSorting`]: crate::physical_optimizer::enforce_sorting::EnforceSorting #[derive(Default, Clone)] pub struct ParentRequirements { - ordering_requirement: Option>, + ordering_requirement: Option, fetch: Option, } @@ -159,7 +160,7 @@ fn pushdown_sorts_helper( fn pushdown_requirement_to_children( plan: &Arc, parent_required: LexRequirementRef, -) -> Result>>>> { +) -> Result>>> { let maintains_input_order = plan.maintains_input_order(); if is_window(plan) { let required_input_ordering = plan.required_input_ordering(); @@ -345,7 +346,7 @@ fn try_pushdown_requirements_to_join( parent_required: LexRequirementRef, sort_expr: Vec, push_side: JoinSide, -) -> Result>>>> { +) -> Result>>> { let left_eq_properties = smj.left().equivalence_properties(); let right_eq_properties = smj.right().equivalence_properties(); let mut smj_required_orderings = smj.required_input_ordering(); @@ -460,7 +461,7 @@ fn expr_source_side( fn shift_right_required( parent_required: LexRequirementRef, left_columns_len: usize, -) -> Result> { +) -> Result { let new_right_required = parent_required .iter() .filter_map(|r| { @@ -486,7 +487,7 @@ enum RequirementsCompatibility { /// Requirements satisfy Satisfy, /// Requirements compatible - Compatible(Option>), + Compatible(Option), /// Requirements not compatible NonCompatible, } diff --git a/datafusion/core/src/physical_optimizer/test_utils.rs b/datafusion/core/src/physical_optimizer/test_utils.rs index 90853c347672..98f1a7c21a39 100644 --- a/datafusion/core/src/physical_optimizer/test_utils.rs +++ b/datafusion/core/src/physical_optimizer/test_utils.rs @@ -56,7 +56,9 @@ use datafusion_physical_plan::{ use async_trait::async_trait; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; -use datafusion_physical_expr_common::sort_expr::PhysicalSortRequirement; +use datafusion_physical_expr_common::sort_expr::{ + LexRequirement, PhysicalSortRequirement, +}; async fn register_current_csv( ctx: &SessionContext, @@ -416,7 +418,7 @@ impl ExecutionPlan for RequirementsTestExec { self.input.properties() } - fn required_input_ordering(&self) -> Vec>> { + fn required_input_ordering(&self) -> Vec> { let requirement = PhysicalSortRequirement::from_sort_exprs(&self.required_input_ordering); vec![Some(requirement)] diff --git a/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs b/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs index f8edf73e3d2a..a2726d62e9f6 100644 --- a/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs +++ b/datafusion/core/src/physical_optimizer/update_aggr_exprs.rs @@ -23,8 +23,9 @@ use std::sync::Arc; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use 
datafusion_common::{plan_datafusion_err, Result}; +use datafusion_physical_expr::aggregate::AggregateFunctionExpr; use datafusion_physical_expr::{ - reverse_order_bys, AggregateExpr, EquivalenceProperties, PhysicalSortRequirement, + reverse_order_bys, EquivalenceProperties, PhysicalSortRequirement, }; use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::aggregates::concat_slices; @@ -117,7 +118,7 @@ impl PhysicalOptimizerRule for OptimizeAggregateOrder { /// /// # Parameters /// -/// * `aggr_exprs` - A vector of `Arc` representing the +/// * `aggr_exprs` - A vector of `Arc` representing the /// aggregate expressions to be optimized. /// * `prefix_requirement` - An array slice representing the ordering /// requirements preceding the aggregate expressions. @@ -130,10 +131,10 @@ impl PhysicalOptimizerRule for OptimizeAggregateOrder { /// successfully. Any errors occurring during the conversion process are /// passed through. fn try_convert_aggregate_if_better( - aggr_exprs: Vec>, + aggr_exprs: Vec>, prefix_requirement: &[PhysicalSortRequirement], eq_properties: &EquivalenceProperties, -) -> Result>> { +) -> Result>> { aggr_exprs .into_iter() .map(|aggr_expr| { diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 6536f9a01439..fe8d79846630 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -58,8 +58,8 @@ use crate::physical_plan::unnest::UnnestExec; use crate::physical_plan::values::ValuesExec; use crate::physical_plan::windows::{BoundedWindowAggExec, WindowAggExec}; use crate::physical_plan::{ - displayable, windows, AggregateExpr, ExecutionPlan, ExecutionPlanProperties, - InputOrderMode, Partitioning, PhysicalExpr, WindowExpr, + displayable, windows, ExecutionPlan, ExecutionPlanProperties, InputOrderMode, + Partitioning, PhysicalExpr, WindowExpr, }; use arrow::compute::SortOptions; @@ -73,8 +73,7 @@ use datafusion_common::{ }; use datafusion_expr::dml::CopyTo; use datafusion_expr::expr::{ - self, create_function_physical_name, physical_name, AggregateFunction, Alias, - GroupingSet, WindowFunction, + self, physical_name, AggregateFunction, Alias, GroupingSet, WindowFunction, }; use datafusion_expr::expr_rewriter::unnormalize_cols; use datafusion_expr::logical_plan::builder::wrap_projection_for_join_if_necessary; @@ -82,9 +81,9 @@ use datafusion_expr::{ DescribeTable, DmlStatement, Extension, Filter, RecursiveQuery, StringifiedPlan, WindowFrame, WindowFrameBound, WriteOp, }; +use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr}; use datafusion_physical_expr::expressions::Literal; use datafusion_physical_expr::LexOrdering; -use datafusion_physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; use datafusion_sql::utils::window_expr_common_partition_keys; @@ -702,7 +701,7 @@ impl DefaultPhysicalPlanner { let initial_aggr = Arc::new(AggregateExec::try_new( AggregateMode::Partial, groups.clone(), - aggregates.clone(), + aggregates, filters.clone(), input_exec, physical_input_schema.clone(), @@ -720,7 +719,7 @@ impl DefaultPhysicalPlanner { // optimization purposes. For example, a FIRST_VALUE may turn // into a LAST_VALUE with the reverse ordering requirement. // To reflect such changes to subsequent stages, use the updated - // `AggregateExpr`/`PhysicalSortExpr` objects. + // `AggregateFunctionExpr`/`PhysicalSortExpr` objects. 
let updated_aggregates = initial_aggr.aggr_expr().to_vec(); let next_partition_mode = if can_repartition { @@ -1542,7 +1541,7 @@ pub fn create_window_expr( } type AggregateExprWithOptionalArgs = ( - Arc, + Arc, // The filter clause, if any Option>, // Ordering requirements, if any @@ -1569,12 +1568,7 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( let name = if let Some(name) = name { name } else { - create_function_physical_name( - func.name(), - *distinct, - args, - order_by.as_ref(), - )? + physical_name(e)? }; let physical_args = @@ -1588,8 +1582,7 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( None => None, }; - let ignore_nulls = null_treatment - .unwrap_or(sqlparser::ast::NullTreatment::RespectNulls) + let ignore_nulls = null_treatment.unwrap_or(NullTreatment::RespectNulls) == NullTreatment::IgnoreNulls; let (agg_expr, filter, order_by) = { @@ -2576,7 +2569,7 @@ mod tests { impl NoOpExecutionPlan { fn new(schema: SchemaRef) -> Self { - let cache = Self::compute_properties(schema.clone()); + let cache = Self::compute_properties(schema); Self { cache } } diff --git a/datafusion/core/src/test_util/mod.rs b/datafusion/core/src/test_util/mod.rs index ca8376fdec0a..faa9378535fd 100644 --- a/datafusion/core/src/test_util/mod.rs +++ b/datafusion/core/src/test_util/mod.rs @@ -48,13 +48,11 @@ use datafusion_common::TableReference; use datafusion_expr::utils::COUNT_STAR_EXPANSION; use datafusion_expr::{CreateExternalTable, Expr, TableType}; use datafusion_functions_aggregate::count::count_udaf; -use datafusion_physical_expr::{ - expressions, AggregateExpr, EquivalenceProperties, PhysicalExpr, -}; +use datafusion_physical_expr::{expressions, EquivalenceProperties, PhysicalExpr}; use async_trait::async_trait; use datafusion_catalog::Session; -use datafusion_physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; +use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr}; use futures::Stream; use tempfile::TempDir; // backwards compatibility @@ -429,7 +427,7 @@ impl TestAggregate { } /// Return appropriate expr depending if COUNT is for col or table (*) - pub fn count_expr(&self, schema: &Schema) -> Arc { + pub fn count_expr(&self, schema: &Schema) -> Arc { AggregateExprBuilder::new(count_udaf(), vec![self.column()]) .schema(Arc::new(schema.clone())) .alias(self.column_name()) diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs index 138e5bda7f39..62e9be63983c 100644 --- a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs @@ -25,7 +25,7 @@ use arrow::util::pretty::pretty_format_batches; use arrow_array::types::Int64Type; use datafusion::common::Result; use datafusion::datasource::MemTable; -use datafusion::physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; +use datafusion::physical_expr::aggregate::AggregateExprBuilder; use datafusion::physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, }; diff --git a/datafusion/core/tests/fuzz_cases/join_fuzz.rs b/datafusion/core/tests/fuzz_cases/join_fuzz.rs index f1cca66712d7..1c2d8ece2f36 100644 --- a/datafusion/core/tests/fuzz_cases/join_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/join_fuzz.rs @@ -350,12 +350,10 @@ impl JoinFuzzTestCase { fn left_right(&self) -> (Arc, Arc) { let schema1 = self.input1[0].schema(); let schema2 = self.input2[0].schema(); - let left = Arc::new( - MemoryExec::try_new(&[self.input1.clone()], 
schema1.clone(), None).unwrap(), - ); - let right = Arc::new( - MemoryExec::try_new(&[self.input2.clone()], schema2.clone(), None).unwrap(), - ); + let left = + Arc::new(MemoryExec::try_new(&[self.input1.clone()], schema1, None).unwrap()); + let right = + Arc::new(MemoryExec::try_new(&[self.input2.clone()], schema2, None).unwrap()); (left, right) } diff --git a/datafusion/core/tests/fuzz_cases/sort_fuzz.rs b/datafusion/core/tests/fuzz_cases/sort_fuzz.rs index f4b4f16aa160..1980589491a5 100644 --- a/datafusion/core/tests/fuzz_cases/sort_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/sort_fuzz.rs @@ -22,7 +22,7 @@ use arrow::{ compute::SortOptions, record_batch::RecordBatch, }; -use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv}; +use datafusion::execution::runtime_env::RuntimeEnvBuilder; use datafusion::physical_plan::expressions::PhysicalSortExpr; use datafusion::physical_plan::memory::MemoryExec; use datafusion::physical_plan::sorts::sort::SortExec; @@ -136,9 +136,12 @@ impl SortTest { .sort_spill_reservation_bytes, ); - let runtime_config = RuntimeConfig::new() - .with_memory_pool(Arc::new(GreedyMemoryPool::new(pool_size))); - let runtime = Arc::new(RuntimeEnv::new(runtime_config).unwrap()); + let runtime = Arc::new( + RuntimeEnvBuilder::new() + .with_memory_pool(Arc::new(GreedyMemoryPool::new(pool_size))) + .build() + .unwrap(), + ); SessionContext::new_with_config_rt(session_config, runtime) } else { SessionContext::new_with_config(session_config) diff --git a/datafusion/core/tests/memory_limit/mod.rs b/datafusion/core/tests/memory_limit/mod.rs index dbd5592e8020..592c25dedc50 100644 --- a/datafusion/core/tests/memory_limit/mod.rs +++ b/datafusion/core/tests/memory_limit/mod.rs @@ -40,7 +40,7 @@ use tokio::fs::File; use datafusion::datasource::streaming::StreamingTable; use datafusion::datasource::{MemTable, TableProvider}; use datafusion::execution::disk_manager::DiskManagerConfig; -use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv}; +use datafusion::execution::runtime_env::RuntimeEnvBuilder; use datafusion::execution::session_state::SessionStateBuilder; use datafusion::physical_optimizer::join_selection::JoinSelection; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; @@ -509,17 +509,17 @@ impl TestCase { let table = scenario.table(); - let mut rt_config = RuntimeConfig::new() + let rt_config = RuntimeEnvBuilder::new() // disk manager setting controls the spilling .with_disk_manager(disk_manager_config) .with_memory_limit(memory_limit, MEMORY_FRACTION); - if let Some(pool) = memory_pool { - rt_config = rt_config.with_memory_pool(pool); + let runtime = if let Some(pool) = memory_pool { + rt_config.with_memory_pool(pool).build().unwrap() + } else { + rt_config.build().unwrap() }; - let runtime = RuntimeEnv::new(rt_config).unwrap(); - // Configure execution let builder = SessionStateBuilder::new() .with_config(config) diff --git a/datafusion/core/tests/parquet/file_statistics.rs b/datafusion/core/tests/parquet/file_statistics.rs index bf25b36f48e8..bd251f1a6669 100644 --- a/datafusion/core/tests/parquet/file_statistics.rs +++ b/datafusion/core/tests/parquet/file_statistics.rs @@ -33,7 +33,7 @@ use datafusion_execution::cache::cache_unit::{ DefaultFileStatisticsCache, DefaultListFilesCache, }; use datafusion_execution::config::SessionConfig; -use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; +use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion::execution::session_state::SessionStateBuilder; use 
tempfile::tempdir; @@ -198,7 +198,10 @@ fn get_cache_runtime_state() -> ( .with_list_files_cache(Some(list_file_cache.clone())); let rt = Arc::new( - RuntimeEnv::new(RuntimeConfig::new().with_cache_manager(cache_config)).unwrap(), + RuntimeEnvBuilder::new() + .with_cache_manager(cache_config) + .build() + .expect("could not build runtime environment"), ); let state = SessionContext::new_with_config_rt(SessionConfig::default(), rt).state(); diff --git a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs index b051feb5750e..750544ecdec1 100644 --- a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs +++ b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs @@ -54,7 +54,7 @@ impl PartitionStream for DummyStreamPartition { fn transforms_streaming_table_exec_into_fetching_version_when_skip_is_zero( ) -> datafusion_common::Result<()> { let schema = create_schema(); - let streaming_table = streaming_table_exec(schema.clone())?; + let streaming_table = streaming_table_exec(schema)?; let global_limit = global_limit_exec(streaming_table, 0, Some(5)); let initial = get_plan_string(&global_limit); @@ -79,7 +79,7 @@ fn transforms_streaming_table_exec_into_fetching_version_when_skip_is_zero( fn transforms_streaming_table_exec_into_fetching_version_and_keeps_the_global_limit_when_skip_is_nonzero( ) -> datafusion_common::Result<()> { let schema = create_schema(); - let streaming_table = streaming_table_exec(schema.clone())?; + let streaming_table = streaming_table_exec(schema)?; let global_limit = global_limit_exec(streaming_table, 2, Some(5)); let initial = get_plan_string(&global_limit); @@ -107,7 +107,7 @@ fn transforms_coalesce_batches_exec_into_fetching_version_and_removes_local_limi let schema = create_schema(); let streaming_table = streaming_table_exec(schema.clone())?; let repartition = repartition_exec(streaming_table)?; - let filter = filter_exec(schema.clone(), repartition)?; + let filter = filter_exec(schema, repartition)?; let coalesce_batches = coalesce_batches_exec(filter); let local_limit = local_limit_exec(coalesce_batches, 5); let coalesce_partitions = coalesce_partitions_exec(local_limit); @@ -146,7 +146,7 @@ fn pushes_global_limit_exec_through_projection_exec() -> datafusion_common::Resu let schema = create_schema(); let streaming_table = streaming_table_exec(schema.clone())?; let filter = filter_exec(schema.clone(), streaming_table)?; - let projection = projection_exec(schema.clone(), filter)?; + let projection = projection_exec(schema, filter)?; let global_limit = global_limit_exec(projection, 0, Some(5)); let initial = get_plan_string(&global_limit); @@ -178,7 +178,7 @@ fn pushes_global_limit_exec_through_projection_exec_and_transforms_coalesce_batc let schema = create_schema(); let streaming_table = streaming_table_exec(schema.clone()).unwrap(); let coalesce_batches = coalesce_batches_exec(streaming_table); - let projection = projection_exec(schema.clone(), coalesce_batches)?; + let projection = projection_exec(schema, coalesce_batches)?; let global_limit = global_limit_exec(projection, 0, Some(5)); let initial = get_plan_string(&global_limit); @@ -256,7 +256,7 @@ fn keeps_pushed_local_limit_exec_when_there_are_multiple_input_partitions( let schema = create_schema(); let streaming_table = streaming_table_exec(schema.clone())?; let repartition = repartition_exec(streaming_table)?; - let filter = filter_exec(schema.clone(), repartition)?; + let filter = filter_exec(schema, repartition)?; let 
coalesce_partitions = coalesce_partitions_exec(filter); let global_limit = global_limit_exec(coalesce_partitions, 0, Some(5)); @@ -398,9 +398,7 @@ fn streaming_table_exec( ) -> datafusion_common::Result> { Ok(Arc::new(StreamingTableExec::try_new( schema.clone(), - vec![Arc::new(DummyStreamPartition { - schema: schema.clone(), - }) as _], + vec![Arc::new(DummyStreamPartition { schema }) as _], None, None, true, diff --git a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs index 48389b0304f6..042f6d622565 100644 --- a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs +++ b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs @@ -316,11 +316,11 @@ fn test_no_group_by() -> Result<()> { // `SELECT FROM MemoryExec LIMIT 10;`, Single AggregateExec let single_agg = AggregateExec::try_new( AggregateMode::Single, - build_group_by(&schema.clone(), vec![]), - vec![], /* aggr_expr */ - vec![], /* filter_expr */ - source, /* input */ - schema.clone(), /* input_schema */ + build_group_by(&schema, vec![]), + vec![], /* aggr_expr */ + vec![], /* filter_expr */ + source, /* input */ + schema, /* input_schema */ )?; let limit_exec = LocalLimitExec::new( Arc::new(single_agg), @@ -346,7 +346,7 @@ fn test_has_aggregate_expression() -> Result<()> { // `SELECT FROM MemoryExec LIMIT 10;`, Single AggregateExec let single_agg = AggregateExec::try_new( AggregateMode::Single, - build_group_by(&schema.clone(), vec!["a".to_string()]), + build_group_by(&schema, vec!["a".to_string()]), vec![agg.count_expr(&schema)], /* aggr_expr */ vec![None], /* filter_expr */ source, /* input */ @@ -418,11 +418,11 @@ fn test_has_order_by() -> Result<()> { // the `a > 1` filter is applied in the AggregateExec let single_agg = AggregateExec::try_new( AggregateMode::Single, - build_group_by(&schema.clone(), vec!["a".to_string()]), - vec![], /* aggr_expr */ - vec![], /* filter_expr */ - source, /* input */ - schema.clone(), /* input_schema */ + build_group_by(&schema, vec!["a".to_string()]), + vec![], /* aggr_expr */ + vec![], /* filter_expr */ + source, /* input */ + schema, /* input_schema */ )?; let limit_exec = LocalLimitExec::new( Arc::new(single_agg), diff --git a/datafusion/core/tests/user_defined/user_defined_aggregates.rs b/datafusion/core/tests/user_defined/user_defined_aggregates.rs index 93550d38021a..1e0d3d9d514e 100644 --- a/datafusion/core/tests/user_defined/user_defined_aggregates.rs +++ b/datafusion/core/tests/user_defined/user_defined_aggregates.rs @@ -272,7 +272,7 @@ async fn deregister_udaf() -> Result<()> { Arc::new(vec![DataType::UInt64, DataType::Float64]), ); - ctx.register_udaf(my_avg.clone()); + ctx.register_udaf(my_avg); assert!(ctx.state().aggregate_functions().contains_key("my_avg")); diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index 259cce74f2e5..0f1c3b8e53c4 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -594,7 +594,7 @@ async fn deregister_udf() -> Result<()> { let cast2i64 = ScalarUDF::from(CastToI64UDF::new()); let ctx = SessionContext::new(); - ctx.register_udf(cast2i64.clone()); + ctx.register_udf(cast2i64); assert!(ctx.udfs().contains("cast_to_i64")); diff --git a/datafusion/execution/src/memory_pool/pool.rs 
b/datafusion/execution/src/memory_pool/pool.rs index d3cd93979baf..e169c1f319cc 100644 --- a/datafusion/execution/src/memory_pool/pool.rs +++ b/datafusion/execution/src/memory_pool/pool.rs @@ -366,7 +366,7 @@ impl MemoryPool for TrackConsumersPool { // wrap OOM message in top consumers DataFusionError::ResourcesExhausted( provide_top_memory_consumers_to_error_msg( - e.to_owned(), + e, self.report_top(self.top.into()), ), ) @@ -540,7 +540,7 @@ mod tests { // Test: will be the same per Top Consumers reported. r0.grow(10); // make r0=10, pool available=90 let new_consumer_same_name = MemoryConsumer::new(same_name); - let mut r1 = new_consumer_same_name.clone().register(&pool); + let mut r1 = new_consumer_same_name.register(&pool); // TODO: the insufficient_capacity_err() message is per reservation, not per consumer. // a followup PR will clarify this message "0 bytes already allocated for this reservation" let expected = "Additional allocation failed with top memory consumers (across reservations) as: foo consumed 10 bytes. Error: Failed to allocate additional 150 bytes for foo with 0 bytes already allocated for this reservation - 90 bytes remain available for the total pool"; diff --git a/datafusion/execution/src/runtime_env.rs b/datafusion/execution/src/runtime_env.rs index 420246595558..e7b48be95cff 100644 --- a/datafusion/execution/src/runtime_env.rs +++ b/datafusion/execution/src/runtime_env.rs @@ -41,7 +41,7 @@ use url::Url; /// Execution runtime environment that manages system resources such /// as memory, disk, cache and storage. /// -/// A [`RuntimeEnv`] is created from a [`RuntimeConfig`] and has the +/// A [`RuntimeEnv`] is created from a [`RuntimeEnvBuilder`] and has the /// following resource management functionality: /// /// * [`MemoryPool`]: Manage memory @@ -147,13 +147,17 @@ impl RuntimeEnv { impl Default for RuntimeEnv { fn default() -> Self { - RuntimeEnv::new(RuntimeConfig::new()).unwrap() + RuntimeEnvBuilder::new().build().unwrap() } } +/// Please see: +/// This is a type alias for backwards compatibility.
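For readers tracking the `RuntimeConfig` to `RuntimeEnvBuilder` migration in this file, here is a minimal usage sketch. It is illustrative only, not part of this patch, and uses only builder methods that appear elsewhere in the diff:

```rust
use datafusion_execution::runtime_env::{RuntimeEnv, RuntimeEnvBuilder};

fn main() {
    // Old style: RuntimeEnv::new(RuntimeConfig::new()).unwrap()
    // New style: construct the environment through the builder.
    let _runtime: RuntimeEnv = RuntimeEnvBuilder::new()
        .build()
        .expect("default runtime environment");
}
```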
+pub type RuntimeConfig = RuntimeEnvBuilder; + #[derive(Clone)] /// Execution runtime configuration -pub struct RuntimeConfig { +pub struct RuntimeEnvBuilder { /// DiskManager to manage temporary disk file usage pub disk_manager: DiskManagerConfig, /// [`MemoryPool`] from which to allocate memory @@ -166,13 +170,13 @@ pub struct RuntimeConfig { pub object_store_registry: Arc, } -impl Default for RuntimeConfig { +impl Default for RuntimeEnvBuilder { fn default() -> Self { Self::new() } } -impl RuntimeConfig { +impl RuntimeEnvBuilder { /// New with default values pub fn new() -> Self { Self { @@ -228,4 +232,18 @@ impl RuntimeConfig { pub fn with_temp_file_path(self, path: impl Into) -> Self { self.with_disk_manager(DiskManagerConfig::new_specified(vec![path.into()])) } + + /// Build a RuntimeEnv + pub fn build(self) -> Result { + let memory_pool = self + .memory_pool + .unwrap_or_else(|| Arc::new(UnboundedMemoryPool::default())); + + Ok(RuntimeEnv { + memory_pool, + disk_manager: DiskManager::try_new(self.disk_manager)?, + cache_manager: CacheManager::try_new(&self.cache_manager)?, + object_store_registry: self.object_store_registry, + }) + } } diff --git a/datafusion/execution/src/task.rs b/datafusion/execution/src/task.rs index 21a644284c42..35689b8e08df 100644 --- a/datafusion/execution/src/task.rs +++ b/datafusion/execution/src/task.rs @@ -24,7 +24,7 @@ use crate::{ config::SessionConfig, memory_pool::MemoryPool, registry::FunctionRegistry, - runtime_env::{RuntimeConfig, RuntimeEnv}, + runtime_env::{RuntimeEnv, RuntimeEnvBuilder}, }; use datafusion_common::{plan_datafusion_err, DataFusionError, Result}; use datafusion_expr::planner::ExprPlanner; @@ -57,7 +57,8 @@ pub struct TaskContext { impl Default for TaskContext { fn default() -> Self { - let runtime = RuntimeEnv::new(RuntimeConfig::new()) + let runtime = RuntimeEnvBuilder::new() + .build() .expect("default runtime created successfully"); // Create a default task context, mostly useful for testing diff --git a/datafusion/expr-common/src/interval_arithmetic.rs b/datafusion/expr-common/src/interval_arithmetic.rs index e3ff412e785b..6424888c896a 100644 --- a/datafusion/expr-common/src/interval_arithmetic.rs +++ b/datafusion/expr-common/src/interval_arithmetic.rs @@ -1877,11 +1877,7 @@ mod tests { .sub(value.clone()) .unwrap() .lt(&eps)); - assert!(value - .clone() - .sub(prev_value(value.clone())) - .unwrap() - .lt(&eps)); + assert!(value.sub(prev_value(value.clone())).unwrap().lt(&eps)); assert_ne!(next_value(value.clone()), value); assert_ne!(prev_value(value.clone()), value); }); @@ -1913,11 +1909,11 @@ mod tests { min_max.into_iter().zip(inf).for_each(|((min, max), inf)| { assert_eq!(next_value(max.clone()), inf); assert_ne!(prev_value(max.clone()), max); - assert_ne!(prev_value(max.clone()), inf); + assert_ne!(prev_value(max), inf); assert_eq!(prev_value(min.clone()), inf); assert_ne!(next_value(min.clone()), min); - assert_ne!(next_value(min.clone()), inf); + assert_ne!(next_value(min), inf); assert_eq!(next_value(inf.clone()), inf); assert_eq!(prev_value(inf.clone()), inf); diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 6d2fb660f669..3617f56905a9 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -481,16 +481,22 @@ fn type_union_resolution_coercion( } } -/// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of a comparison operation -/// Unlike `coerced_from`, 
usually the coerced type is for comparison only. -/// For example, compare with Dictionary and Dictionary, only value type is what we care about +/// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of a +/// comparison operation +/// +/// Example comparison operations are `lhs = rhs` and `lhs > rhs` +/// +/// Binary comparison kernels require the two arguments to be the (exact) same +/// data type. However, users can write queries where the two arguments are +/// different data types. In such cases, the data types are automatically cast +/// (coerced) to a single data type to pass to the kernels. pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { if lhs_type == rhs_type { // same type => equality is possible return Some(lhs_type.clone()); } binary_numeric_coercion(lhs_type, rhs_type) - .or_else(|| dictionary_coercion(lhs_type, rhs_type, true)) + .or_else(|| dictionary_comparison_coercion(lhs_type, rhs_type, true)) .or_else(|| temporal_coercion_nonstrict_timezone(lhs_type, rhs_type)) .or_else(|| string_coercion(lhs_type, rhs_type)) .or_else(|| list_coercion(lhs_type, rhs_type)) @@ -501,7 +507,11 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Option { if lhs_type == rhs_type { // same type => equality is possible @@ -883,7 +893,7 @@ fn both_numeric_or_null_and_numeric(lhs_type: &DataType, rhs_type: &DataType) -> /// /// Not all operators support dictionaries, if `preserve_dictionaries` is true /// dictionaries will be preserved if possible -fn dictionary_coercion( +fn dictionary_comparison_coercion( lhs_type: &DataType, rhs_type: &DataType, preserve_dictionaries: bool, @@ -912,26 +922,25 @@ fn dictionary_coercion( /// Coercion rules for string concat. /// This is a union of string coercion rules and specified rules: -/// 1. At lease one side of lhs and rhs should be string type (Utf8 / LargeUtf8) +/// 1. At least one side of lhs and rhs should be string type (Utf8 / LargeUtf8) /// 2. Data type of the other side should be able to cast to string type fn string_concat_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { use arrow::datatypes::DataType::*; - match (lhs_type, rhs_type) { - // If Utf8View is in any side, we coerce to Utf8. - // Ref: https://github.com/apache/datafusion/pull/11796 - (Utf8View, Utf8View | Utf8 | LargeUtf8) | (Utf8 | LargeUtf8, Utf8View) => { - Some(Utf8) + string_coercion(lhs_type, rhs_type).or(match (lhs_type, rhs_type) { + (Utf8View, from_type) | (from_type, Utf8View) => { + string_concat_internal_coercion(from_type, &Utf8View) } - _ => string_coercion(lhs_type, rhs_type).or(match (lhs_type, rhs_type) { - (Utf8, from_type) | (from_type, Utf8) => { - string_concat_internal_coercion(from_type, &Utf8) - } - (LargeUtf8, from_type) | (from_type, LargeUtf8) => { - string_concat_internal_coercion(from_type, &LargeUtf8) - } - _ => None, - }), - } + (Utf8, from_type) | (from_type, Utf8) => { + string_concat_internal_coercion(from_type, &Utf8) + } + (LargeUtf8, from_type) | (from_type, LargeUtf8) => { + string_concat_internal_coercion(from_type, &LargeUtf8) + } + (Dictionary(_, lhs_value_type), Dictionary(_, rhs_value_type)) => { + string_coercion(lhs_value_type, rhs_value_type).or(None) + } + _ => None, + }) } fn array_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { @@ -942,6 +951,8 @@ fn array_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option } } +/// If `from_type` can be casted to `to_type`, return `to_type`, otherwise +/// return `None`. 
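To make the comparison coercion described above concrete, a small sketch follows. It is illustrative only (the module path is assumed from this file's location in `datafusion/expr-common`) and is not part of this patch:

```rust
use arrow::datatypes::DataType;
use datafusion_expr_common::type_coercion::binary::comparison_coercion;

fn main() {
    // Comparing an Int32 with an Int64 requires a single common type, so both
    // sides are coerced to Int64 before reaching the comparison kernels.
    assert_eq!(
        comparison_coercion(&DataType::Int32, &DataType::Int64),
        Some(DataType::Int64)
    );
}
```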
fn string_concat_internal_coercion( from_type: &DataType, to_type: &DataType, @@ -967,6 +978,7 @@ fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option } // Then, if LargeUtf8 is in any side, we coerce to LargeUtf8. (LargeUtf8, Utf8 | LargeUtf8) | (Utf8, LargeUtf8) => Some(LargeUtf8), + // Utf8 coerces to Utf8 (Utf8, Utf8) => Some(Utf8), _ => None, } @@ -1044,7 +1056,7 @@ pub fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Option Option { string_coercion(lhs_type, rhs_type) - .or_else(|| dictionary_coercion(lhs_type, rhs_type, false)) + .or_else(|| dictionary_comparison_coercion(lhs_type, rhs_type, false)) .or_else(|| regex_null_coercion(lhs_type, rhs_type)) } @@ -1324,38 +1336,50 @@ mod tests { let lhs_type = Dictionary(Box::new(Int8), Box::new(Int32)); let rhs_type = Dictionary(Box::new(Int8), Box::new(Int16)); - assert_eq!(dictionary_coercion(&lhs_type, &rhs_type, true), Some(Int32)); assert_eq!( - dictionary_coercion(&lhs_type, &rhs_type, false), + dictionary_comparison_coercion(&lhs_type, &rhs_type, true), + Some(Int32) + ); + assert_eq!( + dictionary_comparison_coercion(&lhs_type, &rhs_type, false), Some(Int32) ); // Since we can coerce values of Int16 to Utf8 can support this let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8)); let rhs_type = Dictionary(Box::new(Int8), Box::new(Int16)); - assert_eq!(dictionary_coercion(&lhs_type, &rhs_type, true), Some(Utf8)); + assert_eq!( + dictionary_comparison_coercion(&lhs_type, &rhs_type, true), + Some(Utf8) + ); // Since we can coerce values of Utf8 to Binary can support this let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8)); let rhs_type = Dictionary(Box::new(Int8), Box::new(Binary)); assert_eq!( - dictionary_coercion(&lhs_type, &rhs_type, true), + dictionary_comparison_coercion(&lhs_type, &rhs_type, true), Some(Binary) ); let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8)); let rhs_type = Utf8; - assert_eq!(dictionary_coercion(&lhs_type, &rhs_type, false), Some(Utf8)); assert_eq!( - dictionary_coercion(&lhs_type, &rhs_type, true), + dictionary_comparison_coercion(&lhs_type, &rhs_type, false), + Some(Utf8) + ); + assert_eq!( + dictionary_comparison_coercion(&lhs_type, &rhs_type, true), Some(lhs_type.clone()) ); let lhs_type = Utf8; let rhs_type = Dictionary(Box::new(Int8), Box::new(Utf8)); - assert_eq!(dictionary_coercion(&lhs_type, &rhs_type, false), Some(Utf8)); assert_eq!( - dictionary_coercion(&lhs_type, &rhs_type, true), + dictionary_comparison_coercion(&lhs_type, &rhs_type, false), + Some(Utf8) + ); + assert_eq!( + dictionary_comparison_coercion(&lhs_type, &rhs_type, true), Some(rhs_type.clone()) ); } @@ -1857,7 +1881,7 @@ mod tests { ); test_coercion_binary_rule!( DataType::Timestamp(TimeUnit::Second, Some("Europe/Brussels".into())), - DataType::Timestamp(TimeUnit::Second, utc.clone()), + DataType::Timestamp(TimeUnit::Second, utc), Operator::Eq, DataType::Timestamp(TimeUnit::Second, Some("Europe/Brussels".into())) ); diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 88939ccf41b8..85ba80396c8e 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -38,8 +38,7 @@ use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeRecursion, }; use datafusion_common::{ - internal_err, not_impl_err, plan_err, Column, DFSchema, Result, ScalarValue, - TableReference, + plan_err, Column, DFSchema, Result, ScalarValue, TableReference, }; use sqlparser::ast::{ display_comma_separated, ExceptSelectItem, 
ExcludeSelectItem, IlikeSelectItem, @@ -1082,7 +1081,7 @@ impl Expr { /// For example, for a projection (e.g. `SELECT `) the resulting arrow /// [`Schema`] will have a field with this name. /// - /// Note that the resulting string is subtlety different than the `Display` + /// Note that the resulting string is subtlety different from the `Display` /// representation for certain `Expr`. Some differences: /// /// 1. [`Expr::Alias`], which shows only the alias itself @@ -1104,6 +1103,7 @@ impl Expr { } /// Returns a full and complete string representation of this expression. + #[deprecated(note = "use format! instead")] pub fn canonical_name(&self) -> String { format!("{self}") } @@ -2386,263 +2386,13 @@ fn fmt_function( write!(f, "{}({}{})", fun, distinct_str, args.join(", ")) } -pub fn create_function_physical_name( - fun: &str, - distinct: bool, - args: &[Expr], - order_by: Option<&Vec>, -) -> Result { - let names: Vec = args - .iter() - .map(|e| create_physical_name(e, false)) - .collect::>()?; - - let distinct_str = match distinct { - true => "DISTINCT ", - false => "", - }; - - let phys_name = format!("{}({}{})", fun, distinct_str, names.join(",")); - - Ok(order_by - .map(|order_by| format!("{} ORDER BY [{}]", phys_name, expr_vec_fmt!(order_by))) - .unwrap_or(phys_name)) -} - -pub fn physical_name(e: &Expr) -> Result { - create_physical_name(e, true) -} - -fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { - match e { - Expr::Unnest(_) => { - internal_err!( - "Expr::Unnest should have been converted to LogicalPlan::Unnest" - ) - } - Expr::Column(c) => { - if is_first_expr { - Ok(c.name.clone()) - } else { - Ok(c.flat_name()) - } - } - Expr::Alias(Alias { name, .. }) => Ok(name.clone()), - Expr::ScalarVariable(_, variable_names) => Ok(variable_names.join(".")), - Expr::Literal(value) => Ok(format!("{value:?}")), - Expr::BinaryExpr(BinaryExpr { left, op, right }) => { - let left = create_physical_name(left, false)?; - let right = create_physical_name(right, false)?; - Ok(format!("{left} {op} {right}")) - } - Expr::Case(case) => { - let mut name = "CASE ".to_string(); - if let Some(e) = &case.expr { - let _ = write!(name, "{} ", create_physical_name(e, false)?); - } - for (w, t) in &case.when_then_expr { - let _ = write!( - name, - "WHEN {} THEN {} ", - create_physical_name(w, false)?, - create_physical_name(t, false)? - ); - } - if let Some(e) = &case.else_expr { - let _ = write!(name, "ELSE {} ", create_physical_name(e, false)?); - } - name += "END"; - Ok(name) - } - Expr::Cast(Cast { expr, .. }) => { - // CAST does not change the expression name - create_physical_name(expr, false) - } - Expr::TryCast(TryCast { expr, .. 
}) => { - // CAST does not change the expression name - create_physical_name(expr, false) - } - Expr::Not(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("NOT {expr}")) - } - Expr::Negative(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("(- {expr})")) - } - Expr::IsNull(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS NULL")) - } - Expr::IsNotNull(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS NOT NULL")) - } - Expr::IsTrue(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS TRUE")) - } - Expr::IsFalse(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS FALSE")) - } - Expr::IsUnknown(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS UNKNOWN")) - } - Expr::IsNotTrue(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS NOT TRUE")) - } - Expr::IsNotFalse(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS NOT FALSE")) - } - Expr::IsNotUnknown(expr) => { - let expr = create_physical_name(expr, false)?; - Ok(format!("{expr} IS NOT UNKNOWN")) - } - Expr::ScalarFunction(fun) => fun.func.schema_name(&fun.args), - Expr::WindowFunction(WindowFunction { - fun, - args, - order_by, - .. - }) => { - create_function_physical_name(&fun.to_string(), false, args, Some(order_by)) - } - Expr::AggregateFunction(AggregateFunction { - func, - distinct, - args, - filter: _, - order_by, - null_treatment: _, - }) => { - create_function_physical_name(func.name(), *distinct, args, order_by.as_ref()) - } - Expr::GroupingSet(grouping_set) => match grouping_set { - GroupingSet::Rollup(exprs) => Ok(format!( - "ROLLUP ({})", - exprs - .iter() - .map(|e| create_physical_name(e, false)) - .collect::>>()? - .join(", ") - )), - GroupingSet::Cube(exprs) => Ok(format!( - "CUBE ({})", - exprs - .iter() - .map(|e| create_physical_name(e, false)) - .collect::>>()? - .join(", ") - )), - GroupingSet::GroupingSets(lists_of_exprs) => { - let mut strings = vec![]; - for exprs in lists_of_exprs { - let exprs_str = exprs - .iter() - .map(|e| create_physical_name(e, false)) - .collect::>>()? - .join(", "); - strings.push(format!("({exprs_str})")); - } - Ok(format!("GROUPING SETS ({})", strings.join(", "))) - } - }, - - Expr::InList(InList { - expr, - list, - negated, - }) => { - let expr = create_physical_name(expr, false)?; - let list = list.iter().map(|expr| create_physical_name(expr, false)); - if *negated { - Ok(format!("{expr} NOT IN ({list:?})")) - } else { - Ok(format!("{expr} IN ({list:?})")) - } - } - Expr::Exists { .. 
} => { - not_impl_err!("EXISTS is not yet supported in the physical plan") - } - Expr::InSubquery(_) => { - not_impl_err!("IN subquery is not yet supported in the physical plan") - } - Expr::ScalarSubquery(_) => { - not_impl_err!("Scalar subqueries are not yet supported in the physical plan") - } - Expr::Between(Between { - expr, - negated, - low, - high, - }) => { - let expr = create_physical_name(expr, false)?; - let low = create_physical_name(low, false)?; - let high = create_physical_name(high, false)?; - if *negated { - Ok(format!("{expr} NOT BETWEEN {low} AND {high}")) - } else { - Ok(format!("{expr} BETWEEN {low} AND {high}")) - } - } - Expr::Like(Like { - negated, - expr, - pattern, - escape_char, - case_insensitive, - }) => { - let expr = create_physical_name(expr, false)?; - let pattern = create_physical_name(pattern, false)?; - let op_name = if *case_insensitive { "ILIKE" } else { "LIKE" }; - let escape = if let Some(char) = escape_char { - format!("CHAR '{char}'") - } else { - "".to_string() - }; - if *negated { - Ok(format!("{expr} NOT {op_name} {pattern}{escape}")) - } else { - Ok(format!("{expr} {op_name} {pattern}{escape}")) - } - } - Expr::SimilarTo(Like { - negated, - expr, - pattern, - escape_char, - case_insensitive: _, - }) => { - let expr = create_physical_name(expr, false)?; - let pattern = create_physical_name(pattern, false)?; - let escape = if let Some(char) = escape_char { - format!("CHAR '{char}'") - } else { - "".to_string() - }; - if *negated { - Ok(format!("{expr} NOT SIMILAR TO {pattern}{escape}")) - } else { - Ok(format!("{expr} SIMILAR TO {pattern}{escape}")) - } - } - Expr::Sort { .. } => { - internal_err!("Create physical name does not support sort expression") - } - Expr::Wildcard { qualifier, options } => match qualifier { - Some(qualifier) => Ok(format!("{}.*{}", qualifier, options)), - None => Ok(format!("*{}", options)), - }, - Expr::Placeholder(_) => { - internal_err!("Create physical name does not support placeholder") - } - Expr::OuterReferenceColumn(_, _) => { - internal_err!("Create physical name does not support OuterReferenceColumn") - } +/// The name of the column (field) that this `Expr` will produce in the physical plan. +/// The difference from [Expr::schema_name] is that top-level columns are unqualified. 
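A hedged sketch of how the new, much smaller `physical_name` behaves (assuming the usual `datafusion_expr` re-exports; not part of this patch):

```rust
use datafusion_common::Result;
use datafusion_expr::col;
use datafusion_expr::expr::physical_name;

fn demo() -> Result<()> {
    // A qualified top-level column keeps only its bare field name...
    assert_eq!(physical_name(&col("t.a"))?, "a");
    // ...while any other expression falls back to `Expr::schema_name()`.
    let name = physical_name(&(col("t.a") + col("t.b")))?;
    println!("{name}"); // typically prints `t.a + t.b`
    Ok(())
}
```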
+pub fn physical_name(expr: &Expr) -> Result { + if let Expr::Column(col) = expr { + Ok(col.name.clone()) + } else { + Ok(expr.schema_name().to_string()) } } @@ -2658,6 +2408,7 @@ mod test { use std::any::Any; #[test] + #[allow(deprecated)] fn format_case_when() -> Result<()> { let expr = case(col("a")) .when(lit(1), lit(true)) @@ -2670,6 +2421,7 @@ mod test { } #[test] + #[allow(deprecated)] fn format_cast() -> Result<()> { let expr = Expr::Cast(Cast { expr: Box::new(Expr::Literal(ScalarValue::Float32(Some(1.23)))), diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 4e6022399653..1e0b601146dd 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -394,7 +394,7 @@ pub fn create_udf( volatility: Volatility, fun: ScalarFunctionImplementation, ) -> ScalarUDF { - let return_type = Arc::try_unwrap(return_type).unwrap_or_else(|t| t.as_ref().clone()); + let return_type = Arc::unwrap_or_clone(return_type); ScalarUDF::from(SimpleScalarUDF::new( name, input_types, @@ -476,8 +476,8 @@ pub fn create_udaf( accumulator: AccumulatorFactoryFunction, state_type: Arc>, ) -> AggregateUDF { - let return_type = Arc::try_unwrap(return_type).unwrap_or_else(|t| t.as_ref().clone()); - let state_type = Arc::try_unwrap(state_type).unwrap_or_else(|t| t.as_ref().clone()); + let return_type = Arc::unwrap_or_clone(return_type); + let state_type = Arc::unwrap_or_clone(state_type); let state_fields = state_type .into_iter() .enumerate() @@ -594,7 +594,7 @@ pub fn create_udwf( volatility: Volatility, partition_evaluator_factory: PartitionEvaluatorFactory, ) -> WindowUDF { - let return_type = Arc::try_unwrap(return_type).unwrap_or_else(|t| t.as_ref().clone()); + let return_type = Arc::unwrap_or_clone(return_type); WindowUDF::from(SimpleWindowUDF::new( name, input_type, diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index c26970cb053a..768c4aabc840 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -279,6 +279,57 @@ where expr.alias_if_changed(original_name) } +/// Handles ensuring the name of rewritten expressions is not changed. +/// +/// For example, if an expression `1 + 2` is rewritten to `3`, the name of the +/// expression should be preserved: `3 as "1 + 2"` +/// +/// See for details +pub struct NamePreserver { + use_alias: bool, +} + +/// If the name of an expression is remembered, it will be preserved when +/// rewriting the expression +pub struct SavedName(Option); + +impl NamePreserver { + /// Create a new NamePreserver for rewriting the `expr` that is part of the specified plan + pub fn new(plan: &LogicalPlan) -> Self { + Self { + use_alias: !matches!(plan, LogicalPlan::Filter(_) | LogicalPlan::Join(_)), + } + } + + /// Create a new NamePreserver for rewriting the `expr`s in `Projection` + /// + /// This will use aliases + pub fn new_for_projection() -> Self { + Self { use_alias: true } + } + + pub fn save(&self, expr: &Expr) -> Result { + let original_name = if self.use_alias { + Some(expr.name_for_alias()?) 
+ } else { + None + }; + + Ok(SavedName(original_name)) + } +} + +impl SavedName { + /// Ensures the name of the rewritten expression is preserved + pub fn restore(self, expr: Expr) -> Result { + let Self(original_name) = self; + match original_name { + Some(name) => expr.alias_if_changed(name), + None => Ok(expr), + } + } +} + #[cfg(test)] mod test { use std::ops::Add; diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 10ec10e61239..3920a1a3517c 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -184,7 +184,7 @@ impl ExprSchemable for Expr { err, utils::generate_signature_error_msg( fun.name(), - fun.signature().clone(), + fun.signature(), &data_types ) ) @@ -199,7 +199,7 @@ impl ExprSchemable for Expr { err, utils::generate_signature_error_msg( fun.name(), - fun.signature().clone(), + fun.signature(), &data_types ) ) diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index a96caa03d611..2c2300b123c2 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -41,9 +41,8 @@ use crate::utils::{ find_valid_equijoin_key_pair, group_window_expr_by_sort_keys, }; use crate::{ - and, binary_expr, logical_plan::tree_node::unwrap_arc, DmlStatement, Expr, - ExprSchemable, Operator, RecursiveQuery, TableProviderFilterPushDown, TableSource, - WriteOp, + and, binary_expr, DmlStatement, Expr, ExprSchemable, Operator, RecursiveQuery, + TableProviderFilterPushDown, TableSource, WriteOp, }; use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef}; @@ -60,6 +59,7 @@ pub const UNNAMED_TABLE: &str = "?table?"; /// Builder for logical plans /// +/// # Example building a simple plan /// ``` /// # use datafusion_expr::{lit, col, LogicalPlanBuilder, logical_plan::table_scan}; /// # use datafusion_common::Result; @@ -88,17 +88,27 @@ pub const UNNAMED_TABLE: &str = "?table?"; /// .project(vec![col("last_name")])? /// .build()?; /// +/// // Convert from plan back to builder +/// let builder = LogicalPlanBuilder::from(plan); +/// /// # Ok(()) /// # } /// ``` #[derive(Debug, Clone)] pub struct LogicalPlanBuilder { - plan: LogicalPlan, + plan: Arc, } impl LogicalPlanBuilder { /// Create a builder from an existing plan - pub fn from(plan: LogicalPlan) -> Self { + pub fn new(plan: LogicalPlan) -> Self { + Self { + plan: Arc::new(plan), + } + } + + /// Create a builder from an existing plan + pub fn new_from_arc(plan: Arc) -> Self { Self { plan } } @@ -116,7 +126,7 @@ impl LogicalPlanBuilder { /// /// `produce_one_row` set to true means this empty node needs to produce a placeholder row. pub fn empty(produce_one_row: bool) -> Self { - Self::from(LogicalPlan::EmptyRelation(EmptyRelation { + Self::new(LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row, schema: DFSchemaRef::new(DFSchema::empty()), })) @@ -125,7 +135,7 @@ impl LogicalPlanBuilder { /// Convert a regular plan into a recursive query. /// `is_distinct` indicates whether the recursive term should be de-duplicated (`UNION`) after each iteration or not (`UNION ALL`). 
pub fn to_recursive_query( - &self, + self, name: String, recursive_term: LogicalPlan, is_distinct: bool, @@ -150,7 +160,7 @@ impl LogicalPlanBuilder { coerce_plan_expr_for_schema(recursive_term, self.plan.schema())?; Ok(Self::from(LogicalPlan::RecursiveQuery(RecursiveQuery { name, - static_term: Arc::new(self.plan.clone()), + static_term: self.plan, recursive_term: Arc::new(coerced_recursive_term), is_distinct, }))) @@ -201,7 +211,7 @@ impl LogicalPlanBuilder { }; common_type = Some(new_type); } else { - common_type = Some(data_type.clone()); + common_type = Some(data_type); } } field_types.push(common_type.unwrap_or(DataType::Utf8)); @@ -210,7 +220,7 @@ impl LogicalPlanBuilder { for row in &mut values { for (j, field_type) in field_types.iter().enumerate() { if let Expr::Literal(ScalarValue::Null) = row[j] { - row[j] = Expr::Literal(ScalarValue::try_from(field_type.clone())?); + row[j] = Expr::Literal(ScalarValue::try_from(field_type)?); } else { row[j] = std::mem::take(&mut row[j]).cast_to(field_type, &empty_schema)?; @@ -228,7 +238,7 @@ impl LogicalPlanBuilder { .collect::>(); let dfschema = DFSchema::from_unqualified_fields(fields.into(), HashMap::new())?; let schema = DFSchemaRef::new(dfschema); - Ok(Self::from(LogicalPlan::Values(Values { schema, values }))) + Ok(Self::new(LogicalPlan::Values(Values { schema, values }))) } /// Convert a table provider into a builder with a TableScan @@ -279,7 +289,7 @@ impl LogicalPlanBuilder { options: HashMap, partition_by: Vec, ) -> Result { - Ok(Self::from(LogicalPlan::Copy(CopyTo { + Ok(Self::new(LogicalPlan::Copy(CopyTo { input: Arc::new(input), output_url, partition_by, @@ -303,7 +313,7 @@ impl LogicalPlanBuilder { WriteOp::InsertInto }; - Ok(Self::from(LogicalPlan::Dml(DmlStatement::new( + Ok(Self::new(LogicalPlan::Dml(DmlStatement::new( table_name.into(), table_schema, op, @@ -320,7 +330,7 @@ impl LogicalPlanBuilder { ) -> Result { TableScan::try_new(table_name, table_source, projection, filters, None) .map(LogicalPlan::TableScan) - .map(Self::from) + .map(Self::new) } /// Wrap a plan in a window @@ -365,7 +375,7 @@ impl LogicalPlanBuilder { self, expr: impl IntoIterator>, ) -> Result { - project(self.plan, expr).map(Self::from) + project(Arc::unwrap_or_clone(self.plan), expr).map(Self::new) } /// Select the given column indices @@ -380,17 +390,25 @@ impl LogicalPlanBuilder { /// Apply a filter pub fn filter(self, expr: impl Into) -> Result { let expr = normalize_col(expr.into(), &self.plan)?; - Filter::try_new(expr, Arc::new(self.plan)) + Filter::try_new(expr, self.plan) + .map(LogicalPlan::Filter) + .map(Self::new) + } + + /// Apply a filter which is used for a having clause + pub fn having(self, expr: impl Into) -> Result { + let expr = normalize_col(expr.into(), &self.plan)?; + Filter::try_new_with_having(expr, self.plan) .map(LogicalPlan::Filter) .map(Self::from) } /// Make a builder for a prepare logical plan from the builder's plan pub fn prepare(self, name: String, data_types: Vec) -> Result { - Ok(Self::from(LogicalPlan::Prepare(Prepare { + Ok(Self::new(LogicalPlan::Prepare(Prepare { name, data_types, - input: Arc::new(self.plan), + input: self.plan, }))) } @@ -401,16 +419,16 @@ impl LogicalPlanBuilder { /// `fetch` - Maximum number of rows to fetch, after skipping `skip` rows, /// if specified. 
pub fn limit(self, skip: usize, fetch: Option) -> Result { - Ok(Self::from(LogicalPlan::Limit(Limit { + Ok(Self::new(LogicalPlan::Limit(Limit { skip, fetch, - input: Arc::new(self.plan), + input: self.plan, }))) } /// Apply an alias pub fn alias(self, alias: impl Into) -> Result { - subquery_alias(self.plan, alias).map(Self::from) + subquery_alias(Arc::unwrap_or_clone(self.plan), alias).map(Self::new) } /// Add missing sort columns to all downstream projection @@ -465,7 +483,7 @@ impl LogicalPlanBuilder { Self::ambiguous_distinct_check(&missing_exprs, missing_cols, &expr)?; } expr.extend(missing_exprs); - project((*input).clone(), expr) + project(Arc::unwrap_or_clone(input), expr) } _ => { let is_distinct = @@ -534,25 +552,22 @@ impl LogicalPlanBuilder { // Collect sort columns that are missing in the input plan's schema let mut missing_cols: Vec = vec![]; - exprs - .clone() - .into_iter() - .try_for_each::<_, Result<()>>(|expr| { - let columns = expr.column_refs(); + exprs.iter().try_for_each::<_, Result<()>>(|expr| { + let columns = expr.column_refs(); - columns.into_iter().for_each(|c| { - if !schema.has_column(c) { - missing_cols.push(c.clone()); - } - }); + columns.into_iter().for_each(|c| { + if !schema.has_column(c) { + missing_cols.push(c.clone()); + } + }); - Ok(()) - })?; + Ok(()) + })?; if missing_cols.is_empty() { - return Ok(Self::from(LogicalPlan::Sort(Sort { + return Ok(Self::new(LogicalPlan::Sort(Sort { expr: normalize_cols(exprs, &self.plan)?, - input: Arc::new(self.plan), + input: self.plan, fetch: None, }))); } @@ -561,7 +576,11 @@ impl LogicalPlanBuilder { let new_expr = schema.columns().into_iter().map(Expr::Column).collect(); let is_distinct = false; - let plan = Self::add_missing_columns(self.plan, &missing_cols, is_distinct)?; + let plan = Self::add_missing_columns( + Arc::unwrap_or_clone(self.plan), + &missing_cols, + is_distinct, + )?; let sort_plan = LogicalPlan::Sort(Sort { expr: normalize_cols(exprs, &plan)?, input: Arc::new(plan), @@ -570,29 +589,27 @@ impl LogicalPlanBuilder { Projection::try_new(new_expr, Arc::new(sort_plan)) .map(LogicalPlan::Projection) - .map(Self::from) + .map(Self::new) } /// Apply a union, preserving duplicate rows pub fn union(self, plan: LogicalPlan) -> Result { - union(self.plan, plan).map(Self::from) + union(Arc::unwrap_or_clone(self.plan), plan).map(Self::new) } /// Apply a union, removing duplicate rows pub fn union_distinct(self, plan: LogicalPlan) -> Result { - let left_plan: LogicalPlan = self.plan; + let left_plan: LogicalPlan = Arc::unwrap_or_clone(self.plan); let right_plan: LogicalPlan = plan; - Ok(Self::from(LogicalPlan::Distinct(Distinct::All(Arc::new( + Ok(Self::new(LogicalPlan::Distinct(Distinct::All(Arc::new( union(left_plan, right_plan)?, ))))) } /// Apply deduplication: Only distinct (different) values are returned) pub fn distinct(self) -> Result { - Ok(Self::from(LogicalPlan::Distinct(Distinct::All(Arc::new( - self.plan, - ))))) + Ok(Self::new(LogicalPlan::Distinct(Distinct::All(self.plan)))) } /// Project first values of the specified expression list according to the provided @@ -603,8 +620,8 @@ impl LogicalPlanBuilder { select_expr: Vec, sort_expr: Option>, ) -> Result { - Ok(Self::from(LogicalPlan::Distinct(Distinct::On( - DistinctOn::try_new(on_expr, select_expr, sort_expr, Arc::new(self.plan))?, + Ok(Self::new(LogicalPlan::Distinct(Distinct::On( + DistinctOn::try_new(on_expr, select_expr, sort_expr, self.plan)?, )))) } @@ -690,7 +707,7 @@ impl LogicalPlanBuilder { pub(crate) fn normalize( plan: &LogicalPlan, 
- column: impl Into + Clone, + column: impl Into, ) -> Result { let schema = plan.schema(); let fallback_schemas = plan.fallback_normalize_schemas(); @@ -819,8 +836,8 @@ impl LogicalPlanBuilder { let join_schema = build_join_schema(self.plan.schema(), right.schema(), &join_type)?; - Ok(Self::from(LogicalPlan::Join(Join { - left: Arc::new(self.plan), + Ok(Self::new(LogicalPlan::Join(Join { + left: self.plan, right: Arc::new(right), on, filter, @@ -883,8 +900,8 @@ impl LogicalPlanBuilder { DataFusionError::Internal("filters should not be None here".to_string()) })?) } else { - Ok(Self::from(LogicalPlan::Join(Join { - left: Arc::new(self.plan), + Ok(Self::new(LogicalPlan::Join(Join { + left: self.plan, right: Arc::new(right), on: join_on, filter: filters, @@ -900,8 +917,8 @@ impl LogicalPlanBuilder { pub fn cross_join(self, right: LogicalPlan) -> Result { let join_schema = build_join_schema(self.plan.schema(), right.schema(), &JoinType::Inner)?; - Ok(Self::from(LogicalPlan::CrossJoin(CrossJoin { - left: Arc::new(self.plan), + Ok(Self::new(LogicalPlan::CrossJoin(CrossJoin { + left: self.plan, right: Arc::new(right), schema: DFSchemaRef::new(join_schema), }))) @@ -909,8 +926,8 @@ impl LogicalPlanBuilder { /// Repartition pub fn repartition(self, partitioning_scheme: Partitioning) -> Result { - Ok(Self::from(LogicalPlan::Repartition(Repartition { - input: Arc::new(self.plan), + Ok(Self::new(LogicalPlan::Repartition(Repartition { + input: self.plan, partitioning_scheme, }))) } @@ -922,9 +939,9 @@ impl LogicalPlanBuilder { ) -> Result { let window_expr = normalize_cols(window_expr, &self.plan)?; validate_unique_names("Windows", &window_expr)?; - Ok(Self::from(LogicalPlan::Window(Window::try_new( + Ok(Self::new(LogicalPlan::Window(Window::try_new( window_expr, - Arc::new(self.plan), + self.plan, )?))) } @@ -941,9 +958,9 @@ impl LogicalPlanBuilder { let group_expr = add_group_by_exprs_from_dependencies(group_expr, self.plan.schema())?; - Aggregate::try_new(Arc::new(self.plan), group_expr, aggr_expr) + Aggregate::try_new(self.plan, group_expr, aggr_expr) .map(LogicalPlan::Aggregate) - .map(Self::from) + .map(Self::new) } /// Create an expression to represent the explanation of the plan @@ -957,18 +974,18 @@ impl LogicalPlanBuilder { let schema = schema.to_dfschema_ref()?; if analyze { - Ok(Self::from(LogicalPlan::Analyze(Analyze { + Ok(Self::new(LogicalPlan::Analyze(Analyze { verbose, - input: Arc::new(self.plan), + input: self.plan, schema, }))) } else { let stringified_plans = vec![self.plan.to_stringified(PlanType::InitialLogicalPlan)]; - Ok(Self::from(LogicalPlan::Explain(Explain { + Ok(Self::new(LogicalPlan::Explain(Explain { verbose, - plan: Arc::new(self.plan), + plan: self.plan, stringified_plans, schema, logical_optimization_succeeded: false, @@ -1046,7 +1063,7 @@ impl LogicalPlanBuilder { /// Build the plan pub fn build(self) -> Result { - Ok(self.plan) + Ok(Arc::unwrap_or_clone(self.plan)) } /// Apply a join with the expression on constraint. @@ -1106,8 +1123,8 @@ impl LogicalPlanBuilder { let join_schema = build_join_schema(self.plan.schema(), right.schema(), &join_type)?; - Ok(Self::from(LogicalPlan::Join(Join { - left: Arc::new(self.plan), + Ok(Self::new(LogicalPlan::Join(Join { + left: self.plan, right: Arc::new(right), on: join_key_pairs, filter, @@ -1120,7 +1137,7 @@ impl LogicalPlanBuilder { /// Unnest the given column. 
pub fn unnest_column(self, column: impl Into) -> Result { - Ok(Self::from(unnest(self.plan, vec![column.into()])?)) + unnest(Arc::unwrap_or_clone(self.plan), vec![column.into()]).map(Self::new) } /// Unnest the given column given [`UnnestOptions`] @@ -1129,11 +1146,12 @@ impl LogicalPlanBuilder { column: impl Into, options: UnnestOptions, ) -> Result { - Ok(Self::from(unnest_with_options( - self.plan, + unnest_with_options( + Arc::unwrap_or_clone(self.plan), vec![column.into()], options, - )?)) + ) + .map(Self::new) } /// Unnest the given columns with the given [`UnnestOptions`] @@ -1142,45 +1160,20 @@ impl LogicalPlanBuilder { columns: Vec, options: UnnestOptions, ) -> Result { - Ok(Self::from(unnest_with_options( - self.plan, columns, options, - )?)) + unnest_with_options(Arc::unwrap_or_clone(self.plan), columns, options) + .map(Self::new) } } -/// Converts a `Arc` into `LogicalPlanBuilder` -/// ``` -/// # use datafusion_expr::{Expr, expr, col, LogicalPlanBuilder, logical_plan::table_scan}; -/// # use datafusion_common::Result; -/// # use arrow::datatypes::{Schema, DataType, Field}; -/// # fn main() -> Result<()> { -/// # -/// # fn employee_schema() -> Schema { -/// # Schema::new(vec![ -/// # Field::new("id", DataType::Int32, false), -/// # Field::new("first_name", DataType::Utf8, false), -/// # Field::new("last_name", DataType::Utf8, false), -/// # Field::new("state", DataType::Utf8, false), -/// # Field::new("salary", DataType::Int32, false), -/// # ]) -/// # } -/// # -/// // Create the plan -/// let plan = table_scan(Some("employee_csv"), &employee_schema(), Some(vec![3, 4]))? -/// .sort(vec![ -/// Expr::Sort(expr::Sort::new(Box::new(col("state")), true, true)), -/// Expr::Sort(expr::Sort::new(Box::new(col("salary")), false, false)), -/// ])? -/// .build()?; -/// // Convert LogicalPlan into LogicalPlanBuilder -/// let plan_builder: LogicalPlanBuilder = std::sync::Arc::new(plan).into(); -/// # Ok(()) -/// # } -/// ``` +impl From for LogicalPlanBuilder { + fn from(plan: LogicalPlan) -> Self { + LogicalPlanBuilder::new(plan) + } +} impl From> for LogicalPlanBuilder { fn from(plan: Arc) -> Self { - LogicalPlanBuilder::from(unwrap_arc(plan)) + LogicalPlanBuilder::new_from_arc(plan) } } @@ -1342,7 +1335,17 @@ pub fn validate_unique_names<'a>( }) } -/// Union two logical plans. +/// Union two [`LogicalPlan`]s. +/// +/// Constructs the UNION plan, but does not perform type-coercion. Therefore the +/// subtree expressions will not be properly typed until the optimizer pass. +/// +/// If a properly typed UNION plan is needed, refer to [`TypeCoercionRewriter::coerce_union`] +/// or alternatively, merge the union input schema using [`coerce_union_schema`] and +/// apply the expression rewrite with [`coerce_plan_expr_for_schema`]. +/// +/// [`TypeCoercionRewriter::coerce_union`]: https://docs.rs/datafusion-optimizer/latest/datafusion_optimizer/analyzer/type_coercion/struct.TypeCoercionRewriter.html#method.coerce_union +/// [`coerce_union_schema`]: https://docs.rs/datafusion-optimizer/latest/datafusion_optimizer/analyzer/type_coercion/fn.coerce_union_schema.html pub fn union(left_plan: LogicalPlan, right_plan: LogicalPlan) -> Result { // Temporarily use the schema from the left input and later rely on the analyzer to // coerce the two schemas into a common one. 
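// A minimal usage sketch of the builder `union` documented above, written as a
// standalone Rust snippet; the table names ("t1", "t2") and the single-column
// schema are assumed purely for illustration. As the new docs note, `union`
// does not perform type coercion, so a later analyzer pass (for example
// TypeCoercionRewriter::coerce_union) is still needed before the plan's
// expressions are fully typed.
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::Result;
use datafusion_expr::logical_plan::{table_scan, LogicalPlanBuilder};

fn union_example() -> Result<()> {
    let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
    let left = table_scan(Some("t1"), &schema, None)?.build()?;
    let right = table_scan(Some("t2"), &schema, None)?.build()?;
    // `LogicalPlanBuilder::union` delegates to the free `union` function below,
    // so the resulting plan's schema is taken from the left input for now.
    let plan = LogicalPlanBuilder::from(left).union(right)?.build()?;
    println!("{}", plan.display_indent());
    Ok(())
}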
@@ -1530,7 +1533,7 @@ pub fn get_unnested_columns( | DataType::FixedSizeList(field, _) | DataType::LargeList(field) => { let new_field = Arc::new(Field::new( - col_name.clone(), + col_name, field.data_type().clone(), // Unnesting may produce NULLs even if the list is not null. // For example: unnset([1], []) -> 1, null @@ -2069,7 +2072,7 @@ mod tests { let schema = Schema::new(vec![ Field::new("scalar", DataType::UInt32, false), Field::new_list("strings", string_field, false), - Field::new_list("structs", struct_field_in_list.clone(), false), + Field::new_list("structs", struct_field_in_list, false), Field::new( "struct_singular", DataType::Struct(Fields::from(vec![ diff --git a/datafusion/expr/src/logical_plan/display.rs b/datafusion/expr/src/logical_plan/display.rs index 343eda056ffe..5a881deb54e1 100644 --- a/datafusion/expr/src/logical_plan/display.rs +++ b/datafusion/expr/src/logical_plan/display.rs @@ -387,19 +387,16 @@ impl<'a, 'b> PgJsonVisitor<'a, 'b> { } if !full_filter.is_empty() { - object["Full Filters"] = serde_json::Value::String( - expr_vec_fmt!(full_filter).to_string(), - ); + object["Full Filters"] = + serde_json::Value::String(expr_vec_fmt!(full_filter)); }; if !partial_filter.is_empty() { - object["Partial Filters"] = serde_json::Value::String( - expr_vec_fmt!(partial_filter).to_string(), - ); + object["Partial Filters"] = + serde_json::Value::String(expr_vec_fmt!(partial_filter)); } if !unsupported_filters.is_empty() { - object["Unsupported Filters"] = serde_json::Value::String( - expr_vec_fmt!(unsupported_filters).to_string(), - ); + object["Unsupported Filters"] = + serde_json::Value::String(expr_vec_fmt!(unsupported_filters)); } } @@ -595,9 +592,8 @@ impl<'a, 'b> PgJsonVisitor<'a, 'b> { "Select": expr_vec_fmt!(select_expr), }); if let Some(sort_expr) = sort_expr { - object["Sort"] = serde_json::Value::String( - expr_vec_fmt!(sort_expr).to_string(), - ); + object["Sort"] = + serde_json::Value::String(expr_vec_fmt!(sort_expr)); } object diff --git a/datafusion/expr/src/logical_plan/mod.rs b/datafusion/expr/src/logical_plan/mod.rs index b58208591920..5b5a842fa4cf 100644 --- a/datafusion/expr/src/logical_plan/mod.rs +++ b/datafusion/expr/src/logical_plan/mod.rs @@ -26,7 +26,7 @@ pub mod tree_node; pub use builder::{ build_join_schema, table_scan, union, wrap_projection_for_join_if_necessary, - LogicalPlanBuilder, UNNAMED_TABLE, + LogicalPlanBuilder, LogicalTableSource, UNNAMED_TABLE, }; pub use ddl::{ CreateCatalog, CreateCatalogSchema, CreateExternalTable, CreateFunction, diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index f93b7c0fedd0..359de2d30a57 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -26,7 +26,7 @@ use super::dml::CopyTo; use super::DdlStatement; use crate::builder::{change_redundant_column, unnest_with_options}; use crate::expr::{Placeholder, Sort as SortExpr, WindowFunction}; -use crate::expr_rewriter::{create_col_from_scalar_expr, normalize_cols}; +use crate::expr_rewriter::{create_col_from_scalar_expr, normalize_cols, NamePreserver}; use crate::logical_plan::display::{GraphvizVisitor, IndentVisitor}; use crate::logical_plan::extension::UserDefinedLogicalNode; use crate::logical_plan::{DmlStatement, Statement}; @@ -51,7 +51,6 @@ use datafusion_common::{ // backwards compatibility use crate::display::PgJsonVisitor; -use crate::logical_plan::tree_node::unwrap_arc; pub use datafusion_common::display::{PlanType, StringifiedPlan, ToStringifiedPlan}; pub 
use datafusion_common::{JoinConstraint, JoinType}; @@ -643,9 +642,12 @@ impl LogicalPlan { // todo it isn't clear why the schema is not recomputed here Ok(LogicalPlan::Values(Values { schema, values })) } - LogicalPlan::Filter(Filter { predicate, input }) => { - Filter::try_new(predicate, input).map(LogicalPlan::Filter) - } + LogicalPlan::Filter(Filter { + predicate, + input, + having, + }) => Filter::try_new_internal(predicate, input, having) + .map(LogicalPlan::Filter), LogicalPlan::Repartition(_) => Ok(self), LogicalPlan::Window(Window { input, @@ -767,7 +769,7 @@ impl LogicalPlan { .. }) => { // Update schema with unnested column type. - unnest_with_options(unwrap_arc(input), exec_columns, options) + unnest_with_options(Arc::unwrap_or_clone(input), exec_columns, options) } } } @@ -1157,10 +1159,7 @@ impl LogicalPlan { Ok(if let LogicalPlan::Prepare(prepare_lp) = plan_with_values { param_values.verify(&prepare_lp.data_types)?; // try and take ownership of the input if is not shared, clone otherwise - match Arc::try_unwrap(prepare_lp.input) { - Ok(input) => input, - Err(arc_input) => arc_input.as_ref().clone(), - } + Arc::unwrap_or_clone(prepare_lp.input) } else { plan_with_values }) @@ -1336,15 +1335,20 @@ impl LogicalPlan { ) -> Result { self.transform_up_with_subqueries(|plan| { let schema = Arc::clone(plan.schema()); + let name_preserver = NamePreserver::new(&plan); plan.map_expressions(|e| { - e.infer_placeholder_types(&schema)?.transform_up(|e| { - if let Expr::Placeholder(Placeholder { id, .. }) = e { - let value = param_values.get_placeholders_with_values(&id)?; - Ok(Transformed::yes(Expr::Literal(value))) - } else { - Ok(Transformed::no(e)) - } - }) + let original_name = name_preserver.save(&e)?; + let transformed_expr = + e.infer_placeholder_types(&schema)?.transform_up(|e| { + if let Expr::Placeholder(Placeholder { id, .. }) = e { + let value = param_values.get_placeholders_with_values(&id)?; + Ok(Transformed::yes(Expr::Literal(value))) + } else { + Ok(Transformed::no(e)) + } + })?; + // Preserve name to avoid breaking column references to this expression + transformed_expr.map_data(|expr| original_name.restore(expr)) }) }) .map(|res| res.data) @@ -2080,6 +2084,8 @@ pub struct Filter { pub predicate: Expr, /// The incoming logical plan pub input: Arc, + /// The flag to indicate if the filter is a having clause + pub having: bool, } impl Filter { @@ -2088,6 +2094,20 @@ impl Filter { /// Notes: as Aliases have no effect on the output of a filter operator, /// they are removed from the predicate expression. pub fn try_new(predicate: Expr, input: Arc) -> Result { + Self::try_new_internal(predicate, input, false) + } + + /// Create a new filter operator for a having clause. + /// This is similar to a filter, but its having flag is set to true. + pub fn try_new_with_having(predicate: Expr, input: Arc) -> Result { + Self::try_new_internal(predicate, input, true) + } + + fn try_new_internal( + predicate: Expr, + input: Arc, + having: bool, + ) -> Result { // Filter predicates must return a boolean value so we try and validate that here. // Note that it is not always possible to resolve the predicate expression during plan // construction (such as with correlated subqueries) so we make a best effort here and @@ -2104,6 +2124,7 @@ impl Filter { Ok(Self { predicate: predicate.unalias_nested().data, input, + having, }) } @@ -2907,7 +2928,11 @@ impl Debug for Subquery { } } -/// Logical partitioning schemes supported by the repartition operator. 
+/// Logical partitioning schemes supported by [`LogicalPlan::Repartition`] +/// +/// See [`Partitioning`] for more details on partitioning +/// +/// [`Partitioning`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/enum.Partitioning.html# #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Partitioning { /// Allocate batches using a round-robin algorithm and the specified number of partitions diff --git a/datafusion/expr/src/logical_plan/tree_node.rs b/datafusion/expr/src/logical_plan/tree_node.rs index dbe43128fd38..273404c8df31 100644 --- a/datafusion/expr/src/logical_plan/tree_node.rs +++ b/datafusion/expr/src/logical_plan/tree_node.rs @@ -87,8 +87,17 @@ impl TreeNode for LogicalPlan { schema, }) }), - LogicalPlan::Filter(Filter { predicate, input }) => rewrite_arc(input, f)? - .update_data(|input| LogicalPlan::Filter(Filter { predicate, input })), + LogicalPlan::Filter(Filter { + predicate, + input, + having, + }) => rewrite_arc(input, f)?.update_data(|input| { + LogicalPlan::Filter(Filter { + predicate, + input, + having, + }) + }), LogicalPlan::Repartition(Repartition { input, partitioning_scheme, @@ -370,21 +379,12 @@ impl TreeNode for LogicalPlan { } } -/// Converts a `Arc` without copying, if possible. Copies the plan -/// if there is a shared reference -pub fn unwrap_arc(plan: Arc) -> LogicalPlan { - Arc::try_unwrap(plan) - // if None is returned, there is another reference to this - // LogicalPlan, so we can not own it, and must clone instead - .unwrap_or_else(|node| node.as_ref().clone()) -} - /// Applies `f` to rewrite a `Arc` without copying, if possible fn rewrite_arc Result>>( plan: Arc, mut f: F, ) -> Result>> { - f(unwrap_arc(plan))?.map_data(|new_plan| Ok(Arc::new(new_plan))) + f(Arc::unwrap_or_clone(plan))?.map_data(|new_plan| Ok(Arc::new(new_plan))) } /// rewrite a `Vec` of `Arc` without copying, if possible @@ -561,10 +561,17 @@ impl LogicalPlan { value.into_iter().map_until_stop_and_collect(&mut f) })? .update_data(|values| LogicalPlan::Values(Values { schema, values })), - LogicalPlan::Filter(Filter { predicate, input }) => f(predicate)? - .update_data(|predicate| { - LogicalPlan::Filter(Filter { predicate, input }) - }), + LogicalPlan::Filter(Filter { + predicate, + input, + having, + }) => f(predicate)?.update_data(|predicate| { + LogicalPlan::Filter(Filter { + predicate, + input, + having, + }) + }), LogicalPlan::Repartition(Repartition { input, partitioning_scheme, diff --git a/datafusion/expr/src/table_source.rs b/datafusion/expr/src/table_source.rs index 2de3cc923315..8b8d2dfcf2df 100644 --- a/datafusion/expr/src/table_source.rs +++ b/datafusion/expr/src/table_source.rs @@ -22,7 +22,7 @@ use crate::{Expr, LogicalPlan}; use arrow::datatypes::SchemaRef; use datafusion_common::{Constraints, Result}; -use std::any::Any; +use std::{any::Any, borrow::Cow}; /// Indicates how a filter expression is handled by /// [`TableProvider::scan`]. @@ -122,7 +122,7 @@ pub trait TableSource: Sync + Send { } /// Get the Logical plan of this table provider, if available. 
- fn get_logical_plan(&self) -> Option<&LogicalPlan> { + fn get_logical_plan(&self) -> Option> { None } diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index cb278c767974..7b4b3bb95c46 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -337,6 +337,9 @@ where /// let expr = geometric_mean.call(vec![col("a")]); /// ``` pub trait AggregateUDFImpl: Debug + Send + Sync { + // Note: When adding any methods (with default implementations), remember to add them also + // into the AliasedAggregateUDFImpl below! + /// Returns this object as an [`Any`] trait object fn as_any(&self) -> &dyn Any; @@ -635,6 +638,60 @@ impl AggregateUDFImpl for AliasedAggregateUDFImpl { &self.aliases } + fn state_fields(&self, args: StateFieldsArgs) -> Result> { + self.inner.state_fields(args) + } + + fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool { + self.inner.groups_accumulator_supported(args) + } + + fn create_groups_accumulator( + &self, + args: AccumulatorArgs, + ) -> Result> { + self.inner.create_groups_accumulator(args) + } + + fn create_sliding_accumulator( + &self, + args: AccumulatorArgs, + ) -> Result> { + self.inner.accumulator(args) + } + + fn with_beneficial_ordering( + self: Arc, + beneficial_ordering: bool, + ) -> Result>> { + Arc::clone(&self.inner) + .with_beneficial_ordering(beneficial_ordering) + .map(|udf| { + udf.map(|udf| { + Arc::new(AliasedAggregateUDFImpl { + inner: udf, + aliases: self.aliases.clone(), + }) as Arc + }) + }) + } + + fn order_sensitivity(&self) -> AggregateOrderSensitivity { + self.inner.order_sensitivity() + } + + fn simplify(&self) -> Option { + self.inner.simplify() + } + + fn reverse_expr(&self) -> ReversedUDAF { + self.inner.reverse_expr() + } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + self.inner.coerce_types(arg_types) + } + fn equals(&self, other: &dyn AggregateUDFImpl) -> bool { if let Some(other) = other.as_any().downcast_ref::() { self.inner.equals(other.inner.as_ref()) && self.aliases == other.aliases @@ -649,6 +706,10 @@ impl AggregateUDFImpl for AliasedAggregateUDFImpl { self.aliases.hash(hasher); hasher.finish() } + + fn is_descending(&self) -> Option { + self.inner.is_descending() + } } /// Implementation of [`AggregateUDFImpl`] that wraps the function style pointers diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index a4584038e48b..be3f811dbe51 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -346,6 +346,9 @@ where /// let expr = add_one.call(vec![col("a")]); /// ``` pub trait ScalarUDFImpl: Debug + Send + Sync { + // Note: When adding any methods (with default implementations), remember to add them also + // into the AliasedScalarUDFImpl below! 
+ /// Returns this object as an [`Any`] trait object fn as_any(&self) -> &dyn Any; @@ -632,6 +635,14 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl { self.inner.name() } + fn display_name(&self, args: &[Expr]) -> Result { + self.inner.display_name(args) + } + + fn schema_name(&self, args: &[Expr]) -> Result { + self.inner.schema_name(args) + } + fn signature(&self) -> &Signature { self.inner.signature() } @@ -640,12 +651,57 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl { self.inner.return_type(arg_types) } + fn aliases(&self) -> &[String] { + &self.aliases + } + + fn return_type_from_exprs( + &self, + args: &[Expr], + schema: &dyn ExprSchema, + arg_types: &[DataType], + ) -> Result { + self.inner.return_type_from_exprs(args, schema, arg_types) + } + fn invoke(&self, args: &[ColumnarValue]) -> Result { self.inner.invoke(args) } - fn aliases(&self) -> &[String] { - &self.aliases + fn invoke_no_args(&self, number_rows: usize) -> Result { + self.inner.invoke_no_args(number_rows) + } + + fn simplify( + &self, + args: Vec, + info: &dyn SimplifyInfo, + ) -> Result { + self.inner.simplify(args, info) + } + + fn short_circuits(&self) -> bool { + self.inner.short_circuits() + } + + fn evaluate_bounds(&self, input: &[&Interval]) -> Result { + self.inner.evaluate_bounds(input) + } + + fn propagate_constraints( + &self, + interval: &Interval, + inputs: &[&Interval], + ) -> Result>> { + self.inner.propagate_constraints(interval, inputs) + } + + fn output_ordering(&self, inputs: &[ExprProperties]) -> Result { + self.inner.output_ordering(inputs) + } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + self.inner.coerce_types(arg_types) } fn equals(&self, other: &dyn ScalarUDFImpl) -> bool { diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 88b3d613cb43..e5fdaaceb439 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -266,6 +266,9 @@ where /// .unwrap(); /// ``` pub trait WindowUDFImpl: Debug + Send + Sync { + // Note: When adding any methods (with default implementations), remember to add them also + // into the AliasedWindowUDFImpl below! 
+ /// Returns this object as an [`Any`] trait object fn as_any(&self) -> &dyn Any; @@ -428,6 +431,10 @@ impl WindowUDFImpl for AliasedWindowUDFImpl { &self.aliases } + fn simplify(&self) -> Option { + self.inner.simplify() + } + fn equals(&self, other: &dyn WindowUDFImpl) -> bool { if let Some(other) = other.as_any().downcast_ref::() { self.inner.equals(other.inner.as_ref()) && self.aliases == other.aliases @@ -442,6 +449,18 @@ impl WindowUDFImpl for AliasedWindowUDFImpl { self.aliases.hash(hasher); hasher.finish() } + + fn nullable(&self) -> bool { + self.inner.nullable() + } + + fn sort_options(&self) -> Option { + self.inner.sort_options() + } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + self.inner.coerce_types(arg_types) + } } /// Implementation of [`WindowUDFImpl`] that wraps the function style pointers diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 5f5c468fa2f5..a01d5ef8973a 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -463,7 +463,7 @@ pub fn expand_qualified_wildcard( /// if bool is true SortExpr comes from `PARTITION BY` column, if false comes from `ORDER BY` column type WindowSortKey = Vec<(Expr, bool)>; -/// Generate a sort key for a given window expr's partition_by and order_bu expr +/// Generate a sort key for a given window expr's partition_by and order_by expr pub fn generate_sort_key( partition_by: &[Expr], order_by: &[Expr], @@ -569,7 +569,7 @@ pub fn compare_sort_expr( } Ordering::Equal } - _ => Ordering::Equal, + _ => panic!("Sort expressions must be of type Sort"), } } @@ -804,6 +804,15 @@ pub fn find_base_plan(input: &LogicalPlan) -> &LogicalPlan { match input { LogicalPlan::Window(window) => find_base_plan(&window.input), LogicalPlan::Aggregate(agg) => find_base_plan(&agg.input), + LogicalPlan::Filter(filter) => { + if filter.having { + // If a filter is used for a having clause, its input plan is an aggregation. + // We should expand the wildcard expression based on the aggregation's input plan. + find_base_plan(&filter.input) + } else { + input + } + } _ => input, } } diff --git a/datafusion/functions-aggregate-common/src/aggregate.rs b/datafusion/functions-aggregate-common/src/aggregate.rs index 698d1350cb61..c9cbaa8396fc 100644 --- a/datafusion/functions-aggregate-common/src/aggregate.rs +++ b/datafusion/functions-aggregate-common/src/aggregate.rs @@ -15,172 +15,5 @@ // specific language governing permissions and limitations // under the License. -//! [`AggregateExpr`] which defines the interface all aggregate expressions -//! (built-in and custom) need to satisfy. - -use crate::order::AggregateOrderSensitivity; -use arrow::datatypes::{DataType, Field}; -use datafusion_common::{exec_err, not_impl_err, Result, ScalarValue}; -use datafusion_expr_common::accumulator::Accumulator; -use datafusion_expr_common::groups_accumulator::GroupsAccumulator; -use datafusion_physical_expr_common::physical_expr::PhysicalExpr; -use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; -use std::fmt::Debug; -use std::{any::Any, sync::Arc}; - pub mod count_distinct; pub mod groups_accumulator; - -/// An aggregate expression that: -/// * knows its resulting field -/// * knows how to create its accumulator -/// * knows its accumulator's state's field -/// * knows the expressions from whose its accumulator will receive values -/// -/// Any implementation of this trait also needs to implement the -/// `PartialEq` to allows comparing equality between the -/// trait objects. 
-pub trait AggregateExpr: Send + Sync + Debug + PartialEq { - /// Returns the aggregate expression as [`Any`] so that it can be - /// downcast to a specific implementation. - fn as_any(&self) -> &dyn Any; - - /// the field of the final result of this aggregation. - fn field(&self) -> Result; - - /// the accumulator used to accumulate values from the expressions. - /// the accumulator expects the same number of arguments as `expressions` and must - /// return states with the same description as `state_fields` - fn create_accumulator(&self) -> Result>; - - /// the fields that encapsulate the Accumulator's state - /// the number of fields here equals the number of states that the accumulator contains - fn state_fields(&self) -> Result>; - - /// expressions that are passed to the Accumulator. - /// Single-column aggregations such as `sum` return a single value, others (e.g. `cov`) return many. - fn expressions(&self) -> Vec>; - - /// Order by requirements for the aggregate function - /// By default it is `None` (there is no requirement) - /// Order-sensitive aggregators, such as `FIRST_VALUE(x ORDER BY y)` should implement this - fn order_bys(&self) -> Option<&[PhysicalSortExpr]> { - None - } - - /// Indicates whether aggregator can produce the correct result with any - /// arbitrary input ordering. By default, we assume that aggregate expressions - /// are order insensitive. - fn order_sensitivity(&self) -> AggregateOrderSensitivity { - AggregateOrderSensitivity::Insensitive - } - - /// Sets the indicator whether ordering requirements of the aggregator is - /// satisfied by its input. If this is not the case, aggregators with order - /// sensitivity `AggregateOrderSensitivity::Beneficial` can still produce - /// the correct result with possibly more work internally. - /// - /// # Returns - /// - /// Returns `Ok(Some(updated_expr))` if the process completes successfully. - /// If the expression can benefit from existing input ordering, but does - /// not implement the method, returns an error. Order insensitive and hard - /// requirement aggregators return `Ok(None)`. - fn with_beneficial_ordering( - self: Arc, - _requirement_satisfied: bool, - ) -> Result>> { - if self.order_bys().is_some() && self.order_sensitivity().is_beneficial() { - return exec_err!( - "Should implement with satisfied for aggregator :{:?}", - self.name() - ); - } - Ok(None) - } - - /// Human readable name such as `"MIN(c2)"`. The default - /// implementation returns placeholder text. - fn name(&self) -> &str { - "AggregateExpr: default name" - } - - /// If the aggregate expression has a specialized - /// [`GroupsAccumulator`] implementation. If this returns true, - /// `[Self::create_groups_accumulator`] will be called. - fn groups_accumulator_supported(&self) -> bool { - false - } - - /// Return a specialized [`GroupsAccumulator`] that manages state - /// for all groups. - /// - /// For maximum performance, a [`GroupsAccumulator`] should be - /// implemented in addition to [`Accumulator`]. - fn create_groups_accumulator(&self) -> Result> { - not_impl_err!("GroupsAccumulator hasn't been implemented for {self:?} yet") - } - - /// Construct an expression that calculates the aggregate in reverse. - /// Typically the "reverse" expression is itself (e.g. SUM, COUNT). - /// For aggregates that do not support calculation in reverse, - /// returns None (which is the default value). 
- fn reverse_expr(&self) -> Option> { - None - } - - /// Creates accumulator implementation that supports retract - fn create_sliding_accumulator(&self) -> Result> { - not_impl_err!("Retractable Accumulator hasn't been implemented for {self:?} yet") - } - - /// Returns all expressions used in the [`AggregateExpr`]. - /// These expressions are (1)function arguments, (2) order by expressions. - fn all_expressions(&self) -> AggregatePhysicalExpressions { - let args = self.expressions(); - let order_bys = self.order_bys().unwrap_or(&[]); - let order_by_exprs = order_bys - .iter() - .map(|sort_expr| Arc::clone(&sort_expr.expr)) - .collect::>(); - AggregatePhysicalExpressions { - args, - order_by_exprs, - } - } - - /// Rewrites [`AggregateExpr`], with new expressions given. The argument should be consistent - /// with the return value of the [`AggregateExpr::all_expressions`] method. - /// Returns `Some(Arc)` if re-write is supported, otherwise returns `None`. - fn with_new_expressions( - &self, - _args: Vec>, - _order_by_exprs: Vec>, - ) -> Option> { - None - } - - /// If this function is max, return (output_field, true) - /// if the function is min, return (output_field, false) - /// otherwise return None (the default) - /// - /// output_field is the name of the column produced by this aggregate - /// - /// Note: this is used to use special aggregate implementations in certain conditions - fn get_minmax_desc(&self) -> Option<(Field, bool)> { - None - } - - /// Returns default value of the function given the input is Null - /// Most of the aggregate function return Null if input is Null, - /// while `count` returns 0 if input is Null - fn default_value(&self, data_type: &DataType) -> Result; -} - -/// Stores the physical expressions used inside the `AggregateExpr`. 
-pub struct AggregatePhysicalExpressions { - /// Aggregate function arguments - pub args: Vec>, - /// Order by expressions - pub order_by_exprs: Vec>, -} diff --git a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs index 3984b02c5fbb..1c97d22ec79c 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs @@ -24,7 +24,7 @@ pub mod nulls; pub mod prim_op; use arrow::{ - array::{ArrayRef, AsArray, BooleanArray, PrimitiveArray, UInt32Builder}, + array::{ArrayRef, AsArray, BooleanArray, PrimitiveArray}, compute, datatypes::UInt32Type, }; @@ -170,7 +170,7 @@ impl GroupsAccumulatorAdapter { let mut groups_with_rows = vec![]; // batch_indices holds indices into values, each group is contiguous - let mut batch_indices = UInt32Builder::with_capacity(0); + let mut batch_indices = vec![]; // offsets[i] is index into batch_indices where the rows for // group_index i starts @@ -184,11 +184,11 @@ impl GroupsAccumulatorAdapter { } groups_with_rows.push(group_index); - batch_indices.append_slice(indices); + batch_indices.extend_from_slice(indices); offset_so_far += indices.len(); offsets.push(offset_so_far); } - let batch_indices = batch_indices.finish(); + let batch_indices = batch_indices.into(); // reorder the values and opt_filter by batch_indices so that // all values for each group are contiguous, then invoke the diff --git a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/accumulate.rs index 455fc5fec450..a0475fe8e446 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/accumulate.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/accumulate.rs @@ -91,36 +91,9 @@ impl NullState { /// * `opt_filter`: if present, only rows for which is Some(true) are included /// * `value_fn`: function invoked for (group_index, value) where value is non null /// - /// # Example + /// See [`accumulate`], for more details on how value_fn is called /// - /// ```text - /// ┌─────────┐ ┌─────────┐ ┌ ─ ─ ─ ─ ┐ - /// │ ┌─────┐ │ │ ┌─────┐ │ ┌─────┐ - /// │ │ 2 │ │ │ │ 200 │ │ │ │ t │ │ - /// │ ├─────┤ │ │ ├─────┤ │ ├─────┤ - /// │ │ 2 │ │ │ │ 100 │ │ │ │ f │ │ - /// │ ├─────┤ │ │ ├─────┤ │ ├─────┤ - /// │ │ 0 │ │ │ │ 200 │ │ │ │ t │ │ - /// │ ├─────┤ │ │ ├─────┤ │ ├─────┤ - /// │ │ 1 │ │ │ │ 200 │ │ │ │NULL │ │ - /// │ ├─────┤ │ │ ├─────┤ │ ├─────┤ - /// │ │ 0 │ │ │ │ 300 │ │ │ │ t │ │ - /// │ └─────┘ │ │ └─────┘ │ └─────┘ - /// └─────────┘ └─────────┘ └ ─ ─ ─ ─ ┘ - /// - /// group_indices values opt_filter - /// ``` - /// - /// In the example above, `value_fn` is invoked for each (group_index, - /// value) pair where `opt_filter[i]` is true and values is non null - /// - /// ```text - /// value_fn(2, 200) - /// value_fn(0, 200) - /// value_fn(0, 300) - /// ``` - /// - /// It also sets + /// When value_fn is called it also sets /// /// 1. 
`self.seen_values[group_index]` to true for all rows that had a non null vale pub fn accumulate( @@ -134,105 +107,14 @@ impl NullState { T: ArrowPrimitiveType + Send, F: FnMut(usize, T::Native) + Send, { - let data: &[T::Native] = values.values(); - assert_eq!(data.len(), group_indices.len()); - // ensure the seen_values is big enough (start everything at // "not seen" valid) let seen_values = initialize_builder(&mut self.seen_values, total_num_groups, false); - - match (values.null_count() > 0, opt_filter) { - // no nulls, no filter, - (false, None) => { - let iter = group_indices.iter().zip(data.iter()); - for (&group_index, &new_value) in iter { - seen_values.set_bit(group_index, true); - value_fn(group_index, new_value); - } - } - // nulls, no filter - (true, None) => { - let nulls = values.nulls().unwrap(); - // This is based on (ahem, COPY/PASTE) arrow::compute::aggregate::sum - // iterate over in chunks of 64 bits for more efficient null checking - let group_indices_chunks = group_indices.chunks_exact(64); - let data_chunks = data.chunks_exact(64); - let bit_chunks = nulls.inner().bit_chunks(); - - let group_indices_remainder = group_indices_chunks.remainder(); - let data_remainder = data_chunks.remainder(); - - group_indices_chunks - .zip(data_chunks) - .zip(bit_chunks.iter()) - .for_each(|((group_index_chunk, data_chunk), mask)| { - // index_mask has value 1 << i in the loop - let mut index_mask = 1; - group_index_chunk.iter().zip(data_chunk.iter()).for_each( - |(&group_index, &new_value)| { - // valid bit was set, real value - let is_valid = (mask & index_mask) != 0; - if is_valid { - seen_values.set_bit(group_index, true); - value_fn(group_index, new_value); - } - index_mask <<= 1; - }, - ) - }); - - // handle any remaining bits (after the initial 64) - let remainder_bits = bit_chunks.remainder_bits(); - group_indices_remainder - .iter() - .zip(data_remainder.iter()) - .enumerate() - .for_each(|(i, (&group_index, &new_value))| { - let is_valid = remainder_bits & (1 << i) != 0; - if is_valid { - seen_values.set_bit(group_index, true); - value_fn(group_index, new_value); - } - }); - } - // no nulls, but a filter - (false, Some(filter)) => { - assert_eq!(filter.len(), group_indices.len()); - // The performance with a filter could be improved by - // iterating over the filter in chunks, rather than a single - // iterator. TODO file a ticket - group_indices - .iter() - .zip(data.iter()) - .zip(filter.iter()) - .for_each(|((&group_index, &new_value), filter_value)| { - if let Some(true) = filter_value { - seen_values.set_bit(group_index, true); - value_fn(group_index, new_value); - } - }) - } - // both null values and filters - (true, Some(filter)) => { - assert_eq!(filter.len(), group_indices.len()); - // The performance with a filter could be improved by - // iterating over the filter in chunks, rather than using - // iterators. 
TODO file a ticket - filter - .iter() - .zip(group_indices.iter()) - .zip(values.iter()) - .for_each(|((filter_value, &group_index), new_value)| { - if let Some(true) = filter_value { - if let Some(new_value) = new_value { - seen_values.set_bit(group_index, true); - value_fn(group_index, new_value) - } - } - }) - } - } + accumulate(group_indices, values, opt_filter, |group_index, value| { + seen_values.set_bit(group_index, true); + value_fn(group_index, value); + }); } /// Invokes `value_fn(group_index, value)` for each non null, non @@ -351,6 +233,144 @@ impl NullState { } } +/// Invokes `value_fn(group_index, value)` for each non null, non +/// filtered value of `value`, +/// +/// # Arguments: +/// +/// * `group_indices`: To which groups do the rows in `values` belong, (aka group_index) +/// * `values`: the input arguments to the accumulator +/// * `opt_filter`: if present, only rows for which is Some(true) are included +/// * `value_fn`: function invoked for (group_index, value) where value is non null +/// +/// # Example +/// +/// ```text +/// ┌─────────┐ ┌─────────┐ ┌ ─ ─ ─ ─ ┐ +/// │ ┌─────┐ │ │ ┌─────┐ │ ┌─────┐ +/// │ │ 2 │ │ │ │ 200 │ │ │ │ t │ │ +/// │ ├─────┤ │ │ ├─────┤ │ ├─────┤ +/// │ │ 2 │ │ │ │ 100 │ │ │ │ f │ │ +/// │ ├─────┤ │ │ ├─────┤ │ ├─────┤ +/// │ │ 0 │ │ │ │ 200 │ │ │ │ t │ │ +/// │ ├─────┤ │ │ ├─────┤ │ ├─────┤ +/// │ │ 1 │ │ │ │ 200 │ │ │ │NULL │ │ +/// │ ├─────┤ │ │ ├─────┤ │ ├─────┤ +/// │ │ 0 │ │ │ │ 300 │ │ │ │ t │ │ +/// │ └─────┘ │ │ └─────┘ │ └─────┘ +/// └─────────┘ └─────────┘ └ ─ ─ ─ ─ ┘ +/// +/// group_indices values opt_filter +/// ``` +/// +/// In the example above, `value_fn` is invoked for each (group_index, +/// value) pair where `opt_filter[i]` is true and values is non null +/// +/// ```text +/// value_fn(2, 200) +/// value_fn(0, 200) +/// value_fn(0, 300) +/// ``` +pub fn accumulate( + group_indices: &[usize], + values: &PrimitiveArray, + opt_filter: Option<&BooleanArray>, + mut value_fn: F, +) where + T: ArrowPrimitiveType + Send, + F: FnMut(usize, T::Native) + Send, +{ + let data: &[T::Native] = values.values(); + assert_eq!(data.len(), group_indices.len()); + + match (values.null_count() > 0, opt_filter) { + // no nulls, no filter, + (false, None) => { + let iter = group_indices.iter().zip(data.iter()); + for (&group_index, &new_value) in iter { + value_fn(group_index, new_value); + } + } + // nulls, no filter + (true, None) => { + let nulls = values.nulls().unwrap(); + // This is based on (ahem, COPY/PASTE) arrow::compute::aggregate::sum + // iterate over in chunks of 64 bits for more efficient null checking + let group_indices_chunks = group_indices.chunks_exact(64); + let data_chunks = data.chunks_exact(64); + let bit_chunks = nulls.inner().bit_chunks(); + + let group_indices_remainder = group_indices_chunks.remainder(); + let data_remainder = data_chunks.remainder(); + + group_indices_chunks + .zip(data_chunks) + .zip(bit_chunks.iter()) + .for_each(|((group_index_chunk, data_chunk), mask)| { + // index_mask has value 1 << i in the loop + let mut index_mask = 1; + group_index_chunk.iter().zip(data_chunk.iter()).for_each( + |(&group_index, &new_value)| { + // valid bit was set, real value + let is_valid = (mask & index_mask) != 0; + if is_valid { + value_fn(group_index, new_value); + } + index_mask <<= 1; + }, + ) + }); + + // handle any remaining bits (after the initial 64) + let remainder_bits = bit_chunks.remainder_bits(); + group_indices_remainder + .iter() + .zip(data_remainder.iter()) + .enumerate() + .for_each(|(i, (&group_index, 
&new_value))| { + let is_valid = remainder_bits & (1 << i) != 0; + if is_valid { + value_fn(group_index, new_value); + } + }); + } + // no nulls, but a filter + (false, Some(filter)) => { + assert_eq!(filter.len(), group_indices.len()); + // The performance with a filter could be improved by + // iterating over the filter in chunks, rather than a single + // iterator. TODO file a ticket + group_indices + .iter() + .zip(data.iter()) + .zip(filter.iter()) + .for_each(|((&group_index, &new_value), filter_value)| { + if let Some(true) = filter_value { + value_fn(group_index, new_value); + } + }) + } + // both null values and filters + (true, Some(filter)) => { + assert_eq!(filter.len(), group_indices.len()); + // The performance with a filter could be improved by + // iterating over the filter in chunks, rather than using + // iterators. TODO file a ticket + filter + .iter() + .zip(group_indices.iter()) + .zip(values.iter()) + .for_each(|((filter_value, &group_index), new_value)| { + if let Some(true) = filter_value { + if let Some(new_value) = new_value { + value_fn(group_index, new_value) + } + } + }) + } + } +} + /// This function is called to update the accumulator state per row /// when the value is not needed (e.g. COUNT) /// diff --git a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/prim_op.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/prim_op.rs index b5c6171af37c..8bbcf756c37c 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/prim_op.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/prim_op.rs @@ -167,7 +167,7 @@ where // Rebuilding input values with a new nulls mask, which is equal to // the union of original nulls and filter mask - let (dt, values_buf, original_nulls) = values.clone().into_parts(); + let (dt, values_buf, original_nulls) = values.into_parts(); let nulls_buf = NullBuffer::union(original_nulls.as_ref(), Some(&filter_nulls)); PrimitiveArray::::new(values_buf, nulls_buf).with_data_type(dt) diff --git a/datafusion/functions-aggregate-common/src/utils.rs b/datafusion/functions-aggregate-common/src/utils.rs index 7b8ce0397af8..4fba772d8ddc 100644 --- a/datafusion/functions-aggregate-common/src/utils.rs +++ b/datafusion/functions-aggregate-common/src/utils.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use std::{any::Any, sync::Arc}; +use std::sync::Arc; use arrow::array::{ArrayRef, AsArray}; use arrow::datatypes::ArrowNativeType; @@ -32,25 +32,6 @@ use datafusion_common::{exec_err, DataFusionError, Result}; use datafusion_expr_common::accumulator::Accumulator; use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; -use crate::aggregate::AggregateExpr; - -/// Downcast a `Box` or `Arc` -/// and return the inner trait object as [`Any`] so -/// that it can be downcast to a specific implementation. -/// -/// This method is used when implementing the `PartialEq` -/// for [`AggregateExpr`] aggregation expressions and allows comparing the equality -/// between the trait objects. -pub fn down_cast_any_ref(any: &dyn Any) -> &dyn Any { - if let Some(obj) = any.downcast_ref::>() { - obj.as_any() - } else if let Some(obj) = any.downcast_ref::>() { - obj.as_any() - } else { - any - } -} - /// Convert scalar values from an accumulator into arrays. 
pub fn get_accum_scalar_values_as_arrays( accum: &mut dyn Accumulator, diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont.rs b/datafusion/functions-aggregate/src/approx_percentile_cont.rs index 89d827e86859..5578aebbf403 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont.rs @@ -31,8 +31,8 @@ use arrow::{ use arrow_schema::{Field, Schema}; use datafusion_common::{ - downcast_value, internal_err, not_impl_err, plan_err, DataFusionError, Result, - ScalarValue, + downcast_value, internal_err, not_impl_datafusion_err, not_impl_err, plan_err, + DataFusionError, Result, ScalarValue, }; use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; use datafusion_expr::type_coercion::aggregates::{INTEGERS, NUMERICS}; @@ -126,9 +126,9 @@ impl ApproxPercentileCont { | DataType::Float32 | DataType::Float64) => { if let Some(max_size) = tdigest_max_size { - ApproxPercentileAccumulator::new_with_max_size(percentile, t.clone(), max_size) + ApproxPercentileAccumulator::new_with_max_size(percentile, t, max_size) }else{ - ApproxPercentileAccumulator::new(percentile, t.clone()) + ApproxPercentileAccumulator::new(percentile, t) } } @@ -154,7 +154,8 @@ fn get_scalar_value(expr: &Arc) -> Result { } fn validate_input_percentile_expr(expr: &Arc) -> Result { - let percentile = match get_scalar_value(expr)? { + let percentile = match get_scalar_value(expr) + .map_err(|_| not_impl_datafusion_err!("Percentile value for 'APPROX_PERCENTILE_CONT' must be a literal, got: {expr}"))? { ScalarValue::Float32(Some(value)) => { value as f64 } @@ -179,7 +180,8 @@ fn validate_input_percentile_expr(expr: &Arc) -> Result { } fn validate_input_max_size_expr(expr: &Arc) -> Result { - let max_size = match get_scalar_value(expr)? { + let max_size = match get_scalar_value(expr) + .map_err(|_| not_impl_datafusion_err!("Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be a literal, got: {expr}"))? 
{ ScalarValue::UInt8(Some(q)) => q as usize, ScalarValue::UInt16(Some(q)) => q as usize, ScalarValue::UInt32(Some(q)) => q as usize, @@ -193,7 +195,7 @@ fn validate_input_max_size_expr(expr: &Arc) -> Result { "Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be UInt > 0 literal (got data type {}).", sv.data_type() ) - } + }, }; Ok(max_size) @@ -458,10 +460,6 @@ impl Accumulator for ApproxPercentileAccumulator { + self.return_type.size() - std::mem::size_of_val(&self.return_type) } - - fn supports_retract_batch(&self) -> bool { - true - } } #[cfg(test)] diff --git a/datafusion/functions-aggregate/src/array_agg.rs b/datafusion/functions-aggregate/src/array_agg.rs index b641d388a7c5..15146fc4a2d8 100644 --- a/datafusion/functions-aggregate/src/array_agg.rs +++ b/datafusion/functions-aggregate/src/array_agg.rs @@ -501,11 +501,8 @@ impl OrderSensitiveArrayAggAccumulator { column_wise_ordering_values.push(array); } - let ordering_array = StructArray::try_new( - struct_field.clone(), - column_wise_ordering_values, - None, - )?; + let ordering_array = + StructArray::try_new(struct_field, column_wise_ordering_values, None)?; Ok(ScalarValue::List(Arc::new(array_into_list_array_nullable( Arc::new(ordering_array), )))) diff --git a/datafusion/functions-aggregate/src/nth_value.rs b/datafusion/functions-aggregate/src/nth_value.rs index cb1ddd4738c4..7425bdfa18e7 100644 --- a/datafusion/functions-aggregate/src/nth_value.rs +++ b/datafusion/functions-aggregate/src/nth_value.rs @@ -374,11 +374,8 @@ impl NthValueAccumulator { column_wise_ordering_values.push(array); } - let ordering_array = StructArray::try_new( - struct_field.clone(), - column_wise_ordering_values, - None, - )?; + let ordering_array = + StructArray::try_new(struct_field, column_wise_ordering_values, None)?; Ok(ScalarValue::List(Arc::new(array_into_list_array_nullable( Arc::new(ordering_array), diff --git a/datafusion/functions-aggregate/src/stddev.rs b/datafusion/functions-aggregate/src/stddev.rs index 180f4ad3cf37..3534fb5b4d26 100644 --- a/datafusion/functions-aggregate/src/stddev.rs +++ b/datafusion/functions-aggregate/src/stddev.rs @@ -19,17 +19,21 @@ use std::any::Any; use std::fmt::{Debug, Formatter}; +use std::sync::Arc; +use arrow::array::Float64Array; use arrow::{array::ArrayRef, datatypes::DataType, datatypes::Field}; use datafusion_common::{internal_err, not_impl_err, Result}; use datafusion_common::{plan_err, ScalarValue}; use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; use datafusion_expr::utils::format_state_name; -use datafusion_expr::{Accumulator, AggregateUDFImpl, Signature, Volatility}; +use datafusion_expr::{ + Accumulator, AggregateUDFImpl, GroupsAccumulator, Signature, Volatility, +}; use datafusion_functions_aggregate_common::stats::StatsType; -use crate::variance::VarianceAccumulator; +use crate::variance::{VarianceAccumulator, VarianceGroupsAccumulator}; make_udaf_expr_and_func!( Stddev, @@ -118,6 +122,17 @@ impl AggregateUDFImpl for Stddev { fn aliases(&self) -> &[String] { &self.alias } + + fn groups_accumulator_supported(&self, acc_args: AccumulatorArgs) -> bool { + !acc_args.is_distinct + } + + fn create_groups_accumulator( + &self, + _args: AccumulatorArgs, + ) -> Result> { + Ok(Box::new(StddevGroupsAccumulator::new(StatsType::Sample))) + } } make_udaf_expr_and_func!( @@ -201,6 +216,19 @@ impl AggregateUDFImpl for StddevPop { Ok(DataType::Float64) } + + fn groups_accumulator_supported(&self, acc_args: AccumulatorArgs) -> bool { + !acc_args.is_distinct + } + + fn 
create_groups_accumulator( + &self, + _args: AccumulatorArgs, + ) -> Result> { + Ok(Box::new(StddevGroupsAccumulator::new( + StatsType::Population, + ))) + } } /// An accumulator to compute the average @@ -267,6 +295,57 @@ impl Accumulator for StddevAccumulator { } } +#[derive(Debug)] +pub struct StddevGroupsAccumulator { + variance: VarianceGroupsAccumulator, +} + +impl StddevGroupsAccumulator { + pub fn new(s_type: StatsType) -> Self { + Self { + variance: VarianceGroupsAccumulator::new(s_type), + } + } +} + +impl GroupsAccumulator for StddevGroupsAccumulator { + fn update_batch( + &mut self, + values: &[ArrayRef], + group_indices: &[usize], + opt_filter: Option<&arrow::array::BooleanArray>, + total_num_groups: usize, + ) -> Result<()> { + self.variance + .update_batch(values, group_indices, opt_filter, total_num_groups) + } + + fn merge_batch( + &mut self, + values: &[ArrayRef], + group_indices: &[usize], + opt_filter: Option<&arrow::array::BooleanArray>, + total_num_groups: usize, + ) -> Result<()> { + self.variance + .merge_batch(values, group_indices, opt_filter, total_num_groups) + } + + fn evaluate(&mut self, emit_to: datafusion_expr::EmitTo) -> Result { + let (mut variances, nulls) = self.variance.variance(emit_to); + variances.iter_mut().for_each(|v| *v = v.sqrt()); + Ok(Arc::new(Float64Array::new(variances.into(), Some(nulls)))) + } + + fn state(&mut self, emit_to: datafusion_expr::EmitTo) -> Result> { + self.variance.state(emit_to) + } + + fn size(&self) -> usize { + self.variance.size() + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/datafusion/functions-aggregate/src/variance.rs b/datafusion/functions-aggregate/src/variance.rs index 4c78a42ea494..f5f2d06e3837 100644 --- a/datafusion/functions-aggregate/src/variance.rs +++ b/datafusion/functions-aggregate/src/variance.rs @@ -18,10 +18,11 @@ //! [`VarianceSample`]: variance sample aggregations. //! [`VariancePopulation`]: variance population aggregations. 
-use std::fmt::Debug; +use std::{fmt::Debug, sync::Arc}; use arrow::{ - array::{ArrayRef, Float64Array, UInt64Array}, + array::{Array, ArrayRef, BooleanArray, Float64Array, UInt64Array}, + buffer::NullBuffer, compute::kernels::cast, datatypes::{DataType, Field}, }; @@ -32,9 +33,11 @@ use datafusion_common::{ use datafusion_expr::{ function::{AccumulatorArgs, StateFieldsArgs}, utils::format_state_name, - Accumulator, AggregateUDFImpl, Signature, Volatility, + Accumulator, AggregateUDFImpl, GroupsAccumulator, Signature, Volatility, +}; +use datafusion_functions_aggregate_common::{ + aggregate::groups_accumulator::accumulate::accumulate, stats::StatsType, }; -use datafusion_functions_aggregate_common::stats::StatsType; make_udaf_expr_and_func!( VarianceSample, @@ -122,6 +125,17 @@ impl AggregateUDFImpl for VarianceSample { fn aliases(&self) -> &[String] { &self.aliases } + + fn groups_accumulator_supported(&self, acc_args: AccumulatorArgs) -> bool { + !acc_args.is_distinct + } + + fn create_groups_accumulator( + &self, + _args: AccumulatorArgs, + ) -> Result> { + Ok(Box::new(VarianceGroupsAccumulator::new(StatsType::Sample))) + } } pub struct VariancePopulation { @@ -196,6 +210,19 @@ impl AggregateUDFImpl for VariancePopulation { fn aliases(&self) -> &[String] { &self.aliases } + + fn groups_accumulator_supported(&self, acc_args: AccumulatorArgs) -> bool { + !acc_args.is_distinct + } + + fn create_groups_accumulator( + &self, + _args: AccumulatorArgs, + ) -> Result> { + Ok(Box::new(VarianceGroupsAccumulator::new( + StatsType::Population, + ))) + } } /// An accumulator to compute variance @@ -239,6 +266,36 @@ impl VarianceAccumulator { } } +#[inline] +fn merge( + count: u64, + mean: f64, + m2: f64, + count2: u64, + mean2: f64, + m22: f64, +) -> (u64, f64, f64) { + let new_count = count + count2; + let new_mean = + mean * count as f64 / new_count as f64 + mean2 * count2 as f64 / new_count as f64; + let delta = mean - mean2; + let new_m2 = + m2 + m22 + delta * delta * count as f64 * count2 as f64 / new_count as f64; + + (new_count, new_mean, new_m2) +} + +#[inline] +fn update(count: u64, mean: f64, m2: f64, value: f64) -> (u64, f64, f64) { + let new_count = count + 1; + let delta1 = value - mean; + let new_mean = delta1 / new_count as f64 + mean; + let delta2 = value - new_mean; + let new_m2 = m2 + delta1 * delta2; + + (new_count, new_mean, new_m2) +} + impl Accumulator for VarianceAccumulator { fn state(&mut self) -> Result> { Ok(vec![ @@ -253,15 +310,8 @@ impl Accumulator for VarianceAccumulator { let arr = downcast_value!(values, Float64Array).iter().flatten(); for value in arr { - let new_count = self.count + 1; - let delta1 = value - self.mean; - let new_mean = delta1 / new_count as f64 + self.mean; - let delta2 = value - new_mean; - let new_m2 = self.m2 + delta1 * delta2; - - self.count += 1; - self.mean = new_mean; - self.m2 = new_m2; + (self.count, self.mean, self.m2) = + update(self.count, self.mean, self.m2, value) } Ok(()) @@ -296,17 +346,14 @@ impl Accumulator for VarianceAccumulator { if c == 0_u64 { continue; } - let new_count = self.count + c; - let new_mean = self.mean * self.count as f64 / new_count as f64 - + means.value(i) * c as f64 / new_count as f64; - let delta = self.mean - means.value(i); - let new_m2 = self.m2 - + m2s.value(i) - + delta * delta * self.count as f64 * c as f64 / new_count as f64; - - self.count = new_count; - self.mean = new_mean; - self.m2 = new_m2; + (self.count, self.mean, self.m2) = merge( + self.count, + self.mean, + self.m2, + c, + means.value(i), + 
m2s.value(i), + ) } Ok(()) } @@ -344,3 +391,183 @@ impl Accumulator for VarianceAccumulator { true } } + +#[derive(Debug)] +pub struct VarianceGroupsAccumulator { + m2s: Vec, + means: Vec, + counts: Vec, + stats_type: StatsType, +} + +impl VarianceGroupsAccumulator { + pub fn new(s_type: StatsType) -> Self { + Self { + m2s: Vec::new(), + means: Vec::new(), + counts: Vec::new(), + stats_type: s_type, + } + } + + fn resize(&mut self, total_num_groups: usize) { + self.m2s.resize(total_num_groups, 0.0); + self.means.resize(total_num_groups, 0.0); + self.counts.resize(total_num_groups, 0); + } + + fn merge( + group_indices: &[usize], + counts: &UInt64Array, + means: &Float64Array, + m2s: &Float64Array, + opt_filter: Option<&BooleanArray>, + mut value_fn: F, + ) where + F: FnMut(usize, u64, f64, f64) + Send, + { + assert_eq!(counts.null_count(), 0); + assert_eq!(means.null_count(), 0); + assert_eq!(m2s.null_count(), 0); + + match opt_filter { + None => { + group_indices + .iter() + .zip(counts.values().iter()) + .zip(means.values().iter()) + .zip(m2s.values().iter()) + .for_each(|(((&group_index, &count), &mean), &m2)| { + value_fn(group_index, count, mean, m2); + }); + } + Some(filter) => { + group_indices + .iter() + .zip(counts.values().iter()) + .zip(means.values().iter()) + .zip(m2s.values().iter()) + .zip(filter.iter()) + .for_each( + |((((&group_index, &count), &mean), &m2), filter_value)| { + if let Some(true) = filter_value { + value_fn(group_index, count, mean, m2); + } + }, + ); + } + } + } + + pub fn variance( + &mut self, + emit_to: datafusion_expr::EmitTo, + ) -> (Vec, NullBuffer) { + let mut counts = emit_to.take_needed(&mut self.counts); + // means are only needed for updating m2s and are not needed for the final result. + // But we still need to take them to ensure the internal state is consistent. 
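+ // The final value below is M2 / count: for StatsType::Sample the count is first decremented by one (Bessel's correction), and groups whose adjusted count is zero are emitted as NULL via the null buffer.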
+ let _ = emit_to.take_needed(&mut self.means); + let m2s = emit_to.take_needed(&mut self.m2s); + + if let StatsType::Sample = self.stats_type { + counts.iter_mut().for_each(|count| { + *count -= 1; + }); + } + let nulls = NullBuffer::from_iter(counts.iter().map(|&count| count != 0)); + let variance = m2s + .iter() + .zip(counts) + .map(|(m2, count)| m2 / count as f64) + .collect(); + (variance, nulls) + } +} + +impl GroupsAccumulator for VarianceGroupsAccumulator { + fn update_batch( + &mut self, + values: &[ArrayRef], + group_indices: &[usize], + opt_filter: Option<&arrow::array::BooleanArray>, + total_num_groups: usize, + ) -> Result<()> { + assert_eq!(values.len(), 1, "single argument to update_batch"); + let values = &cast(&values[0], &DataType::Float64)?; + let values = downcast_value!(values, Float64Array); + + self.resize(total_num_groups); + accumulate(group_indices, values, opt_filter, |group_index, value| { + let (new_count, new_mean, new_m2) = update( + self.counts[group_index], + self.means[group_index], + self.m2s[group_index], + value, + ); + self.counts[group_index] = new_count; + self.means[group_index] = new_mean; + self.m2s[group_index] = new_m2; + }); + Ok(()) + } + + fn merge_batch( + &mut self, + values: &[ArrayRef], + group_indices: &[usize], + opt_filter: Option<&arrow::array::BooleanArray>, + total_num_groups: usize, + ) -> Result<()> { + assert_eq!(values.len(), 3, "two arguments to merge_batch"); + // first batch is counts, second is partial means, third is partial m2s + let partial_counts = downcast_value!(values[0], UInt64Array); + let partial_means = downcast_value!(values[1], Float64Array); + let partial_m2s = downcast_value!(values[2], Float64Array); + + self.resize(total_num_groups); + Self::merge( + group_indices, + partial_counts, + partial_means, + partial_m2s, + opt_filter, + |group_index, partial_count, partial_mean, partial_m2| { + let (new_count, new_mean, new_m2) = merge( + self.counts[group_index], + self.means[group_index], + self.m2s[group_index], + partial_count, + partial_mean, + partial_m2, + ); + self.counts[group_index] = new_count; + self.means[group_index] = new_mean; + self.m2s[group_index] = new_m2; + }, + ); + Ok(()) + } + + fn evaluate(&mut self, emit_to: datafusion_expr::EmitTo) -> Result { + let (variances, nulls) = self.variance(emit_to); + Ok(Arc::new(Float64Array::new(variances.into(), Some(nulls)))) + } + + fn state(&mut self, emit_to: datafusion_expr::EmitTo) -> Result> { + let counts = emit_to.take_needed(&mut self.counts); + let means = emit_to.take_needed(&mut self.means); + let m2s = emit_to.take_needed(&mut self.m2s); + + Ok(vec![ + Arc::new(UInt64Array::new(counts.into(), None)), + Arc::new(Float64Array::new(means.into(), None)), + Arc::new(Float64Array::new(m2s.into(), None)), + ]) + } + + fn size(&self) -> usize { + self.m2s.capacity() * std::mem::size_of::() + + self.means.capacity() * std::mem::size_of::() + + self.counts.capacity() * std::mem::size_of::() + } +} diff --git a/datafusion/functions-nested/Cargo.toml b/datafusion/functions-nested/Cargo.toml index 6a1973ecfed1..bdfb07031b8c 100644 --- a/datafusion/functions-nested/Cargo.toml +++ b/datafusion/functions-nested/Cargo.toml @@ -50,7 +50,8 @@ datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } datafusion-functions = { workspace = true } datafusion-functions-aggregate = { workspace = true } -itertools = { version = "0.12", features = ["use_std"] } +datafusion-physical-expr-common = { workspace = true } +itertools = { workspace = 
true, features = ["use_std"] } log = { workspace = true } paste = "1.0.14" rand = "0.8.5" diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index fe1df2579932..9b4357d0d14f 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -19,14 +19,17 @@ use arrow::array::{Array, ArrayRef, BooleanArray, OffsetSizeTrait}; use arrow::datatypes::DataType; -use arrow::row::{RowConverter, SortField}; +use arrow::row::{RowConverter, Rows, SortField}; +use arrow_array::{Datum, GenericListArray, Scalar}; use datafusion_common::cast::as_generic_list_array; -use datafusion_common::{exec_err, Result}; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use datafusion_common::utils::string_utils::string_array_to_vec; +use datafusion_common::{exec_err, Result, ScalarValue}; +use datafusion_expr::{ColumnarValue, Operator, ScalarUDFImpl, Signature, Volatility}; +use datafusion_physical_expr_common::datum::compare_op_for_nested; use itertools::Itertools; -use crate::utils::check_datatypes; +use crate::utils::make_scalar_function; use std::any::Any; use std::sync::Arc; @@ -93,28 +96,44 @@ impl ScalarUDFImpl for ArrayHas { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - let args = ColumnarValue::values_to_arrays(args)?; - - if args.len() != 2 { - return exec_err!("array_has needs two arguments"); + // Always return null if the second argument is null + // i.e. array_has(array, null) -> null + if let ColumnarValue::Scalar(s) = &args[1] { + if s.is_null() { + return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None))); + } } - let array_type = args[0].data_type(); - - match array_type { - DataType::List(_) => general_array_has_dispatch::( - &args[0], - &args[1], - ComparisonType::Single, - ) - .map(ColumnarValue::Array), - DataType::LargeList(_) => general_array_has_dispatch::( - &args[0], - &args[1], - ComparisonType::Single, - ) - .map(ColumnarValue::Array), - _ => exec_err!("array_has does not support type '{array_type:?}'."), + // first, identify if any of the arguments is an Array. If yes, store its `len`, + // as any scalar will need to be converted to an array of len `len`. 
+ let len = args + .iter() + .fold(Option::::None, |acc, arg| match arg { + ColumnarValue::Scalar(_) => acc, + ColumnarValue::Array(a) => Some(a.len()), + }); + + let is_scalar = len.is_none(); + + let result = match args[1] { + ColumnarValue::Array(_) => { + let args = ColumnarValue::values_to_arrays(args)?; + array_has_inner_for_array(&args[0], &args[1]) + } + ColumnarValue::Scalar(_) => { + let haystack = args[0].to_owned().into_array(1)?; + let needle = args[1].to_owned().into_array(1)?; + let needle = Scalar::new(needle); + array_has_inner_for_scalar(&haystack, &needle) + } + }; + + if is_scalar { + // If all inputs are scalar, keeps output as scalar + let result = result.and_then(|arr| ScalarValue::try_from_array(&arr, 0)); + result.map(ColumnarValue::Scalar) + } else { + result.map(ColumnarValue::Array) } } @@ -123,6 +142,116 @@ impl ScalarUDFImpl for ArrayHas { } } +fn array_has_inner_for_scalar( + haystack: &ArrayRef, + needle: &dyn Datum, +) -> Result { + match haystack.data_type() { + DataType::List(_) => array_has_dispatch_for_scalar::(haystack, needle), + DataType::LargeList(_) => array_has_dispatch_for_scalar::(haystack, needle), + _ => exec_err!( + "array_has does not support type '{:?}'.", + haystack.data_type() + ), + } +} + +fn array_has_inner_for_array(haystack: &ArrayRef, needle: &ArrayRef) -> Result { + match haystack.data_type() { + DataType::List(_) => array_has_dispatch_for_array::(haystack, needle), + DataType::LargeList(_) => array_has_dispatch_for_array::(haystack, needle), + _ => exec_err!( + "array_has does not support type '{:?}'.", + haystack.data_type() + ), + } +} + +fn array_has_dispatch_for_array( + haystack: &ArrayRef, + needle: &ArrayRef, +) -> Result { + let haystack = as_generic_list_array::(haystack)?; + let mut boolean_builder = BooleanArray::builder(haystack.len()); + + for (i, arr) in haystack.iter().enumerate() { + if arr.is_none() || needle.is_null(i) { + boolean_builder.append_null(); + continue; + } + let arr = arr.unwrap(); + let needle_row = Scalar::new(needle.slice(i, 1)); + let eq_array = compare_op_for_nested(Operator::Eq, &arr, &needle_row)?; + let is_contained = eq_array.true_count() > 0; + boolean_builder.append_value(is_contained) + } + + Ok(Arc::new(boolean_builder.finish())) +} + +fn array_has_dispatch_for_scalar( + haystack: &ArrayRef, + needle: &dyn Datum, +) -> Result { + let haystack = as_generic_list_array::(haystack)?; + let values = haystack.values(); + let offsets = haystack.value_offsets(); + // If first argument is empty list (second argument is non-null), return false + // i.e. 
array_has([], non-null element) -> false + if values.len() == 0 { + return Ok(Arc::new(BooleanArray::from(vec![Some(false)]))); + } + let eq_array = compare_op_for_nested(Operator::Eq, values, needle)?; + let mut final_contained = vec![None; haystack.len()]; + for (i, offset) in offsets.windows(2).enumerate() { + let start = offset[0].to_usize().unwrap(); + let end = offset[1].to_usize().unwrap(); + let length = end - start; + // For non-nested list, length is 0 for null + if length == 0 { + continue; + } + let sliced_array = eq_array.slice(start, length); + // For nested list, check number of nulls + if sliced_array.null_count() == length { + continue; + } + final_contained[i] = Some(sliced_array.true_count() > 0); + } + + Ok(Arc::new(BooleanArray::from(final_contained))) +} + +fn array_has_all_inner(args: &[ArrayRef]) -> Result { + match args[0].data_type() { + DataType::List(_) => { + array_has_all_and_any_dispatch::(&args[0], &args[1], ComparisonType::All) + } + DataType::LargeList(_) => { + array_has_all_and_any_dispatch::(&args[0], &args[1], ComparisonType::All) + } + _ => exec_err!( + "array_has does not support type '{:?}'.", + args[0].data_type() + ), + } +} + +fn array_has_any_inner(args: &[ArrayRef]) -> Result { + match args[0].data_type() { + DataType::List(_) => { + array_has_all_and_any_dispatch::(&args[0], &args[1], ComparisonType::Any) + } + DataType::LargeList(_) => { + array_has_all_and_any_dispatch::(&args[0], &args[1], ComparisonType::Any) + } + _ => exec_err!( + "array_has does not support type '{:?}'.", + args[0].data_type() + ), + } +} + #[derive(Debug)] pub struct ArrayHasAll { signature: Signature, @@ -161,24 +290,7 @@ impl ScalarUDFImpl for ArrayHasAll { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - let args = ColumnarValue::values_to_arrays(args)?; - if args.len() != 2 { - return exec_err!("array_has_all needs two arguments"); - } - - let array_type = args[0].data_type(); - - match array_type { - DataType::List(_) => { - general_array_has_dispatch::(&args[0], &args[1], ComparisonType::All) - .map(ColumnarValue::Array) - } - DataType::LargeList(_) => { - general_array_has_dispatch::(&args[0], &args[1], ComparisonType::All) - .map(ColumnarValue::Array) - } - _ => exec_err!("array_has_all does not support type '{array_type:?}'."), - } + make_scalar_function(array_has_all_inner)(args) } fn aliases(&self) -> &[String] { @@ -224,25 +336,7 @@ impl ScalarUDFImpl for ArrayHasAny { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - let args = ColumnarValue::values_to_arrays(args)?; - - if args.len() != 2 { - return exec_err!("array_has_any needs two arguments"); - } - - let array_type = args[0].data_type(); - - match array_type { - DataType::List(_) => { - general_array_has_dispatch::(&args[0], &args[1], ComparisonType::Any) - .map(ColumnarValue::Array) - } - DataType::LargeList(_) => { - general_array_has_dispatch::(&args[0], &args[1], ComparisonType::Any) - .map(ColumnarValue::Array) - } - _ => exec_err!("array_has_any does not support type '{array_type:?}'."), - } + make_scalar_function(array_has_any_inner)(args) } fn aliases(&self) -> &[String] { @@ -251,75 +345,114 @@ impl ScalarUDFImpl for ArrayHasAny { } /// Represents the type of comparison for array_has. 
-#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone, Copy)] enum ComparisonType { // array_has_all All, // array_has_any Any, - // array_has - Single, } -fn general_array_has_dispatch( +fn array_has_all_and_any_dispatch( haystack: &ArrayRef, needle: &ArrayRef, comparison_type: ComparisonType, ) -> Result { - let array = if comparison_type == ComparisonType::Single { - let arr = as_generic_list_array::(haystack)?; - check_datatypes("array_has", &[arr.values(), needle])?; - arr - } else { - check_datatypes("array_has", &[haystack, needle])?; - as_generic_list_array::(haystack)? - }; + let haystack = as_generic_list_array::(haystack)?; + let needle = as_generic_list_array::(needle)?; + match needle.data_type() { + DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => { + array_has_all_and_any_string_internal::(haystack, needle, comparison_type) + } + _ => general_array_has_for_all_and_any::(haystack, needle, comparison_type), + } +} +// String comparison for array_has_all and array_has_any +fn array_has_all_and_any_string_internal( + array: &GenericListArray, + needle: &GenericListArray, + comparison_type: ComparisonType, +) -> Result { let mut boolean_builder = BooleanArray::builder(array.len()); - - let converter = RowConverter::new(vec![SortField::new(array.value_type())])?; - - let element = Arc::clone(needle); - let sub_array = if comparison_type != ComparisonType::Single { - as_generic_list_array::(needle)? - } else { - array - }; - for (row_idx, (arr, sub_arr)) in array.iter().zip(sub_array.iter()).enumerate() { + for (arr, sub_arr) in array.iter().zip(needle.iter()) { match (arr, sub_arr) { (Some(arr), Some(sub_arr)) => { - let arr_values = converter.convert_columns(&[arr])?; - let sub_arr_values = if comparison_type != ComparisonType::Single { - converter.convert_columns(&[sub_arr])? - } else { - converter.convert_columns(&[Arc::clone(&element)])? 
- }; - - let mut res = match comparison_type { - ComparisonType::All => sub_arr_values - .iter() - .dedup() - .all(|elem| arr_values.iter().dedup().any(|x| x == elem)), - ComparisonType::Any => sub_arr_values - .iter() - .dedup() - .any(|elem| arr_values.iter().dedup().any(|x| x == elem)), - ComparisonType::Single => arr_values - .iter() - .dedup() - .any(|x| x == sub_arr_values.row(row_idx)), - }; - - if comparison_type == ComparisonType::Any { - res |= res; - } - boolean_builder.append_value(res); + let haystack_array = string_array_to_vec(&arr); + let needle_array = string_array_to_vec(&sub_arr); + boolean_builder.append_value(array_has_string_kernel( + haystack_array, + needle_array, + comparison_type, + )); } - // respect null input (_, _) => { boolean_builder.append_null(); } } } + Ok(Arc::new(boolean_builder.finish())) } + +fn array_has_string_kernel( + haystack: Vec>, + needle: Vec>, + comparison_type: ComparisonType, +) -> bool { + match comparison_type { + ComparisonType::All => needle + .iter() + .dedup() + .all(|x| haystack.iter().dedup().any(|y| y == x)), + ComparisonType::Any => needle + .iter() + .dedup() + .any(|x| haystack.iter().dedup().any(|y| y == x)), + } +} + +// General row comparison for array_has_all and array_has_any +fn general_array_has_for_all_and_any( + haystack: &GenericListArray, + needle: &GenericListArray, + comparison_type: ComparisonType, +) -> Result { + let mut boolean_builder = BooleanArray::builder(haystack.len()); + let converter = RowConverter::new(vec![SortField::new(haystack.value_type())])?; + + for (arr, sub_arr) in haystack.iter().zip(needle.iter()) { + if let (Some(arr), Some(sub_arr)) = (arr, sub_arr) { + let arr_values = converter.convert_columns(&[arr])?; + let sub_arr_values = converter.convert_columns(&[sub_arr])?; + boolean_builder.append_value(general_array_has_all_and_any_kernel( + arr_values, + sub_arr_values, + comparison_type, + )); + } else { + boolean_builder.append_null(); + } + } + + Ok(Arc::new(boolean_builder.finish())) +} + +fn general_array_has_all_and_any_kernel( + haystack_rows: Rows, + needle_rows: Rows, + comparison_type: ComparisonType, +) -> bool { + match comparison_type { + ComparisonType::All => needle_rows.iter().all(|needle_row| { + haystack_rows + .iter() + .any(|haystack_row| haystack_row == needle_row) + }), + ComparisonType::Any => needle_rows.iter().any(|needle_row| { + haystack_rows + .iter() + .any(|haystack_row| haystack_row == needle_row) + }), + } +} diff --git a/datafusion/functions-nested/src/flatten.rs b/datafusion/functions-nested/src/flatten.rs index 2b383af3d456..b04c35667226 100644 --- a/datafusion/functions-nested/src/flatten.rs +++ b/datafusion/functions-nested/src/flatten.rs @@ -147,7 +147,7 @@ fn flatten_internal( let list_arr = GenericListArray::::new(field, offsets, values, None); Ok(list_arr) } else { - Ok(list_arr.clone()) + Ok(list_arr) } } } diff --git a/datafusion/functions-nested/src/utils.rs b/datafusion/functions-nested/src/utils.rs index 688e1633e5cf..3d5b261618d5 100644 --- a/datafusion/functions-nested/src/utils.rs +++ b/datafusion/functions-nested/src/utils.rs @@ -296,8 +296,7 @@ mod tests { let array3d_1 = Arc::new(array_into_list_array_nullable(array2d_1)) as ArrayRef; let array3d_2 = array_into_list_array_nullable(array2d_2.to_owned()); let res = - align_array_dimensions::(vec![array1d_1, Arc::new(array3d_2.clone())]) - .unwrap(); + align_array_dimensions::(vec![array1d_1, Arc::new(array3d_2)]).unwrap(); let expected = as_list_array(&array3d_1).unwrap(); let expected_dim = 
datafusion_common::utils::list_ndims(array3d_1.data_type()); diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index aea3d4a59e02..43d2796ad7dc 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -52,13 +52,13 @@ pub fn row_number_udwf() -> std::sync::Arc { /// row_number expression #[derive(Debug)] -struct RowNumber { +pub struct RowNumber { signature: Signature, } impl RowNumber { /// Create a new `row_number` function - fn new() -> Self { + pub fn new() -> Self { Self { signature: Signature::any(0, Volatility::Immutable), } diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index 9ef020b772f0..337379a74670 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -161,3 +161,8 @@ required-features = ["string_expressions"] harness = false name = "random" required-features = ["math_expressions"] + +[[bench]] +harness = false +name = "substr" +required-features = ["unicode_expressions"] diff --git a/datafusion/functions/benches/lower.rs b/datafusion/functions/benches/lower.rs index fa963f174e46..934c1c6bd189 100644 --- a/datafusion/functions/benches/lower.rs +++ b/datafusion/functions/benches/lower.rs @@ -17,8 +17,10 @@ extern crate criterion; -use arrow::array::{ArrayRef, StringArray}; -use arrow::util::bench_util::create_string_array_with_len; +use arrow::array::{ArrayRef, StringArray, StringViewBuilder}; +use arrow::util::bench_util::{ + create_string_array_with_len, create_string_view_array_with_len, +}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use datafusion_expr::ColumnarValue; use datafusion_functions::string; @@ -65,6 +67,58 @@ fn create_args3(size: usize) -> Vec { vec![ColumnarValue::Array(array)] } +/// Create an array of args containing StringViews, where all the values in the +/// StringViews are ASCII. +/// * `size` - the length of the StringViews, and +/// * `str_len` - the length of the strings within the array. +/// * `null_density` - the density of null values in the array. +/// * `mixed` - whether the array is mixed between inlined and referenced strings. +fn create_args4( + size: usize, + str_len: usize, + null_density: f32, + mixed: bool, +) -> Vec { + let array = Arc::new(create_string_view_array_with_len( + size, + null_density, + str_len, + mixed, + )); + + vec![ColumnarValue::Array(array)] +} + +/// Create an array of args containing a StringViewArray, where some of the values in the +/// array are non-ASCII. +/// * `size` - the length of the StringArray, and +/// * `non_ascii_density` - the density of non-ASCII values in the array. +/// * `null_density` - the density of null values in the array. 
+fn create_args5( + size: usize, + non_ascii_density: f32, + null_density: f32, +) -> Vec { + let mut string_view_builder = StringViewBuilder::with_capacity(size); + for _ in 0..size { + // sample null_density to determine if the value should be null + if rand::random::() < null_density { + string_view_builder.append_null(); + continue; + } + + // sample non_ascii_density to determine if the value should be non-ASCII + if rand::random::() < non_ascii_density { + string_view_builder.append_value("农历新年农历新年农历新年农历新年农历新年"); + } else { + string_view_builder.append_value("DATAFUSIONDATAFUSIONDATAFUSION"); + } + } + + let array = Arc::new(string_view_builder.finish()) as ArrayRef; + vec![ColumnarValue::Array(array)] +} + fn criterion_benchmark(c: &mut Criterion) { let lower = string::lower(); for size in [1024, 4096, 8192] { @@ -85,6 +139,40 @@ fn criterion_benchmark(c: &mut Criterion) { |b| b.iter(|| black_box(lower.invoke(&args))), ); } + + let sizes = [4096, 8192]; + let str_lens = [10, 64, 128]; + let mixes = [true, false]; + let null_densities = [0.0f32, 0.1f32]; + + for null_density in &null_densities { + for &mixed in &mixes { + for &str_len in &str_lens { + for &size in &sizes { + let args = create_args4(size, str_len, *null_density, mixed); + c.bench_function( + &format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}", + size, str_len, null_density, mixed), + |b| b.iter(|| black_box(lower.invoke(&args))), + ); + + let args = create_args4(size, str_len, *null_density, mixed); + c.bench_function( + &format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}", + size, str_len, null_density, mixed), + |b| b.iter(|| black_box(lower.invoke(&args))), + ); + + let args = create_args5(size, 0.1, *null_density); + c.bench_function( + &format!("lower_some_values_are_nonascii_string_views: size: {}, str_len: {}, non_ascii_density: {}, null_density: {}, mixed: {}", + size, str_len, 0.1, null_density, mixed), + |b| b.iter(|| black_box(lower.invoke(&args))), + ); + } + } + } + } } criterion_group!(benches, criterion_benchmark); diff --git a/datafusion/functions/benches/pad.rs b/datafusion/functions/benches/pad.rs index 5ff1e2fb860d..0c496bc63347 100644 --- a/datafusion/functions/benches/pad.rs +++ b/datafusion/functions/benches/pad.rs @@ -127,11 +127,12 @@ fn criterion_benchmark(c: &mut Criterion) { group.bench_function(BenchmarkId::new("largeutf8 type", size), |b| { b.iter(|| criterion::black_box(rpad().invoke(&args).unwrap())) }); - // - // let args = create_args::(size, 32, true); - // group.bench_function(BenchmarkId::new("stringview type", size), |b| { - // b.iter(|| criterion::black_box(rpad().invoke(&args).unwrap())) - // }); + + // rpad for stringview type + let args = create_args::(size, 32, true); + group.bench_function(BenchmarkId::new("stringview type", size), |b| { + b.iter(|| criterion::black_box(rpad().invoke(&args).unwrap())) + }); group.finish(); } diff --git a/datafusion/functions/benches/substr.rs b/datafusion/functions/benches/substr.rs new file mode 100644 index 000000000000..14a3389da380 --- /dev/null +++ b/datafusion/functions/benches/substr.rs @@ -0,0 +1,202 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +extern crate criterion; + +use arrow::array::{ArrayRef, Int64Array, OffsetSizeTrait}; +use arrow::util::bench_util::{ + create_string_array_with_len, create_string_view_array_with_len, +}; +use criterion::{black_box, criterion_group, criterion_main, Criterion, SamplingMode}; +use datafusion_expr::ColumnarValue; +use datafusion_functions::unicode; +use std::sync::Arc; + +fn create_args_without_count( + size: usize, + str_len: usize, + start_half_way: bool, + use_string_view: bool, +) -> Vec { + let start_array = Arc::new(Int64Array::from( + (0..size) + .map(|_| { + if start_half_way { + (str_len / 2) as i64 + } else { + 1i64 + } + }) + .collect::>(), + )); + + if use_string_view { + let string_array = + Arc::new(create_string_view_array_with_len(size, 0.1, str_len, false)); + vec![ + ColumnarValue::Array(string_array), + ColumnarValue::Array(start_array), + ] + } else { + let string_array = + Arc::new(create_string_array_with_len::(size, 0.1, str_len)); + + vec![ + ColumnarValue::Array(string_array), + ColumnarValue::Array(Arc::clone(&start_array) as ArrayRef), + ] + } +} + +fn create_args_with_count( + size: usize, + str_len: usize, + count_max: usize, + use_string_view: bool, +) -> Vec { + let start_array = + Arc::new(Int64Array::from((0..size).map(|_| 1).collect::>())); + let count = count_max.min(str_len) as i64; + let count_array = Arc::new(Int64Array::from( + (0..size).map(|_| count).collect::>(), + )); + + if use_string_view { + let string_array = + Arc::new(create_string_view_array_with_len(size, 0.1, str_len, false)); + vec![ + ColumnarValue::Array(string_array), + ColumnarValue::Array(start_array), + ColumnarValue::Array(count_array), + ] + } else { + let string_array = + Arc::new(create_string_array_with_len::(size, 0.1, str_len)); + + vec![ + ColumnarValue::Array(string_array), + ColumnarValue::Array(Arc::clone(&start_array) as ArrayRef), + ColumnarValue::Array(Arc::clone(&count_array) as ArrayRef), + ] + } +} + +fn criterion_benchmark(c: &mut Criterion) { + let substr = unicode::substr(); + for size in [1024, 4096] { + // string_len = 12, substring_len=6 (see `create_args_without_count`) + let len = 12; + let mut group = c.benchmark_group("SHORTER THAN 12"); + group.sampling_mode(SamplingMode::Flat); + group.sample_size(10); + + let args = create_args_without_count::(size, len, true, true); + group.bench_function( + &format!("substr_string_view [size={}, strlen={}]", size, len), + |b| b.iter(|| black_box(substr.invoke(&args))), + ); + + let args = create_args_without_count::(size, len, false, false); + group.bench_function( + &format!("substr_string [size={}, strlen={}]", size, len), + |b| b.iter(|| black_box(substr.invoke(&args))), + ); + + let args = create_args_without_count::(size, len, true, false); + group.bench_function( + &format!("substr_large_string [size={}, strlen={}]", size, len), + |b| b.iter(|| black_box(substr.invoke(&args))), + ); + + group.finish(); + + // string_len = 128, 
start=1, count=64, substring_len=64 + let len = 128; + let count = 64; + let mut group = c.benchmark_group("LONGER THAN 12"); + group.sampling_mode(SamplingMode::Flat); + group.sample_size(10); + + let args = create_args_with_count::(size, len, count, true); + group.bench_function( + &format!( + "substr_string_view [size={}, count={}, strlen={}]", + size, count, len, + ), + |b| b.iter(|| black_box(substr.invoke(&args))), + ); + + let args = create_args_with_count::(size, len, count, false); + group.bench_function( + &format!( + "substr_string [size={}, count={}, strlen={}]", + size, count, len, + ), + |b| b.iter(|| black_box(substr.invoke(&args))), + ); + + let args = create_args_with_count::(size, len, count, false); + group.bench_function( + &format!( + "substr_large_string [size={}, count={}, strlen={}]", + size, count, len, + ), + |b| b.iter(|| black_box(substr.invoke(&args))), + ); + + group.finish(); + + // string_len = 128, start=1, count=6, substring_len=6 + let len = 128; + let count = 6; + let mut group = c.benchmark_group("SRC_LEN > 12, SUB_LEN < 12"); + group.sampling_mode(SamplingMode::Flat); + group.sample_size(10); + + let args = create_args_with_count::(size, len, count, true); + group.bench_function( + &format!( + "substr_string_view [size={}, count={}, strlen={}]", + size, count, len, + ), + |b| b.iter(|| black_box(substr.invoke(&args))), + ); + + let args = create_args_with_count::(size, len, count, false); + group.bench_function( + &format!( + "substr_string [size={}, count={}, strlen={}]", + size, count, len, + ), + |b| b.iter(|| black_box(substr.invoke(&args))), + ); + + let args = create_args_with_count::(size, len, count, false); + group.bench_function( + &format!( + "substr_large_string [size={}, count={}, strlen={}]", + size, count, len, + ), + |b| b.iter(|| black_box(substr.invoke(&args))), + ); + + group.finish(); + } +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/functions/src/string/common.rs b/datafusion/functions/src/string/common.rs index 54aebb039046..a5dc22b4d9e4 100644 --- a/datafusion/functions/src/string/common.rs +++ b/datafusion/functions/src/string/common.rs @@ -15,13 +15,15 @@ // specific language governing permissions and limitations // under the License. +//! 
Common utilities for implementing string functions + use std::fmt::{Display, Formatter}; use std::sync::Arc; use arrow::array::{ new_null_array, Array, ArrayAccessor, ArrayDataBuilder, ArrayIter, ArrayRef, GenericStringArray, GenericStringBuilder, OffsetSizeTrait, StringArray, - StringViewArray, + StringBuilder, StringViewArray, }; use arrow::buffer::{Buffer, MutableBuffer, NullBuffer}; use arrow::datatypes::DataType; @@ -212,6 +214,23 @@ where i64, _, >(array, op)?)), + DataType::Utf8View => { + let string_array = as_string_view_array(array)?; + let mut string_builder = StringBuilder::with_capacity( + string_array.len(), + string_array.get_array_memory_size(), + ); + + for str in string_array.iter() { + if let Some(str) = str { + string_builder.append_value(op(str)); + } else { + string_builder.append_null(); + } + } + + Ok(ColumnarValue::Array(Arc::new(string_builder.finish()))) + } other => exec_err!("Unsupported data type {other:?} for function {name}"), }, ColumnarValue::Scalar(scalar) => match scalar { @@ -223,6 +242,10 @@ where let result = a.as_ref().map(|x| op(x)); Ok(ColumnarValue::Scalar(ScalarValue::LargeUtf8(result))) } + ScalarValue::Utf8View(a) => { + let result = a.as_ref().map(|x| op(x)); + Ok(ColumnarValue::Scalar(ScalarValue::Utf8(result))) + } other => exec_err!("Unsupported data type {other:?} for function {name}"), }, } @@ -252,7 +275,69 @@ impl<'a> ColumnarValueRef<'a> { } } +/// Abstracts iteration over different types of string arrays. +/// +/// The [`StringArrayType`] trait helps write generic code for string functions that can work with +/// different types of string arrays. +/// +/// Currently three types are supported: +/// - [`StringArray`] +/// - [`LargeStringArray`] +/// - [`StringViewArray`] +/// +/// It is inspired / copied from [arrow-rs]. +/// +/// [arrow-rs]: https://github.com/apache/arrow-rs/blob/bf0ea9129e617e4a3cf915a900b747cc5485315f/arrow-string/src/like.rs#L151-L157 +/// +/// # Examples +/// Generic function that works for [`StringArray`], [`LargeStringArray`] +/// and [`StringViewArray`]: +/// ``` +/// # use arrow::array::{StringArray, LargeStringArray, StringViewArray}; +/// # use datafusion_functions::string::common::StringArrayType; +/// +/// /// Combines string values for any StringArrayType type. 
It can be invoked on +/// /// and combination of `StringArray`, `LargeStringArray` or `StringViewArray` +/// fn combine_values<'a, S1, S2>(array1: S1, array2: S2) -> Vec +/// where S1: StringArrayType<'a>, S2: StringArrayType<'a> +/// { +/// // iterate over the elements of the 2 arrays in parallel +/// array1 +/// .iter() +/// .zip(array2.iter()) +/// .map(|(s1, s2)| { +/// // if both values are non null, combine them +/// if let (Some(s1), Some(s2)) = (s1, s2) { +/// format!("{s1}{s2}") +/// } else { +/// "None".to_string() +/// } +/// }) +/// .collect() +/// } +/// +/// let string_array = StringArray::from(vec!["foo", "bar"]); +/// let large_string_array = LargeStringArray::from(vec!["foo2", "bar2"]); +/// let string_view_array = StringViewArray::from(vec!["foo3", "bar3"]); +/// +/// // can invoke this function a string array and large string array +/// assert_eq!( +/// combine_values(&string_array, &large_string_array), +/// vec![String::from("foofoo2"), String::from("barbar2")] +/// ); +/// +/// // Can call the same function with string array and string view array +/// assert_eq!( +/// combine_values(&string_array, &string_view_array), +/// vec![String::from("foofoo3"), String::from("barbar3")] +/// ); +/// ``` +/// +/// [`LargeStringArray`]: arrow::array::LargeStringArray pub trait StringArrayType<'a>: ArrayAccessor + Sized { + /// Return an [`ArrayIter`] over the values of the array. + /// + /// This iterator iterates returns `Option<&str>` for each item in the array. fn iter(&self) -> ArrayIter; } diff --git a/datafusion/functions/src/string/lower.rs b/datafusion/functions/src/string/lower.rs index 29ca682c380b..ca324e69c0d2 100644 --- a/datafusion/functions/src/string/lower.rs +++ b/datafusion/functions/src/string/lower.rs @@ -43,7 +43,7 @@ impl LowerFunc { Self { signature: Signature::uniform( 1, - vec![Utf8, LargeUtf8], + vec![Utf8, LargeUtf8, Utf8View], Volatility::Immutable, ), } diff --git a/datafusion/functions/src/string/split_part.rs b/datafusion/functions/src/string/split_part.rs index 19721f0fad28..8d292315a35a 100644 --- a/datafusion/functions/src/string/split_part.rs +++ b/datafusion/functions/src/string/split_part.rs @@ -15,21 +15,23 @@ // specific language governing permissions and limitations // under the License. 
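The split_part.rs changes that follow replace the per-type macro with one generic kernel over Utf8, LargeUtf8, and Utf8View inputs. A standalone, hedged sketch of the field-selection rule it applies (positions are 1-based per the existing doc, and out-of-range positions yield an empty string; the negative/zero position handling shown here is an illustrative assumption, not taken from this diff):

```rust
// Hedged sketch of split_part's field selection; not the DataFusion kernel.
fn split_part(s: &str, delimiter: &str, n: i64) -> String {
    let parts: Vec<&str> = s.split(delimiter).collect();
    let len = parts.len() as i64;
    // Assumed rule: positive n counts from the front (1-based), negative n
    // counts from the end; anything out of range yields an empty string.
    let index = if n > 0 { n - 1 } else { n + len };
    if index >= 0 && index < len {
        parts[index as usize].to_string()
    } else {
        String::new()
    }
}

fn main() {
    // Example carried over from the removed doc comment:
    assert_eq!(split_part("abc~@~def~@~ghi", "~@~", 2), "def");
    assert_eq!(split_part("abc~@~def~@~ghi", "~@~", 9), "");
}
```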
-use std::any::Any; -use std::sync::Arc; - -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; -use arrow::datatypes::DataType; - -use datafusion_common::cast::{ - as_generic_string_array, as_int64_array, as_string_view_array, +use arrow::array::{ + ArrayRef, GenericStringArray, Int64Array, OffsetSizeTrait, StringViewArray, }; -use datafusion_common::{exec_err, Result}; +use arrow::array::{AsArray, GenericStringBuilder}; +use arrow::datatypes::DataType; +use datafusion_common::cast::as_int64_array; +use datafusion_common::ScalarValue; +use datafusion_common::{exec_err, DataFusionError, Result}; use datafusion_expr::TypeSignature::*; use datafusion_expr::{ColumnarValue, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; +use std::any::Any; +use std::sync::Arc; + +use crate::utils::utf8_to_str_type; -use crate::utils::{make_scalar_function, utf8_to_str_type}; +use super::common::StringArrayType; #[derive(Debug)] pub struct SplitPartFunc { @@ -82,36 +84,121 @@ impl ScalarUDFImpl for SplitPartFunc { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - match (args[0].data_type(), args[1].data_type()) { - ( - DataType::Utf8 | DataType::Utf8View, - DataType::Utf8 | DataType::Utf8View, - ) => make_scalar_function(split_part::, vec![])(args), + // First, determine if any of the arguments is an Array + let len = args.iter().find_map(|arg| match arg { + ColumnarValue::Array(a) => Some(a.len()), + _ => None, + }); + + let inferred_length = len.unwrap_or(1); + let is_scalar = len.is_none(); + + // Convert all ColumnarValues to ArrayRefs + let args = args + .iter() + .map(|arg| match arg { + ColumnarValue::Scalar(scalar) => scalar.to_array_of_size(inferred_length), + ColumnarValue::Array(array) => Ok(Arc::clone(array)), + }) + .collect::>>()?; + + // Unpack the ArrayRefs from the arguments + let n_array = as_int64_array(&args[2])?; + let result = match (args[0].data_type(), args[1].data_type()) { + (DataType::Utf8View, DataType::Utf8View) => { + split_part_impl::<&StringViewArray, &StringViewArray, i32>( + args[0].as_string_view(), + args[1].as_string_view(), + n_array, + ) + } + (DataType::Utf8View, DataType::Utf8) => { + split_part_impl::<&StringViewArray, &GenericStringArray, i32>( + args[0].as_string_view(), + args[1].as_string::(), + n_array, + ) + } + (DataType::Utf8View, DataType::LargeUtf8) => { + split_part_impl::<&StringViewArray, &GenericStringArray, i32>( + args[0].as_string_view(), + args[1].as_string::(), + n_array, + ) + } + (DataType::Utf8, DataType::Utf8View) => { + split_part_impl::<&GenericStringArray, &StringViewArray, i32>( + args[0].as_string::(), + args[1].as_string_view(), + n_array, + ) + } + (DataType::LargeUtf8, DataType::Utf8View) => { + split_part_impl::<&GenericStringArray, &StringViewArray, i64>( + args[0].as_string::(), + args[1].as_string_view(), + n_array, + ) + } + (DataType::Utf8, DataType::Utf8) => { + split_part_impl::<&GenericStringArray, &GenericStringArray, i32>( + args[0].as_string::(), + args[1].as_string::(), + n_array, + ) + } (DataType::LargeUtf8, DataType::LargeUtf8) => { - make_scalar_function(split_part::, vec![])(args) + split_part_impl::<&GenericStringArray, &GenericStringArray, i64>( + args[0].as_string::(), + args[1].as_string::(), + n_array, + ) } - (_, DataType::LargeUtf8) => { - make_scalar_function(split_part::, vec![])(args) + (DataType::Utf8, DataType::LargeUtf8) => { + split_part_impl::<&GenericStringArray, &GenericStringArray, i32>( + args[0].as_string::(), + args[1].as_string::(), + n_array, + ) } - 
(DataType::LargeUtf8, _) => { - make_scalar_function(split_part::, vec![])(args) + (DataType::LargeUtf8, DataType::Utf8) => { + split_part_impl::<&GenericStringArray, &GenericStringArray, i64>( + args[0].as_string::(), + args[1].as_string::(), + n_array, + ) } - (first_type, second_type) => exec_err!( - "unsupported first type {} and second type {} for split_part function", - first_type, - second_type - ), + _ => exec_err!("Unsupported combination of argument types for split_part"), + }; + if is_scalar { + // If all inputs are scalar, keep the output as scalar + let result = result.and_then(|arr| ScalarValue::try_from_array(&arr, 0)); + result.map(ColumnarValue::Scalar) + } else { + result.map(ColumnarValue::Array) } } } -macro_rules! process_split_part { - ($string_array: expr, $delimiter_array: expr, $n_array: expr) => {{ - let result = $string_array - .iter() - .zip($delimiter_array.iter()) - .zip($n_array.iter()) - .map(|((string, delimiter), n)| match (string, delimiter, n) { +/// impl +pub fn split_part_impl<'a, StringArrType, DelimiterArrType, StringArrayLen>( + string_array: StringArrType, + delimiter_array: DelimiterArrType, + n_array: &Int64Array, +) -> Result +where + StringArrType: StringArrayType<'a>, + DelimiterArrType: StringArrayType<'a>, + StringArrayLen: OffsetSizeTrait, +{ + let mut builder: GenericStringBuilder = GenericStringBuilder::new(); + + string_array + .iter() + .zip(delimiter_array.iter()) + .zip(n_array.iter()) + .try_for_each(|((string, delimiter), n)| -> Result<(), DataFusionError> { + match (string, delimiter, n) { (Some(string), Some(delimiter), Some(n)) => { let split_string: Vec<&str> = string.split(delimiter).collect(); let len = split_string.len(); @@ -125,58 +212,17 @@ macro_rules! process_split_part { } as usize; if index < len { - Ok(Some(split_string[index])) + builder.append_value(split_string[index]); } else { - Ok(Some("")) + builder.append_value(""); } } - _ => Ok(None), - }) - .collect::>>()?; - Ok(Arc::new(result) as ArrayRef) - }}; -} - -/// Splits string at occurrences of delimiter and returns the n'th field (counting from one). 
-/// split_part('abc~@~def~@~ghi', '~@~', 2) = 'def' -fn split_part( - args: &[ArrayRef], -) -> Result { - let n_array = as_int64_array(&args[2])?; - match (args[0].data_type(), args[1].data_type()) { - (DataType::Utf8View, _) => { - let string_array = as_string_view_array(&args[0])?; - match args[1].data_type() { - DataType::Utf8View => { - let delimiter_array = as_string_view_array(&args[1])?; - process_split_part!(string_array, delimiter_array, n_array) - } - _ => { - let delimiter_array = - as_generic_string_array::(&args[1])?; - process_split_part!(string_array, delimiter_array, n_array) - } - } - } - (_, DataType::Utf8View) => { - let delimiter_array = as_string_view_array(&args[1])?; - match args[0].data_type() { - DataType::Utf8View => { - let string_array = as_string_view_array(&args[0])?; - process_split_part!(string_array, delimiter_array, n_array) - } - _ => { - let string_array = as_generic_string_array::(&args[0])?; - process_split_part!(string_array, delimiter_array, n_array) - } + _ => builder.append_null(), } - } - (_, _) => { - let string_array = as_generic_string_array::(&args[0])?; - let delimiter_array = as_generic_string_array::(&args[1])?; - process_split_part!(string_array, delimiter_array, n_array) - } - } + Ok(()) + })?; + + Ok(Arc::new(builder.finish()) as ArrayRef) } #[cfg(test)] diff --git a/datafusion/functions/src/string/upper.rs b/datafusion/functions/src/string/upper.rs index da31948fbcfa..593e33ab6bb4 100644 --- a/datafusion/functions/src/string/upper.rs +++ b/datafusion/functions/src/string/upper.rs @@ -40,7 +40,7 @@ impl UpperFunc { Self { signature: Signature::uniform( 1, - vec![Utf8, LargeUtf8], + vec![Utf8, LargeUtf8, Utf8View], Volatility::Immutable, ), } diff --git a/datafusion/functions/src/unicode/rpad.rs b/datafusion/functions/src/unicode/rpad.rs index 4bcf102c8793..c1d6f327928f 100644 --- a/datafusion/functions/src/unicode/rpad.rs +++ b/datafusion/functions/src/unicode/rpad.rs @@ -15,20 +15,23 @@ // specific language governing permissions and limitations // under the License. 
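The rpad.rs rewrite below folds the two- and three-argument macro bodies into a single generic `rpad_impl` kernel. A hedged, standalone sketch of the padding rule it implements (the real kernel measures length in grapheme clusters via unicode-segmentation and writes into a GenericStringBuilder; this sketch counts chars for brevity):

```rust
// Hedged sketch of the rpad rule; not the DataFusion kernel.
fn rpad(s: &str, length: usize, fill: &str) -> String {
    let chars: Vec<char> = s.chars().collect();
    if length <= chars.len() {
        // Already long enough: truncate to `length`.
        return chars[..length].iter().collect();
    }
    if fill.is_empty() {
        // Nothing to pad with: return the input unchanged.
        return s.to_string();
    }
    // Append the fill pattern cyclically until the target length is reached.
    let mut out = s.to_string();
    out.extend(fill.chars().cycle().take(length - chars.len()));
    out
}

fn main() {
    assert_eq!(rpad("hi", 5, "xy"), "hixyx"); // example from the doc comment below
    assert_eq!(rpad("alphabet", 3, "xy"), "alp");
}
```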
-use std::any::Any; -use std::sync::Arc; - -use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; -use arrow::datatypes::DataType; -use datafusion_common::cast::{ - as_generic_string_array, as_int64_array, as_string_view_array, -}; -use unicode_segmentation::UnicodeSegmentation; - +use crate::string::common::StringArrayType; use crate::utils::{make_scalar_function, utf8_to_str_type}; +use arrow::array::{ + ArrayRef, AsArray, GenericStringArray, GenericStringBuilder, Int64Array, + OffsetSizeTrait, StringViewArray, +}; +use arrow::datatypes::DataType; +use datafusion_common::cast::as_int64_array; +use datafusion_common::DataFusionError; use datafusion_common::{exec_err, Result}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use std::any::Any; +use std::fmt::Write; +use std::sync::Arc; +use unicode_segmentation::UnicodeSegmentation; +use DataType::{LargeUtf8, Utf8, Utf8View}; #[derive(Debug)] pub struct RPadFunc { @@ -84,170 +87,182 @@ impl ScalarUDFImpl for RPadFunc { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - match args.len() { - 2 => match args[0].data_type() { - DataType::Utf8 | DataType::Utf8View => { - make_scalar_function(rpad::, vec![])(args) - } - DataType::LargeUtf8 => { - make_scalar_function(rpad::, vec![])(args) - } - other => exec_err!("Unsupported data type {other:?} for function rpad"), - }, - 3 => match (args[0].data_type(), args[2].data_type()) { - ( - DataType::Utf8 | DataType::Utf8View, - DataType::Utf8 | DataType::Utf8View, - ) => make_scalar_function(rpad::, vec![])(args), - (DataType::LargeUtf8, DataType::LargeUtf8) => { - make_scalar_function(rpad::, vec![])(args) - } - (DataType::LargeUtf8, DataType::Utf8View | DataType::Utf8) => { - make_scalar_function(rpad::, vec![])(args) - } - (DataType::Utf8View | DataType::Utf8, DataType::LargeUtf8) => { - make_scalar_function(rpad::, vec![])(args) - } - (first_type, last_type) => { - exec_err!("unsupported arguments type for rpad, first argument type is {}, last argument type is {}", first_type, last_type) - } - }, - number => { - exec_err!("unsupported arguments number {} for rpad", number) + match ( + args.len(), + args[0].data_type(), + args.get(2).map(|arg| arg.data_type()), + ) { + (2, Utf8 | Utf8View, _) => { + make_scalar_function(rpad::, vec![])(args) + } + (2, LargeUtf8, _) => make_scalar_function(rpad::, vec![])(args), + (3, Utf8 | Utf8View, Some(Utf8 | Utf8View)) => { + make_scalar_function(rpad::, vec![])(args) + } + (3, LargeUtf8, Some(LargeUtf8)) => { + make_scalar_function(rpad::, vec![])(args) + } + (3, Utf8 | Utf8View, Some(LargeUtf8)) => { + make_scalar_function(rpad::, vec![])(args) + } + (3, LargeUtf8, Some(Utf8 | Utf8View)) => { + make_scalar_function(rpad::, vec![])(args) + } + (_, _, _) => { + exec_err!("Unsupported combination of data types for function rpad") } } } } -macro_rules! 
process_rpad { - // For the two-argument case - ($string_array:expr, $length_array:expr) => {{ - $string_array - .iter() - .zip($length_array.iter()) - .map(|(string, length)| match (string, length) { - (Some(string), Some(length)) => { - if length > i32::MAX as i64 { - return exec_err!("rpad requested length {} too large", length); - } - - let length = if length < 0 { 0 } else { length as usize }; - if length == 0 { - Ok(Some("".to_string())) - } else { - let graphemes = string.graphemes(true).collect::>(); - if length < graphemes.len() { - Ok(Some(graphemes[..length].concat())) - } else { - let mut s = string.to_string(); - s.push_str(" ".repeat(length - graphemes.len()).as_str()); - Ok(Some(s)) - } - } - } - _ => Ok(None), - }) - .collect::>>() - }}; - - // For the three-argument case - ($string_array:expr, $length_array:expr, $fill_array:expr) => {{ - $string_array - .iter() - .zip($length_array.iter()) - .zip($fill_array.iter()) - .map(|((string, length), fill)| match (string, length, fill) { - (Some(string), Some(length), Some(fill)) => { - if length > i32::MAX as i64 { - return exec_err!("rpad requested length {} too large", length); - } - - let length = if length < 0 { 0 } else { length as usize }; - let graphemes = string.graphemes(true).collect::>(); - let fill_chars = fill.chars().collect::>(); +pub fn rpad( + args: &[ArrayRef], +) -> Result { + if args.len() < 2 || args.len() > 3 { + return exec_err!( + "rpad was called with {} arguments. It requires 2 or 3 arguments.", + args.len() + ); + } - if length < graphemes.len() { - Ok(Some(graphemes[..length].concat())) - } else if fill_chars.is_empty() { - Ok(Some(string.to_string())) - } else { - let mut s = string.to_string(); - let char_vector: Vec = (0..length - graphemes.len()) - .map(|l| fill_chars[l % fill_chars.len()]) - .collect(); - s.push_str(&char_vector.iter().collect::()); - Ok(Some(s)) - } - } - _ => Ok(None), - }) - .collect::>>() - }}; + let length_array = as_int64_array(&args[1])?; + match ( + args.len(), + args[0].data_type(), + args.get(2).map(|arg| arg.data_type()), + ) { + (2, Utf8View, _) => { + rpad_impl::<&StringViewArray, &StringViewArray, StringArrayLen>( + args[0].as_string_view(), + length_array, + None, + ) + } + (3, Utf8View, Some(Utf8View)) => { + rpad_impl::<&StringViewArray, &StringViewArray, StringArrayLen>( + args[0].as_string_view(), + length_array, + Some(args[2].as_string_view()), + ) + } + (3, Utf8View, Some(Utf8 | LargeUtf8)) => { + rpad_impl::<&StringViewArray, &GenericStringArray, StringArrayLen>( + args[0].as_string_view(), + length_array, + Some(args[2].as_string::()), + ) + } + (3, Utf8 | LargeUtf8, Some(Utf8View)) => rpad_impl::< + &GenericStringArray, + &StringViewArray, + StringArrayLen, + >( + args[0].as_string::(), + length_array, + Some(args[2].as_string_view()), + ), + (_, _, _) => rpad_impl::< + &GenericStringArray, + &GenericStringArray, + StringArrayLen, + >( + args[0].as_string::(), + length_array, + args.get(2).map(|arg| arg.as_string::()), + ), + } } /// Extends the string to length 'length' by appending the characters fill (a space by default). If the string is already longer than length then it is truncated. 
/// rpad('hi', 5, 'xy') = 'hixyx' -pub fn rpad( - args: &[ArrayRef], -) -> Result { - match (args.len(), args[0].data_type()) { - (2, DataType::Utf8View) => { - let string_array = as_string_view_array(&args[0])?; - let length_array = as_int64_array(&args[1])?; +pub fn rpad_impl<'a, StringArrType, FillArrType, StringArrayLen>( + string_array: StringArrType, + length_array: &Int64Array, + fill_array: Option, +) -> Result +where + StringArrType: StringArrayType<'a>, + FillArrType: StringArrayType<'a>, + StringArrayLen: OffsetSizeTrait, +{ + let mut builder: GenericStringBuilder = GenericStringBuilder::new(); - let result = process_rpad!(string_array, length_array)?; - Ok(Arc::new(result) as ArrayRef) + match fill_array { + None => { + string_array.iter().zip(length_array.iter()).try_for_each( + |(string, length)| -> Result<(), DataFusionError> { + match (string, length) { + (Some(string), Some(length)) => { + if length > i32::MAX as i64 { + return exec_err!( + "rpad requested length {} too large", + length + ); + } + let length = if length < 0 { 0 } else { length as usize }; + if length == 0 { + builder.append_value(""); + } else { + let graphemes = + string.graphemes(true).collect::>(); + if length < graphemes.len() { + builder.append_value(graphemes[..length].concat()); + } else { + builder.write_str(string)?; + builder.write_str( + &" ".repeat(length - graphemes.len()), + )?; + builder.append_value(""); + } + } + } + _ => builder.append_null(), + } + Ok(()) + }, + )?; } - (2, _) => { - let string_array = as_generic_string_array::(&args[0])?; - let length_array = as_int64_array(&args[1])?; + Some(fill_array) => { + string_array + .iter() + .zip(length_array.iter()) + .zip(fill_array.iter()) + .try_for_each( + |((string, length), fill)| -> Result<(), DataFusionError> { + match (string, length, fill) { + (Some(string), Some(length), Some(fill)) => { + if length > i32::MAX as i64 { + return exec_err!( + "rpad requested length {} too large", + length + ); + } + let length = if length < 0 { 0 } else { length as usize }; + let graphemes = + string.graphemes(true).collect::>(); - let result = process_rpad!(string_array, length_array)?; - Ok(Arc::new(result) as ArrayRef) - } - (3, DataType::Utf8View) => { - let string_array = as_string_view_array(&args[0])?; - let length_array = as_int64_array(&args[1])?; - match args[2].data_type() { - DataType::Utf8View => { - let fill_array = as_string_view_array(&args[2])?; - let result = process_rpad!(string_array, length_array, fill_array)?; - Ok(Arc::new(result) as ArrayRef) - } - DataType::Utf8 | DataType::LargeUtf8 => { - let fill_array = as_generic_string_array::(&args[2])?; - let result = process_rpad!(string_array, length_array, fill_array)?; - Ok(Arc::new(result) as ArrayRef) - } - other_type => { - exec_err!("unsupported type for rpad's third operator: {}", other_type) - } - } - } - (3, _) => { - let string_array = as_generic_string_array::(&args[0])?; - let length_array = as_int64_array(&args[1])?; - match args[2].data_type() { - DataType::Utf8View => { - let fill_array = as_string_view_array(&args[2])?; - let result = process_rpad!(string_array, length_array, fill_array)?; - Ok(Arc::new(result) as ArrayRef) - } - DataType::Utf8 | DataType::LargeUtf8 => { - let fill_array = as_generic_string_array::(&args[2])?; - let result = process_rpad!(string_array, length_array, fill_array)?; - Ok(Arc::new(result) as ArrayRef) - } - other_type => { - exec_err!("unsupported type for rpad's third operator: {}", other_type) - } - } + if length < graphemes.len() { + 
builder.append_value(graphemes[..length].concat()); + } else if fill.is_empty() { + builder.append_value(string); + } else { + builder.write_str(string)?; + fill.chars() + .cycle() + .take(length - graphemes.len()) + .for_each(|ch| builder.write_char(ch).unwrap()); + builder.append_value(""); + } + } + _ => builder.append_null(), + } + Ok(()) + }, + )?; } - (other, other_type) => exec_err!( - "rpad requires 2 or 3 arguments with corresponding types, but got {}. number of arguments with {}", - other, other_type - ), } + + Ok(Arc::new(builder.finish()) as ArrayRef) } #[cfg(test)] diff --git a/datafusion/functions/src/unicode/strpos.rs b/datafusion/functions/src/unicode/strpos.rs index 702baf6e8fa7..cf10b18ae338 100644 --- a/datafusion/functions/src/unicode/strpos.rs +++ b/datafusion/functions/src/unicode/strpos.rs @@ -19,11 +19,10 @@ use std::any::Any; use std::sync::Arc; use arrow::array::{ - ArrayRef, ArrowPrimitiveType, GenericStringArray, OffsetSizeTrait, PrimitiveArray, + ArrayAccessor, ArrayIter, ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray, }; use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type}; -use datafusion_common::cast::as_generic_string_array; use datafusion_common::{exec_err, Result}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; @@ -52,6 +51,9 @@ impl StrposFunc { Exact(vec![Utf8, LargeUtf8]), Exact(vec![LargeUtf8, Utf8]), Exact(vec![LargeUtf8, LargeUtf8]), + Exact(vec![Utf8View, Utf8View]), + Exact(vec![Utf8View, Utf8]), + Exact(vec![Utf8View, LargeUtf8]), ], Volatility::Immutable, ), @@ -78,21 +80,7 @@ impl ScalarUDFImpl for StrposFunc { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - match (args[0].data_type(), args[1].data_type()) { - (DataType::Utf8, DataType::Utf8) => { - make_scalar_function(strpos::, vec![])(args) - } - (DataType::Utf8, DataType::LargeUtf8) => { - make_scalar_function(strpos::, vec![])(args) - } - (DataType::LargeUtf8, DataType::Utf8) => { - make_scalar_function(strpos::, vec![])(args) - } - (DataType::LargeUtf8, DataType::LargeUtf8) => { - make_scalar_function(strpos::, vec![])(args) - } - other => exec_err!("Unsupported data type {other:?} for function strpos"), - } + make_scalar_function(strpos, vec![])(args) } fn aliases(&self) -> &[String] { @@ -100,30 +88,71 @@ impl ScalarUDFImpl for StrposFunc { } } +fn strpos(args: &[ArrayRef]) -> Result { + match (args[0].data_type(), args[1].data_type()) { + (DataType::Utf8, DataType::Utf8) => { + let string_array = args[0].as_string::(); + let substring_array = args[1].as_string::(); + calculate_strpos::<_, _, Int32Type>(string_array, substring_array) + } + (DataType::Utf8, DataType::LargeUtf8) => { + let string_array = args[0].as_string::(); + let substring_array = args[1].as_string::(); + calculate_strpos::<_, _, Int32Type>(string_array, substring_array) + } + (DataType::LargeUtf8, DataType::Utf8) => { + let string_array = args[0].as_string::(); + let substring_array = args[1].as_string::(); + calculate_strpos::<_, _, Int64Type>(string_array, substring_array) + } + (DataType::LargeUtf8, DataType::LargeUtf8) => { + let string_array = args[0].as_string::(); + let substring_array = args[1].as_string::(); + calculate_strpos::<_, _, Int64Type>(string_array, substring_array) + } + (DataType::Utf8View, DataType::Utf8View) => { + let string_array = args[0].as_string_view(); + let substring_array = args[1].as_string_view(); + calculate_strpos::<_, _, Int32Type>(string_array, substring_array) + } + 
(DataType::Utf8View, DataType::Utf8) => { + let string_array = args[0].as_string_view(); + let substring_array = args[1].as_string::(); + calculate_strpos::<_, _, Int32Type>(string_array, substring_array) + } + (DataType::Utf8View, DataType::LargeUtf8) => { + let string_array = args[0].as_string_view(); + let substring_array = args[1].as_string::(); + calculate_strpos::<_, _, Int32Type>(string_array, substring_array) + } + + other => { + exec_err!("Unsupported data type combination {other:?} for function strpos") + } + } +} + /// Returns starting index of specified substring within string, or zero if it's not present. (Same as position(substring in string), but note the reversed argument order.) /// strpos('high', 'ig') = 2 /// The implementation uses UTF-8 code points as characters -fn strpos( - args: &[ArrayRef], +fn calculate_strpos<'a, V1, V2, T: ArrowPrimitiveType>( + string_array: V1, + substring_array: V2, ) -> Result where - T0::Native: OffsetSizeTrait, - T1::Native: OffsetSizeTrait, + V1: ArrayAccessor, + V2: ArrayAccessor, { - let string_array: &GenericStringArray = - as_generic_string_array::(&args[0])?; - - let substring_array: &GenericStringArray = - as_generic_string_array::(&args[1])?; + let string_iter = ArrayIter::new(string_array); + let substring_iter = ArrayIter::new(substring_array); - let result = string_array - .iter() - .zip(substring_array.iter()) + let result = string_iter + .zip(substring_iter) .map(|(string, substring)| match (string, substring) { (Some(string), Some(substring)) => { - // the find method returns the byte index of the substring - // Next, we count the number of the chars until that byte - T0::Native::from_usize( + // The `find` method returns the byte index of the substring. + // We count the number of chars up to that byte index. + T::Native::from_usize( string .find(substring) .map(|x| string[..x].chars().count() + 1) @@ -132,20 +161,21 @@ where } _ => None, }) - .collect::>(); + .collect::>(); Ok(Arc::new(result) as ArrayRef) } #[cfg(test)] -mod test { - use super::*; +mod tests { + use arrow::array::{Array, Int32Array, Int64Array}; + use arrow::datatypes::DataType::{Int32, Int64}; + + use datafusion_common::{Result, ScalarValue}; + use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; + + use crate::unicode::strpos::StrposFunc; use crate::utils::test::test_function; - use arrow::{ - array::{Array as _, Int32Array, Int64Array}, - datatypes::DataType::{Int32, Int64}, - }; - use datafusion_common::ScalarValue; macro_rules! 
test_strpos { ($lhs:literal, $rhs:literal -> $result:literal; $t1:ident $t2:ident $t3:ident $t4:ident $t5:ident) => { @@ -164,21 +194,54 @@ mod test { } #[test] - fn strpos() { - test_strpos!("foo", "bar" -> 0; Utf8 Utf8 i32 Int32 Int32Array); - test_strpos!("foobar", "foo" -> 1; Utf8 Utf8 i32 Int32 Int32Array); - test_strpos!("foobar", "bar" -> 4; Utf8 Utf8 i32 Int32 Int32Array); - - test_strpos!("foo", "bar" -> 0; LargeUtf8 LargeUtf8 i64 Int64 Int64Array); - test_strpos!("foobar", "foo" -> 1; LargeUtf8 LargeUtf8 i64 Int64 Int64Array); - test_strpos!("foobar", "bar" -> 4; LargeUtf8 LargeUtf8 i64 Int64 Int64Array); - - test_strpos!("foo", "bar" -> 0; Utf8 LargeUtf8 i32 Int32 Int32Array); - test_strpos!("foobar", "foo" -> 1; Utf8 LargeUtf8 i32 Int32 Int32Array); - test_strpos!("foobar", "bar" -> 4; Utf8 LargeUtf8 i32 Int32 Int32Array); - - test_strpos!("foo", "bar" -> 0; LargeUtf8 Utf8 i64 Int64 Int64Array); - test_strpos!("foobar", "foo" -> 1; LargeUtf8 Utf8 i64 Int64 Int64Array); - test_strpos!("foobar", "bar" -> 4; LargeUtf8 Utf8 i64 Int64 Int64Array); + fn test_strpos_functions() { + // Utf8 and Utf8 combinations + test_strpos!("alphabet", "ph" -> 3; Utf8 Utf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "a" -> 1; Utf8 Utf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "z" -> 0; Utf8 Utf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "" -> 1; Utf8 Utf8 i32 Int32 Int32Array); + test_strpos!("", "a" -> 0; Utf8 Utf8 i32 Int32 Int32Array); + + // LargeUtf8 and LargeUtf8 combinations + test_strpos!("alphabet", "ph" -> 3; LargeUtf8 LargeUtf8 i64 Int64 Int64Array); + test_strpos!("alphabet", "a" -> 1; LargeUtf8 LargeUtf8 i64 Int64 Int64Array); + test_strpos!("alphabet", "z" -> 0; LargeUtf8 LargeUtf8 i64 Int64 Int64Array); + test_strpos!("alphabet", "" -> 1; LargeUtf8 LargeUtf8 i64 Int64 Int64Array); + test_strpos!("", "a" -> 0; LargeUtf8 LargeUtf8 i64 Int64 Int64Array); + + // Utf8 and LargeUtf8 combinations + test_strpos!("alphabet", "ph" -> 3; Utf8 LargeUtf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "a" -> 1; Utf8 LargeUtf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "z" -> 0; Utf8 LargeUtf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "" -> 1; Utf8 LargeUtf8 i32 Int32 Int32Array); + test_strpos!("", "a" -> 0; Utf8 LargeUtf8 i32 Int32 Int32Array); + + // LargeUtf8 and Utf8 combinations + test_strpos!("alphabet", "ph" -> 3; LargeUtf8 Utf8 i64 Int64 Int64Array); + test_strpos!("alphabet", "a" -> 1; LargeUtf8 Utf8 i64 Int64 Int64Array); + test_strpos!("alphabet", "z" -> 0; LargeUtf8 Utf8 i64 Int64 Int64Array); + test_strpos!("alphabet", "" -> 1; LargeUtf8 Utf8 i64 Int64 Int64Array); + test_strpos!("", "a" -> 0; LargeUtf8 Utf8 i64 Int64 Int64Array); + + // Utf8View and Utf8View combinations + test_strpos!("alphabet", "ph" -> 3; Utf8View Utf8View i32 Int32 Int32Array); + test_strpos!("alphabet", "a" -> 1; Utf8View Utf8View i32 Int32 Int32Array); + test_strpos!("alphabet", "z" -> 0; Utf8View Utf8View i32 Int32 Int32Array); + test_strpos!("alphabet", "" -> 1; Utf8View Utf8View i32 Int32 Int32Array); + test_strpos!("", "a" -> 0; Utf8View Utf8View i32 Int32 Int32Array); + + // Utf8View and Utf8 combinations + test_strpos!("alphabet", "ph" -> 3; Utf8View Utf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "a" -> 1; Utf8View Utf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "z" -> 0; Utf8View Utf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "" -> 1; Utf8View Utf8 i32 Int32 Int32Array); + test_strpos!("", "a" -> 0; Utf8View Utf8 i32 Int32 Int32Array); + + // 
Utf8View and LargeUtf8 combinations + test_strpos!("alphabet", "ph" -> 3; Utf8View LargeUtf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "a" -> 1; Utf8View LargeUtf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "z" -> 0; Utf8View LargeUtf8 i32 Int32 Int32Array); + test_strpos!("alphabet", "" -> 1; Utf8View LargeUtf8 i32 Int32 Int32Array); + test_strpos!("", "a" -> 0; Utf8View LargeUtf8 i32 Int32 Int32Array); } } diff --git a/datafusion/functions/src/unicode/substr.rs b/datafusion/functions/src/unicode/substr.rs index 9fd8c75eab23..db218f9127ae 100644 --- a/datafusion/functions/src/unicode/substr.rs +++ b/datafusion/functions/src/unicode/substr.rs @@ -25,7 +25,7 @@ use arrow::array::{ use arrow::datatypes::DataType; use datafusion_common::cast::as_int64_array; -use datafusion_common::{exec_err, Result}; +use datafusion_common::{exec_datafusion_err, exec_err, Result}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; @@ -144,19 +144,23 @@ where let result = iter .zip(start_array.iter()) .zip(count_array.iter()) - .map(|((string, start), count)| match (string, start, count) { - (Some(string), Some(start), Some(count)) => { - if count < 0 { - exec_err!( + .map(|((string, start), count)| { + match (string, start, count) { + (Some(string), Some(start), Some(count)) => { + if count < 0 { + exec_err!( "negative substring length not allowed: substr(, {start}, {count})" ) - } else { - let skip = max(0, start - 1); - let count = max(0, count + (if start < 1 {start - 1} else {0})); - Ok(Some(string.chars().skip(skip as usize).take(count as usize).collect::())) + } else { + let skip = max(0, start.checked_sub(1).ok_or_else( + || exec_datafusion_err!("negative overflow when calculating skip value") + )?); + let count = max(0, count + (if start < 1 { start - 1 } else { 0 })); + Ok(Some(string.chars().skip(skip as usize).take(count as usize).collect::())) + } } + _ => Ok(None), } - _ => Ok(None), }) .collect::>>()?; @@ -482,6 +486,29 @@ mod tests { Utf8, StringArray ); + test_function!( + SubstrFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::from("abc")), + ColumnarValue::Scalar(ScalarValue::from(-9223372036854775808i64)), + ], + Ok(Some("abc")), + &str, + Utf8, + StringArray + ); + test_function!( + SubstrFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::from("overflow")), + ColumnarValue::Scalar(ScalarValue::from(-9223372036854775808i64)), + ColumnarValue::Scalar(ScalarValue::from(1i64)), + ], + exec_err!("negative overflow when calculating skip value"), + &str, + Utf8, + StringArray + ); Ok(()) } diff --git a/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs b/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs index 53ba3042f522..dd422f7aab95 100644 --- a/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs +++ b/datafusion/optimizer/src/analyzer/expand_wildcard_rule.rs @@ -160,14 +160,13 @@ fn replace_columns( mod tests { use arrow::datatypes::{DataType, Field, Schema}; + use crate::test::{assert_analyzed_plan_eq_display_indent, test_table_scan}; + use crate::Analyzer; use datafusion_common::{JoinType, TableReference}; use datafusion_expr::{ col, in_subquery, qualified_wildcard, table_scan, wildcard, LogicalPlanBuilder, }; - use crate::test::{assert_analyzed_plan_eq_display_indent, test_table_scan}; - use crate::Analyzer; - use super::*; fn assert_plan_eq(plan: LogicalPlan, expected: &str) -> Result<()> { diff --git a/datafusion/optimizer/src/analyzer/inline_table_scan.rs 
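The strpos dispatch above only changes how the arrays are accessed (a generic `ArrayAccessor` over `Utf8`, `LargeUtf8`, and `Utf8View`); the per-row logic in `calculate_strpos` is unchanged. A minimal sketch of that logic, with a hypothetical `strpos_str` helper:

```rust
/// Hypothetical helper showing the per-row strpos logic used above:
/// 1-based character index of the first match, or 0 if absent.
fn strpos_str(string: &str, substring: &str) -> usize {
    string
        .find(substring)
        // `find` returns a byte offset; convert it to a 1-based char index.
        .map(|byte_idx| string[..byte_idx].chars().count() + 1)
        .unwrap_or(0)
}

fn main() {
    assert_eq!(strpos_str("high", "ig"), 2);
    assert_eq!(strpos_str("alphabet", "ph"), 3);
    assert_eq!(strpos_str("alphabet", "z"), 0);
    assert_eq!(strpos_str("alphabet", ""), 1);
}
```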
b/datafusion/optimizer/src/analyzer/inline_table_scan.rs index b69b8410da49..d5b3648725b9 100644 --- a/datafusion/optimizer/src/analyzer/inline_table_scan.rs +++ b/datafusion/optimizer/src/analyzer/inline_table_scan.rs @@ -24,7 +24,7 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::{Column, Result}; use datafusion_expr::expr::WildcardOptions; -use datafusion_expr::{logical_plan::LogicalPlan, Expr, LogicalPlanBuilder, TableScan}; +use datafusion_expr::{logical_plan::LogicalPlan, Expr, LogicalPlanBuilder}; /// Analyzed rule that inlines TableScan that provide a [`LogicalPlan`] /// (DataFrame / ViewTable) @@ -56,24 +56,23 @@ fn analyze_internal(plan: LogicalPlan) -> Result> { match plan { // Match only on scans without filter / projection / fetch // Views and DataFrames won't have those added - // during the early stage of planning - LogicalPlan::TableScan(TableScan { - table_name, - source, - projection, - filters, - .. - }) if filters.is_empty() && source.get_logical_plan().is_some() => { - let sub_plan = source.get_logical_plan().unwrap(); - let projection_exprs = generate_projection_expr(&projection, sub_plan)?; - LogicalPlanBuilder::from(sub_plan.clone()) - .project(projection_exprs)? - // Ensures that the reference to the inlined table remains the - // same, meaning we don't have to change any of the parent nodes - // that reference this table. - .alias(table_name)? - .build() - .map(Transformed::yes) + // during the early stage of planning. + LogicalPlan::TableScan(table_scan) if table_scan.filters.is_empty() => { + if let Some(sub_plan) = table_scan.source.get_logical_plan() { + let sub_plan = sub_plan.into_owned(); + let projection_exprs = + generate_projection_expr(&table_scan.projection, &sub_plan)?; + LogicalPlanBuilder::from(sub_plan) + .project(projection_exprs)? + // Ensures that the reference to the inlined table remains the + // same, meaning we don't have to change any of the parent nodes + // that reference this table. + .alias(table_scan.table_name)? 
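The `substr` hunk above guards the `start - 1` computation: with `start = i64::MIN` a plain subtraction overflows, so `checked_sub` turns it into an explicit error instead. A small sketch of that guard, using a hypothetical `skip_from_start` helper:

```rust
use std::cmp::max;

/// Hypothetical helper mirroring the guarded skip computation above.
/// Returns how many leading characters to skip for a 1-based `start`,
/// or an error message when `start - 1` would overflow (e.g. i64::MIN).
fn skip_from_start(start: i64) -> Result<i64, String> {
    let skip = start
        .checked_sub(1)
        .ok_or_else(|| "negative overflow when calculating skip value".to_string())?;
    Ok(max(0, skip))
}

fn main() {
    assert_eq!(skip_from_start(3), Ok(2)); // substr(s, 3) skips 2 chars
    assert_eq!(skip_from_start(-5), Ok(0)); // negative start clamps to 0
    assert!(skip_from_start(i64::MIN).is_err()); // would overflow
}
```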
+ .build() + .map(Transformed::yes) + } else { + Ok(Transformed::no(LogicalPlan::TableScan(table_scan))) + } } _ => Ok(Transformed::no(plan)), } @@ -104,7 +103,7 @@ fn generate_projection_expr( #[cfg(test)] mod tests { - use std::{sync::Arc, vec}; + use std::{borrow::Cow, sync::Arc, vec}; use crate::analyzer::inline_table_scan::InlineTableScan; use crate::test::assert_analyzed_plan_eq; @@ -167,8 +166,8 @@ mod tests { Arc::new(Schema::new(vec![Field::new("a", DataType::Int64, false)])) } - fn get_logical_plan(&self) -> Option<&LogicalPlan> { - Some(&self.plan) + fn get_logical_plan(&self) -> Option> { + Some(Cow::Borrowed(&self.plan)) } } diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 68ab2e13005f..a6b9bad6c5d9 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -37,7 +37,6 @@ use datafusion_expr::expr::{ }; use datafusion_expr::expr_rewriter::coerce_plan_expr_for_schema; use datafusion_expr::expr_schema::cast_subquery; -use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::logical_plan::Subquery; use datafusion_expr::type_coercion::binary::{ comparison_coercion, get_input_types, like_coercion, @@ -56,6 +55,8 @@ use datafusion_expr::{ Projection, ScalarUDF, Union, WindowFrame, WindowFrameBound, WindowFrameUnits, }; +/// Performs type coercion by determining the schema +/// and performing the expression rewrites. #[derive(Default)] pub struct TypeCoercion {} @@ -128,16 +129,23 @@ fn analyze_internal( .map_data(|plan| plan.recompute_schema()) } -pub(crate) struct TypeCoercionRewriter<'a> { +/// Rewrite expressions to apply type coercion. +pub struct TypeCoercionRewriter<'a> { pub(crate) schema: &'a DFSchema, } impl<'a> TypeCoercionRewriter<'a> { + /// Create a new [`TypeCoercionRewriter`] with a provided schema + /// representing both the inputs and output of the [`LogicalPlan`] node. fn new(schema: &'a DFSchema) -> Self { Self { schema } } - fn coerce_plan(&mut self, plan: LogicalPlan) -> Result { + /// Coerce the [`LogicalPlan`]. + /// + /// Refer to [`TypeCoercionRewriter::coerce_join`] and [`TypeCoercionRewriter::coerce_union`] + /// for type-coercion approach. + pub fn coerce_plan(&mut self, plan: LogicalPlan) -> Result { match plan { LogicalPlan::Join(join) => self.coerce_join(join), LogicalPlan::Union(union) => Self::coerce_union(union), @@ -153,7 +161,7 @@ impl<'a> TypeCoercionRewriter<'a> { /// /// For example, on_exprs like `t1.a = t2.b AND t1.x = t2.y` will be stored /// as a list of `(t1.a, t2.b), (t1.x, t2.y)` - fn coerce_join(&mut self, mut join: Join) -> Result { + pub fn coerce_join(&mut self, mut join: Join) -> Result { join.on = join .on .into_iter() @@ -176,7 +184,7 @@ impl<'a> TypeCoercionRewriter<'a> { /// Coerce the union’s inputs to a common schema compatible with all inputs. /// This occurs after wildcard expansion and the coercion of the input expressions. 
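The `get_logical_plan` change above returns `Option<Cow<LogicalPlan>>`, so a table source can lend a borrowed plan or hand over an owned one, and the analyzer calls `into_owned()` only when it actually needs ownership. A minimal sketch of the pattern with a hypothetical `PlanHolder` type standing in for the table source:

```rust
use std::borrow::Cow;

/// Hypothetical stand-in for a table source that owns a plan description.
struct PlanHolder {
    plan: String,
}

impl PlanHolder {
    /// Lend the plan without cloning, mirroring `Some(Cow::Borrowed(&self.plan))`.
    fn get_logical_plan(&self) -> Option<Cow<'_, String>> {
        Some(Cow::Borrowed(&self.plan))
    }
}

fn main() {
    let holder = PlanHolder { plan: "TableScan: t".to_string() };

    if let Some(plan) = holder.get_logical_plan() {
        // `into_owned` clones only because the value was borrowed; an
        // implementation returning `Cow::Owned` would hand it over for free.
        let owned: String = plan.into_owned();
        println!("inlining {owned}");
    }
}
```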
- fn coerce_union(union_plan: Union) -> Result { + pub fn coerce_union(union_plan: Union) -> Result { let union_schema = Arc::new(coerce_union_schema(&union_plan.inputs)?); let new_inputs = union_plan .inputs @@ -241,15 +249,19 @@ impl<'a> TreeNodeRewriter for TypeCoercionRewriter<'a> { subquery, outer_ref_columns, }) => { - let new_plan = analyze_internal(self.schema, unwrap_arc(subquery))?.data; + let new_plan = + analyze_internal(self.schema, Arc::unwrap_or_clone(subquery))?.data; Ok(Transformed::yes(Expr::ScalarSubquery(Subquery { subquery: Arc::new(new_plan), outer_ref_columns, }))) } Expr::Exists(Exists { subquery, negated }) => { - let new_plan = - analyze_internal(self.schema, unwrap_arc(subquery.subquery))?.data; + let new_plan = analyze_internal( + self.schema, + Arc::unwrap_or_clone(subquery.subquery), + )? + .data; Ok(Transformed::yes(Expr::Exists(Exists { subquery: Subquery { subquery: Arc::new(new_plan), @@ -263,8 +275,11 @@ impl<'a> TreeNodeRewriter for TypeCoercionRewriter<'a> { subquery, negated, }) => { - let new_plan = - analyze_internal(self.schema, unwrap_arc(subquery.subquery))?.data; + let new_plan = analyze_internal( + self.schema, + Arc::unwrap_or_clone(subquery.subquery), + )? + .data; let expr_type = expr.get_type(self.schema)?; let subquery_type = new_plan.schema().field(0).data_type(); let common_type = comparison_coercion(&expr_type, subquery_type).ok_or(plan_datafusion_err!( @@ -809,7 +824,10 @@ fn coerce_case_expression(case: Case, schema: &DFSchema) -> Result { } /// Get a common schema that is compatible with all inputs of UNION. -fn coerce_union_schema(inputs: &[Arc]) -> Result { +/// +/// This method presumes that the wildcard expansion is unneeded, or has already +/// been applied. +pub fn coerce_union_schema(inputs: &[Arc]) -> Result { let base_schema = inputs[0].schema(); let mut union_datatypes = base_schema .fields() diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index feccf5679efb..3a2b190359d4 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -33,7 +33,6 @@ use datafusion_common::tree_node::{ }; use datafusion_common::{qualified_name, Column, DFSchema, DFSchemaRef, Result}; use datafusion_expr::expr::{Alias, ScalarFunction}; -use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::logical_plan::{ Aggregate, Filter, LogicalPlan, Projection, Sort, Window, }; @@ -314,7 +313,7 @@ impl CommonSubexprEliminate { schema, .. } = projection; - let input = unwrap_arc(input); + let input = Arc::unwrap_or_clone(input); self.try_unary_plan(expr, input, config)? .map_data(|(new_expr, new_input)| { Projection::try_new_with_schema(new_expr, Arc::new(new_input), schema) @@ -327,7 +326,7 @@ impl CommonSubexprEliminate { config: &dyn OptimizerConfig, ) -> Result> { let Sort { expr, input, fetch } = sort; - let input = unwrap_arc(input); + let input = Arc::unwrap_or_clone(input); let new_sort = self.try_unary_plan(expr, input, config)?.update_data( |(new_expr, new_input)| { LogicalPlan::Sort(Sort { @@ -348,7 +347,7 @@ impl CommonSubexprEliminate { let Filter { predicate, input, .. } = filter; - let input = unwrap_arc(input); + let input = Arc::unwrap_or_clone(input); let expr = vec![predicate]; self.try_unary_plan(expr, input, config)? .map_data(|(mut new_expr, new_input)| { @@ -458,7 +457,7 @@ impl CommonSubexprEliminate { schema, .. 
} = aggregate; - let input = unwrap_arc(input); + let input = Arc::unwrap_or_clone(input); // Extract common sub-expressions from the aggregate and grouping expressions. self.find_common_exprs(vec![group_expr, aggr_expr], config, ExprMask::Normal)? .map_data(|common| { @@ -729,7 +728,7 @@ fn get_consecutive_window_exprs( window_expr_list.push(window_expr); window_schemas.push(schema); - plan = unwrap_arc(input); + plan = Arc::unwrap_or_clone(input); } (window_expr_list, window_schemas, plan) } @@ -1432,7 +1431,7 @@ mod test { fn nested_aliases() -> Result<()> { let table_scan = test_table_scan()?; - let plan = LogicalPlanBuilder::from(table_scan.clone()) + let plan = LogicalPlanBuilder::from(table_scan) .project(vec![ (col("a") + col("b") - col("c")).alias("alias1") * (col("a") + col("b")), col("a") + col("b"), @@ -1843,7 +1842,7 @@ mod test { let config = &OptimizerContext::new(); let _common_expr_1 = config.alias_generator().next(CSE_PREFIX); let common_expr_2 = config.alias_generator().next(CSE_PREFIX); - let plan = LogicalPlanBuilder::from(table_scan.clone()) + let plan = LogicalPlanBuilder::from(table_scan) .project(vec![ (col("a") + col("b")).alias(common_expr_2.clone()), col("c"), @@ -1887,7 +1886,7 @@ mod test { let extracted_short_circuit_leg_1 = (col("a") + col("b")).eq(lit(0)); let not_extracted_short_circuit_leg_2 = (col("a") - col("b")).eq(lit(0)); let extracted_short_circuit_leg_3 = (col("a") * col("b")).eq(lit(0)); - let plan = LogicalPlanBuilder::from(table_scan.clone()) + let plan = LogicalPlanBuilder::from(table_scan) .project(vec![ extracted_short_circuit.clone().alias("c1"), extracted_short_circuit.alias("c2"), @@ -1900,7 +1899,7 @@ mod test { .alias("c4"), extracted_short_circuit_leg_3 .clone() - .or(extracted_short_circuit_leg_3.clone()) + .or(extracted_short_circuit_leg_3) .alias("c5"), ])? .build()?; @@ -1921,7 +1920,7 @@ mod test { let extracted_child = col("a") + col("b"); let rand = rand_func().call(vec![]); let not_extracted_volatile = extracted_child + rand; - let plan = LogicalPlanBuilder::from(table_scan.clone()) + let plan = LogicalPlanBuilder::from(table_scan) .project(vec![ not_extracted_volatile.clone().alias("c1"), not_extracted_volatile.alias("c2"), @@ -1948,7 +1947,7 @@ mod test { let not_extracted_short_circuit_leg_2 = col("b").eq(lit(0)); let not_extracted_volatile_short_circuit_2 = rand.eq(lit(0)).or(not_extracted_short_circuit_leg_2); - let plan = LogicalPlanBuilder::from(table_scan.clone()) + let plan = LogicalPlanBuilder::from(table_scan) .project(vec![ not_extracted_volatile_short_circuit_1.clone().alias("c1"), not_extracted_volatile_short_circuit_1.alias("c2"), diff --git a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs index b6d49490d437..f1cae1099a4d 100644 --- a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs +++ b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs @@ -37,7 +37,6 @@ use datafusion_expr::{ LogicalPlan, LogicalPlanBuilder, Operator, }; -use datafusion_expr::logical_plan::tree_node::unwrap_arc; use log::debug; /// Optimizer rule for rewriting predicate(IN/EXISTS) subquery to left semi/anti joins @@ -55,8 +54,10 @@ impl DecorrelatePredicateSubquery { mut subquery: Subquery, config: &dyn OptimizerConfig, ) -> Result { - subquery.subquery = - Arc::new(self.rewrite(unwrap_arc(subquery.subquery), config)?.data); + subquery.subquery = Arc::new( + self.rewrite(Arc::unwrap_or_clone(subquery.subquery), config)? 
+ .data, + ); Ok(subquery) } @@ -164,7 +165,7 @@ impl OptimizerRule for DecorrelatePredicateSubquery { } // iterate through all exists clauses in predicate, turning each into a join - let mut cur_input = unwrap_arc(input); + let mut cur_input = Arc::unwrap_or_clone(input); for subquery in subqueries { if let Some(plan) = build_join(&subquery, &cur_input, config.alias_generator())? diff --git a/datafusion/optimizer/src/eliminate_cross_join.rs b/datafusion/optimizer/src/eliminate_cross_join.rs index fc4eaef80903..20e6641e4d62 100644 --- a/datafusion/optimizer/src/eliminate_cross_join.rs +++ b/datafusion/optimizer/src/eliminate_cross_join.rs @@ -24,7 +24,6 @@ use crate::join_key_set::JoinKeySet; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{internal_err, Result}; use datafusion_expr::expr::{BinaryExpr, Expr}; -use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::logical_plan::{ CrossJoin, Filter, Join, JoinConstraint, JoinType, LogicalPlan, Projection, }; @@ -114,7 +113,7 @@ impl OptimizerRule for EliminateCrossJoin { input, predicate, .. } = filter; flatten_join_inputs( - unwrap_arc(input), + Arc::unwrap_or_clone(input), &mut possible_join_keys, &mut all_inputs, )?; @@ -217,12 +216,28 @@ fn flatten_join_inputs( ); } possible_join_keys.insert_all_owned(join.on); - flatten_join_inputs(unwrap_arc(join.left), possible_join_keys, all_inputs)?; - flatten_join_inputs(unwrap_arc(join.right), possible_join_keys, all_inputs)?; + flatten_join_inputs( + Arc::unwrap_or_clone(join.left), + possible_join_keys, + all_inputs, + )?; + flatten_join_inputs( + Arc::unwrap_or_clone(join.right), + possible_join_keys, + all_inputs, + )?; } LogicalPlan::CrossJoin(join) => { - flatten_join_inputs(unwrap_arc(join.left), possible_join_keys, all_inputs)?; - flatten_join_inputs(unwrap_arc(join.right), possible_join_keys, all_inputs)?; + flatten_join_inputs( + Arc::unwrap_or_clone(join.left), + possible_join_keys, + all_inputs, + )?; + flatten_join_inputs( + Arc::unwrap_or_clone(join.right), + possible_join_keys, + all_inputs, + )?; } _ => { all_inputs.push(plan); diff --git a/datafusion/optimizer/src/eliminate_filter.rs b/datafusion/optimizer/src/eliminate_filter.rs index 84bb8e782142..bb2b4547e9c2 100644 --- a/datafusion/optimizer/src/eliminate_filter.rs +++ b/datafusion/optimizer/src/eliminate_filter.rs @@ -19,7 +19,6 @@ use datafusion_common::tree_node::Transformed; use datafusion_common::{Result, ScalarValue}; -use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::{EmptyRelation, Expr, Filter, LogicalPlan}; use std::sync::Arc; @@ -65,7 +64,7 @@ impl OptimizerRule for EliminateFilter { input, .. 
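The recurring change across these optimizer rules replaces the internal `unwrap_arc` helper with the standard library's `Arc::unwrap_or_clone` (stabilized in Rust 1.76), which has the same behavior: move the value out when the `Arc` is uniquely held, otherwise clone it. A minimal illustration:

```rust
use std::sync::Arc;

fn main() {
    // Uniquely held: the inner value is moved out, no clone happens.
    let unique = Arc::new(String::from("plan"));
    let owned = Arc::unwrap_or_clone(unique);
    assert_eq!(owned, "plan");

    // Shared: the inner value is cloned and the other handle stays valid.
    let shared = Arc::new(String::from("shared plan"));
    let keep_alive = Arc::clone(&shared);
    let owned = Arc::unwrap_or_clone(shared);
    assert_eq!(owned, *keep_alive);
}
```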
}) => match v { - Some(true) => Ok(Transformed::yes(unwrap_arc(input))), + Some(true) => Ok(Transformed::yes(Arc::unwrap_or_clone(input))), Some(false) | None => Ok(Transformed::yes(LogicalPlan::EmptyRelation( EmptyRelation { produce_one_row: false, diff --git a/datafusion/optimizer/src/eliminate_limit.rs b/datafusion/optimizer/src/eliminate_limit.rs index a42fe6a6f95b..e48f37a77cd3 100644 --- a/datafusion/optimizer/src/eliminate_limit.rs +++ b/datafusion/optimizer/src/eliminate_limit.rs @@ -20,7 +20,7 @@ use crate::optimizer::ApplyOrder; use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::tree_node::Transformed; use datafusion_common::Result; -use datafusion_expr::logical_plan::{tree_node::unwrap_arc, EmptyRelation, LogicalPlan}; +use datafusion_expr::logical_plan::{EmptyRelation, LogicalPlan}; use std::sync::Arc; /// Optimizer rule to replace `LIMIT 0` or `LIMIT` whose ancestor LIMIT's skip is @@ -74,7 +74,9 @@ impl OptimizerRule for EliminateLimit { } } else if limit.skip == 0 { // input also can be Limit, so we should apply again. - return Ok(self.rewrite(unwrap_arc(limit.input), _config).unwrap()); + return Ok(self + .rewrite(Arc::unwrap_or_clone(limit.input), _config) + .unwrap()); } Ok(Transformed::no(LogicalPlan::Limit(limit))) } diff --git a/datafusion/optimizer/src/eliminate_nested_union.rs b/datafusion/optimizer/src/eliminate_nested_union.rs index 5d7895bba4d8..965771326854 100644 --- a/datafusion/optimizer/src/eliminate_nested_union.rs +++ b/datafusion/optimizer/src/eliminate_nested_union.rs @@ -21,7 +21,6 @@ use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::tree_node::Transformed; use datafusion_common::Result; use datafusion_expr::expr_rewriter::coerce_plan_expr_for_schema; -use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::{Distinct, LogicalPlan, Union}; use itertools::Itertools; use std::sync::Arc; @@ -69,7 +68,7 @@ impl OptimizerRule for EliminateNestedUnion { }))) } LogicalPlan::Distinct(Distinct::All(nested_plan)) => { - match unwrap_arc(nested_plan) { + match Arc::unwrap_or_clone(nested_plan) { LogicalPlan::Union(Union { inputs, schema }) => { let inputs = inputs .into_iter() @@ -96,16 +95,17 @@ impl OptimizerRule for EliminateNestedUnion { } fn extract_plans_from_union(plan: Arc) -> Vec { - match unwrap_arc(plan) { - LogicalPlan::Union(Union { inputs, .. }) => { - inputs.into_iter().map(unwrap_arc).collect::>() - } + match Arc::unwrap_or_clone(plan) { + LogicalPlan::Union(Union { inputs, .. }) => inputs + .into_iter() + .map(Arc::unwrap_or_clone) + .collect::>(), plan => vec![plan], } } fn extract_plan_from_distinct(plan: Arc) -> Arc { - match unwrap_arc(plan) { + match Arc::unwrap_or_clone(plan) { LogicalPlan::Distinct(Distinct::All(plan)) => plan, plan => Arc::new(plan), } @@ -144,10 +144,7 @@ mod tests { fn eliminate_nothing() -> Result<()> { let plan_builder = table_scan(Some("table"), &schema(), None)?; - let plan = plan_builder - .clone() - .union(plan_builder.clone().build()?)? - .build()?; + let plan = plan_builder.clone().union(plan_builder.build()?)?.build()?; let expected = "\ Union\ @@ -162,7 +159,7 @@ mod tests { let plan = plan_builder .clone() - .union_distinct(plan_builder.clone().build()?)? + .union_distinct(plan_builder.build()?)? .build()?; let expected = "Distinct:\ @@ -180,7 +177,7 @@ mod tests { .clone() .union(plan_builder.clone().build()?)? .union(plan_builder.clone().build()?)? - .union(plan_builder.clone().build()?)? + .union(plan_builder.build()?)? 
.build()?; let expected = "\ @@ -200,7 +197,7 @@ mod tests { .clone() .union_distinct(plan_builder.clone().build()?)? .union(plan_builder.clone().build()?)? - .union(plan_builder.clone().build()?)? + .union(plan_builder.build()?)? .build()?; let expected = "Union\ @@ -222,7 +219,7 @@ mod tests { .union(plan_builder.clone().build()?)? .union_distinct(plan_builder.clone().build()?)? .union(plan_builder.clone().build()?)? - .union_distinct(plan_builder.clone().build()?)? + .union_distinct(plan_builder.build()?)? .build()?; let expected = "Distinct:\ @@ -243,7 +240,7 @@ mod tests { .clone() .union_distinct(plan_builder.clone().distinct()?.build()?)? .union(plan_builder.clone().distinct()?.build()?)? - .union_distinct(plan_builder.clone().build()?)? + .union_distinct(plan_builder.build()?)? .build()?; let expected = "Distinct:\ @@ -271,7 +268,6 @@ mod tests { )? .union( plan_builder - .clone() .project(vec![col("id").alias("_id"), col("key"), col("value")])? .build()?, )? @@ -300,7 +296,6 @@ mod tests { )? .union_distinct( plan_builder - .clone() .project(vec![col("id").alias("_id"), col("key"), col("value")])? .build()?, )? diff --git a/datafusion/optimizer/src/eliminate_one_union.rs b/datafusion/optimizer/src/eliminate_one_union.rs index 43024107c4f8..7a1c4e118e05 100644 --- a/datafusion/optimizer/src/eliminate_one_union.rs +++ b/datafusion/optimizer/src/eliminate_one_union.rs @@ -16,9 +16,11 @@ // under the License. //! [`EliminateOneUnion`] eliminates single element `Union` + use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::{tree_node::Transformed, Result}; -use datafusion_expr::logical_plan::{tree_node::unwrap_arc, LogicalPlan, Union}; +use datafusion_expr::logical_plan::{LogicalPlan, Union}; +use std::sync::Arc; use crate::optimizer::ApplyOrder; @@ -48,9 +50,9 @@ impl OptimizerRule for EliminateOneUnion { _config: &dyn OptimizerConfig, ) -> Result> { match plan { - LogicalPlan::Union(Union { mut inputs, .. }) if inputs.len() == 1 => { - Ok(Transformed::yes(unwrap_arc(inputs.pop().unwrap()))) - } + LogicalPlan::Union(Union { mut inputs, .. }) if inputs.len() == 1 => Ok( + Transformed::yes(Arc::unwrap_or_clone(inputs.pop().unwrap())), + ), _ => Ok(Transformed::no(plan)), } } @@ -92,10 +94,7 @@ mod tests { fn eliminate_nothing() -> Result<()> { let plan_builder = table_scan(Some("table"), &schema(), None)?; - let plan = plan_builder - .clone() - .union(plan_builder.clone().build()?)? - .build()?; + let plan = plan_builder.clone().union(plan_builder.build()?)?.build()?; let expected = "\ Union\ diff --git a/datafusion/optimizer/src/eliminate_outer_join.rs b/datafusion/optimizer/src/eliminate_outer_join.rs index 12534e058152..e7c88df55122 100644 --- a/datafusion/optimizer/src/eliminate_outer_join.rs +++ b/datafusion/optimizer/src/eliminate_outer_join.rs @@ -18,7 +18,6 @@ //! 
[`EliminateOuterJoin`] converts `LEFT/RIGHT/FULL` joins to `INNER` joins use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::{Column, DFSchema, Result}; -use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::logical_plan::{Join, JoinType, LogicalPlan}; use datafusion_expr::{Expr, Filter, Operator}; @@ -79,7 +78,7 @@ impl OptimizerRule for EliminateOuterJoin { _config: &dyn OptimizerConfig, ) -> Result> { match plan { - LogicalPlan::Filter(mut filter) => match unwrap_arc(filter.input) { + LogicalPlan::Filter(mut filter) => match Arc::unwrap_or_clone(filter.input) { LogicalPlan::Join(join) => { let mut non_nullable_cols: Vec = vec![]; diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index ac4ed87a4a1a..35b0d07751ff 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -41,7 +41,6 @@ use crate::utils::NamePreserver; use datafusion_common::tree_node::{ Transformed, TreeNode, TreeNodeIterator, TreeNodeRecursion, }; -use datafusion_expr::logical_plan::tree_node::unwrap_arc; /// Optimizer rule to prune unnecessary columns from intermediate schemas /// inside the [`LogicalPlan`]. This rule: @@ -181,7 +180,7 @@ fn optimize_projections( let necessary_exprs = necessary_indices.get_required_exprs(schema); return optimize_projections( - unwrap_arc(aggregate.input), + Arc::unwrap_or_clone(aggregate.input), config, necessary_indices, )? @@ -221,7 +220,7 @@ fn optimize_projections( child_reqs.with_exprs(&input_schema, &new_window_expr)?; return optimize_projections( - unwrap_arc(window.input), + Arc::unwrap_or_clone(window.input), config, required_indices.clone(), )? @@ -488,7 +487,7 @@ fn merge_consecutive_projections(proj: Projection) -> Result { let parents_predicates = split_conjunction_owned(filter.predicate); @@ -820,7 +819,7 @@ impl OptimizerRule for PushDownFilter { .map(|e| Ok(Column::from_qualified_name(e.schema_name().to_string()))) .collect::>>()?; - let predicates = split_conjunction_owned(filter.predicate.clone()); + let predicates = split_conjunction_owned(filter.predicate); let mut keep_predicates = vec![]; let mut push_predicates = vec![]; @@ -1139,19 +1138,19 @@ fn convert_to_cross_join_if_beneficial( match plan { // Can be converted back to cross join LogicalPlan::Join(join) if join.on.is_empty() && join.filter.is_none() => { - LogicalPlanBuilder::from(unwrap_arc(join.left)) - .cross_join(unwrap_arc(join.right))? + LogicalPlanBuilder::from(Arc::unwrap_or_clone(join.left)) + .cross_join(Arc::unwrap_or_clone(join.right))? .build() .map(Transformed::yes) } - LogicalPlan::Filter(filter) => convert_to_cross_join_if_beneficial(unwrap_arc( - filter.input, - ))? - .transform_data(|child_plan| { - Filter::try_new(filter.predicate, Arc::new(child_plan)) - .map(LogicalPlan::Filter) - .map(Transformed::yes) - }), + LogicalPlan::Filter(filter) => { + convert_to_cross_join_if_beneficial(Arc::unwrap_or_clone(filter.input))? 
+ .transform_data(|child_plan| { + Filter::try_new(filter.predicate, Arc::new(child_plan)) + .map(LogicalPlan::Filter) + .map(Transformed::yes) + }) + } plan => Ok(Transformed::no(plan)), } } diff --git a/datafusion/optimizer/src/push_down_limit.rs b/datafusion/optimizer/src/push_down_limit.rs index 290b893577b8..dff0b61c6b22 100644 --- a/datafusion/optimizer/src/push_down_limit.rs +++ b/datafusion/optimizer/src/push_down_limit.rs @@ -26,7 +26,6 @@ use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::tree_node::Transformed; use datafusion_common::utils::combine_limit; use datafusion_common::Result; -use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::logical_plan::{Join, JoinType, Limit, LogicalPlan}; /// Optimization rule that tries to push down `LIMIT`. @@ -83,7 +82,7 @@ impl OptimizerRule for PushDownLimit { }))); }; - match unwrap_arc(input) { + match Arc::unwrap_or_clone(input) { LogicalPlan::TableScan(mut scan) => { let rows_needed = if fetch != 0 { fetch + skip } else { 0 }; let new_fetch = scan diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index c45df74a564d..7129ceb0fea1 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -3407,32 +3407,32 @@ mod tests { let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and( in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], false), ); - assert_eq!(simplify(expr.clone()), lit(false)); + assert_eq!(simplify(expr), lit(false)); // 2. c1 IN (1,2,3,4) AND c1 IN (4,5,6,7) -> c1 = 4 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and( in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], false), ); - assert_eq!(simplify(expr.clone()), col("c1").eq(lit(4))); + assert_eq!(simplify(expr), col("c1").eq(lit(4))); // 3. c1 NOT IN (1, 2, 3, 4) OR c1 NOT IN (5, 6, 7, 8) -> true let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or( in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], true), ); - assert_eq!(simplify(expr.clone()), lit(true)); + assert_eq!(simplify(expr), lit(true)); // 3.5 c1 NOT IN (1, 2, 3, 4) OR c1 NOT IN (4, 5, 6, 7) -> c1 != 4 (4 overlaps) let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or( in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], true), ); - assert_eq!(simplify(expr.clone()), col("c1").not_eq(lit(4))); + assert_eq!(simplify(expr), col("c1").not_eq(lit(4))); // 4. c1 NOT IN (1,2,3,4) AND c1 NOT IN (4,5,6,7) -> c1 NOT IN (1,2,3,4,5,6,7) let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and( in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], true), ); assert_eq!( - simplify(expr.clone()), + simplify(expr), in_list( col("c1"), vec![lit(1), lit(2), lit(3), lit(4), lit(5), lit(6), lit(7)], @@ -3445,7 +3445,7 @@ mod tests { in_list(col("c1"), vec![lit(2), lit(3), lit(4), lit(5)], false), ); assert_eq!( - simplify(expr.clone()), + simplify(expr), in_list( col("c1"), vec![lit(1), lit(2), lit(3), lit(4), lit(5)], @@ -3459,7 +3459,7 @@ mod tests { vec![lit(1), lit(2), lit(3), lit(4), lit(5)], true, )); - assert_eq!(simplify(expr.clone()), lit(false)); + assert_eq!(simplify(expr), lit(false)); // 7. 
c1 NOT IN (1,2,3,4) AND c1 IN (1,2,3,4,5) -> c1 = 5 let expr = @@ -3468,14 +3468,14 @@ mod tests { vec![lit(1), lit(2), lit(3), lit(4), lit(5)], false, )); - assert_eq!(simplify(expr.clone()), col("c1").eq(lit(5))); + assert_eq!(simplify(expr), col("c1").eq(lit(5))); // 8. c1 IN (1,2,3,4) AND c1 NOT IN (5,6,7,8) -> c1 IN (1,2,3,4) let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and( in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], true), ); assert_eq!( - simplify(expr.clone()), + simplify(expr), in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false) ); @@ -3493,7 +3493,7 @@ mod tests { )) .and(in_list(col("c1"), vec![lit(3), lit(6)], false)); assert_eq!( - simplify(expr.clone()), + simplify(expr), col("c1").eq(lit(3)).or(col("c1").eq(lit(6))) ); @@ -3507,7 +3507,7 @@ mod tests { )) .and(in_list(col("c1"), vec![lit(8), lit(9), lit(10)], false)), ); - assert_eq!(simplify(expr.clone()), col("c1").eq(lit(8))); + assert_eq!(simplify(expr), col("c1").eq(lit(8))); // Contains non-InList expression // c1 NOT IN (1,2,3,4) OR c1 != 5 OR c1 NOT IN (6,7,8,9) -> c1 NOT IN (1,2,3,4) OR c1 != 5 OR c1 NOT IN (6,7,8,9) @@ -3622,7 +3622,7 @@ mod tests { let expr_x = col("c3").gt(lit(3_i64)); let expr_y = (col("c4") + lit(2_u32)).lt(lit(10_u32)); let expr_z = col("c1").in_list(vec![lit("a"), lit("b")], true); - let expr = expr_x.clone().and(expr_y.clone().or(expr_z)); + let expr = expr_x.clone().and(expr_y.or(expr_z)); // All guaranteed null let guarantees = vec![ @@ -3698,7 +3698,7 @@ mod tests { col("c4"), NullableInterval::from(ScalarValue::UInt32(Some(3))), )]; - let output = simplify_with_guarantee(expr.clone(), guarantees); + let output = simplify_with_guarantee(expr, guarantees); assert_eq!(&output, &expr_x); } diff --git a/datafusion/optimizer/src/simplify_expressions/guarantees.rs b/datafusion/optimizer/src/simplify_expressions/guarantees.rs index 09fdd7685a9c..afcbe528083b 100644 --- a/datafusion/optimizer/src/simplify_expressions/guarantees.rs +++ b/datafusion/optimizer/src/simplify_expressions/guarantees.rs @@ -225,12 +225,12 @@ mod tests { // x IS NULL => guaranteed false let expr = col("x").is_null(); - let output = expr.clone().rewrite(&mut rewriter).data().unwrap(); + let output = expr.rewrite(&mut rewriter).data().unwrap(); assert_eq!(output, lit(false)); // x IS NOT NULL => guaranteed true let expr = col("x").is_not_null(); - let output = expr.clone().rewrite(&mut rewriter).data().unwrap(); + let output = expr.rewrite(&mut rewriter).data().unwrap(); assert_eq!(output, lit(true)); } diff --git a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs index e0f50a470d43..b17d69437cbe 100644 --- a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs +++ b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs @@ -627,7 +627,7 @@ mod tests { Box::new(DataType::Int32), Box::new(ScalarValue::LargeUtf8(Some("value".to_owned()))), ); - let expr_input = cast(col("largestr"), dict.data_type()).eq(lit(dict.clone())); + let expr_input = cast(col("largestr"), dict.data_type()).eq(lit(dict)); let expected = col("largestr").eq(lit(ScalarValue::LargeUtf8(Some("value".to_owned())))); assert_eq!(optimize_test(expr_input, &schema), expected); diff --git a/datafusion/optimizer/src/utils.rs b/datafusion/optimizer/src/utils.rs index 05b1744d90c5..45cef55bf272 100644 --- a/datafusion/optimizer/src/utils.rs +++ b/datafusion/optimizer/src/utils.rs @@ -28,6 +28,10 @@ use datafusion_expr::{logical_plan::LogicalPlan, 
Expr, Operator}; use log::{debug, trace}; +/// Re-export of `NamesPreserver` for backwards compatibility, +/// as it was initially placed here and then moved elsewhere. +pub use datafusion_expr::expr_rewriter::NamePreserver; + /// Convenience rule for writing optimizers: recursively invoke /// optimize on plan's children and then return a node of the same /// type. Useful for optimizer rules which want to leave the type @@ -294,54 +298,3 @@ pub fn only_or_err(slice: &[T]) -> Result<&T> { pub fn merge_schema(inputs: Vec<&LogicalPlan>) -> DFSchema { expr_utils::merge_schema(inputs) } - -/// Handles ensuring the name of rewritten expressions is not changed. -/// -/// For example, if an expression `1 + 2` is rewritten to `3`, the name of the -/// expression should be preserved: `3 as "1 + 2"` -/// -/// See for details -pub struct NamePreserver { - use_alias: bool, -} - -/// If the name of an expression is remembered, it will be preserved when -/// rewriting the expression -pub struct SavedName(Option); - -impl NamePreserver { - /// Create a new NamePreserver for rewriting the `expr` that is part of the specified plan - pub fn new(plan: &LogicalPlan) -> Self { - Self { - use_alias: !matches!(plan, LogicalPlan::Filter(_) | LogicalPlan::Join(_)), - } - } - - /// Create a new NamePreserver for rewriting the `expr`s in `Projection` - /// - /// This will use aliases - pub fn new_for_projection() -> Self { - Self { use_alias: true } - } - - pub fn save(&self, expr: &Expr) -> Result { - let original_name = if self.use_alias { - Some(expr.name_for_alias()?) - } else { - None - }; - - Ok(SavedName(original_name)) - } -} - -impl SavedName { - /// Ensures the name of the rewritten expression is preserved - pub fn restore(self, expr: Expr) -> Result { - let Self(original_name) = self; - match original_name { - Some(name) => expr.alias_if_changed(name), - None => Ok(expr), - } - } -} diff --git a/datafusion/physical-expr-common/src/sort_expr.rs b/datafusion/physical-expr-common/src/sort_expr.rs index 9dc54d2eb2d0..745ec543c31a 100644 --- a/datafusion/physical-expr-common/src/sort_expr.rs +++ b/datafusion/physical-expr-common/src/sort_expr.rs @@ -221,7 +221,7 @@ impl PhysicalSortRequirement { /// [`ExecutionPlan::required_input_ordering`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.ExecutionPlan.html#method.required_input_ordering pub fn from_sort_exprs<'a>( ordering: impl IntoIterator, - ) -> Vec { + ) -> LexRequirement { ordering .into_iter() .cloned() diff --git a/datafusion/physical-expr-functions-aggregate/Cargo.toml b/datafusion/physical-expr-functions-aggregate/Cargo.toml deleted file mode 100644 index 6eed89614c53..000000000000 --- a/datafusion/physical-expr-functions-aggregate/Cargo.toml +++ /dev/null @@ -1,48 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
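The `NamePreserver` removed above now lives in `datafusion_expr::expr_rewriter` and is re-exported from the old path for backwards compatibility. The sketch below shows the save/restore flow; it assumes the relocated type keeps the exact API of the removed implementation shown here (`save` and `restore` returning `Result`), which may differ slightly in the released crate:

```rust
// Sketch only: assumes the relocated NamePreserver keeps the save/restore
// API shown in the removed implementation above.
use datafusion_common::Result;
use datafusion_expr::expr_rewriter::NamePreserver;
use datafusion_expr::{col, lit, LogicalPlanBuilder};

fn main() -> Result<()> {
    let plan = LogicalPlanBuilder::empty(false).build()?;
    let name_preserver = NamePreserver::new(&plan);

    // Remember the name of `a + 0`, rewrite the expression, then restore
    // the original name so downstream schemas are unchanged.
    let original = col("a") + lit(0);
    let saved = name_preserver.save(&original)?;
    let rewritten = saved.restore(col("a"))?;
    // The restored expression carries the original name as an alias.
    println!("{rewritten}");
    Ok(())
}
```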
See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "datafusion-physical-expr-functions-aggregate" -description = "Logical plan and expression representation for DataFusion query engine" -keywords = ["datafusion", "logical", "plan", "expressions"] -readme = "README.md" -version = { workspace = true } -edition = { workspace = true } -homepage = { workspace = true } -repository = { workspace = true } -license = { workspace = true } -authors = { workspace = true } -rust-version = { workspace = true } - -[lints] -workspace = true - -[lib] -name = "datafusion_physical_expr_functions_aggregate" -path = "src/lib.rs" - -[features] - -[dependencies] -ahash = { workspace = true } -arrow = { workspace = true } -datafusion-common = { workspace = true } -datafusion-expr = { workspace = true } -datafusion-expr-common = { workspace = true } -datafusion-functions-aggregate-common = { workspace = true } -datafusion-physical-expr-common = { workspace = true } -rand = { workspace = true } diff --git a/datafusion/physical-expr-functions-aggregate/src/aggregate.rs b/datafusion/physical-expr/src/aggregate.rs similarity index 69% rename from datafusion/physical-expr-functions-aggregate/src/aggregate.rs rename to datafusion/physical-expr/src/aggregate.rs index aa1d1999a339..5c1216f2a386 100644 --- a/datafusion/physical-expr-functions-aggregate/src/aggregate.rs +++ b/datafusion/physical-expr/src/aggregate.rs @@ -15,30 +15,46 @@ // specific language governing permissions and limitations // under the License. +pub(crate) mod groups_accumulator { + #[allow(unused_imports)] + pub(crate) mod accumulate { + pub use datafusion_functions_aggregate_common::aggregate::groups_accumulator::accumulate::NullState; + } + pub use datafusion_functions_aggregate_common::aggregate::groups_accumulator::{ + accumulate::NullState, GroupsAccumulatorAdapter, + }; +} +pub(crate) mod stats { + pub use datafusion_functions_aggregate_common::stats::StatsType; +} +pub mod utils { + pub use datafusion_functions_aggregate_common::utils::{ + adjust_output_array, get_accum_scalar_values_as_arrays, get_sort_options, + ordering_fields, DecimalAverager, Hashable, + }; +} + use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion_common::ScalarValue; use datafusion_common::{internal_err, not_impl_err, Result}; -use datafusion_expr::expr::create_function_physical_name; use datafusion_expr::AggregateUDF; use datafusion_expr::ReversedUDAF; use datafusion_expr_common::accumulator::Accumulator; -use datafusion_expr_common::groups_accumulator::GroupsAccumulator; use datafusion_expr_common::type_coercion::aggregates::check_arg_count; use datafusion_functions_aggregate_common::accumulator::AccumulatorArgs; use datafusion_functions_aggregate_common::accumulator::StateFieldsArgs; -use datafusion_functions_aggregate_common::aggregate::AggregateExpr; use datafusion_functions_aggregate_common::order::AggregateOrderSensitivity; -use datafusion_functions_aggregate_common::utils::{self, down_cast_any_ref}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use datafusion_physical_expr_common::utils::reverse_order_bys; +use datafusion_expr_common::groups_accumulator::GroupsAccumulator; use std::fmt::Debug; -use std::{any::Any, sync::Arc}; +use std::sync::Arc; -/// Builder for physical [`AggregateExpr`] +/// Builder for physical [`AggregateFunctionExpr`] /// -/// `AggregateExpr` 
contains the information necessary to call +/// `AggregateFunctionExpr` contains the information necessary to call /// an aggregate expression. #[derive(Debug, Clone)] pub struct AggregateExprBuilder { @@ -72,7 +88,7 @@ impl AggregateExprBuilder { } } - pub fn build(self) -> Result> { + pub fn build(self) -> Result> { let Self { fun, args, @@ -112,8 +128,7 @@ impl AggregateExprBuilder { let data_type = fun.return_type(&input_exprs_types)?; let is_nullable = fun.is_nullable(); let name = match alias { - // TODO: Ideally, we should build the name from physical expressions - None => create_function_physical_name(fun.name(), is_distinct, &[], None)?, + None => return internal_err!("alias should be provided"), Some(alias) => alias, }; @@ -206,6 +221,17 @@ impl AggregateFunctionExpr { &self.fun } + /// expressions that are passed to the Accumulator. + /// Single-column aggregations such as `sum` return a single value, others (e.g. `cov`) return many. + pub fn expressions(&self) -> Vec> { + self.args.clone() + } + + /// Human readable name such as `"MIN(c2)"`. + pub fn name(&self) -> &str { + &self.name + } + /// Return if the aggregation is distinct pub fn is_distinct(&self) -> bool { self.is_distinct @@ -221,34 +247,13 @@ impl AggregateFunctionExpr { self.is_reversed } + /// Return if the aggregation is nullable pub fn is_nullable(&self) -> bool { self.is_nullable } -} -impl AggregateExpr for AggregateFunctionExpr { - /// Return a reference to Any that can be used for downcasting - fn as_any(&self) -> &dyn Any { - self - } - - fn expressions(&self) -> Vec> { - self.args.clone() - } - - fn state_fields(&self) -> Result> { - let args = StateFieldsArgs { - name: &self.name, - input_types: &self.input_types, - return_type: &self.data_type, - ordering_fields: &self.ordering_fields, - is_distinct: self.is_distinct, - }; - - self.fun.state_fields(args) - } - - fn field(&self) -> Result { + /// the field of the final result of this aggregation. + pub fn field(&self) -> Result { Ok(Field::new( &self.name, self.data_type.clone(), @@ -256,7 +261,10 @@ impl AggregateExpr for AggregateFunctionExpr { )) } - fn create_accumulator(&self) -> Result> { + /// the accumulator used to accumulate values from the expressions. + /// the accumulator expects the same number of arguments as `expressions` and must + /// return states with the same description as `state_fields` + pub fn create_accumulator(&self) -> Result> { let acc_args = AccumulatorArgs { return_type: &self.data_type, schema: &self.schema, @@ -271,7 +279,83 @@ impl AggregateExpr for AggregateFunctionExpr { self.fun.accumulator(acc_args) } - fn create_sliding_accumulator(&self) -> Result> { + /// the field of the final result of this aggregation. + pub fn state_fields(&self) -> Result> { + let args = StateFieldsArgs { + name: &self.name, + input_types: &self.input_types, + return_type: &self.data_type, + ordering_fields: &self.ordering_fields, + is_distinct: self.is_distinct, + }; + + self.fun.state_fields(args) + } + + /// Order by requirements for the aggregate function + /// By default it is `None` (there is no requirement) + /// Order-sensitive aggregators, such as `FIRST_VALUE(x ORDER BY y)` should implement this + pub fn order_bys(&self) -> Option<&[PhysicalSortExpr]> { + if self.ordering_req.is_empty() { + return None; + } + + if !self.order_sensitivity().is_insensitive() { + return Some(&self.ordering_req); + } + + None + } + + /// Indicates whether aggregator can produce the correct result with any + /// arbitrary input ordering. 
By default, we assume that aggregate expressions + /// are order insensitive. + pub fn order_sensitivity(&self) -> AggregateOrderSensitivity { + if !self.ordering_req.is_empty() { + // If there is requirement, use the sensitivity of the implementation + self.fun.order_sensitivity() + } else { + // If no requirement, aggregator is order insensitive + AggregateOrderSensitivity::Insensitive + } + } + + /// Sets the indicator whether ordering requirements of the aggregator is + /// satisfied by its input. If this is not the case, aggregators with order + /// sensitivity `AggregateOrderSensitivity::Beneficial` can still produce + /// the correct result with possibly more work internally. + /// + /// # Returns + /// + /// Returns `Ok(Some(updated_expr))` if the process completes successfully. + /// If the expression can benefit from existing input ordering, but does + /// not implement the method, returns an error. Order insensitive and hard + /// requirement aggregators return `Ok(None)`. + pub fn with_beneficial_ordering( + self: Arc, + beneficial_ordering: bool, + ) -> Result>> { + let Some(updated_fn) = self + .fun + .clone() + .with_beneficial_ordering(beneficial_ordering)? + else { + return Ok(None); + }; + + AggregateExprBuilder::new(Arc::new(updated_fn), self.args.to_vec()) + .order_by(self.ordering_req.to_vec()) + .schema(Arc::new(self.schema.clone())) + .alias(self.name().to_string()) + .with_ignore_nulls(self.ignore_nulls) + .with_distinct(self.is_distinct) + .with_reversed(self.is_reversed) + .build() + .map(Some) + } + + /// Creates accumulator implementation that supports retract + pub fn create_sliding_accumulator(&self) -> Result> { let args = AccumulatorArgs { return_type: &self.data_type, schema: &self.schema, @@ -337,11 +421,10 @@ impl AggregateExpr for AggregateFunctionExpr { Ok(accumulator) } - fn name(&self) -> &str { - &self.name - } - - fn groups_accumulator_supported(&self) -> bool { + /// If the aggregate expression has a specialized + /// [`GroupsAccumulator`] implementation. If this returns true, + /// `[Self::create_groups_accumulator`] will be called. + pub fn groups_accumulator_supported(&self) -> bool { let args = AccumulatorArgs { return_type: &self.data_type, schema: &self.schema, @@ -355,7 +438,12 @@ impl AggregateExpr for AggregateFunctionExpr { self.fun.groups_accumulator_supported(args) } - fn create_groups_accumulator(&self) -> Result> { + /// Return a specialized [`GroupsAccumulator`] that manages state + /// for all groups. + /// + /// For maximum performance, a [`GroupsAccumulator`] should be + /// implemented in addition to [`Accumulator`]. 
+ pub fn create_groups_accumulator(&self) -> Result> { let args = AccumulatorArgs { return_type: &self.data_type, schema: &self.schema, @@ -369,52 +457,11 @@ impl AggregateExpr for AggregateFunctionExpr { self.fun.create_groups_accumulator(args) } - fn order_bys(&self) -> Option<&[PhysicalSortExpr]> { - if self.ordering_req.is_empty() { - return None; - } - - if !self.order_sensitivity().is_insensitive() { - return Some(&self.ordering_req); - } - - None - } - - fn order_sensitivity(&self) -> AggregateOrderSensitivity { - if !self.ordering_req.is_empty() { - // If there is requirement, use the sensitivity of the implementation - self.fun.order_sensitivity() - } else { - // If no requirement, aggregator is order insensitive - AggregateOrderSensitivity::Insensitive - } - } - - fn with_beneficial_ordering( - self: Arc, - beneficial_ordering: bool, - ) -> Result>> { - let Some(updated_fn) = self - .fun - .clone() - .with_beneficial_ordering(beneficial_ordering)? - else { - return Ok(None); - }; - - AggregateExprBuilder::new(Arc::new(updated_fn), self.args.to_vec()) - .order_by(self.ordering_req.to_vec()) - .schema(Arc::new(self.schema.clone())) - .alias(self.name().to_string()) - .with_ignore_nulls(self.ignore_nulls) - .with_distinct(self.is_distinct) - .with_reversed(self.is_reversed) - .build() - .map(Some) - } - - fn reverse_expr(&self) -> Option> { + /// Construct an expression that calculates the aggregate in reverse. + /// Typically the "reverse" expression is itself (e.g. SUM, COUNT). + /// For aggregates that do not support calculation in reverse, + /// returns None (which is the default value). + pub fn reverse_expr(&self) -> Option> { match self.fun.reverse_udf() { ReversedUDAF::NotSupported => None, ReversedUDAF::Identical => Some(Arc::new(self.clone())), @@ -442,33 +489,72 @@ impl AggregateExpr for AggregateFunctionExpr { } } - fn get_minmax_desc(&self) -> Option<(Field, bool)> { + /// Returns all expressions used in the [`AggregateFunctionExpr`]. + /// These expressions are (1)function arguments, (2) order by expressions. + pub fn all_expressions(&self) -> AggregatePhysicalExpressions { + let args = self.expressions(); + let order_bys = self.order_bys().unwrap_or(&[]); + let order_by_exprs = order_bys + .iter() + .map(|sort_expr| Arc::clone(&sort_expr.expr)) + .collect::>(); + AggregatePhysicalExpressions { + args, + order_by_exprs, + } + } + + /// Rewrites [`AggregateFunctionExpr`], with new expressions given. The argument should be consistent + /// with the return value of the [`AggregateFunctionExpr::all_expressions`] method. + /// Returns `Some(Arc)` if re-write is supported, otherwise returns `None`. 
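With the move into `datafusion-physical-expr`, `AggregateExprBuilder::build()` above now errors unless an explicit alias is provided, instead of deriving a physical name from logical expressions. A sketch of building an `AggregateFunctionExpr`; the `sum_udaf()` helper, the column name `x`, and the `datafusion_physical_expr::aggregate` module path are assumptions based on the file move shown in this diff:

```rust
// Sketch only: `sum_udaf()` and the column name are illustrative.
use std::sync::Arc;

use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::Result;
use datafusion_functions_aggregate::sum::sum_udaf;
use datafusion_physical_expr::aggregate::AggregateExprBuilder;
use datafusion_physical_expr::expressions::col;

fn main() -> Result<()> {
    let schema = Schema::new(vec![Field::new("x", DataType::Int64, true)]);
    let agg = AggregateExprBuilder::new(sum_udaf(), vec![col("x", &schema)?])
        // `build()` now requires an explicit alias; omitting it is an error.
        .alias("sum_x".to_string())
        .schema(Arc::new(schema))
        .build()?;
    println!("{}", agg.name()); // prints the alias: sum_x
    Ok(())
}
```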
+ pub fn with_new_expressions( + &self, + _args: Vec>, + _order_by_exprs: Vec>, + ) -> Option> { + None + } + + /// If this function is max, return (output_field, true) + /// if the function is min, return (output_field, false) + /// otherwise return None (the default) + /// + /// output_field is the name of the column produced by this aggregate + /// + /// Note: this is used to use special aggregate implementations in certain conditions + pub fn get_minmax_desc(&self) -> Option<(Field, bool)> { self.fun .is_descending() .and_then(|flag| self.field().ok().map(|f| (f, flag))) } - fn default_value(&self, data_type: &DataType) -> Result { + /// Returns default value of the function given the input is Null + /// Most of the aggregate function return Null if input is Null, + /// while `count` returns 0 if input is Null + pub fn default_value(&self, data_type: &DataType) -> Result { self.fun.default_value(data_type) } } -impl PartialEq for AggregateFunctionExpr { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| { - self.name == x.name - && self.data_type == x.data_type - && self.fun == x.fun - && self.args.len() == x.args.len() - && self - .args - .iter() - .zip(x.args.iter()) - .all(|(this_arg, other_arg)| this_arg.eq(other_arg)) - }) - .unwrap_or(false) +/// Stores the physical expressions used inside the `AggregateExpr`. +pub struct AggregatePhysicalExpressions { + /// Aggregate function arguments + pub args: Vec>, + /// Order by expressions + pub order_by_exprs: Vec>, +} + +impl PartialEq for AggregateFunctionExpr { + fn eq(&self, other: &Self) -> bool { + self.name == other.name + && self.data_type == other.data_type + && self.fun == other.fun + && self.args.len() == other.args.len() + && self + .args + .iter() + .zip(other.args.iter()) + .all(|(this_arg, other_arg)| this_arg.eq(other_arg)) } } diff --git a/datafusion/physical-expr/src/equivalence/mod.rs b/datafusion/physical-expr/src/equivalence/mod.rs index b9228282b081..d862eda5018e 100644 --- a/datafusion/physical-expr/src/equivalence/mod.rs +++ b/datafusion/physical-expr/src/equivalence/mod.rs @@ -239,7 +239,7 @@ mod tests { // Convert each tuple to PhysicalSortRequirement pub fn convert_to_sort_reqs( in_data: &[(&Arc, Option)], - ) -> Vec { + ) -> LexRequirement { in_data .iter() .map(|(expr, options)| { diff --git a/datafusion/physical-expr/src/equivalence/ordering.rs b/datafusion/physical-expr/src/equivalence/ordering.rs index c4b8a5c46563..49a0de7252ab 100644 --- a/datafusion/physical-expr/src/equivalence/ordering.rs +++ b/datafusion/physical-expr/src/equivalence/ordering.rs @@ -272,7 +272,7 @@ mod tests { // Crude ordering doesn't satisfy finer ordering. 
should return false let mut eq_properties_crude = EquivalenceProperties::new(Arc::clone(&input_schema)); - eq_properties_crude.oeq_class.push(crude.clone()); + eq_properties_crude.oeq_class.push(crude); assert!(!eq_properties_crude.ordering_satisfy(&finer)); Ok(()) } diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 26885ae1350c..2680a7930ff1 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -41,6 +41,7 @@ use datafusion_expr::type_coercion::binary::get_result_type; use datafusion_expr::{ColumnarValue, Operator}; use datafusion_physical_expr_common::datum::{apply, apply_cmp, apply_cmp_for_nested}; +use crate::expressions::binary::kernels::concat_elements_utf8view; use kernels::{ bitwise_and_dyn, bitwise_and_dyn_scalar, bitwise_or_dyn, bitwise_or_dyn_scalar, bitwise_shift_left_dyn, bitwise_shift_left_dyn_scalar, bitwise_shift_right_dyn, @@ -131,34 +132,6 @@ impl std::fmt::Display for BinaryExpr { } } -/// Invoke a compute kernel on a pair of binary data arrays -macro_rules! compute_utf8_op { - ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{ - let ll = $LEFT - .as_any() - .downcast_ref::<$DT>() - .expect("compute_op failed to downcast left side array"); - let rr = $RIGHT - .as_any() - .downcast_ref::<$DT>() - .expect("compute_op failed to downcast right side array"); - Ok(Arc::new(paste::expr! {[<$OP _utf8>]}(&ll, &rr)?)) - }}; -} - -macro_rules! binary_string_array_op { - ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{ - match $LEFT.data_type() { - DataType::Utf8 => compute_utf8_op!($LEFT, $RIGHT, $OP, StringArray), - DataType::LargeUtf8 => compute_utf8_op!($LEFT, $RIGHT, $OP, LargeStringArray), - other => internal_err!( - "Data type {:?} not supported for binary operation '{}' on string arrays", - other, stringify!($OP) - ), - } - }}; -} - /// Invoke a boolean kernel on a pair of arrays macro_rules! boolean_op { ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{ @@ -662,7 +635,7 @@ impl BinaryExpr { BitwiseXor => bitwise_xor_dyn(left, right), BitwiseShiftRight => bitwise_shift_right_dyn(left, right), BitwiseShiftLeft => bitwise_shift_left_dyn(left, right), - StringConcat => binary_string_array_op!(left, right, concat_elements), + StringConcat => concat_elements(left, right), AtArrow | ArrowAt => { unreachable!("ArrowAt and AtArrow should be rewritten to function") } @@ -670,6 +643,28 @@ impl BinaryExpr { } } +fn concat_elements(left: Arc, right: Arc) -> Result { + Ok(match left.data_type() { + DataType::Utf8 => Arc::new(concat_elements_utf8( + left.as_string::(), + right.as_string::(), + )?), + DataType::LargeUtf8 => Arc::new(concat_elements_utf8( + left.as_string::(), + right.as_string::(), + )?), + DataType::Utf8View => Arc::new(concat_elements_utf8view( + left.as_string_view(), + right.as_string_view(), + )?), + other => { + return internal_err!( + "Data type {other:?} not supported for binary operation 'concat_elements' on string arrays" + ); + } + }) +} + /// Create a binary expression whose arguments are correctly coerced. /// This function errors if it is not possible to coerce the arguments /// to computational types supported by the operator. 
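As a quick, standalone sanity check of the Utf8View concatenation path added above, the following sketch (not part of this diff; the test module, its name, and the sample values are illustrative assumptions, written as if it lived next to the kernel in kernels.rs) builds two StringViewArrays with the same StringViewBuilder API used above and verifies that a null on either side produces a null output row:

#[cfg(test)]
mod concat_utf8view_sketch {
    use super::concat_elements_utf8view; // assumed path: alongside the kernel
    use arrow_array::builder::StringViewBuilder;
    use arrow_array::Array;

    #[test]
    fn concat_propagates_nulls() {
        let mut left = StringViewBuilder::with_capacity(2);
        left.append_value("hello, ");
        left.append_null();
        let left = left.finish();

        let mut right = StringViewBuilder::with_capacity(2);
        right.append_value("a string longer than twelve bytes");
        right.append_value("ignored");
        let right = right.finish();

        let out = concat_elements_utf8view(&left, &right).unwrap();
        // element-wise concatenation of the non-null pair
        assert_eq!(out.value(0), "hello, a string longer than twelve bytes");
        // a null on either side yields a null output row
        assert!(out.is_null(1));
    }
}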
@@ -2587,7 +2582,7 @@ mod tests { &a, &b, Operator::RegexIMatch, - regex_expected.clone(), + regex_expected, )?; apply_logic_op( &Arc::new(schema.clone()), @@ -2601,7 +2596,7 @@ mod tests { &a, &b, Operator::RegexNotIMatch, - regex_not_expected.clone(), + regex_not_expected, )?; Ok(()) diff --git a/datafusion/physical-expr/src/expressions/binary/kernels.rs b/datafusion/physical-expr/src/expressions/binary/kernels.rs index b0736e140fec..1f9cfed1a44f 100644 --- a/datafusion/physical-expr/src/expressions/binary/kernels.rs +++ b/datafusion/physical-expr/src/expressions/binary/kernels.rs @@ -27,6 +27,7 @@ use arrow::datatypes::DataType; use datafusion_common::internal_err; use datafusion_common::{Result, ScalarValue}; +use arrow_schema::ArrowError; use std::sync::Arc; /// Downcasts $LEFT and $RIGHT to $ARRAY_TYPE and then calls $KERNEL($LEFT, $RIGHT) @@ -131,3 +132,35 @@ create_dyn_scalar_kernel!(bitwise_or_dyn_scalar, bitwise_or_scalar); create_dyn_scalar_kernel!(bitwise_xor_dyn_scalar, bitwise_xor_scalar); create_dyn_scalar_kernel!(bitwise_shift_right_dyn_scalar, bitwise_shift_right_scalar); create_dyn_scalar_kernel!(bitwise_shift_left_dyn_scalar, bitwise_shift_left_scalar); + +pub fn concat_elements_utf8view( + left: &StringViewArray, + right: &StringViewArray, +) -> std::result::Result { + let capacity = left + .data_buffers() + .iter() + .zip(right.data_buffers().iter()) + .map(|(b1, b2)| b1.len() + b2.len()) + .sum(); + let mut result = StringViewBuilder::with_capacity(capacity); + + // Avoid reallocations by writing to a reused buffer (note we + // could be even more efficient r by creating the view directly + // here and avoid the buffer but that would be more complex) + let mut buffer = String::new(); + + for (left, right) in left.iter().zip(right.iter()) { + if let (Some(left), Some(right)) = (left, right) { + use std::fmt::Write; + buffer.clear(); + write!(&mut buffer, "{left}{right}") + .expect("writing into string buffer failed"); + result.append_value(&buffer); + } else { + // at least one of the values is null, so the output is also null + result.append_null() + } + } + Ok(result.finish()) +} diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs index c6afb5c05985..712175c9afbe 100644 --- a/datafusion/physical-expr/src/expressions/case.rs +++ b/datafusion/physical-expr/src/expressions/case.rs @@ -380,7 +380,7 @@ impl CaseExpr { // keep `else_expr`'s data type and return type consistent let e = self.else_expr.as_ref().unwrap(); - let expr = try_cast(Arc::clone(e), &batch.schema(), return_type.clone()) + let expr = try_cast(Arc::clone(e), &batch.schema(), return_type) .unwrap_or_else(|_| Arc::clone(e)); let else_ = Scalar::new(expr.evaluate(batch)?.into_array(1)?); diff --git a/datafusion/physical-expr/src/lib.rs b/datafusion/physical-expr/src/lib.rs index c4255172d680..7db7188b85d3 100644 --- a/datafusion/physical-expr/src/lib.rs +++ b/datafusion/physical-expr/src/lib.rs @@ -19,27 +19,7 @@ #![deny(clippy::clone_on_ref_ptr)] // Backward compatibility -pub mod aggregate { - pub(crate) mod groups_accumulator { - #[allow(unused_imports)] - pub(crate) mod accumulate { - pub use datafusion_functions_aggregate_common::aggregate::groups_accumulator::accumulate::NullState; - } - pub use datafusion_functions_aggregate_common::aggregate::groups_accumulator::{ - accumulate::NullState, GroupsAccumulatorAdapter, - }; - } - pub(crate) mod stats { - pub use datafusion_functions_aggregate_common::stats::StatsType; - } - pub mod utils { - 
pub use datafusion_functions_aggregate_common::utils::{ - adjust_output_array, down_cast_any_ref, get_accum_scalar_values_as_arrays, - get_sort_options, ordering_fields, DecimalAverager, Hashable, - }; - } - pub use datafusion_functions_aggregate_common::aggregate::AggregateExpr; -} +pub mod aggregate; pub mod analysis; pub mod binary_map { pub use datafusion_physical_expr_common::binary_map::{ArrowBytesSet, OutputType}; @@ -67,9 +47,6 @@ pub mod execution_props { pub use aggregate::groups_accumulator::{GroupsAccumulatorAdapter, NullState}; pub use analysis::{analyze, AnalysisContext, ExprBoundaries}; -pub use datafusion_functions_aggregate_common::aggregate::{ - AggregateExpr, AggregatePhysicalExpressions, -}; pub use equivalence::{calculate_union, ConstExpr, EquivalenceProperties}; pub use partitioning::{Distribution, Partitioning}; pub use physical_expr::{ diff --git a/datafusion/physical-expr/src/partitioning.rs b/datafusion/physical-expr/src/partitioning.rs index 6472dd47489c..45beeb7b81af 100644 --- a/datafusion/physical-expr/src/partitioning.rs +++ b/datafusion/physical-expr/src/partitioning.rs @@ -24,8 +24,8 @@ use crate::{physical_exprs_equal, EquivalenceProperties, PhysicalExpr}; /// Output partitioning supported by [`ExecutionPlan`]s. /// -/// When `executed`, `ExecutionPlan`s produce one or more independent stream of -/// data batches in parallel, referred to as partitions. The streams are Rust +/// Calling [`ExecutionPlan::execute`] produce one or more independent streams of +/// [`RecordBatch`]es in parallel, referred to as partitions. The streams are Rust /// `async` [`Stream`]s (a special kind of future). The number of output /// partitions varies based on the input and the operation performed. /// @@ -102,6 +102,8 @@ use crate::{physical_exprs_equal, EquivalenceProperties, PhysicalExpr}; /// Plans such as `FilterExec` produce the same number of output streams /// (partitions) as input streams (partitions). /// +/// [`RecordBatch`]: arrow::record_batch::RecordBatch +/// [`ExecutionPlan::execute`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.ExecutionPlan.html#tymethod.execute /// [`ExecutionPlan`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.ExecutionPlan.html /// [`Stream`]: https://docs.rs/futures/latest/futures/stream/trait.Stream.html #[derive(Debug, Clone)] diff --git a/datafusion/physical-expr/src/window/aggregate.rs b/datafusion/physical-expr/src/window/aggregate.rs index 52015f425217..5439e140502a 100644 --- a/datafusion/physical-expr/src/window/aggregate.rs +++ b/datafusion/physical-expr/src/window/aggregate.rs @@ -29,20 +29,19 @@ use datafusion_common::ScalarValue; use datafusion_common::{DataFusionError, Result}; use datafusion_expr::{Accumulator, WindowFrame}; +use crate::aggregate::AggregateFunctionExpr; use crate::window::window_expr::AggregateWindowExpr; use crate::window::{ PartitionBatches, PartitionWindowAggStates, SlidingAggregateWindowExpr, WindowExpr, }; -use crate::{ - expressions::PhysicalSortExpr, reverse_order_bys, AggregateExpr, PhysicalExpr, -}; +use crate::{expressions::PhysicalSortExpr, reverse_order_bys, PhysicalExpr}; /// A window expr that takes the form of an aggregate function. /// /// See comments on [`WindowExpr`] for more details. 
#[derive(Debug)] pub struct PlainAggregateWindowExpr { - aggregate: Arc, + aggregate: Arc, partition_by: Vec>, order_by: Vec, window_frame: Arc, @@ -51,7 +50,7 @@ pub struct PlainAggregateWindowExpr { impl PlainAggregateWindowExpr { /// Create a new aggregate window function expression pub fn new( - aggregate: Arc, + aggregate: Arc, partition_by: &[Arc], order_by: &[PhysicalSortExpr], window_frame: Arc, @@ -65,7 +64,7 @@ impl PlainAggregateWindowExpr { } /// Get aggregate expr of AggregateWindowExpr - pub fn get_aggregate_expr(&self) -> &Arc { + pub fn get_aggregate_expr(&self) -> &Arc { &self.aggregate } } diff --git a/datafusion/physical-expr/src/window/sliding_aggregate.rs b/datafusion/physical-expr/src/window/sliding_aggregate.rs index afa799e86953..ac3a4f4c09ec 100644 --- a/datafusion/physical-expr/src/window/sliding_aggregate.rs +++ b/datafusion/physical-expr/src/window/sliding_aggregate.rs @@ -28,13 +28,12 @@ use arrow::record_batch::RecordBatch; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{Accumulator, WindowFrame}; +use crate::aggregate::AggregateFunctionExpr; use crate::window::window_expr::AggregateWindowExpr; use crate::window::{ PartitionBatches, PartitionWindowAggStates, PlainAggregateWindowExpr, WindowExpr, }; -use crate::{ - expressions::PhysicalSortExpr, reverse_order_bys, AggregateExpr, PhysicalExpr, -}; +use crate::{expressions::PhysicalSortExpr, reverse_order_bys, PhysicalExpr}; /// A window expr that takes the form of an aggregate function that /// can be incrementally computed over sliding windows. @@ -42,7 +41,7 @@ use crate::{ /// See comments on [`WindowExpr`] for more details. #[derive(Debug)] pub struct SlidingAggregateWindowExpr { - aggregate: Arc, + aggregate: Arc, partition_by: Vec>, order_by: Vec, window_frame: Arc, @@ -51,7 +50,7 @@ pub struct SlidingAggregateWindowExpr { impl SlidingAggregateWindowExpr { /// Create a new (sliding) aggregate window function expression. pub fn new( - aggregate: Arc, + aggregate: Arc, partition_by: &[Arc], order_by: &[PhysicalSortExpr], window_frame: Arc, @@ -64,8 +63,8 @@ impl SlidingAggregateWindowExpr { } } - /// Get the [AggregateExpr] of this object. - pub fn get_aggregate_expr(&self) -> &Arc { + /// Get the [AggregateFunctionExpr] of this object. 
+ pub fn get_aggregate_expr(&self) -> &Arc { &self.aggregate } } diff --git a/datafusion/physical-optimizer/src/aggregate_statistics.rs b/datafusion/physical-optimizer/src/aggregate_statistics.rs index 66b250c5063b..2b8725b5bac7 100644 --- a/datafusion/physical-optimizer/src/aggregate_statistics.rs +++ b/datafusion/physical-optimizer/src/aggregate_statistics.rs @@ -23,7 +23,7 @@ use datafusion_common::scalar::ScalarValue; use datafusion_common::Result; use datafusion_physical_plan::aggregates::AggregateExec; use datafusion_physical_plan::projection::ProjectionExec; -use datafusion_physical_plan::{expressions, AggregateExpr, ExecutionPlan, Statistics}; +use datafusion_physical_plan::{expressions, ExecutionPlan, Statistics}; use crate::PhysicalOptimizerRule; use datafusion_common::stats::Precision; @@ -58,12 +58,12 @@ impl PhysicalOptimizerRule for AggregateStatistics { let mut projections = vec![]; for expr in partial_agg_exec.aggr_expr() { if let Some((non_null_rows, name)) = - take_optimizable_column_and_table_count(&**expr, &stats) + take_optimizable_column_and_table_count(expr, &stats) { projections.push((expressions::lit(non_null_rows), name.to_owned())); - } else if let Some((min, name)) = take_optimizable_min(&**expr, &stats) { + } else if let Some((min, name)) = take_optimizable_min(expr, &stats) { projections.push((expressions::lit(min), name.to_owned())); - } else if let Some((max, name)) = take_optimizable_max(&**expr, &stats) { + } else if let Some((max, name)) = take_optimizable_max(expr, &stats) { projections.push((expressions::lit(max), name.to_owned())); } else { // TODO: we need all aggr_expr to be resolved (cf TODO fullres) @@ -137,7 +137,7 @@ fn take_optimizable(node: &dyn ExecutionPlan) -> Option> /// If this agg_expr is a count that can be exactly derived from the statistics, return it. fn take_optimizable_column_and_table_count( - agg_expr: &dyn AggregateExpr, + agg_expr: &AggregateFunctionExpr, stats: &Statistics, ) -> Option<(ScalarValue, String)> { let col_stats = &stats.column_statistics; @@ -174,7 +174,7 @@ fn take_optimizable_column_and_table_count( /// If this agg_expr is a min that is exactly defined in the statistics, return it. fn take_optimizable_min( - agg_expr: &dyn AggregateExpr, + agg_expr: &AggregateFunctionExpr, stats: &Statistics, ) -> Option<(ScalarValue, String)> { if let Precision::Exact(num_rows) = &stats.num_rows { @@ -220,7 +220,7 @@ fn take_optimizable_min( /// If this agg_expr is a max that is exactly defined in the statistics, return it. 
fn take_optimizable_max( - agg_expr: &dyn AggregateExpr, + agg_expr: &AggregateFunctionExpr, stats: &Statistics, ) -> Option<(ScalarValue, String)> { if let Precision::Exact(num_rows) = &stats.num_rows { @@ -266,33 +266,27 @@ fn take_optimizable_max( // TODO: Move this check into AggregateUDFImpl // https://github.com/apache/datafusion/issues/11153 -fn is_non_distinct_count(agg_expr: &dyn AggregateExpr) -> bool { - if let Some(agg_expr) = agg_expr.as_any().downcast_ref::() { - if agg_expr.fun().name() == "count" && !agg_expr.is_distinct() { - return true; - } +fn is_non_distinct_count(agg_expr: &AggregateFunctionExpr) -> bool { + if agg_expr.fun().name() == "count" && !agg_expr.is_distinct() { + return true; } false } // TODO: Move this check into AggregateUDFImpl // https://github.com/apache/datafusion/issues/11153 -fn is_min(agg_expr: &dyn AggregateExpr) -> bool { - if let Some(agg_expr) = agg_expr.as_any().downcast_ref::() { - if agg_expr.fun().name().to_lowercase() == "min" { - return true; - } +fn is_min(agg_expr: &AggregateFunctionExpr) -> bool { + if agg_expr.fun().name().to_lowercase() == "min" { + return true; } false } // TODO: Move this check into AggregateUDFImpl // https://github.com/apache/datafusion/issues/11153 -fn is_max(agg_expr: &dyn AggregateExpr) -> bool { - if let Some(agg_expr) = agg_expr.as_any().downcast_ref::() { - if agg_expr.fun().name().to_lowercase() == "max" { - return true; - } +fn is_max(agg_expr: &AggregateFunctionExpr) -> bool { + if agg_expr.fun().name().to_lowercase() == "max" { + return true; } false } diff --git a/datafusion/physical-optimizer/src/limited_distinct_aggregation.rs b/datafusion/physical-optimizer/src/limited_distinct_aggregation.rs index e18e530072db..8653ad19da77 100644 --- a/datafusion/physical-optimizer/src/limited_distinct_aggregation.rs +++ b/datafusion/physical-optimizer/src/limited_distinct_aggregation.rs @@ -33,8 +33,8 @@ use itertools::Itertools; /// An optimizer rule that passes a `limit` hint into grouped aggregations which don't require all /// rows in the group to be processed for correctness. 
Example queries fitting this description are: -/// `SELECT distinct l_orderkey FROM lineitem LIMIT 10;` -/// `SELECT l_orderkey FROM lineitem GROUP BY l_orderkey LIMIT 10;` +/// - `SELECT distinct l_orderkey FROM lineitem LIMIT 10;` +/// - `SELECT l_orderkey FROM lineitem GROUP BY l_orderkey LIMIT 10;` pub struct LimitedDistinctAggregation {} impl LimitedDistinctAggregation { diff --git a/datafusion/physical-optimizer/src/output_requirements.rs b/datafusion/physical-optimizer/src/output_requirements.rs index fdfdd349e36e..4f6f91a2348f 100644 --- a/datafusion/physical-optimizer/src/output_requirements.rs +++ b/datafusion/physical-optimizer/src/output_requirements.rs @@ -165,7 +165,7 @@ impl ExecutionPlan for OutputRequirementExec { vec![&self.input] } - fn required_input_ordering(&self) -> Vec>> { + fn required_input_ordering(&self) -> Vec> { vec![self.order_requirement.clone()] } diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index 78da4dc9c53f..24387c5f15ee 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -55,7 +55,6 @@ datafusion-functions-aggregate = { workspace = true } datafusion-functions-aggregate-common = { workspace = true } datafusion-physical-expr = { workspace = true, default-features = true } datafusion-physical-expr-common = { workspace = true } -datafusion-physical-expr-functions-aggregate = { workspace = true } futures = { workspace = true } half = { workspace = true } hashbrown = { workspace = true } diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 89d4c452cca6..0f33a9d7b992 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -42,10 +42,11 @@ use datafusion_expr::Accumulator; use datafusion_physical_expr::{ equivalence::{collapse_lex_req, ProjectionMapping}, expressions::{Column, UnKnownColumn}, - physical_exprs_contains, AggregateExpr, EquivalenceProperties, LexOrdering, - LexRequirement, PhysicalExpr, PhysicalSortRequirement, + physical_exprs_contains, EquivalenceProperties, LexOrdering, LexRequirement, + PhysicalExpr, PhysicalSortRequirement, }; +use datafusion_physical_expr::aggregate::AggregateFunctionExpr; use itertools::Itertools; pub mod group_values; @@ -110,9 +111,9 @@ impl AggregateMode { /// Represents `GROUP BY` clause in the plan (including the more general GROUPING SET) /// In the case of a simple `GROUP BY a, b` clause, this will contain the expression [a, b] /// and a single group [false, false]. -/// In the case of `GROUP BY GROUPING SET/CUBE/ROLLUP` the planner will expand the expression +/// In the case of `GROUP BY GROUPING SETS/CUBE/ROLLUP` the planner will expand the expression /// into multiple groups, using null expressions to align each group. -/// For example, with a group by clause `GROUP BY GROUPING SET ((a,b),(a),(b))` the planner should +/// For example, with a group by clause `GROUP BY GROUPING SETS ((a,b),(a),(b))` the planner should /// create a `PhysicalGroupBy` like /// ```text /// PhysicalGroupBy { @@ -133,7 +134,7 @@ pub struct PhysicalGroupBy { null_expr: Vec<(Arc, String)>, /// Null mask for each group in this grouping set. Each group is /// composed of either one of the group expressions in expr or a null - /// expression in null_expr. If `groups[i][j]` is true, then the the + /// expression in null_expr. If `groups[i][j]` is true, then the /// j-th expression in the i-th group is NULL, otherwise it is `expr[j]`. 
groups: Vec>, } @@ -253,7 +254,7 @@ pub struct AggregateExec { /// Group by expressions group_by: PhysicalGroupBy, /// Aggregate expressions - aggr_expr: Vec>, + aggr_expr: Vec>, /// FILTER (WHERE clause) expression for each aggregate expression filter_expr: Vec>>, /// Set if the output of this aggregation is truncated by a upstream sort/limit clause @@ -280,7 +281,10 @@ impl AggregateExec { /// Function used in `ConvertFirstLast` optimizer rule, /// where we need parts of the new value, others cloned from the old one /// Rewrites aggregate exec with new aggregate expressions. - pub fn with_new_aggr_exprs(&self, aggr_expr: Vec>) -> Self { + pub fn with_new_aggr_exprs( + &self, + aggr_expr: Vec>, + ) -> Self { Self { aggr_expr, // clone the rest of the fields @@ -306,7 +310,7 @@ impl AggregateExec { pub fn try_new( mode: AggregateMode, group_by: PhysicalGroupBy, - aggr_expr: Vec>, + aggr_expr: Vec>, filter_expr: Vec>>, input: Arc, input_schema: SchemaRef, @@ -343,7 +347,7 @@ impl AggregateExec { fn try_new_with_schema( mode: AggregateMode, group_by: PhysicalGroupBy, - mut aggr_expr: Vec>, + mut aggr_expr: Vec>, filter_expr: Vec>>, input: Arc, input_schema: SchemaRef, @@ -451,7 +455,7 @@ impl AggregateExec { } /// Aggregate expressions - pub fn aggr_expr(&self) -> &[Arc] { + pub fn aggr_expr(&self) -> &[Arc] { &self.aggr_expr } @@ -788,7 +792,7 @@ impl ExecutionPlan for AggregateExec { fn create_schema( input_schema: &Schema, group_expr: &[(Arc, String)], - aggr_expr: &[Arc], + aggr_expr: &[Arc], contains_null_expr: bool, mode: AggregateMode, ) -> Result { @@ -834,7 +838,7 @@ fn group_schema(schema: &Schema, group_count: usize) -> SchemaRef { /// /// # Parameters /// -/// - `aggr_expr`: A reference to an `Arc` representing the +/// - `aggr_expr`: A reference to an `Arc` representing the /// aggregate expression. /// - `group_by`: A reference to a `PhysicalGroupBy` instance representing the /// physical GROUP BY expression. @@ -846,7 +850,7 @@ fn group_schema(schema: &Schema, group_count: usize) -> SchemaRef { /// A `LexOrdering` instance indicating the lexical ordering requirement for /// the aggregate expression. fn get_aggregate_expr_req( - aggr_expr: &Arc, + aggr_expr: &Arc, group_by: &PhysicalGroupBy, agg_mode: &AggregateMode, ) -> LexOrdering { @@ -894,7 +898,7 @@ fn get_aggregate_expr_req( /// the aggregator requirement is incompatible. fn finer_ordering( existing_req: &LexOrdering, - aggr_expr: &Arc, + aggr_expr: &Arc, group_by: &PhysicalGroupBy, eq_properties: &EquivalenceProperties, agg_mode: &AggregateMode, @@ -912,7 +916,7 @@ pub fn concat_slices(lhs: &[T], rhs: &[T]) -> Vec { /// /// # Parameters /// -/// - `aggr_exprs`: A slice of `Arc` containing all the +/// - `aggr_exprs`: A slice of `Arc` containing all the /// aggregate expressions. /// - `group_by`: A reference to a `PhysicalGroupBy` instance representing the /// physical GROUP BY expression. @@ -926,7 +930,7 @@ pub fn concat_slices(lhs: &[T], rhs: &[T]) -> Vec { /// A `LexRequirement` instance, which is the requirement that satisfies all the /// aggregate requirements. Returns an error in case of conflicting requirements. 
pub fn get_finer_aggregate_exprs_requirement( - aggr_exprs: &mut [Arc], + aggr_exprs: &mut [Arc], group_by: &PhysicalGroupBy, eq_properties: &EquivalenceProperties, agg_mode: &AggregateMode, @@ -996,10 +1000,10 @@ pub fn get_finer_aggregate_exprs_requirement( /// returns physical expressions for arguments to evaluate against a batch /// The expressions are different depending on `mode`: -/// * Partial: AggregateExpr::expressions -/// * Final: columns of `AggregateExpr::state_fields()` +/// * Partial: AggregateFunctionExpr::expressions +/// * Final: columns of `AggregateFunctionExpr::state_fields()` pub fn aggregate_expressions( - aggr_expr: &[Arc], + aggr_expr: &[Arc], mode: &AggregateMode, col_idx_base: usize, ) -> Result>>> { @@ -1035,12 +1039,12 @@ pub fn aggregate_expressions( } /// uses `state_fields` to build a vec of physical column expressions required to merge the -/// AggregateExpr' accumulator's state. +/// AggregateFunctionExpr' accumulator's state. /// /// `index_base` is the starting physical column index for the next expanded state field. fn merge_expressions( index_base: usize, - expr: &Arc, + expr: &Arc, ) -> Result>> { expr.state_fields().map(|fields| { fields @@ -1054,7 +1058,7 @@ fn merge_expressions( pub type AccumulatorItem = Box; pub fn create_accumulators( - aggr_expr: &[Arc], + aggr_expr: &[Arc], ) -> Result> { aggr_expr .iter() @@ -1208,7 +1212,7 @@ mod tests { }; use datafusion_execution::config::SessionConfig; use datafusion_execution::memory_pool::FairSpillPool; - use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; + use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_functions_aggregate::array_agg::array_agg_udaf; use datafusion_functions_aggregate::average::avg_udaf; use datafusion_functions_aggregate::count::count_udaf; @@ -1218,8 +1222,8 @@ mod tests { use datafusion_physical_expr::PhysicalSortExpr; use crate::common::collect; + use datafusion_physical_expr::aggregate::AggregateExprBuilder; use datafusion_physical_expr::expressions::Literal; - use datafusion_physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; use futures::{FutureExt, Stream}; // Generate a schema which consists of 5 columns (a, b, c, d, e) @@ -1320,11 +1324,10 @@ mod tests { fn new_spill_ctx(batch_size: usize, max_memory: usize) -> Arc { let session_config = SessionConfig::new().with_batch_size(batch_size); let runtime = Arc::new( - RuntimeEnv::new( - RuntimeConfig::default() - .with_memory_pool(Arc::new(FairSpillPool::new(max_memory))), - ) - .unwrap(), + RuntimeEnvBuilder::default() + .with_memory_pool(Arc::new(FairSpillPool::new(max_memory))) + .build() + .unwrap(), ); let task_ctx = TaskContext::default() .with_session_config(session_config) @@ -1496,13 +1499,12 @@ mod tests { groups: vec![vec![false]], }; - let aggregates: Vec> = - vec![ - AggregateExprBuilder::new(avg_udaf(), vec![col("b", &input_schema)?]) - .schema(Arc::clone(&input_schema)) - .alias("AVG(b)") - .build()?, - ]; + let aggregates: Vec> = vec![ + AggregateExprBuilder::new(avg_udaf(), vec![col("b", &input_schema)?]) + .schema(Arc::clone(&input_schema)) + .alias("AVG(b)") + .build()?, + ]; let task_ctx = if spill { // set to an appropriate value to trigger spill @@ -1793,7 +1795,7 @@ mod tests { } // Median(a) - fn test_median_agg_expr(schema: SchemaRef) -> Result> { + fn test_median_agg_expr(schema: SchemaRef) -> Result> { AggregateExprBuilder::new(median_udaf(), vec![col("a", &schema)?]) .schema(schema) .alias("MEDIAN(a)") @@ -1806,7 +1808,9 @@ mod tests { let 
input_schema = input.schema(); let runtime = Arc::new( - RuntimeEnv::new(RuntimeConfig::default().with_memory_limit(1, 1.0)).unwrap(), + RuntimeEnvBuilder::default() + .with_memory_limit(1, 1.0) + .build()?, ); let task_ctx = TaskContext::default().with_runtime(runtime); let task_ctx = Arc::new(task_ctx); @@ -1819,17 +1823,16 @@ mod tests { }; // something that allocates within the aggregator - let aggregates_v0: Vec> = + let aggregates_v0: Vec> = vec![test_median_agg_expr(Arc::clone(&input_schema))?]; // use fast-path in `row_hash.rs`. - let aggregates_v2: Vec> = - vec![ - AggregateExprBuilder::new(avg_udaf(), vec![col("b", &input_schema)?]) - .schema(Arc::clone(&input_schema)) - .alias("AVG(b)") - .build()?, - ]; + let aggregates_v2: Vec> = vec![ + AggregateExprBuilder::new(avg_udaf(), vec![col("b", &input_schema)?]) + .schema(Arc::clone(&input_schema)) + .alias("AVG(b)") + .build()?, + ]; for (version, groups, aggregates) in [ (0, groups_none, aggregates_v0), @@ -1883,13 +1886,12 @@ mod tests { let groups = PhysicalGroupBy::default(); - let aggregates: Vec> = - vec![ - AggregateExprBuilder::new(avg_udaf(), vec![col("a", &schema)?]) - .schema(Arc::clone(&schema)) - .alias("AVG(a)") - .build()?, - ]; + let aggregates: Vec> = vec![ + AggregateExprBuilder::new(avg_udaf(), vec![col("a", &schema)?]) + .schema(Arc::clone(&schema)) + .alias("AVG(a)") + .build()?, + ]; let blocking_exec = Arc::new(BlockingExec::new(Arc::clone(&schema), 1)); let refs = blocking_exec.refs(); @@ -1923,13 +1925,12 @@ mod tests { let groups = PhysicalGroupBy::new_single(vec![(col("a", &schema)?, "a".to_string())]); - let aggregates: Vec> = - vec![ - AggregateExprBuilder::new(avg_udaf(), vec![col("b", &schema)?]) - .schema(Arc::clone(&schema)) - .alias("AVG(b)") - .build()?, - ]; + let aggregates: Vec> = vec![ + AggregateExprBuilder::new(avg_udaf(), vec![col("b", &schema)?]) + .schema(Arc::clone(&schema)) + .alias("AVG(b)") + .build()?, + ]; let blocking_exec = Arc::new(BlockingExec::new(Arc::clone(&schema), 1)); let refs = blocking_exec.refs(); @@ -1974,7 +1975,7 @@ mod tests { fn test_first_value_agg_expr( schema: &Schema, sort_options: SortOptions, - ) -> Result> { + ) -> Result> { let ordering_req = [PhysicalSortExpr { expr: col("b", schema)?, options: sort_options, @@ -1992,7 +1993,7 @@ mod tests { fn test_last_value_agg_expr( schema: &Schema, sort_options: SortOptions, - ) -> Result> { + ) -> Result> { let ordering_req = [PhysicalSortExpr { expr: col("b", schema)?, options: sort_options, @@ -2047,7 +2048,7 @@ mod tests { descending: false, nulls_first: false, }; - let aggregates: Vec> = if is_first_acc { + let aggregates: Vec> = if is_first_acc { vec![test_first_value_agg_expr(&schema, sort_options)?] } else { vec![test_last_value_agg_expr(&schema, sort_options)?] 
@@ -2179,6 +2180,7 @@ mod tests { .map(|order_by_expr| { let ordering_req = order_by_expr.unwrap_or_default(); AggregateExprBuilder::new(array_agg_udaf(), vec![Arc::clone(col_a)]) + .alias("a") .order_by(ordering_req.to_vec()) .schema(Arc::clone(&test_schema)) .build() @@ -2211,7 +2213,7 @@ mod tests { }; let groups = PhysicalGroupBy::new_single(vec![(col_a, "a".to_string())]); - let aggregates: Vec> = vec![ + let aggregates: Vec> = vec![ test_first_value_agg_expr(&schema, option_desc)?, test_last_value_agg_expr(&schema, option_desc)?, ]; @@ -2219,7 +2221,7 @@ mod tests { let aggregate_exec = Arc::new(AggregateExec::try_new( AggregateMode::Partial, groups, - aggregates.clone(), + aggregates, vec![None, None], Arc::clone(&blocking_exec) as Arc, schema, @@ -2269,7 +2271,7 @@ mod tests { ], ); - let aggregates: Vec> = + let aggregates: Vec> = vec![AggregateExprBuilder::new(count_udaf(), vec![lit(1)]) .schema(Arc::clone(&schema)) .alias("1") diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index b3221752d034..d022bb007d9b 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -47,10 +47,9 @@ use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_execution::TaskContext; use datafusion_expr::{EmitTo, GroupsAccumulator}; use datafusion_physical_expr::expressions::Column; -use datafusion_physical_expr::{ - AggregateExpr, GroupsAccumulatorAdapter, PhysicalSortExpr, -}; +use datafusion_physical_expr::{GroupsAccumulatorAdapter, PhysicalSortExpr}; +use datafusion_physical_expr::aggregate::AggregateFunctionExpr; use futures::ready; use futures::stream::{Stream, StreamExt}; use log::debug; @@ -76,35 +75,45 @@ use super::AggregateExec; /// This encapsulates the spilling state struct SpillState { - /// If data has previously been spilled, the locations of the - /// spill files (in Arrow IPC format) - spills: Vec, - + // ======================================================================== + // PROPERTIES: + // These fields are initialized at the start and remain constant throughout + // the execution. + // ======================================================================== /// Sorting expression for spilling batches spill_expr: Vec, /// Schema for spilling batches spill_schema: SchemaRef, - /// true when streaming merge is in progress - is_stream_merging: bool, - /// aggregate_arguments for merging spilled data merging_aggregate_arguments: Vec>>, /// GROUP BY expressions for merging spilled data merging_group_by: PhysicalGroupBy, + + // ======================================================================== + // STATES: + // Fields changes during execution. 
Can be buffer, or state flags that + // influence the exeuction in parent `GroupedHashAggregateStream` + // ======================================================================== + /// If data has previously been spilled, the locations of the + /// spill files (in Arrow IPC format) + spills: Vec, + + /// true when streaming merge is in progress + is_stream_merging: bool, } /// Tracks if the aggregate should skip partial aggregations /// /// See "partial aggregation" discussion on [`GroupedHashAggregateStream`] struct SkipAggregationProbe { - /// Number of processed input rows (updated during probing) - input_rows: usize, - /// Number of total group values for `input_rows` (updated during probing) - num_groups: usize, - + // ======================================================================== + // PROPERTIES: + // These fields are initialized at the start and remain constant throughout + // the execution. + // ======================================================================== /// Aggregation ratio check performed when the number of input rows exceeds /// this threshold (from `SessionConfig`) probe_rows_threshold: usize, @@ -113,6 +122,16 @@ struct SkipAggregationProbe { /// is skipped and input rows are directly converted to output probe_ratio_threshold: f64, + // ======================================================================== + // STATES: + // Fields changes during execution. Can be buffer, or state flags that + // influence the exeuction in parent `GroupedHashAggregateStream` + // ======================================================================== + /// Number of processed input rows (updated during probing) + input_rows: usize, + /// Number of total group values for `input_rows` (updated during probing) + num_groups: usize, + /// Flag indicating further data aggregation may be skipped (decision made /// when probing complete) should_skip: bool, @@ -316,17 +335,15 @@ impl SkipAggregationProbe { /// └─────────────────┘ └─────────────────┘ /// ``` pub(crate) struct GroupedHashAggregateStream { + // ======================================================================== + // PROPERTIES: + // These fields are initialized at the start and remain constant throughout + // the execution. + // ======================================================================== schema: SchemaRef, input: SendableRecordBatchStream, mode: AggregateMode, - /// Accumulators, one for each `AggregateExpr` in the query - /// - /// For example, if the query has aggregates, `SUM(x)`, - /// `COUNT(y)`, there will be two accumulators, each one - /// specialized for that particular aggregate and its input types - accumulators: Vec>, - /// Arguments to pass to each accumulator. /// /// The arguments in `accumulator[i]` is passed `aggregate_arguments[i]` @@ -347,9 +364,30 @@ pub(crate) struct GroupedHashAggregateStream { /// GROUP BY expressions group_by: PhysicalGroupBy, - /// The memory reservation for this grouping - reservation: MemoryReservation, + /// max rows in output RecordBatches + batch_size: usize, + + /// Optional soft limit on the number of `group_values` in a batch + /// If the number of `group_values` in a single batch exceeds this value, + /// the `GroupedHashAggregateStream` operation immediately switches to + /// output mode and emits all groups. + group_values_soft_limit: Option, + // ======================================================================== + // STATE FLAGS: + // These fields will be updated during the execution. And control the flow of + // the execution. 
+ // ======================================================================== + /// Tracks if this stream is generating input or output + exec_state: ExecutionState, + + /// Have we seen the end of the input + input_done: bool, + + // ======================================================================== + // STATE BUFFERS: + // These fields will accumulate intermediate results during the execution. + // ======================================================================== /// An interning store of group keys group_values: Box, @@ -357,38 +395,41 @@ pub(crate) struct GroupedHashAggregateStream { /// processed. Reused across batches here to avoid reallocations current_group_indices: Vec, - /// Tracks if this stream is generating input or output - exec_state: ExecutionState, - - /// Execution metrics - baseline_metrics: BaselineMetrics, - - /// max rows in output RecordBatches - batch_size: usize, + /// Accumulators, one for each `AggregateFunctionExpr` in the query + /// + /// For example, if the query has aggregates, `SUM(x)`, + /// `COUNT(y)`, there will be two accumulators, each one + /// specialized for that particular aggregate and its input types + accumulators: Vec>, + // ======================================================================== + // TASK-SPECIFIC STATES: + // Inner states groups together properties, states for a specific task. + // ======================================================================== /// Optional ordering information, that might allow groups to be /// emitted from the hash table prior to seeing the end of the /// input group_ordering: GroupOrdering, - /// Have we seen the end of the input - input_done: bool, - - /// The [`RuntimeEnv`] associated with the [`TaskContext`] argument - runtime: Arc, - /// The spill state object spill_state: SpillState, - /// Optional soft limit on the number of `group_values` in a batch - /// If the number of `group_values` in a single batch exceeds this value, - /// the `GroupedHashAggregateStream` operation immediately switches to - /// output mode and emits all groups. - group_values_soft_limit: Option, - /// Optional probe for skipping data aggregation, if supported by /// current stream. skip_aggregation_probe: Option, + + // ======================================================================== + // EXECUTION RESOURCES: + // Fields related to managing execution resources and monitoring performance. + // ======================================================================== + /// The memory reservation for this grouping + reservation: MemoryReservation, + + /// Execution metrics + baseline_metrics: BaselineMetrics, + + /// The [`RuntimeEnv`] associated with the [`TaskContext`] argument + runtime: Arc, } impl GroupedHashAggregateStream { @@ -537,7 +578,7 @@ impl GroupedHashAggregateStream { /// that is supported by the aggregate, or a /// [`GroupsAccumulatorAdapter`] if not. pub(crate) fn create_group_accumulator( - agg_expr: &Arc, + agg_expr: &Arc, ) -> Result> { if agg_expr.groups_accumulator_supported() { agg_expr.create_groups_accumulator() diff --git a/datafusion/physical-plan/src/coalesce/mod.rs b/datafusion/physical-plan/src/coalesce/mod.rs new file mode 100644 index 000000000000..5befa5ecda99 --- /dev/null +++ b/datafusion/physical-plan/src/coalesce/mod.rs @@ -0,0 +1,588 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::compute::concat_batches; +use arrow_array::builder::StringViewBuilder; +use arrow_array::cast::AsArray; +use arrow_array::{Array, ArrayRef, RecordBatch}; +use arrow_schema::SchemaRef; +use std::sync::Arc; + +/// Concatenate multiple [`RecordBatch`]es +/// +/// `BatchCoalescer` concatenates multiple small [`RecordBatch`]es, produced by +/// operations such as `FilterExec` and `RepartitionExec`, into larger ones for +/// more efficient processing by subsequent operations. +/// +/// # Background +/// +/// Generally speaking, larger [`RecordBatch`]es are more efficient to process +/// than smaller record batches (until the CPU cache is exceeded) because there +/// is fixed processing overhead per batch. DataFusion tries to operate on +/// batches of `target_batch_size` rows to amortize this overhead +/// +/// ```text +/// ┌────────────────────┐ +/// │ RecordBatch │ +/// │ num_rows = 23 │ +/// └────────────────────┘ ┌────────────────────┐ +/// │ │ +/// ┌────────────────────┐ Coalesce │ │ +/// │ │ Batches │ │ +/// │ RecordBatch │ │ │ +/// │ num_rows = 50 │ ─ ─ ─ ─ ─ ─ ▶ │ │ +/// │ │ │ RecordBatch │ +/// │ │ │ num_rows = 106 │ +/// └────────────────────┘ │ │ +/// │ │ +/// ┌────────────────────┐ │ │ +/// │ │ │ │ +/// │ RecordBatch │ │ │ +/// │ num_rows = 33 │ └────────────────────┘ +/// │ │ +/// └────────────────────┘ +/// ``` +/// +/// # Notes: +/// +/// 1. Output rows are produced in the same order as the input rows +/// +/// 2. The output is a sequence of batches, with all but the last being at least +/// `target_batch_size` rows. +/// +/// 3. Eventually this may also be able to handle other optimizations such as a +/// combined filter/coalesce operation. +/// +#[derive(Debug)] +pub struct BatchCoalescer { + /// The input schema + schema: SchemaRef, + /// Minimum number of rows for coalesces batches + target_batch_size: usize, + /// Total number of rows returned so far + total_rows: usize, + /// Buffered batches + buffer: Vec, + /// Buffered row count + buffered_rows: usize, + /// Limit: maximum number of rows to fetch, `None` means fetch all rows + fetch: Option, +} + +impl BatchCoalescer { + /// Create a new `BatchCoalescer` + /// + /// # Arguments + /// - `schema` - the schema of the output batches + /// - `target_batch_size` - the minimum number of rows for each + /// output batch (until limit reached) + /// - `fetch` - the maximum number of rows to fetch, `None` means fetch all rows + pub fn new( + schema: SchemaRef, + target_batch_size: usize, + fetch: Option, + ) -> Self { + Self { + schema, + target_batch_size, + total_rows: 0, + buffer: vec![], + buffered_rows: 0, + fetch, + } + } + + /// Return the schema of the output batches + pub fn schema(&self) -> SchemaRef { + Arc::clone(&self.schema) + } + + /// Push next batch, and returns [`CoalescerState`] indicating the current + /// state of the buffer. 
+ pub fn push_batch(&mut self, batch: RecordBatch) -> CoalescerState { + let batch = gc_string_view_batch(&batch); + if self.limit_reached(&batch) { + CoalescerState::LimitReached + } else if self.target_reached(batch) { + CoalescerState::TargetReached + } else { + CoalescerState::Continue + } + } + + /// Return true if there is no data buffered + pub fn is_empty(&self) -> bool { + self.buffer.is_empty() + } + + /// Checks if the buffer will reach the specified limit after getting + /// `batch`. + /// + /// If fetch would be exceeded, slices the received batch, updates the + /// buffer with it, and returns `true`. + /// + /// Otherwise: does nothing and returns `false`. + fn limit_reached(&mut self, batch: &RecordBatch) -> bool { + match self.fetch { + Some(fetch) if self.total_rows + batch.num_rows() >= fetch => { + // Limit is reached + let remaining_rows = fetch - self.total_rows; + debug_assert!(remaining_rows > 0); + + let batch = batch.slice(0, remaining_rows); + self.buffered_rows += batch.num_rows(); + self.total_rows = fetch; + self.buffer.push(batch); + true + } + _ => false, + } + } + + /// Updates the buffer with the given batch. + /// + /// If the target batch size is reached, returns `true`. Otherwise, returns + /// `false`. + fn target_reached(&mut self, batch: RecordBatch) -> bool { + if batch.num_rows() == 0 { + false + } else { + self.total_rows += batch.num_rows(); + self.buffered_rows += batch.num_rows(); + self.buffer.push(batch); + self.buffered_rows >= self.target_batch_size + } + } + + /// Concatenates and returns all buffered batches, and clears the buffer. + pub fn finish_batch(&mut self) -> datafusion_common::Result { + let batch = concat_batches(&self.schema, &self.buffer)?; + self.buffer.clear(); + self.buffered_rows = 0; + Ok(batch) + } +} + +/// Indicates the state of the [`BatchCoalescer`] buffer after the +/// [`BatchCoalescer::push_batch()`] operation. +/// +/// The caller should take different actions, depending on the variant returned. +pub enum CoalescerState { + /// Neither the limit nor the target batch size is reached. + /// + /// Action: continue pushing batches. + Continue, + /// The limit has been reached. + /// + /// Action: call [`BatchCoalescer::finish_batch()`] to get the final + /// buffered results as a batch and finish the query. + LimitReached, + /// The specified minimum number of rows a batch should have is reached. + /// + /// Action: call [`BatchCoalescer::finish_batch()`] to get the current + /// buffered results as a batch and then continue pushing batches. + TargetReached, +} + +/// Heuristically compact `StringViewArray`s to reduce memory usage, if needed +/// +/// Decides when to consolidate the StringView into a new buffer to reduce +/// memory usage and improve string locality for better performance. +/// +/// This differs from `StringViewArray::gc` because: +/// 1. It may not compact the array depending on a heuristic. +/// 2. It uses a precise block size to reduce the number of buffers to track. +/// +/// # Heuristic +/// +/// If the average size of each view is larger than 32 bytes, we compact the array. +/// +/// `StringViewArray` includes pointers to buffers that hold the underlying data. +/// One of the great benefits of `StringViewArray` is that many operations +/// (e.g., `filter`) can be done without copying the underlying data.
+/// +/// However, after a while (e.g., after `FilterExec` or `HashJoinExec`) the +/// `StringViewArray` may only refer to a small portion of the buffer, +/// significantly increasing memory usage. +fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch { + let new_columns: Vec = batch + .columns() + .iter() + .map(|c| { + // Try to re-create the `StringViewArray` to prevent holding the underlying buffer too long. + let Some(s) = c.as_string_view_opt() else { + return Arc::clone(c); + }; + let ideal_buffer_size: usize = s + .views() + .iter() + .map(|v| { + let len = (*v as u32) as usize; + if len > 12 { + len + } else { + 0 + } + }) + .sum(); + let actual_buffer_size = s.get_buffer_memory_size(); + + // Re-creating the array copies data and can be time consuming. + // We only do it if the array is sparse + if actual_buffer_size > (ideal_buffer_size * 2) { + // We set the block size to `ideal_buffer_size` so that the new StringViewArray only has one buffer, which accelerate later concat_batches. + // See https://github.com/apache/arrow-rs/issues/6094 for more details. + let mut builder = StringViewBuilder::with_capacity(s.len()); + if ideal_buffer_size > 0 { + builder = builder.with_block_size(ideal_buffer_size as u32); + } + + for v in s.iter() { + builder.append_option(v); + } + + let gc_string = builder.finish(); + + debug_assert!(gc_string.data_buffers().len() <= 1); // buffer count can be 0 if the `ideal_buffer_size` is 0 + + Arc::new(gc_string) + } else { + Arc::clone(c) + } + }) + .collect(); + RecordBatch::try_new(batch.schema(), new_columns) + .expect("Failed to re-create the gc'ed record batch") +} + +#[cfg(test)] +mod tests { + use std::ops::Range; + + use super::*; + + use arrow::datatypes::{DataType, Field, Schema}; + use arrow_array::builder::ArrayBuilder; + use arrow_array::{StringViewArray, UInt32Array}; + + #[test] + fn test_coalesce() { + let batch = uint32_batch(0..8); + Test::new() + .with_batches(std::iter::repeat(batch).take(10)) + // expected output is batches of at least 20 rows (except for the final batch) + .with_target_batch_size(21) + .with_expected_output_sizes(vec![24, 24, 24, 8]) + .run() + } + + #[test] + fn test_coalesce_with_fetch_larger_than_input_size() { + let batch = uint32_batch(0..8); + Test::new() + .with_batches(std::iter::repeat(batch).take(10)) + // input is 10 batches x 8 rows (80 rows) with fetch limit of 100 + // expected to behave the same as `test_concat_batches` + .with_target_batch_size(21) + .with_fetch(Some(100)) + .with_expected_output_sizes(vec![24, 24, 24, 8]) + .run(); + } + + #[test] + fn test_coalesce_with_fetch_less_than_input_size() { + let batch = uint32_batch(0..8); + Test::new() + .with_batches(std::iter::repeat(batch).take(10)) + // input is 10 batches x 8 rows (80 rows) with fetch limit of 50 + .with_target_batch_size(21) + .with_fetch(Some(50)) + .with_expected_output_sizes(vec![24, 24, 2]) + .run(); + } + + #[test] + fn test_coalesce_with_fetch_less_than_target_and_no_remaining_rows() { + let batch = uint32_batch(0..8); + Test::new() + .with_batches(std::iter::repeat(batch).take(10)) + // input is 10 batches x 8 rows (80 rows) with fetch limit of 48 + .with_target_batch_size(21) + .with_fetch(Some(48)) + .with_expected_output_sizes(vec![24, 24]) + .run(); + } + + #[test] + fn test_coalesce_with_fetch_less_target_batch_size() { + let batch = uint32_batch(0..8); + Test::new() + .with_batches(std::iter::repeat(batch).take(10)) + // input is 10 batches x 8 rows (80 rows) with fetch limit of 10 + 
.with_target_batch_size(21) + .with_fetch(Some(10)) + .with_expected_output_sizes(vec![10]) + .run(); + } + + #[test] + fn test_coalesce_single_large_batch_over_fetch() { + let large_batch = uint32_batch(0..100); + Test::new() + .with_batch(large_batch) + .with_target_batch_size(20) + .with_fetch(Some(7)) + .with_expected_output_sizes(vec![7]) + .run() + } + + /// Test for [`BatchCoalescer`] + /// + /// Pushes the input batches to the coalescer and verifies that the resulting + /// batches have the expected number of rows and contents. + #[derive(Debug, Clone, Default)] + struct Test { + /// Batches to feed to the coalescer. Tests must have at least one + /// schema + input_batches: Vec, + /// Expected output sizes of the resulting batches + expected_output_sizes: Vec, + /// target batch size + target_batch_size: usize, + /// Fetch (limit) + fetch: Option, + } + + impl Test { + fn new() -> Self { + Self::default() + } + + /// Set the target batch size + fn with_target_batch_size(mut self, target_batch_size: usize) -> Self { + self.target_batch_size = target_batch_size; + self + } + + /// Set the fetch (limit) + fn with_fetch(mut self, fetch: Option) -> Self { + self.fetch = fetch; + self + } + + /// Extend the input batches with `batch` + fn with_batch(mut self, batch: RecordBatch) -> Self { + self.input_batches.push(batch); + self + } + + /// Extends the input batches with `batches` + fn with_batches( + mut self, + batches: impl IntoIterator, + ) -> Self { + self.input_batches.extend(batches); + self + } + + /// Extends `sizes` to expected output sizes + fn with_expected_output_sizes( + mut self, + sizes: impl IntoIterator, + ) -> Self { + self.expected_output_sizes.extend(sizes); + self + } + + /// Runs the test -- see documentation on [`Test`] for details + fn run(self) { + let Self { + input_batches, + target_batch_size, + fetch, + expected_output_sizes, + } = self; + + let schema = input_batches[0].schema(); + + // create a single large input batch for output comparison + let single_input_batch = concat_batches(&schema, &input_batches).unwrap(); + + let mut coalescer = + BatchCoalescer::new(Arc::clone(&schema), target_batch_size, fetch); + + let mut output_batches = vec![]; + for batch in input_batches { + match coalescer.push_batch(batch) { + CoalescerState::Continue => {} + CoalescerState::LimitReached => { + output_batches.push(coalescer.finish_batch().unwrap()); + break; + } + CoalescerState::TargetReached => { + coalescer.buffered_rows = 0; + output_batches.push(coalescer.finish_batch().unwrap()); + } + } + } + if coalescer.buffered_rows != 0 { + output_batches.extend(coalescer.buffer); + } + + // make sure we got the expected number of output batches and content + let mut starting_idx = 0; + assert_eq!(expected_output_sizes.len(), output_batches.len()); + for (i, (expected_size, batch)) in + expected_output_sizes.iter().zip(output_batches).enumerate() + { + assert_eq!( + *expected_size, + batch.num_rows(), + "Unexpected number of rows in Batch {i}" + ); + + // compare the contents of the batch (using `==` compares the + // underlying memory layout too) + let expected_batch = + single_input_batch.slice(starting_idx, *expected_size); + let batch_strings = batch_to_pretty_strings(&batch); + let expected_batch_strings = batch_to_pretty_strings(&expected_batch); + let batch_strings = batch_strings.lines().collect::>(); + let expected_batch_strings = + expected_batch_strings.lines().collect::>(); + assert_eq!( + expected_batch_strings, batch_strings, + "Unexpected content in Batch 
{i}:\ + \n\nExpected:\n{expected_batch_strings:#?}\n\nActual:\n{batch_strings:#?}" + ); + starting_idx += *expected_size; + } + } + } + + /// Return a batch of UInt32 with the specified range + fn uint32_batch(range: Range) -> RecordBatch { + let schema = + Arc::new(Schema::new(vec![Field::new("c0", DataType::UInt32, false)])); + + RecordBatch::try_new( + Arc::clone(&schema), + vec![Arc::new(UInt32Array::from_iter_values(range))], + ) + .unwrap() + } + + #[test] + fn test_gc_string_view_batch_small_no_compact() { + // view with only short strings (no buffers) --> no need to compact + let array = StringViewTest { + rows: 1000, + strings: vec![Some("a"), Some("b"), Some("c")], + } + .build(); + + let gc_array = do_gc(array.clone()); + compare_string_array_values(&array, &gc_array); + assert_eq!(array.data_buffers().len(), 0); + assert_eq!(array.data_buffers().len(), gc_array.data_buffers().len()); // no compaction + } + + #[test] + fn test_gc_string_view_batch_large_no_compact() { + // view with large strings (has buffers) but full --> no need to compact + let array = StringViewTest { + rows: 1000, + strings: vec![Some("This string is longer than 12 bytes")], + } + .build(); + + let gc_array = do_gc(array.clone()); + compare_string_array_values(&array, &gc_array); + assert_eq!(array.data_buffers().len(), 5); + assert_eq!(array.data_buffers().len(), gc_array.data_buffers().len()); // no compaction + } + + #[test] + fn test_gc_string_view_batch_large_slice_compact() { + // view with large strings (has buffers) and only partially used --> should compact + let array = StringViewTest { + rows: 1000, + strings: vec![Some("this string is longer than 12 bytes")], + } + .build(); + + // slice only 11 rows, so most of the buffer is not used + let array = array.slice(11, 22); + + let gc_array = do_gc(array.clone()); + compare_string_array_values(&array, &gc_array); + assert_eq!(array.data_buffers().len(), 5); + assert_eq!(gc_array.data_buffers().len(), 1); // compacted into a single buffer + } + + /// Compares the values of two string view arrays + fn compare_string_array_values(arr1: &StringViewArray, arr2: &StringViewArray) { + assert_eq!(arr1.len(), arr2.len()); + for (s1, s2) in arr1.iter().zip(arr2.iter()) { + assert_eq!(s1, s2); + } + } + + /// Runs garbage collection on a string view array + /// and ensures the number of rows is the same + fn do_gc(array: StringViewArray) -> StringViewArray { + let batch = + RecordBatch::try_from_iter(vec![("a", Arc::new(array) as ArrayRef)]).unwrap(); + let gc_batch = gc_string_view_batch(&batch); + assert_eq!(batch.num_rows(), gc_batch.num_rows()); + assert_eq!(batch.schema(), gc_batch.schema()); + gc_batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap() + .clone() + } + + /// Describes parameters for creating a `StringViewArray` + struct StringViewTest { + /// The number of rows in the array + rows: usize, + /// The strings to use in the array (repeated over and over) + strings: Vec>, + } + + impl StringViewTest { + /// Create a `StringViewArray` with the parameters specified in this struct + fn build(self) -> StringViewArray { + let mut builder = StringViewBuilder::with_capacity(100).with_block_size(8192); + loop { + for &v in self.strings.iter() { + builder.append_option(v); + if builder.len() >= self.rows { + return builder.finish(); + } + } + } + } + } + fn batch_to_pretty_strings(batch: &RecordBatch) -> String { + arrow::util::pretty::pretty_format_batches(&[batch.clone()]) + .unwrap() + .to_string() + } +} diff --git
a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index 5589027694fe..7caf5b8ab65a 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -28,19 +28,17 @@ use crate::{ DisplayFormatType, ExecutionPlan, RecordBatchStream, SendableRecordBatchStream, }; -use arrow::array::{AsArray, StringViewBuilder}; -use arrow::compute::concat_batches; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; -use arrow_array::{Array, ArrayRef}; use datafusion_common::Result; use datafusion_execution::TaskContext; +use crate::coalesce::{BatchCoalescer, CoalescerState}; use futures::ready; use futures::stream::{Stream, StreamExt}; /// `CoalesceBatchesExec` combines small batches into larger batches for more -/// efficient use of vectorized processing by later operators. +/// efficient vectorized processing by later operators. /// /// The operator buffers batches until it collects `target_batch_size` rows and /// then emits a single concatenated batch. When only a limited number of rows @@ -48,35 +46,7 @@ use futures::stream::{Stream, StreamExt}; /// buffering and returns the final batch once the number of collected rows /// reaches the `fetch` value. /// -/// # Background -/// -/// Generally speaking, larger RecordBatches are more efficient to process than -/// smaller record batches (until the CPU cache is exceeded) because there is -/// fixed processing overhead per batch. This code concatenates multiple small -/// record batches into larger ones to amortize this overhead. -/// -/// ```text -/// ┌────────────────────┐ -/// │ RecordBatch │ -/// │ num_rows = 23 │ -/// └────────────────────┘ ┌────────────────────┐ -/// │ │ -/// ┌────────────────────┐ Coalesce │ │ -/// │ │ Batches │ │ -/// │ RecordBatch │ │ │ -/// │ num_rows = 50 │ ─ ─ ─ ─ ─ ─ ▶ │ │ -/// │ │ │ RecordBatch │ -/// │ │ │ num_rows = 106 │ -/// └────────────────────┘ │ │ -/// │ │ -/// ┌────────────────────┐ │ │ -/// │ │ │ │ -/// │ RecordBatch │ │ │ -/// │ num_rows = 33 │ └────────────────────┘ -/// │ │ -/// └────────────────────┘ -/// ``` - +/// See [`BatchCoalescer`] for more information #[derive(Debug)] pub struct CoalesceBatchesExec { /// The input plan @@ -346,7 +316,7 @@ impl CoalesceBatchesStream { } CoalesceBatchesStreamState::Exhausted => { // Handle the end of the input stream. - return if self.coalescer.buffer.is_empty() { + return if self.coalescer.is_empty() { // If buffer is empty, return None indicating the stream is fully consumed. Poll::Ready(None) } else { @@ -365,511 +335,3 @@ impl RecordBatchStream for CoalesceBatchesStream { self.coalescer.schema() } } - -/// Concatenate multiple record batches into larger batches -/// -/// See [`CoalesceBatchesExec`] for more details. -/// -/// Notes: -/// -/// 1. The output rows is the same order as the input rows -/// -/// 2. The output is a sequence of batches, with all but the last being at least -/// `target_batch_size` rows. -/// -/// 3. Eventually this may also be able to handle other optimizations such as a -/// combined filter/coalesce operation. 
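Taken together with the test harness above, the intended driving pattern for the coalescer is a small loop over `push_batch` results. Below is a minimal sketch, assuming the `crate::coalesce` module interface referenced earlier in this diff (`push_batch`, `finish_batch`, `is_empty`); the `coalesce_all` helper itself is illustrative and not part of the change:

```rust
use arrow::datatypes::SchemaRef;
use arrow::record_batch::RecordBatch;
use datafusion_common::Result;
// In this PR the coalescer lives in `crate::coalesce`
// (see `use crate::coalesce::{BatchCoalescer, CoalescerState}` above).
use crate::coalesce::{BatchCoalescer, CoalescerState};

/// Illustrative helper (not part of the change): coalesce `input` into batches of
/// at least `target_batch_size` rows, stopping once `fetch` rows have been produced.
fn coalesce_all(
    schema: SchemaRef,
    target_batch_size: usize,
    fetch: Option<usize>,
    input: Vec<RecordBatch>,
) -> Result<Vec<RecordBatch>> {
    let mut coalescer = BatchCoalescer::new(schema, target_batch_size, fetch);
    let mut output = vec![];
    for batch in input {
        match coalescer.push_batch(batch) {
            // Not enough buffered rows yet: keep pushing input.
            CoalescerState::Continue => {}
            // The fetch limit was hit: `push_batch` has already sliced the final
            // batch down to the remaining rows, so emit and stop consuming input.
            CoalescerState::LimitReached => {
                output.push(coalescer.finish_batch()?);
                return Ok(output);
            }
            // At least `target_batch_size` rows are buffered: emit one batch.
            CoalescerState::TargetReached => {
                output.push(coalescer.finish_batch()?);
            }
        }
    }
    // Flush any remaining rows smaller than the target size.
    if !coalescer.is_empty() {
        output.push(coalescer.finish_batch()?);
    }
    Ok(output)
}
```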
-#[derive(Debug)]
-struct BatchCoalescer {
-    /// The input schema
-    schema: SchemaRef,
-    /// Minimum number of rows for coalesces batches
-    target_batch_size: usize,
-    /// Total number of rows returned so far
-    total_rows: usize,
-    /// Buffered batches
-    buffer: Vec<RecordBatch>,
-    /// Buffered row count
-    buffered_rows: usize,
-    /// Maximum number of rows to fetch, `None` means fetching all rows
-    fetch: Option<usize>,
-}
-
-impl BatchCoalescer {
-    /// Create a new `BatchCoalescer`
-    ///
-    /// # Arguments
-    /// - `schema` - the schema of the output batches
-    /// - `target_batch_size` - the minimum number of rows for each
-    ///   output batch (until limit reached)
-    /// - `fetch` - the maximum number of rows to fetch, `None` means fetch all rows
-    fn new(schema: SchemaRef, target_batch_size: usize, fetch: Option<usize>) -> Self {
-        Self {
-            schema,
-            target_batch_size,
-            total_rows: 0,
-            buffer: vec![],
-            buffered_rows: 0,
-            fetch,
-        }
-    }
-
-    /// Return the schema of the output batches
-    fn schema(&self) -> SchemaRef {
-        Arc::clone(&self.schema)
-    }
-
-    /// Given a batch, it updates the buffer of [`BatchCoalescer`]. It returns
-    /// a variant of [`CoalescerState`] indicating the final state of the buffer.
-    fn push_batch(&mut self, batch: RecordBatch) -> CoalescerState {
-        let batch = gc_string_view_batch(&batch);
-        if self.limit_reached(&batch) {
-            CoalescerState::LimitReached
-        } else if self.target_reached(batch) {
-            CoalescerState::TargetReached
-        } else {
-            CoalescerState::Continue
-        }
-    }
-
-    /// The function checks if the buffer can reach the specified limit after getting `batch`.
-    /// If it does, it slices the received batch as needed, updates the buffer with it, and
-    /// finally returns `true`. Otherwise; the function does nothing and returns `false`.
-    fn limit_reached(&mut self, batch: &RecordBatch) -> bool {
-        match self.fetch {
-            Some(fetch) if self.total_rows + batch.num_rows() >= fetch => {
-                // Limit is reached
-                let remaining_rows = fetch - self.total_rows;
-                debug_assert!(remaining_rows > 0);
-
-                let batch = batch.slice(0, remaining_rows);
-                self.buffered_rows += batch.num_rows();
-                self.total_rows = fetch;
-                self.buffer.push(batch);
-                true
-            }
-            _ => false,
-        }
-    }
-
-    /// Updates the buffer with the given batch. If the target batch size is reached,
-    /// the function returns `true`. Otherwise, it returns `false`.
-    fn target_reached(&mut self, batch: RecordBatch) -> bool {
-        if batch.num_rows() == 0 {
-            false
-        } else {
-            self.total_rows += batch.num_rows();
-            self.buffered_rows += batch.num_rows();
-            self.buffer.push(batch);
-            self.buffered_rows >= self.target_batch_size
-        }
-    }
-
-    /// Concatenates and returns all buffered batches, and clears the buffer.
-    fn finish_batch(&mut self) -> Result<RecordBatch> {
-        let batch = concat_batches(&self.schema, &self.buffer)?;
-        self.buffer.clear();
-        self.buffered_rows = 0;
-        Ok(batch)
-    }
-}
-
-/// This enumeration acts as a status indicator for the [`BatchCoalescer`] after a
-/// [`BatchCoalescer::push_batch()`] operation.
-enum CoalescerState {
-    /// Neither the limit nor the target batch size is reached.
-    Continue,
-    /// The sufficient row count to produce a complete query result is reached.
-    LimitReached,
-    /// The specified minimum number of rows a batch should have is reached.
-    TargetReached,
-}
-
-/// Heuristically compact `StringViewArray`s to reduce memory usage, if needed
-///
-/// This function decides when to consolidate the StringView into a new buffer
-/// to reduce memory usage and improve string locality for better performance.
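In rough terms, the decision this heuristic makes (spelled out in the remainder of the comment and in `gc_string_view_batch` itself) compares the bytes the views actually need against the bytes the buffers currently occupy, and only compacts when the buffers are more than twice the ideal size. A standalone sketch of that check; the helper name is illustrative only:

```rust
use arrow::array::{Array, StringViewArray};

/// Illustrative only: mirrors the sizing comparison used by `gc_string_view_batch`.
/// Strings of 12 bytes or fewer are stored inline in the view and need no buffer space.
fn should_compact(array: &StringViewArray) -> bool {
    // Sum of the lengths of all views that actually point into data buffers.
    let ideal_buffer_size: usize = array
        .views()
        .iter()
        .map(|v| {
            // The low 32 bits of a view encode the string length.
            let len = (*v as u32) as usize;
            if len > 12 {
                len
            } else {
                0
            }
        })
        .sum();
    let actual_buffer_size = array.get_buffer_memory_size();
    // Copying is only worthwhile when the buffers are badly under-utilized.
    actual_buffer_size > ideal_buffer_size * 2
}
```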
-/// -/// This differs from `StringViewArray::gc` because: -/// 1. It may not compact the array depending on a heuristic. -/// 2. It uses a precise block size to reduce the number of buffers to track. -/// -/// # Heuristic -/// -/// If the average size of each view is larger than 32 bytes, we compact the array. -/// -/// `StringViewArray` include pointers to buffer that hold the underlying data. -/// One of the great benefits of `StringViewArray` is that many operations -/// (e.g., `filter`) can be done without copying the underlying data. -/// -/// However, after a while (e.g., after `FilterExec` or `HashJoinExec`) the -/// `StringViewArray` may only refer to a small portion of the buffer, -/// significantly increasing memory usage. -fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch { - let new_columns: Vec = batch - .columns() - .iter() - .map(|c| { - // Try to re-create the `StringViewArray` to prevent holding the underlying buffer too long. - let Some(s) = c.as_string_view_opt() else { - return Arc::clone(c); - }; - let ideal_buffer_size: usize = s - .views() - .iter() - .map(|v| { - let len = (*v as u32) as usize; - if len > 12 { - len - } else { - 0 - } - }) - .sum(); - let actual_buffer_size = s.get_buffer_memory_size(); - - // Re-creating the array copies data and can be time consuming. - // We only do it if the array is sparse - if actual_buffer_size > (ideal_buffer_size * 2) { - // We set the block size to `ideal_buffer_size` so that the new StringViewArray only has one buffer, which accelerate later concat_batches. - // See https://github.com/apache/arrow-rs/issues/6094 for more details. - let mut builder = StringViewBuilder::with_capacity(s.len()); - if ideal_buffer_size > 0 { - builder = builder.with_block_size(ideal_buffer_size as u32); - } - - for v in s.iter() { - builder.append_option(v); - } - - let gc_string = builder.finish(); - - debug_assert!(gc_string.data_buffers().len() <= 1); // buffer count can be 0 if the `ideal_buffer_size` is 0 - - Arc::new(gc_string) - } else { - Arc::clone(c) - } - }) - .collect(); - RecordBatch::try_new(batch.schema(), new_columns) - .expect("Failed to re-create the gc'ed record batch") -} - -#[cfg(test)] -mod tests { - use std::ops::Range; - - use super::*; - - use arrow::datatypes::{DataType, Field, Schema}; - use arrow_array::builder::ArrayBuilder; - use arrow_array::{StringViewArray, UInt32Array}; - - #[test] - fn test_coalesce() { - let batch = uint32_batch(0..8); - Test::new() - .with_batches(std::iter::repeat(batch).take(10)) - // expected output is batches of at least 20 rows (except for the final batch) - .with_target_batch_size(21) - .with_expected_output_sizes(vec![24, 24, 24, 8]) - .run() - } - - #[test] - fn test_coalesce_with_fetch_larger_than_input_size() { - let batch = uint32_batch(0..8); - Test::new() - .with_batches(std::iter::repeat(batch).take(10)) - // input is 10 batches x 8 rows (80 rows) with fetch limit of 100 - // expected to behave the same as `test_concat_batches` - .with_target_batch_size(21) - .with_fetch(Some(100)) - .with_expected_output_sizes(vec![24, 24, 24, 8]) - .run(); - } - - #[test] - fn test_coalesce_with_fetch_less_than_input_size() { - let batch = uint32_batch(0..8); - Test::new() - .with_batches(std::iter::repeat(batch).take(10)) - // input is 10 batches x 8 rows (80 rows) with fetch limit of 50 - .with_target_batch_size(21) - .with_fetch(Some(50)) - .with_expected_output_sizes(vec![24, 24, 2]) - .run(); - } - - #[test] - fn 
test_coalesce_with_fetch_less_than_target_and_no_remaining_rows() { - let batch = uint32_batch(0..8); - Test::new() - .with_batches(std::iter::repeat(batch).take(10)) - // input is 10 batches x 8 rows (80 rows) with fetch limit of 48 - .with_target_batch_size(21) - .with_fetch(Some(48)) - .with_expected_output_sizes(vec![24, 24]) - .run(); - } - - #[test] - fn test_coalesce_with_fetch_less_target_batch_size() { - let batch = uint32_batch(0..8); - Test::new() - .with_batches(std::iter::repeat(batch).take(10)) - // input is 10 batches x 8 rows (80 rows) with fetch limit of 10 - .with_target_batch_size(21) - .with_fetch(Some(10)) - .with_expected_output_sizes(vec![10]) - .run(); - } - - #[test] - fn test_coalesce_single_large_batch_over_fetch() { - let large_batch = uint32_batch(0..100); - Test::new() - .with_batch(large_batch) - .with_target_batch_size(20) - .with_fetch(Some(7)) - .with_expected_output_sizes(vec![7]) - .run() - } - - /// Test for [`BatchCoalescer`] - /// - /// Pushes the input batches to the coalescer and verifies that the resulting - /// batches have the expected number of rows and contents. - #[derive(Debug, Clone, Default)] - struct Test { - /// Batches to feed to the coalescer. Tests must have at least one - /// schema - input_batches: Vec, - /// Expected output sizes of the resulting batches - expected_output_sizes: Vec, - /// target batch size - target_batch_size: usize, - /// Fetch (limit) - fetch: Option, - } - - impl Test { - fn new() -> Self { - Self::default() - } - - /// Set the target batch size - fn with_target_batch_size(mut self, target_batch_size: usize) -> Self { - self.target_batch_size = target_batch_size; - self - } - - /// Set the fetch (limit) - fn with_fetch(mut self, fetch: Option) -> Self { - self.fetch = fetch; - self - } - - /// Extend the input batches with `batch` - fn with_batch(mut self, batch: RecordBatch) -> Self { - self.input_batches.push(batch); - self - } - - /// Extends the input batches with `batches` - fn with_batches( - mut self, - batches: impl IntoIterator, - ) -> Self { - self.input_batches.extend(batches); - self - } - - /// Extends `sizes` to expected output sizes - fn with_expected_output_sizes( - mut self, - sizes: impl IntoIterator, - ) -> Self { - self.expected_output_sizes.extend(sizes); - self - } - - /// Runs the test -- see documentation on [`Test`] for details - fn run(self) { - let Self { - input_batches, - target_batch_size, - fetch, - expected_output_sizes, - } = self; - - let schema = input_batches[0].schema(); - - // create a single large input batch for output comparison - let single_input_batch = concat_batches(&schema, &input_batches).unwrap(); - - let mut coalescer = - BatchCoalescer::new(Arc::clone(&schema), target_batch_size, fetch); - - let mut output_batches = vec![]; - for batch in input_batches { - match coalescer.push_batch(batch) { - CoalescerState::Continue => {} - CoalescerState::LimitReached => { - output_batches.push(coalescer.finish_batch().unwrap()); - break; - } - CoalescerState::TargetReached => { - coalescer.buffered_rows = 0; - output_batches.push(coalescer.finish_batch().unwrap()); - } - } - } - if coalescer.buffered_rows != 0 { - output_batches.extend(coalescer.buffer); - } - - // make sure we got the expected number of output batches and content - let mut starting_idx = 0; - assert_eq!(expected_output_sizes.len(), output_batches.len()); - for (i, (expected_size, batch)) in - expected_output_sizes.iter().zip(output_batches).enumerate() - { - assert_eq!( - *expected_size, - batch.num_rows(), 
- "Unexpected number of rows in Batch {i}" - ); - - // compare the contents of the batch (using `==` compares the - // underlying memory layout too) - let expected_batch = - single_input_batch.slice(starting_idx, *expected_size); - let batch_strings = batch_to_pretty_strings(&batch); - let expected_batch_strings = batch_to_pretty_strings(&expected_batch); - let batch_strings = batch_strings.lines().collect::>(); - let expected_batch_strings = - expected_batch_strings.lines().collect::>(); - assert_eq!( - expected_batch_strings, batch_strings, - "Unexpected content in Batch {i}:\ - \n\nExpected:\n{expected_batch_strings:#?}\n\nActual:\n{batch_strings:#?}" - ); - starting_idx += *expected_size; - } - } - } - - /// Return a batch of UInt32 with the specified range - fn uint32_batch(range: Range) -> RecordBatch { - let schema = - Arc::new(Schema::new(vec![Field::new("c0", DataType::UInt32, false)])); - - RecordBatch::try_new( - Arc::clone(&schema), - vec![Arc::new(UInt32Array::from_iter_values(range))], - ) - .unwrap() - } - - #[test] - fn test_gc_string_view_batch_small_no_compact() { - // view with only short strings (no buffers) --> no need to compact - let array = StringViewTest { - rows: 1000, - strings: vec![Some("a"), Some("b"), Some("c")], - } - .build(); - - let gc_array = do_gc(array.clone()); - compare_string_array_values(&array, &gc_array); - assert_eq!(array.data_buffers().len(), 0); - assert_eq!(array.data_buffers().len(), gc_array.data_buffers().len()); // no compaction - } - - #[test] - fn test_gc_string_view_batch_large_no_compact() { - // view with large strings (has buffers) but full --> no need to compact - let array = StringViewTest { - rows: 1000, - strings: vec![Some("This string is longer than 12 bytes")], - } - .build(); - - let gc_array = do_gc(array.clone()); - compare_string_array_values(&array, &gc_array); - assert_eq!(array.data_buffers().len(), 5); - assert_eq!(array.data_buffers().len(), gc_array.data_buffers().len()); // no compaction - } - - #[test] - fn test_gc_string_view_batch_large_slice_compact() { - // view with large strings (has buffers) and only partially used --> no need to compact - let array = StringViewTest { - rows: 1000, - strings: vec![Some("this string is longer than 12 bytes")], - } - .build(); - - // slice only 11 rows, so most of the buffer is not used - let array = array.slice(11, 22); - - let gc_array = do_gc(array.clone()); - compare_string_array_values(&array, &gc_array); - assert_eq!(array.data_buffers().len(), 5); - assert_eq!(gc_array.data_buffers().len(), 1); // compacted into a single buffer - } - - /// Compares the values of two string view arrays - fn compare_string_array_values(arr1: &StringViewArray, arr2: &StringViewArray) { - assert_eq!(arr1.len(), arr2.len()); - for (s1, s2) in arr1.iter().zip(arr2.iter()) { - assert_eq!(s1, s2); - } - } - - /// runs garbage collection on string view array - /// and ensures the number of rows are the same - fn do_gc(array: StringViewArray) -> StringViewArray { - let batch = - RecordBatch::try_from_iter(vec![("a", Arc::new(array) as ArrayRef)]).unwrap(); - let gc_batch = gc_string_view_batch(&batch); - assert_eq!(batch.num_rows(), gc_batch.num_rows()); - assert_eq!(batch.schema(), gc_batch.schema()); - gc_batch - .column(0) - .as_any() - .downcast_ref::() - .unwrap() - .clone() - } - - /// Describes parameters for creating a `StringViewArray` - struct StringViewTest { - /// The number of rows in the array - rows: usize, - /// The strings to use in the array (repeated over and over - strings: 
Vec>, - } - - impl StringViewTest { - /// Create a `StringViewArray` with the parameters specified in this struct - fn build(self) -> StringViewArray { - let mut builder = StringViewBuilder::with_capacity(100).with_block_size(8192); - loop { - for &v in self.strings.iter() { - builder.append_option(v); - if builder.len() >= self.rows { - return builder.finish(); - } - } - } - } - } - fn batch_to_pretty_strings(batch: &RecordBatch) -> String { - arrow::util::pretty::pretty_format_batches(&[batch.clone()]) - .unwrap() - .to_string() - } -} diff --git a/datafusion/physical-plan/src/execution_plan.rs b/datafusion/physical-plan/src/execution_plan.rs index a6a15e46860c..c1c66f6d3923 100644 --- a/datafusion/physical-plan/src/execution_plan.rs +++ b/datafusion/physical-plan/src/execution_plan.rs @@ -34,11 +34,10 @@ pub use datafusion_execution::{RecordBatchStream, SendableRecordBatchStream}; pub use datafusion_expr::{Accumulator, ColumnarValue}; pub use datafusion_physical_expr::window::WindowExpr; pub use datafusion_physical_expr::{ - expressions, functions, udf, AggregateExpr, Distribution, Partitioning, PhysicalExpr, -}; -use datafusion_physical_expr::{ - EquivalenceProperties, LexOrdering, PhysicalSortExpr, PhysicalSortRequirement, + expressions, functions, udf, Distribution, Partitioning, PhysicalExpr, }; +use datafusion_physical_expr::{EquivalenceProperties, LexOrdering, PhysicalSortExpr}; +use datafusion_physical_expr_common::sort_expr::LexRequirement; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::display::DisplayableExecutionPlan; @@ -125,7 +124,7 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { /// NOTE that checking `!is_empty()` does **not** check for a /// required input ordering. Instead, the correct check is that at /// least one entry must be `Some` - fn required_input_ordering(&self) -> Vec>> { + fn required_input_ordering(&self) -> Vec> { vec![None; self.children().len()] } diff --git a/datafusion/physical-plan/src/insert.rs b/datafusion/physical-plan/src/insert.rs index 5cd864125e29..5dc27bc239d2 100644 --- a/datafusion/physical-plan/src/insert.rs +++ b/datafusion/physical-plan/src/insert.rs @@ -35,11 +35,10 @@ use arrow_array::{ArrayRef, UInt64Array}; use arrow_schema::{DataType, Field, Schema}; use datafusion_common::{internal_err, Result}; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{ - Distribution, EquivalenceProperties, PhysicalSortRequirement, -}; +use datafusion_physical_expr::{Distribution, EquivalenceProperties}; use async_trait::async_trait; +use datafusion_physical_expr_common::sort_expr::LexRequirement; use futures::StreamExt; /// `DataSink` implements writing streams of [`RecordBatch`]es to @@ -90,7 +89,7 @@ pub struct DataSinkExec { /// Schema describing the structure of the output data. count_schema: SchemaRef, /// Optional required sort order for output data. 
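The type changes in this and the following files all swap explicit `Vec<PhysicalSortRequirement>` spellings for the `LexRequirement` name imported from `datafusion_physical_expr_common::sort_expr`. The sketch below assumes `LexRequirement` is a plain type alias, which is consistent with it being a drop-in replacement everywhere in this diff:

```rust
use datafusion_physical_expr_common::sort_expr::PhysicalSortRequirement;

// Assumed definition (not shown in this diff): a lexicographic ordering
// requirement is just a vector of per-column sort requirements.
pub type LexRequirement = Vec<PhysicalSortRequirement>;

// So a signature such as
//   fn required_input_ordering(&self) -> Vec<Option<Vec<PhysicalSortRequirement>>>
// becomes
//   fn required_input_ordering(&self) -> Vec<Option<LexRequirement>>
// with no behavioral change.
```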
- sort_order: Option>, + sort_order: Option, cache: PlanProperties, } @@ -106,7 +105,7 @@ impl DataSinkExec { input: Arc, sink: Arc, sink_schema: SchemaRef, - sort_order: Option>, + sort_order: Option, ) -> Self { let count_schema = make_count_schema(); let cache = Self::create_schema(&input, count_schema); @@ -131,7 +130,7 @@ impl DataSinkExec { } /// Optional sort order for output data - pub fn sort_order(&self) -> &Option> { + pub fn sort_order(&self) -> &Option { &self.sort_order } @@ -189,7 +188,7 @@ impl ExecutionPlan for DataSinkExec { vec![Distribution::SinglePartition; self.children().len()] } - fn required_input_ordering(&self) -> Vec>> { + fn required_input_ordering(&self) -> Vec> { // The required input ordering is set externally (e.g. by a `ListingTable`). // Otherwise, there is no specific requirement (i.e. `sort_expr` is `None`). vec![self.sort_order.as_ref().cloned()] diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 0868ee721665..b99d4f17c42a 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -488,7 +488,7 @@ mod tests { use crate::test::build_table_scan_i32; use datafusion_common::{assert_batches_sorted_eq, assert_contains}; - use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; + use datafusion_execution::runtime_env::RuntimeEnvBuilder; async fn join_collect( left: Arc, @@ -673,8 +673,11 @@ mod tests { #[tokio::test] async fn test_overallocation() -> Result<()> { - let runtime_config = RuntimeConfig::new().with_memory_limit(100, 1.0); - let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + let runtime = Arc::new( + RuntimeEnvBuilder::new() + .with_memory_limit(100, 1.0) + .build()?, + ); let task_ctx = TaskContext::default().with_runtime(runtime); let task_ctx = Arc::new(task_ctx); diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index e40a07cf6220..f20d00e1a298 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -49,8 +49,7 @@ use crate::{ }; use arrow::array::{ - Array, ArrayRef, BooleanArray, BooleanBufferBuilder, PrimitiveArray, UInt32Array, - UInt64Array, + Array, ArrayRef, BooleanArray, BooleanBufferBuilder, UInt32Array, UInt64Array, }; use arrow::compute::kernels::cmp::{eq, not_distinct}; use arrow::compute::{and, concat_batches, take, FilterBuilder}; @@ -1204,13 +1203,11 @@ fn lookup_join_hashmap( }) .collect::>>()?; - let (mut probe_builder, mut build_builder, next_offset) = build_hashmap + let (probe_indices, build_indices, next_offset) = build_hashmap .get_matched_indices_with_limit_offset(hashes_buffer, None, limit, offset); - let build_indices: UInt64Array = - PrimitiveArray::new(build_builder.finish().into(), None); - let probe_indices: UInt32Array = - PrimitiveArray::new(probe_builder.finish().into(), None); + let build_indices: UInt64Array = build_indices.into(); + let probe_indices: UInt32Array = probe_indices.into(); let (build_indices, probe_indices) = equal_rows_arr( &build_indices, @@ -1566,7 +1563,7 @@ mod tests { test::build_table_i32, test::exec::MockExec, }; - use arrow::array::{Date32Array, Int32Array, UInt32Builder, UInt64Builder}; + use arrow::array::{Date32Array, Int32Array}; use arrow::datatypes::{DataType, Field}; use arrow_array::StructArray; use arrow_buffer::NullBuffer; @@ -1575,7 +1572,7 @@ mod tests { ScalarValue, }; use 
datafusion_execution::config::SessionConfig; - use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; + use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_expr::Operator; use datafusion_physical_expr::expressions::{BinaryExpr, Literal}; @@ -3169,17 +3166,13 @@ mod tests { (0, None), )?; - let mut left_ids = UInt64Builder::with_capacity(0); - left_ids.append_value(0); - left_ids.append_value(1); + let left_ids: UInt64Array = vec![0, 1].into(); - let mut right_ids = UInt32Builder::with_capacity(0); - right_ids.append_value(0); - right_ids.append_value(1); + let right_ids: UInt32Array = vec![0, 1].into(); - assert_eq!(left_ids.finish(), l); + assert_eq!(left_ids, l); - assert_eq!(right_ids.finish(), r); + assert_eq!(right_ids, r); Ok(()) } @@ -3805,8 +3798,11 @@ mod tests { ]; for join_type in join_types { - let runtime_config = RuntimeConfig::new().with_memory_limit(100, 1.0); - let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + let runtime = Arc::new( + RuntimeEnvBuilder::new() + .with_memory_limit(100, 1.0) + .build()?, + ); let task_ctx = TaskContext::default().with_runtime(runtime); let task_ctx = Arc::new(task_ctx); @@ -3878,8 +3874,11 @@ mod tests { ]; for join_type in join_types { - let runtime_config = RuntimeConfig::new().with_memory_limit(100, 1.0); - let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + let runtime = Arc::new( + RuntimeEnvBuilder::new() + .with_memory_limit(100, 1.0) + .build()?, + ); let session_config = SessionConfig::default().with_batch_size(50); let task_ctx = TaskContext::default() .with_session_config(session_config) diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index 04a025c93288..3cd373544157 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -40,13 +40,12 @@ use crate::{ RecordBatchStream, SendableRecordBatchStream, }; -use arrow::array::{ - BooleanBufferBuilder, UInt32Array, UInt32Builder, UInt64Array, UInt64Builder, -}; +use arrow::array::{BooleanBufferBuilder, UInt32Array, UInt64Array}; use arrow::compute::concat_batches; -use arrow::datatypes::{Schema, SchemaRef}; +use arrow::datatypes::{Schema, SchemaRef, UInt64Type}; use arrow::record_batch::RecordBatch; use arrow::util::bit_util; +use arrow_array::PrimitiveArray; use datafusion_common::{exec_datafusion_err, JoinSide, Result, Statistics}; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::TaskContext; @@ -573,23 +572,21 @@ fn join_left_and_right_batch( ) })?; - let mut left_indices_builder = UInt64Builder::new(); - let mut right_indices_builder = UInt32Builder::new(); + let mut left_indices_builder: Vec = vec![]; + let mut right_indices_builder: Vec = vec![]; for (left_side, right_side) in indices { - left_indices_builder - .append_values(left_side.values(), &vec![true; left_side.len()]); - right_indices_builder - .append_values(right_side.values(), &vec![true; right_side.len()]); + left_indices_builder.extend(left_side.values()); + right_indices_builder.extend(right_side.values()); } - let left_side = left_indices_builder.finish(); - let right_side = right_indices_builder.finish(); + let left_side: PrimitiveArray = left_indices_builder.into(); + let right_side = right_indices_builder.into(); // set the left bitmap // and only full join need the left bitmap if need_produce_result_in_final(join_type) { let mut bitmap = 
visited_left_side.lock(); - left_side.iter().flatten().for_each(|x| { - bitmap.set_bit(x as usize, true); + left_side.values().iter().for_each(|x| { + bitmap.set_bit(*x as usize, true); }); } // adjust the two side indices base on the join type @@ -647,7 +644,7 @@ mod tests { use arrow::datatypes::{DataType, Field}; use datafusion_common::{assert_batches_sorted_eq, assert_contains, ScalarValue}; - use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; + use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_expr::Operator; use datafusion_physical_expr::expressions::{BinaryExpr, Literal}; use datafusion_physical_expr::{Partitioning, PhysicalExpr}; @@ -1022,8 +1019,11 @@ mod tests { ]; for join_type in join_types { - let runtime_config = RuntimeConfig::new().with_memory_limit(100, 1.0); - let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + let runtime = Arc::new( + RuntimeEnvBuilder::new() + .with_memory_limit(100, 1.0) + .build()?, + ); let task_ctx = TaskContext::default().with_runtime(runtime); let task_ctx = Arc::new(task_ctx); diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index 96d5ba728a30..09fe5d9ebc54 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -22,7 +22,7 @@ use std::any::Any; use std::cmp::Ordering; -use std::collections::VecDeque; +use std::collections::{HashMap, VecDeque}; use std::fmt::Formatter; use std::fs::File; use std::io::BufReader; @@ -51,6 +51,7 @@ use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_execution::TaskContext; use datafusion_physical_expr::equivalence::join_equivalence_properties; use datafusion_physical_expr::{PhysicalExprRef, PhysicalSortRequirement}; +use datafusion_physical_expr_common::sort_expr::LexRequirement; use crate::expressions::PhysicalSortExpr; use crate::joins::utils::{ @@ -288,7 +289,7 @@ impl ExecutionPlan for SortMergeJoinExec { ] } - fn required_input_ordering(&self) -> Vec>> { + fn required_input_ordering(&self) -> Vec> { vec![ Some(PhysicalSortRequirement::from_sort_exprs( &self.left_sort_exprs, @@ -595,8 +596,10 @@ struct BufferedBatch { /// Size estimation used for reserving / releasing memory pub size_estimation: usize, /// The indices of buffered batch that failed the join filter. + /// This is a map between buffered row index and a boolean value indicating whether all joined row + /// of the buffered row failed the join filter. /// When dequeuing the buffered batch, we need to produce null joined rows for these indices. - pub join_filter_failed_idxs: HashSet, + pub join_filter_failed_map: HashMap, /// Current buffered batch number of rows. 
Equal to batch.num_rows() /// but if batch is spilled to disk this property is preferable /// and less expensive @@ -637,7 +640,7 @@ impl BufferedBatch { join_arrays, null_joined: vec![], size_estimation, - join_filter_failed_idxs: HashSet::new(), + join_filter_failed_map: HashMap::new(), num_rows, spill_file: None, } @@ -1229,11 +1232,19 @@ impl SMJStream { } buffered_batch.null_joined.clear(); - // For buffered rows which are joined with streamed side but doesn't satisfy the join filter + // For buffered row which is joined with streamed side rows but all joined rows + // don't satisfy the join filter if output_not_matched_filter { + let not_matched_buffered_indices = buffered_batch + .join_filter_failed_map + .iter() + .filter_map(|(idx, failed)| if *failed { Some(*idx) } else { None }) + .collect::>(); + let buffered_indices = UInt64Array::from_iter_values( - buffered_batch.join_filter_failed_idxs.iter().copied(), + not_matched_buffered_indices.iter().copied(), ); + if let Some(record_batch) = produce_buffered_null_batch( &self.schema, &self.streamed_schema, @@ -1242,7 +1253,7 @@ impl SMJStream { )? { self.output_record_batches.push(record_batch); } - buffered_batch.join_filter_failed_idxs.clear(); + buffered_batch.join_filter_failed_map.clear(); } } Ok(()) @@ -1459,24 +1470,26 @@ impl SMJStream { // If it is joined with streamed side, but doesn't match the join filter, // we need to output it with nulls as streamed side. if matches!(self.join_type, JoinType::Full) { + let buffered_batch = &mut self.buffered_data.batches + [chunk.buffered_batch_idx.unwrap()]; + for i in 0..pre_mask.len() { - let buffered_batch = &mut self.buffered_data.batches - [chunk.buffered_batch_idx.unwrap()]; + // If the buffered row is not joined with streamed side, + // skip it. 
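The map update a few lines below is easiest to read in isolation: each buffered row starts out as `true` ("every joined row has failed the filter so far") and is AND-ed with the negation of the filter mask for each streamed row it joins against, so it stays `true` only if no joined row ever passed. A self-contained sketch of that rule, using plain `u64` indices and booleans rather than the arrow arrays:

```rust
use std::collections::HashMap;

/// Illustrative only: per buffered-row index, track whether *all* of its joined
/// rows failed the join filter (the rule applied to `join_filter_failed_map`).
fn record_filter_results(
    map: &mut HashMap<u64, bool>,
    joined: &[(u64, bool)], // (buffered row index, did this joined row pass the filter?)
) {
    for &(buffered_index, passed) in joined {
        let all_failed_so_far = *map.get(&buffered_index).unwrap_or(&true);
        map.insert(buffered_index, all_failed_so_far && !passed);
    }
}

// Example: buffered row 7 joins three streamed rows and only the second passes.
// record_filter_results leaves map[&7] == false, so row 7 is *not* emitted as a
// null-joined row for the FULL join when the buffered batch is drained.
```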
+ if buffered_indices.is_null(i) { + continue; + } + let buffered_index = buffered_indices.value(i); - if !pre_mask.value(i) { - // For a buffered row that is joined with streamed side but doesn't satisfy the join filter, - buffered_batch - .join_filter_failed_idxs - .insert(buffered_index); - } else if buffered_batch - .join_filter_failed_idxs - .contains(&buffered_index) - { - buffered_batch - .join_filter_failed_idxs - .remove(&buffered_index); - } + buffered_batch.join_filter_failed_map.insert( + buffered_index, + *buffered_batch + .join_filter_failed_map + .get(&buffered_index) + .unwrap_or(&true) + && !pre_mask.value(i), + ); } } } @@ -1965,7 +1978,7 @@ mod tests { }; use datafusion_execution::config::SessionConfig; use datafusion_execution::disk_manager::DiskManagerConfig; - use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; + use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_execution::TaskContext; use crate::expressions::Column; @@ -2887,10 +2900,12 @@ mod tests { ]; // Disable DiskManager to prevent spilling - let runtime_config = RuntimeConfig::new() - .with_memory_limit(100, 1.0) - .with_disk_manager(DiskManagerConfig::Disabled); - let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + let runtime = Arc::new( + RuntimeEnvBuilder::new() + .with_memory_limit(100, 1.0) + .with_disk_manager(DiskManagerConfig::Disabled) + .build()?, + ); let session_config = SessionConfig::default().with_batch_size(50); for join_type in join_types { @@ -2972,10 +2987,12 @@ mod tests { ]; // Disable DiskManager to prevent spilling - let runtime_config = RuntimeConfig::new() - .with_memory_limit(100, 1.0) - .with_disk_manager(DiskManagerConfig::Disabled); - let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + let runtime = Arc::new( + RuntimeEnvBuilder::new() + .with_memory_limit(100, 1.0) + .with_disk_manager(DiskManagerConfig::Disabled) + .build()?, + ); let session_config = SessionConfig::default().with_batch_size(50); for join_type in join_types { @@ -3035,10 +3052,12 @@ mod tests { ]; // Enable DiskManager to allow spilling - let runtime_config = RuntimeConfig::new() - .with_memory_limit(100, 1.0) - .with_disk_manager(DiskManagerConfig::NewOs); - let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + let runtime = Arc::new( + RuntimeEnvBuilder::new() + .with_memory_limit(100, 1.0) + .with_disk_manager(DiskManagerConfig::NewOs) + .build()?, + ); for batch_size in [1, 50] { let session_config = SessionConfig::default().with_batch_size(batch_size); @@ -3143,10 +3162,13 @@ mod tests { ]; // Enable DiskManager to allow spilling - let runtime_config = RuntimeConfig::new() - .with_memory_limit(500, 1.0) - .with_disk_manager(DiskManagerConfig::NewOs); - let runtime = Arc::new(RuntimeEnv::new(runtime_config)?); + let runtime = Arc::new( + RuntimeEnvBuilder::new() + .with_memory_limit(500, 1.0) + .with_disk_manager(DiskManagerConfig::NewOs) + .build()?, + ); + for batch_size in [1, 50] { let session_config = SessionConfig::default().with_batch_size(batch_size); diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 1bf2ef2fd5f7..ac718a95e9f4 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -72,6 +72,7 @@ use datafusion_physical_expr::intervals::cp_solver::ExprIntervalGraph; use datafusion_physical_expr::{PhysicalExprRef, PhysicalSortRequirement}; use ahash::RandomState; +use 
datafusion_physical_expr_common::sort_expr::LexRequirement; use futures::{ready, Stream, StreamExt}; use hashbrown::HashSet; use parking_lot::Mutex; @@ -410,7 +411,7 @@ impl ExecutionPlan for SymmetricHashJoinExec { } } - fn required_input_ordering(&self) -> Vec>> { + fn required_input_ordering(&self) -> Vec> { vec![ self.left_sort_exprs .as_ref() @@ -929,13 +930,11 @@ fn lookup_join_hashmap( let (mut matched_probe, mut matched_build) = build_hashmap .get_matched_indices(hash_values.iter().enumerate().rev(), deleted_offset); - matched_probe.as_slice_mut().reverse(); - matched_build.as_slice_mut().reverse(); + matched_probe.reverse(); + matched_build.reverse(); - let build_indices: UInt64Array = - PrimitiveArray::new(matched_build.finish().into(), None); - let probe_indices: UInt32Array = - PrimitiveArray::new(matched_probe.finish().into(), None); + let build_indices: UInt64Array = matched_build.into(); + let probe_indices: UInt32Array = matched_probe.into(); let (build_indices, probe_indices) = equal_rows_arr( &build_indices, diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index 80d8815bdebc..89f3feaf07be 100644 --- a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -31,7 +31,7 @@ use crate::{ use arrow::array::{ downcast_array, new_null_array, Array, BooleanBufferBuilder, UInt32Array, - UInt32BufferBuilder, UInt32Builder, UInt64Array, UInt64BufferBuilder, + UInt32Builder, UInt64Array, }; use arrow::compute; use arrow::datatypes::{Field, Schema, SchemaBuilder, UInt32Type, UInt64Type}; @@ -163,8 +163,8 @@ macro_rules! chain_traverse { } else { i }; - $match_indices.append(match_row_idx); - $input_indices.append($input_idx as u32); + $match_indices.push(match_row_idx); + $input_indices.push($input_idx as u32); $remaining_output -= 1; // Follow the chain to get the next index value let next = $next_chain[match_row_idx as usize]; @@ -238,9 +238,9 @@ pub trait JoinHashMapType { &self, iter: impl Iterator, deleted_offset: Option, - ) -> (UInt32BufferBuilder, UInt64BufferBuilder) { - let mut input_indices = UInt32BufferBuilder::new(0); - let mut match_indices = UInt64BufferBuilder::new(0); + ) -> (Vec, Vec) { + let mut input_indices = vec![]; + let mut match_indices = vec![]; let hash_map = self.get_map(); let next_chain = self.get_list(); @@ -261,8 +261,8 @@ pub trait JoinHashMapType { } else { i }; - match_indices.append(match_row_idx); - input_indices.append(row_idx as u32); + match_indices.push(match_row_idx); + input_indices.push(row_idx as u32); // Follow the chain to get the next index value let next = next_chain[match_row_idx as usize]; if next == 0 { @@ -289,13 +289,9 @@ pub trait JoinHashMapType { deleted_offset: Option, limit: usize, offset: JoinHashMapOffset, - ) -> ( - UInt32BufferBuilder, - UInt64BufferBuilder, - Option, - ) { - let mut input_indices = UInt32BufferBuilder::new(0); - let mut match_indices = UInt64BufferBuilder::new(0); + ) -> (Vec, Vec, Option) { + let mut input_indices = vec![]; + let mut match_indices = vec![]; let mut remaining_output = limit; @@ -2447,7 +2443,7 @@ mod tests { Statistics { num_rows: Absent, total_byte_size: Absent, - column_statistics: dummy_column_stats.clone(), + column_statistics: dummy_column_stats, }, &join_on, ); diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index 59c5da6b6fb2..026798c5798b 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -31,7 +31,7 @@ 
pub use datafusion_expr::{Accumulator, ColumnarValue}; pub use datafusion_physical_expr::window::WindowExpr; use datafusion_physical_expr::PhysicalSortExpr; pub use datafusion_physical_expr::{ - expressions, functions, udf, AggregateExpr, Distribution, Partitioning, PhysicalExpr, + expressions, functions, udf, Distribution, Partitioning, PhysicalExpr, }; pub use crate::display::{DefaultDisplay, DisplayAs, DisplayFormatType, VerboseDisplay}; @@ -82,8 +82,9 @@ pub mod windows; pub mod work_table; pub mod udaf { - pub use datafusion_physical_expr_functions_aggregate::aggregate::AggregateFunctionExpr; + pub use datafusion_physical_expr::aggregate::AggregateFunctionExpr; } +pub mod coalesce; #[cfg(test)] pub mod test; diff --git a/datafusion/physical-plan/src/recursive_query.rs b/datafusion/physical-plan/src/recursive_query.rs index bd9303f97db0..e9ea9d4f5032 100644 --- a/datafusion/physical-plan/src/recursive_query.rs +++ b/datafusion/physical-plan/src/recursive_query.rs @@ -394,7 +394,7 @@ impl Stream for RecursiveQueryStream { self.recursive_stream = None; self.poll_next_iteration(cx) } - Some(Ok(batch)) => self.push_batch(batch.clone()), + Some(Ok(batch)) => self.push_batch(batch), _ => Poll::Ready(batch_result), } } else { diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index 656d82215bbe..650006a9d02d 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -38,9 +38,10 @@ use crate::sorts::streaming_merge; use crate::stream::RecordBatchStreamAdapter; use crate::{DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, Statistics}; -use arrow::array::{ArrayRef, UInt64Builder}; -use arrow::datatypes::SchemaRef; +use arrow::array::ArrayRef; +use arrow::datatypes::{SchemaRef, UInt64Type}; use arrow::record_batch::RecordBatch; +use arrow_array::PrimitiveArray; use datafusion_common::utils::transpose; use datafusion_common::{arrow_datafusion_err, not_impl_err, DataFusionError, Result}; use datafusion_common_runtime::SpawnedTask; @@ -275,12 +276,11 @@ impl BatchPartitioner { create_hashes(&arrays, random_state, hash_buffer)?; let mut indices: Vec<_> = (0..*partitions) - .map(|_| UInt64Builder::with_capacity(batch.num_rows())) + .map(|_| Vec::with_capacity(batch.num_rows())) .collect(); for (index, hash) in hash_buffer.iter().enumerate() { - indices[(*hash % *partitions as u64) as usize] - .append_value(index as u64); + indices[(*hash % *partitions as u64) as usize].push(index as u64); } // Finished building index-arrays for output partitions @@ -291,8 +291,8 @@ impl BatchPartitioner { let it = indices .into_iter() .enumerate() - .filter_map(|(partition, mut indices)| { - let indices = indices.finish(); + .filter_map(|(partition, indices)| { + let indices: PrimitiveArray = indices.into(); (!indices.is_empty()).then_some((partition, indices)) }) .map(move |(partition, indices)| { @@ -1025,7 +1025,7 @@ mod tests { use arrow::datatypes::{DataType, Field, Schema}; use datafusion_common::cast::as_string_array; use datafusion_common::{assert_batches_sorted_eq, exec_err}; - use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; + use datafusion_execution::runtime_env::RuntimeEnvBuilder; use tokio::task::JoinSet; @@ -1507,7 +1507,9 @@ mod tests { // setup up context let runtime = Arc::new( - RuntimeEnv::new(RuntimeConfig::default().with_memory_limit(1, 1.0)).unwrap(), + RuntimeEnvBuilder::default() + .with_memory_limit(1, 1.0) + .build()?, ); let task_ctx = 
TaskContext::default().with_runtime(runtime); diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index e7e1c5481f80..e92a57493141 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -499,6 +499,12 @@ impl ExternalSorter { metrics: BaselineMetrics, ) -> Result { assert_ne!(self.in_mem_batches.len(), 0); + + // The elapsed compute timer is updated when the value is dropped. + // There is no need for an explicit call to drop. + let elapsed_compute = metrics.elapsed_compute().clone(); + let _timer = elapsed_compute.timer(); + if self.in_mem_batches.len() == 1 { let batch = self.in_mem_batches.remove(0); let reservation = self.reservation.take(); @@ -552,7 +558,9 @@ impl ExternalSorter { let fetch = self.fetch; let expressions = Arc::clone(&self.expr); let stream = futures::stream::once(futures::future::lazy(move |_| { + let timer = metrics.elapsed_compute().timer(); let sorted = sort_batch(&batch, &expressions, fetch)?; + timer.done(); metrics.record_output(sorted.num_rows()); drop(batch); drop(reservation); @@ -958,7 +966,7 @@ mod tests { use arrow::datatypes::*; use datafusion_common::cast::as_primitive_array; use datafusion_execution::config::SessionConfig; - use datafusion_execution::runtime_env::RuntimeConfig; + use datafusion_execution::runtime_env::RuntimeEnvBuilder; use datafusion_common::ScalarValue; use datafusion_physical_expr::expressions::Literal; @@ -1001,9 +1009,11 @@ mod tests { .options() .execution .sort_spill_reservation_bytes; - let rt_config = RuntimeConfig::new() - .with_memory_limit(sort_spill_reservation_bytes + 12288, 1.0); - let runtime = Arc::new(RuntimeEnv::new(rt_config)?); + let runtime = Arc::new( + RuntimeEnvBuilder::new() + .with_memory_limit(sort_spill_reservation_bytes + 12288, 1.0) + .build()?, + ); let task_ctx = Arc::new( TaskContext::default() .with_session_config(session_config) @@ -1077,11 +1087,14 @@ mod tests { .execution .sort_spill_reservation_bytes; - let rt_config = RuntimeConfig::new().with_memory_limit( - sort_spill_reservation_bytes + avg_batch_size * (partitions - 1), - 1.0, + let runtime = Arc::new( + RuntimeEnvBuilder::new() + .with_memory_limit( + sort_spill_reservation_bytes + avg_batch_size * (partitions - 1), + 1.0, + ) + .build()?, ); - let runtime = Arc::new(RuntimeEnv::new(rt_config)?); let task_ctx = Arc::new( TaskContext::default() .with_runtime(runtime) diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index 7ba1d77aea4e..131fa71217cc 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -22,6 +22,7 @@ use std::sync::Arc; use crate::common::spawn_buffered; use crate::expressions::PhysicalSortExpr; +use crate::limit::LimitStream; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::sorts::streaming_merge; use crate::{ @@ -34,6 +35,7 @@ use datafusion_execution::memory_pool::MemoryConsumer; use datafusion_execution::TaskContext; use datafusion_physical_expr::PhysicalSortRequirement; +use datafusion_physical_expr_common::sort_expr::LexRequirement; use log::{debug, trace}; /// Sort preserving merge execution plan @@ -186,7 +188,7 @@ impl ExecutionPlan for SortPreservingMergeExec { vec![false] } - fn required_input_ordering(&self) -> Vec>> { + fn required_input_ordering(&self) -> Vec> { 
vec![Some(PhysicalSortRequirement::from_sort_exprs(&self.expr))] } @@ -238,12 +240,23 @@ impl ExecutionPlan for SortPreservingMergeExec { 0 => internal_err!( "SortPreservingMergeExec requires at least one input partition" ), - 1 => { - // bypass if there is only one partition to merge (no metrics in this case either) - let result = self.input.execute(0, context); - debug!("Done getting stream for SortPreservingMergeExec::execute with 1 input"); - result - } + 1 => match self.fetch { + Some(fetch) => { + let stream = self.input.execute(0, context)?; + debug!("Done getting stream for SortPreservingMergeExec::execute with 1 input with {fetch}"); + Ok(Box::pin(LimitStream::new( + stream, + 0, + Some(fetch), + BaselineMetrics::new(&self.metrics, partition), + ))) + } + None => { + let stream = self.input.execute(0, context); + debug!("Done getting stream for SortPreservingMergeExec::execute with 1 input without fetch"); + stream + } + }, _ => { let receivers = (0..input_partitions) .map(|partition| { @@ -817,6 +830,79 @@ mod tests { ); } + #[tokio::test] + async fn test_sort_merge_single_partition_with_fetch() { + let task_ctx = Arc::new(TaskContext::default()); + let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 7, 9, 3])); + let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c", "d", "e"])); + let batch = RecordBatch::try_from_iter(vec![("a", a), ("b", b)]).unwrap(); + let schema = batch.schema(); + + let sort = vec![PhysicalSortExpr { + expr: col("b", &schema).unwrap(), + options: SortOptions { + descending: false, + nulls_first: true, + }, + }]; + let exec = MemoryExec::try_new(&[vec![batch]], schema, None).unwrap(); + let merge = Arc::new( + SortPreservingMergeExec::new(sort, Arc::new(exec)).with_fetch(Some(2)), + ); + + let collected = collect(merge, task_ctx).await.unwrap(); + assert_eq!(collected.len(), 1); + + assert_batches_eq!( + &[ + "+---+---+", + "| a | b |", + "+---+---+", + "| 1 | a |", + "| 2 | b |", + "+---+---+", + ], + collected.as_slice() + ); + } + + #[tokio::test] + async fn test_sort_merge_single_partition_without_fetch() { + let task_ctx = Arc::new(TaskContext::default()); + let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 7, 9, 3])); + let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c", "d", "e"])); + let batch = RecordBatch::try_from_iter(vec![("a", a), ("b", b)]).unwrap(); + let schema = batch.schema(); + + let sort = vec![PhysicalSortExpr { + expr: col("b", &schema).unwrap(), + options: SortOptions { + descending: false, + nulls_first: true, + }, + }]; + let exec = MemoryExec::try_new(&[vec![batch]], schema, None).unwrap(); + let merge = Arc::new(SortPreservingMergeExec::new(sort, Arc::new(exec))); + + let collected = collect(merge, task_ctx).await.unwrap(); + assert_eq!(collected.len(), 1); + + assert_batches_eq!( + &[ + "+---+---+", + "| a | b |", + "+---+---+", + "| 1 | a |", + "| 2 | b |", + "| 7 | c |", + "| 9 | d |", + "| 3 | e |", + "+---+---+", + ], + collected.as_slice() + ); + } + #[tokio::test] async fn test_async() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs index efb5dea1ec6e..c1bcd83a6fd2 100644 --- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs @@ -59,7 +59,8 @@ use datafusion_expr::ColumnarValue; use datafusion_physical_expr::window::{ PartitionBatches, PartitionKey, 
PartitionWindowAggStates, WindowState, }; -use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; +use datafusion_physical_expr::PhysicalExpr; +use datafusion_physical_expr_common::sort_expr::LexRequirement; use futures::stream::Stream; use futures::{ready, StreamExt}; use hashbrown::raw::RawTable; @@ -253,7 +254,7 @@ impl ExecutionPlan for BoundedWindowAggExec { vec![&self.input] } - fn required_input_ordering(&self) -> Vec>> { + fn required_input_ordering(&self) -> Vec> { let partition_bys = self.window_expr()[0].partition_by(); let order_keys = self.window_expr()[0].order_by(); if self.input_order_mode != InputOrderMode::Sorted @@ -1311,7 +1312,7 @@ mod tests { &args, &partitionby_exprs, &orderby_exprs, - Arc::new(window_frame.clone()), + Arc::new(window_frame), &input.schema(), false, )?], @@ -1484,7 +1485,7 @@ mod tests { let partitions = vec![ Arc::new(TestStreamPartition { schema: Arc::clone(&schema), - batches: batches.clone(), + batches, idx: 0, state: PolingState::BatchReturn, sleep_duration: per_batch_wait_duration, diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index f938f4410a99..56823e6dec2d 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -30,19 +30,20 @@ use crate::{ use arrow::datatypes::Schema; use arrow_schema::{DataType, Field, SchemaRef}; -use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{ + exec_datafusion_err, exec_err, DataFusionError, Result, ScalarValue, +}; use datafusion_expr::{ BuiltInWindowFunction, PartitionEvaluator, WindowFrame, WindowFunctionDefinition, WindowUDF, }; +use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr}; use datafusion_physical_expr::equivalence::collapse_lex_req; use datafusion_physical_expr::{ reverse_order_bys, window::{BuiltInWindowFunctionExpr, SlidingAggregateWindowExpr}, - AggregateExpr, ConstExpr, EquivalenceProperties, LexOrdering, - PhysicalSortRequirement, + ConstExpr, EquivalenceProperties, LexOrdering, PhysicalSortRequirement, }; -use datafusion_physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; use itertools::Itertools; mod bounded_window_agg_exec; @@ -54,6 +55,7 @@ use datafusion_physical_expr::expressions::Column; pub use datafusion_physical_expr::window::{ BuiltInWindowExpr, PlainAggregateWindowExpr, WindowExpr, }; +use datafusion_physical_expr_common::sort_expr::LexRequirement; pub use window_agg_exec::WindowAggExec; /// Build field from window function and add it into schema @@ -139,7 +141,7 @@ fn window_expr_from_aggregate_expr( partition_by: &[Arc], order_by: &[PhysicalSortExpr], window_frame: Arc, - aggregate: Arc, + aggregate: Arc, ) -> Arc { // Is there a potentially unlimited sized window frame? let unbounded_window = window_frame.start_bound.is_unbounded(); @@ -284,7 +286,9 @@ fn create_built_in_window_expr( args[1] .as_any() .downcast_ref::() - .unwrap() + .ok_or_else(|| { + exec_datafusion_err!("Expected a signed integer literal for the second argument of nth_value, got {}", args[1]) + })? 
.value() .clone(), )?; @@ -397,7 +401,7 @@ pub(crate) fn calc_requirements< >( partition_by_exprs: impl IntoIterator, orderby_sort_exprs: impl IntoIterator, -) -> Option> { +) -> Option { let mut sort_reqs = partition_by_exprs .into_iter() .map(|partition_by| { @@ -567,7 +571,7 @@ pub fn get_window_mode( input: &Arc, ) -> Option<(bool, InputOrderMode)> { let input_eqs = input.equivalence_properties().clone(); - let mut partition_by_reqs: Vec = vec![]; + let mut partition_by_reqs: LexRequirement = vec![]; let (_, indices) = input_eqs.find_longest_permutation(partitionby_exprs); partition_by_reqs.extend(indices.iter().map(|&idx| PhysicalSortRequirement { expr: Arc::clone(&partitionby_exprs[idx]), @@ -724,7 +728,7 @@ mod tests { orderbys.push(PhysicalSortExpr { expr, options }); } - let mut expected: Option> = None; + let mut expected: Option = None; for (col_name, reqs) in expected_params { let options = reqs.map(|(descending, nulls_first)| SortOptions { descending, diff --git a/datafusion/physical-plan/src/windows/window_agg_exec.rs b/datafusion/physical-plan/src/windows/window_agg_exec.rs index d2f7090fca17..afe9700ed08c 100644 --- a/datafusion/physical-plan/src/windows/window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/window_agg_exec.rs @@ -43,7 +43,7 @@ use datafusion_common::stats::Precision; use datafusion_common::utils::{evaluate_partition_ranges, transpose}; use datafusion_common::{internal_err, Result}; use datafusion_execution::TaskContext; -use datafusion_physical_expr::PhysicalSortRequirement; +use datafusion_physical_expr_common::sort_expr::LexRequirement; use futures::{ready, Stream, StreamExt}; /// Window execution plan @@ -191,7 +191,7 @@ impl ExecutionPlan for WindowAggExec { vec![true] } - fn required_input_ordering(&self) -> Vec>> { + fn required_input_ordering(&self) -> Vec> { let partition_bys = self.window_expr()[0].partition_by(); let order_keys = self.window_expr()[0].order_by(); if self.ordered_partition_by_indices.len() < partition_bys.len() { diff --git a/datafusion/proto/gen/src/main.rs b/datafusion/proto/gen/src/main.rs index d3b3c92f6065..be61ff58fa8d 100644 --- a/datafusion/proto/gen/src/main.rs +++ b/datafusion/proto/gen/src/main.rs @@ -55,7 +55,7 @@ fn main() -> Result<(), String> { let common_path = proto_dir.join("src/datafusion_common.rs"); println!( "Copying {} to {}", - prost.clone().display(), + prost.display(), proto_dir.join("src/generated/prost.rs").display() ); std::fs::copy(prost, proto_dir.join("src/generated/prost.rs")).unwrap(); diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index acf540d44465..826992e132ba 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -1140,6 +1140,7 @@ message NestedLoopJoinExecNode { message CoalesceBatchesExecNode { PhysicalPlanNode input = 1; uint32 target_batch_size = 2; + optional uint32 fetch = 3; } message CoalescePartitionsExecNode { diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 489b6c67534f..b4d63798f080 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -2000,6 +2000,9 @@ impl serde::Serialize for CoalesceBatchesExecNode { if self.target_batch_size != 0 { len += 1; } + if self.fetch.is_some() { + len += 1; + } let mut struct_ser = serializer.serialize_struct("datafusion.CoalesceBatchesExecNode", len)?; if let Some(v) = self.input.as_ref() { struct_ser.serialize_field("input", v)?; @@ -2007,6 
+2010,9 @@ impl serde::Serialize for CoalesceBatchesExecNode { if self.target_batch_size != 0 { struct_ser.serialize_field("targetBatchSize", &self.target_batch_size)?; } + if let Some(v) = self.fetch.as_ref() { + struct_ser.serialize_field("fetch", v)?; + } struct_ser.end() } } @@ -2020,12 +2026,14 @@ impl<'de> serde::Deserialize<'de> for CoalesceBatchesExecNode { "input", "target_batch_size", "targetBatchSize", + "fetch", ]; #[allow(clippy::enum_variant_names)] enum GeneratedField { Input, TargetBatchSize, + Fetch, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -2049,6 +2057,7 @@ impl<'de> serde::Deserialize<'de> for CoalesceBatchesExecNode { match value { "input" => Ok(GeneratedField::Input), "targetBatchSize" | "target_batch_size" => Ok(GeneratedField::TargetBatchSize), + "fetch" => Ok(GeneratedField::Fetch), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -2070,6 +2079,7 @@ impl<'de> serde::Deserialize<'de> for CoalesceBatchesExecNode { { let mut input__ = None; let mut target_batch_size__ = None; + let mut fetch__ = None; while let Some(k) = map_.next_key()? { match k { GeneratedField::Input => { @@ -2086,11 +2096,20 @@ impl<'de> serde::Deserialize<'de> for CoalesceBatchesExecNode { Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0) ; } + GeneratedField::Fetch => { + if fetch__.is_some() { + return Err(serde::de::Error::duplicate_field("fetch")); + } + fetch__ = + map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| x.0) + ; + } } } Ok(CoalesceBatchesExecNode { input: input__, target_batch_size: target_batch_size__.unwrap_or_default(), + fetch: fetch__, }) } } diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index c98c950d35f9..875d2af75dd7 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -1813,6 +1813,8 @@ pub struct CoalesceBatchesExecNode { pub input: ::core::option::Option<::prost::alloc::boxed::Box>, #[prost(uint32, tag = "2")] pub target_batch_size: u32, + #[prost(uint32, optional, tag = "3")] + pub fetch: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index b74237b5281b..acda1298dd80 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -586,7 +586,10 @@ pub fn parse_expr( parse_exprs(&pb.args, registry, codec)?, pb.distinct, parse_optional_expr(pb.filter.as_deref(), registry, codec)?.map(Box::new), - parse_vec_expr(&pb.order_by, registry, codec)?, + match pb.order_by.len() { + 0 => None, + _ => Some(parse_exprs(&pb.order_by, registry, codec)?), + }, None, ))) } @@ -676,16 +679,6 @@ pub fn from_proto_binary_op(op: &str) -> Result { } } -fn parse_vec_expr( - p: &[protobuf::LogicalExprNode], - registry: &dyn FunctionRegistry, - codec: &dyn LogicalExtensionCodec, -) -> Result>, Error> { - let res = parse_exprs(p, registry, codec)?; - // Convert empty vector to None. 
- Ok((!res.is_empty()).then_some(res)) -} - fn parse_optional_expr( p: Option<&protobuf::LogicalExprNode>, registry: &dyn FunctionRegistry, diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index bc019725f36c..67977b1795a6 100644 --- a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -29,7 +29,7 @@ use crate::{ }, }; -use crate::protobuf::{proto_error, FromProtoError, ToProtoError}; +use crate::protobuf::{proto_error, ToProtoError}; use arrow::datatypes::{DataType, Schema, SchemaRef}; #[cfg(feature = "parquet")] use datafusion::datasource::file_format::parquet::ParquetFormat; @@ -66,11 +66,10 @@ use datafusion_expr::{ }; use datafusion_expr::{AggregateUDF, Unnest}; +use self::to_proto::{serialize_expr, serialize_exprs}; use prost::bytes::BufMut; use prost::Message; -use self::to_proto::serialize_expr; - pub mod file_formats; pub mod from_proto; pub mod to_proto; @@ -273,13 +272,7 @@ impl AsLogicalPlan for LogicalPlanNode { values .values_list .chunks_exact(n_cols) - .map(|r| { - r.iter() - .map(|expr| { - from_proto::parse_expr(expr, ctx, extension_codec) - }) - .collect::, FromProtoError>>() - }) + .map(|r| from_proto::parse_exprs(r, ctx, extension_codec)) .collect::, _>>() .map_err(|e| e.into()) }?; @@ -288,11 +281,8 @@ impl AsLogicalPlan for LogicalPlanNode { LogicalPlanType::Projection(projection) => { let input: LogicalPlan = into_logical_plan!(projection.input, ctx, extension_codec)?; - let expr: Vec = projection - .expr - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; + let expr: Vec = + from_proto::parse_exprs(&projection.expr, ctx, extension_codec)?; let new_proj = project(input, expr)?; match projection.optional_alias.as_ref() { @@ -324,26 +314,17 @@ impl AsLogicalPlan for LogicalPlanNode { LogicalPlanType::Window(window) => { let input: LogicalPlan = into_logical_plan!(window.input, ctx, extension_codec)?; - let window_expr = window - .window_expr - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; + let window_expr = + from_proto::parse_exprs(&window.window_expr, ctx, extension_codec)?; LogicalPlanBuilder::from(input).window(window_expr)?.build() } LogicalPlanType::Aggregate(aggregate) => { let input: LogicalPlan = into_logical_plan!(aggregate.input, ctx, extension_codec)?; - let group_expr = aggregate - .group_expr - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; - let aggr_expr = aggregate - .aggr_expr - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; + let group_expr = + from_proto::parse_exprs(&aggregate.group_expr, ctx, extension_codec)?; + let aggr_expr = + from_proto::parse_exprs(&aggregate.aggr_expr, ctx, extension_codec)?; LogicalPlanBuilder::from(input) .aggregate(group_expr, aggr_expr)? 
.build() @@ -361,20 +342,16 @@ impl AsLogicalPlan for LogicalPlanNode { projection = Some(column_indices); } - let filters = scan - .filters - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; + let filters = + from_proto::parse_exprs(&scan.filters, ctx, extension_codec)?; let mut all_sort_orders = vec![]; for order in &scan.file_sort_order { - let file_sort_order = order - .logical_expr_nodes - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; - all_sort_orders.push(file_sort_order) + all_sort_orders.push(from_proto::parse_exprs( + &order.logical_expr_nodes, + ctx, + extension_codec, + )?) } let file_format: Arc = @@ -475,11 +452,8 @@ impl AsLogicalPlan for LogicalPlanNode { projection = Some(column_indices); } - let filters = scan - .filters - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; + let filters = + from_proto::parse_exprs(&scan.filters, ctx, extension_codec)?; let table_name = from_table_reference(scan.table_name.as_ref(), "CustomScan")?; @@ -502,11 +476,8 @@ impl AsLogicalPlan for LogicalPlanNode { LogicalPlanType::Sort(sort) => { let input: LogicalPlan = into_logical_plan!(sort.input, ctx, extension_codec)?; - let sort_expr: Vec = sort - .expr - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; + let sort_expr: Vec = + from_proto::parse_exprs(&sort.expr, ctx, extension_codec)?; LogicalPlanBuilder::from(input).sort(sort_expr)?.build() } LogicalPlanType::Repartition(repartition) => { @@ -525,12 +496,7 @@ impl AsLogicalPlan for LogicalPlanNode { hash_expr: pb_hash_expr, partition_count, }) => Partitioning::Hash( - pb_hash_expr - .iter() - .map(|expr| { - from_proto::parse_expr(expr, ctx, extension_codec) - }) - .collect::, _>>()?, + from_proto::parse_exprs(pb_hash_expr, ctx, extension_codec)?, *partition_count as usize, ), PartitionMethod::RoundRobin(partition_count) => { @@ -570,12 +536,11 @@ impl AsLogicalPlan for LogicalPlanNode { let mut order_exprs = vec![]; for expr in &create_extern_table.order_exprs { - let order_expr = expr - .logical_expr_nodes - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; - order_exprs.push(order_expr) + order_exprs.push(from_proto::parse_exprs( + &expr.logical_expr_nodes, + ctx, + extension_codec, + )?); } let mut column_defaults = @@ -693,16 +658,10 @@ impl AsLogicalPlan for LogicalPlanNode { LogicalPlanBuilder::from(input).limit(skip, fetch)?.build() } LogicalPlanType::Join(join) => { - let left_keys: Vec = join - .left_join_key - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; - let right_keys: Vec = join - .right_join_key - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; + let left_keys: Vec = + from_proto::parse_exprs(&join.left_join_key, ctx, extension_codec)?; + let right_keys: Vec = + from_proto::parse_exprs(&join.right_join_key, ctx, extension_codec)?; let join_type = protobuf::JoinType::try_from(join.join_type).map_err(|_| { proto_error(format!( @@ -804,27 +763,20 @@ impl AsLogicalPlan for LogicalPlanNode { LogicalPlanType::DistinctOn(distinct_on) => { let input: LogicalPlan = into_logical_plan!(distinct_on.input, ctx, extension_codec)?; - let on_expr = distinct_on - .on_expr - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; - let select_expr = distinct_on - 
.select_expr - .iter() - .map(|expr| from_proto::parse_expr(expr, ctx, extension_codec)) - .collect::, _>>()?; + let on_expr = + from_proto::parse_exprs(&distinct_on.on_expr, ctx, extension_codec)?; + let select_expr = from_proto::parse_exprs( + &distinct_on.select_expr, + ctx, + extension_codec, + )?; let sort_expr = match distinct_on.sort_expr.len() { 0 => None, - _ => Some( - distinct_on - .sort_expr - .iter() - .map(|expr| { - from_proto::parse_expr(expr, ctx, extension_codec) - }) - .collect::, _>>()?, - ), + _ => Some(from_proto::parse_exprs( + &distinct_on.sort_expr, + ctx, + extension_codec, + )?), }; LogicalPlanBuilder::from(input) .distinct_on(on_expr, select_expr, sort_expr)? @@ -943,11 +895,8 @@ impl AsLogicalPlan for LogicalPlanNode { } else { values[0].len() } as u64; - let values_list = values - .iter() - .flatten() - .map(|v| serialize_expr(v, extension_codec)) - .collect::, _>>()?; + let values_list = + serialize_exprs(values.iter().flatten(), extension_codec)?; Ok(protobuf::LogicalPlanNode { logical_plan_type: Some(LogicalPlanType::Values( protobuf::ValuesNode { @@ -982,10 +931,8 @@ impl AsLogicalPlan for LogicalPlanNode { }; let schema: protobuf::Schema = schema.as_ref().try_into()?; - let filters: Vec = filters - .iter() - .map(|filter| serialize_expr(filter, extension_codec)) - .collect::, _>>()?; + let filters: Vec = + serialize_exprs(filters, extension_codec)?; if let Some(listing_table) = source.downcast_ref::() { let any = listing_table.options().format.as_any(); @@ -1037,10 +984,7 @@ impl AsLogicalPlan for LogicalPlanNode { let mut exprs_vec: Vec = vec![]; for order in &options.file_sort_order { let expr_vec = LogicalExprNodeCollection { - logical_expr_nodes: order - .iter() - .map(|expr| serialize_expr(expr, extension_codec)) - .collect::, ToProtoError>>()?, + logical_expr_nodes: serialize_exprs(order, extension_codec)?, }; exprs_vec.push(expr_vec); } @@ -1118,10 +1062,7 @@ impl AsLogicalPlan for LogicalPlanNode { extension_codec, )?, )), - expr: expr - .iter() - .map(|expr| serialize_expr(expr, extension_codec)) - .collect::, ToProtoError>>()?, + expr: serialize_exprs(expr, extension_codec)?, optional_alias: None, }, ))), @@ -1173,22 +1114,13 @@ impl AsLogicalPlan for LogicalPlanNode { )?; let sort_expr = match sort_expr { None => vec![], - Some(sort_expr) => sort_expr - .iter() - .map(|expr| serialize_expr(expr, extension_codec)) - .collect::, _>>()?, + Some(sort_expr) => serialize_exprs(sort_expr, extension_codec)?, }; Ok(protobuf::LogicalPlanNode { logical_plan_type: Some(LogicalPlanType::DistinctOn(Box::new( protobuf::DistinctOnNode { - on_expr: on_expr - .iter() - .map(|expr| serialize_expr(expr, extension_codec)) - .collect::, _>>()?, - select_expr: select_expr - .iter() - .map(|expr| serialize_expr(expr, extension_codec)) - .collect::, _>>()?, + on_expr: serialize_exprs(on_expr, extension_codec)?, + select_expr: serialize_exprs(select_expr, extension_codec)?, sort_expr, input: Some(Box::new(input)), }, @@ -1207,10 +1139,7 @@ impl AsLogicalPlan for LogicalPlanNode { logical_plan_type: Some(LogicalPlanType::Window(Box::new( protobuf::WindowNode { input: Some(Box::new(input)), - window_expr: window_expr - .iter() - .map(|expr| serialize_expr(expr, extension_codec)) - .collect::, _>>()?, + window_expr: serialize_exprs(window_expr, extension_codec)?, }, ))), }) @@ -1230,14 +1159,8 @@ impl AsLogicalPlan for LogicalPlanNode { logical_plan_type: Some(LogicalPlanType::Aggregate(Box::new( protobuf::AggregateNode { input: Some(Box::new(input)), - group_expr: 
group_expr - .iter() - .map(|expr| serialize_expr(expr, extension_codec)) - .collect::, _>>()?, - aggr_expr: aggr_expr - .iter() - .map(|expr| serialize_expr(expr, extension_codec)) - .collect::, _>>()?, + group_expr: serialize_exprs(group_expr, extension_codec)?, + aggr_expr: serialize_exprs(aggr_expr, extension_codec)?, }, ))), }) @@ -1335,10 +1258,8 @@ impl AsLogicalPlan for LogicalPlanNode { input.as_ref(), extension_codec, )?; - let selection_expr: Vec = expr - .iter() - .map(|expr| serialize_expr(expr, extension_codec)) - .collect::, ToProtoError>>()?; + let selection_expr: Vec = + serialize_exprs(expr, extension_codec)?; Ok(protobuf::LogicalPlanNode { logical_plan_type: Some(LogicalPlanType::Sort(Box::new( protobuf::SortNode { @@ -1367,10 +1288,7 @@ impl AsLogicalPlan for LogicalPlanNode { let pb_partition_method = match partitioning_scheme { Partitioning::Hash(exprs, partition_count) => { PartitionMethod::Hash(protobuf::HashRepartition { - hash_expr: exprs - .iter() - .map(|expr| serialize_expr(expr, extension_codec)) - .collect::, ToProtoError>>()?, + hash_expr: serialize_exprs(exprs, extension_codec)?, partition_count: *partition_count as u64, }) } @@ -1419,10 +1337,7 @@ impl AsLogicalPlan for LogicalPlanNode { let mut converted_order_exprs: Vec = vec![]; for order in order_exprs { let temp = LogicalExprNodeCollection { - logical_expr_nodes: order - .iter() - .map(|expr| serialize_expr(expr, extension_codec)) - .collect::, ToProtoError>>()?, + logical_expr_nodes: serialize_exprs(order, extension_codec)?, }; converted_order_exprs.push(temp); } diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index 0f6722dd375b..78f370c714cc 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -18,7 +18,7 @@ use std::fmt::Debug; use std::sync::Arc; -use datafusion::physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; +use datafusion::physical_expr::aggregate::AggregateExprBuilder; use prost::bytes::BufMut; use prost::Message; @@ -34,6 +34,7 @@ use datafusion::datasource::physical_plan::ParquetExec; use datafusion::datasource::physical_plan::{AvroExec, CsvExec}; use datafusion::execution::runtime_env::RuntimeEnv; use datafusion::execution::FunctionRegistry; +use datafusion::physical_expr::aggregate::AggregateFunctionExpr; use datafusion::physical_expr::{PhysicalExprRef, PhysicalSortRequirement}; use datafusion::physical_plan::aggregates::AggregateMode; use datafusion::physical_plan::aggregates::{AggregateExec, PhysicalGroupBy}; @@ -59,7 +60,7 @@ use datafusion::physical_plan::sorts::sort_preserving_merge::SortPreservingMerge use datafusion::physical_plan::union::{InterleaveExec, UnionExec}; use datafusion::physical_plan::windows::{BoundedWindowAggExec, WindowAggExec}; use datafusion::physical_plan::{ - AggregateExpr, ExecutionPlan, InputOrderMode, PhysicalExpr, WindowExpr, + ExecutionPlan, InputOrderMode, PhysicalExpr, WindowExpr, }; use datafusion_common::{internal_err, not_impl_err, DataFusionError, Result}; use datafusion_expr::{AggregateUDF, ScalarUDF}; @@ -259,10 +260,13 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { runtime, extension_codec, )?; - Ok(Arc::new(CoalesceBatchesExec::new( - input, - coalesce_batches.target_batch_size as usize, - ))) + Ok(Arc::new( + CoalesceBatchesExec::new( + input, + coalesce_batches.target_batch_size as usize, + ) + .with_fetch(coalesce_batches.fetch.map(|f| f as usize)), + )) } PhysicalPlanType::Merge(merge) => { let input: Arc = @@ 
-464,7 +468,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { }) .collect::, _>>()?; - let physical_aggr_expr: Vec> = hash_agg + let physical_aggr_expr: Vec> = hash_agg .aggr_expr .iter() .zip(hash_agg.aggr_expr_name.iter()) @@ -1536,6 +1540,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { protobuf::CoalesceBatchesExecNode { input: Some(Box::new(input)), target_batch_size: coalesce_batches.target_batch_size() as u32, + fetch: coalesce_batches.fetch().map(|n| n as u32), }, ))), }); diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index 7949a457f40f..555ad22a9bc1 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -29,7 +29,7 @@ use datafusion::physical_plan::expressions::{ }; use datafusion::physical_plan::udaf::AggregateFunctionExpr; use datafusion::physical_plan::windows::{BuiltInWindowExpr, PlainAggregateWindowExpr}; -use datafusion::physical_plan::{AggregateExpr, Partitioning, PhysicalExpr, WindowExpr}; +use datafusion::physical_plan::{Partitioning, PhysicalExpr, WindowExpr}; use datafusion::{ datasource::{ file_format::{csv::CsvSink, json::JsonSink}, @@ -49,58 +49,50 @@ use crate::protobuf::{ use super::PhysicalExtensionCodec; pub fn serialize_physical_aggr_expr( - aggr_expr: Arc, + aggr_expr: Arc, codec: &dyn PhysicalExtensionCodec, ) -> Result { let expressions = serialize_physical_exprs(aggr_expr.expressions(), codec)?; let ordering_req = aggr_expr.order_bys().unwrap_or(&[]).to_vec(); let ordering_req = serialize_physical_sort_exprs(ordering_req, codec)?; - if let Some(a) = aggr_expr.as_any().downcast_ref::() { - let name = a.fun().name().to_string(); - let mut buf = Vec::new(); - codec.try_encode_udaf(a.fun(), &mut buf)?; - Ok(protobuf::PhysicalExprNode { - expr_type: Some(protobuf::physical_expr_node::ExprType::AggregateExpr( - protobuf::PhysicalAggregateExprNode { - aggregate_function: Some(physical_aggregate_expr_node::AggregateFunction::UserDefinedAggrFunction(name)), - expr: expressions, - ordering_req, - distinct: a.is_distinct(), - ignore_nulls: a.ignore_nulls(), - fun_definition: (!buf.is_empty()).then_some(buf) - }, - )), - }) - } else { - unreachable!("No other types exists besides AggergationFunctionExpr"); - } + let name = aggr_expr.fun().name().to_string(); + let mut buf = Vec::new(); + codec.try_encode_udaf(aggr_expr.fun(), &mut buf)?; + Ok(protobuf::PhysicalExprNode { + expr_type: Some(protobuf::physical_expr_node::ExprType::AggregateExpr( + protobuf::PhysicalAggregateExprNode { + aggregate_function: Some(physical_aggregate_expr_node::AggregateFunction::UserDefinedAggrFunction(name)), + expr: expressions, + ordering_req, + distinct: aggr_expr.is_distinct(), + ignore_nulls: aggr_expr.ignore_nulls(), + fun_definition: (!buf.is_empty()).then_some(buf) + }, + )), + }) } fn serialize_physical_window_aggr_expr( - aggr_expr: &dyn AggregateExpr, + aggr_expr: &AggregateFunctionExpr, _window_frame: &WindowFrame, codec: &dyn PhysicalExtensionCodec, ) -> Result<(physical_window_expr_node::WindowFunction, Option>)> { - if let Some(a) = aggr_expr.as_any().downcast_ref::() { - if a.is_distinct() || a.ignore_nulls() { - // TODO - return not_impl_err!( - "Distinct aggregate functions not supported in window expressions" - ); - } - - let mut buf = Vec::new(); - codec.try_encode_udaf(a.fun(), &mut buf)?; - Ok(( - physical_window_expr_node::WindowFunction::UserDefinedAggrFunction( - a.fun().name().to_string(), - ), - 
(!buf.is_empty()).then_some(buf), - )) - } else { - unreachable!("No other types exists besides AggergationFunctionExpr"); + if aggr_expr.is_distinct() || aggr_expr.ignore_nulls() { + // TODO + return not_impl_err!( + "Distinct aggregate functions not supported in window expressions" + ); } + + let mut buf = Vec::new(); + codec.try_encode_udaf(aggr_expr.fun(), &mut buf)?; + Ok(( + physical_window_expr_node::WindowFunction::UserDefinedAggrFunction( + aggr_expr.fun().name().to_string(), + ), + (!buf.is_empty()).then_some(buf), + )) } pub fn serialize_physical_window_expr( diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 09c5f0f8bd3d..94ac913e1968 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -104,10 +104,8 @@ fn roundtrip_json_test(_proto: &protobuf::LogicalExprNode) {} fn roundtrip_expr_test(initial_struct: Expr, ctx: SessionContext) { let extension_codec = DefaultLogicalExtensionCodec {}; let proto: protobuf::LogicalExprNode = - match serialize_expr(&initial_struct, &extension_codec) { - Ok(p) => p, - Err(e) => panic!("Error serializing expression: {:?}", e), - }; + serialize_expr(&initial_struct, &extension_codec) + .unwrap_or_else(|e| panic!("Error serializing expression: {:?}", e)); let round_trip: Expr = from_proto::parse_expr(&proto, &ctx, &extension_codec).unwrap(); @@ -2436,7 +2434,7 @@ fn roundtrip_window() { WindowFunctionDefinition::AggregateUDF(avg_udaf()), vec![col("col1")], )) - .window_frame(row_number_frame.clone()) + .window_frame(row_number_frame) .build() .unwrap(); diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index 6766468ef443..3e49dc24fd5a 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -24,7 +24,8 @@ use std::vec; use arrow::array::RecordBatch; use arrow::csv::WriterBuilder; -use datafusion::physical_expr_functions_aggregate::aggregate::AggregateExprBuilder; +use datafusion::physical_expr::aggregate::AggregateExprBuilder; +use datafusion::physical_plan::coalesce_batches::CoalesceBatchesExec; use datafusion_functions_aggregate::approx_percentile_cont::approx_percentile_cont_udaf; use datafusion_functions_aggregate::array_agg::array_agg_udaf; use datafusion_functions_aggregate::min_max::max_udaf; @@ -46,7 +47,6 @@ use datafusion::datasource::physical_plan::{ use datafusion::execution::FunctionRegistry; use datafusion::functions_aggregate::sum::sum_udaf; use datafusion::logical_expr::{create_udf, JoinType, Operator, Volatility}; -use datafusion::physical_expr::aggregate::utils::down_cast_any_ref; use datafusion::physical_expr::expressions::Literal; use datafusion::physical_expr::window::SlidingAggregateWindowExpr; use datafusion::physical_expr::{PhysicalSortRequirement, ScalarFunctionExpr}; @@ -69,13 +69,12 @@ use datafusion::physical_plan::placeholder_row::PlaceholderRowExec; use datafusion::physical_plan::projection::ProjectionExec; use datafusion::physical_plan::repartition::RepartitionExec; use datafusion::physical_plan::sorts::sort::SortExec; +use datafusion::physical_plan::udaf::AggregateFunctionExpr; use datafusion::physical_plan::union::{InterleaveExec, UnionExec}; use datafusion::physical_plan::windows::{ BuiltInWindowExpr, PlainAggregateWindowExpr, WindowAggExec, }; -use datafusion::physical_plan::{ - AggregateExpr, ExecutionPlan, 
Partitioning, PhysicalExpr, Statistics, -}; +use datafusion::physical_plan::{ExecutionPlan, Partitioning, PhysicalExpr, Statistics}; use datafusion::prelude::SessionContext; use datafusion::scalar::ScalarValue; use datafusion_common::config::TableParquetOptions; @@ -361,7 +360,7 @@ fn rountrip_aggregate() -> Result<()> { .alias("NTH_VALUE(b, 1)") .build()?; - let test_cases: Vec>> = vec![ + let test_cases: Vec>> = vec![ // AVG vec![avg_expr], // NTH_VALUE @@ -394,7 +393,7 @@ fn rountrip_aggregate_with_limit() -> Result<()> { let groups: Vec<(Arc, String)> = vec![(col("a", &schema)?, "unused".to_string())]; - let aggregates: Vec> = + let aggregates: Vec> = vec![ AggregateExprBuilder::new(avg_udaf(), vec![col("b", &schema)?]) .schema(Arc::clone(&schema)) @@ -405,7 +404,7 @@ fn rountrip_aggregate_with_limit() -> Result<()> { let agg = AggregateExec::try_new( AggregateMode::Final, PhysicalGroupBy::new_single(groups.clone()), - aggregates.clone(), + aggregates, vec![None], Arc::new(EmptyExec::new(schema.clone())), schema, @@ -423,7 +422,7 @@ fn rountrip_aggregate_with_approx_pencentile_cont() -> Result<()> { let groups: Vec<(Arc, String)> = vec![(col("a", &schema)?, "unused".to_string())]; - let aggregates: Vec> = vec![AggregateExprBuilder::new( + let aggregates: Vec> = vec![AggregateExprBuilder::new( approx_percentile_cont_udaf(), vec![col("b", &schema)?, lit(0.5)], ) @@ -434,7 +433,7 @@ fn rountrip_aggregate_with_approx_pencentile_cont() -> Result<()> { let agg = AggregateExec::try_new( AggregateMode::Final, PhysicalGroupBy::new_single(groups.clone()), - aggregates.clone(), + aggregates, vec![None], Arc::new(EmptyExec::new(schema.clone())), schema, @@ -458,7 +457,7 @@ fn rountrip_aggregate_with_sort() -> Result<()> { }, }]; - let aggregates: Vec> = + let aggregates: Vec> = vec![ AggregateExprBuilder::new(array_agg_udaf(), vec![col("b", &schema)?]) .schema(Arc::clone(&schema)) @@ -470,7 +469,7 @@ fn rountrip_aggregate_with_sort() -> Result<()> { let agg = AggregateExec::try_new( AggregateMode::Final, PhysicalGroupBy::new_single(groups.clone()), - aggregates.clone(), + aggregates, vec![None], Arc::new(EmptyExec::new(schema.clone())), schema, @@ -525,7 +524,7 @@ fn roundtrip_aggregate_udaf() -> Result<()> { let groups: Vec<(Arc, String)> = vec![(col("a", &schema)?, "unused".to_string())]; - let aggregates: Vec> = + let aggregates: Vec> = vec![ AggregateExprBuilder::new(Arc::new(udaf), vec![col("b", &schema)?]) .schema(Arc::clone(&schema)) @@ -537,7 +536,7 @@ fn roundtrip_aggregate_udaf() -> Result<()> { Arc::new(AggregateExec::try_new( AggregateMode::Final, PhysicalGroupBy::new_single(groups.clone()), - aggregates.clone(), + aggregates, vec![None], Arc::new(EmptyExec::new(schema.clone())), schema, @@ -629,6 +628,23 @@ fn roundtrip_sort_preserve_partitioning() -> Result<()> { )) } +#[test] +fn roundtrip_coalesce_with_fetch() -> Result<()> { + let field_a = Field::new("a", DataType::Boolean, false); + let field_b = Field::new("b", DataType::Int64, false); + let schema = Arc::new(Schema::new(vec![field_a, field_b])); + + roundtrip_test(Arc::new(CoalesceBatchesExec::new( + Arc::new(EmptyExec::new(schema.clone())), + 8096, + )))?; + + roundtrip_test(Arc::new( + CoalesceBatchesExec::new(Arc::new(EmptyExec::new(schema)), 8096) + .with_fetch(Some(10)), + )) +} + #[test] fn roundtrip_parquet_exec_with_pruning_predicate() -> Result<()> { let scan_config = FileScanConfig { @@ -730,7 +746,7 @@ fn roundtrip_parquet_exec_with_custom_predicate_expr() -> Result<()> { } impl PartialEq for CustomPredicateExpr { 
fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) + other .downcast_ref::() .map(|x| self.inner.eq(&x.inner)) .unwrap_or(false) @@ -975,18 +991,16 @@ fn roundtrip_scalar_udf_extension_codec() -> Result<()> { )), input, )?); - let aggr_expr = AggregateExprBuilder::new( - max_udaf(), - vec![udf_expr.clone() as Arc], - ) - .schema(schema.clone()) - .alias("max") - .build()?; + let aggr_expr = + AggregateExprBuilder::new(max_udaf(), vec![udf_expr as Arc]) + .schema(schema.clone()) + .alias("max") + .build()?; let window = Arc::new(WindowAggExec::try_new( vec![Arc::new(PlainAggregateWindowExpr::new( aggr_expr.clone(), - &[col("author", &schema.clone())?], + &[col("author", &schema)?], &[], Arc::new(WindowFrame::new(None)), ))], @@ -997,10 +1011,10 @@ fn roundtrip_scalar_udf_extension_codec() -> Result<()> { let aggregate = Arc::new(AggregateExec::try_new( AggregateMode::Final, PhysicalGroupBy::new(vec![], vec![], vec![]), - vec![aggr_expr.clone()], + vec![aggr_expr], vec![None], window, - schema.clone(), + schema, )?); let ctx = SessionContext::new(); @@ -1038,7 +1052,7 @@ fn roundtrip_aggregate_udf_extension_codec() -> Result<()> { Arc::new(BinaryExpr::new( col("published", &schema)?, Operator::And, - Arc::new(BinaryExpr::new(udf_expr.clone(), Operator::Gt, lit(0))), + Arc::new(BinaryExpr::new(udf_expr, Operator::Gt, lit(0))), )), input, )?); @@ -1067,7 +1081,7 @@ fn roundtrip_aggregate_udf_extension_codec() -> Result<()> { vec![aggr_expr], vec![None], window, - schema.clone(), + schema, )?); let ctx = SessionContext::new(); @@ -1142,7 +1156,7 @@ fn roundtrip_json_sink() -> Result<()> { roundtrip_test(Arc::new(DataSinkExec::new( input, data_sink, - schema.clone(), + schema, Some(sort_order), ))) } @@ -1181,7 +1195,7 @@ fn roundtrip_csv_sink() -> Result<()> { Arc::new(DataSinkExec::new( input, data_sink, - schema.clone(), + schema, Some(sort_order), )), &ctx, @@ -1237,7 +1251,7 @@ fn roundtrip_parquet_sink() -> Result<()> { roundtrip_test(Arc::new(DataSinkExec::new( input, data_sink, - schema.clone(), + schema, Some(sort_order), ))) } @@ -1326,7 +1340,7 @@ fn roundtrip_interleave() -> Result<()> { )?; let right = RepartitionExec::try_new( Arc::new(EmptyExec::new(Arc::new(schema_right))), - partition.clone(), + partition, )?; let inputs: Vec> = vec![Arc::new(left), Arc::new(right)]; let interleave = InterleaveExec::try_new(inputs)?; diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index b95414a8cafd..71e40c20b80a 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -245,8 +245,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // Build Unnest expression if name.eq("unnest") { - let mut exprs = - self.function_args_to_expr(args.clone(), schema, planner_context)?; + let mut exprs = self.function_args_to_expr(args, schema, planner_context)?; if exprs.len() != 1 { return plan_err!("unnest() requires exactly one argument"); } @@ -295,8 +294,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Some(false) }; } + Some(false) + } else { + panic!("order_by expression must be of type Sort"); } - Some(false) }); let window_frame = window diff --git a/datafusion/sql/src/relation/mod.rs b/datafusion/sql/src/relation/mod.rs index 86e49780724b..f8ebb04f3810 100644 --- a/datafusion/sql/src/relation/mod.rs +++ b/datafusion/sql/src/relation/mod.rs @@ -214,7 +214,7 @@ fn optimize_subquery_sort(plan: LogicalPlan) -> Result> // 2. RANK / ROW_NUMBER ... => Handled by a `WindowAggr` and its requirements. // 3. 
LIMIT => Handled by a `Sort`, so we need to search for it. let mut has_limit = false; - let new_plan = plan.clone().transform_down(|c| { + let new_plan = plan.transform_down(|c| { if let LogicalPlan::Limit(_) = c { has_limit = true; return Ok(Transformed::no(c)); diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 4e0ce33f1334..384893bfa94c 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -33,7 +33,6 @@ use datafusion_expr::expr::{Alias, PlannedReplaceSelectItem, WildcardOptions}; use datafusion_expr::expr_rewriter::{ normalize_col, normalize_col_with_schemas_and_ambiguity_check, normalize_cols, }; -use datafusion_expr::logical_plan::tree_node::unwrap_arc; use datafusion_expr::utils::{ expr_as_column_expr, expr_to_columns, find_aggregate_exprs, find_window_exprs, }; @@ -215,7 +214,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let plan = if let Some(having_expr_post_aggr) = having_expr_post_aggr { LogicalPlanBuilder::from(plan) - .filter(having_expr_post_aggr)? + .having(having_expr_post_aggr)? .build()? } else { plan @@ -361,9 +360,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { .build() } LogicalPlan::Filter(mut filter) => { - filter.input = Arc::new( - self.try_process_aggregate_unnest(unwrap_arc(filter.input))?, - ); + filter.input = + Arc::new(self.try_process_aggregate_unnest(Arc::unwrap_or_clone( + filter.input, + ))?); Ok(LogicalPlan::Filter(filter)) } _ => Ok(input), @@ -401,7 +401,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // Projection: tab.array_col AS unnest(tab.array_col) // TableScan: tab // ``` - let mut intermediate_plan = unwrap_arc(input); + let mut intermediate_plan = Arc::unwrap_or_clone(input); let mut intermediate_select_exprs = group_expr; loop { diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 9ce627aecc76..0dbcba162bc0 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -1864,7 +1864,7 @@ mod tests { r#"EXISTS (SELECT * FROM t WHERE (t.a = 1))"#, ), ( - not_exists(Arc::new(dummy_logical_plan.clone())), + not_exists(Arc::new(dummy_logical_plan)), r#"NOT EXISTS (SELECT * FROM t WHERE (t.a = 1))"#, ), ( diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 8b5a5b0942b8..106705c322fc 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -528,24 +528,10 @@ impl Unparser<'_> { fn sort_to_sql(&self, sort_exprs: Vec) -> Result> { sort_exprs .iter() - .map(|expr: &Expr| match expr { - Expr::Sort(sort_expr) => { - let col = self.expr_to_sql(&sort_expr.expr)?; - - let nulls_first = if self.dialect.supports_nulls_first_in_sort() { - Some(sort_expr.nulls_first) - } else { - None - }; - - Ok(ast::OrderByExpr { - asc: Some(sort_expr.asc), - expr: col, - nulls_first, - with_fill: None, - }) - } - _ => plan_err!("Expecting Sort expr"), + .map(|expr: &Expr| { + self.expr_to_unparsed(expr)? 
+ .into_order_by_expr() + .or(plan_err!("Expecting Sort expr")) }) .collect::>>() } diff --git a/datafusion/sql/src/unparser/rewrite.rs b/datafusion/sql/src/unparser/rewrite.rs index f6725485f920..9e1adcf4df31 100644 --- a/datafusion/sql/src/unparser/rewrite.rs +++ b/datafusion/sql/src/unparser/rewrite.rs @@ -59,10 +59,7 @@ pub(super) fn normalize_union_schema(plan: &LogicalPlan) -> Result let transformed_plan = plan.transform_up(|plan| match plan { LogicalPlan::Union(mut union) => { - let schema = match Arc::try_unwrap(union.schema) { - Ok(inner) => inner, - Err(schema) => (*schema).clone(), - }; + let schema = Arc::unwrap_or_clone(union.schema); let schema = schema.strip_qualifiers(); union.schema = Arc::new(schema); @@ -164,6 +161,8 @@ pub(super) fn rewrite_plan_for_sort_on_non_projected_fields( for expr in &sort.expr { if let Expr::Sort(s) = expr { collects.push(s.expr.as_ref().clone()); + } else { + panic!("sort expression must be of type Sort"); } } diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index af161bba45c1..c32acecaae5f 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -17,8 +17,6 @@ //! SQL Utility Functions -use std::collections::HashMap; - use arrow_schema::{ DataType, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE, }; @@ -33,6 +31,7 @@ use datafusion_expr::expr::{Alias, GroupingSet, Unnest, WindowFunction}; use datafusion_expr::utils::{expr_as_column_expr, find_column_exprs}; use datafusion_expr::{expr_vec_fmt, Expr, ExprSchemable, LogicalPlan}; use sqlparser::ast::{Ident, Value}; +use std::collections::HashMap; /// Make a best-effort attempt at resolving all columns in the expression tree pub(crate) fn resolve_columns(expr: &Expr, plan: &LogicalPlan) -> Result { diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index ed23fada0cfb..cdc7bef06afd 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -203,7 +203,7 @@ fn roundtrip_crossjoin() -> Result<()> { println!("plan {}", plan.display_indent()); let plan_roundtrip = sql_to_rel - .sql_statement_to_plan(roundtrip_statement.clone()) + .sql_statement_to_plan(roundtrip_statement) .unwrap(); let expected = "Projection: j1.j1_id, j2.j2_string\ diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 5685e09c9c9f..5a203703e967 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -3813,7 +3813,7 @@ fn test_prepare_statement_to_plan_params_as_constants() { /////////////////// // replace params with values let param_values = vec![ScalarValue::Int32(Some(10))]; - let expected_plan = "Projection: Int32(10)\n EmptyRelation"; + let expected_plan = "Projection: Int32(10) AS $1\n EmptyRelation"; prepare_stmt_replace_params_quick_test(plan, param_values, expected_plan); @@ -3829,7 +3829,8 @@ fn test_prepare_statement_to_plan_params_as_constants() { /////////////////// // replace params with values let param_values = vec![ScalarValue::Int32(Some(10))]; - let expected_plan = "Projection: Int64(1) + Int32(10)\n EmptyRelation"; + let expected_plan = + "Projection: Int64(1) + Int32(10) AS Int64(1) + $1\n EmptyRelation"; prepare_stmt_replace_params_quick_test(plan, param_values, expected_plan); @@ -3848,7 +3849,9 @@ fn test_prepare_statement_to_plan_params_as_constants() { ScalarValue::Int32(Some(10)), ScalarValue::Float64(Some(10.0)), ]; - let expected_plan = "Projection: 
Int64(1) + Int32(10) + Float64(10)\n EmptyRelation"; + let expected_plan = + "Projection: Int64(1) + Int32(10) + Float64(10) AS Int64(1) + $1 + $2\ + \n EmptyRelation"; prepare_stmt_replace_params_quick_test(plan, param_values, expected_plan); } @@ -4063,7 +4066,7 @@ fn test_prepare_statement_insert_infer() { \n Projection: column1 AS id, column2 AS first_name, column3 AS last_name, \ CAST(NULL AS Int32) AS age, CAST(NULL AS Utf8) AS state, CAST(NULL AS Float64) AS salary, \ CAST(NULL AS Timestamp(Nanosecond, None)) AS birth_date, CAST(NULL AS Int32) AS 😀\ - \n Values: (UInt32(1), Utf8(\"Alan\"), Utf8(\"Turing\"))"; + \n Values: (UInt32(1) AS $1, Utf8(\"Alan\") AS $2, Utf8(\"Turing\") AS $3)"; let plan = plan.replace_params_with_values(¶m_values).unwrap(); prepare_stmt_replace_params_quick_test(plan, param_values, expected_plan); @@ -4144,7 +4147,7 @@ fn test_prepare_statement_to_plan_multi_params() { ScalarValue::from("xyz"), ]; let expected_plan = - "Projection: person.id, person.age, Utf8(\"xyz\")\ + "Projection: person.id, person.age, Utf8(\"xyz\") AS $6\ \n Filter: person.age IN ([Int32(10), Int32(20)]) AND person.salary > Float64(100) AND person.salary < Float64(200) OR person.first_name < Utf8(\"abc\")\ \n TableScan: person"; @@ -4213,7 +4216,7 @@ fn test_prepare_statement_to_plan_value_list() { let expected_plan = "Projection: *\ \n SubqueryAlias: t\ \n Projection: column1 AS num, column2 AS letter\ - \n Values: (Int64(1), Utf8(\"a\")), (Int64(2), Utf8(\"b\"))"; + \n Values: (Int64(1), Utf8(\"a\") AS $1), (Int64(2), Utf8(\"b\") AS $2)"; prepare_stmt_replace_params_quick_test(plan, param_values, expected_plan); } diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index b8b93b28aff6..45cb4d4615d7 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -98,6 +98,17 @@ SELECT approx_percentile_cont(c3, 0.95, 111.1) FROM aggregate_test_100 statement error DataFusion error: Error during planning: Error during planning: Coercion from \[Float64, Float64, Float64\] to the signature OneOf(.*) failed(.|\n)* SELECT approx_percentile_cont(c12, 0.95, 111.1) FROM aggregate_test_100 +statement error DataFusion error: This feature is not implemented: Percentile value for 'APPROX_PERCENTILE_CONT' must be a literal +SELECT approx_percentile_cont(c12, c12) FROM aggregate_test_100 + +statement error DataFusion error: This feature is not implemented: Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be a literal +SELECT approx_percentile_cont(c12, 0.95, c5) FROM aggregate_test_100 + +# Not supported over sliding windows +query error This feature is not implemented: Aggregate can not be used as a sliding accumulator because `retract_batch` is not implemented +SELECT approx_percentile_cont(c3, 0.5) OVER (ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) +FROM aggregate_test_100 + # array agg can use order by query ? 
SELECT array_agg(c13 ORDER BY c13) @@ -500,6 +511,85 @@ select stddev(sq.column1) from (values (1.1), (2.0), (3.0)) as sq ---- 0.950438495292 +# csv_query_stddev_7 +query IR +SELECT c2, stddev_samp(c12) FROM aggregate_test_100 GROUP BY c2 ORDER BY c2 +---- +1 0.303641032262 +2 0.284581967411 +3 0.296002660506 +4 0.284324609109 +5 0.331034486752 + +# csv_query_stddev_8 +query IR +SELECT c2, stddev_pop(c12) FROM aggregate_test_100 GROUP BY c2 ORDER BY c2 +---- +1 0.296659845456 +2 0.278038978602 +3 0.288107833475 +4 0.278074953424 +5 0.318992813225 + +# csv_query_stddev_9 +query IR +SELECT c2, var_pop(c12) FROM aggregate_test_100 GROUP BY c2 ORDER BY c2 +---- +1 0.088007063906 +2 0.077305673622 +3 0.083006123709 +4 0.077325679722 +5 0.101756414889 + +# csv_query_stddev_10 +query IR +SELECT c2, var_samp(c12) FROM aggregate_test_100 GROUP BY c2 ORDER BY c2 +---- +1 0.092197876473 +2 0.080986896176 +3 0.087617575027 +4 0.080840483345 +5 0.109583831419 + +# csv_query_stddev_11 +query IR +SELECT c2, var_samp(c12) FROM aggregate_test_100 WHERE c12 > 0.90 GROUP BY c2 ORDER BY c2 +---- +1 0.000889240174 +2 0.000785878272 +3 NULL +4 NULL +5 0.000269544643 + +# Use PostgresSQL dialect +statement ok +set datafusion.sql_parser.dialect = 'Postgres'; + +# csv_query_stddev_12 +query IR +SELECT c2, var_samp(c12) FILTER (WHERE c12 > 0.90) FROM aggregate_test_100 GROUP BY c2 ORDER BY c2 +---- +1 0.000889240174 +2 0.000785878272 +3 NULL +4 NULL +5 0.000269544643 + +# Restore the default dialect +statement ok +set datafusion.sql_parser.dialect = 'Generic'; + +# csv_query_stddev_13 +query IR +SELECT c2, var_samp(CASE WHEN c12 > 0.90 THEN c12 ELSE null END) FROM aggregate_test_100 GROUP BY c2 ORDER BY c2 +---- +1 0.000889240174 +2 0.000785878272 +3 NULL +4 NULL +5 0.000269544643 + + # csv_query_approx_median_1 query I SELECT approx_median(c2) FROM aggregate_test_100 @@ -5387,6 +5477,18 @@ physical_plan statement ok DROP TABLE empty; +# verify count aggregate function should not be nullable +statement ok +create table empty; + +query I +select distinct count() from empty; +---- +0 + +statement ok +DROP TABLE empty; + statement ok CREATE TABLE t(col0 INTEGER) as VALUES(2); @@ -5643,6 +5745,97 @@ select count(null), min(null), max(null), bit_and(NULL), bit_or(NULL), bit_xor(N ---- 0 NULL NULL NULL NULL NULL NULL NULL +statement ok +create table having_test(v1 int, v2 int) + +statement ok +create table join_table(v1 int, v2 int) + +statement ok +insert into having_test values (1, 2), (2, 3), (3, 4) + +statement ok +insert into join_table values (1, 2), (2, 3), (3, 4) + + +query II +select * from having_test group by v1, v2 having max(v1) = 3 +---- +3 4 + +query TT +EXPLAIN select * from having_test group by v1, v2 having max(v1) = 3 +---- +logical_plan +01)Projection: having_test.v1, having_test.v2 +02)--Filter: max(having_test.v1) = Int32(3) +03)----Aggregate: groupBy=[[having_test.v1, having_test.v2]], aggr=[[max(having_test.v1)]] +04)------TableScan: having_test projection=[v1, v2] +physical_plan +01)ProjectionExec: expr=[v1@0 as v1, v2@1 as v2] +02)--CoalesceBatchesExec: target_batch_size=8192 +03)----FilterExec: max(having_test.v1)@2 = 3 +04)------AggregateExec: mode=FinalPartitioned, gby=[v1@0 as v1, v2@1 as v2], aggr=[max(having_test.v1)] +05)--------CoalesceBatchesExec: target_batch_size=8192 +06)----------RepartitionExec: partitioning=Hash([v1@0, v2@1], 4), input_partitions=4 +07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +08)--------------AggregateExec: mode=Partial, 
gby=[v1@0 as v1, v2@1 as v2], aggr=[max(having_test.v1)] +09)----------------MemoryExec: partitions=1, partition_sizes=[1] + + +query error +select * from having_test having max(v1) = 3 + +query I +select max(v1) from having_test having max(v1) = 3 +---- +3 + +query I +select max(v1), * exclude (v1, v2) from having_test having max(v1) = 3 +---- +3 + +# because v1, v2 is not in the group by clause, the sql is invalid +query III +select max(v1), * replace ('v1' as v3) from having_test group by v1, v2 having max(v1) = 3 +---- +3 3 4 + +query III +select max(v1), t.* from having_test t group by v1, v2 having max(v1) = 3 +---- +3 3 4 + +# j.* should also be included in the group-by clause +query error +select max(t.v1), j.* from having_test t join join_table j on t.v1 = j.v1 group by t.v1, t.v2 having max(t.v1) = 3 + +query III +select max(t.v1), j.* from having_test t join join_table j on t.v1 = j.v1 group by j.v1, j.v2 having max(t.v1) = 3 +---- +3 3 4 + +# If the select items only contain scalar expressions, the having clause is valid. +query P +select now() from having_test having max(v1) = 4 +---- + +# If the select items only contain scalar expressions, the having clause is valid. +query I +select 0 from having_test having max(v1) = 4 +---- + +# v2 should also be included in group-by clause +query error +select * from having_test group by v1 having max(v1) = 3 + +statement ok +drop table having_test + +statement ok +drop table join_table + # test min/max Float16 without group expression query RRTT WITH data AS ( diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 249241a51aea..c80fd7e92417 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -4972,11 +4972,19 @@ NULL 1 1 ## array_has/array_has_all/array_has_any -query BB +# If lhs is empty, return false +query B +select array_has([], 1); +---- +false + +# If rhs is Null, we return Null +query BBB select array_has([], null), - array_has([1, 2, 3], null); + array_has([1, 2, 3], null), + array_has([null, 1], null); ---- -false false +NULL NULL NULL #TODO: array_has_all and array_has_any cannot handle NULL #query BBBB diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index b7a0a74913b0..270e4beccc52 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -376,11 +376,10 @@ SELECT MAP {'a':1, 'b':2, 'c':3 }['a'] FROM t; 1 1 -# TODO(https://github.com/sqlparser-rs/sqlparser-rs/pull/1361): support parsing an empty map. Enable this after upgrading sqlparser-rs. -# query ? -# SELECT MAP {}; -# ---- -# {} +query ? +SELECT MAP {}; +---- +{} # values contain null query ?
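Editor's note (not part of the patch): the map.slt hunk above exercises the empty `MAP {}` literal that the sqlparser-rs upgrade enables. A minimal, hedged sketch of the same behaviour through the DataFusion Rust API, assuming default crate features (which include the nested/map functions), a tokio runtime, and the `SessionContext` SQL surface of this release:

```rust
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    // In-memory session; no tables are needed for literal expressions.
    let ctx = SessionContext::new();

    // An empty MAP literal should now parse and plan; a populated MAP literal
    // with element access worked before and still does.
    let df = ctx.sql("SELECT MAP {}, MAP {'a': 1, 'b': 2}['a']").await?;
    df.show().await?;
    Ok(())
}
```

This goes through the same SQL frontend as the sqllogictest above, so it is only an illustration of the test's intent, not an additional code path introduced by the patch.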
diff --git a/datafusion/sqllogictest/test_files/sort_merge_join.slt b/datafusion/sqllogictest/test_files/sort_merge_join.slt index ea3088e69674..2c28a5feadba 100644 --- a/datafusion/sqllogictest/test_files/sort_merge_join.slt +++ b/datafusion/sqllogictest/test_files/sort_merge_join.slt @@ -134,7 +134,6 @@ Alice 100 NULL NULL Alice 50 Alice 2 Bob 1 NULL NULL NULL NULL Alice 1 -NULL NULL Alice 2 query TITI rowsort SELECT * FROM t1 FULL JOIN t2 ON t1.a = t2.a AND t1.b > t2.b + 50 @@ -624,6 +623,32 @@ NULL NULL 7 9 NULL NULL 8 10 NULL NULL 9 11 +query IIII +select * from ( +with t as ( + select id_a id_a_1, id_a % 5 id_a_2 from (select unnest(make_array(5, 6, 7, 8, 9, 0, 1, 2, 3, 4)) id_a) +), t1 as ( + select id_b % 10 id_b_1, id_b + 2 id_b_2 from (select unnest(make_array(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)) id_b) +) +select * from t full join t1 on t.id_a_2 = t1.id_b_1 and t.id_a_1 > t1.id_b_2 +) order by 1, 2, 3, 4 +---- +0 0 NULL NULL +1 1 NULL NULL +2 2 NULL NULL +3 3 NULL NULL +4 4 NULL NULL +5 0 0 2 +6 1 1 3 +7 2 2 4 +8 3 3 5 +9 4 4 6 +NULL NULL 5 7 +NULL NULL 6 8 +NULL NULL 7 9 +NULL NULL 8 10 +NULL NULL 9 11 + # return sql params back to default values statement ok set datafusion.optimizer.prefer_hash_join = true; diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 0b441bcbeb8f..83c75b8df38c 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -460,8 +460,6 @@ Xiangpeng Raphael NULL - - ### Initcap query TT @@ -501,7 +499,7 @@ SELECT INITCAP(column1_large_utf8_lower) as c3 FROM test_lowercase; ---- -Andrew Andrew Andrew +Andrew Andrew Andrew Xiangpeng Xiangpeng Xiangpeng Raphael Raphael Raphael NULL NULL NULL @@ -828,16 +826,42 @@ logical_plan 02)--TableScan: test projection=[column1_utf8view, column2_utf8view] ## Ensure no casts for LOWER -## TODO https://github.com/apache/datafusion/issues/11855 query TT EXPLAIN SELECT LOWER(column1_utf8view) as c1 FROM test; ---- logical_plan -01)Projection: lower(CAST(test.column1_utf8view AS Utf8)) AS c1 +01)Projection: lower(test.column1_utf8view) AS c1 02)--TableScan: test projection=[column1_utf8view] +query T +SELECT LOWER(column1_utf8view) as c1 +FROM test; +---- +andrew +xiangpeng +raphael +NULL + +## Ensure no casts for UPPER +query TT +EXPLAIN SELECT + UPPER(column1_utf8view) as c1 +FROM test; +---- +logical_plan +01)Projection: upper(test.column1_utf8view) AS c1 +02)--TableScan: test projection=[column1_utf8view] + +query T +SELECT UPPER(column1_utf8view) as c1 +FROM test; +---- +ANDREW +XIANGPENG +RAPHAEL +NULL ## Ensure no casts for LPAD query TT @@ -1066,9 +1090,8 @@ EXPLAIN SELECT FROM test; ---- logical_plan -01)Projection: strpos(__common_expr_1, Utf8("f")) AS c, strpos(__common_expr_1, CAST(test.column2_utf8view AS Utf8)) AS c2 -02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view -03)----TableScan: test projection=[column1_utf8view, column2_utf8view] +01)Projection: strpos(test.column1_utf8view, Utf8("f")) AS c, strpos(test.column1_utf8view, test.column2_utf8view) AS c2 +02)--TableScan: test projection=[column1_utf8view, column2_utf8view] ## Ensure no casts for SUBSTR ## TODO file ticket @@ -1145,6 +1168,63 @@ FROM test; 0 NULL +# || mixed types +# expect all results to be the same for each row as they all have the same values +query TTTTTTTT +SELECT + column1_utf8view || column2_utf8view, + column1_utf8 || column2_utf8view, + column1_large_utf8 || 
column2_utf8view, + column1_dict || column2_utf8view, + -- reverse argument order + column2_utf8view || column1_utf8view, + column2_utf8view || column1_utf8, + column2_utf8view || column1_large_utf8, + column2_utf8view || column1_dict +FROM test; +---- +AndrewX AndrewX AndrewX AndrewX XAndrew XAndrew XAndrew XAndrew +XiangpengXiangpeng XiangpengXiangpeng XiangpengXiangpeng XiangpengXiangpeng XiangpengXiangpeng XiangpengXiangpeng XiangpengXiangpeng XiangpengXiangpeng +RaphaelR RaphaelR RaphaelR RaphaelR RRaphael RRaphael RRaphael RRaphael +NULL NULL NULL NULL NULL NULL NULL NULL + +# || constants +# expect all results to be the same for each row as they all have the same values +query TTTTTTTT +SELECT + column1_utf8view || 'foo', + column1_utf8 || 'foo', + column1_large_utf8 || 'foo', + column1_dict || 'foo', + -- reverse argument order + 'foo' || column1_utf8view, + 'foo' || column1_utf8, + 'foo' || column1_large_utf8, + 'foo' || column1_dict +FROM test; +---- +Andrewfoo Andrewfoo Andrewfoo Andrewfoo fooAndrew fooAndrew fooAndrew fooAndrew +Xiangpengfoo Xiangpengfoo Xiangpengfoo Xiangpengfoo fooXiangpeng fooXiangpeng fooXiangpeng fooXiangpeng +Raphaelfoo Raphaelfoo Raphaelfoo Raphaelfoo fooRaphael fooRaphael fooRaphael fooRaphael +NULL NULL NULL NULL NULL NULL NULL NULL + +# || same type (column1 has null, so also tests NULL || NULL) +# expect all results to be the same for each row as they all have the same values +query TTT +SELECT + column1_utf8view || column1_utf8view, + column1_utf8 || column1_utf8, + column1_large_utf8 || column1_large_utf8 + -- Dictionary/Dictionary coercion doesn't work + -- https://github.com/apache/datafusion/issues/12101 + --column1_dict || column1_dict +FROM test; +---- +AndrewAndrew AndrewAndrew AndrewAndrew +XiangpengXiangpeng XiangpengXiangpeng XiangpengXiangpeng +RaphaelRaphael RaphaelRaphael RaphaelRaphael +NULL NULL NULL + statement ok drop table test; @@ -1168,18 +1248,25 @@ select t.dt from dates t where arrow_cast('2024-01-01', 'Utf8View') < t.dt; statement ok drop table dates; +### Tests for `||` with Utf8View specifically + statement ok create table temp as values ('value1', arrow_cast('rust', 'Utf8View'), arrow_cast('fast', 'Utf8View')), ('value2', arrow_cast('datafusion', 'Utf8View'), arrow_cast('cool', 'Utf8View')); +query TTT +select arrow_typeof(column1), arrow_typeof(column2), arrow_typeof(column3) from temp; +---- +Utf8 Utf8View Utf8View +Utf8 Utf8View Utf8View + query T select column2||' is fast' from temp; ---- rust is fast datafusion is fast - query T select column2 || ' is ' || column3 from temp; ---- @@ -1190,15 +1277,15 @@ query TT explain select column2 || 'is' || column3 from temp; ---- logical_plan -01)Projection: CAST(temp.column2 AS Utf8) || Utf8("is") || CAST(temp.column3 AS Utf8) +01)Projection: temp.column2 || Utf8View("is") || temp.column3 AS temp.column2 || Utf8("is") || temp.column3 02)--TableScan: temp projection=[column2, column3] - +# should not cast the column2 to utf8 query TT explain select column2||' is fast' from temp; ---- logical_plan -01)Projection: CAST(temp.column2 AS Utf8) || Utf8(" is fast") +01)Projection: temp.column2 || Utf8View(" is fast") AS temp.column2 || Utf8(" is fast") 02)--TableScan: temp projection=[column2] @@ -1212,7 +1299,7 @@ query TT explain select column2||column3 from temp; ---- logical_plan -01)Projection: CAST(temp.column2 AS Utf8) || CAST(temp.column3 AS Utf8) +01)Projection: temp.column2 || temp.column3 02)--TableScan: temp projection=[column2, column3] query T diff --git 
a/datafusion/sqllogictest/test_files/type_coercion.slt b/datafusion/sqllogictest/test_files/type_coercion.slt index e420c0cc7155..0f9399cede2e 100644 --- a/datafusion/sqllogictest/test_files/type_coercion.slt +++ b/datafusion/sqllogictest/test_files/type_coercion.slt @@ -50,6 +50,35 @@ select interval '1 month' - '2023-05-01'::date; query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid types SELECT interval '1 month' - '2023-05-01 12:30:00'::timestamp; +# dictionary(int32, utf8) -> utf8 +query T +select arrow_cast('foo', 'Dictionary(Int32, Utf8)') || arrow_cast('bar', 'Dictionary(Int32, Utf8)'); +---- +foobar + +# dictionary(int32, largeUtf8) -> largeUtf8 +query T +select arrow_cast('foo', 'Dictionary(Int32, LargeUtf8)') || arrow_cast('bar', 'Dictionary(Int32, LargeUtf8)'); +---- +foobar + +#################################### +## Concat column dictionary test ## +#################################### +statement ok +create table t as values (arrow_cast('foo', 'Dictionary(Int32, Utf8)'), arrow_cast('bar', 'Dictionary(Int32, Utf8)')); + +query T +select column1 || column2 from t; +---- +foobar + +statement ok +DROP TABLE t + +####################################### +## Concat column dictionary test end ## +####################################### #################################### ## Test type coercion with UNIONs ## diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 78055f8c1c11..5bf5cf83284f 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -4861,3 +4861,16 @@ select a, row_number(a) over (order by b) as rn from t; statement ok drop table t; + +statement ok +DROP TABLE t1; + +# https://github.com/apache/datafusion/issues/12073 +statement ok +CREATE TABLE t1(v1 BIGINT); + +query error DataFusion error: Execution error: Expected a signed integer literal for the second argument of nth_value, got v1@0 +SELECT NTH_VALUE('+Inf'::Double, v1) OVER (PARTITION BY v1) FROM t1; + +statement ok +DROP TABLE t1; \ No newline at end of file diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml index 9e7ef9632ad3..ff02ef8c7ef6 100644 --- a/datafusion/substrait/Cargo.toml +++ b/datafusion/substrait/Cargo.toml @@ -38,9 +38,9 @@ chrono = { workspace = true } datafusion = { workspace = true, default-features = true } itertools = { workspace = true } object_store = { workspace = true } -pbjson-types = "0.6" -prost = "0.12" -substrait = { version = "0.36.0", features = ["serde"] } +pbjson-types = "0.7" +prost = "0.13" +substrait = { version = "0.41", features = ["serde"] } url = { workspace = true } [dev-dependencies] diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index f2756bb06d1e..b1b510f1792d 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -42,14 +42,14 @@ use crate::variation_const::{ DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF, DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME, LARGE_CONTAINER_TYPE_VARIATION_REF, - TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF, - TIMESTAMP_NANO_TYPE_VARIATION_REF, TIMESTAMP_SECOND_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF, }; #[allow(deprecated)] use crate::variation_const::{ 
INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_REF, - INTERVAL_YEAR_MONTH_TYPE_REF, + INTERVAL_YEAR_MONTH_TYPE_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF, + TIMESTAMP_MILLI_TYPE_VARIATION_REF, TIMESTAMP_NANO_TYPE_VARIATION_REF, + TIMESTAMP_SECOND_TYPE_VARIATION_REF, }; use datafusion::arrow::array::{new_empty_array, AsArray}; use datafusion::common::scalar::ScalarStructBuilder; @@ -69,6 +69,7 @@ use datafusion::{ use std::collections::{HashMap, HashSet}; use std::sync::Arc; use substrait::proto::exchange_rel::ExchangeKind; +use substrait::proto::expression::literal::interval_day_to_second::PrecisionMode; use substrait::proto::expression::literal::user_defined::Val; use substrait::proto::expression::literal::{ IntervalDayToSecond, IntervalYearToMonth, UserDefined, @@ -95,6 +96,13 @@ use substrait::proto::{ }; use substrait::proto::{FunctionArgument, SortField}; +// Substrait PrecisionTimestampTz indicates that the timestamp is relative to UTC, which +// is the same as the expectation for any non-empty timezone in DF, so any non-empty timezone +// results in correct points on the timeline, and we pick UTC as a reasonable default. +// However, DF uses the timezone also for some arithmetic and display purposes (see e.g. +// https://github.com/apache/arrow-rs/blob/ee5694078c86c8201549654246900a4232d531a9/arrow-cast/src/cast/mod.rs#L1749). +const DEFAULT_TIMEZONE: &str = "UTC"; + pub fn name_to_op(name: &str) -> Option { match name { "equal" => Some(Operator::Eq), @@ -877,8 +885,8 @@ fn from_substrait_jointype(join_type: i32) -> Result { join_rel::JoinType::Left => Ok(JoinType::Left), join_rel::JoinType::Right => Ok(JoinType::Right), join_rel::JoinType::Outer => Ok(JoinType::Full), - join_rel::JoinType::Anti => Ok(JoinType::LeftAnti), - join_rel::JoinType::Semi => Ok(JoinType::LeftSemi), + join_rel::JoinType::LeftAnti => Ok(JoinType::LeftAnti), + join_rel::JoinType::LeftSemi => Ok(JoinType::LeftSemi), _ => plan_err!("unsupported join type {substrait_join_type:?}"), } } else { @@ -1369,23 +1377,51 @@ fn from_substrait_type( }, r#type::Kind::Fp32(_) => Ok(DataType::Float32), r#type::Kind::Fp64(_) => Ok(DataType::Float64), - r#type::Kind::Timestamp(ts) => match ts.type_variation_reference { - TIMESTAMP_SECOND_TYPE_VARIATION_REF => { - Ok(DataType::Timestamp(TimeUnit::Second, None)) - } - TIMESTAMP_MILLI_TYPE_VARIATION_REF => { - Ok(DataType::Timestamp(TimeUnit::Millisecond, None)) - } - TIMESTAMP_MICRO_TYPE_VARIATION_REF => { - Ok(DataType::Timestamp(TimeUnit::Microsecond, None)) - } - TIMESTAMP_NANO_TYPE_VARIATION_REF => { - Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)) + r#type::Kind::Timestamp(ts) => { + // Kept for backwards compatibility, new plans should use PrecisionTimestamp(Tz) instead + #[allow(deprecated)] + match ts.type_variation_reference { + TIMESTAMP_SECOND_TYPE_VARIATION_REF => { + Ok(DataType::Timestamp(TimeUnit::Second, None)) + } + TIMESTAMP_MILLI_TYPE_VARIATION_REF => { + Ok(DataType::Timestamp(TimeUnit::Millisecond, None)) + } + TIMESTAMP_MICRO_TYPE_VARIATION_REF => { + Ok(DataType::Timestamp(TimeUnit::Microsecond, None)) + } + TIMESTAMP_NANO_TYPE_VARIATION_REF => { + Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)) + } + v => not_impl_err!( + "Unsupported Substrait type variation {v} of type {s_kind:?}" + ), } - v => not_impl_err!( - "Unsupported Substrait type variation {v} of type {s_kind:?}" - ), - }, + } + r#type::Kind::PrecisionTimestamp(pts) => { + let unit = match pts.precision { + 0 => Ok(TimeUnit::Second), + 3 => Ok(TimeUnit::Millisecond), + 6 => 
Ok(TimeUnit::Microsecond), + 9 => Ok(TimeUnit::Nanosecond), + p => not_impl_err!( + "Unsupported Substrait precision {p} for PrecisionTimestamp" + ), + }?; + Ok(DataType::Timestamp(unit, None)) + } + r#type::Kind::PrecisionTimestampTz(pts) => { + let unit = match pts.precision { + 0 => Ok(TimeUnit::Second), + 3 => Ok(TimeUnit::Millisecond), + 6 => Ok(TimeUnit::Microsecond), + 9 => Ok(TimeUnit::Nanosecond), + p => not_impl_err!( + "Unsupported Substrait precision {p} for PrecisionTimestampTz" + ), + }?; + Ok(DataType::Timestamp(unit, Some(DEFAULT_TIMEZONE.into()))) + } r#type::Kind::Date(date) => match date.type_variation_reference { DATE_32_TYPE_VARIATION_REF => Ok(DataType::Date32), DATE_64_TYPE_VARIATION_REF => Ok(DataType::Date64), @@ -1465,22 +1501,10 @@ fn from_substrait_type( "Unsupported Substrait type variation {v} of type {s_kind:?}" ), }, - r#type::Kind::IntervalYear(i) => match i.type_variation_reference { - DEFAULT_TYPE_VARIATION_REF => { - Ok(DataType::Interval(IntervalUnit::YearMonth)) - } - v => not_impl_err!( - "Unsupported Substrait type variation {v} of type {s_kind:?}" - ), - }, - r#type::Kind::IntervalDay(i) => match i.type_variation_reference { - DEFAULT_TYPE_VARIATION_REF => { - Ok(DataType::Interval(IntervalUnit::DayTime)) - } - v => not_impl_err!( - "Unsupported Substrait type variation {v} of type {s_kind:?}" - ), - }, + r#type::Kind::IntervalYear(_) => { + Ok(DataType::Interval(IntervalUnit::YearMonth)) + } + r#type::Kind::IntervalDay(_) => Ok(DataType::Interval(IntervalUnit::DayTime)), r#type::Kind::UserDefined(u) => { if let Some(name) = extensions.types.get(&u.type_reference) { match name.as_ref() { @@ -1676,21 +1700,59 @@ fn from_substrait_literal( }, Some(LiteralType::Fp32(f)) => ScalarValue::Float32(Some(*f)), Some(LiteralType::Fp64(f)) => ScalarValue::Float64(Some(*f)), - Some(LiteralType::Timestamp(t)) => match lit.type_variation_reference { - TIMESTAMP_SECOND_TYPE_VARIATION_REF => { - ScalarValue::TimestampSecond(Some(*t), None) - } - TIMESTAMP_MILLI_TYPE_VARIATION_REF => { - ScalarValue::TimestampMillisecond(Some(*t), None) - } - TIMESTAMP_MICRO_TYPE_VARIATION_REF => { - ScalarValue::TimestampMicrosecond(Some(*t), None) + Some(LiteralType::Timestamp(t)) => { + // Kept for backwards compatibility, new plans should use PrecisionTimestamp(Tz) instead + #[allow(deprecated)] + match lit.type_variation_reference { + TIMESTAMP_SECOND_TYPE_VARIATION_REF => { + ScalarValue::TimestampSecond(Some(*t), None) + } + TIMESTAMP_MILLI_TYPE_VARIATION_REF => { + ScalarValue::TimestampMillisecond(Some(*t), None) + } + TIMESTAMP_MICRO_TYPE_VARIATION_REF => { + ScalarValue::TimestampMicrosecond(Some(*t), None) + } + TIMESTAMP_NANO_TYPE_VARIATION_REF => { + ScalarValue::TimestampNanosecond(Some(*t), None) + } + others => { + return substrait_err!("Unknown type variation reference {others}"); + } } - TIMESTAMP_NANO_TYPE_VARIATION_REF => { - ScalarValue::TimestampNanosecond(Some(*t), None) + } + Some(LiteralType::PrecisionTimestamp(pt)) => match pt.precision { + 0 => ScalarValue::TimestampSecond(Some(pt.value), None), + 3 => ScalarValue::TimestampMillisecond(Some(pt.value), None), + 6 => ScalarValue::TimestampMicrosecond(Some(pt.value), None), + 9 => ScalarValue::TimestampNanosecond(Some(pt.value), None), + p => { + return not_impl_err!( + "Unsupported Substrait precision {p} for PrecisionTimestamp" + ); } - others => { - return substrait_err!("Unknown type variation reference {others}"); + }, + Some(LiteralType::PrecisionTimestampTz(pt)) => match pt.precision { + 0 => 
ScalarValue::TimestampSecond( + Some(pt.value), + Some(DEFAULT_TIMEZONE.into()), + ), + 3 => ScalarValue::TimestampMillisecond( + Some(pt.value), + Some(DEFAULT_TIMEZONE.into()), + ), + 6 => ScalarValue::TimestampMicrosecond( + Some(pt.value), + Some(DEFAULT_TIMEZONE.into()), + ), + 9 => ScalarValue::TimestampNanosecond( + Some(pt.value), + Some(DEFAULT_TIMEZONE.into()), + ), + p => { + return not_impl_err!( + "Unsupported Substrait precision {p} for PrecisionTimestamp" + ); } }, Some(LiteralType::Date(d)) => ScalarValue::Date32(Some(*d)), @@ -1881,10 +1943,24 @@ fn from_substrait_literal( Some(LiteralType::IntervalDayToSecond(IntervalDayToSecond { days, seconds, - microseconds, + subseconds, + precision_mode, })) => { - // DF only supports millisecond precision, so we lose the micros here - ScalarValue::new_interval_dt(*days, (seconds * 1000) + (microseconds / 1000)) + // DF only supports millisecond precision, so for any more granular type we lose precision + let milliseconds = match precision_mode { + Some(PrecisionMode::Microseconds(ms)) => ms / 1000, + Some(PrecisionMode::Precision(0)) => *subseconds as i32 * 1000, + Some(PrecisionMode::Precision(3)) => *subseconds as i32, + Some(PrecisionMode::Precision(6)) => (subseconds / 1000) as i32, + Some(PrecisionMode::Precision(9)) => (subseconds / 1000 / 1000) as i32, + _ => { + return not_impl_err!( + "Unsupported Substrait interval day to second precision mode" + ) + } + }; + + ScalarValue::new_interval_dt(*days, (seconds * 1000) + milliseconds) } Some(LiteralType::IntervalYearToMonth(IntervalYearToMonth { years, months })) => { ScalarValue::new_interval_ym(*years, *months) @@ -2026,21 +2102,55 @@ fn from_substrait_null( }, r#type::Kind::Fp32(_) => Ok(ScalarValue::Float32(None)), r#type::Kind::Fp64(_) => Ok(ScalarValue::Float64(None)), - r#type::Kind::Timestamp(ts) => match ts.type_variation_reference { - TIMESTAMP_SECOND_TYPE_VARIATION_REF => { - Ok(ScalarValue::TimestampSecond(None, None)) - } - TIMESTAMP_MILLI_TYPE_VARIATION_REF => { - Ok(ScalarValue::TimestampMillisecond(None, None)) - } - TIMESTAMP_MICRO_TYPE_VARIATION_REF => { - Ok(ScalarValue::TimestampMicrosecond(None, None)) - } - TIMESTAMP_NANO_TYPE_VARIATION_REF => { - Ok(ScalarValue::TimestampNanosecond(None, None)) + r#type::Kind::Timestamp(ts) => { + // Kept for backwards compatibility, new plans should use PrecisionTimestamp(Tz) instead + #[allow(deprecated)] + match ts.type_variation_reference { + TIMESTAMP_SECOND_TYPE_VARIATION_REF => { + Ok(ScalarValue::TimestampSecond(None, None)) + } + TIMESTAMP_MILLI_TYPE_VARIATION_REF => { + Ok(ScalarValue::TimestampMillisecond(None, None)) + } + TIMESTAMP_MICRO_TYPE_VARIATION_REF => { + Ok(ScalarValue::TimestampMicrosecond(None, None)) + } + TIMESTAMP_NANO_TYPE_VARIATION_REF => { + Ok(ScalarValue::TimestampNanosecond(None, None)) + } + v => not_impl_err!( + "Unsupported Substrait type variation {v} of type {kind:?}" + ), } - v => not_impl_err!( - "Unsupported Substrait type variation {v} of type {kind:?}" + } + r#type::Kind::PrecisionTimestamp(pts) => match pts.precision { + 0 => Ok(ScalarValue::TimestampSecond(None, None)), + 3 => Ok(ScalarValue::TimestampMillisecond(None, None)), + 6 => Ok(ScalarValue::TimestampMicrosecond(None, None)), + 9 => Ok(ScalarValue::TimestampNanosecond(None, None)), + p => not_impl_err!( + "Unsupported Substrait precision {p} for PrecisionTimestamp" + ), + }, + r#type::Kind::PrecisionTimestampTz(pts) => match pts.precision { + 0 => Ok(ScalarValue::TimestampSecond( + None, + Some(DEFAULT_TIMEZONE.into()), 
+ )), + 3 => Ok(ScalarValue::TimestampMillisecond( + None, + Some(DEFAULT_TIMEZONE.into()), + )), + 6 => Ok(ScalarValue::TimestampMicrosecond( + None, + Some(DEFAULT_TIMEZONE.into()), + )), + 9 => Ok(ScalarValue::TimestampNanosecond( + None, + Some(DEFAULT_TIMEZONE.into()), + )), + p => not_impl_err!( + "Unsupported Substrait precision {p} for PrecisionTimestamp" ), }, r#type::Kind::Date(date) => match date.type_variation_reference { diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index ee04749f5e6b..72b6760be29c 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -38,8 +38,6 @@ use crate::variation_const::{ DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF, DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME, LARGE_CONTAINER_TYPE_VARIATION_REF, - TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF, - TIMESTAMP_NANO_TYPE_VARIATION_REF, TIMESTAMP_SECOND_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF, }; use datafusion::arrow::array::{Array, GenericListArray, OffsetSizeTrait}; @@ -55,10 +53,11 @@ use datafusion::logical_expr::{expr, Between, JoinConstraint, LogicalPlan, Opera use datafusion::prelude::Expr; use pbjson_types::Any as ProtoAny; use substrait::proto::exchange_rel::{ExchangeKind, RoundRobin, ScatterFields}; +use substrait::proto::expression::literal::interval_day_to_second::PrecisionMode; use substrait::proto::expression::literal::map::KeyValue; use substrait::proto::expression::literal::{ - user_defined, IntervalDayToSecond, IntervalYearToMonth, List, Map, Struct, - UserDefined, + user_defined, IntervalDayToSecond, IntervalYearToMonth, List, Map, + PrecisionTimestamp, Struct, UserDefined, }; use substrait::proto::expression::subquery::InPredicate; use substrait::proto::expression::window_function::BoundsType; @@ -658,8 +657,8 @@ fn to_substrait_jointype(join_type: JoinType) -> join_rel::JoinType { JoinType::Left => join_rel::JoinType::Left, JoinType::Right => join_rel::JoinType::Right, JoinType::Full => join_rel::JoinType::Outer, - JoinType::LeftAnti => join_rel::JoinType::Anti, - JoinType::LeftSemi => join_rel::JoinType::Semi, + JoinType::LeftAnti => join_rel::JoinType::LeftAnti, + JoinType::LeftSemi => join_rel::JoinType::LeftSemi, JoinType::RightAnti | JoinType::RightSemi => unimplemented!(), } } @@ -1376,20 +1375,31 @@ fn to_substrait_type( nullability, })), }), - // Timezone is ignored. - DataType::Timestamp(unit, _) => { - let type_variation_reference = match unit { - TimeUnit::Second => TIMESTAMP_SECOND_TYPE_VARIATION_REF, - TimeUnit::Millisecond => TIMESTAMP_MILLI_TYPE_VARIATION_REF, - TimeUnit::Microsecond => TIMESTAMP_MICRO_TYPE_VARIATION_REF, - TimeUnit::Nanosecond => TIMESTAMP_NANO_TYPE_VARIATION_REF, + DataType::Timestamp(unit, tz) => { + let precision = match unit { + TimeUnit::Second => 0, + TimeUnit::Millisecond => 3, + TimeUnit::Microsecond => 6, + TimeUnit::Nanosecond => 9, }; - Ok(substrait::proto::Type { - kind: Some(r#type::Kind::Timestamp(r#type::Timestamp { - type_variation_reference, + let kind = match tz { + None => r#type::Kind::PrecisionTimestamp(r#type::PrecisionTimestamp { + type_variation_reference: DEFAULT_TYPE_VARIATION_REF, nullability, - })), - }) + precision, + }), + Some(_) => { + // If timezone is present, no matter what the actual tz value is, it indicates the + // value of the timestamp is tied to UTC epoch. 
That's all that Substrait cares about. + // As the timezone is lost, this conversion may be lossy for downstream use of the value. + r#type::Kind::PrecisionTimestampTz(r#type::PrecisionTimestampTz { + type_variation_reference: DEFAULT_TYPE_VARIATION_REF, + nullability, + precision, + }) + } + }; + Ok(substrait::proto::Type { kind: Some(kind) }) } DataType::Date32 => Ok(substrait::proto::Type { kind: Some(r#type::Kind::Date(r#type::Date { @@ -1415,6 +1425,7 @@ fn to_substrait_type( kind: Some(r#type::Kind::IntervalDay(r#type::IntervalDay { type_variation_reference: DEFAULT_TYPE_VARIATION_REF, nullability, + precision: Some(3), // DayTime precision is always milliseconds })), }), IntervalUnit::MonthDayNano => { @@ -1798,21 +1809,64 @@ fn to_substrait_literal( ScalarValue::Float64(Some(f)) => { (LiteralType::Fp64(*f), DEFAULT_TYPE_VARIATION_REF) } - ScalarValue::TimestampSecond(Some(t), _) => ( - LiteralType::Timestamp(*t), - TIMESTAMP_SECOND_TYPE_VARIATION_REF, + ScalarValue::TimestampSecond(Some(t), None) => ( + LiteralType::PrecisionTimestamp(PrecisionTimestamp { + precision: 0, + value: *t, + }), + DEFAULT_TYPE_VARIATION_REF, ), - ScalarValue::TimestampMillisecond(Some(t), _) => ( - LiteralType::Timestamp(*t), - TIMESTAMP_MILLI_TYPE_VARIATION_REF, + ScalarValue::TimestampMillisecond(Some(t), None) => ( + LiteralType::PrecisionTimestamp(PrecisionTimestamp { + precision: 3, + value: *t, + }), + DEFAULT_TYPE_VARIATION_REF, ), - ScalarValue::TimestampMicrosecond(Some(t), _) => ( - LiteralType::Timestamp(*t), - TIMESTAMP_MICRO_TYPE_VARIATION_REF, + ScalarValue::TimestampMicrosecond(Some(t), None) => ( + LiteralType::PrecisionTimestamp(PrecisionTimestamp { + precision: 6, + value: *t, + }), + DEFAULT_TYPE_VARIATION_REF, ), - ScalarValue::TimestampNanosecond(Some(t), _) => ( - LiteralType::Timestamp(*t), - TIMESTAMP_NANO_TYPE_VARIATION_REF, + ScalarValue::TimestampNanosecond(Some(t), None) => ( + LiteralType::PrecisionTimestamp(PrecisionTimestamp { + precision: 9, + value: *t, + }), + DEFAULT_TYPE_VARIATION_REF, + ), + // If timezone is present, no matter what the actual tz value is, it indicates the + // value of the timestamp is tied to UTC epoch. That's all that Substrait cares about. + // As the timezone is lost, this conversion may be lossy for downstream use of the value. 
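+            // On the consumer side, DataFusion reads PrecisionTimestampTz back using the
+            // DEFAULT_TIMEZONE ("UTC"), so the instant on the timeline survives a round
+            // trip even though the original timezone string does not.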
+ ScalarValue::TimestampSecond(Some(t), Some(_)) => ( + LiteralType::PrecisionTimestampTz(PrecisionTimestamp { + precision: 0, + value: *t, + }), + DEFAULT_TYPE_VARIATION_REF, + ), + ScalarValue::TimestampMillisecond(Some(t), Some(_)) => ( + LiteralType::PrecisionTimestampTz(PrecisionTimestamp { + precision: 3, + value: *t, + }), + DEFAULT_TYPE_VARIATION_REF, + ), + ScalarValue::TimestampMicrosecond(Some(t), Some(_)) => ( + LiteralType::PrecisionTimestampTz(PrecisionTimestamp { + precision: 6, + value: *t, + }), + DEFAULT_TYPE_VARIATION_REF, + ), + ScalarValue::TimestampNanosecond(Some(t), Some(_)) => ( + LiteralType::PrecisionTimestampTz(PrecisionTimestamp { + precision: 9, + value: *t, + }), + DEFAULT_TYPE_VARIATION_REF, ), ScalarValue::Date32(Some(d)) => { (LiteralType::Date(*d), DATE_32_TYPE_VARIATION_REF) @@ -1847,7 +1901,8 @@ fn to_substrait_literal( LiteralType::IntervalDayToSecond(IntervalDayToSecond { days: i.days, seconds: i.milliseconds / 1000, - microseconds: (i.milliseconds % 1000) * 1000, + subseconds: (i.milliseconds % 1000) as i64, + precision_mode: Some(PrecisionMode::Precision(3)), // 3 for milliseconds }), DEFAULT_TYPE_VARIATION_REF, ), @@ -2142,6 +2197,18 @@ mod test { round_trip_literal(ScalarValue::UInt64(Some(u64::MIN)))?; round_trip_literal(ScalarValue::UInt64(Some(u64::MAX)))?; + for (ts, tz) in [ + (Some(12345), None), + (None, None), + (Some(12345), Some("UTC".into())), + (None, Some("UTC".into())), + ] { + round_trip_literal(ScalarValue::TimestampSecond(ts, tz.clone()))?; + round_trip_literal(ScalarValue::TimestampMillisecond(ts, tz.clone()))?; + round_trip_literal(ScalarValue::TimestampMicrosecond(ts, tz.clone()))?; + round_trip_literal(ScalarValue::TimestampNanosecond(ts, tz))?; + } + round_trip_literal(ScalarValue::List(ScalarValue::new_list_nullable( &[ScalarValue::Float32(Some(1.0))], &DataType::Float32, @@ -2271,10 +2338,14 @@ mod test { round_trip_type(DataType::UInt64)?; round_trip_type(DataType::Float32)?; round_trip_type(DataType::Float64)?; - round_trip_type(DataType::Timestamp(TimeUnit::Second, None))?; - round_trip_type(DataType::Timestamp(TimeUnit::Millisecond, None))?; - round_trip_type(DataType::Timestamp(TimeUnit::Microsecond, None))?; - round_trip_type(DataType::Timestamp(TimeUnit::Nanosecond, None))?; + + for tz in [None, Some("UTC".into())] { + round_trip_type(DataType::Timestamp(TimeUnit::Second, tz.clone()))?; + round_trip_type(DataType::Timestamp(TimeUnit::Millisecond, tz.clone()))?; + round_trip_type(DataType::Timestamp(TimeUnit::Microsecond, tz.clone()))?; + round_trip_type(DataType::Timestamp(TimeUnit::Nanosecond, tz))?; + } + round_trip_type(DataType::Date32)?; round_trip_type(DataType::Date64)?; round_trip_type(DataType::Binary)?; diff --git a/datafusion/substrait/src/variation_const.rs b/datafusion/substrait/src/variation_const.rs index c94ad2d669fd..1525da764509 100644 --- a/datafusion/substrait/src/variation_const.rs +++ b/datafusion/substrait/src/variation_const.rs @@ -38,10 +38,16 @@ /// The "system-preferred" variation (i.e., no variation). 
pub const DEFAULT_TYPE_VARIATION_REF: u32 = 0; pub const UNSIGNED_INTEGER_TYPE_VARIATION_REF: u32 = 1; + +#[deprecated(since = "42.0.0", note = "Use `PrecisionTimestamp(Tz)` type instead")] pub const TIMESTAMP_SECOND_TYPE_VARIATION_REF: u32 = 0; +#[deprecated(since = "42.0.0", note = "Use `PrecisionTimestamp(Tz)` type instead")] pub const TIMESTAMP_MILLI_TYPE_VARIATION_REF: u32 = 1; +#[deprecated(since = "42.0.0", note = "Use `PrecisionTimestamp(Tz)` type instead")] pub const TIMESTAMP_MICRO_TYPE_VARIATION_REF: u32 = 2; +#[deprecated(since = "42.0.0", note = "Use `PrecisionTimestamp(Tz)` type instead")] pub const TIMESTAMP_NANO_TYPE_VARIATION_REF: u32 = 3; + pub const DATE_32_TYPE_VARIATION_REF: u32 = 0; pub const DATE_64_TYPE_VARIATION_REF: u32 = 1; pub const DEFAULT_CONTAINER_TYPE_VARIATION_REF: u32 = 0; diff --git a/datafusion/wasmtest/src/lib.rs b/datafusion/wasmtest/src/lib.rs index a74cce72ac64..50325d262d1d 100644 --- a/datafusion/wasmtest/src/lib.rs +++ b/datafusion/wasmtest/src/lib.rs @@ -78,9 +78,8 @@ mod test { use super::*; use datafusion::execution::context::SessionContext; use datafusion_execution::{ - config::SessionConfig, - disk_manager::DiskManagerConfig, - runtime_env::{RuntimeConfig, RuntimeEnv}, + config::SessionConfig, disk_manager::DiskManagerConfig, + runtime_env::RuntimeEnvBuilder, }; use datafusion_physical_plan::collect; use datafusion_sql::parser::DFParser; @@ -100,10 +99,10 @@ mod test { // Execute SQL (using datafusion) let rt = Arc::new( - RuntimeEnv::new( - RuntimeConfig::new().with_disk_manager(DiskManagerConfig::Disabled), - ) - .unwrap(), + RuntimeEnvBuilder::new() + .with_disk_manager(DiskManagerConfig::Disabled) + .build() + .unwrap(), ); let session_config = SessionConfig::new().with_target_partitions(1); let session_context = diff --git a/docs/Cargo.toml b/docs/Cargo.toml deleted file mode 100644 index 14398c841579..000000000000 --- a/docs/Cargo.toml +++ /dev/null @@ -1,35 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "datafusion-docs-tests" -description = "DataFusion Documentation Tests" -publish = false -version = { workspace = true } -edition = { workspace = true } -readme = { workspace = true } -homepage = { workspace = true } -repository = { workspace = true } -license = { workspace = true } -authors = { workspace = true } -rust-version = { workspace = true } - -[lints] -workspace = true - -[dependencies] -datafusion = { workspace = true } diff --git a/docs/source/index.rst b/docs/source/index.rst index 9c8c886d2502..bb5ea430a321 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -89,6 +89,7 @@ To get started, see user-guide/expressions user-guide/sql/index user-guide/configs + user-guide/explain-usage user-guide/faq .. 
_toc.library-user-guide: diff --git a/docs/source/library-user-guide/building-logical-plans.md b/docs/source/library-user-guide/building-logical-plans.md index fe922d8eaeb1..556deb02e980 100644 --- a/docs/source/library-user-guide/building-logical-plans.md +++ b/docs/source/library-user-guide/building-logical-plans.md @@ -31,44 +31,52 @@ explained in more detail in the [Query Planning and Execution Overview] section DataFusion's [LogicalPlan] is an enum containing variants representing all the supported operators, and also contains an `Extension` variant that allows projects building on DataFusion to add custom logical operators. -It is possible to create logical plans by directly creating instances of the [LogicalPlan] enum as follows, but is is +It is possible to create logical plans by directly creating instances of the [LogicalPlan] enum as shown, but it is much easier to use the [LogicalPlanBuilder], which is described in the next section. Here is an example of building a logical plan directly: - - ```rust -// create a logical table source -let schema = Schema::new(vec![ - Field::new("id", DataType::Int32, true), - Field::new("name", DataType::Utf8, true), -]); -let table_source = LogicalTableSource::new(SchemaRef::new(schema)); - -// create a TableScan plan -let projection = None; // optional projection -let filters = vec![]; // optional filters to push down -let fetch = None; // optional LIMIT -let table_scan = LogicalPlan::TableScan(TableScan::try_new( - "person", - Arc::new(table_source), - projection, - filters, - fetch, -)?); - -// create a Filter plan that evaluates `id > 500` that wraps the TableScan -let filter_expr = col("id").gt(lit(500)); -let plan = LogicalPlan::Filter(Filter::try_new(filter_expr, Arc::new(table_scan))?); - -// print the plan -println!("{}", plan.display_indent_schema()); +use datafusion::common::DataFusionError; +use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use datafusion::logical_expr::{Filter, LogicalPlan, TableScan, LogicalTableSource}; +use datafusion::prelude::*; +use std::sync::Arc; + +fn main() -> Result<(), DataFusionError> { + // create a logical table source + let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("name", DataType::Utf8, true), + ]); + let table_source = LogicalTableSource::new(SchemaRef::new(schema)); + + // create a TableScan plan + let projection = None; // optional projection + let filters = vec![]; // optional filters to push down + let fetch = None; // optional LIMIT + let table_scan = LogicalPlan::TableScan(TableScan::try_new( + "person", + Arc::new(table_source), + projection, + filters, + fetch, + )? + ); + + // create a Filter plan that evaluates `id > 500` that wraps the TableScan + let filter_expr = col("id").gt(lit(500)); + let plan = LogicalPlan::Filter(Filter::try_new(filter_expr, Arc::new(table_scan)) ? ); + + // print the plan + println!("{}", plan.display_indent_schema()); + Ok(()) +} ``` This example produces the following plan: -``` +```text Filter: person.id > Int32(500) [id:Int32;N, name:Utf8;N] TableScan: person [id:Int32;N, name:Utf8;N] ``` @@ -78,7 +86,7 @@ Filter: person.id > Int32(500) [id:Int32;N, name:Utf8;N] DataFusion logical plans can be created using the [LogicalPlanBuilder] struct. There is also a [DataFrame] API which is a higher-level API that delegates to [LogicalPlanBuilder]. 
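For instance, a minimal sketch of the same `id > 500` filter expressed through the DataFrame API might look as follows, assuming an empty in-memory `person` table registered via `MemTable`:

```rust
use std::sync::Arc;
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::datasource::MemTable;
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    // Register an empty in-memory "person" table so the DataFrame API has a table to scan
    let schema = Arc::new(Schema::new(vec![
        Field::new("id", DataType::Int32, true),
        Field::new("name", DataType::Utf8, true),
    ]));
    let ctx = SessionContext::new();
    ctx.register_table("person", Arc::new(MemTable::try_new(schema, vec![])?))?;

    // The DataFrame API delegates to LogicalPlanBuilder internally
    let df = ctx.table("person").await?.filter(col("id").gt(lit(500)))?;

    // Inspect the LogicalPlan that the DataFrame wraps
    println!("{}", df.logical_plan().display_indent_schema());
    Ok(())
}
```

Printing the plan should show essentially the same `Filter` over `TableScan` structure as the plans shown elsewhere on this page.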
-The following associated functions can be used to create a new builder: +There are several functions that can can be used to create a new builder, such as - `empty` - create an empty plan with no fields - `values` - create a plan from a set of literal values @@ -102,41 +110,107 @@ The following example demonstrates building the same simple query plan as the pr ```rust -// create a logical table source -let schema = Schema::new(vec![ - Field::new("id", DataType::Int32, true), - Field::new("name", DataType::Utf8, true), -]); -let table_source = LogicalTableSource::new(SchemaRef::new(schema)); - -// optional projection -let projection = None; - -// create a LogicalPlanBuilder for a table scan -let builder = LogicalPlanBuilder::scan("person", Arc::new(table_source), projection)?; - -// perform a filter operation and build the plan -let plan = builder - .filter(col("id").gt(lit(500)))? // WHERE id > 500 - .build()?; - -// print the plan -println!("{}", plan.display_indent_schema()); +use datafusion::common::DataFusionError; +use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use datafusion::logical_expr::{LogicalPlanBuilder, LogicalTableSource}; +use datafusion::prelude::*; +use std::sync::Arc; + +fn main() -> Result<(), DataFusionError> { + // create a logical table source + let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("name", DataType::Utf8, true), + ]); + let table_source = LogicalTableSource::new(SchemaRef::new(schema)); + + // optional projection + let projection = None; + + // create a LogicalPlanBuilder for a table scan + let builder = LogicalPlanBuilder::scan("person", Arc::new(table_source), projection)?; + + // perform a filter operation and build the plan + let plan = builder + .filter(col("id").gt(lit(500)))? // WHERE id > 500 + .build()?; + + // print the plan + println!("{}", plan.display_indent_schema()); + Ok(()) +} ``` This example produces the following plan: -``` +```text Filter: person.id > Int32(500) [id:Int32;N, name:Utf8;N] TableScan: person [id:Int32;N, name:Utf8;N] ``` +## Translating Logical Plan to Physical Plan + +Logical plans can not be directly executed. They must be "compiled" into an +[`ExecutionPlan`], which is often referred to as a "physical plan". + +Compared to `LogicalPlan`s `ExecutionPlans` have many more details such as +specific algorithms and detailed optimizations compared to. Given a +`LogicalPlan` the easiest way to create an `ExecutionPlan` is using +[`SessionState::create_physical_plan`] as shown below + +```rust +use datafusion::datasource::{provider_as_source, MemTable}; +use datafusion::common::DataFusionError; +use datafusion::physical_plan::display::DisplayableExecutionPlan; +use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use datafusion::logical_expr::{LogicalPlanBuilder, LogicalTableSource}; +use datafusion::prelude::*; +use std::sync::Arc; + +// Creating physical plans may access remote catalogs and data sources +// thus it must be run with an async runtime. +#[tokio::main] +async fn main() -> Result<(), DataFusionError> { + + // create a default table source + let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("name", DataType::Utf8, true), + ]); + // To create an ExecutionPlan we must provide an actual + // TableProvider. 
For this example, we don't provide any data + // but in production code, this would have `RecordBatch`es with + // in memory data + let table_provider = Arc::new(MemTable::try_new(Arc::new(schema), vec![])?); + // Use the provider_as_source function to convert the TableProvider to a table source + let table_source = provider_as_source(table_provider); + + // create a LogicalPlanBuilder for a table scan without projection or filters + let logical_plan = LogicalPlanBuilder::scan("person", table_source, None)?.build()?; + + // Now create the physical plan by calling `create_physical_plan` + let ctx = SessionContext::new(); + let physical_plan = ctx.state().create_physical_plan(&logical_plan).await?; + + // print the plan + println!("{}", DisplayableExecutionPlan::new(physical_plan.as_ref()).indent(true)); + Ok(()) +} +``` + +This example produces the following physical plan: + +```text +MemoryExec: partitions=0, partition_sizes=[] +``` + ## Table Sources -The previous example used a [LogicalTableSource], which is used for tests and documentation in DataFusion, and is also -suitable if you are using DataFusion to build logical plans but do not use DataFusion's physical planner. However, if you -want to use a [TableSource] that can be executed in DataFusion then you will need to use [DefaultTableSource], which is a -wrapper for a [TableProvider]. +The previous examples use a [LogicalTableSource], which is used for tests and documentation in DataFusion, and is also +suitable if you are using DataFusion to build logical plans but do not use DataFusion's physical planner. + +However, it is more common to use a [TableProvider]. To get a [TableSource] from a +[TableProvider], use [provider_as_source] or [DefaultTableSource]. [query planning and execution overview]: https://docs.rs/datafusion/latest/datafusion/index.html#query-planning-and-execution-overview [architecture guide]: https://docs.rs/datafusion/latest/datafusion/index.html#architecture @@ -145,5 +219,8 @@ wrapper for a [TableProvider]. [dataframe]: using-the-dataframe-api.md [logicaltablesource]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalTableSource.html [defaulttablesource]: https://docs.rs/datafusion/latest/datafusion/datasource/default_table_source/struct.DefaultTableSource.html +[provider_as_source]: https://docs.rs/datafusion/latest/datafusion/datasource/default_table_source/fn.provider_as_source.html [tableprovider]: https://docs.rs/datafusion/latest/datafusion/datasource/provider/trait.TableProvider.html [tablesource]: https://docs.rs/datafusion-expr/latest/datafusion_expr/trait.TableSource.html +[`executionplan`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.ExecutionPlan.html +[`sessionstate::create_physical_plan`]: https://docs.rs/datafusion/latest/datafusion/execution/session_state/struct.SessionState.html#method.create_physical_plan diff --git a/docs/source/user-guide/explain-usage.md b/docs/source/user-guide/explain-usage.md new file mode 100644 index 000000000000..a65fad92d104 --- /dev/null +++ b/docs/source/user-guide/explain-usage.md @@ -0,0 +1,365 @@ + + +# Reading Explain Plans + +## Introduction + +This section describes of how to read a DataFusion query plan. While fully +comprehending all details of these plans requires significant expertise in the +DataFusion engine, this guide will help you get started with the basics. + +Datafusion executes queries using a `query plan`. 
To see the plan without +running the query, add the keyword `EXPLAIN` to your SQL query or call the +[DataFrame::explain] method + +[dataframe::explain]: https://docs.rs/datafusion/latest/datafusion/dataframe/struct.DataFrame.html#method.explain + +## Example: Select and filter + +In this section, we run example queries against the `hits.parquet` file. See +[below](#data-in-this-example)) for information on how to get this file. + +Let's see how DataFusion runs a query that selects the top 5 watch lists for the +site `http://domcheloveplanet.ru/`: + +```sql +EXPLAIN SELECT "WatchID" AS wid, "hits.parquet"."ClientIP" AS ip +FROM 'hits.parquet' +WHERE starts_with("URL", 'http://domcheloveplanet.ru/') +ORDER BY wid ASC, ip DESC +LIMIT 5; +``` + +The output will look like + +``` ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: wid ASC NULLS LAST, ip DESC NULLS FIRST, fetch=5 | +| | Projection: hits.parquet.WatchID AS wid, hits.parquet.ClientIP AS ip | +| | Filter: starts_with(hits.parquet.URL, Utf8("http://domcheloveplanet.ru/")) | +| | TableScan: hits.parquet projection=[WatchID, ClientIP, URL], partial_filters=[starts_with(hits.parquet.URL, Utf8("http://domcheloveplanet.ru/"))] | +| physical_plan | SortPreservingMergeExec: [wid@0 ASC NULLS LAST,ip@1 DESC], fetch=5 | +| | SortExec: TopK(fetch=5), expr=[wid@0 ASC NULLS LAST,ip@1 DESC], preserve_partitioning=[true] | +| | ProjectionExec: expr=[WatchID@0 as wid, ClientIP@1 as ip] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: starts_with(URL@2, http://domcheloveplanet.ru/) | +| | ParquetExec: file_groups={16 groups: [[hits.parquet:0..923748528], ...]}, projection=[WatchID, ClientIP, URL], predicate=starts_with(URL@13, http://domcheloveplanet.ru/) | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +2 row(s) fetched. +Elapsed 0.060 seconds. +``` + +There are two sections: logical plan and physical plan + +- **Logical Plan:** is a plan generated for a specific SQL query, DataFrame, or other language without the + knowledge of the underlying data organization. +- **Physical Plan:** is a plan generated from a logical plan along with consideration of the hardware + configuration (e.g number of CPUs) and the underlying data organization (e.g number of files). + This physical plan is specific to your hardware configuration and your data. If you load the same + data to different hardware with different configurations, the same query may generate different query plans. + +Understanding a query plan can help to you understand its performance. For example, when the plan shows your query reads +many files, it signals you to either add more filter in the query to read less data or to modify your file +design to make fewer but larger files. This document focuses on how to read a query plan. How to make a +query run faster depends on the reason it is slow and beyond the scope of this document. + +## Query plans are trees + +A query plan is an upside down tree, and we always read from bottom up. 
The +physical plan in Figure 1 in tree format will look like + +``` + ▲ + │ + │ +┌─────────────────────────────────────────────────┐ +│ SortPreservingMergeExec │ +│ [wid@0 ASC NULLS LAST,ip@1 DESC] │ +│ fetch=5 │ +└─────────────────────────────────────────────────┘ + ▲ + │ +┌─────────────────────────────────────────────────┐ +│ SortExec TopK(fetch=5), │ +│ expr=[wid@0 ASC NULLS LAST,ip@1 DESC], │ +│ preserve_partitioning=[true] │ +└─────────────────────────────────────────────────┘ + ▲ + │ +┌─────────────────────────────────────────────────┐ +│ ProjectionExec │ +│ expr=[WatchID@0 as wid, ClientIP@1 as ip] │ +└─────────────────────────────────────────────────┘ + ▲ + │ +┌─────────────────────────────────────────────────┐ +│ CoalesceBatchesExec │ +└─────────────────────────────────────────────────┘ + ▲ + │ +┌─────────────────────────────────────────────────┐ +│ FilterExec │ +│ starts_with(URL@2, http://domcheloveplanet.ru/) │ +└─────────────────────────────────────────────────┘ + ▲ + │ +┌────────────────────────────────────────────────┐ +│ ParquetExec │ +│ hits.parquet (filter = ...) │ +└────────────────────────────────────────────────┘ +``` + +Each node in the tree/plan ends with `Exec` and is sometimes also called an `operator` or `ExecutionPlan` where data is +processed, transformed and sent up. + +1. First, data in parquet the `hits.parquet` file us read in parallel using 16 cores in 16 "partitions" (more on this later) from `ParquetExec`, which applies a first pass at filtering during the scan. +2. Next, the output is filtered using `FilterExec` to ensure only rows where `starts_with(URL, 'http://domcheloveplanet.ru/')` evaluates to true are passed on +3. The `CoalesceBatchesExec` then ensures that the data is grouped into larger batches for processing +4. The `ProjectionExec` then projects the data to rename the `WatchID` and `ClientIP` columns to `wid` and `ip` respectively. +5. The `SortExec` then sorts the data by `wid ASC, ip DESC`. The `Topk(fetch=5)` indicates that a special implementation is used that only tracks and emits the top 5 values in each partition. +6. Finally the `SortPreservingMergeExec` merges the sorted data from all partitions and returns the top 5 rows overall. + +## Understanding large query plans + +A large query plan may look intimidating, but you can quickly understand what it does by following these steps + +1. As always, read from bottom up, one operator at a time. +2. Understand the job of this operator by reading + the [Physical Plan documentation](https://docs.rs/datafusion/latest/datafusion/physical_plan/index.html). +3. Understand the input data of the operator and how large/small it may be. +4. Understand how much data that operator produces and what it would look like. + +If you can answer those questions, you will be able to estimate how much work +that plan has to do and thus how long it will take. However, the `EXPLAIN` just +shows you the plan without executing it. + +If you want to know more about how much work each operator in query plan does, +you can use the `EXPLAIN ANALYZE` to get the explain with runtime added (see +next section) + +## More Debugging Information: `EXPLAIN VERBOSE` + +If the plan has to read too many files, not all of them will be shown in the +`EXPLAIN`. To see them, use `EXPLAIN VEBOSE`. Like `EXPLAIN`, `EXPLAIN VERBOSE` +does not run the query. 
Instead it shows the full explain plan, with information +that is omitted from the default explain, as well as all intermediate physical +plans DataFusion generates before returning. This mode can be very helpful for +debugging to see why and when DataFusion added and removed operators from a plan. + +## Execution Counters: `EXPLAIN ANALYZE` + +During execution, DataFusion operators collect detailed metrics. You can access +them programmatically via [`ExecutionPlan::metrics`] as well as with the +`EXPLAIN ANALYZE` command. For example here is the same query query as +above but with `EXPLAIN ANALYZE` (note the output is edited for clarity) + +[`executionplan::metrics`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.ExecutionPlan.html#method.metrics + +``` +> EXPLAIN ANALYZE SELECT "WatchID" AS wid, "hits.parquet"."ClientIP" AS ip +FROM 'hits.parquet' +WHERE starts_with("URL", 'http://domcheloveplanet.ru/') +ORDER BY wid ASC, ip DESC +LIMIT 5; ++-------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++-------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Plan with Metrics | SortPreservingMergeExec: [wid@0 ASC NULLS LAST,ip@1 DESC], fetch=5, metrics=[output_rows=5, elapsed_compute=2.375µs] | +| | SortExec: TopK(fetch=5), expr=[wid@0 ASC NULLS LAST,ip@1 DESC], preserve_partitioning=[true], metrics=[output_rows=75, elapsed_compute=7.243038ms, row_replacements=482] | +| | ProjectionExec: expr=[WatchID@0 as wid, ClientIP@1 as ip], metrics=[output_rows=811821, elapsed_compute=66.25µs] | +| | FilterExec: starts_with(URL@2, http://domcheloveplanet.ru/), metrics=[output_rows=811821, elapsed_compute=1.36923816s] | +| | ParquetExec: file_groups={16 groups: [[hits.parquet:0..923748528], ...]}, projection=[WatchID, ClientIP, URL], predicate=starts_with(URL@13, http://domcheloveplanet.ru/), metrics=[output_rows=99997497, elapsed_compute=16ns, ... bytes_scanned=3703192723, ... time_elapsed_opening=308.203002ms, time_elapsed_scanning_total=8.350342183s, ...] | ++-------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +1 row(s) fetched. +Elapsed 0.720 seconds. +``` + +In this case, DataFusion actually ran the query, but discarded any results, and +instead returned an annotated plan with a new field, `metrics=[...]` + +Most operators have the common metrics `output_rows` and `elapsed_compute` and +some have operator specific metrics such as `ParquetExec` which has +`bytes_scanned=3703192723`. 
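For example, a minimal sketch of reading those same counters programmatically after executing a query (the `hits.parquet` path and the query are only illustrative):

```rust
use std::sync::Arc;
use datafusion::error::Result;
use datafusion::physical_plan::{collect, ExecutionPlan};
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    ctx.register_parquet("hits", "hits.parquet", ParquetReadOptions::default())
        .await?;
    let df = ctx.sql("SELECT COUNT(*) FROM hits").await?;

    // Build and execute the physical plan, keeping a handle to it for its metrics
    let physical_plan: Arc<dyn ExecutionPlan> = df.create_physical_plan().await?;
    let _batches = collect(physical_plan.clone(), ctx.task_ctx()).await?;

    // The same counters EXPLAIN ANALYZE prints; metrics() covers this operator only,
    // so walk the plan's children to see every operator
    if let Some(metrics) = physical_plan.metrics() {
        println!("output_rows     = {:?}", metrics.output_rows());
        println!("elapsed_compute = {:?}", metrics.elapsed_compute());
    }
    Ok(())
}
```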
Note that times and counters are reported across all
+cores, so if you have 16 cores, the time reported is the sum of the time taken
+by all 16 cores.
+
+Again, reading from bottom up:
+
+- `ParquetExec`
+  - `output_rows=99997497`: A total of 99.9M rows were produced
+  - `bytes_scanned=3703192723`: Of the 14GB file, 3.7GB were actually read (due to projection pushdown)
+  - `time_elapsed_opening=308.203002ms`: It took 300ms to open the file and prepare to read it
+  - `time_elapsed_scanning_total=8.350342183s`: It took 8.3 seconds of CPU time (across 16 cores) to actually decode the parquet data
+- `FilterExec`
+  - `output_rows=811821`: Of the 99.9M rows at its input, only 811K rows passed the filter and were produced at the output
+  - `elapsed_compute=1.36923816s`: In total, 1.36s of CPU time (across 16 cores) was spent evaluating the filter
+- `CoalesceBatchesExec`
+  - `output_rows=811821`, `elapsed_compute=12.873379ms`: Produced 811K rows in 13ms
+- `ProjectionExec`
+  - `output_rows=811821`, `elapsed_compute=66.25µs`: Produced 811K rows in 66µs (microseconds). This projection is almost instantaneous as it does not manipulate any data
+- `SortExec`
+  - `output_rows=75`: Produced 75 rows in total. Each of 16 cores could produce up to 5 rows, but in this case not all cores did.
+  - `elapsed_compute=7.243038ms`: 7ms was used to determine the top 5 rows
+  - `row_replacements=482`: Internally, the TopK operator updated its top list 482 times
+- `SortPreservingMergeExec`
+  - `output_rows=5`, `elapsed_compute=2.375µs`: Produced the final 5 rows in 2.375µs (microseconds)
+
+## Partitions and Execution
+
+DataFusion determines the optimal number of cores to use as part of query
+planning. Roughly speaking, each "partition" in the plan is run independently using
+a separate core. Data crosses between cores only within certain operators such as
+`RepartitionExec`, `CoalescePartitionsExec` and `SortPreservingMergeExec`.
+
+You can read more about this in the [Partitioning Docs].
+
+[partitioning docs]: https://docs.rs/datafusion/latest/datafusion/physical_expr/enum.Partitioning.html
+
+## Example of an Aggregate Query
+
+Let us delve into an example query that aggregates data from the `hits.parquet`
+file.
For example, this query from ClickBench finds the top 10 users by their +number of hits: + +```sql +SELECT "UserID", COUNT(*) +FROM 'hits.parquet' +GROUP BY "UserID" +ORDER BY COUNT(*) DESC +LIMIT 10; +``` + +We can again see the query plan by using `EXPLAIN`: + +``` +> EXPLAIN SELECT "UserID", COUNT(*) FROM 'hits.parquet' GROUP BY "UserID" ORDER BY COUNT(*) DESC LIMIT 10; ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Limit: skip=0, fetch=10 | +| | Sort: count(*) DESC NULLS FIRST, fetch=10 | +| | Aggregate: groupBy=[[hits.parquet.UserID]], aggr=[[count(Int64(1)) AS count(*)]] | +| | TableScan: hits.parquet projection=[UserID] | +| physical_plan | GlobalLimitExec: skip=0, fetch=10 | +| | SortPreservingMergeExec: [count(*)@1 DESC], fetch=10 | +| | SortExec: TopK(fetch=10), expr=[count(*)@1 DESC], preserve_partitioning=[true] | +| | AggregateExec: mode=FinalPartitioned, gby=[UserID@0 as UserID], aggr=[count(*)] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([UserID@0], 10), input_partitions=10 | +| | AggregateExec: mode=Partial, gby=[UserID@0 as UserID], aggr=[count(*)] | +| | ParquetExec: file_groups={10 groups: [[hits.parquet:0..1477997645], [hits.parquet:1477997645..2955995290], [hits.parquet:2955995290..4433992935], [hits.parquet:4433992935..5911990580], [hits.parquet:5911990580..7389988225], ...]}, projection=[UserID] | +| | | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +``` + +For this query, let's again read the plan from the bottom to the top: + +**Logical plan operators** + +- `TableScan` + - `hits.parquet`: Scans data from the file `hits.parquet`. + - `projection=[UserID]`: Reads only the `UserID` column +- `Aggregate` + - `groupBy=[[hits.parquet.UserID]]`: Groups by `UserID` column. + - `aggr=[[count(Int64(1)) AS count(*)]]`: Applies the `COUNT` aggregate on each distinct group. +- `Sort` + - `count(*) DESC NULLS FIRST`: Sorts the data in descending count order. 
+  - `fetch=10`: Returns only the first 10 rows.
+- `Limit`
+  - `skip=0`: Does not skip any data for the results.
+  - `fetch=10`: Limits the results to 10 values.
+
+**Physical plan operators**
+
+- `ParquetExec`
+  - `file_groups={10 groups: [...]}`: Reads 10 groups in parallel from the `hits.parquet` file. (The example above was run on a machine with 10 cores.)
+  - `projection=[UserID]`: Pushes down projection of the `UserID` column. The parquet format is columnar and the DataFusion reader only decodes the columns required.
+- `AggregateExec`
+  - `mode=Partial`: Runs a [partial aggregation] in parallel across each of the 10 partitions from the `ParquetExec` immediately after reading.
+  - `gby=[UserID@0 as UserID]`: Represents `GROUP BY` in the [physical plan] and groups together the same values of `UserID`.
+  - `aggr=[count(*)]`: Applies the `COUNT` aggregate on all rows for each group.
+- `RepartitionExec`
+  - `partitioning=Hash([UserID@0], 10)`: Divides the input into 10 (new) output partitions based on the value of `hash(UserID)`. You can read more about this in the [partitioning] documentation.
+  - `input_partitions=10`: Number of input partitions.
+- `CoalesceBatchesExec`
+  - `target_batch_size=8192`: Combines smaller batches into larger batches. In this case approximately 8192 rows in each batch.
+- `AggregateExec`
+  - `mode=FinalPartitioned`: Performs the final aggregation on each group. See the [documentation on multi phase grouping] for more information.
+  - `gby=[UserID@0 as UserID]`: Groups by `UserID`.
+  - `aggr=[count(*)]`: Applies the `COUNT` aggregate on all rows for each group.
+- `SortExec`
+  - `TopK(fetch=10)`: Uses a special "TopK" sort that keeps only the largest 10 values in memory at a time. You can read more about this in the [TopK] documentation.
+  - `expr=[count(*)@1 DESC]`: Sorts all rows in descending order. Note this represents the `ORDER BY` in the physical plan.
+  - `preserve_partitioning=[true]`: The sort is done in parallel on each partition. In this case the top 10 values are found for each of the 10 partitions, in parallel.
+- `SortPreservingMergeExec`
+  - `[count(*)@1 DESC]`: This operator merges the 10 distinct streams into a single stream using this expression.
+  - `fetch=10`: Returns only the first 10 rows.
+- `GlobalLimitExec`
+  - `skip=0`: Does not skip any rows.
+  - `fetch=10`: Returns only the first 10 rows, denoted by `LIMIT 10` in the query.
+
+[partial aggregation]: https://docs.rs/datafusion/latest/datafusion/physical_plan/aggregates/enum.AggregateMode.html#variant.Partial
+[physical plan]: https://docs.rs/datafusion/latest/datafusion/physical_plan/aggregates/struct.PhysicalGroupBy.html
+[partitioning]: https://docs.rs/datafusion/latest/datafusion/physical_plan/repartition/struct.RepartitionExec.html
+[topk]: https://docs.rs/datafusion/latest/datafusion/physical_plan/struct.TopK.html
+[documentation on multi phase grouping]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.Accumulator.html#tymethod.state
+
+### Data in this Example
+
+The examples in this section use data from [ClickBench], a benchmark for data
+analytics.
The examples are in terms of the 14GB [`hits.parquet`] file and can be +downloaded from the website or using the following commands: + +```shell +cd benchmarks +./bench.sh data clickbench_1 +*************************** +DataFusion Benchmark Runner and Data Generator +COMMAND: data +BENCHMARK: clickbench_1 +DATA_DIR: /Users/andrewlamb/Software/datafusion2/benchmarks/data +CARGO_COMMAND: cargo run --release +PREFER_HASH_JOIN: true +*************************** +Checking hits.parquet...... found 14779976446 bytes ... Done +``` + +Then you can run `datafusion-cli` to get plans: + +```shell +cd datafusion/benchmarks/data +datafusion-cli + +DataFusion CLI v41.0.0 +> select count(*) from 'hits.parquet'; ++----------+ +| count(*) | ++----------+ +| 99997497 | ++----------+ +1 row(s) fetched. +Elapsed 0.062 seconds. +> +``` + +[clickbench]: https://benchmark.clickhouse.com/ +[`hits.parquet`]: https://datasets.clickhouse.com/hits_compatible/hits.parquet diff --git a/docs/source/user-guide/sql/explain.md b/docs/source/user-guide/sql/explain.md index 22f73e3d76d7..45bb3a57aa7c 100644 --- a/docs/source/user-guide/sql/explain.md +++ b/docs/source/user-guide/sql/explain.md @@ -21,6 +21,8 @@ The `EXPLAIN` command shows the logical and physical execution plan for the specified SQL statement. +See the [Reading Explain Plans](../explain-usage.md) page for more information on how to interpret these plans. +
 EXPLAIN [ANALYZE] [VERBOSE] statement
 
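The same plans can also be produced programmatically rather than by writing the `EXPLAIN` statement by hand; a rough sketch using `DataFrame::explain` (the `hits.parquet` table registration is only illustrative):

```rust
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    // Register some table to query; the path here is only an example
    ctx.register_parquet("hits", "hits.parquet", ParquetReadOptions::default())
        .await?;

    // explain(verbose, analyze) corresponds to the EXPLAIN [ANALYZE] [VERBOSE] statement above
    let df = ctx.sql("SELECT COUNT(*) FROM hits").await?;
    df.explain(false, false)?.show().await?;
    Ok(())
}
```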
diff --git a/docs/src/lib.rs b/docs/src/lib.rs deleted file mode 100644 index f73132468ec9..000000000000 --- a/docs/src/lib.rs +++ /dev/null @@ -1,19 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#[cfg(test)] -mod library_logical_plan; diff --git a/docs/src/library_logical_plan.rs b/docs/src/library_logical_plan.rs deleted file mode 100644 index 355003941570..000000000000 --- a/docs/src/library_logical_plan.rs +++ /dev/null @@ -1,78 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -use datafusion::error::Result; -use datafusion::logical_expr::builder::LogicalTableSource; -use datafusion::logical_expr::{Filter, LogicalPlan, LogicalPlanBuilder, TableScan}; -use datafusion::prelude::*; -use std::sync::Arc; - -#[test] -fn plan_1() -> Result<()> { - // create a logical table source - let schema = Schema::new(vec![ - Field::new("id", DataType::Int32, true), - Field::new("name", DataType::Utf8, true), - ]); - let table_source = LogicalTableSource::new(SchemaRef::new(schema)); - - // create a TableScan plan - let projection = None; // optional projection - let filters = vec![]; // optional filters to push down - let fetch = None; // optional LIMIT - let table_scan = LogicalPlan::TableScan(TableScan::try_new( - "person", - Arc::new(table_source), - projection, - filters, - fetch, - )?); - - // create a Filter plan that evaluates `id > 500` and wraps the TableScan - let filter_expr = col("id").gt(lit(500)); - let plan = LogicalPlan::Filter(Filter::try_new(filter_expr, Arc::new(table_scan))?); - - // print the plan - println!("{}", plan.display_indent_schema()); - - Ok(()) -} - -#[test] -fn plan_builder_1() -> Result<()> { - // create a logical table source - let schema = Schema::new(vec![ - Field::new("id", DataType::Int32, true), - Field::new("name", DataType::Utf8, true), - ]); - let table_source = LogicalTableSource::new(SchemaRef::new(schema)); - - // optional projection - let projection = None; - - // create a LogicalPlanBuilder for a table scan - let builder = LogicalPlanBuilder::scan("person", Arc::new(table_source), projection)?; - - // perform a filter that evaluates `id > 500`, and build the plan - let plan = builder.filter(col("id").gt(lit(500)))?.build()?; - - // print the plan - println!("{}", plan.display_indent_schema()); - - Ok(()) -}